jaulib v1.3.0
Jau Support Library (C++, Java, ..)
base_codec.cpp
Go to the documentation of this file.
1/*
2 * Author: Sven Gothel <sgothel@jausoft.com>
3 * Copyright (c) 2022 Gothel Software e.K.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be
14 * included in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25#include <jau/base_codec.hpp>
26#include <jau/basic_algos.hpp>
27#include <jau/cpp_pragma.hpp>
28#include <jau/debug.hpp>
29
30using namespace jau;
31using namespace jau::codec::base;
32
33std::string jau::codec::base::encode(int num, const alphabet& aspec, const unsigned int min_width) noexcept
34{
35 const int base = aspec.base();
36 if( 0 > num || 1 >= base ) {
37 return "";
38 }
39 std::string res;
40 do {
41 std::div_t quotient = std::div(num, base);
42 res.insert( res.begin(), aspec[ quotient.rem ] ); // safe: base <= alphabet.length()
43 num = quotient.quot;
44 } while ( 0 != num );
45
46 const char s0 = aspec[0];
47 for(unsigned int i=res.length(); i<min_width; ++i) {
48 res.insert(res.begin(), s0);
49 }
50 return res;
51}
52
53std::string jau::codec::base::encode(int64_t num, const alphabet& aspec, const unsigned int min_width) noexcept {
54 const int base = aspec.base();
55 if( 0 > num || 1 >= base ) {
56 return "";
57 }
58 std::string res;
59 do {
60 std::lldiv_t quotient = std::lldiv(num, (int64_t)base);
61 res.insert( res.begin(), aspec[ quotient.rem ] ); // safe: base <= alphabet.length()
62 num = quotient.quot;
63 } while ( 0 != num );
64
65 const char s0 = aspec[0];
66 for(unsigned int i=res.length(); i<min_width; ++i) {
67 res.insert(res.begin(), s0);
68 }
69 return res;
70}
71
72int64_t jau::codec::base::decode(const std::string_view& str, const alphabet& aspec) noexcept
73{
74 const int base = aspec.base();
75 if( 1 >= base ) {
76 return -1;
77 }
78 std::string::size_type str_len = str.length();
79 int64_t res = 0;
80 for (std::string::size_type pos = 0; pos < str_len; ++pos) {
81 const int cp = aspec.code_point( str[pos] );
82 if( 0 > cp ) {
83 return -1; // encoded value not found
84 }
85 res = res * base + static_cast<int64_t>(cp);
86 }
87 return res;
88}
89
90std::string jau::codec::base::encode64(const void* in_octets, size_t in_len, const alphabet& aspec) noexcept {
91 if( 64 != aspec.base() ) {
92 return "";
93 }
94 const char padding = aspec.padding64();
95 const uint8_t* in_bytes = (const uint8_t*)in_octets;
96
97 size_t out_len = ( in_len + 2 ) / 3 * 4; // estimate ..
98 std::string res;
99 res.reserve(out_len);
100
101 while( 0 < in_len && 0 < out_len ) {
102 // Note: Addition is basically a bitwise XOR, plus carry bit
103
104 // 1st symbol
105 res.push_back( aspec[ ( in_bytes[0] >> 2 ) & 0x3f ] ); // take in[0] 6 bits[7..2] -> symbol[5..0]
106 if( 0 == --in_len ) {
107 // len == 1 bytes
108 // 2nd symbol
109 res.push_back( aspec[ ( in_bytes[0] << 4 ) & 0x3f ] ); // take in[0] 2 bits[1..0] -> symbol[5..4]
110 if( 0 != padding ) {
111 res.push_back(padding);
112 res.push_back(padding);
113 }
114 break;
115 } else {
116 // len >= 2 bytes
117 // 2nd symbol
118 res.push_back( aspec[ ( ( in_bytes[0] << 4 ) + ( in_bytes[1] >> 4) ) & 0x3f ] ); // take ( in[0] 2 bits[1..0] -> symbol[5..4] ) + ( int[1] 4 bits[7..4] -> symbol[3..0] )
119 }
120 if( 0 == --in_len ) {
121 // len == 2 bytes
122 // 3rd symbol
123 res.push_back( aspec[ ( in_bytes[1] << 2 ) & 0x3f ] ); // take in[1] 4 bits[3..0] -> symbol[5..2]
124 if( 0 != padding ) {
125 res.push_back(padding);
126 }
127 break;
128 } else {
129 // len >= 3 bytes
130 // 3rd symbol
131 res.push_back( aspec[ ( ( in_bytes[1] << 2 ) + ( in_bytes[2] >> 6) ) & 0x3f ] ); // take ( in[1] 4 bits[3..0] -> symbol[5..2] ) + ( int[2] 2 bits[7..6] -> symbol[1..0] )
132 // 4th symbol
133 res.push_back( aspec[ in_bytes[2] & 0x3f ] ); // take in[2] 6 bits[5..0] -> symbol[5..0]
134 --in_len;
135 in_bytes+=3;
136 }
137 }
138 return res;
139}
140
141std::vector<uint8_t> jau::codec::base::decode64(const std::string_view& in_code, const alphabet& aspec) noexcept {
142 if( 64 != aspec.base() ) {
143 return std::vector<uint8_t>(); // Error
144 }
145 size_t in_len = in_code.length();
146 if( 0 == in_len ) {
147 return std::vector<uint8_t>(); // OK
148 }
149 const char padding = aspec.padding64();
150 const char* in_chars = in_code.data();
151
152 const size_t out_len = 3 * ( in_len / 4 ) + 2; // estimate w/ potentially up to 2 additional bytes
153 std::vector<uint8_t> res;
154 res.reserve(out_len);
155
156 while( in_len >= 2 ) {
157 const int cp0 = aspec.code_point( in_chars[0] );
158 const int cp1 = aspec.code_point( in_chars[1] );
159 if( 0 > cp0 || 0 > cp1 ) {
160 break;
161 }
162 res.push_back( cp0 << 2 | cp1 >> 4 );
163 if( 2 == in_len ) {
164 if( 0 == padding ) {
165 in_len = 0; // accept w/o padding
166 }
167 break;
168 }
169 if( padding == in_chars[2] ) {
170 if( 4 != in_len ) {
171 break;
172 }
173 if( padding != in_chars[3] ) {
174 break;
175 }
176 } else {
177 const int cp2 = aspec.code_point( in_chars[2] );
178 if( 0 > cp2 ) {
179 break;
180 }
181 res.push_back( ( ( cp1 << 4 ) & 0xf0 ) | ( cp2 >> 2 ) );
182 if( 3 == in_len ) {
183 if( 0 == padding ) {
184 in_len = 0; // accept w/o padding
185 }
186 break;
187 }
188 if( padding == in_chars[3] ) {
189 if( 4 != in_len ) {
190 break;
191 }
192 } else {
193 const int cp3 = aspec.code_point( in_chars[3] );
194 if( 0 > cp3 ) {
195 break;
196 }
197 res.push_back( ( ( cp2 << 6 ) & 0xc0 ) | cp3 );
198 }
199 }
200 in_chars += 4;
201 in_len -= 4;
202 }
203
204 if( 0 != in_len ) {
205 DBG_PRINT("in_len %zu/%zu at '%s', out_len %zu/%zu\n", (in_code.length()-in_len), in_code.length(), std::string(in_code).c_str(), res.size(), out_len);
206 return std::vector<uint8_t>(); // decoding error
207 } else {
208 return res;
209 }
210}
211
212size_t jau::codec::base::insert_lf(std::string& str, const size_t period) noexcept {
213 size_t count = 0;
214 for(size_t i = period; i < str.length(); i += period + 1) {
216 PRAGMA_DISABLE_WARNING_RESTRICT
217 str.insert(i, "\n"); // bogus gcc 12.2 'may overlap'
219 ++count;
220 }
221 return count;
222}
223
224size_t jau::codec::base::remove_lf(std::string& str) noexcept {
225 size_t count = 0;
226 auto it = jau::remove_if( str.begin(), str.end(), [&count](char c){
227 if( c == 0x0a ) {
228 ++count;
229 return true;
230 } else {
231 return false;
232 }
233 });
234 str.erase(it, str.end()); // erase empty tail
235 return count;
236}
Base Alphabet Specification providing the alphabet for encode() and decode().
Definition: base_codec.hpp:56
#define DBG_PRINT(...)
Use for environment-variable environment::DEBUG conditional debug messages, prefix '[elapsed_time] De...
Definition: debug.hpp:52
ForwardIt remove_if(ForwardIt first, ForwardIt last, UnaryPredicate p)
Identical to C++20 std::remove_if() of algorithm
size_t insert_lf(std::string &str, const size_t period) noexcept
Inserts a line feed (LF) character \n (ASCII 0x0a) after every period of characters.
Definition: base_codec.cpp:212
std::string encode(int num, const alphabet &aspec, const unsigned int min_width=0) noexcept
Encodes a given positive decimal number to a symbolic string representing a given alphabet and its ba...
Definition: base_codec.cpp:33
std::string encode64(const void *in_octets, size_t in_len, const alphabet &aspec) noexcept
Encodes given octets using the given alphabet and fixed base 64 encoding according to base64 RFC 4648...
Definition: base_codec.cpp:90
std::vector< uint8_t > decode64(const std::string_view &str, const alphabet &aspec) noexcept
Decodes a given symbolic string, represented using the given alphabet and fixed base 64, to octets accordi...
Definition: base_codec.cpp:141
size_t remove_lf(std::string &str) noexcept
Removes line feed character from str.
Definition: base_codec.cpp:224
int64_t decode(const std::string_view &str, const alphabet &aspec) noexcept
Decodes a given symbolic string representing a given alphabet and its base to a positive decimal numb...
Definition: base_codec.cpp:72
#define PRAGMA_DISABLE_WARNING_PUSH
Definition: cpp_pragma.hpp:76
#define PRAGMA_DISABLE_WARNING_POP
Definition: cpp_pragma.hpp:77
Base codecs, i.e.
Definition: base_codec.hpp:37
__pack(...): Produces MSVC, clang and gcc compatible lead-in and -out macros.
Definition: backtrace.hpp:32