jaulib v1.3.0
Jau Support Library (C++, Java, ..)
BaseCodec.java
Go to the documentation of this file.
1/**
2 * Author: Sven Gothel <sgothel@jausoft.com>
3 * Copyright (c) 2022 Gothel Software e.K.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be
14 * included in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24package org.jau.util;
25
26import java.nio.ByteBuffer;
27
28/**
29 * Base codecs, i.e. changing the decimal or binary values' base for a different representation.
30 */
31public class BaseCodec {
32
33 /**
34 * Base Alphabet Specification providing the alphabet for encode() and decode().
35 *
36 * @see {@link BaseCodec#encode(int, int, Alphabet, int)}
37 * @see {@link BaseCodec#encode(long, int, Alphabet, int)}
38 * @see {@link BaseCodec#decode(String, int, Alphabet)}
39 */
40 public static abstract class Alphabet {
41 private final String name_;
42 private final int base_;
43 private final String symbols_;
44 private final char padding64_;
45
46 protected Alphabet(final String name, final int base, final String symbols, final char passing64) {
47 this.name_ = name;
48 this.base_ = base;
49 this.symbols_ = symbols;
50 this.padding64_ = passing64;
51 }
52
53 /** Human readable name for this alphabet instance. */
54 public final String name() { return name_; }
55
56 /** The fixed base used for this alphabet. */
57 public final int base() { return base_; }
58
59 /** The string of symbols of this alphabet. */
60 public final String symbols() { return symbols_; }
61
62 /** Padding symbol for base <= 64 and block encoding only. May return zero for no padding. */
63 public final char padding64() { return padding64_; }
64
65 /** Returns the code-point of the given character or -1 if not element of this alphabet. */
66 public abstract int code_point(final char c);
67
68 /** Retrieve the character at given code-point of this alphabet. */
69 public final char charAt( final int cp ) { return symbols().charAt(cp); }
70
71 @Override
72 public boolean equals(final Object o) {
73 if( this == o ) {
74 return true;
75 }
76 if( o instanceof Alphabet ) {
77 final Alphabet oa = (Alphabet)o;
78 return base() == base() && name().equals(oa.name()) && symbols().equals(oa.symbols());
79 }
80 return false;
81 }
82
83 @Override
84 public String toString() {
85 return "Alphabet["+name_+", base <= "+base_+"]";
86 }
87 };
88
89 /**
90 * Safe canonical `base64` alphabet, without ASCII code-point sorting order.
91 *
92 * Representing the canonical `base64` [RFC 4648](https://www.rfc-editor.org/rfc/rfc4648.html) *Base 64 Alphabet*
93 * including its code-point order `A` < `a` < `0` < `/`.
94 *
95 * - Value: `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/`
96 * - Padding: `=`
97 *
98 * ### Properties
99 * - Base 64
100 * - 7-bit ASCII
101 * - Code page 437 compatible
102 * - [`base64` alphabet](https://www.rfc-editor.org/rfc/rfc4648.html), identical order
103 * - Excludes quoting chars: "'$ and space
104 * - Not supporting ASCII code-point sorting.
105 * - Order: `A` < `a` < `0` < `/`
106 */
107 public static class Base64Alphabet extends Alphabet {
108 private static final String data = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
109
110 @Override
111 public int code_point(final char c) {
112 if ('A' <= c && c <= 'Z') {
113 return c - 'A';
114 } else if ('a' <= c && c <= 'z') {
115 return c - 'a' + 26;
116 } else if ('0' <= c && c <= '9') {
117 return c - '0' + 52;
118 } else if ('+' == c) {
119 return 62;
120 } else if ('/' == c) {
121 return 63;
122 } else {
123 return -1;
124 }
125 }
126
127 public Base64Alphabet() {
128 super("base64", 64, data, '=');
129 }
130 }
131
132 /**
133 * Safe canonical `base64url` alphabet, without ASCII code-point sorting order.
134 *
135 * Representing the canonical `base64url` [RFC 4648](https://www.rfc-editor.org/rfc/rfc4648.html) `URL and Filename safe` *Base 64 Alphabet*
136 * including its code-point order `A` < `a` < `0` < `_`.
137 *
138 * - Value: `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_`
139 * - Padding: `=`
140 *
141 * ### Properties
142 * - Base 64
143 * - 7-bit ASCII
144 * - Code page 437 compatible
145 * - [`base64url` alphabet](https://www.rfc-editor.org/rfc/rfc4648.html), identical order
146 * - Safe URL and filename use
147 * - Excludes forbidden [v]fat chars: `<>:"/\|?*`
148 * - Excludes quoting chars: "'$ and space
149 * - Not supporting ASCII code-point sorting.
150 * - Order: `A` < `a` < `0` < `_`
151 */
152 public static class Base64urlAlphabet extends Alphabet {
153 private static final String data = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
154
155 @Override
156 public int code_point(final char c) {
157 if ('A' <= c && c <= 'Z') {
158 return c - 'A';
159 } else if ('a' <= c && c <= 'z') {
160 return c - 'a' + 26;
161 } else if ('0' <= c && c <= '9') {
162 return c - '0' + 52;
163 } else if ('-' == c) {
164 return 62;
165 } else if ('_' == c) {
166 return 63;
167 } else {
168 return -1;
169 }
170 }
171
173 super("base64url", 64, data, '=');
174 }
175 }
176
177 /**
178 * Safe natural base 64 alphabet, both without ASCII code-point sorting order.
179 *
180 * Order is considered a natural extension of decimal symbols, i.e. `0` < `a` < `A` < `_`.
181 *
182 * - Value: `0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_`
183 * - Padding: `=`
184 *
185 * ### Properties
186 * - Base 64
187 * - 7-bit ASCII
188 * - Code page 437 compatible
189 * - [`base64url` alphabet](https://www.rfc-editor.org/rfc/rfc4648.html), but different order
190 * - Safe URL and filename use
191 * - Excludes forbidden [v]fat chars: `<>:"/\|?*`
192 * - Excludes quoting chars: "'$ and space
193 * - Not supporting ASCII code-point sorting.
194 * - Order: `0` < `a` < `A` < `_`
195 */
196 public static class Natural64Alphabet extends Alphabet {
197 private static final String data = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_";
198
199 @Override
200 public int code_point(final char c) {
201 if ('0' <= c && c <= '9') {
202 return c - '0';
203 } else if ('a' <= c && c <= 'z') {
204 return c - 'a' + 10;
205 } else if ('A' <= c && c <= 'Z') {
206 return c - 'A' + 36;
207 } else if ('-' == c) {
208 return 62;
209 } else if ('_' == c) {
210 return 63;
211 } else {
212 return -1;
213 }
214 }
215
217 super("natural64", 64, data, '=');
218 }
219 }
220
221 /**
222 * Natural base 86 alphabet, without ASCII code-point sorting order.
223 *
224 * Order is considered a natural extension of decimal symbols, i.e. `0` < `a` < `A` < `_` < `~`
225 *
226 * - Value: `0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_!#%&()+,/:;<=>?@[]^{}~`
227 * - Padding: none
228 *
229 * ### Properties
230 * - Base 86
231 * - 7-bit ASCII
232 * - Code page 437 compatible
233 * - Excludes quoting chars: "'$ and space
234 * - Not supporting ASCII code-point sorting.
235 * - Order: `0` < `a` < `A` < `_` < `~`
236 *
237 * @see encodeBase()
238 * @see decodeBase()
239 */
240 public static class Natural86Alphabet extends Alphabet {
241 private static final String data = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_!#%&()+,/:;<=>?@[]^{}~";
242
243 @Override
244 public int code_point(final char c) {
245 if ('0' <= c && c <= '9') {
246 return c - '0';
247 } else if ('a' <= c && c <= 'z') {
248 return c - 'a' + 10;
249 } else if ('A' <= c && c <= 'Z') {
250 return c - 'A' + 36;
251 } else {
252 switch( c ) {
253 case '-': return 62;
254 case '_': return 63;
255 case '!': return 64;
256 case '#': return 65;
257 case '%': return 66;
258 case '&': return 67;
259 case '(': return 68;
260 case ')': return 69;
261 case '+': return 70;
262 case ',': return 71;
263 case '/': return 72;
264 case ':': return 73;
265 case ';': return 74;
266 case '<': return 75;
267 case '=': return 76;
268 case '>': return 77;
269 case '?': return 78;
270 case '@': return 79;
271 case '[': return 80;
272 case ']': return 81;
273 case '^': return 82;
274 case '{': return 83;
275 case '}': return 84;
276 case '~': return 85;
277 default: return -1;
278 }
279 }
280 }
281
283 super("natural86", 86, data, (char)0);
284 }
285 }
286
287 /**
288 * Safe base 38 alphabet with ASCII code-point sorting order.
289 *
290 * - Value: `-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_`
291 * - Padding: `=`
292 *
293 * ### Properties
294 * - Base 38
295 * - 7-bit ASCII
296 * - Code page 437 compatible
297 * - Safe URL and filename use
298 * - Excludes forbidden [v]fat chars: `<>:"/\|?*`
299 * - Only using upper-case letters for unique filename under vfat
300 * - Excludes quoting chars: "'$ and space
301 * - Supporting ASCII code-point sorting.
302 * - Order: `-` < `0` < `A` < `a` < `z`
303 *
304 * @see encodeBase()
305 * @see decodeBase()
306 */
307 public static class Ascii38Alphabet extends Alphabet {
308 private static final String data = "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_";
309
310 @Override
311 public int code_point(final char c) {
312 if ('0' <= c && c <= '9') {
313 return c - '0' + 1;
314 } else if ('A' <= c && c <= 'Z') {
315 return c - 'A' + 11;
316 } else if ('-' == c) {
317 return 0;
318 } else if ('_' == c) {
319 return 37;
320 } else {
321 return -1;
322 }
323 }
324
326 super("ascii38", 38, data, '=');
327 }
328 }
329
330 /**
331 * Safe base 64 alphabet with ASCII code-point sorting order.
332 *
333 * - Value: `-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz`
334 * - Padding: `=`
335 *
336 * ### Properties
337 * - Base 64
338 * - 7-bit ASCII
339 * - Code page 437 compatible
340 * - [`base64url` alphabet](https://www.rfc-editor.org/rfc/rfc4648.html), but different order
341 * - Safe URL and filename use
342 * - Excludes forbidden [v]fat chars: `<>:"/\|?*`
343 * - Excludes quoting chars: "'$ and space
344 * - Supporting ASCII code-point sorting.
345 * - Order: `-` < `0` < `A` < `a` < `z`
346 *
347 * @see encodeBase()
348 * @see decodeBase()
349 */
350 public static class Ascii64Alphabet extends Alphabet {
351 private static final String data = "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
352
353 @Override
354 public int code_point(final char c) {
355 if ('0' <= c && c <= '9') {
356 return c - '0' + 1;
357 } else if ('A' <= c && c <= 'Z') {
358 return c - 'A' + 11;
359 } else if ('a' <= c && c <= 'z') {
360 return c - 'a' + 38;
361 } else if ('-' == c) {
362 return 0;
363 } else if ('_' == c) {
364 return 37;
365 } else {
366 return -1;
367 }
368 }
369
371 super("ascii64", 64, data, '=');
372 }
373 }
374
375 /**
376 * Base 86 alphabet with ASCII code-point sorting order.
377 *
378 * - Value: `!#%&()+,-/0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_abcdefghijklmnopqrstuvwxyz{}~`
379 * - Padding: None
380 *
381 * ### Properties
382 * - Base 86
383 * - 7-bit ASCII
384 * - Code page 437 compatible
385 * - Excludes quoting chars: "'$ and space
386 * - Supporting ASCII code-point sorting.
387 * - Order: `!` < `0` < `:` < `A` < `[` < `a` < `{` < `~`
388 *
389 * @see encodeBase()
390 * @see decodeBase()
391 */
392 public static class Ascii86Alphabet extends Alphabet {
393 private static final String data = "!#%&()+,-/0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_abcdefghijklmnopqrstuvwxyz{}~";
394
395 @Override
396 public int code_point(final char c) {
397 if ('0' <= c && c <= '9') {
398 return c - '0' + 10;
399 } else if ('A' <= c && c <= 'Z') {
400 return c - 'A' + 27;
401 } else if ('a' <= c && c <= 'z') {
402 return c - 'a' + 57;
403 } else {
404 switch( c ) {
405 case '!': return 0;
406 case '#': return 1;
407 case '%': return 2;
408 case '&': return 3;
409 case '(': return 4;
410 case ')': return 5;
411 case '+': return 6;
412 case ',': return 7;
413 case '-': return 8;
414 case '/': return 9;
415
416 case ':': return 20;
417 case ';': return 21;
418 case '<': return 22;
419 case '=': return 23;
420 case '>': return 24;
421 case '?': return 25;
422 case '@': return 26;
423
424 case '[': return 53;
425 case ']': return 54;
426 case '^': return 55;
427 case '_': return 56;
428
429 case '{': return 83;
430 case '}': return 84;
431 case '~': return 85;
432 default: return -1;
433 }
434 }
435 }
436
438 super("ascii86", 86, data, (char)0);
439 }
440 }
441
442 /**
443 * Encodes a given positive decimal number to a symbolic string representing a given alphabet and its base.
444 *
445 * Besides using a custom alphabet, the following build-in alphabets are provided
446 * - {@link BaseCodec.Base64Alphabet}
447 * - {@link BaseCodec.Base64urlAlphabet}
448 * - {@link BaseCodec.Natural86Alphabet}
449 * - {@link BaseCodec.Ascii64Alphabet}
450 * - {@link BaseCodec.Ascii86Alphabet}
451 *
452 * @param num a positive decimal number
453 * @param aspec the used alphabet specification
454 * @param min_width minimum width of the encoded string, encoded zero is used for padding
455 * @return the encoded string or an empty string if base exceeds Alphabet.max_base() or invalid arguments
456 *
457 * @see {@link BaseCodec#encode(long, int, Alphabet, int)}
458 * @see {@link BaseCodec#decode(String, int, Alphabet)}
459 */
460 public static String encode(int num, final Alphabet aspec, final int min_width) {
461 final int base = aspec.base();
462 if( 0 > num || 1 >= base ) {
463 return "";
464 }
465 final StringBuilder res = new StringBuilder();
466 do {
467 res.insert( 0, aspec.charAt( num % base ) ); // safe: base <= alphabet.length()
468 num /= base;
469 } while ( 0 != num );
470
471 final char s0 = aspec.charAt(0);
472 for(int i=res.length(); i<min_width; ++i) {
473 res.insert(0, s0);
474 }
475 return res.toString();
476 }
477
478 /**
479 * Encodes a given positive decimal number to a symbolic string representing given alphabet and its base.
480 *
481 * Besides using a custom alphabet, the following build-in alphabets are provided
482 * - {@link BaseCodec.Base64Alphabet}
483 * - {@link BaseCodec.Base64urlAlphabet}
484 * - {@link BaseCodec.Natural86Alphabet}
485 * - {@link BaseCodec.Ascii64Alphabet}
486 * - {@link BaseCodec.Ascii86Alphabet}
487 *
488 * @param num a positive decimal number
489 * @param aspec the used alphabet specification
490 * @param min_width minimum width of the encoded string, encoded zero is used for padding
491 * @return the encoded string or an empty string if base exceeds Alphabet.max_base() or invalid arguments
492 *
493 * @see {@link BaseCodec#encode(int, int, Alphabet, int)}
494 * @see {@link BaseCodec#decode(String, int, Alphabet)}
495 */
496 public static String encode(long num, final Alphabet aspec, final int min_width) {
497 final long base = aspec.base();
498 if( 0 > num || 1 >= base ) {
499 return "";
500 }
501 final StringBuilder res = new StringBuilder();
502 do {
503 res.insert( 0, aspec.charAt( (int)( num % base ) ) ); // safe: base <= alphabet.length()
504 num /= base;
505 } while ( 0 != num );
506
507 final char s0 = aspec.charAt(0);
508 for(int i=res.length(); i<min_width; ++i) {
509 res.insert(0, s0);
510 }
511 return res.toString();
512 }
513
514 /**
515 * Encodes a given positive decimal number to a symbolic string representing a given alphabet and its base.
516 *
517 * Besides using a custom alphabet, the following build-in alphabets are provided
518 * - {@link BaseCodec.Base64Alphabet}
519 * - {@link BaseCodec.Base64urlAlphabet}
520 * - {@link BaseCodec.Natural86Alphabet}
521 * - {@link BaseCodec.Ascii64Alphabet}
522 * - {@link BaseCodec.Ascii86Alphabet}
523 *
524 * @param num a positive decimal number
525 * @param aspec the used alphabet specification
526 * @return the encoded string or an empty string if base exceeds Alphabet.max_base() or invalid arguments
527 *
528 * @see {@link BaseCodec#encode(int, int, Alphabet, int)}
529 * @see {@link BaseCodec#decode(String, int, Alphabet)}
530 */
531 public static String encode(final int num, final Alphabet aspec) {
532 return encode(num, aspec, 0 /* min_width */);
533 }
534
535 /**
536 * Encodes a given positive decimal number to a symbolic string representing a given alphabet and its base.
537 *
538 * Besides using a custom alphabet, the following build-in alphabets are provided
539 * - {@link BaseCodec.Base64Alphabet}
540 * - {@link BaseCodec.Base64urlAlphabet}
541 * - {@link BaseCodec.Natural86Alphabet}
542 * - {@link BaseCodec.Ascii64Alphabet}
543 * - {@link BaseCodec.Ascii86Alphabet}
544 *
545 * @param num a positive decimal number
546 * @param aspec the used alphabet specification
547 * @return the encoded string or an empty string if base exceeds Alphabet.max_base() or invalid arguments
548 *
549 * @see {@link BaseCodec#encode(long, int, Alphabet, int)}
550 * @see {@link BaseCodec#decode(String, int, Alphabet)}
551 */
552 public static String encode(final long num, final Alphabet aspec) {
553 return encode(num, aspec, 0 /* min_width */);
554 }
555
556 /**
557 * Decodes a given symbolic string representing a given alphabet and its base to a positive decimal number.
558 *
559 * Besides using a custom alphabet, the following build-in alphabets are provided
560 * - {@link BaseCodec.Base64Alphabet}
561 * - {@link BaseCodec.Base64urlAlphabet}
562 * - {@link BaseCodec.Natural86Alphabet}
563 * - {@link BaseCodec.Ascii64Alphabet}
564 * - {@link BaseCodec.Ascii86Alphabet}
565 *
566 * @param str an encoded string
567 * @param aspec the used alphabet specification
568 * @return the decoded radix decimal value or -1 if base exceeds Alphabet.max_base(), unknown code-point or invalid arguments
569 *
570 * @see {@link BaseCodec#encode(int, int, Alphabet, int)}
571 * @see {@link BaseCodec#encode(long, int, Alphabet, int)}
572 */
573 public static long decode(final String str, final Alphabet aspec) {
574 final int base = aspec.base();
575 if( 1 >= base ) {
576 return -1;
577 }
578 final int str_len = str.length();
579 long res = 0;
580 for (int i = 0; i < str_len; ++i) {
581 final int d = aspec.code_point( str.charAt(i) );
582 if( 0 > d ) {
583 return -1; // encoded value not found
584 }
585 res = res * base + d;
586 }
587 return res;
588 }
589
590 private static int to_int(final byte b) { return b & 0xff; }
591
592 /**
593 * Encodes given octets using the given alphabet and fixed base 64 encoding
594 * according to `base64` [RFC 4648](https://www.rfc-editor.org/rfc/rfc4648.html).
595 *
596 * An error only occurs if in_len > 0 and resulting encoded string is empty.
597 *
598 * @param in_octets source byte array
599 * @param in_pos index to octets start
600 * @param in_len length of octets in bytes
601 * @param aspec the used base 64 alphabet specification
602 * @return the encoded string, empty if base exceeds alphabet::max_base() or invalid arguments
603 */
604 public static StringBuilder encode64(final byte[] in_octets, int in_pos, int in_len, final Alphabet aspec) {
605 if( 64 != aspec.base() || in_pos + in_len > in_octets.length ) {
606 return new StringBuilder(0);
607 }
608 final char padding = aspec.padding64();
609
610 final int out_len = ( in_len + 2 ) / 3 * 4; // estimate ..
611 final StringBuilder res = new StringBuilder(out_len);
612
613 while( 0 < in_len && 0 < out_len ) {
614 // Note: Addition is basically a bitwise XOR, plus carry bit
615
616 // 1st symbol
617 res.append( aspec.charAt( ( to_int(in_octets[in_pos+0]) >> 2 ) & 0x3f ) ); // take in[0] 6 bits[7..2] -> symbol[5..0]
618 if( 0 == --in_len ) {
619 // len == 1 bytes
620 // 2nd symbol
621 res.append( aspec.charAt( ( to_int(in_octets[in_pos+0]) << 4 ) & 0x3f ) ); // take in[0] 2 bits[1..0] -> symbol[5..4]
622 if( 0 != padding ) {
623 res.append(padding);
624 res.append(padding);
625 }
626 break;
627 } else {
628 // len >= 2 bytes
629 // 2nd symbol
630 res.append( aspec.charAt( ( ( to_int(in_octets[in_pos+0]) << 4 ) + ( to_int(in_octets[in_pos+1]) >> 4) ) & 0x3f ) ); // take ( in[0] 2 bits[1..0] -> symbol[5..4] ) + ( int[1] 4 bits[7..4] -> symbol[3..0] )
631 }
632 if( 0 == --in_len ) {
633 // len == 2 bytes
634 // 3rd symbol
635 res.append( aspec.charAt( ( to_int(in_octets[in_pos+1]) << 2 ) & 0x3f ) ); // take in[1] 4 bits[3..0] -> symbol[5..2]
636 if( 0 != padding ) {
637 res.append(padding);
638 }
639 break;
640 } else {
641 // len >= 3 bytes
642 // 3rd symbol
643 res.append( aspec.charAt( ( ( to_int(in_octets[in_pos+1]) << 2 ) + ( to_int(in_octets[in_pos+2]) >> 6) ) & 0x3f ) ); // take ( in[1] 4 bits[3..0] -> symbol[5..2] ) + ( int[2] 2 bits[7..6] -> symbol[1..0] )
644 // 4th symbol
645 res.append( aspec.charAt( to_int(in_octets[in_pos+2]) & 0x3f ) ); // take in[2] 6 bits[5..0] -> symbol[5..0]
646 --in_len;
647 in_pos+=3;
648 }
649 }
650 return res;
651 }
652
653 /**
654 * Decodes a given symbolic string representing using given alphabet and fixed base 64 to octets
655 * according to `base64` [RFC 4648](https://www.rfc-editor.org/rfc/rfc4648.html).
656 *
657 * An error only occurs if the encoded string length > 0 and resulting decoded octets size is empty.
658 *
659 * @param in_code encoded string
660 * @param aspec the used base 64 alphabet specification
661 * @return the decoded octets, empty if base exceeds alphabet::max_base(), unknown code-point or invalid arguments
662 */
663 public static ByteBuffer decode64(final String in_code, final Alphabet aspec) {
664 if( 64 != aspec.base() ) {
665 return ByteBuffer.allocate(0); // Error
666 }
667 int in_len = in_code.length();
668 if( in_len == 0 ) {
669 return ByteBuffer.allocate(0); // OK
670 }
671 final char padding = aspec.padding64();
672
673 final int out_len = 3 * ( in_len / 4 ) + 2; // estimate w/ potentially up to 2 additional bytes
674 final ByteBuffer res = ByteBuffer.allocate(out_len);
675 int in_pos = 0;
676
677 while( in_len >= 2 ) {
678 final int cp0 = aspec.code_point( in_code.charAt( in_pos + 0 ) );
679 final int cp1 = aspec.code_point( in_code.charAt( in_pos + 1 ) );
680 if( 0 > cp0 || 0 > cp1 ) {
681 break;
682 }
683 res.put( (byte)(cp0 << 2 | cp1 >> 4) );
684 if( 2 == in_len ) {
685 if( 0 == padding ) {
686 in_len = 0; // accept w/o padding
687 }
688 break;
689 }
690 if( padding == in_code.charAt( in_pos + 2 ) ) {
691 if( 4 != in_len ) {
692 break;
693 }
694 if( padding != in_code.charAt( in_pos + 3 ) ) {
695 break;
696 }
697 } else {
698 final int cp2 = aspec.code_point( in_code.charAt( in_pos + 2 ) );
699 if( 0 > cp2 ) {
700 break;
701 }
702 res.put( (byte)( ( ( cp1 << 4 ) & 0xf0 ) | ( cp2 >> 2 ) ) );
703 if( 3 == in_len ) {
704 if( 0 == padding ) {
705 in_len = 0; // accept w/o padding
706 }
707 break;
708 }
709 if( padding == in_code.charAt( in_pos + 3 ) ) {
710 if( 4 != in_len ) {
711 break;
712 }
713 } else {
714 final int cp3 = aspec.code_point( in_code.charAt( in_pos + 3 ) );
715 if( 0 > cp3 ) {
716 break;
717 }
718 res.put( (byte)( ( ( cp2 << 6 ) & 0xc0 ) | cp3 ) );
719 }
720 }
721 in_pos += 4;
722 in_len -= 4;
723 }
724
725 if( 0 != in_len ) {
726 // System.err.printf("in_len %d/%d at '%s', out_len %d/%d\n", in_pos, in_code.length(), in_code, res.position(), out_len);
727 res.clear(); // decoding error, position = 0, limit = capacity
728 }
729 res.flip(); // limit = position, position = 0, remaining() = limit - position
730 return res;
731 }
732
733 /**
734 * Inserts a line feed (LF) character `\n` (ASCII 0x0a) after every period of characters.
735 *
736 * @param str the input string of characters, which will be mutated.
737 * @param period period of characters after which one LF will be inserted.
738 * @return count of inserted LF characters
739 */
740 public static int insert_lf(final StringBuilder str, final int period) {
741 int count = 0;
742 for(int i = period; i < str.length(); i += period + 1) {
743 str.insert(i, "\n");
744 ++count;
745 }
746 return count;
747 }
748
749 /**
750 * Removes line feed character from str.
751 *
752 * @param str the input string of characters, which will be mutated.
753 * @return count of removed LF characters
754 */
755 public static int remove_lf(final StringBuilder str) {
756 int count = 0;
757 int pos = 0;
758 pos = str.indexOf("\n", 0);
759 while( 0 < pos && pos <= str.length() ) {
760 str.replace(pos, pos+1, "");
761 ++count;
762 pos = str.indexOf("\n", pos);
763 }
764 return count;
765 }
766
767 /**
768 * Encodes given octets using the given alphabet and fixed base 64 encoding
769 * according to `base64` [RFC 4648](https://www.rfc-editor.org/rfc/rfc4648.html)
770 * and adds line-feeds every 64 characters as required for PEM.
771 *
772 * An error only occurs if in_len > 0 and resulting encoded string is empty.
773 *
774 * @param in_octets pointer to octets start
775 * @param in_len length of octets in bytes
776 * @param aspec the used alphabet specification
777 * @return the encoded string, empty if base exceeds alphabet::max_base() or invalid arguments
778 */
779 public static StringBuilder encode64_pem(final byte[] in_octets, final int in_pos, final int in_len, final Alphabet aspec) {
780 final StringBuilder e = encode64(in_octets, in_pos, in_len, aspec);
781 insert_lf(e, 64);
782 return e;
783 }
784
785 /**
786 * Encodes given octets using the given alphabet and fixed base 64 encoding
787 * according to `base64` [RFC 4648](https://www.rfc-editor.org/rfc/rfc4648.html)
788 * and adds line-feeds every 76 characters as required for MIME.
789 *
790 * An error only occurs if in_len > 0 and resulting encoded string is empty.
791 *
792 * @param in_octets pointer to octets start
793 * @param in_len length of octets in bytes
794 * @param aspec the used base 64 alphabet specification
795 * @return the encoded string, empty if base exceeds alphabet::max_base() or invalid arguments
796 */
797 public static StringBuilder encode64_mime(final byte[] in_octets, final int in_pos, final int in_len, final Alphabet aspec) {
798 final StringBuilder e = encode64(in_octets, in_pos, in_len, aspec);
799 insert_lf(e, 76);
800 return e;
801 }
802
803 /**
804 * Decodes a given symbolic string representing using given alphabet and fixed base 64 to octets
805 * according to `base64` [RFC 4648](https://www.rfc-editor.org/rfc/rfc4648.html)
806 * and removes all linefeeds before decoding as required for PEM and MIME.
807 *
808 * An error only occurs if the encoded string length > 0 and resulting decoded octets size is empty.
809 *
810 * @param str and encoded string, will be copied
811 * @param aspec the used base 64 alphabet specification
812 * @return the decoded octets, empty if base exceeds alphabet::max_base(), unknown code-point or invalid arguments
813 */
814 public static ByteBuffer decode64_lf(final String str, final Alphabet aspec) {
815 final StringBuilder e = new StringBuilder(str); // costly copy
816 remove_lf(e);
817 return decode64(e.toString(), aspec);
818 }
819
820 /**
821 * Decodes a given symbolic string representing using given alphabet and fixed base 64 to octets
822 * according to `base64` [RFC 4648](https://www.rfc-editor.org/rfc/rfc4648.html)
823 * and removes all linefeeds before decoding as required for PEM and MIME.
824 *
825 * An error only occurs if the encoded string length > 0 and resulting decoded octets size is empty.
826 *
827 * @param str and encoded string, no copy, will be mutated
828 * @param aspec the used base 64 alphabet specification
829 * @return the decoded octets, empty if base exceeds alphabet::max_base(), unknown code-point or invalid arguments
830 */
831 public static ByteBuffer decode64_lf(final StringBuilder str, final Alphabet aspec) {
832 remove_lf(str);
833 return decode64(str.toString(), aspec);
834 }
835
836}
Base Alphabet Specification providing the alphabet for encode() and decode().
Definition: BaseCodec.java:40
final String name()
Human readable name for this alphabet instance.
Definition: BaseCodec.java:54
abstract int code_point(final char c)
Returns the code-point of the given character or -1 if not element of this alphabet.
final char charAt(final int cp)
Retrieve the character at given code-point of this alphabet.
Definition: BaseCodec.java:69
final char padding64()
Padding symbol for base <= 64 and block encoding only.
Definition: BaseCodec.java:63
final int base()
The fixed base used for this alphabet.
Definition: BaseCodec.java:57
boolean equals(final Object o)
Definition: BaseCodec.java:72
final String symbols()
The string of symbols of this alphabet.
Definition: BaseCodec.java:60
Alphabet(final String name, final int base, final String symbols, final char passing64)
Definition: BaseCodec.java:46
Safe base 38 alphabet with ASCII code-point sorting order.
Definition: BaseCodec.java:307
int code_point(final char c)
Returns the code-point of the given character or -1 if not element of this alphabet.
Definition: BaseCodec.java:311
Safe base 64 alphabet with ASCII code-point sorting order.
Definition: BaseCodec.java:350
int code_point(final char c)
Returns the code-point of the given character or -1 if not element of this alphabet.
Definition: BaseCodec.java:354
Base 86 alphabet with ASCII code-point sorting order.
Definition: BaseCodec.java:392
int code_point(final char c)
Returns the code-point of the given character or -1 if not element of this alphabet.
Definition: BaseCodec.java:396
Safe canonical base64 alphabet, without ASCII code-point sorting order.
Definition: BaseCodec.java:107
int code_point(final char c)
Returns the code-point of the given character or -1 if not element of this alphabet.
Definition: BaseCodec.java:111
Safe canonical base64url alphabet, without ASCII code-point sorting order.
Definition: BaseCodec.java:152
int code_point(final char c)
Returns the code-point of the given character or -1 if not element of this alphabet.
Definition: BaseCodec.java:156
Safe natural base 64 alphabet, both without ASCII code-point sorting order.
Definition: BaseCodec.java:196
int code_point(final char c)
Returns the code-point of the given character or -1 if not element of this alphabet.
Definition: BaseCodec.java:200
Natural base 86 alphabet, without ASCII code-point sorting order.
Definition: BaseCodec.java:240
int code_point(final char c)
Returns the code-point of the given character or -1 if not element of this alphabet.
Definition: BaseCodec.java:244
Base codecs, i.e. changing the decimal or binary values' base for a different representation.
Definition: BaseCodec.java:31
static StringBuilder encode64_pem(final byte[] in_octets, final int in_pos, final int in_len, final Alphabet aspec)
Encodes given octets using the given alphabet and fixed base 64 encoding according to base64 RFC 4648...
Definition: BaseCodec.java:779
static StringBuilder encode64_mime(final byte[] in_octets, final int in_pos, final int in_len, final Alphabet aspec)
Encodes given octets using the given alphabet and fixed base 64 encoding according to base64 RFC 4648...
Definition: BaseCodec.java:797
static String encode(long num, final Alphabet aspec, final int min_width)
Encodes a given positive decimal number to a symbolic string representing given alphabet and its base...
Definition: BaseCodec.java:496
static ByteBuffer decode64_lf(final StringBuilder str, final Alphabet aspec)
Decodes a given symbolic string representing using given alphabet and fixed base 64 to octets accordi...
Definition: BaseCodec.java:831
static String encode(final long num, final Alphabet aspec)
Encodes a given positive decimal number to a symbolic string representing a given alphabet and its ba...
Definition: BaseCodec.java:552
static long decode(final String str, final Alphabet aspec)
Decodes a given symbolic string representing a given alphabet and its base to a positive decimal numb...
Definition: BaseCodec.java:573
static String encode(int num, final Alphabet aspec, final int min_width)
Encodes a given positive decimal number to a symbolic string representing a given alphabet and its ba...
Definition: BaseCodec.java:460
static StringBuilder encode64(final byte[] in_octets, int in_pos, int in_len, final Alphabet aspec)
Encodes given octets using the given alphabet and fixed base 64 encoding according to base64 RFC 4648...
Definition: BaseCodec.java:604
static int remove_lf(final StringBuilder str)
Removes line feed character from str.
Definition: BaseCodec.java:755
static ByteBuffer decode64_lf(final String str, final Alphabet aspec)
Decodes a given symbolic string representing using given alphabet and fixed base 64 to octets accordi...
Definition: BaseCodec.java:814
static int insert_lf(final StringBuilder str, final int period)
Inserts a line feed (LF) character \n (ASCII 0x0a) after every period of characters.
Definition: BaseCodec.java:740
static ByteBuffer decode64(final String in_code, final Alphabet aspec)
Decodes a given symbolic string representing using given alphabet and fixed base 64 to octets accordi...
Definition: BaseCodec.java:663
static String encode(final int num, final Alphabet aspec)
Encodes a given positive decimal number to a symbolic string representing a given alphabet and its ba...
Definition: BaseCodec.java:531