jaulib v1.3.6
Jau Support Library (C++, Java, ..)
Loading...
Searching...
No Matches
BaseCodec.java
Go to the documentation of this file.
1/**
2 * Author: Sven Gothel <sgothel@jausoft.com>
3 * Copyright (c) 2022 Gothel Software e.K.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be
14 * included in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24package org.jau.util;
25
26import java.nio.ByteBuffer;
27
28/**
29 * Base codecs, i.e. changing the decimal or binary values' base for a different representation.
30 */
31public class BaseCodec {
32
33 /**
34 * Base Alphabet Specification providing the alphabet for encode() and decode().
35 *
36 * @see {@link BaseCodec#encode(int, int, Alphabet, int)}
37 * @see {@link BaseCodec#encode(long, int, Alphabet, int)}
38 * @see {@link BaseCodec#decode(String, int, Alphabet)}
39 */
40 public static abstract class Alphabet {
41 private final String name_;
42 private final int base_;
43 private final String symbols_;
44 private final char padding64_;
45
46 protected Alphabet(final String name, final int base, final String symbols, final char passing64) {
47 this.name_ = name;
48 this.base_ = base;
49 this.symbols_ = symbols;
50 this.padding64_ = passing64;
51 }
52
53 /** Human readable name for this alphabet instance. */
54 public final String name() { return name_; }
55
56 /** The fixed base used for this alphabet. */
57 public final int base() { return base_; }
58
59 /** The string of symbols of this alphabet. */
60 public final String symbols() { return symbols_; }
61
62 /** Padding symbol for base <= 64 and block encoding only. May return zero for no padding. */
63 public final char padding64() { return padding64_; }
64
65 /** Returns the code-point of the given character or -1 if not element of this alphabet. */
66 public abstract int code_point(final char c);
67
68 /** Retrieve the character at given code-point of this alphabet. */
69 public final char charAt( final int cp ) { return symbols().charAt(cp); }
70
71 @Override
72 public boolean equals(final Object o) {
73 if( this == o ) {
74 return true;
75 }
76 if( o instanceof Alphabet ) {
77 final Alphabet oa = (Alphabet)o;
78 return base() == base() && name().equals(oa.name()) && symbols().equals(oa.symbols());
79 }
80 return false;
81 }
82
83 @Override
84 public String toString() {
85 return "Alphabet["+name_+", base <= "+base_+"]";
86 }
87 };
88
89 /**
90 * Safe canonical `base64` alphabet, without ASCII code-point sorting order.
91 *
92 * Representing the canonical `base64` [RFC 4648](https://www.rfc-editor.org/rfc/rfc4648.html) *Base 64 Alphabet*
93 * including its code-point order `A` < `a` < `0` < `/`.
94 *
95 * - Value: `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/`
96 * - Padding: `=`
97 *
98 * ### Properties
99 * - Base 64
100 * - 7-bit ASCII
101 * - Code page 437 compatible
102 * - [`base64` alphabet](https://www.rfc-editor.org/rfc/rfc4648.html), identical order
103 * - Excludes quoting chars: "'$ and space
104 * - Not supporting ASCII code-point sorting.
105 * - Order: `A` < `a` < `0` < `/`
106 */
107 public static class Base64Alphabet extends Alphabet {
108 private static final String data = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
109
110 @Override
111 public int code_point(final char c) {
112 if ('A' <= c && c <= 'Z') {
113 return c - 'A';
114 } else if ('a' <= c && c <= 'z') {
115 return c - 'a' + 26;
116 } else if ('0' <= c && c <= '9') {
117 return c - '0' + 52;
118 } else if ('+' == c) {
119 return 62;
120 } else if ('/' == c) {
121 return 63;
122 } else {
123 return -1;
124 }
125 }
126
127 public Base64Alphabet() {
128 super("base64", 64, data, '=');
129 }
130 }
131
132 /**
133 * Safe canonical `base64url` alphabet, without ASCII code-point sorting order.
134 *
135 * Representing the canonical `base64url` [RFC 4648](https://www.rfc-editor.org/rfc/rfc4648.html) `URL and Filename safe` *Base 64 Alphabet*
136 * including its code-point order `A` < `a` < `0` < `_`.
137 *
138 * - Value: `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_`
139 * - Padding: `=`
140 *
141 * ### Properties
142 * - Base 64
143 * - 7-bit ASCII
144 * - Code page 437 compatible
145 * - [`base64url` alphabet](https://www.rfc-editor.org/rfc/rfc4648.html), identical order
146 * - Safe URL and filename use
147 * - Excludes forbidden [v]fat chars: `<>:"/\|?*`
148 * - Excludes quoting chars: "'$ and space
149 * - Not supporting ASCII code-point sorting.
150 * - Order: `A` < `a` < `0` < `_`
151 */
152 public static class Base64urlAlphabet extends Alphabet {
153 private static final String data = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
154
155 @Override
156 public int code_point(final char c) {
157 if ('A' <= c && c <= 'Z') {
158 return c - 'A';
159 } else if ('a' <= c && c <= 'z') {
160 return c - 'a' + 26;
161 } else if ('0' <= c && c <= '9') {
162 return c - '0' + 52;
163 } else if ('-' == c) {
164 return 62;
165 } else if ('_' == c) {
166 return 63;
167 } else {
168 return -1;
169 }
170 }
171
173 super("base64url", 64, data, '=');
174 }
175 }
176
177 /**
178 * Safe natural base 64 alphabet, both without ASCII code-point sorting order.
179 *
180 * Order is considered a natural extension of decimal symbols, i.e. `0` < `a` < `A` < `_`.
181 *
182 * - Value: `0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_`
183 * - Padding: `=`
184 *
185 * ### Properties
186 * - Base 64
187 * - 7-bit ASCII
188 * - Code page 437 compatible
189 * - [`base64url` alphabet](https://www.rfc-editor.org/rfc/rfc4648.html), but different order
190 * - Safe URL and filename use
191 * - Excludes forbidden [v]fat chars: `<>:"/\|?*`
192 * - Excludes quoting chars: "'$ and space
193 * - Not supporting ASCII code-point sorting.
194 * - Order: `0` < `a` < `A` < `_`
195 */
196 public static class Natural64Alphabet extends Alphabet {
197 private static final String data = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_";
198
199 @Override
200 public int code_point(final char c) {
201 if ('0' <= c && c <= '9') {
202 return c - '0';
203 } else if ('a' <= c && c <= 'z') {
204 return c - 'a' + 10;
205 } else if ('A' <= c && c <= 'Z') {
206 return c - 'A' + 36;
207 } else if ('-' == c) {
208 return 62;
209 } else if ('_' == c) {
210 return 63;
211 } else {
212 return -1;
213 }
214 }
215
217 super("natural64", 64, data, '=');
218 }
219 }
220
221 /**
222 * Natural base 86 alphabet, without ASCII code-point sorting order.
223 *
224 * Order is considered a natural extension of decimal symbols, i.e. `0` < `a` < `A` < `_` < `~`
225 *
226 * - Value: `0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_!#%&()+,/:;<=>?@[]^{}~`
227 * - Padding: none
228 *
229 * ### Properties
230 * - Base 86
231 * - 7-bit ASCII
232 * - Code page 437 compatible
233 * - Excludes quoting chars: "'$ and space
234 * - Not supporting ASCII code-point sorting.
235 * - Order: `0` < `a` < `A` < `_` < `~`
236 *
237 * @see encodeBase()
238 * @see decodeBase()
239 */
240 public static class Natural86Alphabet extends Alphabet {
241 private static final String data = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_!#%&()+,/:;<=>?@[]^{}~";
242
243 @Override
244 public int code_point(final char c) {
245 if ('0' <= c && c <= '9') {
246 return c - '0';
247 } else if ('a' <= c && c <= 'z') {
248 return c - 'a' + 10;
249 } else if ('A' <= c && c <= 'Z') {
250 return c - 'A' + 36;
251 } else {
252 switch( c ) {
253 case '-': return 62;
254 case '_': return 63;
255 case '!': return 64;
256 case '#': return 65;
257 case '%': return 66;
258 case '&': return 67;
259 case '(': return 68;
260 case ')': return 69;
261 case '+': return 70;
262 case ',': return 71;
263 case '/': return 72;
264 case ':': return 73;
265 case ';': return 74;
266 case '<': return 75;
267 case '=': return 76;
268 case '>': return 77;
269 case '?': return 78;
270 case '@': return 79;
271 case '[': return 80;
272 case ']': return 81;
273 case '^': return 82;
274 case '{': return 83;
275 case '}': return 84;
276 case '~': return 85;
277 default: return -1;
278 }
279 }
280 }
281
283 super("natural86", 86, data, (char)0);
284 }
285 }
286
287 /**
288 * Safe base 38 alphabet with ASCII code-point sorting order.
289 *
290 * - Value: `-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_`
291 * - Padding: `=`
292 *
293 * ### Properties
294 * - Base 38
295 * - 7-bit ASCII
296 * - Code page 437 compatible
297 * - Safe URL and filename use
298 * - Excludes forbidden [v]fat chars: `<>:"/\|?*`
299 * - Only using upper-case letters for unique filename under vfat
300 * - Excludes quoting chars: "'$ and space
301 * - Supporting ASCII code-point sorting.
302 * - Order: `-` < `0` < `A` < `a` < `z`
303 *
304 * @see encodeBase()
305 * @see decodeBase()
306 */
307 public static class Ascii38Alphabet extends Alphabet {
308 private static final String data = "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_";
309
310 @Override
311 public int code_point(final char c) {
312 if ('0' <= c && c <= '9') {
313 return c - '0' + 1;
314 } else if ('A' <= c && c <= 'Z') {
315 return c - 'A' + 11;
316 } else if ('-' == c) {
317 return 0;
318 } else if ('_' == c) {
319 return 37;
320 } else {
321 return -1;
322 }
323 }
324
326 super("ascii38", 38, data, '=');
327 }
328 }
329
330 /**
331 * Safe base 64 alphabet with ASCII code-point sorting order.
332 *
333 * - Value: `-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz`
334 * - Padding: `=`
335 *
336 * ### Properties
337 * - Base 64
338 * - 7-bit ASCII
339 * - Code page 437 compatible
340 * - [`base64url` alphabet](https://www.rfc-editor.org/rfc/rfc4648.html), but different order
341 * - Safe URL and filename use
342 * - Excludes forbidden [v]fat chars: `<>:"/\|?*`
343 * - Excludes quoting chars: "'$ and space
344 * - Supporting ASCII code-point sorting.
345 * - Order: `-` < `0` < `A` < `a` < `z`
346 *
347 * @see encodeBase()
348 * @see decodeBase()
349 */
350 public static class Ascii64Alphabet extends Alphabet {
351 private static final String data = "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
352
353 @Override
354 public int code_point(final char c) {
355 if ('0' <= c && c <= '9') {
356 return c - '0' + 1;
357 } else if ('A' <= c && c <= 'Z') {
358 return c - 'A' + 11;
359 } else if ('a' <= c && c <= 'z') {
360 return c - 'a' + 38;
361 } else if ('-' == c) {
362 return 0;
363 } else if ('_' == c) {
364 return 37;
365 } else {
366 return -1;
367 }
368 }
369
371 super("ascii64", 64, data, '=');
372 }
373 }
374
375 /**
376 * Base 86 alphabet with ASCII code-point sorting order.
377 *
378 * - Value: `!#%&()+,-/0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_abcdefghijklmnopqrstuvwxyz{}~`
379 * - Padding: None
380 *
381 * ### Properties
382 * - Base 86
383 * - 7-bit ASCII
384 * - Code page 437 compatible
385 * - Excludes quoting chars: "'$ and space
386 * - Supporting ASCII code-point sorting.
387 * - Order: `!` < `0` < `:` < `A` < `[` < `a` < `{` < `~`
388 *
389 * @see encodeBase()
390 * @see decodeBase()
391 */
392 public static class Ascii86Alphabet extends Alphabet {
393 private static final String data = "!#%&()+,-/0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_abcdefghijklmnopqrstuvwxyz{}~";
394
395 @Override
396 public int code_point(final char c) {
397 if ('0' <= c && c <= '9') {
398 return c - '0' + 10;
399 } else if ('A' <= c && c <= 'Z') {
400 return c - 'A' + 27;
401 } else if ('a' <= c && c <= 'z') {
402 return c - 'a' + 57;
403 } else {
404 switch( c ) {
405 case '!': return 0;
406 case '#': return 1;
407 case '%': return 2;
408 case '&': return 3;
409 case '(': return 4;
410 case ')': return 5;
411 case '+': return 6;
412 case ',': return 7;
413 case '-': return 8;
414 case '/': return 9;
415
416 case ':': return 20;
417 case ';': return 21;
418 case '<': return 22;
419 case '=': return 23;
420 case '>': return 24;
421 case '?': return 25;
422 case '@': return 26;
423
424 case '[': return 53;
425 case ']': return 54;
426 case '^': return 55;
427 case '_': return 56;
428
429 case '{': return 83;
430 case '}': return 84;
431 case '~': return 85;
432 default: return -1;
433 }
434 }
435 }
436
438 super("ascii86", 86, data, (char)0);
439 }
440 }
441
442 /**
443 * Encodes a given positive decimal number to a symbolic string representing a given alphabet and its base.
444 *
445 * Besides using a custom alphabet, the following build-in alphabets are provided
446 * - {@link BaseCodec.Base64Alphabet}
447 * - {@link BaseCodec.Base64urlAlphabet}
448 * - {@link BaseCodec.Natural86Alphabet}
449 * - {@link BaseCodec.Ascii64Alphabet}
450 * - {@link BaseCodec.Ascii86Alphabet}
451 *
452 * @param num a positive decimal number
453 * @param aspec the used alphabet specification
454 * @param min_width minimum width of the encoded string, encoded zero is used for padding
455 * @return the encoded string or an empty string if base exceeds Alphabet.max_base() or invalid arguments
456 *
457 * @see {@link BaseCodec#encode(long, int, Alphabet, int)}
458 * @see {@link BaseCodec#decode(String, int, Alphabet)}
459 */
460 public static String encode(int num, final Alphabet aspec, final int min_width) {
461 final int base = aspec.base();
462 if( 0 > num || 1 >= base ) {
463 return "";
464 }
465 final StringBuilder res = new StringBuilder();
466 do {
467 res.insert( 0, aspec.charAt( num % base ) ); // safe: base <= alphabet.length()
468 num /= base;
469 } while ( 0 != num );
470
471 final char s0 = aspec.charAt(0);
472 for(int i=res.length(); i<min_width; ++i) {
473 res.insert(0, s0);
474 }
475 return res.toString();
476 }
477
478 /**
479 * Encodes a given positive decimal number to a symbolic string representing given alphabet and its base.
480 *
481 * Besides using a custom alphabet, the following build-in alphabets are provided
482 * - {@link BaseCodec.Base64Alphabet}
483 * - {@link BaseCodec.Base64urlAlphabet}
484 * - {@link BaseCodec.Natural86Alphabet}
485 * - {@link BaseCodec.Ascii64Alphabet}
486 * - {@link BaseCodec.Ascii86Alphabet}
487 *
488 * @param num a positive decimal number
489 * @param aspec the used alphabet specification
490 * @param min_width minimum width of the encoded string, encoded zero is used for padding
491 * @return the encoded string or an empty string if base exceeds Alphabet.max_base() or invalid arguments
492 *
493 * @see {@link BaseCodec#encode(int, int, Alphabet, int)}
494 * @see {@link BaseCodec#decode(String, int, Alphabet)}
495 */
496 public static String encode(long num, final Alphabet aspec, final int min_width) {
497 final long base = aspec.base();
498 if( 0 > num || 1 >= base ) {
499 return "";
500 }
501 final StringBuilder res = new StringBuilder();
502 do {
503 res.insert( 0, aspec.charAt( (int)( num % base ) ) ); // safe: base <= alphabet.length()
504 num /= base;
505 } while ( 0 != num );
506
507 final char s0 = aspec.charAt(0);
508 for(int i=res.length(); i<min_width; ++i) {
509 res.insert(0, s0);
510 }
511 return res.toString();
512 }
513
514 /**
515 * Encodes a given positive decimal number to a symbolic string representing a given alphabet and its base.
516 *
517 * Besides using a custom alphabet, the following build-in alphabets are provided
518 * - {@link BaseCodec.Base64Alphabet}
519 * - {@link BaseCodec.Base64urlAlphabet}
520 * - {@link BaseCodec.Natural86Alphabet}
521 * - {@link BaseCodec.Ascii64Alphabet}
522 * - {@link BaseCodec.Ascii86Alphabet}
523 *
524 * @param num a positive decimal number
525 * @param aspec the used alphabet specification
526 * @return the encoded string or an empty string if base exceeds Alphabet.max_base() or invalid arguments
527 *
528 * @see {@link BaseCodec#encode(int, int, Alphabet, int)}
529 * @see {@link BaseCodec#decode(String, int, Alphabet)}
530 */
531 public static String encode(final int num, final Alphabet aspec) {
532 return encode(num, aspec, 0 /* min_width */);
533 }
534
535 /**
536 * Encodes a given positive decimal number to a symbolic string representing a given alphabet and its base.
537 *
538 * Besides using a custom alphabet, the following build-in alphabets are provided
539 * - {@link BaseCodec.Base64Alphabet}
540 * - {@link BaseCodec.Base64urlAlphabet}
541 * - {@link BaseCodec.Natural86Alphabet}
542 * - {@link BaseCodec.Ascii64Alphabet}
543 * - {@link BaseCodec.Ascii86Alphabet}
544 *
545 * @param num a positive decimal number
546 * @param aspec the used alphabet specification
547 * @return the encoded string or an empty string if base exceeds Alphabet.max_base() or invalid arguments
548 *
549 * @see {@link BaseCodec#encode(long, int, Alphabet, int)}
550 * @see {@link BaseCodec#decode(String, int, Alphabet)}
551 */
552 public static String encode(final long num, final Alphabet aspec) {
553 return encode(num, aspec, 0 /* min_width */);
554 }
555
556 /**
557 * Decodes a given symbolic string representing a given alphabet and its base to a positive decimal number.
558 *
559 * Besides using a custom alphabet, the following build-in alphabets are provided
560 * - {@link BaseCodec.Base64Alphabet}
561 * - {@link BaseCodec.Base64urlAlphabet}
562 * - {@link BaseCodec.Natural86Alphabet}
563 * - {@link BaseCodec.Ascii64Alphabet}
564 * - {@link BaseCodec.Ascii86Alphabet}
565 *
566 * @param str an encoded string
567 * @param aspec the used alphabet specification
568 * @return the decoded radix decimal value or -1 if base exceeds Alphabet.max_base(), unknown code-point or invalid arguments
569 *
570 * @see {@link BaseCodec#encode(int, int, Alphabet, int)}
571 * @see {@link BaseCodec#encode(long, int, Alphabet, int)}
572 */
573 public static long decode(final String str, final Alphabet aspec) {
574 final int base = aspec.base();
575 if( 1 >= base ) {
576 return -1;
577 }
578 final int str_len = str.length();
579 long res = 0;
580 for (int i = 0; i < str_len; ++i) {
581 final int d = aspec.code_point( str.charAt(i) );
582 if( 0 > d ) {
583 return -1; // encoded value not found
584 }
585 res = res * base + d;
586 }
587 return res;
588 }
589
590 private static int to_int(final byte b) { return b & 0xff; }
591
592 /**
593 * Encodes given octets using the given alphabet and fixed base 64 encoding
594 * according to `base64` [RFC 4648](https://www.rfc-editor.org/rfc/rfc4648.html).
595 *
596 * An error only occurs if in_len > 0 and resulting encoded string is empty.
597 *
598 * @param in_octets source byte array
599 * @param in_pos index to octets start
600 * @param in_len length of octets in bytes
601 * @param aspec the used base 64 alphabet specification
602 * @return the encoded string, empty if base exceeds alphabet::max_base() or invalid arguments
603 */
604 public static StringBuilder encode64(final byte[] in_octets, int in_pos, int in_len, final Alphabet aspec) {
605 if( 64 != aspec.base() || in_pos + in_len > in_octets.length ) {
606 return new StringBuilder(0);
607 }
608 final char padding = aspec.padding64();
609
610 final int out_len = ( in_len + 2 ) / 3 * 4; // estimate ..
611 final StringBuilder res = new StringBuilder(out_len);
612
613 while( 0 < in_len && 0 < out_len ) {
614 // Note: Addition is basically a bitwise XOR, plus carry bit
615
616 // 1st symbol
617 res.append( aspec.charAt( ( to_int(in_octets[in_pos+0]) >> 2 ) & 0x3f ) ); // take in[0] 6 bits[7..2] -> symbol[5..0]
618 if( 0 == --in_len ) {
619 // len == 1 bytes
620 // 2nd symbol
621 res.append( aspec.charAt( ( to_int(in_octets[in_pos+0]) << 4 ) & 0x3f ) ); // take in[0] 2 bits[1..0] -> symbol[5..4]
622 if( 0 != padding ) {
623 res.append(padding);
624 res.append(padding);
625 }
626 break;
627 } else {
628 // len >= 2 bytes
629 // 2nd symbol
630 res.append( aspec.charAt( ( ( to_int(in_octets[in_pos+0]) << 4 ) + ( to_int(in_octets[in_pos+1]) >> 4) ) & 0x3f ) ); // take ( in[0] 2 bits[1..0] -> symbol[5..4] ) + ( int[1] 4 bits[7..4] -> symbol[3..0] )
631 }
632 if( 0 == --in_len ) {
633 // len == 2 bytes
634 // 3rd symbol
635 res.append( aspec.charAt( ( to_int(in_octets[in_pos+1]) << 2 ) & 0x3f ) ); // take in[1] 4 bits[3..0] -> symbol[5..2]
636 if( 0 != padding ) {
637 res.append(padding);
638 }
639 break;
640 } else {
641 // len >= 3 bytes
642 // 3rd symbol
643 res.append( aspec.charAt( ( ( to_int(in_octets[in_pos+1]) << 2 ) + ( to_int(in_octets[in_pos+2]) >> 6) ) & 0x3f ) ); // take ( in[1] 4 bits[3..0] -> symbol[5..2] ) + ( int[2] 2 bits[7..6] -> symbol[1..0] )
644 // 4th symbol
645 res.append( aspec.charAt( to_int(in_octets[in_pos+2]) & 0x3f ) ); // take in[2] 6 bits[5..0] -> symbol[5..0]
646 --in_len;
647 in_pos+=3;
648 }
649 }
650 return res;
651 }
652
653 /**
654 * Decodes a given symbolic string representing using given alphabet and fixed base 64 to octets
655 * according to `base64` [RFC 4648](https://www.rfc-editor.org/rfc/rfc4648.html).
656 *
657 * An error only occurs if the encoded string length > 0 and resulting decoded octets size is empty.
658 *
659 * @param in_code encoded string
660 * @param aspec the used base 64 alphabet specification
661 * @return the decoded octets, empty if base exceeds alphabet::max_base(), unknown code-point or invalid arguments
662 */
663 public static ByteBuffer decode64(final String in_code, final Alphabet aspec) {
664 if( 64 != aspec.base() ) {
665 return ByteBuffer.allocate(0); // Error
666 }
667 int in_len = in_code.length();
668 if( in_len == 0 ) {
669 return ByteBuffer.allocate(0); // OK
670 }
671 final char padding = aspec.padding64();
672
673 final int out_len = 3 * ( in_len / 4 ) + 2; // estimate w/ potentially up to 2 additional bytes
674 final ByteBuffer res = ByteBuffer.allocate(out_len);
675 int in_pos = 0;
676
677 while( in_len >= 2 ) {
678 final int cp0 = aspec.code_point( in_code.charAt( in_pos + 0 ) );
679 final int cp1 = aspec.code_point( in_code.charAt( in_pos + 1 ) );
680 if( 0 > cp0 || 0 > cp1 ) {
681 break;
682 }
683 res.put( (byte)(cp0 << 2 | cp1 >> 4) );
684 if( 2 == in_len ) {
685 if( 0 == padding ) {
686 in_len = 0; // accept w/o padding
687 }
688 break;
689 }
690 if( padding == in_code.charAt( in_pos + 2 ) ) {
691 if( 4 != in_len ) {
692 break;
693 }
694 if( padding != in_code.charAt( in_pos + 3 ) ) {
695 break;
696 }
697 } else {
698 final int cp2 = aspec.code_point( in_code.charAt( in_pos + 2 ) );
699 if( 0 > cp2 ) {
700 break;
701 }
702 res.put( (byte)( ( ( cp1 << 4 ) & 0xf0 ) | ( cp2 >> 2 ) ) );
703 if( 3 == in_len ) {
704 if( 0 == padding ) {
705 in_len = 0; // accept w/o padding
706 }
707 break;
708 }
709 if( padding == in_code.charAt( in_pos + 3 ) ) {
710 if( 4 != in_len ) {
711 break;
712 }
713 } else {
714 final int cp3 = aspec.code_point( in_code.charAt( in_pos + 3 ) );
715 if( 0 > cp3 ) {
716 break;
717 }
718 res.put( (byte)( ( ( cp2 << 6 ) & 0xc0 ) | cp3 ) );
719 }
720 }
721 in_pos += 4;
722 in_len -= 4;
723 }
724
725 if( 0 != in_len ) {
726 // System.err.printf("in_len %d/%d at '%s', out_len %d/%d\n", in_pos, in_code.length(), in_code, res.position(), out_len);
727 res.clear(); // decoding error, position = 0, limit = capacity
728 }
729 res.flip(); // limit = position, position = 0, remaining() = limit - position
730 return res;
731 }
732
733 /**
734 * Inserts a line feed (LF) character `\n` (ASCII 0x0a) after every period of characters.
735 *
736 * @param str the input string of characters, which will be mutated.
737 * @param period period of characters after which one LF will be inserted.
738 * @return count of inserted LF characters
739 */
740 public static int insert_lf(final StringBuilder str, final int period) {
741 int count = 0;
742 for(int i = period; i < str.length(); i += period + 1) {
743 str.insert(i, "\n");
744 ++count;
745 }
746 return count;
747 }
748
749 /**
750 * Removes line feed character from str.
751 *
752 * @param str the input string of characters, which will be mutated.
753 * @return count of removed LF characters
754 */
755 public static int remove_lf(final StringBuilder str) {
756 int count = 0;
757 int pos = 0;
758 pos = str.indexOf("\n", 0);
759 while( 0 < pos && pos <= str.length() ) {
760 str.replace(pos, pos+1, "");
761 ++count;
762 pos = str.indexOf("\n", pos);
763 }
764 return count;
765 }
766
767 /**
768 * Encodes given octets using the given alphabet and fixed base 64 encoding
769 * according to `base64` [RFC 4648](https://www.rfc-editor.org/rfc/rfc4648.html)
770 * and adds line-feeds every 64 characters as required for PEM.
771 *
772 * An error only occurs if in_len > 0 and resulting encoded string is empty.
773 *
774 * @param in_octets pointer to octets start
775 * @param in_len length of octets in bytes
776 * @param aspec the used alphabet specification
777 * @return the encoded string, empty if base exceeds alphabet::max_base() or invalid arguments
778 */
779 public static StringBuilder encode64_pem(final byte[] in_octets, final int in_pos, final int in_len, final Alphabet aspec) {
780 final StringBuilder e = encode64(in_octets, in_pos, in_len, aspec);
781 insert_lf(e, 64);
782 return e;
783 }
784
785 /**
786 * Encodes given octets using the given alphabet and fixed base 64 encoding
787 * according to `base64` [RFC 4648](https://www.rfc-editor.org/rfc/rfc4648.html)
788 * and adds line-feeds every 76 characters as required for MIME.
789 *
790 * An error only occurs if in_len > 0 and resulting encoded string is empty.
791 *
792 * @param in_octets pointer to octets start
793 * @param in_len length of octets in bytes
794 * @param aspec the used base 64 alphabet specification
795 * @return the encoded string, empty if base exceeds alphabet::max_base() or invalid arguments
796 */
797 public static StringBuilder encode64_mime(final byte[] in_octets, final int in_pos, final int in_len, final Alphabet aspec) {
798 final StringBuilder e = encode64(in_octets, in_pos, in_len, aspec);
799 insert_lf(e, 76);
800 return e;
801 }
802
803 /**
804 * Decodes a given symbolic string representing using given alphabet and fixed base 64 to octets
805 * according to `base64` [RFC 4648](https://www.rfc-editor.org/rfc/rfc4648.html)
806 * and removes all linefeeds before decoding as required for PEM and MIME.
807 *
808 * An error only occurs if the encoded string length > 0 and resulting decoded octets size is empty.
809 *
810 * @param str and encoded string, will be copied
811 * @param aspec the used base 64 alphabet specification
812 * @return the decoded octets, empty if base exceeds alphabet::max_base(), unknown code-point or invalid arguments
813 */
814 public static ByteBuffer decode64_lf(final String str, final Alphabet aspec) {
815 final StringBuilder e = new StringBuilder(str); // costly copy
816 remove_lf(e);
817 return decode64(e.toString(), aspec);
818 }
819
820 /**
821 * Decodes a given symbolic string representing using given alphabet and fixed base 64 to octets
822 * according to `base64` [RFC 4648](https://www.rfc-editor.org/rfc/rfc4648.html)
823 * and removes all linefeeds before decoding as required for PEM and MIME.
824 *
825 * An error only occurs if the encoded string length > 0 and resulting decoded octets size is empty.
826 *
827 * @param str and encoded string, no copy, will be mutated
828 * @param aspec the used base 64 alphabet specification
829 * @return the decoded octets, empty if base exceeds alphabet::max_base(), unknown code-point or invalid arguments
830 */
831 public static ByteBuffer decode64_lf(final StringBuilder str, final Alphabet aspec) {
832 remove_lf(str);
833 return decode64(str.toString(), aspec);
834 }
835
836}
Base Alphabet Specification providing the alphabet for encode() and decode().
final String name()
Human readable name for this alphabet instance.
abstract int code_point(final char c)
Returns the code-point of the given character or -1 if not element of this alphabet.
final char charAt(final int cp)
Retrieve the character at given code-point of this alphabet.
final char padding64()
Padding symbol for base <= 64 and block encoding only.
final int base()
The fixed base used for this alphabet.
boolean equals(final Object o)
final String symbols()
The string of symbols of this alphabet.
Alphabet(final String name, final int base, final String symbols, final char passing64)
int code_point(final char c)
Returns the code-point of the given character or -1 if not element of this alphabet.
int code_point(final char c)
Returns the code-point of the given character or -1 if not element of this alphabet.
int code_point(final char c)
Returns the code-point of the given character or -1 if not element of this alphabet.
int code_point(final char c)
Returns the code-point of the given character or -1 if not element of this alphabet.
int code_point(final char c)
Returns the code-point of the given character or -1 if not element of this alphabet.
int code_point(final char c)
Returns the code-point of the given character or -1 if not element of this alphabet.
int code_point(final char c)
Returns the code-point of the given character or -1 if not element of this alphabet.
Base codecs, i.e.
static StringBuilder encode64_pem(final byte[] in_octets, final int in_pos, final int in_len, final Alphabet aspec)
Encodes given octets using the given alphabet and fixed base 64 encoding according to base64 RFC 4648...
static StringBuilder encode64_mime(final byte[] in_octets, final int in_pos, final int in_len, final Alphabet aspec)
Encodes given octets using the given alphabet and fixed base 64 encoding according to base64 RFC 4648...
static String encode(long num, final Alphabet aspec, final int min_width)
Encodes a given positive decimal number to a symbolic string representing given alphabet and its base...
static ByteBuffer decode64_lf(final StringBuilder str, final Alphabet aspec)
Decodes a given symbolic string representing using given alphabet and fixed base 64 to octets accordi...
static String encode(final long num, final Alphabet aspec)
Encodes a given positive decimal number to a symbolic string representing a given alphabet and its ba...
static long decode(final String str, final Alphabet aspec)
Decodes a given symbolic string representing a given alphabet and its base to a positive decimal numb...
static String encode(int num, final Alphabet aspec, final int min_width)
Encodes a given positive decimal number to a symbolic string representing a given alphabet and its ba...
static StringBuilder encode64(final byte[] in_octets, int in_pos, int in_len, final Alphabet aspec)
Encodes given octets using the given alphabet and fixed base 64 encoding according to base64 RFC 4648...
static int remove_lf(final StringBuilder str)
Removes line feed character from str.
static ByteBuffer decode64_lf(final String str, final Alphabet aspec)
Decodes a given symbolic string representing using given alphabet and fixed base 64 to octets accordi...
static int insert_lf(final StringBuilder str, final int period)
Inserts a line feed (LF) character \n (ASCII 0x0a) after every period of characters.
static ByteBuffer decode64(final String in_code, final Alphabet aspec)
Decodes a given symbolic string representing using given alphabet and fixed base 64 to octets accordi...
static String encode(final int num, final Alphabet aspec)
Encodes a given positive decimal number to a symbolic string representing a given alphabet and its ba...
Author: Sven Gothel sgothel@jausoft.com Copyright (c) 2020 Gothel Software e.K.