1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package com.google.common.io;
16
17 import static com.google.common.base.Preconditions.checkArgument;
18 import static com.google.common.base.Preconditions.checkNotNull;
19 import static com.google.common.base.Preconditions.checkPositionIndexes;
20 import static com.google.common.base.Preconditions.checkState;
21 import static com.google.common.io.GwtWorkarounds.asCharInput;
22 import static com.google.common.io.GwtWorkarounds.asCharOutput;
23 import static com.google.common.io.GwtWorkarounds.asInputStream;
24 import static com.google.common.io.GwtWorkarounds.asOutputStream;
25 import static com.google.common.io.GwtWorkarounds.stringBuilderOutput;
26 import static com.google.common.math.IntMath.divide;
27 import static com.google.common.math.IntMath.log2;
28 import static java.math.RoundingMode.CEILING;
29 import static java.math.RoundingMode.FLOOR;
30 import static java.math.RoundingMode.UNNECESSARY;
31
32 import com.google.common.annotations.Beta;
33 import com.google.common.annotations.GwtCompatible;
34 import com.google.common.annotations.GwtIncompatible;
35 import com.google.common.base.Ascii;
36 import com.google.common.base.CharMatcher;
37 import com.google.common.io.GwtWorkarounds.ByteInput;
38 import com.google.common.io.GwtWorkarounds.ByteOutput;
39 import com.google.common.io.GwtWorkarounds.CharInput;
40 import com.google.common.io.GwtWorkarounds.CharOutput;
41
42 import java.io.IOException;
43 import java.io.InputStream;
44 import java.io.OutputStream;
45 import java.io.Reader;
46 import java.io.Writer;
47 import java.util.Arrays;
48
49 import javax.annotation.CheckReturnValue;
50 import javax.annotation.Nullable;
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131 @Beta
132 @GwtCompatible(emulated = true)
133 public abstract class BaseEncoding {
134
135
/**
 * Sole constructor. It carries no access modifier, so it is package-private: only classes in
 * this package can subclass {@code BaseEncoding}.
 */
BaseEncoding() {
}
137
138
139
140
141
142
143
144 public static final class DecodingException extends IOException {
145 DecodingException(String message) {
146 super(message);
147 }
148
149 DecodingException(Throwable cause) {
150 super(cause);
151 }
152 }
153
154
155
156
157 public String encode(byte[] bytes) {
158 return encode(checkNotNull(bytes), 0, bytes.length);
159 }
160
161
162
163
164
165 public final String encode(byte[] bytes, int off, int len) {
166 checkNotNull(bytes);
167 checkPositionIndexes(off, off + len, bytes.length);
168 CharOutput result = stringBuilderOutput(maxEncodedSize(len));
169 ByteOutput byteOutput = encodingStream(result);
170 try {
171 for (int i = 0; i < len; i++) {
172 byteOutput.write(bytes[off + i]);
173 }
174 byteOutput.close();
175 } catch (IOException impossible) {
176 throw new AssertionError("impossible");
177 }
178 return result.toString();
179 }
180
181
182
183
184
185
186 @GwtIncompatible("Writer,OutputStream")
187 public final OutputStream encodingStream(Writer writer) {
188 return asOutputStream(encodingStream(asCharOutput(writer)));
189 }
190
191
192
193
194 @GwtIncompatible("ByteSink,CharSink")
195 public final ByteSink encodingSink(final CharSink encodedSink) {
196 checkNotNull(encodedSink);
197 return new ByteSink() {
198 @Override
199 public OutputStream openStream() throws IOException {
200 return encodingStream(encodedSink.openStream());
201 }
202 };
203 }
204
205
206
207 private static byte[] extract(byte[] result, int length) {
208 if (length == result.length) {
209 return result;
210 } else {
211 byte[] trunc = new byte[length];
212 System.arraycopy(result, 0, trunc, 0, length);
213 return trunc;
214 }
215 }
216
217
218
219
220
221
222
223
224 public final byte[] decode(CharSequence chars) {
225 try {
226 return decodeChecked(chars);
227 } catch (DecodingException badInput) {
228 throw new IllegalArgumentException(badInput);
229 }
230 }
231
232
233
234
235
236
237
238
239 final byte[] decodeChecked(CharSequence chars) throws DecodingException {
240 chars = padding().trimTrailingFrom(chars);
241 ByteInput decodedInput = decodingStream(asCharInput(chars));
242 byte[] tmp = new byte[maxDecodedSize(chars.length())];
243 int index = 0;
244 try {
245 for (int i = decodedInput.read(); i != -1; i = decodedInput.read()) {
246 tmp[index++] = (byte) i;
247 }
248 } catch (DecodingException badInput) {
249 throw badInput;
250 } catch (IOException impossible) {
251 throw new AssertionError(impossible);
252 }
253 return extract(tmp, index);
254 }
255
256
257
258
259
260
261 @GwtIncompatible("Reader,InputStream")
262 public final InputStream decodingStream(Reader reader) {
263 return asInputStream(decodingStream(asCharInput(reader)));
264 }
265
266
267
268
269
270 @GwtIncompatible("ByteSource,CharSource")
271 public final ByteSource decodingSource(final CharSource encodedSource) {
272 checkNotNull(encodedSource);
273 return new ByteSource() {
274 @Override
275 public InputStream openStream() throws IOException {
276 return decodingStream(encodedSource.openStream());
277 }
278 };
279 }
280
281
282
283 abstract int maxEncodedSize(int bytes);
284
285 abstract ByteOutput encodingStream(CharOutput charOutput);
286
287 abstract int maxDecodedSize(int chars);
288
289 abstract ByteInput decodingStream(CharInput charInput);
290
291 abstract CharMatcher padding();
292
293
294
295
296
297
298
299
300 @CheckReturnValue
301 public abstract BaseEncoding omitPadding();
302
303
304
305
306
307
308
309
310 @CheckReturnValue
311 public abstract BaseEncoding withPadChar(char padChar);
312
313
314
315
316
317
318
319
320
321
322 @CheckReturnValue
323 public abstract BaseEncoding withSeparator(String separator, int n);
324
325
326
327
328
329
330
331
332 @CheckReturnValue
333 public abstract BaseEncoding upperCase();
334
335
336
337
338
339
340
341
342 @CheckReturnValue
343 public abstract BaseEncoding lowerCase();
344
345 private static final BaseEncoding BASE64 = new StandardBaseEncoding(
346 "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361 public static BaseEncoding base64() {
362 return BASE64;
363 }
364
365 private static final BaseEncoding BASE64_URL = new StandardBaseEncoding(
366 "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382 public static BaseEncoding base64Url() {
383 return BASE64_URL;
384 }
385
386 private static final BaseEncoding BASE32 =
387 new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402 public static BaseEncoding base32() {
403 return BASE32;
404 }
405
406 private static final BaseEncoding BASE32_HEX =
407 new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');
408
409
410
411
412
413
414
415
416
417
418
419
420
421 public static BaseEncoding base32Hex() {
422 return BASE32_HEX;
423 }
424
425 private static final BaseEncoding BASE16 =
426 new StandardBaseEncoding("base16()", "0123456789ABCDEF", null);
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442 public static BaseEncoding base16() {
443 return BASE16;
444 }
445
446 private static final class Alphabet extends CharMatcher {
447 private final String name;
448
449 private final char[] chars;
450 final int mask;
451 final int bitsPerChar;
452 final int charsPerChunk;
453 final int bytesPerChunk;
454 private final byte[] decodabet;
455 private final boolean[] validPadding;
456
457 Alphabet(String name, char[] chars) {
458 this.name = checkNotNull(name);
459 this.chars = checkNotNull(chars);
460 try {
461 this.bitsPerChar = log2(chars.length, UNNECESSARY);
462 } catch (ArithmeticException e) {
463 throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
464 }
465
466
467
468
469
470 int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
471 this.charsPerChunk = 8 / gcd;
472 this.bytesPerChunk = bitsPerChar / gcd;
473
474 this.mask = chars.length - 1;
475
476 byte[] decodabet = new byte[Ascii.MAX + 1];
477 Arrays.fill(decodabet, (byte) -1);
478 for (int i = 0; i < chars.length; i++) {
479 char c = chars[i];
480 checkArgument(CharMatcher.ASCII.matches(c), "Non-ASCII character: %s", c);
481 checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
482 decodabet[c] = (byte) i;
483 }
484 this.decodabet = decodabet;
485
486 boolean[] validPadding = new boolean[charsPerChunk];
487 for (int i = 0; i < bytesPerChunk; i++) {
488 validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
489 }
490 this.validPadding = validPadding;
491 }
492
493 char encode(int bits) {
494 return chars[bits];
495 }
496
497 boolean isValidPaddingStartPosition(int index) {
498 return validPadding[index % charsPerChunk];
499 }
500
501 int decode(char ch) throws IOException {
502 if (ch > Ascii.MAX || decodabet[ch] == -1) {
503 throw new DecodingException("Unrecognized character: " + ch);
504 }
505 return decodabet[ch];
506 }
507
508 private boolean hasLowerCase() {
509 for (char c : chars) {
510 if (Ascii.isLowerCase(c)) {
511 return true;
512 }
513 }
514 return false;
515 }
516
517 private boolean hasUpperCase() {
518 for (char c : chars) {
519 if (Ascii.isUpperCase(c)) {
520 return true;
521 }
522 }
523 return false;
524 }
525
526 Alphabet upperCase() {
527 if (!hasLowerCase()) {
528 return this;
529 } else {
530 checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
531 char[] upperCased = new char[chars.length];
532 for (int i = 0; i < chars.length; i++) {
533 upperCased[i] = Ascii.toUpperCase(chars[i]);
534 }
535 return new Alphabet(name + ".upperCase()", upperCased);
536 }
537 }
538
539 Alphabet lowerCase() {
540 if (!hasUpperCase()) {
541 return this;
542 } else {
543 checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
544 char[] lowerCased = new char[chars.length];
545 for (int i = 0; i < chars.length; i++) {
546 lowerCased[i] = Ascii.toLowerCase(chars[i]);
547 }
548 return new Alphabet(name + ".lowerCase()", lowerCased);
549 }
550 }
551
552 @Override
553 public boolean matches(char c) {
554 return CharMatcher.ASCII.matches(c) && decodabet[c] != -1;
555 }
556
557 @Override
558 public String toString() {
559 return name;
560 }
561 }
562
563 static final class StandardBaseEncoding extends BaseEncoding {
564
565 private final Alphabet alphabet;
566
567 @Nullable
568 private final Character paddingChar;
569
570 StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
571 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
572 }
573
574 StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) {
575 this.alphabet = checkNotNull(alphabet);
576 checkArgument(paddingChar == null || !alphabet.matches(paddingChar),
577 "Padding character %s was already in alphabet", paddingChar);
578 this.paddingChar = paddingChar;
579 }
580
581 @Override
582 CharMatcher padding() {
583 return (paddingChar == null) ? CharMatcher.NONE : CharMatcher.is(paddingChar.charValue());
584 }
585
586 @Override
587 int maxEncodedSize(int bytes) {
588 return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
589 }
590
591 @Override
592 ByteOutput encodingStream(final CharOutput out) {
593 checkNotNull(out);
594 return new ByteOutput() {
595 int bitBuffer = 0;
596 int bitBufferLength = 0;
597 int writtenChars = 0;
598
599 @Override
600 public void write(byte b) throws IOException {
601 bitBuffer <<= 8;
602 bitBuffer |= b & 0xFF;
603 bitBufferLength += 8;
604 while (bitBufferLength >= alphabet.bitsPerChar) {
605 int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar))
606 & alphabet.mask;
607 out.write(alphabet.encode(charIndex));
608 writtenChars++;
609 bitBufferLength -= alphabet.bitsPerChar;
610 }
611 }
612
613 @Override
614 public void flush() throws IOException {
615 out.flush();
616 }
617
618 @Override
619 public void close() throws IOException {
620 if (bitBufferLength > 0) {
621 int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength))
622 & alphabet.mask;
623 out.write(alphabet.encode(charIndex));
624 writtenChars++;
625 if (paddingChar != null) {
626 while (writtenChars % alphabet.charsPerChunk != 0) {
627 out.write(paddingChar.charValue());
628 writtenChars++;
629 }
630 }
631 }
632 out.close();
633 }
634 };
635 }
636
637 @Override
638 int maxDecodedSize(int chars) {
639 return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
640 }
641
642 @Override
643 ByteInput decodingStream(final CharInput reader) {
644 checkNotNull(reader);
645 return new ByteInput() {
646 int bitBuffer = 0;
647 int bitBufferLength = 0;
648 int readChars = 0;
649 boolean hitPadding = false;
650 final CharMatcher paddingMatcher = padding();
651
652 @Override
653 public int read() throws IOException {
654 while (true) {
655 int readChar = reader.read();
656 if (readChar == -1) {
657 if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
658 throw new DecodingException("Invalid input length " + readChars);
659 }
660 return -1;
661 }
662 readChars++;
663 char ch = (char) readChar;
664 if (paddingMatcher.matches(ch)) {
665 if (!hitPadding
666 && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
667 throw new DecodingException("Padding cannot start at index " + readChars);
668 }
669 hitPadding = true;
670 } else if (hitPadding) {
671 throw new DecodingException(
672 "Expected padding character but found '" + ch + "' at index " + readChars);
673 } else {
674 bitBuffer <<= alphabet.bitsPerChar;
675 bitBuffer |= alphabet.decode(ch);
676 bitBufferLength += alphabet.bitsPerChar;
677
678 if (bitBufferLength >= 8) {
679 bitBufferLength -= 8;
680 return (bitBuffer >> bitBufferLength) & 0xFF;
681 }
682 }
683 }
684 }
685
686 @Override
687 public void close() throws IOException {
688 reader.close();
689 }
690 };
691 }
692
693 @Override
694 public BaseEncoding omitPadding() {
695 return (paddingChar == null) ? this : new StandardBaseEncoding(alphabet, null);
696 }
697
698 @Override
699 public BaseEncoding withPadChar(char padChar) {
700 if (8 % alphabet.bitsPerChar == 0 ||
701 (paddingChar != null && paddingChar.charValue() == padChar)) {
702 return this;
703 } else {
704 return new StandardBaseEncoding(alphabet, padChar);
705 }
706 }
707
708 @Override
709 public BaseEncoding withSeparator(String separator, int afterEveryChars) {
710 checkNotNull(separator);
711 checkArgument(padding().or(alphabet).matchesNoneOf(separator),
712 "Separator cannot contain alphabet or padding characters");
713 return new SeparatedBaseEncoding(this, separator, afterEveryChars);
714 }
715
716 private transient BaseEncoding upperCase;
717 private transient BaseEncoding lowerCase;
718
719 @Override
720 public BaseEncoding upperCase() {
721 BaseEncoding result = upperCase;
722 if (result == null) {
723 Alphabet upper = alphabet.upperCase();
724 result = upperCase =
725 (upper == alphabet) ? this : new StandardBaseEncoding(upper, paddingChar);
726 }
727 return result;
728 }
729
730 @Override
731 public BaseEncoding lowerCase() {
732 BaseEncoding result = lowerCase;
733 if (result == null) {
734 Alphabet lower = alphabet.lowerCase();
735 result = lowerCase =
736 (lower == alphabet) ? this : new StandardBaseEncoding(lower, paddingChar);
737 }
738 return result;
739 }
740
741 @Override
742 public String toString() {
743 StringBuilder builder = new StringBuilder("BaseEncoding.");
744 builder.append(alphabet.toString());
745 if (8 % alphabet.bitsPerChar != 0) {
746 if (paddingChar == null) {
747 builder.append(".omitPadding()");
748 } else {
749 builder.append(".withPadChar(").append(paddingChar).append(')');
750 }
751 }
752 return builder.toString();
753 }
754 }
755
756 static CharInput ignoringInput(final CharInput delegate, final CharMatcher toIgnore) {
757 checkNotNull(delegate);
758 checkNotNull(toIgnore);
759 return new CharInput() {
760 @Override
761 public int read() throws IOException {
762 int readChar;
763 do {
764 readChar = delegate.read();
765 } while (readChar != -1 && toIgnore.matches((char) readChar));
766 return readChar;
767 }
768
769 @Override
770 public void close() throws IOException {
771 delegate.close();
772 }
773 };
774 }
775
776 static CharOutput separatingOutput(
777 final CharOutput delegate, final String separator, final int afterEveryChars) {
778 checkNotNull(delegate);
779 checkNotNull(separator);
780 checkArgument(afterEveryChars > 0);
781 return new CharOutput() {
782 int charsUntilSeparator = afterEveryChars;
783
784 @Override
785 public void write(char c) throws IOException {
786 if (charsUntilSeparator == 0) {
787 for (int i = 0; i < separator.length(); i++) {
788 delegate.write(separator.charAt(i));
789 }
790 charsUntilSeparator = afterEveryChars;
791 }
792 delegate.write(c);
793 charsUntilSeparator--;
794 }
795
796 @Override
797 public void flush() throws IOException {
798 delegate.flush();
799 }
800
801 @Override
802 public void close() throws IOException {
803 delegate.close();
804 }
805 };
806 }
807
808 static final class SeparatedBaseEncoding extends BaseEncoding {
809 private final BaseEncoding delegate;
810 private final String separator;
811 private final int afterEveryChars;
812 private final CharMatcher separatorChars;
813
814 SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
815 this.delegate = checkNotNull(delegate);
816 this.separator = checkNotNull(separator);
817 this.afterEveryChars = afterEveryChars;
818 checkArgument(
819 afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
820 this.separatorChars = CharMatcher.anyOf(separator).precomputed();
821 }
822
823 @Override
824 CharMatcher padding() {
825 return delegate.padding();
826 }
827
828 @Override
829 int maxEncodedSize(int bytes) {
830 int unseparatedSize = delegate.maxEncodedSize(bytes);
831 return unseparatedSize + separator.length()
832 * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
833 }
834
835 @Override
836 ByteOutput encodingStream(final CharOutput output) {
837 return delegate.encodingStream(separatingOutput(output, separator, afterEveryChars));
838 }
839
840 @Override
841 int maxDecodedSize(int chars) {
842 return delegate.maxDecodedSize(chars);
843 }
844
845 @Override
846 ByteInput decodingStream(final CharInput input) {
847 return delegate.decodingStream(ignoringInput(input, separatorChars));
848 }
849
850 @Override
851 public BaseEncoding omitPadding() {
852 return delegate.omitPadding().withSeparator(separator, afterEveryChars);
853 }
854
855 @Override
856 public BaseEncoding withPadChar(char padChar) {
857 return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
858 }
859
860 @Override
861 public BaseEncoding withSeparator(String separator, int afterEveryChars) {
862 throw new UnsupportedOperationException("Already have a separator");
863 }
864
865 @Override
866 public BaseEncoding upperCase() {
867 return delegate.upperCase().withSeparator(separator, afterEveryChars);
868 }
869
870 @Override
871 public BaseEncoding lowerCase() {
872 return delegate.lowerCase().withSeparator(separator, afterEveryChars);
873 }
874
875 @Override
876 public String toString() {
877 return delegate.toString() +
878 ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
879 }
880 }
881 }