View Javadoc
1   /*
2    * Copyright (C) 2012 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5    * in compliance with the License. You may obtain a copy of the License at
6    *
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software distributed under the License
10   * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11   * or implied. See the License for the specific language governing permissions and limitations under
12   * the License.
13   */
14  
15  package com.google.common.io;
16  
17  import static com.google.common.base.Preconditions.checkArgument;
18  import static com.google.common.base.Preconditions.checkNotNull;
19  import static com.google.common.base.Preconditions.checkPositionIndexes;
20  import static com.google.common.base.Preconditions.checkState;
21  import static com.google.common.io.GwtWorkarounds.asCharInput;
22  import static com.google.common.io.GwtWorkarounds.asCharOutput;
23  import static com.google.common.io.GwtWorkarounds.asInputStream;
24  import static com.google.common.io.GwtWorkarounds.asOutputStream;
25  import static com.google.common.io.GwtWorkarounds.stringBuilderOutput;
26  import static com.google.common.math.IntMath.divide;
27  import static com.google.common.math.IntMath.log2;
28  import static java.math.RoundingMode.CEILING;
29  import static java.math.RoundingMode.FLOOR;
30  import static java.math.RoundingMode.UNNECESSARY;
31  
32  import com.google.common.annotations.Beta;
33  import com.google.common.annotations.GwtCompatible;
34  import com.google.common.annotations.GwtIncompatible;
35  import com.google.common.base.Ascii;
36  import com.google.common.base.CharMatcher;
37  import com.google.common.io.GwtWorkarounds.ByteInput;
38  import com.google.common.io.GwtWorkarounds.ByteOutput;
39  import com.google.common.io.GwtWorkarounds.CharInput;
40  import com.google.common.io.GwtWorkarounds.CharOutput;
41  
42  import java.io.IOException;
43  import java.io.InputStream;
44  import java.io.OutputStream;
45  import java.io.Reader;
46  import java.io.Writer;
47  import java.util.Arrays;
48  
49  import javax.annotation.CheckReturnValue;
50  import javax.annotation.Nullable;
51  
52  /**
53   * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
54   * strings. This class includes several constants for encoding schemes specified by <a
55   * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
56   *
57   * <pre>   {@code
58   *   BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre>
59   *
60   * <p>returns the string {@code "MZXW6==="}, and <pre>   {@code
61   *  byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre>
62   *
63   * <p>...returns the ASCII bytes of the string {@code "foo"}.
64   *
65   * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with
66   * RFC 4648.  Decoding rejects characters in the wrong case, though padding is optional.
67   * To modify encoding and decoding behavior, use configuration methods to obtain a new encoding
68   * with modified behavior:
69   *
70   * <pre>   {@code
71   *  BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre>
72   *
73   * <p>Warning: BaseEncoding instances are immutable.  Invoking a configuration method has no effect
74   * on the receiving instance; you must store and use the new encoding instance it returns, instead.
75   *
76   * <pre>   {@code
77   *   // Do NOT do this
78   *   BaseEncoding hex = BaseEncoding.base16();
79   *   hex.lowerCase(); // does nothing!
80   *   return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre>
81   *
82   * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to
83   * {@code x}, but the reverse does not necessarily hold.
84   *
85   * <p>
86   * <table>
87   * <tr>
88   * <th>Encoding
89   * <th>Alphabet
90   * <th>{@code char:byte} ratio
91   * <th>Default padding
92   * <th>Comments
93   * <tr>
94   * <td>{@link #base16()}
95   * <td>0-9 A-F
96   * <td>2.00
97   * <td>N/A
98   * <td>Traditional hexadecimal.  Defaults to upper case.
99   * <tr>
100  * <td>{@link #base32()}
101  * <td>A-Z 2-7
102  * <td>1.60
103  * <td>=
104  * <td>Human-readable; no possibility of mixing up 0/O or 1/I.  Defaults to upper case.
105  * <tr>
106  * <td>{@link #base32Hex()}
107  * <td>0-9 A-V
108  * <td>1.60
109  * <td>=
110  * <td>"Numerical" base 32; extended from the traditional hex alphabet.  Defaults to upper case.
111  * <tr>
112  * <td>{@link #base64()}
113  * <td>A-Z a-z 0-9 + /
114  * <td>1.33
115  * <td>=
116  * <td>
117  * <tr>
118  * <td>{@link #base64Url()}
119  * <td>A-Z a-z 0-9 - _
120  * <td>1.33
121  * <td>=
122  * <td>Safe to use as filenames, or to pass in URLs without escaping
123  * </table>
124  *
125  * <p>
126  * All instances of this class are immutable, so they may be stored safely as static constants.
127  *
128  * @author Louis Wasserman
129  * @since 14.0
130  */
131 @Beta
132 @GwtCompatible(emulated = true)
133 public abstract class BaseEncoding {
134   // TODO(user): consider adding encodeTo(Appendable, byte[], [int, int])
135 
  // Declared without an access modifier, so the constructor is package-private: only classes in
  // this package can subclass BaseEncoding.
  BaseEncoding() {}
137 
  /**
   * Exception indicating invalid base-encoded input encountered while decoding.
   *
   * <p>This is a checked exception (it extends {@link IOException}).
   * {@link #decode(CharSequence)} rethrows it wrapped in an {@link IllegalArgumentException}.
   *
   * @author Louis Wasserman
   * @since 15.0
   */
  public static final class DecodingException extends IOException {
    /** Creates an exception carrying a description of the invalid input. */
    DecodingException(String message) {
      super(message);
    }

    /** Creates an exception that wraps the given underlying cause. */
    DecodingException(Throwable cause) {
      super(cause);
    }
  }
153 
154   /**
155    * Encodes the specified byte array, and returns the encoded {@code String}.
156    */
157   public String encode(byte[] bytes) {
158     return encode(checkNotNull(bytes), 0, bytes.length);
159   }
160 
161   /**
162    * Encodes the specified range of the specified byte array, and returns the encoded
163    * {@code String}.
164    */
165   public final String encode(byte[] bytes, int off, int len) {
166     checkNotNull(bytes);
167     checkPositionIndexes(off, off + len, bytes.length);
168     CharOutput result = stringBuilderOutput(maxEncodedSize(len));
169     ByteOutput byteOutput = encodingStream(result);
170     try {
171       for (int i = 0; i < len; i++) {
172         byteOutput.write(bytes[off + i]);
173       }
174       byteOutput.close();
175     } catch (IOException impossible) {
176       throw new AssertionError("impossible");
177     }
178     return result.toString();
179   }
180 
181   /**
182    * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
183    * {@code Writer}.  When the returned {@code OutputStream} is closed, so is the backing
184    * {@code Writer}.
185    */
186   @GwtIncompatible("Writer,OutputStream")
187   public final OutputStream encodingStream(Writer writer) {
188     return asOutputStream(encodingStream(asCharOutput(writer)));
189   }
190 
191   /**
192    * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
193    */
194   @GwtIncompatible("ByteSink,CharSink")
195   public final ByteSink encodingSink(final CharSink encodedSink) {
196     checkNotNull(encodedSink);
197     return new ByteSink() {
198       @Override
199       public OutputStream openStream() throws IOException {
200         return encodingStream(encodedSink.openStream());
201       }
202     };
203   }
204 
205   // TODO(user): document the extent of leniency, probably after adding ignore(CharMatcher)
206 
207   private static byte[] extract(byte[] result, int length) {
208     if (length == result.length) {
209       return result;
210     } else {
211       byte[] trunc = new byte[length];
212       System.arraycopy(result, 0, trunc, 0, length);
213       return trunc;
214     }
215   }
216 
217   /**
218    * Decodes the specified character sequence, and returns the resulting {@code byte[]}.
219    * This is the inverse operation to {@link #encode(byte[])}.
220    *
221    * @throws IllegalArgumentException if the input is not a valid encoded string according to this
222    *         encoding.
223    */
224   public final byte[] decode(CharSequence chars) {
225     try {
226       return decodeChecked(chars);
227     } catch (DecodingException badInput) {
228       throw new IllegalArgumentException(badInput);
229     }
230   }
231 
232   /**
233    * Decodes the specified character sequence, and returns the resulting {@code byte[]}.
234    * This is the inverse operation to {@link #encode(byte[])}.
235    *
236    * @throws DecodingException if the input is not a valid encoded string according to this
237    *         encoding.
238    */
239   final byte[] decodeChecked(CharSequence chars) throws DecodingException {
240     chars = padding().trimTrailingFrom(chars);
241     ByteInput decodedInput = decodingStream(asCharInput(chars));
242     byte[] tmp = new byte[maxDecodedSize(chars.length())];
243     int index = 0;
244     try {
245       for (int i = decodedInput.read(); i != -1; i = decodedInput.read()) {
246         tmp[index++] = (byte) i;
247       }
248     } catch (DecodingException badInput) {
249       throw badInput;
250     } catch (IOException impossible) {
251       throw new AssertionError(impossible);
252     }
253     return extract(tmp, index);
254   }
255 
256   /**
257    * Returns an {@code InputStream} that decodes base-encoded input from the specified
258    * {@code Reader}.  The returned stream throws a {@link DecodingException} upon decoding-specific
259    * errors.
260    */
261   @GwtIncompatible("Reader,InputStream")
262   public final InputStream decodingStream(Reader reader) {
263     return asInputStream(decodingStream(asCharInput(reader)));
264   }
265 
266   /**
267    * Returns a {@code ByteSource} that reads base-encoded bytes from the specified
268    * {@code CharSource}.
269    */
270   @GwtIncompatible("ByteSource,CharSource")
271   public final ByteSource decodingSource(final CharSource encodedSource) {
272     checkNotNull(encodedSource);
273     return new ByteSource() {
274       @Override
275       public InputStream openStream() throws IOException {
276         return decodingStream(encodedSource.openStream());
277       }
278     };
279   }
280 
281   // Implementations for encoding/decoding
282 
  /**
   * Returns the number of characters to reserve for the encoded form of {@code bytes} input
   * bytes.  {@code encode} uses this value to presize its output buffer.
   */
  abstract int maxEncodedSize(int bytes);

  /**
   * Returns a {@code ByteOutput} that encodes the bytes written to it and writes the resulting
   * characters to {@code charOutput}.  {@code encode} closes the returned output once all bytes
   * have been written.
   */
  abstract ByteOutput encodingStream(CharOutput charOutput);

  /**
   * Returns the number of bytes to reserve when decoding {@code chars} characters.
   * {@code decodeChecked} allocates its temporary buffer with exactly this size, so the value must
   * be at least the number of bytes the decoder can produce from that many characters.
   */
  abstract int maxDecodedSize(int chars);

  /**
   * Returns a {@code ByteInput} that yields the bytes decoded from the characters read from
   * {@code charInput}.  Callers in this class read until {@code read()} returns {@code -1}.
   */
  abstract ByteInput decodingStream(CharInput charInput);

  /**
   * Returns a matcher for the padding characters of this encoding.  {@code decodeChecked} uses it
   * to trim trailing padding from the input before decoding.
   */
  abstract CharMatcher padding();
292 
293   // Modified encoding generators
294 
  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
   * section 3.2</a>, Padding of Encoded Data.
   *
   * <p>This encoding is not modified; use the returned instance.
   */
  @CheckReturnValue
  public abstract BaseEncoding omitPadding();
302 
  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
   * for padding.
   *
   * <p>This encoding is not modified; use the returned instance.
   *
   * @param padChar the character to use for padding
   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
   *         separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withPadChar(char padChar);
312 
  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
   * are skipped over in decoding.
   *
   * <p>This encoding is not modified; use the returned instance.
   *
   * @param separator the string to insert between groups of encoded characters
   * @param n the number of encoded characters in each group
   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
   *         string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withSeparator(String separator, int n);
324 
  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * uppercase letters. Padding and separator characters remain in their original case.
   *
   * <p>This encoding is not modified; use the returned instance.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding upperCase();
334 
  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * lowercase letters. Padding and separator characters remain in their original case.
   *
   * <p>This encoding is not modified; use the returned instance.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding lowerCase();
344 
  // Shared instance returned by base64(): 64-character alphabet ending in "+/", padded with '='.
  private static final BaseEncoding BASE64 = new StandardBaseEncoding(
      "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

  /**
   * The "base64" base encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
   * (This is the same as the base 64 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * <p>Every call returns the same instance.
   */
  public static BaseEncoding base64() {
    return BASE64;
  }
364 
  // Shared instance returned by base64Url(): same as the base64 alphabet except that it ends in
  // "-_" instead of "+/"; padded with '='.
  private static final BaseEncoding BASE64_URL = new StandardBaseEncoding(
      "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

  /**
   * The "base64url" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64."
   * (This is the same as the base 64 encoding with URL and filename safe alphabet from <a
   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * <p>Every call returns the same instance.
   */
  public static BaseEncoding base64Url() {
    return BASE64_URL;
  }
385 
  // Shared instance returned by base32(): alphabet A-Z followed by 2-7, padded with '='.
  private static final BaseEncoding BASE32 =
      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

  /**
   * The "base32" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-6">RFC 4648 section 6</a>, Base 32 Encoding.
   * (This is the same as the base 32 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * <p>Every call returns the same instance.
   */
  public static BaseEncoding base32() {
    return BASE32;
  }
405 
  // Shared instance returned by base32Hex(): alphabet 0-9 followed by A-V, padded with '='.
  private static final BaseEncoding BASE32_HEX =
      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

  /**
   * The "base32hex" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
   * with Extended Hex Alphabet.  There is no corresponding encoding in RFC 3548.
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * <p>Every call returns the same instance.
   */
  public static BaseEncoding base32Hex() {
    return BASE32_HEX;
  }
424 
  // Shared instance returned by base16(): upper-case hexadecimal digits. The padding character is
  // null because this encoding is constructed without one.
  private static final BaseEncoding BASE16 =
      new StandardBaseEncoding("base16()", "0123456789ABCDEF", null);

  /**
   * The "base16" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-8">RFC 4648 section 8</a>, Base 16 Encoding.
   * (This is the same as the base 16 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
   * "hexadecimal" format.
   *
   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and
   * {@link #omitPadding()} have no effect.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * <p>Every call returns the same instance.
   */
  public static BaseEncoding base16() {
    return BASE16;
  }
445 
446   private static final class Alphabet extends CharMatcher {
447     private final String name;
448     // this is meant to be immutable -- don't modify it!
449     private final char[] chars;
450     final int mask;
451     final int bitsPerChar;
452     final int charsPerChunk;
453     final int bytesPerChunk;
454     private final byte[] decodabet;
455     private final boolean[] validPadding;
456 
457     Alphabet(String name, char[] chars) {
458       this.name = checkNotNull(name);
459       this.chars = checkNotNull(chars);
460       try {
461         this.bitsPerChar = log2(chars.length, UNNECESSARY);
462       } catch (ArithmeticException e) {
463         throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
464       }
465 
466       /*
467        * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
468        * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
469        */
470       int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
471       this.charsPerChunk = 8 / gcd;
472       this.bytesPerChunk = bitsPerChar / gcd;
473 
474       this.mask = chars.length - 1;
475 
476       byte[] decodabet = new byte[Ascii.MAX + 1];
477       Arrays.fill(decodabet, (byte) -1);
478       for (int i = 0; i < chars.length; i++) {
479         char c = chars[i];
480         checkArgument(CharMatcher.ASCII.matches(c), "Non-ASCII character: %s", c);
481         checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
482         decodabet[c] = (byte) i;
483       }
484       this.decodabet = decodabet;
485 
486       boolean[] validPadding = new boolean[charsPerChunk];
487       for (int i = 0; i < bytesPerChunk; i++) {
488         validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
489       }
490       this.validPadding = validPadding;
491     }
492 
493     char encode(int bits) {
494       return chars[bits];
495     }
496 
497     boolean isValidPaddingStartPosition(int index) {
498       return validPadding[index % charsPerChunk];
499     }
500 
501     int decode(char ch) throws IOException {
502       if (ch > Ascii.MAX || decodabet[ch] == -1) {
503         throw new DecodingException("Unrecognized character: " + ch);
504       }
505       return decodabet[ch];
506     }
507 
508     private boolean hasLowerCase() {
509       for (char c : chars) {
510         if (Ascii.isLowerCase(c)) {
511           return true;
512         }
513       }
514       return false;
515     }
516 
517     private boolean hasUpperCase() {
518       for (char c : chars) {
519         if (Ascii.isUpperCase(c)) {
520           return true;
521         }
522       }
523       return false;
524     }
525 
526     Alphabet upperCase() {
527       if (!hasLowerCase()) {
528         return this;
529       } else {
530         checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
531         char[] upperCased = new char[chars.length];
532         for (int i = 0; i < chars.length; i++) {
533           upperCased[i] = Ascii.toUpperCase(chars[i]);
534         }
535         return new Alphabet(name + ".upperCase()", upperCased);
536       }
537     }
538 
539     Alphabet lowerCase() {
540       if (!hasUpperCase()) {
541         return this;
542       } else {
543         checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
544         char[] lowerCased = new char[chars.length];
545         for (int i = 0; i < chars.length; i++) {
546           lowerCased[i] = Ascii.toLowerCase(chars[i]);
547         }
548         return new Alphabet(name + ".lowerCase()", lowerCased);
549       }
550     }
551 
552     @Override
553     public boolean matches(char c) {
554       return CharMatcher.ASCII.matches(c) && decodabet[c] != -1;
555     }
556 
557     @Override
558     public String toString() {
559       return name;
560     }
561   }
562 
563   static final class StandardBaseEncoding extends BaseEncoding {
564     // TODO(user): provide a useful toString
565     private final Alphabet alphabet;
566 
567     @Nullable
568     private final Character paddingChar;
569 
570     StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
571       this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
572     }
573 
574     StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) {
575       this.alphabet = checkNotNull(alphabet);
576       checkArgument(paddingChar == null || !alphabet.matches(paddingChar),
577           "Padding character %s was already in alphabet", paddingChar);
578       this.paddingChar = paddingChar;
579     }
580 
581     @Override
582     CharMatcher padding() {
583       return (paddingChar == null) ? CharMatcher.NONE : CharMatcher.is(paddingChar.charValue());
584     }
585 
586     @Override
587     int maxEncodedSize(int bytes) {
588       return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
589     }
590 
591     @Override
592     ByteOutput encodingStream(final CharOutput out) {
593       checkNotNull(out);
594       return new ByteOutput() {
595         int bitBuffer = 0;
596         int bitBufferLength = 0;
597         int writtenChars = 0;
598 
599         @Override
600         public void write(byte b) throws IOException {
601           bitBuffer <<= 8;
602           bitBuffer |= b & 0xFF;
603           bitBufferLength += 8;
604           while (bitBufferLength >= alphabet.bitsPerChar) {
605             int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar))
606                 & alphabet.mask;
607             out.write(alphabet.encode(charIndex));
608             writtenChars++;
609             bitBufferLength -= alphabet.bitsPerChar;
610           }
611         }
612 
613         @Override
614         public void flush() throws IOException {
615           out.flush();
616         }
617 
618         @Override
619         public void close() throws IOException {
620           if (bitBufferLength > 0) {
621             int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength))
622                 & alphabet.mask;
623             out.write(alphabet.encode(charIndex));
624             writtenChars++;
625             if (paddingChar != null) {
626               while (writtenChars % alphabet.charsPerChunk != 0) {
627                 out.write(paddingChar.charValue());
628                 writtenChars++;
629               }
630             }
631           }
632           out.close();
633         }
634       };
635     }
636 
637     @Override
638     int maxDecodedSize(int chars) {
639       return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
640     }
641 
642     @Override
643     ByteInput decodingStream(final CharInput reader) {
644       checkNotNull(reader);
645       return new ByteInput() {
646         int bitBuffer = 0;
647         int bitBufferLength = 0;
648         int readChars = 0;
649         boolean hitPadding = false;
650         final CharMatcher paddingMatcher = padding();
651 
652         @Override
653         public int read() throws IOException {
654           while (true) {
655             int readChar = reader.read();
656             if (readChar == -1) {
657               if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
658                 throw new DecodingException("Invalid input length " + readChars);
659               }
660               return -1;
661             }
662             readChars++;
663             char ch = (char) readChar;
664             if (paddingMatcher.matches(ch)) {
665               if (!hitPadding
666                   && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
667                 throw new DecodingException("Padding cannot start at index " + readChars);
668               }
669               hitPadding = true;
670             } else if (hitPadding) {
671               throw new DecodingException(
672                   "Expected padding character but found '" + ch + "' at index " + readChars);
673             } else {
674               bitBuffer <<= alphabet.bitsPerChar;
675               bitBuffer |= alphabet.decode(ch);
676               bitBufferLength += alphabet.bitsPerChar;
677 
678               if (bitBufferLength >= 8) {
679                 bitBufferLength -= 8;
680                 return (bitBuffer >> bitBufferLength) & 0xFF;
681               }
682             }
683           }
684         }
685 
686         @Override
687         public void close() throws IOException {
688           reader.close();
689         }
690       };
691     }
692 
693     @Override
694     public BaseEncoding omitPadding() {
695       return (paddingChar == null) ? this : new StandardBaseEncoding(alphabet, null);
696     }
697 
698     @Override
699     public BaseEncoding withPadChar(char padChar) {
700       if (8 % alphabet.bitsPerChar == 0 ||
701           (paddingChar != null && paddingChar.charValue() == padChar)) {
702         return this;
703       } else {
704         return new StandardBaseEncoding(alphabet, padChar);
705       }
706     }
707 
708     @Override
709     public BaseEncoding withSeparator(String separator, int afterEveryChars) {
710       checkNotNull(separator);
711       checkArgument(padding().or(alphabet).matchesNoneOf(separator),
712           "Separator cannot contain alphabet or padding characters");
713       return new SeparatedBaseEncoding(this, separator, afterEveryChars);
714     }
715 
716     private transient BaseEncoding upperCase;
717     private transient BaseEncoding lowerCase;
718 
719     @Override
720     public BaseEncoding upperCase() {
721       BaseEncoding result = upperCase;
722       if (result == null) {
723         Alphabet upper = alphabet.upperCase();
724         result = upperCase =
725             (upper == alphabet) ? this : new StandardBaseEncoding(upper, paddingChar);
726       }
727       return result;
728     }
729 
730     @Override
731     public BaseEncoding lowerCase() {
732       BaseEncoding result = lowerCase;
733       if (result == null) {
734         Alphabet lower = alphabet.lowerCase();
735         result = lowerCase =
736             (lower == alphabet) ? this : new StandardBaseEncoding(lower, paddingChar);
737       }
738       return result;
739     }
740 
741     @Override
742     public String toString() {
743       StringBuilder builder = new StringBuilder("BaseEncoding.");
744       builder.append(alphabet.toString());
745       if (8 % alphabet.bitsPerChar != 0) {
746         if (paddingChar == null) {
747           builder.append(".omitPadding()");
748         } else {
749           builder.append(".withPadChar(").append(paddingChar).append(')');
750         }
751       }
752       return builder.toString();
753     }
754   }
755 
756   static CharInput ignoringInput(final CharInput delegate, final CharMatcher toIgnore) {
757     checkNotNull(delegate);
758     checkNotNull(toIgnore);
759     return new CharInput() {
760       @Override
761       public int read() throws IOException {
762         int readChar;
763         do {
764           readChar = delegate.read();
765         } while (readChar != -1 && toIgnore.matches((char) readChar));
766         return readChar;
767       }
768 
769       @Override
770       public void close() throws IOException {
771         delegate.close();
772       }
773     };
774   }
775 
776   static CharOutput separatingOutput(
777       final CharOutput delegate, final String separator, final int afterEveryChars) {
778     checkNotNull(delegate);
779     checkNotNull(separator);
780     checkArgument(afterEveryChars > 0);
781     return new CharOutput() {
782       int charsUntilSeparator = afterEveryChars;
783 
784       @Override
785       public void write(char c) throws IOException {
786         if (charsUntilSeparator == 0) {
787           for (int i = 0; i < separator.length(); i++) {
788             delegate.write(separator.charAt(i));
789           }
790           charsUntilSeparator = afterEveryChars;
791         }
792         delegate.write(c);
793         charsUntilSeparator--;
794       }
795 
796       @Override
797       public void flush() throws IOException {
798         delegate.flush();
799       }
800 
801       @Override
802       public void close() throws IOException {
803         delegate.close();
804       }
805     };
806   }
807 
808   static final class SeparatedBaseEncoding extends BaseEncoding {
809     private final BaseEncoding delegate;
810     private final String separator;
811     private final int afterEveryChars;
812     private final CharMatcher separatorChars;
813 
814     SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
815       this.delegate = checkNotNull(delegate);
816       this.separator = checkNotNull(separator);
817       this.afterEveryChars = afterEveryChars;
818       checkArgument(
819           afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
820       this.separatorChars = CharMatcher.anyOf(separator).precomputed();
821     }
822 
823     @Override
824     CharMatcher padding() {
825       return delegate.padding();
826     }
827 
828     @Override
829     int maxEncodedSize(int bytes) {
830       int unseparatedSize = delegate.maxEncodedSize(bytes);
831       return unseparatedSize + separator.length()
832           * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
833     }
834 
835     @Override
836     ByteOutput encodingStream(final CharOutput output) {
837       return delegate.encodingStream(separatingOutput(output, separator, afterEveryChars));
838     }
839 
840     @Override
841     int maxDecodedSize(int chars) {
842       return delegate.maxDecodedSize(chars);
843     }
844 
845     @Override
846     ByteInput decodingStream(final CharInput input) {
847       return delegate.decodingStream(ignoringInput(input, separatorChars));
848     }
849 
850     @Override
851     public BaseEncoding omitPadding() {
852       return delegate.omitPadding().withSeparator(separator, afterEveryChars);
853     }
854 
855     @Override
856     public BaseEncoding withPadChar(char padChar) {
857       return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
858     }
859 
860     @Override
861     public BaseEncoding withSeparator(String separator, int afterEveryChars) {
862       throw new UnsupportedOperationException("Already have a separator");
863     }
864 
865     @Override
866     public BaseEncoding upperCase() {
867       return delegate.upperCase().withSeparator(separator, afterEveryChars);
868     }
869 
870     @Override
871     public BaseEncoding lowerCase() {
872       return delegate.lowerCase().withSeparator(separator, afterEveryChars);
873     }
874 
875     @Override
876     public String toString() {
877       return delegate.toString() +
878           ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
879     }
880   }
881 }