View Javadoc
1   /*
2    * Copyright (C) 2012 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    * http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package com.google.common.base;
18  
19  import com.google.common.annotations.GwtIncompatible;
20  import com.google.common.annotations.VisibleForTesting;
21  import com.google.common.base.CharMatcher.FastMatcher;
22  
23  import java.util.BitSet;
24  
25  /**
26   * An immutable version of CharMatcher for smallish sets of characters that uses a hash table
27   * with linear probing to check for matches.
28   *
29   * @author Christopher Swenson
30   */
31  @GwtIncompatible("no precomputation is done in GWT")
32  final class SmallCharMatcher extends FastMatcher {
33    static final int MAX_SIZE = 1023;
34    private final char[] table;
35    private final boolean containsZero;
36    private final long filter;
37  
38    private SmallCharMatcher(char[] table, long filter, boolean containsZero,
39        String description) {
40      super(description);
41      this.table = table;
42      this.filter = filter;
43      this.containsZero = containsZero;
44    }
45    
46    private static final int C1 = 0xcc9e2d51;
47    private static final int C2 = 0x1b873593;
48  
49    /*
50     * This method was rewritten in Java from an intermediate step of the Murmur hash function in
51     * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp, which contained the
52     * following header:
53     *
54     * MurmurHash3 was written by Austin Appleby, and is placed in the public domain. The author
55     * hereby disclaims copyright to this source code.
56     */
57    static int smear(int hashCode) {
58      return C2 * Integer.rotateLeft(hashCode * C1, 15);
59    }
60  
61    private boolean checkFilter(int c) {
62      return 1 == (1 & (filter >> c));
63    }
64  
65    // This is all essentially copied from ImmutableSet, but we have to duplicate because
66    // of dependencies.
67  
68    // Represents how tightly we can pack things, as a maximum.
69    private static final double DESIRED_LOAD_FACTOR = 0.5;
70  
71   /**
72    * Returns an array size suitable for the backing array of a hash table that
73    * uses open addressing with linear probing in its implementation.  The
74    * returned size is the smallest power of two that can hold setSize elements
75    * with the desired load factor.
76    */
77    @VisibleForTesting static int chooseTableSize(int setSize) {
78      if (setSize == 1) {
79        return 2;
80      }
81      // Correct the size for open addressing to match desired load factor.
82      // Round up to the next highest power of 2.
83      int tableSize = Integer.highestOneBit(setSize - 1) << 1;
84      while (tableSize * DESIRED_LOAD_FACTOR < setSize) {
85        tableSize <<= 1;
86      }
87      return tableSize;
88    }
89  
90    static CharMatcher from(BitSet chars, String description) {
91      // Compute the filter.
92      long filter = 0;
93      int size = chars.cardinality();
94      boolean containsZero = chars.get(0);
95      // Compute the hash table.
96      char[] table = new char[chooseTableSize(size)];
97      int mask = table.length - 1;
98      for (int c = chars.nextSetBit(0); c != -1; c = chars.nextSetBit(c + 1)) {
99        // Compute the filter at the same time.
100       filter |= 1L << c;
101       int index = smear(c) & mask;
102       while (true) {
103         // Check for empty.
104         if (table[index] == 0) {
105           table[index] = (char) c;
106           break;
107         }
108         // Linear probing.
109         index = (index + 1) & mask;
110       }
111     }
112     return new SmallCharMatcher(table, filter, containsZero, description);
113   }
114 
115   @Override
116   public boolean matches(char c) {
117     if (c == 0) {
118       return containsZero;
119     }
120     if (!checkFilter(c)) {
121       return false;
122     }
123     int mask = table.length - 1;
124     int startingIndex = smear(c) & mask;
125     int index = startingIndex;
126     do {
127       // Check for empty.
128       if (table[index] == 0) {
129         return false;
130       // Check for match.
131       } else if (table[index] == c) {
132         return true;
133       } else {
134         // Linear probing.
135         index = (index + 1) & mask;
136       }
137       // Check to see if we wrapped around the whole table.
138     } while (index != startingIndex);
139     return false;
140   }
141 
142   @Override
143   void setBits(BitSet table) {
144     if (containsZero) {
145       table.set(0);
146     }
147     for (char c : this.table) {
148       if (c != 0) {
149         table.set(c);
150       }
151     }
152   }
153 }