// Adds a new symbol, built from buffer[start .. start + len), to the table
// and returns it.
//
// buffer, start, len: character range holding the symbol text.
// h:     hash value passed in by the caller; not used by this method.
// index: primary slot index for the symbol; recomputed here if the table
//        gets rehashed before insertion.
//
// Returns the String that was stored (the interned instance when
// INTERN_FIELD_NAMES is enabled in _flags).
private string _addSymbol(char[] buffer, int start, int len, int h, int index)
{
    if (!_dirty)
    {
        // need to do copy-on-write?
        copyArrays();
        _dirty = true;
    }
    else if (_size >= _sizeThreshold)
    {
        // Need to expand?
        rehash();
        /* Need to recalc hash; rare occurrence (index mask has been
         * recalculated as part of rehash)
         */
        index = _hashToIndex(calcHash(buffer, start, len));
    }

    string newSymbol = new string(buffer, start, len);
    if (com.fasterxml.jackson.core.JsonFactory.Feature.INTERN_FIELD_NAMES.enabledIn(_flags))
    {
        newSymbol = com.fasterxml.jackson.core.util.InternCache.instance.intern(newSymbol);
    }
    ++_size;

    // Ok; do we need to add primary entry, or a bucket?
    if (_symbols[index] == null)
    {
        _symbols[index] = newSymbol;
    }
    else
    {
        // Bucket array is indexed by half of the primary index
        int bix = (index >> 1);
        com.fasterxml.jackson.core.sym.CharsToNameCanonicalizer.Bucket newB =
            new com.fasterxml.jackson.core.sym.CharsToNameCanonicalizer.Bucket(newSymbol, _buckets[bix]);
        int collLen = newB.length;
        if (collLen > MAX_COLL_CHAIN_LENGTH)
        {
            /* 23-May-2014, tatu: Instead of throwing an exception right away, let's handle
             *   in bit smarter way.
             */
            _handleSpillOverflow(bix, newB);
        }
        else
        {
            _buckets[bix] = newB;
            // System.Math.Max (capital M): the .NET method; "max" is the Java spelling
            _longestCollisionList = System.Math.Max(collLen, _longestCollisionList);
        }
    }
    return newSymbol;
}
// Creates a collision-chain node holding symbol 's' and linking to the
// existing chain 'n' (which may be null when this is the first node).
// 'length' records the number of nodes in the chain starting at this one.
public Bucket(string s, com.fasterxml.jackson.core.sym.CharsToNameCanonicalizer.Bucket n)
{
    symbol = s;
    next = n;
    if (n == null)
    {
        // No tail: this node is the whole chain
        length = 1;
    }
    else
    {
        // One longer than the chain being extended
        length = n.length + 1;
    }
}
/// <summary>
/// Method called when size (number of entries) of symbol table grows
/// so big that load factor is exceeded.
/// </summary>
/// <remarks>
/// Since size has to remain power of two, arrays will then always be
/// doubled. Main work is really redistributing old entries into new
/// String/Bucket entries.
/// If doubling would exceed MAX_T_SIZE, the tables are instead replaced
/// with empty ones of DEFAULT_T_SIZE, the entry count is reset to zero
/// and canonicalization is switched off.
/// Throws Sharpen.Error if the number of redistributed entries does not
/// match the recorded size.
/// </remarks>
private void rehash()
{
    int size = _symbols.Length;
    int newSize = size + size;

    /* 12-Mar-2010, tatu: Let's actually limit maximum size we are
     *    prepared to use, to guard against OOME in case of unbounded
     *    name sets (unique [non-repeating] names)
     */
    if (newSize > MAX_T_SIZE)
    {
        /* If this happens, there's no point in either growing or shrinking hash areas.
         * Rather, let's just cut our losses and stop canonicalizing.
         */
        _size = 0;
        _canonicalize = false;
        // in theory, could just leave these as null, but...
        _symbols = new string[DEFAULT_T_SIZE];
        _buckets = new com.fasterxml.jackson.core.sym.CharsToNameCanonicalizer.Bucket[DEFAULT_T_SIZE >> 1];
        _indexMask = DEFAULT_T_SIZE - 1;
        _dirty = true;
        return;
    }

    string[] oldSyms = _symbols;
    com.fasterxml.jackson.core.sym.CharsToNameCanonicalizer.Bucket[] oldBuckets = _buckets;
    _symbols = new string[newSize];
    _buckets = new com.fasterxml.jackson.core.sym.CharsToNameCanonicalizer.Bucket[newSize >> 1];
    // Let's update index mask, threshold, now (needed for rehashing)
    _indexMask = newSize - 1;
    _sizeThreshold = _thresholdSize(newSize);

    int count = 0; // let's do sanity check
    int maxColl = 0;

    /* Need to do two loops, unfortunately, since spill-over area is
     * only half the size:
     */
    for (int i = 0; i < size; ++i)
    {
        string symbol = oldSyms[i];
        if (symbol != null)
        {
            ++count;
            maxColl = _rehashSymbol(symbol, maxColl);
        }
    }

    size >>= 1;
    for (int i = 0; i < size; ++i)
    {
        com.fasterxml.jackson.core.sym.CharsToNameCanonicalizer.Bucket b = oldBuckets[i];
        while (b != null)
        {
            ++count;
            maxColl = _rehashSymbol(b.symbol, maxColl);
            b = b.next;
        }
    }

    _longestCollisionList = maxColl;
    _overflows = null;

    if (count != _size)
    {
        throw new Sharpen.Error("Internal error on SymbolTable.rehash(): had " + _size + " entries; now have " + count + ".");
    }
}

/// <summary>
/// Helper for <c>rehash()</c>: places one already-existing symbol into the
/// current tables, using the primary slot when it is free and otherwise
/// prepending to the collision chain for that slot.
/// </summary>
/// <param name="symbol">Symbol to place.</param>
/// <param name="maxColl">Longest collision chain length seen so far.</param>
/// <returns>
/// <paramref name="maxColl"/>, or the length of the chain just extended if
/// that is larger.
/// </returns>
private int _rehashSymbol(string symbol, int maxColl)
{
    int index = _hashToIndex(calcHash(symbol));
    if (_symbols[index] == null)
    {
        _symbols[index] = symbol;
        return maxColl;
    }
    // Bucket array is indexed by half of the primary index
    int bix = (index >> 1);
    com.fasterxml.jackson.core.sym.CharsToNameCanonicalizer.Bucket newB =
        new com.fasterxml.jackson.core.sym.CharsToNameCanonicalizer.Bucket(symbol, _buckets[bix]);
    _buckets[bix] = newB;
    // System.Math.Max (capital M): the .NET method; "max" is the Java spelling
    return System.Math.Max(maxColl, newB.length);
}