public void InitReader(ByteSliceReader reader, RawPostingList p, int stream)
{
    System.Diagnostics.Debug.Assert(stream < streamCount);
    int[] ints = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
    int upto = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
    reader.Init(bytePool, p.byteStart + stream * ByteBlockPool.FIRST_LEVEL_SIZE, ints[upto + stream]);
}
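// Illustrative sketch (not part of the sources above): a posting's packed intStart
// splits into (buffer, offset) via the block shift/mask pair used by InitReader.
// The constant values and the sample address below are assumptions for the example.
static void PackedAddressDemo()
{
    const int INT_BLOCK_SHIFT = 13;                   // assumed: blocks of 8192 ints
    const int INT_BLOCK_MASK = (1 << INT_BLOCK_SHIFT) - 1;

    int intStart = 20000;                             // hypothetical packed address
    int bufferIndex = intStart >> INT_BLOCK_SHIFT;    // 2: third buffer in the pool
    int offset = intStart & INT_BLOCK_MASK;           // 3616: slot inside that buffer
    System.Console.WriteLine($"buffer={bufferIndex} offset={offset}");
}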
/// <summary>Called when postings hash is too small (> 50%
/// occupied) or too large (< 20% occupied).
/// </summary>
internal void RehashPostings(int newSize)
{
    int newMask = newSize - 1;

    // Rented arrays are not zeroed and may be longer than requested, so clear
    // it before rebuilding the hash; stale entries would corrupt the probing.
    RawPostingList[] newHash = ArrayPool<RawPostingList>.Shared.Rent(newSize);
    System.Array.Clear(newHash, 0, newHash.Length);

    for (int i = 0; i < postingsHashSize; i++)
    {
        RawPostingList p0 = postingsHash[i];
        if (p0 != null)
        {
            int code;
            if (perThread.primary)
            {
                int start = p0.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
                char[] text = charPool.buffers[p0.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
                int pos = start;
                while (text[pos] != 0xffff)
                    pos++;
                code = 0;
                while (pos > start)
                    code = (code * 31) + text[--pos];
            }
            else
                code = p0.textStart;

            int hashPos = code & newMask;
            System.Diagnostics.Debug.Assert(hashPos >= 0);
            if (newHash[hashPos] != null)
            {
                int inc = ((code >> 8) + code) | 1;
                do
                {
                    code += inc;
                    hashPos = code & newMask;
                }
                while (newHash[hashPos] != null);
            }
            newHash[hashPos] = p0;
        }
    }

    ArrayPool<RawPostingList>.Shared.Return(postingsHash, clearArray: true);
    postingsHashMask = newMask;
    postingsHash = newHash;
    postingsHashSize = newSize;
    postingsHashHalfSize = newSize >> 1;
}
internal void ShrinkFreePostings(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
{
    System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount,
        System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer);

    int newSize = ArrayUtil.GetShrinkSize(postingsFreeList.Length, postingsAllocCount);
    if (newSize != postingsFreeList.Length)
    {
        RawPostingList[] newArray = new RawPostingList[newSize];
        Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
        postingsFreeList = newArray;
    }
}
internal void shrinkFreePostings(IDictionary<object, ICollection<object>> threadsAndFields, DocumentsWriter.FlushState state)
{
    System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount,
        System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer);

    int newSize = ArrayUtil.GetShrinkSize(postingsFreeList.Length, postingsAllocCount);
    if (newSize != postingsFreeList.Length)
    {
        RawPostingList[] newArray = new RawPostingList[newSize];
        System.Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
        postingsFreeList = newArray;
    }
}
internal override void AddTerm(RawPostingList p0)
{
    System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.addTerm start"));

    FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList)p0;
    System.Diagnostics.Debug.Assert(omitTermFreqAndPositions || p.docFreq > 0);

    if (omitTermFreqAndPositions)
    {
        if (docState.docID != p.lastDocID)
        {
            System.Diagnostics.Debug.Assert(docState.docID > p.lastDocID);
            termsHashPerField.WriteVInt(0, p.lastDocCode);
            p.lastDocCode = docState.docID - p.lastDocID;
            p.lastDocID = docState.docID;
        }
    }
    else
    {
        if (docState.docID != p.lastDocID)
        {
            System.Diagnostics.Debug.Assert(docState.docID > p.lastDocID);
            // Term not yet seen in the current doc but previously
            // seen in other doc(s) since the last flush

            // Now that we know doc freq for previous doc,
            // write it & lastDocCode
            if (1 == p.docFreq)
                termsHashPerField.WriteVInt(0, p.lastDocCode | 1);
            else
            {
                termsHashPerField.WriteVInt(0, p.lastDocCode);
                termsHashPerField.WriteVInt(0, p.docFreq);
            }
            p.docFreq = 1;
            p.lastDocCode = (docState.docID - p.lastDocID) << 1;
            p.lastDocID = docState.docID;
            WriteProx(p, fieldState.position);
        }
        else
        {
            p.docFreq++;
            WriteProx(p, fieldState.position - p.lastPosition);
        }
    }
}
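// Hedged sketch (not from the sources): the doc-delta/low-bit convention AddTerm
// writes. Doc deltas are shifted left by one, and a set low bit means "freq == 1,
// no separate freq VInt follows". The helper name is hypothetical.
static int EncodeDocCode(int docDelta, int docFreq)
{
    // freq == 1 is folded into the low bit so the common case costs one VInt
    return docFreq == 1 ? (docDelta << 1) | 1 : docDelta << 1;
}
// e.g. EncodeDocCode(3, 1) == 7: a reader decodes delta 3 and infers freq 1,
// while EncodeDocCode(3, 5) == 6 signals that a freq VInt of 5 follows.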
public void getPostings(RawPostingList[] postings)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermsHash.getPostings start"));

        System.Diagnostics.Debug.Assert(postingsFreeCount <= postingsFreeList.Length);
        System.Diagnostics.Debug.Assert(postingsFreeCount <= postingsAllocCount, "postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount);

        int numToCopy;
        if (postingsFreeCount < postings.Length)
            numToCopy = postingsFreeCount;
        else
            numToCopy = postings.Length;
        int start = postingsFreeCount - numToCopy;
        System.Diagnostics.Debug.Assert(start >= 0);
        System.Diagnostics.Debug.Assert(start + numToCopy <= postingsFreeList.Length);
        System.Diagnostics.Debug.Assert(numToCopy <= postings.Length);
        System.Array.Copy(postingsFreeList, start, postings, 0, numToCopy);

        // Directly allocate the remainder if any
        if (numToCopy != postings.Length)
        {
            int extra = postings.Length - numToCopy;
            int newPostingsAllocCount = postingsAllocCount + extra;

            consumer.createPostings(postings, numToCopy, extra);
            System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermsHash.getPostings after create"));
            postingsAllocCount += extra;

            if (trackAllocations)
                docWriter.BytesAllocated(extra * bytesPerPosting);

            if (newPostingsAllocCount > postingsFreeList.Length)
                // Pre-allocate the postingsFreeList so it's large
                // enough to hold all postings we've given out
                postingsFreeList = new RawPostingList[ArrayUtil.GetNextSize(newPostingsAllocCount)];
        }

        postingsFreeCount -= numToCopy;

        if (trackAllocations)
            docWriter.BytesUsed(postings.Length * bytesPerPosting);
    }
}
internal override void NewTerm(RawPostingList p0)
{
    // First time we're seeing this term since the last
    // flush
    System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.newTerm start"));

    FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList)p0;
    p.lastDocID = docState.docID;
    if (omitTermFreqAndPositions)
    {
        p.lastDocCode = docState.docID;
    }
    else
    {
        p.lastDocCode = docState.docID << 1;
        p.docFreq = 1;
        WriteProx(p, fieldState.position);
    }
}
/// <summary>Compares term text for two Posting instances and
/// returns -1 if p1 < p2; 1 if p1 > p2; else 0.
/// </summary>
int comparePostings(RawPostingList p1, RawPostingList p2)
{
    if (p1 == p2)
        return 0;

    char[] text1 = charPool.buffers[p1.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
    int pos1 = p1.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
    char[] text2 = charPool.buffers[p2.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
    int pos2 = p2.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

    System.Diagnostics.Debug.Assert(text1 != text2 || pos1 != pos2);

    while (true)
    {
        char c1 = text1[pos1++];
        char c2 = text2[pos2++];
        if (c1 != c2)
        {
            if (0xffff == c2)
                return 1;
            else if (0xffff == c1)
                return -1;
            else
                return c1 - c2;
        }
        else
            // This method should never compare equal postings
            // unless p1==p2
            System.Diagnostics.Debug.Assert(c1 != 0xffff);
    }
}
private bool PostingEquals(ref RawPostingList currentP, char[] tokenText, int tokenTextLen)
{
    char[] text = perThread.charPool.buffers[currentP.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
    System.Diagnostics.Debug.Assert(text != null);
    int pos = currentP.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

    for (int tokenPos = 0; tokenPos < tokenTextLen; pos++, tokenPos++)
    {
        if (tokenText[tokenPos] != text[pos])
            return false;
    }
    // The stored term matches only if it ends exactly here.
    return 0xffff == text[pos];
}
internal override void addTerm(Token t, RawPostingList p0)
{
    System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.addTerm start"));

    TermVectorsTermsWriter.PostingList p = (TermVectorsTermsWriter.PostingList)p0;
    p.freq++;

    if (doVectorOffsets)
    {
        int startOffset = fieldState.offset + t.StartOffset();
        int endOffset = fieldState.offset + t.EndOffset();

        termsHashPerField.writeVInt(1, startOffset - p.lastOffset);
        termsHashPerField.writeVInt(1, endOffset - startOffset);
        p.lastOffset = endOffset;
    }

    if (doVectorPositions)
    {
        termsHashPerField.writeVInt(0, fieldState.position - p.lastPosition);
        p.lastPosition = fieldState.position;
    }
}
internal void ShrinkFreePostings(IDictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount,
        System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer);

    int newSize = 1;
    if (newSize != postingsFreeList.Length)
    {
        if (postingsFreeCount > newSize)
        {
            if (trackAllocations)
                docWriter.BytesAllocated(-(postingsFreeCount - newSize) * bytesPerPosting);
            postingsFreeCount = newSize;
            postingsAllocCount = newSize;
        }

        RawPostingList[] newArray = new RawPostingList[newSize];
        Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
        postingsFreeList = newArray;
    }
}
internal override void NewTerm(RawPostingList p0)
{
    System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.newTerm start"));

    TermVectorsTermsWriter.PostingList p = (TermVectorsTermsWriter.PostingList)p0;

    p.freq = 1;

    if (doVectorOffsets)
    {
        int startOffset = fieldState.offset + offsetAttribute.StartOffset();
        int endOffset = fieldState.offset + offsetAttribute.EndOffset();

        termsHashPerField.WriteVInt(1, startOffset);
        termsHashPerField.WriteVInt(1, endOffset - startOffset);
        p.lastOffset = endOffset;
    }

    if (doVectorPositions)
    {
        termsHashPerField.WriteVInt(0, fieldState.position);
        p.lastPosition = fieldState.position;
    }
}
private static bool noNullPostings(RawPostingList[] postings, int count, System.String details)
{
    for (int i = 0; i < count; i++)
        System.Diagnostics.Debug.Assert(postings[i] != null, "postings[" + i + "] of " + count + " is null: " + details);
    return true;
}
internal override void NewTerm(RawPostingList p0)
{
    System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.newTerm start"));

    TermVectorsTermsWriter.PostingList p = (TermVectorsTermsWriter.PostingList)p0;

    p.freq = 1;

    if (doVectorOffsets)
    {
        int startOffset = fieldState.offset + offsetAttribute.StartOffset;
        int endOffset = fieldState.offset + offsetAttribute.EndOffset;

        termsHashPerField.WriteVInt(1, startOffset);
        termsHashPerField.WriteVInt(1, endOffset - startOffset);
        p.lastOffset = endOffset;
    }

    if (doVectorPositions)
    {
        termsHashPerField.WriteVInt(0, fieldState.position);
        p.lastPosition = fieldState.position;
    }
}
// Secondary entry point (for 2nd & subsequent TermsHash),
// because token text has already been "interned" into
// textStart, so we hash by textStart
public void Add(int textStart)
{
    int code = textStart;

    int hashPos = code & postingsHashMask;

    System.Diagnostics.Debug.Assert(!postingsCompacted);

    // Locate RawPostingList in hash
    p = postingsHash[hashPos];

    if (p != null && p.textStart != textStart)
    {
        // Conflict: keep searching different locations in
        // the hash table.
        int inc = ((code >> 8) + code) | 1;
        do
        {
            code += inc;
            hashPos = code & postingsHashMask;
            p = postingsHash[hashPos];
        }
        while (p != null && p.textStart != textStart);
    }

    if (p == null)
    {
        // First time we are seeing this token since we last
        // flushed the hash.

        // Refill?
        if (0 == perThread.freePostingsCount)
            perThread.MorePostings();

        // Pull next free RawPostingList from free list
        p = perThread.freePostings[--perThread.freePostingsCount];
        System.Diagnostics.Debug.Assert(p != null);

        p.textStart = textStart;

        System.Diagnostics.Debug.Assert(postingsHash[hashPos] == null);
        postingsHash[hashPos] = p;
        numPostings++;

        if (numPostings == postingsHashHalfSize)
            RehashPostings(2 * postingsHashSize);

        // Init stream slices
        if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
            intPool.NextBuffer();

        if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt * ByteBlockPool.FIRST_LEVEL_SIZE)
            bytePool.NextBuffer();

        intUptos = intPool.buffer;
        intUptoStart = intPool.intUpto;
        intPool.intUpto += streamCount;

        p.intStart = intUptoStart + intPool.intOffset;

        for (int i = 0; i < streamCount; i++)
        {
            int upto = bytePool.NewSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
            intUptos[intUptoStart + i] = upto + bytePool.byteOffset;
        }
        p.byteStart = intUptos[intUptoStart];

        consumer.NewTerm(p);
    }
    else
    {
        intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
        intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
        consumer.AddTerm(p);
    }
}
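// Sketch (illustrative, with assumed numbers): the hash grows as soon as it is
// half full -- numPostings == postingsHashHalfSize triggers RehashPostings(2 * size) --
// which keeps probe chains short at the cost of a full rebuild.
static void GrowthDemo()
{
    int size = 16, halfSize = size >> 1, mask = size - 1;  // assumed starting size
    int numPostings = 8;                                   // reaches halfSize
    if (numPostings == halfSize)
    {
        size *= 2;             // RehashPostings(2 * postingsHashSize)
        mask = size - 1;       // mask tracks the new power-of-two size
        halfSize = size >> 1;  // next trigger at 16 live postings
    }
    System.Console.WriteLine($"size={size} mask={mask} halfSize={halfSize}");
}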
// Secondary entry point (for 2nd & subsequent TermsHash),
// because token text has already been "interned" into
// textStart, so we hash by textStart
public void add(Token token, int textStart)
{
    int code = textStart;

    int hashPos = code & postingsHashMask;

    System.Diagnostics.Debug.Assert(!postingsCompacted);

    // Locate RawPostingList in hash
    p = postingsHash[hashPos];

    if (p != null && p.textStart != textStart)
    {
        // Conflict: keep searching different locations in
        // the hash table.
        int inc = ((code >> 8) + code) | 1;
        do
        {
            code += inc;
            hashPos = code & postingsHashMask;
            p = postingsHash[hashPos];
        }
        while (p != null && p.textStart != textStart);
    }

    if (p == null)
    {
        // First time we are seeing this token since we last
        // flushed the hash.

        // Refill?
        if (0 == perThread.freePostingsCount)
            perThread.morePostings();

        // Pull next free RawPostingList from free list
        p = perThread.freePostings[--perThread.freePostingsCount];
        System.Diagnostics.Debug.Assert(p != null);

        p.textStart = textStart;

        System.Diagnostics.Debug.Assert(postingsHash[hashPos] == null);
        postingsHash[hashPos] = p;
        numPostings++;

        if (numPostings == postingsHashHalfSize)
            rehashPostings(2 * postingsHashSize);

        // Init stream slices
        if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
            intPool.nextBuffer();

        if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt * ByteBlockPool.FIRST_LEVEL_SIZE)
            bytePool.NextBuffer();

        intUptos = intPool.buffer;
        intUptoStart = intPool.intUpto;
        intPool.intUpto += streamCount;

        p.intStart = intUptoStart + intPool.intOffset;

        for (int i = 0; i < streamCount; i++)
        {
            int upto = bytePool.NewSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
            intUptos[intUptoStart + i] = upto + bytePool.byteOffset;
        }
        p.byteStart = intUptos[intUptoStart];

        consumer.newTerm(token, p);
    }
    else
    {
        intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
        intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
        consumer.addTerm(token, p);
    }
}
void quickSort(RawPostingList[] postings, int lo, int hi)
{
    if (lo >= hi)
        return;
    else if (hi == 1 + lo)
    {
        if (comparePostings(postings[lo], postings[hi]) > 0)
        {
            RawPostingList tmp = postings[lo];
            postings[lo] = postings[hi];
            postings[hi] = tmp;
        }
        return;
    }

    int mid = (int)((uint)(lo + hi) >> 1);

    if (comparePostings(postings[lo], postings[mid]) > 0)
    {
        RawPostingList tmp = postings[lo];
        postings[lo] = postings[mid];
        postings[mid] = tmp;
    }

    if (comparePostings(postings[mid], postings[hi]) > 0)
    {
        RawPostingList tmp = postings[mid];
        postings[mid] = postings[hi];
        postings[hi] = tmp;

        if (comparePostings(postings[lo], postings[mid]) > 0)
        {
            RawPostingList tmp2 = postings[lo];
            postings[lo] = postings[mid];
            postings[mid] = tmp2;
        }
    }

    int left = lo + 1;
    int right = hi - 1;

    if (left >= right)
        return;

    RawPostingList partition = postings[mid];

    for (; ;)
    {
        while (comparePostings(postings[right], partition) > 0)
            --right;

        while (left < right && comparePostings(postings[left], partition) <= 0)
            ++left;

        if (left < right)
        {
            RawPostingList tmp = postings[left];
            postings[left] = postings[right];
            postings[right] = tmp;
            --right;
        }
        else
            break;
    }

    quickSort(postings, lo, left);
    quickSort(postings, left + 1, hi);
}
/// <summary>Compares term text for two Posting instances and
/// returns -1 if p1 < p2; 1 if p1 > p2; else 0.
/// </summary>
internal int ComparePostings(RawPostingList p1, RawPostingList p2)
{
    if (p1 == p2)
        return 0;

    char[] text1 = charPool.buffers[p1.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
    int pos1 = p1.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
    char[] text2 = charPool.buffers[p2.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
    int pos2 = p2.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

    System.Diagnostics.Debug.Assert(text1 != text2 || pos1 != pos2);

    while (true)
    {
        char c1 = text1[pos1++];
        char c2 = text2[pos2++];
        if (c1 != c2)
        {
            if (0xffff == c2)
                return 1;
            else if (0xffff == c1)
                return -1;
            else
                return c1 - c2;
        }
        else
            // This method should never compare equal postings
            // unless p1==p2
            System.Diagnostics.Debug.Assert(c1 != 0xffff);
    }
}
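// Illustrative sketch (assumed layout): terms sit inline in the char pool,
// terminated by the 0xffff sentinel, so ComparePostings orders a proper prefix
// before any longer term without storing explicit lengths.
static void SentinelDemo()
{
    char[] pool = { 'a', 'b', (char)0xffff, 'a', 'b', 'c', (char)0xffff };
    // Walking offsets 0 ("ab") and 3 ("abc"): at the third step c1 == 0xffff
    // while c2 == 'c', so the comparison yields -1 and "ab" sorts first.
    int pos1 = 0, pos2 = 3;
    while (true)
    {
        char c1 = pool[pos1++], c2 = pool[pos2++];
        if (c1 != c2)
        {
            System.Console.WriteLine(0xffff == c1 ? -1 : 0xffff == c2 ? 1 : c1 - c2);
            break;
        }
    }
}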
internal abstract void AddTerm(RawPostingList p);
public void RecyclePostings(RawPostingList[] postings, int numPostings)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(postings.Length >= numPostings);

        // Move all Postings from this ThreadState back to our
        // free list.  We pre-allocated this array while we were
        // creating Postings to make sure it's large enough
        System.Diagnostics.Debug.Assert(postingsFreeCount + numPostings <= postingsFreeList.Length);
        Array.Copy(postings, 0, postingsFreeList, postingsFreeCount, numPostings);
        postingsFreeCount += numPostings;
    }
}
internal abstract void NewTerm(RawPostingList p);
/// <summary>Called when postings hash is too small (> 50%
/// occupied) or too large (< 20% occupied).
/// </summary>
internal void RehashPostings(int newSize)
{
    int newMask = newSize - 1;

    RawPostingList[] newHash = new RawPostingList[newSize];
    for (int i = 0; i < postingsHashSize; i++)
    {
        RawPostingList p0 = postingsHash[i];
        if (p0 != null)
        {
            int code;
            if (perThread.primary)
            {
                int start = p0.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
                char[] text = charPool.buffers[p0.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
                int pos = start;
                while (text[pos] != 0xffff)
                    pos++;
                code = 0;
                while (pos > start)
                    code = (code * 31) + text[--pos];
            }
            else
                code = p0.textStart;

            int hashPos = code & newMask;
            System.Diagnostics.Debug.Assert(hashPos >= 0);
            if (newHash[hashPos] != null)
            {
                int inc = ((code >> 8) + code) | 1;
                do
                {
                    code += inc;
                    hashPos = code & newMask;
                }
                while (newHash[hashPos] != null);
            }
            newHash[hashPos] = p0;
        }
    }

    postingsHashMask = newMask;
    postingsHash = newHash;
    postingsHashSize = newSize;
    postingsHashHalfSize = newSize >> 1;
}
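// Minimal sketch (not from the sources): the conflict probe above derives an odd
// increment from the hash code; because the table size is a power of two, an odd
// step is coprime with it and the probe eventually visits every slot.
static System.Collections.Generic.IEnumerable<int> ProbeSequence(int code, int mask)
{
    yield return code & mask;
    int inc = ((code >> 8) + code) | 1;   // "| 1" forces the step odd
    while (true)
    {
        code += inc;
        yield return code & mask;
    }
}
// e.g. the first 8 values of ProbeSequence(0x1234, 7) cover all slots 0..7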
// Primary entry point (for first TermsHash)
internal override void Add()
{
    System.Diagnostics.Debug.Assert(!postingsCompacted);

    // We are first in the chain so we must "intern" the
    // term text into textStart address

    // Get the text of this term.
    char[] tokenText = termAtt.TermBuffer();
    int tokenTextLen = termAtt.TermLength();

    // Compute hashcode & replace any invalid UTF16 sequences
    int downto = tokenTextLen;
    int code = 0;
    while (downto > 0)
    {
        char ch = tokenText[--downto];

        if (ch >= UnicodeUtil.UNI_SUR_LOW_START && ch <= UnicodeUtil.UNI_SUR_LOW_END)
        {
            if (0 == downto)
            {
                // Unpaired
                ch = tokenText[downto] = (char)UnicodeUtil.UNI_REPLACEMENT_CHAR;
            }
            else
            {
                char ch2 = tokenText[downto - 1];
                if (ch2 >= UnicodeUtil.UNI_SUR_HIGH_START && ch2 <= UnicodeUtil.UNI_SUR_HIGH_END)
                {
                    // OK: high followed by low.  This is a valid
                    // surrogate pair.
                    code = ((code * 31) + ch) * 31 + ch2;
                    downto--;
                    continue;
                }
                else
                {
                    // Unpaired
                    ch = tokenText[downto] = (char)UnicodeUtil.UNI_REPLACEMENT_CHAR;
                }
            }
        }
        else if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && (ch <= UnicodeUtil.UNI_SUR_HIGH_END || ch == 0xffff))
        {
            // Unpaired or 0xffff
            ch = tokenText[downto] = (char)UnicodeUtil.UNI_REPLACEMENT_CHAR;
        }

        code = (code * 31) + ch;
    }

    int hashPos = code & postingsHashMask;

    // Locate RawPostingList in hash
    p = postingsHash[hashPos];

    if (p != null && !PostingEquals(tokenText, tokenTextLen))
    {
        // Conflict: keep searching different locations in
        // the hash table.
        int inc = ((code >> 8) + code) | 1;
        do
        {
            code += inc;
            hashPos = code & postingsHashMask;
            p = postingsHash[hashPos];
        }
        while (p != null && !PostingEquals(tokenText, tokenTextLen));
    }

    if (p == null)
    {
        // First time we are seeing this token since we last
        // flushed the hash.
        int textLen1 = 1 + tokenTextLen;
        if (textLen1 + charPool.charUpto > DocumentsWriter.CHAR_BLOCK_SIZE)
        {
            if (textLen1 > DocumentsWriter.CHAR_BLOCK_SIZE)
            {
                // Just skip this term, to remain as robust as
                // possible during indexing.  A TokenFilter
                // can be inserted into the analyzer chain if
                // other behavior is wanted (pruning the term
                // to a prefix, throwing an exception, etc).
                if (docState.maxTermPrefix == null)
                    docState.maxTermPrefix = new System.String(tokenText, 0, 30);

                consumer.SkippingLongTerm();
                return;
            }
            charPool.NextBuffer();
        }

        // Refill?
        if (0 == perThread.freePostingsCount)
            perThread.MorePostings();

        // Pull next free RawPostingList from free list
        p = perThread.freePostings[--perThread.freePostingsCount];
        System.Diagnostics.Debug.Assert(p != null);

        char[] text = charPool.buffer;
        int textUpto = charPool.charUpto;
        p.textStart = textUpto + charPool.charOffset;
        charPool.charUpto += textLen1;
        Array.Copy(tokenText, 0, text, textUpto, tokenTextLen);
        text[textUpto + tokenTextLen] = (char)0xffff;

        System.Diagnostics.Debug.Assert(postingsHash[hashPos] == null);
        postingsHash[hashPos] = p;
        numPostings++;

        if (numPostings == postingsHashHalfSize)
            RehashPostings(2 * postingsHashSize);

        // Init stream slices
        if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
            intPool.NextBuffer();

        if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt * ByteBlockPool.FIRST_LEVEL_SIZE)
            bytePool.NextBuffer();

        intUptos = intPool.buffer;
        intUptoStart = intPool.intUpto;
        intPool.intUpto += streamCount;

        p.intStart = intUptoStart + intPool.intOffset;

        for (int i = 0; i < streamCount; i++)
        {
            int upto = bytePool.NewSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
            intUptos[intUptoStart + i] = upto + bytePool.byteOffset;
        }
        p.byteStart = intUptos[intUptoStart];

        consumer.NewTerm(p);
    }
    else
    {
        intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
        intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
        consumer.AddTerm(p);
    }

    if (doNextCall)
        nextPerField.Add(p.textStart);
}
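// Hedged worked example (not from the sources): Add hashes the term text with a
// 31-based rolling hash consumed from the last character backwards (surrogate
// cleanup omitted here), and RehashPostings recomputes the identical value from
// the stored, 0xffff-terminated copy, so a posting keeps a consistent slot.
static int RollingHash(char[] text, int len)
{
    int code = 0;
    for (int i = len - 1; i >= 0; i--)   // last char first, matching Add above
        code = (code * 31) + text[i];
    return code;
}
// RollingHash(new[] { 'a', 'b' }, 2) == 'b' * 31 + 'a' == 3135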
internal void QuickSort(RawPostingList[] postings, int lo, int hi)
{
    if (lo >= hi)
        return;
    else if (hi == 1 + lo)
    {
        if (ComparePostings(postings[lo], postings[hi]) > 0)
        {
            RawPostingList tmp = postings[lo];
            postings[lo] = postings[hi];
            postings[hi] = tmp;
        }
        return;
    }

    int mid = Number.URShift((lo + hi), 1);

    if (ComparePostings(postings[lo], postings[mid]) > 0)
    {
        RawPostingList tmp = postings[lo];
        postings[lo] = postings[mid];
        postings[mid] = tmp;
    }

    if (ComparePostings(postings[mid], postings[hi]) > 0)
    {
        RawPostingList tmp = postings[mid];
        postings[mid] = postings[hi];
        postings[hi] = tmp;

        if (ComparePostings(postings[lo], postings[mid]) > 0)
        {
            RawPostingList tmp2 = postings[lo];
            postings[lo] = postings[mid];
            postings[mid] = tmp2;
        }
    }

    int left = lo + 1;
    int right = hi - 1;

    if (left >= right)
        return;

    RawPostingList partition = postings[mid];

    for (; ;)
    {
        while (ComparePostings(postings[right], partition) > 0)
            --right;

        while (left < right && ComparePostings(postings[left], partition) <= 0)
            ++left;

        if (left < right)
        {
            RawPostingList tmp = postings[left];
            postings[left] = postings[right];
            postings[right] = tmp;
            --right;
        }
        else
            break;
    }

    QuickSort(postings, lo, left);
    QuickSort(postings, left + 1, hi);
}
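// Sketch (assumption: Number.URShift mirrors Java's >>> operator): computing the
// midpoint with an unsigned shift stays correct even if lo + hi overflows int,
// which a plain (lo + hi) / 2 would not. Overflow is only theoretical for these
// array indices, but the idiom is preserved from the Java original.
static int Midpoint(int lo, int hi)
{
    return (int)((uint)(lo + hi) >> 1);  // unsigned halving of the (possibly wrapped) sum
}
// Midpoint(int.MaxValue - 1, int.MaxValue) == int.MaxValue - 1, despite lo + hi wrapping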
internal abstract void addTerm(Token t, RawPostingList p);
internal abstract void newTerm(Token t, RawPostingList p);
internal abstract void CreatePostings(RawPostingList[] postings, int start, int count);
// Primary entry point (for first TermsHash)
internal override void add(Token token)
{
    System.Diagnostics.Debug.Assert(!postingsCompacted);

    // We are first in the chain so we must "intern" the
    // term text into textStart address

    // Get the text of this term.
    char[] tokenText = token.TermBuffer();
    int tokenTextLen = token.TermLength();

    // Compute hashcode & replace any invalid UTF16 sequences
    int downto = tokenTextLen;
    int code = 0;
    while (downto > 0)
    {
        char ch = tokenText[--downto];

        if (ch >= UnicodeUtil.UNI_SUR_LOW_START && ch <= UnicodeUtil.UNI_SUR_LOW_END)
        {
            if (0 == downto)
            {
                // Unpaired
                ch = tokenText[downto] = (char)UnicodeUtil.UNI_REPLACEMENT_CHAR;
            }
            else
            {
                char ch2 = tokenText[downto - 1];
                if (ch2 >= UnicodeUtil.UNI_SUR_HIGH_START && ch2 <= UnicodeUtil.UNI_SUR_HIGH_END)
                {
                    // OK: high followed by low.  This is a valid
                    // surrogate pair.
                    code = ((code * 31) + ch) * 31 + ch2;
                    downto--;
                    continue;
                }
                else
                {
                    // Unpaired
                    ch = tokenText[downto] = (char)UnicodeUtil.UNI_REPLACEMENT_CHAR;
                }
            }
        }
        else if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && ch <= UnicodeUtil.UNI_SUR_HIGH_END)
        {
            // Unpaired
            ch = tokenText[downto] = (char)UnicodeUtil.UNI_REPLACEMENT_CHAR;
        }

        code = (code * 31) + ch;
    }

    int hashPos = code & postingsHashMask;

    // Locate RawPostingList in hash
    p = postingsHash[hashPos];

    if (p != null && !postingEquals(tokenText, tokenTextLen))
    {
        // Conflict: keep searching different locations in
        // the hash table.
        int inc = ((code >> 8) + code) | 1;
        do
        {
            code += inc;
            hashPos = code & postingsHashMask;
            p = postingsHash[hashPos];
        }
        while (p != null && !postingEquals(tokenText, tokenTextLen));
    }

    if (p == null)
    {
        // First time we are seeing this token since we last
        // flushed the hash.
        int textLen1 = 1 + tokenTextLen;
        if (textLen1 + charPool.charUpto > DocumentsWriter.CHAR_BLOCK_SIZE)
        {
            if (textLen1 > DocumentsWriter.CHAR_BLOCK_SIZE)
            {
                // Just skip this term, to remain as robust as
                // possible during indexing.  A TokenFilter
                // can be inserted into the analyzer chain if
                // other behavior is wanted (pruning the term
                // to a prefix, throwing an exception, etc).
                if (docState.maxTermPrefix == null)
                    docState.maxTermPrefix = new System.String(tokenText, 0, 30);

                consumer.skippingLongTerm(token);
                return;
            }
            charPool.nextBuffer();
        }

        // Refill?
        if (0 == perThread.freePostingsCount)
            perThread.morePostings();

        // Pull next free RawPostingList from free list
        p = perThread.freePostings[--perThread.freePostingsCount];
        System.Diagnostics.Debug.Assert(p != null);

        char[] text = charPool.buffer;
        int textUpto = charPool.charUpto;
        p.textStart = textUpto + charPool.charOffset;
        charPool.charUpto += textLen1;
        System.Array.Copy(tokenText, 0, text, textUpto, tokenTextLen);
        text[textUpto + tokenTextLen] = (char)0xffff;

        System.Diagnostics.Debug.Assert(postingsHash[hashPos] == null);
        postingsHash[hashPos] = p;
        numPostings++;

        if (numPostings == postingsHashHalfSize)
            rehashPostings(2 * postingsHashSize);

        // Init stream slices
        if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
            intPool.nextBuffer();

        if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt * ByteBlockPool.FIRST_LEVEL_SIZE)
            bytePool.NextBuffer();

        intUptos = intPool.buffer;
        intUptoStart = intPool.intUpto;
        intPool.intUpto += streamCount;

        p.intStart = intUptoStart + intPool.intOffset;

        for (int i = 0; i < streamCount; i++)
        {
            int upto = bytePool.NewSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
            intUptos[intUptoStart + i] = upto + bytePool.byteOffset;
        }
        p.byteStart = intUptos[intUptoStart];

        consumer.newTerm(token, p);
    }
    else
    {
        intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
        intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
        consumer.addTerm(token, p);
    }

    if (doNextCall)
        nextPerField.add(token, p.textStart);
}
private void InitBlock()
{
    postingsHashHalfSize = postingsHashSize / 2;
    postingsHashMask = postingsHashSize - 1;
    postingsHash = new RawPostingList[postingsHashSize];
}
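// Sketch: InitBlock relies on postingsHashSize being a power of two, so the mask
// computed as size - 1 turns "code & mask" into a cheap non-negative modulo.
// The values below are assumptions chosen for illustration.
static void MaskDemo()
{
    int size = 8;              // assumed power-of-two table size
    int mask = size - 1;       // 0b111
    int slot = 12345 & mask;   // == 12345 % 8 == 1
    System.Diagnostics.Debug.Assert(slot == 12345 % 8);
}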
internal override void CreatePostings(RawPostingList[] postings, int start, int count)
{
    int end = start + count;
    for (int i = start; i < end; i++)
        postings[i] = new PostingList();
}