Exemple #1
0
        /// <summary>
        /// Initializes <paramref name="reader"/> over one stream of the given posting.
        /// </summary>
        /// <param name="reader">Reader to initialize; it is handed <c>bytePool</c> plus two offsets.</param>
        /// <param name="p">Posting whose <c>intStart</c> and <c>byteStart</c> locate the stream data.</param>
        /// <param name="stream">Stream index; asserted to be below <c>streamCount</c>.</param>
        public void  InitReader(ByteSliceReader reader, RawPostingList p, int stream)
        {
            System.Diagnostics.Debug.Assert(stream < streamCount);

            // Split the posting's int address into (block, offset-within-block).
            int blockIndex    = p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT;
            int offsetInBlock = p.intStart & DocumentsWriter.INT_BLOCK_MASK;

            // Each stream's first slice sits FIRST_LEVEL_SIZE bytes after the previous one.
            int firstSliceStart = p.byteStart + stream * ByteBlockPool.FIRST_LEVEL_SIZE;

            // The int pool entry for this stream holds the address recorded for it.
            int recordedAddress = intPool.buffers[blockIndex][offsetInBlock + stream];

            reader.Init(bytePool, firstSliceStart, recordedAddress);
        }
Exemple #2
0
        /// <summary>
        /// Rebuilds the postings hash with <paramref name="newSize"/> slots and re-inserts
        /// every posting currently stored. Called when the postings hash is too small
        /// (&gt; 50% occupied) or too large (&lt; 20% occupied).
        /// </summary>
        /// <param name="newSize">Slot count of the new table; <c>newSize - 1</c> is used as
        /// the slot mask.</param>
        internal void  RehashPostings(int newSize)
        {
            int mask = newSize - 1;
            RawPostingList[] table = new RawPostingList[newSize];

            for (int slot = 0; slot < postingsHashSize; slot++)
            {
                RawPostingList posting = postingsHash[slot];
                if (posting == null)
                {
                    continue;
                }

                int code;
                if (!perThread.primary)
                {
                    // Non-primary hashes key directly on the text address.
                    code = posting.textStart;
                }
                else
                {
                    // Recompute the hash from the stored term text: locate the 0xffff
                    // terminator, then fold the characters from last to first.
                    char[] chars  = charPool.buffers[posting.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
                    int    first  = posting.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
                    int    cursor = first;
                    while (chars[cursor] != 0xffff)
                    {
                        cursor++;
                    }

                    code = 0;
                    while (cursor > first)
                    {
                        cursor--;
                        code = (code * 31) + chars[cursor];
                    }
                }

                int target = code & mask;
                System.Diagnostics.Debug.Assert(target >= 0);

                if (table[target] != null)
                {
                    // Slot taken: keep stepping by an odd increment until a free slot turns up.
                    int step = ((code >> 8) + code) | 1;
                    while (table[target] != null)
                    {
                        code  += step;
                        target = code & mask;
                    }
                }

                table[target] = posting;
            }

            postingsHash         = table;
            postingsHashSize     = newSize;
            postingsHashMask     = mask;
            postingsHashHalfSize = newSize >> 1;
        }
        /// <summary>
        /// Handles a further occurrence of a term that already has a posting.
        /// </summary>
        /// <remarks>
        /// When the term is seen for the first time in the current document, the pending
        /// entry of the previous document is written to stream 0 (the doc code, followed by
        /// the frequency unless that frequency is 1, in which case the low bit of the doc
        /// code is set instead) and the posting is reset for the current document.
        /// Otherwise only the in-document frequency and the position delta are recorded.
        /// With <c>omitTermFreqAndPositions</c> set, only doc codes are written.
        /// </remarks>
        /// <param name="p0">Posting to update; cast to <c>FreqProxTermsWriter.PostingList</c>.</param>
        internal override void  AddTerm(RawPostingList p0)
        {
            System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.addTerm start"));

            FreqProxTermsWriter.PostingList posting = (FreqProxTermsWriter.PostingList)p0;

            System.Diagnostics.Debug.Assert(omitTermFreqAndPositions || posting.docFreq > 0);

            if (docState.docID == posting.lastDocID)
            {
                // Same document as the previous occurrence of this term.
                if (!omitTermFreqAndPositions)
                {
                    posting.docFreq++;
                    WriteProx(posting, fieldState.position - posting.lastPosition);
                }
                return;
            }

            // Term not yet seen in the current doc but previously seen in other
            // doc(s) since the last flush.
            System.Diagnostics.Debug.Assert(docState.docID > posting.lastDocID);

            if (omitTermFreqAndPositions)
            {
                termsHashPerField.WriteVInt(0, posting.lastDocCode);
                posting.lastDocCode = docState.docID - posting.lastDocID;
                posting.lastDocID   = docState.docID;
                return;
            }

            // The doc freq of the previous doc is now final: write it together
            // with that doc's code.
            if (posting.docFreq == 1)
            {
                termsHashPerField.WriteVInt(0, posting.lastDocCode | 1);
            }
            else
            {
                termsHashPerField.WriteVInt(0, posting.lastDocCode);
                termsHashPerField.WriteVInt(0, posting.docFreq);
            }

            posting.docFreq     = 1;
            posting.lastDocCode = (docState.docID - posting.lastDocID) << 1;
            posting.lastDocID   = docState.docID;
            WriteProx(posting, fieldState.position);
        }
 /// <summary>
 /// Initializes a posting for a term that is seen for the first time since the last flush.
 /// </summary>
 /// <param name="p0">Fresh posting; cast to <c>FreqProxTermsWriter.PostingList</c>.</param>
 internal override void  NewTerm(RawPostingList p0)
 {
     System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.newTerm start"));

     FreqProxTermsWriter.PostingList posting = (FreqProxTermsWriter.PostingList)p0;
     posting.lastDocID = docState.docID;

     if (!omitTermFreqAndPositions)
     {
         // Doc code is shifted left by one; frequency starts at 1 and the
         // first position is written right away.
         posting.lastDocCode = docState.docID << 1;
         posting.docFreq     = 1;
         WriteProx(posting, fieldState.position);
         return;
     }

     // Frequencies and positions are omitted: the doc code is the plain doc ID.
     posting.lastDocCode = docState.docID;
 }
Exemple #5
0
        /// <summary>
        /// Compares the term text of two postings, character by character.
        /// </summary>
        /// <param name="p1">First posting.</param>
        /// <param name="p2">Second posting.</param>
        /// <returns>
        /// 0 when both arguments are the same instance; 1 when the text of
        /// <paramref name="p2"/> ends first; -1 when the text of <paramref name="p1"/>
        /// ends first; otherwise the difference between the first pair of differing
        /// characters (negative if p1 sorts before p2, positive if after).
        /// </returns>
        internal int ComparePostings(RawPostingList p1, RawPostingList p2)
        {
            if (p1 == p2)
            {
                return 0;
            }

            char[] leftChars = charPool.buffers[p1.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
            int    leftPos   = p1.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

            char[] rightChars = charPool.buffers[p2.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
            int    rightPos   = p2.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

            System.Diagnostics.Debug.Assert(leftChars != rightChars || leftPos != rightPos);

            for (;;)
            {
                char a = leftChars[leftPos++];
                char b = rightChars[rightPos++];

                if (a == b)
                {
                    // Distinct postings must never hold equal text, so both
                    // cannot reach the 0xffff terminator at the same time.
                    System.Diagnostics.Debug.Assert(a != 0xffff);
                    continue;
                }

                if (b == 0xffff)
                {
                    return 1;
                }

                if (a == 0xffff)
                {
                    return -1;
                }

                return a - b;
            }
        }
Exemple #6
0
        /// <summary>
        /// Records another occurrence of a term that already has a term-vector posting.
        /// </summary>
        /// <remarks>
        /// Increments the frequency, then writes the offset deltas to stream 1 (when
        /// <c>doVectorOffsets</c> is set) and the position delta to stream 0 (when
        /// <c>doVectorPositions</c> is set).
        /// </remarks>
        /// <param name="p0">Posting to update; cast to <c>TermVectorsTermsWriter.PostingList</c>.</param>
        internal override void  AddTerm(RawPostingList p0)
        {
            System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.addTerm start"));

            TermVectorsTermsWriter.PostingList posting = (TermVectorsTermsWriter.PostingList)p0;
            posting.freq++;

            if (doVectorOffsets)
            {
                int tokenStart = fieldState.offset + offsetAttribute.StartOffset();
                int tokenEnd   = fieldState.offset + offsetAttribute.EndOffset();

                // Start is stored relative to the previous end offset; the end is
                // stored as the token length.
                int startDelta  = tokenStart - posting.lastOffset;
                int tokenLength = tokenEnd - tokenStart;

                termsHashPerField.WriteVInt(1, startDelta);
                termsHashPerField.WriteVInt(1, tokenLength);
                posting.lastOffset = tokenEnd;
            }

            if (doVectorPositions)
            {
                int positionDelta = fieldState.position - posting.lastPosition;
                termsHashPerField.WriteVInt(0, positionDelta);
                posting.lastPosition = fieldState.position;
            }
        }
Exemple #7
0
        /// <summary>
        /// Shrinks the free-postings list down to a single slot.
        /// </summary>
        /// <remarks>
        /// Expects every allocated posting to be back on the free list (asserted). When
        /// more than one posting is free, the surplus is dropped and, if
        /// <c>trackAllocations</c> is set, reported to <c>docWriter</c> as a negative
        /// allocation.
        /// </remarks>
        /// <param name="threadsAndFields">Not used by this method.</param>
        /// <param name="state">Not used by this method.</param>
        internal void  ShrinkFreePostings(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
        {
            // The message previously embedded the literal text
            // "Thread.currentThread().getName()"; report the actual thread name instead.
            System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount, System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer);

            int newSize = 1;

            if (newSize != postingsFreeList.Length)
            {
                if (postingsFreeCount > newSize)
                {
                    if (trackAllocations)
                    {
                        // Negative amount: the surplus postings are being released.
                        docWriter.BytesAllocated(-(postingsFreeCount - newSize) * bytesPerPosting);
                    }
                    postingsFreeCount  = newSize;
                    postingsAllocCount = newSize;
                }

                // postingsFreeCount is at most newSize here, so the copy always fits.
                RawPostingList[] newArray = new RawPostingList[newSize];
                Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
                postingsFreeList = newArray;
            }
        }
		/// <summary>
		/// Records another occurrence of a term that already has a term-vector posting.
		/// </summary>
		/// <remarks>
		/// Increments the frequency, then writes the offset deltas to stream 1 (when
		/// <c>doVectorOffsets</c> is set) and the position delta to stream 0 (when
		/// <c>doVectorPositions</c> is set).
		/// </remarks>
		/// <param name="p0">Posting to update; cast to <c>TermVectorsTermsWriter.PostingList</c>.</param>
		internal override void  AddTerm(RawPostingList p0)
		{
			System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.addTerm start"));

			TermVectorsTermsWriter.PostingList posting = (TermVectorsTermsWriter.PostingList) p0;
			posting.freq++;

			if (doVectorOffsets)
			{
				int tokenStart = fieldState.offset + offsetAttribute.StartOffset();
				int tokenEnd = fieldState.offset + offsetAttribute.EndOffset();

				// Start is stored relative to the previous end offset; the end is
				// stored as the token length.
				int startDelta = tokenStart - posting.lastOffset;
				int tokenLength = tokenEnd - tokenStart;

				termsHashPerField.WriteVInt(1, startDelta);
				termsHashPerField.WriteVInt(1, tokenLength);
				posting.lastOffset = tokenEnd;
			}

			if (doVectorPositions)
			{
				int positionDelta = fieldState.position - posting.lastPosition;
				termsHashPerField.WriteVInt(0, positionDelta);
				posting.lastPosition = fieldState.position;
			}
		}
Exemple #9
0
        // Primary entry point (for first TermsHash)
        //
        // Hashes the text of the current token (read from termAtt), looks the term
        // up in postingsHash, and then either creates a new posting and calls
        // consumer.NewTerm, or calls consumer.AddTerm on the existing posting.
        // Invalid UTF-16 in the token buffer is overwritten in place with
        // UnicodeUtil.UNI_REPLACEMENT_CHAR while hashing.  When doNextCall is set,
        // the posting's textStart is forwarded to nextPerField.Add.
        internal override void  Add()
        {
            System.Diagnostics.Debug.Assert(!postingsCompacted);

            // We are first in the chain so we must "intern" the
            // term text into textStart address

            // Get the text of this term.
            char[] tokenText = termAtt.TermBuffer();
            ;
            int tokenTextLen = termAtt.TermLength();

            // Compute hashcode & replace any invalid UTF16 sequences
            // (the token is walked from its last char to its first)
            int downto = tokenTextLen;
            int code   = 0;

            while (downto > 0)
            {
                char ch = tokenText[--downto];

                if (ch >= UnicodeUtil.UNI_SUR_LOW_START && ch <= UnicodeUtil.UNI_SUR_LOW_END)
                {
                    if (0 == downto)
                    {
                        // Unpaired
                        ch = tokenText[downto] = (char)(UnicodeUtil.UNI_REPLACEMENT_CHAR);
                    }
                    else
                    {
                        char ch2 = tokenText[downto - 1];
                        if (ch2 >= UnicodeUtil.UNI_SUR_HIGH_START && ch2 <= UnicodeUtil.UNI_SUR_HIGH_END)
                        {
                            // OK: high followed by low.  This is a valid
                            // surrogate pair.
                            // Both chars are folded into the hash here and the
                            // high surrogate is skipped on the next iteration.
                            code = ((code * 31) + ch) * 31 + ch2;
                            downto--;
                            continue;
                        }
                        else
                        {
                            // Unpaired
                            ch = tokenText[downto] = (char)(UnicodeUtil.UNI_REPLACEMENT_CHAR);
                        }
                    }
                }
                else if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && (ch <= UnicodeUtil.UNI_SUR_HIGH_END || ch == 0xffff))
                {
                    // Unpaired or 0xffff
                    // (0xffff is written below as the end-of-term marker in the char pool)
                    ch = tokenText[downto] = (char)(UnicodeUtil.UNI_REPLACEMENT_CHAR);
                }

                code = (code * 31) + ch;
            }

            int hashPos = code & postingsHashMask;

            // Locate RawPostingList in hash
            // (p is not declared in this method; the lookup result is stored in it)
            p = postingsHash[hashPos];

            if (p != null && !PostingEquals(tokenText, tokenTextLen))
            {
                // Conflict: keep searching different locations in
                // the hash table.
                // The increment is forced odd by "| 1"; presumably so the probe can
                // reach every slot of a power-of-two sized table -- confirm.
                int inc = ((code >> 8) + code) | 1;
                do
                {
                    code   += inc;
                    hashPos = code & postingsHashMask;
                    p       = postingsHash[hashPos];
                }while (p != null && !PostingEquals(tokenText, tokenTextLen));
            }

            if (p == null)
            {
                // First time we are seeing this token since we last
                // flushed the hash.
                // One extra char is reserved for the 0xffff terminator.
                int textLen1 = 1 + tokenTextLen;
                if (textLen1 + charPool.charUpto > DocumentsWriter.CHAR_BLOCK_SIZE)
                {
                    if (textLen1 > DocumentsWriter.CHAR_BLOCK_SIZE)
                    {
                        // Just skip this term, to remain as robust as
                        // possible during indexing.  A TokenFilter
                        // can be inserted into the analyzer chain if
                        // other behavior is wanted (pruning the term
                        // to a prefix, throwing an exception, etc).

                        // Only the first over-long term has its 30-char prefix
                        // recorded; later ones leave maxTermPrefix untouched.
                        if (docState.maxTermPrefix == null)
                        {
                            docState.maxTermPrefix = new System.String(tokenText, 0, 30);
                        }

                        consumer.SkippingLongTerm();
                        return;
                    }
                    // The term does not fit in the rest of the current char block.
                    charPool.NextBuffer();
                }

                // Refill?
                if (0 == perThread.freePostingsCount)
                {
                    perThread.MorePostings();
                }

                // Pull next free RawPostingList from free list
                p = perThread.freePostings[--perThread.freePostingsCount];
                System.Diagnostics.Debug.Assert(p != null);

                // Copy the term text into the char pool and terminate it with 0xffff.
                char[] text     = charPool.buffer;
                int    textUpto = charPool.charUpto;
                p.textStart        = textUpto + charPool.charOffset;
                charPool.charUpto += textLen1;
                Array.Copy(tokenText, 0, text, textUpto, tokenTextLen);
                text[textUpto + tokenTextLen] = (char)(0xffff);

                System.Diagnostics.Debug.Assert(postingsHash [hashPos] == null);
                postingsHash[hashPos] = p;
                numPostings++;

                // Double the table once the posting count reaches half its size.
                if (numPostings == postingsHashHalfSize)
                {
                    RehashPostings(2 * postingsHashSize);
                }

                // Init stream slices
                if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
                {
                    intPool.NextBuffer();
                }

                if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt * ByteBlockPool.FIRST_LEVEL_SIZE)
                {
                    bytePool.NextBuffer();
                }

                // Reserve streamCount ints for this posting.
                intUptos         = intPool.buffer;
                intUptoStart     = intPool.intUpto;
                intPool.intUpto += streamCount;

                p.intStart = intUptoStart + intPool.intOffset;

                // Allocate a first-level byte slice per stream and record its address.
                for (int i = 0; i < streamCount; i++)
                {
                    int upto = bytePool.NewSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
                    intUptos[intUptoStart + i] = upto + bytePool.byteOffset;
                }
                p.byteStart = intUptos[intUptoStart];

                consumer.NewTerm(p);
            }
            else
            {
                // Existing term: point intUptos / intUptoStart at this posting's
                // ints before handing it to the consumer.
                intUptos     = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
                intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
                consumer.AddTerm(p);
            }

            if (doNextCall)
            {
                nextPerField.Add(p.textStart);
            }
        }
		/// <summary>
		/// Fills <paramref name="count"/> consecutive slots of <paramref name="postings"/>,
		/// beginning at <paramref name="start"/>, with newly constructed
		/// <c>PostingList</c> instances.
		/// </summary>
		/// <param name="postings">Array to fill.</param>
		/// <param name="start">Index of the first slot to fill.</param>
		/// <param name="count">Number of slots to fill.</param>
		internal override void  CreatePostings(RawPostingList[] postings, int start, int count)
		{
			int limit = start + count;
			int index = start;
			while (index < limit)
			{
				postings[index] = new PostingList();
				index++;
			}
		}
		/// <summary>
		/// Hook for a term that already has a posting.
		/// </summary>
		/// <remarks>
		/// NOTE(review): presumably this is the method reached through
		/// <c>consumer.AddTerm(p)</c> when a hash lookup finds an existing entry;
		/// confirm against the declaring class.
		/// </remarks>
		/// <param name="p">The posting of the term.</param>
		internal abstract void  AddTerm(RawPostingList p);
Exemple #12
0
        // Secondary entry point (for 2nd & subsequent TermsHash),
        // because token text has already been "interned" into
        // textStart, so we hash by textStart
        //
        // Looks textStart up in postingsHash; creates a new posting and calls
        // consumer.NewTerm when it is absent, otherwise calls consumer.AddTerm
        // on the posting found.
        public void  Add(int textStart)
        {
            // The text address itself serves as the hash code.
            int code = textStart;

            int hashPos = code & postingsHashMask;

            System.Diagnostics.Debug.Assert(!postingsCompacted);

            // Locate RawPostingList in hash
            // (p is not declared in this method; the lookup result is stored in it)
            p = postingsHash[hashPos];

            if (p != null && p.textStart != textStart)
            {
                // Conflict: keep searching different locations in
                // the hash table.
                // The increment is forced odd by "| 1"; presumably so the probe can
                // reach every slot of a power-of-two sized table -- confirm.
                int inc = ((code >> 8) + code) | 1;
                do
                {
                    code   += inc;
                    hashPos = code & postingsHashMask;
                    p       = postingsHash[hashPos];
                }while (p != null && p.textStart != textStart);
            }

            if (p == null)
            {
                // First time we are seeing this token since we last
                // flushed the hash.

                // Refill?
                if (0 == perThread.freePostingsCount)
                {
                    perThread.MorePostings();
                }

                // Pull next free RawPostingList from free list
                p = perThread.freePostings[--perThread.freePostingsCount];
                System.Diagnostics.Debug.Assert(p != null);

                // No text is copied here; only the given address is stored.
                p.textStart = textStart;

                System.Diagnostics.Debug.Assert(postingsHash [hashPos] == null);
                postingsHash[hashPos] = p;
                numPostings++;

                // Double the table once the posting count reaches half its size.
                if (numPostings == postingsHashHalfSize)
                {
                    RehashPostings(2 * postingsHashSize);
                }

                // Init stream slices
                if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
                {
                    intPool.NextBuffer();
                }

                if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt * ByteBlockPool.FIRST_LEVEL_SIZE)
                {
                    bytePool.NextBuffer();
                }

                // Reserve streamCount ints for this posting.
                intUptos         = intPool.buffer;
                intUptoStart     = intPool.intUpto;
                intPool.intUpto += streamCount;

                p.intStart = intUptoStart + intPool.intOffset;

                // Allocate a first-level byte slice per stream and record its address.
                for (int i = 0; i < streamCount; i++)
                {
                    int upto = bytePool.NewSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
                    intUptos[intUptoStart + i] = upto + bytePool.byteOffset;
                }
                p.byteStart = intUptos[intUptoStart];

                consumer.NewTerm(p);
            }
            else
            {
                // Existing term: point intUptos / intUptoStart at this posting's
                // ints before handing it to the consumer.
                intUptos     = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
                intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
                consumer.AddTerm(p);
            }
        }
Exemple #13
0
		/// <summary>
		/// Compares the term text of two postings, character by character.
		/// </summary>
		/// <param name="p1">First posting.</param>
		/// <param name="p2">Second posting.</param>
		/// <returns>
		/// 0 when both arguments are the same instance; 1 when the text of
		/// <paramref name="p2"/> ends first; -1 when the text of <paramref name="p1"/>
		/// ends first; otherwise the difference between the first pair of differing
		/// characters (negative if p1 sorts before p2, positive if after).
		/// </returns>
		internal int ComparePostings(RawPostingList p1, RawPostingList p2)
		{
			if (p1 == p2)
			{
				return 0;
			}

			char[] leftChars = charPool.buffers[p1.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
			int leftPos = p1.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

			char[] rightChars = charPool.buffers[p2.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
			int rightPos = p2.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

			System.Diagnostics.Debug.Assert(leftChars != rightChars || leftPos != rightPos);

			for (;;)
			{
				char a = leftChars[leftPos++];
				char b = rightChars[rightPos++];

				if (a == b)
				{
					// Distinct postings must never hold equal text, so both
					// cannot reach the 0xffff terminator at the same time.
					System.Diagnostics.Debug.Assert(a != 0xffff);
					continue;
				}

				if (b == 0xffff)
				{
					return 1;
				}

				if (a == 0xffff)
				{
					return -1;
				}

				return a - b;
			}
		}
Exemple #14
0
		/// <summary>
		/// Sorts <c>postings[lo..hi]</c> (both bounds inclusive) in place, ordering
		/// entries with <c>ComparePostings</c>.
		/// </summary>
		/// <param name="postings">Array holding the postings to sort.</param>
		/// <param name="lo">Index of the first element of the range.</param>
		/// <param name="hi">Index of the last element of the range.</param>
		internal void  QuickSort(RawPostingList[] postings, int lo, int hi)
		{
			// Ranges of zero or one element are already sorted; a two-element
			// range needs at most one swap.
			if (lo >= hi)
				return ;
			else if (hi == 1 + lo)
			{
				if (ComparePostings(postings[lo], postings[hi]) > 0)
				{
					RawPostingList tmp = postings[lo];
					postings[lo] = postings[hi];
					postings[hi] = tmp;
				}
				return ;
			}
			
			// Midpoint of the range, computed with URShift.
			int mid = SupportClass.Number.URShift((lo + hi), 1);
			
			// Order postings[lo], postings[mid], postings[hi] among themselves so
			// that postings[mid] ends up holding the middle one of the three.
			if (ComparePostings(postings[lo], postings[mid]) > 0)
			{
				RawPostingList tmp = postings[lo];
				postings[lo] = postings[mid];
				postings[mid] = tmp;
			}
			
			if (ComparePostings(postings[mid], postings[hi]) > 0)
			{
				RawPostingList tmp = postings[mid];
				postings[mid] = postings[hi];
				postings[hi] = tmp;
				
				if (ComparePostings(postings[lo], postings[mid]) > 0)
				{
					RawPostingList tmp2 = postings[lo];
					postings[lo] = postings[mid];
					postings[mid] = tmp2;
				}
			}
			
			// postings[lo] and postings[hi] are already on the correct side of the
			// pivot, so partitioning only covers the elements between them.
			int left = lo + 1;
			int right = hi - 1;
			
			// Three elements or fewer: the ordering above already sorted them.
			if (left >= right)
				return ;
			
			// Pivot is kept by reference; the slot at mid may be overwritten by swaps.
			RawPostingList partition = postings[mid];
			
			for (; ; )
			{
				// Move right down past elements greater than the pivot.
				while (ComparePostings(postings[right], partition) > 0)
					--right;
				
				// Move left up past elements not greater than the pivot.
				while (left < right && ComparePostings(postings[left], partition) <= 0)
					++left;
				
				if (left < right)
				{
					RawPostingList tmp = postings[left];
					postings[left] = postings[right];
					postings[right] = tmp;
					--right;
				}
				else
				{
					break;
				}
			}
			
			// Recurse on the two sides of the split point.
			QuickSort(postings, lo, left);
			QuickSort(postings, left + 1, hi);
		}
Exemple #15
0
		/// <summary>
		/// Asserts that each of the first <paramref name="count"/> entries of
		/// <paramref name="postings"/> is non-null.
		/// </summary>
		/// <param name="postings">Array to check.</param>
		/// <param name="count">Number of leading entries to check.</param>
		/// <param name="details">Text appended to the assertion message.</param>
		/// <returns>Always <c>true</c>; failures are reported through the assertions.</returns>
		private static bool noNullPostings(RawPostingList[] postings, int count, System.String details)
		{
			int index = 0;
			while (index < count)
			{
				System.Diagnostics.Debug.Assert(postings[index] != null, "postings[" + index + "] of " + count + " is null: " + details);
				index++;
			}

			return true;
		}
 /// <summary>
 /// Hook for a term that has just been given a new posting.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably this is the method reached through
 /// <c>consumer.NewTerm(p)</c> after a posting has been created and its stream
 /// slices allocated; confirm against the declaring class.
 /// </remarks>
 /// <param name="p">The newly created posting.</param>
 internal abstract void  NewTerm(RawPostingList p);
Exemple #17
0
		/// <summary>
		/// Hook for populating part of a postings array.
		/// </summary>
		/// <remarks>
		/// NOTE(review): the override visible in this file fills
		/// <c>postings[start .. start + count - 1]</c> with new posting instances;
		/// presumably every implementation is expected to do the same -- confirm.
		/// </remarks>
		/// <param name="postings">Array to populate.</param>
		/// <param name="start">Index of the first slot to populate.</param>
		/// <param name="count">Number of slots to populate.</param>
		internal abstract void  CreatePostings(RawPostingList[] postings, int start, int count);
Exemple #18
0
		/// <summary>
		/// Rebuilds the postings hash with <paramref name="newSize"/> slots and re-inserts
		/// every posting currently stored. Called when the postings hash is too small
		/// (&gt; 50% occupied) or too large (&lt; 20% occupied).
		/// </summary>
		/// <param name="newSize">Slot count of the new table; <c>newSize - 1</c> is used as
		/// the slot mask.</param>
		internal void  RehashPostings(int newSize)
		{
			int mask = newSize - 1;
			RawPostingList[] table = new RawPostingList[newSize];

			for (int slot = 0; slot < postingsHashSize; slot++)
			{
				RawPostingList posting = postingsHash[slot];
				if (posting == null)
				{
					continue;
				}

				int code;
				if (!perThread.primary)
				{
					// Non-primary hashes key directly on the text address.
					code = posting.textStart;
				}
				else
				{
					// Recompute the hash from the stored term text: locate the 0xffff
					// terminator, then fold the characters from last to first.
					char[] chars = charPool.buffers[posting.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
					int first = posting.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
					int cursor = first;
					while (chars[cursor] != 0xffff)
					{
						cursor++;
					}

					code = 0;
					while (cursor > first)
					{
						cursor--;
						code = (code * 31) + chars[cursor];
					}
				}

				int target = code & mask;
				System.Diagnostics.Debug.Assert(target >= 0);

				if (table[target] != null)
				{
					// Slot taken: keep stepping by an odd increment until a free slot turns up.
					int step = ((code >> 8) + code) | 1;
					while (table[target] != null)
					{
						code += step;
						target = code & mask;
					}
				}

				table[target] = posting;
			}

			postingsHash = table;
			postingsHashSize = newSize;
			postingsHashMask = mask;
			postingsHashHalfSize = newSize >> 1;
		}
Exemple #19
0
		// Primary entry point (for first TermsHash)
		// Hashes the text of the current token (read from termAtt), looks the term
		// up in postingsHash, and then either creates a new posting and calls
		// consumer.NewTerm, or calls consumer.AddTerm on the existing posting.
		// Invalid UTF-16 in the token buffer is overwritten in place with
		// UnicodeUtil.UNI_REPLACEMENT_CHAR while hashing.  When doNextCall is set,
		// the posting's textStart is forwarded to nextPerField.Add.
		internal override void  Add()
		{
			
			System.Diagnostics.Debug.Assert(!postingsCompacted);
			
			// We are first in the chain so we must "intern" the
			// term text into textStart address
			
			// Get the text of this term.
			char[] tokenText = termAtt.TermBuffer();
			;
			int tokenTextLen = termAtt.TermLength();
			
			// Compute hashcode & replace any invalid UTF16 sequences
			// (the token is walked from its last char to its first)
			int downto = tokenTextLen;
			int code = 0;
			while (downto > 0)
			{
				char ch = tokenText[--downto];
				
				if (ch >= UnicodeUtil.UNI_SUR_LOW_START && ch <= UnicodeUtil.UNI_SUR_LOW_END)
				{
					if (0 == downto)
					{
						// Unpaired
						ch = tokenText[downto] = (char) (UnicodeUtil.UNI_REPLACEMENT_CHAR);
					}
					else
					{
						char ch2 = tokenText[downto - 1];
						if (ch2 >= UnicodeUtil.UNI_SUR_HIGH_START && ch2 <= UnicodeUtil.UNI_SUR_HIGH_END)
						{
							// OK: high followed by low.  This is a valid
							// surrogate pair.
							// Both chars are folded into the hash here and the
							// high surrogate is skipped on the next iteration.
							code = ((code * 31) + ch) * 31 + ch2;
							downto--;
							continue;
						}
						else
						{
							// Unpaired
							ch = tokenText[downto] = (char) (UnicodeUtil.UNI_REPLACEMENT_CHAR);
						}
					}
				}
				else if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && (ch <= UnicodeUtil.UNI_SUR_HIGH_END || ch == 0xffff))
				{
					// Unpaired or 0xffff
					// (0xffff is written below as the end-of-term marker in the char pool)
					ch = tokenText[downto] = (char) (UnicodeUtil.UNI_REPLACEMENT_CHAR);
				}
				
				code = (code * 31) + ch;
			}
			
			int hashPos = code & postingsHashMask;
			
			// Locate RawPostingList in hash
			// (p is not declared in this method; the lookup result is stored in it)
			p = postingsHash[hashPos];
			
			if (p != null && !PostingEquals(tokenText, tokenTextLen))
			{
				// Conflict: keep searching different locations in
				// the hash table.
				// The increment is forced odd by "| 1"; presumably so the probe can
				// reach every slot of a power-of-two sized table -- confirm.
				int inc = ((code >> 8) + code) | 1;
				do 
				{
					code += inc;
					hashPos = code & postingsHashMask;
					p = postingsHash[hashPos];
				}
				while (p != null && !PostingEquals(tokenText, tokenTextLen));
			}
			
			if (p == null)
			{
				
				// First time we are seeing this token since we last
				// flushed the hash.
				// One extra char is reserved for the 0xffff terminator.
				int textLen1 = 1 + tokenTextLen;
				if (textLen1 + charPool.charUpto > DocumentsWriter.CHAR_BLOCK_SIZE)
				{
					if (textLen1 > DocumentsWriter.CHAR_BLOCK_SIZE)
					{
						// Just skip this term, to remain as robust as
						// possible during indexing.  A TokenFilter
						// can be inserted into the analyzer chain if
						// other behavior is wanted (pruning the term
						// to a prefix, throwing an exception, etc).
						
						// Only the first over-long term has its 30-char prefix
						// recorded; later ones leave maxTermPrefix untouched.
						if (docState.maxTermPrefix == null)
							docState.maxTermPrefix = new System.String(tokenText, 0, 30);
						
						consumer.SkippingLongTerm();
						return ;
					}
					// The term does not fit in the rest of the current char block.
					charPool.NextBuffer();
				}
				
				// Refill?
				if (0 == perThread.freePostingsCount)
					perThread.MorePostings();
				
				// Pull next free RawPostingList from free list
				p = perThread.freePostings[--perThread.freePostingsCount];
				System.Diagnostics.Debug.Assert(p != null);
				
				// Copy the term text into the char pool and terminate it with 0xffff.
				char[] text = charPool.buffer;
				int textUpto = charPool.charUpto;
				p.textStart = textUpto + charPool.charOffset;
				charPool.charUpto += textLen1;
				Array.Copy(tokenText, 0, text, textUpto, tokenTextLen);
				text[textUpto + tokenTextLen] = (char) (0xffff);
				
				System.Diagnostics.Debug.Assert(postingsHash [hashPos] == null);
				postingsHash[hashPos] = p;
				numPostings++;
				
				// Double the table once the posting count reaches half its size.
				if (numPostings == postingsHashHalfSize)
					RehashPostings(2 * postingsHashSize);
				
				// Init stream slices
				if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
					intPool.NextBuffer();
				
				if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt * ByteBlockPool.FIRST_LEVEL_SIZE)
					bytePool.NextBuffer();
				
				// Reserve streamCount ints for this posting.
				intUptos = intPool.buffer;
				intUptoStart = intPool.intUpto;
				intPool.intUpto += streamCount;
				
				p.intStart = intUptoStart + intPool.intOffset;
				
				// Allocate a first-level byte slice per stream and record its address.
				for (int i = 0; i < streamCount; i++)
				{
					int upto = bytePool.NewSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
					intUptos[intUptoStart + i] = upto + bytePool.byteOffset;
				}
				p.byteStart = intUptos[intUptoStart];
				
				consumer.NewTerm(p);
			}
			else
			{
				// Existing term: point intUptos / intUptoStart at this posting's
				// ints before handing it to the consumer.
				intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
				intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
				consumer.AddTerm(p);
			}
			
			if (doNextCall)
				nextPerField.Add(p.textStart);
		}
Exemple #20
0
		// Secondary entry point (for 2nd & subsequent TermsHash),
		// because token text has already been "interned" into
		// textStart, so we hash by textStart
		//
		// Looks textStart up in postingsHash; creates a new posting and calls
		// consumer.NewTerm when it is absent, otherwise calls consumer.AddTerm
		// on the posting found.
		public void  Add(int textStart)
		{
			
			// The text address itself serves as the hash code.
			int code = textStart;
			
			int hashPos = code & postingsHashMask;
			
			System.Diagnostics.Debug.Assert(!postingsCompacted);
			
			// Locate RawPostingList in hash
			// (p is not declared in this method; the lookup result is stored in it)
			p = postingsHash[hashPos];
			
			if (p != null && p.textStart != textStart)
			{
				// Conflict: keep searching different locations in
				// the hash table.
				// The increment is forced odd by "| 1"; presumably so the probe can
				// reach every slot of a power-of-two sized table -- confirm.
				int inc = ((code >> 8) + code) | 1;
				do 
				{
					code += inc;
					hashPos = code & postingsHashMask;
					p = postingsHash[hashPos];
				}
				while (p != null && p.textStart != textStart);
			}
			
			if (p == null)
			{
				
				// First time we are seeing this token since we last
				// flushed the hash.
				
				// Refill?
				if (0 == perThread.freePostingsCount)
					perThread.MorePostings();
				
				// Pull next free RawPostingList from free list
				p = perThread.freePostings[--perThread.freePostingsCount];
				System.Diagnostics.Debug.Assert(p != null);
				
				// No text is copied here; only the given address is stored.
				p.textStart = textStart;
				
				System.Diagnostics.Debug.Assert(postingsHash [hashPos] == null);
				postingsHash[hashPos] = p;
				numPostings++;
				
				// Double the table once the posting count reaches half its size.
				if (numPostings == postingsHashHalfSize)
					RehashPostings(2 * postingsHashSize);
				
				// Init stream slices
				if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
					intPool.NextBuffer();
				
				if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt * ByteBlockPool.FIRST_LEVEL_SIZE)
					bytePool.NextBuffer();
				
				// Reserve streamCount ints for this posting.
				intUptos = intPool.buffer;
				intUptoStart = intPool.intUpto;
				intPool.intUpto += streamCount;
				
				p.intStart = intUptoStart + intPool.intOffset;
				
				// Allocate a first-level byte slice per stream and record its address.
				for (int i = 0; i < streamCount; i++)
				{
					int upto = bytePool.NewSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
					intUptos[intUptoStart + i] = upto + bytePool.byteOffset;
				}
				p.byteStart = intUptos[intUptoStart];
				
				consumer.NewTerm(p);
			}
			else
			{
				// Existing term: point intUptos / intUptoStart at this posting's
				// ints before handing it to the consumer.
				intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
				intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
				consumer.AddTerm(p);
			}
		}
Exemple #21
0
		/// <summary>
		/// Allocates the postings hash table and derives its mask and half-size
		/// from <c>postingsHashSize</c>.
		/// </summary>
		private void  InitBlock()
		{
			int size = postingsHashSize;

			postingsHash = new RawPostingList[size];
			postingsHashMask = size - 1;
			postingsHashHalfSize = size / 2;
		}
 /// <summary>
 /// Hook for a term that already has a posting.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably this is the method reached through
 /// <c>consumer.AddTerm(p)</c> when a hash lookup finds an existing entry;
 /// confirm against the declaring class.
 /// </remarks>
 /// <param name="p">The posting of the term.</param>
 internal abstract void  AddTerm(RawPostingList p);
Exemple #23
0
		/// <summary>
		/// Fills <paramref name="postings"/> completely: first from the tail of the free
		/// list, then, if that is not enough, with postings created by the consumer.
		/// </summary>
		/// <remarks>
		/// Runs under <c>lock (this)</c>. When new postings are created, the free list is
		/// replaced by a larger array if it could not hold every posting allocated so far.
		/// With <c>trackAllocations</c> set, allocated and used byte counts are reported
		/// to <c>docWriter</c>.
		/// </remarks>
		/// <param name="postings">Array to fill; every slot is assigned.</param>
		public void  GetPostings(RawPostingList[] postings)
		{
			lock (this)
			{
				System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermsHash.getPostings start"));

				System.Diagnostics.Debug.Assert(postingsFreeCount <= postingsFreeList.Length);
				System.Diagnostics.Debug.Assert(postingsFreeCount <= postingsAllocCount, "postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount);

				// Take as many recycled postings as are available, up to the request size.
				int fromFreeList = postingsFreeCount < postings.Length ? postingsFreeCount : postings.Length;
				int sourceIndex = postingsFreeCount - fromFreeList;

				System.Diagnostics.Debug.Assert(sourceIndex >= 0);
				System.Diagnostics.Debug.Assert(sourceIndex + fromFreeList <= postingsFreeList.Length);
				System.Diagnostics.Debug.Assert(fromFreeList <= postings.Length);
				Array.Copy(postingsFreeList, sourceIndex, postings, 0, fromFreeList);

				// Directly allocate the remainder if any
				if (fromFreeList != postings.Length)
				{
					int missing = postings.Length - fromFreeList;
					int allocCountAfter = postingsAllocCount + missing;

					consumer.CreatePostings(postings, fromFreeList, missing);
					System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermsHash.getPostings after create"));
					postingsAllocCount += missing;

					if (trackAllocations)
					{
						docWriter.BytesAllocated(missing * bytesPerPosting);
					}

					if (allocCountAfter > postingsFreeList.Length)
					{
						// Pre-allocate the postingsFreeList so it's large
						// enough to hold all postings we've given out
						postingsFreeList = new RawPostingList[ArrayUtil.GetNextSize(allocCountAfter)];
					}
				}

				postingsFreeCount -= fromFreeList;

				if (trackAllocations)
				{
					docWriter.BytesUsed(postings.Length * bytesPerPosting);
				}
			}
		}
		/// <summary>
		/// Initializes a posting for a term that is being seen for the first
		/// time since the last flush: records the current document id and,
		/// unless term frequencies/positions are omitted, starts the doc
		/// frequency at 1 and writes the current field position.
		/// </summary>
		/// <param name="p0">Posting to initialize; cast to <c>FreqProxTermsWriter.PostingList</c>.</param>
		internal override void  NewTerm(RawPostingList p0)
		{
			System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.newTerm start"));

			FreqProxTermsWriter.PostingList posting = (FreqProxTermsWriter.PostingList) p0;
			int docID = docState.docID;
			posting.lastDocID = docID;

			if (omitTermFreqAndPositions)
			{
				// Without freq/positions the doc code is the plain doc id.
				posting.lastDocCode = docID;
				return;
			}

			// Doc id is shifted left by one, leaving the low bit clear.
			posting.lastDocCode = docID << 1;
			posting.docFreq = 1;
			WriteProx(posting, fieldState.position);
		}
Exemple #25
0
		/// <summary>
		/// Initializes <paramref name="reader"/> over <c>bytePool</c> for one
		/// stream of the given posting.
		/// </summary>
		/// <param name="reader">Reader to initialize.</param>
		/// <param name="p">Posting whose <c>intStart</c> and <c>byteStart</c> locate the data.</param>
		/// <param name="stream">Stream index; asserted to be below <c>streamCount</c>.</param>
		public void  InitReader(ByteSliceReader reader, RawPostingList p, int stream)
		{
			System.Diagnostics.Debug.Assert(stream < streamCount);

			// Split the posting's int-pool address into (buffer, offset within buffer).
			int intAddress = p.intStart;
			int[] intBlock = intPool.buffers[intAddress >> DocumentsWriter.INT_BLOCK_SHIFT];
			int offsetInBlock = intAddress & DocumentsWriter.INT_BLOCK_MASK;

			// Value stored for this stream in the int pool; passed as the reader's
			// last argument (presumably the end of written data - confirm against
			// ByteSliceReader.Init).
			int storedForStream = intBlock[offsetInBlock + stream];

			reader.Init(bytePool, p.byteStart + stream * ByteBlockPool.FIRST_LEVEL_SIZE, storedForStream);
		}
Exemple #26
0
		/// <summary>
		/// Returns the first <paramref name="numPostings"/> entries of
		/// <paramref name="postings"/> to <c>postingsFreeList</c>, appending
		/// them after the entries already held. Runs under the lock on this
		/// instance.
		/// </summary>
		/// <param name="postings">Array holding the postings to give back.</param>
		/// <param name="numPostings">How many leading entries of the array to recycle.</param>
		public void  RecyclePostings(RawPostingList[] postings, int numPostings)
		{
			lock (this)
			{
				System.Diagnostics.Debug.Assert(postings.Length >= numPostings);

				// The free list is expected to have been sized up front (while the
				// postings were being created), so it must already have room.
				int insertAt = postingsFreeCount;
				System.Diagnostics.Debug.Assert(insertAt + numPostings <= postingsFreeList.Length);

				Array.Copy(postings, 0, postingsFreeList, insertAt, numPostings);
				postingsFreeCount = insertAt + numPostings;
			}
		}
Exemple #27
0
        /// <summary>
        /// Recursively sorts <paramref name="postings"/> in place over the
        /// inclusive index range [<paramref name="lo"/>, <paramref name="hi"/>],
        /// ordering elements with <c>ComparePostings</c>.
        /// </summary>
        /// <param name="postings">Array to sort.</param>
        /// <param name="lo">Index of the first element of the range.</param>
        /// <param name="hi">Index of the last element of the range (inclusive).</param>
        internal void  QuickSort(RawPostingList[] postings, int lo, int hi)
        {
            // Zero or one element: nothing to do.
            if (lo >= hi)
            {
                return;
            }

            RawPostingList swap;

            // Exactly two elements: a single compare-and-swap finishes the job.
            if (hi == 1 + lo)
            {
                if (ComparePostings(postings[lo], postings[hi]) > 0)
                {
                    swap = postings[lo];
                    postings[lo] = postings[hi];
                    postings[hi] = swap;
                }
                return;
            }

            // Unsigned shift keeps the midpoint valid even if lo + hi overflows.
            int middle = SupportClass.Number.URShift((lo + hi), 1);

            // Order the three samples so that postings[lo] <= postings[middle] <= postings[hi].
            if (ComparePostings(postings[lo], postings[middle]) > 0)
            {
                swap = postings[lo];
                postings[lo] = postings[middle];
                postings[middle] = swap;
            }

            if (ComparePostings(postings[middle], postings[hi]) > 0)
            {
                swap = postings[middle];
                postings[middle] = postings[hi];
                postings[hi] = swap;

                if (ComparePostings(postings[lo], postings[middle]) > 0)
                {
                    swap = postings[lo];
                    postings[lo] = postings[middle];
                    postings[middle] = swap;
                }
            }

            int scanUp = lo + 1;
            int scanDown = hi - 1;

            // Three elements or fewer are fully sorted by the ordering above.
            if (scanUp >= scanDown)
            {
                return;
            }

            RawPostingList pivot = postings[middle];

            while (true)
            {
                // Skip elements on the right that already belong above the pivot.
                while (ComparePostings(postings[scanDown], pivot) > 0)
                {
                    --scanDown;
                }

                // Skip elements on the left that already belong at or below the pivot.
                while (scanUp < scanDown && ComparePostings(postings[scanUp], pivot) <= 0)
                {
                    ++scanUp;
                }

                if (scanUp >= scanDown)
                {
                    break;
                }

                swap = postings[scanUp];
                postings[scanUp] = postings[scanDown];
                postings[scanDown] = swap;
                --scanDown;
            }

            QuickSort(postings, lo, scanUp);
            QuickSort(postings, scanUp + 1, hi);
        }
Exemple #28
0
		/// <summary>
		/// Shrinks <c>postingsFreeList</c> down to a single slot, keeping at
		/// most one free posting. When allocation tracking is enabled, the
		/// bytes of the discarded postings are reported to <c>docWriter</c> as
		/// a negative allocation. Every allocated posting is asserted to be
		/// on the free list when this is called.
		/// </summary>
		/// <param name="threadsAndFields">Not used by this implementation.</param>
		/// <param name="state">Not used by this implementation.</param>
		internal void  ShrinkFreePostings(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
		{
			// The message reports the current thread's name (which may be null and
			// then renders as empty) rather than the literal text of a Java expression.
			System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount, System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer);

			int newSize = 1;
			if (newSize != postingsFreeList.Length)
			{
				if (postingsFreeCount > newSize)
				{
					if (trackAllocations)
					{
						// Negative amount: the postings beyond newSize are being released.
						docWriter.BytesAllocated(-(postingsFreeCount - newSize) * bytesPerPosting);
					}
					postingsFreeCount = newSize;
					postingsAllocCount = newSize;
				}

				// Copy over the surviving entries (at most newSize after the clamp above).
				RawPostingList[] newArray = new RawPostingList[newSize];
				Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
				postingsFreeList = newArray;
			}
		}
Exemple #29
0
 /// <summary>
 /// Derives the hash-table bookkeeping fields from <c>postingsHashSize</c>:
 /// the half-size value, the index mask (size - 1), and a freshly
 /// allocated, empty <c>postingsHash</c> array of that size.
 /// </summary>
 private void  InitBlock()
 {
     int size = postingsHashSize;

     postingsHashHalfSize = size / 2;
     postingsHashMask = size - 1;
     postingsHash = new RawPostingList[size];
 }
		/// <summary>
		/// Updates a posting for a term that has been seen before. If the
		/// current document differs from the posting's last document, the
		/// pending doc code (and, when tracked, the doc frequency) of the
		/// previous document is written out through
		/// <c>termsHashPerField.WriteVInt(0, ...)</c> and the posting is reset
		/// for the current document. Otherwise only the frequency is bumped
		/// and a position delta is written.
		/// </summary>
		/// <param name="p0">Posting to update; cast to <c>FreqProxTermsWriter.PostingList</c>.</param>
		internal override void  AddTerm(RawPostingList p0)
		{
			System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.addTerm start"));

			FreqProxTermsWriter.PostingList posting = (FreqProxTermsWriter.PostingList) p0;

			System.Diagnostics.Debug.Assert(omitTermFreqAndPositions || posting.docFreq > 0);

			int docID = docState.docID;
			bool sameDoc = docID == posting.lastDocID;

			if (omitTermFreqAndPositions)
			{
				// Only doc ids are recorded: flush the previous code and store the delta.
				if (!sameDoc)
				{
					System.Diagnostics.Debug.Assert(docID > posting.lastDocID);
					termsHashPerField.WriteVInt(0, posting.lastDocCode);
					posting.lastDocCode = docID - posting.lastDocID;
					posting.lastDocID = docID;
				}
				return;
			}

			if (sameDoc)
			{
				// Another occurrence within the same document: write the position delta.
				posting.docFreq++;
				WriteProx(posting, fieldState.position - posting.lastPosition);
				return;
			}

			// Term not yet seen in the current doc but previously seen in
			// other doc(s) since the last flush.
			System.Diagnostics.Debug.Assert(docID > posting.lastDocID);

			// The previous doc's frequency is now final, so write it together with
			// its doc code. A frequency of exactly 1 is folded into the low bit of
			// the doc code instead of being written as a separate value.
			if (posting.docFreq != 1)
			{
				termsHashPerField.WriteVInt(0, posting.lastDocCode);
				termsHashPerField.WriteVInt(0, posting.docFreq);
			}
			else
			{
				termsHashPerField.WriteVInt(0, posting.lastDocCode | 1);
			}

			posting.docFreq = 1;
			posting.lastDocCode = (docID - posting.lastDocID) << 1;
			posting.lastDocID = docID;
			WriteProx(posting, fieldState.position);
		}
		/// <summary>
		/// Abstract hook that receives a single <see cref="RawPostingList"/>;
		/// the behavior is supplied entirely by the overriding class.
		/// NOTE(review): presumably invoked the first time a term is seen
		/// since the last flush - confirm against the caller.
		/// </summary>
		/// <param name="p">The posting passed to the implementation.</param>
		internal abstract void  NewTerm(RawPostingList p);