internal void writeProx(Token t, FreqProxTermsWriter.PostingList p, int proxCode)
        {
            Payload payload = t.GetPayload();

            if (payload != null && payload.length > 0)
            {
                // Set the low bit of the shifted position delta to flag that
                // payload bytes follow on stream 1 (the prox stream):
                // length first, then the raw data.
                termsHashPerField.writeVInt(1, (proxCode << 1) | 1);
                termsHashPerField.writeVInt(1, payload.length);
                termsHashPerField.writeBytes(1, payload.data, payload.offset, payload.length);
                hasPayloads = true;
            }
            else
            {
                // No payload: low bit stays clear, only the delta is written.
                termsHashPerField.writeVInt(1, proxCode << 1);
            }
            p.lastPosition = fieldState.position;
        }
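For context, here is a minimal sketch of how a consumer could decode this stream. Only the bit layout (position delta shifted left by one, low bit set when payload bytes follow) comes from the method above; the ProxReaderSketch class, its helpers, and the flat byte[] standing in for Lucene's slice-based byte pool are all illustrative assumptions.

using System;

// Hypothetical decoder for the layout written by writeProx above.
// The real prox data lives in interleaved byte-pool slices; this sketch
// flattens it to a plain byte[] purely for illustration.
internal sealed class ProxReaderSketch
{
    private readonly byte[] stream;
    private int upto;

    internal ProxReaderSketch(byte[] stream)
    {
        this.stream = stream;
    }

    // Standard 7-bits-per-byte VInt; the high bit flags continuation.
    internal int ReadVInt()
    {
        byte b = stream[upto++];
        int value = b & 0x7F;
        for (int shift = 7; (b & 0x80) != 0; shift += 7)
        {
            b = stream[upto++];
            value |= (b & 0x7F) << shift;
        }
        return value;
    }

    // Returns the position delta; payload bytes, if present, go to payloadBuf.
    internal int ReadPosition(out byte[] payloadBuf)
    {
        int code = ReadVInt();
        if ((code & 1) != 0)
        {
            // Low bit set: a <length, bytes> payload follows the delta.
            int length = ReadVInt();
            payloadBuf = new byte[length];
            Array.Copy(stream, upto, payloadBuf, 0, length);
            upto += length;
        }
        else
        {
            payloadBuf = null;
        }
        return code >> 1; // strip the payload flag bit
    }
}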
Example #2
				/// <summary>This is the hotspot of indexing: it's called once
				/// for every term of every document.  Its job is to
				/// update the postings byte stream (Postings hash)
				/// based on the occurrence of a single term.
				/// </summary>
				private void  AddPosition(Token token)
				{
					
					Payload payload = token.GetPayload();
					
					// Get the text of this term.  Term can either
					// provide a String token or offset into a char[]
					// array
					char[] tokenText = token.TermBuffer();
					int tokenTextLen = token.TermLength();
					
					int code = 0;
					
					// Compute hashcode
					int downto = tokenTextLen;
					while (downto > 0)
						code = (code * 31) + tokenText[--downto];
					
					// System.out.println("  addPosition: buffer=" + new String(tokenText, 0, tokenTextLen) + " pos=" + position + " offsetStart=" + (offset+token.startOffset()) + " offsetEnd=" + (offset + token.endOffset()) + " docID=" + docID + " doPos=" + doVectorPositions + " doOffset=" + doVectorOffsets);
					
					int hashPos = code & postingsHashMask;
					
					System.Diagnostics.Debug.Assert(!postingsCompacted);
					
					// Locate Posting in hash
					Enclosing_Instance.p = postingsHash[hashPos];
					
					if (Enclosing_Instance.p != null && !Enclosing_Instance.PostingEquals(tokenText, tokenTextLen))
					{
						// Conflict: keep searching different locations in
						// the hash table.
						int inc = ((code >> 8) + code) | 1;
						do 
						{
							code += inc;
							hashPos = code & postingsHashMask;
							Enclosing_Instance.p = postingsHash[hashPos];
						}
						while (Enclosing_Instance.p != null && !Enclosing_Instance.PostingEquals(tokenText, tokenTextLen));
					}
					
					int proxCode;
					
					// If we hit an exception below, it's possible the
					// posting list or term vectors data will be
					// partially written and thus inconsistent if
					// flushed, so we have to abort all documents
					// since the last flush:
					
					try
					{
						
						if (Enclosing_Instance.p != null)
						{
							// term seen since last flush
							
							if (Enclosing_Instance.docID != Enclosing_Instance.p.lastDocID)
							{
								// term not yet seen in this doc
								
								// System.out.println("    seen before (new docID=" + docID + ") freqUpto=" + p.freqUpto +" proxUpto=" + p.proxUpto);

								System.Diagnostics.Debug.Assert(Enclosing_Instance.p.docFreq > 0);
								
								// Now that we know doc freq for previous doc,
								// write it & lastDocCode
								Enclosing_Instance.freqUpto = Enclosing_Instance.p.freqUpto & Lucene.Net.Index.DocumentsWriter.BYTE_BLOCK_MASK;
								Enclosing_Instance.freq = Enclosing_Instance.postingsPool.buffers[Enclosing_Instance.p.freqUpto >> Lucene.Net.Index.DocumentsWriter.BYTE_BLOCK_SHIFT];
								if (1 == Enclosing_Instance.p.docFreq)
									Enclosing_Instance.WriteFreqVInt(Enclosing_Instance.p.lastDocCode | 1);
								else
								{
									Enclosing_Instance.WriteFreqVInt(Enclosing_Instance.p.lastDocCode);
									Enclosing_Instance.WriteFreqVInt(Enclosing_Instance.p.docFreq);
								}
								Enclosing_Instance.p.freqUpto = Enclosing_Instance.freqUpto + (Enclosing_Instance.p.freqUpto & Lucene.Net.Index.DocumentsWriter.BYTE_BLOCK_NOT_MASK);
								
								if (doVectors)
								{
									Enclosing_Instance.vector = AddNewVector();
									if (doVectorOffsets)
									{
										offsetStartCode = offsetStart = offset + token.StartOffset();
										offsetEnd = offset + token.EndOffset();
									}
								}
								
								proxCode = position;
								
								Enclosing_Instance.p.docFreq = 1;
								
								// Store code so we can write this after we're
								// done with this new doc
								Enclosing_Instance.p.lastDocCode = (Enclosing_Instance.docID - Enclosing_Instance.p.lastDocID) << 1;
								Enclosing_Instance.p.lastDocID = Enclosing_Instance.docID;
							}
							else
							{
								// term already seen in this doc
								// System.out.println("    seen before (same docID=" + docID + ") proxUpto=" + p.proxUpto);
								Enclosing_Instance.p.docFreq++;
								
								proxCode = position - Enclosing_Instance.p.lastPosition;
								
								if (doVectors)
								{
									Enclosing_Instance.vector = Enclosing_Instance.p.vector;
									if (Enclosing_Instance.vector == null)
										Enclosing_Instance.vector = AddNewVector();
									if (doVectorOffsets)
									{
										offsetStart = offset + token.StartOffset();
										offsetEnd = offset + token.EndOffset();
										offsetStartCode = offsetStart - Enclosing_Instance.vector.lastOffset;
									}
								}
							}
						}
						else
						{
							// term not seen before
							// System.out.println("    never seen docID=" + docID);
							
							// Refill?
							if (0 == Enclosing_Instance.postingsFreeCount)
							{
								Enclosing_Instance.Enclosing_Instance.GetPostings(Enclosing_Instance.postingsFreeList);
								Enclosing_Instance.postingsFreeCount = Enclosing_Instance.postingsFreeList.Length;
							}
							
							int textLen1 = 1 + tokenTextLen;
							if (textLen1 + Enclosing_Instance.charPool.byteUpto > Lucene.Net.Index.DocumentsWriter.CHAR_BLOCK_SIZE)
							{
								if (textLen1 > Lucene.Net.Index.DocumentsWriter.CHAR_BLOCK_SIZE)
								{
									// Just skip this term, to remain as robust as
									// possible during indexing.  A TokenFilter
									// can be inserted into the analyzer chain if
									// other behavior is wanted (pruning the term
									// to a prefix, throwing an exception, etc);
									// a sketch of such a filter follows this example.
									if (Enclosing_Instance.maxTermPrefix == null)
										Enclosing_Instance.maxTermPrefix = new System.String(tokenText, 0, 30);
									
									// Still increment position:
									position++;
									return;
								}
								Enclosing_Instance.charPool.NextBuffer();
							}
							char[] text = Enclosing_Instance.charPool.buffer;
							int textUpto = Enclosing_Instance.charPool.byteUpto;
							
							// Pull next free Posting from free list
							Enclosing_Instance.p = Enclosing_Instance.postingsFreeList[--Enclosing_Instance.postingsFreeCount];
							
							Enclosing_Instance.p.textStart = textUpto + Enclosing_Instance.charPool.byteOffset;
							Enclosing_Instance.charPool.byteUpto += textLen1;
							
							Array.Copy(tokenText, 0, text, textUpto, tokenTextLen);
							
							text[textUpto + tokenTextLen] = (char) (0xffff);
							
							System.Diagnostics.Debug.Assert(postingsHash[hashPos] == null);
							
							postingsHash[hashPos] = Enclosing_Instance.p;
							numPostings++;
							
							if (numPostings == postingsHashHalfSize)
								RehashPostings(2 * postingsHashSize);
							
							// Init first slice for freq & prox streams
							int firstSize = Lucene.Net.Index.DocumentsWriter.levelSizeArray[0];
							
							int upto1 = Enclosing_Instance.postingsPool.NewSlice(firstSize);
							Enclosing_Instance.p.freqStart = Enclosing_Instance.p.freqUpto = Enclosing_Instance.postingsPool.byteOffset + upto1;
							
							int upto2 = Enclosing_Instance.postingsPool.NewSlice(firstSize);
							Enclosing_Instance.p.proxStart = Enclosing_Instance.p.proxUpto = Enclosing_Instance.postingsPool.byteOffset + upto2;
							
							Enclosing_Instance.p.lastDocCode = Enclosing_Instance.docID << 1;
							Enclosing_Instance.p.lastDocID = Enclosing_Instance.docID;
							Enclosing_Instance.p.docFreq = 1;
							
							if (doVectors)
							{
								Enclosing_Instance.vector = AddNewVector();
								if (doVectorOffsets)
								{
									offsetStart = offsetStartCode = offset + token.StartOffset();
									offsetEnd = offset + token.EndOffset();
								}
							}
							
							proxCode = position;
						}
						
						Enclosing_Instance.proxUpto = Enclosing_Instance.p.proxUpto & Lucene.Net.Index.DocumentsWriter.BYTE_BLOCK_MASK;
						Enclosing_Instance.prox = Enclosing_Instance.postingsPool.buffers[Enclosing_Instance.p.proxUpto >> Lucene.Net.Index.DocumentsWriter.BYTE_BLOCK_SHIFT];
						System.Diagnostics.Debug.Assert(Enclosing_Instance.prox != null);
						
						if (payload != null && payload.length > 0)
						{
							Enclosing_Instance.WriteProxVInt((proxCode << 1) | 1);
							Enclosing_Instance.WriteProxVInt(payload.length);
							Enclosing_Instance.WriteProxBytes(payload.data, payload.offset, payload.length);
							fieldInfo.storePayloads = true;
						}
						else
							Enclosing_Instance.WriteProxVInt(proxCode << 1);
						
						Enclosing_Instance.p.proxUpto = Enclosing_Instance.proxUpto + (Enclosing_Instance.p.proxUpto & Lucene.Net.Index.DocumentsWriter.BYTE_BLOCK_NOT_MASK);
						
						Enclosing_Instance.p.lastPosition = position++;
						
						if (doVectorPositions)
						{
							Enclosing_Instance.posUpto = Enclosing_Instance.vector.posUpto & Lucene.Net.Index.DocumentsWriter.BYTE_BLOCK_MASK;
							Enclosing_Instance.pos = Enclosing_Instance.vectorsPool.buffers[Enclosing_Instance.vector.posUpto >> Lucene.Net.Index.DocumentsWriter.BYTE_BLOCK_SHIFT];
							Enclosing_Instance.WritePosVInt(proxCode);
							Enclosing_Instance.vector.posUpto = Enclosing_Instance.posUpto + (Enclosing_Instance.vector.posUpto & Lucene.Net.Index.DocumentsWriter.BYTE_BLOCK_NOT_MASK);
						}
						
						if (doVectorOffsets)
						{
							Enclosing_Instance.offsetUpto = Enclosing_Instance.vector.offsetUpto & Lucene.Net.Index.DocumentsWriter.BYTE_BLOCK_MASK;
							Enclosing_Instance.offsets = Enclosing_Instance.vectorsPool.buffers[Enclosing_Instance.vector.offsetUpto >> Lucene.Net.Index.DocumentsWriter.BYTE_BLOCK_SHIFT];
							Enclosing_Instance.WriteOffsetVInt(offsetStartCode);
							Enclosing_Instance.WriteOffsetVInt(offsetEnd - offsetStart);
							Enclosing_Instance.vector.lastOffset = offsetEnd;
							Enclosing_Instance.vector.offsetUpto = Enclosing_Instance.offsetUpto + (Enclosing_Instance.vector.offsetUpto & Lucene.Net.Index.DocumentsWriter.BYTE_BLOCK_NOT_MASK);
						}
					}
					catch (System.Exception t)
					{
						throw new AbortException(t, Enclosing_Instance.Enclosing_Instance);
					}
				}
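The lookup at the top of AddPosition is a textbook open-addressing scheme: a 31-based polynomial hash over the term's characters, masked into a power-of-two table, with an odd probe increment derived from the hash on collision. Below is a standalone sketch of just that arithmetic; the FindSlot method and the string[] table are hypothetical stand-ins for the real postingsHash of Posting objects, while the hashing and probing mirror the code above.

// Standalone sketch of the polynomial hash + odd-increment probing used
// in AddPosition. Only the arithmetic is taken from the source; the
// string[] table is an illustrative substitute for the postings hash.
internal static class PostingsHashSketch
{
    // mask must be tableLength - 1, with tableLength a power of two.
    internal static int FindSlot(string[] table, int mask, char[] text, int len)
    {
        // Same 31-based polynomial hash as AddPosition, scanned back to front.
        int code = 0;
        int downto = len;
        while (downto > 0)
            code = (code * 31) + text[--downto];

        int hashPos = code & mask;
        if (table[hashPos] != null && !PostingEquals(table[hashPos], text, len))
        {
            // Collision: reprobe with an odd increment so that, given the
            // power-of-two table size, every slot is eventually visited.
            int inc = ((code >> 8) + code) | 1;
            do
            {
                code += inc;
                hashPos = code & mask;
            }
            while (table[hashPos] != null && !PostingEquals(table[hashPos], text, len));
        }
        return hashPos; // either an empty slot or the matching entry
    }

    private static bool PostingEquals(string entry, char[] text, int len)
    {
        if (entry.Length != len)
            return false;
        for (int i = 0; i < len; i++)
            if (entry[i] != text[i])
                return false;
        return true;
    }
}

A caller would treat a null slot as "term not seen before" and a non-null slot as a match, exactly as the two branches in AddPosition do.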
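Finally, the "just skip this term" comment in AddPosition suggests inserting a TokenFilter when different handling of oversized terms is wanted. Here is a sketch of such a filter, assuming the classic Token-returning Lucene.Net TokenStream API that this code targets; the class name and the length limit are illustrative, not from the source.

using Lucene.Net.Analysis;

// Sketch of the kind of TokenFilter the comment alludes to: instead of
// letting oversized terms be silently skipped, prune them to a prefix.
public class PrefixTruncatingFilter : TokenFilter
{
    private readonly int maxLength;

    public PrefixTruncatingFilter(TokenStream input, int maxLength) : base(input)
    {
        this.maxLength = maxLength;
    }

    public override Token Next()
    {
        Token token = input.Next();
        if (token != null && token.TermLength() > maxLength)
        {
            // Keep only the first maxLength chars of the term text.
            token.SetTermLength(maxLength);
        }
        return token;
    }
}

Chained after the tokenizer in the analyzer, a filter like this would guarantee AddPosition never sees a term longer than maxLength, so the skip branch never triggers.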