Esempio n. 1
0
        /// <summary>
        /// Copies every value stored in postingTable into a newly allocated
        /// array, sorts that array with QuickSort and returns it.
        /// </summary>
        /// <returns>A new array holding all postings of the table, sorted.</returns>
        private Posting[] SortPostingTable()
        {
            Posting[] sorted = new Posting[postingTable.Count];

            // fill the array in the table's enumeration order
            int next = 0;
            foreach (Posting entry in postingTable.Values)
            {
                sorted[next++] = entry;
            }

            // order the whole range [0, Length - 1]
            QuickSort(sorted, 0, sorted.Length - 1);
            return sorted;
        }
Esempio n. 2
0
        private Term termBuffer = new Term("", "");         // reusable lookup key, avoids allocating a Term per call

        /// <summary>
        /// Records one occurrence of the term (field, text) at the given position.
        /// An existing Posting gets the position (and the offset, when one is
        /// supplied) appended and its frequency incremented; otherwise a new
        /// Posting is created and stored in postingTable.
        /// </summary>
        /// <param name="field">Field name of the term.</param>
        /// <param name="text">Text of the term.</param>
        /// <param name="position">Position to record for this occurrence.</param>
        /// <param name="offset">Offset info to record, or null to record none.</param>
        private void  AddPosition(System.String field, System.String text, int position, TermVectorOffsetInfo offset)
        {
            // look the term up through the shared buffer instead of a new Term
            termBuffer.Set(field, text);
            Posting existing = (Posting)postingTable[termBuffer];

            if (existing == null)
            {
                // first occurrence: the table needs its own Term as key
                Term key = new Term(field, text, false);
                postingTable[key] = new Posting(key, position, offset);
                return;
            }

            int count = existing.freq;

            if (existing.positions.Length == count)
            {
                // positions array is full: move to one twice as large
                int[] grownPositions = new int[count * 2];
                System.Array.Copy(existing.positions, 0, grownPositions, 0, count);
                existing.positions = grownPositions;
            }
            existing.positions[count] = position;

            if (offset != null)
            {
                if (existing.offsets.Length == count)
                {
                    // offsets array is full: grow it the same way
                    TermVectorOffsetInfo[] grownOffsets = new TermVectorOffsetInfo[count * 2];
                    System.Array.Copy(existing.offsets, 0, grownOffsets, 0, count);
                    existing.offsets = grownOffsets;
                }
                existing.offsets[count] = offset;
            }

            existing.freq = count + 1;
        }
Esempio n. 3
0
		/// <summary>
		/// Builds an array containing every Posting held by postingTable,
		/// sorts it with QuickSort and hands it back to the caller.
		/// </summary>
		/// <returns>A new, sorted array of the table's postings.</returns>
		private Posting[] SortPostingTable()
		{
			int total = postingTable.Count;
			Posting[] result = new Posting[total];
			
			// gather the table's values
			int slot = 0;
			foreach (Posting current in postingTable.Values)
			{
				result[slot] = current;
				slot++;
			}
			
			// sort everything that was gathered
			QuickSort(result, 0, result.Length - 1);
			
			return result;
		}
Esempio n. 4
0
		private Term termBuffer = new Term("", ""); // reusable lookup key, avoids allocating a Term per call
		
		/// <summary>
		/// Registers one occurrence of the term (field, text). A term already
		/// present in postingTable gets the position, and the offset when it is
		/// not null, appended and its freq incremented; an unseen term gets a
		/// new Posting.
		/// </summary>
		/// <param name="field">Field name of the term.</param>
		/// <param name="text">Text of the term.</param>
		/// <param name="position">Position of this occurrence.</param>
		/// <param name="offset">Offset info for this occurrence; may be null.</param>
		private void  AddPosition(System.String field, System.String text, int position, TermVectorOffsetInfo offset)
		{
			termBuffer.Set(field, text);
			Posting posting = (Posting) postingTable[termBuffer];
			
			if (posting == null)
			{
				// term not seen before: store it under a Term of its own
				Term newTerm = new Term(field, text, false);
				postingTable[newTerm] = new Posting(newTerm, position, offset);
				return ;
			}
			
			// term seen before
			int seen = posting.freq;
			
			if (posting.positions.Length == seen)
			{
				// no room left: double the capacity and carry the old entries over
				int[] biggerPositions = new int[seen * 2];
				Array.Copy(posting.positions, 0, biggerPositions, 0, seen);
				posting.positions = biggerPositions;
			}
			posting.positions[seen] = position;
			
			if (offset != null)
			{
				if (posting.offsets.Length == seen)
				{
					// same doubling strategy for the offsets
					TermVectorOffsetInfo[] biggerOffsets = new TermVectorOffsetInfo[seen * 2];
					Array.Copy(posting.offsets, 0, biggerOffsets, 0, seen);
					posting.offsets = biggerOffsets;
				}
				posting.offsets[seen] = offset;
			}
			
			posting.freq = seen + 1;
		}
		/// <summary>
		/// Appends the first <c>numPostings</c> entries of <c>postings</c> to
		/// postingsFreeList and advances postingsFreeCount accordingly.
		/// Runs while holding the lock on this instance.
		/// </summary>
		/// <param name="postings">Array whose leading entries are returned to the free list.</param>
		/// <param name="numPostings">Number of entries to copy from the start of <c>postings</c>.</param>
		internal void  RecyclePostings(Posting[] postings, int numPostings)
		{
			lock (this)
			{
				int freeCountAfter = postingsFreeCount + numPostings;
				
				// The free list is expected to have been sized ahead of time
				// so that everything handed back fits
				System.Diagnostics.Debug.Assert(freeCountAfter <= postingsFreeList.Length);
				
				Array.Copy(postings, 0, postingsFreeList, postingsFreeCount, numPostings);
				postingsFreeCount = freeCountAfter;
			}
		}
		/// <summary>
		/// Fills every slot of <c>postings</c>: first with entries taken from the
		/// tail of postingsFreeList, then with newly constructed Posting
		/// instances for the slots the free list could not supply.
		/// Runs while holding the lock on this instance.
		/// </summary>
		/// <param name="postings">Array to fill completely.</param>
		internal void  GetPostings(Posting[] postings)
		{
			lock (this)
			{
				int wanted = postings.Length;
				numBytesUsed += wanted * POSTING_NUM_BYTE;
				
				// Reuse as many free Postings as are available
				int numToCopy = postingsFreeCount < wanted ? postingsFreeCount : wanted;
				Array.Copy(postingsFreeList, postingsFreeCount - numToCopy, postings, 0, numToCopy);
				postingsFreeCount -= numToCopy;
				
				if (numToCopy >= wanted)
				{
					// the free list covered the whole request
					return ;
				}
				
				// Allocate the remainder directly; first make sure the free
				// list array can hold every Posting allocated so far
				int shortfall = wanted - numToCopy;
				int allocCountAfter = postingsAllocCount + shortfall;
				if (allocCountAfter > postingsFreeList.Length)
				{
					postingsFreeList = new Posting[(int) (1.25 * allocCountAfter)];
				}
				
				BalanceRAM();
				for (int slot = numToCopy; slot < wanted; slot++)
				{
					postings[slot] = new Posting();
					numBytesAlloc += POSTING_NUM_BYTE;
					postingsAllocCount++;
				}
			}
		}
			/// <summary>
			/// Advances to the next posting of the field and prepares the text,
			/// freq and prox state for it, then positions on its first document
			/// via NextDoc.
			/// </summary>
			/// <returns>false once postingUpto reaches field.numPostings, true otherwise.</returns>
			internal bool NextTerm()
			{
				if (++postingUpto == field.numPostings)
				{
					return false;
				}
				
				p = postings[postingUpto];
				docID = 0;
				
				// locate the term text: block index in the high bits, offset in the low bits
				text = field.threadState.charPool.buffers[p.textStart >> Lucene.Net.Index.DocumentsWriter.CHAR_BLOCK_SHIFT];
				textOffset = p.textStart & Lucene.Net.Index.DocumentsWriter.CHAR_BLOCK_MASK;
				
				if (p.freqUpto <= p.freqStart)
				{
					// freqUpto has not moved past freqStart: zero the freq
					// reader's cursor fields instead of calling Init
					freq.endIndex = 0;
					freq.upto = 0;
					freq.bufferOffset = 0;
				}
				else
				{
					freq.Init(field.threadState.postingsPool, p.freqStart, p.freqUpto);
				}
				
				prox.Init(field.threadState.postingsPool, p.proxStart, p.proxUpto);
				
				// A term is expected to have at least one document
				bool hasDoc = NextDoc();
				System.Diagnostics.Debug.Assert(hasDoc);
				
				return true;
			}
		/// <summary>Creates a segment from all Postings in the Postings
		/// hashes across all ThreadStates and FieldDatas.
		/// </summary>
		/// <remarks>
		/// Writes the .frq and .prx outputs and the term infos for the current
		/// segment, optionally writes norms, reports statistics to infoStream
		/// when one is set, then resets the in-RAM state and possibly shrinks
		/// postingsFreeList.
		/// The "new/old" statistic is computed in floating point: dividing the
		/// two integral sizes directly truncates the ratio, and throws when
		/// numBytesUsed is zero. The format item also carries digit
		/// placeholders so the percentage value is actually printed.
		/// </remarks>
		/// <returns>The names of the files recorded as flushed for this segment.</returns>
		private System.Collections.IList WriteSegment()
		{
			
			System.Diagnostics.Debug.Assert(AllThreadsIdle());
			
			System.Diagnostics.Debug.Assert(nextDocID == numDocsInRAM);
			
			System.String segmentName;
			
			segmentName = segment;
			
			TermInfosWriter termsOut = new TermInfosWriter(directory, segmentName, fieldInfos, writer.GetTermIndexInterval());
			
			IndexOutput freqOut = directory.CreateOutput(segmentName + ".frq");
			IndexOutput proxOut = directory.CreateOutput(segmentName + ".prx");
			
			// Gather all FieldData's that have postings, across all
			// ThreadStates
			System.Collections.ArrayList allFields = new System.Collections.ArrayList();
			System.Diagnostics.Debug.Assert(AllThreadsIdle());
			for (int i = 0; i < threadStates.Length; i++)
			{
				ThreadState state = threadStates[i];
				state.TrimFields();
				int numFields = state.numAllFieldData;
				for (int j = 0; j < numFields; j++)
				{
					ThreadState.FieldData fp = state.allFieldDataArray[j];
					if (fp.numPostings > 0)
						allFields.Add(fp);
				}
			}
			
			// Sort by field name
			allFields.Sort();
			int numAllFields = allFields.Count;
			
			skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, termsOut.maxSkipLevels, numDocsInRAM, freqOut, proxOut);
			
			int start = 0;
			while (start < numAllFields)
			{
				
				System.String fieldName = ((ThreadState.FieldData) allFields[start]).fieldInfo.name;
				
				// Find the end of the run of entries sharing this field name
				int end = start + 1;
				while (end < numAllFields && ((ThreadState.FieldData) allFields[end]).fieldInfo.name.Equals(fieldName))
					end++;
				
				ThreadState.FieldData[] fields = new ThreadState.FieldData[end - start];
				for (int i = start; i < end; i++)
					fields[i - start] = (ThreadState.FieldData) allFields[i];
				
				// If this field has postings then add them to the
				// segment
				AppendPostings(fields, termsOut, freqOut, proxOut);
				
				for (int i = 0; i < fields.Length; i++)
					fields[i].ResetPostingArrays();
				
				start = end;
			}
			
			freqOut.Close();
			proxOut.Close();
			termsOut.Close();
			
			// Record all files we have flushed
			System.Collections.IList flushedFiles = new System.Collections.ArrayList();
			flushedFiles.Add(SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION));
			flushedFiles.Add(SegmentFileName(IndexFileNames.FREQ_EXTENSION));
			flushedFiles.Add(SegmentFileName(IndexFileNames.PROX_EXTENSION));
			flushedFiles.Add(SegmentFileName(IndexFileNames.TERMS_EXTENSION));
			flushedFiles.Add(SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION));
			
			if (hasNorms)
			{
				WriteNorms(segmentName, numDocsInRAM);
				flushedFiles.Add(SegmentFileName(IndexFileNames.NORMS_EXTENSION));
			}
			
			if (infoStream != null)
			{
				long newSegmentSize = SegmentSize(segmentName);
				
				// Floating-point ratio: integral division would truncate it
				// and throw on a zero divisor. The percent format below
				// multiplies the ratio by 100 when rendering it.
				double newOverOld = (double) newSegmentSize / numBytesUsed;
				
				System.String message = String.Format(nf, "  oldRAMSize={0:d} newFlushedSize={1:d} docs/MB={2:f} new/old={3:0.0%}",
					new Object[] { numBytesUsed, newSegmentSize, (numDocsInRAM / (newSegmentSize / 1024.0 / 1024.0)), newOverOld });
				infoStream.WriteLine(message);
			}
			
			ResetPostingsData();
			
			nextDocID = 0;
			nextWriteDocID = 0;
			numDocsInRAM = 0;
			files = null;
			
			// Maybe downsize postingsFreeList array
			if (postingsFreeList.Length > 1.5 * postingsFreeCount)
			{
				// Shrink in steps of 20% while the array stays more than 25%
				// larger than the number of free postings it has to hold
				int newSize = postingsFreeList.Length;
				while (newSize > 1.25 * postingsFreeCount)
				{
					newSize = (int) (newSize * 0.8);
				}
				Posting[] newArray = new Posting[newSize];
				Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
				postingsFreeList = newArray;
			}
			
			return flushedFiles;
		}
		private BufferedNorms[] norms; // Norms are buffered here until the next flush
		
		/// <summary>
		/// Creates a writer bound to the given directory and IndexWriter. The
		/// postings free list starts out as an empty array.
		/// </summary>
		/// <param name="directory">Stored in the directory field.</param>
		/// <param name="writer">Stored in the writer field.</param>
		internal DocumentsWriter(Directory directory, IndexWriter writer)
		{
			InitBlock();
			
			this.writer = writer;
			this.directory = directory;
			
			// no recycled Postings yet
			postingsFreeList = new Posting[0];
		}
Esempio n. 10
0
			/// <summary>Compares the term text of two Posting instances, reading
			/// both character by character from charPool until they differ or
			/// both reach the 0xffff end marker.
			/// </summary>
			/// <returns>-1 if p1 sorts before p2, 1 if p1 sorts after p2, 0 if
			/// both texts are equal. A text that reaches its end marker first
			/// sorts before the longer one.
			/// </returns>
			internal int ComparePostings(Posting p1, Posting p2)
			{
				char[] bufferA = charPool.buffers[p1.textStart >> Lucene.Net.Index.DocumentsWriter.CHAR_BLOCK_SHIFT];
				char[] bufferB = charPool.buffers[p2.textStart >> Lucene.Net.Index.DocumentsWriter.CHAR_BLOCK_SHIFT];
				int indexA = p1.textStart & Lucene.Net.Index.DocumentsWriter.CHAR_BLOCK_MASK;
				int indexB = p2.textStart & Lucene.Net.Index.DocumentsWriter.CHAR_BLOCK_MASK;
				
				for (; ; )
				{
					char a = bufferA[indexA++];
					char b = bufferB[indexB++];
					
					if (a == b)
					{
						if (0xffff == a)
						{
							// both texts ended together
							return 0;
						}
						continue;
					}
					
					if (0xffff == b)
					{
						// p2 ended first, so p1 is the larger one
						return 1;
					}
					if (0xffff == a)
					{
						// p1 ended first, so p1 is the smaller one
						return - 1;
					}
					
					return a < b ? - 1 : 1;
				}
			}
Esempio n. 11
0
			/// <summary>
			/// Sorts postings[lo..hi] (both bounds inclusive) in place using
			/// ComparePostings, with a recursive quicksort whose pivot is the
			/// median of the first, middle and last element.
			/// </summary>
			/// <param name="postings">Array to sort.</param>
			/// <param name="lo">Index of the first element of the range.</param>
			/// <param name="hi">Index of the last element of the range.</param>
			internal void  QuickSort(Posting[] postings, int lo, int hi)
			{
				if (hi <= lo)
				{
					return ;
				}
				
				int mid = SupportClass.Number.URShift((lo + hi), 1);
				Posting held;
				
				// Put postings[lo], postings[mid], postings[hi] in order
				if (ComparePostings(postings[lo], postings[mid]) > 0)
				{
					held = postings[lo];
					postings[lo] = postings[mid];
					postings[mid] = held;
				}
				
				if (ComparePostings(postings[mid], postings[hi]) > 0)
				{
					held = postings[mid];
					postings[mid] = postings[hi];
					postings[hi] = held;
					
					// the element that moved into mid may now be below lo
					if (ComparePostings(postings[lo], postings[mid]) > 0)
					{
						held = postings[lo];
						postings[lo] = postings[mid];
						postings[mid] = held;
					}
				}
				
				int left = lo + 1;
				int right = hi - 1;
				
				if (right <= left)
				{
					// three or fewer elements: already ordered above
					return ;
				}
				
				Posting pivot = postings[mid];
				
				while (true)
				{
					while (ComparePostings(postings[right], pivot) > 0)
					{
						right--;
					}
					
					while (left < right && ComparePostings(postings[left], pivot) <= 0)
					{
						left++;
					}
					
					if (left >= right)
					{
						break;
					}
					
					held = postings[left];
					postings[left] = postings[right];
					postings[right] = held;
					right--;
				}
				
				QuickSort(postings, lo, left);
				QuickSort(postings, left + 1, hi);
			}
Esempio n. 12
0
			/// <summary>Sorts the first <c>numPosting</c> entries of
			/// <c>postings</c> in place by handing the index range
			/// [0, numPosting - 1] to QuickSort.
			/// </summary>
			internal void  DoPostingSort(Posting[] postings, int numPosting)
			{
				int lastIndex = numPosting - 1;
				QuickSort(postings, 0, lastIndex);
			}
Esempio n. 13
0
        /// <summary>
        /// Writes the given postings to the segment's ".frq" and ".prx" outputs
        /// and to its term infos, and writes term vectors for every field whose
        /// FieldInfo has storeTermVector set.
        /// </summary>
        /// <remarks>
        /// All outputs are closed in a finally block. The first IOException
        /// raised while closing is rethrown wrapped in a new IOException that
        /// keeps the original message and carries the original as
        /// InnerException, so neither the message nor the stack trace is lost.
        /// </remarks>
        /// <param name="postings">Postings to write, processed in array order.</param>
        /// <param name="segment">Segment name used as the file name prefix.</param>
        /// <exception cref="System.IO.IOException">If closing one of the outputs fails.</exception>
        private void  WritePostings(Posting[] postings, System.String segment)
        {
            IndexOutput       freq = null, prox = null;
            TermInfosWriter   tis              = null;
            TermVectorsWriter termVectorWriter = null;

            try
            {
                //open files for inverse index storage
                freq = directory.CreateOutput(segment + ".frq");
                prox = directory.CreateOutput(segment + ".prx");
                tis  = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
                TermInfo      ti           = new TermInfo();
                System.String currentField = null;

                for (int i = 0; i < postings.Length; i++)
                {
                    Posting posting = postings[i];

                    // add an entry to the dictionary with pointers to prox and freq files
                    ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), -1);
                    tis.Add(posting.term, ti);

                    // add an entry to the freq file
                    int postingFreq = posting.freq;
                    if (postingFreq == 1)
                    {
                        // optimize freq=1
                        freq.WriteVInt(1);
                    }
                    // set low bit of doc num.
                    else
                    {
                        freq.WriteVInt(0);                         // the document number
                        freq.WriteVInt(postingFreq);               // frequency in doc
                    }

                    int   lastPosition = 0;                   // write positions
                    int[] positions    = posting.positions;
                    for (int j = 0; j < postingFreq; j++)
                    {
                        // use delta-encoding
                        int position = positions[j];
                        prox.WriteVInt(position - lastPosition);
                        lastPosition = position;
                    }
                    // check to see if we switched to a new field
                    System.String termField = posting.term.Field();
                    if (currentField != termField)
                    {
                        // changing field - see if there is something to save
                        currentField = termField;
                        FieldInfo fi = fieldInfos.FieldInfo(currentField);
                        if (fi.storeTermVector)
                        {
                            if (termVectorWriter == null)
                            {
                                // created lazily, on the first field that stores term vectors
                                termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos);
                                termVectorWriter.OpenDocument();
                            }
                            termVectorWriter.OpenField(currentField);
                        }
                        else if (termVectorWriter != null)
                        {
                            termVectorWriter.CloseField();
                        }
                    }
                    if (termVectorWriter != null && termVectorWriter.IsFieldOpen())
                    {
                        termVectorWriter.AddTerm(posting.term.Text(), postingFreq, posting.positions, posting.offsets);
                    }
                }
                if (termVectorWriter != null)
                {
                    termVectorWriter.CloseDocument();
                }
            }
            finally
            {
                // make an effort to close all streams we can but remember and re-throw
                // the first exception encountered in this process
                System.IO.IOException keep = null;
                if (freq != null)
                {
                    try
                    {
                        freq.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (prox != null)
                {
                    try
                    {
                        prox.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (tis != null)
                {
                    try
                    {
                        tis.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (termVectorWriter != null)
                {
                    try
                    {
                        termVectorWriter.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (keep != null)
                {
                    // wrap instead of using the stack trace text as the message:
                    // keeps the real message and the original exception
                    throw new System.IO.IOException(keep.Message, keep);
                }
            }
        }
Esempio n. 14
0
        /// <summary>
        /// Sorts postings[lo..hi] (both bounds inclusive) in place by term,
        /// using a recursive quicksort whose pivot is the median of the first,
        /// middle and last element.
        /// </summary>
        /// <remarks>
        /// The middle index is computed as lo + (hi - lo) / 2 so that it cannot
        /// overflow for large index values; for valid ranges it equals
        /// (lo + hi) / 2.
        /// </remarks>
        /// <param name="postings">Array to sort.</param>
        /// <param name="lo">Index of the first element of the range.</param>
        /// <param name="hi">Index of the last element of the range.</param>
        private static void  QuickSort(Posting[] postings, int lo, int hi)
        {
            if (lo >= hi)
            {
                return;
            }

            // overflow-safe midpoint
            int mid = lo + ((hi - lo) >> 1);

            // put postings[lo], postings[mid], postings[hi] in order
            if (postings[lo].term.CompareTo(postings[mid].term) > 0)
            {
                Posting tmp = postings[lo];
                postings[lo]  = postings[mid];
                postings[mid] = tmp;
            }

            if (postings[mid].term.CompareTo(postings[hi].term) > 0)
            {
                Posting tmp = postings[mid];
                postings[mid] = postings[hi];
                postings[hi]  = tmp;

                if (postings[lo].term.CompareTo(postings[mid].term) > 0)
                {
                    Posting tmp2 = postings[lo];
                    postings[lo]  = postings[mid];
                    postings[mid] = tmp2;
                }
            }

            int left  = lo + 1;
            int right = hi - 1;

            if (left >= right)
            {
                // three or fewer elements: already ordered above
                return;
            }

            Term partition = postings[mid].term;

            for (; ;)
            {
                while (postings[right].term.CompareTo(partition) > 0)
                {
                    --right;
                }

                while (left < right && postings[left].term.CompareTo(partition) <= 0)
                {
                    ++left;
                }

                if (left < right)
                {
                    Posting tmp = postings[left];
                    postings[left]  = postings[right];
                    postings[right] = tmp;
                    --right;
                }
                else
                {
                    break;
                }
            }

            QuickSort(postings, lo, left);
            QuickSort(postings, left + 1, hi);
        }
Esempio n. 15
0
		/// <summary>
		/// Sorts postings[lo..hi] (both bounds inclusive) in place by term,
		/// using a recursive quicksort whose pivot is the median of the first,
		/// middle and last element.
		/// </summary>
		/// <remarks>
		/// The middle index is computed as lo + (hi - lo) / 2 so that it cannot
		/// overflow for large index values; for valid ranges it equals
		/// (lo + hi) / 2.
		/// </remarks>
		/// <param name="postings">Array to sort.</param>
		/// <param name="lo">Index of the first element of the range.</param>
		/// <param name="hi">Index of the last element of the range.</param>
		private static void  QuickSort(Posting[] postings, int lo, int hi)
		{
			if (lo >= hi)
				return ;
			
			// overflow-safe midpoint
			int mid = lo + ((hi - lo) >> 1);
			
			// put postings[lo], postings[mid], postings[hi] in order
			if (postings[lo].term.CompareTo(postings[mid].term) > 0)
			{
				Posting tmp = postings[lo];
				postings[lo] = postings[mid];
				postings[mid] = tmp;
			}
			
			if (postings[mid].term.CompareTo(postings[hi].term) > 0)
			{
				Posting tmp = postings[mid];
				postings[mid] = postings[hi];
				postings[hi] = tmp;
				
				if (postings[lo].term.CompareTo(postings[mid].term) > 0)
				{
					Posting tmp2 = postings[lo];
					postings[lo] = postings[mid];
					postings[mid] = tmp2;
				}
			}
			
			int left = lo + 1;
			int right = hi - 1;
			
			// three or fewer elements: already ordered above
			if (left >= right)
				return ;
			
			Term partition = postings[mid].term;
			
			for (; ; )
			{
				while (postings[right].term.CompareTo(partition) > 0)
					--right;
				
				while (left < right && postings[left].term.CompareTo(partition) <= 0)
					++left;
				
				if (left < right)
				{
					Posting tmp = postings[left];
					postings[left] = postings[right];
					postings[right] = tmp;
					--right;
				}
				else
				{
					break;
				}
			}
			
			QuickSort(postings, lo, left);
			QuickSort(postings, left + 1, hi);
		}
Esempio n. 16
0
		/// <summary>
		/// Writes the given postings to the segment's ".frq" and ".prx" outputs
		/// and to its term infos, and writes term vectors for every field whose
		/// FieldInfo has storeTermVector set.
		/// </summary>
		/// <remarks>
		/// All outputs are closed in a finally block. The first IOException
		/// raised while closing is rethrown wrapped in a new IOException that
		/// keeps the original message and carries the original as
		/// InnerException, so neither the message nor the stack trace is lost.
		/// </remarks>
		/// <param name="postings">Postings to write, processed in array order.</param>
		/// <param name="segment">Segment name used as the file name prefix.</param>
		/// <exception cref="System.IO.IOException">If closing one of the outputs fails.</exception>
		private void  WritePostings(Posting[] postings, System.String segment)
		{
			IndexOutput freq = null, prox = null;
			TermInfosWriter tis = null;
			TermVectorsWriter termVectorWriter = null;
			try
			{
				//open files for inverse index storage
				freq = directory.CreateOutput(segment + ".frq");
				prox = directory.CreateOutput(segment + ".prx");
				tis = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
				TermInfo ti = new TermInfo();
				System.String currentField = null;
				
				for (int i = 0; i < postings.Length; i++)
				{
					Posting posting = postings[i];
					
					// add an entry to the dictionary with pointers to prox and freq files
					ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), - 1);
					tis.Add(posting.term, ti);
					
					// add an entry to the freq file
					int postingFreq = posting.freq;
					if (postingFreq == 1)
					{
						// optimize freq=1: set low bit of doc num.
						freq.WriteVInt(1);
					}
					else
					{
						freq.WriteVInt(0); // the document number
						freq.WriteVInt(postingFreq); // frequency in doc
					}
					
					int lastPosition = 0; // write positions
					int[] positions = posting.positions;
					for (int j = 0; j < postingFreq; j++)
					{
						// use delta-encoding
						int position = positions[j];
						prox.WriteVInt(position - lastPosition);
						lastPosition = position;
					}
					// check to see if we switched to a new field
					System.String termField = posting.term.Field();
					if (currentField != termField)
					{
						// changing field - see if there is something to save
						currentField = termField;
						FieldInfo fi = fieldInfos.FieldInfo(currentField);
						if (fi.storeTermVector)
						{
							if (termVectorWriter == null)
							{
								// created lazily, on the first field that stores term vectors
								termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos);
								termVectorWriter.OpenDocument();
							}
							termVectorWriter.OpenField(currentField);
						}
						else if (termVectorWriter != null)
						{
							termVectorWriter.CloseField();
						}
					}
					if (termVectorWriter != null && termVectorWriter.IsFieldOpen())
					{
						termVectorWriter.AddTerm(posting.term.Text(), postingFreq, posting.positions, posting.offsets);
					}
				}
				if (termVectorWriter != null)
					termVectorWriter.CloseDocument();
			}
			finally
			{
				// make an effort to close all streams we can but remember and re-throw
				// the first exception encountered in this process
				System.IO.IOException keep = null;
				if (freq != null)
					try
					{
						freq.Close();
					}
					catch (System.IO.IOException e)
					{
						if (keep == null)
							keep = e;
					}
				if (prox != null)
					try
					{
						prox.Close();
					}
					catch (System.IO.IOException e)
					{
						if (keep == null)
							keep = e;
					}
				if (tis != null)
					try
					{
						tis.Close();
					}
					catch (System.IO.IOException e)
					{
						if (keep == null)
							keep = e;
					}
				if (termVectorWriter != null)
					try
					{
						termVectorWriter.Close();
					}
					catch (System.IO.IOException e)
					{
						if (keep == null)
							keep = e;
					}
				if (keep != null)
				{
					// wrap instead of using the stack trace text as the message:
					// keeps the real message and the original exception
					throw new System.IO.IOException(keep.Message, keep);
				}
			}
		}
Esempio n. 17
0
				/// <summary>Called when postings hash is too small (&gt; 50%
				/// occupied) or too large (&lt; 20% occupied). Builds a new
				/// hash array of <c>newSize</c> slots, re-inserts every
				/// non-null entry of postingsHash into it and then swaps it in.
				/// </summary>
				/// <param name="newSize">Slot count of the new hash array;
				/// newSize - 1 is used as the index mask (presumably a power of
				/// two - confirm against callers).</param>
				internal void  RehashPostings(int newSize)
				{
					int newMask = newSize - 1;
					Posting[] rehashed = new Posting[newSize];
					
					for (int slot = 0; slot < postingsHashSize; slot++)
					{
						Posting entry = postingsHash[slot];
						if (entry == null)
						{
							continue;
						}
						
						int start = entry.textStart & Lucene.Net.Index.DocumentsWriter.CHAR_BLOCK_MASK;
						char[] text = Enclosing_Instance.charPool.buffers[entry.textStart >> Lucene.Net.Index.DocumentsWriter.CHAR_BLOCK_SHIFT];
						
						// find the 0xffff marker that ends the term text
						int end = start;
						while (text[end] != 0xffff)
						{
							end++;
						}
						
						// recompute the hash code, walking the text from its last char to its first
						int code = 0;
						for (int k = end - 1; k >= start; k--)
						{
							code = (code * 31) + text[k];
						}
						
						int target = code & newMask;
						System.Diagnostics.Debug.Assert(target >= 0);
						if (rehashed[target] != null)
						{
							// slot taken: probe with an odd step until a free slot turns up
							int step = ((code >> 8) + code) | 1;
							do 
							{
								code += step;
								target = code & newMask;
							}
							while (rehashed[target] != null);
						}
						rehashed[target] = entry;
					}
					
					postingsHash = rehashed;
					postingsHashMask = newMask;
					postingsHashSize = newSize;
					postingsHashHalfSize = newSize >> 1;
				}