Пример #1
0
        /// <summary>Builds the merge queue from one term enumerator per reader.
        /// When <c>t</c> is null each reader is enumerated from its first term;
        /// otherwise each reader is asked for <c>Terms(t)</c> (presumably an
        /// enumerator already positioned at or after <c>t</c> - confirm against
        /// IndexReader) and this enumerator is advanced once after the queue is built.
        /// </summary>
        /// <param name="readers">readers whose terms are merged
        /// </param>
        /// <param name="starts">per-reader value passed to SegmentMergeInfo as its base
        /// </param>
        /// <param name="t">term to start from, or null to start from the beginning
        /// </param>
        public MultiTermEnum(Monodoc.Lucene.Net.Index.IndexReader[] readers, int[] starts, Term t)
        {
            queue = new SegmentMergeQueue(readers.Length);
            bool startAtTerm = t != null;

            for (int idx = 0; idx < readers.Length; idx++)
            {
                Monodoc.Lucene.Net.Index.IndexReader segment = readers[idx];
                TermEnum segmentTerms = startAtTerm ? segment.Terms(t) : segment.Terms();
                SegmentMergeInfo info = new SegmentMergeInfo(starts[idx], segmentTerms, segment);

                // A seeked enumerator is usable when it already has a current term;
                // an unseeked one has to be advanced once first.
                bool hasTerm;
                if (startAtTerm)
                {
                    hasTerm = segmentTerms.Term() != null;
                }
                else
                {
                    hasTerm = info.Next();
                }

                if (!hasTerm)
                {
                    info.Close(); // nothing to contribute from this reader
                    continue;
                }
                queue.Put(info);
            }

            if (startAtTerm && queue.Size() > 0)
            {
                Next();
            }
        }
Пример #2
0
        /// <summary>Advances to the next term in the merged enumeration. The term at
        /// the top of the queue becomes the current term, and <c>docFreq</c> is
        /// set to the sum of <c>DocFreq()</c> over every queued segment that is
        /// positioned on that term.
        /// </summary>
        /// <returns> false (with <c>term</c> set to null) when the queue is empty,
        /// true otherwise
        /// </returns>
        public override bool Next()
        {
            SegmentMergeInfo head = (SegmentMergeInfo)queue.Top();
            if (head == null)
            {
                term = null;
                return false;
            }

            term    = head.term;
            docFreq = 0;

            // Consume every segment currently positioned on the same term.
            for (; head != null && term.CompareTo(head.term) == 0; head = (SegmentMergeInfo)queue.Top())
            {
                queue.Pop();
                docFreq += head.termEnum.DocFreq();

                bool hasMore = head.Next();
                if (!hasMore)
                {
                    head.Close(); // this segment has no further terms
                }
                else
                {
                    queue.Put(head); // re-queue at its new term
                }
            }
            return true;
        }
        /// <summary>Process postings from multiple segments all positioned on the
        /// same term. Writes out merged entries into freqOutput and
        /// the proxOutput streams.
        ///
        /// </summary>
        /// <param name="smis">array of segments
        /// </param>
        /// <param name="n">number of cells in the array actually occupied
        /// </param>
        /// <returns> number of documents across all segments where this term was found
        /// </returns>
        /// <exception cref="System.InvalidOperationException">a document number is
        /// negative, or is not strictly greater than the previously written one
        /// </exception>
        private int AppendPostings(SegmentMergeInfo[] smis, int n)
        {
            int lastDoc = 0;
            int df      = 0; // number of docs w/ term

            ResetSkip();
            for (int i = 0; i < n; i++)
            {
                SegmentMergeInfo smi      = smis[i];
                TermPositions    postings = smi.postings;
                int   base_Renamed        = smi.base_Renamed;
                int[] docMap = smi.docMap;
                postings.Seek(smi.termEnum);
                while (postings.Next())
                {
                    int doc = postings.Doc();
                    if (docMap != null)
                    {
                        doc = docMap[doc]; // map around deletions
                    }
                    doc += base_Renamed;   // convert to merged space

                    // Document numbers must be non-negative and strictly increasing.
                    // Only the very first posting (df == 0) may equal the initial
                    // lastDoc of 0; a later repeat would be written as a zero delta.
                    if (doc < 0 || (df > 0 && doc <= lastDoc))
                    {
                        throw new System.InvalidOperationException("docs out of order (" + doc + " <= " + lastDoc + ")");
                    }

                    df++;

                    if ((df % skipInterval) == 0)
                    {
                        BufferSkip(lastDoc);
                    }

                    int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
                    lastDoc = doc;

                    int freq = postings.Freq();
                    if (freq == 1)
                    {
                        freqOutput.WriteVInt(docCode | 1); // write doc & freq=1
                    }
                    else
                    {
                        freqOutput.WriteVInt(docCode); // write doc
                        freqOutput.WriteVInt(freq);    // write frequency in doc
                    }

                    int lastPosition = 0; // write position deltas
                    for (int j = 0; j < freq; j++)
                    {
                        int position = postings.NextPosition();
                        proxOutput.WriteVInt(position - lastPosition);
                        lastPosition = position;
                    }
                }
            }
            return(df);
        }
        /// <summary>Merges the term dictionaries of all readers. Every reader's term
        /// enumerator is placed in the queue; then, repeatedly, all entries
        /// positioned on the same term are popped, handed to MergeTermInfo,
        /// advanced, and re-queued until the queue is empty.
        /// </summary>
        private void  MergeTermInfos()
        {
            int docBase = 0;

            for (int idx = 0; idx < readers.Count; idx++)
            {
                Monodoc.Lucene.Net.Index.IndexReader segment = (Monodoc.Lucene.Net.Index.IndexReader)readers[idx];
                TermEnum         segmentTerms = segment.Terms();
                SegmentMergeInfo info         = new SegmentMergeInfo(docBase, segmentTerms, segment);
                docBase += segment.NumDocs(); // the next reader's base follows this reader's documents

                if (!info.Next())
                {
                    info.Close(); // reader has no terms
                    continue;
                }
                queue.Put(info);
            }

            // At most one entry per reader can be positioned on the same term.
            SegmentMergeInfo[] sameTerm = new SegmentMergeInfo[readers.Count];

            while (queue.Size() > 0)
            {
                SegmentMergeInfo first = (SegmentMergeInfo)queue.Pop();
                sameTerm[0] = first;
                int  count   = 1;
                Term current = first.term;

                // Collect every other queued entry positioned on the same term.
                for (SegmentMergeInfo head = (SegmentMergeInfo)queue.Top();
                     head != null && current.CompareTo(head.term) == 0;
                     head = (SegmentMergeInfo)queue.Top())
                {
                    sameTerm[count++] = (SegmentMergeInfo)queue.Pop();
                }

                MergeTermInfo(sameTerm, count);

                // Advance the consumed entries, last to first, and re-queue the live ones.
                for (int k = count - 1; k >= 0; k--)
                {
                    SegmentMergeInfo consumed = sameTerm[k];
                    if (!consumed.Next())
                    {
                        consumed.Close(); // done with a segment
                        continue;
                    }
                    queue.Put(consumed);
                }
            }
        }
Пример #5
0
        /// <summary>Orders two SegmentMergeInfo entries by term; entries on equal
        /// terms are ordered by their <c>base_Renamed</c> value.
        /// </summary>
        /// <param name="a">first entry (a SegmentMergeInfo)
        /// </param>
        /// <param name="b">second entry (a SegmentMergeInfo)
        /// </param>
        /// <returns> true when <c>a</c> sorts strictly before <c>b</c>
        /// </returns>
        public override bool LessThan(System.Object a, System.Object b)
        {
            SegmentMergeInfo left  = (SegmentMergeInfo)a;
            SegmentMergeInfo right = (SegmentMergeInfo)b;

            int termOrder = left.term.CompareTo(right.term);
            return termOrder != 0
                ? termOrder < 0
                : left.base_Renamed < right.base_Renamed;
        }
Пример #6
0
		/// <summary>Process postings from multiple segments all positioned on the
		/// same term. Writes out merged entries into freqOutput and
		/// the proxOutput streams.
		/// 
		/// </summary>
		/// <param name="smis">array of segments
		/// </param>
		/// <param name="n">number of cells in the array actually occupied
		/// </param>
		/// <returns> number of documents across all segments where this term was found
		/// </returns>
		/// <exception cref="System.InvalidOperationException">a document number is
		/// negative, or is not strictly greater than the previously written one
		/// </exception>
		private int AppendPostings(SegmentMergeInfo[] smis, int n)
		{
			int lastDoc = 0;
			int df = 0; // number of docs w/ term
			ResetSkip();
			for (int i = 0; i < n; i++)
			{
				SegmentMergeInfo smi = smis[i];
				TermPositions postings = smi.postings;
				int base_Renamed = smi.base_Renamed;
				int[] docMap = smi.docMap;
				postings.Seek(smi.termEnum);
				while (postings.Next())
				{
					int doc = postings.Doc();
					if (docMap != null)
					{
						doc = docMap[doc]; // map around deletions
					}
					doc += base_Renamed; // convert to merged space
					
					// Document numbers must be non-negative and strictly increasing.
					// Only the very first posting (df == 0) may equal the initial
					// lastDoc of 0; a later repeat would be written as a zero delta.
					if (doc < 0 || (df > 0 && doc <= lastDoc))
					{
						throw new System.InvalidOperationException("docs out of order (" + doc + " <= " + lastDoc + ")");
					}
					
					df++;
					
					if ((df % skipInterval) == 0)
					{
						BufferSkip(lastDoc);
					}
					
					int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
					lastDoc = doc;
					
					int freq = postings.Freq();
					if (freq == 1)
					{
						freqOutput.WriteVInt(docCode | 1); // write doc & freq=1
					}
					else
					{
						freqOutput.WriteVInt(docCode); // write doc
						freqOutput.WriteVInt(freq); // write frequency in doc
					}
					
					int lastPosition = 0; // write position deltas
					for (int j = 0; j < freq; j++)
					{
						int position = postings.NextPosition();
						proxOutput.WriteVInt(position - lastPosition);
						lastPosition = position;
					}
				}
			}
			return df;
		}
Пример #7
0
		private TermInfo termInfo = new TermInfo(); // one instance reused for every term instead of allocating per call
		
		/// <summary>Merge one term found in one or more segments. The array <code>smis</code>
		/// contains segments that are positioned at the same term, and <code>n</code>
		/// is the number of cells in the array actually occupied. A dictionary
		/// entry is added only when at least one document was written.
		/// 
		/// </summary>
		/// <param name="smis">array of segments
		/// </param>
		/// <param name="n">number of cells in the array actually occupied
		/// </param>
		private void  MergeTermInfo(SegmentMergeInfo[] smis, int n)
		{
			// Remember where this term's data starts in each output before appending.
			long freqStart = freqOutput.GetFilePointer();
			long proxStart = proxOutput.GetFilePointer();
			
			int docCount = AppendPostings(smis, n);
			long skipStart = WriteSkip();
			
			if (docCount <= 0)
			{
				return; // no documents for this term: no dictionary entry
			}
			
			// The skip position is stored relative to the start of the freq data.
			int skipOffset = (int) (skipStart - freqStart);
			termInfo.Set(docCount, freqStart, proxStart, skipOffset);
			termInfosWriter.Add(smis[0].term, termInfo);
		}
Пример #8
0
		/// <summary>Merges the term dictionaries of all readers. Each reader's term
		/// enumerator is queued; entries positioned on the same term are then
		/// popped together, passed to MergeTermInfo, advanced and re-queued
		/// until no entries remain.
		/// </summary>
		private void  MergeTermInfos()
		{
			int docOffset = 0;
			for (int r = 0; r < readers.Count; r++)
			{
				Monodoc.Lucene.Net.Index.IndexReader current = (Monodoc.Lucene.Net.Index.IndexReader) readers[r];
				SegmentMergeInfo entry = new SegmentMergeInfo(docOffset, current.Terms(), current);
				docOffset += current.NumDocs(); // next reader's documents start after this one's
				
				bool hasFirstTerm = entry.Next();
				if (hasFirstTerm)
				{
					queue.Put(entry);
				}
				else
				{
					entry.Close(); // reader has no terms
				}
			}
			
			// Sized so that every reader can be positioned on the same term at once.
			SegmentMergeInfo[] group = new SegmentMergeInfo[readers.Count];
			
			while (queue.Size() > 0)
			{
				int groupSize = 0;
				group[groupSize++] = (SegmentMergeInfo) queue.Pop();
				Term groupTerm = group[0].term;
				
				// Pull every further entry that sits on the same term.
				while (true)
				{
					SegmentMergeInfo head = (SegmentMergeInfo) queue.Top();
					if (head == null || groupTerm.CompareTo(head.term) != 0)
					{
						break;
					}
					group[groupSize++] = (SegmentMergeInfo) queue.Pop();
				}
				
				MergeTermInfo(group, groupSize);
				
				// Advance the group members from last to first and re-queue the live ones.
				for (int g = groupSize - 1; g >= 0; g--)
				{
					SegmentMergeInfo member = group[g];
					if (member.Next())
					{
						queue.Put(member);
					}
					else
					{
						member.Close(); // done with a segment
					}
				}
			}
		}
Пример #9
0
		/// <summary>Creates the merged enumerator over the given readers. With a null
		/// <c>t</c> every reader is enumerated from its first term; with a
		/// non-null <c>t</c> each reader supplies <c>Terms(t)</c> (presumably
		/// positioned at or after <c>t</c> - confirm against IndexReader) and
		/// this enumerator is advanced once after the queue has been filled.
		/// </summary>
		/// <param name="readers">readers whose terms are merged
		/// </param>
		/// <param name="starts">per-reader value passed to SegmentMergeInfo as its base
		/// </param>
		/// <param name="t">term to start from, or null to start from the beginning
		/// </param>
		public MultiTermEnum(Monodoc.Lucene.Net.Index.IndexReader[] readers, int[] starts, Term t)
		{
			queue = new SegmentMergeQueue(readers.Length);
			for (int n = 0; n < readers.Length; n++)
			{
				Monodoc.Lucene.Net.Index.IndexReader source = readers[n];
				
				TermEnum sourceTerms;
				if (t == null)
				{
					sourceTerms = source.Terms();
				}
				else
				{
					sourceTerms = source.Terms(t);
				}
				
				SegmentMergeInfo entry = new SegmentMergeInfo(starts[n], sourceTerms, source);
				
				// Unseeked enumerators must be advanced once; seeked ones only
				// need to have a current term.
				bool usable = (t == null) ? entry.Next() : (sourceTerms.Term() != null);
				if (usable)
				{
					queue.Put(entry);
				}
				else
				{
					entry.Close(); // nothing to contribute from this reader
				}
			}
			
			if (t == null || queue.Size() <= 0)
			{
				return;
			}
			Next();
		}