/// <summary>
        /// Walks the shared enumerator forward from wherever it currently stands,
        /// looking for <paramref name="term"/>.
        /// </summary>
        /// <param name="term">The term to look for.</param>
        /// <returns>
        /// The enumerator's TermInfo when it comes to rest on a term equal to
        /// <paramref name="term"/>; otherwise <c>null</c>.
        /// </returns>
        private TermInfo ScanEnum(Term term)
        {
            SegmentTermEnum cursor = GetEnum();

            // Keep stepping while the target still sorts after the cursor's term;
            // stop as soon as the cursor cannot advance any further.
            while (term.CompareTo(cursor.Term()) > 0)
            {
                if (!cursor.Next())
                {
                    break;
                }
            }

            // The cursor may have no current term; only an exact match counts as a hit.
            bool exactHit = cursor.Term() != null && term.CompareTo(cursor.Term()) == 0;
            return exactHit ? cursor.TermInfo() : null;
        }
Exemple #2
0
		// Term Vector support
		
		/// <summary>
		/// Advances past the current entry to the first term that is greater than
		/// or equal to <paramref name="target"/>.
		/// </summary>
		/// <remarks>
		/// This default implementation always calls <c>Next()</c> at least once and
		/// keeps calling it while <paramref name="target"/> compares greater than
		/// the current term. Subclasses may override it with something faster.
		/// </remarks>
		/// <param name="target">The term to skip to.</param>
		/// <returns>
		/// <c>true</c> if such a term was reached; <c>false</c> if <c>Next()</c>
		/// reported that there are no more entries.
		/// </returns>
		public virtual bool SkipTo(Term target)
		{
			while (true)
			{
				// Running out of entries means there is nothing at or after the target.
				if (!Next())
				{
					return false;
				}
				
				// Stop on the first term that is not smaller than the target.
				if (target.CompareTo(Term()) <= 0)
				{
					return true;
				}
			}
		}
Exemple #3
0
        // Term Vector support

        /// <summary>
        /// Moves forward, starting with the entry after the current one, until a
        /// term greater than or equal to <paramref name="target"/> is reached.
        /// </summary>
        /// <remarks>
        /// Equivalent to calling <c>Next()</c> once and then repeatedly for as long
        /// as <paramref name="target"/> compares greater than the current term.
        /// Overriding implementations may be considerably more efficient.
        /// </remarks>
        /// <param name="target">The term to skip to.</param>
        /// <returns>
        /// <c>true</c> if the enumeration now rests on a qualifying term;
        /// <c>false</c> if <c>Next()</c> returned <c>false</c> first.
        /// </returns>
        public virtual bool SkipTo(Term target)
        {
            // The first step is unconditional: the current entry is never a candidate.
            bool advanced = Next();

            while (advanced && target.CompareTo(Term()) > 0)
            {
                advanced = Next();
            }

            return advanced;
        }
        /// <summary>
        /// Merges the term enumerations of all readers: every reader's terms are
        /// fed through the queue, entries carrying an equal term are grouped, and
        /// each group is handed to <c>MergeTermInfo</c>.
        /// </summary>
        private void  MergeTermInfos()
        {
            // Prime the queue with one entry per reader that has at least one term.
            int docBase = 0;
            for (int readerIndex = 0; readerIndex < readers.Count; readerIndex++)
            {
                Monodoc.Lucene.Net.Index.IndexReader segmentReader = (Monodoc.Lucene.Net.Index.IndexReader)readers[readerIndex];
                TermEnum segmentTerms = segmentReader.Terms();
                SegmentMergeInfo info = new SegmentMergeInfo(docBase, segmentTerms, segmentReader);
                docBase += segmentReader.NumDocs();

                if (!info.Next())
                {
                    // Nothing to contribute from this reader.
                    info.Close();
                    continue;
                }
                queue.Put(info);
            }

            SegmentMergeInfo[] group = new SegmentMergeInfo[readers.Count];

            while (queue.Size() > 0)
            {
                // Pop the head entry, then every further entry carrying an equal term.
                group[0] = (SegmentMergeInfo)queue.Pop();
                int groupSize = 1;
                Term current = group[0].term;

                for (SegmentMergeInfo head = (SegmentMergeInfo)queue.Top();
                     head != null && current.CompareTo(head.term) == 0;
                     head = (SegmentMergeInfo)queue.Top())
                {
                    group[groupSize++] = (SegmentMergeInfo)queue.Pop();
                }

                // Emit the merged TermInfo for this term.
                MergeTermInfo(group, groupSize);

                // Re-queue (last popped first) every entry that still has terms left;
                // close the ones that are finished.
                for (int slot = groupSize - 1; slot >= 0; slot--)
                {
                    SegmentMergeInfo member = group[slot];
                    if (!member.Next())
                    {
                        member.Close();
                        continue;
                    }
                    queue.Put(member);
                }
            }
        }
        /// <summary>
        /// Looks up the TermInfo stored for <paramref name="term"/>.
        /// </summary>
        /// <param name="term">The term to look up.</param>
        /// <returns>The matching TermInfo, or <c>null</c> when the set is empty or holds no such term.</returns>
        public /*internal*/ TermInfo Get(Term term)
        {
            if (size == 0)
            {
                return null;
            }

            // Sequential-access shortcut: if the enumerator already stands at or just
            // before the target, scan from there instead of seeking.
            SegmentTermEnum cursor = GetEnum();

            bool targetNotBehindCursor =
                cursor.Term() != null
                && ((cursor.prev != null && term.CompareTo(cursor.prev) > 0)
                    || term.CompareTo(cursor.Term()) >= 0);

            if (targetNotBehindCursor)
            {
                // Slot of the index term that follows the cursor's position.
                int followingSlot = (int)(cursor.position / cursor.indexInterval) + 1;

                // Either there is no following index term, or the target sorts before it.
                bool reachableByScan =
                    indexTerms.Length == followingSlot
                    || term.CompareTo(indexTerms[followingSlot]) < 0;

                if (reachableByScan)
                {
                    return ScanEnum(term);
                }
            }

            // Random access: reposition via the index, then scan.
            int indexSlot = GetIndexOffset(term);
            SeekEnum(indexSlot);
            return ScanEnum(term);
        }
Exemple #6
0
        /// <summary>
        /// Appends a new (Term, TermInfo) pair to the set.
        /// </summary>
        /// <remarks>
        /// Unless this writer is the index writer, <paramref name="term"/> must
        /// compare greater than the previously written term. The freq and prox
        /// pointers of <paramref name="ti"/> must not be smaller than those of the
        /// previously written entry.
        /// </remarks>
        /// <param name="term">The term to write.</param>
        /// <param name="ti">The term's info record.</param>
        /// <exception cref="System.IO.IOException">
        /// The term or one of the pointers is out of order.
        /// </exception>
        public /*internal*/ void  Add(Term term, TermInfo ti)
        {
            // --- ordering checks ---------------------------------------------
            if (!isIndex)
            {
                if (term.CompareTo(lastTerm) <= 0)
                {
                    throw new System.IO.IOException("term out of order");
                }
            }
            if (lastTi.freqPointer > ti.freqPointer)
            {
                throw new System.IO.IOException("freqPointer out of order");
            }
            if (lastTi.proxPointer > ti.proxPointer)
            {
                throw new System.IO.IOException("proxPointer out of order");
            }

            // Every indexInterval-th entry, record the previous entry in the index writer.
            if (!isIndex)
            {
                if (size % indexInterval == 0)
                {
                    other.Add(lastTerm, lastTi);
                }
            }

            // --- the entry itself --------------------------------------------
            WriteTerm(term);
            output.WriteVInt(ti.docFreq);

            // Pointers are stored as deltas against the previous entry.
            long freqDelta = ti.freqPointer - lastTi.freqPointer;
            output.WriteVLong(freqDelta);
            long proxDelta = ti.proxPointer - lastTi.proxPointer;
            output.WriteVLong(proxDelta);

            // The skip offset is only written when docFreq reaches skipInterval.
            if (ti.docFreq >= skipInterval)
            {
                output.WriteVInt(ti.skipOffset);
            }

            // The index writer also stores the other file's pointer, again as a delta.
            if (isIndex)
            {
                output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
                lastIndexPointer = other.output.GetFilePointer();
            }

            lastTi.Set(ti);
            size += 1;
        }
        /// <summary>
        /// Binary-searches <c>indexTerms</c> for the greatest entry that is less
        /// than or equal to <paramref name="term"/>.
        /// </summary>
        /// <param name="term">The term to position for.</param>
        /// <returns>
        /// The offset of that entry; this is one below the lowest offset searched
        /// (i.e. -1) when every index term is greater than <paramref name="term"/>.
        /// </returns>
        private int GetIndexOffset(Term term)
        {
            int low = 0;
            int high = indexTerms.Length - 1;

            while (low <= high)
            {
                int probe = (low + high) >> 1;
                int order = term.CompareTo(indexTerms[probe]);

                if (order == 0)
                {
                    return probe; // exact hit
                }

                if (order < 0)
                {
                    high = probe - 1; // target sorts before the probe
                }
                else
                {
                    low = probe + 1;  // target sorts after the probe
                }
            }

            // No exact match: 'high' now rests on the last entry below the target.
            return high;
        }
Exemple #8
0
		/// <summary>Returns the position of a Term in the set or -1.</summary>
		/// <param name="term">The term to locate.</param>
		/// <returns>
		/// The enumerator's position when it stops on a term equal to
		/// <paramref name="term"/>; -1 when the set is empty or the term is not found.
		/// </returns>
		internal long GetPosition(Term term)
		{
			if (size == 0)
				return - 1;
			
			// Jump to the index entry at or before the term, then scan forward.
			int indexOffset = GetIndexOffset(term);
			SeekEnum(indexOffset);
			
			SegmentTermEnum enumerator = GetEnum();
			while (term.CompareTo(enumerator.Term()) > 0 && enumerator.Next())
			{
			}
			
			// If the scan stopped because Next() returned false, the enumerator may
			// have no current term; treat that as "not found" rather than comparing
			// against null (ScanEnum applies the same guard).
			if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0)
				return enumerator.position;
			else
				return - 1;
		}
Exemple #9
0
		/// <summary>
		/// Scans the enumerator forward from its current position for
		/// <paramref name="term"/>.
		/// </summary>
		/// <param name="term">The term to find.</param>
		/// <returns>
		/// The TermInfo at the enumerator's resting position if the term there
		/// equals <paramref name="term"/>; <c>null</c> otherwise.
		/// </returns>
		private TermInfo ScanEnum(Term term)
		{
			SegmentTermEnum scanner = GetEnum();
			
			// Step forward until the scanner reaches or passes the target, or runs dry.
			bool more = true;
			while (more && term.CompareTo(scanner.Term()) > 0)
			{
				more = scanner.Next();
			}
			
			// No current term means the target was not found.
			if (scanner.Term() == null)
			{
				return null;
			}
			
			// Landed on a different term: not found either.
			if (term.CompareTo(scanner.Term()) != 0)
			{
				return null;
			}
			
			return scanner.TermInfo();
		}
Exemple #10
0
		/// <summary>
		/// Returns the TermInfo recorded for <paramref name="term"/>, or
		/// <c>null</c> if the set is empty or does not contain the term.
		/// </summary>
		/// <param name="term">The term to look up.</param>
		public /*internal*/ TermInfo Get(Term term)
		{
			if (size == 0)
			{
				return null;
			}
			
			SegmentTermEnum current = GetEnum();
			
			// Fast path for sequential access: reuse the enumerator's position when
			// the target is not behind it ...
			if (current.Term() != null)
			{
				bool afterPrevious = current.prev != null && term.CompareTo(current.prev) > 0;
				if (afterPrevious || term.CompareTo(current.Term()) >= 0)
				{
					// ... and does not reach the next index term (if there is one).
					int nextIndexEntry = (int) (current.position / current.indexInterval) + 1;
					if (indexTerms.Length == nextIndexEntry)
					{
						return ScanEnum(term);
					}
					if (term.CompareTo(indexTerms[nextIndexEntry]) < 0)
					{
						return ScanEnum(term);
					}
				}
			}
			
			// Slow path: seek to the closest index entry first, then scan.
			SeekEnum(GetIndexOffset(term));
			return ScanEnum(term);
		}
Exemple #11
0
		/// <summary>
		/// Finds, by binary search over <c>indexTerms</c>, the offset of the
		/// greatest index entry that is less than or equal to <paramref name="term"/>.
		/// </summary>
		/// <param name="term">The term being positioned.</param>
		/// <returns>
		/// The matching offset, or -1 when <paramref name="term"/> sorts before
		/// every index entry.
		/// </returns>
		private int GetIndexOffset(Term term)
		{
			int first = 0;
			int last = indexTerms.Length - 1;
			
			while (first <= last)
			{
				int middle = (first + last) >> 1;
				int comparison = term.CompareTo(indexTerms[middle]);
				
				if (comparison > 0)
				{
					first = middle + 1;
				}
				else if (comparison < 0)
				{
					last = middle - 1;
				}
				else
				{
					return middle;
				}
			}
			
			// Not found exactly: 'last' has settled just below the insertion point.
			return last;
		}
Exemple #12
0
		/// <summary>
		/// Writes one more (Term, TermInfo) pair to the set.
		/// </summary>
		/// <remarks>
		/// When this is not the index writer, the term has to compare greater than
		/// the last term written. The entry's freq and prox pointers may not be
		/// less than those of the last entry written.
		/// </remarks>
		/// <param name="term">The term being added.</param>
		/// <param name="ti">The info record that belongs to the term.</param>
		/// <exception cref="System.IO.IOException">
		/// Thrown when the term, the freq pointer or the prox pointer is out of order.
		/// </exception>
		public /*internal*/ void  Add(Term term, TermInfo ti)
		{
			// Reject anything that would break the required ordering.
			if (!isIndex && term.CompareTo(lastTerm) <= 0)
			{
				throw new System.IO.IOException("term out of order");
			}
			if (ti.freqPointer < lastTi.freqPointer)
			{
				throw new System.IO.IOException("freqPointer out of order");
			}
			if (ti.proxPointer < lastTi.proxPointer)
			{
				throw new System.IO.IOException("proxPointer out of order");
			}
			
			// On every indexInterval-th entry the previous pair goes to the index writer.
			if (!isIndex && size % indexInterval == 0)
			{
				other.Add(lastTerm, lastTi);
			}
			
			// Term, document frequency, then both pointers as deltas from the previous entry.
			WriteTerm(term);
			output.WriteVInt(ti.docFreq);
			output.WriteVLong(ti.freqPointer - lastTi.freqPointer);
			output.WriteVLong(ti.proxPointer - lastTi.proxPointer);
			
			// A skip offset is stored only once docFreq reaches skipInterval.
			if (ti.docFreq >= skipInterval)
			{
				output.WriteVInt(ti.skipOffset);
			}
			
			// The index writer additionally records the other file's pointer as a delta.
			if (isIndex)
			{
				output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
				lastIndexPointer = other.output.GetFilePointer();
			}
			
			// Remember this entry as the baseline for the next one.
			lastTi.Set(ti);
			size = size + 1;
		}
		/// <summary>
		/// Advances to the next distinct term across all queued segments and sums
		/// the document frequency of every segment entry carrying that term.
		/// </summary>
		/// <returns>
		/// <c>false</c> (with <c>term</c> cleared) when the queue has no entries
		/// left; otherwise <c>true</c>.
		/// </returns>
		public override bool Next()
		{
			SegmentMergeInfo head = (SegmentMergeInfo) queue.Top();
			if (head == null)
			{
				term = null;
				return false;
			}
			
			term = head.term;
			docFreq = 0;
			
			// Consume every queue entry whose term equals the one just selected.
			for (; head != null && term.CompareTo(head.term) == 0; head = (SegmentMergeInfo) queue.Top())
			{
				queue.Pop();
				docFreq = docFreq + head.termEnum.DocFreq();
				
				if (!head.Next())
				{
					// This segment has no further terms.
					head.Close();
					continue;
				}
				
				// Put the advanced entry back so the queue stays ordered.
				queue.Put(head);
			}
			return true;
		}