Esempio n. 1
0
            private readonly Term lastTerm = new Term("");   // LUCENENET: marked readonly

            /// <summary>
            /// Appends <paramref name="term"/> to the output, encoded relative to the previously added term.
            /// </summary>
            /// <remarks>
            /// Written in this order: a VInt holding the shared-prefix length shifted left by one
            /// (low bit set when the field differs from the previous term's field), the field name
            /// (only when it differs), the suffix length as a VInt, and finally the suffix bytes.
            /// When asserts are enabled, every term after the first must compare greater than the previous one.
            /// </remarks>
            /// <param name="term">The term to append.</param>
            /// <exception cref="Exception">Wraps any <see cref="IOException"/> raised while writing.</exception>
            public virtual void Add(Term term)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(lastTerm.Equals(new Term("")) || term.CompareTo(lastTerm) > 0);
                }

                try
                {
                    int sharedLength = SharedPrefix(lastTerm.Bytes, term.Bytes);
                    int remainingLength = term.Bytes.Length - sharedLength;
                    bool fieldChanged = !term.Field.Equals(lastTerm.Field, StringComparison.Ordinal);

                    // Low bit of the header tells the reader whether a field name follows.
                    int header = sharedLength << 1;
                    if (fieldChanged)
                    {
                        header |= 1;
                    }

                    output.WriteVInt32(header);
                    if (fieldChanged)
                    {
                        output.WriteString(term.Field);
                    }

                    output.WriteVInt32(remainingLength);
                    output.WriteBytes(term.Bytes.Bytes, term.Bytes.Offset + sharedLength, remainingLength);

                    // Remember this term so the next call can be encoded against it.
                    lastTerm.Bytes.CopyBytes(term.Bytes);
                    lastTerm.Field = term.Field;
                }
                catch (IOException e)
                {
                    throw new Exception(e.ToString(), e);
                }
            }
Esempio n. 2
0
            /// <summary>
            /// Moves to the term currently at the top of the queue and accumulates its
            /// document frequency across every queued segment positioned on that term.
            /// </summary>
            /// <returns>
            /// <c>false</c> (after clearing <c>term</c>) when the queue is empty; otherwise <c>true</c>.
            /// </returns>
            public override bool Next()
            {
                SegmentMergeInfo head = (SegmentMergeInfo)queue.Top();
                if (head == null)
                {
                    term = null;
                    return false;
                }

                term = head.term;
                docFreq = 0;

                // Consume every segment whose current term equals the one just selected.
                for (; head != null && term.CompareTo(head.term) == 0; head = (SegmentMergeInfo)queue.Top())
                {
                    queue.Pop();
                    docFreq += head.termEnum.DocFreq();

                    if (head.Next())
                    {
                        queue.Put(head); // still has terms: back into the queue
                    }
                    else
                    {
                        head.Close(); // this segment is exhausted
                    }
                }

                return true;
            }
Esempio n. 3
0
        /// <summary>
        /// Merges the term enumerations of all readers: repeatedly pops every queue entry
        /// positioned on the same term, hands the group to <c>MergeTermInfo</c>, then advances
        /// each entry and re-queues the ones that still have terms.
        /// </summary>
        private void MergeTermInfos()
        {
            int docBase = 0;

            // Seed the queue with one entry per reader that has at least one term.
            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                SegmentMergeInfo seed = new SegmentMergeInfo(docBase, reader.Terms(), reader);
                docBase += reader.NumDocs();

                if (!seed.Next())
                {
                    seed.Close();
                    continue;
                }
                queue.Put(seed);
            }

            SegmentMergeInfo[] matches = new SegmentMergeInfo[readers.Count];

            while (queue.Size() > 0)
            {
                // Collect every entry positioned on the same term as the first one popped.
                matches[0] = (SegmentMergeInfo)queue.Pop();
                int matchCount = 1;
                Term current = matches[0].term;

                for (SegmentMergeInfo head = (SegmentMergeInfo)queue.Top();
                     head != null && current.CompareTo(head.term) == 0;
                     head = (SegmentMergeInfo)queue.Top())
                {
                    matches[matchCount++] = (SegmentMergeInfo)queue.Pop();
                }

                int df = MergeTermInfo(matches, matchCount); // add new TermInfo

                if (checkAbort != null)
                {
                    checkAbort.Work(df / 3.0);
                }

                // Advance the matched entries, last collected first, and restore the queue.
                for (int slot = matchCount - 1; slot >= 0; slot--)
                {
                    SegmentMergeInfo matched = matches[slot];
                    if (matched.Next())
                    {
                        queue.Put(matched);
                    }
                    else
                    {
                        matched.Close(); // done with a segment
                    }
                }
            }
        }
Esempio n. 4
0
 // Used only by assert: verifies that a non-null term compares greater than the last
 // recorded delete term, then records a fresh copy of it. Always returns true.
 private bool CheckDeleteTerm(Term term)
 {
     if (term == null)
     {
         LastDeleteTerm = null;
         return true;
     }

     Debug.Assert(LastDeleteTerm == null || term.CompareTo(LastDeleteTerm) > 0, "lastTerm=" + LastDeleteTerm + " vs term=" + term);

     // TODO: we re-use term now in our merged iterable, but we shouldn't clone, instead copy for this assert
     LastDeleteTerm = new Term(term.Field(), BytesRef.DeepCopyOf(term.Bytes_Renamed));
     return true;
 }
Esempio n. 5
0
		/// <summary>
		/// Advances at least once, then keeps advancing until the current term is
		/// greater than or equal to <paramref name="target"/>.
		/// </summary>
		/// <returns><c>true</c> if such a term was reached; <c>false</c> if <c>Next()</c> ran out first.</returns>
		public virtual bool SkipTo(Term target)
		{
			while (Next())
			{
				if (target.CompareTo(Term()) <= 0)
				{
					return true;
				}
			}
			return false;
		}
Esempio n. 6
0
        // Term Vector support

        /// <summary>
        /// Skips forward to the first term after the current position whose value is
        /// greater than or equal to <paramref name="target"/>.
        /// </summary>
        /// <remarks>
        /// This default implementation calls <c>Next()</c> at least once and keeps calling it
        /// while <paramref name="target"/> compares greater than the current term.
        /// Some implementations are considerably more efficient than that.
        /// </remarks>
        /// <param name="target">The term to skip to.</param>
        /// <returns><c>true</c> if such an entry exists; otherwise <c>false</c>.</returns>
        public virtual bool SkipTo(Term target)
        {
            bool advanced = Next();
            while (advanced && target.CompareTo(Term()) > 0)
            {
                advanced = Next();
            }
            return advanced;
        }
Esempio n. 7
0
 // Used only by assert: when asserts are enabled, verifies that a non-null term compares
 // greater than the last recorded delete term; then records a fresh copy. Always returns true.
 private bool CheckDeleteTerm(Term term)
 {
     if (term != null && Debugging.AssertsEnabled)
     {
         Debugging.Assert(lastDeleteTerm == null || term.CompareTo(lastDeleteTerm) > 0, "lastTerm={0} vs term={1}", lastDeleteTerm, term);
     }

     // TODO: we re-use term now in our merged iterable, but we shouldn't clone, instead copy for this assert
     if (term == null)
     {
         lastDeleteTerm = null;
     }
     else
     {
         lastDeleteTerm = new Term(term.Field, BytesRef.DeepCopyOf(term.Bytes));
     }
     return true;
 }
Esempio n. 8
0
        /// <summary>
        /// Scans the enumerator returned by <c>GetEnum()</c> forward to <paramref name="term"/>.
        /// </summary>
        /// <returns>
        /// The enumerator's <c>TermInfo</c> when it stops on a term equal to
        /// <paramref name="term"/>; otherwise <c>null</c>.
        /// </returns>
        private TermInfo ScanEnum(Term term)
        {
            SegmentTermEnum cursor = GetEnum();
            cursor.ScanTo(term);

            bool found = cursor.Term() != null && term.CompareTo(cursor.Term()) == 0;
            return found ? cursor.TermInfo() : null;
        }
Esempio n. 9
0
        /// <summary>
        /// Adds a new (Term, TermInfo) pair to the set.
        /// </summary>
        /// <remarks>
        /// Unless this writer is the index writer (<c>isIndex</c>), <paramref name="term"/> must
        /// compare greater than the last term. The freq and prox pointers of
        /// <paramref name="ti"/> must not be smaller than those of the previous entry.
        /// </remarks>
        /// <exception cref="System.IO.IOException">One of the ordering rules above is violated.</exception>
        public /*internal*/ void Add(Term term, TermInfo ti)
        {
            // Ordering checks, in the same order the errors are reported.
            if (!isIndex)
            {
                if (term.CompareTo(lastTerm) <= 0)
                {
                    throw new System.IO.IOException("term out of order");
                }
            }
            if (ti.freqPointer < lastTi.freqPointer)
            {
                throw new System.IO.IOException("freqPointer out of order");
            }
            if (ti.proxPointer < lastTi.proxPointer)
            {
                throw new System.IO.IOException("proxPointer out of order");
            }

            // Every indexInterval-th entry, the previous pair is also added to the other writer.
            if (!isIndex && size % indexInterval == 0)
            {
                other.Add(lastTerm, lastTi);
            }

            WriteTerm(term);
            output.WriteVInt(ti.docFreq);

            // Pointers are stored as deltas against the previous entry.
            output.WriteVLong(ti.freqPointer - lastTi.freqPointer);
            output.WriteVLong(ti.proxPointer - lastTi.proxPointer);

            if (ti.docFreq >= skipInterval)
            {
                output.WriteVInt(ti.skipOffset);
            }

            if (isIndex)
            {
                // Index entries also record the other output's file pointer, as a delta.
                output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
                lastIndexPointer = other.output.GetFilePointer();
            }

            lastTi.Set(ti);
            size++;
        }
Esempio n. 10
0
        /// <summary>
        /// Binary-searches <c>indexTerms</c> for <paramref name="term"/>.
        /// </summary>
        /// <returns>
        /// The offset of an entry equal to <paramref name="term"/>, or else the upper bound left
        /// when the search range closes: the offset of the greatest entry less than the term
        /// (-1 if there is none), provided <c>indexTerms</c> is sorted.
        /// </returns>
        private int GetIndexOffset(Term term)
        {
            int low = 0;
            int high = indexTerms.Length - 1;

            while (low <= high)
            {
                int probe = SupportClass.Number.URShift((low + high), 1);
                int order = term.CompareTo(indexTerms[probe]);

                if (order == 0)
                {
                    return probe;
                }

                if (order < 0)
                {
                    high = probe - 1;
                }
                else
                {
                    low = probe + 1;
                }
            }

            return high;
        }
Esempio n. 11
0
		/// <summary>
		/// Scans the enumerator returned by <c>GetEnum()</c> forward to <paramref name="term"/>
		/// and reports its <c>TermInfo</c> on an exact match.
		/// </summary>
		/// <returns>The matching <c>TermInfo</c>, or <c>null</c> when the scan does not stop on an equal term.</returns>
		private TermInfo ScanEnum(Term term)
		{
			SegmentTermEnum cursor = GetEnum();
			cursor.ScanTo(term);

			if (cursor.Term() == null || term.CompareTo(cursor.Term()) != 0)
			{
				return null;
			}

			return cursor.TermInfo();
		}
Esempio n. 12
0
 // Used only by assert: verifies that a non-null term compares greater than the last
 // recorded delete term, then records the term itself (no copy). Always returns true.
 private bool CheckDeleteTerm(Term term)
 {
     if (term != null)
     {
         System.Diagnostics.Debug.Assert(
             lastDeleteTerm == null || term.CompareTo(lastDeleteTerm) > 0,
             "lastTerm=" + lastDeleteTerm + " vs term=" + term);
     }

     lastDeleteTerm = term;
     return true;
 }
Esempio n. 13
0
			/// <summary>
			/// Advances the segments matched by the previous call, then selects the term at the
			/// top of the queue and records every queued segment positioned on it in
			/// <c>matchingSegments</c> (null-terminated), summing their document frequencies.
			/// </summary>
			/// <returns><c>false</c> (after clearing <c>term</c>) when the queue is empty; otherwise <c>true</c>.</returns>
			public override bool Next()
			{
				// Advance the previously matched segments; the list ends at the first null slot.
				int slot = 0;
				while (slot < matchingSegments.Length && matchingSegments[slot] != null)
				{
					SegmentMergeInfo previous = matchingSegments[slot++];
					if (previous.Next())
					{
						queue.Put(previous);
					}
					else
					{
						previous.Close(); // done with segment
					}
				}

				matchingSegments[0] = null;

				SegmentMergeInfo head = (SegmentMergeInfo) queue.Top();
				if (head == null)
				{
					term = null;
					return false;
				}

				term = head.term;
				docFreq = 0;

				int matched = 0;
				for (; head != null && term.CompareTo(head.term) == 0; head = (SegmentMergeInfo) queue.Top())
				{
					matchingSegments[matched++] = head;
					queue.Pop();
					docFreq += head.termEnum.DocFreq(); // increment freq
				}

				// Null-terminate the list of matches.
				matchingSegments[matched] = null;
				return true;
			}
Esempio n. 14
0
        /// <summary>
        /// Merges the term enumerations of all readers and feeds the result to
        /// <paramref name="consumer"/>: entries positioned on the same term are popped from the
        /// queue as a group, passed to <c>AppendPostings</c>, then advanced and re-queued.
        /// A new terms consumer is requested from <paramref name="consumer"/> whenever the field changes.
        /// </summary>
        /// <param name="consumer">Receives one <c>AddField</c> call per field encountered.</param>
        private void  MergeTermInfos(FormatPostingsFieldsConsumer consumer)
        {
            // Running sum of NumDocs() of the readers seen so far; passed to each SegmentMergeInfo.
            int base_Renamed = 0;
            int readerCount  = readers.Count;

            for (int i = 0; i < readerCount; i++)
            {
                IndexReader      reader   = readers[i];
                TermEnum         termEnum = reader.Terms();
                SegmentMergeInfo smi      = new SegmentMergeInfo(base_Renamed, termEnum, reader);
                int[]            docMap   = smi.GetDocMap();
                if (docMap != null)
                {
                    // docMaps/delCounts are allocated lazily, on the first reader that has a doc map.
                    if (docMaps == null)
                    {
                        docMaps   = new int[readerCount][];
                        delCounts = new int[readerCount];
                    }
                    docMaps[i]   = docMap;
                    delCounts[i] = smi.reader.MaxDoc - smi.reader.NumDocs();
                }

                base_Renamed += reader.NumDocs();

                System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc - smi.delCount);

                // initialize queue: only readers with at least one term are queued
                if (smi.Next())
                {
                    queue.Add(smi);
                }
                // initialize queue
                else
                {
                    smi.Dispose();
                }
            }

            // Scratch array holding the entries positioned on the current term (at most one per reader).
            SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];

            System.String currentField = null;
            FormatPostingsTermsConsumer termsConsumer = null;

            while (queue.Size() > 0)
            {
                int matchSize = 0; // pop matching terms
                match[matchSize++] = queue.Pop();
                Term             term = match[0].term;
                SegmentMergeInfo top  = queue.Top();

                while (top != null && term.CompareTo(top.term) == 0)
                {
                    match[matchSize++] = queue.Pop();
                    top = queue.Top();
                }

                // Reference comparison, not string equality.
                // NOTE(review): presumably relies on field names being interned - confirm.
                if ((System.Object)currentField != (System.Object)term.Field)
                {
                    currentField = term.Field;
                    // Finish the previous field's consumer before starting the next field.
                    if (termsConsumer != null)
                    {
                        termsConsumer.Finish();
                    }
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField);
                    termsConsumer            = consumer.AddField(fieldInfo);
                    omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
                }

                int df = AppendPostings(termsConsumer, match, matchSize); // add new TermInfo

                // checkAbort is used without a null check here.
                checkAbort.Work(df / 3.0);

                // Advance every matched entry (last collected first) and restore the queue.
                while (matchSize > 0)
                {
                    SegmentMergeInfo smi = match[--matchSize];
                    if (smi.Next())
                    {
                        queue.Add(smi);
                    }
                    // restore queue
                    else
                    {
                        smi.Dispose(); // done with a segment
                    }
                }
            }
        }
Esempio n. 15
0
		/// <summary>
		/// Selects the term at the top of the queue and sums its document frequency over
		/// every queued segment positioned on that term, advancing each such segment.
		/// </summary>
		/// <returns><c>false</c> (after clearing <c>term</c>) when the queue is empty; otherwise <c>true</c>.</returns>
		public override bool Next()
		{
			SegmentMergeInfo candidate = (SegmentMergeInfo) queue.Top();
			if (candidate == null)
			{
				term = null;
				return false;
			}

			term = candidate.term;
			docFreq = 0;

			while (true)
			{
				// Stop as soon as the queue is empty or its top moved past the selected term.
				if (candidate == null || term.CompareTo(candidate.term) != 0)
				{
					return true;
				}

				queue.Pop();
				docFreq += candidate.termEnum.DocFreq(); // increment freq

				bool hasMoreTerms = candidate.Next();
				if (hasMoreTerms)
				{
					queue.Put(candidate); // restore queue
				}
				else
				{
					candidate.Close(); // done with a segment
				}

				candidate = (SegmentMergeInfo) queue.Top();
			}
		}
Esempio n. 16
0
 /// <summary>
 /// Advances the segments matched by the previous call, then selects the term at the top
 /// of the queue and stores every queued segment positioned on it in
 /// <c>matchingSegments</c> (null-terminated), summing their document frequencies.
 /// </summary>
 /// <returns><c>false</c> (after clearing <c>term</c>) when the queue is empty; otherwise <c>true</c>.</returns>
 public override bool Next()
 {
     // Advance what was matched last time; the list ends at the first null entry.
     foreach (SegmentMergeInfo previous in matchingSegments)
     {
         if (previous == null)
         {
             break;
         }

         if (!previous.Next())
         {
             previous.Dispose(); // done with segment
             continue;
         }
         queue.Add(previous);
     }

     matchingSegments[0] = null;

     SegmentMergeInfo head = queue.Top();
     if (head == null)
     {
         term = null;
         return false;
     }

     term = head.term;
     docFreq = 0;

     int matched = 0;
     for (; head != null && term.CompareTo(head.term) == 0; head = queue.Top())
     {
         matchingSegments[matched++] = head;
         queue.Pop();
         docFreq += head.termEnum.DocFreq(); // increment freq
     }

     // Null-terminate the list of matches.
     matchingSegments[matched] = null;
     return true;
 }
Esempio n. 17
0
 /// <summary>Returns the position of a Term in the set or -1.</summary>
 /// <param name="term">The term to locate.</param>
 /// <returns>
 /// The enumerator's position when it stops on a term equal to <paramref name="term"/>;
 /// -1 when the set is empty, the term is absent, or the enumerator has no current term
 /// after scanning.
 /// </returns>
 internal long GetPosition(Term term)
 {
     if (size == 0)
     {
         return -1;
     }

     EnsureIndexIsRead();
     int indexOffset = GetIndexOffset(term);

     SegmentTermEnum enumerator = GetThreadResources().termEnum;
     SeekEnum(enumerator, indexOffset);

     // Scan forward until the current term is no longer smaller than the target,
     // or the enumerator cannot advance any further.
     while (term.CompareTo(enumerator.Term) > 0 && enumerator.Next())
     {
     }

     // Guard against a null current term before comparing, as the other lookups do after
     // scanning. NOTE(review): presumably an enumerator whose Next() returned false
     // reports a null Term - confirm against SegmentTermEnum.
     Term current = enumerator.Term;
     if (current != null && term.CompareTo(current) == 0)
     {
         return enumerator.position;
     }

     return -1;
 }
Esempio n. 18
0
		/// <summary>Returns the TermInfo for a Term in the set, or null.</summary>
		/// <remarks>
		/// First tries to continue scanning from the cached enumerator's current position;
		/// if the term cannot lie between that position and the next index entry, it seeks
		/// to the offset given by <c>GetIndexOffset</c> and scans from there.
		/// </remarks>
		public /*internal*/ TermInfo Get(Term term)
		{
			if (size == 0)
			{
				return null;
			}

			EnsureIndexIsRead();

			// optimize sequential access: first try scanning cached enum w/o seeking
			SegmentTermEnum cursor = GetEnum();
			if (cursor.Term() != null)
			{
				bool beyondPrev = cursor.Prev() != null && term.CompareTo(cursor.Prev()) > 0;
				if (beyondPrev || term.CompareTo(cursor.Term()) >= 0)
				{
					int nextIndexSlot = (int) (cursor.position / cursor.indexInterval) + 1;
					bool beforeNextIndexTerm = indexTerms.Length == nextIndexSlot || term.CompareTo(indexTerms[nextIndexSlot]) < 0;
					if (beforeNextIndexTerm)
					{
						return ScanEnum(term); // no need to seek
					}
				}
			}

			// random-access: must seek
			SeekEnum(GetIndexOffset(term));
			return ScanEnum(term);
		}
        /// <summary>
        /// Adds a new (Term, TermInfo) pair to the set.
        /// </summary>
        /// <remarks>
        /// Unless this writer is the index writer (<c>isIndex</c>), <paramref name="term"/> must
        /// compare greater than the last term. The freq and prox pointers of
        /// <paramref name="ti"/> must not be smaller than those of the previous entry.
        /// </remarks>
        /// <exception cref="System.IO.IOException">One of the ordering rules above is violated.</exception>
        /*internal*/
        public void Add(Term term, TermInfo ti)
        {
            // Ordering checks, in the same order the errors are reported.
            if (!isIndex)
            {
                if (term.CompareTo(lastTerm) <= 0)
                {
                    throw new System.IO.IOException("term out of order (\"" + term + "\".compareTo(\"" + lastTerm + "\") <= 0)");
                }
            }
            if (ti.freqPointer < lastTi.freqPointer)
            {
                throw new System.IO.IOException("freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")");
            }
            if (ti.proxPointer < lastTi.proxPointer)
            {
                throw new System.IO.IOException("proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")");
            }

            // Every indexInterval-th entry, the previous pair is also added to the other writer.
            if (!isIndex && size % indexInterval == 0)
            {
                other.Add(lastTerm, lastTi);
            }

            WriteTerm(term);
            output.WriteVInt(ti.docFreq);

            // Pointers are stored as deltas against the previous entry.
            output.WriteVLong(ti.freqPointer - lastTi.freqPointer);
            output.WriteVLong(ti.proxPointer - lastTi.proxPointer);

            if (ti.docFreq >= skipInterval)
            {
                output.WriteVInt(ti.skipOffset);
            }

            if (isIndex)
            {
                // Index entries also record the other output's file pointer, as a delta.
                output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
                lastIndexPointer = other.output.GetFilePointer();
            }

            lastTi.Set(ti);
            size++;
        }
Esempio n. 20
0
 /// <summary>
 /// Binary-searches <c>indexTerms</c> for <paramref name="term"/>.
 /// </summary>
 /// <returns>
 /// The offset of an entry equal to <paramref name="term"/>, or else the upper bound left
 /// when the search range closes: the offset of the greatest entry less than the term
 /// (-1 if there is none), provided <c>indexTerms</c> is sorted.
 /// </returns>
 private int GetIndexOffset(Term term)
 {
     int floor = 0;
     int ceiling = indexTerms.Length - 1;

     while (floor <= ceiling)
     {
         int middle = Number.URShift((floor + ceiling), 1);
         int order = term.CompareTo(indexTerms[middle]);

         if (order > 0)
         {
             floor = middle + 1;
         }
         else if (order < 0)
         {
             ceiling = middle - 1;
         }
         else
         {
             return middle;
         }
     }

     return ceiling;
 }
Esempio n. 21
0
 /// <summary>Returns the TermInfo for a Term in the set, or null.</summary>
 /// <param name="term">The term to look up.</param>
 /// <param name="useCache">
 /// When true, the per-thread TermInfo cache is consulted first and updated with the result.
 /// </param>
 private TermInfo Get(Term term, bool useCache)
 {
     if (size == 0)
     {
         return null;
     }

     EnsureIndexIsRead();

     ThreadResources resources = GetThreadResources();
     Cache<Term, TermInfo> cache = useCache ? resources.termInfoCache : null;

     if (useCache)
     {
         // check the cache first if the term was recently looked up
         TermInfo cached = cache.Get(term);
         if (cached != null)
         {
             return cached;
         }
     }

     // optimize sequential access: first try scanning cached enum w/o seeking
     SegmentTermEnum enumerator = resources.termEnum;
     if (enumerator.Term != null)
     {
         bool beyondPrev = enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0;
         if (beyondPrev || term.CompareTo(enumerator.Term) >= 0)
         {
             int enumOffset = (int) (enumerator.position / totalIndexInterval) + 1;
             if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0)
             {
                 // no need to seek
                 int numScans = enumerator.ScanTo(term);
                 if (enumerator.Term == null || term.CompareTo(enumerator.Term) != 0)
                 {
                     return null;
                 }

                 TermInfo scanned = enumerator.TermInfo();

                 // Only cache when the scan skipped more than one dictionary entry, so that
                 // in-order iteration (e.g. range or wildcard queries) does not wipe out the cache.
                 if (cache != null && numScans > 1)
                 {
                     cache.Put(term, scanned);
                 }
                 return scanned;
             }
         }
     }

     // random-access: must seek
     SeekEnum(enumerator, GetIndexOffset(term));
     enumerator.ScanTo(term);
     if (enumerator.Term == null || term.CompareTo(enumerator.Term) != 0)
     {
         return null;
     }

     TermInfo found = enumerator.TermInfo();
     if (cache != null)
     {
         cache.Put(term, found);
     }
     return found;
 }
Esempio n. 22
0
        /// <summary>Returns the TermInfo for a Term in the set, or null.</summary>
        /// <param name="term">The term to look up.</param>
        /// <param name="useCache">
        /// When true, the per-thread TermInfo cache is consulted first and updated with the result.
        /// </param>
        private TermInfo Get(Term term, bool useCache)
        {
            if (size == 0)
            {
                return null;
            }

            EnsureIndexIsRead();

            ThreadResources resources = GetThreadResources();
            Lucene.Net.Util.Cache.Cache cache = null;
            TermInfo ti;

            if (useCache)
            {
                cache = resources.termInfoCache;
                // check the cache first if the term was recently looked up
                ti = (TermInfo)cache.Get(term);
                if (ti != null)
                {
                    return ti;
                }
            }

            // optimize sequential access: decide whether the cached enum can be scanned w/o seeking
            SegmentTermEnum enumerator = resources.termEnum;
            bool canScanInPlace = false;

            if (enumerator.Term() != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term()) >= 0))
            {
                int enumOffset = (int)(enumerator.position / totalIndexInterval) + 1;
                canScanInPlace = indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0;
            }

            bool worthCaching;
            if (canScanInPlace)
            {
                // no need to seek; only cache when the scan skipped more than one dictionary
                // entry, so that in-order iteration (e.g. range or wildcard queries) does not
                // wipe out the cache
                worthCaching = enumerator.ScanTo(term) > 1;
            }
            else
            {
                // random-access: must seek
                SeekEnum(enumerator, GetIndexOffset(term));
                enumerator.ScanTo(term);
                worthCaching = true;
            }

            if (enumerator.Term() == null || term.CompareTo(enumerator.Term()) != 0)
            {
                return null;
            }

            ti = enumerator.TermInfo();
            if (cache != null && worthCaching)
            {
                cache.Put(term, ti);
            }
            return ti;
        }
Esempio n. 23
0
 /// <summary>Orders delete terms by comparing their underlying <c>Term</c> values.</summary>
 /// <returns>The result of comparing this instance's <c>Term</c> with <paramref name="other"/>'s.</returns>
 public int CompareTo(DeleteTerm other)
 {
     int order = Term.CompareTo(other.Term);
     return order;
 }