private readonly Term lastTerm = new Term(""); // LUCENENET: marked readonly

/// <summary>
/// Adds a term, delta-encoded against the previously added term: writes the
/// shared-prefix length (its low bit flags a field change), then the suffix
/// length and suffix bytes.  Terms must be added in sorted order.
/// </summary>
public virtual void Add(Term term)
{
    if (Debugging.AssertsEnabled)
    {
        // Either this is the very first term, or it must sort after the last one.
        Debugging.Assert(lastTerm.Equals(new Term("")) || term.CompareTo(lastTerm) > 0);
    }
    try
    {
        int prefix = SharedPrefix(lastTerm.Bytes, term.Bytes);
        int suffix = term.Bytes.Length - prefix;
        if (term.Field.Equals(lastTerm.Field, StringComparison.Ordinal))
        {
            output.WriteVInt32(prefix << 1); // low bit 0: same field as previous term
        }
        else
        {
            output.WriteVInt32(prefix << 1 | 1); // low bit 1: field changed
            output.WriteString(term.Field);
        }
        output.WriteVInt32(suffix);
        output.WriteBytes(term.Bytes.Bytes, term.Bytes.Offset + prefix, suffix);
        lastTerm.Bytes.CopyBytes(term.Bytes); // remember term for the next delta
        lastTerm.Field = term.Field;
    }
    catch (IOException e)
    {
        // Was `throw new Exception(...)`: never throw the bare System.Exception type.
        // InvalidOperationException is still assignable to Exception, so existing
        // catch blocks continue to work.
        throw new InvalidOperationException(e.ToString(), e);
    }
}
/// <summary>
/// Advances to the next merged term.  Pops every segment positioned on the
/// smallest current term, summing their document frequencies into
/// <c>docFreq</c>, then re-queues (or closes) each popped segment.
/// Returns false when all segments are exhausted.
/// </summary>
public override bool Next()
{
    SegmentMergeInfo top = (SegmentMergeInfo)queue.Top();
    if (top == null)
    {
        term = null; // no segments left
        return(false);
    }
    term = top.term;
    docFreq = 0;
    // Consume every segment whose current term equals the smallest term.
    while (top != null && term.CompareTo(top.term) == 0)
    {
        queue.Pop();
        docFreq += top.termEnum.DocFreq(); // increment freq
        if (top.Next())
        {
            queue.Put(top);
        } // restore queue
        else
        {
            top.Close(); // done with a segment
        }
        top = (SegmentMergeInfo)queue.Top();
    }
    return(true);
}
/// <summary>
/// Merges the term dictionaries of all readers: seeds a priority queue with
/// one <c>SegmentMergeInfo</c> per reader, then repeatedly pops all entries
/// sharing the smallest term and writes one merged TermInfo for it.
/// </summary>
private void MergeTermInfos()
{
    int base_Renamed = 0; // running doc-number base assigned to each reader
    for (int i = 0; i < readers.Count; i++)
    {
        IndexReader reader = (IndexReader)readers[i];
        TermEnum termEnum = reader.Terms();
        SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader);
        base_Renamed += reader.NumDocs();
        if (smi.Next())
        {
            queue.Put(smi);
        } // initialize queue
        else
        {
            smi.Close(); // reader has no terms
        }
    }
    SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];
    while (queue.Size() > 0)
    {
        int matchSize = 0; // pop matching terms
        match[matchSize++] = (SegmentMergeInfo)queue.Pop();
        Term term = match[0].term;
        SegmentMergeInfo top = (SegmentMergeInfo)queue.Top();
        // Gather every segment positioned on this same term.
        while (top != null && term.CompareTo(top.term) == 0)
        {
            match[matchSize++] = (SegmentMergeInfo)queue.Pop();
            top = (SegmentMergeInfo)queue.Top();
        }
        int df = MergeTermInfo(match, matchSize); // add new TermInfo
        if (checkAbort != null)
        {
            checkAbort.Work(df / 3.0); // report progress / allow merge abort
        }
        // Re-queue segments that still have terms; close exhausted ones.
        while (matchSize > 0)
        {
            SegmentMergeInfo smi = match[--matchSize];
            if (smi.Next())
            {
                queue.Put(smi);
            } // restore queue
            else
            {
                smi.Close(); // done with a segment
            }
        }
    }
}
// used only by assert: verifies delete terms arrive in strictly increasing
// order, remembering a deep copy of the last one seen.  Always returns true
// so it can be used inside Debug.Assert.
private bool CheckDeleteTerm(Term term)
{
    if (term == null)
    {
        LastDeleteTerm = null;
    }
    else
    {
        Debug.Assert(LastDeleteTerm == null || term.CompareTo(LastDeleteTerm) > 0, "lastTerm=" + LastDeleteTerm + " vs term=" + term);
        // TODO: we re-use term now in our merged iterable, but we shouldn't clone, instead copy for this assert
        LastDeleteTerm = new Term(term.Field(), BytesRef.DeepCopyOf(term.Bytes_Renamed));
    }
    return true;
}
/// <summary>
/// Advances the enumerator until the current term is greater than or equal
/// to <paramref name="target"/>.  Returns false if the enumeration is
/// exhausted first.
/// </summary>
public virtual bool SkipTo(Term target)
{
    while (true)
    {
        if (!Next())
        {
            return false;
        }
        if (target.CompareTo(Term()) <= 0)
        {
            return true;
        }
    }
}
// Term Vector support
/// <summary>Skips terms to the first beyond the current whose value is
/// greater or equal to <i>target</i>. <p>Returns true iff there is such
/// an entry. <p>Behaves as if written: <pre>
/// public boolean skipTo(Term target) {
///   do {
///     if (!next())
///       return false;
///   } while (target > term());
///   return true;
/// }
/// </pre>
/// Some implementations are considerably more efficient than that.
/// </summary>
public virtual bool SkipTo(Term target)
{
    for (; ; )
    {
        // Exhausted before reaching the target.
        if (!Next())
        {
            return (false);
        }
        // Current term is now >= target.
        if (target.CompareTo(Term()) <= 0)
        {
            return (true);
        }
    }
}
// used only by assert: delete terms must arrive in strictly increasing order.
// Always returns true so it can appear inside an assert expression.
private bool CheckDeleteTerm(Term term)
{
    if (term != null && Debugging.AssertsEnabled)
    {
        Debugging.Assert(lastDeleteTerm == null || term.CompareTo(lastDeleteTerm) > 0, "lastTerm={0} vs term={1}", lastDeleteTerm, term);
    }
    // TODO: we re-use term now in our merged iterable, but we shouldn't clone, instead copy for this assert
    lastDeleteTerm = term != null ? new Term(term.Field, BytesRef.DeepCopyOf(term.Bytes)) : null;
    return true;
}
/// <summary>
/// Scans forward within the current block for an exact match of
/// <paramref name="term"/>; returns its TermInfo, or null if absent.
/// </summary>
private TermInfo ScanEnum(Term term)
{
    SegmentTermEnum enumerator = GetEnum();
    enumerator.ScanTo(term);
    bool positioned = enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0;
    return positioned ? enumerator.TermInfo() : null;
}
/// <summary>Adds a new &lt;Term, TermInfo&gt; pair to the set.
/// Term must be lexicographically greater than all previous Terms added.
/// TermInfo pointers must be positive and greater than all previous.
/// </summary>
public /*internal*/ void Add(Term term, TermInfo ti)
{
    // Enforce ascending term order; the order check is skipped for the index writer.
    if (!isIndex && term.CompareTo(lastTerm) <= 0)
    {
        throw new System.IO.IOException("term out of order");
    }
    if (ti.freqPointer < lastTi.freqPointer)
    {
        throw new System.IO.IOException("freqPointer out of order");
    }
    if (ti.proxPointer < lastTi.proxPointer)
    {
        throw new System.IO.IOException("proxPointer out of order");
    }
    // Every indexInterval-th term, mirror the previous entry into the index writer.
    if (!isIndex && size % indexInterval == 0)
    {
        other.Add(lastTerm, lastTi); // add an index term
    }
    WriteTerm(term); // write term
    output.WriteVInt(ti.docFreq); // write doc freq
    output.WriteVLong(ti.freqPointer - lastTi.freqPointer); // write pointers (delta-encoded)
    output.WriteVLong(ti.proxPointer - lastTi.proxPointer);
    if (ti.docFreq >= skipInterval)
    {
        output.WriteVInt(ti.skipOffset);
    }
    if (isIndex)
    {
        output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
        lastIndexPointer = other.output.GetFilePointer(); // write pointer
    }
    lastTi.Set(ti);
    size++;
}
/// <summary>Returns the offset of the greatest index entry which is less than or equal to term.</summary>
private int GetIndexOffset(Term term)
{
    int low = 0; // binary search over indexTerms[]
    int high = indexTerms.Length - 1;
    while (low <= high)
    {
        // Unsigned shift is the overflow-safe midpoint, equivalent to Java's (lo + hi) >>> 1.
        int mid = (int)((uint)(low + high) >> 1);
        int cmp = term.CompareTo(indexTerms[mid]);
        if (cmp == 0)
        {
            return mid; // exact match
        }
        if (cmp < 0)
        {
            high = mid - 1;
        }
        else
        {
            low = mid + 1;
        }
    }
    // Not found: 'high' is the greatest entry below term (-1 if term precedes all).
    return high;
}
/// <summary>
/// Scans forward within the current block; yields the TermInfo of an exact
/// match for <paramref name="term"/>, or null when no such term exists.
/// </summary>
private TermInfo ScanEnum(Term term)
{
    SegmentTermEnum e = GetEnum();
    e.ScanTo(term);
    if (e.Term() == null)
    {
        return null; // scanned past the end of the block
    }
    return term.CompareTo(e.Term()) == 0 ? e.TermInfo() : null;
}
// used only by assert: delete terms must be strictly increasing.  Always
// returns true so callers can wrap it in Debug.Assert.
private bool CheckDeleteTerm(Term term)
{
    if (term == null)
    {
        lastDeleteTerm = null;
        return true;
    }
    System.Diagnostics.Debug.Assert(
        lastDeleteTerm == null || term.CompareTo(lastDeleteTerm) > 0,
        "lastTerm=" + lastDeleteTerm + " vs term=" + term);
    lastDeleteTerm = term;
    return true;
}
/// <summary>
/// Advances the merged enumerator.  First re-queues (or closes) the segments
/// that supplied the previous term, then collects every segment positioned
/// on the new smallest term into <c>matchingSegments</c>, summing their
/// document frequencies.  Returns false when all segments are exhausted.
/// </summary>
public override bool Next()
{
    // Advance the segments that matched the previous term.
    for (int i = 0; i < matchingSegments.Length; i++)
    {
        SegmentMergeInfo smi = matchingSegments[i];
        if (smi == null)
            break; // null marks the end of the matched list
        if (smi.Next())
            queue.Put(smi);
        else
            smi.Close(); // done with segment
    }
    int numMatchingSegments = 0;
    matchingSegments[0] = null;
    SegmentMergeInfo top = (SegmentMergeInfo) queue.Top();
    if (top == null)
    {
        term = null; // no segments left
        return false;
    }
    term = top.term;
    docFreq = 0;
    // Pop every segment whose current term equals the smallest term.
    while (top != null && term.CompareTo(top.term) == 0)
    {
        matchingSegments[numMatchingSegments++] = top;
        queue.Pop();
        docFreq += top.termEnum.DocFreq(); // increment freq
        top = (SegmentMergeInfo) queue.Top();
    }
    matchingSegments[numMatchingSegments] = null; // null-terminate the matched list
    return true;
}
/// <summary>
/// Merges term dictionaries and postings of all readers into
/// <paramref name="consumer"/>, one field at a time, using a priority queue
/// ordered by term.
/// </summary>
private void MergeTermInfos(FormatPostingsFieldsConsumer consumer)
{
    int base_Renamed = 0; // running doc-number base assigned to each reader
    int readerCount = readers.Count;
    for (int i = 0; i < readerCount; i++)
    {
        IndexReader reader = readers[i];
        TermEnum termEnum = reader.Terms();
        SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader);
        int[] docMap = smi.GetDocMap();
        if (docMap != null)
        {
            // Reader has deletions: lazily allocate per-reader doc maps / delete counts.
            if (docMaps == null)
            {
                docMaps = new int[readerCount][];
                delCounts = new int[readerCount];
            }
            docMaps[i] = docMap;
            delCounts[i] = smi.reader.MaxDoc - smi.reader.NumDocs();
        }
        base_Renamed += reader.NumDocs();
        System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc - smi.delCount);
        if (smi.Next())
        {
            queue.Add(smi);
        } // initialize queue
        else
        {
            smi.Dispose(); // reader has no terms
        }
    }
    SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];
    System.String currentField = null;
    FormatPostingsTermsConsumer termsConsumer = null;
    while (queue.Size() > 0)
    {
        int matchSize = 0; // pop matching terms
        match[matchSize++] = queue.Pop();
        Term term = match[0].term;
        SegmentMergeInfo top = queue.Top();
        // Gather every segment positioned on this same term.
        while (top != null && term.CompareTo(top.term) == 0)
        {
            match[matchSize++] = queue.Pop();
            top = queue.Top();
        }
        // NOTE(review): deliberate reference comparison — presumably field strings
        // are interned so object identity detects a field change; confirm.
        if ((System.Object)currentField != (System.Object)term.Field)
        {
            currentField = term.Field;
            if (termsConsumer != null)
            {
                termsConsumer.Finish(); // close out the previous field
            }
            FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField);
            termsConsumer = consumer.AddField(fieldInfo);
            omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
        }
        int df = AppendPostings(termsConsumer, match, matchSize); // add new TermInfo
        checkAbort.Work(df / 3.0); // report progress / allow merge abort
        // Re-queue segments that still have terms; dispose exhausted ones.
        while (matchSize > 0)
        {
            SegmentMergeInfo smi = match[--matchSize];
            if (smi.Next())
            {
                queue.Add(smi);
            } // restore queue
            else
            {
                smi.Dispose(); // done with a segment
            }
        }
    }
}
/// <summary>
/// Advances to the next merged term: drains every segment positioned on the
/// smallest current term, accumulating document frequencies, then re-queues
/// (or closes) each drained segment.  False when nothing remains.
/// </summary>
public override bool Next()
{
    SegmentMergeInfo smallest = (SegmentMergeInfo) queue.Top();
    if (smallest == null)
    {
        term = null;
        return false;
    }
    term = smallest.term;
    docFreq = 0;
    // Drain all segments whose current term equals the smallest term.
    while (smallest != null && term.CompareTo(smallest.term) == 0)
    {
        queue.Pop();
        docFreq += smallest.termEnum.DocFreq();
        if (smallest.Next())
        {
            queue.Put(smallest); // segment has more terms: requeue it
        }
        else
        {
            smallest.Close(); // segment exhausted
        }
        smallest = (SegmentMergeInfo) queue.Top();
    }
    return true;
}
/// <summary>
/// Advances the merged enumerator: re-queues (or disposes) the segments that
/// matched the previous term, then gathers all segments positioned on the
/// new smallest term into <c>matchingSegments</c>, accumulating
/// <c>docFreq</c>.  Returns false when all segments are exhausted.
/// </summary>
public override bool Next()
{
    // Advance the segments that supplied the previous term.
    foreach (SegmentMergeInfo smi in matchingSegments)
    {
        if (smi == null)
            break; // null marks the end of the matched list
        if (smi.Next())
            queue.Add(smi);
        else
            smi.Dispose(); // done with segment
    }
    int numMatchingSegments = 0;
    matchingSegments[0] = null;
    SegmentMergeInfo top = queue.Top();
    if (top == null)
    {
        term = null; // no segments left
        return false;
    }
    term = top.term;
    docFreq = 0;
    // Pop every segment whose current term equals the smallest term.
    while (top != null && term.CompareTo(top.term) == 0)
    {
        matchingSegments[numMatchingSegments++] = top;
        queue.Pop();
        docFreq += top.termEnum.DocFreq(); // increment freq
        top = queue.Top();
    }
    matchingSegments[numMatchingSegments] = null; // null-terminate the matched list
    return true;
}
/// <summary>
/// Returns the position of <paramref name="term"/> in the set, or -1 when it
/// is not present.  Seeks to the nearest index entry, then scans forward.
/// </summary>
internal long GetPosition(Term term)
{
    if (size == 0)
    {
        return -1;
    }
    EnsureIndexIsRead();
    int indexOffset = GetIndexOffset(term);
    SegmentTermEnum enumerator = GetThreadResources().termEnum;
    SeekEnum(enumerator, indexOffset);
    // Linear scan until we reach or pass the requested term.
    while (term.CompareTo(enumerator.Term) > 0)
    {
        if (!enumerator.Next())
        {
            break; // enumeration exhausted
        }
    }
    return term.CompareTo(enumerator.Term) == 0 ? enumerator.position : -1;
}
/// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
public /*internal*/ TermInfo Get(Term term)
{
    if (size == 0)
        return null;
    EnsureIndexIsRead();
    // optimize sequential access: first try scanning cached enum w/o seeking
    SegmentTermEnum enumerator = GetEnum();
    // Term lies at or after the enum's current position...
    if (enumerator.Term() != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term()) >= 0))
    {
        // ...and before the next index entry, so a forward scan within the
        // current block is sufficient.
        int enumOffset = (int) (enumerator.position / enumerator.indexInterval) + 1;
        if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0)
            return ScanEnum(term); // no need to seek
    }
    // random-access: must seek
    SeekEnum(GetIndexOffset(term));
    return ScanEnum(term);
}
/// <summary>Adds a new &lt;Term, TermInfo&gt; pair to the set.
/// Term must be lexicographically greater than all previous Terms added.
/// TermInfo pointers must be positive and greater than all previous.
/// </summary>
/*internal*/ public void Add(Term term, TermInfo ti)
{
    // Enforce ascending term order; the order check is skipped for the index writer.
    if (!isIndex && term.CompareTo(lastTerm) <= 0)
    {
        throw new System.IO.IOException("term out of order (\"" + term + "\".compareTo(\"" + lastTerm + "\") <= 0)");
    }
    if (ti.freqPointer < lastTi.freqPointer)
        throw new System.IO.IOException("freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")");
    if (ti.proxPointer < lastTi.proxPointer)
        throw new System.IO.IOException("proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")");
    // Every indexInterval-th term, mirror the previous entry into the index writer.
    if (!isIndex && size % indexInterval == 0)
        other.Add(lastTerm, lastTi); // add an index term
    WriteTerm(term); // write term
    output.WriteVInt(ti.docFreq); // write doc freq
    output.WriteVLong(ti.freqPointer - lastTi.freqPointer); // write pointers (delta-encoded)
    output.WriteVLong(ti.proxPointer - lastTi.proxPointer);
    if (ti.docFreq >= skipInterval)
    {
        output.WriteVInt(ti.skipOffset);
    }
    if (isIndex)
    {
        output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
        lastIndexPointer = other.output.GetFilePointer(); // write pointer
    }
    lastTi.Set(ti);
    size++;
}
/// <summary>Returns the offset of the greatest index entry which is less than or equal to term.</summary>
private int GetIndexOffset(Term term)
{
    // Binary search over indexTerms[]; when term is absent the loop exits
    // with 'upper' pointing at the greatest entry below it (-1 if none).
    int lower = 0;
    int upper = indexTerms.Length - 1;
    while (lower <= upper)
    {
        // Unsigned shift reproduces Java's overflow-safe (lo + hi) >>> 1 midpoint.
        int mid = (int)((uint)(lower + upper) >> 1);
        int cmp = term.CompareTo(indexTerms[mid]);
        if (cmp == 0)
            return mid;
        if (cmp < 0)
            upper = mid - 1;
        else
            lower = mid + 1;
    }
    return upper;
}
/// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
private TermInfo Get(Term term, bool useCache)
{
    if (size == 0)
        return null;
    EnsureIndexIsRead();
    TermInfo ti;
    ThreadResources resources = GetThreadResources();
    Cache<Term, TermInfo> cache = null;
    if (useCache)
    {
        cache = resources.termInfoCache;
        // check the cache first if the term was recently looked up
        ti = cache.Get(term);
        if (ti != null)
        {
            return ti;
        }
    }
    // optimize sequential access: first try scanning cached enum w/o seeking
    SegmentTermEnum enumerator = resources.termEnum;
    // Term lies at or after the enum's current position...
    if (enumerator.Term != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term) >= 0))
    {
        // ...and before the next index entry, so a forward scan suffices.
        int enumOffset = (int) (enumerator.position / totalIndexInterval) + 1;
        if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0)
        {
            // no need to seek
            int numScans = enumerator.ScanTo(term);
            if (enumerator.Term != null && term.CompareTo(enumerator.Term) == 0)
            {
                ti = enumerator.TermInfo();
                if (cache != null && numScans > 1)
                {
                    // we only want to put this TermInfo into the cache if
                    // scanEnum skipped more than one dictionary entry.
                    // This prevents RangeQueries or WildcardQueries to
                    // wipe out the cache when they iterate over a large numbers
                    // of terms in order
                    cache.Put(term, ti);
                }
            }
            else
            {
                ti = null; // scanned past without a match
            }
            return ti;
        }
    }
    // random-access: must seek
    SeekEnum(enumerator, GetIndexOffset(term));
    enumerator.ScanTo(term);
    if (enumerator.Term != null && term.CompareTo(enumerator.Term) == 0)
    {
        ti = enumerator.TermInfo();
        if (cache != null)
        {
            cache.Put(term, ti);
        }
    }
    else
    {
        ti = null; // term not present in this segment
    }
    return ti;
}
/// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
private TermInfo Get(Term term, bool useCache)
{
    if (size == 0)
    {
        return(null);
    }
    EnsureIndexIsRead();
    TermInfo ti;
    ThreadResources resources = GetThreadResources();
    Lucene.Net.Util.Cache.Cache cache = null;
    if (useCache)
    {
        cache = resources.termInfoCache;
        // check the cache first if the term was recently looked up
        ti = (TermInfo)cache.Get(term);
        if (ti != null)
        {
            return(ti);
        }
    }
    // optimize sequential access: first try scanning cached enum w/o seeking
    SegmentTermEnum enumerator = resources.termEnum;
    // Term lies at or after the enum's current position...
    if (enumerator.Term() != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term()) >= 0))
    {
        // ...and before the next index entry, so a forward scan suffices.
        int enumOffset = (int)(enumerator.position / totalIndexInterval) + 1;
        if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0)
        {
            // no need to seek
            int numScans = enumerator.ScanTo(term);
            if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0)
            {
                ti = enumerator.TermInfo();
                if (cache != null && numScans > 1)
                {
                    // we only want to put this TermInfo into the cache if
                    // scanEnum skipped more than one dictionary entry.
                    // This prevents RangeQueries or WildcardQueries to
                    // wipe out the cache when they iterate over a large numbers
                    // of terms in order
                    cache.Put(term, ti);
                }
            }
            else
            {
                ti = null; // scanned past without a match
            }
            return(ti);
        }
    }
    // random-access: must seek
    SeekEnum(enumerator, GetIndexOffset(term));
    enumerator.ScanTo(term);
    if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0)
    {
        ti = enumerator.TermInfo();
        if (cache != null)
        {
            cache.Put(term, ti);
        }
    }
    else
    {
        ti = null; // term not present in this segment
    }
    return(ti);
}
/// <summary>Orders DeleteTerm instances by their underlying Term.</summary>
public int CompareTo(DeleteTerm other)
{
    return Term.CompareTo(other.Term);
}