/// <summary>Scans forward within the current block for a term equal to
/// <c>term</c>. Returns its TermInfo, or null if the term is not present.
/// </summary>
private TermInfo ScanEnum(Term term)
{
    SegmentTermEnum termEnum = GetEnum();
    // Advance while the target still sorts after the enumerator's current term.
    while (term.CompareTo(termEnum.Term()) > 0 && termEnum.Next())
    {
    }
    Term current = termEnum.Term();
    return (current != null && term.CompareTo(current) == 0) ? termEnum.TermInfo() : null;
}
// Term Vector support
/// <summary>Skips terms to the first beyond the current whose value is
/// greater or equal to <i>target</i>. Returns true iff there is such an
/// entry. Always advances at least one position; returns false once the
/// enumeration is exhausted. Some implementations are considerably more
/// efficient than this linear scan.
/// </summary>
public virtual bool SkipTo(Term target)
{
    bool advanced = Next();
    while (advanced && target.CompareTo(Term()) > 0)
    {
        advanced = Next();
    }
    return advanced;
}
// Term Vector support
/// <summary>Skips terms to the first beyond the current whose value is
/// greater or equal to <i>target</i>. Returns true iff there is such an
/// entry. Always moves at least one step forward; false means the
/// enumeration ran out before reaching the target. Some implementations
/// are considerably more efficient than this linear scan.
/// </summary>
public virtual bool SkipTo(Term target)
{
    for (; ; )
    {
        if (!Next())
        {
            return false;
        }
        if (target.CompareTo(Term()) <= 0)
        {
            return true;
        }
    }
}
/// <summary>
/// Merges the term dictionaries of all source readers. A priority queue of
/// SegmentMergeInfo yields terms in sorted order; on each round every segment
/// positioned on the smallest term is popped into <c>match</c> and merged in
/// a single MergeTermInfo call, then each matched segment is advanced and
/// re-queued if it still has terms.
/// </summary>
private void MergeTermInfos()
{
    // Doc numbers of each reader are offset by the total doc count of the
    // readers that precede it.
    int base_Renamed = 0;
    for (int i = 0; i < readers.Count; i++)
    {
        Monodoc.Lucene.Net.Index.IndexReader reader = (Monodoc.Lucene.Net.Index.IndexReader) readers[i];
        TermEnum termEnum = reader.Terms();
        SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader);
        base_Renamed += reader.NumDocs();
        if (smi.Next())
        {
            queue.Put(smi); // initialize queue
        }
        else
        {
            smi.Close(); // reader contributed no terms
        }
    }
    SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];
    while (queue.Size() > 0)
    {
        int matchSize = 0; // pop matching terms
        match[matchSize++] = (SegmentMergeInfo) queue.Pop();
        Term term = match[0].term;
        SegmentMergeInfo top = (SegmentMergeInfo) queue.Top();
        // Gather every other segment currently positioned on the same term.
        while (top != null && term.CompareTo(top.term) == 0)
        {
            match[matchSize++] = (SegmentMergeInfo) queue.Pop();
            top = (SegmentMergeInfo) queue.Top();
        }
        MergeTermInfo(match, matchSize); // add new TermInfo
        // Advance each matched segment; re-queue it or close it if exhausted.
        while (matchSize > 0)
        {
            SegmentMergeInfo smi = match[--matchSize];
            if (smi.Next())
            {
                queue.Put(smi); // restore queue
            }
            else
            {
                smi.Close(); // done with a segment
            }
        }
    }
}
/// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
public /*internal*/ TermInfo Get(Term term)
{
    if (size == 0)
    {
        return null;
    }

    // Sequential-access optimization: if the target sorts at or after the
    // cached enumerator's position but before the next index term, a forward
    // scan from the current position avoids a seek.
    SegmentTermEnum cached = GetEnum();
    Term current = cached.Term();
    if (current != null)
    {
        bool pastPrev = cached.prev != null && term.CompareTo(cached.prev) > 0;
        if (pastPrev || term.CompareTo(current) >= 0)
        {
            int nextIndexOffset = (int) (cached.position / cached.indexInterval) + 1;
            if (indexTerms.Length == nextIndexOffset || term.CompareTo(indexTerms[nextIndexOffset]) < 0)
            {
                return ScanEnum(term); // no need to seek
            }
        }
    }

    // Random access: seek to the greatest index entry <= term, then scan.
    SeekEnum(GetIndexOffset(term));
    return ScanEnum(term);
}
/// <summary>Adds a new &lt;Term, TermInfo&gt; pair to the set.
/// Term must be lexicographically greater than all previous Terms added.
/// TermInfo pointers must be positive and greater than all previous.
/// </summary>
public /*internal*/ void Add(Term term, TermInfo ti)
{
    // Invariant checks: terms arrive sorted (only enforced for the primary
    // writer) and file pointers never move backwards.
    if (!isIndex && term.CompareTo(lastTerm) <= 0)
    {
        throw new System.IO.IOException("term out of order");
    }
    if (ti.freqPointer < lastTi.freqPointer)
    {
        throw new System.IO.IOException("freqPointer out of order");
    }
    if (ti.proxPointer < lastTi.proxPointer)
    {
        throw new System.IO.IOException("proxPointer out of order");
    }

    // Every indexInterval-th entry of the primary writer is mirrored into the
    // companion index writer.
    if (!isIndex && size % indexInterval == 0)
    {
        other.Add(lastTerm, lastTi); // add an index term
    }

    WriteTerm(term); // write term
    output.WriteVInt(ti.docFreq); // write doc freq

    // Pointers are delta-encoded against the previous entry.
    long freqDelta = ti.freqPointer - lastTi.freqPointer;
    long proxDelta = ti.proxPointer - lastTi.proxPointer;
    output.WriteVLong(freqDelta); // write pointers
    output.WriteVLong(proxDelta);

    if (ti.docFreq >= skipInterval)
    {
        output.WriteVInt(ti.skipOffset);
    }

    if (isIndex)
    {
        long indexPointer = other.output.GetFilePointer();
        output.WriteVLong(indexPointer - lastIndexPointer);
        lastIndexPointer = indexPointer; // write pointer
    }

    lastTi.Set(ti);
    size++;
}
/// <summary>
/// Returns the offset of the greatest index entry which is less than or
/// equal to <paramref name="term"/>, or -1 if the term sorts before every
/// index entry.
/// </summary>
private int GetIndexOffset(Term term)
{
    int lo = 0; // binary search indexTerms[]
    int hi = indexTerms.Length - 1;
    while (hi >= lo)
    {
        // Overflow-safe midpoint: (lo + hi) >> 1 can wrap negative when
        // lo + hi exceeds int.MaxValue (classic binary-search bug).
        int mid = lo + ((hi - lo) >> 1);
        int delta = term.CompareTo(indexTerms[mid]);
        if (delta < 0)
        {
            hi = mid - 1;
        }
        else if (delta > 0)
        {
            lo = mid + 1;
        }
        else
        {
            return mid;
        }
    }
    // No exact match: hi now indexes the greatest entry < term (-1 if none).
    return hi;
}
/// <summary>Returns the position of a Term in the set or -1. </summary>
internal long GetPosition(Term term)
{
    if (size == 0)
        return -1;

    // Seek to the greatest index entry <= term, then scan forward.
    int indexOffset = GetIndexOffset(term);
    SeekEnum(indexOffset);

    SegmentTermEnum enumerator = GetEnum();
    while (term.CompareTo(enumerator.Term()) > 0 && enumerator.Next())
    {
    }

    // Guard against a null current term (scan exhausted the enumeration),
    // matching the null check in ScanEnum; comparing against a null term
    // could otherwise throw instead of reporting "not found".
    if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0)
        return enumerator.position;
    else
        return -1;
}
/// <summary>Scans within block for matching term; returns its TermInfo or
/// null when the term is absent. </summary>
private TermInfo ScanEnum(Term term)
{
    SegmentTermEnum scanner = GetEnum();
    // Step the scanner forward until it reaches or passes the target,
    // or runs out of terms.
    bool hasMore = true;
    while (hasMore && term.CompareTo(scanner.Term()) > 0)
    {
        hasMore = scanner.Next();
    }
    if (scanner.Term() == null || term.CompareTo(scanner.Term()) != 0)
    {
        return null;
    }
    return scanner.TermInfo();
}
/// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
public /*internal*/ TermInfo Get(Term term)
{
    if (size == 0)
    {
        return null;
    }

    // Decide whether the cached enumerator can simply scan forward: the
    // target must lie at/after the enum's position and before the next
    // index term, in which case no seek is needed.
    SegmentTermEnum enumerator = GetEnum();
    bool canScanWithoutSeek = false;
    if (enumerator.Term() != null)
    {
        if ((enumerator.prev != null && term.CompareTo(enumerator.prev) > 0) || term.CompareTo(enumerator.Term()) >= 0)
        {
            int enumOffset = (int) (enumerator.position / enumerator.indexInterval) + 1;
            canScanWithoutSeek = indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0;
        }
    }

    if (!canScanWithoutSeek)
    {
        // Random access: position the enum at the greatest index entry <= term.
        SeekEnum(GetIndexOffset(term));
    }
    return ScanEnum(term);
}
/// <summary>
/// Returns the offset of the greatest index entry which is less than or
/// equal to <paramref name="term"/>, or -1 if the term sorts before every
/// index entry.
/// </summary>
private int GetIndexOffset(Term term)
{
    int lo = 0; // binary search indexTerms[]
    int hi = indexTerms.Length - 1;
    while (hi >= lo)
    {
        // Compute the midpoint without risking int overflow; the naive
        // (lo + hi) >> 1 wraps negative when lo + hi exceeds int.MaxValue.
        int mid = lo + ((hi - lo) >> 1);
        int delta = term.CompareTo(indexTerms[mid]);
        if (delta < 0)
            hi = mid - 1;
        else if (delta > 0)
            lo = mid + 1;
        else
            return mid;
    }
    // No exact match: hi is the offset of the greatest entry < term (-1 if none).
    return hi;
}
/// <summary>Adds a new &lt;Term, TermInfo&gt; pair to the set.
/// Term must be lexicographically greater than all previous Terms added.
/// TermInfo pointers must be positive and greater than all previous.
/// </summary>
public /*internal*/ void Add(Term term, TermInfo ti)
{
    // Invariants: terms arrive in sorted order (only checked when this is
    // not the index writer) and file pointers never decrease.
    if (!isIndex && term.CompareTo(lastTerm) <= 0)
        throw new System.IO.IOException("term out of order");
    if (ti.freqPointer < lastTi.freqPointer)
        throw new System.IO.IOException("freqPointer out of order");
    if (ti.proxPointer < lastTi.proxPointer)
        throw new System.IO.IOException("proxPointer out of order");
    // Every indexInterval-th entry is mirrored into the companion writer.
    if (!isIndex && size % indexInterval == 0)
        other.Add(lastTerm, lastTi); // add an index term
    WriteTerm(term); // write term
    output.WriteVInt(ti.docFreq); // write doc freq
    // Pointers are delta-encoded against the previously written entry.
    output.WriteVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
    output.WriteVLong(ti.proxPointer - lastTi.proxPointer);
    // Skip data is only written for terms frequent enough to have a skip list.
    if (ti.docFreq >= skipInterval)
    {
        output.WriteVInt(ti.skipOffset);
    }
    if (isIndex)
    {
        // Index entries also record (delta-encoded) where the corresponding
        // entry lives in the main terms file.
        output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
        lastIndexPointer = other.output.GetFilePointer(); // write pointer
    }
    lastTi.Set(ti);
    size++;
}
/// <summary>
/// Advances to the next term in the merged enumeration. The queue keeps each
/// segment ordered by its current term; every segment positioned on the
/// smallest term is consumed, its doc freq added to <c>docFreq</c>, and then
/// re-queued (or closed when exhausted). Returns false — with <c>term</c> set
/// to null — once all segments are exhausted.
/// </summary>
public override bool Next()
{
    SegmentTermEnum top = (SegmentMergeInfo) queue.Top();
    if (top == null)
    {
        term = null;
        return false;
    }
    term = top.term;
    docFreq = 0;
    // Consume every segment whose current term matches the smallest term.
    while (top != null && term.CompareTo(top.term) == 0)
    {
        queue.Pop();
        docFreq += top.termEnum.DocFreq(); // increment freq
        if (top.Next())
            queue.Put(top); // restore queue
        else
            top.Close(); // done with a segment
        top = (SegmentMergeInfo) queue.Top();
    }
    return true;
}