/// <summary>
/// Moves to the first match after the current one whose document number is
/// greater than or equal to <paramref name="target"/>.
/// Documents already buffered in <c>docs</c> are checked first; if none of them
/// qualifies, the underlying stream is repositioned with
/// <see cref="TermDocs.SkipTo(int)" />.
/// </summary>
/// <param name="target">The target document number.</param>
/// <returns>
/// The matching document number, or <c>NO_MORE_DOCS</c> when the underlying
/// stream has no document at or after <paramref name="target"/>.
/// </returns>
public override int Advance(int target)
{
    // Step past the current entry and look through what is left of the buffer.
    pointer++;
    while (pointer < pointerMax)
    {
        int buffered = docs[pointer];
        if (buffered >= target)
        {
            doc = buffered;
            return doc;
        }
        pointer++;
    }

    // Buffer had nothing suitable: ask the underlying term docs to skip ahead.
    if (!termDocs.SkipTo(target))
    {
        doc = NO_MORE_DOCS;
        return doc;
    }

    // Restart the buffer with the single entry the stream is now positioned on.
    pointerMax = 1;
    pointer = 0;
    doc = termDocs.Doc;
    docs[pointer] = doc;
    freqs[pointer] = termDocs.Freq;
    return doc;
}
/// <summary>
/// Skips the wrapped term docs to <paramref name="target"/> and then keeps
/// stepping forward until it reaches a document accepted by <c>_docset</c>.
/// </summary>
/// <param name="target">The document number to skip to.</param>
/// <returns>
/// The first accepted document found, or
/// <c>DocIdSetIterator.NO_MORE_DOCS</c> when the term docs run out first.
/// </returns>
public override int Advance(int target)
{
    // Nothing at or after the target: report exhaustion right away.
    if (!_td.SkipTo(target))
    {
        doc = DocIdSetIterator.NO_MORE_DOCS;
        return doc;
    }

    doc = _td.Doc;

    // Walk forward past every document the doc set rejects.
    while (!_docset.Get(doc))
    {
        if (!_td.Next())
        {
            doc = DocIdSetIterator.NO_MORE_DOCS;
            break;
        }
        doc = _td.Doc;
    }

    return doc;
}
/// <summary>
/// Forwards the skip request to the wrapped <c>termDocs</c>, if there is one.
/// </summary>
/// <param name="target">The document number to skip to.</param>
/// <returns>
/// <c>false</c> when <c>termDocs</c> is null; otherwise whatever the wrapped
/// <c>SkipTo</c> call returns.
/// </returns>
public virtual bool SkipTo(int target)
{
    // Short-circuit keeps the null case from ever touching termDocs.
    return termDocs != null && termDocs.SkipTo(target);
}
/// <summary>
/// Skips the wrapped term docs to <paramref name="target"/> and records the
/// resulting position in <c>_doc</c>.
/// </summary>
/// <param name="target">The document number to skip to.</param>
/// <returns>
/// The document the term docs landed on, or
/// <c>DocIdSetIterator.NO_MORE_DOCS</c> when the skip fails; in that case the
/// term docs are disposed before returning.
/// </returns>
public override int Advance(int target)
{
    if (!_td.SkipTo(target))
    {
        // Exhausted: release the enumerator and report the sentinel.
        _td.Dispose();
        _doc = DocIdSetIterator.NO_MORE_DOCS;
        return _doc;
    }

    _doc = _td.Doc;
    return _doc;
}
// TODO: use this function to filter search results.
/// <summary>
/// Sums the frequency of <paramref name="searchTerm"/> in the "LineText" field
/// over the documents identified by the given rows.
/// </summary>
/// <param name="listFoundDocs">
/// Rows whose <c>LineNumber</c> is used as the document number to look up.
/// NOTE(review): the term docs enumerator only moves forward, so rows are
/// presumably expected in ascending <c>LineNumber</c> order; a row that lies
/// behind the current position contributes nothing. Confirm against callers.
/// </param>
/// <param name="searchTerm">The term text to look up in the "LineText" field.</param>
/// <returns>
/// The summed term frequency over the rows whose document actually contains the term.
/// </returns>
public int GetMatchWordCount(IEnumerable<SampleDataFileRow> listFoundDocs, string searchTerm)
{
    int totalFreq = 0;

    // Both the reader and the enumerator hold index resources; release them
    // even if the enumeration throws.
    using (IndexReader reader = IndexReader.Open(DirectoryFs, true))
    using (TermDocs termDocs = reader.TermDocs())
    {
        termDocs.Seek(new Term("LineText", searchTerm));

        // Doc/Freq are only meaningful once the enumerator has been advanced.
        bool positioned = false;

        foreach (SampleDataFileRow singleRow in listFoundDocs)
        {
            int target = singleRow.LineNumber;

            // SkipTo always moves beyond the current entry, so only call it when
            // the enumerator is still behind the requested document.
            if (!positioned || termDocs.Doc < target)
            {
                if (!termDocs.SkipTo(target))
                {
                    // No postings left for this term; later rows cannot match.
                    break;
                }
                positioned = true;
            }

            // SkipTo may land on a later document; count the frequency only when
            // the term really occurs in the requested one.
            if (termDocs.Doc == target)
            {
                totalFreq += termDocs.Freq;
            }
        }
    }

    return totalFreq;
}
/// <summary>
/// Skips to <paramref name="target"/> across the sub-readers: tries the
/// current sub-enumerator first (with the target rebased by
/// <c>base_Renamed</c>) and, if that fails, moves on to the next sub-reader
/// and retries.
/// </summary>
/// <param name="target">The document number to skip to.</param>
/// <returns>
/// <c>true</c> if some sub-enumerator could be positioned; <c>false</c> once
/// every sub-reader has been tried.
/// </returns>
public virtual bool SkipTo(int target)
{
    // The active sub-enumerator works with document numbers relative to its own start.
    bool hit = current != null && current.SkipTo(target - base_Renamed);
    if (hit)
    {
        return true;
    }

    // No sub-reader left to try.
    if (pointer >= readers.Length)
    {
        return false;
    }

    // Switch to the next sub-reader and retry the same target against it.
    base_Renamed = starts[pointer];
    current = TermDocs(pointer++);
    return SkipTo(target);
}
/// <summary>
/// Delegates the skip request to the wrapped <c>in_Renamed</c> enumerator.
/// </summary>
/// <param name="i">The document number to skip to.</param>
/// <returns>The result of the wrapped enumerator's <c>SkipTo</c> call.</returns>
public virtual bool SkipTo(int i)
{
    bool skipped = in_Renamed.SkipTo(i);
    return skipped;
}
/// <summary>
/// Builds a single-segment in-memory index with 10 "aaa" documents, 16 "bbb"
/// documents and 50 "ccc" documents, then checks the documents reached by
/// <c>Next</c> and <c>SkipTo</c> for each term, both after calling
/// <c>Next</c> first and directly after <c>Seek</c>.
/// </summary>
/// <param name="indexDivisor">Passed through to <c>IndexReader.Open</c>.</param>
public virtual void testSkipTo(int indexDivisor)
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    // Documents 0..9 contain "aaa".
    Term ta = new Term("content", "aaa");
    for (int n = 0; n < 10; n++)
    {
        AddDoc(writer, "aaa aaa aaa aaa");
    }

    // Documents 10..25 contain "bbb".
    Term tb = new Term("content", "bbb");
    for (int n = 0; n < 16; n++)
    {
        AddDoc(writer, "bbb bbb bbb bbb");
    }

    // Documents 26..75 contain "ccc".
    Term tc = new Term("content", "ccc");
    for (int n = 0; n < 50; n++)
    {
        AddDoc(writer, "ccc ccc ccc ccc");
    }

    // Make sure everything ends up in a single segment.
    writer.Optimize();
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, null, true, indexDivisor);
    TermDocs tdocs = reader.TermDocs();

    // --- "aaa": fewer documents than the assumed skipInterval of 16 (no optimization) ---

    // with next
    tdocs.Seek(ta);
    AssertNextLandsOn(tdocs, 0, 4);
    AssertNextLandsOn(tdocs, 1, 4);
    AssertSkipToLandsOn(tdocs, 0, 2);
    AssertSkipToLandsOn(tdocs, 4, 4);
    AssertSkipToLandsOn(tdocs, 9, 9);
    Assert.IsFalse(tdocs.SkipTo(10));

    // without next
    tdocs.Seek(ta);
    AssertSkipToLandsOn(tdocs, 0, 0);
    AssertSkipToLandsOn(tdocs, 4, 4);
    AssertSkipToLandsOn(tdocs, 9, 9);
    Assert.IsFalse(tdocs.SkipTo(10));

    // --- "bbb": exactly skipInterval documents, therefore with optimization ---

    // with next
    tdocs.Seek(tb);
    AssertNextLandsOn(tdocs, 10, 4);
    AssertNextLandsOn(tdocs, 11, 4);
    AssertSkipToLandsOn(tdocs, 5, 12);
    AssertSkipToLandsOn(tdocs, 15, 15);
    AssertSkipToLandsOn(tdocs, 24, 24);
    AssertSkipToLandsOn(tdocs, 25, 25);
    Assert.IsFalse(tdocs.SkipTo(26));

    // without next
    tdocs.Seek(tb);
    AssertSkipToLandsOn(tdocs, 5, 10);
    AssertSkipToLandsOn(tdocs, 15, 15);
    AssertSkipToLandsOn(tdocs, 24, 24);
    AssertSkipToLandsOn(tdocs, 25, 25);
    Assert.IsFalse(tdocs.SkipTo(26));

    // --- "ccc": far more than skipInterval documents, therefore with optimization ---

    // with next
    tdocs.Seek(tc);
    AssertNextLandsOn(tdocs, 26, 4);
    AssertNextLandsOn(tdocs, 27, 4);
    AssertSkipToLandsOn(tdocs, 5, 28);
    AssertSkipToLandsOn(tdocs, 40, 40);
    AssertSkipToLandsOn(tdocs, 57, 57);
    AssertSkipToLandsOn(tdocs, 74, 74);
    AssertSkipToLandsOn(tdocs, 75, 75);
    Assert.IsFalse(tdocs.SkipTo(76));

    // without next
    tdocs.Seek(tc);
    AssertSkipToLandsOn(tdocs, 5, 26);
    AssertSkipToLandsOn(tdocs, 40, 40);
    AssertSkipToLandsOn(tdocs, 57, 57);
    AssertSkipToLandsOn(tdocs, 74, 74);
    AssertSkipToLandsOn(tdocs, 75, 75);
    Assert.IsFalse(tdocs.SkipTo(76));

    tdocs.Close();
    reader.Close();
    dir.Close();
}

// Asserts that Next() succeeds and leaves the enumerator on the expected document and frequency.
private static void AssertNextLandsOn(TermDocs tdocs, int expectedDoc, int expectedFreq)
{
    Assert.IsTrue(tdocs.Next());
    Assert.AreEqual(expectedDoc, tdocs.Doc());
    Assert.AreEqual(expectedFreq, tdocs.Freq());
}

// Asserts that SkipTo(target) succeeds and leaves the enumerator on the expected document.
private static void AssertSkipToLandsOn(TermDocs tdocs, int target, int expectedDoc)
{
    Assert.IsTrue(tdocs.SkipTo(target));
    Assert.AreEqual(expectedDoc, tdocs.Doc());
}
/// <summary>
/// Skips the wrapped term docs to <paramref name="target"/> and stores the
/// outcome in <c>_docid</c>.
/// </summary>
/// <param name="target">The document number to skip to.</param>
/// <returns>
/// The document the term docs landed on, or <c>NO_MORE_DOCS</c> when the skip fails.
/// </returns>
public override int Advance(int target)
{
    int landed;
    if (_termDocs.SkipTo(target))
    {
        landed = _termDocs.Doc;
    }
    else
    {
        landed = NO_MORE_DOCS;
    }

    _docid = landed;
    return landed;
}
/// <summary>
/// Delegates the skip request, together with the supplied state, to the
/// wrapped <c>in_Renamed</c> enumerator.
/// </summary>
/// <param name="i">The document number to skip to.</param>
/// <param name="state">State object handed through unchanged to the wrapped call.</param>
/// <returns>The result of the wrapped enumerator's <c>SkipTo</c> call.</returns>
public virtual bool SkipTo(int i, IState state)
{
    bool skipped = in_Renamed.SkipTo(i, state);
    return skipped;
}
/// <summary>
/// Delegates the skip request to the wrapped <c>termDocs</c> enumerator.
/// </summary>
/// <param name="target">The document number to skip to.</param>
/// <returns>The result of the wrapped enumerator's <c>SkipTo</c> call.</returns>
public virtual bool SkipTo(int target)
{
    bool skipped = termDocs.SkipTo(target);
    return skipped;
}
/// <summary>
/// Builds an explanation of this weight's score for one document as the
/// product of a query-weight part (boost, idf, queryNorm) and a field-weight
/// part (tf, idf, fieldNorm).
/// </summary>
/// <param name="reader">Reader used to look up the term's frequency and the field norms.</param>
/// <param name="doc">The document number being explained.</param>
/// <returns>
/// The field-weight explanation alone when the query weight's value is exactly
/// 1.0; otherwise the combined explanation.
/// </returns>
public override Explanation Explain(IndexReader reader, int doc)
{
    ComplexExplanation combined = new ComplexExplanation();
    combined.Description = "weight(" + Query + " in " + doc + "), product of:";

    // The idf detail is shared by the query-weight and field-weight parts.
    Explanation idfDetail = new Explanation(idf, idfExp.Explain());

    // ---- query weight ----
    Explanation queryPart = new Explanation();
    queryPart.Description = "queryWeight(" + Query + "), product of:";

    Explanation boostDetail = new Explanation(Enclosing_Instance.Boost, "boost");
    if (Enclosing_Instance.Boost != 1.0f)
    {
        // A boost of exactly 1 is left out of the details but still enters the product below.
        queryPart.AddDetail(boostDetail);
    }
    queryPart.AddDetail(idfDetail);

    Explanation normDetail = new Explanation(queryNorm, "queryNorm");
    queryPart.AddDetail(normDetail);
    queryPart.Value = boostDetail.Value * idfDetail.Value * normDetail.Value;
    combined.AddDetail(queryPart);

    // ---- field weight ----
    System.String field = Enclosing_Instance.term.Field;
    ComplexExplanation fieldPart = new ComplexExplanation();
    fieldPart.Description = "fieldWeight(" + Enclosing_Instance.term + " in " + doc + "), product of:";

    Explanation tfDetail = new Explanation();
    int tf = 0;
    TermDocs termDocs = reader.TermDocs(enclosingInstance.term);
    if (termDocs == null)
    {
        tfDetail.Value = 0.0f;
        tfDetail.Description = "no matching term";
    }
    else
    {
        try
        {
            // SkipTo can land past the requested document, so the position is
            // checked before the frequency is used.
            bool onRequestedDoc = termDocs.SkipTo(doc) && termDocs.Doc == doc;
            if (onRequestedDoc)
            {
                tf = termDocs.Freq;
            }
        }
        finally
        {
            termDocs.Close();
        }

        tfDetail.Value = similarity.Tf(tf);
        tfDetail.Description = "tf(termFreq(" + enclosingInstance.term + ")=" + tf + ")";
    }
    fieldPart.AddDetail(tfDetail);
    fieldPart.AddDetail(idfDetail);

    // A field without stored norms is treated as having a norm of 1.
    Explanation fieldNormDetail = new Explanation();
    byte[] norms = reader.Norms(field);
    float decodedNorm;
    if (norms != null)
    {
        decodedNorm = Similarity.DecodeNorm(norms[doc]);
    }
    else
    {
        decodedNorm = 1.0f;
    }
    fieldNormDetail.Value = decodedNorm;
    fieldNormDetail.Description = "fieldNorm(field=" + field + ", doc=" + doc + ")";
    fieldPart.AddDetail(fieldNormDetail);

    fieldPart.Match = tfDetail.IsMatch;
    fieldPart.Value = tfDetail.Value * idfDetail.Value * fieldNormDetail.Value;
    combined.AddDetail(fieldPart);

    System.Boolean? fieldMatch = fieldPart.Match;
    combined.Match = fieldMatch;

    // ---- combine ----
    combined.Value = queryPart.Value * fieldPart.Value;

    if (queryPart.Value == 1.0f)
    {
        return fieldPart;
    }
    return combined;
}