public RegexTermEnum(IndexReader reader, Term term, IRegexCapabilities regexImpl)
{
    // Remember the field this enumerator walks and hand the pattern to the regex engine.
    _sField = term.Field();
    _regexImpl = regexImpl;

    string pattern = term.Text();
    _regexImpl.Compile(pattern);

    // If the engine can extract a constant prefix, start the term enumeration at the
    // first possibly-matching term instead of the beginning of the field.
    string prefix = _regexImpl.Prefix();
    _sPre = prefix == null ? "" : prefix;

    SetEnum(reader.Terms(new Term(term.Field(), _sPre)));
}
/// <summary>
/// Walks every term exposed by the reader and verifies each term's text occurs in the
/// stored value recorded for its field in DocHelper.nameValues; then checks that
/// TermDocs can seek to two known terms and report at least one hit, and that
/// TermPositions seeks to document 0 with a valid (non-negative) first position.
/// NOTE(review): assumes DocHelper.nameValues contains an entry for every indexed
/// field — a missing entry would surface as a NullReferenceException on IndexOf;
/// confirm against DocHelper.
/// </summary>
public virtual void TestTerms() { TermEnum terms = reader.Terms(); Assert.IsTrue(terms != null); while (terms.Next() == true) { Term term = terms.Term(); Assert.IsTrue(term != null); //System.out.println("Term: " + term); System.String fieldValue = (System.String)DocHelper.nameValues[term.Field()]; Assert.IsTrue(fieldValue.IndexOf(term.Text()) != -1); } TermDocs termDocs = reader.TermDocs(); Assert.IsTrue(termDocs != null); termDocs.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field")); Assert.IsTrue(termDocs.Next() == true); termDocs.Seek(new Term(DocHelper.NO_NORMS_KEY, DocHelper.NO_NORMS_TEXT)); Assert.IsTrue(termDocs.Next() == true); TermPositions positions = reader.TermPositions(); positions.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field")); Assert.IsTrue(positions != null); Assert.IsTrue(positions.Doc() == 0); Assert.IsTrue(positions.NextPosition() >= 0); }
/// <summary>
/// Indexes 5000 single-term documents, optimizes to one segment, then swaps the
/// segment's frequency stream for a byte-counting wrapper and asserts that SkipTo
/// reads no more than an expected number of bytes at several targets — i.e. that
/// the multi-level skip list is actually consulted instead of scanning postings.
/// The outer loop runs twice to confirm the counters behave identically after a
/// re-seek to the same term.
/// </summary>
public virtual void TestSimpleSkip() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Term term = new Term("test", "a"); for (int i = 0; i < 5000; i++) { Document d1 = new Document(); d1.Add(new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(d1); } writer.Flush(); writer.Optimize(); writer.Close(); IndexReader reader = SegmentReader.GetOnlySegmentReader(dir); SegmentTermPositions tp = (SegmentTermPositions) reader.TermPositions(); tp.freqStream_ForNUnit = new CountingStream(this, tp.freqStream_ForNUnit); for (int i = 0; i < 2; i++) { counter = 0; tp.Seek(term); CheckSkipTo(tp, 14, 185); // no skips CheckSkipTo(tp, 17, 190); // one skip on level 0 CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0 // this test would fail if we had only one skip level, // because than more bytes would be read from the freqStream CheckSkipTo(tp, 4800, 250); // one skip on level 2 } }
/// <summary>
/// Variant of the term-enumeration test: verifies every term's text appears in the
/// stored value for its field, then seeks TermDocs/TermPositions to a known term
/// ("Field" in TEXT_FIELD_1_KEY) and checks the first hit is doc 0 with a valid
/// position. Any IOException is converted to a bare Assert.IsTrue(false).
/// NOTE(review): the catch discards the exception message and stack from the test
/// report — only the printed stderr trace remains; consider letting it propagate.
/// </summary>
public virtual void TestTerms() { try { TermEnum terms = reader.Terms(); Assert.IsTrue(terms != null); while (terms.Next() == true) { Term term = terms.Term(); Assert.IsTrue(term != null); //System.out.println("Term: " + term); System.String fieldValue = (System.String)DocHelper.nameValues[term.Field()]; Assert.IsTrue(fieldValue.IndexOf(term.Text()) != -1); } TermDocs termDocs = reader.TermDocs(); Assert.IsTrue(termDocs != null); termDocs.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "Field")); Assert.IsTrue(termDocs.Next() == true); TermPositions positions = reader.TermPositions(); positions.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "Field")); Assert.IsTrue(positions != null); Assert.IsTrue(positions.Doc() == 0); Assert.IsTrue(positions.NextPosition() >= 0); } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); Assert.IsTrue(false); } }
/// <summary>
/// Duplicate of the skip-list test: indexes 5000 one-term docs, optimizes to a
/// single segment, wraps the segment's freq stream in a byte-counting stream and
/// asserts SkipTo reads at most the expected number of bytes for each target,
/// proving the multi-level skip list is used. Runs twice to check re-seek behavior.
/// </summary>
public virtual void TestSimpleSkip() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Term term = new Term("test", "a"); for (int i = 0; i < 5000; i++) { Document d1 = new Document(); d1.Add(new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(d1); } writer.Flush(); writer.Optimize(); writer.Close(); IndexReader reader = SegmentReader.GetOnlySegmentReader(dir); SegmentTermPositions tp = (SegmentTermPositions)reader.TermPositions(); tp.freqStream_ForNUnit = new CountingStream(this, tp.freqStream_ForNUnit); for (int i = 0; i < 2; i++) { counter = 0; tp.Seek(term); CheckSkipTo(tp, 14, 185); // no skips CheckSkipTo(tp, 17, 190); // one skip on level 0 CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0 // this test would fail if we had only one skip level, // because than more bytes would be read from the freqStream CheckSkipTo(tp, 4800, 250); // one skip on level 2 } }
/// <summary>
/// Lucene 4.x port of the skip-list test: writes 5000 one-term docs with the
/// Lucene41 postings format, force-merges to one segment, then obtains a
/// DocsAndPositionsEnum for the term and asserts each Advance target stays within
/// an expected I/O budget (tracked by the CountingRAMDirectory via Counter),
/// proving multi-level skipping. Runs twice to check re-creation of the enum.
/// </summary>
public virtual void TestSimpleSkip() { Directory dir = new CountingRAMDirectory(this, new RAMDirectory()); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat())).SetMergePolicy(NewLogMergePolicy())); Term term = new Term("test", "a"); for (int i = 0; i < 5000; i++) { Document d1 = new Document(); d1.Add(NewTextField(term.Field(), term.Text(), Field.Store.NO)); writer.AddDocument(d1); } writer.Commit(); writer.ForceMerge(1); writer.Dispose(); AtomicReader reader = GetOnlySegmentReader(DirectoryReader.Open(dir)); for (int i = 0; i < 2; i++) { Counter = 0; DocsAndPositionsEnum tp = reader.TermPositionsEnum(term); CheckSkipTo(tp, 14, 185); // no skips CheckSkipTo(tp, 17, 190); // one skip on level 0 CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0 // this test would fail if we had only one skip level, // because than more bytes would be read from the freqStream CheckSkipTo(tp, 4800, 250); // one skip on level 2 } }
// NOTE(review): SeekExact positions the per-leaf TermsEnum; the captured TermState is
// registered under the leaf ordinal so the TermContext can later recreate positioned
// enums without re-seeking. Leaves whose Fields/Terms are null are simply skipped.
/// <summary> /// Creates a <seealso cref="TermContext"/> from a top-level <seealso cref="IndexReaderContext"/> and the /// given <seealso cref="Term"/>. this method will lookup the given term in all context's leaf readers /// and register each of the readers containing the term in the returned <seealso cref="TermContext"/> /// using the leaf reader's ordinal. /// <p> /// Note: the given context must be a top-level context. /// </summary> public static TermContext Build(IndexReaderContext context, Term term) { Debug.Assert(context != null && context.IsTopLevel); string field = term.Field(); BytesRef bytes = term.Bytes(); TermContext perReaderTermState = new TermContext(context); //if (DEBUG) System.out.println("prts.build term=" + term); foreach (AtomicReaderContext ctx in context.Leaves) { //if (DEBUG) System.out.println(" r=" + leaves[i].reader); Fields fields = ctx.AtomicReader.Fields; if (fields != null) { Terms terms = fields.Terms(field); if (terms != null) { TermsEnum termsEnum = terms.Iterator(null); if (termsEnum.SeekExact(bytes)) { TermState termState = termsEnum.TermState(); //if (DEBUG) System.out.println(" found"); perReaderTermState.Register(termState, ctx.Ord, termsEnum.DocFreq(), termsEnum.TotalTermFreq()); } } } } return(perReaderTermState); }
/// <summary>
/// Returns the total number of occurrences of <code>term</code> across all
/// documents, or 0 when the field or term does not exist. Deleted documents
/// that have not yet been merged away are still counted.
/// </summary>
public override sealed long TotalTermFreq(Term term)
{
    Fields allFields = Fields;
    if (allFields == null)
    {
        return 0;
    }

    Terms fieldTerms = allFields.Terms(term.Field());
    if (fieldTerms == null)
    {
        return 0;
    }

    TermsEnum te = fieldTerms.Iterator(null);
    return te.SeekExact(term.Bytes()) ? te.TotalTermFreq() : 0;
}
// Document frequency of a term: delegate to the sub-reader that owns the term's
// field, or 0 when no reader is registered for that field.
public override int DocFreq(Term term)
{
    EnsureOpen();
    IndexReader fieldReader = (IndexReader)fieldToReader[term.Field()];
    if (fieldReader == null)
    {
        return 0;
    }
    return fieldReader.DocFreq(term);
}
/// <summary>
/// Advances to the next term across all fields of the ParallelReader. First tries the
/// current field's enumerator (reference-comparing interned field names); when that
/// field is exhausted, lazily builds a SortedList "tail map" of the remaining fields
/// (everything >= the current field), skips the current field itself, then opens a
/// fresh enumerator per subsequent field until one yields a term for that field.
/// Returns false when every field is exhausted.
/// NOTE(review): field-name comparisons rely on Lucene's string interning — confirm
/// all field names flowing in here are interned.
/// </summary>
public override bool Next() { if (termEnum == null) { return(false); } // another term in this field? if (termEnum.Next() && (System.Object)termEnum.Term().Field() == (System.Object)field) { return(true); // yes, keep going } termEnum.Close(); // close old termEnum // find the next field with terms, if any if (fieldIterator == null) { System.Collections.Comparer comparer = System.Collections.Comparer.Default; System.Collections.SortedList newList = new System.Collections.SortedList(); if (Enclosing_Instance.fieldToReader != null) { if (Enclosing_Instance.fieldToReader.Count > 0) { int index = 0; while (comparer.Compare(Enclosing_Instance.fieldToReader.GetKey(index), field) < 0) { index++; } for (; index < Enclosing_Instance.fieldToReader.Count; index++) { newList.Add(Enclosing_Instance.fieldToReader.GetKey(index), Enclosing_Instance.fieldToReader[Enclosing_Instance.fieldToReader.GetKey(index)]); } } } fieldIterator = newList.Keys.GetEnumerator(); fieldIterator.MoveNext(); System.Object generatedAux = fieldIterator.Current; // Skip field to get next one } while (fieldIterator.MoveNext()) { field = ((System.String)fieldIterator.Current); termEnum = ((IndexReader)Enclosing_Instance.fieldToReader[field]).Terms(new Term(field)); Term term = termEnum.Term(); if (term != null && (System.Object)term.Field() == (System.Object)field) { return(true); } else { termEnum.Close(); } } return(false); // no more fields }
/// <summary>
/// Accepts only the term whose field and text both match the stored prefix term;
/// any other term ends the enumeration (endEnum = true).
/// The field comparison is by reference, which relies on Lucene interning field
/// names. NOTE(review): text is compared with Equals rather than StartsWith, so
/// this matches exactly one term — confirm single-term semantics (as opposed to
/// prefix matching) is intended here.
/// </summary>
public override bool TermCompare(Term term) { prefix = base.GetPrefixTerm(); if ((System.Object)term.Field() == (System.Object)prefix.Field() && term.Text().Equals(prefix.Text())) { return true; } endEnum = true; return false; }
// used only by assert
/// <summary>
/// Debug-only invariant: buffered delete terms must arrive in strictly increasing
/// order. Records a deep copy of the incoming term's bytes (the merged iterable
/// re-uses/mutates the Term instance, so storing it directly would corrupt the
/// comparison) and always returns true so it can sit inside a Debug.Assert.
/// </summary>
private bool CheckDeleteTerm(Term term) { if (term != null) { Debug.Assert(LastDeleteTerm == null || term.CompareTo(LastDeleteTerm) > 0, "lastTerm=" + LastDeleteTerm + " vs term=" + term); } // TODO: we re-use term now in our merged iterable, but we shouldn't clone, instead copy for this assert LastDeleteTerm = term == null ? null : new Term(term.Field(), BytesRef.DeepCopyOf(term.Bytes_Renamed)); return(true); }
/// <summary>
/// Returns a <seealso cref="DocsAndPositionsEnum"/> positioned for the given term,
/// or null when the field or term does not exist or positions were not indexed.
/// </summary>
/// <seealso cref= TermsEnum#docsAndPositions(Bits, DocsAndPositionsEnum) </seealso>
public DocsAndPositionsEnum TermPositionsEnum(Term term)
{
    Debug.Assert(term.Field() != null);
    Debug.Assert(term.Bytes() != null);

    // Guard-clause form: bail out as soon as any lookup level comes back empty.
    Fields allFields = Fields;
    if (allFields == null)
    {
        return null;
    }

    Terms fieldTerms = allFields.Terms(term.Field());
    if (fieldTerms == null)
    {
        return null;
    }

    TermsEnum te = fieldTerms.Iterator(null);
    if (!te.SeekExact(term.Bytes()))
    {
        return null;
    }

    return te.DocsAndPositions(LiveDocs, null);
}
/// <summary>
/// Decides whether an enumerated term matches the compiled regular expression.
/// Terms outside the field, or past the constant prefix, terminate the enumeration.
/// </summary>
public override bool TermCompare(Term term)
{
    // A different field can never match — stop enumerating.
    if (_sField != term.Field())
    {
        _bEndEnum = true;
        return false;
    }

    string candidate = term.Text();
    if (!candidate.StartsWith(_sPre))
    {
        // Past the constant prefix: no later term in sorted order can match either.
        _bEndEnum = true;
        return false;
    }

    return _regexImpl.Match(candidate);
}
public ParallelTermEnum(ParallelReader enclosingInstance, Term term)
{
    InitBlock(enclosingInstance);
    field = term.Field();

    // Only create a term enumeration when some sub-reader actually indexes the field;
    // otherwise termEnum stays null and Next() reports exhaustion immediately.
    IndexReader fieldReader = (IndexReader)Enclosing_Instance.fieldToReader[field];
    if (fieldReader != null)
    {
        termEnum = fieldReader.Terms(term);
    }
}
/// <param name="term"> The term documents need to have in order to be a match for this filter. </param>
/// <exception cref="System.ArgumentNullException">If <paramref name="term"/> is null.</exception>
/// <exception cref="System.ArgumentException">If the term's field is null.</exception>
public TermFilter(Term term)
{
    // BUGFIX: a null argument should raise ArgumentNullException per the framework
    // guidelines. ArgumentNullException derives from ArgumentException, so callers
    // catching the old type keep working.
    if (term == null)
    {
        throw new System.ArgumentNullException("term", "Term must not be null");
    }
    if (term.Field() == null)
    {
        throw new System.ArgumentException("Field must not be null");
    }
    this.term = term;
}
// Resolves the term's postings (restricted to the given live-docs bits) and
// flattens the matching document ids into an array; null when the term is absent.
public virtual int[] ToDocsArray(Term term, Bits bits, IndexReader reader)
{
    Fields allFields = MultiFields.GetFields(reader);
    Terms fieldTerms = allFields.Terms(term.Field());
    TermsEnum te = fieldTerms.Iterator(null);

    if (!te.SeekExact(new BytesRef(term.Text())))
    {
        return null;
    }

    DocsEnum postings = TestUtil.Docs(Random(), te, bits, null, DocsEnum.FLAG_NONE);
    return ToArray(postings);
}
/// <summary>
/// Advances to the next term across all fields of the ParallelReader. First tries the
/// current field's enumerator (reference-comparing interned field names); when that
/// field is exhausted, lazily builds a snapshot list of the current field and every
/// field after it (emulating Java's fieldToReader.tailMap(field)), then walks the
/// remaining fields opening a fresh enumerator per field until one yields a term for
/// that field. Returns false when all fields are exhausted.
/// NOTE(review): unlike the Java original, the first MoveNext() here lands on the
/// current field itself (the tail list includes it and no explicit skip is done);
/// its enumerator was just exhausted, so re-opening it yields no match — confirm
/// this re-open is intentional and not a correctness issue.
/// </summary>
public override bool Next() { if (termEnum == null) { return(false); } // another term in this field? if (termEnum.Next() && (System.Object)termEnum.Term().Field() == (System.Object)field) { return(true); // yes, keep going } termEnum.Close(); // close old termEnum // find the next field with terms, if any if (fieldIterator == null) { List <string> tmpList = new List <string>(); bool m = false; //JAVA: fieldIterator = fieldToReader.tailMap(field).keySet().iterator(); //JAVA: fieldIterator.next(); // Skip field to get next one foreach (string key in Enclosing_Instance.fieldToReader.Keys) { if (key == field && m == false) { m = true; } if (m) { tmpList.Add(key); } } fieldIterator = tmpList.GetEnumerator(); } while (fieldIterator.MoveNext()) { field = fieldIterator.Current; termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field)); Term term = termEnum.Term(); if (term != null && (System.Object)term.Field() == (System.Object)field) { return(true); } else { termEnum.Close(); } } return(false); // no more fields }
// Loads a term into this buffer; a null term clears it back to the empty state.
public void Set(Term term)
{
    if (term == null)
    {
        Reset();
        return;
    }

    string termText = term.Text();
    SetTextLength(termText.Length);
    text = termText.ToCharArray();
    this.field = term.Field();
    this.term = term;
}
// Counts how many live documents in r contain the term t.
public static int Count(Term t, IndexReader r)
{
    DocsEnum postings = TestUtil.Docs(Random(), r, t.Field(), new BytesRef(t.Text()), MultiFields.GetLiveDocs(r), null, 0);
    if (postings == null)
    {
        return 0;
    }

    int hits = 0;
    while (postings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
    {
        // The original also read DocID() per hit; the call is kept for an identical trace.
        postings.DocID();
        hits++;
    }
    return hits;
}
/// <summary>
/// Collects every posting of the given term as a TermDoc
/// (document id, in-document frequency, and field norm).
/// </summary>
/// <param name="term">Term whose postings are enumerated.</param>
/// <returns>One TermDoc per matching document; empty list when the term is absent.</returns>
public IList<TermDoc> DocumentCount(Term term)
{
    TermDocs docs = open.Reader.TermDocs(term);
    List<TermDoc> list = new List<TermDoc>();
    try
    {
        while (docs.Next())
        {
            TermDoc doc = new TermDoc();
            doc.Freq = docs.Freq();
            doc.Doc = docs.Doc();
            doc.Term = term;
            doc.Norm = GetNorm(open.Reader, term.Field(), doc.Doc);
            list.Add(doc);
        }
    }
    finally
    {
        // BUGFIX: previously the enumerator leaked when Next()/Freq()/GetNorm threw;
        // Close() now always runs.
        docs.Close();
    }
    return list;
}
// Loads a term into this reusable buffer; null resets it to the empty state.
public void Set(Term term)
{
    if (term == null)
    {
        Reset();
        return;
    }

    string source = term.Text();
    int count = source.Length;
    // Resize the shared char buffer, bulk-copy the text, and flag it dirty so any
    // cached derived state gets rebuilt.
    text.SetLength(count);
    SupportClass.TextSupport.GetCharsFromString(source, 0, count, text.result, 0);
    dirty = true;
    field = term.Field();
    this.term = term;
}
// Loads a term into this reusable buffer; null resets it to the empty state.
public void Set(Term term)
{
    if (term == null)
    {
        Reset();
        return;
    }

    string source = term.Text();
    int count = source.Length;
    text.setLength(count);
    // Manual char-by-char copy into the shared result buffer (no allocation).
    for (int pos = 0; pos < count; pos++)
    {
        text.result[pos] = source[pos];
    }
    dirty = true;
    field = term.Field();
    this.term = term;
}
// Loads a term into this buffer; null resets it to the empty state.
public void Set(Term term)
{
    if (term == null)
    {
        Reset();
        return;
    }

    // Size the buffer first, then copy the term text character by character.
    string source = term.Text();
    SetTextLength(source.Length);
    for (int pos = 0; pos < source.Length; pos++)
    {
        text[pos] = source[pos];
    }
    this.field = term.Field();
    this.term = term;
}
/// <summary>
/// Advances to the next term across all fields of the ParallelReader. First tries the
/// current field's enumerator (reference-comparing interned field names); when that
/// field is exhausted, lazily obtains the tail map of fields >= the current one via
/// SupportClass.TailMap, consumes one entry to skip the current field, then opens a
/// fresh enumerator per subsequent field (seeded with an empty-text term) until one
/// yields a term for that field. Returns false when all fields are exhausted.
/// </summary>
public override bool Next() { if (termEnum == null) { return(false); } // another term in this field? if (termEnum.Next() && (System.Object)termEnum.Term().Field() == (System.Object)field) { return(true); // yes, keep going } termEnum.Close(); // close old termEnum // find the next field with terms, if any if (fieldIterator == null) { fieldIterator = SupportClass.TailMap(Enclosing_Instance.fieldToReader, field).Keys.GetEnumerator(); fieldIterator.MoveNext(); // Skip field to get next one } while (fieldIterator.MoveNext()) { field = ((System.String)fieldIterator.Current); termEnum = ((IndexReader)Enclosing_Instance.fieldToReader[field]).Terms(new Term(field, "")); Term term = termEnum.Term(); if (term != null && (System.Object)term.Field() == (System.Object)field) { return(true); } else { termEnum.Close(); } } return(false); // no more fields }
/// <summary>
/// Advances to the next term across all fields of the ParallelReader. First tries the
/// current field's enumerator (reference-comparing interned field names); when that
/// field is exhausted, lazily obtains the tail map of fields >= the current one via
/// SupportClass.CollectionsSupport.TailMap and walks the remaining fields, opening a
/// fresh enumerator per field until one yields a term for that field. Returns false
/// when all fields are exhausted.
/// NOTE(review): unlike the sibling variant, the current field is not explicitly
/// skipped before the loop, so its just-exhausted enumerator is re-opened once —
/// confirm this is harmless.
/// </summary>
public override bool Next() { if (termEnum == null) { return(false); } // another term in this field? if (termEnum.Next() && (object)termEnum.Term().Field() == (object)field) { return(true); // yes, keep going } termEnum.Close(); // close old termEnum // find the next field with terms, if any if (fieldIterator == null) { fieldIterator = SupportClass.CollectionsSupport.TailMap(Enclosing_Instance.fieldToReader, field).Keys.GetEnumerator(); } while (fieldIterator.MoveNext()) { field = fieldIterator.Current; termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field)); Term term = termEnum.Term(); if (term != null && (object)term.Field() == (object)field) { return(true); } else { termEnum.Close(); } } return(false); // no more fields }
//--
// Applies the field/text visitors to a term; returns the original instance when
// neither component changed, avoiding a needless allocation.
public virtual Term VisitTerm(Term term)
{
    string originalField = term.Field();
    string originalText = term.Text();

    string newField = VisitField(originalField);
    string newText = VisitFieldText(originalText);

    bool untouched = originalField == newField && originalText == newText;
    return untouched ? term : new Term(newField, newText);
}
// Renders a term as "field:text" for display/diagnostics.
private string TermToString(Term t)
{
    return t.Field() + ":" + t.Text();
}
// Positions this enumerator at the term via the sub-reader that owns the term's
// field; termDocs becomes null when no sub-reader indexes that field.
public virtual void Seek(Term term)
{
    IndexReader fieldReader = (IndexReader)Enclosing_Instance.fieldToReader[term.Field()];
    termDocs = fieldReader == null ? null : fieldReader.TermDocs(term);
}
// Document frequency of a term, delegated to the sub-reader owning its field.
public override int DocFreq(Term term)
{
    EnsureOpen();
    // No sub-reader registered for the field means the term cannot occur.
    IndexReader sub = (IndexReader)fieldToReader[term.Field()];
    return sub != null ? sub.DocFreq(term) : 0;
}
// Builds a term enumerator for the sub-reader that indexes the given term's field.
public ParallelTermEnum(ParallelReader enclosingInstance, Term term)
{
    InitBlock(enclosingInstance);
    field = term.Field();
    // BUGFIX: guard against a field no sub-reader indexes — the previous
    // unconditional dereference threw NullReferenceException. The sibling
    // overloads of this constructor already apply the same guard, leaving
    // termEnum null so Next() reports exhaustion immediately.
    IndexReader reader = (IndexReader)Enclosing_Instance.fieldToReader[field];
    if (reader != null)
    {
        termEnum = reader.Terms(term);
    }
}
/// <summary>
/// Builds a single-segment index with three terms of increasing doc frequency
/// (aaa x10, bbb x16, ccc x50), then for each term exercises DocsEnum both via
/// NextDoc() and via Advance() alone, asserting the exact doc ids and freqs at
/// every step. The three terms cover three skip-list regimes relative to the
/// assumed skipInterval of 16: fewer docs than the interval (no skip
/// optimization), exactly the interval, and many multiples of it.
/// The indexDivisor parameter controls the term-index sampling of the reader.
/// </summary>
public virtual void TestSkipTo(int indexDivisor) { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); Term ta = new Term("content", "aaa"); for (int i = 0; i < 10; i++) { AddDoc(writer, "aaa aaa aaa aaa"); } Term tb = new Term("content", "bbb"); for (int i = 0; i < 16; i++) { AddDoc(writer, "bbb bbb bbb bbb"); } Term tc = new Term("content", "ccc"); for (int i = 0; i < 50; i++) { AddDoc(writer, "ccc ccc ccc ccc"); } // assure that we deal with a single segment writer.ForceMerge(1); writer.Dispose(); IndexReader reader = DirectoryReader.Open(dir, indexDivisor); DocsEnum tdocs = TestUtil.Docs(Random(), reader, ta.Field(), new BytesRef(ta.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS); // without optimization (assumption skipInterval == 16) // with next Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(0, tdocs.DocID()); Assert.AreEqual(4, tdocs.Freq()); Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(1, tdocs.DocID()); Assert.AreEqual(4, tdocs.Freq()); Assert.IsTrue(tdocs.Advance(2) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(2, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(4) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(4, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(9) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(9, tdocs.DocID()); Assert.IsFalse(tdocs.Advance(10) != DocIdSetIterator.NO_MORE_DOCS); // without next tdocs = TestUtil.Docs(Random(), reader, ta.Field(), new BytesRef(ta.Text()), MultiFields.GetLiveDocs(reader), null, 0); Assert.IsTrue(tdocs.Advance(0) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(0, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(4) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(4, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(9) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(9, tdocs.DocID()); 
Assert.IsFalse(tdocs.Advance(10) != DocIdSetIterator.NO_MORE_DOCS); // exactly skipInterval documents and therefore with optimization // with next tdocs = TestUtil.Docs(Random(), reader, tb.Field(), new BytesRef(tb.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS); Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(10, tdocs.DocID()); Assert.AreEqual(4, tdocs.Freq()); Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(11, tdocs.DocID()); Assert.AreEqual(4, tdocs.Freq()); Assert.IsTrue(tdocs.Advance(12) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(12, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(15) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(15, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(24) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(24, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(25) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(25, tdocs.DocID()); Assert.IsFalse(tdocs.Advance(26) != DocIdSetIterator.NO_MORE_DOCS); // without next tdocs = TestUtil.Docs(Random(), reader, tb.Field(), new BytesRef(tb.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS); Assert.IsTrue(tdocs.Advance(5) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(10, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(15) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(15, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(24) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(24, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(25) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(25, tdocs.DocID()); Assert.IsFalse(tdocs.Advance(26) != DocIdSetIterator.NO_MORE_DOCS); // much more than skipInterval documents and therefore with optimization // with next tdocs = TestUtil.Docs(Random(), reader, tc.Field(), new BytesRef(tc.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS); Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(26, tdocs.DocID()); 
Assert.AreEqual(4, tdocs.Freq()); Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(27, tdocs.DocID()); Assert.AreEqual(4, tdocs.Freq()); Assert.IsTrue(tdocs.Advance(28) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(28, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(40) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(40, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(57) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(57, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(74) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(74, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(75) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(75, tdocs.DocID()); Assert.IsFalse(tdocs.Advance(76) != DocIdSetIterator.NO_MORE_DOCS); //without next tdocs = TestUtil.Docs(Random(), reader, tc.Field(), new BytesRef(tc.Text()), MultiFields.GetLiveDocs(reader), null, 0); Assert.IsTrue(tdocs.Advance(5) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(26, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(40) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(40, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(57) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(57, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(74) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(74, tdocs.DocID()); Assert.IsTrue(tdocs.Advance(75) != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(75, tdocs.DocID()); Assert.IsFalse(tdocs.Advance(76) != DocIdSetIterator.NO_MORE_DOCS); reader.Dispose(); dir.Dispose(); }
// Positions this enumerator at the term via the sub-reader owning the term's field.
// NOTE(review): assumes a sub-reader exists for that field — a missing field would
// throw NullReferenceException here, matching the original behavior.
public virtual void Seek(Term term)
{
    IndexReader sub = (IndexReader)Enclosing_Instance.fieldToReader[term.Field()];
    termDocs = sub.TermDocs(term);
}
/// <summary>
/// Records a buffered delete-by-term effective up to docIDUpto. Keeps only the
/// highest docIDUpto seen per term (monotonic update) so concurrent replace
/// operations scheduled out of order cannot resurrect both versions of a doc.
/// Increments the delete-term counter on every call (deliberate over-counting to
/// respect maxBufferedDeleteTerms) and charges estimated bytes only on first sight
/// of the term.
/// </summary>
public virtual void AddTerm(Term term, int docIDUpto) { int?current; Terms.TryGetValue(term, out current); if (current != null && docIDUpto < current) { // Only record the new number if it's greater than the // current one. this is important because if multiple // threads are replacing the same doc at nearly the // same time, it's possible that one thread that got a // higher docID is scheduled before the other // threads. If we blindly replace than we can // incorrectly get both docs indexed. return; } Terms[term] = Convert.ToInt32(docIDUpto); // note that if current != null then it means there's already a buffered // delete on that term, therefore we seem to over-count. this over-counting // is done to respect IndexWriterConfig.setMaxBufferedDeleteTerms. NumTermDeletes.IncrementAndGet(); if (current == null) { BytesUsed.AddAndGet(BYTES_PER_DEL_TERM + term.Bytes_Renamed.Length + (RamUsageEstimator.NUM_BYTES_CHAR * term.Field().Length)); } }
// Looks up the term's postings (filtered by the supplied bits) and returns the
// matching document ids as an array, or null when the term does not exist.
public virtual int[] ToDocsArray(Term term, Bits bits, IndexReader reader)
{
    Fields allFields = MultiFields.GetFields(reader);
    Terms fieldTerms = allFields.Terms(term.Field());
    TermsEnum te = fieldTerms.Iterator(null);

    if (!te.SeekExact(new BytesRef(term.Text())))
    {
        return null;
    }

    DocsEnum postings = TestUtil.Docs(Random(), te, bits, null, DocsEnum.FLAG_NONE);
    return ToArray(postings);
}
/// <summary>
/// Records a buffered delete-by-term effective up to docIDUpto. Keeps only the
/// highest docIDUpto seen per term (monotonic update) so concurrent replace
/// operations scheduled out of order cannot resurrect both versions of a doc.
/// Increments the delete-term counter on every call (deliberate over-counting to
/// respect maxBufferedDeleteTerms) and charges estimated bytes only on first sight
/// of the term.
/// </summary>
public virtual void AddTerm(Term term, int docIDUpto) { int? current; Terms.TryGetValue(term, out current); if (current != null && docIDUpto < current) { // Only record the new number if it's greater than the // current one. this is important because if multiple // threads are replacing the same doc at nearly the // same time, it's possible that one thread that got a // higher docID is scheduled before the other // threads. If we blindly replace than we can // incorrectly get both docs indexed. return; } Terms[term] = Convert.ToInt32(docIDUpto); // note that if current != null then it means there's already a buffered // delete on that term, therefore we seem to over-count. this over-counting // is done to respect IndexWriterConfig.setMaxBufferedDeleteTerms. NumTermDeletes.IncrementAndGet(); if (current == null) { BytesUsed.AddAndGet(BYTES_PER_DEL_TERM + term.Bytes_Renamed.Length + (RamUsageEstimator.NUM_BYTES_CHAR * term.Field().Length)); } }
// Document frequency of a term, routed to the sub-reader that indexes its field.
// NOTE(review): assumes the field is registered — a missing field throws
// NullReferenceException, matching the original behavior.
public override int DocFreq(Term term)
{
    IndexReader sub = (IndexReader)fieldToReader[term.Field()];
    return sub.DocFreq(term);
}
// Positions this enumerator at the term, obtaining a positions-capable
// TermPositions (not just TermDocs) from the field's sub-reader.
public override void Seek(Term term)
{
    IndexReader sub = (IndexReader)Enclosing_Instance.fieldToReader[term.Field()];
    termDocs = sub.TermPositions(term);
}
// Positions this enumerator at the term via the field's sub-reader; a field with
// no sub-reader yields a null termDocs (no postings).
public virtual void Seek(Term term)
{
    IndexReader sub = (IndexReader)Enclosing_Instance.fieldToReader[term.Field()];
    if (sub == null)
    {
        termDocs = null;
    }
    else
    {
        termDocs = sub.TermDocs(term);
    }
}
// Asserts that exactly `expected` live documents in `reader` contain `term`.
internal virtual void AssertTermDocsCount(string msg, IndexReader reader, Term term, int expected)
{
    DocsEnum postings = TestUtil.Docs(Random(), reader, term.Field(), new BytesRef(term.Text()), MultiFields.GetLiveDocs(reader), null, 0);

    int actual = 0;
    if (postings != null)
    {
        while (postings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
        {
            actual++;
        }
    }

    Assert.AreEqual(expected, actual, msg + ", count mismatch");
}
/// <summary>
/// Asserts two index readers are logically equivalent. Phase 1: using the idField
/// (interned) as a join key, builds an r2-docid -> r1-docid mapping, verifying each
/// id matches at most one live doc on each side and that the stored fields and term
/// vectors of the paired docs are equal (diagnostics are printed before rethrowing
/// on mismatch). Phase 2: walks both term enumerations in parallel; for each term,
/// packs (docid, freq) pairs into longs (r2 docids remapped into r1 space via the
/// phase-1 mapping, then sorted), asserts equal terms, doc freqs (only when no
/// deletes exist) and identical posting lists. Terminates when both enums run dry.
/// </summary>
public static void VerifyEquals(IndexReader r1, IndexReader r2, System.String idField) { Assert.AreEqual(r1.NumDocs(), r2.NumDocs()); bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc()); int[] r2r1 = new int[r2.MaxDoc()]; // r2 id to r1 id mapping TermDocs termDocs1 = r1.TermDocs(); TermDocs termDocs2 = r2.TermDocs(); // create mapping from id2 space to id2 based on idField idField = StringHelper.Intern(idField); TermEnum termEnum = r1.Terms(new Term(idField, "")); do { Term term = termEnum.Term(); if (term == null || (System.Object)term.Field() != (System.Object)idField) { break; } termDocs1.Seek(termEnum); if (!termDocs1.Next()) { // This doc is deleted and wasn't replaced termDocs2.Seek(termEnum); Assert.IsFalse(termDocs2.Next()); continue; } int id1 = termDocs1.Doc(); Assert.IsFalse(termDocs1.Next()); termDocs2.Seek(termEnum); Assert.IsTrue(termDocs2.Next()); int id2 = termDocs2.Doc(); Assert.IsFalse(termDocs2.Next()); r2r1[id2] = id1; // verify stored fields are equivalent try { VerifyEquals(r1.Document(id1), r2.Document(id2)); } catch (System.Exception t) { System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term); System.Console.Out.WriteLine(" d1=" + r1.Document(id1)); System.Console.Out.WriteLine(" d2=" + r2.Document(id2)); throw t; } try { // verify term vectors are equivalent VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2)); } catch (System.Exception e) { System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2); TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1); System.Console.Out.WriteLine(" d1=" + tv1); if (tv1 != null) { for (int i = 0; i < tv1.Length; i++) { System.Console.Out.WriteLine(" " + i + ": " + tv1[i]); } } TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2); System.Console.Out.WriteLine(" d2=" + tv2); if (tv2 != null) { for (int i = 0; i < tv2.Length; i++) { System.Console.Out.WriteLine(" " + i + ": " + tv2[i]); } } throw e; } 
}while (termEnum.Next()); termEnum.Close(); // Verify postings TermEnum termEnum1 = r1.Terms(new Term("", "")); TermEnum termEnum2 = r2.Terms(new Term("", "")); // pack both doc and freq into single element for easy sorting long[] info1 = new long[r1.NumDocs()]; long[] info2 = new long[r2.NumDocs()]; for (; ;) { Term term1, term2; // iterate until we get some docs int len1; for (; ;) { len1 = 0; term1 = termEnum1.Term(); if (term1 == null) { break; } termDocs1.Seek(termEnum1); while (termDocs1.Next()) { int d1 = termDocs1.Doc(); int f1 = termDocs1.Freq(); info1[len1] = (((long)d1) << 32) | f1; len1++; } if (len1 > 0) { break; } if (!termEnum1.Next()) { break; } } // iterate until we get some docs int len2; for (; ;) { len2 = 0; term2 = termEnum2.Term(); if (term2 == null) { break; } termDocs2.Seek(termEnum2); while (termDocs2.Next()) { int d2 = termDocs2.Doc(); int f2 = termDocs2.Freq(); info2[len2] = (((long)r2r1[d2]) << 32) | f2; len2++; } if (len2 > 0) { break; } if (!termEnum2.Next()) { break; } } if (!hasDeletes) { Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq()); } Assert.AreEqual(len1, len2); if (len1 == 0) { break; // no more terms } Assert.AreEqual(term1, term2); // sort info2 to get it into ascending docid System.Array.Sort(info2, 0, len2 - 0); // now compare for (int i = 0; i < len1; i++) { Assert.AreEqual(info1[i], info2[i]); } termEnum1.Next(); termEnum2.Next(); } }
// Loads a term into this buffer; a null term resets it to the empty state.
public void Set(Term term)
{
    if (term == null)
    {
        Reset();
        return;
    }

    string src = term.Text();
    int n = src.Length;
    // Resize the buffer first, then copy the text one character at a time.
    SetTextLength(n);
    for (int i = 0; i < n; i++)
    {
        text[i] = src[i];
    }
    this.field = term.Field();
    this.term = term;
}
/// <summary>
/// Duplicate-detection fast path: starts with every doc marked valid and only
/// clears bits for terms that occur in more than one document. For such a term the
/// first posting is consumed (plus one extra when keeping the first occurrence),
/// then every remaining posting is cleared; with KM_USE_LAST_OCCURRENCE the last
/// cleared doc's bit is restored afterwards. Field-name comparison relies on
/// Lucene's term-field interning (reference equality via ==).
/// NOTE(review): postings order is doc-id order, so "first/last occurrence" means
/// lowest/highest doc id for the term.
/// </summary>
private OpenBitSet FastBits(IndexReader reader) { OpenBitSet bits = new OpenBitSet(reader.MaxDoc()); bits.Set(0, reader.MaxDoc()); //assume all are valid Term startTerm = new Term(fieldName); TermEnum te = reader.Terms(startTerm); if (te != null) { Term currTerm = te.Term(); while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) //term fieldnames are interned { if (te.DocFreq() > 1) { int lastDoc = -1; //unset potential duplicates TermDocs td = reader.TermDocs(currTerm); td.Next(); if (keepMode == KM_USE_FIRST_OCCURRENCE) { td.Next(); } do { lastDoc = td.Doc(); bits.Clear(lastDoc); } while (td.Next()); if (keepMode == KM_USE_LAST_OCCURRENCE) { //restore the last bit bits.Set(lastDoc); } } if (!te.Next()) { break; } currTerm = te.Term(); } } return bits; }
// Delegates the doc-frequency query to the sub-reader that indexes the term's field.
// NOTE(review): no null guard — an unknown field throws NullReferenceException,
// matching the original behavior.
public override int DocFreq(Term term)
{
    IndexReader owner = (IndexReader)fieldToReader[term.Field()];
    return owner.DocFreq(term);
}
/// <summary>
/// Duplicate-detection slow/correct path: starts with every doc marked INvalid and
/// sets exactly one bit per distinct term value — the first posting when keeping
/// first occurrences, otherwise the last posting reached by draining the TermDocs.
/// Field-name comparison relies on Lucene's term-field interning (reference
/// equality via ==); the walk stops at the first term of a different field.
/// </summary>
private OpenBitSet CorrectBits(IndexReader reader) { OpenBitSet bits = new OpenBitSet(reader.MaxDoc()); //assume all are INvalid Term startTerm = new Term(fieldName); TermEnum te = reader.Terms(startTerm); if (te != null) { Term currTerm = te.Term(); while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) //term fieldnames are interned { int lastDoc = -1; //set non duplicates TermDocs td = reader.TermDocs(currTerm); if (td.Next()) { if (keepMode == KM_USE_FIRST_OCCURRENCE) { bits.Set(td.Doc()); } else { do { lastDoc = td.Doc(); } while (td.Next()); bits.Set(lastDoc); } } if (!te.Next()) { break; } currTerm = te.Term(); } } return bits; }
public ParallelTermEnum(ParallelReader enclosingInstance, Term term)
{
    InitBlock(enclosingInstance);
    field = term.Field();

    IndexReader sub = (IndexReader)Enclosing_Instance.fieldToReader[field];
    // Leave termEnum null when no sub-reader indexes this field, so the
    // enumeration is immediately exhausted.
    if (sub != null)
    {
        termEnum = sub.Terms(term);
    }
}
// Context-menu handler: runs a search for the term shown in the selected row of
// the terms list, formatted as "field:text".
private void contextMenuItemShowAll_Click(object sender, System.EventArgs e)
{
    // BUGFIX: SelectedItems is never null on a ListView — the real empty-selection
    // case is Count == 0, which previously made the [0] indexer throw
    // ArgumentOutOfRangeException. The null check is kept defensively.
    if (listTerms.SelectedItems == null || listTerms.SelectedItems.Count == 0)
        return;

    ListViewItem selItem = listTerms.SelectedItems[0];
    if (selItem == null)
        return;

    // SubItems[2] holds the field name wrapped in one delimiter character on each
    // side (e.g. "<name>"); strip those before building the query.
    string rawField = selItem.SubItems[2].Text.Trim();
    string field = rawField.Substring(1, rawField.Length - 2);
    string text = selItem.SubItems[3].Text;
    if (field == null || text == null)
        return;

    Term t = new Term(field, text);
    _luke.Search(t.Field() + ":" + t.Text());
}
// Positions this enumerator at the term via the field's sub-reader, requesting a
// positions-capable enumerator; null when no sub-reader indexes the field.
public override void Seek(Term term)
{
    IndexReader sub = (IndexReader)Enclosing_Instance.fieldToReader[term.Field()];
    termDocs = sub == null ? null : sub.TermPositions(term);
}
// Opens the directory and asserts that exactly numDocs documents contain term.
private void VerifyTermDocs(Directory dir, Term term, int numDocs)
{
    IndexReader reader = DirectoryReader.Open(dir);
    DocsEnum postings = TestUtil.Docs(Random(), reader, term.Field(), term.Bytes(), null, null, DocsEnum.FLAG_NONE);

    int seen = 0;
    while (postings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
    {
        seen++;
    }

    Assert.AreEqual(numDocs, seen);
    reader.Dispose();
}
// Renders a raw indexed term as human-readable text, decoding prefix-coded
// numeric and date values according to the field's per-field indexing info.
// Fields with no indexing info are shown verbatim with control characters
// replaced by '.' so they stay printable.
private string GetTermText(Term term)
{
    var field = term.Field();
    var text = term.Text();
    if (text == null)
        return null;

    var indexingInfo = SenseNet.ContentRepository.Schema.ContentTypeManager.GetPerFieldIndexingInfo(field);
    if (indexingInfo == null)
    {
        // Unknown field: make control characters visible and return as-is.
        var chars = text.ToCharArray();
        for (int i = 0; i < chars.Length; i++)
        {
            if (chars[i] < ' ')
                chars[i] = '.';
        }
        return new String(chars);
    }

    var handler = indexingInfo.IndexFieldHandler;
    switch (handler.IndexFieldType)
    {
        case SenseNet.Search.Indexing.IndexFieldType.String:
            return GetTermText(text);
        case SenseNet.Search.Indexing.IndexFieldType.Int:
            return Convert.ToString(NumericUtils.PrefixCodedToInt(text), CultureInfo.InvariantCulture);
        case SenseNet.Search.Indexing.IndexFieldType.Long:
            return Convert.ToString(NumericUtils.PrefixCodedToLong(text), CultureInfo.InvariantCulture);
        case SenseNet.Search.Indexing.IndexFieldType.Float:
            return Convert.ToString(NumericUtils.PrefixCodedToFloat(text), CultureInfo.InvariantCulture);
        case SenseNet.Search.Indexing.IndexFieldType.Double:
            return Convert.ToString(NumericUtils.PrefixCodedToDouble(text), CultureInfo.InvariantCulture);
        case SenseNet.Search.Indexing.IndexFieldType.DateTime:
            var date = new DateTime(NumericUtils.PrefixCodedToLong(text));
            // Use the shortest date format that still shows the stored precision.
            if (date.Hour == 0 && date.Minute == 0 && date.Second == 0)
                return GetTermText(date.ToString("yyyy-MM-dd"));
            if (date.Second == 0)
                return GetTermText(date.ToString("yyyy-MM-dd HH:mm"));
            return GetTermText(date.ToString("yyyy-MM-dd HH:mm:ss"));
        default:
            throw new NotImplementedException("Unknown IndexFieldType: " + handler.IndexFieldType);
    }
}
// Caches the given term: copies its text into the internal char buffer and
// records its field. Passing null resets the cached state instead.
public void Set(Term term)
{
    if (term == null)
    {
        Reset();
        return;
    }
    System.String termText = term.Text();
    int termLen = termText.Length;
    text.SetLength(termLen);
    // string.CopyTo performs the same char copy as the SupportClass helper did.
    termText.CopyTo(0, text.result, 0, termLen);
    dirty = true;
    field = term.Field();
    this.term = term;
}
// Loads the term's text into the internal char buffer and records its field.
// A null term clears the cached state instead.
public void Set(Term term)
{
    if (term == null)
    {
        Reset();
        return;
    }
    string termText = term.Text();
    int termLen = termText.Length;
    text.setLength(termLen);
    int pos = 0;
    while (pos < termLen)
    {
        text.result[pos] = termText[pos];
        pos++;
    }
    dirty = true;
    field = term.Field();
    this.term = term;
}
// Seeks a positions enumerator through the reader registered for the term's field.
// NOTE(review): no null check on the lookup — presumably every field seen here has
// a registered reader; confirm against callers before hardening.
public override void Seek(Term term)
{
    IndexReader fieldReader = (IndexReader) Enclosing_Instance.fieldToReader[term.Field()];
    termDocs = fieldReader.TermPositions(term);
}
// DocValues updates
// Applies a batch of doc-values updates against one segment reader, under the
// enclosing object's lock. For each update term it seeks the postings for that
// term and records the new value for every matching doc whose id is below the
// update's DocIDUpto limit, accumulating results into dvUpdatesContainer.
// NOTE(review): 'docs' is passed into termsEnum.Docs for enumerator reuse but is
// never assigned the returned docsEnum, so no reuse actually happens — confirm
// against the upstream version whether 'docs = docsEnum' was intended.
private void ApplyDocValuesUpdates<T1>(IEnumerable<T1> updates, ReadersAndUpdates rld, SegmentReader reader, DocValuesFieldUpdates.Container dvUpdatesContainer) where T1 : DocValuesUpdate
{
    lock (this)
    {
        Fields fields = reader.Fields;
        if (fields == null)
        {
            // this reader has no postings
            return;
        }
        // TODO: we can process the updates per DV field, from last to first so that
        // if multiple terms affect same document for the same field, we add an update
        // only once (that of the last term). To do that, we can keep a bitset which
        // marks which documents have already been updated. So e.g. if term T1
        // updates doc 7, and then we process term T2 and it updates doc 7 as well,
        // we don't apply the update since we know T1 came last and therefore wins
        // the update.
        // We can also use that bitset as 'liveDocs' to pass to TermEnum.docs(), so
        // that these documents aren't even returned.
        string currentField = null;
        TermsEnum termsEnum = null;
        DocsEnum docs = null;
        //System.out.println(Thread.currentThread().getName() + " numericDVUpdate reader=" + reader);
        foreach (DocValuesUpdate update in updates)
        {
            Term term = update.Term;
            int limit = update.DocIDUpto;
            // TODO: we traverse the terms in update order (not term order) so that we
            // apply the updates in the correct order, i.e. if two terms udpate the
            // same document, the last one that came in wins, irrespective of the
            // terms lexical order.
            // we can apply the updates in terms order if we keep an updatesGen (and
            // increment it with every update) and attach it to each NumericUpdate. Note
            // that we cannot rely only on docIDUpto because an app may send two updates
            // which will get same docIDUpto, yet will still need to respect the order
            // those updates arrived.
            // Re-seek the terms enumerator only when the update's field changes.
            if (!term.Field().Equals(currentField))
            {
                // if we change the code to process updates in terms order, enable this assert
                // assert currentField == null || currentField.compareTo(term.field()) < 0;
                currentField = term.Field();
                Terms terms = fields.Terms(currentField);
                if (terms != null)
                {
                    termsEnum = terms.Iterator(termsEnum);
                }
                else
                {
                    termsEnum = null;
                    continue; // no terms in that field
                }
            }
            if (termsEnum == null)
            {
                continue;
            }
            // System.out.println("  term=" + term);
            if (termsEnum.SeekExact(term.Bytes()))
            {
                // we don't need term frequencies for this
                DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsEnum.FLAG_NONE);
                //System.out.println("BDS: got docsEnum=" + docsEnum);
                DocValuesFieldUpdates dvUpdates = dvUpdatesContainer.GetUpdates(update.Field, update.Type);
                if (dvUpdates == null)
                {
                    dvUpdates = dvUpdatesContainer.NewUpdates(update.Field, update.Type, reader.MaxDoc);
                }
                int doc;
                while ((doc = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    //System.out.println(Thread.currentThread().getName() + " numericDVUpdate term=" + term + " doc=" + docID);
                    if (doc >= limit)
                    {
                        break; // no more docs that can be updated for this term
                    }
                    dvUpdates.Add(doc, update.Value);
                }
            }
        }
    }
}
// Seeks the docs enumerator using the reader mapped to the term's field.
public virtual void Seek(Term term)
{
    IndexReader fieldReader = (IndexReader) Enclosing_Instance.fieldToReader[term.Field()];
    termDocs = fieldReader.TermDocs(term);
}
// Seeks the positions enumerator for the term's field; when that field has no
// registered reader the enumerator is cleared instead.
public override void Seek(Term term)
{
    IndexReader fieldReader = (IndexReader) Enclosing_Instance.fieldToReader[term.Field()];
    termDocs = (fieldReader == null) ? null : fieldReader.TermPositions(term);
}
// Positions this enumerator at the given term using the reader that indexes its field.
// Fix: the original dereferenced the fieldToReader lookup unconditionally and threw
// NullReferenceException when the term's field had no registered reader; guard the
// lookup, matching the null-checked sibling constructor. With no reader, termEnum
// stays null and the enumerator behaves as exhausted.
public ParallelTermEnum(ParallelReader enclosingInstance, Term term)
{
    InitBlock(enclosingInstance);
    field = term.Field();
    IndexReader reader = (IndexReader) Enclosing_Instance.fieldToReader[field];
    if (reader != null)
    {
        termEnum = reader.Terms(term);
    }
}