/// <summary>
/// Opens an IndexReader on <c>dir</c> and asserts the text and document frequency of
/// the terms "aaa" (expected 200) and "bbb" (expected 100): once enumerating from the
/// start of the term dictionary, and once starting at the term ("content", "aaa").
/// The reader and both enumerations are closed even when an assertion fails.
/// </summary>
private void VerifyDocFreq()
{
    IndexReader reader = IndexReader.Open(dir);
    try
    {
        // create enumeration of all terms
        TermEnum termEnum = reader.Terms();
        try
        {
            // go to the first term (aaa)
            termEnum.Next();
            // assert that term is 'aaa'
            Assert.AreEqual("aaa", termEnum.Term().Text());
            Assert.AreEqual(200, termEnum.DocFreq());
            // go to the second term (bbb)
            termEnum.Next();
            // assert that term is 'bbb'
            Assert.AreEqual("bbb", termEnum.Term().Text());
            Assert.AreEqual(100, termEnum.DocFreq());
        }
        finally
        {
            termEnum.Close();
        }

        // create enumeration of terms after term 'aaa', including 'aaa'
        termEnum = reader.Terms(new Term("content", "aaa"));
        try
        {
            // assert that term is 'aaa'
            Assert.AreEqual("aaa", termEnum.Term().Text());
            Assert.AreEqual(200, termEnum.DocFreq());
            // go to term 'bbb'
            termEnum.Next();
            // assert that term is 'bbb'
            Assert.AreEqual("bbb", termEnum.Term().Text());
            Assert.AreEqual(100, termEnum.DocFreq());
        }
        finally
        {
            termEnum.Close();
        }
    }
    finally
    {
        // The reader was opened by this method, so it is released here.
        reader.Close();
    }
}
/// <summary>
/// Advances the enumeration. Stays on the current reader's enumeration while it
/// still yields terms of <c>field</c>; otherwise closes it and opens the
/// enumeration of the reader mapped to the first key of the tail map of
/// <c>fieldToReader</c> starting at <c>field</c>.
/// </summary>
/// <returns>true if the enumeration was advanced or a new one was opened; false otherwise.</returns>
public override bool Next()
{
    if (field == null)
    {
        return false;
    }

    // still within field?
    if (termEnum.Next())
    {
        System.Object currentField = termEnum.Term().Field();
        if (currentField == (System.Object) field)
        {
            return true; // yes, keep going
        }
    }

    // close old termEnum
    termEnum.Close();

    // find the next field, if any
    // NOTE(review): if TailMap includes `field` itself, GetKey(0) yields the
    // current field again rather than the following one -- confirm against SupportClass.
    System.Object firstKey = SupportClass.TailMap(Enclosing_Instance.fieldToReader, field).GetKey(0);
    field = (System.String) firstKey;
    if (field == null)
    {
        return false; // no more fields
    }

    IndexReader fieldReader = (IndexReader) Enclosing_Instance.fieldToReader[field];
    termEnum = fieldReader.Terms();
    return true;
}
/// <summary>
/// Advances to the next term. While the current enumeration still yields terms of
/// <c>field</c> it is simply stepped; otherwise it is closed and the following
/// fields of <c>fieldToReader</c> are tried in key order until one has a term.
/// </summary>
/// <returns>true if positioned on a term; false when no field has further terms.</returns>
public override bool Next()
{
    if (termEnum == null)
    {
        return false;
    }

    // another term in this field?
    bool advanced = termEnum.Next();
    if (advanced && (System.Object) termEnum.Term().Field() == (System.Object) field)
    {
        return true; // yes, keep going
    }

    termEnum.Close(); // close old termEnum

    // find the next field with terms, if any
    if (fieldIterator == null)
    {
        // Build a copy of the map entries whose key is >= the current field.
        System.Collections.Comparer keyComparer = System.Collections.Comparer.Default;
        System.Collections.SortedList tail = new System.Collections.SortedList();
        if (Enclosing_Instance.fieldToReader != null && Enclosing_Instance.fieldToReader.Count > 0)
        {
            int start = 0;
            while (keyComparer.Compare(Enclosing_Instance.fieldToReader.GetKey(start), field) < 0)
            {
                start++;
            }
            for (int i = start; i < Enclosing_Instance.fieldToReader.Count; i++)
            {
                System.Object key = Enclosing_Instance.fieldToReader.GetKey(i);
                tail.Add(key, Enclosing_Instance.fieldToReader[key]);
            }
        }

        fieldIterator = tail.Keys.GetEnumerator();
        fieldIterator.MoveNext();
        System.Object skippedField = fieldIterator.Current; // Skip field to get next one
    }

    while (fieldIterator.MoveNext())
    {
        field = (System.String) fieldIterator.Current;
        IndexReader fieldReader = (IndexReader) Enclosing_Instance.fieldToReader[field];
        termEnum = fieldReader.Terms(new Term(field));

        Term firstTerm = termEnum.Term();
        bool belongsToField = firstTerm != null && (System.Object) firstTerm.Field() == (System.Object) field;
        if (belongsToField)
        {
            return true;
        }
        termEnum.Close();
    }

    return false; // no more fields
}
/// <summary>
/// Advances to the next term. While the current enumeration still yields terms of
/// <c>field</c> it is simply stepped; otherwise it is closed and the fields that
/// follow the current one in <c>fieldToReader.Keys</c> are tried in order until
/// one has a term.
/// </summary>
/// <returns>true if positioned on a term; false when no field has further terms.</returns>
public override bool Next()
{
    if (termEnum == null)
    {
        return false;
    }

    // another term in this field?
    if (termEnum.Next() && (System.Object) termEnum.Term().Field() == (System.Object) field)
    {
        return true; // yes, keep going
    }

    termEnum.Close(); // close old termEnum

    // find the next field with terms, if any
    if (fieldIterator == null)
    {
        // Equivalent of the Java original:
        //   fieldIterator = fieldToReader.tailMap(field).keySet().iterator();
        //   fieldIterator.next(); // Skip field to get next one
        // Only keys that come AFTER the current field are collected. Collecting the
        // current field as well would re-open its enumeration and replay its terms.
        // NOTE(review): relies on fieldToReader.Keys enumerating in sorted order -- confirm.
        List<string> followingFields = new List<string>();
        bool pastCurrent = false;
        foreach (string key in Enclosing_Instance.fieldToReader.Keys)
        {
            if (pastCurrent)
            {
                followingFields.Add(key);
            }
            else if (key == field)
            {
                pastCurrent = true;
            }
        }
        fieldIterator = followingFields.GetEnumerator();
    }

    while (fieldIterator.MoveNext())
    {
        field = fieldIterator.Current;
        termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field));
        Term term = termEnum.Term();
        if (term != null && (System.Object) term.Field() == (System.Object) field)
        {
            return true;
        }
        termEnum.Close();
    }

    return false; // no more fields
}
/// <summary>
/// Walks every term of <c>reader</c> and checks that its text occurs in the value
/// DocHelper.nameValues holds for the term's field, then checks that TermDocs and
/// TermPositions can be positioned on known terms.
/// </summary>
public virtual void TestTerms()
{
    TermEnum terms = reader.Terms();
    Assert.IsTrue(terms != null);
    while (terms.Next())
    {
        Term current = terms.Term();
        Assert.IsTrue(current != null);
        //System.out.println("Term: " + term);
        System.String storedValue = (System.String) DocHelper.nameValues[current.Field()];
        Assert.IsTrue(storedValue.IndexOf(current.Text()) >= 0);
    }

    TermDocs termDocs = reader.TermDocs();
    Assert.IsTrue(termDocs != null);

    Term textFieldTerm = new Term(DocHelper.TEXT_FIELD_1_KEY, "field");
    termDocs.Seek(textFieldTerm);
    Assert.IsTrue(termDocs.Next());

    Term noNormsTerm = new Term(DocHelper.NO_NORMS_KEY, DocHelper.NO_NORMS_TEXT);
    termDocs.Seek(noNormsTerm);
    Assert.IsTrue(termDocs.Next());

    TermPositions positions = reader.TermPositions();
    positions.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
    Assert.IsTrue(positions != null);
    Assert.IsTrue(positions.Doc() == 0);
    Assert.IsTrue(positions.NextPosition() >= 0);
}
/// <summary>
/// Advance to the next item. The first call uses the term the enumeration is
/// already positioned on; later calls step the enumeration first.
/// </summary>
/// <returns>
/// true when the current term is non-null, belongs to <c>fieldName</c> and its text
/// is accepted by the enclosing instance's TryParse; otherwise false.
/// </returns>
public bool MoveNext()
{
    if (this.isFirst)
    {
        this.isFirst = false;
    }
    else if (!this.termEnum.Next())
    {
        return false;
    }

    this.currentTerm = termEnum.Term();
    if (this.currentTerm == null)
    {
        return false;
    }
    if (!this.currentTerm.Field().Equals(this.fieldName))
    {
        return false;
    }

    if (!this.enclosing.TryParse(this.currentTerm.Text()))
    {
        return false;
    }

    // Keep the optional TermDocs positioned on the same term as the enumeration.
    if (this.termDocs != null)
    {
        this.termDocs.Seek(this.termEnum);
    }
    return true;
}
/// <summary>
/// Walks every term of <c>reader</c> and checks that its text occurs in the value
/// DocHelper.nameValues holds for the term's field, then checks TermDocs and
/// TermPositions on the term (TEXT_FIELD_1_KEY, "Field"). An IOException is
/// written to standard error and turned into a test failure.
/// </summary>
public virtual void TestTerms()
{
    try
    {
        TermEnum terms = reader.Terms();
        Assert.IsTrue(terms != null);
        while (terms.Next())
        {
            Term current = terms.Term();
            Assert.IsTrue(current != null);
            //System.out.println("Term: " + term);
            System.String storedValue = (System.String) DocHelper.nameValues[current.Field()];
            Assert.IsTrue(storedValue.IndexOf(current.Text()) >= 0);
        }

        TermDocs termDocs = reader.TermDocs();
        Assert.IsTrue(termDocs != null);
        termDocs.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "Field"));
        Assert.IsTrue(termDocs.Next());

        TermPositions positions = reader.TermPositions();
        positions.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "Field"));
        Assert.IsTrue(positions != null);
        Assert.IsTrue(positions.Doc() == 0);
        Assert.IsTrue(positions.NextPosition() >= 0);
    }
    catch (System.IO.IOException ioError)
    {
        System.Console.Error.WriteLine(ioError.StackTrace);
        Assert.IsTrue(false);
    }
}
private int[] docMap; // use getDocMap()

/// <summary>
/// Stores the given base, term enumeration and reader, and caches the term the
/// enumeration currently reports.
/// </summary>
internal SegmentMergeInfo(int b, TermEnum te, IndexReader r)
{
    this.base_Renamed = b;
    this.reader = r;
    this.termEnum = te;
    this.term = te.Term();
}
private TermPositions postings; // use getPositions()

#endregion Fields

#region Constructors

/// <summary>
/// Stores the given base, term enumeration and reader, and caches the term the
/// enumeration currently reports.
/// </summary>
internal SegmentMergeInfo(int b, TermEnum te, IndexReader r)
{
    this.base_Renamed = b;
    this.reader = r;
    this.termEnum = te;
    this.term = te.Term();
}
/// <summary>
/// Indexes three small documents into the "default" field, wraps the reader in
/// TestReader and asserts that every enumerated term contains 'e', that the
/// positions for "one" are reported only on odd document numbers, and that
/// TermDocs(null) visits documents 0..2 with frequency 1.
/// </summary>
public virtual void TestFilterIndexReader_Renamed()
{
    RAMDirectory directory = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    System.String[] contents = new System.String[] { "one two", "one three", "two four" };
    foreach (System.String content in contents)
    {
        Document doc = new Document();
        doc.Add(new Field("default", content, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Close();

    IndexReader reader = new TestReader(IndexReader.Open(directory));
    Assert.IsTrue(reader.IsOptimized());

    TermEnum terms = reader.Terms();
    while (terms.Next())
    {
        System.String text = terms.Term().Text();
        Assert.IsTrue(text.IndexOf('e') >= 0);
    }
    terms.Close();

    TermPositions positions = reader.TermPositions(new Term("default", "one"));
    while (positions.Next())
    {
        Assert.IsTrue((positions.Doc() % 2) == 1);
    }

    int docCount = 3;
    TermDocs td = reader.TermDocs(null);
    for (int expectedDoc = 0; expectedDoc < docCount; expectedDoc++)
    {
        Assert.IsTrue(td.Next());
        Assert.AreEqual(expectedDoc, td.Doc());
        Assert.AreEqual(1, td.Freq());
    }
    td.Close();

    reader.Close();
    directory.Close();
}
/// <summary>
/// Writes a dump of one segment to <paramref name="out_Renamed"/>: every document,
/// then every term with its document frequency and, per document, the term
/// frequency and positions. The directory, reader and term enumeration are
/// released even when reading or writing throws.
/// </summary>
/// <param name="out_Renamed">Writer that receives the dump.</param>
/// <param name="segment">Name of the segment, opened from <c>indexDir</c>.</param>
private void PrintSegment(System.IO.StringWriter out_Renamed, System.String segment)
{
    Directory directory = FSDirectory.GetDirectory(indexDir, false);
    try
    {
        SegmentReader reader = new SegmentReader(new SegmentInfo(segment, 1, directory));
        try
        {
            for (int i = 0; i < reader.NumDocs(); i++)
            {
                out_Renamed.WriteLine(reader.Document(i));
            }

            TermEnum tis = reader.Terms();
            try
            {
                while (tis.Next())
                {
                    out_Renamed.Write(tis.Term());
                    out_Renamed.WriteLine(" DF=" + tis.DocFreq());

                    TermPositions positions = reader.TermPositions(tis.Term());
                    try
                    {
                        while (positions.Next())
                        {
                            out_Renamed.Write(" doc=" + positions.Doc());
                            out_Renamed.Write(" TF=" + positions.Freq());
                            out_Renamed.Write(" pos=");
                            // First position without a separator, the rest comma-prefixed.
                            out_Renamed.Write(positions.NextPosition());
                            for (int j = 1; j < positions.Freq(); j++)
                            {
                                out_Renamed.Write("," + positions.NextPosition());
                            }
                            out_Renamed.WriteLine("");
                        }
                    }
                    finally
                    {
                        positions.Close();
                    }
                }
            }
            finally
            {
                tis.Close();
            }
        }
        finally
        {
            reader.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Writes a dump of one segment of the "test" directory to standard output: every
/// document, then every term with its document frequency and, per document, the
/// term frequency and positions. The directory, reader and term enumeration are
/// released even when reading or writing throws.
/// </summary>
/// <param name="segment">Name of the segment to dump.</param>
internal static void PrintSegment(System.String segment)
{
    Directory directory = FSDirectory.GetDirectory("test", false);
    try
    {
        SegmentReader reader = new SegmentReader(new SegmentInfo(segment, 1, directory));
        try
        {
            for (int i = 0; i < reader.NumDocs(); i++)
            {
                System.Console.Out.WriteLine(reader.Document(i));
            }

            TermEnum tis = reader.Terms();
            try
            {
                while (tis.Next())
                {
                    System.Console.Out.Write(tis.Term());
                    System.Console.Out.WriteLine(" DF=" + tis.DocFreq());

                    TermPositions positions = reader.TermPositions(tis.Term());
                    try
                    {
                        while (positions.Next())
                        {
                            System.Console.Out.Write(" doc=" + positions.Doc());
                            System.Console.Out.Write(" TF=" + positions.Freq());
                            System.Console.Out.Write(" pos=");
                            // First position without a separator, the rest comma-prefixed.
                            System.Console.Out.Write(positions.NextPosition());
                            for (int j = 1; j < positions.Freq(); j++)
                            {
                                System.Console.Out.Write("," + positions.NextPosition());
                            }
                            System.Console.Out.WriteLine("");
                        }
                    }
                    finally
                    {
                        positions.Close();
                    }
                }
            }
            finally
            {
                tis.Close();
            }
        }
        finally
        {
            reader.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Writes a dump of the segment described by <paramref name="si"/> to
/// <paramref name="out_Renamed"/>: every document, then every term with its
/// document frequency and, per document, the term frequency and positions.
/// The reader and term enumeration are released even when reading or writing throws.
/// </summary>
/// <param name="out_Renamed">Writer that receives the dump.</param>
/// <param name="si">Segment to open via SegmentReader.Get.</param>
private void PrintSegment(System.IO.StreamWriter out_Renamed, SegmentInfo si)
{
    SegmentReader reader = SegmentReader.Get(si);
    try
    {
        for (int i = 0; i < reader.NumDocs(); i++)
        {
            out_Renamed.WriteLine(reader.Document(i));
        }

        TermEnum tis = reader.Terms();
        try
        {
            while (tis.Next())
            {
                out_Renamed.Write(tis.Term());
                out_Renamed.WriteLine(" DF=" + tis.DocFreq());

                TermPositions positions = reader.TermPositions(tis.Term());
                try
                {
                    while (positions.Next())
                    {
                        out_Renamed.Write(" doc=" + positions.Doc());
                        out_Renamed.Write(" TF=" + positions.Freq());
                        out_Renamed.Write(" pos=");
                        // First position without a separator, the rest comma-prefixed.
                        out_Renamed.Write(positions.NextPosition());
                        for (int j = 1; j < positions.Freq(); j++)
                        {
                            out_Renamed.Write("," + positions.NextPosition());
                        }
                        out_Renamed.WriteLine("");
                    }
                }
                finally
                {
                    positions.Close();
                }
            }
        }
        finally
        {
            tis.Close();
        }
    }
    finally
    {
        reader.Close();
    }
}
/// <summary>
/// Steps the wrapped term enumeration and refreshes the cached <c>term</c>:
/// the enumeration's current term on success, null once it is exhausted.
/// </summary>
/// <returns>true if the enumeration advanced; otherwise false.</returns>
internal bool Next()
{
    bool hasMore = termEnum.Next();
    term = hasMore ? termEnum.Term() : null;
    return hasMore;
}
/// <summary>
/// Positions this instance on the term the given enumeration currently reports.
/// </summary>
/// <param name="termEnum">Enumeration whose current term is sought.</param>
public virtual void Seek(TermEnum termEnum)
{
    // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
    SegmentTermEnum segmentEnum = termEnum as SegmentTermEnum;
    bool sameSegment = segmentEnum != null && segmentEnum.fieldInfos == parent.fieldInfos;

    TermInfo ti = sameSegment
        ? segmentEnum.TermInfo()               // optimized case
        : parent.tis.Get(termEnum.Term());     // punt case

    Seek(ti);
}
/// <summary>
/// Advances to the next term. While the current enumeration still yields terms of
/// <c>field</c> it is simply stepped; otherwise it is closed and the keys of the
/// tail map of <c>fieldToReader</c> (after skipping its first key) are tried in
/// order until one has a term.
/// </summary>
/// <returns>true if positioned on a term; false when no field has further terms.</returns>
public override bool Next()
{
    if (termEnum == null)
    {
        return false;
    }

    // another term in this field?
    bool advanced = termEnum.Next();
    if (advanced && (System.Object) termEnum.Term().Field() == (System.Object) field)
    {
        return true; // yes, keep going
    }

    termEnum.Close(); // close old termEnum

    // find the next field with terms, if any
    if (fieldIterator == null)
    {
        fieldIterator = SupportClass.TailMap(Enclosing_Instance.fieldToReader, field).Keys.GetEnumerator();
        fieldIterator.MoveNext(); // Skip field to get next one
    }

    while (fieldIterator.MoveNext())
    {
        field = (System.String) fieldIterator.Current;
        IndexReader fieldReader = (IndexReader) Enclosing_Instance.fieldToReader[field];
        termEnum = fieldReader.Terms(new Term(field, ""));

        Term firstTerm = termEnum.Term();
        bool belongsToField = firstTerm != null && (System.Object) firstTerm.Field() == (System.Object) field;
        if (belongsToField)
        {
            return true;
        }
        termEnum.Close();
    }

    return false; // no more fields
}
/// <summary>
/// Advances to the next term. While the current enumeration still yields terms of
/// <c>field</c> it is simply stepped; otherwise it is closed and the keys returned
/// by CollectionsSupport.TailMap for <c>field</c> are tried in order until one
/// has a term.
/// </summary>
/// <returns>true if positioned on a term; false when no field has further terms.</returns>
public override bool Next()
{
    if (termEnum == null)
    {
        return false;
    }

    // another term in this field?
    bool advanced = termEnum.Next();
    if (advanced && (object) termEnum.Term().Field() == (object) field)
    {
        return true; // yes, keep going
    }

    termEnum.Close(); // close old termEnum

    // find the next field with terms, if any
    if (fieldIterator == null)
    {
        // NOTE(review): nothing is skipped here, so this presumably expects TailMap
        // to exclude `field` itself -- confirm against CollectionsSupport.
        fieldIterator = SupportClass.CollectionsSupport.TailMap(Enclosing_Instance.fieldToReader, field).Keys.GetEnumerator();
    }

    while (fieldIterator.MoveNext())
    {
        field = fieldIterator.Current;
        termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field));

        Term firstTerm = termEnum.Term();
        bool belongsToField = firstTerm != null && (object) firstTerm.Field() == (object) field;
        if (belongsToField)
        {
            return true;
        }
        termEnum.Close();
    }

    return false; // no more fields
}
/// <summary>
/// Starts several ingester threads that write to one IndexWriter, waits for them,
/// then checks for every position of every term that the payload (decoded through
/// the pool) equals the term text, and finally that the pool size equals the
/// number of threads.
/// </summary>
public virtual void TestThreadSafety()
{
    rnd = NewRandom();
    int numThreads = 5;
    int numDocs = 50;
    ByteArrayPool pool = new ByteArrayPool(numThreads, 5);

    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    System.String field = "test";

    SupportClass.ThreadClass[] ingesters = new SupportClass.ThreadClass[numThreads];
    for (int t = 0; t < ingesters.Length; t++)
    {
        ingesters[t] = new AnonymousClassThread(numDocs, field, pool, writer, this);
        ingesters[t].Start();
    }
    foreach (SupportClass.ThreadClass ingester in ingesters)
    {
        ingester.Join();
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(dir);
    TermEnum terms = reader.Terms();
    while (terms.Next())
    {
        TermPositions tp = reader.TermPositions(terms.Term());
        while (tp.Next())
        {
            for (int remaining = tp.Freq(); remaining > 0; remaining--)
            {
                tp.NextPosition();
                Assert.AreEqual(pool.BytesToString(tp.GetPayload(new byte[5], 0)), terms.Term().text_ForNUnit);
            }
        }
        tp.Close();
    }
    terms.Close();
    reader.Close();

    Assert.AreEqual(pool.Size(), numThreads);
}
/// <summary>
/// Positions this instance on the term the given enumeration currently reports.
/// </summary>
/// <param name="termEnum">Enumeration whose current term is sought.</param>
public virtual void Seek(TermEnum termEnum)
{
    TermInfo ti;

    // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
    SegmentTermEnum sameSegmentEnum = termEnum as SegmentTermEnum;
    if (sameSegmentEnum == null || !(sameSegmentEnum.fieldInfos == parent.fieldInfos))
    {
        // punt case
        ti = parent.tis.Get(termEnum.Term());
    }
    else
    {
        // optimized case
        ti = sameSegmentEnum.TermInfo();
    }

    Seek(ti);
}
/// <summary>
/// Indexes three small documents into the "default" field, wraps the reader in
/// TestReader and asserts that every enumerated term contains 'e' and that the
/// positions for "one" are reported only on odd document numbers.
/// </summary>
public virtual void TestFilterIndexReader_()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

    System.String[] contents = new System.String[] { "one two", "one three", "two four" };
    foreach (System.String content in contents)
    {
        Document doc = new Document();
        doc.Add(Field.Text("default", content));
        writer.AddDocument(doc);
    }
    writer.Close();

    IndexReader reader = new TestReader(IndexReader.Open(directory));

    TermEnum terms = reader.Terms();
    while (terms.Next())
    {
        System.String text = terms.Term().Text();
        Assert.IsTrue(text.IndexOf((System.Char) 'e') >= 0);
    }
    terms.Close();

    TermPositions positions = reader.TermPositions(new Term("default", "one"));
    while (positions.Next())
    {
        Assert.IsTrue((positions.Doc() % 2) == 1);
    }

    reader.Close();
}
/// <summary>
/// Positions this instance on the term the given enumeration currently reports,
/// passing both the term and its TermInfo to the two-argument Seek.
/// </summary>
/// <param name="termEnum">Enumeration whose current term is sought.</param>
public virtual void Seek(TermEnum termEnum)
{
    Term term;
    TermInfo ti;

    // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
    SegmentTermEnum segmentTermEnum = termEnum as SegmentTermEnum;
    if (segmentTermEnum != null && segmentTermEnum.fieldInfos == parent.core.fieldInfos)
    {
        // optimized case
        term = segmentTermEnum.Term();
        ti = segmentTermEnum.TermInfo();
    }
    else
    {
        // punt case
        term = termEnum.Term();
        ti = parent.core.GetTermsReader().Get(term);
    }

    Seek(ti, term);
}
/// <summary>
/// Positions this instance on the term the given enumeration currently reports,
/// passing both the term and its TermInfo to the two-argument Seek.
/// </summary>
/// <param name="termEnum">Enumeration whose current term is sought.</param>
public virtual void Seek(TermEnum termEnum)
{
    // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
    SegmentTermEnum sameSegmentEnum = termEnum as SegmentTermEnum;
    bool foreignEnum = sameSegmentEnum == null || !(sameSegmentEnum.fieldInfos == parent.core.fieldInfos);

    Term term;
    TermInfo ti;
    if (foreignEnum)
    {
        // punt case
        term = termEnum.Term();
        ti = parent.core.GetTermsReader().Get(term);
    }
    else
    {
        // optimized case
        term = sameSegmentEnum.Term();
        ti = sameSegmentEnum.TermInfo();
    }

    Seek(ti, term);
}
/// <summary>
/// Advances to the next term. While the current enumeration still yields terms of
/// <c>field</c> it is simply stepped; otherwise it is closed and the fields that
/// follow the current one in <c>fieldToReader.Keys</c> are tried in order until
/// one has a term.
/// </summary>
/// <returns>true if positioned on a term; false when no field has further terms.</returns>
public override bool Next()
{
    if (termEnum == null)
        return false;

    // another term in this field?
    if (termEnum.Next() && (System.Object) termEnum.Term().Field() == (System.Object) field)
        return true; // yes, keep going

    termEnum.Close(); // close old termEnum

    // find the next field with terms, if any
    if (fieldIterator == null)
    {
        // Equivalent of the Java original:
        //   fieldIterator = fieldToReader.tailMap(field).keySet().iterator();
        //   fieldIterator.next(); // Skip field to get next one
        // The current field itself must not be collected: doing so would re-open
        // its enumeration and report all of its terms a second time.
        // NOTE(review): relies on fieldToReader.Keys enumerating in sorted order -- confirm.
        List<string> followingFields = new List<string>();
        bool pastCurrent = false;
        foreach (string key in Enclosing_Instance.fieldToReader.Keys)
        {
            if (pastCurrent)
                followingFields.Add(key);
            else if (key == field)
                pastCurrent = true;
        }
        fieldIterator = followingFields.GetEnumerator();
    }

    while (fieldIterator.MoveNext())
    {
        field = fieldIterator.Current;
        termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field));
        Term term = termEnum.Term();
        if (term != null && (System.Object) term.Field() == (System.Object) field)
            return true;
        termEnum.Close();
    }

    return false; // no more fields
}
/// <summary>
/// Seeks to the term the given enumeration currently reports by delegating to
/// the Term-based Seek overload.
/// </summary>
/// <param name="termEnum">Enumeration whose current term is sought.</param>
public virtual void Seek(TermEnum termEnum)
{
    Term target = termEnum.Term();
    Seek(target);
}
/// <summary>
/// Advances to the next term. While the current enumeration still yields terms of
/// <c>field</c> it is simply stepped; otherwise it is closed and the keys returned
/// by CollectionsSupport.TailMap for <c>field</c> are tried in order until one
/// has a term.
/// </summary>
/// <returns>true if positioned on a term; false when no field has further terms.</returns>
public override bool Next()
{
    if (termEnum == null)
    {
        return false;
    }

    // another term in this field?
    if (termEnum.Next())
    {
        object currentField = termEnum.Term().Field();
        if (currentField == (object) field)
        {
            return true; // yes, keep going
        }
    }

    termEnum.Close(); // close old termEnum

    // find the next field with terms, if any
    if (fieldIterator == null)
    {
        // NOTE(review): nothing is skipped here, so this presumably expects TailMap
        // to exclude `field` itself -- confirm against CollectionsSupport.
        fieldIterator = SupportClass.CollectionsSupport.TailMap(Enclosing_Instance.fieldToReader, field).Keys.GetEnumerator();
    }

    while (fieldIterator.MoveNext())
    {
        field = fieldIterator.Current;
        termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field));

        Term candidate = termEnum.Term();
        if (candidate != null && (object) candidate.Field() == (object) field)
        {
            return true;
        }
        termEnum.Close();
    }

    return false; // no more fields
}
/// <summary>
/// Advances to the next term. While the current enumeration still yields terms of
/// <c>field</c> it is simply stepped; otherwise it is closed and the keys of the
/// tail map of <c>fieldToReader</c> (after skipping its first key) are tried in
/// order until one has a term.
/// </summary>
/// <returns>true if positioned on a term; false when no field has further terms.</returns>
public override bool Next()
{
    if (termEnum == null)
    {
        return false;
    }

    // another term in this field?
    if (termEnum.Next())
    {
        System.Object currentField = termEnum.Term().Field();
        if (currentField == (System.Object) field)
        {
            return true; // yes, keep going
        }
    }

    termEnum.Close(); // close old termEnum

    // find the next field with terms, if any
    if (fieldIterator == null)
    {
        fieldIterator = SupportClass.TailMap(Enclosing_Instance.fieldToReader, field).Keys.GetEnumerator();
        fieldIterator.MoveNext(); // Skip field to get next one
    }

    while (fieldIterator.MoveNext())
    {
        field = (System.String) fieldIterator.Current;
        termEnum = ((IndexReader) Enclosing_Instance.fieldToReader[field]).Terms(new Term(field, ""));

        Term candidate = termEnum.Term();
        if (candidate != null && (System.Object) candidate.Field() == (System.Object) field)
        {
            return true;
        }
        termEnum.Close();
    }

    return false; // no more fields
}
/// <summary>
/// Builds a ParallelReader over <c>ir1</c> and <c>ir2</c> and checks that its term
/// enumeration yields the expected field1, field2 and field3 terms in order, and
/// that each term is found in exactly one document, document 0.
/// </summary>
public virtual void Test1()
{
    ParallelReader pr = new ParallelReader();
    pr.Add(ir1);
    pr.Add(ir2);

    TermDocs td = pr.TermDocs();
    TermEnum te = pr.Terms();

    // Expected terms in enumeration order, formatted as Term.ToString() reports them.
    System.String[] expectedTerms = new System.String[]
    {
        "field1:brown", "field1:fox", "field1:jumps", "field1:quick", "field1:the",
        "field2:brown", "field2:fox", "field2:jumps", "field2:quick", "field2:the",
        "field3:dog", "field3:fox", "field3:jumps", "field3:lazy", "field3:over", "field3:the"
    };

    foreach (System.String expected in expectedTerms)
    {
        Assert.IsTrue(te.Next());
        Assert.AreEqual(expected, te.Term().ToString());

        td.Seek(te.Term());
        Assert.IsTrue(td.Next());
        Assert.AreEqual(0, td.Doc());
        Assert.IsFalse(td.Next());
    }

    Assert.IsFalse(te.Next());
}
/// <summary>
/// Advances to the next term. While the current enumeration still yields terms of
/// <c>field</c> it is simply stepped; otherwise it is closed and the following
/// fields of <c>fieldToReader</c> are tried in key order until one has a term.
/// </summary>
/// <returns>true if positioned on a term; false when no field has further terms.</returns>
public override bool Next()
{
    if (termEnum == null)
    {
        return false;
    }

    // another term in this field?
    if (termEnum.Next())
    {
        System.Object currentField = termEnum.Term().Field();
        if (currentField == (System.Object) field)
        {
            return true; // yes, keep going
        }
    }

    termEnum.Close(); // close old termEnum

    // find the next field with terms, if any
    if (fieldIterator == null)
    {
        // Copy the entries whose key is >= the current field into a fresh list.
        System.Collections.Comparer keyComparer = System.Collections.Comparer.Default;
        System.Collections.SortedList tail = new System.Collections.SortedList();
        if (Enclosing_Instance.fieldToReader != null && Enclosing_Instance.fieldToReader.Count > 0)
        {
            int position = 0;
            while (keyComparer.Compare(Enclosing_Instance.fieldToReader.GetKey(position), field) < 0)
            {
                position++;
            }
            while (position < Enclosing_Instance.fieldToReader.Count)
            {
                System.Object key = Enclosing_Instance.fieldToReader.GetKey(position);
                tail.Add(key, Enclosing_Instance.fieldToReader[key]);
                position++;
            }
        }

        fieldIterator = tail.Keys.GetEnumerator();
        fieldIterator.MoveNext();
        System.Object skippedField = fieldIterator.Current; // Skip field to get next one
    }

    while (fieldIterator.MoveNext())
    {
        field = (System.String) fieldIterator.Current;
        termEnum = ((IndexReader) Enclosing_Instance.fieldToReader[field]).Terms(new Term(field));

        Term candidate = termEnum.Term();
        if (candidate != null && (System.Object) candidate.Field() == (System.Object) field)
        {
            return true;
        }
        termEnum.Close();
    }

    return false; // no more fields
}
/// <summary>
/// Returns the term reported by the wrapped enumeration <c>in_Renamed</c>.
/// </summary>
public override Term Term()
{
    Term current = in_Renamed.Term();
    return current;
}
// FIXME: OG: remove hard-coded file names
/// <summary>
/// Manual benchmark/self-check: reads the words of "words.txt", writes them with
/// pseudo-random TermInfo values to a term-info table in "test.store", then reads
/// the table back both sequentially and by lookup, throwing if any value differs.
/// Elapsed times are printed in milliseconds (DateTime ticks are 100 ns units and
/// are converted before printing).
/// </summary>
public static void Test()
{
    System.IO.FileInfo file = new System.IO.FileInfo("words.txt");
    System.Console.Out.WriteLine(" reading word file containing " + file.Length + " bytes");

    System.DateTime start = System.DateTime.Now;

    System.Collections.ArrayList keys = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
    System.IO.FileStream ws = new System.IO.FileStream(file.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
    System.IO.StreamReader wr = new System.IO.StreamReader(ws, System.Text.Encoding.Default);

    for (System.String key = wr.ReadLine(); key != null; key = wr.ReadLine())
    {
        keys.Add(new Term("word", key));
    }
    wr.Close();

    System.DateTime end = System.DateTime.Now;

    System.Console.Out.Write((end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond);
    System.Console.Out.WriteLine(" milliseconds to read " + keys.Count + " words");

    start = System.DateTime.Now;

    // Fixed seed so the generated values are reproducible between runs.
    System.Random gen = new System.Random((System.Int32) 1251971);
    long fp = (gen.Next() & 0xF) + 1;
    long pp = (gen.Next() & 0xF) + 1;

    int[] docFreqs = new int[keys.Count];
    long[] freqPointers = new long[keys.Count];
    long[] proxPointers = new long[keys.Count];
    for (int i = 0; i < keys.Count; i++)
    {
        docFreqs[i] = (gen.Next() & 0xF) + 1;
        freqPointers[i] = fp;
        proxPointers[i] = pp;
        fp += (gen.Next() & 0xF) + 1;
        pp += (gen.Next() & 0xF) + 1;
    }

    end = System.DateTime.Now;

    System.Console.Out.Write((end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond);
    System.Console.Out.WriteLine(" milliseconds to generate values");

    start = System.DateTime.Now;

    Directory store = FSDirectory.GetDirectory("test.store", true);
    FieldInfos fis = new FieldInfos();

    TermInfosWriter writer = new TermInfosWriter(store, "words", fis);
    fis.Add("word", false);

    for (int i = 0; i < keys.Count; i++)
    {
        writer.Add((Term) keys[i], new TermInfo(docFreqs[i], freqPointers[i], proxPointers[i]));
    }

    writer.Close();

    end = System.DateTime.Now;

    System.Console.Out.Write((end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond);
    System.Console.Out.WriteLine(" milliseconds to write table");

    System.Console.Out.WriteLine(" table occupies " + store.FileLength("words.tis") + " bytes");

    start = System.DateTime.Now;

    TermInfosReader reader = new TermInfosReader(store, "words", fis);

    end = System.DateTime.Now;

    System.Console.Out.Write((end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond);
    System.Console.Out.WriteLine(" milliseconds to open table");

    start = System.DateTime.Now;

    // Sequential pass: the enumeration must return the keys in insertion order.
    SegmentTermEnum enumerator = reader.Terms();
    for (int i = 0; i < keys.Count; i++)
    {
        enumerator.Next();
        Term key = (Term) keys[i];
        if (!key.Equals(enumerator.Term()))
        {
            throw new System.Exception("wrong term: " + enumerator.Term() + ", expected: " + key + " at " + i);
        }
        TermInfo ti = enumerator.TermInfo();
        if (ti.docFreq != docFreqs[i])
        {
            throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
        }
        if (ti.freqPointer != freqPointers[i])
        {
            throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
        }
        if (ti.proxPointer != proxPointers[i])
        {
            throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
        }
    }

    end = System.DateTime.Now;

    System.Console.Out.Write((end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond);
    System.Console.Out.WriteLine(" milliseconds to iterate over " + keys.Count + " words");

    start = System.DateTime.Now;

    // Random-access pass: look every key up individually.
    for (int i = 0; i < keys.Count; i++)
    {
        Term key = (Term) keys[i];
        TermInfo ti = reader.Get(key);
        if (ti.docFreq != docFreqs[i])
        {
            throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
        }
        if (ti.freqPointer != freqPointers[i])
        {
            throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
        }
        if (ti.proxPointer != proxPointers[i])
        {
            throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
        }
    }

    end = System.DateTime.Now;

    // Fractional milliseconds are kept here because a single lookup is short.
    System.Console.Out.Write((float) ((end - start).TotalMilliseconds / keys.Count));
    System.Console.Out.WriteLine(" average milliseconds per lookup");

    TermEnum e = reader.Terms(new Term("word", "azz"));
    System.Console.Out.WriteLine("Word after azz is " + e.Term().text);

    reader.Close();

    store.Close();
}
/// <summary>
/// Seeks to the term the given enumeration currently reports. When the enumeration
/// is a MultiTermEnum it is remembered in <c>tenum</c> only if it was created for
/// the same <c>topReader</c>; otherwise <c>tenum</c> is set to null.
/// </summary>
/// <param name="termEnum">Enumeration whose current term is sought.</param>
public virtual void Seek(TermEnum termEnum)
{
    Seek(termEnum.Term());

    MultiTermEnum multiEnum = termEnum as MultiTermEnum;
    if (multiEnum == null)
    {
        return;
    }

    tenum = (topReader == multiEnum.topReader) ? multiEnum : null;
}
/// <summary> Test the term index.</summary>
/// <remarks>
/// Walks every term of <paramref name="reader"/> and checks, per term, that document
/// numbers are strictly increasing and below MaxDoc, that frequencies are positive,
/// that positions are not below -1 and never decrease within a document, and that
/// the documents seen plus the deleted documents add up to the term's docFreq.
/// Any exception is recorded in the returned status instead of being rethrown.
/// </remarks>
/// <param name="info">Segment being checked (not read by this method).</param>
/// <param name="reader">Reader opened on the segment.</param>
/// <returns>Status holding the term, term/doc and token counts, or the error encountered.</returns>
private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
{
    Status.TermIndexStatus status = new Status.TermIndexStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: terms, freq, prox...");
        }

        TermEnum termEnum = reader.Terms();
        TermPositions termPositions = reader.TermPositions();

        // Used only to count up # deleted docs for this term
        MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

        int maxDoc = reader.MaxDoc();

        while (termEnum.Next())
        {
            status.termCount++;
            Term term = termEnum.Term();
            int docFreq = termEnum.DocFreq();
            termPositions.Seek(term);
            int lastDoc = -1;
            int freq0 = 0;
            status.totFreq += docFreq;
            while (termPositions.Next())
            {
                freq0++;
                int doc = termPositions.Doc();
                int freq = termPositions.Freq();
                if (doc <= lastDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                }
                if (doc >= maxDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                }

                lastDoc = doc;
                if (freq <= 0)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                }

                int lastPos = -1;
                status.totPos += freq;
                for (int j = 0; j < freq; j++)
                {
                    int pos = termPositions.NextPosition();
                    if (pos < -1)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                    }
                    if (pos < lastPos)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                    }
                    // Remember this position so the ordering check above compares
                    // against the previous position rather than the initial -1.
                    lastPos = pos;
                }
            }

            // Now count how many deleted docs occurred in
            // this term:
            int delCount;
            if (reader.HasDeletions())
            {
                myTermDocs.Seek(term);
                while (myTermDocs.Next())
                {
                }
                delCount = myTermDocs.delCount;
            }
            else
            {
                delCount = 0;
            }

            if (freq0 + delCount != docFreq)
            {
                throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
            }
        }

        Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
    }
    catch (System.Exception e)
    {
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
/// <summary>
/// Returns true if index is clean, else false. Every segment is opened and
/// its norms, postings, stored fields and term vectors are verified; a
/// segment that throws during verification is reported as broken.
/// </summary>
/// <param name="dir">Directory holding the index to check.</param>
/// <param name="doFix">
/// When true and broken segments were found, a new segments file that omits
/// them is written after a 5 step countdown.
/// </param>
/// <returns>
/// True only when no problem was detected; false when the segments file could
/// not be read, the format is newer than this tool, or any segment was broken
/// (including the case where the fix was written).
/// </returns>
public static bool Check(Directory dir, bool doFix)
{
    System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
    SegmentInfos sis = new SegmentInfos();

    try
    {
        sis.Read(dir);
    }
    catch (System.Exception t)
    {
        out_Renamed.WriteLine("ERROR: could not read any segments file in directory");
        out_Renamed.Write(t.StackTrace);
        out_Renamed.Flush();
        return(false);
    }

    int numSegments = sis.Count;
    System.String segmentsFileName = sis.GetCurrentSegmentFileName();
    IndexInput input = null;
    try
    {
        input = dir.OpenInput(segmentsFileName);
    }
    catch (System.Exception t)
    {
        out_Renamed.WriteLine("ERROR: could not open segments file in directory");
        out_Renamed.Write(t.StackTrace);
        out_Renamed.Flush();
        return(false);
    }

    int format = 0;
    try
    {
        format = input.ReadInt();
    }
    catch (System.Exception t)
    {
        out_Renamed.WriteLine("ERROR: could not read segment file version in directory");
        out_Renamed.Write(t.StackTrace);
        out_Renamed.Flush();
        return(false);
    }
    finally
    {
        if (input != null)
        {
            input.Close();
        }
    }

    System.String sFormat = "";
    bool skip = false;

    // One single chain: previously the FORMAT case was a separate `if`, so
    // its label was always overwritten by the trailing `else` below.
    if (format == SegmentInfos.FORMAT)
    {
        sFormat = "FORMAT [Lucene Pre-2.1]";
    }
    else if (format == SegmentInfos.FORMAT_LOCKLESS)
    {
        sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
    }
    else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
    {
        sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
    }
    else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
    {
        sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
    }
    else if (format < SegmentInfos.FORMAT_SHARED_DOC_STORE)
    {
        sFormat = "int=" + format + " [newer version of Lucene than this tool]";
        skip = true;
    }
    else
    {
        sFormat = format + " [Lucene 1.3 or prior]";
    }

    out_Renamed.WriteLine("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat);

    if (skip)
    {
        out_Renamed.WriteLine("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
        return(false);
    }

    // newSIS collects only the segments that pass every test.
    SegmentInfos newSIS = (SegmentInfos)sis.Clone();
    newSIS.Clear();
    bool changed = false;
    int totLoseDocCount = 0;
    int numBadSegments = 0;

    for (int i = 0; i < numSegments; i++)
    {
        SegmentInfo info = sis.Info(i);
        out_Renamed.WriteLine(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);

        // Docs that would be dropped if this segment turns out broken;
        // narrowed to the live doc count once the reader could be opened.
        int toLoseDocCount = info.docCount;

        SegmentReader reader = null;

        try
        {
            out_Renamed.WriteLine(" compound=" + info.GetUseCompoundFile());
            out_Renamed.WriteLine(" numFiles=" + info.Files().Count);
            out_Renamed.WriteLine(String.Format(nf, " size (MB)={0:f}", new Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));

            int docStoreOffset = info.GetDocStoreOffset();
            if (docStoreOffset != -1)
            {
                out_Renamed.WriteLine(" docStoreOffset=" + docStoreOffset);
                out_Renamed.WriteLine(" docStoreSegment=" + info.GetDocStoreSegment());
                out_Renamed.WriteLine(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
            }

            System.String delFileName = info.GetDelFileName();
            if (delFileName == null)
            {
                out_Renamed.WriteLine(" no deletions");
            }
            else
            {
                out_Renamed.WriteLine(" has deletions [delFileName=" + delFileName + "]");
            }

            out_Renamed.Write(" test: open reader.........");
            reader = SegmentReader.Get(info);
            int numDocs = reader.NumDocs();
            toLoseDocCount = numDocs;
            if (reader.HasDeletions())
            {
                out_Renamed.WriteLine("OK [" + (info.docCount - numDocs) + " deleted docs]");
            }
            else
            {
                out_Renamed.WriteLine("OK");
            }

            out_Renamed.Write(" test: fields, norms.......");
            System.Collections.IDictionary fieldNames = (System.Collections.IDictionary)reader.GetFieldNames(IndexReader.FieldOption.ALL);
            System.Collections.IEnumerator it = fieldNames.Keys.GetEnumerator();
            while (it.MoveNext())
            {
                System.String fieldName = (System.String)it.Current;
                byte[] b = reader.Norms(fieldName);
                if (b.Length != info.docCount)
                {
                    throw new System.SystemException("norms for field \"" + fieldName + "\" is length " + b.Length + " != maxDoc " + info.docCount);
                }
            }
            out_Renamed.WriteLine("OK [" + fieldNames.Count + " fields]");

            out_Renamed.Write(" test: terms, freq, prox...");
            TermEnum termEnum = reader.Terms();
            TermPositions termPositions = reader.TermPositions();

            // Used only to count up # deleted docs for this
            // term
            MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

            long termCount = 0;
            long totFreq = 0;
            long totPos = 0;
            while (termEnum.Next())
            {
                termCount++;
                Term term = termEnum.Term();
                int docFreq = termEnum.DocFreq();
                termPositions.Seek(term);
                int lastDoc = -1;
                int freq0 = 0; // number of (non-deleted) docs seen for this term
                totFreq += docFreq;

                while (termPositions.Next())
                {
                    freq0++;
                    int doc = termPositions.Doc();
                    int freq = termPositions.Freq();
                    if (doc <= lastDoc)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                    }
                    lastDoc = doc;
                    if (freq <= 0)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                    }

                    int lastPos = -1;
                    totPos += freq;
                    for (int j = 0; j < freq; j++)
                    {
                        int pos = termPositions.NextPosition();
                        if (pos < 0)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                        }
                        // Strict `<`: equal consecutive positions are not
                        // treated as an error, matching the message text.
                        if (pos < lastPos)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                        }
                        // Track the previous position so the ordering check
                        // above is actually effective.
                        lastPos = pos;
                    }
                }

                // Now count how many deleted docs occurred in
                // this term:
                int delCount;
                if (reader.HasDeletions())
                {
                    myTermDocs.Seek(term);
                    while (myTermDocs.Next())
                    {
                        // iterate to exhaustion; only the delCount side effect is needed
                    }
                    delCount = myTermDocs.delCount;
                }
                else
                {
                    delCount = 0;
                }

                if (freq0 + delCount != docFreq)
                {
                    throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
                }
            }

            out_Renamed.WriteLine("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");

            out_Renamed.Write(" test: stored fields.......");
            int docCount = 0;
            long totFields = 0;
            for (int j = 0; j < info.docCount; j++)
            {
                if (!reader.IsDeleted(j))
                {
                    docCount++;
                    Document doc = reader.Document(j);
                    totFields += doc.GetFields().Count;
                }
            }

            if (docCount != reader.NumDocs())
            {
                // Report both sides of the mismatch (the reader's count vs.
                // the number of undeleted docs actually visited).
                throw new System.SystemException("docCount=" + reader.NumDocs() + " but saw " + docCount + " undeleted docs");
            }

            out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total field count; avg {1:f} fields per doc]", new Object[] { totFields, (((float)totFields) / docCount) }));

            out_Renamed.Write(" test: term vectors........");
            int totVectors = 0;
            for (int j = 0; j < info.docCount; j++)
            {
                if (!reader.IsDeleted(j))
                {
                    TermFreqVector[] tfv = reader.GetTermFreqVectors(j);
                    if (tfv != null)
                    {
                        totVectors += tfv.Length;
                    }
                }
            }

            out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new Object[] { totVectors, (((float)totVectors) / docCount) }));
            out_Renamed.WriteLine("");
        }
        catch (System.Exception t)
        {
            // Any failure while testing marks the whole segment as broken.
            out_Renamed.WriteLine("FAILED");
            System.String comment;
            if (doFix)
            {
                comment = "will remove reference to this segment (-fix is specified)";
            }
            else
            {
                comment = "would remove reference to this segment (-fix was not specified)";
            }
            out_Renamed.WriteLine(" WARNING: " + comment + "; full exception:");
            out_Renamed.Write(t.StackTrace);
            out_Renamed.Flush();
            out_Renamed.WriteLine("");
            totLoseDocCount += toLoseDocCount;
            numBadSegments++;
            changed = true;
            continue;
        }
        finally
        {
            if (reader != null)
            {
                reader.Close();
            }
        }

        // Keeper
        newSIS.Add(info.Clone());
    }

    if (!changed)
    {
        out_Renamed.WriteLine("No problems were detected with this index.\n");
        return(true);
    }
    else
    {
        out_Renamed.WriteLine("WARNING: " + numBadSegments + " broken segments detected");
        if (doFix)
        {
            out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents will be lost");
        }
        else
        {
            out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents would be lost if -fix were specified");
        }
        out_Renamed.WriteLine();
    }

    if (doFix)
    {
        out_Renamed.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
        for (int i = 0; i < 5; i++)
        {
            try
            {
                // 10000 * 1000 ticks of 100ns each = one second per step
                System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000));
            }
            catch (System.Threading.ThreadInterruptedException)
            {
                // NOTE(review): re-interrupting the current thread and then
                // retrying the same step looks like it could make the next
                // Sleep throw again immediately - confirm this is intended.
                SupportClass.ThreadClass.Current().Interrupt();
                i--;
                continue;
            }

            out_Renamed.WriteLine(" " + (5 - i) + "...");
        }
        out_Renamed.Write("Writing...");
        try
        {
            newSIS.Write(dir);
        }
        catch (System.Exception t)
        {
            out_Renamed.WriteLine("FAILED; exiting");
            out_Renamed.Write(t.StackTrace);
            out_Renamed.Flush();
            return(false);
        }
        out_Renamed.WriteLine("OK");
        out_Renamed.WriteLine("Wrote new segments file \"" + newSIS.GetCurrentSegmentFileName() + "\"");
    }
    else
    {
        out_Renamed.WriteLine("NOTE: would write new segments file [-fix was not specified]");
    }
    out_Renamed.WriteLine("");

    // Problems were detected, so the index was not clean even if it was fixed.
    return(false);
}
/// <summary>
/// Asserts that two readers expose equivalent content. Documents are matched
/// through the single-valued <paramref name="idField"/>; for each matched pair
/// the stored fields and term vectors are compared, and afterwards the
/// postings (doc, freq) of every term are compared.
/// </summary>
/// <param name="r1">First reader.</param>
/// <param name="r2">Second reader; its doc ids are mapped into r1's id space.</param>
/// <param name="idField">Name of the field whose terms identify a document.</param>
public static void VerifyEquals(IndexReader r1, IndexReader r2, System.String idField)
{
    Assert.AreEqual(r1.NumDocs(), r2.NumDocs());
    bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc());

    int[] r2r1 = new int[r2.MaxDoc()]; // r2 id to r1 id mapping

    TermDocs termDocs1 = r1.TermDocs();
    TermDocs termDocs2 = r2.TermDocs();

    // create mapping from id2 space to id1 space based on idField
    idField = StringHelper.Intern(idField);
    TermEnum termEnum = r1.Terms(new Term(idField, ""));
    do
    {
        Term term = termEnum.Term();
        // Reference comparison against the interned idField; presumably term
        // field names are interned as well - verify against Term.
        if (term == null || (System.Object)term.Field() != (System.Object)idField)
        {
            break;
        }

        termDocs1.Seek(termEnum);
        if (!termDocs1.Next())
        {
            // This doc is deleted and wasn't replaced
            termDocs2.Seek(termEnum);
            Assert.IsFalse(termDocs2.Next());
            continue;
        }

        // Each id term must match exactly one doc in each reader.
        int id1 = termDocs1.Doc();
        Assert.IsFalse(termDocs1.Next());

        termDocs2.Seek(termEnum);
        Assert.IsTrue(termDocs2.Next());
        int id2 = termDocs2.Doc();
        Assert.IsFalse(termDocs2.Next());

        r2r1[id2] = id1;

        // verify stored fields are equivalent
        try
        {
            VerifyEquals(r1.Document(id1), r2.Document(id2));
        }
        catch (System.Exception)
        {
            System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
            System.Console.Out.WriteLine(" d1=" + r1.Document(id1));
            System.Console.Out.WriteLine(" d2=" + r2.Document(id2));
            // Rethrow without resetting the original stack trace.
            throw;
        }

        try
        {
            // verify term vectors are equivalent
            VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2));
        }
        catch (System.Exception)
        {
            System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
            TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1);
            System.Console.Out.WriteLine(" d1=" + tv1);
            if (tv1 != null)
            {
                for (int i = 0; i < tv1.Length; i++)
                {
                    System.Console.Out.WriteLine(" " + i + ": " + tv1[i]);
                }
            }

            TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2);
            System.Console.Out.WriteLine(" d2=" + tv2);
            if (tv2 != null)
            {
                for (int i = 0; i < tv2.Length; i++)
                {
                    System.Console.Out.WriteLine(" " + i + ": " + tv2[i]);
                }
            }

            // Rethrow without resetting the original stack trace.
            throw;
        }
    }
    while (termEnum.Next());

    termEnum.Close();

    // Verify postings
    TermEnum termEnum1 = r1.Terms(new Term("", ""));
    TermEnum termEnum2 = r2.Terms(new Term("", ""));

    // pack both doc and freq into single element for easy sorting
    long[] info1 = new long[r1.NumDocs()];
    long[] info2 = new long[r2.NumDocs()];

    for (; ;)
    {
        Term term1, term2;

        // iterate until we get some docs
        int len1;
        for (; ;)
        {
            len1 = 0;
            term1 = termEnum1.Term();
            if (term1 == null)
            {
                break;
            }
            termDocs1.Seek(termEnum1);
            while (termDocs1.Next())
            {
                int d1 = termDocs1.Doc();
                int f1 = termDocs1.Freq();
                info1[len1] = (((long)d1) << 32) | f1;
                len1++;
            }
            if (len1 > 0)
            {
                break;
            }
            if (!termEnum1.Next())
            {
                break;
            }
        }

        // iterate until we get some docs
        int len2;
        for (; ;)
        {
            len2 = 0;
            term2 = termEnum2.Term();
            if (term2 == null)
            {
                break;
            }
            termDocs2.Seek(termEnum2);
            while (termDocs2.Next())
            {
                int d2 = termDocs2.Doc();
                int f2 = termDocs2.Freq();
                // translate r2's doc id into r1's id space before packing
                info2[len2] = (((long)r2r1[d2]) << 32) | f2;
                len2++;
            }
            if (len2 > 0)
            {
                break;
            }
            if (!termEnum2.Next())
            {
                break;
            }
        }

        if (!hasDeletes)
        {
            Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq());
        }

        Assert.AreEqual(len1, len2);
        if (len1 == 0)
        {
            break; // no more terms
        }

        Assert.AreEqual(term1, term2);

        // sort info2 to get it into ascending docid
        System.Array.Sort(info2, 0, len2 - 0);

        // now compare
        for (int i = 0; i < len1; i++)
        {
            Assert.AreEqual(info1[i], info2[i]);
        }

        termEnum1.Next();
        termEnum2.Next();
    }
}