abstract public GetTermFreqVector ( int docNumber, String field ) : ITermFreqVector
docNumber | int | the document for which the term frequency vector is returned
field | String | the field for which the term frequency vector is returned
return | ITermFreqVector | the term frequency vector for the given document and field, or null if none is stored
/// <summary>
/// Verifies that when the same field is added multiple times with conflicting
/// term-vector settings, the "with term vectors" setting wins for the whole field.
/// </summary>
public virtual void TestMixedTermVectorSettingsSameField()
{
    Document document = new Document();
    // f1: added first WITHOUT term vectors, then WITH them
    document.Add(new Field("f1", "v1", Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.NO));
    document.Add(new Field("f1", "v2", Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    // f2: added first WITH term vectors, then WITHOUT them
    document.Add(new Field("f2", "v1", Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    document.Add(new Field("f2", "v2", Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.NO));

    IndexWriter indexWriter = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    indexWriter.AddDocument(document);
    indexWriter.Close();

    _TestUtil.CheckIndex(dir);

    IndexReader indexReader = IndexReader.Open(dir);

    // Both values of each field must appear in the stored term vector.
    TermFreqVector vectorF1 = indexReader.GetTermFreqVector(0, "f1");
    Assert.IsNotNull(vectorF1);
    Assert.AreEqual(2, vectorF1.GetTerms().Length, "the 'with_tv' setting should rule!");

    TermFreqVector vectorF2 = indexReader.GetTermFreqVector(0, "f2");
    Assert.IsNotNull(vectorF2);
    Assert.AreEqual(2, vectorF2.GetTerms().Length, "the 'with_tv' setting should rule!");
}
/// <summary>
/// Returns the term frequency vector for document <paramref name="n"/> and the
/// given field by delegating to the sub-reader that owns that field, or null
/// when no sub-reader is registered for the field.
/// </summary>
public override TermFreqVector GetTermFreqVector(int n, System.String field)
{
    EnsureOpen();
    IndexReader fieldReader = (IndexReader)fieldToReader[field];
    if (fieldReader == null)
    {
        return null;
    }
    return fieldReader.GetTermFreqVector(n, field);
}
/// <summary>
/// Returns the term frequency vector for document <paramref name="n"/> and the
/// given field by delegating to the sub-reader that owns that field, or null
/// when the field is unknown.
/// </summary>
public override ITermFreqVector GetTermFreqVector(int n, System.String field, IState state)
{
    EnsureOpen();
    // BUG FIX: the generic dictionary indexer throws KeyNotFoundException for
    // an unknown field, so the original "reader == null" check was unreachable
    // for missing keys. TryGetValue restores the intended return-null contract
    // (the Hashtable-based sibling overload returns null in that case).
    IndexReader reader;
    if (!fieldToReader.TryGetValue(field, out reader) || reader == null)
    {
        return null;
    }
    return reader.GetTermFreqVector(n, field, state);
}
/// <summary>
/// Fetches term vectors for every document (both the all-fields and the
/// single-field overloads), accumulating retrieval time into timeElapsed and
/// verifying each result via VerifyVectors.
/// </summary>
private void TestTermVectors()
{
    // check:
    int numDocs = reader.NumDocs();
    for (int docId = 0; docId < numDocs; docId++)
    {
        // FIX: Stopwatch is a monotonic high-resolution timer; DateTime.Now is
        // wall-clock time and can jump (NTP/DST), corrupting elapsed measurements.
        var watch = System.Diagnostics.Stopwatch.StartNew();
        ITermFreqVector[] vectors = reader.GetTermFreqVectors(docId, null);
        watch.Stop();
        timeElapsed += watch.ElapsedMilliseconds;

        // verify vectors result
        VerifyVectors(vectors, docId);

        watch = System.Diagnostics.Stopwatch.StartNew();
        ITermFreqVector vector = reader.GetTermFreqVector(docId, "field", null);
        watch.Stop();
        timeElapsed += watch.ElapsedMilliseconds;

        // Wrap the single vector so it can be verified by the same helper.
        vectors = new ITermFreqVector[] { vector };
        VerifyVectors(vectors, docId);
    }
}
/// <summary>
/// Routes the mapper request for a single field to the sub-reader that owns it;
/// does nothing when no sub-reader is registered for the field.
/// </summary>
public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
{
    EnsureOpen();
    IndexReader fieldReader = (IndexReader)fieldToReader[field];
    // Unknown field: nothing to map.
    if (fieldReader == null)
    {
        return;
    }
    fieldReader.GetTermFreqVector(docNumber, field, mapper);
}
/// <summary>
/// Feeds the mapper the term vectors of every field, delegating each field to
/// its owning sub-reader.
/// </summary>
public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper, IState state)
{
    EnsureOpen();
    foreach (KeyValuePair<string, IndexReader> entry in fieldToReader)
    {
        entry.Value.GetTermFreqVector(docNumber, entry.Key, mapper, state);
    }
}
/// <summary>
/// Applies the mapper across all fields managed by this reader, one sub-reader
/// at a time.
/// </summary>
public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
{
    EnsureOpen();
    foreach (var pair in fieldToReader)
    {
        IndexReader subReader = pair.Value;
        subReader.GetTermFreqVector(docNumber, pair.Key, mapper);
    }
}
/// <summary>
/// Applies the mapper across all fields managed by this reader. Enumerates a
/// snapshot copy of fieldToReader (as the original did), so the loop is not
/// affected by changes to the live map while it runs.
/// </summary>
public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
{
    EnsureOpen();
    foreach (System.Collections.DictionaryEntry entry in new System.Collections.Hashtable(fieldToReader))
    {
        System.String field = (System.String)entry.Key;
        IndexReader reader = (IndexReader)entry.Value;
        reader.GetTermFreqVector(docNumber, field, mapper);
    }
}
/// <summary>
/// Regression test for LUCENENET-183: IndexOf on a term position vector built
/// from a Reader-valued field must locate the correct term.
/// </summary>
public void Test_SegmentTermVector_IndexOf()
{
    var directory = new Lucene.Net.Store.RAMDirectory();
    var analyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
    var writer = new Lucene.Net.Index.IndexWriter(directory, analyzer, Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED);

    // Index a single document whose contents come from a Reader ("a_ a0"),
    // with offsets recorded in the term vector.
    var contentBytes = System.Text.Encoding.ASCII.GetBytes("a_ a0");
    var contentReader = new System.IO.StreamReader(new System.IO.MemoryStream(contentBytes));
    var document = new Lucene.Net.Documents.Document();
    document.Add(new Lucene.Net.Documents.Field("contents", contentReader, Lucene.Net.Documents.Field.TermVector.WITH_OFFSETS));
    writer.AddDocument(document);

    var indexReader = writer.GetReader();
    var tpv = indexReader.GetTermFreqVector(0, "contents") as Lucene.Net.Index.TermPositionVector;
    int index = tpv.IndexOf("a_");
    Assert.AreEqual(index, 1, "See the issue: LUCENENET-183");
}
/// <summary>
/// A convenience method that tries a number of approaches to getting a token stream.
/// The cost of discovering that the index holds no term vectors is minimal (1000
/// invocations still register 0 ms), so this "lazy" approach is acceptable.
/// </summary>
/// <returns>null if field not stored correctly</returns>
public static TokenStream GetAnyTokenStream(IndexReader reader, int docId, String field, Analyzer analyzer)
{
    // Preferred source: a stored term position vector for this field.
    // (A null or non-positional vector simply yields null here.)
    TokenStream ts = null;
    var termPositionVector = reader.GetTermFreqVector(docId, field) as TermPositionVector;
    if (termPositionVector != null)
    {
        ts = GetTokenStream(termPositionVector);
    }
    // No token info stored, so fall back to analyzing the raw content.
    return ts ?? GetTokenStream(reader, docId, field, analyzer);
}
// get all vectors
/// <summary>
/// Collects the term frequency vector of every field for document
/// <paramref name="n"/>; fields without a stored vector are skipped.
/// </summary>
public override TermFreqVector[] GetTermFreqVectors(int n)
{
    // Consistency: the other GetTermFreqVectors variants guard with
    // EnsureOpen() before touching sub-readers.
    EnsureOpen();
    System.Collections.ArrayList results = new System.Collections.ArrayList();
    System.Collections.IEnumerator i = new System.Collections.Hashtable(fieldToReader).GetEnumerator();
    while (i.MoveNext())
    {
        System.Collections.DictionaryEntry e = (System.Collections.DictionaryEntry)i.Current;
        // BUG FIX: fieldToReader maps field name -> reader (see the indexer
        // usage in the single-field overload), so Key is the field and Value
        // is the reader. The previous code had them swapped, which would
        // throw InvalidCastException on the first entry.
        System.String field = (System.String)e.Key;
        IndexReader reader = (IndexReader)e.Value;
        TermFreqVector vector = reader.GetTermFreqVector(n, field);
        if (vector != null)
        {
            results.Add(vector);
        }
    }
    return ((TermFreqVector[])(results.ToArray(typeof(TermFreqVector))));
}
// get all vectors
/// <summary>
/// Collects the term frequency vector of every field for document
/// <paramref name="n"/>; fields without a stored vector are skipped.
/// </summary>
public override TermFreqVector[] GetTermFreqVectors(int n)
{
    EnsureOpen();
    var collected = new List<TermFreqVector>();
    foreach (var entry in fieldToReader)
    {
        TermFreqVector vector = entry.Value.GetTermFreqVector(n, entry.Key);
        if (vector != null)
        {
            collected.Add(vector);
        }
    }
    return collected.ToArray();
}
// get all vectors
/// <summary>
/// Gathers one term frequency vector per field for document
/// <paramref name="n"/>; fields with no stored vector are omitted.
/// </summary>
public override ITermFreqVector[] GetTermFreqVectors(int n, IState state)
{
    EnsureOpen();
    List<ITermFreqVector> found = new List<ITermFreqVector>();
    foreach (KeyValuePair<string, IndexReader> pair in fieldToReader)
    {
        ITermFreqVector tfv = pair.Value.GetTermFreqVector(n, pair.Key, state);
        if (tfv != null)
        {
            found.Add(tfv);
        }
    }
    return found.ToArray();
}
// get all vectors
/// <summary>
/// Collects, for document <paramref name="n"/>, the term frequency vector of
/// every field that has one stored.
/// </summary>
public override TermFreqVector[] GetTermFreqVectors(int n)
{
    EnsureOpen();
    List<TermFreqVector> vectors = new List<TermFreqVector>();
    // foreach replaces the original hand-rolled enumerator loop.
    foreach (KeyValuePair<string, IndexReader> pair in fieldToReader)
    {
        TermFreqVector tfv = pair.Value.GetTermFreqVector(n, pair.Key);
        if (tfv != null)
        {
            vectors.Add(tfv);
        }
    }
    return vectors.ToArray();
}
/// <summary>
/// Round-trips two documents through the index and checks stored-field counts,
/// the term vector of the second text field, and the segment norms.
/// </summary>
public virtual void DoTestDocument()
{
    sis.Read(dir);
    IndexReader ir = OpenReader();
    Assert.IsTrue(ir != null);

    // Unstored fields must not survive the round trip, so each retrieved
    // document has exactly DocHelper.unstored.Count fewer fields.
    Document retrieved1 = ir.Document(0);
    Assert.IsTrue(retrieved1 != null);
    Assert.IsTrue(DocHelper.NumFields(retrieved1) == DocHelper.NumFields(doc1) - DocHelper.unstored.Count);

    Document retrieved2 = ir.Document(1);
    Assert.IsTrue(retrieved2 != null);
    Assert.IsTrue(DocHelper.NumFields(retrieved2) == DocHelper.NumFields(doc2) - DocHelper.unstored.Count);

    // Term vectors and norms should also be intact.
    TermFreqVector tfv = ir.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
    Assert.IsTrue(tfv != null);
    TestSegmentReader.CheckNorms(ir);
}
/// <summary>
/// Rebuilds a token stream from the stored term position vector of the given
/// document and field.
/// </summary>
/// <exception cref="ArgumentException">
/// if the field has no term vector, or its vector carries no position data.
/// </exception>
public static TokenStream GetTokenStream(IndexReader reader, int docId, System.String field)
{
    var tfv = reader.GetTermFreqVector(docId, field);
    if (tfv == null)
    {
        // FIX: added the missing space before "does" in the message
        // (previously rendered as e.g. "field in doc #3does not ...").
        throw new ArgumentException(field + " in doc #" + docId + " does not have any term position data stored");
    }
    // FIX: reuse the vector already fetched above instead of issuing a second,
    // redundant GetTermFreqVector call against the index.
    var tpv = tfv as TermPositionVector;
    if (tpv != null)
    {
        return GetTokenStream(tpv);
    }
    throw new ArgumentException(field + " in doc #" + docId + " does not have any term position data stored");
}
/// <summary>
/// Validates reader state via EnsureOpen, then delegates to the wrapped reader.
/// </summary>
public override ITermFreqVector GetTermFreqVector(int docNumber, string field)
{
    EnsureOpen();
    ITermFreqVector vector = in_Renamed.GetTermFreqVector(docNumber, field);
    return vector;
}
/// <summary>
/// Exercises TermVectorMapper support: SortedTermVectorMapper (all fields
/// merged), FieldSortedTermVectorMapper (per-field, with and without
/// offsets/positions ignored), and document-number propagation to a mapper.
/// </summary>
public virtual void TestMapper()
{
    TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
    Assert.IsTrue(reader != null);

    // --- SortedTermVectorMapper over doc 0: all fields munged together ---
    SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
    reader.Get(0, mapper);
    var set_Renamed = mapper.TermVectorEntrySet;
    Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
    //three fields, 4 terms, all terms are the same
    Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
    //Check offsets and positions
    for (System.Collections.IEnumerator iterator = set_Renamed.GetEnumerator(); iterator.MoveNext();)
    {
        TermVectorEntry tve = (TermVectorEntry)iterator.Current;
        Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
        Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
        Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
    }

    // --- Same mapper type over doc 1 ---
    mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
    reader.Get(1, mapper);
    set_Renamed = mapper.TermVectorEntrySet;
    Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
    //three fields, 4 terms, all terms are the same
    Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
    //Should have offsets and positions b/c we are munging all the fields together
    for (System.Collections.IEnumerator iterator = set_Renamed.GetEnumerator(); iterator.MoveNext();)
    {
        TermVectorEntry tve = (TermVectorEntry)iterator.Current;
        Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
        Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
        Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
    }

    // --- FieldSortedTermVectorMapper: entries grouped per field ---
    FieldSortedTermVectorMapper fsMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
    reader.Get(0, fsMapper);
    var map = fsMapper.FieldToTerms;
    Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
    for (var iterator = map.GetEnumerator(); iterator.MoveNext();)
    {
        var entry = iterator.Current;
        var sortedSet = entry.Value;
        Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
        for (var inner = sortedSet.GetEnumerator(); inner.MoveNext();)
        {
            TermVectorEntry tve = inner.Current;
            Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
            //Check offsets and positions.
            Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
            System.String field = tve.Field;
            if (field.Equals(testFields[0]))
            {
                //should have offsets
                Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
            }
            else if (field.Equals(testFields[1]))
            {
                //should not have offsets
                Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
            }
        }
    }

    //Try mapper that ignores offs and positions
    fsMapper = new FieldSortedTermVectorMapper(true, true, new TermVectorEntryFreqSortedComparator());
    reader.Get(0, fsMapper);
    map = fsMapper.FieldToTerms;
    Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
    for (var iterator = map.GetEnumerator(); iterator.MoveNext();)
    {
        var entry = iterator.Current;
        var sortedSet = entry.Value;
        Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
        for (var inner = sortedSet.GetEnumerator(); inner.MoveNext();)
        {
            TermVectorEntry tve = inner.Current;
            Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
            //Check offsets and positions.
            Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
            System.String field = tve.Field;
            if (field.Equals(testFields[0]))
            {
                // offsets/positions are ignored by this mapper, so both must be null
                // (note: the assertion *messages* below are stale, but they only
                // print on failure)
                Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is null and it shouldn't be");
            }
            else if (field.Equals(testFields[1]))
            {
                //should not have offsets
                Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
            }
        }
    }

    // test setDocumentNumber(): the reader must tell the mapper which document
    // it is currently mapping, for both the all-fields and per-field overloads.
    IndexReader ir = IndexReader.Open(dir, true);
    DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper();
    Assert.AreEqual(-1, docNumAwareMapper.GetDocumentNumber());
    ir.GetTermFreqVector(0, docNumAwareMapper);
    Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
    docNumAwareMapper.SetDocumentNumber(-1);
    ir.GetTermFreqVector(1, docNumAwareMapper);
    Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
    docNumAwareMapper.SetDocumentNumber(-1);
    ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
    Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
    docNumAwareMapper.SetDocumentNumber(-1);
    ir.GetTermFreqVector(1, "f2", docNumAwareMapper);
    Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
    docNumAwareMapper.SetDocumentNumber(-1);
    ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
    Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
    ir.Close();
}
/// <summary>
/// Delegates the term-vector request to the wrapped reader.
/// </summary>
public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
{
    // Consistency fix: the sibling delegating overloads call EnsureOpen()
    // before forwarding; without it this overload could silently be used
    // after the reader has been closed.
    EnsureOpen();
    return (in_Renamed.GetTermFreqVector(docNumber, field));
}
/// <summary>
/// Validates reader state via EnsureOpen, then forwards the request (with its
/// state token) to the wrapped reader.
/// </summary>
public override ITermFreqVector GetTermFreqVector(int docNumber, System.String field, IState state)
{
    EnsureOpen();
    ITermFreqVector result = in_Renamed.GetTermFreqVector(docNumber, field, state);
    return result;
}