// Indexes one document whose four fields are added in the order c, a, b, x and checks
// that the term vectors read back are reported in the order a, b, c, x, each holding the
// terms "content", "here", "some" with a single position per term.
public virtual void TestTermVectorsFieldOrder()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    // Fields are deliberately added out of alphabetical order.
    doc.Add(new Field("c", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    doc.Add(new Field("a", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    doc.Add(new Field("b", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    doc.Add(new Field("x", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.AddDocument(doc);
    writer.Close();

    System.String[] expectedFields = new System.String[] { "a", "b", "c", "x" };
    System.String[] expectedTerms = new System.String[] { "content", "here", "some" };
    // expectedPositions[j] is the token position of expectedTerms[j] within "some content here".
    int[] expectedPositions = new int[] { 1, 2, 0 };

    IndexReader reader = IndexReader.Open(dir);
    try
    {
        TermFreqVector[] v = reader.GetTermFreqVectors(0);
        Assert.AreEqual(expectedFields.Length, v.Length);

        for (int i = 0; i < v.Length; i++)
        {
            TermPositionVector posVec = (TermPositionVector)v[i];
            Assert.AreEqual(expectedFields[i], posVec.GetField());

            System.String[] terms = posVec.GetTerms();
            Assert.AreEqual(expectedTerms.Length, terms.Length);

            for (int j = 0; j < expectedTerms.Length; j++)
            {
                Assert.AreEqual(expectedTerms[j], terms[j]);

                int[] positions = posVec.GetTermPositions(j);
                Assert.AreEqual(1, positions.Length);
                Assert.AreEqual(expectedPositions[j], positions[0]);
            }
        }
    }
    finally
    {
        // Release the reader even when an assertion above fails.
        reader.Close();
    }
    dir.Close();
}
/// <summary> Add a complete document specified by all its term vectors. If document has no
/// term vectors (<paramref name="vectors"/> is null), the document is still opened and
/// closed, which per the original note adds a value for tvx.
/// </summary>
/// <param name="vectors">The term vectors of the document, one per field; may be null.
/// Elements that also implement <see cref="TermPositionVector"/> have their positions
/// and offsets written; plain <see cref="TermFreqVector"/> elements are written with
/// terms and frequencies only.
/// </param>
/// <exception cref="System.IO.IOException">IOException</exception>
public void AddAllDocVectors(TermFreqVector[] vectors)
{
    OpenDocument();
    if (vectors != null)
    {
        for (int i = 0; i < vectors.Length; i++)
        {
            TermFreqVector vector = vectors[i];
            // Type test instead of cast-and-catch: a failed cast is the expected case for
            // frequency-only vectors, and a catch around the whole write path could also
            // swallow an unrelated InvalidCastException and write the field a second time.
            TermPositionVector tpVector = vector as TermPositionVector;

            bool storePositionWithTermVector = false;
            bool storeOffsetWithTermVector = false;
            if (tpVector != null && tpVector.Size() > 0)
            {
                // The first term is probed to decide whether positions/offsets are present.
                storePositionWithTermVector = tpVector.GetTermPositions(0) != null;
                storeOffsetWithTermVector = tpVector.GetOffsets(0) != null;
            }

            FieldInfo fieldInfo = fieldInfos.FieldInfo(vector.GetField());
            OpenField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);

            // Fetch the per-field arrays once rather than on every loop iteration.
            int termCount = vector.Size();
            System.String[] terms = vector.GetTerms();
            int[] frequencies = vector.GetTermFrequencies();
            for (int j = 0; j < termCount; j++)
            {
                int[] positions = null;
                TermVectorOffsetInfo[] offsets = null;
                if (tpVector != null)
                {
                    positions = tpVector.GetTermPositions(j);
                    offsets = tpVector.GetOffsets(j);
                }
                AddTermInternal(terms[j], frequencies[j], positions, offsets);
            }

            CloseField();
        }
    }
    CloseDocument();
}
// Indexes one document that adds the same field name five times, each time with the same
// value "one" but a different TermVector setting (NO, YES, WITH_POSITIONS, WITH_OFFSETS,
// WITH_POSITIONS_OFFSETS), and checks that the single term vector read back reports all
// five occurrences together with their positions and offsets.
public virtual void TestMixedVectrosVectors()
{
    // Number of times the field is added below; also the expected term frequency.
    int occurrences = 5;

    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "one", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
    doc.Add(new Field("field", "one", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
    doc.Add(new Field("field", "one", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
    doc.Add(new Field("field", "one", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
    doc.Add(new Field("field", "one", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.AddDocument(doc);
    writer.Close();

    searcher = new IndexSearcher(directory);
    Query query = new TermQuery(new Term("field", "one"));
    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);

    TermFreqVector[] vector = searcher.reader_ForNUnit.GetTermFreqVectors(hits[0].doc);
    Assert.IsNotNull(vector);
    Assert.AreEqual(1, vector.Length);

    TermPositionVector tfv = (TermPositionVector)vector[0];
    Assert.AreEqual("field", tfv.GetField());

    System.String[] terms = tfv.GetTerms();
    Assert.AreEqual(1, terms.Length);
    // Expected value first, so a failure message labels the two values correctly.
    Assert.AreEqual("one", terms[0]);
    Assert.AreEqual(occurrences, tfv.GetTermFrequencies()[0]);

    int[] positions = tfv.GetTermPositions(0);
    Assert.AreEqual(occurrences, positions.Length);
    for (int i = 0; i < occurrences; i++)
    {
        Assert.AreEqual(i, positions[i]);
    }

    TermVectorOffsetInfo[] offsets = tfv.GetOffsets(0);
    Assert.AreEqual(occurrences, offsets.Length);
    for (int i = 0; i < occurrences; i++)
    {
        // Each occurrence of the 3-character token is expected to start 4 offset units
        // after the previous one.
        Assert.AreEqual(4 * i, offsets[i].GetStartOffset());
        Assert.AreEqual(4 * i + 3, offsets[i].GetEndOffset());
    }
}