示例#1
0
        /// <summary>
        /// Indexes a single document whose fields are added in the order "c", "a", "b", "x"
        /// (all storing term vectors with positions and offsets) and asserts that the term
        /// vectors read back for document 0 come out in the order "a", "b", "c", "x".
        /// For every vector it also checks the three terms and the single position of each term.
        /// </summary>
        public virtual void  TestTermVectorsFieldOrder()
        {
            Directory dir = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            // Fields are deliberately added out of alphabetical order.
            Document doc = new Document();
            doc.Add(new Field("c", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            doc.Add(new Field("a", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            doc.Add(new Field("b", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            doc.Add(new Field("x", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            writer.AddDocument(doc);
            writer.Close();

            IndexReader reader = IndexReader.Open(dir);
            try
            {
                TermFreqVector[] v = reader.GetTermFreqVectors(0);
                Assert.AreEqual(4, v.Length);

                System.String[] expectedFields = new System.String[] { "a", "b", "c", "x" };
                // Position of each expected term within "some content here", indexed in the
                // order the terms are asserted below: "content" -> 1, "here" -> 2, "some" -> 0.
                int[] expectedPositions = new int[] { 1, 2, 0 };

                for (int i = 0; i < v.Length; i++)
                {
                    TermPositionVector posVec = (TermPositionVector)v[i];
                    Assert.AreEqual(expectedFields[i], posVec.GetField());

                    System.String[] terms = posVec.GetTerms();
                    Assert.AreEqual(3, terms.Length);
                    Assert.AreEqual("content", terms[0]);
                    Assert.AreEqual("here", terms[1]);
                    Assert.AreEqual("some", terms[2]);

                    for (int j = 0; j < 3; j++)
                    {
                        // Every term occurs exactly once in the field value.
                        int[] positions = posVec.GetTermPositions(j);
                        Assert.AreEqual(1, positions.Length);
                        Assert.AreEqual(expectedPositions[j], positions[0]);
                    }
                }
            }
            finally
            {
                // Release the reader even when one of the assertions above fails.
                reader.Close();
            }
        }
示例#2
0
        /// <summary>
        /// Adds a complete document specified by all of its term vectors. Each vector is
        /// written as one field: the field is looked up by the vector's field name, opened,
        /// every term is added with its frequency, and the field is closed.
        /// If the document has no term vectors (<paramref name="vectors"/> is null), the
        /// document is still opened and closed (per the original note: "add value for tvx").
        /// </summary>
        /// <param name="vectors">
        /// The term vectors of the document; may be null. Elements that also implement
        /// <c>TermPositionVector</c> contribute their positions and offsets; for all other
        /// elements positions and offsets are passed as null.
        /// </param>
        /// <exception cref="System.IO.IOException">
        /// Declared by the original documentation; presumably propagated from the underlying writes.
        /// </exception>
        public void  AddAllDocVectors(TermFreqVector[] vectors)
        {
            OpenDocument();

            if (vectors != null)
            {
                for (int i = 0; i < vectors.Length; i++)
                {
                    TermFreqVector tfVector = vectors[i];

                    // Type test instead of catching InvalidCastException: no exception is used
                    // for control flow, and an InvalidCastException raised while writing the
                    // field can no longer be mistaken for "not a position vector".
                    TermPositionVector tpVector = tfVector as TermPositionVector;

                    // Positions/offsets are only flagged when the vector is non-empty and
                    // actually carries that data for its first term.
                    bool storePositionWithTermVector = false;
                    bool storeOffsetWithTermVector = false;
                    if (tpVector != null && tpVector.Size() > 0)
                    {
                        storePositionWithTermVector = tpVector.GetTermPositions(0) != null;
                        storeOffsetWithTermVector = tpVector.GetOffsets(0) != null;
                    }

                    FieldInfo fieldInfo = fieldInfos.FieldInfo(tfVector.GetField());
                    OpenField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);

                    // Fetch the term and frequency arrays once rather than on every iteration.
                    int termCount = tfVector.Size();
                    System.String[] terms = tfVector.GetTerms();
                    int[] termFrequencies = tfVector.GetTermFrequencies();

                    for (int j = 0; j < termCount; j++)
                    {
                        if (tpVector != null)
                        {
                            AddTermInternal(terms[j], termFrequencies[j], tpVector.GetTermPositions(j), tpVector.GetOffsets(j));
                        }
                        else
                        {
                            AddTermInternal(terms[j], termFrequencies[j], null, null);
                        }
                    }

                    CloseField();
                }
            }

            CloseDocument();
        }
示例#3
0
        /// <summary>
        /// Indexes one document containing five instances of the field "field", each with the
        /// value "one" but a different term-vector setting (NO, YES, WITH_POSITIONS,
        /// WITH_OFFSETS, WITH_POSITIONS_OFFSETS), then asserts that the document exposes a
        /// single term vector for "field" holding the term "one" with frequency 5, five
        /// positions and five offsets.
        /// </summary>
        public virtual void  TestMixedVectrosVectors()
        {
            IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            Document doc = new Document();

            // Same field name and value every time; only the term-vector option varies.
            doc.Add(new Field("field", "one", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
            doc.Add(new Field("field", "one", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.Add(new Field("field", "one", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
            doc.Add(new Field("field", "one", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("field", "one", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            writer.AddDocument(doc);
            writer.Close();

            searcher = new IndexSearcher(directory);

            Query query = new TermQuery(new Term("field", "one"));

            ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);

            TermFreqVector[] vector = searcher.reader_ForNUnit.GetTermFreqVectors(hits[0].doc);
            // Value-based assertions (expected first) so a failure reports the actual value.
            Assert.IsNotNull(vector);
            Assert.AreEqual(1, vector.Length);
            TermPositionVector tfv = (TermPositionVector)vector[0];

            Assert.AreEqual("field", tfv.GetField());
            System.String[] terms = tfv.GetTerms();
            Assert.AreEqual(1, terms.Length);
            Assert.AreEqual("one", terms[0]);
            Assert.AreEqual(5, tfv.GetTermFrequencies()[0]);

            // One position per field instance, expected to be 0..4.
            int[] positions = tfv.GetTermPositions(0);
            Assert.AreEqual(5, positions.Length);
            for (int i = 0; i < 5; i++)
            {
                Assert.AreEqual(i, positions[i]);
            }

            // "one" is three characters long; the expected offsets advance by four per
            // instance, i.e. the assertions imply a one-character gap between instances.
            TermVectorOffsetInfo[] offsets = tfv.GetOffsets(0);
            Assert.AreEqual(5, offsets.Length);
            for (int i = 0; i < 5; i++)
            {
                Assert.AreEqual(4 * i, offsets[i].GetStartOffset());
                Assert.AreEqual(4 * i + 3, offsets[i].GetEndOffset());
            }
        }