GetTermFreqVector() abstract public method

Returns a term frequency vector for the specified document and field. The returned vector contains the terms and their frequencies for the specified field of this document, provided the field had the storeTermVector flag set. If term vectors were stored with positions or offsets, a TermPositionVector is returned.
abstract public GetTermFreqVector ( int docNumber, String field ) : ITermFreqVector
docNumber int document for which the term frequency vector is returned
field String field for which the term frequency vector is returned
return ITermFreqVector
Example #1
0
        /// <summary>
        /// Indexes one document in which fields "f1" and "f2" are each added twice, once
        /// without term vectors and once with positions and offsets (in opposite orders),
        /// and asserts that the reader returns a two-term vector for both fields.
        /// </summary>
        public virtual void  TestMixedTermVectorSettingsSameField()
        {
            Document doc = new Document();

            // f1 first without tv then with tv
            doc.Add(new Field("f1", "v1", Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.NO));
            doc.Add(new Field("f1", "v2", Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
            // f2 first with tv then without tv
            doc.Add(new Field("f2", "v1", Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
            doc.Add(new Field("f2", "v2", Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.NO));

            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.AddDocument(doc);
            writer.Close();

            _TestUtil.CheckIndex(dir);

            IndexReader reader = IndexReader.Open(dir);
            try
            {
                // f1
                TermFreqVector tfv1 = reader.GetTermFreqVector(0, "f1");

                Assert.IsNotNull(tfv1);
                Assert.AreEqual(2, tfv1.GetTerms().Length, "the 'with_tv' setting should rule!");
                // f2
                TermFreqVector tfv2 = reader.GetTermFreqVector(0, "f2");

                Assert.IsNotNull(tfv2);
                Assert.AreEqual(2, tfv2.GetTerms().Length, "the 'with_tv' setting should rule!");
            }
            finally
            {
                // Release the reader even when an assertion above fails.
                reader.Close();
            }
        }
Example #2
0
        /// <summary>
        /// Looks up the reader registered for <paramref name="field"/> in fieldToReader and
        /// forwards the term vector request to it.
        /// </summary>
        /// <param name="n">document number passed through to the underlying reader</param>
        /// <param name="field">field name, used as the lookup key and in the forwarded call</param>
        /// <returns>the underlying reader's result, or null when the lookup yields null</returns>
        public override TermFreqVector GetTermFreqVector(int n, System.String field)
        {
            EnsureOpen();

            IndexReader owner = (IndexReader)fieldToReader[field];
            if (owner == null)
            {
                return null;
            }

            return owner.GetTermFreqVector(n, field);
        }
Example #3
0
        /// <summary>
        /// Looks up the reader registered for <paramref name="field"/> in fieldToReader and
        /// forwards the term vector request, including <paramref name="state"/>, to it.
        /// </summary>
        /// <param name="n">document number passed through to the underlying reader</param>
        /// <param name="field">field name, used as the lookup key and in the forwarded call</param>
        /// <param name="state">passed through unchanged to the underlying reader</param>
        /// <returns>the underlying reader's result, or null when the lookup yields null</returns>
        public override ITermFreqVector GetTermFreqVector(int n, System.String field, IState state)
        {
            EnsureOpen();

            IndexReader owner = fieldToReader[field];
            if (owner == null)
            {
                return null;
            }

            return owner.GetTermFreqVector(n, field, state);
        }
        /// <summary>
        /// For every document number below reader.NumDocs(), fetches all term vectors and then
        /// the vector of the field named "field", verifies each result with VerifyVectors, and
        /// adds the wall-clock milliseconds spent in the two reader calls to timeElapsed.
        /// </summary>
        private void  TestTermVectors()
        {
            int docCount = reader.NumDocs();

            for (int doc = 0; doc < docCount; doc++)
            {
                // Time the lookup of all vectors for this document.
                long began = DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond;
                ITermFreqVector[] allFields = reader.GetTermFreqVectors(doc, null);
                timeElapsed += (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) - began;

                VerifyVectors(allFields, doc);

                // Time the lookup of a single field's vector.
                began = DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond;
                ITermFreqVector single = reader.GetTermFreqVector(doc, "field", null);
                timeElapsed += (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) - began;

                // Wrap the single vector so the same verification routine can be reused.
                ITermFreqVector[] wrapped = new ITermFreqVector[1];
                wrapped[0] = single;
                VerifyVectors(wrapped, doc);
            }
        }
Example #5
0
        /// <summary>
        /// Forwards the mapper-based term vector request to the reader registered for
        /// <paramref name="field"/> in fieldToReader; does nothing when the lookup yields null.
        /// </summary>
        /// <param name="docNumber">document number passed through to the underlying reader</param>
        /// <param name="field">field name, used as the lookup key and in the forwarded call</param>
        /// <param name="mapper">mapper handed to the underlying reader</param>
        public override void  GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
        {
            EnsureOpen();

            IndexReader owner = (IndexReader)fieldToReader[field];
            if (owner == null)
            {
                return;
            }

            owner.GetTermFreqVector(docNumber, field, mapper);
        }
Example #6
0
        /// <summary>
        /// Feeds <paramref name="mapper"/> with the term vectors of every entry in
        /// fieldToReader by calling each reader with its own field name.
        /// </summary>
        /// <param name="docNumber">document number passed through to each reader</param>
        /// <param name="mapper">mapper handed to each reader</param>
        /// <param name="state">passed through unchanged to each reader</param>
        public override void  GetTermFreqVector(int docNumber, TermVectorMapper mapper, IState state)
        {
            EnsureOpen();

            foreach (var pair in fieldToReader)
            {
                pair.Value.GetTermFreqVector(docNumber, pair.Key, mapper, state);
            }
        }
Example #7
0
        /// <summary>
        /// Feeds <paramref name="mapper"/> with the term vectors of every entry in
        /// fieldToReader by calling each reader with its own field name.
        /// </summary>
        /// <param name="docNumber">document number passed through to each reader</param>
        /// <param name="mapper">mapper handed to each reader</param>
        public override void  GetTermFreqVector(int docNumber, TermVectorMapper mapper)
        {
            EnsureOpen();

            foreach (KeyValuePair<string, IndexReader> pair in fieldToReader)
            {
                pair.Value.GetTermFreqVector(docNumber, pair.Key, mapper);
            }
        }
Example #8
0
        /// <summary>
        /// Feeds <paramref name="mapper"/> with the term vectors of every entry in
        /// fieldToReader. Iteration runs over a Hashtable copy of fieldToReader rather than
        /// over fieldToReader itself.
        /// </summary>
        /// <param name="docNumber">document number passed through to each reader</param>
        /// <param name="mapper">mapper handed to each reader</param>
        public override void  GetTermFreqVector(int docNumber, TermVectorMapper mapper)
        {
            EnsureOpen();

            System.Collections.Hashtable snapshot = new System.Collections.Hashtable(fieldToReader);
            foreach (System.Collections.DictionaryEntry entry in snapshot)
            {
                System.String fieldName = (System.String)entry.Key;
                IndexReader   owner     = (IndexReader)entry.Value;
                owner.GetTermFreqVector(docNumber, fieldName, mapper);
            }
        }
Example #9
0
        /// <summary>
        /// Regression test for LUCENENET-183: indexes the text "a_ a0" with a whitespace
        /// analyzer and term vectors with offsets, then asserts that IndexOf("a_") on the
        /// resulting term position vector is 1.
        /// </summary>
        public void Test_SegmentTermVector_IndexOf()
        {
            Lucene.Net.Store.RAMDirectory directory = new Lucene.Net.Store.RAMDirectory();
            Lucene.Net.Analysis.Analyzer  analyzer  = new Lucene.Net.Analysis.WhitespaceAnalyzer();
            Lucene.Net.Index.IndexWriter  writer    = new Lucene.Net.Index.IndexWriter(directory, analyzer, Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED);
            try
            {
                Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
                document.Add(new Lucene.Net.Documents.Field("contents", new System.IO.StreamReader(new System.IO.MemoryStream(System.Text.Encoding.ASCII.GetBytes("a_ a0"))), Lucene.Net.Documents.Field.TermVector.WITH_OFFSETS));
                writer.AddDocument(document);

                Lucene.Net.Index.IndexReader reader = writer.GetReader();
                try
                {
                    Lucene.Net.Index.TermPositionVector tpv = reader.GetTermFreqVector(0, "contents") as Lucene.Net.Index.TermPositionVector;

                    // Fail with a clear assertion instead of a NullReferenceException when the
                    // returned vector is missing or is not a TermPositionVector.
                    Assert.IsNotNull(tpv, "expected a TermPositionVector for field 'contents'");

                    int index = tpv.IndexOf("a_");

                    // Expected value first, actual second, so failure output reads correctly.
                    Assert.AreEqual(1, index, "See the issue: LUCENENET-183");
                }
                finally
                {
                    reader.Close();
                }
            }
            finally
            {
                writer.Close();
            }
        }
Example #10
0
        /// <summary>
        /// Convenience method that first tries to build a token stream from the stored term
        /// position vector of the field and, when that yields nothing, falls back to the
        /// analyzer-based GetTokenStream overload.
        /// Probing for absent term vectors was measured as cheap (1000 invocations still
        /// registers 0 ms), which is why this try-then-fall-back approach is considered acceptable.
        /// </summary>
        /// <returns>null if field not stored correctly</returns>
        public static TokenStream GetAnyTokenStream(IndexReader reader, int docId, String field, Analyzer analyzer)
        {
            // "as" yields null both when no vector exists and when it carries no positions.
            var positions = reader.GetTermFreqVector(docId, field) as TermPositionVector;

            TokenStream fromVector = null;
            if (positions != null)
            {
                fromVector = GetTokenStream(positions);
            }

            if (fromVector != null)
            {
                return fromVector;
            }

            //No token info stored so fall back to analyzing raw content
            return GetTokenStream(reader, docId, field, analyzer);
        }
Example #11
0
 /// <summary>
 /// Collects the non-null term frequency vectors of document <paramref name="n"/> from
 /// every entry in fieldToReader, asking each reader for the vector of its own field.
 /// Iteration runs over a Hashtable copy of fieldToReader.
 /// </summary>
 /// <param name="n">document number passed through to each reader</param>
 /// <returns>array of the vectors that were not null; empty when there are none</returns>
 public override TermFreqVector[] GetTermFreqVectors(int n)
 {
     System.Collections.ArrayList results  = new System.Collections.ArrayList();
     System.Collections.Hashtable snapshot = new System.Collections.Hashtable(fieldToReader);
     foreach (System.Collections.DictionaryEntry e in snapshot)
     {
         // fieldToReader maps field name -> reader, so the key is the field and the
         // value is the reader (the previous code had the two casts swapped).
         System.String  field  = (System.String)e.Key;
         IndexReader    reader = (IndexReader)e.Value;
         TermFreqVector vector = reader.GetTermFreqVector(n, field);
         if (vector != null)
         {
             results.Add(vector);
         }
     }
     return((TermFreqVector[])(results.ToArray(typeof(TermFreqVector))));
 }
Example #12
0
        /// <summary>
        /// Collects the non-null term frequency vectors of document <paramref name="n"/> from
        /// every entry in fieldToReader, asking each reader for the vector of its own field.
        /// </summary>
        /// <param name="n">document number passed through to each reader</param>
        /// <returns>array of the vectors that were not null; empty when there are none</returns>
        public override TermFreqVector[] GetTermFreqVectors(int n)
        {
            EnsureOpen();

            var collected = new List<TermFreqVector>();
            foreach (KeyValuePair<string, IndexReader> pair in fieldToReader)
            {
                TermFreqVector tfv = pair.Value.GetTermFreqVector(n, pair.Key);
                if (tfv == null)
                {
                    continue;
                }
                collected.Add(tfv);
            }

            return collected.ToArray();
        }
Example #13
0
        /// <summary>
        /// Collects the non-null term frequency vectors of document <paramref name="n"/> from
        /// every entry in fieldToReader, asking each reader for the vector of its own field.
        /// </summary>
        /// <param name="n">document number passed through to each reader</param>
        /// <param name="state">passed through unchanged to each reader</param>
        /// <returns>array of the vectors that were not null; empty when there are none</returns>
        public override ITermFreqVector[] GetTermFreqVectors(int n, IState state)
        {
            EnsureOpen();

            var collected = new List<ITermFreqVector>();
            foreach (var pair in fieldToReader)
            {
                ITermFreqVector tfv = pair.Value.GetTermFreqVector(n, pair.Key, state);
                if (tfv == null)
                {
                    continue;
                }
                collected.Add(tfv);
            }

            return collected.ToArray();
        }
        /// <summary>
        /// Collects the non-null term frequency vectors of document <paramref name="n"/> from
        /// every entry in fieldToReader, asking each reader for the vector of its own field.
        /// </summary>
        /// <param name="n">document number passed through to each reader</param>
        /// <returns>array of the vectors that were not null; empty when there are none</returns>
        public override TermFreqVector[] GetTermFreqVectors(int n)
        {
            EnsureOpen();
            List <TermFreqVector> results = new List <TermFreqVector>();

            // foreach replaces the hand-driven IEnumerator<T>, which was never disposed.
            foreach (KeyValuePair <string, IndexReader> e in fieldToReader)
            {
                string         field  = e.Key;
                IndexReader    reader = e.Value;
                TermFreqVector vector = reader.GetTermFreqVector(n, field);
                if (vector != null)
                {
                    results.Add(vector);
                }
            }
            return(results.ToArray());
        }
        /// <summary>
        /// Reads the segment infos, opens a reader via OpenReader(), and checks that documents
        /// 0 and 1 come back with the field count of doc1/doc2 minus the number of unstored
        /// fields, that a term vector exists for TEXT_FIELD_2_KEY in document 0, and that the
        /// norms pass TestSegmentReader.CheckNorms.
        /// </summary>
        public virtual void  DoTestDocument()
        {
            sis.Read(dir);

            IndexReader reader = OpenReader();
            Assert.IsTrue(reader != null);

            // Document 0 is compared against doc1.
            Document first = reader.Document(0);
            Assert.IsTrue(first != null);
            var firstActual   = DocHelper.NumFields(first);
            var firstExpected = DocHelper.NumFields(doc1) - DocHelper.unstored.Count;
            Assert.IsTrue(firstActual == firstExpected);

            // Document 1 is compared against doc2.
            Document second = reader.Document(1);
            Assert.IsTrue(second != null);
            var secondActual   = DocHelper.NumFields(second);
            var secondExpected = DocHelper.NumFields(doc2) - DocHelper.unstored.Count;
            Assert.IsTrue(secondActual == secondExpected);

            TermFreqVector tfv = reader.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
            Assert.IsTrue(tfv != null);

            TestSegmentReader.CheckNorms(reader);
        }
Example #16
0
 /// <summary>
 /// Builds a token stream from the term position vector stored for
 /// <paramref name="field"/> of document <paramref name="docId"/>.
 /// </summary>
 /// <param name="reader">reader queried for the term vector</param>
 /// <param name="docId">document number passed to the reader</param>
 /// <param name="field">field whose term vector is requested</param>
 /// <returns>the result of GetTokenStream for the field's TermPositionVector</returns>
 /// <exception cref="ArgumentException">
 /// the reader returned no vector, or the vector is not a TermPositionVector
 /// </exception>
 public static TokenStream GetTokenStream(IndexReader reader, int docId, System.String field)
 {
     // A single lookup covers both failure cases: "as" yields null for a missing vector
     // and for a vector that carries no position data.
     var tpv = reader.GetTermFreqVector(docId, field) as TermPositionVector;
     if (tpv == null)
     {
         throw new ArgumentException(field + " in doc #" + docId
                                     + " does not have any term position data stored");
     }
     return GetTokenStream(tpv);
 }
Example #17
0
 /// <summary>
 /// Delegates the term vector request to the wrapped reader in_Renamed after EnsureOpen().
 /// </summary>
 /// <param name="docNumber">document number passed through to the wrapped reader</param>
 /// <param name="field">field name passed through to the wrapped reader</param>
 /// <returns>whatever the wrapped reader returns</returns>
 public override ITermFreqVector GetTermFreqVector(int docNumber, string field)
 {
     EnsureOpen();

     ITermFreqVector delegated = in_Renamed.GetTermFreqVector(docNumber, field);
     return delegated;
 }
        /// <summary>
        /// Runs SortedTermVectorMapper and FieldSortedTermVectorMapper over the term vectors
        /// read by a TermVectorsReader and checks the entry counts plus the presence or absence
        /// of offsets and positions. It then checks that a DocNumAwareMapper reports the
        /// requested document number after each IndexReader.GetTermFreqVector call.
        /// </summary>
        public virtual void  TestMapper()
        {
            TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);

            Assert.IsTrue(reader != null);
            SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());

            reader.Get(0, mapper);
            var set_Renamed = mapper.TermVectorEntrySet;

            Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
            //three fields, 4 terms, all terms are the same
            Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
            //Check offsets and positions
            foreach (TermVectorEntry tve in set_Renamed)
            {
                Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
                Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
            }

            mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
            reader.Get(1, mapper);
            set_Renamed = mapper.TermVectorEntrySet;
            Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
            //three fields, 4 terms, all terms are the same
            Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
            //Should have offsets and positions b/c we are munging all the fields together
            foreach (TermVectorEntry tve in set_Renamed)
            {
                Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
                Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
            }


            FieldSortedTermVectorMapper fsMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());

            reader.Get(0, fsMapper);
            var map = fsMapper.FieldToTerms;

            Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
            foreach (var entry in map)
            {
                var sortedSet = entry.Value;
                Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
                foreach (TermVectorEntry tve in sortedSet)
                {
                    Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
                    //Check offsets and positions.
                    System.String field = tve.Field;
                    if (field.Equals(testFields[0]))
                    {
                        //should have offsets

                        Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
                        Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
                    }
                    else if (field.Equals(testFields[1]))
                    {
                        //should not have offsets

                        Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                        Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
                    }
                }
            }
            //Try mapper that ignores offs and positions
            fsMapper = new FieldSortedTermVectorMapper(true, true, new TermVectorEntryFreqSortedComparator());
            reader.Get(0, fsMapper);
            map = fsMapper.FieldToTerms;
            Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
            foreach (var entry in map)
            {
                var sortedSet = entry.Value;
                Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
                foreach (TermVectorEntry tve in sortedSet)
                {
                    Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
                    //Check offsets and positions.
                    System.String field = tve.Field;
                    if (field.Equals(testFields[0]))
                    {
                        //the mapper was told to ignore offsets and positions, so neither may be present

                        Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                        Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
                    }
                    else if (field.Equals(testFields[1]))
                    {
                        //should not have offsets

                        Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                        Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
                    }
                }
            }

            // test setDocumentNumber()
            IndexReader       ir = IndexReader.Open(dir, true);
            DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper();

            Assert.AreEqual(-1, docNumAwareMapper.GetDocumentNumber());

            ir.GetTermFreqVector(0, docNumAwareMapper);
            Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
            docNumAwareMapper.SetDocumentNumber(-1);

            ir.GetTermFreqVector(1, docNumAwareMapper);
            Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
            docNumAwareMapper.SetDocumentNumber(-1);

            ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
            Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
            docNumAwareMapper.SetDocumentNumber(-1);

            ir.GetTermFreqVector(1, "f2", docNumAwareMapper);
            Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
            docNumAwareMapper.SetDocumentNumber(-1);

            ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
            Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());

            ir.Close();
        }
Example #19
0
 /// <summary>
 /// Delegates the term vector request to the wrapped reader in_Renamed.
 /// </summary>
 /// <param name="docNumber">document number passed through to the wrapped reader</param>
 /// <param name="field">field name passed through to the wrapped reader</param>
 /// <returns>whatever the wrapped reader returns</returns>
 public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
 {
     TermFreqVector delegated = in_Renamed.GetTermFreqVector(docNumber, field);
     return delegated;
 }
Example #20
0
 /// <summary>
 /// Delegates the term vector request, including <paramref name="state"/>, to the wrapped
 /// reader in_Renamed after EnsureOpen().
 /// </summary>
 /// <param name="docNumber">document number passed through to the wrapped reader</param>
 /// <param name="field">field name passed through to the wrapped reader</param>
 /// <param name="state">passed through unchanged to the wrapped reader</param>
 /// <returns>whatever the wrapped reader returns</returns>
 public override ITermFreqVector GetTermFreqVector(int docNumber, System.String field, IState state)
 {
     EnsureOpen();

     ITermFreqVector delegated = in_Renamed.GetTermFreqVector(docNumber, field, state);
     return delegated;
 }