public virtual void  TestPositionReader()
        {
            TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);

            Assert.IsTrue(reader != null);
            TermPositionVector vector;

            System.String[] terms;
            vector = (TermPositionVector)reader.Get(0, testFields[0]);
            Assert.IsTrue(vector != null);
            terms = vector.GetTerms();
            Assert.IsTrue(terms != null);
            Assert.IsTrue(terms.Length == testTerms.Length);
            for (int i = 0; i < terms.Length; i++)
            {
                System.String term = terms[i];
                //System.out.println("Term: " + term);
                Assert.IsTrue(term.Equals(testTerms[i]));
                int[] positions = vector.GetTermPositions(i);
                Assert.IsTrue(positions != null);
                Assert.IsTrue(positions.Length == this.positions[i].Length);
                for (int j = 0; j < positions.Length; j++)
                {
                    int position = positions[j];
                    Assert.IsTrue(position == this.positions[i][j]);
                }
                TermVectorOffsetInfo[] offset = vector.GetOffsets(i);
                Assert.IsTrue(offset != null);
                Assert.IsTrue(offset.Length == this.offsets[i].Length);
                for (int j = 0; j < offset.Length; j++)
                {
                    TermVectorOffsetInfo termVectorOffsetInfo = offset[j];
                    Assert.IsTrue(termVectorOffsetInfo.Equals(offsets[i][j]));
                }
            }

            ITermFreqVector freqVector = reader.Get(0, testFields[1]);             //no pos, no offset

            Assert.IsTrue(freqVector != null);
            Assert.IsTrue(freqVector is TermPositionVector == false);
            terms = freqVector.GetTerms();
            Assert.IsTrue(terms != null);
            Assert.IsTrue(terms.Length == testTerms.Length);
            for (int i = 0; i < terms.Length; i++)
            {
                System.String term = terms[i];
                //System.out.println("Term: " + term);
                Assert.IsTrue(term.Equals(testTerms[i]));
            }
        }
Beispiel #2
0
        public virtual void TestDocsEnum()
        {
            TermVectorsReader reader = Codec.Default.TermVectorsFormat().VectorsReader(Dir, Seg.Info, FieldInfos, NewIOContext(Random()));

            for (int j = 0; j < 5; j++)
            {
                Terms vector = reader.Get(j).Terms(TestFields[0]);
                Assert.IsNotNull(vector);
                Assert.AreEqual(TestTerms.Length, vector.Size());
                TermsEnum termsEnum = vector.Iterator(null);
                DocsEnum  docsEnum  = null;
                for (int i = 0; i < TestTerms.Length; i++)
                {
                    BytesRef text = termsEnum.Next();
                    Assert.IsNotNull(text);
                    string term = text.Utf8ToString();
                    //System.out.println("Term: " + term);
                    Assert.AreEqual(TestTerms[i], term);

                    docsEnum = TestUtil.Docs(Random(), termsEnum, null, docsEnum, DocsEnum.FLAG_NONE);
                    Assert.IsNotNull(docsEnum);
                    int doc = docsEnum.DocID();
                    Assert.AreEqual(-1, doc);
                    Assert.IsTrue(docsEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
                }
                Assert.IsNull(termsEnum.Next());
            }
            reader.Dispose();
        }
Beispiel #3
0
        public virtual void TestDocsEnum()
        {
            TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(dir, seg.Info, fieldInfos, NewIOContext(Random));

            for (int j = 0; j < 5; j++)
            {
                Terms vector = reader.Get(j).GetTerms(testFields[0]);
                Assert.IsNotNull(vector);
                Assert.AreEqual(testTerms.Length, vector.Count);
                TermsEnum termsEnum = vector.GetEnumerator();
                DocsEnum  docsEnum  = null;
                for (int i = 0; i < testTerms.Length; i++)
                {
                    Assert.IsTrue(termsEnum.MoveNext());
                    BytesRef text = termsEnum.Term;
                    string   term = text.Utf8ToString();
                    //System.out.println("Term: " + term);
                    Assert.AreEqual(testTerms[i], term);

                    docsEnum = TestUtil.Docs(Random, termsEnum, null, docsEnum, DocsFlags.NONE);
                    Assert.IsNotNull(docsEnum);
                    int doc = docsEnum.DocID;
                    Assert.AreEqual(-1, doc);
                    Assert.IsTrue(docsEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
                }
                Assert.IsFalse(termsEnum.MoveNext());
            }
            reader.Dispose();
        }
Beispiel #4
0
 public virtual void  TestBadParams()
 {
     try
     {
         TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
         Assert.IsTrue(reader != null);
         //Bad document number, good Field number
         TermFreqVector vector = reader.Get(50, testFields[0]);
         Assert.IsTrue(vector == null);
     }
     catch (System.Exception e)
     {
         Assert.IsTrue(false);
     }
     try
     {
         TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
         Assert.IsTrue(reader != null);
         //good document number, bad Field number
         TermFreqVector vector = reader.Get(0, "f50");
         Assert.IsTrue(vector == null);
     }
     catch (System.Exception e)
     {
         Assert.IsTrue(false);
     }
 }
Beispiel #5
0
 public override Fields GetTermVectors(int docID)
 {
     TermVectorsReader termVectorsReader = TermVectorsReader;
     if (termVectorsReader == null)
     {
         return null;
     }
     CheckBounds(docID);
     return termVectorsReader.Get(docID);
 }
Beispiel #6
0
        /// <summary>Return an array of term frequency vectors for the specified document.
        /// The array contains a vector for each vectorized field in the document.
        /// Each vector vector contains term numbers and frequencies for all terms
        /// in a given vectorized field.
        /// If no such fields existed, the method returns null.
        /// </summary>
        /// <throws>  IOException </throws>
        public override TermFreqVector[] GetTermFreqVectors(int docNumber)
        {
            if (termVectorsReaderOrig == null)
                return null;

            TermVectorsReader termVectorsReader = GetTermVectorsReader();
            if (termVectorsReader == null)
                return null;

            return termVectorsReader.Get(docNumber);
        }
Beispiel #7
0
        /// <summary>Return a term frequency vector for the specified document and field. The
        /// vector returned contains term numbers and frequencies for all terms in
        /// the specified field of this document, if the field had storeTermVector
        /// flag set.  If the flag was not set, the method returns null.
        /// </summary>
        /// <throws>  IOException </throws>
        public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
        {
            // Check if this field is invalid or has no stored term vector
            FieldInfo fi = fieldInfos.FieldInfo(field);
            if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
                return null;

            TermVectorsReader termVectorsReader = GetTermVectorsReader();
            if (termVectorsReader == null)
                return null;

            return termVectorsReader.Get(docNumber, field);
        }
Beispiel #8
0
        private void  CheckTermVector(TermVectorsReader reader, int docNum, System.String field)
        {
            TermFreqVector vector = reader.Get(docNum, field);

            Assert.IsTrue(vector != null);
            System.String[] terms = vector.GetTerms();
            Assert.IsTrue(terms != null);
            Assert.IsTrue(terms.Length == testTerms.Length);
            for (int i = 0; i < terms.Length; i++)
            {
                System.String term = terms[i];
                Assert.IsTrue(term.Equals(testTerms[i]));
            }
        }
Beispiel #9
0
        /// <summary>Return an array of term frequency vectors for the specified document.
        /// The array contains a vector for each vectorized field in the document.
        /// Each vector vector contains term numbers and frequencies for all terms
        /// in a given vectorized field.
        /// If no such fields existed, the method returns null.
        /// </summary>
        /// <throws>  IOException </throws>
        public override TermFreqVector[] GetTermFreqVectors(int docNumber)
        {
            if (termVectorsReaderOrig == null)
            {
                return(null);
            }

            TermVectorsReader termVectorsReader = GetTermVectorsReader();

            if (termVectorsReader == null)
            {
                return(null);
            }

            return(termVectorsReader.Get(docNumber));
        }
        public override void  GetTermFreqVector(int docNumber, TermVectorMapper mapper)
        {
            EnsureOpen();
            if (termVectorsReaderOrig == null)
            {
                return;
            }

            TermVectorsReader termVectorsReader = GetTermVectorsReader();

            if (termVectorsReader == null)
            {
                return;
            }

            termVectorsReader.Get(docNumber, mapper);
        }
Beispiel #11
0
        public virtual void TestOffsetReader()
        {
            TermVectorsReader reader = Codec.Default.TermVectorsFormat().VectorsReader(Dir, Seg.Info, FieldInfos, NewIOContext(Random()));
            Terms             vector = reader.Get(0).Terms(TestFields[0]);

            Assert.IsNotNull(vector);
            TermsEnum termsEnum = vector.Iterator(null);

            Assert.IsNotNull(termsEnum);
            Assert.AreEqual(TestTerms.Length, vector.Size());
            DocsAndPositionsEnum dpEnum = null;

            for (int i = 0; i < TestTerms.Length; i++)
            {
                BytesRef text = termsEnum.Next();
                Assert.IsNotNull(text);
                string term = text.Utf8ToString();
                Assert.AreEqual(TestTerms[i], term);

                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                Assert.IsNotNull(dpEnum);
                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                Assert.AreEqual(dpEnum.Freq(), Positions[i].Length);
                for (int j = 0; j < Positions[i].Length; j++)
                {
                    Assert.AreEqual(Positions[i][j], dpEnum.NextPosition());
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                Assert.IsNotNull(dpEnum);
                Assert.AreEqual(dpEnum.Freq(), Positions[i].Length);
                for (int j = 0; j < Positions[i].Length; j++)
                {
                    Assert.AreEqual(Positions[i][j], dpEnum.NextPosition());
                    Assert.AreEqual(j * 10, dpEnum.StartOffset());
                    Assert.AreEqual(j * 10 + TestTerms[i].Length, dpEnum.EndOffset());
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
            }
            reader.Dispose();
        }
        public override void  GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
        {
            EnsureOpen();
            FieldInfo fi = fieldInfos.FieldInfo(field);

            if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
            {
                return;
            }

            TermVectorsReader termVectorsReader = GetTermVectorsReader();

            if (termVectorsReader == null)
            {
                return;
            }


            termVectorsReader.Get(docNumber, field, mapper);
        }
        public virtual void  TestReader()
        {
            TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);

            Assert.IsTrue(reader != null);
            for (int j = 0; j < 5; j++)
            {
                ITermFreqVector vector = reader.Get(j, testFields[0]);
                Assert.IsTrue(vector != null);
                System.String[] terms = vector.GetTerms();
                Assert.IsTrue(terms != null);
                Assert.IsTrue(terms.Length == testTerms.Length);
                for (int i = 0; i < terms.Length; i++)
                {
                    System.String term = terms[i];
                    //System.out.println("Term: " + term);
                    Assert.IsTrue(term.Equals(testTerms[i]));
                }
            }
        }
Beispiel #14
0
        public virtual void TestReader()
        {
            TermVectorsReader reader = Codec.Default.TermVectorsFormat().VectorsReader(Dir, Seg.Info, FieldInfos, NewIOContext(Random()));

            for (int j = 0; j < 5; j++)
            {
                Terms vector = reader.Get(j).Terms(TestFields[0]);
                Assert.IsNotNull(vector);
                Assert.AreEqual(TestTerms.Length, vector.Size());
                TermsEnum termsEnum = vector.Iterator(null);
                for (int i = 0; i < TestTerms.Length; i++)
                {
                    BytesRef text = termsEnum.Next();
                    Assert.IsNotNull(text);
                    string term = text.Utf8ToString();
                    //System.out.println("Term: " + term);
                    Assert.AreEqual(TestTerms[i], term);
                }
                Assert.IsNull(termsEnum.Next());
            }
            reader.Dispose();
        }
Beispiel #15
0
        public virtual void TestReader()
        {
            TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(dir, seg.Info, fieldInfos, NewIOContext(Random));

            for (int j = 0; j < 5; j++)
            {
                Terms vector = reader.Get(j).GetTerms(testFields[0]);
                Assert.IsNotNull(vector);
                Assert.AreEqual(testTerms.Length, vector.Count);
                TermsEnum termsEnum = vector.GetEnumerator();
                for (int i = 0; i < testTerms.Length; i++)
                {
                    Assert.IsTrue(termsEnum.MoveNext());
                    BytesRef text = termsEnum.Term;
                    string   term = text.Utf8ToString();
                    //System.out.println("Term: " + term);
                    Assert.AreEqual(testTerms[i], term);
                }
                Assert.IsFalse(termsEnum.MoveNext());
            }
            reader.Dispose();
        }
        public virtual void  TestBadParams()
        {
            var reader = new TermVectorsReader(dir, seg, fieldInfos);

            Assert.IsTrue(reader != null);
            //Bad document number, good field number
            Assert.Throws <System.IO.IOException>(() => reader.Get(50, testFields[0]));

            reader = new TermVectorsReader(dir, seg, fieldInfos);
            Assert.IsTrue(reader != null);
            //Bad document number, no field
            Assert.Throws <System.IO.IOException>(() => reader.Get(50));

            reader = new TermVectorsReader(dir, seg, fieldInfos);
            Assert.IsTrue(reader != null);
            Assert.DoesNotThrow(() =>
            {
                //good document number, bad field number
                ITermFreqVector vector = reader.Get(0, "f50");
                Assert.IsTrue(vector == null);
            });
        }
Beispiel #17
0
 public virtual void  TestReader()
 {
     try
     {
         TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
         Assert.IsTrue(reader != null);
         TermFreqVector vector = reader.Get(0, testFields[0]);
         Assert.IsTrue(vector != null);
         System.String[] terms = vector.GetTerms();
         Assert.IsTrue(terms != null);
         Assert.IsTrue(terms.Length == testTerms.Length);
         for (int i = 0; i < terms.Length; i++)
         {
             System.String term = terms[i];
             //System.out.println("Term: " + term);
             Assert.IsTrue(term.Equals(testTerms[i]));
         }
     }
     catch (System.IO.IOException e)
     {
         System.Console.Error.WriteLine(e.StackTrace);
         Assert.IsTrue(false);
     }
 }
		public virtual void  TestOffsetReader()
		{
			TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
			Assert.IsTrue(reader != null);
			TermPositionVector vector = (TermPositionVector) reader.Get(0, testFields[0]);
			Assert.IsTrue(vector != null);
			System.String[] terms = vector.GetTerms();
			Assert.IsTrue(terms != null);
			Assert.IsTrue(terms.Length == testTerms.Length);
			for (int i = 0; i < terms.Length; i++)
			{
				System.String term = terms[i];
				//System.out.println("Term: " + term);
				Assert.IsTrue(term.Equals(testTerms[i]));
				int[] positions = vector.GetTermPositions(i);
				Assert.IsTrue(positions != null);
				Assert.IsTrue(positions.Length == this.positions[i].Length);
				for (int j = 0; j < positions.Length; j++)
				{
					int position = positions[j];
					Assert.IsTrue(position == this.positions[i][j]);
				}
				TermVectorOffsetInfo[] offset = vector.GetOffsets(i);
				Assert.IsTrue(offset != null);
				Assert.IsTrue(offset.Length == this.offsets[i].Length);
				for (int j = 0; j < offset.Length; j++)
				{
					TermVectorOffsetInfo termVectorOffsetInfo = offset[j];
					Assert.IsTrue(termVectorOffsetInfo.Equals(offsets[i][j]));
				}
			}
		}
		public virtual void  TestReader()
		{
			TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
			Assert.IsTrue(reader != null);
			for (int j = 0; j < 5; j++)
			{
				TermFreqVector vector = reader.Get(j, testFields[0]);
				Assert.IsTrue(vector != null);
				System.String[] terms = vector.GetTerms();
				Assert.IsTrue(terms != null);
				Assert.IsTrue(terms.Length == testTerms.Length);
				for (int i = 0; i < terms.Length; i++)
				{
					System.String term = terms[i];
					//System.out.println("Term: " + term);
					Assert.IsTrue(term.Equals(testTerms[i]));
				}
			}
		}
Beispiel #20
0
        public virtual void TestPositionReader()
        {
            TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(dir, seg.Info, fieldInfos, NewIOContext(Random));
            //BytesRef[] terms; // LUCENENET NOTE: Not used in Lucene
            Terms vector = reader.Get(0).GetTerms(testFields[0]);

            Assert.IsNotNull(vector);
            Assert.AreEqual(testTerms.Length, vector.Count);
            TermsEnum            termsEnum = vector.GetEnumerator();
            DocsAndPositionsEnum dpEnum    = null;

            for (int i = 0; i < testTerms.Length; i++)
            {
                Assert.IsTrue(termsEnum.MoveNext());
                BytesRef text = termsEnum.Term;
                string   term = text.Utf8ToString();
                //System.out.println("Term: " + term);
                Assert.AreEqual(testTerms[i], term);

                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                Assert.IsNotNull(dpEnum);
                int doc = dpEnum.DocID;
                Assert.AreEqual(-1, doc);
                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                Assert.AreEqual(dpEnum.Freq, positions[i].Length);
                for (int j = 0; j < positions[i].Length; j++)
                {
                    Assert.AreEqual(positions[i][j], dpEnum.NextPosition());
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                doc    = dpEnum.DocID;
                Assert.AreEqual(-1, doc);
                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                Assert.IsNotNull(dpEnum);
                Assert.AreEqual(dpEnum.Freq, positions[i].Length);
                for (int j = 0; j < positions[i].Length; j++)
                {
                    Assert.AreEqual(positions[i][j], dpEnum.NextPosition());
                    Assert.AreEqual(j * 10, dpEnum.StartOffset);
                    Assert.AreEqual(j * 10 + testTerms[i].Length, dpEnum.EndOffset);
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
            }

            Terms freqVector = reader.Get(0).GetTerms(testFields[1]); //no pos, no offset

            Assert.IsNotNull(freqVector);
            Assert.AreEqual(testTerms.Length, freqVector.Count);
            termsEnum = freqVector.GetEnumerator();
            Assert.IsNotNull(termsEnum);
            for (int i = 0; i < testTerms.Length; i++)
            {
                Assert.IsTrue(termsEnum.MoveNext());
                BytesRef text = termsEnum.Term;
                string   term = text.Utf8ToString();
                //System.out.println("Term: " + term);
                Assert.AreEqual(testTerms[i], term);
                Assert.IsNotNull(termsEnum.Docs(null, null));
                Assert.IsNull(termsEnum.DocsAndPositions(null, null)); // no pos
            }
            reader.Dispose();
        }
		public virtual void  TestMapper()
		{
			TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
			Assert.IsTrue(reader != null);
			SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
			reader.Get(0, mapper);
			SortedSet<TermVectorEntry> set_Renamed = mapper.GetTermVectorEntrySet();
			Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
			//three fields, 4 terms, all terms are the same
			Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
			//Check offsets and positions
            foreach(TermVectorEntry tve in set_Renamed)
            {			
				Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
				Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
				Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
			}
			
			mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
			reader.Get(1, mapper);
			set_Renamed = mapper.GetTermVectorEntrySet();
			Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
			//three fields, 4 terms, all terms are the same
			Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
			//Should have offsets and positions b/c we are munging all the fields together
            foreach(TermVectorEntry tve in set_Renamed)
			{
				Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
				Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
				Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
			}
			
			
			FieldSortedTermVectorMapper fsMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
			reader.Get(0, fsMapper);
            IDictionary<string, SortedSet<TermVectorEntry>> map = fsMapper.GetFieldToTerms();
			Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
            foreach(KeyValuePair<string,SortedSet<TermVectorEntry>> entry in new Dictionary<string, SortedSet<TermVectorEntry>>(map))
			{
				SortedSet<TermVectorEntry> sortedSet = entry.Value;
				Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
                foreach(TermVectorEntry tve in sortedSet)
				{
					Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
					//Check offsets and positions.
					Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
					System.String field = tve.GetField();
					if (field.Equals(testFields[0]))
					{
						//should have offsets
						
						Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
						Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
					}
					else if (field.Equals(testFields[1]))
					{
						//should not have offsets
						
						Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
						Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
					}
				}
			}
			//Try mapper that ignores offs and positions
			fsMapper = new FieldSortedTermVectorMapper(true, true, new TermVectorEntryFreqSortedComparator());
			reader.Get(0, fsMapper);
			map = fsMapper.GetFieldToTerms();
			Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
            foreach(KeyValuePair<string,SortedSet<TermVectorEntry>> entry in new Dictionary<string,SortedSet<TermVectorEntry>>(map))
			{
				SortedSet<TermVectorEntry> sortedSet = entry.Value;
				Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
                foreach(TermVectorEntry tve in sortedSet)
				{
					Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
					//Check offsets and positions.
					Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
					System.String field = tve.GetField();
					if (field.Equals(testFields[0]))
					{
						//should have offsets
						
						Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is null and it shouldn't be");
						Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is null and it shouldn't be");
					}
					else if (field.Equals(testFields[1]))
					{
						//should not have offsets
						
						Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
						Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
					}
				}
			}
			
			// test setDocumentNumber()
			IndexReader ir = IndexReader.Open(dir);
			DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper();
			Assert.AreEqual(- 1, docNumAwareMapper.GetDocumentNumber());
			
			ir.GetTermFreqVector(0, docNumAwareMapper);
			Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
			docNumAwareMapper.SetDocumentNumber(- 1);
			
			ir.GetTermFreqVector(1, docNumAwareMapper);
			Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
			docNumAwareMapper.SetDocumentNumber(- 1);
			
			ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
			Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
			docNumAwareMapper.SetDocumentNumber(- 1);
			
			ir.GetTermFreqVector(1, "f2", docNumAwareMapper);
			Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
			docNumAwareMapper.SetDocumentNumber(- 1);
			
			ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
			Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
			
			ir.Close();
		}
Beispiel #22
0
		private void  CheckTermVector(TermVectorsReader reader, int docNum, System.String field)
		{
			TermFreqVector vector = reader.Get(docNum, field);
			Assert.IsTrue(vector != null);
			System.String[] terms = vector.GetTerms();
			Assert.IsTrue(terms != null);
			Assert.IsTrue(terms.Length == testTerms.Length);
			for (int i = 0; i < terms.Length; i++)
			{
				System.String term = terms[i];
				Assert.IsTrue(term.Equals(testTerms[i]));
			}
		}
        public virtual void  TestMapper()
        {
            TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);

            Assert.IsTrue(reader != null);
            SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());

            reader.Get(0, mapper);
            var set_Renamed = mapper.TermVectorEntrySet;

            Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
            //three fields, 4 terms, all terms are the same
            Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
            //Check offsets and positions
            for (System.Collections.IEnumerator iterator = set_Renamed.GetEnumerator(); iterator.MoveNext();)
            {
                TermVectorEntry tve = (TermVectorEntry)iterator.Current;
                Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
                Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
            }

            mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
            reader.Get(1, mapper);
            set_Renamed = mapper.TermVectorEntrySet;
            Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
            //three fields, 4 terms, all terms are the same
            Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
            //Should have offsets and positions b/c we are munging all the fields together
            for (System.Collections.IEnumerator iterator = set_Renamed.GetEnumerator(); iterator.MoveNext();)
            {
                TermVectorEntry tve = (TermVectorEntry)iterator.Current;
                Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
                Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
            }


            FieldSortedTermVectorMapper fsMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());

            reader.Get(0, fsMapper);
            var map = fsMapper.FieldToTerms;

            Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
            for (var iterator = map.GetEnumerator(); iterator.MoveNext();)
            {
                var entry     = iterator.Current;
                var sortedSet = entry.Value;
                Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
                for (var inner = sortedSet.GetEnumerator(); inner.MoveNext();)
                {
                    TermVectorEntry tve = inner.Current;
                    Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
                    //Check offsets and positions.
                    Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
                    System.String field = tve.Field;
                    if (field.Equals(testFields[0]))
                    {
                        //should have offsets

                        Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
                        Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
                    }
                    else if (field.Equals(testFields[1]))
                    {
                        //should not have offsets

                        Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                        Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
                    }
                }
            }
            //Try mapper that ignores offs and positions
            fsMapper = new FieldSortedTermVectorMapper(true, true, new TermVectorEntryFreqSortedComparator());
            reader.Get(0, fsMapper);
            map = fsMapper.FieldToTerms;
            Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
            for (var iterator = map.GetEnumerator(); iterator.MoveNext();)
            {
                var entry     = iterator.Current;
                var sortedSet = entry.Value;
                Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
                for (var inner = sortedSet.GetEnumerator(); inner.MoveNext();)
                {
                    TermVectorEntry tve = inner.Current;
                    Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
                    //Check offsets and positions.
                    Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
                    System.String field = tve.Field;
                    if (field.Equals(testFields[0]))
                    {
                        //should have offsets

                        Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is null and it shouldn't be");
                        Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is null and it shouldn't be");
                    }
                    else if (field.Equals(testFields[1]))
                    {
                        //should not have offsets

                        Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                        Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
                    }
                }
            }

            // test setDocumentNumber()
            IndexReader       ir = IndexReader.Open(dir, true);
            DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper();

            Assert.AreEqual(-1, docNumAwareMapper.GetDocumentNumber());

            ir.GetTermFreqVector(0, docNumAwareMapper);
            Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
            docNumAwareMapper.SetDocumentNumber(-1);

            ir.GetTermFreqVector(1, docNumAwareMapper);
            Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
            docNumAwareMapper.SetDocumentNumber(-1);

            ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
            Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
            docNumAwareMapper.SetDocumentNumber(-1);

            ir.GetTermFreqVector(1, "f2", docNumAwareMapper);
            Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
            docNumAwareMapper.SetDocumentNumber(-1);

            ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
            Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());

            ir.Close();
        }
		public virtual void  TestBadParams()
		{
			try
			{
				TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
				Assert.IsTrue(reader != null);
				//Bad document number, good field number
				reader.Get(50, testFields[0]);
				Assert.Fail();
			}
			catch (System.IO.IOException e)
			{
				// expected exception
			}
			try
			{
				TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
				Assert.IsTrue(reader != null);
				//Bad document number, no field
				reader.Get(50);
				Assert.Fail();
			}
			catch (System.IO.IOException e)
			{
				// expected exception
			}
			try
			{
				TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
				Assert.IsTrue(reader != null);
				//good document number, bad field number
				TermFreqVector vector = reader.Get(0, "f50");
				Assert.IsTrue(vector == null);
			}
			catch (System.IO.IOException e)
			{
				Assert.Fail();
			}
		}
Beispiel #25
0
		public virtual void  TestReader()
		{
			try
			{
				TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
				Assert.IsTrue(reader != null);
				TermFreqVector vector = reader.Get(0, testFields[0]);
				Assert.IsTrue(vector != null);
				System.String[] terms = vector.GetTerms();
				Assert.IsTrue(terms != null);
				Assert.IsTrue(terms.Length == testTerms.Length);
				for (int i = 0; i < terms.Length; i++)
				{
					System.String term = terms[i];
					//System.out.println("Term: " + term);
					Assert.IsTrue(term.Equals(testTerms[i]));
				}
			}
			catch (System.IO.IOException e)
			{
				System.Console.Error.WriteLine(e.StackTrace);
				Assert.IsTrue(false);
			}
		}