Example #1
        /// <summary>Return a term frequency vector for the specified document and field. The
        /// vector returned contains term numbers and frequencies for all terms in
        /// the specified field of this document, if the field had the storeTermVector
        /// flag set. If the flag was not set, the method returns null.
        /// </summary>
        /// <throws>  IOException </throws>
        public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
        {
            // Check if this field is invalid or has no stored term vector
            EnsureOpen();
            FieldInfo fi = fieldInfos.FieldInfo(field);

            if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
            {
                return null;
            }

            TermVectorsReader termVectorsReader = GetTermVectorsReader();

            if (termVectorsReader == null)
            {
                return null;
            }

            return termVectorsReader.Get(docNumber, field);
        }
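The summary above spells out a null contract: a null result means the field simply did not store term vectors, not an error. A minimal caller-side sketch, assuming an already-open IndexReader `ir` over an index with a hypothetical "contents" field that stored term vectors (GetTermFrequencies is the companion accessor on the same vector interface):

            TermFreqVector tfv = ir.GetTermFreqVector(0, "contents");
            if (tfv != null) // null when the field did not store term vectors
            {
                System.String[] terms = tfv.GetTerms();
                int[]           freqs = tfv.GetTermFrequencies();
                for (int i = 0; i < terms.Length; i++)
                {
                    System.Console.WriteLine(terms[i] + ": " + freqs[i]);
                }
            }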
        public virtual void  TestReader()
        {
            TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos, null);

            Assert.IsTrue(reader != null);
            for (int j = 0; j < 5; j++)
            {
                ITermFreqVector vector = reader.Get(j, testFields[0], null);
                Assert.IsTrue(vector != null);
                System.String[] terms = vector.GetTerms();
                Assert.IsTrue(terms != null);
                Assert.IsTrue(terms.Length == testTerms.Length);
                for (int i = 0; i < terms.Length; i++)
                {
                    System.String term = terms[i];
                    //System.out.println("Term: " + term);
                    Assert.IsTrue(term.Equals(testTerms[i]));
                }
            }
        }
        public virtual void  TestBadParams()
        {
            var reader = new TermVectorsReader(dir, seg, fieldInfos);

            Assert.IsTrue(reader != null);
            //Bad document number, good field number
            Assert.Throws<System.IO.IOException>(() => reader.Get(50, testFields[0]));

            reader = new TermVectorsReader(dir, seg, fieldInfos);
            Assert.IsTrue(reader != null);
            //Bad document number, no field
            Assert.Throws<System.IO.IOException>(() => reader.Get(50));

            reader = new TermVectorsReader(dir, seg, fieldInfos);
            Assert.IsTrue(reader != null);
            Assert.DoesNotThrow(() =>
            {
                //good document number, bad field number
                ITermFreqVector vector = reader.Get(0, "f50");
                Assert.IsTrue(vector == null);
            });
        }
 public virtual void  TestBadParams()
 {
     try
     {
         TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
         Assert.IsTrue(reader != null);
         //Bad document number, good field number
         reader.Get(50, testFields[0]);
         Assert.Fail();
     }
     catch (System.IO.IOException)
     {
         // expected exception
     }
     try
     {
         TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
         Assert.IsTrue(reader != null);
         //Bad document number, no field
         reader.Get(50);
         Assert.Fail();
     }
     catch (System.IO.IOException)
     {
         // expected exception
     }
     try
     {
         TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
         Assert.IsTrue(reader != null);
         //good document number, bad field number
         TermFreqVector vector = reader.Get(0, "f50");
         Assert.IsTrue(vector == null);
     }
     catch (System.IO.IOException)
     {
         Assert.Fail();
     }
 }
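The try/catch/Assert.Fail pattern above is the pre-NUnit-3 spelling of the Assert.Throws version shown earlier; each of the first two blocks collapses to a single line of the form:

     Assert.Throws<System.IO.IOException>(() => reader.Get(50, testFields[0]));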
Example #5
        public virtual void TestReader()
        {
            TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(dir, seg.Info, fieldInfos, NewIOContext(Random));

            for (int j = 0; j < 5; j++)
            {
                Terms vector = reader.Get(j).GetTerms(testFields[0]);
                Assert.IsNotNull(vector);
                Assert.AreEqual(testTerms.Length, vector.Count);
                TermsEnum termsEnum = vector.GetEnumerator();
                for (int i = 0; i < testTerms.Length; i++)
                {
                    Assert.IsTrue(termsEnum.MoveNext());
                    BytesRef text = termsEnum.Term;
                    string   term = text.Utf8ToString();
                    //System.out.println("Term: " + term);
                    Assert.AreEqual(testTerms[i], term);
                }
                Assert.IsFalse(termsEnum.MoveNext());
            }
            reader.Dispose();
        }
        public virtual void TestReader()
        {
            TermVectorsReader reader = Codec.Default.TermVectorsFormat().VectorsReader(Dir, Seg.Info, FieldInfos, NewIOContext(Random()));

            for (int j = 0; j < 5; j++)
            {
                Terms vector = reader.Get(j).Terms(TestFields[0]);
                Assert.IsNotNull(vector);
                Assert.AreEqual(TestTerms.Length, vector.Size());
                TermsEnum termsEnum = vector.Iterator(null);
                for (int i = 0; i < TestTerms.Length; i++)
                {
                    BytesRef text = termsEnum.Next();
                    Assert.IsNotNull(text);
                    string term = text.Utf8ToString();
                    //System.out.println("Term: " + term);
                    Assert.AreEqual(TestTerms[i], term);
                }
                Assert.IsNull(termsEnum.Next());
            }
            reader.Dispose();
        }
Example #7
        public virtual System.Object Clone()
        {
            if (tvx == null || tvd == null || tvf == null)
            {
                return null;
            }

            TermVectorsReader clone = null;

            try
            {
                clone = (TermVectorsReader)base.MemberwiseClone();
            }
            catch (System.Exception)
            {
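                // System.Object.MemberwiseClone does not throw; this empty catch
                // is likely a carry-over from Java's CloneNotSupportedException.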
            }

            clone.tvx = (IndexInput)tvx.Clone();
            clone.tvd = (IndexInput)tvd.Clone();
            clone.tvf = (IndexInput)tvf.Clone();

            return clone;
        }
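Cloning exists so that each consumer gets private file pointers over the shared tvx/tvd/tvf inputs. A hedged sketch of the usual clone-per-thread pattern (the wrapper class and its names are hypothetical; Example #20 below shows the real DisposableThreadLocal variant):

        internal sealed class PerThreadVectors
        {
            private readonly TermVectorsReader termVectorsReaderOrig;
            private readonly System.Threading.ThreadLocal<TermVectorsReader> termVectorsLocal;

            internal PerThreadVectors(TermVectorsReader orig)
            {
                termVectorsReaderOrig = orig;
                // Each thread lazily receives its own clone, so concurrent seeks
                // never interleave on the shared IndexInput streams.
                termVectorsLocal = new System.Threading.ThreadLocal<TermVectorsReader>(
                    () => (TermVectorsReader)termVectorsReaderOrig.Clone());
            }

            internal TermVectorsReader Get()
            {
                return termVectorsLocal.Value; // null if the original was already closed
            }
        }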
Example #8
        private void  Initialize(SegmentInfo si)
        {
            segment = si.name;

            // Use compound file directory for some files, if it exists
            Directory cfsDir = Directory();

            if (Directory().FileExists(segment + ".cfs"))
            {
                cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
                cfsDir    = cfsReader;
            }

            // Open the per-segment files (from the compound file when present)
            fieldInfos   = new FieldInfos(cfsDir, segment + ".fnm");
            fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

            tis = new TermInfosReader(cfsDir, segment, fieldInfos);

            // NOTE: the bitvector is stored using the regular directory, not cfs
            if (HasDeletions(si))
            {
                deletedDocs = new BitVector(Directory(), segment + ".del");
            }

            // make sure that all index files have been read or are kept open
            // so that if an index update removes them we'll still have them
            freqStream = cfsDir.OpenInput(segment + ".frq");
            proxStream = cfsDir.OpenInput(segment + ".prx");
            OpenNorms(cfsDir);

            if (fieldInfos.HasVectors())
            {
                // open term vector files only as needed
                termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
            }
        }
        public override void CheckIntegrity()
        {
            EnsureOpen();

            // stored fields
            FieldsReader.CheckIntegrity();

            // term vectors
            TermVectorsReader termVectorsReader = TermVectorsReader;

            if (termVectorsReader != null)
            {
                termVectorsReader.CheckIntegrity();
            }

            // terms/postings
            if (core.fields != null)
            {
                core.fields.CheckIntegrity();
            }

            // norms
            if (core.normsProducer != null)
            {
                core.normsProducer.CheckIntegrity();
            }

            // docvalues
            if (dvProducers != null)
            {
                foreach (DocValuesProducer producer in dvProducers)
                {
                    producer.CheckIntegrity();
                }
            }
        }
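A hedged caller-side sketch of driving these checks across every segment, assuming Lucene.NET 4.x-style types (the index path is illustrative):

        using (Directory dir = FSDirectory.Open("/tmp/example-index"))
        using (DirectoryReader ir = DirectoryReader.Open(dir))
        {
            foreach (AtomicReaderContext ctx in ir.Leaves)
            {
                // Throws CorruptIndexException (or another IOException) if any
                // of the checksummed files above fails verification.
                ctx.AtomicReader.CheckIntegrity();
            }
        }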
Example #10
 public virtual void  TestReader()
 {
     try
     {
         TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
         Assert.IsTrue(reader != null);
         TermFreqVector vector = reader.Get(0, testFields[0]);
         Assert.IsTrue(vector != null);
         System.String[] terms = vector.GetTerms();
         Assert.IsTrue(terms != null);
         Assert.IsTrue(terms.Length == testTerms.Length);
         for (int i = 0; i < terms.Length; i++)
         {
             System.String term = terms[i];
             //System.out.println("Term: " + term);
             Assert.IsTrue(term.Equals(testTerms[i]));
         }
     }
     catch (System.IO.IOException e)
     {
         System.Console.Error.WriteLine(e.StackTrace);
         Assert.IsTrue(false);
     }
 }
Example #11
        public virtual void TestPositionReader()
        {
            TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(dir, seg.Info, fieldInfos, NewIOContext(Random));
            //BytesRef[] terms; // LUCENENET NOTE: Not used in Lucene
            Terms vector = reader.Get(0).GetTerms(testFields[0]);

            Assert.IsNotNull(vector);
            Assert.AreEqual(testTerms.Length, vector.Count);
            TermsEnum            termsEnum = vector.GetEnumerator();
            DocsAndPositionsEnum dpEnum    = null;

            for (int i = 0; i < testTerms.Length; i++)
            {
                Assert.IsTrue(termsEnum.MoveNext());
                BytesRef text = termsEnum.Term;
                string   term = text.Utf8ToString();
                //System.out.println("Term: " + term);
                Assert.AreEqual(testTerms[i], term);

                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                Assert.IsNotNull(dpEnum);
                int doc = dpEnum.DocID;
                Assert.AreEqual(-1, doc);
                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                Assert.AreEqual(dpEnum.Freq, positions[i].Length);
                for (int j = 0; j < positions[i].Length; j++)
                {
                    Assert.AreEqual(positions[i][j], dpEnum.NextPosition());
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                doc    = dpEnum.DocID;
                Assert.AreEqual(-1, doc);
                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                Assert.IsNotNull(dpEnum);
                Assert.AreEqual(dpEnum.Freq, positions[i].Length);
                for (int j = 0; j < positions[i].Length; j++)
                {
                    Assert.AreEqual(positions[i][j], dpEnum.NextPosition());
                    Assert.AreEqual(j * 10, dpEnum.StartOffset);
                    Assert.AreEqual(j * 10 + testTerms[i].Length, dpEnum.EndOffset);
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
            }

            Terms freqVector = reader.Get(0).GetTerms(testFields[1]); //no pos, no offset

            Assert.IsNotNull(freqVector);
            Assert.AreEqual(testTerms.Length, freqVector.Count);
            termsEnum = freqVector.GetEnumerator();
            Assert.IsNotNull(termsEnum);
            for (int i = 0; i < testTerms.Length; i++)
            {
                Assert.IsTrue(termsEnum.MoveNext());
                BytesRef text = termsEnum.Term;
                string   term = text.Utf8ToString();
                //System.out.println("Term: " + term);
                Assert.AreEqual(testTerms[i], term);
                Assert.IsNotNull(termsEnum.Docs(null, null));
                Assert.IsNull(termsEnum.DocsAndPositions(null, null)); // no pos
            }
            reader.Dispose();
        }
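The assertions above encode the enum's consumption protocol: DocsAndPositions may return null when no positions were indexed, NextDoc must succeed before anything is read, and NextPosition is valid exactly Freq times per document. Distilled into a sketch, assuming a positioned TermsEnum `termsEnum`:

            DocsAndPositionsEnum dpe = termsEnum.DocsAndPositions(null, null);
            if (dpe != null) // null => the field indexed no positions
            {
                while (dpe.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                {
                    for (int k = 0; k < dpe.Freq; k++)
                    {
                        int pos = dpe.NextPosition(); // offsets become valid only after this call
                        System.Console.WriteLine("position: " + pos);
                    }
                }
            }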
		public virtual void  TestWriter()
		{
			try
			{
				TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
				writer.OpenDocument();
				Assert.IsTrue(writer.IsDocumentOpen() == true);
				WriteField(writer, testFields[0]);
				writer.CloseDocument();
				writer.Close();
				Assert.IsTrue(writer.IsDocumentOpen() == false);
				//Check to see the files were created
				Assert.IsTrue(dir.FileExists(seg + TermVectorsWriter.TVD_EXTENSION));
				Assert.IsTrue(dir.FileExists(seg + TermVectorsWriter.TVX_EXTENSION));
				//Now read it back in
				TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
				Assert.IsTrue(reader != null);
				CheckTermVector(reader, 0, testFields[0]);
			}
			catch (System.IO.IOException e)
			{
				System.Console.Error.WriteLine(e.StackTrace);
				Assert.IsTrue(false);
			}
		}
		public virtual void  TestReader()
		{
			try
			{
				TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
				Assert.IsTrue(reader != null);
				TermFreqVector vector = reader.Get(0, testFields[0]);
				Assert.IsTrue(vector != null);
				System.String[] terms = vector.GetTerms();
				Assert.IsTrue(terms != null);
				Assert.IsTrue(terms.Length == testTerms.Length);
				for (int i = 0; i < terms.Length; i++)
				{
					System.String term = terms[i];
					//System.out.println("Term: " + term);
					Assert.IsTrue(term.Equals(testTerms[i]));
				}
			}
			catch (System.IO.IOException e)
			{
				System.Console.Error.WriteLine(e.StackTrace);
				Assert.IsTrue(false);
			}
		}
		public virtual void  TestMapper()
		{
			TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
			Assert.IsTrue(reader != null);
			SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
			reader.Get(0, mapper);
			SortedSet<TermVectorEntry> set_Renamed = mapper.GetTermVectorEntrySet();
			Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
			//three fields, 4 terms, all terms are the same
			Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
			//Check offsets and positions
            foreach(TermVectorEntry tve in set_Renamed)
            {			
				Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
				Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
				Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
			}
			
			mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
			reader.Get(1, mapper);
			set_Renamed = mapper.GetTermVectorEntrySet();
			Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
			//three fields, 4 terms, all terms are the same
			Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
			//Should have offsets and positions b/c we are munging all the fields together
            foreach(TermVectorEntry tve in set_Renamed)
			{
				Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
				Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
				Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
			}
			
			
			FieldSortedTermVectorMapper fsMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
			reader.Get(0, fsMapper);
            IDictionary<string, SortedSet<TermVectorEntry>> map = fsMapper.GetFieldToTerms();
			Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
            foreach(KeyValuePair<string,SortedSet<TermVectorEntry>> entry in new Dictionary<string, SortedSet<TermVectorEntry>>(map))
			{
				SortedSet<TermVectorEntry> sortedSet = entry.Value;
				Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
                foreach(TermVectorEntry tve in sortedSet)
				{
					Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
					//Check offsets and positions.
					Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
					System.String field = tve.GetField();
					if (field.Equals(testFields[0]))
					{
						//should have offsets
						
						Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
						Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
					}
					else if (field.Equals(testFields[1]))
					{
						//should not have offsets
						
						Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
						Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
					}
				}
			}
			//Try mapper that ignores offs and positions
			fsMapper = new FieldSortedTermVectorMapper(true, true, new TermVectorEntryFreqSortedComparator());
			reader.Get(0, fsMapper);
			map = fsMapper.GetFieldToTerms();
			Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
            foreach(KeyValuePair<string,SortedSet<TermVectorEntry>> entry in new Dictionary<string,SortedSet<TermVectorEntry>>(map))
			{
				SortedSet<TermVectorEntry> sortedSet = entry.Value;
				Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
                foreach(TermVectorEntry tve in sortedSet)
				{
					Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
					//Check offsets and positions.
					Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
					System.String field = tve.GetField();
					if (field.Equals(testFields[0]))
					{
						//offsets and positions were ignored by this mapper
						
						Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
						Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
					}
					else if (field.Equals(testFields[1]))
					{
						//should not have offsets
						
						Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
						Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
					}
				}
			}
			
			// test setDocumentNumber()
			IndexReader ir = IndexReader.Open(dir);
			DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper();
			Assert.AreEqual(-1, docNumAwareMapper.GetDocumentNumber());
			
			ir.GetTermFreqVector(0, docNumAwareMapper);
			Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
			docNumAwareMapper.SetDocumentNumber(-1);
			
			ir.GetTermFreqVector(1, docNumAwareMapper);
			Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
			docNumAwareMapper.SetDocumentNumber(-1);
			
			ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
			Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
			docNumAwareMapper.SetDocumentNumber(-1);
			
			ir.GetTermFreqVector(1, "f2", docNumAwareMapper);
			Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
			docNumAwareMapper.SetDocumentNumber(-1);
			
			ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
			Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
			
			ir.Close();
		}
		public virtual void  TestReader()
		{
			TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
			Assert.IsTrue(reader != null);
			for (int j = 0; j < 5; j++)
			{
				TermFreqVector vector = reader.Get(j, testFields[0]);
				Assert.IsTrue(vector != null);
				System.String[] terms = vector.GetTerms();
				Assert.IsTrue(terms != null);
				Assert.IsTrue(terms.Length == testTerms.Length);
				for (int i = 0; i < terms.Length; i++)
				{
					System.String term = terms[i];
					//System.out.println("Term: " + term);
					Assert.IsTrue(term.Equals(testTerms[i]));
				}
			}
		}
Example #16
		private void  CopyVectorsNoDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
		{
			int maxDoc = reader.MaxDoc();
			if (matchingVectorsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				int docCount = 0;
				while (docCount < maxDoc)
				{
					int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
					matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
					termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
					docCount += len;
					checkAbort.Work(300 * len);
				}
			}
			else
			{
				for (int docNum = 0; docNum < maxDoc; docNum++)
				{
					// NOTE: it's very important to first assign to vectors then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
					termVectorsWriter.AddAllDocVectors(vectors);
					checkAbort.Work(300);
				}
			}
		}
Example #17
			internal void  OpenDocStores(SegmentInfo si)
			{
				lock (this)
				{
					
					System.Diagnostics.Debug.Assert(si.name.Equals(segment));
					
					if (fieldsReaderOrig == null)
					{
						Directory storeDir;
						if (si.GetDocStoreOffset() != -1)
						{
							if (si.GetDocStoreIsCompoundFile())
							{
								System.Diagnostics.Debug.Assert(storeCFSReader == null);
								storeCFSReader = new CompoundFileReader(dir, si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
								storeDir = storeCFSReader;
								System.Diagnostics.Debug.Assert(storeDir != null);
							}
							else
							{
								storeDir = dir;
								System.Diagnostics.Debug.Assert(storeDir != null);
							}
						}
						else if (si.GetUseCompoundFile())
						{
							// In some cases, we were originally opened when CFS
							// was not used, but then we are asked to open doc
							// stores after the segment has switched to CFS
							if (cfsReader == null)
							{
								cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
							}
							storeDir = cfsReader;
							System.Diagnostics.Debug.Assert(storeDir != null);
						}
						else
						{
							storeDir = dir;
							System.Diagnostics.Debug.Assert(storeDir != null);
						}
						
						System.String storesSegment;
						if (si.GetDocStoreOffset() != -1)
						{
							storesSegment = si.GetDocStoreSegment();
						}
						else
						{
							storesSegment = segment;
						}
						
						fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
						
						// Verify two sources of "maxDoc" agree:
						if (si.GetDocStoreOffset() == -1 && fieldsReaderOrig.Size() != si.docCount)
						{
							throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + fieldsReaderOrig.Size() + " but segmentInfo shows " + si.docCount);
						}
						
						if (fieldInfos.HasVectors())
						{
							// open term vector files only as needed
							termVectorsReaderOrig = new TermVectorsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
						}
					}
				}
			}
        public virtual void TestPositionReader()
        {
            TermVectorsReader reader = Codec.Default.TermVectorsFormat().VectorsReader(Dir, Seg.Info, FieldInfos, NewIOContext(Random()));

            //BytesRef[] terms; // unused here, as noted in the later copy of this test
            Terms      vector = reader.Get(0).Terms(TestFields[0]);

            Assert.IsNotNull(vector);
            Assert.AreEqual(TestTerms.Length, vector.Size());
            TermsEnum            termsEnum = vector.Iterator(null);
            DocsAndPositionsEnum dpEnum    = null;

            for (int i = 0; i < TestTerms.Length; i++)
            {
                BytesRef text = termsEnum.Next();
                Assert.IsNotNull(text);
                string term = text.Utf8ToString();
                //System.out.println("Term: " + term);
                Assert.AreEqual(TestTerms[i], term);

                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                Assert.IsNotNull(dpEnum);
                int doc = dpEnum.DocID();
                Assert.AreEqual(-1, doc);
                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                Assert.AreEqual(dpEnum.Freq(), Positions[i].Length);
                for (int j = 0; j < Positions[i].Length; j++)
                {
                    Assert.AreEqual(Positions[i][j], dpEnum.NextPosition());
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                doc    = dpEnum.DocID();
                Assert.AreEqual(-1, doc);
                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                Assert.IsNotNull(dpEnum);
                Assert.AreEqual(dpEnum.Freq(), Positions[i].Length);
                for (int j = 0; j < Positions[i].Length; j++)
                {
                    Assert.AreEqual(Positions[i][j], dpEnum.NextPosition());
                    Assert.AreEqual(j * 10, dpEnum.StartOffset());
                    Assert.AreEqual(j * 10 + TestTerms[i].Length, dpEnum.EndOffset());
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
            }

            Terms freqVector = reader.Get(0).Terms(TestFields[1]); //no pos, no offset

            Assert.IsNotNull(freqVector);
            Assert.AreEqual(TestTerms.Length, freqVector.Size());
            termsEnum = freqVector.Iterator(null);
            Assert.IsNotNull(termsEnum);
            for (int i = 0; i < TestTerms.Length; i++)
            {
                BytesRef text = termsEnum.Next();
                Assert.IsNotNull(text);
                string term = text.Utf8ToString();
                //System.out.println("Term: " + term);
                Assert.AreEqual(TestTerms[i], term);
                Assert.IsNotNull(termsEnum.Docs(null, null));
                Assert.IsNull(termsEnum.DocsAndPositions(null, null)); // no pos
            }
            reader.Dispose();
        }
        internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
        {
            fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
            termVectorsLocal  = new AnonymousTermVectorsLocal(this);

            if (termsIndexDivisor == 0)
            {
                throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
            }

            Codec     codec = si.Info.Codec;
            Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory.

            bool success = false;

            try
            {
                if (si.Info.UseCompoundFile)
                {
                    cfsDir = CfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
                }
                else
                {
                    CfsReader = null;
                    cfsDir    = dir;
                }

                FieldInfos fieldInfos = owner.FieldInfos_Renamed;

                this.TermsIndexDivisor = termsIndexDivisor;
                PostingsFormat   format           = codec.PostingsFormat();
                SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);
                // Ask codec for its Fields
                Fields = format.FieldsProducer(segmentReadState);
                Debug.Assert(Fields != null);
                // ask codec for its Norms:
                // TODO: since we don't write any norms file if there are no norms,
                // kinda jaky to assume the codec handles the case of no norms file at all gracefully?!

                if (fieldInfos.HasNorms())
                {
                    NormsProducer = codec.NormsFormat().NormsProducer(segmentReadState);
                    Debug.Assert(NormsProducer != null);
                }
                else
                {
                    NormsProducer = null;
                }

                StoredFieldsFormat sff = si.Info.Codec.StoredFieldsFormat();

                try
                {
                    FieldsReaderOrig = sff.FieldsReader(cfsDir, si.Info, fieldInfos, context);
                }
                catch (System.AccessViolationException)
                {
                }

                //FieldsReaderOrig = si.Info.Codec.StoredFieldsFormat().FieldsReader(cfsDir, si.Info, fieldInfos, context);

                if (fieldInfos.HasVectors()) // open term vector files only as needed
                {
                    TermVectorsReaderOrig = si.Info.Codec.TermVectorsFormat().VectorsReader(cfsDir, si.Info, fieldInfos, context);
                }
                else
                {
                    TermVectorsReaderOrig = null;
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    DecRef();
                }
            }
        }
Example #20
        internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
        {
            fieldsReaderLocal = new DisposableThreadLocal<StoredFieldsReader>(
                () => (StoredFieldsReader)fieldsReaderOrig.Clone());
            termVectorsLocal = new DisposableThreadLocal<TermVectorsReader>(
                () => (termVectorsReaderOrig is null) ? null : (TermVectorsReader)termVectorsReaderOrig.Clone());

            if (termsIndexDivisor == 0)
            {
                throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
            }

            Codec     codec = si.Info.Codec;
            Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory.

            bool success = false;

            try
            {
                if (si.Info.UseCompoundFile)
                {
                    cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
                }
                else
                {
                    cfsReader = null;
                    cfsDir    = dir;
                }

                FieldInfos fieldInfos = owner.FieldInfos;

                this.termsIndexDivisor = termsIndexDivisor;
                PostingsFormat   format           = codec.PostingsFormat;
                SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);
                // Ask codec for its Fields
                fields = format.FieldsProducer(segmentReadState);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(fields != null);
                }
                // ask codec for its Norms:
                // TODO: since we don't write any norms file if there are no norms,
                // kinda jaky to assume the codec handles the case of no norms file at all gracefully?!

                if (fieldInfos.HasNorms)
                {
                    normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(normsProducer != null);
                    }
                }
                else
                {
                    normsProducer = null;
                }

                fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);

                if (fieldInfos.HasVectors) // open term vector files only as needed
                {
                    termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
                }
                else
                {
                    termVectorsReaderOrig = null;
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    DecRef();
                }
            }
        }
Example #21
        private void  Initialize(SegmentInfo si)
        {
            segment = si.name;
            this.si = si;

            bool success = false;

            try
            {
                // Use compound file directory for some files, if it exists
                Directory cfsDir = Directory();
                if (si.GetUseCompoundFile())
                {
                    cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
                    cfsDir    = cfsReader;
                }

                // Open the per-segment files (from the compound file when present)
                fieldInfos   = new FieldInfos(cfsDir, segment + ".fnm");
                fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

                // Verify two sources of "maxDoc" agree:
                if (fieldsReader.Size() != si.docCount)
                {
                    throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
                }

                tis = new TermInfosReader(cfsDir, segment, fieldInfos);

                // NOTE: the bitvector is stored using the regular directory, not cfs
                if (HasDeletions(si))
                {
                    deletedDocs = new BitVector(Directory(), si.GetDelFileName());

                    // Verify # deletes does not exceed maxDoc for this segment:
                    if (deletedDocs.Count() > MaxDoc())
                    {
                        throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
                    }
                }

                // make sure that all index files have been read or are kept open
                // so that if an index update removes them we'll still have them
                freqStream = cfsDir.OpenInput(segment + ".frq");
                proxStream = cfsDir.OpenInput(segment + ".prx");
                OpenNorms(cfsDir);

                if (fieldInfos.HasVectors())
                {
                    // open term vector files only as needed
                    termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
                }
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above.  In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    DoClose();
                }
            }
        }
		private void  Initialize(SegmentInfo si)
		{
			segment = si.name;
			
			// Use compound file directory for some files, if it exists
			Directory cfsDir = Directory();
			if (Directory().FileExists(segment + ".cfs"))
			{
				cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
				cfsDir = cfsReader;
			}
			
			// Open the per-segment files (from the compound file when present)
			fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
			fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
			
			tis = new TermInfosReader(cfsDir, segment, fieldInfos);
			
			// NOTE: the bitvector is stored using the regular directory, not cfs
			if (HasDeletions(si))
				deletedDocs = new BitVector(Directory(), segment + ".del");
			
			// make sure that all index files have been read or are kept open
			// so that if an index update removes them we'll still have them
			freqStream = cfsDir.OpenInput(segment + ".frq");
			proxStream = cfsDir.OpenInput(segment + ".prx");
			OpenNorms(cfsDir);
			
			if (fieldInfos.HasVectors())
			{
				// open term vector files only as needed
				termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
			}
		}
        private void  Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores)
        {
            segment             = si.name;
            this.si             = si;
            this.readBufferSize = readBufferSize;

            bool success = false;

            try
            {
                // Use compound file directory for some files, if it exists
                Directory cfsDir = Directory();
                if (si.GetUseCompoundFile())
                {
                    cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
                    cfsDir    = cfsReader;
                }

                Directory storeDir;

                if (doOpenStores)
                {
                    if (si.GetDocStoreOffset() != -1)
                    {
                        if (si.GetDocStoreIsCompoundFile())
                        {
                            storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                            storeDir       = storeCFSReader;
                        }
                        else
                        {
                            storeDir = Directory();
                        }
                    }
                    else
                    {
                        storeDir = cfsDir;
                    }
                }
                else
                {
                    storeDir = null;
                }

                fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");

                bool anyProx   = false;
                int  numFields = fieldInfos.Size();
                for (int i = 0; !anyProx && i < numFields; i++)
                {
                    if (!fieldInfos.FieldInfo(i).omitTf)
                    {
                        anyProx = true;
                    }
                }

                System.String fieldsSegment;

                if (si.GetDocStoreOffset() != -1)
                {
                    fieldsSegment = si.GetDocStoreSegment();
                }
                else
                {
                    fieldsSegment = segment;
                }

                if (doOpenStores)
                {
                    fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);

                    // Verify two sources of "maxDoc" agree:
                    if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount)
                    {
                        throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
                    }
                }

                tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);

                LoadDeletedDocs();

                // make sure that all index files have been read or are kept open
                // so that if an index update removes them we'll still have them
                freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize);
                if (anyProx)
                {
                    proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize);
                }
                OpenNorms(cfsDir, readBufferSize);

                if (doOpenStores && fieldInfos.HasVectors())
                {
                    // open term vector files only as needed
                    System.String vectorsSegment;
                    if (si.GetDocStoreOffset() != -1)
                    {
                        vectorsSegment = si.GetDocStoreSegment();
                    }
                    else
                    {
                        vectorsSegment = segment;
                    }
                    termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
                }
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above.  In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    DoClose();
                }
            }
        }
Example #24
		private void  CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
		{
			int maxDoc = reader.MaxDoc();
			if (matchingVectorsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				for (int docNum = 0; docNum < maxDoc; )
				{
					if (reader.IsDeleted(docNum))
					{
						// skip deleted docs
						++docNum;
						continue;
					}
					// We can optimize this case (doing a bulk byte copy) since the field 
					// numbers are identical
					int start = docNum, numDocs = 0;
					do 
					{
						docNum++;
						numDocs++;
						if (docNum >= maxDoc)
							break;
						if (reader.IsDeleted(docNum))
						{
							docNum++;
							break;
						}
					}
					while (numDocs < MAX_RAW_MERGE_DOCS);
					
					matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
					termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
					checkAbort.Work(300 * numDocs);
				}
			}
			else
			{
				for (int docNum = 0; docNum < maxDoc; docNum++)
				{
					if (reader.IsDeleted(docNum))
					{
						// skip deleted docs
						continue;
					}
					
					// NOTE: it's very important to first assign to vectors then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
					termVectorsWriter.AddAllDocVectors(vectors);
					checkAbort.Work(300);
				}
			}
		}
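The inner do/while builds maximal runs of live documents, ending a run at MAX_RAW_MERGE_DOCS, at maxDoc, or one document past a deletion. A standalone toy trace of that chunking (the deletion flags and the constant's value are illustrative):

		bool[] deleted = { false, false, false, true, false, false };
		int maxDoc = deleted.Length;
		const int MAX_RAW_MERGE_DOCS = 4192;
		for (int docNum = 0; docNum < maxDoc; )
		{
			if (deleted[docNum]) { ++docNum; continue; }
			int start = docNum, numDocs = 0;
			do
			{
				docNum++; numDocs++;
				if (docNum >= maxDoc) break;
				if (deleted[docNum]) { docNum++; break; }
			}
			while (numDocs < MAX_RAW_MERGE_DOCS);
			// with the flags above this prints "run: 0+3" then "run: 4+2",
			// i.e. docs 0-2 and 4-5 are bulk-copied while doc 3 is skipped
			System.Console.WriteLine("run: " + start + "+" + numDocs);
		}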
        public virtual void  TestMapper()
        {
            TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);

            Assert.IsTrue(reader != null);
            SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());

            reader.Get(0, mapper);
            var set_Renamed = mapper.TermVectorEntrySet;

            Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
            //three fields, 4 terms, all terms are the same
            Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
            //Check offsets and positions
            for (System.Collections.IEnumerator iterator = set_Renamed.GetEnumerator(); iterator.MoveNext();)
            {
                TermVectorEntry tve = (TermVectorEntry)iterator.Current;
                Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
                Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
            }

            mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
            reader.Get(1, mapper);
            set_Renamed = mapper.TermVectorEntrySet;
            Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
            //three fields, 4 terms, all terms are the same
            Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
            //Should have offsets and positions b/c we are munging all the fields together
            for (System.Collections.IEnumerator iterator = set_Renamed.GetEnumerator(); iterator.MoveNext();)
            {
                TermVectorEntry tve = (TermVectorEntry)iterator.Current;
                Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
                Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
            }


            FieldSortedTermVectorMapper fsMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());

            reader.Get(0, fsMapper);
            var map = fsMapper.FieldToTerms;

            Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
            for (var iterator = map.GetEnumerator(); iterator.MoveNext();)
            {
                var entry     = iterator.Current;
                var sortedSet = entry.Value;
                Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
                for (var inner = sortedSet.GetEnumerator(); inner.MoveNext();)
                {
                    TermVectorEntry tve = inner.Current;
                    Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
                    //Check offsets and positions.
                    Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
                    System.String field = tve.Field;
                    if (field.Equals(testFields[0]))
                    {
                        //should have offsets

                        Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
                        Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
                    }
                    else if (field.Equals(testFields[1]))
                    {
                        //should not have offsets

                        Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                        Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
                    }
                }
            }
            //Try mapper that ignores offs and positions
            fsMapper = new FieldSortedTermVectorMapper(true, true, new TermVectorEntryFreqSortedComparator());
            reader.Get(0, fsMapper);
            map = fsMapper.FieldToTerms;
            Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
            for (var iterator = map.GetEnumerator(); iterator.MoveNext();)
            {
                var entry     = iterator.Current;
                var sortedSet = entry.Value;
                Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
                for (var inner = sortedSet.GetEnumerator(); inner.MoveNext();)
                {
                    TermVectorEntry tve = inner.Current;
                    Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
                    //Check offsets and positions.
                    Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
                    System.String field = tve.Field;
                    if (field.Equals(testFields[0]))
                    {
                        //offsets and positions were ignored by this mapper

                        Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                        Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
                    }
                    else if (field.Equals(testFields[1]))
                    {
                        //should not have offsets

                        Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                        Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
                    }
                }
            }

            // test setDocumentNumber()
            IndexReader       ir = IndexReader.Open(dir, true);
            DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper();

            Assert.AreEqual(-1, docNumAwareMapper.GetDocumentNumber());

            ir.GetTermFreqVector(0, docNumAwareMapper);
            Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
            docNumAwareMapper.SetDocumentNumber(-1);

            ir.GetTermFreqVector(1, docNumAwareMapper);
            Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
            docNumAwareMapper.SetDocumentNumber(-1);

            ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
            Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
            docNumAwareMapper.SetDocumentNumber(-1);

            ir.GetTermFreqVector(1, "f2", docNumAwareMapper);
            Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
            docNumAwareMapper.SetDocumentNumber(-1);

            ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
            Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());

            ir.Close();
        }
 /// <summary> Do a bulk copy of numDocs documents from reader to our
 /// streams.  This is used to expedite merging, if the
 /// field numbers are congruent.
 /// </summary>
 internal void AddRawDocuments(TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs)
 {
     long tvdPosition = tvd.GetFilePointer();
     long tvfPosition = tvf.GetFilePointer();
     long tvdStart = tvdPosition;
     long tvfStart = tvfPosition;
     for (int i = 0; i < numDocs; i++)
     {
         tvx.WriteLong(tvdPosition);
         tvdPosition += tvdLengths[i];
         tvx.WriteLong(tvfPosition);
         tvfPosition += tvfLengths[i];
     }
     tvd.CopyBytes(reader.GetTvdStream(), tvdPosition - tvdStart);
     tvf.CopyBytes(reader.GetTvfStream(), tvfPosition - tvfStart);
     System.Diagnostics.Debug.Assert(tvd.GetFilePointer() == tvdPosition);
     System.Diagnostics.Debug.Assert(tvf.GetFilePointer() == tvfPosition);
 }
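A hedged sketch of the caller-side handshake (mirroring the merge loop in Example #16, and assuming matchingVectorsReader, termVectorsWriter, and maxDoc are in scope): RawDocs fills the per-document tvd/tvf byte lengths that AddRawDocuments then replays into the tvx pointers.

     int[] tvdLengths = new int[MAX_RAW_MERGE_DOCS];
     int[] tvfLengths = new int[MAX_RAW_MERGE_DOCS];
     int docCount = 0;
     while (docCount < maxDoc)
     {
         int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
         // read the raw per-doc lengths for `len` docs starting at docCount ...
         matchingVectorsReader.RawDocs(tvdLengths, tvfLengths, docCount, len);
         // ... then stream the corresponding bytes straight into the writer
         termVectorsWriter.AddRawDocuments(matchingVectorsReader, tvdLengths, tvfLengths, len);
         docCount += len;
     }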
        internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
        {
            fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
            termVectorsLocal  = new AnonymousTermVectorsLocal(this);

            if (termsIndexDivisor == 0)
            {
                throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
            }

            Codec     codec = si.Info.Codec;
            Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory.

            bool success = false;

            try
            {
                if (si.Info.UseCompoundFile)
                {
                    cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
                }
                else
                {
                    cfsReader = null;
                    cfsDir    = dir;
                }

                FieldInfos fieldInfos = owner.FieldInfos;

                this.termsIndexDivisor = termsIndexDivisor;
                PostingsFormat   format           = codec.PostingsFormat;
                SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);
                // Ask codec for its Fields
                fields = format.FieldsProducer(segmentReadState);
                Debug.Assert(fields != null);
                // ask codec for its Norms:
                // TODO: since we don't write any norms file if there are no norms,
                // kinda jaky to assume the codec handles the case of no norms file at all gracefully?!

                if (fieldInfos.HasNorms)
                {
                    normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
                    Debug.Assert(normsProducer != null);
                }
                else
                {
                    normsProducer = null;
                }

                // LUCENENET TODO: EXCEPTIONS Not sure why this catch block is swallowing AccessViolationException,
                // because it didn't exist in Lucene. Is it really needed? AVE is for protected memory...could
                // this be needed because we are using unchecked??

#if !NETSTANDARD
                try
                {
#endif
                fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);
#if !NETSTANDARD
                }
#pragma warning disable 168
                catch (System.AccessViolationException ave)
#pragma warning restore 168
                {
                }
#endif

                if (fieldInfos.HasVectors) // open term vector files only as needed
                {
                    termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
                }
                else
                {
                    termVectorsReaderOrig = null;
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    DecRef();
                }
            }
        }
		public virtual void  TestOffsetReader()
		{
			TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
			Assert.IsTrue(reader != null);
			TermPositionVector vector = (TermPositionVector) reader.Get(0, testFields[0]);
			Assert.IsTrue(vector != null);
			System.String[] terms = vector.GetTerms();
			Assert.IsTrue(terms != null);
			Assert.IsTrue(terms.Length == testTerms.Length);
			for (int i = 0; i < terms.Length; i++)
			{
				System.String term = terms[i];
				//System.out.println("Term: " + term);
				Assert.IsTrue(term.Equals(testTerms[i]));
				int[] positions = vector.GetTermPositions(i);
				Assert.IsTrue(positions != null);
				Assert.IsTrue(positions.Length == this.positions[i].Length);
				for (int j = 0; j < positions.Length; j++)
				{
					int position = positions[j];
					Assert.IsTrue(position == this.positions[i][j]);
				}
				TermVectorOffsetInfo[] offset = vector.GetOffsets(i);
				Assert.IsTrue(offset != null);
				Assert.IsTrue(offset.Length == this.offsets[i].Length);
				for (int j = 0; j < offset.Length; j++)
				{
					TermVectorOffsetInfo termVectorOffsetInfo = offset[j];
					Assert.IsTrue(termVectorOffsetInfo.Equals(offsets[i][j]));
				}
			}
		}
		private void  CheckTermVector(TermVectorsReader reader, int docNum, System.String field)
		{
			TermFreqVector vector = reader.Get(docNum, field);
			Assert.IsTrue(vector != null);
			System.String[] terms = vector.GetTerms();
			Assert.IsTrue(terms != null);
			Assert.IsTrue(terms.Length == testTerms.Length);
			for (int i = 0; i < terms.Length; i++)
			{
				System.String term = terms[i];
				Assert.IsTrue(term.Equals(testTerms[i]));
			}
		}
		public virtual void  TestBadParams()
		{
			try
			{
				TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
				Assert.IsTrue(reader != null);
				//Bad document number, good field number
				reader.Get(50, testFields[0]);
				Assert.Fail();
			}
			catch (System.IO.IOException)
			{
				// expected exception
			}
			try
			{
				TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
				Assert.IsTrue(reader != null);
				//Bad document number, no field
				reader.Get(50);
				Assert.Fail();
			}
			catch (System.IO.IOException)
			{
				// expected exception
			}
			try
			{
				TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
				Assert.IsTrue(reader != null);
				//good document number, bad field number
				TermFreqVector vector = reader.Get(0, "f50");
				Assert.IsTrue(vector == null);
			}
			catch (System.IO.IOException)
			{
				Assert.Fail();
			}
		}
		public virtual void  TestMultipleDocuments()
		{
			
			try
			{
				TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
				Assert.IsTrue(writer != null);
				for (int i = 0; i < 10; i++)
				{
					WriteDocument(writer, testFields.Length);
				}
				writer.Close();
			}
			catch (System.IO.IOException e)
			{
				System.Console.Error.WriteLine(e.StackTrace);
				Assert.Fail();
			}
			//Do some arbitrary tests
			try
			{
				TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
				Assert.IsTrue(reader != null);
				for (int i = 0; i < 10; i++)
				{
					CheckTermVector(reader, 5, testFields[0]);
					CheckTermVector(reader, 2, testFields[2]);
				}
			}
			catch (System.IO.IOException e)
			{
				System.Console.Error.WriteLine(e.StackTrace);
				Assert.Fail();
			}
		}
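
TestMultipleDocuments depends on a WriteDocument helper that this excerpt does not include. Against the 2.x-era TermVectorsWriter API it plausibly looked like the sketch below; the method names (OpenDocument, OpenField, AddTerm, CloseField, CloseDocument) are assumptions based on that old writer and should be verified against the version in use:

// Hypothetical reconstruction of the missing helper -- not the original code.
private void WriteDocument(TermVectorsWriter writer, int numFields)
{
	writer.OpenDocument();
	for (int j = 0; j < numFields; j++)
	{
		writer.OpenField(testFields[j]);
		for (int k = 0; k < testTerms.Length; k++)
		{
			writer.AddTerm(testTerms[k], 3); // 3 is an arbitrary frequency for the sketch
		}
		writer.CloseField();
	}
	writer.CloseDocument();
}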
Example #32
		private void  Initialize(SegmentInfo si)
		{
			segment = si.name;
			this.si = si;
			
			bool success = false;
			
			try
			{
				// Use compound file directory for some files, if it exists
				Directory cfsDir = Directory();
				if (si.GetUseCompoundFile())
				{
					cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
					cfsDir = cfsReader;
				}
				
				// Read the field infos and stored fields (from the compound file when one is in use)
				fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
				fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
				
				// Verify two sources of "maxDoc" agree:
				if (fieldsReader.Size() != si.docCount)
				{
					throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
				}
				
				tis = new TermInfosReader(cfsDir, segment, fieldInfos);
				
				// NOTE: the bitvector is stored using the regular directory, not cfs
				if (HasDeletions(si))
				{
					deletedDocs = new BitVector(Directory(), si.GetDelFileName());
					
					// Verify # deletes does not exceed maxDoc for this segment:
					if (deletedDocs.Count() > MaxDoc())
					{
						throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
					}
				}
				
				// make sure that all index files have been read or are kept open
				// so that if an index update removes them we'll still have them
				freqStream = cfsDir.OpenInput(segment + ".frq");
				proxStream = cfsDir.OpenInput(segment + ".prx");
				OpenNorms(cfsDir);
				
				if (fieldInfos.HasVectors())
				{
					// open term vector files only as needed
					termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
				}
				success = true;
			}
			finally
			{
				
				// With lock-less commits, it's entirely possible (and
				// fine) to hit a FileNotFound exception above.  In
				// this case, we want to explicitly close any subset
				// of things that were opened so that we don't have to
				// wait for a GC to do so.
				if (!success)
				{
					DoClose();
				}
			}
		}
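
The swap of cfsDir above is possible because CompoundFileReader derives from Directory, so every subsequent open (field infos, postings, norms) is oblivious to whether the files live loose on disk or packed inside a single .cfs. A standalone sketch of the same idea, assuming the 2.x-era Lucene.Net API (the index path and segment name are placeholders):

Directory dir = FSDirectory.GetDirectory("/path/to/index", false);
Directory cfsDir = new CompoundFileReader(dir, "_1.cfs");

// Sub-files of the compound file open exactly like standalone files:
IndexInput frq = cfsDir.OpenInput("_1.frq");
long length = frq.Length();
frq.Close();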
        private void Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores)
        {
            segment = si.name;
            this.si = si;
            this.readBufferSize = readBufferSize;

            bool success = false;

            try
            {
                // Use compound file directory for some files, if it exists
                Directory cfsDir = Directory();
                if (si.GetUseCompoundFile())
                {
                    cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
                    cfsDir = cfsReader;
                }

                Directory storeDir;

                if (doOpenStores)
                {
                    if (si.GetDocStoreOffset() != -1)
                    {
                        if (si.GetDocStoreIsCompoundFile())
                        {
                            storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                            storeDir = storeCFSReader;
                        }
                        else
                        {
                            storeDir = Directory();
                        }
                    }
                    else
                    {
                        storeDir = cfsDir;
                    }
                }
                else
                {
                    storeDir = null;
                }

                fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");

                bool anyProx = false;
                int numFields = fieldInfos.Size();
                for (int i = 0; !anyProx && i < numFields; i++)
                    if (!fieldInfos.FieldInfo(i).omitTf)
                        anyProx = true;

                System.String fieldsSegment;

                if (si.GetDocStoreOffset() != -1)
                    fieldsSegment = si.GetDocStoreSegment();
                else
                    fieldsSegment = segment;

                if (doOpenStores)
                {
                    fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);

                    // Verify two sources of "maxDoc" agree:
                    if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount)
                    {
                        throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
                    }
                }

                tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);

                LoadDeletedDocs();

                // make sure that all index files have been read or are kept open
                // so that if an index update removes them we'll still have them
                freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize);
                if (anyProx)
                    proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize);
                OpenNorms(cfsDir, readBufferSize);

                if (doOpenStores && fieldInfos.HasVectors())
                {
                    // open term vector files only as needed
                    System.String vectorsSegment;
                    if (si.GetDocStoreOffset() != -1)
                        vectorsSegment = si.GetDocStoreSegment();
                    else
                        vectorsSegment = segment;
                    termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
                }
                success = true;
            }
            finally
            {

                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above.  In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    DoClose();
                }
            }
        }
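
The storeDir selection at the top of this overload is what supports shared doc stores: when GetDocStoreOffset() is not -1, several segments share one set of stored-fields/term-vectors files, optionally packed into a ".cfx" compound store. The same decision, factored into a helper for readability (ResolveStoreDir is a hypothetical name, not part of Lucene.Net):

// Hypothetical refactor of the storeDir logic above; behavior is unchanged.
private Directory ResolveStoreDir(SegmentInfo si, Directory cfsDir, int readBufferSize)
{
    if (si.GetDocStoreOffset() == -1)
        return cfsDir; // segment owns its own doc stores

    if (!si.GetDocStoreIsCompoundFile())
        return Directory(); // shared stores live as loose files in the main directory

    // Shared stores packed into a ".cfx" compound store:
    storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
    return storeCFSReader;
}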