public virtual void TestRandomPostings()
{
    FieldInfos.Builder builder = new FieldInfos.Builder();

    FieldData[] fields = new FieldData[NUM_FIELDS];
    for (int i = 0; i < NUM_FIELDS; i++)
    {
        bool omitTF = 0 == (i % 3);
        bool storePayloads = 1 == (i % 3);
        fields[i] = new FieldData(this, fieldNames[i], builder, this.MakeRandomTerms(omitTF, storePayloads), omitTF, storePayloads);
    }

    // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
    using (Directory dir = NewDirectory())
    {
        FieldInfos fieldInfos = builder.Finish();

        if (Verbose)
        {
            Console.WriteLine("TEST: now write postings");
        }

        this.Write(fieldInfos, dir, fields, false);

        Codec codec = Codec.Default;
        SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

        if (Verbose)
        {
            Console.WriteLine("TEST: now read postings");
        }

        // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
        using (FieldsProducer terms = codec.PostingsFormat.FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)))
        {
            Verify[] threads = new Verify[NUM_TEST_THREADS - 1];
            for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
            {
                threads[i] = new Verify(this, si, fields, terms);
                threads[i].IsBackground = true;
                threads[i].Start();
            }

            (new Verify(this, si, fields, terms)).Run();

            for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
            {
                threads[i].Join();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(!threads[i].failed);
                }
            }
        }
    }
}
public BloomFilteredFieldsProducer(BloomFilteringPostingsFormat outerInstance, SegmentReadState state)
{
    this.outerInstance = outerInstance;

    var bloomFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, BLOOM_EXTENSION);
    ChecksumIndexInput bloomIn = null;
    var success = false;
    try
    {
        bloomIn = state.Directory.OpenChecksumInput(bloomFileName, state.Context);
        var version = CodecUtil.CheckHeader(bloomIn, /*BLOOM_CODEC_NAME*/ outerInstance.Name, VERSION_START, VERSION_CURRENT);

        // Load the hash function used in the BloomFilter
        // hashFunction = HashFunction.forName(bloomIn.readString());

        // Load the delegate postings format
        var delegatePostingsFormat = ForName(bloomIn.ReadString());
        _delegateFieldsProducer = delegatePostingsFormat.FieldsProducer(state);

        var numBlooms = bloomIn.ReadInt32();
        for (var i = 0; i < numBlooms; i++)
        {
            var fieldNum = bloomIn.ReadInt32();
            var bloom = FuzzySet.Deserialize(bloomIn);
            var fieldInfo = state.FieldInfos.FieldInfo(fieldNum);
            _bloomsByFieldName.Add(fieldInfo.Name, bloom);
        }

        if (version >= VERSION_CHECKSUM)
        {
            CodecUtil.CheckFooter(bloomIn);
        }
        else
        {
#pragma warning disable 612, 618
            CodecUtil.CheckEOF(bloomIn);
#pragma warning restore 612, 618
        }

        IOUtils.Dispose(bloomIn);
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.DisposeWhileHandlingException(bloomIn, _delegateFieldsProducer);
        }
    }
}
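Once loaded, the per-field map populated above lets the producer reject most failing term lookups without ever touching the delegate: a bloom filter can answer "definitely absent" cheaply and only says "maybe" for possible hits. The following is a minimal, self-contained sketch of that fast-path idea; IBloomSet, ITermSource, and ContainsResult are hypothetical stand-ins for illustration, not the real Lucene.NET FuzzySet/TermsEnum surface.

using System.Collections.Generic;

// Hypothetical stand-ins (not the actual Lucene.NET API).
public enum ContainsResult { No, Maybe }

public interface IBloomSet
{
    // May return Maybe for a false positive, but never a false No.
    ContainsResult Contains(string term);
}

public interface ITermSource
{
    // The "expensive" delegate lookup we want to avoid on definite misses.
    bool SeekExact(string field, string term);
}

public sealed class BloomFilteredTermSource
{
    private readonly IDictionary<string, IBloomSet> bloomsByFieldName;
    private readonly ITermSource delegateSource;

    public BloomFilteredTermSource(IDictionary<string, IBloomSet> blooms, ITermSource delegateSource)
    {
        this.bloomsByFieldName = blooms;
        this.delegateSource = delegateSource;
    }

    public bool SeekExact(string field, string term)
    {
        // Fields without a bloom filter always fall through to the delegate.
        if (bloomsByFieldName.TryGetValue(field, out IBloomSet bloom)
            && bloom.Contains(term) == ContainsResult.No)
        {
            return false; // definite miss: skip the delegate entirely
        }
        return delegateSource.SeekExact(field, term);
    }
}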
public virtual void TestRandomPostings()
{
    FieldInfos.Builder builder = new FieldInfos.Builder();

    FieldData[] fields = new FieldData[NUM_FIELDS];
    for (int i = 0; i < NUM_FIELDS; i++)
    {
        bool omitTF = 0 == (i % 3);
        bool storePayloads = 1 == (i % 3);
        fields[i] = new FieldData(this, FieldNames[i], builder, this.MakeRandomTerms(omitTF, storePayloads), omitTF, storePayloads);
    }

    Directory dir = NewDirectory();
    FieldInfos fieldInfos = builder.Finish();

    if (VERBOSE)
    {
        Console.WriteLine("TEST: now write postings");
    }

    this.Write(fieldInfos, dir, fields, false);

    Codec codec = Codec.Default;
    SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: now read postings");
    }

    FieldsProducer terms = codec.PostingsFormat().FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR));

    Verify[] threads = new Verify[NUM_TEST_THREADS - 1];
    for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
    {
        threads[i] = new Verify(this, si, fields, terms);
        threads[i].SetDaemon(true);
        threads[i].Start();
    }

    (new Verify(this, si, fields, terms)).Run();

    for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
    {
        threads[i].Join();
        Debug.Assert(!threads[i].Failed);
    }

    terms.Dispose();
    dir.Dispose();
}
public virtual void TestFixedPostings()
{
    const int NUM_TERMS = 100;
    TermData[] terms = new TermData[NUM_TERMS];
    for (int i = 0; i < NUM_TERMS; i++)
    {
        int[] docs = new int[] { i };
        string text = Convert.ToString(i);
        terms[i] = new TermData(this, text, docs, null);
    }

    FieldInfos.Builder builder = new FieldInfos.Builder();

    FieldData field = new FieldData(this, "field", builder, terms, true, false);
    FieldData[] fields = new FieldData[] { field };
    FieldInfos fieldInfos = builder.Finish();

    // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
    using (Directory dir = NewDirectory())
    {
        this.Write(fieldInfos, dir, fields, true);
        Codec codec = Codec.Default;
        SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

        // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
        using (FieldsProducer reader = codec.PostingsFormat.FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)))
        {
            IEnumerator<string> fieldsEnum = reader.GetEnumerator();
            fieldsEnum.MoveNext();
            string fieldName = fieldsEnum.Current;
            Assert.IsNotNull(fieldName);

            Terms terms2 = reader.GetTerms(fieldName);
            Assert.IsNotNull(terms2);

            TermsEnum termsEnum = terms2.GetIterator(null);

            DocsEnum docsEnum = null;
            for (int i = 0; i < NUM_TERMS; i++)
            {
                BytesRef term = termsEnum.Next();
                Assert.IsNotNull(term);
                Assert.AreEqual(terms[i].text2, term.Utf8ToString());

                // do this twice to stress test the codec's reuse, ie,
                // make sure it properly fully resets (rewinds) its
                // internal state:
                for (int iter = 0; iter < 2; iter++)
                {
                    docsEnum = TestUtil.Docs(Random, termsEnum, null, docsEnum, DocsFlags.NONE);
                    Assert.AreEqual(terms[i].docs[0], docsEnum.NextDoc());
                    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
                }
            }
            Assert.IsNull(termsEnum.Next());

            for (int i = 0; i < NUM_TERMS; i++)
            {
                Assert.AreEqual(termsEnum.SeekCeil(new BytesRef(terms[i].text2)), TermsEnum.SeekStatus.FOUND);
            }

            Assert.IsFalse(fieldsEnum.MoveNext());
        }
    }
}
internal AssertingFieldsProducer(FieldsProducer @in) { this.@in = @in; }
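The asserting wrapper above only stores its delegate; the value of the pattern comes from overriding each member to validate arguments and results before forwarding. Below is a minimal sketch of the same decorator idea, with a hypothetical IFieldsSource interface standing in for the real FieldsProducer surface (the interface and method names are illustrative, not Lucene.NET's).

using System.Diagnostics;

// Hypothetical minimal surface (not the actual FieldsProducer abstract class).
public interface IFieldsSource
{
    // Returns null when the field has no terms.
    object GetTerms(string field);
}

public sealed class AssertingFieldsSource : IFieldsSource
{
    private readonly IFieldsSource @in;

    public AssertingFieldsSource(IFieldsSource @in)
    {
        this.@in = @in;
    }

    public object GetTerms(string field)
    {
        // Check the caller's side of the contract before delegating.
        Debug.Assert(field != null, "field must not be null");
        object terms = @in.GetTerms(field);
        // The wrapper inspects the result, then passes it through unchanged.
        return terms;
    }
}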
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
{
    fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
    termVectorsLocal = new AnonymousTermVectorsLocal(this);

    if (termsIndexDivisor == 0)
    {
        throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }

    Codec codec = si.Info.Codec;
    Directory cfsDir; // confusing name: if (cfs) it's the cfsDir, otherwise it's the segment's directory.

    bool success = false;

    try
    {
        if (si.Info.UseCompoundFile)
        {
            cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
        }
        else
        {
            cfsReader = null;
            cfsDir = dir;
        }

        FieldInfos fieldInfos = owner.FieldInfos;

        this.termsIndexDivisor = termsIndexDivisor;
        PostingsFormat format = codec.PostingsFormat;
        SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);

        // Ask codec for its Fields
        fields = format.FieldsProducer(segmentReadState);
        Debug.Assert(fields != null);

        // ask codec for its Norms:
        // TODO: since we don't write any norms file if there are no norms,
        // kinda janky to assume the codec handles the case of no norms file at all gracefully?!
        if (fieldInfos.HasNorms)
        {
            normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
            Debug.Assert(normsProducer != null);
        }
        else
        {
            normsProducer = null;
        }

        fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);

        if (fieldInfos.HasVectors) // open term vector files only as needed
        {
            termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
        }
        else
        {
            termVectorsReaderOrig = null;
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            DecRef();
        }
    }
}
public BloomFilteredFieldsProducer(SegmentReadState state)
{
    var bloomFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, BLOOM_EXTENSION);
    ChecksumIndexInput bloomIn = null;
    var success = false;
    try
    {
        bloomIn = state.Directory.OpenChecksumInput(bloomFileName, state.Context);
        var version = CodecUtil.CheckHeader(bloomIn, BLOOM_CODEC_NAME, VERSION_START, VERSION_CURRENT);

        // Load the hash function used in the BloomFilter
        // hashFunction = HashFunction.forName(bloomIn.readString());

        // Load the delegate postings format
        var delegatePostingsFormat = ForName(bloomIn.ReadString());
        _delegateFieldsProducer = delegatePostingsFormat.FieldsProducer(state);

        var numBlooms = bloomIn.ReadInt();
        for (var i = 0; i < numBlooms; i++)
        {
            var fieldNum = bloomIn.ReadInt();
            var bloom = FuzzySet.Deserialize(bloomIn);
            var fieldInfo = state.FieldInfos.FieldInfo(fieldNum);
            _bloomsByFieldName.Add(fieldInfo.Name, bloom);
        }

        if (version >= VERSION_CHECKSUM)
        {
            CodecUtil.CheckFooter(bloomIn);
        }
        else
        {
            CodecUtil.CheckEOF(bloomIn);
        }

        IOUtils.Close(bloomIn);
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(bloomIn, _delegateFieldsProducer);
        }
    }
}
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
{
    fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
    termVectorsLocal = new AnonymousTermVectorsLocal(this);

    if (termsIndexDivisor == 0)
    {
        throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }

    Codec codec = si.Info.Codec;
    Directory cfsDir; // confusing name: if (cfs) it's the cfsDir, otherwise it's the segment's directory.

    bool success = false;

    try
    {
        if (si.Info.UseCompoundFile)
        {
            cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
        }
        else
        {
            cfsReader = null;
            cfsDir = dir;
        }

        FieldInfos fieldInfos = owner.FieldInfos;

        this.termsIndexDivisor = termsIndexDivisor;
        PostingsFormat format = codec.PostingsFormat;
        SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);

        // Ask codec for its Fields
        fields = format.FieldsProducer(segmentReadState);
        Debug.Assert(fields != null);

        // ask codec for its Norms:
        // TODO: since we don't write any norms file if there are no norms,
        // kinda janky to assume the codec handles the case of no norms file at all gracefully?!
        if (fieldInfos.HasNorms)
        {
            normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
            Debug.Assert(normsProducer != null);
        }
        else
        {
            normsProducer = null;
        }

        // LUCENENET TODO: EXCEPTIONS Not sure why this catch block is swallowing AccessViolationException,
        // because it didn't exist in Lucene. Is it really needed? AVE is for protected memory...could
        // this be needed because we are using unchecked??
#if !NETSTANDARD
        try
        {
#endif
            fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);
#if !NETSTANDARD
        }
#pragma warning disable 168
        catch (System.AccessViolationException ave)
#pragma warning restore 168
        {
        }
#endif

        if (fieldInfos.HasVectors) // open term vector files only as needed
        {
            termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
        }
        else
        {
            termVectorsReaderOrig = null;
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            DecRef();
        }
    }
}
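Both SegmentCoreReaders variants, like the two BloomFilteredFieldsProducer constructors, rely on the same acquisition idiom: flip a success flag to true only after every resource is open, and release everything from the finally block when the flag is still false. A stripped-down sketch of that pattern follows, using placeholder FileStream resources; the class and paths are illustrative, not from Lucene.NET.

using System;
using System.IO;

public sealed class MultiResourceHolder : IDisposable
{
    private readonly FileStream first;
    private readonly FileStream second;

    public MultiResourceHolder(string firstPath, string secondPath)
    {
        bool success = false;
        try
        {
            // Acquire in order; any throw before the flag flips triggers cleanup below.
            first = new FileStream(firstPath, FileMode.Open, FileAccess.Read);
            second = new FileStream(secondPath, FileMode.Open, FileAccess.Read);
            success = true; // last statement: everything we own is now open
        }
        finally
        {
            if (!success)
            {
                // Partially constructed: dispose whatever was actually opened so nothing leaks.
                first?.Dispose();
                second?.Dispose();
            }
        }
    }

    public void Dispose()
    {
        first.Dispose();
        second.Dispose();
    }
}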