Example #1
        public virtual void TestRandomPostings()
        {
            FieldInfos.Builder builder = new FieldInfos.Builder();

            FieldData[] fields = new FieldData[NUM_FIELDS];
            for (int i = 0; i < NUM_FIELDS; i++)
            {
                bool omitTF        = 0 == (i % 3);
                bool storePayloads = 1 == (i % 3);
                fields[i] = new FieldData(this, fieldNames[i], builder, this.MakeRandomTerms(omitTF, storePayloads), omitTF, storePayloads);
            }

            // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
            using (Directory dir = NewDirectory())
            {
                FieldInfos fieldInfos = builder.Finish();

                if (Verbose)
                {
                    Console.WriteLine("TEST: now write postings");
                }

                this.Write(fieldInfos, dir, fields, false);
                Codec       codec = Codec.Default;
                SegmentInfo si    = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

                if (Verbose)
                {
                    Console.WriteLine("TEST: now read postings");
                }

                // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
                using (FieldsProducer terms = codec.PostingsFormat.FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)))
                {
                    Verify[] threads = new Verify[NUM_TEST_THREADS - 1];
                    for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
                    {
                        threads[i] = new Verify(this, si, fields, terms);
                        threads[i].IsBackground = true;
                        threads[i].Start();
                    }

                    (new Verify(this, si, fields, terms)).Run();

                    for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
                    {
                        threads[i].Join();
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(!threads[i].failed);
                        }
                    }
                }
            }
        }
Example #2
            public BloomFilteredFieldsProducer(BloomFilteringPostingsFormat outerInstance, SegmentReadState state)
            {
                this.outerInstance = outerInstance;
                var bloomFileName = IndexFileNames.SegmentFileName(
                    state.SegmentInfo.Name, state.SegmentSuffix, BLOOM_EXTENSION);
                ChecksumIndexInput bloomIn = null;
                var success = false;

                try
                {
                    bloomIn = state.Directory.OpenChecksumInput(bloomFileName, state.Context);
                    var version = CodecUtil.CheckHeader(bloomIn, /*BLOOM_CODEC_NAME*/ outerInstance.Name, VERSION_START, VERSION_CURRENT);
                    // Load the hash function used in the BloomFilter
                    // hashFunction = HashFunction.forName(bloomIn.readString());
                    // Load the delegate postings format
                    var delegatePostingsFormat = ForName(bloomIn.ReadString());

                    _delegateFieldsProducer = delegatePostingsFormat
                                              .FieldsProducer(state);
                    var numBlooms = bloomIn.ReadInt32();
                    for (var i = 0; i < numBlooms; i++)
                    {
                        var fieldNum  = bloomIn.ReadInt32();
                        var bloom     = FuzzySet.Deserialize(bloomIn);
                        var fieldInfo = state.FieldInfos.FieldInfo(fieldNum);
                        _bloomsByFieldName.Add(fieldInfo.Name, bloom);
                    }

                    if (version >= VERSION_CHECKSUM)
                    {
                        CodecUtil.CheckFooter(bloomIn);
                    }
                    else
                    {
#pragma warning disable 612, 618
                        CodecUtil.CheckEOF(bloomIn);
#pragma warning restore 612, 618
                    }

                    IOUtils.Dispose(bloomIn);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        IOUtils.DisposeWhileHandlingException(bloomIn, _delegateFieldsProducer);
                    }
                }
            }
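On the disposal side, ownership of _delegateFieldsProducer passes to this wrapper once success is set, so it has to release the delegate when it is itself disposed. A minimal sketch of that, reusing the field name from the constructor above (the method body is an assumption, not copied from the Lucene.NET source):

            protected override void Dispose(bool disposing)
            {
                if (disposing)
                {
                    // Sketch: the blooms are plain in-memory state; only the wrapped
                    // delegate producer holds file handles that need releasing.
                    _delegateFieldsProducer.Dispose();
                }
            }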
Example #3
        public virtual void TestRandomPostings()
        {
            FieldInfos.Builder builder = new FieldInfos.Builder();

            FieldData[] fields = new FieldData[NUM_FIELDS];
            for (int i = 0; i < NUM_FIELDS; i++)
            {
                bool omitTF        = 0 == (i % 3);
                bool storePayloads = 1 == (i % 3);
                fields[i] = new FieldData(this, FieldNames[i], builder, this.MakeRandomTerms(omitTF, storePayloads), omitTF, storePayloads);
            }

            Directory  dir        = NewDirectory();
            FieldInfos fieldInfos = builder.Finish();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now write postings");
            }

            this.Write(fieldInfos, dir, fields, false);
            Codec       codec = Codec.Default;
            SegmentInfo si    = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now read postings");
            }
            FieldsProducer terms = codec.PostingsFormat().FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR));

            Verify[] threads = new Verify[NUM_TEST_THREADS - 1];
            for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
            {
                threads[i] = new Verify(this, si, fields, terms);
                threads[i].SetDaemon(true);
                threads[i].Start();
            }

            (new Verify(this, si, fields, terms)).Run();

            for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
            {
                threads[i].Join();
                Debug.Assert(!threads[i].Failed);
            }

            terms.Dispose();
            dir.Dispose();
        }
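Note that this older variant leaks both terms and dir if a Verify thread throws before the Dispose calls at the end are reached; the using blocks in Example #1 above exist precisely to close that gap.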
Example #4
        public virtual void TestFixedPostings()
        {
            const int NUM_TERMS = 100;

            TermData[] terms = new TermData[NUM_TERMS];
            for (int i = 0; i < NUM_TERMS; i++)
            {
                int[]  docs = new int[] { i };
                string text = Convert.ToString(i);
                terms[i] = new TermData(this, text, docs, null);
            }

            FieldInfos.Builder builder = new FieldInfos.Builder();

            FieldData field = new FieldData(this, "field", builder, terms, true, false);

            FieldData[] fields     = new FieldData[] { field };
            FieldInfos  fieldInfos = builder.Finish();

            // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
            using (Directory dir = NewDirectory())
            {
                this.Write(fieldInfos, dir, fields, true);
                Codec       codec = Codec.Default;
                SegmentInfo si    = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

                // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
                using (FieldsProducer reader = codec.PostingsFormat.FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)))
                {
                    IEnumerator<string> fieldsEnum = reader.GetEnumerator();
                    fieldsEnum.MoveNext();
                    string fieldName = fieldsEnum.Current;
                    Assert.IsNotNull(fieldName);
                    Terms terms2 = reader.GetTerms(fieldName);
                    Assert.IsNotNull(terms2);

                    TermsEnum termsEnum = terms2.GetIterator(null);

                    DocsEnum docsEnum = null;
                    for (int i = 0; i < NUM_TERMS; i++)
                    {
                        BytesRef term = termsEnum.Next();
                        Assert.IsNotNull(term);
                        Assert.AreEqual(terms[i].text2, term.Utf8ToString());

                        // do this twice to stress test the codec's reuse, ie,
                        // make sure it properly fully resets (rewinds) its
                        // internal state:
                        for (int iter = 0; iter < 2; iter++)
                        {
                            docsEnum = TestUtil.Docs(Random, termsEnum, null, docsEnum, DocsFlags.NONE);
                            Assert.AreEqual(terms[i].docs[0], docsEnum.NextDoc());
                            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
                        }
                    }
                    Assert.IsNull(termsEnum.Next());

                    for (int i = 0; i < NUM_TERMS; i++)
                    {
                        Assert.AreEqual(termsEnum.SeekCeil(new BytesRef(terms[i].text2)), TermsEnum.SeekStatus.FOUND);
                    }

                    Assert.IsFalse(fieldsEnum.MoveNext());
                }
            }
        }
Example #5
 internal AssertingFieldsProducer(FieldsProducer @in)
 {
     this.@in = @in;
 }
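This one-line constructor is the entire setup for the asserting-wrapper pattern: hold a reference to the real FieldsProducer and forward each member to it, checking invariants on the way through. A hedged sketch of one such forwarding member (the exact checks are illustrative; the real AssertingFieldsProducer wraps results in further asserting types):

 public override Terms GetTerms(string field)
 {
     // Delegate to the wrapped producer; a null result is a legal answer
     // for an unknown field, so only non-null results would get wrapped.
     Terms terms = @in.GetTerms(field);
     return terms; // the real implementation returns an asserting wrapper here
 }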
Example #6
        internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
        {
            fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
            termVectorsLocal  = new AnonymousTermVectorsLocal(this);

            if (termsIndexDivisor == 0)
            {
                throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
            }

            Codec     codec = si.Info.Codec;
            Directory cfsDir; // confusing name: if CFS is used, it's the CFS dir, otherwise it's the segment's directory.

            bool success = false;

            try
            {
                if (si.Info.UseCompoundFile)
                {
                    cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
                }
                else
                {
                    cfsReader = null;
                    cfsDir    = dir;
                }

                FieldInfos fieldInfos = owner.FieldInfos;

                this.termsIndexDivisor = termsIndexDivisor;
                PostingsFormat   format           = codec.PostingsFormat;
                SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);
                // Ask codec for its Fields
                fields = format.FieldsProducer(segmentReadState);
                Debug.Assert(fields != null);
                // ask codec for its Norms:
                // TODO: since we don't write any norms file if there are no norms,
                // kinda janky to assume the codec handles the case of no norms file at all gracefully?!

                if (fieldInfos.HasNorms)
                {
                    normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
                    Debug.Assert(normsProducer != null);
                }
                else
                {
                    normsProducer = null;
                }

                fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);

                if (fieldInfos.HasVectors) // open term vector files only as needed
                {
                    termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
                }
                else
                {
                    termVectorsReaderOrig = null;
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    DecRef();
                }
            }
        }
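The pattern worth copying from this constructor is the success flag: every resource is stored in a field as soon as it is opened, and the finally block runs the teardown path so a failure at any point releases everything acquired so far. The same idiom in isolation, with illustrative names that are not from the source:

        private readonly IndexInput input; // hypothetical field, for illustration

        internal SketchedReader(Directory dir)
        {
            bool success = false;
            try
            {
                input = dir.OpenInput("data.bin", IOContext.DEFAULT); // may throw
                // ... open further resources here; each lands in a field ...
                success = true; // reached only if every step above succeeded
            }
            finally
            {
                if (!success)
                {
                    // Constructor is failing: dispose whatever was opened so far,
                    // suppressing secondary exceptions from the cleanup itself.
                    IOUtils.DisposeWhileHandlingException(input);
                }
            }
        }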
Example #7
            public BloomFilteredFieldsProducer(SegmentReadState state)
            {

                var bloomFileName = IndexFileNames.SegmentFileName(
                    state.SegmentInfo.Name, state.SegmentSuffix, BLOOM_EXTENSION);
                ChecksumIndexInput bloomIn = null;
                var success = false;
                try
                {
                    bloomIn = state.Directory.OpenChecksumInput(bloomFileName, state.Context);
                    var version = CodecUtil.CheckHeader(bloomIn, BLOOM_CODEC_NAME, VERSION_START, VERSION_CURRENT);
                    // Load the hash function used in the BloomFilter
                    // hashFunction = HashFunction.forName(bloomIn.readString());
                    // Load the delegate postings format
                    var delegatePostingsFormat = ForName(bloomIn.ReadString());

                    _delegateFieldsProducer = delegatePostingsFormat
                        .FieldsProducer(state);
                    var numBlooms = bloomIn.ReadInt();
                    for (var i = 0; i < numBlooms; i++)
                    {
                        var fieldNum = bloomIn.ReadInt();
                        var bloom = FuzzySet.Deserialize(bloomIn);
                        var fieldInfo = state.FieldInfos.FieldInfo(fieldNum);
                        _bloomsByFieldName.Add(fieldInfo.Name, bloom);
                    }
                    
                    if (version >= VERSION_CHECKSUM)
                    {
                        CodecUtil.CheckFooter(bloomIn);
                    }
                    else
                    {
                        CodecUtil.CheckEOF(bloomIn);
                    }
                    
                    IOUtils.Close(bloomIn);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        IOUtils.CloseWhileHandlingException(bloomIn, _delegateFieldsProducer);
                    }
                }
            }
Example #8
        internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
        {
            fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
            termVectorsLocal  = new AnonymousTermVectorsLocal(this);

            if (termsIndexDivisor == 0)
            {
                throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
            }

            Codec     codec = si.Info.Codec;
            Directory cfsDir; // confusing name: if CFS is used, it's the CFS dir, otherwise it's the segment's directory.

            bool success = false;

            try
            {
                if (si.Info.UseCompoundFile)
                {
                    cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
                }
                else
                {
                    cfsReader = null;
                    cfsDir    = dir;
                }

                FieldInfos fieldInfos = owner.FieldInfos;

                this.termsIndexDivisor = termsIndexDivisor;
                PostingsFormat   format           = codec.PostingsFormat;
                SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);
                // Ask codec for its Fields
                fields = format.FieldsProducer(segmentReadState);
                Debug.Assert(fields != null);
                // ask codec for its Norms:
                // TODO: since we don't write any norms file if there are no norms,
                // kinda janky to assume the codec handles the case of no norms file at all gracefully?!

                if (fieldInfos.HasNorms)
                {
                    normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
                    Debug.Assert(normsProducer != null);
                }
                else
                {
                    normsProducer = null;
                }

                // LUCENENET TODO: EXCEPTIONS Not sure why this catch block is swallowing AccessViolationException,
                // because it didn't exist in Lucene. Is it really needed? AVE is for protected memory...could
                // this be needed because we are using unchecked??

#if !NETSTANDARD
                try
                {
#endif
                fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);
#if !NETSTANDARD
                }
#pragma warning disable 168
                catch (System.AccessViolationException ave)
#pragma warning restore 168
                {
                    // Swallowed deliberately; see the LUCENENET TODO above.
                }
#endif

                if (fieldInfos.HasVectors) // open term vector files only as needed
                {
                    termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
                }
                else
                {
                    termVectorsReaderOrig = null;
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    DecRef();
                }
            }
        }