Example #1
0
            internal virtual void Reset(FieldInfo fieldInfo)
            {
                //System.out.println("pff.reset te=" + termEnum);
                this.fieldInfo = fieldInfo;

                internedFieldName = fieldInfo.Name.Intern();

                Term term = new Term(internedFieldName);

                if (termEnum == null)
                {
                    termEnum     = outerInstance.TermsDict.Terms(term);
                    seekTermEnum = outerInstance.TermsDict.Terms(term);
                    //System.out.println("  term=" + termEnum.term());
                }
                else
                {
                    outerInstance.TermsDict.SeekEnum(termEnum, term, true);
                }
                skipNext = true;

                unicodeSortOrder = outerInstance.SortTermsByUnicode;

                Term t = termEnum.Term();

                if (t != null && t.Field == internedFieldName)
                {
                    newSuffixStart  = 0;
                    prevTerm.Length = 0;
                    SurrogateDance();
                }
            }
Example #2
0
        internal virtual void SeekEnum(SegmentTermEnum enumerator, int indexOffset)
        {
            PagedBytesDataInput input = (PagedBytesDataInput)dataInput.Clone();

            input.SetPosition(indexToDataOffset.Get(indexOffset));

            // read the term
            int  fieldId = input.ReadVInt32();
            Term field   = fields[fieldId];
            Term term    = new Term(field.Field, input.ReadString());

            // read the terminfo
            var termInfo = new TermInfo();

            termInfo.DocFreq = input.ReadVInt32();
            if (termInfo.DocFreq >= skipInterval)
            {
                termInfo.SkipOffset = input.ReadVInt32();
            }
            else
            {
                termInfo.SkipOffset = 0;
            }
            termInfo.FreqPointer = input.ReadVInt64();
            termInfo.ProxPointer = input.ReadVInt64();

            long pointer = input.ReadVInt64();

            // perform the seek
            enumerator.Seek(pointer, ((long)indexOffset * totalIndexInterval) - 1, term, termInfo);
        }
Example #3
0
            internal virtual void Reset(FieldInfo fieldInfo)
            {
                //System.out.println("pff.reset te=" + termEnum);
                this.fieldInfo    = fieldInfo;
                InternedFieldName = String.Intern(fieldInfo.Name);
                Term term = new Term(InternedFieldName);

                if (TermEnum == null)
                {
                    TermEnum     = OuterInstance.TermsDict.Terms(term);
                    SeekTermEnum = OuterInstance.TermsDict.Terms(term);
                    //System.out.println("  term=" + termEnum.term());
                }
                else
                {
                    OuterInstance.TermsDict.SeekEnum(TermEnum, term, true);
                }
                SkipNext = true;

                UnicodeSortOrder = OuterInstance.SortTermsByUnicode();

                Term t = TermEnum.Term();

                if (t != null && t.Field() == InternedFieldName)
                {
                    NewSuffixStart  = 0;
                    PrevTerm.Length = 0;
                    SurrogateDance();
                }
            }
Example #4
0
 public DocsAndPositionsEnum Reset(SegmentTermEnum termEnum, IBits liveDocs)
 {
     pos.LiveDocs = liveDocs;
     pos.Seek(termEnum);
     docID = -1;
     return(this);
 }
Example #5
0
 public DocsAndPositionsEnum Reset(SegmentTermEnum termEnum, Bits liveDocs)
 {
     Pos.LiveDocs = liveDocs;
     Pos.Seek(termEnum);
     DocID_Renamed = -1;
     return(this);
 }
Example #6
0
        /// <summary>
        /// Returns the position of a <see cref="Term"/> in the set or -1. </summary>
        internal long GetPosition(Term term)
        {
            if (size == 0)
            {
                return(-1);
            }

            EnsureIndexIsRead();
            int indexOffset = index.GetIndexOffset(term);

            SegmentTermEnum enumerator = GetThreadResources().termEnum;

            index.SeekEnum(enumerator, indexOffset);

            while (CompareAsUTF16(term, enumerator.Term()) > 0 && enumerator.Next())
            {
            }

            if (CompareAsUTF16(term, enumerator.Term()) == 0)
            {
                return(enumerator.position);
            }
            else
            {
                return(-1);
            }
        }
        /// <summary>
        /// Loads the segment information at segment load time.
        /// </summary>
        /// <param name="indexEnum">
        ///          The term enum. </param>
        /// <param name="indexDivisor">
        ///          The index divisor. </param>
        /// <param name="tiiFileLength">
        ///          The size of the tii file, used to approximate the size of the
        ///          buffer. </param>
        /// <param name="totalIndexInterval">
        ///          The total index interval. </param>
        public TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval)
        {
            this.totalIndexInterval = totalIndexInterval;
            indexSize    = 1 + ((int)indexEnum.size - 1) / indexDivisor;
            skipInterval = indexEnum.skipInterval;
            // this is only an inital size, it will be GCed once the build is complete
            long                 initialSize    = (long)(tiiFileLength * 1.5) / indexDivisor;
            PagedBytes           dataPagedBytes = new PagedBytes(EstimatePageBits(initialSize));
            PagedBytesDataOutput dataOutput     = dataPagedBytes.GetDataOutput();

            int            bitEstimate  = 1 + MathUtil.Log(tiiFileLength, 2);
            GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInt32s.DEFAULT);

            string         currentField = null;
            IList <string> fieldStrs    = new List <string>();
            int            fieldCounter = -1;

            for (int i = 0; indexEnum.Next(); i++)
            {
                Term term = indexEnum.Term();
                if (currentField == null || !currentField.Equals(term.Field, StringComparison.Ordinal))
                {
                    currentField = term.Field;
                    fieldStrs.Add(currentField);
                    fieldCounter++;
                }
                TermInfo termInfo = indexEnum.TermInfo();
                indexToTerms.Set(i, dataOutput.GetPosition());
                dataOutput.WriteVInt32(fieldCounter);
                dataOutput.WriteString(term.Text());
                dataOutput.WriteVInt32(termInfo.DocFreq);
                if (termInfo.DocFreq >= skipInterval)
                {
                    dataOutput.WriteVInt32(termInfo.SkipOffset);
                }
                dataOutput.WriteVInt64(termInfo.FreqPointer);
                dataOutput.WriteVInt64(termInfo.ProxPointer);
                dataOutput.WriteVInt64(indexEnum.indexPointer);
                for (int j = 1; j < indexDivisor; j++)
                {
                    if (!indexEnum.Next())
                    {
                        break;
                    }
                }
            }

            fields = new Term[fieldStrs.Count];
            for (int i = 0; i < fields.Length; i++)
            {
                fields[i] = new Term(fieldStrs[i]);
            }

            dataPagedBytes.Freeze(true);
            dataInput         = dataPagedBytes.GetDataInput();
            indexToDataOffset = indexToTerms.Mutable;

            ramBytesUsed = fields.Length * (RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.ShallowSizeOfInstance(typeof(Term))) + dataPagedBytes.RamBytesUsed() + indexToDataOffset.RamBytesUsed();
        }
Example #8
0
 public PreDocsEnum Reset(SegmentTermEnum termEnum, IBits liveDocs)
 {
     docs.LiveDocs = liveDocs;
     docs.Seek(termEnum);
     docs.freq = 1;
     docID     = -1;
     return(this);
 }
Example #9
0
 public PreDocsEnum Reset(SegmentTermEnum termEnum, Bits liveDocs)
 {
     Docs.LiveDocs = liveDocs;
     Docs.Seek(termEnum);
     Docs.Freq_Renamed = 1;
     DocID_Renamed     = -1;
     return(this);
 }
        /// <summary>
        /// Loads the segment information at segment load time.
        /// </summary>
        /// <param name="indexEnum">
        ///          the term enum. </param>
        /// <param name="indexDivisor">
        ///          the index divisor. </param>
        /// <param name="tiiFileLength">
        ///          the size of the tii file, used to approximate the size of the
        ///          buffer. </param>
        /// <param name="totalIndexInterval">
        ///          the total index interval. </param>
        public TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval)
        {
            this.TotalIndexInterval = totalIndexInterval;
            IndexSize = 1 + ((int)indexEnum.Size - 1) / indexDivisor;
            SkipInterval = indexEnum.SkipInterval;
            // this is only an inital size, it will be GCed once the build is complete
            long initialSize = (long)(tiiFileLength * 1.5) / indexDivisor;
            PagedBytes dataPagedBytes = new PagedBytes(EstimatePageBits(initialSize));
            PagedBytesDataOutput dataOutput = dataPagedBytes.DataOutput;

            int bitEstimate = 1 + MathUtil.Log(tiiFileLength, 2);
            GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, IndexSize, PackedInts.DEFAULT);

            string currentField = null;
            IList<string> fieldStrs = new List<string>();
            int fieldCounter = -1;
            for (int i = 0; indexEnum.Next(); i++)
            {
                Term term = indexEnum.Term();
                if (currentField == null || !currentField.Equals(term.Field()))
                {
                    currentField = term.Field();
                    fieldStrs.Add(currentField);
                    fieldCounter++;
                }
                TermInfo termInfo = indexEnum.TermInfo();
                indexToTerms.Set(i, dataOutput.Position);
                dataOutput.WriteVInt(fieldCounter);
                dataOutput.WriteString(term.Text());
                dataOutput.WriteVInt(termInfo.DocFreq);
                if (termInfo.DocFreq >= SkipInterval)
                {
                    dataOutput.WriteVInt(termInfo.SkipOffset);
                }
                dataOutput.WriteVLong(termInfo.FreqPointer);
                dataOutput.WriteVLong(termInfo.ProxPointer);
                dataOutput.WriteVLong(indexEnum.IndexPointer);
                for (int j = 1; j < indexDivisor; j++)
                {
                    if (!indexEnum.Next())
                    {
                        break;
                    }
                }
            }

            Fields = new Term[fieldStrs.Count];
            for (int i = 0; i < Fields.Length; i++)
            {
                Fields[i] = new Term(fieldStrs[i]);
            }

            dataPagedBytes.Freeze(true);
            DataInput = dataPagedBytes.DataInput;
            IndexToDataOffset = indexToTerms.Mutable;

            RamBytesUsed_Renamed = Fields.Length * (RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.ShallowSizeOfInstance(typeof(Term))) + dataPagedBytes.RamBytesUsed() + IndexToDataOffset.RamBytesUsed();
        }
Example #11
0
        internal TermInfosReader(Directory dir, string seg, FieldInfos fis, IOContext context, int indexDivisor)
        {
            bool success = false;

            if (indexDivisor < 1 && indexDivisor != -1)
            {
                throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
            }

            try
            {
                directory  = dir;
                segment    = seg;
                fieldInfos = fis;

                origEnum = new SegmentTermEnum(directory.OpenInput(IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_EXTENSION), context), fieldInfos, false);
                size     = origEnum.size;

                if (indexDivisor != -1)
                {
                    // Load terms index
                    totalIndexInterval = origEnum.indexInterval * indexDivisor;

                    string          indexFileName = IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION);
                    SegmentTermEnum indexEnum     = new SegmentTermEnum(directory.OpenInput(indexFileName, context), fieldInfos, true);

                    try
                    {
                        index       = new TermInfosReaderIndex(indexEnum, indexDivisor, dir.FileLength(indexFileName), totalIndexInterval);
                        indexLength = index.Length;
                    }
                    finally
                    {
                        indexEnum.Dispose();
                    }
                }
                else
                {
                    // Do not load terms index:
                    totalIndexInterval = -1;
                    index       = null;
                    indexLength = -1;
                }
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Dispose();
                }
            }
        }
Example #12
0
        internal TermInfosReader(Directory dir, string seg, FieldInfos fis, IOContext context, int indexDivisor)
        {
            bool success = false;

            if (indexDivisor < 1 && indexDivisor != -1)
            {
                throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
            }

            try
            {
                Directory = dir;
                Segment = seg;
                FieldInfos = fis;

                OrigEnum = new SegmentTermEnum(Directory.OpenInput(IndexFileNames.SegmentFileName(Segment, "", Lucene3xPostingsFormat.TERMS_EXTENSION), context), FieldInfos, false);
                Size_Renamed = OrigEnum.Size;

                if (indexDivisor != -1)
                {
                    // Load terms index
                    TotalIndexInterval = OrigEnum.IndexInterval * indexDivisor;

                    string indexFileName = IndexFileNames.SegmentFileName(Segment, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION);
                    SegmentTermEnum indexEnum = new SegmentTermEnum(Directory.OpenInput(indexFileName, context), FieldInfos, true);

                    try
                    {
                        Index = new TermInfosReaderIndex(indexEnum, indexDivisor, dir.FileLength(indexFileName), TotalIndexInterval);
                        IndexLength = Index.Length();
                    }
                    finally
                    {
                        indexEnum.Dispose();
                    }
                }
                else
                {
                    // Do not load terms index:
                    TotalIndexInterval = -1;
                    Index = null;
                    IndexLength = -1;
                }
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Dispose();
                }
            }
        }
        public virtual void TestSeekEnum()
        {
            int             indexPosition = 3;
            SegmentTermEnum clone         = (SegmentTermEnum)TermEnum.Clone();
            Term            term          = FindTermThatWouldBeAtIndex(clone, indexPosition);
            SegmentTermEnum enumerator    = clone;

            Index.SeekEnum(enumerator, indexPosition);
            Assert.AreEqual(term, enumerator.Term());
            clone.Dispose();
        }
Example #14
0
 internal TermInfo SeekEnum(SegmentTermEnum enumerator, Term term, bool useCache)
 {
     if (useCache)
     {
         return(SeekEnum(enumerator, term, termsCache.Get(new CloneableTerm(DeepCopyOf(term))), useCache));
     }
     else
     {
         return(SeekEnum(enumerator, term, null, useCache));
     }
 }
 public static void AfterClass()
 {
     TermEnum.Dispose();
     Reader.Dispose();
     Directory.Dispose();
     TermEnum    = null;
     Reader      = null;
     Directory   = null;
     Index       = null;
     SampleTerms = null;
 }
Example #16
0
 public override void AfterClass()
 {
     TermEnum.Dispose();
     Reader.Dispose();
     Directory.Dispose();
     TermEnum    = null;
     Reader      = null;
     Directory   = null;
     Index       = null;
     SampleTerms = null;
     base.AfterClass();
 }
Example #17
0
 public override void AfterClass()
 {
     termEnum.Dispose();
     reader.Dispose();
     directory.Dispose();
     termEnum    = null;
     reader      = null;
     directory   = null;
     index       = null;
     sampleTerms = null;
     base.AfterClass();
 }
Example #18
0
        public object Clone()
        {
            // LUCENENET: MemberwiseClone() doesn't throw in .NET
            SegmentTermEnum clone = (SegmentTermEnum)base.MemberwiseClone();

            clone.input    = (IndexInput)input.Clone();
            clone.termInfo = new TermInfo(termInfo);

            clone.termBuffer = (TermBuffer)termBuffer.Clone();
            clone.prevBuffer = (TermBuffer)prevBuffer.Clone();
            clone.scanBuffer = new TermBuffer();

            return(clone);
        }
Example #19
0
        public override void BeforeClass()
        {
            base.BeforeClass();

            // NOTE: turn off compound file, this test will open some index files directly.
            OldFormatImpersonationIsActive = true;
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false)).SetUseCompoundFile(false);

            TermIndexInterval   = config.TermIndexInterval;
            IndexDivisor        = TestUtil.NextInt32(Random, 1, 10);
            NUMBER_OF_DOCUMENTS = AtLeast(100);
            NUMBER_OF_FIELDS    = AtLeast(Math.Max(10, 3 * TermIndexInterval * IndexDivisor / NUMBER_OF_DOCUMENTS));

            Directory = NewDirectory();

            config.SetCodec(new PreFlexRWCodec());
            LogMergePolicy mp = NewLogMergePolicy();

            // NOTE: turn off compound file, this test will open some index files directly.
            mp.NoCFSRatio = 0.0;
            config.SetMergePolicy(mp);

            Populate(Directory, config);

            DirectoryReader r0      = IndexReader.Open(Directory);
            SegmentReader   r       = LuceneTestCase.GetOnlySegmentReader(r0);
            string          segment = r.SegmentName;

            r.Dispose();

            FieldInfosReader infosReader     = (new PreFlexRWCodec()).FieldInfosFormat.FieldInfosReader;
            FieldInfos       fieldInfos      = infosReader.Read(Directory, segment, "", IOContext.READ_ONCE);
            string           segmentFileName = IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION);
            long             tiiFileLength   = Directory.FileLength(segmentFileName);
            IndexInput       input           = Directory.OpenInput(segmentFileName, NewIOContext(Random));

            TermEnum = new SegmentTermEnum(Directory.OpenInput(IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_EXTENSION), NewIOContext(Random)), fieldInfos, false);
            int totalIndexInterval = TermEnum.indexInterval * IndexDivisor;

            SegmentTermEnum indexEnum = new SegmentTermEnum(input, fieldInfos, true);

            Index = new TermInfosReaderIndex(indexEnum, IndexDivisor, tiiFileLength, totalIndexInterval);
            indexEnum.Dispose();
            input.Dispose();

            Reader      = IndexReader.Open(Directory);
            SampleTerms = Sample(Random, Reader, 1000);
        }
        private Term FindTermThatWouldBeAtIndex(SegmentTermEnum termEnum, int index)
        {
            int termPosition = index * TermIndexInterval * IndexDivisor;

            for (int i = 0; i < termPosition; i++)
            {
                // TODO: this test just uses random terms, so this is always possible
                AssumeTrue("ran out of terms", termEnum.Next());
            }
            Term term = termEnum.Term();

            // An indexed term is only written when the term after
            // it exists, so, if the number of terms is 0 mod
            // termIndexInterval, the last index term will not be
            // written; so we require a term after this term
            // as well:
            AssumeTrue("ran out of terms", termEnum.Next());
            return(term);
        }
Example #21
0
        public virtual void Seek(SegmentTermEnum segmentTermEnum)
        {
            TermInfo ti;
            Term term;

            // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
            if (segmentTermEnum.FieldInfos == FieldInfos) // optimized case
            {
                term = segmentTermEnum.Term();
                ti = segmentTermEnum.TermInfo();
            } // punt case
            else
            {
                term = segmentTermEnum.Term();
                ti = Tis.Get(term);
            }

            Seek(ti, term);
        }
Example #22
0
        public virtual void Seek(SegmentTermEnum segmentTermEnum)
        {
            TermInfo ti;
            Term     term;

            // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
            if (segmentTermEnum.FieldInfos == FieldInfos) // optimized case
            {
                term = segmentTermEnum.Term();
                ti   = segmentTermEnum.TermInfo();
            } // punt case
            else
            {
                term = segmentTermEnum.Term();
                ti   = Tis.Get(term);
            }

            Seek(ti, term);
        }
Example #23
0
        public object Clone()
        {
            SegmentTermEnum clone = null;

            try
            {
                clone = (SegmentTermEnum)base.MemberwiseClone();
            }
            catch (InvalidOperationException e)
            {
            }

            clone.Input            = (IndexInput)Input.Clone();
            clone.TermInfo_Renamed = new TermInfo(TermInfo_Renamed);

            clone.TermBuffer = (TermBuffer)TermBuffer.Clone();
            clone.PrevBuffer = (TermBuffer)PrevBuffer.Clone();
            clone.ScanBuffer = new TermBuffer();

            return(clone);
        }
Example #24
0
 // called only from asserts
 private bool SameTermInfo(TermInfo ti1, TermInfo ti2, SegmentTermEnum enumerator)
 {
     if (ti1.DocFreq != ti2.DocFreq)
     {
         return(false);
     }
     if (ti1.FreqPointer != ti2.FreqPointer)
     {
         return(false);
     }
     if (ti1.ProxPointer != ti2.ProxPointer)
     {
         return(false);
     }
     // skipOffset is only valid when docFreq >= skipInterval:
     if (ti1.DocFreq >= enumerator.skipInterval && ti1.SkipOffset != ti2.SkipOffset)
     {
         return(false);
     }
     return(true);
 }
Example #25
0
 // called only from asserts
 private static bool SameTermInfo(TermInfo ti1, TermInfo ti2, SegmentTermEnum enumerator) // LUCENENET: CA1822: Mark members as static
 {
     if (ti1.DocFreq != ti2.DocFreq)
     {
         return(false);
     }
     if (ti1.FreqPointer != ti2.FreqPointer)
     {
         return(false);
     }
     if (ti1.ProxPointer != ti2.ProxPointer)
     {
         return(false);
     }
     // skipOffset is only valid when docFreq >= skipInterval:
     if (ti1.DocFreq >= enumerator.skipInterval && ti1.SkipOffset != ti2.SkipOffset)
     {
         return(false);
     }
     return(true);
 }
Example #26
0
        public object Clone()
        {
            SegmentTermEnum clone = null;

            try
            {
                clone = (SegmentTermEnum)base.MemberwiseClone();
            }
#pragma warning disable 168
            catch (InvalidOperationException e)
#pragma warning restore 168
            {
            }

            clone.input    = (IndexInput)input.Clone();
            clone.termInfo = new TermInfo(termInfo);

            clone.termBuffer = (TermBuffer)termBuffer.Clone();
            clone.prevBuffer = (TermBuffer)prevBuffer.Clone();
            clone.scanBuffer = new TermBuffer();

            return(clone);
        }
Example #27
0
 public void CacheCurrentTerm(SegmentTermEnum enumerator)
 {
     termsCache.Put(new CloneableTerm(enumerator.Term()), new TermInfoAndOrd(enumerator.termInfo, enumerator.position));
 }
Example #28
0
            internal virtual void Reset(FieldInfo fieldInfo)
            {
                //System.out.println("pff.reset te=" + termEnum);
                this.fieldInfo = fieldInfo;
                InternedFieldName = String.Intern(fieldInfo.Name);
                Term term = new Term(InternedFieldName);
                if (TermEnum == null)
                {
                    TermEnum = OuterInstance.TermsDict.Terms(term);
                    SeekTermEnum = OuterInstance.TermsDict.Terms(term);
                    //System.out.println("  term=" + termEnum.term());
                }
                else
                {
                    OuterInstance.TermsDict.SeekEnum(TermEnum, term, true);
                }
                SkipNext = true;

                UnicodeSortOrder = OuterInstance.SortTermsByUnicode();

                Term t = TermEnum.Term();
                if (t != null && t.Field() == InternedFieldName)
                {
                    NewSuffixStart = 0;
                    PrevTerm.Length = 0;
                    SurrogateDance();
                }
            }
        public void BeforeClass()
        {
            // NOTE: turn off compound file, this test will open some index files directly.
            OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetUseCompoundFile(false);

            TermIndexInterval = config.TermIndexInterval;
            IndexDivisor = TestUtil.NextInt(Random(), 1, 10);
            NUMBER_OF_DOCUMENTS = AtLeast(100);
            NUMBER_OF_FIELDS = AtLeast(Math.Max(10, 3 * TermIndexInterval * IndexDivisor / NUMBER_OF_DOCUMENTS));

            Directory = NewDirectory();

            config.SetCodec(new PreFlexRWCodec(OLD_FORMAT_IMPERSONATION_IS_ACTIVE));
            LogMergePolicy mp = NewLogMergePolicy();
            // NOTE: turn off compound file, this test will open some index files directly.
            mp.NoCFSRatio = 0.0;
            config.SetMergePolicy(mp);

            Populate(Directory, config);

            DirectoryReader r0 = IndexReader.Open(Directory);
            SegmentReader r = LuceneTestCase.GetOnlySegmentReader(r0);
            string segment = r.SegmentName;
            r.Dispose();

            FieldInfosReader infosReader = (new PreFlexRWCodec(OLD_FORMAT_IMPERSONATION_IS_ACTIVE)).FieldInfosFormat().FieldInfosReader;
            FieldInfos fieldInfos = infosReader.Read(Directory, segment, "", IOContext.READONCE);
            string segmentFileName = IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION);
            long tiiFileLength = Directory.FileLength(segmentFileName);
            IndexInput input = Directory.OpenInput(segmentFileName, NewIOContext(Random()));
            TermEnum = new SegmentTermEnum(Directory.OpenInput(IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_EXTENSION), NewIOContext(Random())), fieldInfos, false);
            int totalIndexInterval = TermEnum.IndexInterval * IndexDivisor;

            SegmentTermEnum indexEnum = new SegmentTermEnum(input, fieldInfos, true);
            Index = new TermInfosReaderIndex(indexEnum, IndexDivisor, tiiFileLength, totalIndexInterval);
            indexEnum.Dispose();
            input.Dispose();

            Reader = IndexReader.Open(Directory);
            SampleTerms = Sample(Random(), Reader, 1000);
        }
 private Term FindTermThatWouldBeAtIndex(SegmentTermEnum termEnum, int index)
 {
     int termPosition = index * TermIndexInterval * IndexDivisor;
     for (int i = 0; i < termPosition; i++)
     {
         // TODO: this test just uses random terms, so this is always possible
         AssumeTrue("ran out of terms", termEnum.Next());
     }
     Term term = termEnum.Term();
     // An indexed term is only written when the term after
     // it exists, so, if the number of terms is 0 mod
     // termIndexInterval, the last index term will not be
     // written; so we require a term after this term
     // as well:
     AssumeTrue("ran out of terms", termEnum.Next());
     return term;
 }
 public static void AfterClass()
 {
     TermEnum.Dispose();
     Reader.Dispose();
     Directory.Dispose();
     TermEnum = null;
     Reader = null;
     Directory = null;
     Index = null;
     SampleTerms = null;
 }
Example #32
0
 public PreDocsEnum Reset(SegmentTermEnum termEnum, Bits liveDocs)
 {
     Docs.LiveDocs = liveDocs;
     Docs.Seek(termEnum);
     Docs.Freq_Renamed = 1;
     DocID_Renamed = -1;
     return this;
 }
Example #33
0
 public void CacheCurrentTerm(SegmentTermEnum enumerator)
 {
     TermsCache.Put(new CloneableTerm(enumerator.Term()), new TermInfoAndOrd(enumerator.TermInfo_Renamed, enumerator.Position));
 }
Example #34
0
        internal TermInfo SeekEnum(SegmentTermEnum enumerator, Term term, TermInfoAndOrd tiOrd, bool useCache)
        {
            if (size == 0)
            {
                return(null);
            }

            // optimize sequential access: first try scanning cached enum w/o seeking
            if (enumerator.Term() != null && ((enumerator.Prev() != null && CompareAsUTF16(term, enumerator.Prev()) > 0) || CompareAsUTF16(term, enumerator.Term()) >= 0)) // term is at or past current
            {
                int enumOffset = (int)(enumerator.position / totalIndexInterval) + 1;
                if (indexLength == enumOffset || index.CompareTo(term, enumOffset) < 0) // but before end of block
                {
                    // no need to seek

                    TermInfo ti;
                    int      numScans = enumerator.ScanTo(term);
                    if (enumerator.Term() != null && CompareAsUTF16(term, enumerator.Term()) == 0)
                    {
                        ti = enumerator.termInfo;
                        if (numScans > 1)
                        {
                            // we only  want to put this TermInfo into the cache if
                            // scanEnum skipped more than one dictionary entry.
                            // this prevents RangeQueries or WildcardQueries to
                            // wipe out the cache when they iterate over a large numbers
                            // of terms in order
                            if (tiOrd == null)
                            {
                                if (useCache)
                                {
                                    termsCache.Put(new CloneableTerm(DeepCopyOf(term)), new TermInfoAndOrd(ti, enumerator.position));
                                }
                            }
                            else
                            {
                                Debug.Assert(SameTermInfo(ti, tiOrd, enumerator));
                                Debug.Assert(enumerator.position == tiOrd.termOrd);
                            }
                        }
                    }
                    else
                    {
                        ti = null;
                    }

                    return(ti);
                }
            }

            // random-access: must seek
            int indexPos;

            if (tiOrd != null)
            {
                indexPos = (int)(tiOrd.termOrd / totalIndexInterval);
            }
            else
            {
                // Must do binary search:
                indexPos = index.GetIndexOffset(term);
            }

            index.SeekEnum(enumerator, indexPos);
            enumerator.ScanTo(term);
            TermInfo ti_;

            if (enumerator.Term() != null && CompareAsUTF16(term, enumerator.Term()) == 0)
            {
                ti_ = enumerator.termInfo;
                if (tiOrd == null)
                {
                    if (useCache)
                    {
                        termsCache.Put(new CloneableTerm(DeepCopyOf(term)), new TermInfoAndOrd(ti_, enumerator.position));
                    }
                }
                else
                {
                    Debug.Assert(SameTermInfo(ti_, tiOrd, enumerator));
                    Debug.Assert(enumerator.position == tiOrd.termOrd);
                }
            }
            else
            {
                ti_ = null;
            }
            return(ti_);
        }
Example #35
0
            // Swap in S, in place of E:
            internal virtual bool SeekToNonBMP(SegmentTermEnum te, BytesRef term, int pos)
            {
                int savLength = term.Length;

                Debug.Assert(term.Offset == 0);

                // The 3 bytes starting at downTo make up 1
                // unicode character:
                Debug.Assert(IsHighBMPChar(term.Bytes, pos));

                // NOTE: we cannot make this assert, because
                // AutomatonQuery legitimately sends us malformed UTF8
                // (eg the UTF8 bytes with just 0xee)
                // assert term.length >= pos + 3: "term.length=" + term.length + " pos+3=" + (pos+3) + " byte=" + Integer.toHexString(term.bytes[pos]) + " term=" + term.toString();

                // Save the bytes && length, since we need to
                // restore this if seek "back" finds no matching
                // terms
                if (term.Bytes.Length < 4 + pos)
                {
                    term.Grow(4 + pos);
                }

                Scratch[0] = (sbyte)term.Bytes[pos];
                Scratch[1] = (sbyte)term.Bytes[pos + 1];
                Scratch[2] = (sbyte)term.Bytes[pos + 2];

                term.Bytes[pos] = unchecked((byte)0xf0);
                term.Bytes[pos + 1] = unchecked((byte)0x90);
                term.Bytes[pos + 2] = unchecked((byte)0x80);
                term.Bytes[pos + 3] = unchecked((byte)0x80);
                term.Length = 4 + pos;

                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("      try seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
                }

                // Seek "back":
                OuterInstance.TermsDict.SeekEnum(te, new Term(fieldInfo.Name, term), true);

                // Test if the term we seek'd to in fact found a
                // surrogate pair at the same position as the E:
                Term t2 = te.Term();

                // Cannot be null (or move to next field) because at
                // "worst" it'd seek to the same term we are on now,
                // unless we are being called from seek
                if (t2 == null || t2.Field() != InternedFieldName)
                {
                    return false;
                }

                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("      got term=" + UnicodeUtil.ToHexString(t2.Text()));
                }

                // Now test if prefix is identical and we found
                // a non-BMP char at the same position:
                BytesRef b2 = t2.Bytes();
                Debug.Assert(b2.Offset == 0);

                bool matches;
                if (b2.Length >= term.Length && IsNonBMPChar(b2.Bytes, pos))
                {
                    matches = true;
                    for (int i = 0; i < pos; i++)
                    {
                        if (term.Bytes[i] != b2.Bytes[i])
                        {
                            matches = false;
                            break;
                        }
                    }
                }
                else
                {
                    matches = false;
                }

                // Restore term:
                term.Length = savLength;
                term.Bytes[pos] = (byte)Scratch[0];
                term.Bytes[pos + 1] = (byte)Scratch[1];
                term.Bytes[pos + 2] = (byte)Scratch[2];

                return matches;
            }
Example #36
0
 // called only from asserts
 private bool SameTermInfo(TermInfo ti1, TermInfo ti2, SegmentTermEnum enumerator)
 {
     if (ti1.DocFreq != ti2.DocFreq)
     {
         return false;
     }
     if (ti1.FreqPointer != ti2.FreqPointer)
     {
         return false;
     }
     if (ti1.ProxPointer != ti2.ProxPointer)
     {
         return false;
     }
     // skipOffset is only valid when docFreq >= skipInterval:
     if (ti1.DocFreq >= enumerator.SkipInterval && ti1.SkipOffset != ti2.SkipOffset)
     {
         return false;
     }
     return true;
 }
        public virtual void SeekEnum(SegmentTermEnum enumerator, int indexOffset)
        {
            PagedBytesDataInput input = (PagedBytesDataInput)DataInput.Clone();

            input.Position = IndexToDataOffset.Get(indexOffset);

            // read the term
            int fieldId = input.ReadVInt();
            Term field = Fields[fieldId];
            Term term = new Term(field.Field(), input.ReadString());

            // read the terminfo
            TermInfo termInfo = new TermInfo();
            termInfo.DocFreq = input.ReadVInt();
            if (termInfo.DocFreq >= SkipInterval)
            {
                termInfo.SkipOffset = input.ReadVInt();
            }
            else
            {
                termInfo.SkipOffset = 0;
            }
            termInfo.FreqPointer = input.ReadVLong();
            termInfo.ProxPointer = input.ReadVLong();

            long pointer = input.ReadVLong();

            // perform the seek
            enumerator.Seek(pointer, ((long)indexOffset * TotalIndexInterval) - 1, term, termInfo);
        }
Example #38
0
 internal TermInfo SeekEnum(SegmentTermEnum enumerator, Term term, bool useCache)
 {
     if (useCache)
     {
         return SeekEnum(enumerator, term, TermsCache.Get(new CloneableTerm(DeepCopyOf(term))), useCache);
     }
     else
     {
         return SeekEnum(enumerator, term, null, useCache);
     }
 }
Example #39
0
 public DocsAndPositionsEnum Reset(SegmentTermEnum termEnum, Bits liveDocs)
 {
     Pos.LiveDocs = liveDocs;
     Pos.Seek(termEnum);
     DocID_Renamed = -1;
     return this;
 }
Example #40
0
            // Swap in S, in place of E:
            private bool SeekToNonBMP(SegmentTermEnum te, BytesRef term, int pos)
            {
                int savLength = term.Length;

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(term.Offset == 0);
                }

                // The 3 bytes starting at downTo make up 1
                // unicode character:
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(IsHighBMPChar(term.Bytes, pos));
                }

                // NOTE: we cannot make this assert, because
                // AutomatonQuery legitimately sends us malformed UTF8
                // (eg the UTF8 bytes with just 0xee)
                // assert term.length >= pos + 3: "term.length=" + term.length + " pos+3=" + (pos+3) + " byte=" + Integer.toHexString(term.bytes[pos]) + " term=" + term.toString();

                // Save the bytes && length, since we need to
                // restore this if seek "back" finds no matching
                // terms
                if (term.Bytes.Length < 4 + pos)
                {
                    term.Grow(4 + pos);
                }

                scratch[0] = (sbyte)term.Bytes[pos];
                scratch[1] = (sbyte)term.Bytes[pos + 1];
                scratch[2] = (sbyte)term.Bytes[pos + 2];

                term.Bytes[pos]     = 0xf0;
                term.Bytes[pos + 1] = 0x90;
                term.Bytes[pos + 2] = 0x80;
                term.Bytes[pos + 3] = 0x80;
                term.Length         = 4 + pos;

                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("      try seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
                }

                // Seek "back":
                outerInstance.TermsDict.SeekEnum(te, new Term(fieldInfo.Name, term), true);

                // Test if the term we seek'd to in fact found a
                // surrogate pair at the same position as the E:
                Term t2 = te.Term();

                // Cannot be null (or move to next field) because at
                // "worst" it'd seek to the same term we are on now,
                // unless we are being called from seek
                if (t2 is null || t2.Field != internedFieldName)
                {
                    return(false);
                }

                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("      got term=" + UnicodeUtil.ToHexString(t2.Text));
                }

                // Now test if prefix is identical and we found
                // a non-BMP char at the same position:
                BytesRef b2 = t2.Bytes;

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(b2.Offset == 0);
                }

                bool matches;

                if (b2.Length >= term.Length && IsNonBMPChar(b2.Bytes, pos))
                {
                    matches = true;
                    for (int i = 0; i < pos; i++)
                    {
                        if (term.Bytes[i] != b2.Bytes[i])
                        {
                            matches = false;
                            break;
                        }
                    }
                }
                else
                {
                    matches = false;
                }

                // Restore term:
                term.Length         = savLength;
                term.Bytes[pos]     = (byte)scratch[0];
                term.Bytes[pos + 1] = (byte)scratch[1];
                term.Bytes[pos + 2] = (byte)scratch[2];

                return(matches);
            }
Example #41
0
        internal TermInfo SeekEnum(SegmentTermEnum enumerator, Term term, TermInfoAndOrd tiOrd, bool useCache)
        {
            if (Size_Renamed == 0)
            {
                return null;
            }

            // optimize sequential access: first try scanning cached enum w/o seeking
            if (enumerator.Term() != null && ((enumerator.Prev() != null && CompareAsUTF16(term, enumerator.Prev()) > 0) || CompareAsUTF16(term, enumerator.Term()) >= 0)) // term is at or past current
            {
                int enumOffset = (int)(enumerator.Position / TotalIndexInterval) + 1;
                if (IndexLength == enumOffset || Index.CompareTo(term, enumOffset) < 0) // but before end of block
                {
                    // no need to seek

                    TermInfo ti;
                    int numScans = enumerator.ScanTo(term);
                    if (enumerator.Term() != null && CompareAsUTF16(term, enumerator.Term()) == 0)
                    {
                        ti = enumerator.TermInfo_Renamed;
                        if (numScans > 1)
                        {
                            // we only  want to put this TermInfo into the cache if
                            // scanEnum skipped more than one dictionary entry.
                            // this prevents RangeQueries or WildcardQueries to
                            // wipe out the cache when they iterate over a large numbers
                            // of terms in order
                            if (tiOrd == null)
                            {
                                if (useCache)
                                {
                                    TermsCache.Put(new CloneableTerm(DeepCopyOf(term)), new TermInfoAndOrd(ti, enumerator.Position));
                                }
                            }
                            else
                            {
                                Debug.Assert(SameTermInfo(ti, tiOrd, enumerator));
                                Debug.Assert(enumerator.Position == tiOrd.TermOrd);
                            }
                        }
                    }
                    else
                    {
                        ti = null;
                    }

                    return ti;
                }
            }

            // random-access: must seek
            int indexPos;
            if (tiOrd != null)
            {
                indexPos = (int)(tiOrd.TermOrd / TotalIndexInterval);
            }
            else
            {
                // Must do binary search:
                indexPos = Index.GetIndexOffset(term);
            }

            Index.SeekEnum(enumerator, indexPos);
            enumerator.ScanTo(term);
            TermInfo ti_;

            if (enumerator.Term() != null && CompareAsUTF16(term, enumerator.Term()) == 0)
            {
                ti_ = enumerator.TermInfo_Renamed;
                if (tiOrd == null)
                {
                    if (useCache)
                    {
                        TermsCache.Put(new CloneableTerm(DeepCopyOf(term)), new TermInfoAndOrd(ti_, enumerator.Position));
                    }
                }
                else
                {
                    Debug.Assert(SameTermInfo(ti_, tiOrd, enumerator));
                    Debug.Assert(enumerator.Position == tiOrd.TermOrd);
                }
            }
            else
            {
                ti_ = null;
            }
            return ti_;
        }