internal override void  Seek(TermInfo ti)
		{
			base.Seek(ti);
			if (ti != null)
				proxStream.Seek(ti.proxPointer);
			proxCount = 0;
		}
Example 2
        /// <summary>Retrieve the length (in bytes) of the tvd and tvf
        /// entries for the next numDocs starting with
        /// startDocID.  This is used for bulk copying when
        /// merging segments, if the field numbers are
        /// congruent.  Once this returns, the tvf &amp; tvd streams
        /// are seeked to the startDocID.
        /// </summary>
        internal void  RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs)
        {
            if (tvx == null)
            {
                for (int i = 0; i < tvdLengths.Length; i++)
                {
                    tvdLengths[i] = 0;
                }
                for (int i = 0; i < tvfLengths.Length; i++)
                {
                    tvfLengths[i] = 0;
                }
                return;
            }

            // SegmentMerger calls canReadRawDocs() first and should
            // not call us if that returns false.
            if (format < FORMAT_VERSION2)
            {
                throw new System.SystemException("cannot read raw docs with older term vector formats");
            }

            SeekTvx(startDocID);

            long tvdPosition = tvx.ReadLong();

            tvd.Seek(tvdPosition);

            long tvfPosition = tvx.ReadLong();

            tvf.Seek(tvfPosition);

            long lastTvdPosition = tvdPosition;
            long lastTvfPosition = tvfPosition;

            int count = 0;

            while (count < numDocs)
            {
                int docID = docStoreOffset + startDocID + count + 1;
                System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
                if (docID < numTotalDocs)
                {
                    tvdPosition = tvx.ReadLong();
                    tvfPosition = tvx.ReadLong();
                }
                else
                {
                    tvdPosition = tvd.Length();
                    tvfPosition = tvf.Length();
                    System.Diagnostics.Debug.Assert(count == numDocs - 1);
                }
                tvdLengths[count] = (int)(tvdPosition - lastTvdPosition);
                tvfLengths[count] = (int)(tvfPosition - lastTvfPosition);
                count++;
                lastTvdPosition = tvdPosition;
                lastTvfPosition = tvfPosition;
            }
        }
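
The doc comment above describes the bulk-copy path a segment merger can take when field numbers are congruent. Below is a minimal sketch of how a caller might drive RawDocs to copy term vectors in chunks; the reader/writer variables, MAX_RAW_MERGE_DOCS, and the AddRawDocuments call are illustrative assumptions for this sketch, not the exact SegmentMerger code.

        // Hedged sketch: bulk-copy term vectors for maxDoc documents in chunks.
        // 'reader', 'termVectorsWriter', 'maxDoc' and MAX_RAW_MERGE_DOCS are
        // assumed names for illustration only.
        int[] tvdLengths = new int[MAX_RAW_MERGE_DOCS];
        int[] tvfLengths = new int[MAX_RAW_MERGE_DOCS];
        int docCount = 0;
        while (docCount < maxDoc)
        {
            int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
            reader.RawDocs(tvdLengths, tvfLengths, docCount, len);
            // After RawDocs returns, the reader's tvd/tvf streams are positioned
            // at docCount, so the raw bytes can be copied without re-parsing them.
            termVectorsWriter.AddRawDocuments(reader, tvdLengths, tvfLengths, len);
            docCount += len;
        }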
 private void  SkipPayload()
 {
     if (needToLoadPayload && payloadLength > 0)
     {
         proxStream.Seek(proxStream.GetFilePointer() + payloadLength);
     }
     needToLoadPayload = false;
 }
Example 4
        public virtual void  TestClonedStreamsClosing()
        {
            SetUp_2();
            CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");

            // basic clone
            IndexInput expected = dir.OpenInput("f11");

            // this test only works for FSIndexInput
            Assert.IsTrue(_TestHelper.IsSimpleFSIndexInput(expected));
            Assert.IsTrue(_TestHelper.IsSimpleFSIndexInputOpen(expected));

            IndexInput one = cr.OpenInput("f11");

            Assert.IsTrue(IsCSIndexInputOpen(one));

            IndexInput two = (IndexInput)one.Clone();

            Assert.IsTrue(IsCSIndexInputOpen(two));

            AssertSameStreams("basic clone one", expected, one);
            expected.Seek(0);
            AssertSameStreams("basic clone two", expected, two);

            // Now close the first stream
            one.Close();
            Assert.IsTrue(IsCSIndexInputOpen(one), "Only close when cr is closed");

            // The following should really fail since we couldn't expect to
            // access a file once close has been called on it (regardless of
            // buffering and/or clone magic)
            expected.Seek(0);
            two.Seek(0);
            AssertSameStreams("basic clone two/2", expected, two);


            // Now close the compound reader
            cr.Close();
            Assert.IsFalse(IsCSIndexInputOpen(one), "Now closed one");
            Assert.IsFalse(IsCSIndexInputOpen(two), "Now closed two");

            // The following may also fail since the compound stream is closed
            expected.Seek(0);
            two.Seek(0);
            //assertSameStreams("basic clone two/3", expected, two);


            // Now close the second clone
            two.Close();
            expected.Seek(0);
            two.Seek(0);
            //assertSameStreams("basic clone two/4", expected, two);

            expected.Close();
        }
Example 5
 private void  SeekTvx(int docNum)
 {
     if (format < FORMAT_VERSION2)
     {
         tvx.Seek((docNum + docStoreOffset) * 8L + FORMAT_SIZE);
     }
     else
     {
         tvx.Seek((docNum + docStoreOffset) * 16L + FORMAT_SIZE);
     }
 }
Example 6
 /// <summary>The value of the field as a String, or null.  If null, the Reader value,
 /// binary value, or TokenStream value is used.  Exactly one of stringValue(),
 /// readerValue(), binaryValue(), and tokenStreamValue() must be set.
 /// </summary>
 public override System.String StringValue()
 {
     Enclosing_Instance.EnsureOpen();
     if (fieldsData == null)
     {
         IndexInput localFieldsStream = GetFieldStream();
         try
         {
             localFieldsStream.Seek(pointer);
             if (isCompressed)
             {
                 byte[] b = new byte[toRead];
                 localFieldsStream.ReadBytes(b, 0, b.Length);
                 fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b));
             }
             else
             {
                 //read in chars b/c we already know the length we need to read
                 char[] chars = new char[toRead];
                 localFieldsStream.ReadChars(chars, 0, toRead);
                 fieldsData = new System.String(chars);
             }
         }
         catch (System.IO.IOException e)
         {
             throw new FieldReaderException(e);
         }
     }
     return(fieldsData is System.String ? (System.String)fieldsData : null);
 }
Example 7
 /// <summary>The value of the field in Binary, or null.  If null, the Reader value,
 /// String value, or TokenStream value is used. Exactly one of stringValue(),
 /// readerValue(), binaryValue(), and tokenStreamValue() must be set.
 /// </summary>
 public override byte[] BinaryValue()
 {
     Enclosing_Instance.EnsureOpen();
     if (fieldsData == null)
     {
         byte[]     b = new byte[toRead];
         IndexInput localFieldsStream = GetFieldStream();
      //Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
         //since they are already handling this exception when getting the document
         try
         {
             localFieldsStream.Seek(pointer);
             localFieldsStream.ReadBytes(b, 0, b.Length);
             if (isCompressed == true)
             {
                 fieldsData = Enclosing_Instance.Uncompress(b);
             }
             else
             {
                 fieldsData = b;
             }
         }
         catch (System.IO.IOException e)
         {
             throw new FieldReaderException(e);
         }
     }
     return(fieldsData is byte[] ? (byte[])fieldsData : null);
 }
Example 8
        /// <summary>Read norms into a pre-allocated array. </summary>
        public override void  Norms(System.String field, byte[] bytes, int offset)
        {
            lock (this)
            {
                Norm norm = (Norm)norms[field];
                if (norm == null)
                {
                    Array.Copy(FakeNorms(), 0, bytes, offset, MaxDoc());
                    return;
                }

                if (norm.bytes != null)
                {
                    // can copy from cache
                    Array.Copy(norm.bytes, 0, bytes, offset, MaxDoc());
                    return;
                }

                IndexInput normStream = (IndexInput)norm.in_Renamed.Clone();
                try
                {
                    // read from disk
                    normStream.Seek(0);
                    normStream.ReadBytes(bytes, offset, MaxDoc());
                }
                finally
                {
                    normStream.Close();
                }
            }
        }
Example 9
        private void  Demo_FSIndexInputBug(Directory fsdir, System.String file)
        {
            // Setup the test file - we need more than 1024 bytes
            IndexOutput os = fsdir.CreateOutput(file, null);

            for (int i = 0; i < 2000; i++)
            {
                os.WriteByte((byte)i);
            }
            os.Close();

            IndexInput in_Renamed = fsdir.OpenInput(file, null);

            // This read primes the buffer in IndexInput
            byte b = in_Renamed.ReadByte(null);

            // Close the file
            in_Renamed.Close();

            // ERROR: this call should fail, but succeeds because the buffer
            // is still filled
            b = in_Renamed.ReadByte(null);

            // ERROR: this call should fail, but succeeds for some reason as well
            in_Renamed.Seek(1099, null);

            // OK: this call correctly fails. We are now past the 1024 internal
            // buffer, so an actual IO is attempted, which fails
            Assert.Throws<NullReferenceException>(() => in_Renamed.ReadByte(null), "expected readByte() to throw exception");
        }
Example 10
 internal void  Seek(long pointer, int p, Term t, TermInfo ti)
 {
     input.Seek(pointer);
     position = p;
     termBuffer.Set(t);
     prevBuffer.Reset();
     termInfo.Set(ti);
 }
Example 11
 private void  AssertSameStreams(System.String msg, IndexInput expected, IndexInput actual, long seekTo)
 {
     if (seekTo >= 0 && seekTo < expected.Length())
     {
         expected.Seek(seekTo);
         actual.Seek(seekTo);
         AssertSameStreams(msg + ", seek(mid)", expected, actual);
     }
 }
Example 12
        public virtual void  TestReadPastEOF()
        {
            SetUp_2();
            CompoundFileReader cr         = new CompoundFileReader(dir, "f.comp", null);
            IndexInput         is_Renamed = cr.OpenInput("f2", null);

            is_Renamed.Seek(is_Renamed.Length(null) - 10, null);
            byte[] b = new byte[100];
            is_Renamed.ReadBytes(b, 0, 10, null);

            Assert.Throws<System.IO.IOException>(() => is_Renamed.ReadByte(null), "Single byte read past end of file");

            is_Renamed.Seek(is_Renamed.Length(null) - 10, null);
            Assert.Throws<System.IO.IOException>(() => is_Renamed.ReadBytes(b, 0, 50, null), "Block read past end of file");

            is_Renamed.Close();
            cr.Close();
        }
Example 13
            /// <summary>Expert: implements buffer refill.  Reads bytes from the current
            /// position in the input.
            /// </summary>
            /// <param name="b">the array to read bytes into
            /// </param>
            /// <param name="offset">the offset in the array to start storing bytes
            /// </param>
            /// <param name="len">the number of bytes to read
            /// </param>
            public override void  ReadInternal(byte[] b, int offset, int len)
            {
                long start = GetFilePointer();

                if (start + len > length)
                {
                    throw new System.IO.IOException("read past EOF");
                }
                base_Renamed.Seek(fileOffset + start);
                base_Renamed.ReadBytes(b, offset, len, false);
            }
Example 14
 internal virtual void  Seek(TermInfo ti)
 {
     count = 0;
     if (ti == null)
     {
         df = 0;
     }
     else
     {
         df          = ti.docFreq;
         doc         = 0;
         skipDoc     = 0;
         skipCount   = 0;
         numSkips    = df / skipInterval;
         freqPointer = ti.freqPointer;
         proxPointer = ti.proxPointer;
         skipPointer = freqPointer + ti.skipOffset;
         freqStream.Seek(freqPointer);
         haveSkipped = false;
     }
 }
Example 15
        internal virtual void  Seek(TermInfo ti, Term term)
        {
            count = 0;
            FieldInfo fi = parent.fieldInfos.FieldInfo(term.field);

            currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false;
            if (ti == null)
            {
                df = 0;
            }
            else
            {
                df              = ti.docFreq;
                doc             = 0;
                freqBasePointer = ti.freqPointer;
                proxBasePointer = ti.proxPointer;
                skipPointer     = freqBasePointer + ti.skipOffset;
                freqStream.Seek(freqBasePointer);
                haveSkipped = false;
            }
        }
Example 16
            public override byte[] GetBinaryValue(byte[] result)
            {
                Enclosing_Instance.EnsureOpen();

                if (isBinary)
                {
                    if (fieldsData == null)
                    {
                        // Allocate new buffer if result is null or too small
                        byte[] b;
                        if (result == null || result.Length < toRead)
                        {
                            b = new byte[toRead];
                        }
                        else
                        {
                            b = result;
                        }

                        IndexInput localFieldsStream = GetFieldStream();

                        // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
                        // since they are already handling this exception when getting the document
                        try
                        {
                            localFieldsStream.Seek(pointer);
                            localFieldsStream.ReadBytes(b, 0, toRead);
                            if (isCompressed == true)
                            {
                                fieldsData = Enclosing_Instance.Uncompress(b);
                            }
                            else
                            {
                                fieldsData = b;
                            }
                        }
                        catch (System.IO.IOException e)
                        {
                            throw new FieldReaderException(e);
                        }

                        binaryOffset = 0;
                        binaryLength = toRead;
                    }

                    return((byte[])fieldsData);
                }
                else
                {
                    return(null);
                }
            }
Example 17
        /// <summary> Construct a FieldInfos object using the directory and the name of the
        /// file to open the IndexInput from
        /// </summary>
        /// <param name="d">The directory to open the IndexInput from
        /// </param>
        /// <param name="name">The name of the file to open the IndexInput from in the Directory
        /// </param>
        /// <throws>  IOException </throws>
        public /*internal*/ FieldInfos(Directory d, String name)
        {
            IndexInput input = d.OpenInput(name);

            try
            {
                try
                {
                    Read(input, name);
                }
                catch (System.IO.IOException)
                {
                    if (format == FORMAT_PRE)
                    {
                        // LUCENE-1623: FORMAT_PRE (before there was a
                        // format) may be 2.3.2 (pre-utf8) or 2.4.x (utf8)
                        // encoding; retry with input set to pre-utf8
                        input.Seek(0);
                        input.SetModifiedUTF8StringsMode();
                        byNumber.Clear();
                        byName.Clear();

                        bool rethrow = false;
                        try
                        {
                            Read(input, name);
                        }
                        catch (Exception)
                        {
                            // Ignore any new exception & set to throw original IOE
                            rethrow = true;
                        }
                        if (rethrow)
                        {
                            // Preserve stack trace
                            throw;
                        }
                    }
                    else
                    {
                        // The IOException cannot be caused by
                        // LUCENE-1623, so re-throw it
                        throw;
                    }
                }
            }
            finally
            {
                input.Close();
            }
        }
Example 18
        public /*internal*/ Document Doc(int n, FieldSelector fieldSelector)
        {
            SeekIndex(n);
            long position = indexStream.ReadLong();

            fieldsStream.Seek(position);

            Document doc       = new Document();
            int      numFields = fieldsStream.ReadVInt();

            for (int i = 0; i < numFields; i++)
            {
                int                 fieldNumber = fieldsStream.ReadVInt();
                FieldInfo           fi          = fieldInfos.FieldInfo(fieldNumber);
                FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.Accept(fi.name);

                byte bits = fieldsStream.ReadByte();
                System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);

                bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
                bool tokenize   = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
                bool binary     = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
                //TODO: Find an alternative approach here if this list continues to grow beyond the
                //list of 5 or 6 currently here.  See Lucene 762 for discussion
                if (acceptField.Equals(FieldSelectorResult.LOAD))
                {
                    AddField(doc, fi, binary, compressed, tokenize);
                }
                else if (acceptField.Equals(FieldSelectorResult.LOAD_FOR_MERGE))
                {
                    AddFieldForMerge(doc, fi, binary, compressed, tokenize);
                }
                else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
                {
                    AddField(doc, fi, binary, compressed, tokenize);
                    break;                     //Get out of this loop
                }
                else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
                {
                    AddFieldLazy(doc, fi, binary, compressed, tokenize);
                }
                else if (acceptField.Equals(FieldSelectorResult.SIZE))
                {
                    SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed));
                }
                else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
                {
                    AddFieldSize(doc, fi, binary, compressed);
                    break;
                }
                else
                {
                    SkipField(binary, compressed);
                }
            }

            return(doc);
        }
Example 19
        // It is not always necessary to move the prox pointer
        // to a new document after the freq pointer has been moved.
        // Consider for example a phrase query with two terms:
        // the freq pointer for term 1 has to move to document x
        // to answer the question of whether the term occurs in that document.
        // But only if term 2 also matches document x do the positions have to be
        // read to figure out if term 1 and term 2 appear next
        // to each other in document x and thus satisfy the query.
        // So we move the prox pointer lazily to the document
        // as soon as positions are requested.
        private void  LazySkip()
        {
            if (lazySkipPointer != 0)
            {
                proxStream.Seek(lazySkipPointer);
                lazySkipPointer = 0;
            }

            if (lazySkipDocCount != 0)
            {
                SkipPositions(lazySkipDocCount);
                lazySkipDocCount = 0;
            }
        }
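
The comment above explains why the prox stream is repositioned lazily. A small standalone sketch of the same pattern, using hypothetical member names rather than the actual fields, looks like this:

        // Hedged sketch of the lazy-seek pattern used by LazySkip(): remember
        // where we would have seeked, and only touch the stream when positions
        // are actually requested. 'pendingSeek' is an illustrative field name.
        private long pendingSeek = 0;

        private void ScheduleSeek(long pointer)
        {
            pendingSeek = pointer;      // cheap: no I/O happens here
        }

        private void MaterializeSeek(IndexInput prox)
        {
            if (pendingSeek != 0)
            {
                prox.Seek(pendingSeek); // the only place the stream is repositioned
                pendingSeek = 0;
            }
        }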
Example 20
        public virtual void  TestReadPastEOF()
        {
            SetUp_2();
            CompoundFileReader cr         = new CompoundFileReader(dir, "f.comp");
            IndexInput         is_Renamed = cr.OpenInput("f2");

            is_Renamed.Seek(is_Renamed.Length() - 10);
            byte[] b = new byte[100];
            is_Renamed.ReadBytes(b, 0, 10);

            try
            {
                byte test = is_Renamed.ReadByte();
                Assert.Fail("Single byte read past end of file");
            }
            catch (System.IO.IOException e)
            {
                /* success */
                //System.out.println("SUCCESS: single byte read past end of file: " + e);
            }

            is_Renamed.Seek(is_Renamed.Length() - 10);
            try
            {
                is_Renamed.ReadBytes(b, 0, 50);
                Assert.Fail("Block read past end of file");
            }
            catch (System.IO.IOException e)
            {
                /* success */
                //System.out.println("SUCCESS: block read past end of file: " + e);
            }

            is_Renamed.Close();
            cr.Close();
        }
Example 21
 /// <summary>The value of the field as a String, or null.  If null, the Reader value,
 /// binary value, or TokenStream value is used.  Exactly one of stringValue(),
 /// readerValue(), binaryValue(), and tokenStreamValue() must be set.
 /// </summary>
 public override System.String StringValue()
 {
     Enclosing_Instance.EnsureOpen();
     if (isBinary)
     {
         return(null);
     }
     else
     {
         if (fieldsData == null)
         {
             IndexInput localFieldsStream = GetFieldStream();
             try
             {
                 localFieldsStream.Seek(pointer);
                 if (isCompressed)
                 {
                     byte[] b = new byte[toRead];
                     localFieldsStream.ReadBytes(b, 0, b.Length);
                     fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b));
                 }
                 else
                 {
                     if (Enclosing_Instance.format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                     {
                         byte[] bytes = new byte[toRead];
                         localFieldsStream.ReadBytes(bytes, 0, toRead);
                         fieldsData = System.Text.Encoding.UTF8.GetString(bytes);
                     }
                     else
                     {
                         //read in chars b/c we already know the length we need to read
                         char[] chars = new char[toRead];
                         localFieldsStream.ReadChars(chars, 0, toRead);
                         fieldsData = new System.String(chars);
                     }
                 }
             }
             catch (System.IO.IOException e)
             {
                 throw new FieldReaderException(e);
             }
         }
     }
     return((string)fieldsData);
 }
Example 22
        public override long Get(int index)
        {
            int  blockOffset = index / ValuesPerBlock;
            long skip        = ((long)blockOffset) << 3;

            try
            {
                @in.Seek(StartPointer + skip);

                long block         = @in.ReadLong();
                int  offsetInBlock = index % ValuesPerBlock;
                return(((long)((ulong)block >> (offsetInBlock * bitsPerValue))) & Mask);
            }
            catch (System.IO.IOException e)
            {
                throw new InvalidOperationException("failed", e);
            }
        }
 /// <summary>
 /// returns an address instance for prefix-compressed binary values.
 /// @lucene.internal
 /// </summary>
 protected internal virtual MonotonicBlockPackedReader GetIntervalInstance(IndexInput data, FieldInfo field, BinaryEntry bytes)
 {
     MonotonicBlockPackedReader addresses;
     long interval = bytes.AddressInterval;
     lock (AddressInstances)
     {
         MonotonicBlockPackedReader addrInstance;
         if (!AddressInstances.TryGetValue(field.Number, out addrInstance))
         {
             data.Seek(bytes.AddressesOffset);
             long size;
             if (bytes.Count % interval == 0)
             {
                 size = bytes.Count / interval;
             }
             else
             {
                 size = 1L + bytes.Count / interval;
             }
             addrInstance = new MonotonicBlockPackedReader(data, bytes.PackedIntsVersion, bytes.BlockSize, size, false);
             AddressInstances[field.Number] = addrInstance;
             RamBytesUsed_Renamed.AddAndGet(addrInstance.RamBytesUsed() + RamUsageEstimator.NUM_BYTES_INT);
         }
         addresses = addrInstance;
     }
     return addresses;
 }
 /// <summary>
 /// returns an address instance for sortedset ordinal lists
 /// @lucene.internal
 /// </summary>
 protected internal virtual MonotonicBlockPackedReader GetOrdIndexInstance(IndexInput data, FieldInfo field, NumericEntry entry)
 {
     MonotonicBlockPackedReader ordIndex;
     lock (OrdIndexInstances)
     {
         MonotonicBlockPackedReader ordIndexInstance;
         if (!OrdIndexInstances.TryGetValue(field.Number, out ordIndexInstance))
         {
             data.Seek(entry.Offset);
             ordIndexInstance = new MonotonicBlockPackedReader(data, entry.PackedIntsVersion, entry.BlockSize, entry.Count, false);
             OrdIndexInstances[field.Number] = ordIndexInstance;
             RamBytesUsed_Renamed.AddAndGet(ordIndexInstance.RamBytesUsed() + RamUsageEstimator.NUM_BYTES_INT);
         }
         ordIndex = ordIndexInstance;
     }
     return ordIndex;
 }
            internal virtual TermsEnum GetTermsEnum(IndexInput input)
            {
                input.Seek(Bytes.Offset);

                return new TermsEnumAnonymousInnerClassHelper(this, input);
            }
 private void  SeekIndex(int docID)
 {
     indexStream.Seek(formatSize + (docID + docStoreOffset) * 8L);
 }
Example 27
        public /*internal*/ Document Doc(int n)
        {
            indexStream.Seek(n * 8L);
            long position = indexStream.ReadLong();

            fieldsStream.Seek(position);

            Document doc       = new Document();
            int      numFields = fieldsStream.ReadVInt();

            for (int i = 0; i < numFields; i++)
            {
                int       fieldNumber = fieldsStream.ReadVInt();
                FieldInfo fi          = fieldInfos.FieldInfo(fieldNumber);

                byte bits = fieldsStream.ReadByte();

                bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
                bool tokenize   = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;

                if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0)
                {
                    byte[] b = new byte[fieldsStream.ReadVInt()];
                    fieldsStream.ReadBytes(b, 0, b.Length);
                    if (compressed)
                    {
                        doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS));
                    }
                    else
                    {
                        doc.Add(new Field(fi.name, b, Field.Store.YES));
                    }
                }
                else
                {
                    Field.Index index;
                    Field.Store store = Field.Store.YES;

                    if (fi.isIndexed && tokenize)
                    {
                        index = Field.Index.TOKENIZED;
                    }
                    else if (fi.isIndexed && !tokenize)
                    {
                        index = Field.Index.UN_TOKENIZED;
                    }
                    else
                    {
                        index = Field.Index.NO;
                    }

                    Field.TermVector termVector = null;
                    if (fi.storeTermVector)
                    {
                        if (fi.storeOffsetWithTermVector)
                        {
                            if (fi.storePositionWithTermVector)
                            {
                                termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
                            }
                            else
                            {
                                termVector = Field.TermVector.WITH_OFFSETS;
                            }
                        }
                        else if (fi.storePositionWithTermVector)
                        {
                            termVector = Field.TermVector.WITH_POSITIONS;
                        }
                        else
                        {
                            termVector = Field.TermVector.YES;
                        }
                    }
                    else
                    {
                        termVector = Field.TermVector.NO;
                    }

                    if (compressed)
                    {
                        store = Field.Store.COMPRESS;
                        byte[] b = new byte[fieldsStream.ReadVInt()];
                        fieldsStream.ReadBytes(b, 0, b.Length);
                        Field f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector);
                        f.SetOmitNorms(fi.omitNorms);
                        doc.Add(f);
                    }
                    else
                    {
                        Field f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector);
                        f.SetOmitNorms(fi.omitNorms);
                        doc.Add(f);
                    }
                }
            }

            return(doc);
        }
Example 28
        /// <summary>Optimized implementation. </summary>
        public virtual bool SkipTo(int target)
        {
            if (df >= skipInterval)
            {
                // optimized case

                if (skipStream == null)
                {
                    skipStream = (IndexInput)freqStream.Clone();                      // lazily clone
                }
                if (!haveSkipped)
                {
                    // lazily seek skip stream
                    skipStream.Seek(skipPointer);
                    haveSkipped = true;
                }

                // scan skip data
                int  lastSkipDoc     = skipDoc;
                long lastFreqPointer = freqStream.GetFilePointer();
                long lastProxPointer = -1;
                int  numSkipped      = -1 - (count % skipInterval);

                while (target > skipDoc)
                {
                    lastSkipDoc     = skipDoc;
                    lastFreqPointer = freqPointer;
                    lastProxPointer = proxPointer;

                    if (skipDoc != 0 && skipDoc >= doc)
                    {
                        numSkipped += skipInterval;
                    }

                    if (skipCount >= numSkips)
                    {
                        break;
                    }

                    skipDoc     += skipStream.ReadVInt();
                    freqPointer += skipStream.ReadVInt();
                    proxPointer += skipStream.ReadVInt();

                    skipCount++;
                }

                // if we found something to skip, then skip it
                if (lastFreqPointer > freqStream.GetFilePointer())
                {
                    freqStream.Seek(lastFreqPointer);
                    SkipProx(lastProxPointer);

                    doc    = lastSkipDoc;
                    count += numSkipped;
                }
            }

            // done skipping, now just scan
            do
            {
                if (!Next())
                {
                    return(false);
                }
            }while (target > doc);
            return(true);
        }
Example 29
 private void  AssertSameStreams(System.String msg, IndexInput expected, IndexInput actual, long seekTo)
 {
     if (seekTo >= 0 && seekTo < expected.Length())
     {
         expected.Seek(seekTo);
         actual.Seek(seekTo);
         AssertSameStreams(msg + ", seek(mid)", expected, actual);
     }
 }
Example 30
        public virtual void  TestRandomAccessClones()
        {
            SetUp_2();
            CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");

            // Open two files
            IndexInput e1 = cr.OpenInput("f11");
            IndexInput e2 = cr.OpenInput("f3");

            IndexInput a1 = (IndexInput)e1.Clone();
            IndexInput a2 = (IndexInput)e2.Clone();

            // Seek the first pair
            e1.Seek(100);
            a1.Seek(100);
            Assert.AreEqual(100, e1.GetFilePointer());
            Assert.AreEqual(100, a1.GetFilePointer());
            byte be1 = e1.ReadByte();
            byte ba1 = a1.ReadByte();

            Assert.AreEqual(be1, ba1);

            // Now seek the second pair
            e2.Seek(1027);
            a2.Seek(1027);
            Assert.AreEqual(1027, e2.GetFilePointer());
            Assert.AreEqual(1027, a2.GetFilePointer());
            byte be2 = e2.ReadByte();
            byte ba2 = a2.ReadByte();

            Assert.AreEqual(be2, ba2);

            // Now make sure the first one didn't move
            Assert.AreEqual(101, e1.GetFilePointer());
            Assert.AreEqual(101, a1.GetFilePointer());
            be1 = e1.ReadByte();
            ba1 = a1.ReadByte();
            Assert.AreEqual(be1, ba1);

            // Now move the first one again, past the buffer length
            e1.Seek(1910);
            a1.Seek(1910);
            Assert.AreEqual(1910, e1.GetFilePointer());
            Assert.AreEqual(1910, a1.GetFilePointer());
            be1 = e1.ReadByte();
            ba1 = a1.ReadByte();
            Assert.AreEqual(be1, ba1);

            // Now make sure the second set didn't move
            Assert.AreEqual(1028, e2.GetFilePointer());
            Assert.AreEqual(1028, a2.GetFilePointer());
            be2 = e2.ReadByte();
            ba2 = a2.ReadByte();
            Assert.AreEqual(be2, ba2);

            // Move the second set back, again cross the buffer size
            e2.Seek(17);
            a2.Seek(17);
            Assert.AreEqual(17, e2.GetFilePointer());
            Assert.AreEqual(17, a2.GetFilePointer());
            be2 = e2.ReadByte();
            ba2 = a2.ReadByte();
            Assert.AreEqual(be2, ba2);

            // Finally, make sure the first set didn't move
            Assert.AreEqual(1911, e1.GetFilePointer());
            Assert.AreEqual(1911, a1.GetFilePointer());
            be1 = e1.ReadByte();
            ba1 = a1.ReadByte();
            Assert.AreEqual(be1, ba1);

            e1.Close();
            e2.Close();
            a1.Close();
            a2.Close();
            cr.Close();
        }
Example 31
        /// <summary> Retrieve the term vector for the given document and field</summary>
        /// <param name="docNum">The document number to retrieve the vector for
        /// </param>
        /// <param name="field">The field within the document to retrieve
        /// </param>
        /// <returns> The TermFreqVector for the document and field or null if there is no termVector for this field.
        /// </returns>
        /// <throws>  IOException if there is an error reading the term vector files </throws>
        public /*internal*/ virtual TermFreqVector Get(int docNum, System.String field)
        {
            // Check if no term vectors are available for this segment at all
            int            fieldNumber = fieldInfos.FieldNumber(field);
            TermFreqVector result      = null;

            if (tvx != null)
            {
                //We need to account for the FORMAT_SIZE when seeking in the tvx.
                //We don't need to do this in other seeks because we already have
                //the file pointer that was written in another file.
                tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
                //System.out.println("TVX Pointer: " + tvx.getFilePointer());
                long position = tvx.ReadLong();

                tvd.Seek(position);
                int fieldCount = tvd.ReadVInt();
                //System.out.println("Num Fields: " + fieldCount);
                // There are only a few fields per document. We opt for a full scan
                // rather than requiring that they be ordered. We need to read through
                // all of the fields anyway to get to the tvf pointers.
                int number = 0;
                int found  = -1;
                for (int i = 0; i < fieldCount; i++)
                {
                    if (tvdFormat == TermVectorsWriter.FORMAT_VERSION)
                    {
                        number = tvd.ReadVInt();
                    }
                    else
                    {
                        number += tvd.ReadVInt();
                    }

                    if (number == fieldNumber)
                    {
                        found = i;
                    }
                }

                // This field, although valid in the segment, was not found in this
                // document
                if (found != -1)
                {
                    // Compute position in the tvf file
                    position = 0;
                    for (int i = 0; i <= found; i++)
                    {
                        position += tvd.ReadVLong();
                    }

                    result = ReadTermVector(field, position);
                }
                else
                {
                    //System.out.println("Field not found");
                }
            }
            else
            {
                //System.out.println("No tvx file");
            }
            return(result);
        }
Example 32
 public override void  SeekInternal(long pos)
 {
     //simOutage();
     delegate_Renamed.Seek(pos);
 }
Example 33
        /// <summary> </summary>
        /// <param name="field">The field to read in
        /// </param>
        /// <param name="tvfPointer">The pointer within the tvf file where we should start reading
        /// </param>
        /// <returns> The TermVector located at that position
        /// </returns>
        /// <throws>  IOException </throws>
        private SegmentTermVector ReadTermVector(System.String field, long tvfPointer)
        {
            // Now read the data from specified position
            //We don't need to offset by the FORMAT here since the pointer already includes the offset
            tvf.Seek(tvfPointer);

            int numTerms = tvf.ReadVInt();

            //System.out.println("Num Terms: " + numTerms);
            // If no terms - return a constant empty termvector. However, this should never occur!
            if (numTerms == 0)
            {
                return(new SegmentTermVector(field, null, null));
            }

            bool storePositions;
            bool storeOffsets;

            if (tvfFormat == TermVectorsWriter.FORMAT_VERSION)
            {
                byte bits = tvf.ReadByte();
                storePositions = (bits & TermVectorsWriter.STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                storeOffsets   = (bits & TermVectorsWriter.STORE_OFFSET_WITH_TERMVECTOR) != 0;
            }
            else
            {
                tvf.ReadVInt();
                storePositions = false;
                storeOffsets   = false;
            }

            System.String[] terms     = new System.String[numTerms];
            int[]           termFreqs = new int[numTerms];

            //  we may not need these, but declare them
            int[][] positions = null;
            TermVectorOffsetInfo[][] offsets = null;
            if (storePositions)
            {
                positions = new int[numTerms][];
            }
            if (storeOffsets)
            {
                offsets = new TermVectorOffsetInfo[numTerms][];
            }

            int start       = 0;
            int deltaLength = 0;
            int totalLength = 0;

            char[] buffer         = new char[10];     // init the buffer with a length of 10 characters
            char[] previousBuffer = new char[] {};

            for (int i = 0; i < numTerms; i++)
            {
                start       = tvf.ReadVInt();
                deltaLength = tvf.ReadVInt();
                totalLength = start + deltaLength;
                if (buffer.Length < totalLength)
                {
                    // increase buffer
                    buffer = null;                     // give a hint to garbage collector
                    buffer = new char[totalLength];

                    if (start > 0)
                    {
                        // just copy if necessary
                        Array.Copy(previousBuffer, 0, buffer, 0, start);
                    }
                }

                tvf.ReadChars(buffer, start, deltaLength);
                terms[i]       = new System.String(buffer, 0, totalLength);
                previousBuffer = buffer;
                int freq = tvf.ReadVInt();
                termFreqs[i] = freq;

                if (storePositions)
                {
                    //read in the positions
                    int[] pos = new int[freq];
                    positions[i] = pos;
                    int prevPosition = 0;
                    for (int j = 0; j < freq; j++)
                    {
                        pos[j]       = prevPosition + tvf.ReadVInt();
                        prevPosition = pos[j];
                    }
                }

                if (storeOffsets)
                {
                    TermVectorOffsetInfo[] offs = new TermVectorOffsetInfo[freq];
                    offsets[i] = offs;
                    int prevOffset = 0;
                    for (int j = 0; j < freq; j++)
                    {
                        int startOffset = prevOffset + tvf.ReadVInt();
                        int endOffset   = startOffset + tvf.ReadVInt();
                        offs[j]    = new TermVectorOffsetInfo(startOffset, endOffset);
                        prevOffset = endOffset;
                    }
                }
            }

            SegmentTermVector tv;

            if (storePositions || storeOffsets)
            {
                tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets);
            }
            else
            {
                tv = new SegmentTermVector(field, terms, termFreqs);
            }
            return(tv);
        }
Example 34
        /// <summary> </summary>
        /// <param name="field">The field to read in
        /// </param>
        /// <param name="tvfPointer">The pointer within the tvf file where we should start reading
        /// </param>
        /// <param name="mapper">The mapper used to map the TermVector
        /// </param>
        /// <returns> The TermVector located at that position
        /// </returns>
        /// <throws>  IOException </throws>
        private void  ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper)
        {
            // Now read the data from specified position
            //We don't need to offset by the FORMAT here since the pointer already includes the offset
            tvf.Seek(tvfPointer);

            int numTerms = tvf.ReadVInt();

            //System.out.println("Num Terms: " + numTerms);
            // If no terms - return a constant empty termvector. However, this should never occur!
            if (numTerms == 0)
            {
                return;
            }

            bool storePositions;
            bool storeOffsets;

            if (tvfFormat == FORMAT_VERSION)
            {
                byte bits = tvf.ReadByte();
                storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                storeOffsets   = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
            }
            else
            {
                tvf.ReadVInt();
                storePositions = false;
                storeOffsets   = false;
            }
            mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);
            int start       = 0;
            int deltaLength = 0;
            int totalLength = 0;

            char[] buffer         = new char[10];     // init the buffer with a length of 10 characters
            char[] previousBuffer = new char[] {};

            for (int i = 0; i < numTerms; i++)
            {
                start       = tvf.ReadVInt();
                deltaLength = tvf.ReadVInt();
                totalLength = start + deltaLength;
                if (buffer.Length < totalLength)
                {
                    // increase buffer
                    buffer = null;                     // give a hint to garbage collector
                    buffer = new char[totalLength];

                    if (start > 0)
                    {
                        // just copy if necessary
                        Array.Copy(previousBuffer, 0, buffer, 0, start);
                    }
                }

                tvf.ReadChars(buffer, start, deltaLength);
                System.String term = new System.String(buffer, 0, totalLength);
                previousBuffer = buffer;
                int   freq      = tvf.ReadVInt();
                int[] positions = null;
                if (storePositions)
                {
                    //read in the positions
                    //does the mapper even care about positions?
                    if (mapper.IsIgnoringPositions() == false)
                    {
                        positions = new int[freq];
                        int prevPosition = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            positions[j] = prevPosition + tvf.ReadVInt();
                            prevPosition = positions[j];
                        }
                    }
                    else
                    {
                        //we need to skip over the positions.  Since these are VInts,
                        //there is no way to know for sure how far to skip without decoding each one.
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                        }
                    }
                }
                TermVectorOffsetInfo[] offsets = null;
                if (storeOffsets)
                {
                    //does the mapper even care about offsets?
                    if (mapper.IsIgnoringOffsets() == false)
                    {
                        offsets = new TermVectorOffsetInfo[freq];
                        int prevOffset = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            int startOffset = prevOffset + tvf.ReadVInt();
                            int endOffset   = startOffset + tvf.ReadVInt();
                            offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
                            prevOffset = endOffset;
                        }
                    }
                    else
                    {
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                            tvf.ReadVInt();
                        }
                    }
                }
                mapper.Map(term, freq, offsets, positions);
            }
        }
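
The comment about skipping positions above notes that VInts are variable-length, so their total size cannot be known without decoding them. A minimal sketch of a VInt read loop, assuming the usual 7-bits-per-byte encoding with a continuation bit, shows why each value must be decoded even when its content is discarded:

        // Hedged sketch: read one VInt (low 7 bits per byte, high bit set means
        // "more bytes follow"). Because the byte length depends on the value
        // itself, the only way to skip a run of VInts is to decode and discard
        // each one, as ReadTermVector does above.
        private static int ReadVInt(IndexInput input)
        {
            byte b = input.ReadByte();
            int value = b & 0x7F;
            for (int shift = 7; (b & 0x80) != 0; shift += 7)
            {
                b = input.ReadByte();
                value |= (b & 0x7F) << shift;
            }
            return value;
        }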