예제 #1
0
 /// <summary>
 /// Reads the next term from <paramref name="input"/> into this instance.
 /// <para/>
 /// The stream supplies, in order: a vint that becomes <c>NewSuffixStart</c>, a vint
 /// suffix length, the suffix bytes (written into <c>Bytes</c> starting at
 /// <c>NewSuffixStart</c>) and a vint field number. When the field number differs from
 /// <c>CurrentFieldNumber</c> the field name is re-resolved through
 /// <paramref name="fieldInfos"/>.
 /// </summary>
 /// <param name="input">Stream positioned at the start of an encoded term.</param>
 /// <param name="fieldInfos">Used to map the decoded field number to a field name.</param>
 public void Read(IndexInput input, FieldInfos fieldInfos)
 {
     this.Term = null; // invalidate cache
     NewSuffixStart = input.ReadVInt();
     int length = input.ReadVInt();
     int totalLength = NewSuffixStart + length;
     Debug.Assert(totalLength <= ByteBlockPool.BYTE_BLOCK_SIZE - 2, "termLength=" + totalLength + ",resource=" + input);
     if (Bytes.Bytes.Length < totalLength)
     {
         Bytes.Grow(totalLength);
     }
     Bytes.Length = totalLength;
     // Only the suffix is read; bytes before NewSuffixStart are left as they were.
     input.ReadBytes(Bytes.Bytes, NewSuffixStart, length);
     int fieldNumber = input.ReadVInt();
     if (fieldNumber != CurrentFieldNumber)
     {
         CurrentFieldNumber = fieldNumber;
         // NOTE: too much sneakiness here, seriously this is a negative vint?!
         if (CurrentFieldNumber == -1)
         {
             Field = "";
         }
         else
         {
             Debug.Assert(fieldInfos.FieldInfo(CurrentFieldNumber) != null, CurrentFieldNumber.ToString());
             Field = String.Intern(fieldInfos.FieldInfo(CurrentFieldNumber).Name);
         }
     }
     else
     {
         // The conditional must be parenthesised: '+' binds tighter than '==' and '?:',
         // so without the parentheses the whole concatenated string was compared to null
         // and the diagnostic prefix was silently dropped from the message.
         Debug.Assert(Field.Equals(fieldInfos.FieldInfo(fieldNumber).Name), "currentFieldNumber=" + CurrentFieldNumber + " field=" + Field + " vs " + (fieldInfos.FieldInfo(fieldNumber) == null ? "null" : fieldInfos.FieldInfo(fieldNumber).Name));
     }
 }
            /// <summary>
            /// Decodes the next term from the <c>tvf</c> stream together with its frequency and,
            /// depending on the <c>storePayloads</c> / <c>storePositions</c> / <c>storeOffsets</c>
            /// flags, its positions, payload bytes and offsets.
            /// </summary>
            /// <returns>
            /// The shared <c>term</c> instance, or <c>null</c> once <c>nextTerm</c> has reached
            /// <c>numTerms</c>.
            /// </returns>
            public override BytesRef Next()
            {
                if (nextTerm >= numTerms)
                {
                    return null;
                }

                // Seed with the previous term: only the bytes from prefixLength onward are
                // read from the stream below.
                term.CopyBytes(lastTerm);
                int prefixLength = tvf.ReadVInt32();
                int suffixLength = tvf.ReadVInt32();

                term.Length = prefixLength + suffixLength;
                term.Grow(term.Length);
                tvf.ReadBytes(term.Bytes, prefixLength, suffixLength);
                freq = tvf.ReadVInt32();

                if (storePayloads)
                {
                    positions = new int[freq];
                    payloadOffsets = new int[freq];
                    int payloadBytes = 0;
                    int position = 0;
                    for (int i = 0; i < freq; i++)
                    {
                        // Upper bits carry the position delta; the low bit flags that a new
                        // payload length follows.
                        int packed = tvf.ReadVInt32();
                        position += (int)((uint)packed >> 1);
                        positions[i] = position;

                        bool payloadLengthChanged = (packed & 1) != 0;
                        if (payloadLengthChanged)
                        {
                            lastPayloadLength = tvf.ReadVInt32();
                        }

                        payloadOffsets[i] = payloadBytes;
                        payloadBytes += lastPayloadLength;
                        Debug.Assert(payloadBytes >= 0);
                    }

                    // All payloads of this term are stored back to back after the positions.
                    payloadData = new byte[payloadBytes];
                    tvf.ReadBytes(payloadData, 0, payloadData.Length);
                }
                else if (storePositions)
                {
                    // Positions without payloads.
                    // TODO: the previous array could perhaps be reused, provided a consumer
                    // never uses two D&PEnums at once...
                    positions = new int[freq];
                    int position = 0;
                    for (int i = 0; i < freq; i++)
                    {
                        position += tvf.ReadVInt32();
                        positions[i] = position;
                    }
                }

                if (storeOffsets)
                {
                    startOffsets = new int[freq];
                    endOffsets = new int[freq];
                    int previousEnd = 0;
                    for (int i = 0; i < freq; i++)
                    {
                        // Start is a delta from the previous end; end is a delta from start.
                        int begin = previousEnd + tvf.ReadVInt32();
                        int end = begin + tvf.ReadVInt32();
                        startOffsets[i] = begin;
                        endOffsets[i] = end;
                        previousEnd = end;
                    }
                }

                lastTerm.CopyBytes(term);
                nextTerm++;
                return term;
            }
예제 #3
0
 /// <summary>
 /// Forwards the read unchanged to the wrapped <c>_indexInput</c>.
 /// </summary>
 /// <param name="b">Destination buffer.</param>
 /// <param name="offset">Index in <paramref name="b"/> at which to start writing.</param>
 /// <param name="len">Number of bytes to read.</param>
 public override void ReadBytes(byte[] b, int offset, int len)
     => _indexInput.ReadBytes(b, offset, len);
            /// <summary>
            /// Decodes the next term from the <c>Tvf</c> stream together with its frequency and,
            /// depending on the <c>StorePayloads</c> / <c>StorePositions</c> / <c>StoreOffsets</c>
            /// flags, its positions, payload bytes and offsets.
            /// </summary>
            /// <returns>
            /// The shared <c>Term_Renamed</c> instance, or <c>null</c> once <c>NextTerm</c> has
            /// reached <c>NumTerms</c>.
            /// </returns>
            public override BytesRef Next()
            {
                if (NextTerm >= NumTerms)
                {
                    return null;
                }

                // Seed with the previous term: only the bytes from prefixLength onward are
                // read from the stream below.
                Term_Renamed.CopyBytes(LastTerm);
                int prefixLength = Tvf.ReadVInt();
                int suffixLength = Tvf.ReadVInt();

                Term_Renamed.Length = prefixLength + suffixLength;
                Term_Renamed.Grow(Term_Renamed.Length);
                Tvf.ReadBytes(Term_Renamed.Bytes, prefixLength, suffixLength);
                Freq = Tvf.ReadVInt();

                if (StorePayloads)
                {
                    Positions = new int[Freq];
                    PayloadOffsets = new int[Freq];
                    int payloadBytes = 0;
                    int position = 0;
                    for (int i = 0; i < Freq; i++)
                    {
                        // Upper bits carry the position delta; the low bit flags that a new
                        // payload length follows.
                        int packed = Tvf.ReadVInt();
                        position += (int)((uint)packed >> 1);
                        Positions[i] = position;

                        bool payloadLengthChanged = (packed & 1) != 0;
                        if (payloadLengthChanged)
                        {
                            LastPayloadLength = Tvf.ReadVInt();
                        }

                        PayloadOffsets[i] = payloadBytes;
                        payloadBytes += LastPayloadLength;
                        Debug.Assert(payloadBytes >= 0);
                    }

                    // All payloads of this term are stored back to back after the positions.
                    PayloadData = new sbyte[payloadBytes];
                    Tvf.ReadBytes(PayloadData, 0, PayloadData.Length);
                }
                else if (StorePositions)
                {
                    // Positions without payloads.
                    // TODO: the previous array could perhaps be reused, provided a consumer
                    // never uses two D&PEnums at once...
                    Positions = new int[Freq];
                    int position = 0;
                    for (int i = 0; i < Freq; i++)
                    {
                        position += Tvf.ReadVInt();
                        Positions[i] = position;
                    }
                }

                if (StoreOffsets)
                {
                    StartOffsets = new int[Freq];
                    EndOffsets = new int[Freq];
                    int previousEnd = 0;
                    for (int i = 0; i < Freq; i++)
                    {
                        // Start is a delta from the previous end; end is a delta from start.
                        int begin = previousEnd + Tvf.ReadVInt();
                        int end = begin + Tvf.ReadVInt();
                        StartOffsets[i] = begin;
                        EndOffsets[i] = end;
                        previousEnd = end;
                    }
                }

                LastTerm.CopyBytes(Term_Renamed);
                NextTerm++;
                return Term_Renamed;
            }
예제 #5
0
                // Performs the initial decode of the next block of terms. The docFreq,
                // totalTermFreq and postings details (frq/prx offset, etc.) are NOT decoded
                // here: they are only loaded as byte[] blobs and decoded on demand when the
                // metadata of a term in this block is actually requested. Terms-only
                // consumers (eg certain MTQs, respelling) therefore do not pay for decoding
                // metadata they never use.
                //
                // Returns false when the term count read for the block is 0, true otherwise.
                private bool NextBlock()
                {
                    // TODO: the byte[] of each term (the suffix) is still decoded lazily;
                    // decoding all N terms up front would allow a fast bsearch within the
                    // block when seeking...

                    state.BlockFilePointer = input.GetFilePointer();
                    blockTermCount = input.ReadVInt32();
                    if (blockTermCount == 0)
                    {
                        return false;
                    }

                    termBlockPrefix = input.ReadVInt32();

                    // Blob 1: term suffixes.
                    int suffixBytes = input.ReadVInt32();
                    if (suffixBytes > termSuffixes.Length)
                    {
                        termSuffixes = new byte[ArrayUtil.Oversize(suffixBytes, 1)];
                    }
                    input.ReadBytes(termSuffixes, 0, suffixBytes);
                    termSuffixesReader.Reset(termSuffixes, 0, suffixBytes);

                    // Blob 2: docFreq, totalTermFreq.
                    int statsBytes = input.ReadVInt32();
                    if (statsBytes > docFreqBytes.Length)
                    {
                        docFreqBytes = new byte[ArrayUtil.Oversize(statsBytes, 1)];
                    }
                    input.ReadBytes(docFreqBytes, 0, statsBytes);
                    freqReader.Reset(docFreqBytes, 0, statsBytes);

                    // Blob 3: metadata. Buffer and reader are created on first use.
                    int metadataBytes = input.ReadVInt32();
                    if (bytes == null)
                    {
                        bytes = new byte[ArrayUtil.Oversize(metadataBytes, 1)];
                        bytesReader = new ByteArrayDataInput();
                    }
                    else if (metadataBytes > bytes.Length)
                    {
                        bytes = new byte[ArrayUtil.Oversize(metadataBytes, 1)];
                    }
                    input.ReadBytes(bytes, 0, metadataBytes);
                    bytesReader.Reset(bytes, 0, metadataBytes);

                    metaDataUpto = 0;
                    state.TermBlockOrd = 0;

                    // Once the number of blocks read since the last seek reaches the index
                    // divisor, the index is no longer considered current.
                    blocksSinceSeek++;
                    if (indexIsCurrent && blocksSinceSeek >= outerInstance.outerInstance.indexReader.Divisor)
                    {
                        indexIsCurrent = false;
                    }

                    return true;
                }
예제 #6
0
 /// <summary>
 /// Fills <paramref name="b"/> by seeking the wrapped <c>@delegate</c> input to this
 /// input's current file pointer and reading from it.
 /// </summary>
 /// <param name="b">Destination buffer.</param>
 /// <param name="offset">Index in <paramref name="b"/> at which to start writing.</param>
 /// <param name="length">Number of bytes to read.</param>
 protected override void ReadInternal(byte[] b, int offset, int length)
 {
     // NOTE(review): SimOutage is defined elsewhere; presumably it simulates a failure
     // before the delegate is touched — confirm.
     SimOutage();
     // The delegate is re-positioned on every call before reading.
     @delegate.Seek(GetFilePointer());
     @delegate.ReadBytes(b, offset, length);
 }
 /// <summary>
 /// Loads a doc-values field stored as one byte per document.
 /// <para/>
 /// Validates the codec header, requires the stored value size to be exactly 1, then reads
 /// <c>State.SegmentInfo.DocCount</c> bytes and accounts for them in
 /// <c>RamBytesUsed_Renamed</c>.
 /// </summary>
 /// <param name="field">The field being loaded (not read by this method).</param>
 /// <param name="input">Stream positioned at the codec header of the values file.</param>
 /// <returns>A <see cref="NumericDocValues"/> view over the loaded bytes.</returns>
 /// <exception cref="CorruptIndexException">If the stored value size is not 1.</exception>
 private NumericDocValues LoadByteField(FieldInfo field, IndexInput input)
 {
     CodecUtil.CheckHeader(
         input,
         Lucene40DocValuesFormat.INTS_CODEC_NAME,
         Lucene40DocValuesFormat.INTS_VERSION_START,
         Lucene40DocValuesFormat.INTS_VERSION_CURRENT);

     int bytesPerValue = input.ReadInt();
     if (bytesPerValue != 1)
     {
         throw new CorruptIndexException("invalid valueSize: " + bytesPerValue);
     }

     // One byte per document in the segment.
     byte[] docValues = new byte[State.SegmentInfo.DocCount];
     input.ReadBytes(docValues, 0, docValues.Length);

     RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(docValues));
     return new NumericDocValuesAnonymousInnerClassHelper3(this, docValues);
 }
예제 #8
0
 /// <summary>
 /// Fills <paramref name="b"/> by seeking the wrapped <c>@delegate</c> input to this
 /// input's current <c>Position</c> and reading from it.
 /// </summary>
 /// <param name="b">Destination buffer.</param>
 /// <param name="offset">Index in <paramref name="b"/> at which to start writing.</param>
 /// <param name="length">Number of bytes to read.</param>
 protected override void ReadInternal(byte[] b, int offset, int length)
 {
     // NOTE(review): SimOutage is defined elsewhere; presumably it simulates a failure
     // before the delegate is touched — confirm.
     SimOutage();
     // The delegate is re-positioned on every call before reading.
     @delegate.Seek(Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
     @delegate.ReadBytes(b, offset, length);
 }
예제 #9
0
        /// <summary>
        /// Sole constructor. Opens the terms file (and, unless <paramref name="indexDivisor"/>
        /// is -1, the terms index file) of the segment, validates the headers, and builds one
        /// <c>FieldReader</c> per field listed in the terms directory.
        /// </summary>
        /// <param name="dir">Directory the segment files are opened from.</param>
        /// <param name="fieldInfos">Used to resolve the field numbers stored in the terms file.</param>
        /// <param name="info">Supplies the segment name and the document count used for validation.</param>
        /// <param name="postingsReader">Postings reader; it is stored and initialized from the terms input.</param>
        /// <param name="ioContext">Context passed to every <c>OpenInput</c> call.</param>
        /// <param name="segmentSuffix">Suffix used when building the segment file names.</param>
        /// <param name="indexDivisor">When -1, the terms index file is neither opened nor read.</param>
        /// <exception cref="CorruptIndexException">
        /// If the two files disagree on version, or a per-field statistic fails validation,
        /// or a field appears twice.
        /// </exception>
        public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, IOContext ioContext, string segmentSuffix, int indexDivisor)
        {
            if (!InstanceFieldsInitialized)
            {
                InitializeInstanceFields();
                InstanceFieldsInitialized = true;
            }

            this.PostingsReader = postingsReader;

            this.Segment = info.Name;
            @in = dir.OpenInput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, BlockTreeTermsWriter.TERMS_EXTENSION), ioContext);

            // 'success' gates the cleanup in the finally block below.
            bool success = false;
            IndexInput indexIn = null;

            try
            {
                Version = ReadHeader(@in);
                if (indexDivisor != -1)
                {
                    indexIn = dir.OpenInput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION), ioContext);
                    int indexVersion = ReadIndexHeader(indexIn);
                    if (indexVersion != Version)
                    {
                        throw new CorruptIndexException("mixmatched version files: " + @in + "=" + Version + "," + indexIn + "=" + indexVersion);
                    }
                }

                // verify
                if (indexIn != null && Version >= BlockTreeTermsWriter.VERSION_CHECKSUM)
                {
                    CodecUtil.ChecksumEntireFile(indexIn);
                }

                // Have PostingsReader init itself
                postingsReader.Init(@in);

                // Read per-field details
                SeekDir(@in, DirOffset);
                if (indexDivisor != -1)
                {
                    SeekDir(indexIn, IndexDirOffset);
                }

                int numFields = @in.ReadVInt();
                if (numFields < 0)
                {
                    throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + @in + ")");
                }

                // The reads below must stay in exactly this order: it is the on-disk layout
                // of each per-field entry.
                for (int i = 0; i < numFields; i++)
                {
                    int field = @in.ReadVInt();
                    long numTerms = @in.ReadVLong();
                    Debug.Assert(numTerms >= 0);
                    int numBytes = @in.ReadVInt();
                    BytesRef rootCode = new BytesRef(new byte[numBytes]);
                    @in.ReadBytes(rootCode.Bytes, 0, numBytes);
                    rootCode.Length = numBytes;
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                    Debug.Assert(fieldInfo != null, "field=" + field);
                    // sumTotalTermFreq is not stored for DOCS_ONLY fields; -1 marks it as absent.
                    long sumTotalTermFreq = fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : @in.ReadVLong();
                    long sumDocFreq = @in.ReadVLong();
                    int docCount = @in.ReadVInt();
                    // longsSize is only present from VERSION_META_ARRAY onward.
                    int longsSize = Version >= BlockTreeTermsWriter.VERSION_META_ARRAY ? @in.ReadVInt() : 0;
                    if (docCount < 0 || docCount > info.DocCount) // #docs with field must be <= #docs
                    {
                        throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.DocCount + " (resource=" + @in + ")");
                    }
                    if (sumDocFreq < docCount) // #postings must be >= #docs with field
                    {
                        throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount + " (resource=" + @in + ")");
                    }
                    if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) // #positions must be >= #postings
                    {
                        throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + @in + ")");
                    }
                    // The index start pointer comes from the index file, when it is open.
                    long indexStartFP = indexDivisor != -1 ? indexIn.ReadVLong() : 0;

                    if (Fields.ContainsKey(fieldInfo.Name))
                    {
                        throw new CorruptIndexException("duplicate field: " + fieldInfo.Name + " (resource=" + @in + ")");
                    }
                    else
                    {
                        Fields[fieldInfo.Name] = new FieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, longsSize, indexIn);
                    }
                }
                // The index input is only needed during construction; the terms input stays open.
                if (indexDivisor != -1)
                {
                    indexIn.Dispose();
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    // this.close() will close in:
                    IOUtils.CloseWhileHandlingException(indexIn, this);
                }
            }
        }
예제 #10
0
                // Performs the initial decode of the next block of terms. The docFreq,
                // totalTermFreq and postings details (frq/prx offset, etc.) are NOT decoded
                // here: they are only loaded as byte[] blobs and decoded on demand when the
                // metadata of a term in this block is actually requested. Terms-only
                // consumers (eg certain MTQs, respelling) therefore do not pay for decoding
                // metadata they never use.
                //
                // Returns false when the term count read for the block is 0, true otherwise.
                private bool NextBlock()
                {
                    // TODO: the byte[] of each term (the suffix) is still decoded lazily;
                    // decoding all N terms up front would allow a fast bsearch within the
                    // block when seeking...

                    _state.BlockFilePointer = _input.FilePointer;
                    _blockTermCount = _input.ReadVInt();
                    if (_blockTermCount == 0)
                    {
                        return false;
                    }

                    _termBlockPrefix = _input.ReadVInt();

                    // Blob 1: term suffixes.
                    int suffixBytes = _input.ReadVInt();
                    if (suffixBytes > _termSuffixes.Length)
                    {
                        _termSuffixes = new byte[ArrayUtil.Oversize(suffixBytes, 1)];
                    }
                    _input.ReadBytes(_termSuffixes, 0, suffixBytes);
                    _termSuffixesReader.Reset(_termSuffixes, 0, suffixBytes);

                    // Blob 2: docFreq, totalTermFreq.
                    int statsBytes = _input.ReadVInt();
                    if (statsBytes > _docFreqBytes.Length)
                    {
                        _docFreqBytes = new byte[ArrayUtil.Oversize(statsBytes, 1)];
                    }
                    _input.ReadBytes(_docFreqBytes, 0, statsBytes);
                    _freqReader.Reset(_docFreqBytes, 0, statsBytes);

                    // Blob 3: metadata. Buffer and reader are created on first use.
                    int metadataBytes = _input.ReadVInt();
                    if (_bytes == null)
                    {
                        _bytes = new byte[ArrayUtil.Oversize(metadataBytes, 1)];
                        _bytesReader = new ByteArrayDataInput();
                    }
                    else if (metadataBytes > _bytes.Length)
                    {
                        _bytes = new byte[ArrayUtil.Oversize(metadataBytes, 1)];
                    }
                    _input.ReadBytes(_bytes, 0, metadataBytes);
                    _bytesReader.Reset(_bytes, 0, metadataBytes);

                    _metaDataUpto = 0;
                    _state.TermBlockOrd = 0;

                    // Once the number of blocks read since the last seek reaches the index
                    // divisor, the index is no longer considered current.
                    _blocksSinceSeek++;
                    if (_indexIsCurrent && _blocksSinceSeek >= _blockTermsReader._indexReader.Divisor)
                    {
                        _indexIsCurrent = false;
                    }

                    return true;
                }
예제 #11
0
        /// <summary>
        /// Flushes and closes the local cached output, uploads the cached file to the blob
        /// (optionally deflate-compressed when built with <c>COMPRESSBLOBS</c>), and records
        /// the original length and last-modified time in the blob metadata.
        /// <para/>
        /// The whole operation runs while holding <c>_fileMutex</c>.
        /// </summary>
        public override void Close()
        {
            _fileMutex.WaitOne();
            try
            {
                string fileName = _name;

                // make sure it's all written out
                _indexOutput.Flush();

                long originalLength = _indexOutput.Length();
                _indexOutput.Close();

                Stream blobStream;
#if COMPRESSBLOBS
                // optionally put a compressor around the blob stream
                if (_azureDirectory.ShouldCompressFile(_name))
                {
                    // unfortunately, deflate stream doesn't allow seek, and we need a seekable stream
                    // to pass to the blob storage stuff, so we compress into a memory stream
                    MemoryStream compressedStream = new MemoryStream();

                    try
                    {
                        IndexInput indexInput = CacheDirectory.OpenInput(fileName);
                        try
                        {
                            // leaveOpen: true keeps compressedStream usable after the compressor is disposed
                            using (DeflateStream compressor = new DeflateStream(compressedStream, CompressionMode.Compress, true))
                            {
                                // compress to compressedOutputStream
                                byte[] bytes = new byte[indexInput.Length()];
                                indexInput.ReadBytes(bytes, 0, (int)bytes.Length);
                                compressor.Write(bytes, 0, (int)bytes.Length);
                            }
                        }
                        finally
                        {
                            // always release the cached input, even when reading or compressing fails
                            indexInput.Close();
                        }

                        // seek back to beginning of compressed stream
                        compressedStream.Seek(0, SeekOrigin.Begin);

                        Debug.WriteLine(string.Format("COMPRESSED {0} -> {1} {2}% to {3}",
                                                      originalLength,
                                                      compressedStream.Length,
                                                      ((float)compressedStream.Length / (float)originalLength) * 100,
                                                      _name));
                    }
                    catch
                    {
                        // release the compressed stream resources if an error occurs
                        compressedStream.Dispose();
                        throw;
                    }

                    blobStream = compressedStream;
                }
                else
#endif
                {
                    blobStream = new StreamInput(CacheDirectory.OpenInput(fileName));
                }

                try
                {
                    // push the blobStream up to the cloud
                    _blob.UploadFromStream(blobStream);

                    // set the metadata with the original index file properties
                    _blob.Metadata["CachedLength"]       = originalLength.ToString();
                    _blob.Metadata["CachedLastModified"] = CacheDirectory.FileModified(fileName).ToString();
                    _blob.SetMetadata();

                    Debug.WriteLine(string.Format("PUT {1} bytes to {0} in cloud", _name, blobStream.Length));
                }
                finally
                {
                    blobStream.Dispose();
                }

#if FULLDEBUG
                Debug.WriteLine(string.Format("CLOSED WRITESTREAM {0}", _name));
#endif
                // clean up
                _indexOutput   = null;
                _blobContainer = null;
                _blob          = null;
                GC.SuppressFinalize(this);
            }
            finally
            {
                _fileMutex.ReleaseMutex();
            }
        }
예제 #12
0
 /// <summary>
 /// Reads <paramref name="len"/> bytes from <c>main</c> into <paramref name="b"/> and then
 /// feeds that same range of the buffer to <c>digest</c>.
 /// </summary>
 /// <param name="b">Destination buffer.</param>
 /// <param name="offset">Index in <paramref name="b"/> at which to start writing.</param>
 /// <param name="len">Number of bytes to read.</param>
 public override void  ReadBytes(byte[] b, int offset, int len)
 {
     main.ReadBytes(b, offset, len);
     // The digest is updated only after the read has filled the buffer.
     digest.Update(b, offset, len);
 }
예제 #13
0
        /// <summary>
        /// Prints the filename and size of each file within a given compound file.
        /// Add the -extract flag to extract files to the current working directory.
        /// In order to make the extracted version of the index work, you have to copy
        /// the segments file from the compound index into the directory where the extracted files are stored.
        /// <para/>
        /// Recognized arguments: <c>-extract</c>, <c>-dir-impl X</c> and the compound file name
        /// (the first argument that is not one of the options).
        /// </summary>
        /// <exception cref="ArgumentException">
        /// If <c>-dir-impl</c> is the last argument (no value follows it) or no compound file
        /// name was supplied.
        /// </exception>
        ///// <param name="args"> Usage: org.apache.lucene.index.IndexReader [-extract] &lt;cfsfile&gt; </param>
        public static void Main(string[] args)
        {
            string filename = null;
            bool   extract  = false;
            string dirImpl  = null;

            int j = 0;

            while (j < args.Length)
            {
                string arg = args[j];
                if ("-extract".Equals(arg, StringComparison.Ordinal))
                {
                    extract = true;
                }
                else if ("-dir-impl".Equals(arg, StringComparison.Ordinal))
                {
                    if (j == args.Length - 1)
                    {
                        // LUCENENET specific - our wrapper console shows the correct usage
                        throw new ArgumentException("ERROR: missing value for --directory-type option");
                        //Console.WriteLine("ERROR: missing value for -dir-impl option");
                        //Environment.Exit(1);
                    }
                    j++;
                    dirImpl = args[j];
                }
                else if (filename == null)
                {
                    filename = arg;
                }
                j++;
            }

            if (filename == null)
            {
                // LUCENENET specific - our wrapper console shows the correct usage
                throw new ArgumentException("ERROR: CFS-FILE is required");
                //Console.WriteLine("Usage: org.apache.lucene.index.CompoundFileExtractor [-extract] [-dir-impl X] <cfsfile>");
                //return;
            }

            Store.Directory       dir     = null;
            CompoundFileDirectory cfr     = null;
            IOContext             context = IOContext.READ;

            try
            {
                // Split the argument into the containing directory and the bare file name.
                FileInfo file    = new FileInfo(filename);
                string   dirname = file.DirectoryName;
                filename = file.Name;
                if (dirImpl == null)
                {
                    dir = FSDirectory.Open(new DirectoryInfo(dirname));
                }
                else
                {
                    dir = CommandLineUtil.NewFSDirectory(dirImpl, new DirectoryInfo(dirname));
                }

                cfr = new CompoundFileDirectory(dir, filename, IOContext.DEFAULT, false);

                string[] files = cfr.ListAll();
                ArrayUtil.TimSort(files); // sort the array of filename so that the output is more readable

                for (int i = 0; i < files.Length; ++i)
                {
                    long len = cfr.FileLength(files[i]);

                    if (extract)
                    {
                        Console.WriteLine("extract " + files[i] + " with " + len + " bytes to local directory...");
                        using IndexInput ii = cfr.OpenInput(files[i], context);
                        // FileMode.Create: the destination normally does not exist yet (FileMode.Open
                        // would throw FileNotFoundException), and an existing file must be truncated
                        // so that no stale trailing bytes survive the extraction.
                        using FileStream f  = new FileStream(files[i], FileMode.Create, FileAccess.ReadWrite);

                        // read and write with a small buffer, which is more effective than reading byte by byte
                        byte[] buffer = new byte[1024];
                        int    chunk  = buffer.Length;
                        while (len > 0)
                        {
                            int bufLen = (int)Math.Min(chunk, len);
                            ii.ReadBytes(buffer, 0, bufLen);
                            f.Write(buffer, 0, bufLen);
                            len -= bufLen;
                        }
                    }
                    else
                    {
                        Console.WriteLine(files[i] + ": " + len + " bytes");
                    }
                }
            }
            catch (IOException ioe)
            {
                // I/O failures are reported on the console rather than propagated.
                Console.WriteLine(ioe.ToString());
                //Console.Write(ioe.StackTrace);
            }
            finally
            {
                try
                {
                    if (dir != null)
                    {
                        dir.Dispose();
                    }
                    if (cfr != null)
                    {
                        cfr.Dispose();
                    }
                }
                catch (IOException ioe)
                {
                    Console.WriteLine(ioe.ToString());
                    //Console.Write(ioe.StackTrace);
                }
            }
        }
예제 #14
0
        /// <summary>
        /// Reads the term vector of one field from the tvf stream, starting at
        /// <paramref name="tvfPointer"/>, and reports every term to <paramref name="mapper"/>.
        /// Returns without calling the mapper when the stored term count is 0.
        /// </summary>
        /// <param name="field">The field to read in
        /// </param>
        /// <param name="tvfPointer">The pointer within the tvf file where we should start reading
        /// </param>
        /// <param name="mapper">The mapper used to map the TermVector
        /// </param>
        /// <exception cref="System.IO.IOException">Declared by the original documentation; presumably raised by the underlying stream reads.</exception>
        private void  ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper)
        {
            // Now read the data from specified position
            //We don't need to offset by the FORMAT here since the pointer already includes the offset
            tvf.Seek(tvfPointer);

            int numTerms = tvf.ReadVInt();

            //System.out.println("Num Terms: " + numTerms);
            // If no terms - return a constant empty termvector. However, this should never occur!
            if (numTerms == 0)
            {
                return;
            }

            bool storePositions;
            bool storeOffsets;

            if (format >= FORMAT_VERSION)
            {
                // A single flags byte tells whether positions and/or offsets were stored.
                byte bits = tvf.ReadByte();
                storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                storeOffsets   = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
            }
            else
            {
                // Older format: one vint is consumed and discarded; neither positions nor offsets are read.
                tvf.ReadVInt();
                storePositions = false;
                storeOffsets   = false;
            }
            mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);
            int start       = 0;
            int deltaLength = 0;
            int totalLength = 0;

            byte[] byteBuffer;
            char[] charBuffer;
            bool   preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES;

            // init the buffers
            // Exactly one of the two buffers is used, depending on the term encoding.
            if (preUTF8)
            {
                charBuffer = new char[10];
                byteBuffer = null;
            }
            else
            {
                charBuffer = null;
                byteBuffer = new byte[20];
            }

            for (int i = 0; i < numTerms; i++)
            {
                // Each term is stored as (start, deltaLength, data); only the data from 'start'
                // onward is read, the earlier part of the buffer is kept from the previous term.
                start       = tvf.ReadVInt();
                deltaLength = tvf.ReadVInt();
                totalLength = start + deltaLength;

                System.String term;

                if (preUTF8)
                {
                    // Term stored as java chars
                    if (charBuffer.Length < totalLength)
                    {
                        // Grow to 1.5x the required size, keeping the first 'start' chars.
                        char[] newCharBuffer = new char[(int)(1.5 * totalLength)];
                        Array.Copy(charBuffer, 0, newCharBuffer, 0, start);
                        charBuffer = newCharBuffer;
                    }
                    tvf.ReadChars(charBuffer, start, deltaLength);
                    term = new System.String(charBuffer, 0, totalLength);
                }
                else
                {
                    // Term stored as utf8 bytes
                    if (byteBuffer.Length < totalLength)
                    {
                        // Grow to 1.5x the required size, keeping the first 'start' bytes.
                        byte[] newByteBuffer = new byte[(int)(1.5 * totalLength)];
                        Array.Copy(byteBuffer, 0, newByteBuffer, 0, start);
                        byteBuffer = newByteBuffer;
                    }
                    tvf.ReadBytes(byteBuffer, start, deltaLength);
                    term = System.Text.Encoding.UTF8.GetString(byteBuffer, 0, totalLength);
                }
                int   freq      = tvf.ReadVInt();
                int[] positions = null;
                if (storePositions)
                {
                    //read in the positions
                    //does the mapper even care about positions?
                    if (mapper.IsIgnoringPositions == false)
                    {
                        // Positions are stored as deltas from the previous position.
                        positions = new int[freq];
                        int prevPosition = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            positions[j] = prevPosition + tvf.ReadVInt();
                            prevPosition = positions[j];
                        }
                    }
                    else
                    {
                        //we need to skip over the positions.  Since these are VInts, I don't believe there is anyway to know for sure how far to skip
                        //
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                        }
                    }
                }
                TermVectorOffsetInfo[] offsets = null;
                if (storeOffsets)
                {
                    //does the mapper even care about offsets?
                    if (mapper.IsIgnoringOffsets == false)
                    {
                        // Start is a delta from the previous end offset; end is a delta from start.
                        offsets = new TermVectorOffsetInfo[freq];
                        int prevOffset = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            int startOffset = prevOffset + tvf.ReadVInt();
                            int endOffset   = startOffset + tvf.ReadVInt();
                            offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
                            prevOffset = endOffset;
                        }
                    }
                    else
                    {
                        // Skip the two vints of every offset pair.
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                            tvf.ReadVInt();
                        }
                    }
                }
                // offsets / positions stay null when they are not stored or the mapper ignores them.
                mapper.Map(term, freq, offsets, positions);
            }
        }
예제 #15
0
 /// <summary>
 /// Loads the vector in its bit-set form: a 32-bit count followed by the raw bit bytes.
 /// </summary>
 /// <param name="input">Stream positioned at the stored count.</param>
 private void ReadBits(IndexInput input)
 {
     // The count comes first in the stream.
     count = input.ReadInt32();

     // The bit array is sized from the already-known 'size', then filled in one read.
     int byteCount = GetNumBytes(size);
     bits = new byte[byteCount];
     input.ReadBytes(bits, 0, byteCount);
 }
예제 #16
0
 /// <summary>
 /// Loads the vector in its bit-set form: a 32-bit count followed by the raw bit bytes.
 /// </summary>
 /// <param name="input">Stream positioned at the stored count.</param>
 private void  ReadBits(IndexInput input)
 {
     // The count comes first in the stream.
     count = input.ReadInt();

     // One byte per 8 bits of 'size', plus one; the array is filled in a single read.
     int byteCount = (size >> 3) + 1;
     bits = new byte[byteCount];
     input.ReadBytes(bits, 0, byteCount);
 }
예제 #17
0
 /// <summary>
 /// Loads the vector in its bit-set form: a 32-bit count followed by the raw bit bytes.
 /// </summary>
 /// <param name="input">Stream positioned at the stored count.</param>
 private void ReadBits(IndexInput input)
 {
     // The count comes first in the stream.
     Count_Renamed = input.ReadInt();

     // The bit array is sized from the already-known size, then filled in one read.
     int byteCount = GetNumBytes(Size_Renamed);
     Bits = new byte[byteCount];
     input.ReadBytes(Bits, 0, byteCount);
 }