private void Flush()
        {
            // Flushes the buffered documents as a single chunk: registers the chunk start
            // with the index writer, writes the chunk header, compresses the buffered bytes
            // into fieldsStream and finally resets the buffering state.
            indexWriter.WriteIndex(numBufferedDocs, fieldsStream.GetFilePointer());

            // Turn the cumulative end offsets into per-document lengths, in place.
            // Walking from the last document downwards means the previous entry is still
            // an end offset at the moment it is subtracted.
            int[] docLengths = endOffsets;
            for (int doc = numBufferedDocs - 1; doc >= 1; doc--)
            {
                docLengths[doc] -= docLengths[doc - 1];
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(docLengths[doc] >= 0);
                }
            }
            WriteHeader(docBase, numBufferedDocs, numStoredFields, docLengths);

            // Compress the stored fields to fieldsStream.
            var pendingBytes = bufferedDocs.Length;
            if (pendingBytes < 2 * chunkSize)
            {
                // Small enough to be compressed in one call.
                compressor.Compress(bufferedDocs.Bytes, 0, pendingBytes, fieldsStream);
            }
            else
            {
                // Big chunk: compress it in slices of at most chunkSize bytes.
                int sliceStart = 0;
                while (sliceStart < pendingBytes)
                {
                    var sliceLength = Math.Min(chunkSize, pendingBytes - sliceStart);
                    compressor.Compress(bufferedDocs.Bytes, sliceStart, sliceLength, fieldsStream);
                    sliceStart += chunkSize;
                }
            }

            // Reset: the next chunk starts right after the documents just flushed.
            docBase += numBufferedDocs;
            numBufferedDocs = 0;
            bufferedDocs.Length = 0;
        }
Example #2
0
 /// <summary>
 /// Captures the skip data for the current document: the doc ID, whether payloads
 /// are stored, the payload length and, when a payload output exists, that
 /// output's current file pointer.
 /// </summary>
 /// <remarks>
 /// Called at every index interval (every 128th doc by default).
 /// </remarks>
 internal void SetSkipData(int doc, bool storePayloads, int payloadLength)
 {
     curDoc = doc;
     curStorePayloads = storePayloads;
     curPayloadLength = payloadLength;

     // Without a payload output there is no pointer to record.
     if (payloadOutput == null)
     {
         return;
     }

     curPayloadPointer = payloadOutput.GetFilePointer();
 }
Example #3
0
 /// <summary>
 /// Captures the skip data for the current document: the doc ID, the payload
 /// settings, the current file pointer of the freq output and, when a prox
 /// output exists, the current file pointer of that output as well.
 /// </summary>
 public virtual void SetSkipData(int doc, bool storePayloads, int payloadLength)
 {
     curDoc = doc;
     curStorePayloads = storePayloads;
     curPayloadLength = payloadLength;
     curFreqPointer = freqOutput.GetFilePointer();

     // The prox output is optional; skip its pointer when it is absent.
     if (proxOutput == null)
     {
         return;
     }

     curProxPointer = proxOutput.GetFilePointer();
 }
Example #4
0
        /// <summary>
        /// Writes a variable-length sorted bytes field: the value bytes go to
        /// <paramref name="data"/>; the total length, the packed start addresses (plus a
        /// trailing sentinel) and the packed per-document ordinals go to <paramref name="index"/>.
        /// Note that <paramref name="values"/> is enumerated twice.
        /// </summary>
        private void AddVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
        {
            field.PutAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.ToString());

            CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
            CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

            // values: the raw bytes, written back to back
            long dataStart = data.GetFilePointer();
            int numValues = 0;
            foreach (BytesRef value in values)
            {
                data.WriteBytes(value.Bytes, value.Offset, value.Length);
                ++numValues;
            }

            // addresses: total byte length first, then one start address per value
            long totalLength = data.GetFilePointer() - dataStart;
            index.WriteInt64(totalLength);

            Debug.Assert(numValues != int.MaxValue); // unsupported by the 4.0 impl

            PackedInt32s.Writer addressWriter = PackedInt32s.GetWriter(index, numValues + 1, PackedInt32s.BitsRequired(totalLength), PackedInt32s.DEFAULT);
            long address = 0;
            foreach (BytesRef value in values)
            {
                addressWriter.Add(address);
                address += value.Length;
            }

            // sentinel: one extra address equal to the total length
            Debug.Assert(address == totalLength);
            addressWriter.Add(address);
            addressWriter.Finish();

            // ordinals: one entry per element of docToOrd
            int docCount = state.SegmentInfo.DocCount;

            Debug.Assert(numValues > 0);
            PackedInt32s.Writer ordWriter = PackedInt32s.GetWriter(index, docCount, PackedInt32s.BitsRequired(numValues - 1), PackedInt32s.DEFAULT);
            foreach (long ord in docToOrd)
            {
                ordWriter.Add(ord);
            }
            ordWriter.Finish();
        }
Example #5
0
        /// <summary>
        /// Writes all pending documents out as one chunk on <c>vectorsStream</c> and
        /// resets the per-chunk buffering state. The chunk start is first registered
        /// with <c>indexWriter</c>; the chunk itself begins with the doc base and the
        /// number of documents, followed by the per-field/per-term sections below.
        /// </summary>
        private void Flush()
        {
            int chunkDocs = pendingDocs.Count;

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(chunkDocs > 0, "{0}", chunkDocs);
            }

            // write the index file
            indexWriter.WriteIndex(chunkDocs, vectorsStream.GetFilePointer());

            // doc base of this chunk: total docs so far minus the docs still pending
            int docBase = numDocs - chunkDocs;

            vectorsStream.WriteVInt32(docBase);
            vectorsStream.WriteVInt32(chunkDocs);

            // total number of fields of the chunk
            int totalFields = FlushNumFields(chunkDocs);

            // the remaining sections are only written when the chunk has at least one field
            if (totalFields > 0)
            {
                // unique field numbers (sorted)
                int[] fieldNums = FlushFieldNums();
                // offsets in the array of unique field numbers
                FlushFields(totalFields, fieldNums);
                // flags (does the field have positions, offsets, payloads?)
                FlushFlags(totalFields, fieldNums);
                // number of terms of each field
                FlushNumTerms(totalFields);
                // prefix and suffix lengths for each field
                FlushTermLengths();
                // term freqs - 1 (because termFreq is always >=1) for each term
                FlushTermFreqs();
                // positions for all terms, when enabled
                FlushPositions();
                // offsets for all terms, when enabled
                FlushOffsets(fieldNums);
                // payload lengths for all terms, when enabled
                FlushPayloadLengths();

                // compress terms and payloads and write them to the output
                compressor.Compress(termSuffixes.Bytes, 0, termSuffixes.Length, vectorsStream);
            }

            // reset
            pendingDocs.Clear();
            curDoc              = null;
            curField            = null;
            termSuffixes.Length = 0;
        }
Example #6
0
        // NOTE: 4.0 file format docs are crazy/wrong here...
        /// <summary>
        /// Writes a variable-length straight bytes field: the bytes of every non-null
        /// value go to <paramref name="data"/>; the total length and the packed start
        /// address of every entry (plus a trailing sentinel) go to <paramref name="index"/>.
        /// A null value contributes no bytes. <paramref name="values"/> is enumerated twice.
        /// </summary>
        private void AddVarStraightBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values)
        {
            field.PutAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_STRAIGHT.ToString());

            CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
            CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);

            // values: raw bytes of every non-null entry, back to back
            long dataStart = data.GetFilePointer();
            foreach (BytesRef value in values)
            {
                if (value == null)
                {
                    continue;
                }
                data.WriteBytes(value.Bytes, value.Offset, value.Length);
            }

            // addresses: total byte length first, then one start address per entry
            long totalLength = data.GetFilePointer() - dataStart;
            index.WriteVInt64(totalLength);

            int docCount = state.SegmentInfo.DocCount;
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(docCount != int.MaxValue); // unsupported by the 4.0 impl
            }

            PackedInt32s.Writer addressWriter = PackedInt32s.GetWriter(index, docCount + 1, PackedInt32s.BitsRequired(totalLength), PackedInt32s.DEFAULT);
            long address = 0;
            foreach (BytesRef value in values)
            {
                addressWriter.Add(address);
                if (value != null)
                {
                    address += value.Length;
                }
            }

            // sentinel: one extra address equal to the total length
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(address == totalLength);
            }
            addressWriter.Add(address);
            addressWriter.Finish();
        }
Example #7
0
 /// <summary>
 /// Captures the skip data for the current document: the doc ID, the payload and
 /// offset settings, the current file pointer of the freq output and, when a prox
 /// output exists, the current file pointer of that output.
 /// </summary>
 /// <remarks>
 /// Asserts that a length other than -1 is only passed when the corresponding
 /// store flag is set.
 /// </remarks>
 public virtual void SetSkipData(int doc, bool storePayloads, int payloadLength, bool storeOffsets, int offsetLength)
 {
     Debug.Assert(storePayloads || payloadLength == -1);
     Debug.Assert(storeOffsets || offsetLength == -1);

     CurDoc = doc;
     CurStorePayloads = storePayloads;
     CurPayloadLength = payloadLength;
     CurStoreOffsets = storeOffsets;
     CurOffsetLength = offsetLength;
     CurFreqPointer = FreqOutput.GetFilePointer();

     // The prox output is optional; skip its pointer when it is absent.
     if (ProxOutput == null)
     {
         return;
     }

     CurProxPointer = ProxOutput.GetFilePointer();
 }
Example #8
0
        /// <summary>
        /// Sole constructor. Creates the fields data and fields index outputs for
        /// <paramref name="segment"/> and writes a codec header to each of them.
        /// If any step fails, <c>Abort()</c> is invoked before the exception propagates.
        /// </summary>
        public Lucene40StoredFieldsWriter(Directory directory, string segment, IOContext context)
        {
            Debug.Assert(directory != null);
            this.directory = directory;
            this.segment = segment;

            bool initialized = false;
            try
            {
                string dataFileName = IndexFileNames.SegmentFileName(segment, "", FIELDS_EXTENSION);
                fieldsStream = directory.CreateOutput(dataFileName, context);

                string indexFileName = IndexFileNames.SegmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
                indexStream = directory.CreateOutput(indexFileName, context);

                CodecUtil.WriteHeader(fieldsStream, CODEC_NAME_DAT, VERSION_CURRENT);
                CodecUtil.WriteHeader(indexStream, CODEC_NAME_IDX, VERSION_CURRENT);

                // Right after the headers, each stream must sit exactly at its header length.
                Debug.Assert(HEADER_LENGTH_DAT == fieldsStream.GetFilePointer());
                Debug.Assert(HEADER_LENGTH_IDX == indexStream.GetFilePointer());

                initialized = true;
            }
            finally
            {
                if (!initialized)
                {
                    Abort();
                }
            }
        }
Example #9
0
        /// <summary>
        /// Sole constructor. Creates the three term vector outputs (index, documents,
        /// fields) for <paramref name="segment"/> and writes a codec header to each.
        /// If any step fails, <c>Abort()</c> is invoked before the exception propagates.
        /// </summary>
        public Lucene40TermVectorsWriter(Directory directory, string segment, IOContext context)
        {
            this.directory = directory;
            this.segment = segment;

            bool initialized = false;
            try
            {
                // Open files for TermVector storage; each one gets its header right after creation.
                string indexFileName = IndexFileNames.SegmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_INDEX_EXTENSION);
                tvx = directory.CreateOutput(indexFileName, context);
                CodecUtil.WriteHeader(tvx, Lucene40TermVectorsReader.CODEC_NAME_INDEX, Lucene40TermVectorsReader.VERSION_CURRENT);

                string docsFileName = IndexFileNames.SegmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_DOCUMENTS_EXTENSION);
                tvd = directory.CreateOutput(docsFileName, context);
                CodecUtil.WriteHeader(tvd, Lucene40TermVectorsReader.CODEC_NAME_DOCS, Lucene40TermVectorsReader.VERSION_CURRENT);

                string fieldsFileName = IndexFileNames.SegmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_FIELDS_EXTENSION);
                tvf = directory.CreateOutput(fieldsFileName, context);
                CodecUtil.WriteHeader(tvf, Lucene40TermVectorsReader.CODEC_NAME_FIELDS, Lucene40TermVectorsReader.VERSION_CURRENT);

                // Right after the headers, each stream must sit exactly at its header length.
                Debug.Assert(Lucene40TermVectorsReader.HEADER_LENGTH_INDEX == tvx.GetFilePointer());
                Debug.Assert(Lucene40TermVectorsReader.HEADER_LENGTH_DOCS == tvd.GetFilePointer());
                Debug.Assert(Lucene40TermVectorsReader.HEADER_LENGTH_FIELDS == tvf.GetFilePointer());

                initialized = true;
            }
            finally
            {
                if (!initialized)
                {
                    Abort();
                }
            }
        }
Example #10
0
        /// <summary>
        /// Sole constructor. Stores the configuration, allocates the in-memory buffers,
        /// creates the vectors index and vectors data outputs, writes their codec headers
        /// and wraps the index output in a <see cref="CompressingStoredFieldsIndexWriter"/>.
        /// On failure the still-owned index output is disposed and <c>Abort()</c> is called.
        /// </summary>
        public CompressingTermVectorsWriter(Directory directory, SegmentInfo si, string segmentSuffix, IOContext context, string formatName, CompressionMode compressionMode, int chunkSize)
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(directory != null);
            }
            this.directory       = directory;
            this.segment         = si.Name;
            this.segmentSuffix   = segmentSuffix;
            this.compressionMode = compressionMode;
            this.compressor      = compressionMode.NewCompressor();
            this.chunkSize       = chunkSize;

            // in-memory state for the chunk being built
            numDocs      = 0;
            pendingDocs  = new LinkedList <DocData>();
            termSuffixes = new GrowableByteArrayDataOutput(ArrayUtil.Oversize(chunkSize, 1));
            payloadBytes = new GrowableByteArrayDataOutput(ArrayUtil.Oversize(1, 1));
            lastTerm     = new BytesRef(ArrayUtil.Oversize(30, 1));

            bool        success     = false;
            // created outside the try block; the finally block below disposes it on failure
            IndexOutput indexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION), context);

            try
            {
                vectorsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), context);

                string codecNameIdx = formatName + CODEC_SFX_IDX;
                string codecNameDat = formatName + CODEC_SFX_DAT;
                CodecUtil.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT);
                CodecUtil.WriteHeader(vectorsStream, codecNameDat, VERSION_CURRENT);
                if (Debugging.AssertsEnabled)
                {
                    // right after the header each stream must sit exactly at the header length
                    Debugging.Assert(CodecUtil.HeaderLength(codecNameDat) == vectorsStream.GetFilePointer());
                    Debugging.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer());
                }

                indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
                // the stream now belongs to indexWriter; clearing the local keeps the
                // finally block from disposing it a second time (presumably Abort() handles it)
                indexStream = null;

                vectorsStream.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                vectorsStream.WriteVInt32(chunkSize);
                writer = new BlockPackedWriter(vectorsStream, BLOCK_SIZE);

                // scratch buffers, each starting with room for 1024 ints
                positionsBuf      = new int[1024];
                startOffsetsBuf   = new int[1024];
                lengthsBuf        = new int[1024];
                payloadLengthsBuf = new int[1024];

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.DisposeWhileHandlingException(indexStream);
                    Abort();
                }
            }
        }
Example #11
0
 /// <summary>
 /// Resets the skip state: after the base reset, the last-skip doc IDs are zeroed
 /// and the last-skip pointers are set to the current file pointers of the doc,
 /// position and payload outputs, depending on what the field stores.
 /// </summary>
 public override void ResetSkip()
 {
     base.ResetSkip();
     Arrays.Fill(lastSkipDoc, 0);
     Arrays.Fill(lastSkipDocPointer, docOut.GetFilePointer());

     // Everything below only applies to fields that have positions.
     if (!fieldHasPositions)
     {
         return;
     }

     Arrays.Fill(lastSkipPosPointer, posOut.GetFilePointer());

     if (fieldHasPayloads)
     {
         Arrays.Fill(lastPayloadByteUpto, 0);
     }

     if (fieldHasOffsets || fieldHasPayloads)
     {
         Arrays.Fill(lastSkipPayPointer, payOut.GetFilePointer());
     }
 }
 /// <summary>
 /// Begins a new term: remembers the current file pointers of the outputs that
 /// apply to this field, resets the doc ID trackers and resets the skip writer.
 /// </summary>
 public override void StartTerm()
 {
     docStartFP = docOut.GetFilePointer();

     if (fieldHasPositions)
     {
         posStartFP = posOut.GetFilePointer();
     }

     // The pay output is only consulted for positional fields that also carry
     // payloads or offsets.
     if (fieldHasPositions && (fieldHasPayloads || fieldHasOffsets))
     {
         payStartFP = payOut.GetFilePointer();
     }

     lastDocID = 0;
     lastBlockDocID = -1;
     skipWriter.ResetSkip();
 }
Example #13
0
        /// <summary>
        /// Writes a variable-length dereferenced bytes field: every distinct value is
        /// written once to <paramref name="data"/> (length prefix, then bytes), and
        /// <paramref name="index"/> receives the total byte count followed by the packed
        /// address of each entry's value. A null entry is treated as an empty
        /// <c>BytesRef</c>. <paramref name="values"/> is enumerated twice.
        /// </summary>
        private void AddVarDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values)
        {
            field.PutAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_DEREF.ToString());

            CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);
            CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);

            // deduplicate, working on copies of the incoming values
            SortedSet<BytesRef> uniqueValues = new SortedSet<BytesRef>();
            foreach (BytesRef candidate in values)
            {
                BytesRef entry = candidate == null ? new BytesRef() : BytesRef.DeepCopyOf(candidate);
                uniqueValues.Add(entry);
            }

            // values: remember where each distinct value starts, relative to dataStart
            long dataStart = data.GetFilePointer();
            long lastAddress = 0;
            Dictionary<BytesRef, long> addressOf = new Dictionary<BytesRef, long>();
            foreach (BytesRef unique in uniqueValues)
            {
                lastAddress = data.GetFilePointer() - dataStart;
                addressOf[unique] = lastAddress;
                WriteVShort(data, unique.Length);
                data.WriteBytes(unique.Bytes, unique.Offset, unique.Length);
            }

            // ordinals: total byte count, then the address for each entry
            long totalBytes = data.GetFilePointer() - dataStart;
            index.WriteInt64(totalBytes);

            int docCount = state.SegmentInfo.DocCount;

            // lastAddress is the largest address handed out, so it bounds the bit width
            PackedInt32s.Writer addressWriter = PackedInt32s.GetWriter(index, docCount, PackedInt32s.BitsRequired(lastAddress), PackedInt32s.DEFAULT);
            foreach (BytesRef candidate in values)
            {
                BytesRef key = candidate == null ? new BytesRef() : candidate;
                addressWriter.Add(addressOf[key]);
            }
            addressWriter.Finish();
        }
Example #14
0
        /// <summary>
        /// On an explicit dispose with an open output, writes the field directory (one
        /// record per field that has index terms), the trailer and the footer, then
        /// disposes the output. If writing failed, the output is disposed while
        /// suppressing further exceptions so the original one propagates.
        /// </summary>
        protected override void Dispose(bool disposing)
        {
            if (!disposing || m_output == null)
            {
                return;
            }

            bool written = false;
            try
            {
                long directoryStart = m_output.GetFilePointer();

                // Only fields with at least one index term get a directory record.
                int recordCount = 0;
                foreach (SimpleFieldWriter candidate in fields)
                {
                    if (candidate.numIndexTerms > 0)
                    {
                        recordCount++;
                    }
                }
                m_output.WriteVInt32(recordCount);

                foreach (SimpleFieldWriter candidate in fields)
                {
                    if (candidate.numIndexTerms <= 0)
                    {
                        continue;
                    }
                    m_output.WriteVInt32(candidate.fieldInfo.Number);
                    m_output.WriteVInt32(candidate.numIndexTerms);
                    m_output.WriteVInt64(candidate.termsStart);
                    m_output.WriteVInt64(candidate.indexStart);
                    m_output.WriteVInt64(candidate.packedIndexStart);
                    m_output.WriteVInt64(candidate.packedOffsetsStart);
                }

                WriteTrailer(directoryStart);
                CodecUtil.WriteFooter(m_output);
                written = true;
            }
            finally
            {
                if (!written)
                {
                    IOUtils.DisposeWhileHandlingException(m_output);
                }
                else
                {
                    IOUtils.Dispose(m_output);
                }
                m_output = null;
            }
        }
 /// <summary>
 /// Verifies that the tvx output has exactly the size expected for
 /// <paramref name="numDocs"/> documents (4 bytes plus 16 bytes per document, as
 /// computed below). <paramref name="fis"/> is not used by this check.
 /// </summary>
 /// <exception cref="InvalidOperationException">
 /// The tvx file pointer does not match the expected size.
 /// </exception>
 public override void Finish(FieldInfos fis, int numDocs)
 {
     // Read the pointer once so the comparison and the message agree.
     long expectedSize = 4 + ((long)numDocs) * 16;
     long actualSize = tvx.GetFilePointer();

     if (expectedSize != actualSize)
     {
         // this is most likely a bug in Sun JRE 1.6.0_04/_05;
         // we detect that the bug has struck, here, and
         // throw an exception to prevent the corruption from
         // entering the index.  See LUCENE-1282 for
         // details.
         throw new InvalidOperationException("tvx size mismatch: mergedDocs is " + numDocs + " but tvx size is " + actualSize + " file=" + tvx.ToString() + "; now aborting this merge to prevent index corruption");
     }
 }
Example #16
0
 /// <summary>
 /// Verifies that the fields index output has exactly the size expected for
 /// <paramref name="numDocs"/> documents (the index header length plus 8 bytes per
 /// document, as computed below). <paramref name="fis"/> is not used by this check.
 /// </summary>
 /// <exception cref="InvalidOperationException">
 /// The index stream's file pointer does not match the expected size.
 /// </exception>
 public override void Finish(FieldInfos fis, int numDocs)
 {
     // Read the pointer once so the comparison and the message agree.
     long expectedSize = HEADER_LENGTH_IDX + ((long)numDocs) * 8;
     long actualSize = indexStream.GetFilePointer();

     if (expectedSize != actualSize)
     {
         // this is most likely a bug in Sun JRE 1.6.0_04/_05;
         // we detect that the bug has struck, here, and
         // throw an exception to prevent the corruption from
         // entering the index.  See LUCENE-1282 for
         // details.
         throw new InvalidOperationException("fdx size mismatch: docCount is " + numDocs + " but fdx file size is " + actualSize + " file=" + indexStream.ToString() + "; now aborting this merge to prevent index corruption");
     }
 }
Example #17
0
        /// <summary>
        /// Writes random byte blocks to a file until more than 1.1 * int.MaxValue bytes
        /// have been written, copies the file into a <c>PagedBytes</c>, and then replays
        /// the same random sequence to check that every slice reads back unchanged.
        /// </summary>
        public virtual void Test()
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("test2BPagedBytes"));

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
            }
            PagedBytes  pb         = new PagedBytes(15);
            IndexOutput dataOutput = dir.CreateOutput("foo", IOContext.DEFAULT);
            long        netBytes   = 0;
            // the seed is kept so the verification pass below can regenerate the same data
            long        seed       = Random.NextInt64();
            long        lastFP     = 0;
            Random      r2         = new Random((int)seed);

            // write pass: append random-sized random blocks until the size target is exceeded
            while (netBytes < 1.1 * int.MaxValue)
            {
                int    numBytes = TestUtil.NextInt32(r2, 1, 32768);
                byte[] bytes    = new byte[numBytes];
                r2.NextBytes(bytes);
                dataOutput.WriteBytes(bytes, bytes.Length);
                long fp = dataOutput.GetFilePointer();
                if (Debugging.AssertsEnabled)
                {
                    // the file pointer must advance by exactly the number of bytes written
                    Debugging.Assert(fp == lastFP + numBytes);
                }
                lastFP    = fp;
                netBytes += numBytes;
            }
            dataOutput.Dispose();
            IndexInput input = dir.OpenInput("foo", IOContext.DEFAULT);

            pb.Copy(input, input.Length);
            input.Dispose();
            PagedBytes.Reader reader = pb.Freeze(true);

            // verification pass: re-seed so the same block sizes and contents are produced
            r2       = new Random((int)seed);
            netBytes = 0;
            while (netBytes < 1.1 * int.MaxValue)
            {
                int numBytes = TestUtil.NextInt32(r2, 1, 32768);
                var bytes    = new byte[numBytes];
                r2.NextBytes(bytes);
                BytesRef expected = new BytesRef(bytes);

                BytesRef actual = new BytesRef();
                reader.FillSlice(actual, netBytes, numBytes);
                Assert.AreEqual(expected, actual);

                netBytes += numBytes;
            }
            dir.Dispose();
        }
        private int numBufferedDocs;   // docBase + numBufferedDocs == current doc ID

        /// <summary>
        /// Sole constructor. Stores the configuration, allocates the in-memory buffers,
        /// creates the fields index and fields data outputs, writes their codec headers
        /// and wraps the index output in a <see cref="CompressingStoredFieldsIndexWriter"/>.
        /// On failure the still-owned index output is disposed and <c>Abort()</c> is called.
        /// </summary>
        public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, string segmentSuffix, IOContext context, string formatName, CompressionMode compressionMode, int chunkSize)
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(directory != null);
            }
            this.directory       = directory;
            this.segment         = si.Name;
            this.segmentSuffix   = segmentSuffix;
            this.compressionMode = compressionMode;
            this.compressor      = compressionMode.NewCompressor();
            this.chunkSize       = chunkSize;
            this.docBase         = 0;
            // in-memory state for the chunk being built; the arrays start with 16 slots
            this.bufferedDocs    = new GrowableByteArrayDataOutput(chunkSize);
            this.numStoredFields = new int[16];
            this.endOffsets      = new int[16];
            this.numBufferedDocs = 0;

            bool        success     = false;
            // created outside the try block; the finally block below disposes it on failure
            IndexOutput indexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION), context);

            try
            {
                fieldsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION), context);

                string codecNameIdx = formatName + CODEC_SFX_IDX;
                string codecNameDat = formatName + CODEC_SFX_DAT;
                CodecUtil.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT);
                CodecUtil.WriteHeader(fieldsStream, codecNameDat, VERSION_CURRENT);
                if (Debugging.AssertsEnabled)
                {
                    // right after the header each stream must sit exactly at the header length
                    Debugging.Assert(CodecUtil.HeaderLength(codecNameDat) == fieldsStream.GetFilePointer());
                    Debugging.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer());
                }

                indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
                // the stream now belongs to indexWriter; clearing the local keeps the
                // finally block from disposing it a second time (presumably Abort() handles it)
                indexStream = null;

                fieldsStream.WriteVInt32(chunkSize);
                fieldsStream.WriteVInt32(PackedInt32s.VERSION_CURRENT);

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.DisposeWhileHandlingException(indexStream);
                    Abort();
                }
            }
        }
Example #19
0
 /// <summary>
 /// Begins a new term: remembers the current file pointer of the freq output (and
 /// of the prox output when one exists), clears the last payload/offset lengths
 /// and resets the skip list writer.
 /// </summary>
 public override void StartTerm()
 {
     freqStart = freqOut.GetFilePointer();

     bool hasProx = proxOut != null;
     if (hasProx)
     {
         proxStart = proxOut.GetFilePointer();
     }

     // -1 forces the first payload to write its length
     lastPayloadLength = -1;
     // -1 forces the first offset to write its length
     lastOffsetLength = -1;

     skipListWriter.ResetSkip();
 }
Example #20
0
        /// <summary>
        /// Called when we are done adding docs to this term. Copies the doc, freq and
        /// position indexes that apply to the current index options into
        /// <paramref name="state"/>, writes the skip data when the term has at least
        /// <c>skipMinimum</c> docs, and resets the per-term counters.
        /// </summary>
        public override void FinishTerm(BlockTermState state)
        {
            SepTermState termState = (SepTermState)state;

            // TODO: -- wasteful we are counting this in two places?
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(termState.DocFreq > 0);
                Debugging.Assert(termState.DocFreq == df);
            }

            termState.DocIndex = docOut.GetIndex();
            termState.DocIndex.CopyFrom(docIndex, false);

            if (indexOptions == IndexOptions.DOCS_ONLY)
            {
                // docs only: neither freqs nor positions are recorded
                termState.FreqIndex = null;
                termState.PosIndex = null;
            }
            else
            {
                termState.FreqIndex = freqOut.GetIndex();
                termState.FreqIndex.CopyFrom(freqIndex, false);

                if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
                {
                    termState.PosIndex = null;
                }
                else
                {
                    termState.PosIndex = posOut.GetIndex();
                    termState.PosIndex.CopyFrom(posIndex, false);
                }
            }

            if (df < skipMinimum)
            {
                // too few docs for skip data; -1 marks it as absent
                termState.SkipFP = -1;
            }
            else
            {
                termState.SkipFP = skipOut.GetFilePointer();
                skipListWriter.WriteSkip(skipOut);
            }
            termState.PayloadFP = payloadStart;

            // reset the per-term counters
            lastDocID = 0;
            df = 0;
        }
Example #21
0
        /// <summary>
        /// Writes a 2048-byte buffer of pseudo-random data in a single call and checks
        /// that the output's file pointer advanced by exactly the buffer length.
        /// </summary>
        public virtual void TestLargeWrites()
        {
            IndexOutput os = Dir.CreateOutput("testBufferStart.txt", NewIOContext(Random));

            try
            {
                // Fill the buffer from ONE generator. Creating a fresh, identically
                // seeded Random per byte (as before) yields the same value for every
                // element, and the double -> sbyte cast it needed is unspecified for
                // out-of-range values. The fixed seed keeps the data reproducible.
                var largeBuf = new byte[2048];
                new Random(1).NextBytes(largeBuf);

                long currentPos = os.GetFilePointer();

                // The write happens inside the try so the output is disposed even if it throws.
                os.WriteBytes(largeBuf, largeBuf.Length);

                Assert.AreEqual(currentPos + largeBuf.Length, os.GetFilePointer());
            }
            finally
            {
                os.Dispose();
            }
        }
Example #22
0
        /// <summary>
        /// Begins a new term: marks the doc index, marks the freq and position indexes
        /// when the index options call for them, remembers where payloads start and
        /// resets the skip list writer with the three indexes.
        /// </summary>
        public override void StartTerm()
        {
            bool hasFreqs = indexOptions != IndexOptions.DOCS_ONLY;
            bool hasPositions = indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;

            docIndex.Mark();

            if (hasFreqs)
            {
                freqIndex.Mark();
            }

            if (hasPositions)
            {
                posIndex.Mark();
                payloadStart = payloadOut.GetFilePointer();
                // -1 means no payload length has been recorded for this term yet
                lastPayloadLength = -1;
            }

            skipListWriter.ResetSkip(docIndex, freqIndex, posIndex);
        }
        /// <summary>
        /// On an explicit dispose with an open output, writes the field directory (one
        /// record per field whose FST is non-null), the trailer and the footer. The
        /// output is disposed and cleared afterwards whether or not writing succeeded.
        /// </summary>
        protected override void Dispose(bool disposing)
        {
            if (!disposing || m_output == null)
            {
                return;
            }

            try
            {
                long directoryStart = m_output.GetFilePointer();

                // Only fields that actually built an FST get a directory record.
                int recordCount = 0;
                foreach (FSTFieldWriter candidate in fields)
                {
                    if (candidate.fst != null)
                    {
                        recordCount++;
                    }
                }
                m_output.WriteVInt32(recordCount);

                foreach (FSTFieldWriter candidate in fields)
                {
                    if (candidate.fst == null)
                    {
                        continue;
                    }
                    m_output.WriteVInt32(candidate.fieldInfo.Number);
                    m_output.WriteVInt64(candidate.indexStart);
                }

                WriteTrailer(directoryStart);
                CodecUtil.WriteFooter(m_output);
            }
            finally
            {
                m_output.Dispose();
                m_output = null;
            }
        }
Example #24
0
        /// <summary>
        /// Writes the buffered skip lists to the given output, highest level first.
        /// Every level above 0 is preceded by its byte length and is omitted entirely
        /// when empty; level 0 is written last without a length prefix.
        /// </summary>
        /// <param name="output"> The <see cref="IndexOutput"/> the skip lists shall be written to. </param>
        /// <returns> The file pointer of <paramref name="output"/> at which the skip list starts. </returns>
        public virtual long WriteSkip(IndexOutput output)
        {
            long startPointer = output.GetFilePointer();

            bool nothingBuffered = skipBuffer == null || skipBuffer.Length == 0;
            if (!nothingBuffered)
            {
                int level = m_numberOfSkipLevels - 1;
                while (level > 0)
                {
                    var levelBuffer = skipBuffer[level];
                    long levelLength = levelBuffer.GetFilePointer();
                    if (levelLength > 0)
                    {
                        output.WriteVInt64(levelLength);
                        levelBuffer.WriteTo(output);
                    }
                    level--;
                }

                skipBuffer[0].WriteTo(output);
            }

            return startPointer;
        }
Example #25
0
 /// <summary>
 /// Starts a new document: adds an entry for it to the index stream (the current
 /// file pointer of the fields stream) and then writes the number of stored
 /// fields to the fields stream.
 /// </summary>
 public override void StartDocument(int numStoredFields)
 {
     long documentStart = fieldsStream.GetFilePointer();
     indexStream.WriteInt64(documentStart);
     fieldsStream.WriteVInt32(numStoredFields);
 }
Example #26
0
 /// <summary>
 /// Returns the file pointer reported by the wrapped <c>io</c> output.
 /// </summary>
 public override long GetFilePointer()
 {
     long position = io.GetFilePointer();
     return position;
 }
Example #27
0
 /// <summary>
 /// Returns the file pointer reported by <c>_cacheDirIndexOutput</c>.
 /// </summary>
 public override long GetFilePointer()
 {
     long position = _cacheDirIndexOutput.GetFilePointer();
     return position;
 }
 /// <summary>
 /// Returns the file pointer reported by <c>@delegate</c>.
 /// </summary>
 public override long GetFilePointer() => @delegate.GetFilePointer();
 /// <summary>
 /// Returns the file pointer reported by <c>_indexOutput</c>.
 /// </summary>
 public override long GetFilePointer()
 {
     long position = _indexOutput.GetFilePointer();
     return position;
 }
        /// <summary>
        /// Writes one numeric field: a metadata entry to <c>meta</c> followed by the
        /// encoded values to <c>data</c>.
        /// <para/>
        /// The encoding is chosen from statistics gathered in a first pass over
        /// <paramref name="values"/>:
        /// <list type="bullet">
        ///   <item><description><c>TABLE_COMPRESSED</c> when at most 256 distinct values were seen,
        ///   the value count fits in an <see cref="int"/>, and table ordinals need fewer bits
        ///   than the max-min spread (or that spread is negative).</description></item>
        ///   <item><description><c>GCD_COMPRESSED</c> when the tracked divisor is neither 0 nor 1.</description></item>
        ///   <item><description><c>DELTA_COMPRESSED</c> otherwise.</description></item>
        /// </list>
        /// </summary>
        /// <param name="field"> Field being written; only its <c>Number</c> is recorded. </param>
        /// <param name="values">
        /// Values to write. The sequence is enumerated more than once, and a <c>null</c>
        /// entry is encoded as 0.
        /// </param>
        /// <param name="optimizeStorage">
        /// When <c>false</c>, the first pass only counts the values, so no statistics are
        /// gathered, no missing bitset is written, and <c>DELTA_COMPRESSED</c> is always used.
        /// </param>
        /// <exception cref="InvalidOperationException"> If the selected format is not one of the three known formats. </exception>
        internal virtual void AddNumericField(FieldInfo field, IEnumerable<long?> values, bool optimizeStorage)
        {
            long total = 0;
            long lowest = long.MaxValue;
            long highest = long.MinValue;
            long commonDivisor = 0;
            bool sawNull = false;

            // TODO: more efficient?
            JCG.HashSet<long> distinct = null;

            if (!optimizeStorage)
            {
                // Only the number of values is needed on this path.
                foreach (var unused in values)
                {
                    ++total;
                }
            }
            else
            {
                distinct = new JCG.HashSet<long>();

                foreach (long? boxed in values)
                {
                    // A null entry is treated as 0 and remembered so the missing bitset is written later.
                    long current = boxed.GetValueOrDefault();
                    if (!boxed.HasValue)
                    {
                        sawNull = true;
                    }

                    if (commonDivisor != 1)
                    {
                        // For values in the outer halves of the long range, current - lowest could
                        // overflow and corrupt the GCD, so GCD tracking is abandoned for them.
                        bool outsideSafeRange = current < long.MinValue / 2 || current > long.MaxValue / 2;
                        if (outsideSafeRange)
                        {
                            commonDivisor = 1;
                        }
                        else if (total != 0)
                        {
                            // lowest only holds a real value once the first element has been seen
                            commonDivisor = MathUtil.Gcd(commonDivisor, current - lowest);
                        }
                    }

                    if (current < lowest)
                    {
                        lowest = current;
                    }
                    if (current > highest)
                    {
                        highest = current;
                    }

                    // Stop tracking distinct values once more than 256 have been collected.
                    if (distinct != null && distinct.Add(current) && distinct.Count > 256)
                    {
                        distinct = null;
                    }

                    ++total;
                }
            }

            long spread = highest - lowest;

            // The nesting keeps BitsRequired(spread) from being evaluated when spread is negative
            // (presumably the result of highest - lowest overflowing).
            bool tableIsSmaller = false;
            if (distinct != null)
            {
                if (spread < 0L || PackedInt32s.BitsRequired(distinct.Count - 1) < PackedInt32s.BitsRequired(spread))
                {
                    tableIsSmaller = total <= int.MaxValue;
                }
            }

            int format;
            if (tableIsSmaller)
            {
                format = TABLE_COMPRESSED;
            }
            else if (commonDivisor == 0 || commonDivisor == 1)
            {
                format = DELTA_COMPRESSED;
            }
            else
            {
                format = GCD_COMPRESSED;
            }

            // Metadata header: field number, entry type, format, missing-bitset offset (or -1),
            // packed-ints version, start of the value data, value count and block size.
            meta.WriteVInt32(field.Number);
            meta.WriteByte((byte)Lucene45DocValuesFormat.NUMERIC);
            meta.WriteVInt32(format);
            if (!sawNull)
            {
                meta.WriteInt64(-1L);
            }
            else
            {
                meta.WriteInt64(data.GetFilePointer());
                WriteMissingBitset(values);
            }
            meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
            meta.WriteInt64(data.GetFilePointer());
            meta.WriteVInt64(total);
            meta.WriteVInt32(BLOCK_SIZE);

            if (format == GCD_COMPRESSED)
            {
                // Store (value - lowest) / divisor; lowest and the divisor go into the metadata.
                meta.WriteInt64(lowest);
                meta.WriteInt64(commonDivisor);
                var quotients = new BlockPackedWriter(data, BLOCK_SIZE);
                foreach (long? boxed in values)
                {
                    quotients.Add((boxed.GetValueOrDefault() - lowest) / commonDivisor);
                }
                quotients.Finish();
            }
            else if (format == DELTA_COMPRESSED)
            {
                var blocks = new BlockPackedWriter(data, BLOCK_SIZE);
                foreach (long? boxed in values)
                {
                    blocks.Add(boxed.GetValueOrDefault());
                }
                blocks.Finish();
            }
            else if (format == TABLE_COMPRESSED)
            {
                // LUCENENET NOTE: allocating an array and then using .CopyTo() for better efficiency than LINQ .ToArray()
                var table = new long[distinct.Count];
                distinct.CopyTo(table, 0);

                // The table goes into the metadata; each value is then stored as its table ordinal.
                var ordinalOf = new Dictionary<long, int>();
                meta.WriteVInt32(table.Length);
                for (int ord = 0; ord < table.Length; ord++)
                {
                    long entry = table[ord];
                    meta.WriteInt64(entry);
                    ordinalOf[entry] = ord;
                }

                int bitsPerOrdinal = PackedInt32s.BitsRequired(distinct.Count - 1);
                PackedInt32s.Writer ordinals = PackedInt32s.GetWriterNoHeader(data, PackedInt32s.Format.PACKED, (int)total, bitsPerOrdinal, PackedInt32s.DEFAULT_BUFFER_SIZE);
                foreach (long? boxed in values)
                {
                    ordinals.Add(ordinalOf[boxed.GetValueOrDefault()]);
                }
                ordinals.Finish();
            }
            else
            {
                throw new InvalidOperationException();
            }
        }