Exemple #1
0
        /// <summary>
        /// Appends one &lt;&lt;fieldNumber, termBytes&gt;, TermInfo&gt; pair to the set.
        /// The term must sort after every term added before it, and the freq/prox
        /// pointers of <paramref name="ti"/> must not be smaller than those of the
        /// previous entry (both conditions are only checked through debug assertions).
        /// </summary>
        /// <param name="fieldNumber">Number of the field that <paramref name="term"/> belongs to.</param>
        /// <param name="term">Bytes of the term being added.</param>
        /// <param name="ti">Doc frequency, freq/prox pointers and skip offset to record for the term.</param>
        public void Add(int fieldNumber, BytesRef term, TermInfo ti)
        {
            Debug.Assert(
                CompareToLastTerm(fieldNumber, term) < 0 || (IsIndex && term.Length == 0 && LastTerm.Length == 0),
                "Terms are out of order: field=" + FieldName(FieldInfos, fieldNumber) + " (number " + fieldNumber + ")"
                + " lastField=" + FieldName(FieldInfos, LastFieldNumber) + " (number " + LastFieldNumber + ")"
                + " text=" + term.Utf8ToString() + " lastText=" + LastTerm.Utf8ToString());

            Debug.Assert(
                ti.FreqPointer >= LastTi.FreqPointer,
                "freqPointer out of order (" + ti.FreqPointer + " < " + LastTi.FreqPointer + ")");
            Debug.Assert(
                ti.ProxPointer >= LastTi.ProxPointer,
                "proxPointer out of order (" + ti.ProxPointer + " < " + LastTi.ProxPointer + ")");

            // On the non-index writer, every IndexInterval-th call first hands the
            // previous entry to the companion writer as an index term.
            bool emitIndexTerm = !IsIndex && Size % IndexInterval == 0;
            if (emitIndexTerm)
            {
                Other.Add(LastFieldNumber, LastTerm, LastTi);
            }

            // Term bytes first, then the statistics.
            WriteTerm(fieldNumber, term);
            Output.WriteVInt(ti.DocFreq);

            // Pointers are stored as deltas against the previous entry.
            long freqDelta = ti.FreqPointer - LastTi.FreqPointer;
            Output.WriteVLong(freqDelta);
            long proxDelta = ti.ProxPointer - LastTi.ProxPointer;
            Output.WriteVLong(proxDelta);

            // The skip offset is only written once the doc frequency reaches SkipInterval.
            if (ti.DocFreq >= SkipInterval)
            {
                Output.WriteVInt(ti.SkipOffset);
            }

            if (IsIndex)
            {
                // Index entries also store the companion output's position, again as a delta.
                long otherPosition = Other.Output.FilePointer;
                Output.WriteVLong(otherPosition - LastIndexPointer);
                LastIndexPointer = otherPosition;
            }

            // Remember this entry so the next call can delta-encode against it.
            LastFieldNumber = fieldNumber;
            LastTi.Set(ti);
            Size++;
        }
 /// <summary>
 /// Writes this field's statistics followed by the finished FST to the output.
 /// Nothing is written when no terms were counted (termCount is not positive).
 /// </summary>
 /// <param name="sumTotalTermFreq">Written only when the field's index options are not DOCS_ONLY.</param>
 /// <param name="sumDocFreq">Written after the optional total term frequency.</param>
 /// <param name="docCount">Written last, just before the FST.</param>
 public override void Finish(long sumTotalTermFreq, long sumDocFreq, int docCount)
 {
     if (termCount <= 0)
     {
         return;
     }

     // Header: term count and field number.
     @out.WriteVInt(termCount);
     @out.WriteVInt(field.Number);

     // Statistics.
     bool includeTotalTermFreq = field.FieldIndexOptions != IndexOptions.DOCS_ONLY;
     if (includeTotalTermFreq)
     {
         @out.WriteVLong(sumTotalTermFreq);
     }
     @out.WriteVLong(sumDocFreq);
     @out.WriteVInt(docCount);

     // Finish the builder and serialize the resulting FST right after the statistics.
     builder.Finish().Save(@out);
 }
        /// <summary>
        /// Writes the current RefCounts entries to a new file named
        /// SNAPSHOTS_PREFIX + NextWriteGen, syncs that file, attempts to delete the
        /// file of the previous generation, and finally increments NextWriteGen.
        /// The whole operation runs while holding the lock on this instance.
        /// </summary>
        private void Persist()
        {
            lock (this)
            {
                string snapshotFile = SNAPSHOTS_PREFIX + NextWriteGen;
                IndexOutput snapshotOut = Dir.CreateOutput(snapshotFile, IOContext.DEFAULT);
                bool written = false;
                try
                {
                    // Header, entry count, then one (key, value) pair per entry.
                    CodecUtil.WriteHeader(snapshotOut, CODEC_NAME, VERSION_CURRENT);
                    snapshotOut.WriteVInt(RefCounts.Count);
                    foreach (KeyValuePair<long, int> entry in RefCounts)
                    {
                        snapshotOut.WriteVLong(entry.Key);
                        snapshotOut.WriteVInt(entry.Value);
                    }
                    written = true;
                }
                finally
                {
                    if (written)
                    {
                        IOUtils.Close(snapshotOut);
                    }
                    else
                    {
                        // Writing failed: close quietly and try to remove the partial file.
                        IOUtils.CloseWhileHandlingException(snapshotOut);
                        try
                        {
                            Dir.DeleteFile(snapshotFile);
                        }
                        catch (Exception)
                        {
                            // Suppress so we keep throwing original exception
                        }
                    }
                }

                Dir.Sync(new[] { snapshotFile });

                if (NextWriteGen > 0)
                {
                    string previousFile = SNAPSHOTS_PREFIX + (NextWriteGen - 1);
                    try
                    {
                        Dir.DeleteFile(previousFile);
                    }
                    catch (IOException)
                    {
                        // OK: likely it didn't exist
                    }
                }

                NextWriteGen++;
            }
        }
        /// <summary>
        /// Appends one &lt;&lt;fieldNumber, termBytes&gt;, TermInfo&gt; pair to the set.
        /// The term must sort after every term added before it, and the freq/prox
        /// pointers of <paramref name="ti"/> must not be smaller than those of the
        /// previous entry (both conditions are only checked through debug assertions).
        /// </summary>
        /// <param name="fieldNumber">Number of the field the term belongs to.</param>
        /// <param name="termBytes">Buffer holding the UTF-8 bytes of the term.</param>
        /// <param name="termBytesLength">Number of valid bytes in <paramref name="termBytes"/>.</param>
        /// <param name="ti">Doc frequency, freq/prox pointers and skip offset to record for the term.</param>
        internal void Add(int fieldNumber, byte[] termBytes, int termBytesLength, TermInfo ti)
        {
            System.Diagnostics.Debug.Assert(
                CompareToLastTerm(fieldNumber, termBytes, termBytesLength) < 0 || (isIndex && termBytesLength == 0 && lastTermBytesLength == 0),
                "Terms are out of order: field=" + fieldInfos.FieldName(fieldNumber) + " (number " + fieldNumber + ")"
                + " lastField=" + fieldInfos.FieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")"
                + " text=" + System.Text.Encoding.UTF8.GetString(termBytes, 0, termBytesLength)
                + " lastText=" + System.Text.Encoding.UTF8.GetString(lastTermBytes, 0, lastTermBytesLength));

            System.Diagnostics.Debug.Assert(
                ti.freqPointer >= lastTi.freqPointer,
                "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")");
            System.Diagnostics.Debug.Assert(
                ti.proxPointer >= lastTi.proxPointer,
                "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")");

            // On the non-index writer, every indexInterval-th call first hands the
            // previous entry to the companion writer as an index term.
            bool emitIndexTerm = !isIndex && size % indexInterval == 0;
            if (emitIndexTerm)
            {
                other.Add(lastFieldNumber, lastTermBytes, lastTermBytesLength, lastTi);
            }

            // Term bytes first, then the statistics.
            WriteTerm(fieldNumber, termBytes, termBytesLength);
            output.WriteVInt(ti.docFreq);

            // Pointers are stored as deltas against the previous entry.
            long freqDelta = ti.freqPointer - lastTi.freqPointer;
            output.WriteVLong(freqDelta);
            long proxDelta = ti.proxPointer - lastTi.proxPointer;
            output.WriteVLong(proxDelta);

            // The skip offset is only written once the doc frequency reaches skipInterval.
            if (ti.docFreq >= skipInterval)
            {
                output.WriteVInt(ti.skipOffset);
            }

            if (isIndex)
            {
                // Index entries also store the companion output's position, again as a delta.
                long otherPosition = other.output.FilePointer;
                output.WriteVLong(otherPosition - lastIndexPointer);
                lastIndexPointer = otherPosition;
            }

            // Remember this entry so the next call can delta-encode against it.
            lastFieldNumber = fieldNumber;
            lastTi.Set(ti);
            size++;
        }
Exemple #5
0
        /// <summary>
        /// Writes a variable-length "straight" bytes field: the non-null values are
        /// concatenated into <paramref name="data"/>, and the start address of every
        /// value plus one trailing sentinel address is written to
        /// <paramref name="index"/> through a packed-ints writer.
        /// </summary>
        /// <remarks>
        /// NOTE: 4.0 file format docs are crazy/wrong here...
        /// <paramref name="values"/> is enumerated twice, once per output.
        /// </remarks>
        /// <param name="field">Field being written; receives the legacy doc-values type attribute.</param>
        /// <param name="data">Output receiving the codec header and the raw value bytes.</param>
        /// <param name="index">Output receiving the codec header, the total length and the addresses.</param>
        /// <param name="values">Values to write; null entries contribute no bytes.</param>
        private void AddVarStraightBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values)
        {
            field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_STRAIGHT.Name);

            CodecUtil.WriteHeader(
                data,
                Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_DAT,
                Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
            CodecUtil.WriteHeader(
                index,
                Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX,
                Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);

            // ---- values: append the bytes of every non-null entry ----
            long valuesStart = data.FilePointer;
            foreach (BytesRef value in values)
            {
                if (value != null)
                {
                    data.WriteBytes(value.Bytes, value.Offset, value.Length);
                }
            }

            // ---- addresses: total length first, then one offset per entry ----
            long totalLength = data.FilePointer - valuesStart;
            index.WriteVLong(totalLength);

            int docCount = State.SegmentInfo.DocCount;
            Debug.Assert(docCount != int.MaxValue); // unsupported by the 4.0 impl

            // One slot per document plus one for the sentinel written below.
            PackedInts.Writer addressWriter = PackedInts.GetWriter(
                index,
                docCount + 1,
                PackedInts.BitsRequired(totalLength),
                PackedInts.DEFAULT);

            long address = 0;
            foreach (BytesRef value in values)
            {
                addressWriter.Add(address);
                address += value != null ? value.Length : 0;
            }

            // Sentinel: the end address, expected to equal the number of bytes written above.
            Debug.Assert(address == totalLength);
            addressWriter.Add(address);
            addressWriter.Finish();
        }
        /// <summary>
        /// Writes the field directory (one entry per field whose Fst is non-null),
        /// the trailer and the codec footer, then disposes Output and sets it to null.
        /// Returns immediately when Output is already null.
        /// </summary>
        public override void Dispose()
        {
            if (Output == null)
            {
                return;
            }

            try
            {
                long directoryStart = Output.FilePointer;

                // The entry count has to be written before the entries themselves,
                // hence the separate counting pass.
                int writtenFieldCount = 0;
                foreach (FstFieldWriter candidate in _fields)
                {
                    if (candidate.Fst != null)
                    {
                        writtenFieldCount++;
                    }
                }
                Output.WriteVInt(writtenFieldCount);

                // One (field number, index start) pair per field that has an FST.
                foreach (FstFieldWriter candidate in _fields)
                {
                    if (candidate.Fst != null)
                    {
                        Output.WriteVInt(candidate.FieldInfo.Number);
                        Output.WriteVLong(candidate.IndexStart);
                    }
                }

                WriteTrailer(directoryStart);
                CodecUtil.WriteFooter(Output);
            }
            finally
            {
                // Always release the output, even when writing failed.
                Output.Dispose();
                Output = null;
            }
        }
Exemple #7
0
        /// <summary>
        /// Appends the buffered term vectors of one document to the tvx/tvd/tvf
        /// outputs, then resets <paramref name="perDoc"/> and hands it to Free.
        /// Runs while holding the lock on this instance.
        /// </summary>
        /// <param name="perDoc">Per-document state holding the field numbers, field pointers and buffered tvf data.</param>
        internal void FinishDocument(PerDoc perDoc)
        {
            lock (this)
            {
                System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument start"));

                InitTermVectorsWriter();
                Fill(perDoc.docID);

                // Append term vectors to the real outputs: tvx records where this
                // document's data begins in tvd and tvf.
                tvx.WriteLong(tvd.FilePointer);
                tvx.WriteLong(tvf.FilePointer);

                int vectorFieldCount = perDoc.numVectorFields;
                tvd.WriteVInt(vectorFieldCount);
                if (vectorFieldCount > 0)
                {
                    // Field numbers first ...
                    for (int f = 0; f < vectorFieldCount; f++)
                    {
                        tvd.WriteVInt(perDoc.fieldNumbers[f]);
                    }

                    // ... then the field pointers as deltas; the first pointer is
                    // asserted to be zero and is not written.
                    System.Diagnostics.Debug.Assert(0 == perDoc.fieldPointers[0]);
                    long previousPointer = perDoc.fieldPointers[0];
                    for (int f = 1; f < vectorFieldCount; f++)
                    {
                        long currentPointer = perDoc.fieldPointers[f];
                        tvd.WriteVLong(currentPointer - previousPointer);
                        previousPointer = currentPointer;
                    }

                    perDoc.perDocTvf.WriteTo(tvf);
                    perDoc.numVectorFields = 0;
                }

                System.Diagnostics.Debug.Assert(lastDocID == perDoc.docID + docWriter.DocStoreOffset);

                lastDocID++;
                perDoc.Reset();
                Free(perDoc);
                System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument end"));
            }
        }
        /// <summary>
        /// Copies the buffered skip lists into <paramref name="output"/>.
        /// Levels above zero are written from the highest down, each preceded by
        /// its length and skipped entirely when empty; level zero is written last
        /// without a length prefix.
        /// </summary>
        /// <param name="output">the IndexOutput the skip lists shall be written to</param>
        /// <returns>
        /// the file pointer of <paramref name="output"/> at the moment of the call,
        /// i.e. where the skip data starts (if any is written)
        /// </returns>
        internal virtual long WriteSkip(IndexOutput output)
        {
            long startPointer = output.FilePointer;

            bool hasBuffers = skipBuffer != null && skipBuffer.Length != 0;
            if (hasBuffers)
            {
                for (int level = numberOfSkipLevels - 1; level > 0; level--)
                {
                    long levelLength = skipBuffer[level].FilePointer;
                    if (levelLength > 0)
                    {
                        output.WriteVLong(levelLength);
                        skipBuffer[level].WriteTo(output);
                    }
                }

                skipBuffer[0].WriteTo(output);
            }

            return startPointer;
        }
Exemple #9
0
        /// <summary>
        /// Copies the buffered skip lists into <paramref name="output"/>.
        /// Levels above zero are written from the highest down, each preceded by
        /// its length and skipped entirely when empty; level zero is written last
        /// without a length prefix.
        /// </summary>
        /// <param name="output"> the IndexOutput the skip lists shall be written to </param>
        /// <returns>
        /// the file pointer of <paramref name="output"/> at the moment of the call,
        /// i.e. where the skip data starts (if any is written)
        /// </returns>
        public virtual long WriteSkip(IndexOutput output)
        {
            long startPointer = output.FilePointer;

            // Nothing buffered: report the current position and write nothing.
            if (SkipBuffer == null || SkipBuffer.Length == 0)
            {
                return startPointer;
            }

            int level = NumberOfSkipLevels - 1;
            while (level > 0)
            {
                long levelLength = SkipBuffer[level].FilePointer;
                level--;
                if (levelLength <= 0)
                {
                    continue;
                }

                // 'level' was already decremented, so the buffer just measured is level + 1.
                output.WriteVLong(levelLength);
                SkipBuffer[level + 1].WriteTo(output);
            }

            SkipBuffer[0].WriteTo(output);
            return startPointer;
        }
Exemple #10
0
        /// <summary>
        /// Writes the index data for the current block of chunks to FieldsIndexOut.
        /// The block consists of the chunk count, then a "doc bases" section and a
        /// "start pointers" section. Each section stores a base value, an average
        /// per-chunk increment, a bit width, and one packed value per chunk holding
        /// the difference between the running total and (average * chunk index).
        /// </summary>
        private void WriteBlock()
        {
            Debug.Assert(BlockChunks > 0);
            FieldsIndexOut.WriteVInt(BlockChunks);

            // The trick here is that we only store the difference from the average start
            // pointer or doc base, this helps save bits per value.
            // And in order to prevent a few chunks that would be far from the average to
            // raise the number of bits per value for all of them, we only encode blocks
            // of 1024 chunks at once
            // See LUCENE-4512

            // doc bases
            int avgChunkDocs;

            if (BlockChunks == 1)
            {
                // A single chunk has no "next" doc base to average against.
                avgChunkDocs = 0;
            }
            else
            {
                // Average over the first BlockChunks - 1 chunks: the last chunk's doc
                // count is subtracted from the block total before dividing.
                // NOTE(review): Math.Round without a MidpointRounding argument rounds
                // midpoints to the nearest even value, whereas Java's Math.round rounds
                // them up. If byte-for-byte parity with a Java implementation matters,
                // confirm which rounding is intended here.
                avgChunkDocs = (int)Math.Round((float)(BlockDocs - DocBaseDeltas[BlockChunks - 1]) / (BlockChunks - 1));
            }
            FieldsIndexOut.WriteVInt(TotalDocs - BlockDocs); // docBase
            FieldsIndexOut.WriteVInt(avgChunkDocs);
            int  docBase  = 0;
            long maxDelta = 0;

            // First pass: OR all encoded deltas together so that the bit width chosen
            // below is large enough for every one of them.
            for (int i = 0; i < BlockChunks; ++i)
            {
                // Delta is taken BEFORE adding chunk i, i.e. it describes the doc base
                // at the start of chunk i.
                int delta = docBase - avgChunkDocs * i;
                maxDelta |= MoveSignToLowOrderBit(delta);
                docBase  += DocBaseDeltas[i];
            }

            int bitsPerDocBase = PackedInts.BitsRequired(maxDelta);

            FieldsIndexOut.WriteVInt(bitsPerDocBase);
            PackedInts.Writer writer = PackedInts.GetWriterNoHeader(FieldsIndexOut, PackedInts.Format.PACKED, BlockChunks, bitsPerDocBase, 1);
            // Second pass: recompute the same deltas and write them.
            docBase = 0;
            for (int i = 0; i < BlockChunks; ++i)
            {
                long delta = docBase - avgChunkDocs * i;
                Debug.Assert(PackedInts.BitsRequired(MoveSignToLowOrderBit(delta)) <= writer.BitsPerValue());
                writer.Add(MoveSignToLowOrderBit(delta));
                docBase += DocBaseDeltas[i];
            }
            writer.Finish();

            // start pointers
            FieldsIndexOut.WriteVLong(FirstStartPointer);
            long avgChunkSize;

            if (BlockChunks == 1)
            {
                avgChunkSize = 0;
            }
            else
            {
                // Integer division: the average is truncated, not rounded.
                avgChunkSize = (MaxStartPointer - FirstStartPointer) / (BlockChunks - 1);
            }
            FieldsIndexOut.WriteVLong(avgChunkSize);
            long startPointer = 0;

            // First pass over the start pointers: determine the required bit width.
            maxDelta = 0;
            for (int i = 0; i < BlockChunks; ++i)
            {
                // Unlike the doc-base loops, the running total is advanced BEFORE the
                // delta is computed.
                startPointer += StartPointerDeltas[i];
                long delta = startPointer - avgChunkSize * i;
                maxDelta |= MoveSignToLowOrderBit(delta);
            }

            int bitsPerStartPointer = PackedInts.BitsRequired(maxDelta);

            FieldsIndexOut.WriteVInt(bitsPerStartPointer);
            writer       = PackedInts.GetWriterNoHeader(FieldsIndexOut, PackedInts.Format.PACKED, BlockChunks, bitsPerStartPointer, 1);
            // Second pass: recompute the same deltas and write them.
            startPointer = 0;
            for (int i = 0; i < BlockChunks; ++i)
            {
                startPointer += StartPointerDeltas[i];
                long delta = startPointer - avgChunkSize * i;
                Debug.Assert(PackedInts.BitsRequired(MoveSignToLowOrderBit(delta)) <= writer.BitsPerValue());
                writer.Add(MoveSignToLowOrderBit(delta));
            }
            writer.Finish();
        }
Exemple #11
0
        /// <summary>
        /// Adds a complete document specified by all its term vectors. The tvx
        /// entry (current tvd and tvf positions) is always written; when
        /// <paramref name="vectors"/> is null only a field count of zero follows in tvd.
        /// </summary>
        /// <param name="vectors">
        /// Term vectors of the document, one per field, or null when the document has none.
        /// </param>
        /// <exception cref="System.InvalidOperationException">
        /// Positions or offsets were detected on the first term of a field but are
        /// null for a later term of that field.
        /// </exception>
        /// <exception cref="System.IO.IOException">
        /// Presumably raised by the underlying outputs on write failure; nothing is caught here.
        /// </exception>
        public void AddAllDocVectors(ITermFreqVector[] vectors)
        {
            tvx.WriteLong(tvd.FilePointer);
            tvx.WriteLong(tvf.FilePointer);

            if (vectors == null)
            {
                tvd.WriteVInt(0);
                return;
            }

            int numFields = vectors.Length;
            tvd.WriteVInt(numFields);

            var fieldPointers = new long[numFields];

            for (int i = 0; i < numFields; i++)
            {
                ITermFreqVector vector = vectors[i];

                // Remember where this field's data starts in tvf for the 2nd pass.
                fieldPointers[i] = tvf.FilePointer;

                int fieldNumber = fieldInfos.FieldNumber(vector.Field);

                // 1st pass: write field numbers to tvd
                tvd.WriteVInt(fieldNumber);

                int numTerms = vector.Size;
                tvf.WriteVInt(numTerms);

                // May have positions & offsets. Whether they are stored is decided
                // from the first term only; later terms must then agree (see the
                // exceptions thrown below).
                TermPositionVector tpVector = vector as TermPositionVector;
                bool storePositions = false;
                bool storeOffsets = false;
                byte bits = 0;

                if (tpVector != null)
                {
                    bool hasTerms = tpVector.Size > 0;
                    storePositions = hasTerms && tpVector.GetTermPositions(0) != null;
                    storeOffsets = hasTerms && tpVector.GetOffsets(0) != null;
                    bits = (byte)((storePositions ? TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR : (byte)0)
                                  + (storeOffsets ? TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR : (byte)0));
                }

                tvf.WriteVInt(bits);

                System.String[] terms = vector.GetTerms();
                int[] freqs = vector.GetTermFrequencies();

                // Two UTF-8 buffers are used alternately so each term can be
                // prefix-compared against the previous one. Resetting the "previous"
                // buffer's length makes the first term share no prefix.
                int utf8Upto = 0;
                utf8Results[1].length = 0;

                for (int j = 0; j < numTerms; j++)
                {
                    UnicodeUtil.UTF16toUTF8(terms[j], 0, terms[j].Length, utf8Results[utf8Upto]);

                    int start = StringHelper.BytesDifference(
                        utf8Results[1 - utf8Upto].result,
                        utf8Results[1 - utf8Upto].length,
                        utf8Results[utf8Upto].result,
                        utf8Results[utf8Upto].length);
                    int length = utf8Results[utf8Upto].length - start;
                    tvf.WriteVInt(start);                                        // write shared prefix length
                    tvf.WriteVInt(length);                                       // write delta length
                    tvf.WriteBytes(utf8Results[utf8Upto].result, start, length); // write delta bytes
                    utf8Upto = 1 - utf8Upto;

                    int termFreq = freqs[j];

                    tvf.WriteVInt(termFreq);

                    if (storePositions)
                    {
                        int[] positions = tpVector.GetTermPositions(j);
                        if (positions == null)
                        {
                            // InvalidOperationException derives from SystemException, so
                            // callers catching the previously thrown type still match.
                            throw new System.InvalidOperationException("Trying to write positions that are null!");
                        }
                        System.Diagnostics.Debug.Assert(positions.Length == termFreq);

                        // use delta encoding for positions
                        int lastPosition = 0;
                        foreach (int position in positions)
                        {
                            tvf.WriteVInt(position - lastPosition);
                            lastPosition = position;
                        }
                    }

                    if (storeOffsets)
                    {
                        TermVectorOffsetInfo[] offsets = tpVector.GetOffsets(j);
                        if (offsets == null)
                        {
                            throw new System.InvalidOperationException("Trying to write offsets that are null!");
                        }
                        System.Diagnostics.Debug.Assert(offsets.Length == termFreq);

                        // use delta encoding for offsets: start relative to the previous
                        // end, end relative to its own start
                        int lastEndOffset = 0;
                        foreach (TermVectorOffsetInfo t in offsets)
                        {
                            int startOffset = t.StartOffset;
                            int endOffset = t.EndOffset;
                            tvf.WriteVInt(startOffset - lastEndOffset);
                            tvf.WriteVInt(endOffset - startOffset);
                            lastEndOffset = endOffset;
                        }
                    }
                }
            }

            // 2nd pass: write field pointers to tvd as deltas. The first field's
            // pointer is not written, so nothing is emitted for zero or one field.
            if (numFields > 1)
            {
                long lastFieldPointer = fieldPointers[0];
                for (int i = 1; i < numFields; i++)
                {
                    long fieldPointer = fieldPointers[i];
                    tvd.WriteVLong(fieldPointer - lastFieldPointer);
                    lastFieldPointer = fieldPointer;
                }
            }
        }