Пример #1
0
        /// <summary> Writes a single term to <c>output</c> as a delta against the previously
        /// written term (shared prefix length, remaining length, remaining chars), followed
        /// by the number of the term's field.
        /// </summary>
        /// <param name="term">the term to write; it is remembered as the reference term for
        /// the next call
        /// </param>
        private void WriteTerm(Term term)
        {
            System.String termText = term.text;

            // Split the text into the part shared with the last term and the part that differs.
            int sharedPrefix = StringHelper.StringDifference(lastTerm.text, termText);
            int suffixLength = termText.Length - sharedPrefix;

            output.WriteVInt(sharedPrefix);                        // shared prefix length
            output.WriteVInt(suffixLength);                        // delta length
            output.WriteChars(termText, sharedPrefix, suffixLength); // delta chars
            output.WriteVInt(fieldInfos.FieldNumber(term.field));  // field number

            lastTerm = term;
        }
Пример #2
0
        /// <summary> Writes the buffered terms of the current field to the tvf stream: the
        /// term count, a flags byte recording whether positions and/or offsets follow, and
        /// then every term as a delta against the previous term text plus its frequency.
        /// Positions and offsets, when stored, are delta-encoded per term.
        /// </summary>
        /// <exception cref="System.InvalidOperationException">if positions (or offsets) are
        /// to be stored but a term carries none. The type derives from
        /// System.SystemException, so handlers catching SystemException still match.
        /// </exception>
        private void WriteField()
        {
            // remember where this field is written
            currentField.tvfPointer = tvf.GetFilePointer();

            int size = terms.Count;
            tvf.WriteVInt(size);

            bool storePositions = currentField.storePositions;
            bool storeOffsets   = currentField.storeOffsets;

            byte bits = (byte)(0x0);
            if (storePositions)
            {
                bits |= STORE_POSITIONS_WITH_TERMVECTOR;
            }
            if (storeOffsets)
            {
                bits |= STORE_OFFSET_WITH_TERMVECTOR;
            }
            tvf.WriteByte(bits);

            System.String lastTermText = "";
            for (int i = 0; i < size; i++)
            {
                TVTerm term   = (TVTerm)terms[i];
                int    start  = StringHelper.StringDifference(lastTermText, term.termText);
                int    length = term.termText.Length - start;
                tvf.WriteVInt(start);                         // write shared prefix length
                tvf.WriteVInt(length);                        // write delta length
                tvf.WriteChars(term.termText, start, length); // write delta chars
                tvf.WriteVInt(term.freq);
                lastTermText = term.termText;

                if (storePositions)
                {
                    if (term.positions == null)
                    {
                        // A specific exception type instead of the reserved SystemException base.
                        throw new System.InvalidOperationException("Trying to write positions that are null!");
                    }

                    // use delta encoding for positions
                    int lastPosition = 0;
                    for (int j = 0; j < term.freq; j++)
                    {
                        int position = term.positions[j];
                        tvf.WriteVInt(position - lastPosition);
                        lastPosition = position;
                    }
                }

                if (storeOffsets)
                {
                    if (term.offsets == null)
                    {
                        throw new System.InvalidOperationException("Trying to write offsets that are null!");
                    }

                    // use delta encoding for offsets: each start is written relative to the
                    // previous end, and each end is written as the length of the span
                    int lastEndOffset = 0;
                    for (int j = 0; j < term.freq; j++)
                    {
                        int startOffset = term.offsets[j].GetStartOffset();
                        int endOffset   = term.offsets[j].GetEndOffset();
                        tvf.WriteVInt(startOffset - lastEndOffset);
                        tvf.WriteVInt(endOffset - startOffset);
                        lastEndOffset = endOffset;
                    }
                }
            }
        }
Пример #3
0
        /// <summary> Add a complete document specified by all its term vectors. If the document
        /// has no term vectors (<c>vectors</c> is null), only the tvx pointer and a zero field
        /// count are written.
        /// </summary>
        /// <param name="vectors">the term vectors of the document, one per field, or null
        /// </param>
        /// <exception cref="System.InvalidOperationException">if the first term of a vector
        /// has positions (or offsets) but a later term of the same vector returns null for
        /// them. The type derives from System.SystemException, so handlers catching
        /// SystemException still match.
        /// </exception>
        /// <exception cref="System.IO.IOException">presumably propagated from the underlying
        /// output streams, as stated by the original documentation
        /// </exception>
        public void  AddAllDocVectors(TermFreqVector[] vectors)
        {
            tvx.WriteLong(tvd.GetFilePointer());

            if (vectors == null)
            {
                // No term vectors for this document: record an empty field list.
                tvd.WriteVInt(0);
                return;
            }

            int numFields = vectors.Length;
            tvd.WriteVInt(numFields);

            long[] fieldPointers = new long[numFields];

            for (int i = 0; i < numFields; i++)
            {
                TermFreqVector vector = vectors[i];

                fieldPointers[i] = tvf.GetFilePointer();

                int fieldNumber = fieldInfos.FieldNumber(vector.GetField());

                // 1st pass: write field numbers to tvd
                tvd.WriteVInt(fieldNumber);

                int numTerms = vector.Size();
                tvf.WriteVInt(numTerms);

                byte bits;
                bool storePositions;
                bool storeOffsets;

                // Null unless the vector may also carry positions & offsets.
                TermPositionVector tpVector = vector as TermPositionVector;
                if (tpVector != null)
                {
                    // Whether positions/offsets are stored is decided from the first term only.
                    storePositions = tpVector.Size() > 0 && tpVector.GetTermPositions(0) != null;
                    storeOffsets   = tpVector.Size() > 0 && tpVector.GetOffsets(0) != null;
                    bits           = (byte)((storePositions ? TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR : (byte)0) + (storeOffsets ? TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR : (byte)0));
                }
                else
                {
                    bits           = 0;
                    storePositions = false;
                    storeOffsets   = false;
                }

                tvf.WriteVInt(bits);

                System.String[] terms = vector.GetTerms();
                int[]           freqs = vector.GetTermFrequencies();

                System.String lastTermText = "";
                for (int j = 0; j < numTerms; j++)
                {
                    System.String termText = terms[j];
                    int           start    = StringHelper.StringDifference(lastTermText, termText);
                    int           length   = termText.Length - start;
                    tvf.WriteVInt(start);                         // write shared prefix length
                    tvf.WriteVInt(length);                        // write delta length
                    tvf.WriteChars(termText, start, length);      // write delta chars
                    lastTermText = termText;

                    int termFreq = freqs[j];

                    tvf.WriteVInt(termFreq);

                    if (storePositions)
                    {
                        int[] positions = tpVector.GetTermPositions(j);
                        if (positions == null)
                        {
                            // A specific exception type instead of the reserved SystemException base.
                            throw new System.InvalidOperationException("Trying to write positions that are null!");
                        }
                        System.Diagnostics.Debug.Assert(positions.Length == termFreq);

                        // use delta encoding for positions
                        int lastPosition = 0;
                        for (int k = 0; k < positions.Length; k++)
                        {
                            int position = positions[k];
                            tvf.WriteVInt(position - lastPosition);
                            lastPosition = position;
                        }
                    }

                    if (storeOffsets)
                    {
                        TermVectorOffsetInfo[] offsets = tpVector.GetOffsets(j);
                        if (offsets == null)
                        {
                            throw new System.InvalidOperationException("Trying to write offsets that are null!");
                        }
                        System.Diagnostics.Debug.Assert(offsets.Length == termFreq);

                        // use delta encoding for offsets: each start is written relative to
                        // the previous end, and each end is written as the length of the span
                        int lastEndOffset = 0;
                        for (int k = 0; k < offsets.Length; k++)
                        {
                            int startOffset = offsets[k].GetStartOffset();
                            int endOffset   = offsets[k].GetEndOffset();
                            tvf.WriteVInt(startOffset - lastEndOffset);
                            tvf.WriteVInt(endOffset - startOffset);
                            lastEndOffset = endOffset;
                        }
                    }
                }
            }

            // 2nd pass: write field pointers to tvd
            long lastFieldPointer = 0;
            for (int i = 0; i < numFields; i++)
            {
                long fieldPointer = fieldPointers[i];
                tvd.WriteVLong(fieldPointer - lastFieldPointer);
                lastFieldPointer = fieldPointer;
            }
        }