Exemple #1
0
        /// <summary>Advances the enumeration by one entry.</summary>
        /// <returns>true when another term was read from the input; false once the
        /// enumeration is exhausted, in which case the current term is reset to null.</returns>
        public override bool Next()
        {
            // The position counter is bumped on every call, including the one that fails.
            bool exhausted = position++ >= size - 1;
            if (exhausted)
            {
                term = null;
                return false;
            }

            // Remember the outgoing term before replacing it with the next one.
            prev = term;
            term = ReadTerm();

            // Every read below consumes the next value from the input, so the
            // statement order must not be changed.
            termInfo.docFreq = input.ReadVInt();
            // Both pointers are accumulated onto their previous values.
            termInfo.freqPointer += input.ReadVLong();
            termInfo.proxPointer += input.ReadVLong();

            bool skipOffsetStored;
            if (format == -1)
            {
                // In this format the skip offset is read only to move the file pointer
                // forward; the value itself is never used since skipTo is switched off.
                skipOffsetStored = !isIndex && termInfo.docFreq > formatM1SkipInterval;
            }
            else
            {
                skipOffsetStored = termInfo.docFreq >= skipInterval;
            }

            if (skipOffsetStored)
            {
                termInfo.skipOffset = input.ReadVInt();
            }

            // Index enumerations carry one extra value per entry: the index pointer.
            if (isIndex)
            {
                indexPointer += input.ReadVLong();
            }

            return true;
        }
Exemple #2
0
        /// <summary> Looks up the term vector stored for a single field of a single document.</summary>
        /// <param name="docNum">The document number to retrieve the vector for
        /// </param>
        /// <param name="field">The field within the document to retrieve
        /// </param>
        /// <returns> The TermFreqVector for the document and field, or null when there is no
        /// tvx input, the field does not occur in the document, or reading fails
        /// (any exception raised while reading is swallowed)
        /// </returns>
        public /*internal*/ virtual TermFreqVector Get(int docNum, System.String field)
        {
            lock (this)
            {
                // Resolved up front, before the check for a missing tvx input.
                int wantedFieldNumber = fieldInfos.FieldNumber(field);

                // No term vectors are available for this segment at all.
                if (tvx == null)
                {
                    System.Console.Out.WriteLine("No tvx file");
                    return null;
                }

                TermFreqVector vector = null;
                try
                {
                    // The tvx seek has to account for FORMAT_SIZE in addition to the
                    // per-document offset. Other seeks do not need this because they use
                    // a file pointer that was itself read from another file.
                    tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
                    long tvdOffset = tvx.ReadLong();

                    tvd.Seek(tvdOffset);
                    int fieldCount = tvd.ReadVInt();

                    // There are only a few fields per document, so a full scan is used
                    // rather than requiring the fields to be ordered. Every entry has to
                    // be read anyway to reach the tvf pointers that follow, which is why
                    // the scan does not stop at the first match.
                    int runningFieldNumber = 0;
                    int matchIndex = -1;
                    int slot = 0;
                    while (slot < fieldCount)
                    {
                        runningFieldNumber += tvd.ReadVInt();
                        if (runningFieldNumber == wantedFieldNumber)
                        {
                            matchIndex = slot;
                        }
                        slot++;
                    }

                    // A field that is valid in the segment may still be absent from this
                    // document; in that case the result stays null.
                    if (matchIndex != -1)
                    {
                        // Sum the pointer values up to and including the matching entry
                        // to obtain the position in the tvf file.
                        long tvfOffset = 0;
                        for (int remaining = matchIndex + 1; remaining > 0; remaining--)
                        {
                            tvfOffset += tvd.ReadVLong();
                        }
                        vector = ReadTermVector(field, tvfOffset);
                    }
                }
                catch (System.Exception)
                {
                    // Best effort: any failure while reading is ignored and null is returned.
                }

                return vector;
            }
        }