Exemple #1
0
 /// <summary>Seeks via the base implementation, then, when a term info is
 /// supplied, moves the prox stream to that term's prox pointer.  The prox
 /// counter is reset to zero in every case.
 /// </summary>
 /// <param name="ti">the term info to seek to; may be null
 /// </param>
 internal override void  Seek(TermInfo ti)
 {
     base.Seek(ti);

     bool hasTermInfo = ti != null;
     if (hasTermInfo)
     {
         // only reposition the prox stream when there is a pointer to go to
         proxStream.Seek(ti.proxPointer);
     }

     proxCount = 0;
 }
        /// <summary>Copies the norms of a field into a caller-supplied array.
        /// When no norm entry exists for the field the array is left untouched.
        /// </summary>
        /// <param name="field">name of the field whose norms are wanted
        /// </param>
        /// <param name="bytes">destination array
        /// </param>
        /// <param name="offset">index in <c>bytes</c> at which the first norm is stored
        /// </param>
        public override void  Norms(System.String field, byte[] bytes, int offset)
        {
            lock (this)
            {
                Norm entry = (Norm)norms[field];
                if (entry != null)
                {
                    if (entry.bytes != null)
                    {
                        // norms are already cached in memory: copy them straight over
                        Array.Copy(entry.bytes, 0, bytes, offset, MaxDoc());
                    }
                    else
                    {
                        // not cached: read from a private clone of the norm stream
                        InputStream source = (InputStream)entry.in_Renamed.Clone();
                        try
                        {
                            source.Seek(0);
                            source.ReadBytes(bytes, offset, MaxDoc());
                        }
                        finally
                        {
                            // the clone is always closed, even if the read fails
                            source.Close();
                        }
                    }
                }
                // else: no norms for this field, caller keeps whatever is in the array
            }
        }
Exemple #3
0
 /// <summary>Repositions the input and resets the enumeration state to the
 /// supplied values.
 /// </summary>
 /// <param name="pointer">position to seek the input to
 /// </param>
 /// <param name="p">value stored as the new position
 /// </param>
 /// <param name="t">value stored as the current term
 /// </param>
 /// <param name="ti">term info copied into the internal term info
 /// </param>
 internal void  Seek(long pointer, int p, Term t, TermInfo ti)
 {
     input.Seek(pointer);

     // reset enumeration state; there is no previous term after a seek
     position = p;
     term = t;
     prev = null;
     termInfo.Set(ti);

     // make sure the buffer can hold the text of the new current term
     int textLength = term.text.Length;
     GrowBuffer(textLength);
 }
 /// <summary>Resets the counters and, when a term info is supplied, loads the
 /// document frequency, pointers and skip bookkeeping from it and seeks the
 /// freq stream to the term's freq pointer.  A null term info only sets the
 /// document frequency to zero.
 /// </summary>
 /// <param name="ti">the term info to position on; may be null
 /// </param>
 internal virtual void  Seek(TermInfo ti)
 {
     count = 0;

     if (ti == null)
     {
         // nothing to enumerate
         df = 0;
         return;
     }

     df        = ti.docFreq;
     doc       = 0;
     skipDoc   = 0;
     skipCount = 0;
     numSkips  = df / skipInterval;

     freqPointer = ti.freqPointer;
     proxPointer = ti.proxPointer;
     // skip pointer = freq pointer plus the term's skip offset
     skipPointer = freqPointer + ti.skipOffset;

     freqStream.Seek(freqPointer);
     haveSkipped = false;
 }
Exemple #5
0
 /// <summary>Expert: implements buffer refill.  Reads bytes from the current
 /// file pointer by seeking the shared base stream to
 /// <c>fileOffset + current pointer</c> and reading from it, all while
 /// holding a lock on the base stream.
 /// </summary>
 /// <param name="b">the array to read bytes into
 /// </param>
 /// <param name="offset">the offset in the array to start storing bytes
 /// </param>
 /// <param name="len">the number of bytes to read
 /// </param>
 /// <exception cref="System.IO.IOException">if the read would extend past <c>length</c>
 /// </exception>
 public override void  ReadInternal(byte[] b, int offset, int len)
 {
     lock (base_Renamed)
     {
         long current = GetFilePointer();
         long end     = current + len;

         if (end > length)
         {
             throw new System.IO.IOException("read past EOF");
         }

         // seek and read under the same lock so they act on the base stream together
         base_Renamed.Seek(fileOffset + current);
         base_Renamed.ReadBytes(b, offset, len);
     }
 }
Exemple #6
0
        /// <summary>Reads one term vector from the tvf stream, starting at the given
        /// pointer.  Each term is stored as a (start, deltaLength) pair plus
        /// <c>deltaLength</c> characters: the first <c>start</c> characters are
        /// carried over from the previous term.
        /// </summary>
        /// <param name="field">The name of the field; passed through to the returned vector
        /// </param>
        /// <param name="tvfPointer">The pointer within the tvf file where we should start reading
        /// </param>
        /// <returns> The TermVector located at that position; when the stored term
        /// count is zero, a vector built with null terms and null frequencies
        /// </returns>
        /// <throws>  IOException </throws>
        private SegmentTermVector ReadTermVector(System.String field, long tvfPointer)
        {
            // Now read the data from specified position
            // We don't need to offset by the FORMAT here since the pointer already includes the offset
            tvf.Seek(tvfPointer);

            int numTerms = tvf.ReadVInt();

            // If no terms - return an empty term vector
            if (numTerms == 0)
            {
                return(new SegmentTermVector(field, null, null));
            }

            // The next VInt is not used by this reader, but it must still be
            // consumed so the stream is positioned on the first term entry.
            tvf.ReadVInt();

            System.String[] terms     = new System.String[numTerms];
            int[]           termFreqs = new int[numTerms];

            char[]        buffer         = new char[0];
            System.String previousString = "";
            for (int i = 0; i < numTerms; i++)
            {
                int start       = tvf.ReadVInt();
                int deltaLength = tvf.ReadVInt();
                int totalLength = start + deltaLength;

                if (buffer.Length < totalLength)
                {
                    // Grow the buffer, keeping the previous term so its prefix can be reused
                    char[] grown = new char[totalLength];
                    previousString.CopyTo(0, grown, 0, previousString.Length);
                    buffer = grown;
                }

                // Overwrite everything after the shared prefix with the new suffix
                tvf.ReadChars(buffer, start, deltaLength);
                terms[i]       = new System.String(buffer, 0, totalLength);
                previousString = terms[i];
                termFreqs[i]   = tvf.ReadVInt();
            }

            return(new SegmentTermVector(field, terms, termFreqs));
        }
Exemple #7
0
        /// <summary>Loads the stored fields of document <c>n</c>.  The index stream
        /// is read at <c>n * 8</c> to obtain a position in the fields stream, from
        /// which a field count and then each field entry are read.
        /// </summary>
        /// <param name="n">the document number
        /// </param>
        /// <returns> a new Document holding one Field per entry read
        /// </returns>
        public /*internal*/ Document Doc(int n)
        {
            // one long per document in the index stream
            indexStream.Seek(n * 8L);
            long fieldsStart = indexStream.ReadLong();
            fieldsStream.Seek(fieldsStart);

            Document result     = new Document();
            int      fieldCount = fieldsStream.ReadVInt();

            int read = 0;
            while (read < fieldCount)
            {
                int       number = fieldsStream.ReadVInt();
                FieldInfo info   = fieldInfos.FieldInfo(number);

                byte flags = fieldsStream.ReadByte();

                System.String name       = info.name;
                System.String value      = fieldsStream.ReadString();
                bool          lowBitSet  = (flags & 1) != 0;

                result.Add(new Field(name, value, true, info.isIndexed, lowBitSet, info.storeTermVector));
                read++;
            }

            return(result);
        }
Exemple #8
0
        /// <summary> Retrieve the term vector for the given document and field</summary>
        /// <param name="docNum">The document number to retrieve the vector for
        /// </param>
        /// <param name="field">The field within the document to retrieve
        /// </param>
        /// <returns> The TermFreqVector for the document and field, or null when
        /// there is no tvx stream, the field is not present in the document, or
        /// an exception occurred while reading
        /// </returns>
        public /*internal*/ virtual TermFreqVector Get(int docNum, System.String field)
        {
            lock (this)
            {
                int wantedNumber = fieldInfos.FieldNumber(field);

                // No term vectors are available for this segment at all
                if (tvx == null)
                {
                    System.Console.Out.WriteLine("No tvx file");
                    return(null);
                }

                TermFreqVector vector = null;
                try
                {
                    // The tvx seek must account for FORMAT_SIZE; the other seeks use
                    // file pointers that were read from another file.
                    tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
                    long tvdPosition = tvx.ReadLong();

                    tvd.Seek(tvdPosition);
                    int fieldCount = tvd.ReadVInt();

                    // Full scan of the field numbers: every entry has to be read
                    // anyway to reach the tvf pointers that follow them.
                    int runningNumber = 0;
                    int matchIndex    = -1;
                    int scanned       = 0;
                    while (scanned < fieldCount)
                    {
                        runningNumber += tvd.ReadVInt();
                        if (runningNumber == wantedNumber)
                        {
                            matchIndex = scanned;
                        }
                        scanned++;
                    }

                    // matchIndex stays -1 when the field was not found in this document
                    if (matchIndex >= 0)
                    {
                        // Sum the tvf pointer deltas up to and including the match
                        long tvfPosition = 0;
                        for (int k = matchIndex; k >= 0; k--)
                        {
                            tvfPosition += tvd.ReadVLong();
                        }
                        vector = ReadTermVector(field, tvfPosition);
                    }
                }
                catch (System.Exception)
                {
                    // NOTE(review): every exception is swallowed here and null is
                    // returned instead; confirm callers rely on this best-effort behavior.
                }

                return(vector);
            }
        }
        /// <summary>Optimized implementation.  When the document frequency is at
        /// least <c>skipInterval</c>, skip data is read from a lazily created
        /// clone of the freq stream to jump ahead; afterwards <c>Next()</c> is
        /// called until <c>doc</c> is no longer smaller than <c>target</c>.
        /// </summary>
        /// <param name="target">the document number to advance to
        /// </param>
        /// <returns> false as soon as <c>Next()</c> returns false, true otherwise
        /// </returns>
        public virtual bool SkipTo(int target)
        {
            if (df >= skipInterval)
            {
                // optimized case

                if (skipStream == null)
                {
                    skipStream = (InputStream)freqStream.Clone(); // lazily clone
                }
                if (!haveSkipped)
                {
                    // lazily seek skip stream
                    skipStream.Seek(skipPointer);
                    haveSkipped = true;
                }

                // scan skip data
                // The "last*" locals remember the state before the most recent skip
                // entry was read, i.e. the entry that does not overshoot the target.
                int  lastSkipDoc     = skipDoc;
                long lastFreqPointer = freqStream.GetFilePointer();
                long lastProxPointer = -1;
                int  numSkipped      = -1 - (count % skipInterval);

                while (target > skipDoc)
                {
                    lastSkipDoc     = skipDoc;
                    lastFreqPointer = freqPointer;
                    lastProxPointer = proxPointer;

                    // only count an interval once the skip entry is at or past the current doc
                    if (skipDoc != 0 && skipDoc >= doc)
                    {
                        numSkipped += skipInterval;
                    }

                    // all skip entries consumed
                    if (skipCount >= numSkips)
                    {
                        break;
                    }

                    // each skip entry is three VInt deltas: doc, freq pointer, prox pointer
                    skipDoc     += skipStream.ReadVInt();
                    freqPointer += skipStream.ReadVInt();
                    proxPointer += skipStream.ReadVInt();

                    skipCount++;
                }

                // if we found something to skip, then skip it
                // (only ever move the freq stream forward)
                if (lastFreqPointer > freqStream.GetFilePointer())
                {
                    freqStream.Seek(lastFreqPointer);
                    SkipProx(lastProxPointer);

                    doc    = lastSkipDoc;
                    count += numSkipped;
                }
            }

            // done skipping, now just scan
            // Next() is always called at least once, even if doc already equals target
            do
            {
                if (!Next())
                {
                    return(false);
                }
            }while (target > doc);
            return(true);
        }