Пример #1
0
        internal void  AddDocument(Document doc)
        {
            indexStream.WriteLong(fieldsStream.GetFilePointer());

            int storedCount = 0;

            foreach (Field field  in doc.Fields())
            {
                if (field.IsStored())
                {
                    storedCount++;
                }
            }
            fieldsStream.WriteVInt(storedCount);

            foreach (Field field in doc.Fields())
            {
                if (field.IsStored())
                {
                    fieldsStream.WriteVInt(fieldInfos.FieldNumber(field.Name()));

                    byte bits = 0;
                    if (field.IsTokenized())
                    {
                        bits |= 1;
                    }
                    fieldsStream.WriteByte(bits);

                    fieldsStream.WriteString(field.StringValue());
                }
            }
        }
        /// <summary>Process postings from multiple segments all positioned on the
        /// same term. Writes out merged entries into freqOutput and
        /// the proxOutput streams.
        ///
        /// </summary>
        /// <param name="smis">array of segments
        /// </param>
        /// <param name="n">number of cells in the array actually occupied
        /// </param>
        /// <returns> number of documents across all segments where this term was found
        /// </returns>
        private int AppendPostings(SegmentMergeInfo[] smis, int n)
        {
            int lastDoc = 0;
            int df      = 0; // number of docs w/ term

            ResetSkip();
            for (int i = 0; i < n; i++)
            {
                SegmentMergeInfo smi      = smis[i];
                TermPositions    postings = smi.postings;
                int   base_Renamed        = smi.base_Renamed;
                int[] docMap = smi.docMap;
                postings.Seek(smi.termEnum);
                while (postings.Next())
                {
                    int doc = postings.Doc();
                    if (docMap != null)
                    {
                        doc = docMap[doc]; // map around deletions
                    }
                    doc += base_Renamed;   // convert to merged space

                    if (doc < lastDoc)
                    {
                        throw new System.SystemException("docs out of order");
                    }

                    df++;

                    if ((df % skipInterval) == 0)
                    {
                        BufferSkip(lastDoc);
                    }

                    int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
                    lastDoc = doc;

                    int freq = postings.Freq();
                    if (freq == 1)
                    {
                        freqOutput.WriteVInt(docCode | 1); // write doc & freq=1
                    }
                    else
                    {
                        freqOutput.WriteVInt(docCode); // write doc
                        freqOutput.WriteVInt(freq);    // write frequency in doc
                    }

                    int lastPosition = 0; // write position deltas
                    for (int j = 0; j < freq; j++)
                    {
                        int position = postings.NextPosition();
                        proxOutput.WriteVInt(position - lastPosition);
                        lastPosition = position;
                    }
                }
            }
            return(df);
        }
Пример #3
0
        /// <summary>Adds a new <Term, TermInfo> pair to the set.
        /// Term must be lexicographically greater than all previous Terms added.
        /// TermInfo pointers must be positive and greater than all previous.
        /// </summary>
        public /*internal*/ void  Add(Term term, TermInfo ti)
        {
            if (!isIndex && term.CompareTo(lastTerm) <= 0)
            {
                throw new System.IO.IOException("term out of order");
            }
            if (ti.freqPointer < lastTi.freqPointer)
            {
                throw new System.IO.IOException("freqPointer out of order");
            }
            if (ti.proxPointer < lastTi.proxPointer)
            {
                throw new System.IO.IOException("proxPointer out of order");
            }

            if (!isIndex && size % indexInterval == 0)
            {
                other.Add(lastTerm, lastTi);                        // add an index term
            }
            WriteTerm(term);                                        // write term
            output.WriteVInt(ti.docFreq);                           // write doc freq
            output.WriteVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
            output.WriteVLong(ti.proxPointer - lastTi.proxPointer);

            if (ti.docFreq >= skipInterval)
            {
                output.WriteVInt(ti.skipOffset);
            }

            if (isIndex)
            {
                output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
                lastIndexPointer = other.output.GetFilePointer();                 // write pointer
            }

            lastTi.Set(ti);
            size++;
        }
Пример #4
0
 public void  Write(OutputStream output)
 {
     output.WriteVInt(Size());
     for (int i = 0; i < Size(); i++)
     {
         FieldInfo fi   = FieldInfo(i);
         byte      bits = (byte)(0x0);
         if (fi.isIndexed)
         {
             bits |= (byte)(0x1);
         }
         if (fi.storeTermVector)
         {
             bits |= (byte)(0x2);
         }
         output.WriteString(fi.name);
         //Was REMOVE
         //output.writeByte((byte)(fi.isIndexed ? 1 : 0));
         output.WriteByte(bits);
     }
 }
Пример #5
0
        /// <summary>Merge files with the extensions added up to now.
        /// All files with these extensions are combined sequentially into the
        /// compound stream. After successful merge, the source files
        /// are deleted.
        /// </summary>
        public void  Close()
        {
            if (merged)
            {
                throw new System.SystemException("Merge already performed");
            }

            if ((entries.Count == 0))
            {
                throw new System.SystemException("No entries to merge have been defined");
            }

            merged = true;

            // open the compound stream
            OutputStream os = null;

            try
            {
                os = directory.CreateFile(fileName);

                // Write the number of entries
                os.WriteVInt(entries.Count);

                // Write the directory with all offsets at 0.
                // Remember the positions of directory entries so that we can
                // adjust the offsets later
                System.Collections.IEnumerator it = entries.GetEnumerator();
                while (it.MoveNext())
                {
                    FileEntry fe = (FileEntry)it.Current;
                    fe.directoryOffset = os.GetFilePointer();
                    os.WriteLong(0);                     // for now
                    os.WriteString(fe.file);
                }

                // Open the files and copy their data into the stream.
                // Remeber the locations of each file's data section.
                byte[] buffer = new byte[1024];
                it = entries.GetEnumerator();
                while (it.MoveNext())
                {
                    FileEntry fe = (FileEntry)it.Current;
                    fe.dataOffset = os.GetFilePointer();
                    CopyFile(fe, os, buffer);
                }

                // Write the data offsets into the directory of the compound stream
                it = entries.GetEnumerator();
                while (it.MoveNext())
                {
                    FileEntry fe = (FileEntry)it.Current;
                    os.Seek(fe.directoryOffset);
                    os.WriteLong(fe.dataOffset);
                }

                // Close the output stream. Set the os to null before trying to
                // close so that if an exception occurs during the close, the
                // finally clause below will not attempt to close the stream
                // the second time.
                OutputStream tmp = os;
                os = null;
                tmp.Close();
            }
            finally
            {
                if (os != null)
                {
                    try
                    {
                        os.Close();
                    }
                    catch (System.IO.IOException)
                    {
                    }
                }
            }
        }
Пример #6
0
        private void  WritePostings(Posting[] postings, System.String segment)
        {
            OutputStream      freq = null, prox = null;
            TermInfosWriter   tis              = null;
            TermVectorsWriter termVectorWriter = null;

            try
            {
                //open files for inverse index storage
                freq = directory.CreateFile(segment + ".frq");
                prox = directory.CreateFile(segment + ".prx");
                tis  = new TermInfosWriter(directory, segment, fieldInfos);
                TermInfo      ti           = new TermInfo();
                System.String currentField = null;

                for (int i = 0; i < postings.Length; i++)
                {
                    Posting posting = postings[i];

                    // add an entry to the dictionary with pointers to prox and freq files
                    ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), -1);
                    tis.Add(posting.term, ti);

                    // add an entry to the freq file
                    int postingFreq = posting.freq;
                    if (postingFreq == 1)
                    {
                        // optimize freq=1
                        freq.WriteVInt(1);
                    }
                    // set low bit of doc num.
                    else
                    {
                        freq.WriteVInt(0);           // the document number
                        freq.WriteVInt(postingFreq); // frequency in doc
                    }

                    int   lastPosition = 0; // write positions
                    int[] positions    = posting.positions;
                    for (int j = 0; j < postingFreq; j++)
                    {
                        // use delta-encoding
                        int position = positions[j];
                        prox.WriteVInt(position - lastPosition);
                        lastPosition = position;
                    }
                    // check to see if we switched to a new Field
                    System.String termField = posting.term.Field();
                    if ((System.Object)currentField != (System.Object)termField)
                    {
                        // changing Field - see if there is something to save
                        currentField = termField;
                        FieldInfo fi = fieldInfos.FieldInfo(currentField);
                        if (fi.storeTermVector)
                        {
                            if (termVectorWriter == null)
                            {
                                termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos);
                                termVectorWriter.OpenDocument();
                            }
                            termVectorWriter.OpenField(currentField);
                        }
                        else if (termVectorWriter != null)
                        {
                            termVectorWriter.CloseField();
                        }
                    }
                    if (termVectorWriter != null && termVectorWriter.IsFieldOpen())
                    {
                        termVectorWriter.AddTerm(posting.term.Text(), postingFreq);
                    }
                }
                if (termVectorWriter != null)
                {
                    termVectorWriter.CloseDocument();
                }
            }
            finally
            {
                // make an effort to close all streams we can but remember and re-throw
                // the first exception encountered in this process
                System.IO.IOException keep = null;
                if (freq != null)
                {
                    try
                    {
                        freq.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (prox != null)
                {
                    try
                    {
                        prox.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (tis != null)
                {
                    try
                    {
                        tis.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (termVectorWriter != null)
                {
                    try
                    {
                        termVectorWriter.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (keep != null)
                {
                    throw new System.IO.IOException(keep.StackTrace);
                }
            }
        }
Пример #7
0
		public void  Write(OutputStream output)
		{
			output.WriteVInt(Size());
			for (int i = 0; i < Size(); i++)
			{
				FieldInfo fi = FieldInfo(i);
				byte bits = (byte) (0x0);
				if (fi.isIndexed)
					bits |= (byte) (0x1);
				if (fi.storeTermVector)
					bits |= (byte) (0x2);
				output.WriteString(fi.name);
				//Was REMOVE
				//output.writeByte((byte)(fi.isIndexed ? 1 : 0));
				output.WriteByte(bits);
			}
		}