예제 #1
0
        /// <summary>
        /// Serializes this segment's info to <paramref name="output"/>.
        /// The values are emitted strictly in sequence; presumably the order
        /// has to match whatever code reads the segment info back (not visible here).
        /// </summary>
        /// <param name="output">Stream that receives the serialized segment info.</param>
        internal void  Write(IndexOutput output)
        {
            output.WriteString(name);
            output.WriteInt(docCount);
            output.WriteLong(delGen);
            output.WriteInt(docStoreOffset);

            // The doc store segment name and its compound-file flag are only
            // emitted when a doc store offset is set (anything other than -1).
            bool hasDocStore = docStoreOffset != -1;
            if (hasDocStore)
            {
                byte docStoreCompoundFlag = (byte)(docStoreIsCompoundFile ? 1 : 0);
                output.WriteString(docStoreSegment);
                output.WriteByte(docStoreCompoundFlag);
            }

            byte singleNormFileFlag = (byte)(hasSingleNormFile ? 1 : 0);
            output.WriteByte(singleNormFileFlag);

            // Norm generations: a count followed by one long per entry, or the
            // NO marker in place of the count when there is no array at all.
            if (normGen != null)
            {
                output.WriteInt(normGen.Length);
                foreach (long gen in normGen)
                {
                    output.WriteLong(gen);
                }
            }
            else
            {
                output.WriteInt(NO);
            }

            output.WriteByte((byte)isCompoundFile);
            output.WriteInt(delCount);

            byte hasProxFlag = (byte)(hasProx ? 1 : 0);
            output.WriteByte(hasProxFlag);
            output.WriteStringStringMap(diagnostics);
        }
예제 #2
0
        /// <summary>
        /// Writes an empty term-vector entry for every document between
        /// <c>lastDocID</c> and <paramref name="docID"/> plus the doc store offset
        /// (exclusive), advancing <c>lastDocID</c> once per entry written.
        /// </summary>
        /// <param name="docID">Document ID to fill up to, before the doc store offset is added.</param>
        internal void  Fill(int docID)
        {
            int limit = docID + docWriter.GetDocStoreOffset();
            if (lastDocID >= limit)
            {
                // Already caught up; nothing to write.
                return;
            }

            // This method never writes to tvf, so a single snapshot of its
            // position is reused for every filled document.
            long vectorFilePosition = tvf.GetFilePointer();
            for (; lastDocID < limit; lastDocID++)
            {
                tvx.WriteLong(tvd.GetFilePointer());
                tvd.WriteVInt(0);                    // zero fields for this document
                tvx.WriteLong(vectorFilePosition);
            }
        }
예제 #3
0
        /// <summary>
        /// Called to complete TermInfos creation. Writes the final <c>size</c> at
        /// byte offset 4 of the output (after the format), then closes the output
        /// and, when this writer is not the index writer, the paired
        /// <c>other</c> writer as well.
        /// </summary>
        /// <remarks>
        /// The close calls sit in nested <c>finally</c> blocks so that a failure
        /// while writing the size or closing <c>output</c> does not leave
        /// <c>output</c> or <c>other</c> open.
        /// </remarks>
        internal void  Close()
        {
            try
            {
                output.Seek(4);             // write size after format
                output.WriteLong(size);
            }
            finally
            {
                try
                {
                    output.Close();
                }
                finally
                {
                    // Always release the paired writer, even if closing our own
                    // output failed.
                    if (!isIndex)
                    {
                        other.Close();
                    }
                }
            }
        }
예제 #4
0
 /// <summary>
 /// Stores the writer configuration, creates the output file for the
 /// segment (".tii" when <paramref name="isi"/> is true, ".tis" otherwise)
 /// and writes the file header.
 /// </summary>
 /// <param name="directory">Directory in which the output file is created.</param>
 /// <param name="segment">Segment name; the extension is appended to it.</param>
 /// <param name="fis">Field infos kept in <c>fieldInfos</c>.</param>
 /// <param name="interval">Value stored in <c>indexInterval</c> and written to the header.</param>
 /// <param name="isi">True when this writer produces the index (".tii") file.</param>
 private void  Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi)
 {
     indexInterval = interval;
     fieldInfos    = fis;
     isIndex       = isi;

     System.String extension = isIndex ? ".tii" : ".tis";
     output = directory.CreateOutput(segment + extension);

     // Header: format, size placeholder, then the three interval settings.
     output.WriteInt(FORMAT_CURRENT);
     output.WriteLong(0);             // size is not known yet; space is reserved here
     output.WriteInt(indexInterval);
     output.WriteInt(skipInterval);
     output.WriteInt(maxSkipLevels);

     // Debug.Assert is conditional on DEBUG, so InitUTF16Results() only runs
     // in builds where assertions are compiled in.
     System.Diagnostics.Debug.Assert(InitUTF16Results());
 }
예제 #5
0
 /// <summary>
 /// Appends one document to the fields stream and records where it starts:
 /// the current fields-stream position goes to the index stream, then the
 /// stored-field count and the contents of <paramref name="buffer"/> go to
 /// the fields stream. The buffer is assumed to already be written in the
 /// correct fields format.
 /// </summary>
 /// <param name="numStoredFields">Number of stored fields, written before the buffer contents.</param>
 /// <param name="buffer">Pre-encoded field data copied into the fields stream.</param>
 internal void  FlushDocument(int numStoredFields, RAMOutputStream buffer)
 {
     long documentStart = fieldsStream.GetFilePointer();
     indexStream.WriteLong(documentStart);

     fieldsStream.WriteVInt(numStoredFields);
     buffer.WriteTo(fieldsStream);
 }
예제 #6
0
		/// <summary>
		/// Writes this segment's info to <paramref name="output"/>, one value
		/// after another. The sequence presumably has to stay in step with the
		/// code that reads it back (not visible here).
		/// </summary>
		/// <param name="output">Stream that receives the serialized segment info.</param>
		internal void  Write(IndexOutput output)
		{
			output.WriteString(name);
			output.WriteInt(docCount);
			output.WriteLong(delGen);
			output.WriteInt(docStoreOffset);

			// Doc store details are present only when an offset is set (not -1).
			bool sharesDocStore = docStoreOffset != -1;
			if (sharesDocStore)
			{
				byte compoundStoreFlag = (byte) (docStoreIsCompoundFile ? 1 : 0);
				output.WriteString(docStoreSegment);
				output.WriteByte(compoundStoreFlag);
			}

			byte singleNormFlag = (byte) (hasSingleNormFile ? 1 : 0);
			output.WriteByte(singleNormFlag);

			// Either the NO marker, or the entry count followed by each generation.
			if (normGen != null)
			{
				output.WriteInt(normGen.Length);
				foreach (long generation in normGen)
				{
					output.WriteLong(generation);
				}
			}
			else
			{
				output.WriteInt(NO);
			}

			output.WriteByte((byte) isCompoundFile);
			output.WriteInt(delCount);

			byte proxFlag = (byte) (hasProx ? 1 : 0);
			output.WriteByte(proxFlag);
			output.WriteStringStringMap(diagnostics);
		}
예제 #7
0
		/// <summary>
		/// Records the writer settings, opens the segment's output file
		/// (".tii" for the index writer, ".tis" otherwise) and emits the header.
		/// </summary>
		/// <param name="directory">Directory in which the output file is created.</param>
		/// <param name="segment">Segment name used as the file name stem.</param>
		/// <param name="fis">Field infos kept in <c>fieldInfos</c>.</param>
		/// <param name="interval">Value stored in <c>indexInterval</c> and written to the header.</param>
		/// <param name="isi">True when this writer produces the index (".tii") file.</param>
		private void  Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi)
		{
			indexInterval = interval;
			fieldInfos = fis;
			isIndex = isi;

			System.String suffix = isIndex ? ".tii" : ".tis";
			output = directory.CreateOutput(segment + suffix);

			// Header layout: format, reserved size slot, then the interval settings.
			output.WriteInt(FORMAT_CURRENT);
			output.WriteLong(0); // size placeholder; the real value is not known yet
			output.WriteInt(indexInterval);
			output.WriteInt(skipInterval);
			output.WriteInt(maxSkipLevels);

			// Debug.Assert is compiled only under DEBUG, so this call is skipped
			// entirely in builds without it.
			System.Diagnostics.Debug.Assert(InitUTF16Results());
		}
예제 #8
0
        /// <summary>
        /// Adds a complete document specified by all its term vectors.
        /// The current tvd and tvf file pointers are always written to tvx; if
        /// <paramref name="vectors"/> is null, a field count of zero is written to tvd.
        /// </summary>
        /// <param name="vectors">
        /// One term vector per field of the document, or null when the document
        /// has no term vectors.
        /// </param>
        /// <exception cref="System.SystemException">
        /// Thrown when positions or offsets are being stored for a field but a
        /// term of that field returns null for them.
        /// </exception>
        /// <remarks>NOTE(review): the underlying stream writes can presumably also
        /// raise I/O exceptions — confirm against the IndexOutput implementation.</remarks>
        public void  AddAllDocVectors(TermFreqVector[] vectors)
        {
            // Index entry for this document: where its data starts in tvd and tvf.
            tvx.WriteLong(tvd.GetFilePointer());
            tvx.WriteLong(tvf.GetFilePointer());

            if (vectors != null)
            {
                int numFields = vectors.Length;
                tvd.WriteVInt(numFields);

                // Start position in tvf of each field's data; written to tvd in the 2nd pass.
                long[] fieldPointers = new long[numFields];

                for (int i = 0; i < numFields; i++)
                {
                    fieldPointers[i] = tvf.GetFilePointer();

                    int fieldNumber = fieldInfos.FieldNumber(vectors[i].GetField());

                    // 1st pass: write field numbers to tvd
                    tvd.WriteVInt(fieldNumber);

                    int numTerms = vectors[i].Size();
                    tvf.WriteVInt(numTerms);

                    TermPositionVector tpVector;

                    byte bits;
                    bool storePositions;
                    bool storeOffsets;

                    if (vectors[i] is TermPositionVector)
                    {
                        // May have positions & offsets
                        // Whether they are stored is decided from the first term only:
                        // the vector must be non-empty and term 0 must have non-null data.
                        tpVector       = (TermPositionVector)vectors[i];
                        storePositions = tpVector.Size() > 0 && tpVector.GetTermPositions(0) != null;
                        storeOffsets   = tpVector.Size() > 0 && tpVector.GetOffsets(0) != null;
                        bits           = (byte)((storePositions?TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR: (byte)0) + (storeOffsets?TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR: (byte)0));
                    }
                    else
                    {
                        // Plain frequency vector: no positions, no offsets.
                        tpVector       = null;
                        bits           = 0;
                        storePositions = false;
                        storeOffsets   = false;
                    }

                    // Per-field flags telling what follows each term.
                    tvf.WriteVInt(bits);

                    System.String[] terms = vectors[i].GetTerms();
                    int[]           freqs = vectors[i].GetTermFrequencies();

                    // Two UTF-8 buffers are used alternately: utf8Results[utf8Upto] holds
                    // the current term and utf8Results[1 - utf8Upto] the previous one.
                    // Resetting buffer 1's length makes the "previous term" empty for the
                    // first term of the field.
                    int utf8Upto = 0;
                    utf8Results[1].length = 0;

                    for (int j = 0; j < numTerms; j++)
                    {
                        UnicodeUtil.UTF16toUTF8(terms[j], 0, terms[j].Length, utf8Results[utf8Upto]);

                        // 'start' is used below as the shared-prefix length with the
                        // previous term; only the remaining bytes of the term are written.
                        int start  = StringHelper.BytesDifference(utf8Results[1 - utf8Upto].result, utf8Results[1 - utf8Upto].length, utf8Results[utf8Upto].result, utf8Results[utf8Upto].length);
                        int length = utf8Results[utf8Upto].length - start;
                        tvf.WriteVInt(start);                                        // write shared prefix length
                        tvf.WriteVInt(length);                                       // write delta length
                        tvf.WriteBytes(utf8Results[utf8Upto].result, start, length); // write delta bytes
                        utf8Upto = 1 - utf8Upto;                                     // swap current/previous buffers

                        int termFreq = freqs[j];

                        tvf.WriteVInt(termFreq);

                        if (storePositions)
                        {
                            int[] positions = tpVector.GetTermPositions(j);
                            if (positions == null)
                            {
                                throw new System.SystemException("Trying to write positions that are null!");
                            }
                            System.Diagnostics.Debug.Assert(positions.Length == termFreq);

                            // use delta encoding for positions
                            int lastPosition = 0;
                            for (int k = 0; k < positions.Length; k++)
                            {
                                int position = positions[k];
                                tvf.WriteVInt(position - lastPosition);
                                lastPosition = position;
                            }
                        }

                        if (storeOffsets)
                        {
                            TermVectorOffsetInfo[] offsets = tpVector.GetOffsets(j);
                            if (offsets == null)
                            {
                                throw new System.SystemException("Trying to write offsets that are null!");
                            }
                            System.Diagnostics.Debug.Assert(offsets.Length == termFreq);

                            // use delta encoding for offsets
                            // Each start offset is written relative to the previous end
                            // offset, and each end offset relative to its own start.
                            int lastEndOffset = 0;
                            for (int k = 0; k < offsets.Length; k++)
                            {
                                int startOffset = offsets[k].GetStartOffset();
                                int endOffset   = offsets[k].GetEndOffset();
                                tvf.WriteVInt(startOffset - lastEndOffset);
                                tvf.WriteVInt(endOffset - startOffset);
                                lastEndOffset = endOffset;
                            }
                        }
                    }
                }

                // 2nd pass: write field pointers to tvd
                // Only the deltas between consecutive fields are written; the first
                // field's pointer itself is not (the loop starts at index 1).
                if (numFields > 1)
                {
                    long lastFieldPointer = fieldPointers[0];
                    for (int i = 1; i < numFields; i++)
                    {
                        long fieldPointer = fieldPointers[i];
                        tvd.WriteVLong(fieldPointer - lastFieldPointer);
                        lastFieldPointer = fieldPointer;
                    }
                }
            }
            else
            {
                // No term vectors for this document: record zero fields.
                tvd.WriteVInt(0);
            }
        }
예제 #9
0
        /// <summary>
        /// Merges the files added so far into the compound stream. The output
        /// starts with a directory (entry count, then a data offset and file
        /// name per entry), followed by the data of every file in the same order.
        /// </summary>
        /// <exception cref="System.SystemException">
        /// Thrown if the merge has already been performed, or if no entries
        /// have been added to this object.
        /// </exception>
        public void  Close()
        {
            if (merged)
            {
                throw new System.SystemException("Merge already performed");
            }
            if (entries.Count == 0)
            {
                throw new System.SystemException("No entries to merge have been defined");
            }

            merged = true;

            // Stays non-null only while the stream still needs closing by the
            // finally clause below.
            IndexOutput compoundOut = null;
            try
            {
                compoundOut = directory.CreateOutput(fileName);

                // Entry count comes first.
                compoundOut.WriteVInt(entries.Count);

                // Directory section: every data offset is written as 0 for now, and
                // the position of that placeholder is remembered so it can be
                // patched once the real offset is known.
                long sourceBytes = 0;
                foreach (FileEntry entry in entries)
                {
                    entry.directoryOffset = compoundOut.GetFilePointer();
                    compoundOut.WriteLong(0);                     // placeholder
                    compoundOut.WriteString(entry.file);
                    sourceBytes += directory.FileLength(entry.file);
                }

                // Pre-allocate the full file size as an optimization: it may help
                // IO performance while writing and later while searching, and it
                // surfaces a disk-full situation early, hopefully without
                // actually filling the disk to 100%.
                long finalLength = sourceBytes + compoundOut.GetFilePointer();
                compoundOut.SetLength(finalLength);

                // Data section: copy each file, remembering where its data begins.
                byte[] copyBuffer = new byte[16384];
                foreach (FileEntry entry in entries)
                {
                    entry.dataOffset = compoundOut.GetFilePointer();
                    CopyFile(entry, compoundOut, copyBuffer);
                }

                // Patch the real data offsets into the directory placeholders.
                foreach (FileEntry entry in entries)
                {
                    compoundOut.Seek(entry.directoryOffset);
                    compoundOut.WriteLong(entry.dataOffset);
                }

                System.Diagnostics.Debug.Assert(finalLength == compoundOut.Length());

                // Clear the shared reference before closing so that, if this close
                // throws, the finally clause does not attempt a second close.
                IndexOutput closing = compoundOut;
                compoundOut = null;
                closing.Close();
            }
            finally
            {
                if (compoundOut != null)
                {
                    try
                    {
                        compoundOut.Close();
                    }
                    catch (System.IO.IOException)
                    {
                        // Ignored on purpose: an exception from the try block is
                        // already propagating and must not be replaced.
                    }
                }
            }
        }
예제 #10
0
        /// <summary>
        /// Completes a pending commit: finishes and closes the pending segments
        /// output, syncs the resulting segments file in <paramref name="dir"/>,
        /// records the generation in <c>lastGeneration</c> and finally makes a
        /// best-effort attempt to write the segments.gen file.
        /// </summary>
        /// <param name="dir">Directory holding the segments files.</param>
        /// <exception cref="System.SystemException">
        /// Thrown when there is no pending segments output to finish.
        /// </exception>
        internal void  FinishCommit(Directory dir)
        {
            if (pendingSegnOutput == null)
            {
                throw new System.SystemException("prepareCommit was not called");
            }

            // Step 1: finish and close the pending output; roll back on any failure.
            bool committed = false;
            try
            {
                pendingSegnOutput.FinishCommit();
                pendingSegnOutput.Close();
                pendingSegnOutput = null;
                committed = true;
            }
            finally
            {
                if (!committed)
                {
                    RollbackCommit(dir);
                }
            }

            // NOTE: if we crash here, we have left a segments_N
            // file in the directory in a possibly corrupt state (if
            // some bytes made it to stable storage and others
            // didn't).  But, the segments_N file includes checksum
            // at the end, which should catch this case.  So when a
            // reader tries to read it, it will throw a
            // CorruptIndexException, which should cause the retry
            // logic in SegmentInfos to kick in and load the last
            // good (previous) segments_N-1 file.

            // Step 2: sync the segments file; if that fails, try to remove it.
            System.String segmentsFile = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);
            bool synced = false;
            try
            {
                dir.Sync(segmentsFile);
                synced = true;
            }
            finally
            {
                if (!synced)
                {
                    try
                    {
                        dir.DeleteFile(segmentsFile);
                    }
                    catch (System.Exception)
                    {
                        // Suppress so we keep throwing the original exception
                    }
                }
            }

            lastGeneration = generation;

            // Step 3: write the generation file. The generation is written twice,
            // exactly as in the original sequence of writes.
            try
            {
                IndexOutput genFile = dir.CreateOutput(IndexFileNames.SEGMENTS_GEN);
                try
                {
                    genFile.WriteInt(FORMAT_LOCKLESS);
                    genFile.WriteLong(generation);
                    genFile.WriteLong(generation);
                }
                finally
                {
                    genFile.Close();
                }
            }
            catch (System.Exception)
            {
                // It's OK if we fail to write this file since it's
                // used only as one of the retry fallbacks.
            }
        }