Example #1
0
 private void  WriteNorms(System.String segment)
 {
     for (int n = 0; n < fieldInfos.Size(); n++)
     {
         FieldInfo fi = fieldInfos.FieldInfo(n);
         if (fi.isIndexed && !fi.omitNorms)
         {
             float       norm  = fieldBoosts[n] * similarity.LengthNorm(fi.name, fieldLengths[n]);
             IndexOutput norms = directory.CreateOutput(segment + ".f" + n);
             try
             {
                 norms.WriteByte(Similarity.EncodeNorm(norm));
             }
             finally
             {
                 norms.Close();
             }
         }
     }
 }
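For context, a hedged sketch of reading such a per-field norms file back, assuming the one-byte-per-document layout written above and that Similarity.DecodeNorm is the inverse of Similarity.EncodeNorm; the ReadNorms name and the maxDoc parameter are illustrative, not part of the original.
 // Illustrative sketch only (not the original code): reads back the
 // single-byte norms written by WriteNorms above, assuming
 // Similarity.DecodeNorm undoes Similarity.EncodeNorm.
 private float[] ReadNorms(System.String segment, int fieldNumber, int maxDoc)
 {
     IndexInput input = directory.OpenInput(segment + ".f" + fieldNumber);
     try
     {
         float[] norms = new float[maxDoc];
         for (int doc = 0; doc < maxDoc; doc++)
         {
             norms[doc] = Similarity.DecodeNorm(input.ReadByte());
         }
         return norms;
     }
     finally
     {
         input.Close();
     }
 }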
Example #2
0
        /// <summary>Writes this vector to the file <code>name</code> in Directory
        /// <code>d</code>, in a format that can be read by the constructor {@link
        /// #BitVector(Directory, String)}.
        /// </summary>
        public void  Write(Directory d, System.String name)
        {
            IndexOutput output = d.CreateOutput(name);

            try
            {
                if (IsSparse())
                {
                    WriteDgaps(output);                     // sparse bit-set more efficiently saved as d-gaps.
                }
                else
                {
                    WriteBits(output);
                }
            }
            finally
            {
                output.Close();
            }
        }
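The d-gaps comment above refers to storing a sparse vector as the positions of its set bits, each encoded as a delta from the previous set bit. A minimal sketch of that idea follows; it is not the actual WriteDgaps implementation, and the header fields written here are assumptions.
        // Illustrative d-gap encoding sketch (not the real WriteDgaps):
        // a sparse bit set costs a few VInts per set bit instead of one
        // byte for every eight bits.
        private void WriteDgapsSketch(IndexOutput output, bool[] bits, int count)
        {
            output.WriteInt(-1);               // assumed marker for the d-gap format
            output.WriteVInt(bits.Length);     // logical size of the vector
            output.WriteVInt(count);           // number of set bits
            int last = 0;
            for (int i = 0; i < bits.Length; i++)
            {
                if (bits[i])
                {
                    output.WriteVInt(i - last);   // gap from the previous set bit
                    last = i;
                }
            }
        }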
Example #3
0
        private void  Demo_FSIndexInputBug(Directory fsdir, System.String file)
        {
            // Setup the test file - we need more than 1024 bytes
            IndexOutput os = fsdir.CreateOutput(file);

            for (int i = 0; i < 2000; i++)
            {
                os.WriteByte((byte)i);
            }
            os.Close();

            IndexInput in_Renamed = fsdir.OpenInput(file);

            // This read primes the buffer in IndexInput
            byte b = in_Renamed.ReadByte();

            // Close the file
            in_Renamed.Close();

            // ERROR: this call should fail, but succeeds because the buffer
            // is still filled
            b = in_Renamed.ReadByte();

            // ERROR: this call should fail, but succeeds for some reason as well
            in_Renamed.Seek(1099);

            try
            {
                // OK: this call correctly fails. We are now past the 1024 internal
                // buffer, so an actual IO is attempted, which fails
                b = in_Renamed.ReadByte();
                Assert.Fail("expected readByte() to throw exception");
            }
            catch (System.Exception)
            {
                // expected exception
            }
        }
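The failures above stem from buffering: Close() releases the underlying file, but the 1024-byte read buffer stays filled, so reads and seeks that land inside it still appear to succeed. Below is a stripped-down, hypothetical buffered reader with the same flaw (not a Lucene class), sketched only to illustrate the mechanism.
        // Hypothetical minimal buffered reader illustrating the bug above:
        // Close() closes the stream but does not invalidate the buffer, so
        // reads that fall inside the already-filled buffer keep succeeding.
        internal class BufferedByteReader
        {
            private readonly System.IO.Stream stream;
            private readonly byte[] buffer = new byte[1024];
            private int bufferLength;       // number of valid bytes in buffer
            private int bufferPosition;     // next read position in buffer

            public BufferedByteReader(System.IO.Stream stream)
            {
                this.stream = stream;
            }

            public byte ReadByte()
            {
                if (bufferPosition >= bufferLength)
                {
                    // Refill; this throws once the stream has been closed.
                    bufferLength = stream.Read(buffer, 0, buffer.Length);
                    bufferPosition = 0;
                    if (bufferLength == 0)
                    {
                        throw new System.IO.EndOfStreamException();
                    }
                }
                return buffer[bufferPosition++];
            }

            public void Close()
            {
                stream.Close();
                // BUG (mirroring the demo): buffer, bufferLength and
                // bufferPosition are left intact, so ReadByte() keeps
                // returning stale data until the buffer runs out.
            }
        }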
Example #4
0
        public virtual void  TestLargeWrites()
        {
            IndexOutput os = dir.CreateOutput("testBufferStart.txt");

            byte[] largeBuf = new byte[2048];
            System.Random random = new System.Random();    // create once; a new instance per iteration reuses the same time-based seed
            for (int i = 0; i < largeBuf.Length; i++)
            {
                largeBuf[i] = (byte)(random.NextDouble() * 256);
            }

            long currentPos = os.GetFilePointer();

            os.WriteBytes(largeBuf, largeBuf.Length);

            try
            {
                Assert.AreEqual(currentPos + largeBuf.Length, os.GetFilePointer());
            }
            finally
            {
                os.Close();
            }
        }
Example #5
0
 public void ReWrite()
 {
     // NOTE: norms are re-written in regular directory, not cfs
     IndexOutput out_Renamed = Enclosing_Instance.Directory().CreateOutput(Enclosing_Instance.segment + ".tmp");
     try
     {
         out_Renamed.WriteBytes(bytes, Enclosing_Instance.MaxDoc());
     }
     finally
     {
         out_Renamed.Close();
     }
     System.String fileName;
     if (Enclosing_Instance.cfsReader == null)
     {
         fileName = Enclosing_Instance.segment + ".f" + number;
     }
     else
     {
         // use a different file name if we have compound format
         fileName = Enclosing_Instance.segment + ".s" + number;
     }
     Enclosing_Instance.Directory().RenameFile(Enclosing_Instance.segment + ".tmp", fileName);
     this.dirty = false;
 }
Example #6
0
        private void  MergeTerms()
        {
            try
            {
                freqOutput = directory.CreateOutput(segment + ".frq");
                if (HasProx())
                {
                    proxOutput = directory.CreateOutput(segment + ".prx");
                }
                termInfosWriter = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
                skipInterval    = termInfosWriter.skipInterval;
                maxSkipLevels   = termInfosWriter.maxSkipLevels;
                skipListWriter  = new DefaultSkipListWriter(skipInterval, maxSkipLevels, mergedDocs, freqOutput, proxOutput);
                queue           = new SegmentMergeQueue(readers.Count);

                MergeTermInfos();
            }
            finally
            {
                if (freqOutput != null)
                {
                    freqOutput.Close();
                }
                if (proxOutput != null)
                {
                    proxOutput.Close();
                }
                if (termInfosWriter != null)
                {
                    termInfosWriter.Close();
                }
                if (queue != null)
                {
                    queue.Close();
                }
            }
        }
Example #7
0
        /// <summary>Merge files with the extensions added up to now.
        /// All files with these extensions are combined sequentially into the
        /// compound stream. After successful merge, the source files
        /// are deleted.
        /// </summary>
        /// <throws>SystemException if Close() has already been called, or
        /// if no file has been added to this object</throws>
        public void  Close()
        {
            if (merged)
            {
                throw new System.SystemException("Merge already performed");
            }

            if ((entries.Count == 0))
            {
                throw new System.SystemException("No entries to merge have been defined");
            }

            merged = true;

            // open the compound stream
            IndexOutput os = null;

            try
            {
                os = directory.CreateOutput(fileName);

                // Write the number of entries
                os.WriteVInt(entries.Count);

                // Write the directory with all offsets at 0.
                // Remember the positions of directory entries so that we can
                // adjust the offsets later
                System.Collections.IEnumerator it = entries.GetEnumerator();
                long totalSize = 0;
                while (it.MoveNext())
                {
                    FileEntry fe = (FileEntry)it.Current;
                    fe.directoryOffset = os.GetFilePointer();
                    os.WriteLong(0);                     // for now
                    os.WriteString(fe.file);
                    totalSize += directory.FileLength(fe.file);
                }

                // Pre-allocate size of file as optimization --
                // this can potentially help IO performance as
                // we write the file and also later during
                // searching.  It also uncovers a disk-full
                // situation earlier and hopefully without
                // actually filling disk to 100%:
                long finalLength = totalSize + os.GetFilePointer();
                os.SetLength(finalLength);

                // Open the files and copy their data into the stream.
                // Remember the locations of each file's data section.
                byte[] buffer = new byte[16384];
                it = entries.GetEnumerator();
                while (it.MoveNext())
                {
                    FileEntry fe = (FileEntry)it.Current;
                    fe.dataOffset = os.GetFilePointer();
                    CopyFile(fe, os, buffer);
                }

                // Write the data offsets into the directory of the compound stream
                it = entries.GetEnumerator();
                while (it.MoveNext())
                {
                    FileEntry fe = (FileEntry)it.Current;
                    os.Seek(fe.directoryOffset);
                    os.WriteLong(fe.dataOffset);
                }

                System.Diagnostics.Debug.Assert(finalLength == os.Length());

                // Close the output stream. Set the os to null before trying to
                // close so that if an exception occurs during the close, the
                // finally clause below will not attempt to close the stream
                // the second time.
                IndexOutput tmp = os;
                os = null;
                tmp.Close();
            }
            finally
            {
                if (os != null)
                {
                    try
                    {
                        os.Close();
                    }
                    catch (System.IO.IOException)
                    {
                    }
                }
            }
        }
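The write order above fixes the compound file layout: a VInt entry count, then one (offset placeholder, file name) pair per entry, then the concatenated file data, with the placeholders patched afterwards via Seek. A hedged sketch of reading that table of contents back; the method name and the reuse of FileEntry here are illustrative, not the actual reader.
        // Illustrative sketch only: reads the table of contents produced by
        // Close() above, assuming the same layout (VInt count, then per entry
        // a long data offset followed by the file name).
        private FileEntry[] ReadEntries(IndexInput stream)
        {
            int count = stream.ReadVInt();
            FileEntry[] result = new FileEntry[count];
            for (int i = 0; i < count; i++)
            {
                FileEntry fe = new FileEntry();
                fe.dataOffset = stream.ReadLong();
                fe.file = stream.ReadString();
                result[i] = fe;
            }
            return result;
        }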
Example #8
0
        /// <summary>Merge files with the extensions added up to now.
        /// All files with these extensions are combined sequentially into the
        /// compound stream. After successful merge, the source files
        /// are deleted.
        /// </summary>
        /// <throws>SystemException if Close() has already been called, or
        /// if no file has been added to this object</throws>
        public void  Close()
        {
            if (merged)
            {
                throw new System.SystemException("Merge already performed");
            }

            if ((entries.Count == 0))
            {
                throw new System.SystemException("No entries to merge have been defined");
            }

            merged = true;

            // open the compound stream
            IndexOutput os = null;

            try
            {
                os = directory.CreateOutput(fileName);

                // Write the number of entries
                os.WriteVInt(entries.Count);

                // Write the directory with all offsets at 0.
                // Remember the positions of directory entries so that we can
                // adjust the offsets later
                System.Collections.IEnumerator it = entries.GetEnumerator();
                while (it.MoveNext())
                {
                    FileEntry fe = (FileEntry)it.Current;
                    fe.directoryOffset = os.GetFilePointer();
                    os.WriteLong(0);                     // for now
                    os.WriteString(fe.file);
                }

                // Open the files and copy their data into the stream.
                // Remember the locations of each file's data section.
                byte[] buffer = new byte[1024];
                it = entries.GetEnumerator();
                while (it.MoveNext())
                {
                    FileEntry fe = (FileEntry)it.Current;
                    fe.dataOffset = os.GetFilePointer();
                    CopyFile(fe, os, buffer);
                }

                // Write the data offsets into the directory of the compound stream
                it = entries.GetEnumerator();
                while (it.MoveNext())
                {
                    FileEntry fe = (FileEntry)it.Current;
                    os.Seek(fe.directoryOffset);
                    os.WriteLong(fe.dataOffset);
                }

                // Close the output stream. Set the os to null before trying to
                // close so that if an exception occurs during the close, the
                // finally clause below will not attempt to close the stream
                // the second time.
                IndexOutput tmp = os;
                os = null;
                tmp.Close();
            }
            finally
            {
                if (os != null)
                {
                    try
                    {
                        os.Close();
                    }
                    catch (System.IO.IOException)
                    {
                    }
                }
            }
        }
Example #9
0
        // TODO: would be nice to factor out more of this, eg the
        // FreqProxFieldMergeState, and code to visit all Fields
        // under the same FieldInfo together, up into TermsHash*.
        // Other writers would presumably share a lot of this...

        internal override void flush(IDictionary <object, object> threadsAndFields, DocumentsWriter.FlushState state)
        {
            // Gather all FieldData's that have postings, across all
            // ThreadStates
            List <object> allFields = new List <object>();

            IEnumerator <KeyValuePair <object, object> > it = threadsAndFields.GetEnumerator();

            while (it.MoveNext())
            {
                KeyValuePair <object, object> entry = (KeyValuePair <object, object>)it.Current;

                ICollection <object> fields = (ICollection <object>)entry.Value;

                IEnumerator <object> fieldsIt = fields.GetEnumerator();

                while (fieldsIt.MoveNext())
                {
                    FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)fieldsIt.Current;
                    if (perField.termsHashPerField.numPostings > 0)
                    {
                        allFields.Add(perField);
                    }
                }
            }

            // Sort by field name
            allFields.Sort();
            int numAllFields = allFields.Count;

            TermInfosWriter termsOut = new TermInfosWriter(state.directory,
                                                           state.segmentName,
                                                           fieldInfos,
                                                           state.docWriter.writer.GetTermIndexInterval());

            IndexOutput freqOut = state.directory.CreateOutput(state.SegmentFileName(IndexFileNames.FREQ_EXTENSION));
            IndexOutput proxOut;

            if (fieldInfos.HasProx())
            {
                proxOut = state.directory.CreateOutput(state.SegmentFileName(IndexFileNames.PROX_EXTENSION));
            }
            else
            {
                proxOut = null;
            }

            DefaultSkipListWriter skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval,
                                                                             termsOut.maxSkipLevels,
                                                                             state.numDocsInRAM, freqOut, proxOut);

            int start = 0;

            while (start < numAllFields)
            {
                FieldInfo fieldInfo = ((FreqProxTermsWriterPerField)allFields[start]).fieldInfo;
                string    fieldName = fieldInfo.name;

                int end = start + 1;
                while (end < numAllFields && ((FreqProxTermsWriterPerField)allFields[end]).fieldInfo.name.Equals(fieldName))
                {
                    end++;
                }

                FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
                for (int i = start; i < end; i++)
                {
                    fields[i - start] = (FreqProxTermsWriterPerField)allFields[i];

                    // Aggregate the storePayload as seen by the same
                    // field across multiple threads
                    fieldInfo.storePayloads |= fields[i - start].hasPayloads;
                }

                // If this field has postings then add them to the
                // segment
                AppendPostings(state, fields, termsOut, freqOut, proxOut, skipListWriter);

                for (int i = 0; i < fields.Length; i++)
                {
                    TermsHashPerField perField = fields[i].termsHashPerField;
                    int numPostings            = perField.numPostings;
                    perField.reset();
                    perField.shrinkHash(numPostings);
                    fields[i].reset();
                }

                start = end;
            }

            it = threadsAndFields.GetEnumerator();
            while (it.MoveNext())
            {
                KeyValuePair <object, object> entry     = (KeyValuePair <object, object>)it.Current;
                FreqProxTermsWriterPerThread  perThread = (FreqProxTermsWriterPerThread)entry.Key;
                perThread.termsHashPerThread.reset(true);
            }

            freqOut.Close();
            if (proxOut != null)
            {
                state.flushedFiles[state.SegmentFileName(IndexFileNames.PROX_EXTENSION)] = state.SegmentFileName(IndexFileNames.PROX_EXTENSION);
                proxOut.Close();
            }
            termsOut.Close();

            // Record all files we have flushed
            state.flushedFiles[state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION)] = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
            state.flushedFiles[state.SegmentFileName(IndexFileNames.FREQ_EXTENSION)]        = state.SegmentFileName(IndexFileNames.FREQ_EXTENSION);
            state.flushedFiles[state.SegmentFileName(IndexFileNames.TERMS_EXTENSION)]       = state.SegmentFileName(IndexFileNames.TERMS_EXTENSION);
            state.flushedFiles[state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION)] = state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION);
        }
Example #10
0
 internal void  Close()
 {
     fieldsStream.Close();
     indexStream.Close();
 }
Example #11
0
        internal void  FinishCommit(Directory dir)
        {
            if (pendingSegnOutput == null)
            {
                throw new System.SystemException("prepareCommit was not called");
            }
            bool success = false;

            try
            {
                pendingSegnOutput.FinishCommit();
                pendingSegnOutput.Close();
                pendingSegnOutput = null;
                success           = true;
            }
            finally
            {
                if (!success)
                {
                    RollbackCommit(dir);
                }
            }

            // NOTE: if we crash here, we have left a segments_N
            // file in the directory in a possibly corrupt state (if
            // some bytes made it to stable storage and others
            // didn't).  But, the segments_N file includes checksum
            // at the end, which should catch this case.  So when a
            // reader tries to read it, it will throw a
            // CorruptIndexException, which should cause the retry
            // logic in SegmentInfos to kick in and load the last
            // good (previous) segments_N-1 file.

            System.String fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);
            success = false;
            try
            {
                dir.Sync(fileName);
                success = true;
            }
            finally
            {
                if (!success)
                {
                    try
                    {
                        dir.DeleteFile(fileName);
                    }
                    catch (System.Exception)
                    {
                        // Suppress so we keep throwing the original exception
                    }
                }
            }

            lastGeneration = generation;

            try
            {
                IndexOutput genOutput = dir.CreateOutput(IndexFileNames.SEGMENTS_GEN);
                try
                {
                    genOutput.WriteInt(FORMAT_LOCKLESS);
                    genOutput.WriteLong(generation);
                    genOutput.WriteLong(generation);
                }
                finally
                {
                    genOutput.Close();
                }
            }
            catch (System.Exception)
            {
                // It's OK if we fail to write this file since it's
                // used only as one of the retry fallbacks.
            }
        }
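The generation is written twice to segments.gen on purpose: a reader can trust the file only when both longs agree, which guards against a partially written file. Below is a hedged sketch of that check; the method name and the -1 sentinel are assumptions, not Lucene API.
        // Illustrative sketch: reads segments.gen as written above and
        // returns the generation only if the two copies match, else -1.
        private long ReadGenerationFromSegmentsGen(Directory dir)
        {
            IndexInput genInput = dir.OpenInput(IndexFileNames.SEGMENTS_GEN);
            try
            {
                int format = genInput.ReadInt();
                if (format != FORMAT_LOCKLESS)
                {
                    return -1;                  // unknown format; caller falls back to other strategies
                }
                long gen0 = genInput.ReadLong();
                long gen1 = genInput.ReadLong();
                if (gen0 == gen1)
                {
                    return gen0;                // only trust a consistent pair
                }
                return -1;
            }
            finally
            {
                genInput.Close();
            }
        }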
Example #12
0
        private void  WritePostings(Posting[] postings, System.String segment)
        {
            IndexOutput       freq = null, prox = null;
            TermInfosWriter   tis              = null;
            TermVectorsWriter termVectorWriter = null;

            try
            {
                //open files for inverse index storage
                freq = directory.CreateOutput(segment + ".frq");
                prox = directory.CreateOutput(segment + ".prx");
                tis  = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
                TermInfo      ti           = new TermInfo();
                System.String currentField = null;

                for (int i = 0; i < postings.Length; i++)
                {
                    Posting posting = postings[i];

                    // add an entry to the dictionary with pointers to prox and freq files
                    ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), -1);
                    tis.Add(posting.term, ti);

                    // add an entry to the freq file
                    int postingFreq = posting.freq;
                    if (postingFreq == 1)
                    {
                        // optimize freq=1: write doc delta with the low bit set
                        freq.WriteVInt(1);
                    }
                    else
                    {
                        freq.WriteVInt(0);                         // the document number
                        freq.WriteVInt(postingFreq);               // frequency in doc
                    }

                    int   lastPosition = 0;                   // write positions
                    int[] positions    = posting.positions;
                    for (int j = 0; j < postingFreq; j++)
                    {
                        // use delta-encoding
                        int position = positions[j];
                        prox.WriteVInt(position - lastPosition);
                        lastPosition = position;
                    }
                    // check to see if we switched to a new field
                    System.String termField = posting.term.Field();
                    if (currentField != termField)
                    {
                        // changing field - see if there is something to save
                        currentField = termField;
                        FieldInfo fi = fieldInfos.FieldInfo(currentField);
                        if (fi.storeTermVector)
                        {
                            if (termVectorWriter == null)
                            {
                                termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos);
                                termVectorWriter.OpenDocument();
                            }
                            termVectorWriter.OpenField(currentField);
                        }
                        else if (termVectorWriter != null)
                        {
                            termVectorWriter.CloseField();
                        }
                    }
                    if (termVectorWriter != null && termVectorWriter.IsFieldOpen())
                    {
                        termVectorWriter.AddTerm(posting.term.Text(), postingFreq, posting.positions, posting.offsets);
                    }
                }
                if (termVectorWriter != null)
                {
                    termVectorWriter.CloseDocument();
                }
            }
            finally
            {
                // make an effort to close all streams we can but remember and re-throw
                // the first exception encountered in this process
                System.IO.IOException keep = null;
                if (freq != null)
                {
                    try
                    {
                        freq.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (prox != null)
                {
                    try
                    {
                        prox.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (tis != null)
                {
                    try
                    {
                        tis.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (termVectorWriter != null)
                {
                    try
                    {
                        termVectorWriter.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (keep != null)
                {
                    throw keep;                     // re-throw the first exception encountered
                }
            }
        }
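In the .frq format the document delta and the frequency share one VInt: the delta is shifted left one bit, and the low bit is set when the frequency is 1, in which case the frequency itself is omitted. The writer above handles a single document (doc 0), which is why it can write the constants 1 and 0 directly. A hedged sketch of the general-case encoding; WriteDocFreq is an illustrative helper, not Lucene API.
        // Illustrative helper showing the general doc/freq encoding that the
        // single-document case above specializes.
        private static void WriteDocFreq(IndexOutput freq, int docDelta, int postingFreq)
        {
            if (postingFreq == 1)
            {
                freq.WriteVInt((docDelta << 1) | 1);    // delta with low bit set: freq == 1, omit it
            }
            else
            {
                freq.WriteVInt(docDelta << 1);          // delta with low bit clear
                freq.WriteVInt(postingFreq);            // followed by the frequency
            }
        }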
Example #13
0
        /// <summary>Produce _X.nrm if any document had a field with norms
        /// not disabled
        /// </summary>
        internal override void flush(IDictionary <object, ICollection <object> > threadsAndFields, DocumentsWriter.FlushState state)
        {
            IDictionary <object, object> byField = new Dictionary <object, object>();

            // Typically, each thread will have encountered the same
            // field.  So first we collate by field, ie, all
            // per-thread field instances that correspond to the
            // same FieldInfo
            IEnumerator <KeyValuePair <object, ICollection <object> > > it = threadsAndFields.GetEnumerator();

            while (it.MoveNext())
            {
                KeyValuePair <object, ICollection <object> > entry = it.Current;

                ICollection <object> fields         = entry.Value;
                IEnumerator <object> fieldsIt       = fields.GetEnumerator();
                List <object>        fieldsToRemove = new List <object>(fields.Count);

                while (fieldsIt.MoveNext())
                {
                    NormsWriterPerField perField = (NormsWriterPerField)fieldsIt.Current;

                    if (perField.upto > 0)
                    {
                        // It has some norms
                        IList <object> l;
                        if (byField.ContainsKey(perField.fieldInfo))
                        {
                            l = (IList <object>)byField[perField.fieldInfo];
                        }
                        else
                        {
                            l = new List <object>();
                            byField[perField.fieldInfo] = l;
                        }
                        //IList<object> l = (IList<object>)byField[perField.fieldInfo];
                        //if (l == null)
                        //{
                        //    l = new List<object>();
                        //    byField[perField.fieldInfo] = l;
                        //}
                        l.Add(perField);
                    }
                    else
                    {
                        // Remove this field since we haven't seen it
                        // since the previous flush
                        fieldsToRemove.Add(perField);
                        //fields.Remove(perField);
                    }
                }
                for (int i = 0; i < fieldsToRemove.Count; i++)
                {
                    fields.Remove(fieldsToRemove[i]);
                }
            }

            string normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;

            state.flushedFiles[normsFileName] = normsFileName;
            IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

            try
            {
                normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

                int numField = fieldInfos.Size();

                int normCount = 0;

                for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
                {
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

                    List <object> toMerge;
                    int           upto = 0;
                    if (byField.ContainsKey(fieldInfo))
                    {
                        toMerge = (List <object>)byField[fieldInfo];

                        int numFields = toMerge.Count;

                        normCount++;

                        NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                        int[] uptos = new int[numFields];

                        for (int j = 0; j < numFields; j++)
                        {
                            fields[j] = (NormsWriterPerField)toMerge[j];
                        }

                        int numLeft = numFields;

                        while (numLeft > 0)
                        {
                            System.Diagnostics.Debug.Assert(uptos[0] < fields[0].docIDs.Length, " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.Length));

                            int minLoc   = 0;
                            int minDocID = fields[0].docIDs[uptos[0]];

                            for (int j = 1; j < numLeft; j++)
                            {
                                int docID = fields[j].docIDs[uptos[j]];
                                if (docID < minDocID)
                                {
                                    minDocID = docID;
                                    minLoc   = j;
                                }
                            }

                            System.Diagnostics.Debug.Assert(minDocID < state.numDocsInRAM);

                            // Fill hole
                            for (; upto < minDocID; upto++)
                            {
                                normsOut.WriteByte(defaultNorm);
                            }

                            normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                            (uptos[minLoc])++;
                            upto++;

                            if (uptos[minLoc] == fields[minLoc].upto)
                            {
                                fields[minLoc].reset();
                                if (minLoc != numLeft - 1)
                                {
                                    fields[minLoc] = fields[numLeft - 1];
                                    uptos[minLoc]  = uptos[numLeft - 1];
                                }
                                numLeft--;
                            }
                        }

                        // Fill final hole with defaultNorm
                        for (; upto < state.numDocsInRAM; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }
                    else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
                    {
                        normCount++;
                        // Fill entire field with default norm:
                        for (; upto < state.numDocsInRAM; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }

                    System.Diagnostics.Debug.Assert(4 + normCount * state.numDocsInRAM == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocsInRAM) + " actual=" + normsOut.GetFilePointer());
                }
            }
            finally
            {
                normsOut.Close();
            }
        }
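The assertion at the end documents the layout: a 4-byte NORMS_HEADER followed by one byte per document for each norm-bearing field, in field order. A hedged sketch of locating one document's norm under that layout; ReadNorm is illustrative, and computing normFieldIndex (how many norm-bearing fields precede this one) is left to the caller.
        // Illustrative sketch only: given the layout asserted above (4 header
        // bytes, then numDocs norm bytes for each norm-bearing field, in
        // field order), fetch a single document's norm byte.
        private static byte ReadNorm(IndexInput normsIn, int normFieldIndex, int numDocs, int docID)
        {
            long offset = 4L + (long) normFieldIndex * numDocs + docID;
            normsIn.Seek(offset);
            return normsIn.ReadByte();
        }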
Example #14
0
        /// <summary>Produce _X.nrm if any document had a field with norms
        /// not disabled
        /// </summary>
        public override void  Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
        {
            System.Collections.IDictionary byField = new System.Collections.Hashtable();

            // Typically, each thread will have encountered the same
            // field.  So first we collate by field, ie, all
            // per-thread field instances that correspond to the
            // same FieldInfo
            System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
            while (it.MoveNext())
            {
                System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current;

                System.Collections.ICollection fields         = (System.Collections.ICollection)entry.Value;
                System.Collections.IEnumerator fieldsIt       = fields.GetEnumerator();
                System.Collections.ArrayList   fieldsToRemove = new System.Collections.ArrayList();

                while (fieldsIt.MoveNext())
                {
                    NormsWriterPerField perField = (NormsWriterPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key;

                    if (perField.upto > 0)
                    {
                        // It has some norms
                        System.Collections.IList l = (System.Collections.IList)byField[perField.fieldInfo];
                        if (l == null)
                        {
                            l = new System.Collections.ArrayList();
                            byField[perField.fieldInfo] = l;
                        }
                        l.Add(perField);
                    }
                    // Remove this field since we haven't seen it
                    // since the previous flush
                    else
                    {
                        fieldsToRemove.Add(perField);
                    }
                }

                System.Collections.Hashtable fieldsHT = (System.Collections.Hashtable)fields;
                for (int i = 0; i < fieldsToRemove.Count; i++)
                {
                    fieldsHT.Remove(fieldsToRemove[i]);
                }
            }

            System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
            state.flushedFiles[normsFileName] = normsFileName;
            IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

            try
            {
                normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

                int numField = fieldInfos.Size();

                int normCount = 0;

                for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
                {
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

                    System.Collections.IList toMerge = (System.Collections.IList)byField[fieldInfo];
                    int upto = 0;
                    if (toMerge != null)
                    {
                        int numFields = toMerge.Count;

                        normCount++;

                        NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                        int[] uptos = new int[numFields];

                        for (int j = 0; j < numFields; j++)
                        {
                            fields[j] = (NormsWriterPerField)toMerge[j];
                        }

                        int numLeft = numFields;

                        while (numLeft > 0)
                        {
                            System.Diagnostics.Debug.Assert(uptos[0] < fields[0].docIDs.Length, " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.Length));

                            int minLoc   = 0;
                            int minDocID = fields[0].docIDs[uptos[0]];

                            for (int j = 1; j < numLeft; j++)
                            {
                                int docID = fields[j].docIDs[uptos[j]];
                                if (docID < minDocID)
                                {
                                    minDocID = docID;
                                    minLoc   = j;
                                }
                            }

                            System.Diagnostics.Debug.Assert(minDocID < state.numDocs);

                            // Fill hole
                            for (; upto < minDocID; upto++)
                            {
                                normsOut.WriteByte(defaultNorm);
                            }

                            normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                            (uptos[minLoc])++;
                            upto++;

                            if (uptos[minLoc] == fields[minLoc].upto)
                            {
                                fields[minLoc].Reset();
                                if (minLoc != numLeft - 1)
                                {
                                    fields[minLoc] = fields[numLeft - 1];
                                    uptos[minLoc]  = uptos[numLeft - 1];
                                }
                                numLeft--;
                            }
                        }

                        // Fill final hole with defaultNorm
                        for (; upto < state.numDocs; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }
                    else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
                    {
                        normCount++;
                        // Fill entire field with default norm:
                        for (; upto < state.numDocs; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }

                    System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.GetFilePointer());
                }
            }
            finally
            {
                normsOut.Close();
            }
        }
Example #15
0
        private void MergeTerms()
        {
            try
            {
                freqOutput = directory.CreateOutput(segment + ".frq");
                proxOutput = directory.CreateOutput(segment + ".prx");
                termInfosWriter = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
                skipInterval = termInfosWriter.skipInterval;
                queue = new SegmentMergeQueue(readers.Count);

                MergeTermInfos();
            }
            finally
            {
                if (freqOutput != null)
                    freqOutput.Close();
                if (proxOutput != null)
                    proxOutput.Close();
                if (termInfosWriter != null)
                    termInfosWriter.Close();
                if (queue != null)
                    queue.Close();
            }
        }
Example #16
0
        /// <summary>Merge files with the extensions added up to now.
        /// All files with these extensions are combined sequentially into the
        /// compound stream. After successful merge, the source files
        /// are deleted.
        /// </summary>
        /// <throws>SystemException if Dispose() has already been called, or
        /// if no file has been added to this object</throws>
        public void Dispose()
        {
            // Extract into protected method if class ever becomes unsealed

            // TODO: Dispose shouldn't throw exceptions!
            if (merged)
                throw new SystemException("Merge already performed");

            if ((entries.Count == 0))
                throw new SystemException("No entries to merge have been defined");

            merged = true;

            // open the compound stream
            IndexOutput os = null;
            try
            {
                os = directory.CreateOutput(fileName);

                // Write the number of entries
                os.WriteVInt(entries.Count);

                // Write the directory with all offsets at 0.
                // Remember the positions of directory entries so that we can
                // adjust the offsets later
                long totalSize = 0;
                foreach (FileEntry fe in entries)
                {
                    fe.directoryOffset = os.FilePointer;
                    os.WriteLong(0); // for now
                    os.WriteString(fe.file);
                    totalSize += directory.FileLength(fe.file);
                }

                // Pre-allocate size of file as optimization --
                // this can potentially help IO performance as
                // we write the file and also later during
                // searching.  It also uncovers a disk-full
                // situation earlier and hopefully without
                // actually filling disk to 100%:
                long finalLength = totalSize + os.FilePointer;
                os.SetLength(finalLength);

                // Open the files and copy their data into the stream.
                // Remember the locations of each file's data section.
                var buffer = new byte[16384];
                foreach (FileEntry fe in entries)
                {
                    fe.dataOffset = os.FilePointer;
                    CopyFile(fe, os, buffer);
                }

                // Write the data offsets into the directory of the compound stream
                foreach (FileEntry fe in entries)
                {
                    os.Seek(fe.directoryOffset);
                    os.WriteLong(fe.dataOffset);
                }

                System.Diagnostics.Debug.Assert(finalLength == os.Length);

                // Close the output stream. Set the os to null before trying to
                // close so that if an exception occurs during the close, the
                // finally clause below will not attempt to close the stream
                // the second time.
                IndexOutput tmp = os;
                os = null;
                tmp.Close();
            }
            finally
            {
                if (os != null)
                    try
                    {
                        os.Close();
                    }
                    catch (System.IO.IOException)
                    {
                    }
            }
        }
Example #17
0
 internal void  Close()
 {
     out_Renamed.Close();
     posWriter.Close();
 }
Example #18
0
        public void  Write(Directory directory)
        {
            System.String segmentFileName = GetNextSegmentFileName();

            // Always advance the generation on write:
            if (generation == -1)
            {
                generation = 1;
            }
            else
            {
                generation++;
            }

            IndexOutput output = directory.CreateOutput(segmentFileName);

            bool success = false;

            try
            {
                output.WriteInt(CURRENT_FORMAT);        // write FORMAT
                output.WriteLong(++version);            // every write changes
                // the index
                output.WriteInt(counter);               // write counter
                output.WriteInt(Count);                 // write infos
                for (int i = 0; i < Count; i++)
                {
                    Info(i).Write(output);
                }
            }
            finally
            {
                try
                {
                    output.Close();
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        // Try not to leave a truncated segments_N file in
                        // the index:
                        directory.DeleteFile(segmentFileName);
                    }
                }
            }

            try
            {
                output = directory.CreateOutput(IndexFileNames.SEGMENTS_GEN);
                try
                {
                    output.WriteInt(FORMAT_LOCKLESS);
                    output.WriteLong(generation);
                    output.WriteLong(generation);
                }
                finally
                {
                    output.Close();
                }
            }
            catch (System.IO.IOException)
            {
                // It's OK if we fail to write this file since it's
                // used only as one of the retry fallbacks.
            }

            lastGeneration = generation;
        }
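Reading a segments_N file mirrors the write order above: format, version, counter, the number of infos, then each SegmentInfo. A hedged sketch follows; it assumes a SegmentInfo constructor that reads one entry from the stream and skips the format/version compatibility checks a real reader needs.
        // Illustrative sketch: reads back the fields in the order that
        // Write(Directory) emits them above. The SegmentInfo(directory,
        // format, input) constructor is assumed here, and format/version
        // validation is omitted.
        public void Read(Directory directory, System.String segmentFileName)
        {
            IndexInput input = directory.OpenInput(segmentFileName);
            try
            {
                int format = input.ReadInt();       // FORMAT
                version = input.ReadLong();         // index version
                counter = input.ReadInt();          // segment name counter
                int count = input.ReadInt();        // number of segment infos
                for (int i = 0; i < count; i++)
                {
                    Add(new SegmentInfo(directory, format, input));
                }
            }
            finally
            {
                input.Close();
            }
        }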
        private void MergeTerms()
        {
            try
            {
                freqOutput = directory.CreateOutput(segment + ".frq");
                if (HasProx())
                    proxOutput = directory.CreateOutput(segment + ".prx");
                termInfosWriter = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
                skipInterval = termInfosWriter.skipInterval;
                maxSkipLevels = termInfosWriter.maxSkipLevels;
                skipListWriter = new DefaultSkipListWriter(skipInterval, maxSkipLevels, mergedDocs, freqOutput, proxOutput);
                queue = new SegmentMergeQueue(readers.Count);

                MergeTermInfos();
            }
            finally
            {
                if (freqOutput != null)
                    freqOutput.Close();
                if (proxOutput != null)
                    proxOutput.Close();
                if (termInfosWriter != null)
                    termInfosWriter.Close();
                if (queue != null)
                    queue.Close();
            }
        }