public PreFlexRWStoredFieldsWriter(Directory directory, string segment, IOContext context)
        {
            Debug.Assert(directory != null);
            this.Directory = directory;
            this.Segment = segment;

            bool success = false;
            try
            {
                FieldsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), context);
                IndexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION), context);

                FieldsStream.WriteInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT);
                IndexStream.WriteInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT);

                success = true;
            }
            finally
            {
                if (!success)
                {
                    Abort();
                }
            }
        }
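The constructor above uses the success-flag idiom that recurs throughout these snippets: open resources, set a flag only once everything has succeeded, and clean up in finally otherwise. A minimal self-contained C# sketch of the same idiom, using plain FileStream rather than the Lucene.Net types (an illustrative assumption, not library code):

        // Sketch: acquire two resources atomically. If anything throws before
        // 'success' is set, the finally block releases whatever was opened.
        public static (FileStream Data, FileStream Index) OpenPair(string dataPath, string indexPath)
        {
            FileStream data = null, index = null;
            bool success = false;
            try
            {
                data    = new FileStream(dataPath, FileMode.Create, FileAccess.Write);
                index   = new FileStream(indexPath, FileMode.Create, FileAccess.Write);
                success = true;
                return (data, index);
            }
            finally
            {
                if (!success)
                {
                    // release in reverse order of acquisition
                    index?.Dispose();
                    data?.Dispose();
                }
            }
        }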
        /// <summary>
        /// Save a single segment's info. </summary>
        public override void Write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext)
        {
            string fileName = IndexFileNames.SegmentFileName(si.Name, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
            si.AddFile(fileName);

            IndexOutput output = dir.CreateOutput(fileName, ioContext);

            bool success = false;
            try
            {
                CodecUtil.WriteHeader(output, Lucene40SegmentInfoFormat.CODEC_NAME, Lucene40SegmentInfoFormat.VERSION_CURRENT);
                // Write the Lucene version that created this segment, since 3.1
                output.WriteString(si.Version);
                output.WriteInt(si.DocCount);

                output.WriteByte((byte)(si.UseCompoundFile ? SegmentInfo.YES : SegmentInfo.NO));
                output.WriteStringStringMap(si.Diagnostics);
                output.WriteStringStringMap(CollectionsHelper.EmptyMap<string, string>());
                output.WriteStringSet(si.Files);

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(output);
                    si.Dir.DeleteFile(fileName);
                }
                else
                {
                    output.Dispose();
                }
            }
        }
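For reference, the on-disk .si layout produced by Write above, summarized as comments (derived directly from the calls; the matching Read override later in this listing consumes the fields in the same order):

        // .si file layout (Lucene40SegmentInfoFormat):
        //   codec header   : CODEC_NAME + VERSION_CURRENT
        //   version        : string  -- Lucene version that created the segment
        //   docCount       : int32
        //   isCompoundFile : byte    -- SegmentInfo.YES or SegmentInfo.NO
        //   diagnostics    : map<string, string>
        //   attributes     : map<string, string>  -- deprecated, written empty
        //   files          : set<string>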
        public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
            String segmentSuffix, IOContext context)
        {
            _input =
                dir.OpenInput(
                    IndexFileNames.SegmentFileName(segment, segmentSuffix,
                        VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true));
            var success = false;

            Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

            try
            {

                _version = ReadHeader(_input);
                _indexDivisor = indexDivisor;

                if (_version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM)
                    CodecUtil.ChecksumEntireFile(_input);
                
                SeekDir(_input, _dirOffset);

                // Read directory
                var numFields = _input.ReadVInt();
                if (numFields < 0)
                {
                    throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + _input + ")");
                }

                for (var i = 0; i < numFields; i++)
                {
                    var field = _input.ReadVInt();
                    var indexStart = _input.ReadVLong();
                    var fieldInfo = fieldInfos.FieldInfo(field);
                    
                    try
                    {
                        _fields.Add(fieldInfo, new FieldIndexData(indexStart, this));
                    }
                    catch (ArgumentException)
                    {
                        throw new CorruptIndexException(String.Format("Duplicate Field: {0}, Resource: {1}",
                            fieldInfo.Name, _input));
                    }
                }
                success = true;
            }
            finally
            {
                if (indexDivisor > 0)
                {
                    _input.Dispose();
                    _input = null;
                    if (success)
                    {
                        _indexLoaded = true;
                    }
                }
            }
        }
 public override IndexInput OpenInput(string name, IOContext context)
 {
     if (sleepMillis != -1)
     {
         return new SlowIndexInput(this, base.OpenInput(name, context));
     }
     return base.OpenInput(name, context);
 }
 public override IndexInput OpenInput(string fileName, IOContext context)
 {
     IndexInput @in = base.OpenInput(fileName, context);
     if (fileName.EndsWith(".frq"))
     {
         @in = new CountingStream(OuterInstance, @in);
     }
     return @in;
 }
        public override IndexOutput CreateOutput(string name, IOContext context)
        {
            if (sleepMillis != -1)
            {
                return new SlowIndexOutput(this, base.CreateOutput(name, context));
            }

            return base.CreateOutput(name, context);
        }
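The Slow* wrappers used above delegate every call to the wrapped input or output and inject a delay to throttle I/O. A minimal analogue built on System.IO.Stream rather than the Lucene.Net IndexInput surface (an illustration of the delegation pattern only, not the library type):

 // Sketch: a delegating stream that sleeps before each read, throttling
 // throughput the way SlowIndexInput throttles Lucene inputs.
 public class SlowReadStream : Stream
 {
     private readonly Stream inner;
     private readonly int sleepMillis;

     public SlowReadStream(Stream inner, int sleepMillis)
     {
         this.inner = inner;
         this.sleepMillis = sleepMillis;
     }

     public override int Read(byte[] buffer, int offset, int count)
     {
         if (sleepMillis > 0) System.Threading.Thread.Sleep(sleepMillis);
         return inner.Read(buffer, offset, count);
     }

     // the remaining members delegate unchanged (writes are unsupported)
     public override bool CanRead => inner.CanRead;
     public override bool CanSeek => inner.CanSeek;
     public override bool CanWrite => false;
     public override long Length => inner.Length;
     public override long Position { get => inner.Position; set => inner.Position = value; }
     public override void Flush() => inner.Flush();
     public override long Seek(long offset, SeekOrigin origin) => inner.Seek(offset, origin);
     public override void SetLength(long value) => throw new NotSupportedException();
     public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException();
 }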
        public UniversalMachine(IOContext ioContext)
        {
            ioContext_ = ioContext;

            // Pre-allocate the array table. Index 0 is deliberately left off the
            // free list (by UM convention it holds the loaded program), so only
            // slots 1..INIT_ARRAYS-1 start out free.
            arrays_ = new uint[INIT_ARRAYS][];
            for (int i = 1; i < INIT_ARRAYS; ++i)
            {
                arraysFree_.Enqueue(i);
            }
        }
        public SepPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext context,
            IntStreamFactory intFactory, string segmentSuffix)
        {
            var success = false;
            try
            {

                var docFileName = IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix,
                    SepPostingsWriter.DOC_EXTENSION);
                _docIn = intFactory.OpenInput(dir, docFileName, context);

                _skipIn =
                    dir.OpenInput(
                        IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.SKIP_EXTENSION),
                        context);

                if (fieldInfos.HasFreq())
                {
                    _freqIn = intFactory.OpenInput(dir,
                        IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.FREQ_EXTENSION),
                        context);
                }
                else
                {
                    _freqIn = null;
                }
                if (fieldInfos.HasProx())
                {
                    _posIn = intFactory.OpenInput(dir,
                        IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.POS_EXTENSION),
                        context);
                    _payloadIn =
                        dir.OpenInput(
                            IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix,
                                SepPostingsWriter.PAYLOAD_EXTENSION), context);
                }
                else
                {
                    _posIn = null;
                    _payloadIn = null;
                }
                success = true;
            }
            finally
            {
                if (!success)
                {
                    Dispose();
                }
            }
        }
 // note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!!
 public SegmentMerger(IList<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval, MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, bool validate)
 {
     // validate incoming readers
     if (validate)
     {
         foreach (AtomicReader reader in readers)
         {
             reader.CheckIntegrity();
         }
     }
     MergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort);
     Directory = dir;
     this.TermIndexInterval = termIndexInterval;
     this.Codec = segmentInfo.Codec;
     this.Context = context;
     this.FieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);
     MergeState.SegmentInfo.DocCount = SetDocMaps();
 }
        /// <summary>
        /// Sole constructor. </summary>
        public Lucene40StoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context)
        {
            string segment = si.Name;
            bool success = false;
            FieldInfos = fn;
            try
            {
                FieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION), context);
                string indexStreamFN = IndexFileNames.SegmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
                IndexStream = d.OpenInput(indexStreamFN, context);

                CodecUtil.CheckHeader(IndexStream, Lucene40StoredFieldsWriter.CODEC_NAME_IDX, Lucene40StoredFieldsWriter.VERSION_START, Lucene40StoredFieldsWriter.VERSION_CURRENT);
                CodecUtil.CheckHeader(FieldsStream, Lucene40StoredFieldsWriter.CODEC_NAME_DAT, Lucene40StoredFieldsWriter.VERSION_START, Lucene40StoredFieldsWriter.VERSION_CURRENT);
                Debug.Assert(Lucene40StoredFieldsWriter.HEADER_LENGTH_DAT == FieldsStream.FilePointer);
                Debug.Assert(Lucene40StoredFieldsWriter.HEADER_LENGTH_IDX == IndexStream.FilePointer);
                long indexSize = IndexStream.Length() - Lucene40StoredFieldsWriter.HEADER_LENGTH_IDX;
                this.Size_Renamed = (int)(indexSize >> 3);
                // Verify two sources of "maxDoc" agree:
                if (this.Size_Renamed != si.DocCount)
                {
                    throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.Size_Renamed + " but segmentInfo shows " + si.DocCount);
                }
                NumTotalDocs = (int)(indexSize >> 3);
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    try
                    {
                        Dispose();
                    } // ensure we throw our original exception
                    catch (Exception)
                    {
                    }
                }
            }
        }
        private int LastFieldNumber = -1; // only for assert

        public PreFlexRWNormsConsumer(Directory directory, string segment, IOContext context)
        {
            string normsFileName = IndexFileNames.SegmentFileName(segment, "", NORMS_EXTENSION);
            bool success = false;
            IndexOutput output = null;
            try
            {
                output = directory.CreateOutput(normsFileName, context);
                output.WriteBytes(NORMS_HEADER, 0, NORMS_HEADER.Length);
                @out = output;
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(output);
                }
            }
        }
        public override SegmentInfo Read(Directory dir, string segment, IOContext context)
        {
            string fileName = IndexFileNames.SegmentFileName(segment, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
            IndexInput input = dir.OpenInput(fileName, context);
            bool success = false;
            try
            {
                CodecUtil.CheckHeader(input, Lucene40SegmentInfoFormat.CODEC_NAME, Lucene40SegmentInfoFormat.VERSION_START, Lucene40SegmentInfoFormat.VERSION_CURRENT);
                string version = input.ReadString();
                int docCount = input.ReadInt();
                if (docCount < 0)
                {
                    throw new CorruptIndexException("invalid docCount: " + docCount + " (resource=" + input + ")");
                }
                bool isCompoundFile = input.ReadByte() == SegmentInfo.YES;
                IDictionary<string, string> diagnostics = input.ReadStringStringMap();
                input.ReadStringStringMap(); // read deprecated attributes
                ISet<string> files = input.ReadStringSet();

                CodecUtil.CheckEOF(input);

                SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics);
                si.Files = files;

                success = true;

                return si;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(input);
                }
                else
                {
                    input.Dispose();
                }
            }
        }
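With both halves of the format now shown, a hedged round-trip sketch (assuming the Lucene 4.x class names Lucene40SegmentInfoWriter and Lucene40SegmentInfoReader, which carry the two overrides above):

        // Write a SegmentInfo, then read it back and sanity-check the doc count.
        var siWriter = new Lucene40SegmentInfoWriter();
        siWriter.Write(dir, si, fieldInfos, IOContext.DEFAULT);

        var siReader = new Lucene40SegmentInfoReader();
        SegmentInfo roundTripped = siReader.Read(dir, si.Name, IOContext.READ);
        Debug.Assert(roundTripped.DocCount == si.DocCount);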
        // private String segment;

        /// <summary>
        /// Sole constructor. </summary>
        public Lucene40PostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, string segmentSuffix)
        {
            bool success = false;
            IndexInput freqIn = null;
            IndexInput proxIn = null;
            try
            {
                freqIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION), ioContext);
                CodecUtil.CheckHeader(freqIn, FRQ_CODEC, VERSION_START, VERSION_CURRENT);
                // TODO: hasProx should (somehow!) become codec private,
                // but it's tricky because 1) FIS.hasProx is global (it
                // could be all fields that have prox are written by a
                // different codec), 2) the field may have had prox in
                // the past but all docs w/ that field were deleted.
                // Really we'd need to init prxOut lazily on write, and
                // then somewhere record that we actually wrote it so we
                // know whether to open on read:
                if (fieldInfos.HasProx())
                {
                    proxIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION), ioContext);
                    CodecUtil.CheckHeader(proxIn, PRX_CODEC, VERSION_START, VERSION_CURRENT);
                }
                else
                {
                    proxIn = null;
                }
                this.FreqIn = freqIn;
                this.ProxIn = proxIn;
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(freqIn, proxIn);
                }
            }
        }
 public SimpleTextTermVectorsReader(Directory directory, SegmentInfo si, IOContext context)
 {
     bool success = false;
     try
     {
         _input = directory.OpenInput(IndexFileNames.SegmentFileName(si.Name, "", SimpleTextTermVectorsWriter.VECTORS_EXTENSION), context);
         success = true;
     }
     finally
     {
         if (!success)
         {
             try
             {
                 Dispose();
             } 
             catch (Exception)
             {
                 // ensure we throw our original exception
             }
         }
     }
     ReadIndex(si.DocCount);
 }
        /// <summary>
        /// NOTE: this method creates a compound file for all files returned by
        /// info.files(). While, generally, this may include separate norms and
        /// deletion files, this SegmentInfo must not reference such files when this
        /// method is called, because they are not allowed within a compound file.
        /// </summary>
        public static ICollection<string> CreateCompoundFile(InfoStream infoStream, Directory directory, CheckAbort checkAbort, SegmentInfo info, IOContext context)
        {
            string fileName = Index.IndexFileNames.SegmentFileName(info.Name, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION);
            if (infoStream.IsEnabled("IW"))
            {
                infoStream.Message("IW", "create compound file " + fileName);
            }
            Debug.Assert(Lucene3xSegmentInfoFormat.GetDocStoreOffset(info) == -1);
            // Now merge all added files
            ICollection<string> files = info.Files;
            CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, fileName, context, true);
            IOException prior = null;
            try
            {
                foreach (string file in files)
                {
                    directory.Copy(cfsDir, file, file, context);
                    checkAbort.Work(directory.FileLength(file));
                }
            }
            catch (System.IO.IOException ex)
            {
                prior = ex;
            }
            finally
            {
                bool success = false;
                try
                {
                    IOUtils.CloseWhileHandlingException(prior, cfsDir);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        try
                        {
                            directory.DeleteFile(fileName);
                        }
                        catch (Exception)
                        {
                        }
                        try
                        {
                            directory.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(info.Name, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
                        }
                        catch (Exception)
                        {
                        }
                    }
                }
            }

            // Replace all previous files with the CFS/CFE files:
            HashSet<string> siFiles = new HashSet<string>();
            siFiles.Add(fileName);
            siFiles.Add(Lucene.Net.Index.IndexFileNames.SegmentFileName(info.Name, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
            info.Files = siFiles;

            return files;
        }
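A hedged usage sketch for the helper above (assuming, as in Lucene 4.x, that CreateCompoundFile is the static on IndexWriter; note it returns the pre-CFS file set, which the caller is expected to delete once the .cfs/.cfe pair exists):

        // Build a compound file for a finished segment, then drop the loose files.
        ICollection<string> looseFiles = IndexWriter.CreateCompoundFile(
            infoStream, directory, checkAbort, segmentInfo, IOContext.DEFAULT);
        foreach (string file in looseFiles)
        {
            directory.DeleteFile(file); // superseded by the .cfs/.cfe pair
        }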
        /// <summary>
        /// Does the actual (time-consuming) work of the merge,
        ///  but without holding synchronized lock on IndexWriter
        ///  instance
        /// </summary>
        private int MergeMiddle(MergePolicy.OneMerge merge)
        {
            merge.CheckAborted(directory);

            string mergedName = merge.Info_Renamed.Info.Name;

            IList<SegmentCommitInfo> sourceSegments = merge.Segments;

            IOContext context = new IOContext(merge.MergeInfo);

            MergeState.CheckAbort checkAbort = new MergeState.CheckAbort(merge, directory);
            TrackingDirectoryWrapper dirWrapper = new TrackingDirectoryWrapper(directory);

            if (infoStream.IsEnabled("IW"))
            {
                infoStream.Message("IW", "merging " + SegString(merge.Segments));
            }

            merge.Readers = new List<SegmentReader>();

            // this is try/finally to make sure merger's readers are
            // closed:
            bool success = false;
            try
            {
                int segUpto = 0;
                while (segUpto < sourceSegments.Count)
                {
                    SegmentCommitInfo info = sourceSegments[segUpto];

                    // Hold onto the "live" reader; we will use this to
                    // commit merged deletes
                    ReadersAndUpdates rld = readerPool.Get(info, true);

                    // Carefully pull the most recent live docs and reader
                    SegmentReader reader;
                    Bits liveDocs;
                    int delCount;

                    lock (this)
                    {
                        // Must sync to ensure BufferedDeletesStream cannot change liveDocs,
                        // pendingDeleteCount and field updates while we pull a copy:
                        reader = rld.GetReaderForMerge(context);
                        liveDocs = rld.ReadOnlyLiveDocs;
                        delCount = rld.PendingDeleteCount + info.DelCount;

                        Debug.Assert(reader != null);
                        Debug.Assert(rld.VerifyDocCounts());

                        if (infoStream.IsEnabled("IW"))
                        {
                            if (rld.PendingDeleteCount != 0)
                            {
                                infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount + " pendingDelCount=" + rld.PendingDeleteCount);
                            }
                            else if (info.DelCount != 0)
                            {
                                infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount);
                            }
                            else
                            {
                                infoStream.Message("IW", "seg=" + SegString(info) + " no deletes");
                            }
                        }
                    }

                    // Deletes might have happened after we pulled the merge reader and
                    // before we got a read-only copy of the segment's actual live docs
                    // (taking pending deletes into account). In that case we need to
                    // make a new reader with updated live docs and del count.
                    if (reader.NumDeletedDocs() != delCount)
                    {
                        // fix the reader's live docs and del count
                        Debug.Assert(delCount > reader.NumDeletedDocs()); // beware of zombies

                        SegmentReader newReader = new SegmentReader(info, reader, liveDocs, info.Info.DocCount - delCount);
                        bool released = false;
                        try
                        {
                            rld.Release(reader);
                            released = true;
                        }
                        finally
                        {
                            if (!released)
                            {
                                newReader.DecRef();
                            }
                        }

                        reader = newReader;
                    }

                    merge.Readers.Add(reader);
                    Debug.Assert(delCount <= info.Info.DocCount, "delCount=" + delCount + " info.docCount=" + info.Info.DocCount + " rld.pendingDeleteCount=" + rld.PendingDeleteCount + " info.getDelCount()=" + info.DelCount);
                    segUpto++;
                }

                //      System.out.println("[" + Thread.currentThread().getName() + "] IW.mergeMiddle: merging " + merge.getMergeReaders());

                // we pass merge.getMergeReaders() instead of merge.readers to allow the
                // OneMerge to return a view over the actual segments to merge
                SegmentMerger merger = new SegmentMerger(merge.MergeReaders, merge.Info_Renamed.Info, infoStream, dirWrapper, Config_Renamed.TermIndexInterval, checkAbort, GlobalFieldNumberMap, context, Config_Renamed.CheckIntegrityAtMerge);

                merge.CheckAborted(directory);

                // this is where all the work happens:
                MergeState mergeState;
                bool success3 = false;
                try
                {
                    if (!merger.ShouldMerge())
                    {
                        // would result in a 0 document segment: nothing to merge!
                        mergeState = new MergeState(new List<AtomicReader>(), merge.Info_Renamed.Info, infoStream, checkAbort);
                    }
                    else
                    {
                        mergeState = merger.Merge();
                    }
                    success3 = true;
                }
                finally
                {
                    if (!success3)
                    {
                        lock (this)
                        {
                            Deleter.Refresh(merge.Info_Renamed.Info.Name);
                        }
                    }
                }
                Debug.Assert(mergeState.SegmentInfo == merge.Info_Renamed.Info);
                merge.Info_Renamed.Info.Files = new HashSet<string>(dirWrapper.CreatedFiles);

                // Record which codec was used to write the segment

                if (infoStream.IsEnabled("IW"))
                {
                    if (merge.Info_Renamed.Info.DocCount == 0)
                    {
                        infoStream.Message("IW", "merge away fully deleted segments");
                    }
                    else
                    {
                        infoStream.Message("IW", "merge codec=" + Codec + " docCount=" + merge.Info_Renamed.Info.DocCount + "; merged segment has " + (mergeState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (mergeState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (mergeState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (mergeState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (mergeState.FieldInfos.HasProx() ? "freqs" : "no freqs"));
                    }
                }

                // Very important to do this before opening the reader
                // because codec must know if prox was written for
                // this segment:
                //System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name);
                bool useCompoundFile;
                lock (this) // Guard segmentInfos
                {
                    useCompoundFile = mergePolicy.UseCompoundFile(segmentInfos, merge.Info_Renamed);
                }

                if (useCompoundFile)
                {
                    success = false;

                    ICollection<string> filesToRemove = merge.Info_Renamed.Files();

                    try
                    {
                        filesToRemove = CreateCompoundFile(infoStream, directory, checkAbort, merge.Info_Renamed.Info, context);
                        success = true;
                    }
                    catch (System.IO.IOException ioe)
                    {
                        lock (this)
                        {
                            if (merge.Aborted)
                            {
                                // this can happen if rollback or close(false)
                                // is called -- fall through to logic below to
                                // remove the partially created CFS:
                            }
                            else
                            {
                                HandleMergeException(ioe, merge);
                            }
                        }
                    }
                    catch (Exception t)
                    {
                        HandleMergeException(t, merge);
                    }
                    finally
                    {
                        if (!success)
                        {
                            if (infoStream.IsEnabled("IW"))
                            {
                                infoStream.Message("IW", "hit exception creating compound file during merge");
                            }

                            lock (this)
                            {
                                Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION));
                                Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
                                Deleter.DeleteNewFiles(merge.Info_Renamed.Files());
                            }
                        }
                    }

                    // So that, if we hit exc in deleteNewFiles (next)
                    // or in commitMerge (later), we close the
                    // per-segment readers in the finally clause below:
                    success = false;

                    lock (this)
                    {
                        // delete new non cfs files directly: they were never
                        // registered with IFD
                        Deleter.DeleteNewFiles(filesToRemove);

                        if (merge.Aborted)
                        {
                            if (infoStream.IsEnabled("IW"))
                            {
                                infoStream.Message("IW", "abort merge after building CFS");
                            }
                            Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION));
                            Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
                            return 0;
                        }
                    }

                    merge.Info_Renamed.Info.UseCompoundFile = true;
                }
                else
                {
                    // So that, if we hit exc in commitMerge (later),
                    // we close the per-segment readers in the finally
                    // clause below:
                    success = false;
                }

                // Have codec write SegmentInfo.  Must do this after
                // creating CFS so that 1) .si isn't slurped into CFS,
                // and 2) .si reflects useCompoundFile=true change
                // above:
                bool success2 = false;
                try
                {
                    Codec.SegmentInfoFormat().SegmentInfoWriter.Write(directory, merge.Info_Renamed.Info, mergeState.FieldInfos, context);
                    success2 = true;
                }
                finally
                {
                    if (!success2)
                    {
                        lock (this)
                        {
                            Deleter.DeleteNewFiles(merge.Info_Renamed.Files());
                        }
                    }
                }

                // TODO: ideally we would freeze merge.info here!!
                // because any changes after writing the .si will be
                // lost...

                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", string.Format(CultureInfo.InvariantCulture, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.Info_Renamed.SizeInBytes() / 1024.0 / 1024.0, merge.EstimatedMergeBytes / 1024 / 1024.0));
                }

                IndexReaderWarmer mergedSegmentWarmer = Config_Renamed.MergedSegmentWarmer;
                if (PoolReaders && mergedSegmentWarmer != null && merge.Info_Renamed.Info.DocCount != 0)
                {
                    ReadersAndUpdates rld = readerPool.Get(merge.Info_Renamed, true);
                    SegmentReader sr = rld.GetReader(IOContext.READ);
                    try
                    {
                        mergedSegmentWarmer.Warm(sr);
                    }
                    finally
                    {
                        lock (this)
                        {
                            rld.Release(sr);
                            readerPool.Release(rld);
                        }
                    }
                }

                // Force READ context because we merge deletes onto
                // this reader:
                if (!CommitMerge(merge, mergeState))
                {
                    // commitMerge will return false if this merge was
                    // aborted
                    return 0;
                }

                success = true;
            }
            finally
            {
                // Readers are already closed in commitMerge if we didn't hit
                // an exc:
                if (!success)
                {
                    CloseMergeReaders(merge, true);
                }
            }

            return merge.Info_Renamed.Info.DocCount;
        }
        /// <summary>
        /// Copies the segment files as-is into the IndexWriter's directory. </summary>
        private SegmentCommitInfo CopySegmentAsIs(SegmentCommitInfo info, string segName, IDictionary<string, string> dsNames, ISet<string> dsFilesCopied, IOContext context, ISet<string> copiedFiles)
        {
            // Determine if the doc store of this segment needs to be copied. It's
            // only relevant for segments that share doc store with others,
            // because the DS might have been copied already, in which case we
            // just want to update the DS name of this SegmentInfo.
            string dsName = Lucene3xSegmentInfoFormat.GetDocStoreSegment(info.Info);
            Debug.Assert(dsName != null);
            string newDsName;
            if (dsNames.ContainsKey(dsName))
            {
                newDsName = dsNames[dsName];
            }
            else
            {
                dsNames[dsName] = segName;
                newDsName = segName;
            }

            // note: we don't really need this fis (its copied), but we load it up
            // so we don't pass a null value to the si writer
            FieldInfos fis = SegmentReader.ReadFieldInfos(info);

            ISet<string> docStoreFiles3xOnly = Lucene3xCodec.GetDocStoreFiles(info.Info);

            IDictionary<string, string> attributes;
            // copy the attributes map, we might modify it below.
            // also we need to ensure its read-write, since we will invoke the SIwriter (which might want to set something).
            if (info.Info.Attributes() == null)
            {
                attributes = new Dictionary<string, string>();
            }
            else
            {
                attributes = new Dictionary<string, string>(info.Info.Attributes());
            }
            if (docStoreFiles3xOnly != null)
            {
                // only violate the codec this way if it's preflex &
                // shares doc stores
                // change docStoreSegment to newDsName
                attributes[Lucene3xSegmentInfoFormat.DS_NAME_KEY] = newDsName;
            }

            //System.out.println("copy seg=" + info.info.name + " version=" + info.info.getVersion());
            // Same SI as before but we change directory, name and docStoreSegment:
            SegmentInfo newInfo = new SegmentInfo(directory, info.Info.Version, segName, info.Info.DocCount, info.Info.UseCompoundFile, info.Info.Codec, info.Info.Diagnostics, attributes);
            SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo, info.DelCount, info.DelGen, info.FieldInfosGen);

            HashSet<string> segFiles = new HashSet<string>();

            // Build up new segment's file names.  Must do this
            // before writing SegmentInfo:
            foreach (string file in info.Files())
            {
                string newFileName;
                if (docStoreFiles3xOnly != null && docStoreFiles3xOnly.Contains(file))
                {
                    newFileName = newDsName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file);
                }
                else
                {
                    newFileName = segName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file);
                }
                segFiles.Add(newFileName);
            }
            newInfo.Files = segFiles;

            // We must rewrite the SI file because it references
            // segment name (its own name, if its 3.x, and doc
            // store segment name):
            TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
            Codec currentCodec = newInfo.Codec;
            try
            {
                currentCodec.SegmentInfoFormat().SegmentInfoWriter.Write(trackingDir, newInfo, fis, context);
            }
            catch (System.NotSupportedException)
            {
                if (currentCodec is Lucene3xCodec)
                {
                    // OK: 3x codec cannot write a new SI file;
                    // SegmentInfos will write this on commit
                }
                else
                {
                    throw;
                }
            }

            ICollection<string> siFiles = trackingDir.CreatedFiles;

            bool success = false;
            try
            {
                // Copy the segment's files
                foreach (string file in info.Files())
                {
                    string newFileName;
                    if (docStoreFiles3xOnly != null && docStoreFiles3xOnly.Contains(file))
                    {
                        newFileName = newDsName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file);
                        if (dsFilesCopied.Contains(newFileName))
                        {
                            continue;
                        }
                        dsFilesCopied.Add(newFileName);
                    }
                    else
                    {
                        newFileName = segName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file);
                    }

                    if (siFiles.Contains(newFileName))
                    {
                        // We already rewrote this above
                        continue;
                    }

                    Debug.Assert(!SlowFileExists(directory, newFileName), "file \"" + newFileName + "\" already exists; siFiles=" + siFiles);
                    Debug.Assert(!copiedFiles.Contains(file), "file \"" + file + "\" is being copied more than once");
                    copiedFiles.Add(file);
                    info.Info.Dir.Copy(directory, file, newFileName, context);
                }
                success = true;
            }
            finally
            {
                if (!success)
                {
                    foreach (string file in newInfo.Files)
                    {
                        try
                        {
                            directory.DeleteFile(file);
                        }
                        catch (Exception)
                        {
                        }
                    }
                }
            }

            return newInfoPerCommit;
        }
 public override IndexOutput CreateOutput(string name, IOContext cxt)
 {
     IndexOutput indexOutput = @in.CreateOutput(name, cxt);
     if (null != CrashAfterCreateOutput_Renamed && name.Equals(CrashAfterCreateOutput_Renamed))
     {
         // CRASH!
         indexOutput.Dispose();
         if (VERBOSE)
         {
             Console.WriteLine("TEST: now crash");
             Console.WriteLine(new Exception().StackTrace);
         }
         throw new CrashingException("crashAfterCreateOutput " + CrashAfterCreateOutput_Renamed);
     }
     return indexOutput;
 }
        public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, SegmentInfo info,
            PostingsReaderBase postingsReader, IOContext context,
            String segmentSuffix)
        {
            _postingsReader = postingsReader;

            _input =
                dir.OpenInput(
                    IndexFileNames.SegmentFileName(info.Name, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION),
                    context);

            var success = false;
            try
            {
                _version = ReadHeader(_input);

                // Have PostingsReader init itself
                postingsReader.Init(_input);

                // Read per-field details
                SeekDir(_input, _dirOffset);

                int numFields = _input.ReadVInt();
                if (numFields < 0)
                {
                    throw new CorruptIndexException(String.Format("Invalid number of fields: {0}, Resource: {1}",
                        numFields, _input));
                }

                for (var i = 0; i < numFields; i++)
                {
                    var field = _input.ReadVInt();
                    var numTerms = _input.ReadVLong();

                    Debug.Assert(numTerms >= 0);

                    var termsStartPointer = _input.ReadVLong();
                    var fieldInfo = fieldInfos.FieldInfo(field);
                    var sumTotalTermFreq = fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_ONLY
                        ? -1
                        : _input.ReadVLong();
                    var sumDocFreq = _input.ReadVLong();
                    var docCount = _input.ReadVInt();
                    var longsSize = _version >= BlockTermsWriter.VERSION_META_ARRAY ? _input.ReadVInt() : 0;

                    if (docCount < 0 || docCount > info.DocCount)
                    {
                        // #docs with field must be <= #docs
                        throw new CorruptIndexException(
                            String.Format("Invalid DocCount: {0}, MaxDoc: {1}, Resource: {2}", docCount, info.DocCount,
                                _input));
                    }

                    if (sumDocFreq < docCount)
                    {
                        // #postings must be >= #docs with field
                        throw new CorruptIndexException(
                            String.Format("Invalid sumDocFreq: {0}, DocCount: {1}, Resource: {2}", sumDocFreq, docCount,
                                _input));
                    }

                    if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq)
                    {
                        // #positions must be >= #postings
                        throw new CorruptIndexException(
                            String.Format("Invalid sumTotalTermFreq: {0}, sumDocFreq: {1}, Resource: {2}",
                                sumTotalTermFreq, sumDocFreq, _input));
                    }

                    try
                    {
                        _fields.Add(fieldInfo.Name,
                            new FieldReader(fieldInfo, this, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq,
                                docCount,
                                longsSize));
                    }
                    catch (ArgumentException)
                    {
                        throw new CorruptIndexException(String.Format("Duplicate fields: {0}, Resource: {1}",
                            fieldInfo.Name, _input));
                    }

                }
                success = true;
            }
            finally
            {
                if (!success)
                {
                    _input.Dispose();
                }
            }

            _indexReader = indexReader;
        }
        public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
        {
            string fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
            IndexOutput output = directory.CreateOutput(fileName, context);
            bool success = false;
            try
            {
                CodecUtil.WriteHeader(output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
                output.WriteVInt(infos.Size());
                foreach (FieldInfo fi in infos)
                {
                    FieldInfo.IndexOptions? indexOptions = fi.FieldIndexOptions;
                    sbyte bits = 0x0;
                    if (fi.HasVectors())
                    {
                        bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
                    }
                    if (fi.OmitsNorms())
                    {
                        bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
                    }
                    if (fi.HasPayloads())
                    {
                        bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
                    }
                    if (fi.Indexed)
                    {
                        bits |= Lucene46FieldInfosFormat.IS_INDEXED;
                        Debug.Assert(indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads());
                        if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY)
                        {
                            bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
                        }
                        else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                        {
                            bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
                        }
                        else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS)
                        {
                            bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
                        }
                    }
                    output.WriteString(fi.Name);
                    output.WriteVInt(fi.Number);
                    output.WriteByte((byte)bits);

                    // pack the DV types in one byte
                    var dv = DocValuesByte(fi.DocValuesType);
                    var nrm = DocValuesByte(fi.NormType);
                    Debug.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0);
                    var val = unchecked((sbyte)(0xff & ((nrm << 4) | dv)));
                    output.WriteByte((byte)val);
                    output.WriteLong(fi.DocValuesGen);
                    output.WriteStringStringMap(fi.Attributes());
                }
                CodecUtil.WriteFooter(output);
                success = true;
            }
            finally
            {
                if (success)
                {
                    output.Dispose();
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(output);
                }
            }
        }
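For reference, the packed doc-values byte written above keeps the norms type in the high nibble and the doc-values type in the low nibble. A reader recovers both like this (a sketch mirroring the pack expression, not library code):

            // Inverse of: val = (sbyte)(0xff & ((nrm << 4) | dv))
            byte packed      = (byte)input.ReadByte();
            int  dvTypeBits  = packed & 0x0F;        // low nibble: doc-values type
            int  nrmTypeBits = (packed >> 4) & 0x0F; // high nibble: norms type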
        /*
	  private class IterableAnonymousInnerClassHelper : IEnumerable<Number>
	  {
		  private readonly ReadersAndUpdates OuterInstance;

		  private Lucene.Net.Index.SegmentReader Reader;
		  private string Field;
		  private Lucene.Net.Index.NumericDocValuesFieldUpdates FieldUpdates;

		  public IterableAnonymousInnerClassHelper(ReadersAndUpdates outerInstance, Lucene.Net.Index.SegmentReader reader, string field, Lucene.Net.Index.NumericDocValuesFieldUpdates fieldUpdates)
		  {
			  this.OuterInstance = outerInstance;
			  this.Reader = reader;
			  this.Field = field;
			  this.FieldUpdates = fieldUpdates;
			  currentValues = reader.GetNumericDocValues(field);
			  docsWithField = reader.GetDocsWithField(field);
			  maxDoc = reader.MaxDoc;
			  updatesIter = fieldUpdates.Iterator();
		  }

		  internal readonly NumericDocValues currentValues;
		  internal readonly Bits docsWithField;
		  internal readonly int maxDoc;
		  internal readonly NumericDocValuesFieldUpdates.Iterator updatesIter;
		  public virtual IEnumerator<Number> GetEnumerator()
		  {
			updatesIter.Reset();
			return new IteratorAnonymousInnerClassHelper(this);
		  }

		  private class IteratorAnonymousInnerClassHelper : IEnumerator<Number>
		  {
			  private readonly IterableAnonymousInnerClassHelper OuterInstance;

			  public IteratorAnonymousInnerClassHelper(IterableAnonymousInnerClassHelper outerInstance)
			  {
                  this.OuterInstance = outerInstance;
				  curDoc = -1;
				  updateDoc = updatesIter.NextDoc();
			  }

			  internal int curDoc;
			  internal int updateDoc;

			  public virtual bool HasNext()
			  {
				return curDoc < maxDoc - 1;
			  }

			  public virtual Number Next()
			  {
				if (++curDoc >= maxDoc)
				{
				  throw new NoSuchElementException("no more documents to return values for");
				}
				if (curDoc == updateDoc) // this document has an updated value
				{
				  long? value = updatesIter.value(); // either null (unset value) or updated value
				  updateDoc = updatesIter.nextDoc(); // prepare for next round
				  return value;
				}
				else
				{
				  // no update for this document
				  Debug.Assert(curDoc < updateDoc);
				  if (currentValues != null && docsWithField.Get(curDoc))
				  {
					// only read the current value if the document had a value before
					return currentValues.Get(curDoc);
				  }
				  else
				  {
					return null;
				  }
				}
			  }

			  public virtual void Remove()
			  {
				throw new System.NotSupportedException("this iterator does not support removing elements");
			  }
		  }
	  }*/
        /*
	  private class IterableAnonymousInnerClassHelper2 : IEnumerable<BytesRef>
	  {
		  private readonly ReadersAndUpdates OuterInstance;

		  private Lucene.Net.Index.SegmentReader Reader;
		  private string Field;
		  private Lucene.Net.Index.BinaryDocValuesFieldUpdates DvFieldUpdates;

		  public IterableAnonymousInnerClassHelper2(ReadersAndUpdates outerInstance, Lucene.Net.Index.SegmentReader reader, string field, Lucene.Net.Index.BinaryDocValuesFieldUpdates dvFieldUpdates)
		  {
			  this.OuterInstance = outerInstance;
			  this.Reader = reader;
			  this.Field = field;
			  this.DvFieldUpdates = dvFieldUpdates;
			  currentValues = reader.GetBinaryDocValues(field);
			  docsWithField = reader.GetDocsWithField(field);
			  maxDoc = reader.MaxDoc;
			  updatesIter = dvFieldUpdates.Iterator();
		  }

		  internal readonly BinaryDocValues currentValues;
		  internal readonly Bits docsWithField;
		  internal readonly int maxDoc;
		  internal readonly BinaryDocValuesFieldUpdates.Iterator updatesIter;
		  public virtual IEnumerator<BytesRef> GetEnumerator()
		  {
			updatesIter.Reset();
			return new IteratorAnonymousInnerClassHelper2(this);
		  }

		  private class IteratorAnonymousInnerClassHelper2 : IEnumerator<BytesRef>
		  {
			  private readonly IterableAnonymousInnerClassHelper2 OuterInstance;

			  public IteratorAnonymousInnerClassHelper2(IterableAnonymousInnerClassHelper2 outerInstance)
			  {
                  this.OuterInstance = outerInstance;
				  curDoc = -1;
				  updateDoc = updatesIter.nextDoc();
				  scratch = new BytesRef();
			  }

			  internal int curDoc;
			  internal int updateDoc;
			  internal BytesRef scratch;

			  public virtual bool HasNext()
			  {
				return curDoc < maxDoc - 1;
			  }

			  public virtual BytesRef Next()
			  {
				if (++curDoc >= maxDoc)
				{
				  throw new NoSuchElementException("no more documents to return values for");
				}
				if (curDoc == updateDoc) // this document has an updated value
				{
				  BytesRef value = updatesIter.value(); // either null (unset value) or updated value
				  updateDoc = updatesIter.nextDoc(); // prepare for next round
				  return value;
				}
				else
				{
				  // no update for this document
				  Debug.Assert(curDoc < updateDoc);
				  if (currentValues != null && docsWithField.get(curDoc))
				  {
					// only read the current value if the document had a value before
					currentValues.get(curDoc, scratch);
					return scratch;
				  }
				  else
				  {
					return null;
				  }
				}
			  }

			  public virtual void Remove()
			  {
				throw new System.NotSupportedException("this iterator does not support removing elements");
			  }
		  }
	  }*/

        /// <summary>
        /// Returns a reader for merge. this method applies field updates if there are
        /// any and marks that this segment is currently merging.
        /// </summary>
        internal virtual SegmentReader GetReaderForMerge(IOContext context)
        {
            lock (this)
            {
                //Debug.Assert(Thread.holdsLock(Writer));
                // must execute these two statements as atomic operation, otherwise we
                // could lose updates if e.g. another thread calls writeFieldUpdates in
                // between, or the updates are applied to the obtained reader, but then
                // re-applied in IW.commitMergedDeletes (unnecessary work and potential
                // bugs).
                IsMerging = true;
                return GetReader(context);
            }
        }
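A hedged call sketch for GetReaderForMerge, mirroring the merge path in MergeMiddle earlier in this listing (readerPool, info, and context as used there):

            // Pull a reader for merging; IsMerging is set atomically with the grab.
            ReadersAndUpdates rld = readerPool.Get(info, true);
            SegmentReader reader = rld.GetReaderForMerge(context);
            try
            {
                // ... merge using 'reader' ...
            }
            finally
            {
                rld.Release(reader);
                readerPool.Release(rld);
            }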
        public static void Main(string[] args)
        {
            string filename = null;
            bool   extract  = false;
            string dirImpl  = null;

            int j = 0;

            while (j < args.Length)
            {
                string arg = args[j];
                if ("-extract".Equals(arg))
                {
                    extract = true;
                }
                else if ("-dir-impl".Equals(arg))
                {
                    if (j == args.Length - 1)
                    {
                        Console.WriteLine("ERROR: missing value for -dir-impl option");
                        Environment.Exit(1);
                    }
                    j++;
                    dirImpl = args[j];
                }
                else if (filename == null)
                {
                    filename = arg;
                }
                j++;
            }

            if (filename == null)
            {
                Console.WriteLine("Usage: org.apache.lucene.index.CompoundFileExtractor [-extract] [-dir-impl X] <cfsfile>");
                return;
            }

            Directory             dir     = null;
            CompoundFileDirectory cfr     = null;
            IOContext             context = IOContext.READ;

            try
            {
                FileInfo file    = new FileInfo(filename);
                string   dirname = file.DirectoryName;
                filename = file.Name;
                if (dirImpl == null)
                {
                    dir = FSDirectory.Open(new DirectoryInfo(dirname));
                }
                else
                {
                    dir = CommandLineUtil.NewFSDirectory(dirImpl, new DirectoryInfo(dirname));
                }

                cfr = new CompoundFileDirectory(dir, filename, IOContext.DEFAULT, false);

                string[] files = cfr.ListAll();
                ArrayUtil.TimSort(files);   // sort the array of filenames so that the output is more readable

                for (int i = 0; i < files.Length; ++i)
                {
                    long len = cfr.FileLength(files[i]);

                    if (extract)
                    {
                        Console.WriteLine("extract " + files[i] + " with " + len + " bytes to local directory...");
                        IndexInput ii = cfr.OpenInput(files[i], context);

                        FileStream f = new FileStream(files[i], FileMode.Create, FileAccess.Write);

                        // read and write with a small buffer, which is more effective than reading byte by byte
                        byte[] buffer = new byte[1024];
                        int    chunk  = buffer.Length;
                        while (len > 0)
                        {
                            int bufLen = (int)Math.Min(chunk, len);
                            ii.ReadBytes(buffer, 0, bufLen);
                            f.Write(buffer, 0, bufLen);
                            len -= bufLen;
                        }

                        f.Dispose();
                        ii.Dispose();
                    }
                    else
                    {
                        Console.WriteLine(files[i] + ": " + len + " bytes");
                    }
                }
            }
            catch (IOException ioe)
            {
                Console.WriteLine(ioe.ToString());
                Console.Write(ioe.StackTrace);
            }
            finally
            {
                try
                {
                    if (dir != null)
                    {
                        dir.Dispose();
                    }
                    if (cfr != null)
                    {
                        cfr.Dispose();
                    }
                }
                catch (IOException ioe)
                {
                    Console.WriteLine(ioe.ToString());
                    Console.Write(ioe.StackTrace);
                }
            }
        }
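
        // A minimal usage sketch (not from the original source) for the extractor
        // method above; the Main entry-point name, the directory implementation
        // name, and the .cfs file name are illustrative assumptions:
        public static void ExtractorUsageExample()
        {
            // List the contents of a compound file:
            Main(new[] { "_0.cfs" });

            // Extract all sub-files into the current directory, choosing the
            // FSDirectory implementation explicitly via -dir-impl:
            Main(new[] { "-extract", "-dir-impl", "SimpleFSDirectory", "_0.cfs" });
        }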
        /// <summary>
        /// Sole constructor. </summary>
        public CompressingStoredFieldsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
        {
            this.compressionMode = compressionMode;
            string segment = si.Name;
            bool   success = false;

            fieldInfos = fn;
            numDocs    = si.DocCount;
            ChecksumIndexInput indexStream = null;

            try
            {
                string indexStreamFN  = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
                string fieldsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION);
                // Load the index into memory
                indexStream = d.OpenChecksumInput(indexStreamFN, context);
                string codecNameIdx = formatName + CompressingStoredFieldsWriter.CODEC_SFX_IDX;
                version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
                Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer());
                indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

                long maxPointer = -1;

                if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
                {
                    maxPointer = indexStream.ReadVInt64();
                    CodecUtil.CheckFooter(indexStream);
                }
                else
                {
#pragma warning disable 612, 618
                    CodecUtil.CheckEOF(indexStream);
#pragma warning restore 612, 618
                }
                indexStream.Dispose();
                indexStream = null;

                // Open the data file and read metadata
                fieldsStream = d.OpenInput(fieldsStreamFN, context);
                if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
                {
                    if (maxPointer + CodecUtil.FooterLength() != fieldsStream.Length)
                    {
                        throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + fieldsStream.Length);
                    }
                }
                else
                {
                    maxPointer = fieldsStream.Length;
                }
                this.maxPointer = maxPointer;
                string codecNameDat  = formatName + CompressingStoredFieldsWriter.CODEC_SFX_DAT;
                int    fieldsVersion = CodecUtil.CheckHeader(fieldsStream, codecNameDat, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
                if (version != fieldsVersion)
                {
                    throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + fieldsVersion);
                }
                Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == fieldsStream.GetFilePointer());

                if (version >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS)
                {
                    chunkSize = fieldsStream.ReadVInt32();
                }
                else
                {
                    chunkSize = -1;
                }
                packedIntsVersion = fieldsStream.ReadVInt32();
                decompressor      = compressionMode.NewDecompressor();
                this.bytes        = new BytesRef();

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.DisposeWhileHandlingException(this, indexStream);
                }
            }
        }
        /// <summary>
        /// Returns a randomized <seealso cref="IOContext"/> derived from the given one, for use in tests. </summary>
        public static IOContext NewIOContext(Random random, IOContext oldContext)
        {
            int randomNumDocs = random.Next(4192);
            int size = random.Next(512) * randomNumDocs;
            if (oldContext.FlushInfo != null)
            {
                // Always return at least the estimatedSegmentSize of
                // the incoming IOContext:
                return new IOContext(new FlushInfo(randomNumDocs, (long)Math.Max(oldContext.FlushInfo.EstimatedSegmentSize, size)));
            }
            else if (oldContext.MergeInfo != null)
            {
                // Always return at least the estimatedMergeBytes of
                // the incoming IOContext:
                return new IOContext(new MergeInfo(randomNumDocs, Math.Max(oldContext.MergeInfo.EstimatedMergeBytes, size), random.NextBoolean(), TestUtil.NextInt(random, 1, 100)));
            }
            else
            {
                // Make a totally random IOContext:
                IOContext context;
                switch (random.Next(5))
                {
                    case 0:
                        context = IOContext.DEFAULT;
                        break;

                    case 1:
                        context = IOContext.READ;
                        break;

                    case 2:
                        context = IOContext.READONCE;
                        break;

                    case 3:
                        context = new IOContext(new MergeInfo(randomNumDocs, size, true, -1));
                        break;

                    case 4:
                        context = new IOContext(new FlushInfo(randomNumDocs, size));
                        break;

                    default:
                        context = IOContext.DEFAULT;
                        break;
                }
                return context;
            }
        }
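
        // A hedged usage sketch for NewIOContext above: a test can randomize the
        // I/O hints before opening a file. The directory and file-name parameters
        // here are illustrative assumptions.
        public static IndexInput OpenWithRandomContext(Random random, Directory dir, string fileName)
        {
            // Derive a randomized context; when starting from IOContext.DEFAULT
            // (no flush/merge info), one of the five random cases above is chosen:
            IOContext context = NewIOContext(random, IOContext.DEFAULT);
            return dir.OpenInput(fileName, context);
        }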
        internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
        {
            fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
            termVectorsLocal  = new AnonymousTermVectorsLocal(this);

            if (termsIndexDivisor == 0)
            {
                throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
            }

            Codec     codec = si.Info.Codec;
            Directory cfsDir; // confusing name: if (cfs) it's the cfsDir, otherwise it's the segment's directory.

            bool success = false;

            try
            {
                if (si.Info.UseCompoundFile)
                {
                    cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
                }
                else
                {
                    cfsReader = null;
                    cfsDir    = dir;
                }

                FieldInfos fieldInfos = owner.FieldInfos;

                this.termsIndexDivisor = termsIndexDivisor;
                PostingsFormat   format           = codec.PostingsFormat;
                SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);
                // Ask codec for its Fields
                fields = format.FieldsProducer(segmentReadState);
                Debug.Assert(fields != null);
                // ask codec for its Norms:
                // TODO: since we don't write any norms file if there are no norms,
                // it's kinda janky to assume the codec handles the case of no norms file at all gracefully?!

                if (fieldInfos.HasNorms)
                {
                    normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
                    Debug.Assert(normsProducer != null);
                }
                else
                {
                    normsProducer = null;
                }

                // LUCENENET TODO: EXCEPTIONS Not sure why this catch block is swallowing AccessViolationException,
                // because it didn't exist in Lucene. Is it really needed? AVE is for protected memory...could
                // this be needed because we are using unchecked??

#if !NETSTANDARD
                try
                {
#endif
                    fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);
#if !NETSTANDARD
                }
                catch (System.AccessViolationException)
                {
                    // LUCENENET: swallowed; see the TODO above.
                }
#endif

                if (fieldInfos.HasVectors) // open term vector files only as needed
                {
                    termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
                }
                else
                {
                    termVectorsReaderOrig = null;
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    DecRef();
                }
            }
        }
        /// <summary>
        /// Sole constructor. </summary>
        public Lucene40TermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context)
        {
            string segment = si.Name;
            int size = si.DocCount;

            bool success = false;

            try
            {
                string idxName = IndexFileNames.SegmentFileName(segment, "", VECTORS_INDEX_EXTENSION);
                Tvx = d.OpenInput(idxName, context);
                int tvxVersion = CodecUtil.CheckHeader(Tvx, CODEC_NAME_INDEX, VERSION_START, VERSION_CURRENT);

                string fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION);
                Tvd = d.OpenInput(fn, context);
                int tvdVersion = CodecUtil.CheckHeader(Tvd, CODEC_NAME_DOCS, VERSION_START, VERSION_CURRENT);
                fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_FIELDS_EXTENSION);
                Tvf = d.OpenInput(fn, context);
                int tvfVersion = CodecUtil.CheckHeader(Tvf, CODEC_NAME_FIELDS, VERSION_START, VERSION_CURRENT);
                Debug.Assert(HEADER_LENGTH_INDEX == Tvx.FilePointer);
                Debug.Assert(HEADER_LENGTH_DOCS == Tvd.FilePointer);
                Debug.Assert(HEADER_LENGTH_FIELDS == Tvf.FilePointer);
                Debug.Assert(tvxVersion == tvdVersion);
                Debug.Assert(tvxVersion == tvfVersion);

                NumTotalDocs = (int)(Tvx.Length() - HEADER_LENGTH_INDEX >> 4);

                this.Size_Renamed = NumTotalDocs;
                Debug.Assert(size == 0 || NumTotalDocs == size);

                this.FieldInfos = fieldInfos;
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    try
                    {
                        Dispose();
                    }
                    catch (Exception)
                    {
                        // suppressed so we still throw our original exception
                    }
                }
            }
        }
        public override SegmentInfo Read(Directory directory, string segmentName, IOContext context)
        {
            var scratch = new BytesRef();
            string segFileName = IndexFileNames.SegmentFileName(segmentName, "",
                SimpleTextSegmentInfoFormat.SI_EXTENSION);
            ChecksumIndexInput input = directory.OpenChecksumInput(segFileName, context);
            bool success = false;
            try
            {
                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_VERSION));
                string version = ReadString(SimpleTextSegmentInfoWriter.SI_VERSION.Length, scratch);

                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DOCCOUNT));
                int docCount = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_DOCCOUNT.Length, scratch), CultureInfo.InvariantCulture);

                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_USECOMPOUND));
                bool isCompoundFile = Convert.ToBoolean(ReadString(SimpleTextSegmentInfoWriter.SI_USECOMPOUND.Length, scratch), CultureInfo.InvariantCulture);

                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_NUM_DIAG));
                int numDiag = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_NUM_DIAG.Length, scratch), CultureInfo.InvariantCulture);
                IDictionary<string, string> diagnostics = new Dictionary<string, string>();

                for (int i = 0; i < numDiag; i++)
                {
                    SimpleTextUtil.ReadLine(input, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DIAG_KEY));
                    string key = ReadString(SimpleTextSegmentInfoWriter.SI_DIAG_KEY.Length, scratch);

                    SimpleTextUtil.ReadLine(input, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DIAG_VALUE));
                    string value = ReadString(SimpleTextSegmentInfoWriter.SI_DIAG_VALUE.Length, scratch);
                    diagnostics[key] = value;
                }

                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_NUM_FILES));
                int numFiles = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_NUM_FILES.Length, scratch), CultureInfo.InvariantCulture);
                var files = new HashSet<string>();

                for (int i = 0; i < numFiles; i++)
                {
                    SimpleTextUtil.ReadLine(input, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_FILE));
                    string fileName = ReadString(SimpleTextSegmentInfoWriter.SI_FILE.Length, scratch);
                    files.Add(fileName);
                }

                SimpleTextUtil.CheckFooter(input);

                var info = new SegmentInfo(directory, version, segmentName, docCount, isCompoundFile, null,
                    diagnostics) {Files = files};
                success = true;
                return info;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(input);
                }
                else
                {
                    input.Dispose();
                }
            }
        }
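
        // For orientation, the plain-text .si file parsed by the method above has
        // roughly the following shape (the key prefixes come from
        // SimpleTextSegmentInfoWriter; the concrete values below are made up):
        //
        //     version 4.8
        //     number of documents 2
        //     uses compound file false
        //     diagnostics 1
        //       key os
        //       value linux
        //     files 2
        //       file _0.fld
        //       file _0.si
        //
        // followed by the checksum footer verified by SimpleTextUtil.CheckFooter.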
        public override TermVectorsWriter VectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context)
        {
            return new AssertingTermVectorsWriter(@in.VectorsWriter(directory, segmentInfo, context));
        }
        public Lucene3xTermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context)
        {
            string segment        = Lucene3xSegmentInfoFormat.GetDocStoreSegment(si);
            int    docStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(si);
            int    size           = si.DocCount;

            bool success = false;

            try
            {
                if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si))
                {
                    d = storeCFSReader = new CompoundFileDirectory(si.Dir, IndexFileNames.SegmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
                }
                else
                {
                    storeCFSReader = null;
                }
                string idxName = IndexFileNames.SegmentFileName(segment, "", VECTORS_INDEX_EXTENSION);
                tvx    = d.OpenInput(idxName, context);
                format = CheckValidFormat(tvx);
                string fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION);
                tvd = d.OpenInput(fn, context);
                int tvdFormat = CheckValidFormat(tvd);
                fn  = IndexFileNames.SegmentFileName(segment, "", VECTORS_FIELDS_EXTENSION);
                tvf = d.OpenInput(fn, context);
                int tvfFormat = CheckValidFormat(tvf);

                Debug.Assert(format == tvdFormat);
                Debug.Assert(format == tvfFormat);

                numTotalDocs = (int)(tvx.Length >> 4);

                if (-1 == docStoreOffset)
                {
                    this.docStoreOffset = 0;
                    this.size           = numTotalDocs;
                    Debug.Assert(size == 0 || numTotalDocs == size);
                }
                else
                {
                    this.docStoreOffset = docStoreOffset;
                    this.size           = size;
                    // Verify the file is long enough to hold all of our
                    // docs
                    Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset);
                }

                this.fieldInfos = fieldInfos;
                success         = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    try
                    {
                        Dispose();
                    }
                    catch (Exception)
                    {
                        // suppressed to keep our original exception
                    }
                }
            }
        }
        /// <summary>
        /// Adds all segments from an array of indexes into this index.
        ///
        /// <para>This may be used to parallelize batch indexing. A large document
        /// collection can be broken into sub-collections. Each sub-collection can be
        /// indexed in parallel, on a different thread, process or machine. The
        /// complete index can then be created by merging sub-collection indexes
        /// with this method.</para>
        ///
        /// <para><b>NOTE:</b> this method acquires the write lock in
        /// each directory, to ensure that no <c>IndexWriter</c>
        /// is currently open or tries to open while this is
        /// running.</para>
        ///
        /// <para>This method is transactional in how exceptions are
        /// handled: it does not commit a new segments_N file until
        /// all indexes are added. This means that if an exception
        /// occurs (for example disk full), then either no indexes
        /// will have been added or they all will have been.</para>
        ///
        /// <para>Note that this requires temporary free space in the
        /// <seealso cref="Directory"/> up to 2X the sum of all input indexes
        /// (including the starting index). If readers/searchers
        /// are open against the starting index, then the temporary
        /// free space required will be higher by the size of the
        /// starting index (see <seealso cref="ForceMerge(int)"/> for details).</para>
        ///
        /// <para><b>NOTE:</b> this method only copies the segments of the incoming indexes
        /// and does not merge them. Therefore deleted documents are not removed and
        /// the new segments are not merged with the existing ones.</para>
        ///
        /// <para>This requires that this index not be among those to be added.</para>
        ///
        /// <para><b>NOTE:</b> if this method hits an <c>OutOfMemoryException</c>
        /// you should immediately close the writer.</para>
        /// </summary>
        /// <exception cref="CorruptIndexException"> if the index is corrupt </exception>
        /// <exception cref="IOException"> if there is a low-level IO error </exception>
        /// <exception cref="LockObtainFailedException"> if we were unable to
        ///   acquire the write lock in at least one directory </exception>
        public virtual void AddIndexes(params Directory[] dirs)
        {
            EnsureOpen();

            NoDupDirs(dirs);

            IEnumerable<Lock> locks = AcquireWriteLocks(dirs);

            bool successTop = false;

            try
            {
                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", "flush at addIndexes(Directory...)");
                }

                Flush(false, true);

                IList<SegmentCommitInfo> infos = new List<SegmentCommitInfo>();
                bool success = false;
                try
                {
                    foreach (Directory dir in dirs)
                    {
                        if (infoStream.IsEnabled("IW"))
                        {
                            infoStream.Message("IW", "addIndexes: process directory " + dir);
                        }
                        SegmentInfos sis = new SegmentInfos(); // read infos from dir
                        sis.Read(dir);
                        HashSet<string> dsFilesCopied = new HashSet<string>();
                        IDictionary<string, string> dsNames = new Dictionary<string, string>();
                        HashSet<string> copiedFiles = new HashSet<string>();
                        foreach (SegmentCommitInfo info in sis.Segments)
                        {
                            Debug.Assert(!infos.Contains(info), "dup info dir=" + info.Info.Dir + " name=" + info.Info.Name);

                            string newSegName = NewSegmentName();

                            if (infoStream.IsEnabled("IW"))
                            {
                                infoStream.Message("IW", "addIndexes: process segment origName=" + info.Info.Name + " newName=" + newSegName + " info=" + info);
                            }

                            IOContext context = new IOContext(new MergeInfo(info.Info.DocCount, info.SizeInBytes(), true, -1));

                            foreach (FieldInfo fi in SegmentReader.ReadFieldInfos(info))
                            {
                                GlobalFieldNumberMap.AddOrGet(fi.Name, fi.Number, fi.DocValuesType);
                            }
                            infos.Add(CopySegmentAsIs(info, newSegName, dsNames, dsFilesCopied, context, copiedFiles));
                        }
                    }
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        foreach (SegmentCommitInfo sipc in infos)
                        {
                            foreach (string file in sipc.Files())
                            {
                                try
                                {
                                    directory.DeleteFile(file);
                                }
                                catch (Exception)
                                {
                                }
                            }
                        }
                    }
                }

                lock (this)
                {
                    success = false;
                    try
                    {
                        EnsureOpen();
                        success = true;
                    }
                    finally
                    {
                        if (!success)
                        {
                            foreach (SegmentCommitInfo sipc in infos)
                            {
                                foreach (string file in sipc.Files())
                                {
                                    try
                                    {
                                        directory.DeleteFile(file);
                                    }
                                    catch (Exception)
                                    {
                                    }
                                }
                            }
                        }
                    }
                    segmentInfos.AddAll(infos);
                    Checkpoint();
                }

                successTop = true;
            }
            catch (System.OutOfMemoryException oom)
            {
                HandleOOM(oom, "addIndexes(Directory...)");
            }
            finally
            {
                if (locks != null)
                {
                    foreach (var lk in locks)
                    {
                        lk.Release();
                    }
                }

                if (successTop)
                {
                    IOUtils.Close(locks);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(locks);
                }
            }
        }
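
        // A minimal sketch (not part of the original source) of the parallelized
        // batch-indexing pattern described in the summary above: each sub-collection
        // is indexed into its own Directory, then the partial indexes are combined
        // with AddIndexes. The version constant, analyzer choice, and config API
        // are assumptions about the surrounding port:
        public static void MergeSubIndexesExample(Directory target, Directory[] subIndexDirs)
        {
            var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48));
            using (var writer = new IndexWriter(target, config))
            {
                // Copies the segments of every sub-index into the target; no other
                // IndexWriter may be open against the source directories while this runs.
                writer.AddIndexes(subIndexDirs);
                writer.Commit();
            }
        }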
        public Lucene3xStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context)
        {
            string segment = Lucene3xSegmentInfoFormat.GetDocStoreSegment(si);
            int docStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(si);
            int size = si.DocCount;
            bool success = false;
            FieldInfos = fn;
            try
            {
                if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si))
                {
                    d = StoreCFSReader = new CompoundFileDirectory(si.Dir, IndexFileNames.SegmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
                }
                else
                {
                    StoreCFSReader = null;
                }
                FieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, "", FIELDS_EXTENSION), context);
                string indexStreamFN = IndexFileNames.SegmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
                IndexStream = d.OpenInput(indexStreamFN, context);

                Format = IndexStream.ReadInt();

                if (Format < FORMAT_MINIMUM)
                {
                    throw new IndexFormatTooOldException(IndexStream, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
                }
                if (Format > FORMAT_CURRENT)
                {
                    throw new IndexFormatTooNewException(IndexStream, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
                }

                long indexSize = IndexStream.Length() - FORMAT_SIZE;

                if (docStoreOffset != -1)
                {
                    // We read only a slice out of this shared fields file
                    this.DocStoreOffset = docStoreOffset;
                    this.Size = size;

                    // Verify the file is long enough to hold all of our
                    // docs
                    Debug.Assert(((int)(indexSize / 8)) >= size + this.DocStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
                }
                else
                {
                    this.DocStoreOffset = 0;
                    this.Size = (int)(indexSize >> 3);
                    // Verify two sources of "maxDoc" agree:
                    if (this.Size != si.DocCount)
                    {
                        throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.Size + " but segmentInfo shows " + si.DocCount);
                    }
                }
                NumTotalDocs = (int)(indexSize >> 3);
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    try
                    {
                        Dispose();
                    }
                    catch (Exception)
                    {
                        // suppressed to keep our original exception
                    }
                }
            }
        }
        /// <summary>
        /// Merges the provided indexes into this index.
        ///
        /// <para>The provided IndexReaders are not closed.</para>
        ///
        /// <para>See <seealso cref="AddIndexes(Directory[])"/> for details on transactional semantics,
        /// temporary free space required in the Directory, and non-CFS segments on an exception.</para>
        ///
        /// <para><b>NOTE:</b> if this method hits an <c>OutOfMemoryException</c> you should immediately
        /// close the writer.</para>
        ///
        /// <para><b>NOTE:</b> empty segments are dropped by this method and not added to this
        /// index.</para>
        ///
        /// <para><b>NOTE:</b> this method merges all given <seealso cref="IndexReader"/>s in one
        /// merge. If you intend to merge a large number of readers, it may be better
        /// to call this method multiple times, each time with a small set of readers.
        /// In principle, if you use a merge policy with a <c>mergeFactor</c> or
        /// <c>maxMergeAtOnce</c> parameter, you should pass that many readers in one
        /// call. Also, if the given readers are <seealso cref="DirectoryReader"/>s, they can be
        /// opened with <c>termIndexInterval=-1</c> to save RAM, since during merge
        /// the in-memory structure is not used. See
        /// <seealso cref="DirectoryReader.Open(Directory, int)"/>.</para>
        ///
        /// <para><b>NOTE:</b> if you call <seealso cref="Close(bool)"/> with <c>false</c>, which
        /// aborts all running merges, then any thread still running this method might
        /// hit a <seealso cref="MergePolicy.MergeAbortedException"/>.</para>
        /// </summary>
        /// <exception cref="CorruptIndexException">
        ///           if the index is corrupt </exception>
        /// <exception cref="IOException">
        ///           if there is a low-level IO error </exception>
        public virtual void AddIndexes(params IndexReader[] readers)
        {
            EnsureOpen();
            int numDocs = 0;

            try
            {
                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", "flush at addIndexes(IndexReader...)");
                }
                Flush(false, true);

                string mergedName = NewSegmentName();
                IList<AtomicReader> mergeReaders = new List<AtomicReader>();
                foreach (IndexReader indexReader in readers)
                {
                    numDocs += indexReader.NumDocs();
                    foreach (AtomicReaderContext ctx in indexReader.Leaves())
                    {
                        mergeReaders.Add(ctx.AtomicReader);
                    }
                }

                IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1));

                // TODO: somehow we should fix this merge so it's
                // abortable so that IW.close(false) is able to stop it
                TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);

                SegmentInfo info = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergedName, -1, false, Codec, null);

                SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir, Config_Renamed.TermIndexInterval, MergeState.CheckAbort.NONE, GlobalFieldNumberMap, context, Config_Renamed.CheckIntegrityAtMerge);

                if (!merger.ShouldMerge())
                {
                    return;
                }

                MergeState mergeState;
                bool success = false;
                try
                {
                    mergeState = merger.Merge(); // merge 'em
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        lock (this)
                        {
                            Deleter.Refresh(info.Name);
                        }
                    }
                }

                SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, -1L, -1L);

                info.Files = new HashSet<string>(trackingDir.CreatedFiles);
                trackingDir.CreatedFiles.Clear();

                SetDiagnostics(info, SOURCE_ADDINDEXES_READERS);

                bool useCompoundFile;
                lock (this) // Guard segmentInfos
                {
                    if (StopMerges)
                    {
                        Deleter.DeleteNewFiles(infoPerCommit.Files());
                        return;
                    }
                    EnsureOpen();
                    useCompoundFile = mergePolicy.UseCompoundFile(segmentInfos, infoPerCommit);
                }

                // Now create the compound file if needed
                if (useCompoundFile)
                {
                    ICollection<string> filesToDelete = infoPerCommit.Files();
                    try
                    {
                        CreateCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, info, context);
                    }
                    finally
                    {
                        // delete new non cfs files directly: they were never
                        // registered with IFD
                        lock (this)
                        {
                            Deleter.DeleteNewFiles(filesToDelete);
                        }
                    }
                    info.UseCompoundFile = true;
                }

                // Have codec write SegmentInfo.  Must do this after
                // creating CFS so that 1) .si isn't slurped into CFS,
                // and 2) .si reflects useCompoundFile=true change
                // above:
                success = false;
                try
                {
                    Codec.SegmentInfoFormat().SegmentInfoWriter.Write(trackingDir, info, mergeState.FieldInfos, context);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        lock (this)
                        {
                            Deleter.Refresh(info.Name);
                        }
                    }
                }

                info.AddFiles(trackingDir.CreatedFiles);

                // Register the new segment
                lock (this)
                {
                    if (StopMerges)
                    {
                        Deleter.DeleteNewFiles(info.Files);
                        return;
                    }
                    EnsureOpen();
                    segmentInfos.Add(infoPerCommit);
                    Checkpoint();
                }
            }
            catch (System.OutOfMemoryException oom)
            {
                HandleOOM(oom, "addIndexes(IndexReader...)");
            }
        }
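
        // A hedged sketch of driving the reader-based variant above: open each
        // source index, merge all readers in one call (as the summary recommends
        // for small sets), then dispose the readers ourselves, since AddIndexes
        // does not close them. The DirectoryReader.Open usage and disposal pattern
        // are illustrative assumptions:
        public static void MergeReadersExample(IndexWriter writer, Directory[] sources)
        {
            var readers = new IndexReader[sources.Length];
            try
            {
                for (int i = 0; i < sources.Length; i++)
                {
                    readers[i] = DirectoryReader.Open(sources[i]);
                }
                // Performs a single merge of all given readers into one new segment:
                writer.AddIndexes(readers);
            }
            finally
            {
                foreach (IndexReader reader in readers)
                {
                    if (reader != null)
                    {
                        reader.Dispose();
                    }
                }
            }
        }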
        public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
            IComparer<BytesRef> termComp, String segmentSuffix, IOContext context)
        {
            _termComp = termComp;

            Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

            _input =
                dir.OpenInput(
                    IndexFileNames.SegmentFileName(segment, segmentSuffix,
                        FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION),
                    context);

            var success = false;

            try
            {

                _version = ReadHeader(_input);

                if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
                    CodecUtil.ChecksumEntireFile(_input);
                
                indexInterval = _input.ReadInt();
                
                if (indexInterval < 1)
                {
                    throw new CorruptIndexException(String.Format("Invalid indexInterval: {0}, Resource: {1}",
                        indexInterval, _input));
                }

                _indexDivisor = indexDivisor;

                if (indexDivisor < 0)
                {
                    _totalIndexInterval = indexInterval;
                }
                else
                {
                    // In case terms index gets loaded, later, on demand
                    _totalIndexInterval = indexInterval*indexDivisor;
                }

                Debug.Assert(_totalIndexInterval > 0);

                SeekDir(_input, _dirOffset);

                // Read directory
                int numFields = _input.ReadVInt();

                if (numFields < 0)
                    throw new CorruptIndexException(String.Format("Invalid numFields: {0}, Resource: {1}", numFields,
                        _input));

                for (int i = 0; i < numFields; i++)
                {
                    int field = _input.ReadVInt();
                    int numIndexTerms = _input.ReadVInt();
                    if (numIndexTerms < 0)
                        throw new CorruptIndexException(String.Format("Invalid numIndexTerms: {0}, Resource: {1}",
                            numIndexTerms,
                            _input));

                    long termsStart = _input.ReadVLong();
                    long indexStart = _input.ReadVLong();
                    long packedIndexStart = _input.ReadVLong();
                    long packedOffsetsStart = _input.ReadVLong();

                    if (packedIndexStart < indexStart)
                        throw new CorruptIndexException(
                            String.Format(
                                "Invalid packedIndexStart: {0}, IndexStart: {1}, NumIndexTerms: {2}, Resource: {3}",
                                packedIndexStart,
                                indexStart, numIndexTerms, _input));

                    FieldInfo fieldInfo = fieldInfos.FieldInfo(field);

                    try
                    {
                        _fields.Add(fieldInfo,
                            new FieldIndexData(numIndexTerms, indexStart, termsStart, packedIndexStart,
                                packedOffsetsStart, this));
                    }
                    catch (ArgumentException)
                    {
                        throw new CorruptIndexException(String.Format("Duplicate field: {0}, Resource {1}",
                            fieldInfo.Name,
                            _input));
                    }


                }
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(_input);
                }
                if (indexDivisor > 0)
                {
                    _input.Dispose();
                    _input = null;
                    if (success)
                        _indexLoaded = true;

                    _termBytesReader = _termBytes.Freeze(true);
                }
            }
        }
        public override IndexInput OpenInput(string name, IOContext context)
        {
            return new FaultyIndexInput(FsDir.OpenInput(name, context));
        }
        public override IndexOutput CreateOutput(string name, IOContext context)
        {
            return FsDir.CreateOutput(name, context);
        }
        public override TermVectorsReader VectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context)
        {
            return new AssertingTermVectorsReader(@in.VectorsReader(directory, segmentInfo, fieldInfos, context));
        }