Exemple #1
0
        /// <summary>
        /// Copies the named segments into <paramref name="destDir"/> and commits a
        /// new <see cref="SegmentInfos"/> there that lists only those segments.
        /// </summary>
        /// <param name="destDir">Destination directory; it is created before use.</param>
        /// <param name="segs">Names of the segments to copy; each is looked up through <c>GetInfo</c>.</param>
        public virtual void Split(DirectoryInfo destDir, string[] segs)
        {
            destDir.Create();

            // The destination directory is only needed for the duration of this
            // call, so dispose it on every exit path (including failures while
            // copying or committing) instead of leaking it.
            using (FSDirectory destFSDir = FSDirectory.Open(destDir))
            {
                SegmentInfos destInfos = new SegmentInfos();
                destInfos.Counter = infos.Counter;

                foreach (string segmentName in segs)
                {
                    SegmentCommitInfo infoPerCommit = GetInfo(segmentName);
                    SegmentInfo info = infoPerCommit.Info;

                    // Same info just changing the dir:
                    SegmentInfo newInfo = new SegmentInfo(destFSDir, info.Version, info.Name, info.DocCount, info.UseCompoundFile, info.Codec, info.Diagnostics);
                    destInfos.Add(new SegmentCommitInfo(newInfo, infoPerCommit.DelCount, infoPerCommit.DelGen, infoPerCommit.FieldInfosGen));

                    // now copy files over, keeping the original file names
                    ICollection<string> files = infoPerCommit.Files();
                    foreach (string srcName in files)
                    {
                        FileInfo srcFile = new FileInfo(Path.Combine(dir.FullName, srcName));
                        FileInfo destFile = new FileInfo(Path.Combine(destDir.FullName, srcName));
                        CopyFile(srcFile, destFile);
                    }
                }

                destInfos.Changed();
                destInfos.Commit(destFSDir);
            }
        }
Exemple #2
0
        /// <summary>
        /// Copies the segment files as-is into the IndexWriter's directory.
        /// </summary>
        /// <param name="info">The source segment whose files are copied.</param>
        /// <param name="segName">Name the copied segment gets in the target directory.</param>
        /// <param name="dsNames">Maps a source doc-store segment name to its new name; a missing
        /// entry is added here, mapped to <paramref name="segName"/>.</param>
        /// <param name="dsFilesCopied">New names of doc-store files that were already copied; such
        /// files are skipped, and newly copied ones are added.</param>
        /// <param name="context">IO context handed to the segment-info writer and to the file copy.</param>
        /// <param name="copiedFiles">Source file names copied so far; every file copied here is added.</param>
        /// <returns>A <see cref="SegmentCommitInfo"/> for the copy, wrapping a new
        /// <see cref="SegmentInfo"/> that uses the writer's directory and <paramref name="segName"/>.</returns>
        private SegmentCommitInfo CopySegmentAsIs(SegmentCommitInfo info, string segName, IDictionary<string, string> dsNames, ISet<string> dsFilesCopied, IOContext context, ISet<string> copiedFiles)
        {
            // Determine if the doc store of this segment needs to be copied. It's
            // only relevant for segments that share doc store with others,
            // because the DS might have been copied already, in which case we
            // just want to update the DS name of this SegmentInfo.
            string dsName = Lucene3xSegmentInfoFormat.GetDocStoreSegment(info.Info);
            Debug.Assert(dsName != null);
            string newDsName;
            if (!dsNames.TryGetValue(dsName, out newDsName))
            {
                // First time this doc store is seen: it takes the new segment's name.
                newDsName = segName;
                dsNames[dsName] = segName;
            }

            // note: we don't really need this fis (its copied), but we load it up
            // so we don't pass a null value to the si writer
            FieldInfos fis = SegmentReader.ReadFieldInfos(info);

            ISet<string> docStoreFiles3xOnly = Lucene3xCodec.GetDocStoreFiles(info.Info);

            // copy the attributes map, we might modify it below.
            // also we need to ensure its read-write, since we will invoke the SIwriter (which might want to set something).
            IDictionary<string, string> sourceAttributes = info.Info.Attributes();
            IDictionary<string, string> attributes;
            if (sourceAttributes == null)
            {
                attributes = new Dictionary<string, string>();
            }
            else
            {
                attributes = new Dictionary<string, string>(sourceAttributes);
            }
            if (docStoreFiles3xOnly != null)
            {
                // only violate the codec this way if it's preflex &
                // shares doc stores
                // change docStoreSegment to newDsName
                attributes[Lucene3xSegmentInfoFormat.DS_NAME_KEY] = newDsName;
            }

            // Same SI as before but we change directory, name and docStoreSegment:
            SegmentInfo newInfo = new SegmentInfo(directory, info.Info.Version, segName, info.Info.DocCount, info.Info.UseCompoundFile, info.Info.Codec, info.Info.Diagnostics, attributes);
            SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo, info.DelCount, info.DelGen, info.FieldInfosGen);

            HashSet<string> segFiles = new HashSet<string>();

            // Build up new segment's file names.  Must do this
            // before writing SegmentInfo:
            foreach (string file in info.Files())
            {
                string newFileName;
                if (docStoreFiles3xOnly != null && docStoreFiles3xOnly.Contains(file))
                {
                    newFileName = newDsName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file);
                }
                else
                {
                    newFileName = segName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file);
                }
                segFiles.Add(newFileName);
            }
            newInfo.Files = segFiles;

            // We must rewrite the SI file because it references
            // segment name (its own name, if its 3.x, and doc
            // store segment name):
            TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
            Codec currentCodec = newInfo.Codec;
            try
            {
                currentCodec.SegmentInfoFormat().SegmentInfoWriter.Write(trackingDir, newInfo, fis, context);
            }
            catch (System.NotSupportedException)
            {
                if (!(currentCodec is Lucene3xCodec))
                {
                    // Rethrow with "throw;" so the original stack trace is preserved.
                    throw;
                }
                // OK: 3x codec cannot write a new SI file;
                // SegmentInfos will write this on commit
            }

            ICollection<string> siFiles = trackingDir.CreatedFiles;

            bool success = false;
            try
            {
                // Copy the segment's files
                foreach (string file in info.Files())
                {
                    string newFileName;
                    if (docStoreFiles3xOnly != null && docStoreFiles3xOnly.Contains(file))
                    {
                        newFileName = newDsName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file);
                        // Add returns false when the name is already present,
                        // i.e. this shared doc-store file was copied earlier.
                        if (!dsFilesCopied.Add(newFileName))
                        {
                            continue;
                        }
                    }
                    else
                    {
                        newFileName = segName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file);
                    }

                    if (siFiles.Contains(newFileName))
                    {
                        // We already rewrote this above
                        continue;
                    }

                    Debug.Assert(!SlowFileExists(directory, newFileName), "file \"" + newFileName + "\" already exists; siFiles=" + siFiles);
                    Debug.Assert(!copiedFiles.Contains(file), "file \"" + file + "\" is being copied more than once");
                    copiedFiles.Add(file);
                    info.Info.Dir.Copy(directory, file, newFileName, context);
                }
                success = true;
            }
            finally
            {
                if (!success)
                {
                    // Best-effort cleanup of whatever was written for the new segment.
                    foreach (string file in newInfo.Files)
                    {
                        try
                        {
                            directory.DeleteFile(file);
                        }
                        catch (Exception)
                        {
                            // Deliberately suppressed: a cleanup failure must not
                            // replace the exception that made the copy fail.
                        }
                    }
                }
            }

            return newInfoPerCommit;
        }
Exemple #3
0
        /// <summary>
        /// Merges the provided indexes into this index.
        ///
        /// <para>
        /// The provided IndexReaders are not closed.
        /// </para>
        ///
        /// <para>
        /// See the other <c>AddIndexes</c> documentation for details on transactional semantics,
        /// temporary free space required in the Directory, and non-CFS segments on an Exception.
        /// </para>
        ///
        /// <para>
        /// <b>NOTE</b>: if this method hits an <see cref="System.OutOfMemoryException"/> you should
        /// immediately close the writer. See <a href="#OOME">above</a> for details.
        /// </para>
        ///
        /// <para>
        /// <b>NOTE:</b> empty segments are dropped by this method and not added to this
        /// index.
        /// </para>
        ///
        /// <para>
        /// <b>NOTE:</b> this method merges all given <see cref="IndexReader"/>s in one
        /// merge. If you intend to merge a large number of readers, it may be better
        /// to call this method multiple times, each time with a small set of readers.
        /// In principle, if you use a merge policy with a <c>mergeFactor</c> or
        /// <c>maxMergeAtOnce</c> parameter, you should pass that many readers in one
        /// call. Also, if the given readers are <see cref="DirectoryReader"/>s, they can be
        /// opened with <c>termIndexInterval=-1</c> to save RAM, since during merge
        /// the in-memory structure is not used. See
        /// <c>DirectoryReader.Open(Directory, int)</c>.
        /// </para>
        ///
        /// <para>
        /// <b>NOTE</b>: if the writer is closed with <c>false</c> passed to its boolean close
        /// overload, which aborts all running merges, then any thread still running this
        /// method might hit a <see cref="MergePolicy.MergeAbortedException"/>.
        /// </para>
        /// </summary>
        /// <param name="readers">The readers whose leaves are merged into one new segment.</param>
        /// <exception cref="CorruptIndexException">
        ///           if the index is corrupt </exception>
        /// <exception cref="IOException">
        ///           if there is a low-level IO error </exception>
        public virtual void AddIndexes(params IndexReader[] readers)
        {
            EnsureOpen();
            // Total NumDocs() over all readers; only used to build the MergeInfo below.
            int numDocs = 0;

            try
            {
                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", "flush at addIndexes(IndexReader...)");
                }
                Flush(false, true);

                string mergedName = NewSegmentName();
                // Flatten every reader into its atomic leaves; the merger works on those.
                IList<AtomicReader> mergeReaders = new List<AtomicReader>();
                foreach (IndexReader indexReader in readers)
                {
                    numDocs += indexReader.NumDocs();
                    foreach (AtomicReaderContext ctx in indexReader.Leaves())
                    {
                        mergeReaders.Add(ctx.AtomicReader);
                    }
                }

                IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1));

                // TODO: somehow we should fix this merge so it's
                // abortable so that IW.close(false) is able to stop it
                // The tracking wrapper's CreatedFiles is read below to learn
                // which files the merge (and later the .si write) produced.
                TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);

                SegmentInfo info = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergedName, -1, false, Codec, null);

                SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir, Config_Renamed.TermIndexInterval, MergeState.CheckAbort.NONE, GlobalFieldNumberMap, context, Config_Renamed.CheckIntegrityAtMerge);

                // Nothing is added to the index when the merger reports there is nothing to merge.
                if (!merger.ShouldMerge())
                {
                    return;
                }

                MergeState mergeState;
                bool success = false;
                try
                {
                    mergeState = merger.Merge(); // merge 'em
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        // Merge failed: refresh the deleter for this segment name
                        // (presumably removes partially written files - confirm).
                        lock (this)
                        {
                            Deleter.Refresh(info.Name);
                        }
                    }
                }

                SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, -1L, -1L);

                // Record the files created by the merge, then reset the tracker so
                // that CreatedFiles afterwards holds only files written from here on.
                info.Files = new HashSet<string>(trackingDir.CreatedFiles);
                trackingDir.CreatedFiles.Clear();

                SetDiagnostics(info, SOURCE_ADDINDEXES_READERS);

                bool useCompoundFile;
                lock (this) // Guard segmentInfos
                {
                    if (StopMerges)
                    {
                        // Merges were stopped meanwhile: discard the new files and bail out.
                        Deleter.DeleteNewFiles(infoPerCommit.Files());
                        return;
                    }
                    EnsureOpen();
                    useCompoundFile = mergePolicy.UseCompoundFile(segmentInfos, infoPerCommit);
                }

                // Now create the compound file if needed
                if (useCompoundFile)
                {
                    // Snapshot taken before the CFS is built, so it names the non-CFS files.
                    ICollection<string> filesToDelete = infoPerCommit.Files();
                    try
                    {
                        CreateCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, info, context);
                    }
                    finally
                    {
                        // delete new non cfs files directly: they were never
                        // registered with IFD
                        lock (this)
                        {
                            Deleter.DeleteNewFiles(filesToDelete);
                        }
                    }
                    info.UseCompoundFile = true;
                }

                // Have codec write SegmentInfo.  Must do this after
                // creating CFS so that 1) .si isn't slurped into CFS,
                // and 2) .si reflects useCompoundFile=true change
                // above:
                success = false;
                try
                {
                    Codec.SegmentInfoFormat().SegmentInfoWriter.Write(trackingDir, info, mergeState.FieldInfos, context);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        lock (this)
                        {
                            Deleter.Refresh(info.Name);
                        }
                    }
                }

                // Add whatever the segment-info write created to the segment's file set.
                info.AddFiles(trackingDir.CreatedFiles);

                // Register the new segment
                lock (this)
                {
                    if (StopMerges)
                    {
                        Deleter.DeleteNewFiles(info.Files);
                        return;
                    }
                    EnsureOpen();
                    segmentInfos.Add(infoPerCommit);
                    Checkpoint();
                }
            }
            catch (System.OutOfMemoryException oom)
            {
                // Out-of-memory is not rethrown here; it is handed to HandleOOM.
                HandleOOM(oom, "addIndexes(IndexReader...)");
            }
        }
        /// <summary>
        /// Flush all pending docs to a new segment.
        /// </summary>
        /// <remarks>
        /// Asserts that at least one document is buffered and that the delete slice is empty.
        /// If anything after the abort check throws, <c>Abort(FilesToDelete)</c> is invoked
        /// before the exception propagates.
        /// </remarks>
        /// <returns>
        /// The sealed <see cref="FlushedSegment"/>, or <c>null</c> when <c>Aborting</c> is set.
        /// </returns>
        internal virtual FlushedSegment Flush()
        {
            Debug.Assert(numDocsInRAM > 0);
            Debug.Assert(DeleteSlice.Empty, "all deletes must be applied in prepareFlush");
            SegmentInfo_Renamed.DocCount = numDocsInRAM;
            SegmentWriteState flushState = new SegmentWriteState(InfoStream, Directory, SegmentInfo_Renamed, FieldInfos.Finish(), IndexWriterConfig.TermIndexInterval, PendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, BytesUsed())));
            // Captured before flushing so the log line can report RAM used going in.
            double startMBUsed = BytesUsed() / 1024.0 / 1024.0;

            // Apply delete-by-docID now (delete-byDocID only
            // happens when an exception is hit processing that
            // doc, eg if analyzer has some problem w/ the text):
            if (PendingUpdates.DocIDs.Count > 0)
            {
                flushState.LiveDocs = Codec.LiveDocsFormat().NewLiveDocs(numDocsInRAM);
                foreach (int delDocID in PendingUpdates.DocIDs)
                {
                    flushState.LiveDocs.Clear(delDocID);
                }
                flushState.DelCountOnFlush = PendingUpdates.DocIDs.Count;
                // Give back the RAM accounted for the doc-ID deletes that were just applied.
                PendingUpdates.BytesUsed.AddAndGet(-PendingUpdates.DocIDs.Count * BufferedUpdates.BYTES_PER_DEL_DOCID);
                PendingUpdates.DocIDs.Clear();
            }

            if (Aborting)
            {
                if (InfoStream.IsEnabled("DWPT"))
                {
                    InfoStream.Message("DWPT", "flush: skip because aborting is set");
                }
                return null;
            }

            if (InfoStream.IsEnabled("DWPT"))
            {
                InfoStream.Message("DWPT", "flush postings as segment " + flushState.SegmentInfo.Name + " numDocs=" + numDocsInRAM);
            }

            bool success = false;

            try
            {
                Consumer.Flush(flushState);
                PendingUpdates.Terms.Clear();
                SegmentInfo_Renamed.Files = new HashSet<string>(Directory.CreatedFiles);

                SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(SegmentInfo_Renamed, 0, -1L, -1L);
                if (InfoStream.IsEnabled("DWPT"))
                {
                    // Report the number of deleted docs (DelCountOnFlush), not
                    // DocCount - DelCountOnFlush, which is the number of live docs.
                    InfoStream.Message("DWPT", "new segment has " + (flushState.LiveDocs == null ? 0 : flushState.DelCountOnFlush) + " deleted docs");
                    InfoStream.Message("DWPT", "new segment has " + (flushState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (flushState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (flushState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (flushState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (flushState.FieldInfos.HasFreq() ? "freqs" : "no freqs"));
                    InfoStream.Message("DWPT", "flushedFiles=" + segmentInfoPerCommit.Files());
                    InfoStream.Message("DWPT", "flushed codec=" + Codec);
                }

                // Only hand the buffered updates on when something other than the
                // (already applied) doc-ID deletes is still pending.
                BufferedUpdates segmentDeletes;
                if (PendingUpdates.Queries.Count == 0 && PendingUpdates.NumericUpdates.Count == 0 && PendingUpdates.BinaryUpdates.Count == 0)
                {
                    PendingUpdates.Clear();
                    segmentDeletes = null;
                }
                else
                {
                    segmentDeletes = PendingUpdates;
                }

                if (InfoStream.IsEnabled("DWPT"))
                {
                    double newSegmentSize = segmentInfoPerCommit.SizeInBytes() / 1024.0 / 1024.0;
                    InfoStream.Message("DWPT", "flushed: segment=" + SegmentInfo_Renamed.Name + " ramUsed=" + startMBUsed.ToString(Nf) + " MB" + " newFlushedSize(includes docstores)=" + newSegmentSize.ToString(Nf) + " MB" + " docs/MB=" + (flushState.SegmentInfo.DocCount / newSegmentSize).ToString(Nf));
                }

                Debug.Assert(SegmentInfo_Renamed != null);

                FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.FieldInfos, segmentDeletes, flushState.LiveDocs, flushState.DelCountOnFlush);
                SealFlushedSegment(fs);
                success = true;

                return fs;
            }
            finally
            {
                if (!success)
                {
                    Abort(FilesToDelete);
                }
            }
        }
        /// <summary>
        /// Flush all pending docs to a new segment.
        /// </summary>
        /// <remarks>
        /// Asserts that at least one document is buffered and that the delete slice is empty.
        /// If anything after the abort check throws, <c>Abort(FilesToDelete)</c> is invoked
        /// before the exception propagates.
        /// </remarks>
        /// <returns>
        /// The sealed <see cref="FlushedSegment"/>, or <c>null</c> when <c>Aborting</c> is set.
        /// </returns>
        internal virtual FlushedSegment Flush()
        {
            Debug.Assert(numDocsInRAM > 0);
            Debug.Assert(DeleteSlice.Empty, "all deletes must be applied in prepareFlush");
            SegmentInfo_Renamed.DocCount = numDocsInRAM;
            SegmentWriteState flushState = new SegmentWriteState(InfoStream, Directory, SegmentInfo_Renamed, FieldInfos.Finish(), IndexWriterConfig.TermIndexInterval, PendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, BytesUsed())));
            // Captured before flushing so the log line can report RAM used going in.
            double startMBUsed = BytesUsed() / 1024.0 / 1024.0;

            // Apply delete-by-docID now (delete-byDocID only
            // happens when an exception is hit processing that
            // doc, eg if analyzer has some problem w/ the text):
            if (PendingUpdates.DocIDs.Count > 0)
            {
                flushState.LiveDocs = Codec.LiveDocsFormat().NewLiveDocs(numDocsInRAM);
                foreach (int delDocID in PendingUpdates.DocIDs)
                {
                    flushState.LiveDocs.Clear(delDocID);
                }
                flushState.DelCountOnFlush = PendingUpdates.DocIDs.Count;
                // Give back the RAM accounted for the doc-ID deletes that were just applied.
                PendingUpdates.BytesUsed.AddAndGet(-PendingUpdates.DocIDs.Count * BufferedUpdates.BYTES_PER_DEL_DOCID);
                PendingUpdates.DocIDs.Clear();
            }

            if (Aborting)
            {
                if (InfoStream.IsEnabled("DWPT"))
                {
                    InfoStream.Message("DWPT", "flush: skip because aborting is set");
                }
                return null;
            }

            if (InfoStream.IsEnabled("DWPT"))
            {
                InfoStream.Message("DWPT", "flush postings as segment " + flushState.SegmentInfo.Name + " numDocs=" + numDocsInRAM);
            }

            bool success = false;

            try
            {
                Consumer.Flush(flushState);
                PendingUpdates.Terms.Clear();
                SegmentInfo_Renamed.Files = new HashSet<string>(Directory.CreatedFiles);

                SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(SegmentInfo_Renamed, 0, -1L, -1L);
                if (InfoStream.IsEnabled("DWPT"))
                {
                    // Report the number of deleted docs (DelCountOnFlush), not
                    // DocCount - DelCountOnFlush, which is the number of live docs.
                    InfoStream.Message("DWPT", "new segment has " + (flushState.LiveDocs == null ? 0 : flushState.DelCountOnFlush) + " deleted docs");
                    InfoStream.Message("DWPT", "new segment has " + (flushState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (flushState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (flushState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (flushState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (flushState.FieldInfos.HasFreq() ? "freqs" : "no freqs"));
                    InfoStream.Message("DWPT", "flushedFiles=" + segmentInfoPerCommit.Files());
                    InfoStream.Message("DWPT", "flushed codec=" + Codec);
                }

                // Only hand the buffered updates on when something other than the
                // (already applied) doc-ID deletes is still pending.
                BufferedUpdates segmentDeletes;
                if (PendingUpdates.Queries.Count == 0 && PendingUpdates.NumericUpdates.Count == 0 && PendingUpdates.BinaryUpdates.Count == 0)
                {
                    PendingUpdates.Clear();
                    segmentDeletes = null;
                }
                else
                {
                    segmentDeletes = PendingUpdates;
                }

                if (InfoStream.IsEnabled("DWPT"))
                {
                    double newSegmentSize = segmentInfoPerCommit.SizeInBytes() / 1024.0 / 1024.0;
                    InfoStream.Message("DWPT", "flushed: segment=" + SegmentInfo_Renamed.Name + " ramUsed=" + startMBUsed.ToString(Nf) + " MB" + " newFlushedSize(includes docstores)=" + newSegmentSize.ToString(Nf) + " MB" + " docs/MB=" + (flushState.SegmentInfo.DocCount / newSegmentSize).ToString(Nf));
                }

                Debug.Assert(SegmentInfo_Renamed != null);

                FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.FieldInfos, segmentDeletes, flushState.LiveDocs, flushState.DelCountOnFlush);
                SealFlushedSegment(fs);
                success = true;

                return fs;
            }
            finally
            {
                if (!success)
                {
                    Abort(FilesToDelete);
                }
            }
        }