protected internal virtual IndexWriterConfig GetConfig(Random random, IndexDeletionPolicy dp)
{
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    if (dp != null)
    {
        conf.SetIndexDeletionPolicy(dp);
    }
    return conf;
}
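// Usage sketch (not part of the snippet above): how a test might drive GetConfig with a
// custom deletion policy. Assumes LuceneTestCase-style helpers (NewDirectory, Random) and
// the Lucene.Net 4.8 SnapshotDeletionPolicy API; the added documents are illustrative.
[Test]
public virtual void TestSnapshottedCommitSurvivesLaterCommits()
{
    Directory dir = NewDirectory();
    var snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    IndexWriter writer = new IndexWriter(dir, GetConfig(Random, snapshotter));

    // The writer may clone its config, so fetch the live policy instance back from it.
    snapshotter = (SnapshotDeletionPolicy)writer.Config.IndexDeletionPolicy;

    writer.AddDocument(new Document());
    writer.Commit();
    IndexCommit pinned = snapshotter.Snapshot();   // keeps this commit on disk

    writer.AddDocument(new Document());
    writer.Commit();                               // the pinned commit is still readable

    snapshotter.Release(pinned);
    writer.Dispose();
    dir.Dispose();
}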
public RavenIndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, IndexWriter.MaxFieldLength mfl, int maximumNumberOfWritesBeforeRecreate, IndexWriter.IndexReaderWarmer indexReaderWarmer) { directory = d; analyzer = a; indexDeletionPolicy = deletionPolicy; maxFieldLength = mfl; _indexReaderWarmer = indexReaderWarmer; this.maximumNumberOfWritesBeforeRecreate = maximumNumberOfWritesBeforeRecreate; RecreateIfNecessary(); }
public LuceneIndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, IndexWriter.MaxFieldLength mfl, IndexWriter.IndexReaderWarmer indexReaderWarmer, DocumentDatabase documentDatabase, IState state) { _directory = d; _analyzer = a; _indexDeletionPolicy = deletionPolicy; _maxFieldLength = mfl; _indexReaderWarmer = indexReaderWarmer; _logger = LoggingSource.Instance.GetLogger <LuceneIndexWriter>(documentDatabase.Name); RecreateIndexWriter(state); }
/// <summary>Construct reading the named set of readers.</summary>
internal DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
{
    this.directory = directory;
    this.readOnly = readOnly;
    this.segmentInfos = sis;
    this.deletionPolicy = deletionPolicy;
    this.termInfosIndexDivisor = termInfosIndexDivisor;

    if (!readOnly)
    {
        // We assume that this segments_N was previously properly sync'd:
        SupportClass.CollectionsHelper.AddAllIfNotContains(synced, sis.Files(directory, true));
    }

    // To reduce the chance of hitting FileNotFound (and having to retry), we open
    // segments in reverse because IndexWriter merges & deletes the newest segments first.
    SegmentReader[] readers = new SegmentReader[sis.Count];
    for (int i = sis.Count - 1; i >= 0; i--)
    {
        bool success = false;
        try
        {
            readers[i] = SegmentReader.Get(readOnly, sis.Info(i), termInfosIndexDivisor);
            success = true;
        }
        finally
        {
            if (!success)
            {
                // Close all readers we had opened:
                for (i++; i < sis.Count; i++)
                {
                    try
                    {
                        readers[i].Close();
                    }
                    catch (System.Exception)
                    {
                        // keep going - we want to clean up as much as possible
                    }
                }
            }
        }
    }

    Initialize(readers);
}
public RavenIndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, IndexWriter.MaxFieldLength mfl, int maximumNumberOfWritesBeforeRecreate, IndexWriter.IndexReaderWarmer indexReaderWarmer) { directory = d; analyzer = a; indexDeletionPolicy = deletionPolicy; maxFieldLength = mfl; _indexReaderWarmer = indexReaderWarmer; this.maximumNumberOfWritesBeforeRecreate = maximumNumberOfWritesBeforeRecreate; forceCommitDoc = new Document(); forceCommitDoc.Add(forceCommitField); RecreateIfNecessary(force: true); }
/// <summary>
/// Create an index snapshot iterator for a writable index.
/// </summary>
/// <param name="indexFolder">index location folder</param>
/// <param name="indexWriter">index writer</param>
/// <returns>index file name iterator</returns>
/// <exception cref="IOException"></exception>
public static ResourceIterator<File> ForIndex(File indexFolder, IndexWriter indexWriter)
{
    IndexDeletionPolicy deletionPolicy = indexWriter.Config.IndexDeletionPolicy;
    if (deletionPolicy is SnapshotDeletionPolicy)
    {
        SnapshotDeletionPolicy policy = (SnapshotDeletionPolicy)deletionPolicy;
        return HasCommits(indexWriter)
            ? new WritableIndexSnapshotFileIterator(indexFolder, policy)
            : emptyResourceIterator();
    }
    else
    {
        // Note: .NET Type.FullName may not always match Java's Class.getName output.
        throw new UnsupportedIndexDeletionPolicy(
            "Can't perform index snapshot with specified index deletion policy: " +
            deletionPolicy.GetType().FullName + ". Only " +
            typeof(SnapshotDeletionPolicy).FullName + " is supported");
    }
}
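// Usage sketch (not part of the snippet above): ForIndex only succeeds when the writer was
// opened with a SnapshotDeletionPolicy. A minimal way to configure that, assuming the
// Lucene.Net 4.8 IndexWriterConfig API; the index path is illustrative.
var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer)
{
    // Wrap the default policy so commits can be pinned while snapshot files are copied.
    IndexDeletionPolicy = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy())
};
using (var dir = FSDirectory.Open(new System.IO.DirectoryInfo("/data/index")))
using (var writer = new IndexWriter(dir, config))
{
    // ... index documents and commit, then hand the writer to ForIndex(indexFolder, writer).
}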
public IndexWriter(Directory d, bool autoCommit, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy) { InitBlock(); Init(d, a, create, false, deletionPolicy, autoCommit, DEFAULT_MAX_FIELD_LENGTH, null, null); }
/// <summary>Expert: returns an IndexReader reading the index in
/// the given Directory, using a specific commit and with
/// a custom {@link IndexDeletionPolicy}. You should pass
/// readOnly=true, since it gives much better concurrent
/// performance, unless you intend to do write operations
/// (delete documents or change norms) with the reader.
/// </summary>
/// <param name="commit">the specific {@link IndexCommit} to open;
/// see {@link IndexReader#listCommits} to list all commits
/// in a directory
/// </param>
/// <param name="deletionPolicy">a custom deletion policy (only used
/// if you use this reader to perform deletes or to set
/// norms); see {@link IndexWriter} for details.
/// </param>
/// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader
/// </param>
/// <param name="termInfosIndexDivisor">Subsamples which indexed
/// terms are loaded into RAM. This has the same effect as {@link
/// IndexWriter#setTermIndexInterval} except that setting
/// must be done at indexing time while this setting can be
/// set per reader. When set to N, then one in every
/// N*termIndexInterval terms in the index is loaded into
/// memory. By setting this to a value > 1 you can reduce
/// memory usage, at the expense of higher latency when
/// loading a TermInfo. The default value is 1. Set this
/// to -1 to skip loading the terms index entirely.
/// </param>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public static IndexReader Open(IndexCommit commit, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
{
    return Open(commit.GetDirectory(), deletionPolicy, commit, readOnly, termInfosIndexDivisor);
}
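// Usage sketch for the overload above (not from the source): iterate the commits that a
// permissive deletion policy has kept and open each one read-only. Assumes the same
// 2.9/3.0-era static API; member names (Get-methods vs. properties) vary slightly by version.
Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("/data/index"));
foreach (IndexCommit commit in IndexReader.ListCommits(dir))
{
    // readOnly=true for best concurrency; termInfosIndexDivisor=1 is the default.
    // The deletion policy argument only matters if this reader deletes docs or sets norms.
    IndexReader reader = IndexReader.Open(commit, new KeepOnlyLastCommitDeletionPolicy(), true, 1);
    try
    {
        System.Console.WriteLine(commit.GetSegmentsFileName() + ": " + reader.NumDocs() + " docs");
    }
    finally
    {
        reader.Close();
    }
}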
public TimeTrackingIndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IState state) : base(d, a, deletionPolicy, mfl, state) { }
/// <summary> Initialize the deleter: find all previous commits in the Directory,
/// incref the files they reference, and call the policy to let it delete commits.
/// This will remove any files not referenced by any of the commits.
/// </summary>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, System.IO.StreamWriter infoStream, DocumentsWriter docWriter, HashSet<string> synced)
{
    this.docWriter = docWriter;
    this.infoStream = infoStream;
    this.synced = synced;

    if (infoStream != null)
        Message("init: current segments file is \"" + segmentInfos.GetCurrentSegmentFileName() + "\"; deletionPolicy=" + policy);

    this.policy = policy;
    this.directory = directory;

    // First pass: walk the files and initialize our ref counts:
    long currentGen = segmentInfos.Generation;
    IndexFileNameFilter filter = IndexFileNameFilter.Filter;
    System.String[] files = directory.ListAll();
    CommitPoint currentCommitPoint = null;

    for (int i = 0; i < files.Length; i++)
    {
        System.String fileName = files[i];
        if (filter.Accept(null, fileName) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN))
        {
            // Add this file to refCounts with initial count 0:
            GetRefCount(fileName);

            if (fileName.StartsWith(IndexFileNames.SEGMENTS))
            {
                // This is a commit (segments or segments_N), and it's valid (<= the max gen).
                // Load it, then incref all files it refers to:
                if (infoStream != null)
                    Message("init: load commit \"" + fileName + "\"");

                SegmentInfos sis = new SegmentInfos();
                try
                {
                    sis.Read(directory, fileName);
                }
                catch (System.IO.FileNotFoundException)
                {
                    // LUCENE-948: on NFS (and maybe others), if you have writers switching
                    // back and forth between machines, it's very likely that the dir listing
                    // will be stale and will claim a file segments_X exists when in fact it
                    // doesn't. So, we catch this and handle it as if the file does not exist.
                    if (infoStream != null)
                        Message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
                    sis = null;
                }
                catch (System.IO.IOException)
                {
                    if (SegmentInfos.GenerationFromSegmentsFileName(fileName) <= currentGen)
                    {
                        throw;
                    }
                    else
                    {
                        // Most likely we are opening an index that has an aborted "future"
                        // commit, so suppress the exception in this case.
                        sis = null;
                    }
                }

                if (sis != null)
                {
                    CommitPoint commitPoint = new CommitPoint(this, commitsToDelete, directory, sis);
                    if (sis.Generation == segmentInfos.Generation)
                        currentCommitPoint = commitPoint;
                    commits.Add(commitPoint);
                    IncRef(sis, true);
                    if (lastSegmentInfos == null || sis.Generation > lastSegmentInfos.Generation)
                        lastSegmentInfos = sis;
                }
            }
        }
    }

    if (currentCommitPoint == null)
    {
        // We did not in fact see the segments_N file corresponding to the segmentInfos
        // that was passed in. Yet, it must exist, because our caller holds the write
        // lock. This can happen when the directory listing was stale (eg when the index
        // is accessed via an NFS client with a stale directory listing cache). So we
        // try now to explicitly open this commit point:
        SegmentInfos sis = new SegmentInfos();
        try
        {
            sis.Read(directory, segmentInfos.GetCurrentSegmentFileName());
        }
        catch (System.IO.IOException)
        {
            throw new CorruptIndexException("failed to locate current segments_N file");
        }
        if (infoStream != null)
            Message("forced open of current segments file " + segmentInfos.GetCurrentSegmentFileName());
        currentCommitPoint = new CommitPoint(this, commitsToDelete, directory, sis);
        commits.Add(currentCommitPoint);
        IncRef(sis, true);
    }

    // We keep the commits list in sorted order (oldest to newest):
    commits.Sort();

    // Now delete anything with ref count at 0. These are presumably
    // abandoned files, eg due to a crash of IndexWriter.
    foreach (KeyValuePair<string, RefCount> entry in refCounts)
    {
        string fileName = entry.Key;
        RefCount rc = refCounts[fileName];
        if (0 == rc.count)
        {
            if (infoStream != null)
                Message("init: removing unreferenced file \"" + fileName + "\"");
            DeleteFile(fileName);
        }
    }

    // Finally, give the policy a chance to remove things on startup:
    policy.OnInit(commits);

    // Always protect the incoming segmentInfos since sometimes it may not be the most recent commit.
    Checkpoint(segmentInfos, false);

    startingCommitDeleted = currentCommitPoint.IsDeleted;

    DeleteCommits();
}
/// <summary>Expert: returns an IndexReader reading the index in /// the given Directory, with a custom {@link /// IndexDeletionPolicy}. You should pass readOnly=true, /// since it gives much better concurrent performance, /// unless you intend to do write operations (delete /// documents or change norms) with the reader. /// </summary> /// <param name="directory">the index directory /// </param> /// <param name="deletionPolicy">a custom deletion policy (only used /// if you use this reader to perform deletes or to set /// norms); see {@link IndexWriter} for details. /// </param> /// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader /// </param> /// <param name="termInfosIndexDivisor">Subsamples which indexed /// terms are loaded into RAM. This has the same effect as {@link /// IndexWriter#setTermIndexInterval} except that setting /// must be done at indexing time while this setting can be /// set per reader. When set to N, then one in every /// N*termIndexInterval terms in the index is loaded into /// memory. By setting this to a value > 1 you can reduce /// memory usage, at the expense of higher latency when /// loading a TermInfo. The default value is 1. Set this /// to -1 to skip loading the terms index entirely. /// </param> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> public static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor) { return Open(directory, deletionPolicy, null, readOnly, termInfosIndexDivisor); }
private void Init(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, int maxFieldLength, DocumentsWriter.IndexingChain indexingChain, IndexCommit commit) { if (IndexReader.IndexExists(d)) { Init(d, a, false, deletionPolicy, maxFieldLength, indexingChain, commit); } else { Init(d, a, true, deletionPolicy, maxFieldLength, indexingChain, commit); } }
/// <summary>
/// <seealso cref="PersistentSnapshotDeletionPolicy"/> wraps another
/// <seealso cref="IndexDeletionPolicy"/> to enable flexible
/// snapshotting, passing <seealso cref="OpenMode#CREATE_OR_APPEND"/>
/// by default.
/// </summary>
/// <param name="primary">
/// the <seealso cref="IndexDeletionPolicy"/> that is used on non-snapshotted
/// commits. Snapshotted commits, by definition, are not deleted until
/// explicitly released via <seealso cref="#release"/>. </param>
/// <param name="dir">
/// the <seealso cref="Directory"/> which will be used to persist the snapshots
/// information. </param>
public PersistentSnapshotDeletionPolicy(IndexDeletionPolicy primary, Directory dir)
    : this(primary, dir, OpenMode.CREATE_OR_APPEND)
{
}
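// Usage sketch (not from the source): snapshots that survive process restarts, since
// PersistentSnapshotDeletionPolicy stores its bookkeeping in a second Directory.
// Assumes the Lucene.Net 4.8 API; both paths are illustrative.
Directory indexDir = FSDirectory.Open(new System.IO.DirectoryInfo("/data/index"));
Directory snapshotDir = FSDirectory.Open(new System.IO.DirectoryInfo("/data/snapshots"));

var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48))
{
    IndexDeletionPolicy = new PersistentSnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy(), snapshotDir)
};

using (var writer = new IndexWriter(indexDir, config))
{
    // The writer may clone its config, so retrieve the live policy from it.
    var psdp = (PersistentSnapshotDeletionPolicy)writer.Config.IndexDeletionPolicy;
    writer.Commit();
    IndexCommit snapshot = psdp.Snapshot();   // pinned until Release, even across restarts
    // ... copy the files named in snapshot.FileNames to backup storage, then:
    psdp.Release(snapshot);
    writer.Commit();                          // lets the primary policy reclaim the old files
}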
/// <summary>
/// Expert: allows an optional <seealso cref="IndexDeletionPolicy"/> implementation to be
/// specified. You can use this to control when prior commits are deleted from
/// the index. The default policy is <seealso cref="KeepOnlyLastCommitDeletionPolicy"/>,
/// which removes all prior commits as soon as a new commit is done (this
/// matches behavior before 2.2). Creating your own policy can allow you to
/// explicitly keep previous "point in time" commits alive in the index for
/// some time, to allow readers to refresh to the new commit without having the
/// old commit deleted out from under them. This is necessary on filesystems
/// like NFS that do not support "delete on last close" semantics, which
/// Lucene's "point in time" search normally relies on.
/// <p>
/// <b>NOTE:</b> the deletion policy cannot be null.
///
/// <p>Only takes effect when IndexWriter is first created.
/// </summary>
public IndexWriterConfig SetIndexDeletionPolicy(IndexDeletionPolicy deletionPolicy)
{
    if (deletionPolicy == null)
    {
        throw new System.ArgumentException("indexDeletionPolicy must not be null");
    }
    this.delPolicy = deletionPolicy;
    return this;
}
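// Illustration (not from the source): the "point in time" note above in practice. A hedged
// sketch of a custom policy that keeps the N most recent commits so already-open readers
// can still refresh to them; Lucene.Net 4.8-style generic OnInit<T>/OnCommit<T> overrides assumed.
public sealed class KeepLastNCommitsDeletionPolicy : IndexDeletionPolicy
{
    private readonly int n;

    public KeepLastNCommitsDeletionPolicy(int n)
    {
        this.n = n;
    }

    public override void OnInit<T>(IList<T> commits)
    {
        OnCommit(commits);
    }

    public override void OnCommit<T>(IList<T> commits)
    {
        // Commits arrive sorted oldest-to-newest; delete everything but the newest n.
        for (int i = 0; i < commits.Count - n; i++)
        {
            commits[i].Delete();
        }
    }
}

// Typical wiring: conf.SetIndexDeletionPolicy(new KeepLastNCommitsDeletionPolicy(3));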
/// <summary> Expert: constructs an IndexWriter with a custom <see cref="IndexDeletionPolicy" /> /// and <see cref="DocumentsWriter.IndexingChain" />, /// for the index in <c>d</c>. /// Text will be analyzed with <c>a</c>. If /// <c>create</c> is true, then a new, empty index /// will be created in <c>d</c>, replacing the index /// already there, if any. /// /// </summary> /// <param name="d">the index directory /// </param> /// <param name="a">the analyzer to use /// </param> /// <param name="create"><c>true</c> to create the index or overwrite /// the existing one; <c>false</c> to append to the existing /// index /// </param> /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a> /// </param> /// <param name="mfl">whether or not to limit field lengths, value is in number of terms/tokens. See <see cref="Lucene.Net.Index.IndexWriter.MaxFieldLength" />. /// </param> /// <param name="indexingChain">the <see cref="DocConsumer" /> chain to be used to /// process documents /// </param> /// <param name="commit">which commit to open /// </param> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> LockObtainFailedException if another writer </throws> /// <summary> has this index open (<c>write.lock</c> could not /// be obtained) /// </summary> /// <throws> IOException if the directory cannot be read/written to, or </throws> /// <summary> if it does not exist and <c>create</c> is /// <c>false</c> or if there is any other low-level /// IO error /// </summary> internal IndexWriter(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, DocumentsWriter.IndexingChain indexingChain, IndexCommit commit) { InitBlock(); Init(d, a, create, deletionPolicy, mfl.Limit, indexingChain, commit); }
internal static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, IndexCommit commit, bool readOnly, int termInfosIndexDivisor) { return (IndexReader) new AnonymousClassFindSegmentsFile(readOnly, deletionPolicy, termInfosIndexDivisor, directory).Run(commit); }
internal ReadOnlyDirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor) : base(directory, sis, deletionPolicy, true, termInfosIndexDivisor) { }
/// <summary> Expert: constructs an IndexWriter with a custom {@link /// IndexDeletionPolicy} and {@link IndexingChain}, /// for the index in <code>d</code>. /// Text will be analyzed with <code>a</code>. If /// <code>create</code> is true, then a new, empty index /// will be created in <code>d</code>, replacing the index /// already there, if any. /// /// <p/><b>NOTE</b>: autoCommit (see <a /// href="#autoCommit">above</a>) is set to false with this /// constructor. /// /// </summary> /// <param name="d">the index directory /// </param> /// <param name="a">the analyzer to use /// </param> /// <param name="create"><code>true</code> to create the index or overwrite /// the existing one; <code>false</code> to append to the existing /// index /// </param> /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a> /// </param> /// <param name="mfl">whether or not to limit field lengths, value is in number of terms/tokens. See {@link Lucene.Net.Index.IndexWriter.MaxFieldLength}. /// </param> /// <param name="indexingChain">the {@link DocConsumer} chain to be used to /// process documents /// </param> /// <param name="commit">which commit to open /// </param> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> LockObtainFailedException if another writer </throws> /// <summary> has this index open (<code>write.lock</code> could not /// be obtained) /// </summary> /// <throws> IOException if the directory cannot be read/written to, or </throws> /// <summary> if it does not exist and <code>create</code> is /// <code>false</code> or if there is any other low-level /// IO error /// </summary> internal IndexWriter(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain, IndexCommit commit) { InitBlock(); Init(d, a, create, false, deletionPolicy, false, mfl.GetLimit(), indexingChain, commit); }
/// <summary> Expert: constructs an IndexWriter with a custom {@link /// IndexDeletionPolicy}, for the index in <code>d</code>, /// first creating it if it does not already exist. Text /// will be analyzed with <code>a</code>. /// /// <p/><b>NOTE</b>: autoCommit (see <a /// href="#autoCommit">above</a>) is set to false with this /// constructor. /// /// </summary> /// <param name="d">the index directory /// </param> /// <param name="a">the analyzer to use /// </param> /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a> /// </param> /// <param name="mfl">whether or not to limit field lengths /// </param> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> LockObtainFailedException if another writer </throws> /// <summary> has this index open (<code>write.lock</code> could not /// be obtained) /// </summary> /// <throws> IOException if the directory cannot be </throws> /// <summary> read/written to or if there is any other low-level /// IO error /// </summary> public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl) { InitBlock(); Init(d, a, false, deletionPolicy, false, mfl.GetLimit(), null, null); }
private void InitBlock(bool readOnly, IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor) { this.readOnly = readOnly; this.deletionPolicy = deletionPolicy; this.termInfosIndexDivisor = termInfosIndexDivisor; }
public static IndexWriterConfig CreateWriterConfig(Config config, PerfRunData runData, OpenMode mode, IndexCommit commit)
{
    // :Post-Release-Update-Version.LUCENE_XY:
    LuceneVersion version = (LuceneVersion)Enum.Parse(typeof(LuceneVersion), config.Get("writer.version", LuceneVersion.LUCENE_48.ToString()));
    IndexWriterConfig iwConf = new IndexWriterConfig(version, runData.Analyzer);
    iwConf.OpenMode = mode;
    IndexDeletionPolicy indexDeletionPolicy = GetIndexDeletionPolicy(config);
    iwConf.IndexDeletionPolicy = indexDeletionPolicy;
    if (commit != null)
    {
        iwConf.IndexCommit = commit;
    }

    string mergeScheduler = config.Get("merge.scheduler", "Lucene.Net.Index.ConcurrentMergeScheduler, Lucene.Net");
    Type mergeSchedulerType = Type.GetType(mergeScheduler);
    if (mergeSchedulerType is null)
    {
        // LUCENENET: We don't get an exception in this case, so throwing one for compatibility
        throw RuntimeException.Create("Unrecognized merge scheduler type '" + mergeScheduler + "'");
    }
    else if (mergeSchedulerType.Equals(typeof(NoMergeScheduler)))
    {
        iwConf.MergeScheduler = NoMergeScheduler.INSTANCE;
    }
    else
    {
        try
        {
            iwConf.MergeScheduler = (IMergeScheduler)Activator.CreateInstance(mergeSchedulerType);
        }
        catch (Exception e) when (e.IsException())
        {
            throw RuntimeException.Create("unable to instantiate class '" + mergeScheduler + "' as merge scheduler", e);
        }

        if (mergeScheduler.Equals("Lucene.Net.Index.ConcurrentMergeScheduler", StringComparison.Ordinal))
        {
            ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler)iwConf.MergeScheduler;
            int maxThreadCount = config.Get("concurrent.merge.scheduler.max.thread.count", ConcurrentMergeScheduler.DEFAULT_MAX_THREAD_COUNT);
            int maxMergeCount = config.Get("concurrent.merge.scheduler.max.merge.count", ConcurrentMergeScheduler.DEFAULT_MAX_MERGE_COUNT);
            cms.SetMaxMergesAndThreads(maxMergeCount, maxThreadCount);
        }
    }

    string defaultCodec = config.Get("default.codec", null);
    if (defaultCodec != null)
    {
        try
        {
            Type clazz = Type.GetType(defaultCodec);
            iwConf.Codec = (Codec)Activator.CreateInstance(clazz);
        }
        catch (Exception e) when (e.IsException())
        {
            throw RuntimeException.Create("Couldn't instantiate Codec: " + defaultCodec, e);
        }
    }

    string mergePolicy = config.Get("merge.policy", "Lucene.Net.Index.LogByteSizeMergePolicy, Lucene.Net");
    bool isCompound = config.Get("compound", true);
    Type mergePolicyType = Type.GetType(mergePolicy);
    if (mergePolicyType is null)
    {
        // LUCENENET: We don't get an exception in this case, so throwing one for compatibility
        throw RuntimeException.Create("Unrecognized merge policy type '" + mergePolicy + "'");
    }
    else if (mergePolicyType.Equals(typeof(NoMergePolicy)))
    {
        iwConf.MergePolicy = isCompound ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES;
    }
    else
    {
        try
        {
            iwConf.MergePolicy = (MergePolicy)Activator.CreateInstance(mergePolicyType);
        }
        catch (Exception e) when (e.IsException())
        {
            throw RuntimeException.Create("unable to instantiate class '" + mergePolicy + "' as merge policy", e);
        }
        iwConf.MergePolicy.NoCFSRatio = isCompound ? 1.0 : 0.0;
        if (iwConf.MergePolicy is LogMergePolicy logMergePolicy)
        {
            logMergePolicy.MergeFactor = config.Get("merge.factor", OpenIndexTask.DEFAULT_MERGE_PFACTOR);
        }
    }

    double ramBuffer = config.Get("ram.flush.mb", OpenIndexTask.DEFAULT_RAM_FLUSH_MB);
    int maxBuffered = config.Get("max.buffered", OpenIndexTask.DEFAULT_MAX_BUFFERED);
    if (maxBuffered == IndexWriterConfig.DISABLE_AUTO_FLUSH)
    {
        iwConf.RAMBufferSizeMB = ramBuffer;
        iwConf.MaxBufferedDocs = maxBuffered;
    }
    else
    {
        iwConf.MaxBufferedDocs = maxBuffered;
        iwConf.RAMBufferSizeMB = ramBuffer;
    }

    return iwConf;
}
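// GetIndexDeletionPolicy(config), called above, is not shown in this snippet. A plausible
// reflection-based sketch in the same style as the merge-scheduler/merge-policy handling;
// the "deletion.policy" key name and its default value are assumptions, not the library's API.
private static IndexDeletionPolicy GetIndexDeletionPolicy(Config config)
{
    string policyName = config.Get("deletion.policy", "Lucene.Net.Index.KeepOnlyLastCommitDeletionPolicy, Lucene.Net");
    Type policyType = Type.GetType(policyName);
    if (policyType is null)
    {
        throw RuntimeException.Create("Unrecognized deletion policy type '" + policyName + "'");
    }
    else if (policyType.Equals(typeof(NoDeletionPolicy)))
    {
        // NoDeletionPolicy is a singleton and cannot be constructed via Activator.
        return NoDeletionPolicy.INSTANCE;
    }
    try
    {
        return (IndexDeletionPolicy)Activator.CreateInstance(policyType);
    }
    catch (Exception e) when (e.IsException())
    {
        throw RuntimeException.Create("unable to instantiate class '" + policyName + "' as deletion policy", e);
    }
}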
private void Init(Directory d, Analyzer a, bool closeDir, IndexDeletionPolicy deletionPolicy, bool autoCommit, int maxFieldLength, IndexingChain indexingChain, IndexCommit commit) { if (IndexReader.IndexExists(d)) { Init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength, indexingChain, commit); } else { Init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength, indexingChain, commit); } }
/// <summary> Initialize the deleter: find all previous commits in the Directory,
/// incref the files they reference, and call the policy to let it delete commits.
/// This will remove any files not referenced by any of the commits.
/// </summary>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, System.IO.StreamWriter infoStream, DocumentsWriter docWriter, System.Collections.Generic.Dictionary<string, string> synced)
{
    this.docWriter = docWriter;
    this.infoStream = infoStream;
    this.synced = synced;

    if (infoStream != null)
        Message("init: current segments file is \"" + segmentInfos.GetCurrentSegmentFileName() + "\"; deletionPolicy=" + policy);

    this.policy = policy;
    this.directory = directory;

    // First pass: walk the files and initialize our ref counts:
    long currentGen = segmentInfos.GetGeneration();
    IndexFileNameFilter filter = IndexFileNameFilter.GetFilter();
    System.String[] files = directory.ListAll();
    CommitPoint currentCommitPoint = null;

    for (int i = 0; i < files.Length; i++)
    {
        System.String fileName = files[i];
        if (filter.Accept(null, fileName) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN))
        {
            // Add this file to refCounts with initial count 0:
            GetRefCount(fileName);

            if (fileName.StartsWith(IndexFileNames.SEGMENTS))
            {
                // This is a commit (segments or segments_N), and it's valid (<= the max gen).
                // Load it, then incref all files it refers to:
                if (infoStream != null)
                    Message("init: load commit \"" + fileName + "\"");

                SegmentInfos sis = new SegmentInfos();
                try
                {
                    sis.Read(directory, fileName);
                }
                catch (System.IO.FileNotFoundException e)
                {
                    // LUCENE-948: on NFS (and maybe others), if you have writers switching
                    // back and forth between machines, it's very likely that the dir listing
                    // will be stale and will claim a file segments_X exists when in fact it
                    // doesn't. So, we catch this and handle it as if the file does not exist.
                    if (infoStream != null)
                        Message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
                    sis = null;
                }
                catch (System.IO.IOException e)
                {
                    if (SegmentInfos.GenerationFromSegmentsFileName(fileName) <= currentGen)
                    {
                        throw e;
                    }
                    else
                    {
                        // Most likely we are opening an index that has an aborted "future"
                        // commit, so suppress the exception in this case.
                        sis = null;
                    }
                }

                if (sis != null)
                {
                    CommitPoint commitPoint = new CommitPoint(this, commitsToDelete, directory, sis);
                    if (sis.GetGeneration() == segmentInfos.GetGeneration())
                        currentCommitPoint = commitPoint;
                    commits.Add(commitPoint);
                    IncRef(sis, true);
                    if (lastSegmentInfos == null || sis.GetGeneration() > lastSegmentInfos.GetGeneration())
                        lastSegmentInfos = sis;
                }
            }
        }
    }

    if (currentCommitPoint == null)
    {
        // We did not in fact see the segments_N file corresponding to the segmentInfos
        // that was passed in. Yet, it must exist, because our caller holds the write
        // lock. This can happen when the directory listing was stale (eg when the index
        // is accessed via an NFS client with a stale directory listing cache). So we
        // try now to explicitly open this commit point:
        SegmentInfos sis = new SegmentInfos();
        try
        {
            sis.Read(directory, segmentInfos.GetCurrentSegmentFileName());
        }
        catch (System.IO.IOException e)
        {
            throw new CorruptIndexException("failed to locate current segments_N file");
        }
        if (infoStream != null)
            Message("forced open of current segments file " + segmentInfos.GetCurrentSegmentFileName());
        currentCommitPoint = new CommitPoint(this, commitsToDelete, directory, sis);
        commits.Add(currentCommitPoint);
        IncRef(sis, true);
    }

    // We keep the commits list in sorted order (oldest to newest):
    commits.Sort();

    // Now delete anything with ref count at 0. These are presumably
    // abandoned files, eg due to a crash of IndexWriter.
    System.Collections.Generic.IEnumerator<System.Collections.Generic.KeyValuePair<System.String, RefCount>> it = refCounts.GetEnumerator();
    while (it.MoveNext())
    {
        System.String fileName = (System.String)it.Current.Key;
        RefCount rc = (RefCount)refCounts[fileName];
        if (0 == rc.count)
        {
            if (infoStream != null)
                Message("init: removing unreferenced file \"" + fileName + "\"");
            DeleteFile(fileName);
        }
    }

    // Finally, give the policy a chance to remove things on startup:
    policy.OnInit(commits);

    // Always protect the incoming segmentInfos since sometimes it may not be the most recent commit.
    Checkpoint(segmentInfos, false);

    startingCommitDeleted = currentCommitPoint.IsDeleted();

    DeleteCommits();
}
public SnapshotDeletionPolicy(IndexDeletionPolicy primary) { this.primary = primary; }
/// <summary> Expert: constructs an IndexWriter with a custom <see cref="IndexDeletionPolicy" /> ///, for the index in <c>d</c>. /// Text will be analyzed with <c>a</c>. If /// <c>create</c> is true, then a new, empty index /// will be created in <c>d</c>, replacing the index /// already there, if any. /// /// </summary> /// <param name="d">the index directory /// </param> /// <param name="a">the analyzer to use /// </param> /// <param name="create"><c>true</c> to create the index or overwrite /// the existing one; <c>false</c> to append to the existing /// index /// </param> /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a> /// </param> /// <param name="mfl"><see cref="Lucene.Net.Index.IndexWriter.MaxFieldLength" />, whether or not to limit field lengths. Value is in number of terms/tokens /// </param> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> LockObtainFailedException if another writer </throws> /// <summary> has this index open (<c>write.lock</c> could not /// be obtained) /// </summary> /// <throws> IOException if the directory cannot be read/written to, or </throws> /// <summary> if it does not exist and <c>create</c> is /// <c>false</c> or if there is any other low-level /// IO error /// </summary> public IndexWriter(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl) { InitBlock(); Init(d, a, create, deletionPolicy, mfl.Limit, null, null); }
/// <summary> Expert: constructs an IndexWriter on a specific commit
/// point, with a custom <see cref="IndexDeletionPolicy" />, for
/// the index in <c>d</c>. Text will be analyzed with <c>a</c>.
///
/// <p/> This is only meaningful if you've used an <see cref="IndexDeletionPolicy" />
/// in the past that keeps more than just the last commit.
///
/// <p/>This operation is similar to <see cref="Rollback()" />,
/// except that method can only rollback what's been done
/// with the current instance of IndexWriter since its last
/// commit, whereas this method can rollback to an
/// arbitrary commit point from the past, assuming the
/// <see cref="IndexDeletionPolicy" /> has preserved past
/// commits.
/// </summary>
/// <param name="d">the index directory</param>
/// <param name="a">the analyzer to use</param>
/// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a></param>
/// <param name="mfl">whether or not to limit field lengths, value is in number of terms/tokens. See <see cref="Lucene.Net.Index.IndexWriter.MaxFieldLength" />.</param>
/// <param name="commit">which commit to open</param>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> LockObtainFailedException if another writer has this index open (<c>write.lock</c> could not be obtained) </throws>
/// <throws> IOException if the directory cannot be read/written to, or if it does not exist and <c>create</c> is <c>false</c>, or if there is any other low-level IO error </throws>
public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit)
{
    InitBlock();
    Init(d, a, false, deletionPolicy, mfl.Limit, null, commit);
}
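// Usage sketch for the commit-point constructor above (not from the source): roll the index
// back to an older commit that a snapshotting or keep-N policy preserved. Lucene.Net 3.0-era
// API assumed; member names (properties vs. Get-methods) vary slightly between ports.
Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("/data/index"));

IndexCommit target = null;
foreach (IndexCommit c in IndexReader.ListCommits(dir))
{
    if (c.SegmentsFileName == "segments_2")   // illustrative way to pick the commit to restore
        target = c;
}

var writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30),
                             new KeepOnlyLastCommitDeletionPolicy(),
                             IndexWriter.MaxFieldLength.UNLIMITED, target);
writer.Commit();   // the restored state becomes the new head commit
writer.Dispose();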
public virtual void SetDeletionPolicy(IndexDeletionPolicy deletionPolicy) { this.deletionPolicy = deletionPolicy; }
private void Init(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, int maxFieldLength, DocumentsWriter.IndexingChain indexingChain, IndexCommit commit)
{
    directory = d;
    analyzer = a;
    SetMessageID(defaultInfoStream);
    this.maxFieldLength = maxFieldLength;

    if (indexingChain == null)
        indexingChain = DocumentsWriter.DefaultIndexingChain;

    if (create)
    {
        // Clear the write lock in case it's leftover:
        directory.ClearLock(WRITE_LOCK_NAME);
    }

    Lock writeLock = directory.MakeLock(WRITE_LOCK_NAME);
    if (!writeLock.Obtain(writeLockTimeout)) // obtain write lock
    {
        throw new LockObtainFailedException("Index locked for write: " + writeLock);
    }
    this.writeLock = writeLock; // save it

    bool success = false;
    try
    {
        if (create)
        {
            // Try to read first. This is to allow create against an index that's
            // currently open for searching. In this case we write the next
            // segments_N file with no segments:
            bool doCommit;
            try
            {
                segmentInfos.Read(directory);
                segmentInfos.Clear();
                doCommit = false;
            }
            catch (System.IO.IOException)
            {
                // Likely this means it's a fresh directory
                doCommit = true;
            }

            if (doCommit)
            {
                // Only commit if there is no segments file in this dir already.
                segmentInfos.Commit(directory);
                synced.UnionWith(segmentInfos.Files(directory, true));
            }
            else
            {
                // Record that we have a change (zero out all segments) pending:
                changeCount++;
            }
        }
        else
        {
            segmentInfos.Read(directory);

            if (commit != null)
            {
                // Swap out all segments, but keep metadata in SegmentInfos, like version
                // & generation, to preserve write-once. This is important if readers are
                // open against the future commit points.
                if (commit.Directory != directory)
                    throw new System.ArgumentException("IndexCommit's directory doesn't match my directory");
                SegmentInfos oldInfos = new SegmentInfos();
                oldInfos.Read(directory, commit.SegmentsFileName);
                segmentInfos.Replace(oldInfos);
                changeCount++;
                if (infoStream != null)
                    Message("init: loaded commit \"" + commit.SegmentsFileName + "\"");
            }

            // We assume that this segments_N was previously properly sync'd:
            synced.UnionWith(segmentInfos.Files(directory, true));
        }

        SetRollbackSegmentInfos(segmentInfos);

        docWriter = new DocumentsWriter(directory, this, indexingChain);
        docWriter.SetInfoStream(infoStream);
        docWriter.SetMaxFieldLength(maxFieldLength);

        // Default deleter (for backwards compatibility) is KeepOnlyLastCommitDeleter:
        deleter = new IndexFileDeleter(directory, deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy, segmentInfos, infoStream, docWriter, synced);

        if (deleter.startingCommitDeleted)
        {
            // The deletion policy deleted the "head" commit point. We have to mark
            // ourself as changed so that if we are closed w/o any further changes
            // we write a new segments_N file.
            changeCount++;
        }

        PushMaxBufferedDocs();

        if (infoStream != null)
        {
            Message("init: create=" + create);
            MessageState();
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            if (infoStream != null)
                Message("init: hit exception on init; releasing write lock");
            try
            {
                writeLock.Release();
            }
            catch (Exception)
            {
                // don't mask the original exception
            }
            writeLock = null;
        }
    }
}
internal static DirectoryIndexReader Open(Directory directory, bool closeDirectory, IndexDeletionPolicy deletionPolicy) { return Open(directory, closeDirectory, deletionPolicy, null, false); }
/// <summary>
/// Builder method for <see cref="IndexWriterConfig.IndexDeletionPolicy"/>.
/// </summary>
/// <param name="config">this <see cref="IndexWriterConfig"/> instance</param>
/// <param name="deletionPolicy">the <see cref="IndexDeletionPolicy"/> to use</param>
/// <returns>this <see cref="IndexWriterConfig"/> instance</returns>
public static IndexWriterConfig SetIndexDeletionPolicy(this IndexWriterConfig config, IndexDeletionPolicy deletionPolicy)
{
    config.IndexDeletionPolicy = deletionPolicy;
    return config;
}
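// Brief usage of the builder extension above (not from the source): fluent configuration
// of a writer that keeps every commit, assuming the Lucene.Net 4.8 NoDeletionPolicy singleton.
var conf = new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48))
    .SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
using (var dir = FSDirectory.Open(new System.IO.DirectoryInfo("/data/index")))
using (var writer = new IndexWriter(dir, conf))
{
    // every Commit() now leaves its segments_N on disk until a different policy removes it
}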
internal static DirectoryIndexReader Open(Directory directory, bool closeDirectory, IndexDeletionPolicy deletionPolicy, IndexCommit commit, bool readOnly)
{
    SegmentInfos.FindSegmentsFile finder = new AnonymousClassFindSegmentsFile(closeDirectory, deletionPolicy, directory, readOnly);

    if (commit == null)
    {
        return (DirectoryIndexReader)finder.Run();
    }
    else
    {
        if (directory != commit.GetDirectory())
            throw new System.IO.IOException("the specified commit does not match the specified Directory");
        // This can and will directly throw IOException if the specified commit point has been deleted.
        return (DirectoryIndexReader)finder.DoBody(commit.GetSegmentsFileName());
    }
}
/// <summary>Expert: returns an IndexReader reading the index in /// the given Directory, with a custom {@link /// IndexDeletionPolicy}. You should pass readOnly=true, /// since it gives much better concurrent performance, /// unless you intend to do write operations (delete /// documents or change norms) with the reader. /// </summary> /// <param name="directory">the index directory /// </param> /// <param name="deletionPolicy">a custom deletion policy (only used /// if you use this reader to perform deletes or to set /// norms); see {@link IndexWriter} for details. /// </param> /// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader /// </param> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> public static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, bool readOnly) { return Open(directory, deletionPolicy, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); }
/// <summary>Expert: returns an IndexReader reading the index in /// the given Directory, using a specific commit and with /// a custom {@link IndexDeletionPolicy}. You should pass /// readOnly=true, since it gives much better concurrent /// performance, unless you intend to do write operations /// (delete documents or change norms) with the reader. /// </summary> /// <param name="commit">the specific {@link IndexCommit} to open; /// see {@link IndexReader#listCommits} to list all commits /// in a directory /// </param> /// <param name="deletionPolicy">a custom deletion policy (only used /// if you use this reader to perform deletes or to set /// norms); see {@link IndexWriter} for details. /// </param> /// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader /// </param> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> public static IndexReader Open(IndexCommit commit, IndexDeletionPolicy deletionPolicy, bool readOnly) { return Open(commit.GetDirectory(), deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); }
/// <summary>Expert: returns an IndexReader reading the index in the given /// Directory, with a custom {@link IndexDeletionPolicy}. /// </summary> /// <param name="directory">the index directory /// </param> /// <param name="deletionPolicy">a custom deletion policy (only used /// if you use this reader to perform deletes or to set /// norms); see {@link IndexWriter} for details. /// </param> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> public static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy) { return Open(directory, false, deletionPolicy); }
private static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, IndexCommit commit, bool readOnly, int termInfosIndexDivisor) { return DirectoryReader.Open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor); }
private static IndexReader Open(Directory directory, bool closeDirectory, IndexDeletionPolicy deletionPolicy) { return DirectoryIndexReader.Open(directory, closeDirectory, deletionPolicy); }