/// <summary>
/// Shared initialization used by the constructors: stores the directory and analyzer,
/// obtains the write lock, loads (or creates) the segment metadata, and builds the
/// <see cref="DocumentsWriter" /> and <see cref="IndexFileDeleter" />.
/// </summary>
/// <param name="d">the index directory</param>
/// <param name="a">the analyzer to use</param>
/// <param name="create"><c>true</c> to start from an empty set of segments (any leftover
/// write lock is cleared first); <c>false</c> to read the existing segments</param>
/// <param name="deletionPolicy">the deletion policy to use; when <c>null</c> a
/// <see cref="KeepOnlyLastCommitDeletionPolicy" /> is used</param>
/// <param name="maxFieldLength">the maximum field length, forwarded to the <see cref="DocumentsWriter" /></param>
/// <param name="indexingChain">the indexing chain; when <c>null</c>
/// <c>DocumentsWriter.DefaultIndexingChain</c> is used</param>
/// <param name="commit">an optional commit point to load; only consulted when
/// <c>create</c> is <c>false</c></param>
/// <exception cref="LockObtainFailedException">if the write lock could not be obtained
/// within <c>writeLockTimeout</c></exception>
/// <exception cref="System.ArgumentException">if <c>commit</c> belongs to a different directory</exception>
private void Init(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, int maxFieldLength, DocumentsWriter.IndexingChain indexingChain, IndexCommit commit)
{
    directory = d;
    analyzer = a;
    SetMessageID(defaultInfoStream);
    this.maxFieldLength = maxFieldLength;

    if (indexingChain == null)
    {
        indexingChain = DocumentsWriter.DefaultIndexingChain;
    }

    if (create)
    {
        // Clear the write lock in case it's leftover:
        directory.ClearLock(WRITE_LOCK_NAME);
    }

    Lock writeLock = directory.MakeLock(WRITE_LOCK_NAME);
    if (!writeLock.Obtain(writeLockTimeout)) // obtain write lock
    {
        throw new LockObtainFailedException("Index locked for write: " + writeLock);
    }
    this.writeLock = writeLock; // save it

    bool success = false;
    try
    {
        if (create)
        {
            // Try to read first.  This is to allow create
            // against an index that's currently open for
            // searching.  In this case we write the next
            // segments_N file with no segments:
            bool doCommit;
            try
            {
                segmentInfos.Read(directory);
                segmentInfos.Clear();
                doCommit = false;
            }
            catch (System.IO.IOException)
            {
                // Likely this means it's a fresh directory
                doCommit = true;
            }

            if (doCommit)
            {
                // Only commit if there is no segments file
                // in this dir already.
                segmentInfos.Commit(directory);
                synced.UnionWith(segmentInfos.Files(directory, true));
            }
            else
            {
                // Record that we have a change (zero out all
                // segments) pending:
                changeCount++;
            }
        }
        else
        {
            segmentInfos.Read(directory);

            if (commit != null)
            {
                // Swap out all segments, but, keep metadata in
                // SegmentInfos, like version & generation, to
                // preserve write-once.  This is important if
                // readers are open against the future commit
                // points.
                if (commit.Directory != directory)
                {
                    throw new System.ArgumentException("IndexCommit's directory doesn't match my directory");
                }
                SegmentInfos oldInfos = new SegmentInfos();
                oldInfos.Read(directory, commit.SegmentsFileName);
                segmentInfos.Replace(oldInfos);
                changeCount++;
                if (infoStream != null)
                {
                    Message("init: loaded commit \"" + commit.SegmentsFileName + "\"");
                }
            }

            // We assume that this segments_N was previously
            // properly sync'd:
            synced.UnionWith(segmentInfos.Files(directory, true));
        }

        SetRollbackSegmentInfos(segmentInfos);

        docWriter = new DocumentsWriter(directory, this, indexingChain);
        docWriter.SetInfoStream(infoStream);
        docWriter.SetMaxFieldLength(maxFieldLength);

        // Default deleter (for backwards compatibility) is
        // KeepOnlyLastCommitDeleter:
        deleter = new IndexFileDeleter(directory, deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy, segmentInfos, infoStream, docWriter, synced);

        if (deleter.startingCommitDeleted)
        {
            // Deletion policy deleted the "head" commit point.
            // We have to mark ourself as changed so that if we
            // are closed w/o any further changes we write a new
            // segments_N file.
            changeCount++;
        }

        PushMaxBufferedDocs();

        if (infoStream != null)
        {
            Message("init: create=" + create);
            MessageState();
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            if (infoStream != null)
            {
                Message("init: hit exception on init; releasing write lock");
            }
            try
            {
                writeLock.Release();
            }
            catch (Exception)
            {
                // don't mask the original exception
            }

            // Clear the FIELD: the local 'writeLock' shadows it, so assigning the
            // bare name would leave this.writeLock referencing a released lock.
            this.writeLock = null;
        }
    }
}
/// <summary>
/// Expert: constructs an IndexWriter on a specific commit point, with a custom
/// <see cref="IndexDeletionPolicy" />, for the index in <c>d</c>. Text will be
/// analyzed with <c>a</c>.
///
/// <p/>This is only meaningful if you have used an <see cref="IndexDeletionPolicy" />
/// in the past that keeps more than just the last commit.
///
/// <p/>This operation is similar to <see cref="Rollback()" />, except that method
/// can only roll back what has been done with the current instance of IndexWriter
/// since its last commit, whereas this constructor can roll back to an arbitrary
/// commit point from the past, assuming the <see cref="IndexDeletionPolicy" /> has
/// preserved past commits.
/// </summary>
/// <param name="d">the index directory</param>
/// <param name="a">the analyzer to use</param>
/// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a></param>
/// <param name="mfl">whether or not to limit field lengths, value is in number of
/// terms/tokens. See <see cref="Lucene.Net.Index.IndexWriter.MaxFieldLength" />.</param>
/// <param name="commit">which commit to open</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if the directory cannot be read/written to,
/// or if there is any other low-level IO error</exception>
public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit)
{
    InitBlock();

    // This overload never creates a new index (create == false) and passes no
    // explicit indexing chain (null).
    int fieldLengthLimit = mfl.Limit;
    Init(d, a, false, deletionPolicy, fieldLengthLimit, null, commit);
}
/// <summary>
/// Initializes the writer, deciding by itself whether an index must be created:
/// a new index is created only when <see cref="IndexReader.IndexExists(Directory)" />
/// reports that <c>d</c> does not contain one yet.
/// </summary>
/// <param name="d">the index directory</param>
/// <param name="a">the analyzer to use</param>
/// <param name="deletionPolicy">the deletion policy, forwarded unchanged</param>
/// <param name="maxFieldLength">the maximum field length, forwarded unchanged</param>
/// <param name="indexingChain">the indexing chain, forwarded unchanged</param>
/// <param name="commit">the commit point to open, forwarded unchanged</param>
private void Init(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, int maxFieldLength, DocumentsWriter.IndexingChain indexingChain, IndexCommit commit)
{
    // Create only when nothing is there yet; otherwise append to what exists.
    bool create = !IndexReader.IndexExists(d);
    Init(d, a, create, deletionPolicy, maxFieldLength, indexingChain, commit);
}
/// <summary>
/// Creates a commit point that keeps a reference to the given <see cref="IndexCommit" />.
/// </summary>
/// <param name="enclosingInstance">the owning <see cref="SnapshotDeletionPolicy" />,
/// handed to <c>InitBlock</c></param>
/// <param name="cp">the commit point stored in the <c>cp</c> field</param>
internal MyCommitPoint(SnapshotDeletionPolicy enclosingInstance, IndexCommit cp)
{
    InitBlock(enclosingInstance);

    this.cp = cp;
}
/// <summary>
/// Expert: constructs an IndexWriter with a custom <see cref="IndexDeletionPolicy" />
/// and <see cref="DocumentsWriter.IndexingChain" />, for the index in <c>d</c>.
/// Text will be analyzed with <c>a</c>. If <c>create</c> is true, then a new,
/// empty index will be created in <c>d</c>, replacing the index already there,
/// if any.
/// </summary>
/// <param name="d">the index directory</param>
/// <param name="a">the analyzer to use</param>
/// <param name="create"><c>true</c> to create the index or overwrite the existing
/// one; <c>false</c> to append to the existing index</param>
/// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a></param>
/// <param name="mfl">whether or not to limit field lengths, value is in number of
/// terms/tokens. See <see cref="Lucene.Net.Index.IndexWriter.MaxFieldLength" />.</param>
/// <param name="indexingChain">the <see cref="DocConsumer" /> chain to be used to
/// process documents</param>
/// <param name="commit">which commit to open</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if the directory cannot be read/written to,
/// or if it does not exist and <c>create</c> is <c>false</c>, or if there is any
/// other low-level IO error</exception>
internal IndexWriter(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, DocumentsWriter.IndexingChain indexingChain, IndexCommit commit)
{
    InitBlock();

    int fieldLengthLimit = mfl.Limit;
    Init(d, a, create, deletionPolicy, fieldLengthLimit, indexingChain, commit);
}
/// <summary>
/// Expert: reopen this reader on a specific commit point.
/// The intended contract is that the result is always a readOnly reader: if the
/// specified commit point matches what this reader is already on and this reader
/// is already readOnly, the same instance is returned; if it is not already
/// readOnly, a readOnly clone is returned.
/// <p/>This implementation does not support the operation: it takes the lock on
/// this instance and then always throws.
/// </summary>
/// <param name="commit">the commit point to reopen on</param>
/// <exception cref="NotSupportedException">always, in this implementation</exception>
public virtual IndexReader Reopen(IndexCommit commit)
{
    const string unsupportedMessage = "This reader does not support reopen(IndexCommit).";

    lock (this)
    {
        throw new NotSupportedException(unsupportedMessage);
    }
}
/// <summary>
/// Locates the segments file to use and invokes <c>DoBody</c> on it, retrying when
/// <c>DoBody</c> fails with an <see cref="System.IO.IOException" />.
/// <p/>When <c>commit</c> is non-null, <c>DoBody</c> is called once on that commit's
/// segments file and no retry takes place. Otherwise the current generation is
/// determined from the directory listing and from the <c>segments.gen</c> file
/// (the larger of the two is used), with a look-ahead fallback that simply advances
/// the generation when no progress is seen.
/// </summary>
/// <param name="commit">the commit whose segments file should be used, or <c>null</c>
/// to search for the current one</param>
/// <returns>whatever <c>DoBody</c> returns for the segments file that was loaded</returns>
/// <exception cref="System.IO.FileNotFoundException">if neither the directory listing
/// nor <c>segments.gen</c> yields a generation</exception>
/// <exception cref="System.IO.IOException">if <c>commit</c> belongs to a different
/// directory, or (the first exception seen) if the same generation fails twice in a row</exception>
public System.Object Run(IndexCommit commit)
{
    if (commit != null)
    {
        if (directory != commit.Directory)
        {
            throw new System.IO.IOException("the specified commit does not match the specified Directory");
        }
        return DoBody(commit.SegmentsFileName);
    }

    System.String segmentFileName = null;
    long lastGen = -1;
    long gen = 0;
    int genLookaheadCount = 0;
    System.IO.IOException exc = null;
    bool retry = false;

    int method = 0;

    // Loop until we succeed in calling doBody() without
    // hitting an IOException.  An IOException most likely
    // means a commit was in process and has finished, in
    // the time it took us to load the now-old infos files
    // (and segments files).  It's also possible it's a
    // true error (corrupt index).  To distinguish these,
    // on each retry we must see "forward progress" on
    // which generation we are trying to load.  If we
    // don't, then the original error is real and we throw
    // it.

    // We have three methods for determining the current
    // generation.  We try the first two in parallel, and
    // fall back to the third when necessary.

    while (true)
    {
        if (0 == method)
        {
            // Method 1: list the directory and use the highest
            // segments_N file.  This method works well as long
            // as there is no stale caching on the directory
            // contents (NOTE: NFS clients often have such stale
            // caching):
            System.String[] files = null;

            long genA = -1;

            files = directory.ListAll();

            if (files != null)
            {
                genA = Lucene.Net.Index.SegmentInfos.GetCurrentSegmentGeneration(files);
            }

            Lucene.Net.Index.SegmentInfos.Message("directory listing genA=" + genA);

            // Method 2: open segments.gen and read its
            // contents.  Then we take the larger of the two
            // gens.  This way, if either approach is hitting
            // a stale cache (NFS) we have a better chance of
            // getting the right generation.
            long genB = -1;
            for (int i = 0; i < Lucene.Net.Index.SegmentInfos.defaultGenFileRetryCount; i++)
            {
                IndexInput genInput = null;
                try
                {
                    genInput = directory.OpenInput(IndexFileNames.SEGMENTS_GEN);
                }
                catch (System.IO.FileNotFoundException e)
                {
                    Lucene.Net.Index.SegmentInfos.Message("segments.gen open: FileNotFoundException " + e);
                    break;
                }
                catch (System.IO.IOException e)
                {
                    Lucene.Net.Index.SegmentInfos.Message("segments.gen open: IOException " + e);
                }

                if (genInput != null)
                {
                    try
                    {
                        int version = genInput.ReadInt();
                        if (version == Lucene.Net.Index.SegmentInfos.FORMAT_LOCKLESS)
                        {
                            long gen0 = genInput.ReadLong();
                            long gen1 = genInput.ReadLong();
                            Lucene.Net.Index.SegmentInfos.Message("fallback check: " + gen0 + "; " + gen1);
                            if (gen0 == gen1)
                            {
                                // The file is consistent.
                                genB = gen0;
                                break;
                            }
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        // will retry
                    }
                    finally
                    {
                        genInput.Close();
                    }
                }

                // The TimeSpan constructor takes ticks; 10000 ticks == 1 millisecond.
                System.Threading.Thread.Sleep(new TimeSpan((System.Int64) 10000 * Lucene.Net.Index.SegmentInfos.defaultGenFileRetryPauseMsec));
            }

            Lucene.Net.Index.SegmentInfos.Message(IndexFileNames.SEGMENTS_GEN + " check: genB=" + genB);

            // Pick the larger of the two gen's:
            if (genA > genB)
            {
                gen = genA;
            }
            else
            {
                gen = genB;
            }

            if (gen == -1)
            {
                // 'files' may be null (see the null check above); string.Join would
                // then throw ArgumentNullException and hide the real problem.
                System.String fileList = files == null ? "null" : string.Join(" ", files);
                throw new System.IO.FileNotFoundException("no segments* file found in " + directory + ": files:" + fileList);
            }
        }

        // Third method (fallback if first & second methods
        // are not reliable): since both directory cache and
        // file contents cache seem to be stale, just
        // advance the generation.
        if (1 == method || (0 == method && lastGen == gen && retry))
        {
            method = 1;

            if (genLookaheadCount < Lucene.Net.Index.SegmentInfos.defaultGenLookaheadCount)
            {
                gen++;
                genLookaheadCount++;
                Lucene.Net.Index.SegmentInfos.Message("look ahead increment gen to " + gen);
            }
        }

        if (lastGen == gen)
        {
            // This means we're about to try the same
            // segments_N last tried.  This is allowed,
            // exactly once, because writer could have been in
            // the process of writing segments_N last time.

            if (retry)
            {
                // OK, we've tried the same segments_N file
                // twice in a row, so this must be a real
                // error.  We throw the original exception we
                // got.
                throw exc;
            }

            retry = true;
        }
        else if (0 == method)
        {
            // Segment file has advanced since our last loop, so
            // reset retry:
            retry = false;
        }

        lastGen = gen;

        segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);

        try
        {
            System.Object v = DoBody(segmentFileName);
            Lucene.Net.Index.SegmentInfos.Message("success on " + segmentFileName);
            return v;
        }
        catch (System.IO.IOException err)
        {
            // Save the original root cause:
            if (exc == null)
            {
                exc = err;
            }

            Lucene.Net.Index.SegmentInfos.Message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retry=" + retry + "; gen = " + gen);

            if (!retry && gen > 1)
            {
                // This is our first time trying this segments
                // file (because retry is false), and, there is
                // possibly a segments_(N-1) (because gen > 1).
                // So, check if the segments_(N-1) exists and
                // try it if so:
                System.String prevSegmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen - 1);

                bool prevExists;
                prevExists = directory.FileExists(prevSegmentFileName);

                if (prevExists)
                {
                    Lucene.Net.Index.SegmentInfos.Message("fallback to prior segment file '" + prevSegmentFileName + "'");
                    try
                    {
                        System.Object v = DoBody(prevSegmentFileName);
                        if (exc != null)
                        {
                            Lucene.Net.Index.SegmentInfos.Message("success on fallback " + prevSegmentFileName);
                        }
                        return v;
                    }
                    catch (System.IO.IOException err2)
                    {
                        Lucene.Net.Index.SegmentInfos.Message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry");
                    }
                }
            }
        }
    }
}
/// <summary>
/// Expert: returns an IndexReader reading the index in the directory of the given
/// commit, using that specific commit and a custom <see cref="IndexDeletionPolicy" />.
/// You should pass readOnly=true, since it gives much better concurrent
/// performance, unless you intend to do write operations (delete documents or
/// change norms) with the reader.
/// </summary>
/// <param name="commit">the specific <see cref="IndexCommit" /> to open;
/// see <see cref="IndexReader.ListCommits" /> to list all commits in a directory</param>
/// <param name="deletionPolicy">a custom deletion policy (only used if you use this
/// reader to perform deletes or to set norms); see <see cref="IndexWriter" /> for
/// details.</param>
/// <param name="readOnly">true if no changes (deletions, norms) will be made with
/// this IndexReader</param>
/// <param name="termInfosIndexDivisor">Subsamples which indexed terms are loaded into
/// RAM. This has the same effect as <c>IndexWriter.SetTermIndexInterval</c> except
/// that setting must be done at indexing time while this setting can be set per
/// reader. When set to N, then one in every N*termIndexInterval terms in the index
/// is loaded into memory. By setting this to a value &gt; 1 you can reduce memory
/// usage, at the expense of higher latency when loading a TermInfo. The default
/// value is 1. Set this to -1 to skip loading the terms index entirely.</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static IndexReader Open(IndexCommit commit, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
{
    // The directory to read from is the one the commit belongs to.
    var commitDirectory = commit.Directory;
    return Open(commitDirectory, deletionPolicy, commit, readOnly, termInfosIndexDivisor);
}
/// <summary>
/// Common entry point for the public <c>Open</c> overloads: forwards every argument
/// unchanged to <c>DirectoryReader.Open</c>.
/// </summary>
/// <param name="directory">the index directory</param>
/// <param name="deletionPolicy">the deletion policy, forwarded unchanged</param>
/// <param name="commit">the commit point to open, forwarded unchanged</param>
/// <param name="readOnly">the read-only flag, forwarded unchanged</param>
/// <param name="termInfosIndexDivisor">the terms index divisor, forwarded unchanged</param>
/// <returns>the reader produced by <c>DirectoryReader.Open</c></returns>
private static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, IndexCommit commit, bool readOnly, int termInfosIndexDivisor)
{
    IndexReader reader = DirectoryReader.Open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor);
    return reader;
}
/// <summary>
/// Expert: returns an IndexReader reading the index in the given
/// <see cref="IndexCommit" />, with no custom deletion policy and the default
/// terms index divisor. You should pass readOnly=true, since it gives much better
/// concurrent performance, unless you intend to do write operations (delete
/// documents or change norms) with the reader.
/// </summary>
/// <param name="commit">the commit point to open</param>
/// <param name="readOnly">true if no changes (deletions, norms) will be made with
/// this IndexReader</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static IndexReader Open(IndexCommit commit, bool readOnly)
{
    // The directory to read from is the one the commit belongs to.
    var commitDirectory = commit.Directory;
    return Open(commitDirectory, null, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
}
/// <summary>
/// Expert: returns an IndexReader reading the index in the directory of the given
/// commit, using that specific commit and a custom <see cref="IndexDeletionPolicy" />;
/// the default terms index divisor is used. You should pass readOnly=true, since it
/// gives much better concurrent performance, unless you intend to do write
/// operations (delete documents or change norms) with the reader.
/// </summary>
/// <param name="commit">the specific <see cref="IndexCommit" /> to open;
/// see <see cref="IndexReader.ListCommits" /> to list all commits in a directory</param>
/// <param name="deletionPolicy">a custom deletion policy (only used if you use this
/// reader to perform deletes or to set norms); see <see cref="IndexWriter" /> for
/// details.</param>
/// <param name="readOnly">true if no changes (deletions, norms) will be made with
/// this IndexReader</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static IndexReader Open(IndexCommit commit, IndexDeletionPolicy deletionPolicy, bool readOnly)
{
    // The directory to read from is the one the commit belongs to.
    var commitDirectory = commit.Directory;
    return Open(commitDirectory, deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
}
/// <summary>
/// Reopens this reader without going through a writer. Returns this instance (or a
/// clone with the requested read-only mode) when nothing needs to be reloaded;
/// otherwise loads the segments through <c>AnonymousFindSegmentsFile</c>.
/// The whole operation runs while holding the lock on this instance.
/// </summary>
/// <param name="openReadOnly">the read-only mode requested for the returned reader</param>
/// <param name="commit">the commit point to reopen on, or <c>null</c> for the
/// current state of the index</param>
/// <returns>this instance, a clone of it, or a newly loaded reader</returns>
/// <exception cref="System.IO.IOException">if <c>commit</c> belongs to a different directory</exception>
private IndexReader DoReopenNoWriter(bool openReadOnly, IndexCommit commit)
{
    lock (this)
    {
        if (commit != null)
        {
            if (internalDirectory != commit.Directory)
            {
                throw new System.IO.IOException("the specified commit does not match the specified Directory");
            }

            bool alreadyOnCommit = segmentInfos != null && commit.SegmentsFileName.Equals(segmentInfos.GetCurrentSegmentFileName());
            if (alreadyOnCommit)
            {
                if (readOnly == openReadOnly)
                {
                    return this;
                }

                // Just fallback to clone
                return Clone(openReadOnly);
            }
        }
        else if (hasChanges)
        {
            // We have changes, which means we are not readOnly:
            System.Diagnostics.Debug.Assert(readOnly == false);
            // and we hold the write lock:
            System.Diagnostics.Debug.Assert(writeLock != null);
            // so no other writer holds the write lock, which
            // means no changes could have been done to the index:
            System.Diagnostics.Debug.Assert(IsCurrent());

            if (!openReadOnly)
            {
                return this;
            }
            return Clone(openReadOnly);
        }
        else if (IsCurrent())
        {
            if (openReadOnly == readOnly)
            {
                return this;
            }

            // Just fallback to clone
            return Clone(openReadOnly);
        }

        // Nothing above could be reused: load the requested segments.
        var finder = new AnonymousFindSegmentsFile(internalDirectory, openReadOnly, this);
        return (IndexReader) finder.Run(commit);
    }
}
/// <summary>
/// Reopens this reader, dispatching on how it was obtained: through the writer when
/// the <c>writer</c> field is set, otherwise directly from the directory.
/// </summary>
/// <param name="openReadOnly">the read-only mode requested for the returned reader</param>
/// <param name="commit">the commit point to reopen on, or <c>null</c>; a non-null
/// commit is only expected together with <c>openReadOnly == true</c> (debug assertion)</param>
/// <returns>the reopened reader</returns>
internal virtual IndexReader DoReopen(bool openReadOnly, IndexCommit commit)
{
    EnsureOpen();

    System.Diagnostics.Debug.Assert(commit == null || openReadOnly);

    if (writer == null)
    {
        return DoReopenNoWriter(openReadOnly, commit);
    }

    // If we were obtained by writer.getReader(), re-ask the
    // writer to get a new reader.
    return DoReopenFromWriter(openReadOnly, commit);
}
/// <summary>
/// Reopens a reader that was obtained from a writer by asking that writer for a
/// new reader.
/// </summary>
/// <param name="openReadOnly">must be <c>true</c></param>
/// <param name="commit">must be <c>null</c></param>
/// <returns>the reader returned by <c>writer.GetReader()</c></returns>
/// <exception cref="System.ArgumentException">if <c>openReadOnly</c> is <c>false</c>
/// or <c>commit</c> is not <c>null</c></exception>
private IndexReader DoReopenFromWriter(bool openReadOnly, IndexCommit commit)
{
    System.Diagnostics.Debug.Assert(readOnly);

    if (openReadOnly == false)
    {
        throw new System.ArgumentException("a reader obtained from IndexWriter.getReader() can only be reopened with openReadOnly=true (got false)");
    }

    if (commit != null)
    {
        throw new System.ArgumentException("a reader obtained from IndexWriter.getReader() cannot currently accept a commit");
    }

    // TODO: right now we *always* make a new reader; in
    // the future we could have write make some effort to
    // detect that no changes have occurred
    IndexReader freshReader = writer.GetReader();
    return freshReader;
}
/// <summary>
/// Reopens this reader on the given commit point; the reopen is always requested
/// in read-only mode.
/// </summary>
/// <param name="commit">the commit point to reopen on</param>
/// <returns>the reader returned by <c>DoReopen</c></returns>
public override IndexReader Reopen(IndexCommit commit)
{
    const bool openReadOnly = true;
    return DoReopen(openReadOnly, commit);
}
/// <summary>
/// Opens a reader on <c>directory</c> by running an
/// <c>AnonymousClassFindSegmentsFile</c> against the given commit and casting its
/// result to <see cref="IndexReader" />.
/// </summary>
/// <param name="directory">the index directory</param>
/// <param name="deletionPolicy">the deletion policy handed to the segments-file finder</param>
/// <param name="commit">the commit point to open, passed to <c>Run</c></param>
/// <param name="readOnly">the read-only flag handed to the segments-file finder</param>
/// <param name="termInfosIndexDivisor">the terms index divisor handed to the segments-file finder</param>
/// <returns>the reader produced by the finder's <c>Run</c></returns>
internal static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, IndexCommit commit, bool readOnly, int termInfosIndexDivisor)
{
    var finder = new AnonymousClassFindSegmentsFile(readOnly, deletionPolicy, termInfosIndexDivisor, directory);
    object opened = finder.Run(commit);
    return (IndexReader) opened;
}