/// <summary>
/// Adds a document to this index using the supplied analyzer rather than the
/// one returned by {@link #GetAnalyzer()}. If the document contains more than
/// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are
/// discarded.
/// </summary>
public virtual void AddDocument(Document doc, Analyzer analyzer)
{
    // Write the document into a brand-new segment inside the RAM directory.
    DocumentWriter writer = new DocumentWriter(ramDirectory, analyzer, this);
    writer.SetInfoStream(infoStream);

    string segment = NewSegmentName();
    writer.AddDocument(segment, doc);

    // Registering the new segment and deciding whether to merge must be atomic.
    lock (this)
    {
        segmentInfos.Add(new SegmentInfo(segment, 1, ramDirectory));
        MaybeMergeSegments();
    }
}
/// <summary>
/// Copies the named segments into <paramref name="destDir"/> and commits a new
/// segments file there, producing a standalone index containing only those segments.
/// </summary>
/// <param name="destDir">Destination directory (created if missing).</param>
/// <param name="segs">Names of the segments to copy.</param>
public virtual void Split(DirectoryInfo destDir, string[] segs)
{
    destDir.Create();
    FSDirectory destFSDir = FSDirectory.Open(destDir);

    SegmentInfos destInfos = new SegmentInfos();
    destInfos.Counter = Infos.Counter;

    foreach (string segName in segs)
    {
        SegmentCommitInfo commit = GetInfo(segName);
        SegmentInfo srcInfo = commit.Info;

        // Same metadata, re-homed onto the destination directory.
        SegmentInfo copied = new SegmentInfo(destFSDir, srcInfo.Version, srcInfo.Name, srcInfo.DocCount, srcInfo.UseCompoundFile, srcInfo.Codec, srcInfo.Diagnostics);
        destInfos.Add(new SegmentCommitInfo(copied, commit.DelCount, commit.DelGen, commit.FieldInfosGen));

        // Physically copy every file belonging to this segment.
        foreach (string fileName in commit.GetFiles())
        {
            FileInfo from = new FileInfo(Path.Combine(dir.FullName, fileName));
            FileInfo to = new FileInfo(Path.Combine(destDir.FullName, fileName));
            CopyFile(from, to);
        }
    }

    destInfos.Changed();
    destInfos.Commit(destFSDir);
}
/// <summary> Returns a copy of this instance, also copying each
/// SegmentInfo.
/// </summary>
public override System.Object Clone()
{
    SegmentInfos si = new SegmentInfos();
    for (int i = 0; i < base.Count; i++)
    {
        si.Add(((SegmentInfo) base[i]).Clone());
    }
    // FIX: previously only generation/lastGeneration were carried over.
    // Dropping counter (source of new segment names) and version meant a
    // committed clone could restart segment numbering and reset the index
    // version; the other Clone variants in this codebase copy both.
    si.counter = this.counter;
    si.version = this.version;
    si.generation = this.generation;
    si.lastGeneration = this.lastGeneration;
    return (si);
}
/// <summary>
/// Returns a copy of this instance in which every contained SegmentInfo is
/// itself cloned. Const and static fields are not copied.
/// </summary>
public override System.Object Clone()
{
    SegmentInfos copy = new SegmentInfos();

    // Deep-copy each segment's metadata first...
    for (int idx = 0; idx < this.Count; idx++)
    {
        copy.Add(((SegmentInfo) this[idx]).Clone());
    }

    // ...then carry over the bookkeeping fields.
    copy.counter = this.counter;
    copy.version = this.version;
    copy.generation = this.generation;
    copy.lastGeneration = this.lastGeneration;

    return copy;
}
/// <summary>
/// Returns a deep copy of this instance: each SegmentInfo is cloned, the
/// user-data map is duplicated, and the bookkeeping fields are carried over.
/// </summary>
public override System.Object Clone()
{
    SegmentInfos copy = new SegmentInfos();

    for (int idx = 0; idx < this.Count; idx++)
    {
        copy.Add(((SegmentInfo) this[idx]).Clone());
    }

    copy.counter = this.counter;
    copy.generation = this.generation;
    copy.lastGeneration = this.lastGeneration;
    // sis.pendingSegnOutput = this.pendingSegnOutput; // {{Aroush-2.9}} needed?
    // Fresh dictionary so the clone's user data is independent of ours.
    copy.userData = new System.Collections.Generic.Dictionary<string, string>(userData);
    copy.version = this.version;

    return (copy);
}
/// <summary>
/// Returns a deep copy of this instance: every SegmentInfo is cloned and the
/// user-data map is duplicated so the copy is fully independent.
/// </summary>
public SegmentInfos Clone()
{
    SegmentInfos copy = new SegmentInfos();

    // Carry over the bookkeeping fields first.
    copy.counter = this.counter;
    copy.generation = this.generation;
    copy.lastGeneration = this.lastGeneration;
    // sis.pendingSegnOutput = this.pendingSegnOutput; // {{Aroush-2.9}} needed?
    copy.userData = new Dictionary<string, string>(userData);
    copy.version = this.version;

    // Then deep-copy each segment's metadata.
    for (int idx = 0; idx < this.Count; idx++)
    {
        copy.Add(this[idx].Clone());
    }

    return (copy);
}
/// <summary>
/// Returns a copy of this instance in which each SegmentInfo is cloned and
/// the counter/version/generation bookkeeping is preserved.
/// </summary>
public override object Clone()
{
    SegmentInfos copy = new SegmentInfos();

    // Copy the bookkeeping fields.
    copy.counter = this.counter;
    copy.version = this.version;
    copy.generation = this.generation;
    copy.lastGeneration = this.lastGeneration;

    // Deep-copy the per-segment metadata.
    for (int idx = 0; idx < base.Count; idx++)
    {
        copy.Add(((SegmentInfo) base[idx]).Clone());
    }

    return copy;
}
/// <summary>
/// Returns a deep copy of this instance; contained SegmentInfo objects are
/// cloned and the user-data map is duplicated.
/// </summary>
public override System.Object Clone()
{
    SegmentInfos duplicate = new SegmentInfos();

    int total = this.Count;
    for (int pos = 0; pos < total; pos++)
    {
        duplicate.Add(((SegmentInfo) this[pos]).Clone());
    }

    duplicate.counter = this.counter;
    duplicate.generation = this.generation;
    duplicate.lastGeneration = this.lastGeneration;
    // sis.pendingSegnOutput = this.pendingSegnOutput; // {{Aroush-2.9}} needed?
    duplicate.userData = new System.Collections.Generic.Dictionary<string, string>(userData);
    duplicate.version = this.version;

    return duplicate;
}
// For infoStream output: maps each incoming segment onto the writer's live
// instance of the same segment when one exists, so messages reflect current state.
internal virtual SegmentInfos ToLiveInfos(SegmentInfos sis)
{
    lock (this)
    {
        // Identity map of the segments currently live in this writer.
        IDictionary<SegmentCommitInfo, SegmentCommitInfo> live = new Dictionary<SegmentCommitInfo, SegmentCommitInfo>();
        foreach (SegmentCommitInfo liveInfo in segmentInfos.Segments)
        {
            live[liveInfo] = liveInfo;
        }

        // Substitute each incoming segment with its live counterpart, if any.
        SegmentInfos result = new SegmentInfos();
        foreach (SegmentCommitInfo candidate in sis.Segments)
        {
            SegmentCommitInfo found;
            result.Add(live.TryGetValue(candidate, out found) ? found : candidate);
        }
        return result;
    }
}
/// <summary>
/// Checks every segment of the index in <c>dir</c>: segment metadata, field
/// norms, the postings lists (terms, frequencies, positions), stored fields
/// and term vectors. Problems are reported to <c>out_Renamed</c>; when
/// <c>doFix</c> is true, a new segments file that drops every broken segment
/// (losing its documents) is written after a five-second grace period.
/// </summary>
/// <param name="dir">Directory holding the index to check.</param>
/// <param name="doFix">Whether to write a repaired segments file when problems are found.</param>
/// <returns>True if the index is clean, else false (even when fixed).</returns>
public static bool Check(Directory dir, bool doFix)
{
    System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
    SegmentInfos sis = new SegmentInfos();

    try
    {
        sis.Read(dir);
    }
    catch (System.Exception t)
    {
        out_Renamed.WriteLine("ERROR: could not read any segments file in directory");
        out_Renamed.Write(t.StackTrace);
        out_Renamed.Flush();
        return (false);
    }

    int numSegments = sis.Count;
    System.String segmentsFileName = sis.GetCurrentSegmentFileName();

    // Read the format header of the segments file so we can report the index
    // version and bail out on indexes written by a newer release.
    IndexInput input = null;
    try
    {
        input = dir.OpenInput(segmentsFileName);
    }
    catch (System.Exception t)
    {
        out_Renamed.WriteLine("ERROR: could not open segments file in directory");
        out_Renamed.Write(t.StackTrace);
        out_Renamed.Flush();
        return (false);
    }

    int format = 0;
    try
    {
        format = input.ReadInt();
    }
    catch (System.Exception t)
    {
        out_Renamed.WriteLine("ERROR: could not read segment file version in directory");
        out_Renamed.Write(t.StackTrace);
        out_Renamed.Flush();
        return (false);
    }
    finally
    {
        if (input != null)
        {
            input.Close();
        }
    }

    System.String sFormat = "";
    bool skip = false;

    // BUGFIX: this used to be two independent if-chains, so the pre-2.1
    // FORMAT case was always overwritten by the final else below with
    // "[Lucene 1.3 or prior]". A single else-if chain fixes that.
    if (format == SegmentInfos.FORMAT)
    {
        sFormat = "FORMAT [Lucene Pre-2.1]";
    }
    else if (format == SegmentInfos.FORMAT_LOCKLESS)
    {
        sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
    }
    else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
    {
        sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
    }
    else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
    {
        sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
    }
    else if (format < SegmentInfos.FORMAT_SHARED_DOC_STORE)
    {
        // Format constants are negative and decrease over time, so anything
        // below the newest known constant came from a newer release.
        sFormat = "int=" + format + " [newer version of Lucene than this tool]";
        skip = true;
    }
    else
    {
        sFormat = format + " [Lucene 1.3 or prior]";
    }

    out_Renamed.WriteLine("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat);

    if (skip)
    {
        out_Renamed.WriteLine("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
        return (false);
    }

    // newSIS collects the segments that pass every check; broken ones are dropped.
    SegmentInfos newSIS = (SegmentInfos) sis.Clone();
    newSIS.Clear();
    bool changed = false;
    int totLoseDocCount = 0;
    int numBadSegments = 0;

    for (int i = 0; i < numSegments; i++)
    {
        SegmentInfo info = sis.Info(i);
        out_Renamed.WriteLine(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);

        // Until the reader opens successfully, assume all docs would be lost.
        int toLoseDocCount = info.docCount;
        SegmentReader reader = null;

        try
        {
            out_Renamed.WriteLine(" compound=" + info.GetUseCompoundFile());
            out_Renamed.WriteLine(" numFiles=" + info.Files().Count);
            out_Renamed.WriteLine(String.Format(nf, " size (MB)={0:f}", new Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));

            int docStoreOffset = info.GetDocStoreOffset();
            if (docStoreOffset != -1)
            {
                out_Renamed.WriteLine(" docStoreOffset=" + docStoreOffset);
                out_Renamed.WriteLine(" docStoreSegment=" + info.GetDocStoreSegment());
                out_Renamed.WriteLine(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
            }

            System.String delFileName = info.GetDelFileName();
            if (delFileName == null)
            {
                out_Renamed.WriteLine(" no deletions");
            }
            else
            {
                out_Renamed.WriteLine(" has deletions [delFileName=" + delFileName + "]");
            }

            out_Renamed.Write(" test: open reader.........");
            reader = SegmentReader.Get(info);
            int numDocs = reader.NumDocs();
            toLoseDocCount = numDocs;
            if (reader.HasDeletions())
            {
                out_Renamed.WriteLine("OK [" + (info.docCount - numDocs) + " deleted docs]");
            }
            else
            {
                out_Renamed.WriteLine("OK");
            }

            // Every field's norms array must be sized exactly to the segment's maxDoc.
            out_Renamed.Write(" test: fields, norms.......");
            System.Collections.IDictionary fieldNames = (System.Collections.IDictionary) reader.GetFieldNames(IndexReader.FieldOption.ALL);
            System.Collections.IEnumerator it = fieldNames.Keys.GetEnumerator();
            while (it.MoveNext())
            {
                System.String fieldName = (System.String) it.Current;
                byte[] b = reader.Norms(fieldName);
                if (b.Length != info.docCount)
                {
                    throw new System.SystemException("norms for field \"" + fieldName + "\" is length " + b.Length + " != maxDoc " + info.docCount);
                }
            }
            out_Renamed.WriteLine("OK [" + fieldNames.Count + " fields]");

            out_Renamed.Write(" test: terms, freq, prox...");
            TermEnum termEnum = reader.Terms();
            TermPositions termPositions = reader.TermPositions();

            // Used only to count up # deleted docs for this term
            MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

            long termCount = 0;
            long totFreq = 0;
            long totPos = 0;
            while (termEnum.Next())
            {
                termCount++;
                Term term = termEnum.Term();
                int docFreq = termEnum.DocFreq();
                termPositions.Seek(term);
                int lastDoc = -1;
                int freq0 = 0;
                totFreq += docFreq;
                while (termPositions.Next())
                {
                    freq0++;
                    int doc = termPositions.Doc();
                    int freq = termPositions.Freq();
                    if (doc <= lastDoc)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + " < lastDoc " + lastDoc);
                    }
                    lastDoc = doc;
                    if (freq <= 0)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                    }

                    int lastPos = -1;
                    totPos += freq;
                    for (int j = 0; j < freq; j++)
                    {
                        int pos = termPositions.NextPosition();
                        if (pos < 0)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                        }
                        // BUGFIX: lastPos was never advanced, so the
                        // "positions must not go backwards" check (per its own
                        // error message) could never fire. Equal positions are
                        // legal (tokens with position increment 0), hence '<'.
                        if (pos < lastPos)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                        }
                        lastPos = pos;
                    }
                }

                // Now count how many deleted docs occurred in this term:
                int delCount;
                if (reader.HasDeletions())
                {
                    myTermDocs.Seek(term);
                    while (myTermDocs.Next())
                    {
                    }
                    delCount = myTermDocs.delCount;
                }
                else
                {
                    delCount = 0;
                }

                if (freq0 + delCount != docFreq)
                {
                    throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
                }
            }
            out_Renamed.WriteLine("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");

            out_Renamed.Write(" test: stored fields.......");
            int docCount = 0;
            long totFields = 0;
            for (int j = 0; j < info.docCount; j++)
            {
                if (!reader.IsDeleted(j))
                {
                    docCount++;
                    Document doc = reader.Document(j);
                    totFields += doc.GetFields().Count;
                }
            }
            if (docCount != reader.NumDocs())
            {
                // BUGFIX: the message previously printed docCount twice instead
                // of the reader's count, hiding the actual mismatch.
                throw new System.SystemException("docCount=" + docCount + " but saw " + reader.NumDocs() + " undeleted docs");
            }
            out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total field count; avg {1:f} fields per doc]", new Object[] { totFields, (((float) totFields) / docCount) }));

            out_Renamed.Write(" test: term vectors........");
            int totVectors = 0;
            for (int j = 0; j < info.docCount; j++)
            {
                if (!reader.IsDeleted(j))
                {
                    TermFreqVector[] tfv = reader.GetTermFreqVectors(j);
                    if (tfv != null)
                    {
                        totVectors += tfv.Length;
                    }
                }
            }
            out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new Object[] { totVectors, (((float) totVectors) / docCount) }));
            out_Renamed.WriteLine("");
        }
        catch (System.Exception t)
        {
            // Any failure marks the whole segment as broken: it is left out of
            // newSIS and its documents are counted as lost.
            out_Renamed.WriteLine("FAILED");
            System.String comment;
            if (doFix)
            {
                comment = "will remove reference to this segment (-fix is specified)";
            }
            else
            {
                comment = "would remove reference to this segment (-fix was not specified)";
            }
            out_Renamed.WriteLine(" WARNING: " + comment + "; full exception:");
            out_Renamed.Write(t.StackTrace);
            out_Renamed.Flush();
            out_Renamed.WriteLine("");
            totLoseDocCount += toLoseDocCount;
            numBadSegments++;
            changed = true;
            continue;
        }
        finally
        {
            if (reader != null)
            {
                reader.Close();
            }
        }

        // Keeper
        newSIS.Add(info.Clone());
    }

    if (!changed)
    {
        out_Renamed.WriteLine("No problems were detected with this index.\n");
        return (true);
    }
    else
    {
        out_Renamed.WriteLine("WARNING: " + numBadSegments + " broken segments detected");
        if (doFix)
        {
            out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents will be lost");
        }
        else
        {
            out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents would be lost if -fix were specified");
        }
        out_Renamed.WriteLine();
    }

    if (doFix)
    {
        out_Renamed.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
        for (int i = 0; i < 5; i++)
        {
            try
            {
                // 10000 ticks == 1 ms, so this sleeps one second per iteration.
                System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000));
            }
            catch (System.Threading.ThreadInterruptedException)
            {
                SupportClass.ThreadClass.Current().Interrupt();
                i--;
                continue;
            }
            out_Renamed.WriteLine(" " + (5 - i) + "...");
        }
        out_Renamed.Write("Writing...");
        try
        {
            newSIS.Write(dir);
        }
        catch (System.Exception t)
        {
            out_Renamed.WriteLine("FAILED; exiting");
            out_Renamed.Write(t.StackTrace);
            out_Renamed.Flush();
            return (false);
        }
        out_Renamed.WriteLine("OK");
        out_Renamed.WriteLine("Wrote new segments file \"" + newSIS.GetCurrentSegmentFileName() + "\"");
    }
    else
    {
        out_Renamed.WriteLine("NOTE: would write new segments file [-fix was not specified]");
    }

    out_Renamed.WriteLine("");
    return (false);
}
/// <summary>Checks if any merges are now necessary and returns a
/// <see cref="MergePolicy.MergeSpecification" /> if so. A merge
/// is necessary when there are more than <see cref="MergeFactor" />
/// segments at a given level. When
/// multiple levels have too many segments, this method
/// will return multiple merges, allowing the <see cref="MergeScheduler" />
/// to use concurrency.
/// </summary>
/// <param name="infos">The current set of segments to inspect for merge candidates.</param>
/// <param name="state">Opaque state threaded through Size/SizeDocs/MakeOneMerge.</param>
/// <returns>The merges to run, or null when no level has enough segments.</returns>
public override MergeSpecification FindMerges(SegmentInfos infos, IState state)
{
    int numSegments = infos.Count;
    if (Verbose())
    {
        Message("findMerges: " + numSegments + " segments");
    }

    // Compute levels, which is just log (base mergeFactor)
    // of the size of each segment
    float[] levels = new float[numSegments];
    float norm = (float) System.Math.Log(mergeFactor);

    for (int i = 0; i < numSegments; i++)
    {
        SegmentInfo info = infos.Info(i);
        long size = Size(info, state);

        // Floor tiny segments (avoids log of 0 below).
        if (size < 1)
        {
            size = 1;
        }
        levels[i] = (float) System.Math.Log(size) / norm;
    }

    // Segments at or below levelFloor are all treated as one bottom level.
    float levelFloor;
    if (minMergeSize <= 0)
    {
        levelFloor = (float) 0.0;
    }
    else
    {
        levelFloor = (float) (System.Math.Log(minMergeSize) / norm);
    }

    // Now, we quantize the log values into levels. The
    // first level is any segment whose log size is within
    // LEVEL_LOG_SPAN of the max size, or, who has such as
    // segment "to the right". Then, we find the max of all
    // other segments and use that to define the next level
    // segment, etc.

    MergeSpecification spec = null;

    int start = 0;
    while (start < numSegments)
    {
        // Find max level of all segments not already
        // quantized.
        float maxLevel = levels[start];
        for (int i = 1 + start; i < numSegments; i++)
        {
            float level = levels[i];
            if (level > maxLevel)
            {
                maxLevel = level;
            }
        }

        // Now search backwards for the rightmost segment that
        // falls into this level:
        float levelBottom;
        if (maxLevel < levelFloor)
        {
            // All remaining segments fall into the min level
            levelBottom = -1.0F;
        }
        else
        {
            levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN);

            // Force a boundary at the level floor
            if (levelBottom < levelFloor && maxLevel >= levelFloor)
            {
                levelBottom = levelFloor;
            }
        }

        // upto = rightmost segment still inside [levelBottom, maxLevel].
        int upto = numSegments - 1;
        while (upto >= start)
        {
            if (levels[upto] >= levelBottom)
            {
                break;
            }
            upto--;
        }
        if (Verbose())
        {
            Message(" level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
        }

        // Finally, record all merges that are viable at this level:
        int end = start + mergeFactor;
        while (end <= 1 + upto)
        {
            SegmentInfos segmentsToMerge = new SegmentInfos();
            long totalSize = 0;
            var processed = 0;
            var largeSegmentsCount = 0;

            // we have a limitation: non-contiguous segment merges aren't allowed
            for (int i = start; i < end; i++)
            {
                SegmentInfo info = infos.Info(i);
                var segmentSize = Size(info, state);
                if (segmentSize >= maxMergeSize || SizeDocs(info, state) >= maxMergeDocs)
                {
                    // a single segment is larger than the maximum allowed
                    break;
                }

                var newTotalSize = totalSize + segmentSize;
                if (newTotalSize > maxMergeSize)
                {
                    // the total size of all segments will exceed the maximum allowed size
                    break;
                }

                if (segmentSize > largeSegmentSize && ++largeSegmentsCount > numberOfLargeSegmentsToMergeInSingleBatch)
                {
                    // limit the number of merged large segments
                    break;
                }

                totalSize = newTotalSize;
                processed++;
                segmentsToMerge.Add(info);
            }

            // A merge only makes sense with at least two segments.
            if (segmentsToMerge.Count > 1)
            {
                if (spec == null)
                {
                    spec = new MergeSpecification();
                }
                if (Verbose())
                {
                    Message(" " + segmentsToMerge.Count + " segments were added to this merge, total size: " + totalSize + " bytes");
                }
                spec.Add(MakeOneMerge(infos, segmentsToMerge, state));
            }
            else if (Verbose())
            {
                Message(" " + start + " to " + end + ": doesn't have enough segments to execute a merge");
            }

            if (processed == 0)
            {
                // no segments to merge, we skip one and try to recalculate using the rest ones
                processed = 1;
            }

            // Advance past the segments consumed (or skipped) by this window.
            start += processed;
            if (start == end)
            {
                end += mergeFactor;
            }
        }

        // Continue quantizing with the segments to the right of this level.
        start = 1 + upto;
    }

    return (spec);
}
/// <summary>
/// Extracts the named segments into <paramref name="destDir"/>: segment files
/// are copied over and a new segments file is committed there.
/// </summary>
/// <param name="destDir">Destination directory (created if missing).</param>
/// <param name="segs">Names of the segments to extract.</param>
public virtual void Split(DirectoryInfo destDir, string[] segs)
{
    destDir.Create();
    FSDirectory targetDir = FSDirectory.Open(destDir);

    SegmentInfos targetInfos = new SegmentInfos();
    targetInfos.Counter = infos.Counter;

    foreach (string name in segs)
    {
        SegmentCommitInfo commit = GetInfo(name);
        SegmentInfo source = commit.Info;

        // Same info just changing the dir:
        SegmentInfo relocated = new SegmentInfo(targetDir, source.Version, source.Name, source.DocCount, source.UseCompoundFile, source.Codec, source.Diagnostics);
        targetInfos.Add(new SegmentCommitInfo(relocated, commit.DelCount, commit.DelGen, commit.FieldInfosGen));

        // Copy the segment's files into the destination directory.
        ICollection<string> segmentFiles = commit.Files();
        foreach (string fileName in segmentFiles)
        {
            FileInfo source2 = new FileInfo(Path.Combine(dir.FullName, fileName));
            FileInfo target = new FileInfo(Path.Combine(destDir.FullName, fileName));
            CopyFile(source2, target);
        }
    }

    targetInfos.Changed();
    targetInfos.Commit(targetDir);
}
/// <summary>
/// Returns a copy of this instance where each contained SegmentInfo is also
/// cloned. Const and static fields are not copied.
/// </summary>
public override System.Object Clone()
{
    SegmentInfos duplicate = new SegmentInfos();

    // Bookkeeping fields first...
    duplicate.counter = this.counter;
    duplicate.version = this.version;
    duplicate.generation = this.generation;
    duplicate.lastGeneration = this.lastGeneration;

    // ...then a deep copy of each segment's metadata.
    int total = this.Count;
    for (int pos = 0; pos < total; pos++)
    {
        duplicate.Add(((SegmentInfo) this[pos]).Clone());
    }

    return (duplicate);
}