/// <summary>
/// Indexes a single document whose "preanalyzed" field is built from a token stream
/// instead of text, then reads the newest segment back and checks the frequency and
/// positions reported for "term1", "term2" and "term3".
/// </summary>
public virtual void TestPreAnalyzedField()
{
    const string FieldName = "preanalyzed";

    // Write one document and remember which segment it ended up in.
    IndexWriter indexWriter = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document document = new Document();
    document.Add(new Field(FieldName, new AnonymousClassTokenStream(this), TermVector.NO));
    indexWriter.AddDocument(document);
    indexWriter.Flush();
    SegmentInfo newestSegment = indexWriter.NewestSegment();
    indexWriter.Close();

    SegmentReader segmentReader = SegmentReader.Get(newestSegment);

    // "term1": expected once, at position 0.
    TermPositions positions = segmentReader.TermPositions(new Term(FieldName, "term1"));
    Assert.IsTrue(positions.Next());
    Assert.AreEqual(1, positions.Freq());
    Assert.AreEqual(0, positions.NextPosition());

    // "term2": expected twice, at positions 1 and 3.
    positions.Seek(new Term(FieldName, "term2"));
    Assert.IsTrue(positions.Next());
    Assert.AreEqual(2, positions.Freq());
    Assert.AreEqual(1, positions.NextPosition());
    Assert.AreEqual(3, positions.NextPosition());

    // "term3": expected once, at position 2.
    positions.Seek(new Term(FieldName, "term3"));
    Assert.IsTrue(positions.Next());
    Assert.AreEqual(1, positions.Freq());
    Assert.AreEqual(2, positions.NextPosition());
}
/// <summary>
/// Skips <paramref name="tp"/> to <paramref name="target"/> and verifies the result:
/// the shared <c>counter</c> must not exceed <paramref name="maxCounter"/>, the
/// enumerator must sit on the target document with frequency 1, and the first payload
/// byte of its position must equal the target cast to a byte.
/// </summary>
/// <param name="tp">positions enumerator to advance</param>
/// <param name="target">document number to skip to</param>
/// <param name="maxCounter">largest value <c>counter</c> may have after the skip</param>
public virtual void CheckSkipTo(TermPositions tp, int target, int maxCounter)
{
    tp.SkipTo(target);

    if (counter > maxCounter)
    {
        Assert.Fail("Too many bytes read: " + counter);
    }

    string wrongDocMessage = "Wrong document " + tp.Doc() + " after skipTo target " + target;
    Assert.AreEqual(target, tp.Doc(), wrongDocMessage);

    string wrongFreqMessage = "Frequency is not 1: " + tp.Freq();
    Assert.AreEqual(1, tp.Freq(), wrongFreqMessage);

    // Move onto the position so its payload can be read into a one-byte buffer.
    tp.NextPosition();
    byte[] payload = new byte[1];
    tp.GetPayload(payload, 0);

    string wrongPayloadMessage = "Wrong payload for the target " + target + ": " + payload[0];
    Assert.AreEqual((byte)target, payload[0], wrongPayloadMessage);
}
/// <summary>
/// Indexes the text "a 5 a a" in field "f1" through <c>AnonymousClassAnalyzer1</c> and
/// checks that term "a" has frequency 3, positions 0, 6 and 7, and that a payload is
/// available only at the first of those positions.
/// </summary>
public virtual void TestTokenReuse()
{
    Analyzer customAnalyzer = new AnonymousClassAnalyzer1(this);

    IndexWriter indexWriter = new IndexWriter(dir, customAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document document = new Document();
    document.Add(new Field("f1", "a 5 a a", Field.Store.YES, Field.Index.ANALYZED));
    indexWriter.AddDocument(document);
    indexWriter.Flush();
    SegmentInfo newestSegment = indexWriter.NewestSegment();
    indexWriter.Close();

    SegmentReader segmentReader = SegmentReader.Get(newestSegment);
    TermPositions positions = segmentReader.TermPositions(new Term("f1", "a"));

    Assert.IsTrue(positions.Next());
    int occurrences = positions.Freq();
    Assert.AreEqual(3, occurrences);

    // First occurrence: position 0, payload present.
    Assert.AreEqual(0, positions.NextPosition());
    Assert.AreEqual(true, positions.IsPayloadAvailable());

    // Second occurrence: position 6, no payload.
    Assert.AreEqual(6, positions.NextPosition());
    Assert.AreEqual(false, positions.IsPayloadAvailable());

    // Third occurrence: position 7, no payload.
    Assert.AreEqual(7, positions.NextPosition());
    Assert.AreEqual(false, positions.IsPayloadAvailable());
}
/// <summary>
/// Adds two values ("repeated one" and "repeated two") under the same field name
/// "repeated", analyzed with <c>AnonymousClassAnalyzer</c>, and checks that the term
/// "repeated" occurs twice, at positions 0 and 502.
/// </summary>
public virtual void TestPositionIncrementGap()
{
    const string FieldName = "repeated";

    Analyzer gapAnalyzer = new AnonymousClassAnalyzer(this);
    IndexWriter indexWriter = new IndexWriter(dir, gapAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);

    // Two instances of the same field in one document.
    Document document = new Document();
    document.Add(new Field(FieldName, "repeated one", Field.Store.YES, Field.Index.ANALYZED));
    document.Add(new Field(FieldName, "repeated two", Field.Store.YES, Field.Index.ANALYZED));
    indexWriter.AddDocument(document);
    indexWriter.Flush();
    SegmentInfo newestSegment = indexWriter.NewestSegment();
    indexWriter.Close();

    SegmentReader segmentReader = SegmentReader.Get(newestSegment);
    TermPositions positions = segmentReader.TermPositions(new Term(FieldName, "repeated"));

    Assert.IsTrue(positions.Next());
    int occurrences = positions.Freq();
    Assert.AreEqual(2, occurrences);
    Assert.AreEqual(0, positions.NextPosition());
    Assert.AreEqual(502, positions.NextPosition());
}
/// <summary>
/// Merges the postings of one term from several segments, all of which are positioned
/// on that term, writing document/frequency entries to <c>freqOutput</c> and position
/// deltas to <c>proxOutput</c>.
/// </summary>
/// <param name="smis">array of segments</param>
/// <param name="n">number of cells in the array actually occupied</param>
/// <returns>number of documents across all segments where this term was found</returns>
private int AppendPostings(SegmentMergeInfo[] smis, int n)
{
    int previousDoc = 0;
    int docsWithTerm = 0;
    ResetSkip();

    for (int segment = 0; segment < n; segment++)
    {
        SegmentMergeInfo mergeInfo = smis[segment];
        TermPositions postings = mergeInfo.postings;
        int docBase = mergeInfo.base_Renamed;
        int[] docMap = mergeInfo.docMap;
        postings.Seek(mergeInfo.termEnum);

        while (postings.Next())
        {
            int mergedDoc = postings.Doc();
            if (docMap != null)
            {
                // map around deletions
                mergedDoc = docMap[mergedDoc];
            }
            // convert to merged space
            mergedDoc += docBase;

            if (mergedDoc < previousDoc)
            {
                throw new System.SystemException("docs out of order");
            }

            docsWithTerm++;

            if (docsWithTerm % skipInterval == 0)
            {
                BufferSkip(previousDoc);
            }

            // The doc delta is shifted left so the low bit can flag freq == 1.
            int docCode = (mergedDoc - previousDoc) << 1;
            previousDoc = mergedDoc;

            int freq = postings.Freq();
            if (freq != 1)
            {
                freqOutput.WriteVInt(docCode); // doc delta, low bit clear
                freqOutput.WriteVInt(freq);    // explicit frequency
            }
            else
            {
                freqOutput.WriteVInt(docCode | 1); // doc delta with the freq == 1 flag
            }

            // Positions are written as deltas from the previous position in this doc.
            int previousPosition = 0;
            for (int remaining = freq; remaining > 0; remaining--)
            {
                int position = postings.NextPosition();
                proxOutput.WriteVInt(position - previousPosition);
                previousPosition = position;
            }
        }
    }

    return docsWithTerm;
}
/// <summary>
/// Dumps a segment to <paramref name="out_Renamed"/>: first every document returned by
/// the reader for indices below <c>NumDocs()</c>, then each term with its document
/// frequency and, per document, the term frequency and the list of positions.
/// </summary>
/// <remarks>
/// The directory, reader and term enumerator are now released in <c>finally</c> blocks,
/// so they are closed even when reading or writing throws. Previously only the
/// per-term positions enumerator was protected this way.
/// </remarks>
/// <param name="out_Renamed">writer that receives the dump</param>
/// <param name="segment">name of the segment to open inside <c>indexDir</c></param>
private void PrintSegment(System.IO.StringWriter out_Renamed, System.String segment)
{
    Directory directory = FSDirectory.GetDirectory(indexDir, false);
    try
    {
        SegmentReader reader = new SegmentReader(new SegmentInfo(segment, 1, directory));
        try
        {
            for (int i = 0; i < reader.NumDocs(); i++)
            {
                out_Renamed.WriteLine(reader.Document(i));
            }

            TermEnum tis = reader.Terms();
            try
            {
                while (tis.Next())
                {
                    out_Renamed.Write(tis.Term());
                    out_Renamed.WriteLine(" DF=" + tis.DocFreq());

                    TermPositions positions = reader.TermPositions(tis.Term());
                    try
                    {
                        while (positions.Next())
                        {
                            out_Renamed.Write(" doc=" + positions.Doc());
                            out_Renamed.Write(" TF=" + positions.Freq());
                            out_Renamed.Write(" pos=");
                            // The first position is printed bare, the rest comma-prefixed.
                            out_Renamed.Write(positions.NextPosition());
                            for (int j = 1; j < positions.Freq(); j++)
                            {
                                out_Renamed.Write("," + positions.NextPosition());
                            }
                            out_Renamed.WriteLine("");
                        }
                    }
                    finally
                    {
                        positions.Close();
                    }
                }
            }
            finally
            {
                tis.Close();
            }
        }
        finally
        {
            reader.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Dumps a segment of the index directory "test" to the console: first every document
/// returned by the reader for indices below <c>NumDocs()</c>, then each term with its
/// document frequency and, per document, the term frequency and the list of positions.
/// </summary>
/// <remarks>
/// The directory, reader and term enumerator are now released in <c>finally</c> blocks,
/// so they are closed even when reading throws. Previously only the per-term positions
/// enumerator was protected this way.
/// </remarks>
/// <param name="segment">name of the segment to open</param>
internal static void PrintSegment(System.String segment)
{
    Directory directory = FSDirectory.GetDirectory("test", false);
    try
    {
        SegmentReader reader = new SegmentReader(new SegmentInfo(segment, 1, directory));
        try
        {
            for (int i = 0; i < reader.NumDocs(); i++)
            {
                System.Console.Out.WriteLine(reader.Document(i));
            }

            TermEnum tis = reader.Terms();
            try
            {
                while (tis.Next())
                {
                    System.Console.Out.Write(tis.Term());
                    System.Console.Out.WriteLine(" DF=" + tis.DocFreq());

                    TermPositions positions = reader.TermPositions(tis.Term());
                    try
                    {
                        while (positions.Next())
                        {
                            System.Console.Out.Write(" doc=" + positions.Doc());
                            System.Console.Out.Write(" TF=" + positions.Freq());
                            System.Console.Out.Write(" pos=");
                            // The first position is printed bare, the rest comma-prefixed.
                            System.Console.Out.Write(positions.NextPosition());
                            for (int j = 1; j < positions.Freq(); j++)
                            {
                                System.Console.Out.Write("," + positions.NextPosition());
                            }
                            System.Console.Out.WriteLine("");
                        }
                    }
                    finally
                    {
                        positions.Close();
                    }
                }
            }
            finally
            {
                tis.Close();
            }
        }
        finally
        {
            reader.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
/// <summary>
/// Dumps the segment described by <paramref name="si"/> to
/// <paramref name="out_Renamed"/>: first every document returned by the reader for
/// indices below <c>NumDocs()</c>, then each term with its document frequency and, per
/// document, the term frequency and the list of positions.
/// </summary>
/// <remarks>
/// The reader and term enumerator are now released in <c>finally</c> blocks, so they
/// are closed even when reading or writing throws. Previously only the per-term
/// positions enumerator was protected this way.
/// </remarks>
/// <param name="out_Renamed">writer that receives the dump</param>
/// <param name="si">segment to open</param>
private void PrintSegment(System.IO.StreamWriter out_Renamed, SegmentInfo si)
{
    SegmentReader reader = SegmentReader.Get(si);
    try
    {
        for (int i = 0; i < reader.NumDocs(); i++)
        {
            out_Renamed.WriteLine(reader.Document(i));
        }

        TermEnum tis = reader.Terms();
        try
        {
            while (tis.Next())
            {
                out_Renamed.Write(tis.Term());
                out_Renamed.WriteLine(" DF=" + tis.DocFreq());

                TermPositions positions = reader.TermPositions(tis.Term());
                try
                {
                    while (positions.Next())
                    {
                        out_Renamed.Write(" doc=" + positions.Doc());
                        out_Renamed.Write(" TF=" + positions.Freq());
                        out_Renamed.Write(" pos=");
                        // The first position is printed bare, the rest comma-prefixed.
                        out_Renamed.Write(positions.NextPosition());
                        for (int j = 1; j < positions.Freq(); j++)
                        {
                            out_Renamed.Write("," + positions.NextPosition());
                        }
                        out_Renamed.WriteLine("");
                    }
                }
                finally
                {
                    positions.Close();
                }
            }
        }
        finally
        {
            tis.Close();
        }
    }
    finally
    {
        reader.Close();
    }
}
/// <summary>
/// Merges the postings of one term from several segments, all of which are positioned
/// on that term, by feeding every document (and, unless
/// <c>omitTermFreqAndPositions</c> is set, every position with its payload) to the
/// consumer obtained from <paramref name="termsConsumer"/>.
/// </summary>
/// <param name="termsConsumer">consumer that receives the term; the term text is taken from <c>smis[0]</c></param>
/// <param name="smis">array of segments</param>
/// <param name="n">number of cells in the array actually occupied</param>
/// <returns>number of documents across all segments where this term was found</returns>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n)
{
    FormatPostingsDocsConsumer docsOut = termsConsumer.AddTerm(smis[0].term.text);
    int docsWithTerm = 0;

    for (int segment = 0; segment < n; segment++)
    {
        SegmentMergeInfo mergeInfo = smis[segment];
        TermPositions postings = mergeInfo.GetPositions();
        System.Diagnostics.Debug.Assert(postings != null);
        int docBase = mergeInfo.base_Renamed;
        int[] docMap = mergeInfo.GetDocMap();
        postings.Seek(mergeInfo.termEnum);

        while (postings.Next())
        {
            docsWithTerm++;

            int mergedDoc = postings.Doc();
            if (docMap != null)
            {
                // map around deletions
                mergedDoc = docMap[mergedDoc];
            }
            // convert to merged space
            mergedDoc += docBase;

            int freq = postings.Freq();
            FormatPostingsPositionsConsumer positionsOut = docsOut.AddDoc(mergedDoc, freq);

            if (omitTermFreqAndPositions)
            {
                // No positions are forwarded for this field; move on to the next document.
                continue;
            }

            for (int remaining = freq; remaining > 0; remaining--)
            {
                int position = postings.NextPosition();
                int payloadLength = postings.GetPayloadLength();
                if (payloadLength > 0)
                {
                    // Grow the shared buffer only when the current one is too small.
                    bool bufferTooSmall = payloadBuffer == null || payloadBuffer.Length < payloadLength;
                    if (bufferTooSmall)
                    {
                        payloadBuffer = new byte[payloadLength];
                    }
                    postings.GetPayload(payloadBuffer, 0);
                }
                positionsOut.AddPosition(position, payloadBuffer, 0, payloadLength);
            }
            positionsOut.Finish();
        }
    }

    docsOut.Finish();
    return docsWithTerm;
}
/// <summary>
/// Starts five ingester threads that share one <c>IndexWriter</c> and one
/// <c>ByteArrayPool</c>, waits for all of them, then reads the index back and checks
/// that every position's payload, converted through the pool, equals the text of its
/// term. Finally checks that the pool size equals the number of threads.
/// </summary>
public virtual void TestThreadSafety()
{
    rnd = NewRandom();

    const int ThreadCount = 5;
    const int DocsPerThread = 50;
    System.String fieldName = "test";

    ByteArrayPool pool = new ByteArrayPool(ThreadCount, 5);
    Directory ramDirectory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDirectory, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

    // Start every ingester before waiting on any of them.
    SupportClass.ThreadClass[] ingesters = new SupportClass.ThreadClass[ThreadCount];
    for (int t = 0; t < ingesters.Length; t++)
    {
        ingesters[t] = new AnonymousClassThread(DocsPerThread, fieldName, pool, indexWriter, this);
        ingesters[t].Start();
    }

    foreach (SupportClass.ThreadClass ingester in ingesters)
    {
        ingester.Join();
    }

    indexWriter.Close();

    IndexReader indexReader = IndexReader.Open(ramDirectory);
    TermEnum termEnum = indexReader.Terms();
    while (termEnum.Next())
    {
        TermPositions positions = indexReader.TermPositions(termEnum.Term());
        while (positions.Next())
        {
            int occurrences = positions.Freq();
            for (int p = 0; p < occurrences; p++)
            {
                positions.NextPosition();
                Assert.AreEqual(pool.BytesToString(positions.GetPayload(new byte[5], 0)), termEnum.Term().text_ForNUnit);
            }
        }
        positions.Close();
    }
    termEnum.Close();
    indexReader.Close();

    Assert.AreEqual(pool.Size(), ThreadCount);
}
/// <summary>
/// Advances <paramref name="tp"/> to <paramref name="target"/> with <c>SkipTo</c> and
/// asserts four things: the shared <c>counter</c> stayed within
/// <paramref name="maxCounter"/>, the current document is the target, its frequency
/// is 1, and the first payload byte equals the target cast to a byte.
/// </summary>
/// <param name="tp">positions enumerator to advance</param>
/// <param name="target">document number to skip to</param>
/// <param name="maxCounter">upper bound allowed for <c>counter</c> after the skip</param>
public virtual void CheckSkipTo(TermPositions tp, int target, int maxCounter)
{
    tp.SkipTo(target);

    bool readTooMuch = maxCounter < counter;
    if (readTooMuch)
    {
        Assert.Fail("Too many bytes read: " + counter);
    }

    string docFailure = "Wrong document " + tp.Doc() + " after skipTo target " + target;
    Assert.AreEqual(target, tp.Doc(), docFailure);

    string freqFailure = "Frequency is not 1: " + tp.Freq();
    Assert.AreEqual(1, tp.Freq(), freqFailure);

    // Step onto the position, then pull its payload into a one-byte buffer.
    tp.NextPosition();
    byte[] firstPayloadByte = new byte[1];
    tp.GetPayload(firstPayloadByte, 0);

    string payloadFailure = "Wrong payload for the target " + target + ": " + firstPayloadByte[0];
    Assert.AreEqual((byte) target, firstPayloadByte[0], payloadFailure);
}
/// <summary>
/// Fills <paramref name="doc"/> with a reconstruction of document
/// <paramref name="docNum"/>. Stored fields are copied from
/// <paramref name="document"/>, with multiple values joined by '\n'. For every other
/// field, the terms found in that document are placed into a
/// <c>GrowableStringArray</c> at their positions.
/// </summary>
/// <remarks>
/// Changes from the previous version:
/// - The progress step is clamped to at least 1. With <c>_numTerms</c> equal to 0 the
///   step was 0 and <c>i++ % delta</c> threw <c>DivideByZeroException</c> on the
///   first term.
/// - The term enumerator and the positions enumerator are closed when the scan
///   ends, including on failure. A failure while closing is reported through the
///   same status path as any other error.
/// </remarks>
/// <param name="docNum">number of the document to reconstruct</param>
/// <param name="document">the document as loaded from the index; source of stored field values</param>
/// <param name="doc">output table keyed by field name; receives a <c>Field</c> for stored fields and a <c>GrowableStringArray</c> for the others</param>
void BeginAsyncReconstruction(int docNum, Document document, Hashtable doc)
{
    // get stored fields
    ArrayList sf = new ArrayList();
    for (int i = 0; i < _indexFields.Length; i++)
    {
        Field[] f = document.GetFields(_indexFields[i]);
        if (f == null || f.Length == 0 || !f[0].IsStored())
        {
            continue;
        }

        // Multi-valued fields are flattened into one value, one line per original value.
        StringBuilder sb = new StringBuilder();
        for (int k = 0; k < f.Length; k++)
        {
            if (k > 0)
            {
                sb.Append('\n');
            }
            sb.Append(f[k].StringValue());
        }

        // The flags and boost of the first value are applied to the merged field.
        Field field = Legacy.CreateField(_indexFields[i], sb.ToString(), f[0].IsStored(), f[0].IsIndexed(), f[0].IsTokenized(), f[0].IsTermVectorStored());
        field.SetBoost(f[0].GetBoost());
        doc[_indexFields[i]] = field;
        sf.Add(_indexFields[i]);
    }

    try
    {
        int i = 0;
        // Number of terms per progress step; never 0, so the modulo below is always defined.
        int delta = Math.Max(1, (int)Math.Ceiling(((double)_numTerms / 100)));

        TermEnum te = _luke.IndexReader.Terms();
        try
        {
            TermPositions tp = _luke.IndexReader.TermPositions();
            try
            {
                while (te.Next())
                {
                    if ((i++ % delta) == 0)
                    {
                        // update UI - async
                        UpdateProgress(i / delta);
                    }

                    String fieldName = te.Term().Field();

                    // skip stored fields
                    if (sf.Contains(fieldName))
                    {
                        continue;
                    }

                    tp.Seek(te.Term());
                    if (!tp.SkipTo(docNum) || tp.Doc() != docNum)
                    {
                        // this term is not found in the doc
                        continue;
                    }

                    String term = te.Term().Text();
                    GrowableStringArray terms = (GrowableStringArray)doc[fieldName];
                    if (terms == null)
                    {
                        terms = new GrowableStringArray();
                        doc[fieldName] = terms;
                    }

                    // Place the term text at each position where it occurs in this document.
                    for (int k = 0; k < tp.Freq(); k++)
                    {
                        int pos = tp.NextPosition();
                        terms.Set(pos, term);
                    }
                }
            }
            finally
            {
                tp.Close();
            }
        }
        finally
        {
            te.Close();
        }
    }
    catch (Exception exc)
    {
        // Update UI - async
        _luke.ShowStatus(exc.Message);
    }
}
/// <summary>
/// Test the term index. Walks every term of <paramref name="reader"/> and verifies,
/// for each one, that:
/// - document numbers are strictly increasing and below <c>MaxDoc()</c>;
/// - each frequency is positive;
/// - positions are at least -1 and never decrease within a document;
/// - the number of documents seen plus the number of deleted documents equals the
///   term's document frequency.
/// </summary>
/// <remarks>
/// The previous position is now recorded after each position is read. Before, it
/// stayed at its initial value of -1, so the "pos &lt; lastPos" check could never
/// detect out-of-order positions.
/// </remarks>
/// <param name="info">segment being tested (not read by this method)</param>
/// <param name="reader">reader opened on the segment</param>
/// <returns>
/// status carrying the term, term/doc and token counts; on failure its <c>error</c>
/// field holds the exception, which is reported rather than rethrown
/// </returns>
private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
{
    Status.TermIndexStatus status = new Status.TermIndexStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: terms, freq, prox...");
        }

        TermEnum termEnum = reader.Terms();
        TermPositions termPositions = reader.TermPositions();

        // Used only to count up # deleted docs for this term
        MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

        int maxDoc = reader.MaxDoc();

        while (termEnum.Next())
        {
            status.termCount++;
            Term term = termEnum.Term();
            int docFreq = termEnum.DocFreq();
            termPositions.Seek(term);
            int lastDoc = -1;
            int freq0 = 0;
            status.totFreq += docFreq;

            while (termPositions.Next())
            {
                freq0++;
                int doc = termPositions.Doc();
                int freq = termPositions.Freq();
                if (doc <= lastDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                }
                if (doc >= maxDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                }

                lastDoc = doc;
                if (freq <= 0)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                }

                int lastPos = -1;
                status.totPos += freq;
                for (int j = 0; j < freq; j++)
                {
                    int pos = termPositions.NextPosition();
                    if (pos < -1)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                    }
                    if (pos < lastPos)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                    }
                    // Remember this position so the next one is compared against it.
                    lastPos = pos;
                }
            }

            // Now count how many deleted docs occurred in
            // this term:
            int delCount;
            if (reader.HasDeletions())
            {
                myTermDocs.Seek(term);
                while (myTermDocs.Next())
                {
                }
                delCount = myTermDocs.delCount;
            }
            else
            {
                delCount = 0;
            }

            if (freq0 + delCount != docFreq)
            {
                throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
            }
        }

        Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
    }
    catch (System.Exception e)
    {
        // Failures are recorded on the status object instead of propagating.
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
/// <summary>
/// Returns true if index is clean, else false.
/// Reads the segments file of <paramref name="dir"/>, reports its format, then opens
/// every segment and tests its norms, term postings, stored fields and term vectors.
/// A segment whose test throws is reported as broken and left out of the new segment
/// list. When <paramref name="doFix"/> is true and at least one segment is broken,
/// the new segment list is written back after a five-step countdown.
/// </summary>
/// <remarks>
/// Changes from the previous version:
/// - The format description is one if/else-if chain. The first test used to stand
///   alone, so a match on <c>SegmentInfos.FORMAT</c> was always overwritten by the
///   chain that followed.
/// - The previous position is recorded while scanning positions and compared with
///   "pos &lt; lastPos", matching the error text. Before, lastPos stayed at -1 and
///   the ordering check could never fire.
/// - The stored-fields error message prints the expected count
///   (<c>reader.NumDocs()</c>) instead of printing the observed count twice.
/// </remarks>
/// <param name="dir">directory holding the index to check</param>
/// <param name="doFix">when true, write a new segments file that drops broken segments</param>
/// <returns>
/// true only when every segment passed; false when the segments file could not be
/// read, the format is newer than supported, any segment is broken (fixed or not),
/// or writing the new segments file failed
/// </returns>
public static bool Check(Directory dir, bool doFix)
{
    System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
    SegmentInfos sis = new SegmentInfos();

    try
    {
        sis.Read(dir);
    }
    catch (System.Exception t)
    {
        out_Renamed.WriteLine("ERROR: could not read any segments file in directory");
        out_Renamed.Write(t.StackTrace);
        out_Renamed.Flush();
        return false;
    }

    int numSegments = sis.Count;
    System.String segmentsFileName = sis.GetCurrentSegmentFileName();
    IndexInput input = null;
    try
    {
        input = dir.OpenInput(segmentsFileName);
    }
    catch (System.Exception t)
    {
        out_Renamed.WriteLine("ERROR: could not open segments file in directory");
        out_Renamed.Write(t.StackTrace);
        out_Renamed.Flush();
        return false;
    }

    int format = 0;
    try
    {
        format = input.ReadInt();
    }
    catch (System.Exception t)
    {
        out_Renamed.WriteLine("ERROR: could not read segment file version in directory");
        out_Renamed.Write(t.StackTrace);
        out_Renamed.Flush();
        return false;
    }
    finally
    {
        if (input != null)
        {
            input.Close();
        }
    }

    System.String sFormat = "";
    bool skip = false;

    // Single chain: exactly one branch describes the format.
    if (format == SegmentInfos.FORMAT)
    {
        sFormat = "FORMAT [Lucene Pre-2.1]";
    }
    else if (format == SegmentInfos.FORMAT_LOCKLESS)
    {
        sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
    }
    else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
    {
        sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
    }
    else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
    {
        sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
    }
    else if (format < SegmentInfos.FORMAT_SHARED_DOC_STORE)
    {
        sFormat = "int=" + format + " [newer version of Lucene than this tool]";
        skip = true;
    }
    else
    {
        sFormat = format + " [Lucene 1.3 or prior]";
    }

    out_Renamed.WriteLine("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat);

    if (skip)
    {
        out_Renamed.WriteLine("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
        return false;
    }

    // Start from a copy of the segment list and re-add only the segments that pass.
    SegmentInfos newSIS = (SegmentInfos)sis.Clone();
    newSIS.Clear();
    bool changed = false;
    int totLoseDocCount = 0;
    int numBadSegments = 0;

    for (int i = 0; i < numSegments; i++)
    {
        SegmentInfo info = sis.Info(i);
        out_Renamed.WriteLine(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);

        // Documents that would be dropped with this segment; narrowed to the live
        // count once the reader has been opened.
        int toLoseDocCount = info.docCount;

        SegmentReader reader = null;

        try
        {
            out_Renamed.WriteLine(" compound=" + info.GetUseCompoundFile());
            out_Renamed.WriteLine(" numFiles=" + info.Files().Count);
            out_Renamed.WriteLine(String.Format(nf, " size (MB)={0:f}", new Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));

            int docStoreOffset = info.GetDocStoreOffset();
            if (docStoreOffset != -1)
            {
                out_Renamed.WriteLine(" docStoreOffset=" + docStoreOffset);
                out_Renamed.WriteLine(" docStoreSegment=" + info.GetDocStoreSegment());
                out_Renamed.WriteLine(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
            }

            System.String delFileName = info.GetDelFileName();
            if (delFileName == null)
            {
                out_Renamed.WriteLine(" no deletions");
            }
            else
            {
                out_Renamed.WriteLine(" has deletions [delFileName=" + delFileName + "]");
            }

            out_Renamed.Write(" test: open reader.........");
            reader = SegmentReader.Get(info);
            int numDocs = reader.NumDocs();
            toLoseDocCount = numDocs;
            if (reader.HasDeletions())
            {
                out_Renamed.WriteLine("OK [" + (info.docCount - numDocs) + " deleted docs]");
            }
            else
            {
                out_Renamed.WriteLine("OK");
            }

            out_Renamed.Write(" test: fields, norms.......");
            System.Collections.IDictionary fieldNames = (System.Collections.IDictionary)reader.GetFieldNames(IndexReader.FieldOption.ALL);
            System.Collections.IEnumerator it = fieldNames.Keys.GetEnumerator();
            while (it.MoveNext())
            {
                System.String fieldName = (System.String)it.Current;
                byte[] b = reader.Norms(fieldName);
                if (b.Length != info.docCount)
                {
                    throw new System.SystemException("norms for field \"" + fieldName + "\" is length " + b.Length + " != maxDoc " + info.docCount);
                }
            }
            out_Renamed.WriteLine("OK [" + fieldNames.Count + " fields]");

            out_Renamed.Write(" test: terms, freq, prox...");
            TermEnum termEnum = reader.Terms();
            TermPositions termPositions = reader.TermPositions();

            // Used only to count up # deleted docs for this
            // term
            MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

            long termCount = 0;
            long totFreq = 0;
            long totPos = 0;
            while (termEnum.Next())
            {
                termCount++;
                Term term = termEnum.Term();
                int docFreq = termEnum.DocFreq();
                termPositions.Seek(term);
                int lastDoc = -1;
                int freq0 = 0;
                totFreq += docFreq;
                while (termPositions.Next())
                {
                    freq0++;
                    int doc = termPositions.Doc();
                    int freq = termPositions.Freq();
                    if (doc <= lastDoc)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + " < lastDoc " + lastDoc);
                    }

                    lastDoc = doc;
                    if (freq <= 0)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                    }

                    int lastPos = -1;
                    totPos += freq;
                    for (int j = 0; j < freq; j++)
                    {
                        int pos = termPositions.NextPosition();
                        if (pos < 0)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                        }
                        // Only a decreasing position is flagged; equal positions are accepted.
                        if (pos < lastPos)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                        }
                        // Remember this position so the next one is compared against it.
                        lastPos = pos;
                    }
                }

                // Now count how many deleted docs occurred in
                // this term:
                int delCount;
                if (reader.HasDeletions())
                {
                    myTermDocs.Seek(term);
                    while (myTermDocs.Next())
                    {
                    }
                    delCount = myTermDocs.delCount;
                }
                else
                {
                    delCount = 0;
                }

                if (freq0 + delCount != docFreq)
                {
                    throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
                }
            }

            out_Renamed.WriteLine("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");

            out_Renamed.Write(" test: stored fields.......");
            int docCount = 0;
            long totFields = 0;
            for (int j = 0; j < info.docCount; j++)
            {
                if (!reader.IsDeleted(j))
                {
                    docCount++;
                    Document doc = reader.Document(j);
                    totFields += doc.GetFields().Count;
                }
            }

            if (docCount != reader.NumDocs())
            {
                // Report the expected count (NumDocs) against the count actually seen.
                throw new System.SystemException("docCount=" + reader.NumDocs() + " but saw " + docCount + " undeleted docs");
            }

            out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total field count; avg {1:f} fields per doc]", new Object[] { totFields, (((float)totFields) / docCount) }));

            out_Renamed.Write(" test: term vectors........");
            int totVectors = 0;
            for (int j = 0; j < info.docCount; j++)
            {
                if (!reader.IsDeleted(j))
                {
                    TermFreqVector[] tfv = reader.GetTermFreqVectors(j);
                    if (tfv != null)
                    {
                        totVectors += tfv.Length;
                    }
                }
            }

            out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new Object[] { totVectors, (((float)totVectors) / docCount) }));
            out_Renamed.WriteLine("");
        }
        catch (System.Exception t)
        {
            // Any failure while testing marks the whole segment as broken.
            out_Renamed.WriteLine("FAILED");
            System.String comment;
            if (doFix)
            {
                comment = "will remove reference to this segment (-fix is specified)";
            }
            else
            {
                comment = "would remove reference to this segment (-fix was not specified)";
            }
            out_Renamed.WriteLine(" WARNING: " + comment + "; full exception:");
            out_Renamed.Write(t.StackTrace);
            out_Renamed.Flush();
            out_Renamed.WriteLine("");
            totLoseDocCount += toLoseDocCount;
            numBadSegments++;
            changed = true;
            continue;
        }
        finally
        {
            if (reader != null)
            {
                reader.Close();
            }
        }

        // Keeper
        newSIS.Add(info.Clone());
    }

    if (!changed)
    {
        out_Renamed.WriteLine("No problems were detected with this index.\n");
        return true;
    }
    else
    {
        out_Renamed.WriteLine("WARNING: " + numBadSegments + " broken segments detected");
        if (doFix)
        {
            out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents will be lost");
        }
        else
        {
            out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents would be lost if -fix were specified");
        }
        out_Renamed.WriteLine();
    }

    if (doFix)
    {
        out_Renamed.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
        for (int i = 0; i < 5; i++)
        {
            try
            {
                // 10000 * 1000 TimeSpan ticks (100 ns each), i.e. one second per step.
                System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000));
            }
            catch (System.Threading.ThreadInterruptedException)
            {
                // Re-flag the interrupt and repeat this countdown step.
                SupportClass.ThreadClass.Current().Interrupt();
                i--;
                continue;
            }

            out_Renamed.WriteLine(" " + (5 - i) + "...");
        }
        out_Renamed.Write("Writing...");
        try
        {
            newSIS.Write(dir);
        }
        catch (System.Exception t)
        {
            out_Renamed.WriteLine("FAILED; exiting");
            out_Renamed.Write(t.StackTrace);
            out_Renamed.Flush();
            return false;
        }
        out_Renamed.WriteLine("OK");
        out_Renamed.WriteLine("Wrote new segments file \"" + newSIS.GetCurrentSegmentFileName() + "\"");
    }
    else
    {
        out_Renamed.WriteLine("NOTE: would write new segments file [-fix was not specified]");
    }
    out_Renamed.WriteLine("");

    // Reaching this point means at least one segment was broken.
    return false;
}
/// <summary>
/// Stores the frequency reported by <c>tp</c> in <c>count</c>, then calls
/// <c>NextPosition()</c> once to read the first position.
/// </summary>
internal void FirstPosition()
{
    // Take the frequency of the entry tp currently reports.
    count = tp.Freq();

    // read first pos
    NextPosition();
}
/// <summary>
/// Merges the postings of one term from several segments, all of which are positioned
/// on that term. Document/frequency entries go to <c>freqOutput</c>; position deltas
/// and, for fields that store payloads, payload lengths and bytes go to
/// <c>proxOutput</c>. Skip data is buffered every <c>skipInterval</c> documents.
/// </summary>
/// <param name="smis">array of segments</param>
/// <param name="n">number of cells in the array actually occupied</param>
/// <returns>number of documents across all segments where this term was found</returns>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
private int AppendPostings(SegmentMergeInfo[] smis, int n)
{
    int previousDoc = 0;
    int docsWithTerm = 0;
    skipListWriter.ResetSkip();

    bool storePayloads = fieldInfos.FieldInfo(smis[0].term.field).storePayloads;
    // -1 can never equal a real payload length, so the first length is always written.
    int previousPayloadLength = -1;

    for (int segment = 0; segment < n; segment++)
    {
        SegmentMergeInfo mergeInfo = smis[segment];
        TermPositions postings = mergeInfo.GetPositions();
        System.Diagnostics.Debug.Assert(postings != null);
        int docBase = mergeInfo.base_Renamed;
        int[] docMap = mergeInfo.GetDocMap();
        postings.Seek(mergeInfo.termEnum);

        while (postings.Next())
        {
            int mergedDoc = postings.Doc();
            if (docMap != null)
            {
                // map around deletions
                mergedDoc = docMap[mergedDoc];
            }
            // convert to merged space
            mergedDoc += docBase;

            bool outOfOrder = mergedDoc < 0 || (docsWithTerm > 0 && mergedDoc <= previousDoc);
            if (outOfOrder)
            {
                throw new CorruptIndexException("docs out of order (" + mergedDoc + " <= " + previousDoc + " )");
            }

            docsWithTerm++;

            if (docsWithTerm % skipInterval == 0)
            {
                skipListWriter.SetSkipData(previousDoc, storePayloads, previousPayloadLength);
                skipListWriter.BufferSkip(docsWithTerm);
            }

            // The doc delta is shifted left so the low bit can flag freq == 1.
            int docCode = (mergedDoc - previousDoc) << 1;
            previousDoc = mergedDoc;

            int freq = postings.Freq();
            if (freq != 1)
            {
                freqOutput.WriteVInt(docCode); // doc delta, low bit clear
                freqOutput.WriteVInt(freq);    // explicit frequency
            }
            else
            {
                freqOutput.WriteVInt(docCode | 1); // doc delta with the freq == 1 flag
            }

            // See DocumentWriter#writePostings(Posting[], String) for documentation
            // about the encoding of positions and payloads.
            int previousPosition = 0;
            for (int remaining = freq; remaining > 0; remaining--)
            {
                int position = postings.NextPosition();
                int delta = position - previousPosition;

                if (!storePayloads)
                {
                    proxOutput.WriteVInt(delta);
                }
                else
                {
                    int payloadLength = postings.GetPayloadLength();
                    if (payloadLength != previousPayloadLength)
                    {
                        // Odd code: a new payload length follows the delta.
                        proxOutput.WriteVInt(delta * 2 + 1);
                        proxOutput.WriteVInt(payloadLength);
                        previousPayloadLength = payloadLength;
                    }
                    else
                    {
                        // Even code: payload length is unchanged from the previous position.
                        proxOutput.WriteVInt(delta * 2);
                    }

                    if (payloadLength > 0)
                    {
                        // Grow the shared buffer only when the current one is too small.
                        bool bufferTooSmall = payloadBuffer == null || payloadBuffer.Length < payloadLength;
                        if (bufferTooSmall)
                        {
                            payloadBuffer = new byte[payloadLength];
                        }
                        postings.GetPayload(payloadBuffer, 0);
                        proxOutput.WriteBytes(payloadBuffer, 0, payloadLength);
                    }
                }

                previousPosition = position;
            }
        }
    }

    return docsWithTerm;
}