public virtual void TestPayloadFieldBit() { rnd = NewRandom(); Directory ram = new RAMDirectory(); PayloadAnalyzer analyzer = new PayloadAnalyzer(); IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null); Document d = new Document(); // this field won't have any payloads d.Add(new Field("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED)); // this field will have payloads in all docs, however not for all term positions, // so this field is used to check if the DocumentWriter correctly enables the payloads bit // even if only some term positions have payloads d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED)); d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED)); // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads // enabled in only some documents d.Add(new Field("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED)); // only add payload data for field f2 analyzer.SetPayloadData("f2", 1, System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 1); writer.AddDocument(d, null); // flush writer.Close(); SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram, null); FieldInfos fi = reader.FieldInfos(); Assert.IsFalse(fi.FieldInfo("f1").storePayloads_ForNUnit, "Payload field bit should not be set."); Assert.IsTrue(fi.FieldInfo("f2").storePayloads_ForNUnit, "Payload field bit should be set."); Assert.IsFalse(fi.FieldInfo("f3").storePayloads_ForNUnit, "Payload field bit should not be set."); reader.Close(); // now we add another document which has payloads for field f3 and verify if the SegmentMerger // enabled payloads for that field writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null); d = new Document(); d.Add(new Field("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED)); d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED)); d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED)); d.Add(new Field("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED)); // add payload data for field f2 and f3 analyzer.SetPayloadData("f2", System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 1); analyzer.SetPayloadData("f3", System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 3); writer.AddDocument(d, null); // force merge writer.Optimize(null); // flush writer.Close(); reader = SegmentReader.GetOnlySegmentReader(ram, null); fi = reader.FieldInfos(); Assert.IsFalse(fi.FieldInfo("f1").storePayloads_ForNUnit, "Payload field bit should not be set."); Assert.IsTrue(fi.FieldInfo("f2").storePayloads_ForNUnit, "Payload field bit should be set."); Assert.IsTrue(fi.FieldInfo("f3").storePayloads_ForNUnit, "Payload field bit should be set."); reader.Close(); }
public virtual void TestBadSeek() { try { //After adding the document, we should be able to read it back in SegmentReader reader = new SegmentReader(new SegmentInfo("test", 3, dir)); Assert.IsTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); Assert.IsTrue(segTermDocs != null); segTermDocs.Seek(new Term("textField2", "bad")); Assert.IsTrue(segTermDocs.Next() == false); reader.Close(); } catch (System.IO.IOException e) { Assert.IsTrue(false); } try { //After adding the document, we should be able to read it back in SegmentReader reader = new SegmentReader(new SegmentInfo("test", 3, dir)); Assert.IsTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); Assert.IsTrue(segTermDocs != null); segTermDocs.Seek(new Term("junk", "bad")); Assert.IsTrue(segTermDocs.Next() == false); reader.Close(); } catch (System.IO.IOException e) { Assert.IsTrue(false); } }
public virtual void TestMixedMerge() { Directory ram = new MockRAMDirectory(); Analyzer analyzer = new StandardAnalyzer(); IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(3); writer.SetMergeFactor(2); Document d = new Document(); // this field will have Tf Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED); d.Add(f1); // this field will NOT have Tf Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED); f2.SetOmitTermFreqAndPositions(true); d.Add(f2); for (int i = 0; i < 30; i++) { writer.AddDocument(d); } // now we add another document which has term freq for field f2 and not for f1 and verify if the SegmentMerger // keep things constant d = new Document(); // Reverese f1.SetOmitTermFreqAndPositions(true); d.Add(f1); f2.SetOmitTermFreqAndPositions(false); d.Add(f2); for (int i = 0; i < 30; i++) { writer.AddDocument(d); } // force merge writer.Optimize(); // flush writer.Close(); _TestUtil.CheckIndex(ram); SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram); FieldInfos fi = reader.FieldInfos(); Assert.IsTrue(fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set."); Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set."); reader.Close(); ram.Close(); }
public virtual void TestMixedRAM() { Directory ram = new MockRAMDirectory(); Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT); IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(10); writer.MergeFactor = 2; Document d = new Document(); // this field will have Tf Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED); d.Add(f1); // this field will NOT have Tf Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED); d.Add(f2); for (int i = 0; i < 5; i++) { writer.AddDocument(d); } f2.OmitTermFreqAndPositions = true; for (int i = 0; i < 20; i++) { writer.AddDocument(d); } // force merge writer.Optimize(); // flush writer.Close(); _TestUtil.CheckIndex(ram); SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram); FieldInfos fi = reader.FieldInfos(); Assert.IsTrue(!fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should not be set."); Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set."); reader.Close(); ram.Close(); }
public virtual void TestSegmentReaderUndeleteall() { Directory dir1 = new MockRAMDirectory(); TestIndexReaderReopen.CreateIndex(dir1, false); SegmentReader origSegmentReader = SegmentReader.GetOnlySegmentReader(dir1); origSegmentReader.DeleteDocument(10); AssertDelDocsRefCountEquals(1, origSegmentReader); origSegmentReader.UndeleteAll(); Assert.IsNull(origSegmentReader.deletedDocsRef_ForNUnit); origSegmentReader.Close(); // need to test norms? dir1.Close(); }
private void PrintSegment(System.IO.StringWriter out_Renamed, System.String segment) { Directory directory = FSDirectory.GetDirectory(indexDir, false); SegmentReader reader = new SegmentReader(new SegmentInfo(segment, 1, directory)); for (int i = 0; i < reader.NumDocs(); i++) { out_Renamed.WriteLine(reader.Document(i)); } TermEnum tis = reader.Terms(); while (tis.Next()) { out_Renamed.Write(tis.Term()); out_Renamed.WriteLine(" DF=" + tis.DocFreq()); TermPositions positions = reader.TermPositions(tis.Term()); try { while (positions.Next()) { out_Renamed.Write(" doc=" + positions.Doc()); out_Renamed.Write(" TF=" + positions.Freq()); out_Renamed.Write(" pos="); out_Renamed.Write(positions.NextPosition()); for (int j = 1; j < positions.Freq(); j++) { out_Renamed.Write("," + positions.NextPosition()); } out_Renamed.WriteLine(""); } } finally { positions.Close(); } } tis.Close(); reader.Close(); directory.Close(); }
internal static void PrintSegment(System.String segment) { Directory directory = FSDirectory.GetDirectory("test", false); SegmentReader reader = new SegmentReader(new SegmentInfo(segment, 1, directory)); for (int i = 0; i < reader.NumDocs(); i++) { System.Console.Out.WriteLine(reader.Document(i)); } TermEnum tis = reader.Terms(); while (tis.Next()) { System.Console.Out.Write(tis.Term()); System.Console.Out.WriteLine(" DF=" + tis.DocFreq()); TermPositions positions = reader.TermPositions(tis.Term()); try { while (positions.Next()) { System.Console.Out.Write(" doc=" + positions.Doc()); System.Console.Out.Write(" TF=" + positions.Freq()); System.Console.Out.Write(" pos="); System.Console.Out.Write(positions.NextPosition()); for (int j = 1; j < positions.Freq(); j++) { System.Console.Out.Write("," + positions.NextPosition()); } System.Console.Out.WriteLine(""); } } finally { positions.Close(); } } tis.Close(); reader.Close(); directory.Close(); }
public virtual void TestNormsClose() { Directory dir1 = new MockRAMDirectory(); TestIndexReaderReopen.CreateIndex(dir1, false); SegmentReader reader1 = SegmentReader.GetOnlySegmentReader(dir1); reader1.Norms("field1"); Norm r1norm = (Norm)reader1.norms_ForNUnit["field1"]; SegmentReader.Ref r1BytesRef = r1norm.BytesRef(); SegmentReader reader2 = (SegmentReader)reader1.Clone(); Assert.AreEqual(2, r1norm.BytesRef().RefCount()); reader1.Close(); Assert.AreEqual(1, r1BytesRef.RefCount()); reader2.Norms("field1"); reader2.Close(); dir1.Close(); }
public virtual void TestTermDocs(int indexDivisor) { //After adding the document, we should be able to read it back in SegmentReader reader = SegmentReader.Get(true, info, indexDivisor); Assert.IsTrue(reader != null); Assert.AreEqual(indexDivisor, reader.GetTermInfosIndexDivisor()); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); Assert.IsTrue(segTermDocs != null); segTermDocs.Seek(new Term(DocHelper.TEXT_FIELD_2_KEY, "field")); if (segTermDocs.Next() == true) { int docId = segTermDocs.Doc(); Assert.IsTrue(docId == 0); int freq = segTermDocs.Freq(); Assert.IsTrue(freq == 3); } reader.Close(); }
private void PrintSegment(System.IO.StreamWriter out_Renamed, SegmentInfo si) { SegmentReader reader = SegmentReader.Get(true, si, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, null); for (int i = 0; i < reader.NumDocs(); i++) { out_Renamed.WriteLine(reader.Document(i, null)); } TermEnum tis = reader.Terms(null); while (tis.Next(null)) { out_Renamed.Write(tis.Term); out_Renamed.WriteLine(" DF=" + tis.DocFreq()); TermPositions positions = reader.TermPositions(tis.Term, null); try { while (positions.Next(null)) { out_Renamed.Write(" doc=" + positions.Doc); out_Renamed.Write(" TF=" + positions.Freq); out_Renamed.Write(" pos="); out_Renamed.Write(positions.NextPosition(null)); for (int j = 1; j < positions.Freq; j++) { out_Renamed.Write("," + positions.NextPosition(null)); } out_Renamed.WriteLine(""); } } finally { positions.Close(); } } tis.Close(); reader.Close(); }
private void PrintSegment(System.IO.StreamWriter out_Renamed, SegmentInfo si) { SegmentReader reader = SegmentReader.Get(si); for (int i = 0; i < reader.NumDocs(); i++) { out_Renamed.WriteLine(reader.Document(i)); } TermEnum tis = reader.Terms(); while (tis.Next()) { out_Renamed.Write(tis.Term()); out_Renamed.WriteLine(" DF=" + tis.DocFreq()); TermPositions positions = reader.TermPositions(tis.Term()); try { while (positions.Next()) { out_Renamed.Write(" doc=" + positions.Doc()); out_Renamed.Write(" TF=" + positions.Freq()); out_Renamed.Write(" pos="); out_Renamed.Write(positions.NextPosition()); for (int j = 1; j < positions.Freq(); j++) { out_Renamed.Write("," + positions.NextPosition()); } out_Renamed.WriteLine(""); } } finally { positions.Close(); } } tis.Close(); reader.Close(); }
public virtual void TestSegmentReaderCloseReferencing() { Directory dir1 = new MockRAMDirectory(); TestIndexReaderReopen.CreateIndex(dir1, false); SegmentReader origSegmentReader = SegmentReader.GetOnlySegmentReader(dir1); origSegmentReader.DeleteDocument(1); origSegmentReader.SetNorm(4, "field1", 0.5f); SegmentReader clonedSegmentReader = (SegmentReader)origSegmentReader.Clone(); AssertDelDocsRefCountEquals(2, origSegmentReader); origSegmentReader.Close(); AssertDelDocsRefCountEquals(1, origSegmentReader); // check the norm refs Norm norm = clonedSegmentReader.norms_ForNUnit["field1"]; Assert.AreEqual(1, norm.BytesRef().RefCount()); clonedSegmentReader.Close(); dir1.Close(); }
public virtual void testBadSeek(int indexDivisor) { { //After adding the document, we should be able to read it back in SegmentReader reader = SegmentReader.Get(true, info, indexDivisor); Assert.IsTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); Assert.IsTrue(segTermDocs != null); segTermDocs.Seek(new Term("textField2", "bad")); Assert.IsTrue(segTermDocs.Next() == false); reader.Close(); } { //After adding the document, we should be able to read it back in SegmentReader reader = SegmentReader.Get(true, info, indexDivisor); Assert.IsTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); Assert.IsTrue(segTermDocs != null); segTermDocs.Seek(new Term("junk", "bad")); Assert.IsTrue(segTermDocs.Next() == false); reader.Close(); } }
public virtual void TestTermDocs() { try { //After adding the document, we should be able to read it back in SegmentReader reader = new SegmentReader(new SegmentInfo("test", 1, dir)); Assert.IsTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); Assert.IsTrue(segTermDocs != null); segTermDocs.Seek(new Term(DocHelper.TEXT_FIELD_2_KEY, "Field")); if (segTermDocs.Next() == true) { int docId = segTermDocs.Doc(); Assert.IsTrue(docId == 0); int freq = segTermDocs.Freq(); Assert.IsTrue(freq == 3); } reader.Close(); } catch (System.IO.IOException e) { Assert.IsTrue(false); } }
/// <summary>Returns a {@link Status} instance detailing /// the state of the index. /// /// </summary> /// <param name="onlySegments">list of specific segment names to check /// /// <p/>As this method checks every byte in the specified /// segments, on a large index it can take quite a long /// time to run. /// /// <p/><b>WARNING</b>: make sure /// you only call this when the index is not opened by any /// writer. /// </param> public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments) { System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat; SegmentInfos sis = new SegmentInfos(); Status result = new Status(); result.dir = dir; try { sis.Read(dir); } catch (System.Exception t) { Msg("ERROR: could not read any segments file in directory"); result.missingSegments = true; if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } return(result); } int numSegments = sis.Count; System.String segmentsFileName = sis.GetCurrentSegmentFileName(); IndexInput input = null; try { input = dir.OpenInput(segmentsFileName); } catch (System.Exception t) { Msg("ERROR: could not open segments file in directory"); if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } result.cantOpenSegments = true; return(result); } int format = 0; try { format = input.ReadInt(); } catch (System.Exception t) { Msg("ERROR: could not read segment file version in directory"); if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } result.missingSegmentVersion = true; return(result); } finally { if (input != null) { input.Close(); } } System.String sFormat = ""; bool skip = false; if (format == SegmentInfos.FORMAT) { sFormat = "FORMAT [Lucene Pre-2.1]"; } if (format == SegmentInfos.FORMAT_LOCKLESS) { sFormat = "FORMAT_LOCKLESS [Lucene 2.1]"; } else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) { sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; } else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) { sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; } else { if (format == SegmentInfos.FORMAT_CHECKSUM) { sFormat = "FORMAT_CHECKSUM [Lucene 2.4]"; } else if (format == SegmentInfos.FORMAT_DEL_COUNT) { sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]"; } else if (format == SegmentInfos.FORMAT_HAS_PROX) { sFormat = "FORMAT_HAS_PROX [Lucene 2.4]"; } else if (format == SegmentInfos.FORMAT_USER_DATA) { sFormat = "FORMAT_USER_DATA [Lucene 2.9]"; } else if (format == SegmentInfos.FORMAT_DIAGNOSTICS) { sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]"; } else if (format < SegmentInfos.CURRENT_FORMAT) { sFormat = "int=" + format + " [newer version of Lucene than this tool]"; skip = true; } else { sFormat = format + " [Lucene 1.3 or prior]"; } } result.segmentsFileName = segmentsFileName; result.numSegments = numSegments; result.segmentFormat = sFormat; result.userData = sis.GetUserData(); System.String userDataString; if (sis.GetUserData().Count > 0) { userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData()); } else { userDataString = ""; } Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString); if (onlySegments != null) { result.partial = true; if (infoStream != null) { infoStream.Write("\nChecking only these segments:"); } System.Collections.IEnumerator it = onlySegments.GetEnumerator(); while (it.MoveNext()) { if (infoStream != null) { infoStream.Write(" " + it.Current); } } System.Collections.IEnumerator e = onlySegments.GetEnumerator(); while (e.MoveNext() == true) { result.segmentsChecked.Add(e.Current); } Msg(":"); } if (skip) { Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); result.toolOutOfDate = true; return(result); } result.newSegments = (SegmentInfos)sis.Clone(); result.newSegments.Clear(); for (int i = 0; i < numSegments; i++) { SegmentInfo info = sis.Info(i); if (onlySegments != null && !onlySegments.Contains(info.name)) { continue; } Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus(); result.segmentInfos.Add(segInfoStat); Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); segInfoStat.name = info.name; segInfoStat.docCount = info.docCount; int toLoseDocCount = info.docCount; SegmentReader reader = null; try { Msg(" compound=" + info.GetUseCompoundFile()); segInfoStat.compound = info.GetUseCompoundFile(); Msg(" hasProx=" + info.GetHasProx()); segInfoStat.hasProx = info.GetHasProx(); Msg(" numFiles=" + info.Files().Count); segInfoStat.numFiles = info.Files().Count; Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) })); segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0); System.Collections.Generic.IDictionary <string, string> diagnostics = info.GetDiagnostics(); segInfoStat.diagnostics = diagnostics; if (diagnostics.Count > 0) { Msg(" diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics)); } int docStoreOffset = info.GetDocStoreOffset(); if (docStoreOffset != -1) { Msg(" docStoreOffset=" + docStoreOffset); segInfoStat.docStoreOffset = docStoreOffset; Msg(" docStoreSegment=" + info.GetDocStoreSegment()); segInfoStat.docStoreSegment = info.GetDocStoreSegment(); Msg(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile()); segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile(); } System.String delFileName = info.GetDelFileName(); if (delFileName == null) { Msg(" no deletions"); segInfoStat.hasDeletions = false; } else { Msg(" has deletions [delFileName=" + delFileName + "]"); segInfoStat.hasDeletions = true; segInfoStat.deletionsFileName = delFileName; } if (infoStream != null) { infoStream.Write(" test: open reader........."); } reader = SegmentReader.Get(info); segInfoStat.openReaderPassed = true; int numDocs = reader.NumDocs(); toLoseDocCount = numDocs; if (reader.HasDeletions()) { if (reader.deletedDocs.Count() != info.GetDelCount()) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (reader.deletedDocs.Count() > reader.MaxDoc()) { throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (info.docCount - numDocs != info.GetDelCount()) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); } segInfoStat.numDeleted = info.docCount - numDocs; Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]"); } else { if (info.GetDelCount() != 0) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); } Msg("OK"); } if (reader.MaxDoc() != info.docCount) { throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount); } // Test getFieldNames() if (infoStream != null) { infoStream.Write(" test: fields.............."); } System.Collections.Generic.ICollection <string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL); Msg("OK [" + fieldNames.Count + " fields]"); segInfoStat.numFields = fieldNames.Count; // Test Field Norms segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader); // Test the Term Index segInfoStat.termIndexStatus = TestTermIndex(info, reader); // Test Stored Fields segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf); // Test Term Vectors segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf); // Rethrow the first exception we encountered // This will cause stats for failed segments to be incremented properly if (segInfoStat.fieldNormStatus.error != null) { throw new System.SystemException("Field Norm test failed"); } else if (segInfoStat.termIndexStatus.error != null) { throw new System.SystemException("Term Index test failed"); } else if (segInfoStat.storedFieldStatus.error != null) { throw new System.SystemException("Stored Field test failed"); } else if (segInfoStat.termVectorStatus.error != null) { throw new System.SystemException("Term Vector test failed"); } Msg(""); } catch (System.Exception t) { Msg("FAILED"); System.String comment; comment = "fixIndex() would remove reference to this segment"; Msg(" WARNING: " + comment + "; full exception:"); if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } Msg(""); result.totLoseDocCount += toLoseDocCount; result.numBadSegments++; continue; } finally { if (reader != null) { reader.Close(); } } // Keeper result.newSegments.Add(info.Clone()); } if (0 == result.numBadSegments) { result.clean = true; Msg("No problems were detected with this index.\n"); } else { Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected"); } return(result); }
internal static void PrintSegment(System.String segment) { Directory directory = FSDirectory.GetDirectory("test", false); SegmentReader reader = new SegmentReader(new SegmentInfo(segment, 1, directory)); for (int i = 0; i < reader.NumDocs(); i++) { System.Console.Out.WriteLine(reader.Document(i)); } TermEnum tis = reader.Terms(); while (tis.Next()) { System.Console.Out.Write(tis.Term()); System.Console.Out.WriteLine(" DF=" + tis.DocFreq()); TermPositions positions = reader.TermPositions(tis.Term()); try { while (positions.Next()) { System.Console.Out.Write(" doc=" + positions.Doc()); System.Console.Out.Write(" TF=" + positions.Freq()); System.Console.Out.Write(" pos="); System.Console.Out.Write(positions.NextPosition()); for (int j = 1; j < positions.Freq(); j++) System.Console.Out.Write("," + positions.NextPosition()); System.Console.Out.WriteLine(""); } } finally { positions.Close(); } } tis.Close(); reader.Close(); directory.Close(); }
/// <summary> Release the segment reader (i.e. decRef it and close if there /// are no more references. /// </summary> /// <param name="sr"> /// </param> /// <throws> IOException </throws> public virtual void Release(SegmentReader sr, bool drop) { lock (this) { bool pooled = readerMap.Contains(sr.GetSegmentInfo()); System.Diagnostics.Debug.Assert(!pooled || readerMap[sr.GetSegmentInfo()] == sr); // Drop caller's ref; for an external reader (not // pooled), this decRef will close it sr.DecRef(); if (pooled && (drop || (!Enclosing_Instance.poolReaders && sr.GetRefCount() == 1))) { // We invoke deleter.checkpoint below, so we must be // sync'd on IW if there are changes: // TODO: java 5 // assert !sr.hasChanges || Thread.holdsLock(IndexWriter.this); // Discard (don't save) changes when we are dropping // the reader; this is used only on the sub-readers // after a successful merge. sr.hasChanges &= !drop; bool hasChanges = sr.hasChanges; // Drop our ref -- this will commit any pending // changes to the dir sr.Close(); // We are the last ref to this reader; since we're // not pooling readers, we release it: readerMap.Remove(sr.GetSegmentInfo()); if (hasChanges) { // Must checkpoint w/ deleter, because this // segment reader will have created new _X_N.del // file. enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false); } } } }
/// <summary>Returns true if index is clean, else false.</summary> public static bool Check(Directory dir, bool doFix) { System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat; SegmentInfos sis = new SegmentInfos(); try { sis.Read(dir); } catch (System.Exception t) { out_Renamed.WriteLine("ERROR: could not read any segments file in directory"); out_Renamed.Write(t.StackTrace); out_Renamed.Flush(); return(false); } int numSegments = sis.Count; System.String segmentsFileName = sis.GetCurrentSegmentFileName(); IndexInput input = null; try { input = dir.OpenInput(segmentsFileName); } catch (System.Exception t) { out_Renamed.WriteLine("ERROR: could not open segments file in directory"); out_Renamed.Write(t.StackTrace); out_Renamed.Flush(); return(false); } int format = 0; try { format = input.ReadInt(); } catch (System.Exception t) { out_Renamed.WriteLine("ERROR: could not read segment file version in directory"); out_Renamed.Write(t.StackTrace); out_Renamed.Flush(); return(false); } finally { if (input != null) { input.Close(); } } System.String sFormat = ""; bool skip = false; if (format == SegmentInfos.FORMAT) { sFormat = "FORMAT [Lucene Pre-2.1]"; } if (format == SegmentInfos.FORMAT_LOCKLESS) { sFormat = "FORMAT_LOCKLESS [Lucene 2.1]"; } else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) { sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; } else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) { sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; } else if (format < SegmentInfos.FORMAT_SHARED_DOC_STORE) { sFormat = "int=" + format + " [newer version of Lucene than this tool]"; skip = true; } else { sFormat = format + " [Lucene 1.3 or prior]"; } out_Renamed.WriteLine("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat); if (skip) { out_Renamed.WriteLine("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); return(false); } SegmentInfos newSIS = (SegmentInfos)sis.Clone(); newSIS.Clear(); bool changed = false; int totLoseDocCount = 0; int numBadSegments = 0; for (int i = 0; i < numSegments; i++) { SegmentInfo info = sis.Info(i); out_Renamed.WriteLine(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); int toLoseDocCount = info.docCount; SegmentReader reader = null; try { out_Renamed.WriteLine(" compound=" + info.GetUseCompoundFile()); out_Renamed.WriteLine(" numFiles=" + info.Files().Count); out_Renamed.WriteLine(String.Format(nf, " size (MB)={0:f}", new Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) })); int docStoreOffset = info.GetDocStoreOffset(); if (docStoreOffset != -1) { out_Renamed.WriteLine(" docStoreOffset=" + docStoreOffset); out_Renamed.WriteLine(" docStoreSegment=" + info.GetDocStoreSegment()); out_Renamed.WriteLine(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile()); } System.String delFileName = info.GetDelFileName(); if (delFileName == null) { out_Renamed.WriteLine(" no deletions"); } else { out_Renamed.WriteLine(" has deletions [delFileName=" + delFileName + "]"); } out_Renamed.Write(" test: open reader........."); reader = SegmentReader.Get(info); int numDocs = reader.NumDocs(); toLoseDocCount = numDocs; if (reader.HasDeletions()) { out_Renamed.WriteLine("OK [" + (info.docCount - numDocs) + " deleted docs]"); } else { out_Renamed.WriteLine("OK"); } out_Renamed.Write(" test: fields, norms......."); System.Collections.IDictionary fieldNames = (System.Collections.IDictionary)reader.GetFieldNames(IndexReader.FieldOption.ALL); System.Collections.IEnumerator it = fieldNames.Keys.GetEnumerator(); while (it.MoveNext()) { System.String fieldName = (System.String)it.Current; byte[] b = reader.Norms(fieldName); if (b.Length != info.docCount) { throw new System.SystemException("norms for field \"" + fieldName + "\" is length " + b.Length + " != maxDoc " + info.docCount); } } out_Renamed.WriteLine("OK [" + fieldNames.Count + " fields]"); out_Renamed.Write(" test: terms, freq, prox..."); TermEnum termEnum = reader.Terms(); TermPositions termPositions = reader.TermPositions(); // Used only to count up # deleted docs for this // term MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader); long termCount = 0; long totFreq = 0; long totPos = 0; while (termEnum.Next()) { termCount++; Term term = termEnum.Term(); int docFreq = termEnum.DocFreq(); termPositions.Seek(term); int lastDoc = -1; int freq0 = 0; totFreq += docFreq; while (termPositions.Next()) { freq0++; int doc = termPositions.Doc(); int freq = termPositions.Freq(); if (doc <= lastDoc) { throw new System.SystemException("term " + term + ": doc " + doc + " < lastDoc " + lastDoc); } lastDoc = doc; if (freq <= 0) { throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); } int lastPos = -1; totPos += freq; for (int j = 0; j < freq; j++) { int pos = termPositions.NextPosition(); if (pos < 0) { throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds"); } if (pos <= lastPos) { throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos); } } } // Now count how many deleted docs occurred in // this term: int delCount; if (reader.HasDeletions()) { myTermDocs.Seek(term); while (myTermDocs.Next()) { } delCount = myTermDocs.delCount; } else { delCount = 0; } if (freq0 + delCount != docFreq) { throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount); } } out_Renamed.WriteLine("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]"); out_Renamed.Write(" test: stored fields......."); int docCount = 0; long totFields = 0; for (int j = 0; j < info.docCount; j++) { if (!reader.IsDeleted(j)) { docCount++; Document doc = reader.Document(j); totFields += doc.GetFields().Count; } } if (docCount != reader.NumDocs()) { throw new System.SystemException("docCount=" + docCount + " but saw " + docCount + " undeleted docs"); } out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total field count; avg {1:f} fields per doc]", new Object[] { totFields, (((float)totFields) / docCount) })); out_Renamed.Write(" test: term vectors........"); int totVectors = 0; for (int j = 0; j < info.docCount; j++) { if (!reader.IsDeleted(j)) { TermFreqVector[] tfv = reader.GetTermFreqVectors(j); if (tfv != null) { totVectors += tfv.Length; } } } out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new Object[] { totVectors, (((float)totVectors) / docCount) })); out_Renamed.WriteLine(""); } catch (System.Exception t) { out_Renamed.WriteLine("FAILED"); System.String comment; if (doFix) { comment = "will remove reference to this segment (-fix is specified)"; } else { comment = "would remove reference to this segment (-fix was not specified)"; } out_Renamed.WriteLine(" WARNING: " + comment + "; full exception:"); out_Renamed.Write(t.StackTrace); out_Renamed.Flush(); out_Renamed.WriteLine(""); totLoseDocCount += toLoseDocCount; numBadSegments++; changed = true; continue; } finally { if (reader != null) { reader.Close(); } } // Keeper newSIS.Add(info.Clone()); } if (!changed) { out_Renamed.WriteLine("No problems were detected with this index.\n"); return(true); } else { out_Renamed.WriteLine("WARNING: " + numBadSegments + " broken segments detected"); if (doFix) { out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents will be lost"); } else { out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents would be lost if -fix were specified"); } out_Renamed.WriteLine(); } if (doFix) { out_Renamed.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!"); for (int i = 0; i < 5; i++) { try { System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000)); } catch (System.Threading.ThreadInterruptedException) { SupportClass.ThreadClass.Current().Interrupt(); i--; continue; } out_Renamed.WriteLine(" " + (5 - i) + "..."); } out_Renamed.Write("Writing..."); try { newSIS.Write(dir); } catch (System.Exception t) { out_Renamed.WriteLine("FAILED; exiting"); out_Renamed.Write(t.StackTrace); out_Renamed.Flush(); return(false); } out_Renamed.WriteLine("OK"); out_Renamed.WriteLine("Wrote new segments file \"" + newSIS.GetCurrentSegmentFileName() + "\""); } else { out_Renamed.WriteLine("NOTE: would write new segments file [-fix was not specified]"); } out_Renamed.WriteLine(""); return(false); }
private void PrintSegment(System.IO.StringWriter out_Renamed, System.String segment) { Directory directory = FSDirectory.GetDirectory(indexDir, false); SegmentReader reader = new SegmentReader(new SegmentInfo(segment, 1, directory)); for (int i = 0; i < reader.NumDocs(); i++) { out_Renamed.WriteLine(reader.Document(i)); } TermEnum tis = reader.Terms(); while (tis.Next()) { out_Renamed.Write(tis.Term()); out_Renamed.WriteLine(" DF=" + tis.DocFreq()); TermPositions positions = reader.TermPositions(tis.Term()); try { while (positions.Next()) { out_Renamed.Write(" doc=" + positions.Doc()); out_Renamed.Write(" TF=" + positions.Freq()); out_Renamed.Write(" pos="); out_Renamed.Write(positions.NextPosition()); for (int j = 1; j < positions.Freq(); j++) out_Renamed.Write("," + positions.NextPosition()); out_Renamed.WriteLine(""); } } finally { positions.Close(); } } tis.Close(); reader.Close(); directory.Close(); }
/// <summary> Release the segment reader (i.e. decRef it and close if there /// are no more references. /// </summary> /// <param name="sr"> /// </param> /// <throws> IOException </throws> public virtual void Release(SegmentReader sr, bool drop) { lock (this) { bool pooled = readerMap.Contains(sr.GetSegmentInfo()); System.Diagnostics.Debug.Assert(!pooled | readerMap[sr.GetSegmentInfo()] == sr); // Drop caller's ref sr.DecRef(); if (pooled && (drop || (!Enclosing_Instance.poolReaders && sr.GetRefCount() == 1))) { // We are the last ref to this reader; since we're // not pooling readers, we release it: readerMap.Remove(sr.GetSegmentInfo()); // TODO: java 5 // assert !sr.hasChanges || Thread.holdsLock(IndexWriter.this); // Drop our ref -- this will commit any pending // changes to the dir bool success = false; try { sr.Close(); success = true; } finally { if (!success && sr.hasChanges) { // Abandon the changes & retry closing: sr.hasChanges = false; try { sr.Close(); } catch (System.Exception ignore) { // Keep throwing original exception } } } } } }