Example #1
        public virtual void  TestPayloadFieldBit()
        {
            rnd = NewRandom();
            Directory       ram      = new RAMDirectory();
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter     writer   = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
            Document        d        = new Document();

            // this field won't have any payloads
            d.Add(new Field("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED));
            // this field will have payloads in all docs, however not for all term positions,
            // so this field is used to check if the DocumentWriter correctly enables the payloads bit
            // even if only some term positions have payloads
            d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
            d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
            // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
            // enabled in only some documents
            d.Add(new Field("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED));
            // only add payload data for field f2
            analyzer.SetPayloadData("f2", 1, System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 1);
            writer.AddDocument(d, null);
            // flush
            writer.Close();

            SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram, null);
            FieldInfos    fi     = reader.FieldInfos();

            Assert.IsFalse(fi.FieldInfo("f1").storePayloads_ForNUnit, "Payload field bit should not be set.");
            Assert.IsTrue(fi.FieldInfo("f2").storePayloads_ForNUnit, "Payload field bit should be set.");
            Assert.IsFalse(fi.FieldInfo("f3").storePayloads_ForNUnit, "Payload field bit should not be set.");
            reader.Close();

            // now we add another document which has payloads for field f3, and verify that the SegmentMerger
            // enables payloads for that field
            writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
            d      = new Document();
            d.Add(new Field("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED));
            d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
            d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
            d.Add(new Field("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED));
            // add payload data for fields f2 and f3
            analyzer.SetPayloadData("f2", System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 1);
            analyzer.SetPayloadData("f3", System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 3);
            writer.AddDocument(d, null);
            // force merge
            writer.Optimize(null);
            // flush
            writer.Close();

            reader = SegmentReader.GetOnlySegmentReader(ram, null);
            fi     = reader.FieldInfos();
            Assert.IsFalse(fi.FieldInfo("f1").storePayloads_ForNUnit, "Payload field bit should not be set.");
            Assert.IsTrue(fi.FieldInfo("f2").storePayloads_ForNUnit, "Payload field bit should be set.");
            Assert.IsTrue(fi.FieldInfo("f3").storePayloads_ForNUnit, "Payload field bit should be set.");
            reader.Close();
        }
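
Note: the PayloadAnalyzer used in Example #1 is a test helper that is not part of this listing. As a rough, hypothetical sketch of what such a helper does, the class below wraps a WhitespaceTokenizer with a TokenFilter that attaches a fixed payload to every token of one chosen field. It is written against the Lucene.NET 3.x attribute API (AddAttribute<IPayloadAttribute>()), so names and signatures may differ from the 2.9-era code in these examples.

// Hypothetical stand-in for the PayloadAnalyzer helper -- a sketch only.
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Index;

class FixedPayloadAnalyzer : Analyzer
{
    private readonly string payloadField;
    private readonly byte[] payloadData;

    public FixedPayloadAnalyzer(string payloadField, byte[] payloadData)
    {
        this.payloadField = payloadField;
        this.payloadData = payloadData;
    }

    public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
    {
        TokenStream ts = new WhitespaceTokenizer(reader);
        // Only the chosen field receives payloads, mirroring how the test above
        // expects payloads on f2 (and later f3) but never on f1.
        return fieldName == payloadField ? new FixedPayloadFilter(ts, payloadData) : ts;
    }

    private sealed class FixedPayloadFilter : TokenFilter
    {
        private readonly IPayloadAttribute payloadAtt;
        private readonly byte[] data;

        internal FixedPayloadFilter(TokenStream input, byte[] data) : base(input)
        {
            this.data = data;
            payloadAtt = AddAttribute<IPayloadAttribute>();
        }

        public override bool IncrementToken()
        {
            if (!input.IncrementToken())
                return false;
            // Attach the same payload bytes to every term position of this field.
            payloadAtt.Payload = new Payload(data);
            return true;
        }
    }
}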
Example #2
 public virtual void  TestBadSeek()
 {
     try
     {
         //After adding the document, we should be able to read it back in
         SegmentReader reader = new SegmentReader(new SegmentInfo("test", 3, dir));
         Assert.IsTrue(reader != null);
         SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
         Assert.IsTrue(segTermDocs != null);
         segTermDocs.Seek(new Term("textField2", "bad"));
         Assert.IsTrue(segTermDocs.Next() == false);
         reader.Close();
     }
     catch (System.IO.IOException e)
     {
         Assert.IsTrue(false);
     }
     try
     {
         //After adding the document, we should be able to read it back in
         SegmentReader reader = new SegmentReader(new SegmentInfo("test", 3, dir));
         Assert.IsTrue(reader != null);
         SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
         Assert.IsTrue(segTermDocs != null);
         segTermDocs.Seek(new Term("junk", "bad"));
         Assert.IsTrue(segTermDocs.Next() == false);
         reader.Close();
     }
     catch (System.IO.IOException e)
     {
         Assert.IsTrue(false);
     }
 }
Example #3
        public virtual void  TestMixedMerge()
        {
            Directory   ram      = new MockRAMDirectory();
            Analyzer    analyzer = new StandardAnalyzer();
            IndexWriter writer   = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetMaxBufferedDocs(3);
            writer.SetMergeFactor(2);
            Document d = new Document();

            // this field will have Tf
            Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);

            d.Add(f1);

            // this field will NOT have Tf
            Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);

            f2.SetOmitTermFreqAndPositions(true);
            d.Add(f2);

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            // now we add another document which has term freqs for field f2 and not for f1, and verify that the SegmentMerger
            // keeps things consistent
            d = new Document();

            // Reverse
            f1.SetOmitTermFreqAndPositions(true);
            d.Add(f1);

            f2.SetOmitTermFreqAndPositions(false);
            d.Add(f2);

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            // force merge
            writer.Optimize();
            // flush
            writer.Close();

            _TestUtil.CheckIndex(ram);

            SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram);
            FieldInfos    fi     = reader.FieldInfos();

            Assert.IsTrue(fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");
            Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");

            reader.Close();
            ram.Close();
        }
Example #4
        public virtual void  TestMixedRAM()
        {
            Directory   ram      = new MockRAMDirectory();
            Analyzer    analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
            IndexWriter writer   = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetMaxBufferedDocs(10);
            writer.MergeFactor = 2;
            Document d = new Document();

            // this field will have Tf
            Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);

            d.Add(f1);

            // this field will NOT have Tf
            Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);

            d.Add(f2);

            for (int i = 0; i < 5; i++)
            {
                writer.AddDocument(d);
            }

            f2.OmitTermFreqAndPositions = true;

            for (int i = 0; i < 20; i++)
            {
                writer.AddDocument(d);
            }

            // force merge
            writer.Optimize();

            // flush
            writer.Close();

            _TestUtil.CheckIndex(ram);

            SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram);
            FieldInfos    fi     = reader.FieldInfos();

            Assert.IsTrue(!fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should not be set.");
            Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");

            reader.Close();
            ram.Close();
        }
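
A side note on the switch these two tests exercise: omitting term frequencies and positions is normally reserved for fields that are only ever matched by plain term queries (IDs, flags, and the like), because phrase and span queries need the positional data. A minimal sketch using the same Field API as above; the field name and value are made up:

Document doc = new Document();
Field id = new Field("id", "doc-42", Field.Store.YES, Field.Index.NOT_ANALYZED);
// Method-style setter as in Example #3; newer builds expose the same switch as
// the property used in Example #4 (id.OmitTermFreqAndPositions = true).
id.SetOmitTermFreqAndPositions(true);
doc.Add(id);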
Example #5
        public virtual void  TestSegmentReaderUndeleteall()
        {
            Directory dir1 = new MockRAMDirectory();

            TestIndexReaderReopen.CreateIndex(dir1, false);
            SegmentReader origSegmentReader = SegmentReader.GetOnlySegmentReader(dir1);

            origSegmentReader.DeleteDocument(10);
            AssertDelDocsRefCountEquals(1, origSegmentReader);
            origSegmentReader.UndeleteAll();
            Assert.IsNull(origSegmentReader.deletedDocsRef_ForNUnit);
            origSegmentReader.Close();
            // need to test norms?
            dir1.Close();
        }
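
AssertDelDocsRefCountEquals is a helper defined elsewhere in the test class. Judging from how deletedDocsRef_ForNUnit and RefCount() are used in these examples, it presumably amounts to something like the following sketch (not the verbatim helper):

private void AssertDelDocsRefCountEquals(int refCount, SegmentReader reader)
{
    // deletedDocsRef_ForNUnit exposes the reader's internal deleted-docs Ref counter to the test.
    Assert.AreEqual(refCount, reader.deletedDocsRef_ForNUnit.RefCount());
}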
Example #6
        private void  PrintSegment(System.IO.StringWriter out_Renamed, System.String segment)
        {
            Directory     directory = FSDirectory.GetDirectory(indexDir, false);
            SegmentReader reader    = new SegmentReader(new SegmentInfo(segment, 1, directory));

            for (int i = 0; i < reader.NumDocs(); i++)
            {
                out_Renamed.WriteLine(reader.Document(i));
            }

            TermEnum tis = reader.Terms();

            while (tis.Next())
            {
                out_Renamed.Write(tis.Term());
                out_Renamed.WriteLine(" DF=" + tis.DocFreq());

                TermPositions positions = reader.TermPositions(tis.Term());
                try
                {
                    while (positions.Next())
                    {
                        out_Renamed.Write(" doc=" + positions.Doc());
                        out_Renamed.Write(" TF=" + positions.Freq());
                        out_Renamed.Write(" pos=");
                        out_Renamed.Write(positions.NextPosition());
                        for (int j = 1; j < positions.Freq(); j++)
                        {
                            out_Renamed.Write("," + positions.NextPosition());
                        }
                        out_Renamed.WriteLine("");
                    }
                }
                finally
                {
                    positions.Close();
                }
            }
            tis.Close();
            reader.Close();
            directory.Close();
        }
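
For reference, each term printed by the loop above comes out as the term itself, its document frequency, and then one line per matching document. Assuming Term's default ToString() renders as field:text, a term that occurs three times in document 0 would print roughly like this (the values are illustrative):

f1:payloads DF=1
 doc=0 TF=3 pos=2,7,11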
Example #7
        internal static void  PrintSegment(System.String segment)
        {
            Directory     directory = FSDirectory.GetDirectory("test", false);
            SegmentReader reader    = new SegmentReader(new SegmentInfo(segment, 1, directory));

            for (int i = 0; i < reader.NumDocs(); i++)
            {
                System.Console.Out.WriteLine(reader.Document(i));
            }

            TermEnum tis = reader.Terms();

            while (tis.Next())
            {
                System.Console.Out.Write(tis.Term());
                System.Console.Out.WriteLine(" DF=" + tis.DocFreq());

                TermPositions positions = reader.TermPositions(tis.Term());
                try
                {
                    while (positions.Next())
                    {
                        System.Console.Out.Write(" doc=" + positions.Doc());
                        System.Console.Out.Write(" TF=" + positions.Freq());
                        System.Console.Out.Write(" pos=");
                        System.Console.Out.Write(positions.NextPosition());
                        for (int j = 1; j < positions.Freq(); j++)
                        {
                            System.Console.Out.Write("," + positions.NextPosition());
                        }
                        System.Console.Out.WriteLine("");
                    }
                }
                finally
                {
                    positions.Close();
                }
            }
            tis.Close();
            reader.Close();
            directory.Close();
        }
Example #8
        public virtual void  TestNormsClose()
        {
            Directory dir1 = new MockRAMDirectory();

            TestIndexReaderReopen.CreateIndex(dir1, false);
            SegmentReader reader1 = SegmentReader.GetOnlySegmentReader(dir1);

            reader1.Norms("field1");
            Norm r1norm = (Norm)reader1.norms_ForNUnit["field1"];

            SegmentReader.Ref r1BytesRef = r1norm.BytesRef();
            SegmentReader     reader2    = (SegmentReader)reader1.Clone();

            Assert.AreEqual(2, r1norm.BytesRef().RefCount());
            reader1.Close();
            Assert.AreEqual(1, r1BytesRef.RefCount());
            reader2.Norms("field1");
            reader2.Close();
            dir1.Close();
        }
Example #9
        public virtual void  TestTermDocs(int indexDivisor)
        {
            //After adding the document, we should be able to read it back in
            SegmentReader reader = SegmentReader.Get(true, info, indexDivisor);

            Assert.IsTrue(reader != null);
            Assert.AreEqual(indexDivisor, reader.GetTermInfosIndexDivisor());
            SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);

            Assert.IsTrue(segTermDocs != null);
            segTermDocs.Seek(new Term(DocHelper.TEXT_FIELD_2_KEY, "field"));
            if (segTermDocs.Next() == true)
            {
                int docId = segTermDocs.Doc();
                Assert.IsTrue(docId == 0);
                int freq = segTermDocs.Freq();
                Assert.IsTrue(freq == 3);
            }
            reader.Close();
        }
Example #10
        private void  PrintSegment(System.IO.StreamWriter out_Renamed, SegmentInfo si)
        {
            SegmentReader reader = SegmentReader.Get(true, si, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, null);

            for (int i = 0; i < reader.NumDocs(); i++)
            {
                out_Renamed.WriteLine(reader.Document(i, null));
            }

            TermEnum tis = reader.Terms(null);

            while (tis.Next(null))
            {
                out_Renamed.Write(tis.Term);
                out_Renamed.WriteLine(" DF=" + tis.DocFreq());

                TermPositions positions = reader.TermPositions(tis.Term, null);
                try
                {
                    while (positions.Next(null))
                    {
                        out_Renamed.Write(" doc=" + positions.Doc);
                        out_Renamed.Write(" TF=" + positions.Freq);
                        out_Renamed.Write(" pos=");
                        out_Renamed.Write(positions.NextPosition(null));
                        for (int j = 1; j < positions.Freq; j++)
                        {
                            out_Renamed.Write("," + positions.NextPosition(null));
                        }
                        out_Renamed.WriteLine("");
                    }
                }
                finally
                {
                    positions.Close();
                }
            }
            tis.Close();
            reader.Close();
        }
Example #11
        private void  PrintSegment(System.IO.StreamWriter out_Renamed, SegmentInfo si)
        {
            SegmentReader reader = SegmentReader.Get(si);

            for (int i = 0; i < reader.NumDocs(); i++)
            {
                out_Renamed.WriteLine(reader.Document(i));
            }

            TermEnum tis = reader.Terms();

            while (tis.Next())
            {
                out_Renamed.Write(tis.Term());
                out_Renamed.WriteLine(" DF=" + tis.DocFreq());

                TermPositions positions = reader.TermPositions(tis.Term());
                try
                {
                    while (positions.Next())
                    {
                        out_Renamed.Write(" doc=" + positions.Doc());
                        out_Renamed.Write(" TF=" + positions.Freq());
                        out_Renamed.Write(" pos=");
                        out_Renamed.Write(positions.NextPosition());
                        for (int j = 1; j < positions.Freq(); j++)
                        {
                            out_Renamed.Write("," + positions.NextPosition());
                        }
                        out_Renamed.WriteLine("");
                    }
                }
                finally
                {
                    positions.Close();
                }
            }
            tis.Close();
            reader.Close();
        }
Example #12
        public virtual void  TestSegmentReaderCloseReferencing()
        {
            Directory dir1 = new MockRAMDirectory();

            TestIndexReaderReopen.CreateIndex(dir1, false);
            SegmentReader origSegmentReader = SegmentReader.GetOnlySegmentReader(dir1);

            origSegmentReader.DeleteDocument(1);
            origSegmentReader.SetNorm(4, "field1", 0.5f);

            SegmentReader clonedSegmentReader = (SegmentReader)origSegmentReader.Clone();

            AssertDelDocsRefCountEquals(2, origSegmentReader);
            origSegmentReader.Close();
            AssertDelDocsRefCountEquals(1, origSegmentReader);
            // check the norm refs
            Norm norm = clonedSegmentReader.norms_ForNUnit["field1"];

            Assert.AreEqual(1, norm.BytesRef().RefCount());
            clonedSegmentReader.Close();
            dir1.Close();
        }
Example #13
 public virtual void  testBadSeek(int indexDivisor)
 {
     {
         //After adding the document, we should be able to read it back in
         SegmentReader reader = SegmentReader.Get(true, info, indexDivisor);
         Assert.IsTrue(reader != null);
         SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
         Assert.IsTrue(segTermDocs != null);
         segTermDocs.Seek(new Term("textField2", "bad"));
         Assert.IsTrue(segTermDocs.Next() == false);
         reader.Close();
     }
     {
         //After adding the document, we should be able to read it back in
         SegmentReader reader = SegmentReader.Get(true, info, indexDivisor);
         Assert.IsTrue(reader != null);
         SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
         Assert.IsTrue(segTermDocs != null);
         segTermDocs.Seek(new Term("junk", "bad"));
         Assert.IsTrue(segTermDocs.Next() == false);
         reader.Close();
     }
 }
Example #14
		public virtual void  TestTermDocs()
		{
			try
			{
				//After adding the document, we should be able to read it back in
				SegmentReader reader = new SegmentReader(new SegmentInfo("test", 1, dir));
				Assert.IsTrue(reader != null);
				SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
				Assert.IsTrue(segTermDocs != null);
				segTermDocs.Seek(new Term(DocHelper.TEXT_FIELD_2_KEY, "Field"));
				if (segTermDocs.Next() == true)
				{
					int docId = segTermDocs.Doc();
					Assert.IsTrue(docId == 0);
					int freq = segTermDocs.Freq();
					Assert.IsTrue(freq == 3);
				}
				reader.Close();
			}
			catch (System.IO.IOException e)
			{
				Assert.IsTrue(false);
			}
		}
Example #15
 public virtual void  TestTermDocs()
 {
     try
     {
         //After adding the document, we should be able to read it back in
         SegmentReader reader = new SegmentReader(new SegmentInfo("test", 1, dir));
         Assert.IsTrue(reader != null);
         SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
         Assert.IsTrue(segTermDocs != null);
         segTermDocs.Seek(new Term(DocHelper.TEXT_FIELD_2_KEY, "Field"));
         if (segTermDocs.Next() == true)
         {
             int docId = segTermDocs.Doc();
             Assert.IsTrue(docId == 0);
             int freq = segTermDocs.Freq();
             Assert.IsTrue(freq == 3);
         }
         reader.Close();
     }
     catch (System.IO.IOException e)
     {
         Assert.IsTrue(false);
     }
 }
Example #16
        /// <summary>Returns a {@link Status} instance detailing
        /// the state of the index.
        ///
        /// <p/>As this method checks every byte in the specified
        /// segments, on a large index it can take quite a long
        /// time to run.
        ///
        /// <p/><b>WARNING</b>: make sure
        /// you only call this when the index is not opened by any
        /// writer.
        /// </summary>
        /// <param name="onlySegments">list of specific segment names to check</param>
        public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments)
        {
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis    = new SegmentInfos();
            Status       result = new Status();

            result.dir = dir;
            try
            {
                sis.Read(dir);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read any segments file in directory");
                result.missingSegments = true;
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                return(result);
            }

            int numSegments = sis.Count;

            System.String segmentsFileName = sis.GetCurrentSegmentFileName();
            IndexInput    input            = null;

            try
            {
                input = dir.OpenInput(segmentsFileName);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not open segments file in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.cantOpenSegments = true;
                return(result);
            }
            int format = 0;

            try
            {
                format = input.ReadInt();
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read segment file version in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.missingSegmentVersion = true;
                return(result);
            }
            finally
            {
                if (input != null)
                {
                    input.Close();
                }
            }

            System.String sFormat = "";
            bool          skip    = false;

            if (format == SegmentInfos.FORMAT)
            {
                sFormat = "FORMAT [Lucene Pre-2.1]";
            }
            if (format == SegmentInfos.FORMAT_LOCKLESS)
            {
                sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
            }
            else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
            {
                sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
            }
            else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
            }
            else
            {
                if (format == SegmentInfos.FORMAT_CHECKSUM)
                {
                    sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_DEL_COUNT)
                {
                    sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_HAS_PROX)
                {
                    sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_USER_DATA)
                {
                    sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
                }
                else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
                {
                    sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
                }
                else if (format < SegmentInfos.CURRENT_FORMAT)
                {
                    sFormat = "int=" + format + " [newer version of Lucene than this tool]";
                    skip    = true;
                }
                else
                {
                    sFormat = format + " [Lucene 1.3 or prior]";
                }
            }

            result.segmentsFileName = segmentsFileName;
            result.numSegments      = numSegments;
            result.segmentFormat    = sFormat;
            result.userData         = sis.GetUserData();
            System.String userDataString;
            if (sis.GetUserData().Count > 0)
            {
                userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData());
            }
            else
            {
                userDataString = "";
            }

            Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

            if (onlySegments != null)
            {
                result.partial = true;
                if (infoStream != null)
                {
                    infoStream.Write("\nChecking only these segments:");
                }
                System.Collections.IEnumerator it = onlySegments.GetEnumerator();
                while (it.MoveNext())
                {
                    if (infoStream != null)
                    {
                        infoStream.Write(" " + it.Current);
                    }
                }
                System.Collections.IEnumerator e = onlySegments.GetEnumerator();
                while (e.MoveNext() == true)
                {
                    result.segmentsChecked.Add(e.Current);
                }
                Msg(":");
            }

            if (skip)
            {
                Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                result.toolOutOfDate = true;
                return(result);
            }


            result.newSegments = (SegmentInfos)sis.Clone();
            result.newSegments.Clear();

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = sis.Info(i);
                if (onlySegments != null && !onlySegments.Contains(info.name))
                {
                    continue;
                }
                Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
                result.segmentInfos.Add(segInfoStat);
                Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
                segInfoStat.name     = info.name;
                segInfoStat.docCount = info.docCount;

                int toLoseDocCount = info.docCount;

                SegmentReader reader = null;

                try
                {
                    Msg("    compound=" + info.GetUseCompoundFile());
                    segInfoStat.compound = info.GetUseCompoundFile();
                    Msg("    hasProx=" + info.GetHasProx());
                    segInfoStat.hasProx = info.GetHasProx();
                    Msg("    numFiles=" + info.Files().Count);
                    segInfoStat.numFiles = info.Files().Count;
                    Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
                    segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
                    System.Collections.Generic.IDictionary <string, string> diagnostics = info.GetDiagnostics();
                    segInfoStat.diagnostics = diagnostics;
                    if (diagnostics.Count > 0)
                    {
                        Msg("    diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics));
                    }

                    int docStoreOffset = info.GetDocStoreOffset();
                    if (docStoreOffset != -1)
                    {
                        Msg("    docStoreOffset=" + docStoreOffset);
                        segInfoStat.docStoreOffset = docStoreOffset;
                        Msg("    docStoreSegment=" + info.GetDocStoreSegment());
                        segInfoStat.docStoreSegment = info.GetDocStoreSegment();
                        Msg("    docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
                        segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
                    }
                    System.String delFileName = info.GetDelFileName();
                    if (delFileName == null)
                    {
                        Msg("    no deletions");
                        segInfoStat.hasDeletions = false;
                    }
                    else
                    {
                        Msg("    has deletions [delFileName=" + delFileName + "]");
                        segInfoStat.hasDeletions      = true;
                        segInfoStat.deletionsFileName = delFileName;
                    }
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: open reader.........");
                    }
                    reader = SegmentReader.Get(info);

                    segInfoStat.openReaderPassed = true;

                    int numDocs = reader.NumDocs();
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions())
                    {
                        if (reader.deletedDocs.Count() != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (reader.deletedDocs.Count() > reader.MaxDoc())
                        {
                            throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (info.docCount - numDocs != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        segInfoStat.numDeleted = info.docCount - numDocs;
                        Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
                    }
                    else
                    {
                        if (info.GetDelCount() != 0)
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        Msg("OK");
                    }
                    if (reader.MaxDoc() != info.docCount)
                    {
                        throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount);
                    }

                    // Test getFieldNames()
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: fields..............");
                    }
                    System.Collections.Generic.ICollection <string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
                    Msg("OK [" + fieldNames.Count + " fields]");
                    segInfoStat.numFields = fieldNames.Count;

                    // Test Field Norms
                    segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

                    // Test the Term Index
                    segInfoStat.termIndexStatus = TestTermIndex(info, reader);

                    // Test Stored Fields
                    segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

                    // Test Term Vectors
                    segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

                    // Rethrow the first exception we encountered
                    //  This will cause stats for failed segments to be incremented properly
                    if (segInfoStat.fieldNormStatus.error != null)
                    {
                        throw new System.SystemException("Field Norm test failed");
                    }
                    else if (segInfoStat.termIndexStatus.error != null)
                    {
                        throw new System.SystemException("Term Index test failed");
                    }
                    else if (segInfoStat.storedFieldStatus.error != null)
                    {
                        throw new System.SystemException("Stored Field test failed");
                    }
                    else if (segInfoStat.termVectorStatus.error != null)
                    {
                        throw new System.SystemException("Term Vector test failed");
                    }

                    Msg("");
                }
                catch (System.Exception t)
                {
                    Msg("FAILED");
                    System.String comment;
                    comment = "fixIndex() would remove reference to this segment";
                    Msg("    WARNING: " + comment + "; full exception:");
                    if (infoStream != null)
                    {
                        infoStream.WriteLine(t.StackTrace);
                    }
                    Msg("");
                    result.totLoseDocCount += toLoseDocCount;
                    result.numBadSegments++;
                    continue;
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.Close();
                    }
                }

                // Keeper
                result.newSegments.Add(info.Clone());
            }

            if (0 == result.numBadSegments)
            {
                result.clean = true;
                Msg("No problems were detected with this index.\n");
            }
            else
            {
                Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
            }

            return(result);
        }
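
For context, a typical call site for the checker above might look like the sketch below. It relies only on members that appear in this listing (CheckIndex_Renamed_Method, Status.clean, Status.numBadSegments); the CheckIndex(Directory) constructor is assumed from the Lucene 2.9 API, and the directory path reuses the one from Example #7.

Directory dir = FSDirectory.GetDirectory("test", false);   // same helper as in Example #7
CheckIndex checker = new CheckIndex(dir);
// Passing null checks every segment; pass a list of segment names to check a subset.
CheckIndex.Status status = checker.CheckIndex_Renamed_Method(null);
if (!status.clean)
{
    System.Console.Out.WriteLine(status.numBadSegments + " broken segments were found.");
}
dir.Close();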
Example #17
		internal static void  PrintSegment(System.String segment)
		{
			Directory directory = FSDirectory.GetDirectory("test", false);
			SegmentReader reader = new SegmentReader(new SegmentInfo(segment, 1, directory));
			
			for (int i = 0; i < reader.NumDocs(); i++)
			{
				System.Console.Out.WriteLine(reader.Document(i));
			}
			
			TermEnum tis = reader.Terms();
			while (tis.Next())
			{
				System.Console.Out.Write(tis.Term());
				System.Console.Out.WriteLine(" DF=" + tis.DocFreq());
				
				TermPositions positions = reader.TermPositions(tis.Term());
				try
				{
					while (positions.Next())
					{
						System.Console.Out.Write(" doc=" + positions.Doc());
						System.Console.Out.Write(" TF=" + positions.Freq());
						System.Console.Out.Write(" pos=");
						System.Console.Out.Write(positions.NextPosition());
						for (int j = 1; j < positions.Freq(); j++)
							System.Console.Out.Write("," + positions.NextPosition());
						System.Console.Out.WriteLine("");
					}
				}
				finally
				{
					positions.Close();
				}
			}
			tis.Close();
			reader.Close();
			directory.Close();
		}
Example #18
			/// <summary> Release the segment reader (i.e., decRef it and close it if there
			/// are no more references).
			/// </summary>
			/// <param name="sr">the segment reader to release
			/// </param>
			/// <throws>  IOException </throws>
			public virtual void  Release(SegmentReader sr, bool drop)
			{
				lock (this)
				{
					
					bool pooled = readerMap.Contains(sr.GetSegmentInfo());

                    System.Diagnostics.Debug.Assert(!pooled || readerMap[sr.GetSegmentInfo()] == sr);

                    // Drop caller's ref; for an external reader (not
                    // pooled), this decRef will close it
					sr.DecRef();
					
					if (pooled && (drop || (!Enclosing_Instance.poolReaders && sr.GetRefCount() == 1)))
					{

                        // We invoke deleter.checkpoint below, so we must be
                        // sync'd on IW if there are changes:
						
						// TODO: java 5
						// assert !sr.hasChanges || Thread.holdsLock(IndexWriter.this);

                        // Discard (don't save) changes when we are dropping
                        // the reader; this is used only on the sub-readers
                        // after a successful merge.
                        sr.hasChanges &= !drop;

                        bool hasChanges = sr.hasChanges;
						
						// Drop our ref -- this will commit any pending
						// changes to the dir
                        sr.Close();

                        // We are the last ref to this reader; since we're
                        // not pooling readers, we release it:
                        readerMap.Remove(sr.GetSegmentInfo());

                        if (hasChanges)
                        {
                            // Must checkpoint w/ deleter, because this
                            // segment reader will have created new _X_N.del
                            // file.
                            enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false);
                        }
					}
				}
			}
Example #19
        /// <summary>Returns true if index is clean, else false.</summary>
        public static bool Check(Directory dir, bool doFix)
        {
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis = new SegmentInfos();

            try
            {
                sis.Read(dir);
            }
            catch (System.Exception t)
            {
                out_Renamed.WriteLine("ERROR: could not read any segments file in directory");
                out_Renamed.Write(t.StackTrace);
                out_Renamed.Flush();
                return(false);
            }

            int numSegments = sis.Count;

            System.String segmentsFileName = sis.GetCurrentSegmentFileName();
            IndexInput    input            = null;

            try
            {
                input = dir.OpenInput(segmentsFileName);
            }
            catch (System.Exception t)
            {
                out_Renamed.WriteLine("ERROR: could not open segments file in directory");
                out_Renamed.Write(t.StackTrace);
                out_Renamed.Flush();
                return(false);
            }
            int format = 0;

            try
            {
                format = input.ReadInt();
            }
            catch (System.Exception t)
            {
                out_Renamed.WriteLine("ERROR: could not read segment file version in directory");
                out_Renamed.Write(t.StackTrace);
                out_Renamed.Flush();
                return(false);
            }
            finally
            {
                if (input != null)
                {
                    input.Close();
                }
            }

            System.String sFormat = "";
            bool          skip    = false;

            if (format == SegmentInfos.FORMAT)
            {
                sFormat = "FORMAT [Lucene Pre-2.1]";
            }
            if (format == SegmentInfos.FORMAT_LOCKLESS)
            {
                sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
            }
            else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
            {
                sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
            }
            else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
            }
            else if (format < SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                sFormat = "int=" + format + " [newer version of Lucene than this tool]";
                skip    = true;
            }
            else
            {
                sFormat = format + " [Lucene 1.3 or prior]";
            }

            out_Renamed.WriteLine("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat);

            if (skip)
            {
                out_Renamed.WriteLine("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                return(false);
            }

            SegmentInfos newSIS = (SegmentInfos)sis.Clone();

            newSIS.Clear();
            bool changed         = false;
            int  totLoseDocCount = 0;
            int  numBadSegments  = 0;

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = sis.Info(i);
                out_Renamed.WriteLine("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
                int toLoseDocCount = info.docCount;

                SegmentReader reader = null;

                try
                {
                    out_Renamed.WriteLine("    compound=" + info.GetUseCompoundFile());
                    out_Renamed.WriteLine("    numFiles=" + info.Files().Count);
                    out_Renamed.WriteLine(String.Format(nf, "    size (MB)={0:f}", new Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
                    int docStoreOffset = info.GetDocStoreOffset();
                    if (docStoreOffset != -1)
                    {
                        out_Renamed.WriteLine("    docStoreOffset=" + docStoreOffset);
                        out_Renamed.WriteLine("    docStoreSegment=" + info.GetDocStoreSegment());
                        out_Renamed.WriteLine("    docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
                    }
                    System.String delFileName = info.GetDelFileName();
                    if (delFileName == null)
                    {
                        out_Renamed.WriteLine("    no deletions");
                    }
                    else
                    {
                        out_Renamed.WriteLine("    has deletions [delFileName=" + delFileName + "]");
                    }
                    out_Renamed.Write("    test: open reader.........");
                    reader = SegmentReader.Get(info);
                    int numDocs = reader.NumDocs();
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions())
                    {
                        out_Renamed.WriteLine("OK [" + (info.docCount - numDocs) + " deleted docs]");
                    }
                    else
                    {
                        out_Renamed.WriteLine("OK");
                    }

                    out_Renamed.Write("    test: fields, norms.......");
                    System.Collections.IDictionary fieldNames = (System.Collections.IDictionary)reader.GetFieldNames(IndexReader.FieldOption.ALL);
                    System.Collections.IEnumerator it         = fieldNames.Keys.GetEnumerator();
                    while (it.MoveNext())
                    {
                        System.String fieldName = (System.String)it.Current;
                        byte[]        b         = reader.Norms(fieldName);
                        if (b.Length != info.docCount)
                        {
                            throw new System.SystemException("norms for field \"" + fieldName + "\" is length " + b.Length + " != maxDoc " + info.docCount);
                        }
                    }
                    out_Renamed.WriteLine("OK [" + fieldNames.Count + " fields]");

                    out_Renamed.Write("    test: terms, freq, prox...");
                    TermEnum      termEnum      = reader.Terms();
                    TermPositions termPositions = reader.TermPositions();

                    // Used only to count up # deleted docs for this
                    // term
                    MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

                    long termCount = 0;
                    long totFreq   = 0;
                    long totPos    = 0;
                    while (termEnum.Next())
                    {
                        termCount++;
                        Term term    = termEnum.Term();
                        int  docFreq = termEnum.DocFreq();
                        termPositions.Seek(term);
                        int lastDoc = -1;
                        int freq0   = 0;
                        totFreq += docFreq;
                        while (termPositions.Next())
                        {
                            freq0++;
                            int doc  = termPositions.Doc();
                            int freq = termPositions.Freq();
                            if (doc <= lastDoc)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + " < lastDoc " + lastDoc);
                            }
                            lastDoc = doc;
                            if (freq <= 0)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                            }

                            int lastPos = -1;
                            totPos += freq;
                            for (int j = 0; j < freq; j++)
                            {
                                int pos = termPositions.NextPosition();
                                if (pos < 0)
                                {
                                    throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                                }
                                if (pos <= lastPos)
                                {
                                    throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                                }
                            }
                        }

                        // Now count how many deleted docs occurred in
                        // this term:
                        int delCount;
                        if (reader.HasDeletions())
                        {
                            myTermDocs.Seek(term);
                            while (myTermDocs.Next())
                            {
                            }
                            delCount = myTermDocs.delCount;
                        }
                        else
                        {
                            delCount = 0;
                        }

                        if (freq0 + delCount != docFreq)
                        {
                            throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
                        }
                    }

                    out_Renamed.WriteLine("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");

                    out_Renamed.Write("    test: stored fields.......");
                    int  docCount  = 0;
                    long totFields = 0;
                    for (int j = 0; j < info.docCount; j++)
                    {
                        if (!reader.IsDeleted(j))
                        {
                            docCount++;
                            Document doc = reader.Document(j);
                            totFields += doc.GetFields().Count;
                        }
                    }

                    if (docCount != reader.NumDocs())
                    {
                        throw new System.SystemException("docCount=" + docCount + " but saw " + docCount + " undeleted docs");
                    }

                    out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total field count; avg {1:f} fields per doc]", new Object[] { totFields, (((float)totFields) / docCount) }));

                    out_Renamed.Write("    test: term vectors........");
                    int totVectors = 0;
                    for (int j = 0; j < info.docCount; j++)
                    {
                        if (!reader.IsDeleted(j))
                        {
                            TermFreqVector[] tfv = reader.GetTermFreqVectors(j);
                            if (tfv != null)
                            {
                                totVectors += tfv.Length;
                            }
                        }
                    }

                    out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new Object[] { totVectors, (((float)totVectors) / docCount) }));
                    out_Renamed.WriteLine("");
                }
                catch (System.Exception t)
                {
                    out_Renamed.WriteLine("FAILED");
                    System.String comment;
                    if (doFix)
                    {
                        comment = "will remove reference to this segment (-fix is specified)";
                    }
                    else
                    {
                        comment = "would remove reference to this segment (-fix was not specified)";
                    }
                    out_Renamed.WriteLine("    WARNING: " + comment + "; full exception:");
                    out_Renamed.Write(t.StackTrace);
                    out_Renamed.Flush();
                    out_Renamed.WriteLine("");
                    totLoseDocCount += toLoseDocCount;
                    numBadSegments++;
                    changed = true;
                    continue;
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.Close();
                    }
                }

                // Keeper
                newSIS.Add(info.Clone());
            }

            if (!changed)
            {
                out_Renamed.WriteLine("No problems were detected with this index.\n");
                return(true);
            }
            else
            {
                out_Renamed.WriteLine("WARNING: " + numBadSegments + " broken segments detected");
                if (doFix)
                {
                    out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents will be lost");
                }
                else
                {
                    out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents would be lost if -fix were specified");
                }
                out_Renamed.WriteLine();
            }

            if (doFix)
            {
                out_Renamed.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
                for (int i = 0; i < 5; i++)
                {
                    try
                    {
                        System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000));
                    }
                    catch (System.Threading.ThreadInterruptedException)
                    {
                        SupportClass.ThreadClass.Current().Interrupt();
                        i--;
                        continue;
                    }

                    out_Renamed.WriteLine("  " + (5 - i) + "...");
                }
                out_Renamed.Write("Writing...");
                try
                {
                    newSIS.Write(dir);
                }
                catch (System.Exception t)
                {
                    out_Renamed.WriteLine("FAILED; exiting");
                    out_Renamed.Write(t.StackTrace);
                    out_Renamed.Flush();
                    return(false);
                }
                out_Renamed.WriteLine("OK");
                out_Renamed.WriteLine("Wrote new segments file \"" + newSIS.GetCurrentSegmentFileName() + "\"");
            }
            else
            {
                out_Renamed.WriteLine("NOTE: would write new segments file [-fix was not specified]");
            }
            out_Renamed.WriteLine("");

            return(false);
        }
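
The older static entry point above can be driven as in the sketch below, assuming dir is an already-open Directory for the index and that the surrounding class has assigned its static output writer (out_Renamed) beforehand; this is illustrative rather than the library's documented usage.

// Read-only pass first; pass doFix = true only when you accept losing the
// documents held by any broken segments.
bool clean = CheckIndex.Check(dir, false);
if (!clean)
{
    // A second pass with doFix = true rewrites the segments file, dropping
    // references to the unreadable segments after the 5-second grace period above.
}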
Example #20
		public virtual void  TestBadSeek()
		{
			try
			{
				//After adding the document, we should be able to read it back in
				SegmentReader reader = new SegmentReader(new SegmentInfo("test", 3, dir));
				Assert.IsTrue(reader != null);
				SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
				Assert.IsTrue(segTermDocs != null);
				segTermDocs.Seek(new Term("textField2", "bad"));
				Assert.IsTrue(segTermDocs.Next() == false);
				reader.Close();
			}
			catch (System.IO.IOException e)
			{
				Assert.IsTrue(false);
			}
			try
			{
				//After adding the document, we should be able to read it back in
				SegmentReader reader = new SegmentReader(new SegmentInfo("test", 3, dir));
				Assert.IsTrue(reader != null);
				SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
				Assert.IsTrue(segTermDocs != null);
				segTermDocs.Seek(new Term("junk", "bad"));
				Assert.IsTrue(segTermDocs.Next() == false);
				reader.Close();
			}
			catch (System.IO.IOException e)
			{
				Assert.IsTrue(false);
			}
		}
Example #21
		private void  PrintSegment(System.IO.StringWriter out_Renamed, System.String segment)
		{
			Directory directory = FSDirectory.GetDirectory(indexDir, false);
			SegmentReader reader = new SegmentReader(new SegmentInfo(segment, 1, directory));
			
			for (int i = 0; i < reader.NumDocs(); i++)
			{
				out_Renamed.WriteLine(reader.Document(i));
			}
			
			TermEnum tis = reader.Terms();
			while (tis.Next())
			{
				out_Renamed.Write(tis.Term());
				out_Renamed.WriteLine(" DF=" + tis.DocFreq());
				
				TermPositions positions = reader.TermPositions(tis.Term());
				try
				{
					while (positions.Next())
					{
						out_Renamed.Write(" doc=" + positions.Doc());
						out_Renamed.Write(" TF=" + positions.Freq());
                        out_Renamed.Write(" pos=");
                        out_Renamed.Write(positions.NextPosition());
						for (int j = 1; j < positions.Freq(); j++)
							out_Renamed.Write("," + positions.NextPosition());
						out_Renamed.WriteLine("");
					}
				}
				finally
				{
					positions.Close();
				}
			}
			tis.Close();
			reader.Close();
			directory.Close();
		}
Example #22
            /// <summary> Release the segment reader (i.e., decRef it and close it if there
            /// are no more references).
            /// </summary>
            /// <param name="sr">the segment reader to release
            /// </param>
            /// <throws>  IOException </throws>
            public virtual void Release(SegmentReader sr, bool drop)
            {
                lock (this)
                {

                    bool pooled = readerMap.Contains(sr.GetSegmentInfo());

                    System.Diagnostics.Debug.Assert(!pooled || readerMap[sr.GetSegmentInfo()] == sr);

                    // Drop caller's ref
                    sr.DecRef();

                    if (pooled && (drop || (!Enclosing_Instance.poolReaders && sr.GetRefCount() == 1)))
                    {

                        // We are the last ref to this reader; since we're
                        // not pooling readers, we release it:
                        readerMap.Remove(sr.GetSegmentInfo());

                        // TODO: java 5
                        // assert !sr.hasChanges || Thread.holdsLock(IndexWriter.this);

                        // Drop our ref -- this will commit any pending
                        // changes to the dir
                        bool success = false;
                        try
                        {
                            sr.Close();
                            success = true;
                        }
                        finally
                        {
                            if (!success && sr.hasChanges)
                            {
                                // Abandon the changes & retry closing:
                                sr.hasChanges = false;
                                try
                                {
                                    sr.Close();
                                }
                                catch (System.Exception ignore)
                                {
                                    // Keep throwing original exception
                                }
                            }
                        }
                    }
                }
            }