Ejemplo n.º 1
0
        /// <summary>
        /// Opens a reader on <c>dir</c> and verifies the document frequency reported for
        /// the terms "aaa" and "bbb", first through a full term enumeration and then
        /// through an enumeration started at the "content":"aaa" term.
        /// </summary>
        private void  VerifyDocFreq()
        {
            IndexReader reader = IndexReader.Open(dir);
            try
            {
                // create enumeration of all terms
                TermEnum termEnum = reader.Terms();
                try
                {
                    // go to the first term (aaa)
                    termEnum.Next();
                    // assert that term is 'aaa'
                    Assert.AreEqual("aaa", termEnum.Term().Text());
                    Assert.AreEqual(200, termEnum.DocFreq());
                    // go to the second term (bbb)
                    termEnum.Next();
                    // assert that term is 'bbb'
                    Assert.AreEqual("bbb", termEnum.Term().Text());
                    Assert.AreEqual(100, termEnum.DocFreq());
                }
                finally
                {
                    // release the enumeration even when an assertion above fails
                    termEnum.Close();
                }

                // create enumeration of terms after term 'aaa', including 'aaa'
                termEnum = reader.Terms(new Term("content", "aaa"));
                try
                {
                    // assert that term is 'aaa'
                    Assert.AreEqual("aaa", termEnum.Term().Text());
                    Assert.AreEqual(200, termEnum.DocFreq());
                    // go to term 'bbb'
                    termEnum.Next();
                    // assert that term is 'bbb'
                    Assert.AreEqual("bbb", termEnum.Term().Text());
                    Assert.AreEqual(100, termEnum.DocFreq());
                }
                finally
                {
                    termEnum.Close();
                }
            }
            finally
            {
                // always release the reader opened at the top of the method
                reader.Close();
            }
        }
Ejemplo n.º 2
0
            /// <summary>
            /// Advances the enumeration by one term, opening a new enumeration for the
            /// field name obtained from the tail map once the current one leaves <c>field</c>.
            /// </summary>
            /// <returns>
            /// <c>true</c> when the current enumeration advanced within <c>field</c> or a
            /// new enumeration was opened; <c>false</c> when <c>field</c> is null, either on
            /// entry or after the tail-map lookup.
            /// </returns>
            public override bool Next()
            {
                if (field == null)
                {
                    return false;
                }

                // Field names are compared by reference, not by value.
                if (termEnum.Next() && System.Object.ReferenceEquals(termEnum.Term().Field(), field))
                {
                    return true;
                }

                // The current enumeration no longer yields terms of this field: release it.
                termEnum.Close();

                // Take the first key of the tail map as the field to continue with.
                field = (System.String)SupportClass.TailMap(Enclosing_Instance.fieldToReader, field).GetKey(0);
                if (field == null)
                {
                    return false;
                }

                termEnum = ((IndexReader)Enclosing_Instance.fieldToReader[field]).Terms();
                return true;
            }
Ejemplo n.º 3
0
            /// <summary>
            /// Moves to the next term, continuing with the first term of a later field once
            /// the current field's enumeration is used up.
            /// </summary>
            /// <returns>
            /// <c>true</c> when positioned on a term; <c>false</c> when there is no
            /// enumeration or no later field yields a term of its own.
            /// </returns>
            public override bool Next()
            {
                if (termEnum == null)
                {
                    return(false);
                }

                // another term in this field? (field names are compared by reference)
                if (termEnum.Next() && (System.Object)termEnum.Term().Field() == (System.Object)field)
                {
                    return(true);                 // yes, keep going
                }
                termEnum.Close();                 // close old termEnum

                // find the next field with terms, if any
                if (fieldIterator == null)
                {
                    System.Collections.Comparer   comparer = System.Collections.Comparer.Default;
                    System.Collections.SortedList newList  = new System.Collections.SortedList();
                    if (Enclosing_Instance.fieldToReader != null)
                    {
                        int count = Enclosing_Instance.fieldToReader.Count;
                        int index = 0;

                        // Skip the keys ordered before the current field. The bound on index keeps
                        // GetKey from being called past the end when no key is >= field.
                        while (index < count && comparer.Compare(Enclosing_Instance.fieldToReader.GetKey(index), field) < 0)
                        {
                            index++;
                        }

                        // Copy the current field and everything after it.
                        for (; index < count; index++)
                        {
                            System.Object key = Enclosing_Instance.fieldToReader.GetKey(index);
                            newList.Add(key, Enclosing_Instance.fieldToReader[key]);
                        }
                    }

                    fieldIterator = newList.Keys.GetEnumerator();
                    // Skip field to get next one. Current is deliberately not read here:
                    // it throws when the list is empty and its value was never used.
                    fieldIterator.MoveNext();
                }
                while (fieldIterator.MoveNext())
                {
                    field    = ((System.String)fieldIterator.Current);
                    termEnum = ((IndexReader)Enclosing_Instance.fieldToReader[field]).Terms(new Term(field));
                    Term term = termEnum.Term();
                    if (term != null && (System.Object)term.Field() == (System.Object)field)
                    {
                        return(true);
                    }
                    else
                    {
                        // this reader has no term for the field; try the next field
                        termEnum.Close();
                    }
                }

                return(false);                // no more fields
            }
Ejemplo n.º 4
0
            /// <summary>
            /// Moves to the next term, continuing with the first term of a later field once
            /// the current field's enumeration is used up.
            /// </summary>
            /// <returns>
            /// <c>true</c> when positioned on a term; <c>false</c> when there is no
            /// enumeration or no later field yields a term of its own.
            /// </returns>
            public override bool Next()
            {
                if (termEnum == null)
                {
                    return(false);
                }

                // another term in this field? (field names are compared by reference)
                if (termEnum.Next() && (System.Object)termEnum.Term().Field() == (System.Object)field)
                {
                    return(true);                 // yes, keep going
                }
                termEnum.Close();                 // close old termEnum

                // find the next field with terms, if any
                if (fieldIterator == null)
                {
                    //JAVA: fieldIterator = fieldToReader.tailMap(field).keySet().iterator();
                    //JAVA: fieldIterator.next();  // Skip field to get next one
                    List <string> tmpList     = new List <string>();
                    bool          pastCurrent = false;
                    foreach (string key in Enclosing_Instance.fieldToReader.Keys)
                    {
                        if (pastCurrent)
                        {
                            tmpList.Add(key);
                        }
                        else if (key == field)
                        {
                            // The current field was just exhausted. Leaving it out of the list
                            // mirrors the Java iterator's initial next() and keeps its terms
                            // from being enumerated a second time.
                            pastCurrent = true;
                        }
                    }
                    fieldIterator = tmpList.GetEnumerator();
                }
                while (fieldIterator.MoveNext())
                {
                    field    = fieldIterator.Current;
                    termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field));
                    Term term = termEnum.Term();
                    if (term != null && (System.Object)term.Field() == (System.Object)field)
                    {
                        return(true);
                    }
                    else
                    {
                        // this reader has no term for the field; try the next field
                        termEnum.Close();
                    }
                }

                return(false);                // no more fields
            }
Ejemplo n.º 5
0
        /// <summary>
        /// Checks that every term of <c>reader</c> occurs in the stored text of its field
        /// (looked up in <c>DocHelper.nameValues</c>) and that term docs and term positions
        /// can be sought for known terms.
        /// </summary>
        public virtual void  TestTerms()
        {
            TermEnum terms = reader.Terms();
            Assert.IsTrue(terms != null);
            try
            {
                while (terms.Next() == true)
                {
                    Term term = terms.Term();
                    Assert.IsTrue(term != null);
                    //System.out.println("Term: " + term);
                    System.String fieldValue = (System.String)DocHelper.nameValues[term.Field()];
                    Assert.IsTrue(fieldValue.IndexOf(term.Text()) != -1);
                }
            }
            finally
            {
                terms.Close();
            }

            TermDocs termDocs = reader.TermDocs();
            Assert.IsTrue(termDocs != null);
            try
            {
                termDocs.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
                Assert.IsTrue(termDocs.Next() == true);

                termDocs.Seek(new Term(DocHelper.NO_NORMS_KEY, DocHelper.NO_NORMS_TEXT));
                Assert.IsTrue(termDocs.Next() == true);
            }
            finally
            {
                termDocs.Close();
            }

            TermPositions positions = reader.TermPositions();
            // Check for null before the first use, so a null shows up as an assertion failure.
            Assert.IsTrue(positions != null);
            try
            {
                positions.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
                Assert.IsTrue(positions.Doc() == 0);
                Assert.IsTrue(positions.NextPosition() >= 0);
            }
            finally
            {
                positions.Close();
            }
        }
Ejemplo n.º 6
0
            /// <summary>
            /// Advances to the next term and reports whether it belongs to the field being
            /// enumerated and was accepted by the enclosing parser.
            /// </summary>
            /// <returns>
            /// <c>true</c> when the current term is in <c>fieldName</c> and
            /// <c>enclosing.TryParse</c> accepted its text; otherwise <c>false</c>.
            /// </returns>
            public bool MoveNext()
            {
                // The very first call uses the term the enumeration is already positioned on;
                // every later call has to advance it first.
                if (this.isFirst)
                {
                    this.isFirst = false;
                }
                else if (!this.termEnum.Next())
                {
                    return false;
                }

                this.currentTerm = termEnum.Term();

                bool inField = this.currentTerm != null && this.currentTerm.Field().Equals(this.fieldName);
                if (!inField)
                {
                    return false;
                }

                if (!this.enclosing.TryParse(this.currentTerm.Text()))
                {
                    return false;
                }

                // Keep the optional term-docs cursor aligned with the accepted term.
                if (this.termDocs != null)
                {
                    this.termDocs.Seek(this.termEnum);
                }

                return true;
            }
Ejemplo n.º 7
0
        /// <summary>
        /// Checks that every term of <c>reader</c> occurs in the stored text of its field
        /// (looked up in <c>DocHelper.nameValues</c>) and that term docs and term positions
        /// can be sought for a known term. An <c>IOException</c> fails the test.
        /// </summary>
        public virtual void  TestTerms()
        {
            try
            {
                TermEnum terms = reader.Terms();
                Assert.IsTrue(terms != null);
                try
                {
                    while (terms.Next() == true)
                    {
                        Term term = terms.Term();
                        Assert.IsTrue(term != null);
                        //System.out.println("Term: " + term);
                        System.String fieldValue = (System.String)DocHelper.nameValues[term.Field()];
                        Assert.IsTrue(fieldValue.IndexOf(term.Text()) != -1);
                    }
                }
                finally
                {
                    terms.Close();
                }

                TermDocs termDocs = reader.TermDocs();
                Assert.IsTrue(termDocs != null);
                try
                {
                    termDocs.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "Field"));
                    Assert.IsTrue(termDocs.Next() == true);
                }
                finally
                {
                    termDocs.Close();
                }

                TermPositions positions = reader.TermPositions();
                // Check for null before the first use, so a null shows up as an assertion failure.
                Assert.IsTrue(positions != null);
                try
                {
                    positions.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "Field"));
                    Assert.IsTrue(positions.Doc() == 0);
                    Assert.IsTrue(positions.NextPosition() >= 0);
                }
                finally
                {
                    positions.Close();
                }
            }
            catch (System.IO.IOException e)
            {
                // Fail with the full exception text instead of a bare "expected true".
                Assert.Fail("Unexpected IOException: " + e);
            }
        }
Ejemplo n.º 8
0
        private int[] docMap;           // use getDocMap()

        /// <summary>
        /// Stores the base value, the reader and the term enumeration, and records the
        /// term the enumeration currently reports.
        /// </summary>
        /// <param name="b">Value stored in <c>base_Renamed</c>.</param>
        /// <param name="te">Term enumeration; its current term is stored in <c>term</c>.</param>
        /// <param name="r">Reader stored in <c>reader</c>.</param>
        internal SegmentMergeInfo(int b, TermEnum te, IndexReader r)
        {
            // Snapshot the current term first, then keep the collaborators.
            term         = te.Term();
            termEnum     = te;
            reader       = r;
            base_Renamed = b;
        }
Ejemplo n.º 9
0
        private TermPositions postings; // use getPositions()

        #endregion Fields

        #region Constructors

        /// <summary>
        /// Initializes the merge info from a base value, a term enumeration and a reader.
        /// </summary>
        /// <param name="b">Value stored in <c>base_Renamed</c>.</param>
        /// <param name="te">Term enumeration; its current term is stored in <c>term</c>.</param>
        /// <param name="r">Reader stored in <c>reader</c>.</param>
        internal SegmentMergeInfo(int b, TermEnum te, IndexReader r)
        {
            termEnum = te;
            // Remember the term the enumeration reports at construction time.
            term = termEnum.Term();

            reader = r;
            base_Renamed = b;
        }
        /// <summary>
        /// Indexes three small documents into a RAM directory, wraps the reader in
        /// <c>TestReader</c> and checks what the wrapped reader exposes: only terms
        /// containing 'e', only odd document numbers for "one", and all documents with
        /// frequency 1 when term docs are requested for a null term.
        /// </summary>
        public virtual void  TestFilterIndexReader_Renamed()
        {
            RAMDirectory directory = new MockRAMDirectory();
            try
            {
                IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

                Document d1 = new Document();
                d1.Add(new Field("default", "one two", Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(d1);

                Document d2 = new Document();
                d2.Add(new Field("default", "one three", Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(d2);

                Document d3 = new Document();
                d3.Add(new Field("default", "two four", Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(d3);

                writer.Close();

                IndexReader reader = new TestReader(IndexReader.Open(directory));
                try
                {
                    Assert.IsTrue(reader.IsOptimized());

                    TermEnum terms = reader.Terms();
                    try
                    {
                        while (terms.Next())
                        {
                            Assert.IsTrue(terms.Term().Text().IndexOf('e') != -1);
                        }
                    }
                    finally
                    {
                        terms.Close();
                    }

                    TermPositions positions = reader.TermPositions(new Term("default", "one"));
                    try
                    {
                        while (positions.Next())
                        {
                            Assert.IsTrue((positions.Doc() % 2) == 1);
                        }
                    }
                    finally
                    {
                        // release the positions enumeration as well
                        positions.Close();
                    }

                    const int numDocs = 3;

                    TermDocs td = reader.TermDocs(null);
                    try
                    {
                        for (int i = 0; i < numDocs; i++)
                        {
                            Assert.IsTrue(td.Next());
                            Assert.AreEqual(i, td.Doc());
                            Assert.AreEqual(1, td.Freq());
                        }
                    }
                    finally
                    {
                        td.Close();
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            finally
            {
                directory.Close();
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Writes the contents of one segment to <paramref name="out_Renamed"/>: every
        /// document, then every term with its document frequency and, per document, the
        /// term frequency and positions.
        /// </summary>
        /// <param name="out_Renamed">Writer receiving the dump.</param>
        /// <param name="segment">Name of the segment to open in <c>indexDir</c>.</param>
        private void  PrintSegment(System.IO.StringWriter out_Renamed, System.String segment)
        {
            Directory directory = FSDirectory.GetDirectory(indexDir, false);
            try
            {
                SegmentReader reader = new SegmentReader(new SegmentInfo(segment, 1, directory));
                try
                {
                    for (int i = 0; i < reader.NumDocs(); i++)
                    {
                        out_Renamed.WriteLine(reader.Document(i));
                    }

                    TermEnum tis = reader.Terms();
                    try
                    {
                        while (tis.Next())
                        {
                            Term term = tis.Term();
                            out_Renamed.Write(term);
                            out_Renamed.WriteLine(" DF=" + tis.DocFreq());

                            TermPositions positions = reader.TermPositions(term);
                            try
                            {
                                while (positions.Next())
                                {
                                    out_Renamed.Write(" doc=" + positions.Doc());
                                    out_Renamed.Write(" TF=" + positions.Freq());
                                    out_Renamed.Write(" pos=");
                                    // first position without a separator, the rest comma-separated
                                    out_Renamed.Write(positions.NextPosition());
                                    for (int j = 1; j < positions.Freq(); j++)
                                    {
                                        out_Renamed.Write("," + positions.NextPosition());
                                    }
                                    out_Renamed.WriteLine("");
                                }
                            }
                            finally
                            {
                                positions.Close();
                            }
                        }
                    }
                    finally
                    {
                        // released even when reading or writing throws
                        tis.Close();
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            finally
            {
                directory.Close();
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Prints the contents of one segment of the "test" directory to standard output:
        /// every document, then every term with its document frequency and, per document,
        /// the term frequency and positions.
        /// </summary>
        /// <param name="segment">Name of the segment to open.</param>
        internal static void  PrintSegment(System.String segment)
        {
            Directory directory = FSDirectory.GetDirectory("test", false);
            try
            {
                SegmentReader reader = new SegmentReader(new SegmentInfo(segment, 1, directory));
                try
                {
                    for (int i = 0; i < reader.NumDocs(); i++)
                    {
                        System.Console.Out.WriteLine(reader.Document(i));
                    }

                    TermEnum tis = reader.Terms();
                    try
                    {
                        while (tis.Next())
                        {
                            Term term = tis.Term();
                            System.Console.Out.Write(term);
                            System.Console.Out.WriteLine(" DF=" + tis.DocFreq());

                            TermPositions positions = reader.TermPositions(term);
                            try
                            {
                                while (positions.Next())
                                {
                                    System.Console.Out.Write(" doc=" + positions.Doc());
                                    System.Console.Out.Write(" TF=" + positions.Freq());
                                    System.Console.Out.Write(" pos=");
                                    // first position without a separator, the rest comma-separated
                                    System.Console.Out.Write(positions.NextPosition());
                                    for (int j = 1; j < positions.Freq(); j++)
                                    {
                                        System.Console.Out.Write("," + positions.NextPosition());
                                    }
                                    System.Console.Out.WriteLine("");
                                }
                            }
                            finally
                            {
                                positions.Close();
                            }
                        }
                    }
                    finally
                    {
                        // released even when reading or writing throws
                        tis.Close();
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            finally
            {
                directory.Close();
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Writes the contents of the segment described by <paramref name="si"/> to
        /// <paramref name="out_Renamed"/>: every document, then every term with its
        /// document frequency and, per document, the term frequency and positions.
        /// </summary>
        /// <param name="out_Renamed">Writer receiving the dump.</param>
        /// <param name="si">Segment to read, passed to <c>SegmentReader.Get</c>.</param>
        private void  PrintSegment(System.IO.StreamWriter out_Renamed, SegmentInfo si)
        {
            SegmentReader reader = SegmentReader.Get(si);
            try
            {
                for (int i = 0; i < reader.NumDocs(); i++)
                {
                    out_Renamed.WriteLine(reader.Document(i));
                }

                TermEnum tis = reader.Terms();
                try
                {
                    while (tis.Next())
                    {
                        Term term = tis.Term();
                        out_Renamed.Write(term);
                        out_Renamed.WriteLine(" DF=" + tis.DocFreq());

                        TermPositions positions = reader.TermPositions(term);
                        try
                        {
                            while (positions.Next())
                            {
                                out_Renamed.Write(" doc=" + positions.Doc());
                                out_Renamed.Write(" TF=" + positions.Freq());
                                out_Renamed.Write(" pos=");
                                // first position without a separator, the rest comma-separated
                                out_Renamed.Write(positions.NextPosition());
                                for (int j = 1; j < positions.Freq(); j++)
                                {
                                    out_Renamed.Write("," + positions.NextPosition());
                                }
                                out_Renamed.WriteLine("");
                            }
                        }
                        finally
                        {
                            positions.Close();
                        }
                    }
                }
                finally
                {
                    // released even when reading or writing throws
                    tis.Close();
                }
            }
            finally
            {
                reader.Close();
            }
        }
Ejemplo n.º 14
0
 /// <summary>
 /// Advances the wrapped term enumeration and refreshes <c>term</c>.
 /// </summary>
 /// <returns>
 /// <c>true</c> when the enumeration advanced, in which case <c>term</c> holds its
 /// current term; <c>false</c> otherwise, in which case <c>term</c> is set to null.
 /// </returns>
 internal bool Next()
 {
     bool advanced = termEnum.Next();
     term = advanced ? termEnum.Term() : null;
     return advanced;
 }
Ejemplo n.º 15
0
		/// <summary>
		/// Seeks to the term the given enumeration is positioned on.
		/// </summary>
		/// <param name="termEnum">Enumeration whose current term is the seek target.</param>
		public virtual void  Seek(TermEnum termEnum)
		{
			TermInfo ti;
			SegmentTermEnum segmentEnum = termEnum as SegmentTermEnum;
			
			// use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
			if (segmentEnum != null && segmentEnum.fieldInfos == parent.fieldInfos)
			{
				// optimized case: take the term info directly from the enumeration
				ti = segmentEnum.TermInfo();
			}
			else
			{
				// punt case: look the term up through the parent's term infos
				ti = parent.tis.Get(termEnum.Term());
			}
			
			Seek(ti);
		}
Ejemplo n.º 16
0
            /// <summary>
            /// Moves to the next term, switching to the enumeration of a later field once
            /// the current one stops yielding terms of <c>field</c>.
            /// </summary>
            /// <returns>
            /// <c>true</c> when positioned on a term; <c>false</c> when there is no
            /// enumeration or no remaining field yields a term of its own.
            /// </returns>
            public override bool Next()
            {
                if (termEnum == null)
                {
                    return false;
                }

                // Stay on the current enumeration while it keeps producing terms of this
                // field (field names are compared by reference).
                bool stillInField = termEnum.Next() && System.Object.ReferenceEquals(termEnum.Term().Field(), field);
                if (stillInField)
                {
                    return true;
                }

                termEnum.Close();

                // Create the field-name iterator on first need and step over its first
                // entry, which is the field that was just finished.
                if (fieldIterator == null)
                {
                    fieldIterator = SupportClass.TailMap(Enclosing_Instance.fieldToReader, field).Keys.GetEnumerator();
                    fieldIterator.MoveNext();
                }

                while (fieldIterator.MoveNext())
                {
                    field = (System.String)fieldIterator.Current;

                    IndexReader fieldReader = (IndexReader)Enclosing_Instance.fieldToReader[field];
                    termEnum = fieldReader.Terms(new Term(field, ""));

                    Term candidate = termEnum.Term();
                    if (candidate == null || !System.Object.ReferenceEquals(candidate.Field(), field))
                    {
                        // this reader has no term for the field; try the next field
                        termEnum.Close();
                        continue;
                    }

                    return true;
                }

                return false;
            }
Ejemplo n.º 17
0
            /// <summary>
            /// Moves to the next term, switching to the enumeration of another field once
            /// the current one stops yielding terms of <c>field</c>.
            /// </summary>
            /// <returns>
            /// <c>true</c> when positioned on a term; <c>false</c> when there is no
            /// enumeration or no remaining field yields a term of its own.
            /// </returns>
            public override bool Next()
            {
                if (termEnum == null)
                {
                    return false;
                }

                // Stay on the current enumeration while it keeps producing terms of this
                // field (field names are compared by reference).
                bool stillInField = termEnum.Next() && object.ReferenceEquals(termEnum.Term().Field(), field);
                if (stillInField)
                {
                    return true;
                }

                termEnum.Close();

                // Create the field-name iterator on first need.
                // NOTE(review): nothing here steps past the current field; confirm that
                // CollectionsSupport.TailMap excludes `field`, otherwise its terms are visited again.
                if (fieldIterator == null)
                {
                    fieldIterator = SupportClass.CollectionsSupport.TailMap(Enclosing_Instance.fieldToReader, field).Keys.GetEnumerator();
                }

                while (fieldIterator.MoveNext())
                {
                    field    = fieldIterator.Current;
                    termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field));

                    Term candidate = termEnum.Term();
                    bool ownsTerm  = candidate != null && object.ReferenceEquals(candidate.Field(), field);
                    if (ownsTerm)
                    {
                        return true;
                    }

                    // this reader has no term for the field; try the next field
                    termEnum.Close();
                }

                return false;
            }
Ejemplo n.º 18
0
        /// <summary>
        /// Runs several ingester threads against one <c>IndexWriter</c>, then checks that
        /// the payload read at every position of every term, converted with
        /// <c>pool.BytesToString</c>, equals that term's text, and that the byte-array
        /// pool ends up with <c>numThreads</c> entries.
        /// </summary>
        public virtual void  TestThreadSafety()
        {
            rnd = NewRandom();
            int           numThreads = 5;
            int           numDocs    = 50;
            ByteArrayPool pool       = new ByteArrayPool(numThreads, 5);

            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            System.String field = "test";

            SupportClass.ThreadClass[] ingesters = new SupportClass.ThreadClass[numThreads];
            for (int i = 0; i < numThreads; i++)
            {
                ingesters[i] = new AnonymousClassThread(numDocs, field, pool, writer, this);
                ingesters[i].Start();
            }

            for (int i = 0; i < numThreads; i++)
            {
                ingesters[i].Join();
            }
            writer.Close();

            IndexReader reader = IndexReader.Open(dir);
            try
            {
                TermEnum terms = reader.Terms();
                try
                {
                    while (terms.Next())
                    {
                        TermPositions tp = reader.TermPositions(terms.Term());
                        try
                        {
                            while (tp.Next())
                            {
                                int freq = tp.Freq();
                                for (int i = 0; i < freq; i++)
                                {
                                    tp.NextPosition();
                                    // expected value first: the term text is the reference,
                                    // the decoded payload is what is under test
                                    Assert.AreEqual(terms.Term().text_ForNUnit, pool.BytesToString(tp.GetPayload(new byte[5], 0)));
                                }
                            }
                        }
                        finally
                        {
                            tp.Close();
                        }
                    }
                }
                finally
                {
                    terms.Close();
                }
            }
            finally
            {
                reader.Close();
            }

            // expected value first, so a failure message reads correctly
            Assert.AreEqual(numThreads, pool.Size());
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Seeks to the term the given enumeration is positioned on.
        /// </summary>
        /// <param name="termEnum">Enumeration whose current term is the seek target.</param>
        public virtual void  Seek(TermEnum termEnum)
        {
            // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
            bool sameSegment = termEnum is SegmentTermEnum && ((SegmentTermEnum)termEnum).fieldInfos == parent.fieldInfos;

            // optimized case: reuse the enumeration's own term info;
            // punt case: look the term up through the parent's term infos
            TermInfo ti = sameSegment
                ? ((SegmentTermEnum)termEnum).TermInfo()
                : parent.tis.Get(termEnum.Term());

            Seek(ti);
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Indexes three small documents into a RAM directory, wraps the reader in
        /// <c>TestReader</c> and checks that the wrapped reader exposes only terms
        /// containing 'e' and only odd document numbers for the term "one".
        /// </summary>
        public virtual void  TestFilterIndexReader_()
        {
            RAMDirectory directory = new RAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

            Document d1 = new Document();
            d1.Add(Field.Text("default", "one two"));
            writer.AddDocument(d1);

            Document d2 = new Document();
            d2.Add(Field.Text("default", "one three"));
            writer.AddDocument(d2);

            Document d3 = new Document();
            d3.Add(Field.Text("default", "two four"));
            writer.AddDocument(d3);

            writer.Close();

            IndexReader reader = new TestReader(IndexReader.Open(directory));
            try
            {
                TermEnum terms = reader.Terms();
                try
                {
                    while (terms.Next())
                    {
                        Assert.IsTrue(terms.Term().Text().IndexOf('e') != -1);
                    }
                }
                finally
                {
                    terms.Close();
                }

                TermPositions positions = reader.TermPositions(new Term("default", "one"));
                try
                {
                    while (positions.Next())
                    {
                        Assert.IsTrue((positions.Doc() % 2) == 1);
                    }
                }
                finally
                {
                    // release the positions enumeration as well
                    positions.Close();
                }
            }
            finally
            {
                // release the reader even when an assertion above fails
                reader.Close();
            }
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Seeks to the term the given enumeration is positioned on.
        /// </summary>
        /// <param name="termEnum">Enumeration whose current term is the seek target.</param>
        public virtual void  Seek(TermEnum termEnum)
        {
            SegmentTermEnum segmentTermEnum = termEnum as SegmentTermEnum;

            // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
            bool sameSegment = segmentTermEnum != null && segmentTermEnum.fieldInfos == parent.core.fieldInfos;

            // optimized case reads both values from the segment enumeration;
            // punt case resolves the term info through the terms reader
            Term     term = sameSegment ? segmentTermEnum.Term() : termEnum.Term();
            TermInfo ti   = sameSegment ? segmentTermEnum.TermInfo() : parent.core.GetTermsReader().Get(term);

            Seek(ti, term);
        }
Ejemplo n.º 22
0
		/// <summary>
		/// Seeks to the term the given enumeration is positioned on.
		/// </summary>
		/// <param name="termEnum">Enumeration whose current term is the seek target.</param>
		public virtual void  Seek(TermEnum termEnum)
		{
			// use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
			SegmentTermEnum sameSegmentEnum = termEnum as SegmentTermEnum;
			if (sameSegmentEnum != null && sameSegmentEnum.fieldInfos != parent.core.fieldInfos)
			{
				// different segment: fall through to the punt case
				sameSegmentEnum = null;
			}
			
			Term term;
			TermInfo ti;
			if (sameSegmentEnum == null)
			{
				// punt case: resolve the term info through the terms reader
				term = termEnum.Term();
				ti = parent.core.GetTermsReader().Get(term);
			}
			else
			{
				// optimized case: both values come straight from the enumeration
				term = sameSegmentEnum.Term();
				ti = sameSegmentEnum.TermInfo();
			}
			
			Seek(ti, term);
		}
Ejemplo n.º 23
0
			/// <summary>
			/// Moves to the next term, continuing with the first term of a later field once
			/// the current field's enumeration is used up.
			/// </summary>
			/// <returns>
			/// <c>true</c> when positioned on a term; <c>false</c> when there is no
			/// enumeration or no later field yields a term of its own.
			/// </returns>
			public override bool Next()
			{
				if (termEnum == null)
					return false;
				
				// another term in this field? (field names are compared by reference)
				if (termEnum.Next() && (System.Object) termEnum.Term().Field() == (System.Object) field)
					return true; // yes, keep going
				
				termEnum.Close(); // close old termEnum
				
				// find the next field with terms, if any
				if (fieldIterator == null)
				{
					//JAVA: fieldIterator = fieldToReader.tailMap(field).keySet().iterator();
					//JAVA: fieldIterator.next();  // Skip field to get next one
					List<string> tmpList = new List<string>();
					bool pastCurrent = false;
					foreach (string key in Enclosing_Instance.fieldToReader.Keys)
					{
						if (pastCurrent)
						{
							tmpList.Add(key);
						}
						else if (key == field)
						{
							// The current field was just exhausted. Leaving it out of the list
							// mirrors the Java iterator's initial next() and keeps its terms
							// from being enumerated a second time.
							pastCurrent = true;
						}
					}
					fieldIterator = tmpList.GetEnumerator();
				}
				while (fieldIterator.MoveNext())
				{
					field =  fieldIterator.Current;
					termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field));
					Term term = termEnum.Term();
					if (term != null && (System.Object) term.Field() == (System.Object) field)
						return true;
					else
						termEnum.Close(); // no term for this field; try the next field
				}
				
				return false; // no more fields
			}
Ejemplo n.º 24
0
 /// <summary>
 /// Seeks to the term the given enumeration is positioned on by delegating to the
 /// term-based <c>Seek</c> overload.
 /// </summary>
 /// <param name="termEnum">Enumeration whose current term is the seek target.</param>
 public virtual void  Seek(TermEnum termEnum)
 {
     Term current = termEnum.Term();
     Seek(current);
 }
Ejemplo n.º 25
0
            /// <summary>
            /// Moves to the next term, switching to the enumeration of another field once
            /// the current one stops yielding terms of <c>field</c>.
            /// </summary>
            /// <returns>
            /// <c>true</c> when positioned on a term; <c>false</c> when there is no
            /// enumeration or no remaining field yields a term of its own.
            /// </returns>
            public override bool Next()
            {
                if (termEnum == null)
                {
                    return false;
                }

                // Field names are compared by reference, not by value.
                if (termEnum.Next() && object.ReferenceEquals(termEnum.Term().Field(), field))
                {
                    return true;
                }

                // Nothing left for this field in the current enumeration.
                termEnum.Close();

                // NOTE(review): nothing here steps past the current field; confirm that
                // CollectionsSupport.TailMap excludes `field`, otherwise its terms are visited again.
                if (fieldIterator == null)
                {
                    fieldIterator = SupportClass.CollectionsSupport.TailMap(Enclosing_Instance.fieldToReader, field).Keys.GetEnumerator();
                }

                while (fieldIterator.MoveNext())
                {
                    field = fieldIterator.Current;
                    termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field));

                    Term firstTerm = termEnum.Term();
                    if (firstTerm == null || !object.ReferenceEquals(firstTerm.Field(), field))
                    {
                        // this reader has no term for the field; try the next field
                        termEnum.Close();
                        continue;
                    }

                    return true;
                }

                return false;
            }
Ejemplo n.º 26
0
			/// <summary>
			/// Moves to the next term, switching to the enumeration of a later field once
			/// the current one stops yielding terms of <c>field</c>.
			/// </summary>
			/// <returns>
			/// <c>true</c> when positioned on a term; <c>false</c> when there is no
			/// enumeration or no remaining field yields a term of its own.
			/// </returns>
			public override bool Next()
			{
				if (termEnum == null)
				{
					return false;
				}
				
				// Field names are compared by reference, not by value.
				bool stillInField = termEnum.Next() && System.Object.ReferenceEquals(termEnum.Term().Field(), field);
				if (stillInField)
				{
					return true;
				}
				
				termEnum.Close();
				
				// Create the field-name iterator on first need and step over its first
				// entry, which is the field that was just finished.
				if (fieldIterator == null)
				{
					fieldIterator = SupportClass.TailMap(Enclosing_Instance.fieldToReader, field).Keys.GetEnumerator();
					fieldIterator.MoveNext();
				}
				
				while (fieldIterator.MoveNext())
				{
					field = (System.String) fieldIterator.Current;
					
					IndexReader fieldReader = (IndexReader) Enclosing_Instance.fieldToReader[field];
					termEnum = fieldReader.Terms(new Term(field, ""));
					
					Term firstTerm = termEnum.Term();
					bool ownsTerm = firstTerm != null && System.Object.ReferenceEquals(firstTerm.Field(), field);
					if (ownsTerm)
					{
						return true;
					}
					
					// this reader has no term for the field; try the next field
					termEnum.Close();
				}
				
				return false;
			}
        public virtual void  Test1()
        {
            ParallelReader pr = new ParallelReader();

            pr.Add(ir1);
            pr.Add(ir2);

            TermDocs td = pr.TermDocs();

            TermEnum te = pr.Terms();

            Assert.IsTrue(te.Next());
            Assert.AreEqual("field1:brown", te.Term().ToString());
            td.Seek(te.Term());
            Assert.IsTrue(td.Next());
            Assert.AreEqual(0, td.Doc());
            Assert.IsFalse(td.Next());
            Assert.IsTrue(te.Next());
            Assert.AreEqual("field1:fox", te.Term().ToString());
            td.Seek(te.Term());
            Assert.IsTrue(td.Next());
            Assert.AreEqual(0, td.Doc());
            Assert.IsFalse(td.Next());
            Assert.IsTrue(te.Next());
            Assert.AreEqual("field1:jumps", te.Term().ToString());
            td.Seek(te.Term());
            Assert.IsTrue(td.Next());
            Assert.AreEqual(0, td.Doc());
            Assert.IsFalse(td.Next());
            Assert.IsTrue(te.Next());
            Assert.AreEqual("field1:quick", te.Term().ToString());
            td.Seek(te.Term());
            Assert.IsTrue(td.Next());
            Assert.AreEqual(0, td.Doc());
            Assert.IsFalse(td.Next());
            Assert.IsTrue(te.Next());
            Assert.AreEqual("field1:the", te.Term().ToString());
            td.Seek(te.Term());
            Assert.IsTrue(td.Next());
            Assert.AreEqual(0, td.Doc());
            Assert.IsFalse(td.Next());
            Assert.IsTrue(te.Next());
            Assert.AreEqual("field2:brown", te.Term().ToString());
            td.Seek(te.Term());
            Assert.IsTrue(td.Next());
            Assert.AreEqual(0, td.Doc());
            Assert.IsFalse(td.Next());
            Assert.IsTrue(te.Next());
            Assert.AreEqual("field2:fox", te.Term().ToString());
            td.Seek(te.Term());
            Assert.IsTrue(td.Next());
            Assert.AreEqual(0, td.Doc());
            Assert.IsFalse(td.Next());
            Assert.IsTrue(te.Next());
            Assert.AreEqual("field2:jumps", te.Term().ToString());
            td.Seek(te.Term());
            Assert.IsTrue(td.Next());
            Assert.AreEqual(0, td.Doc());
            Assert.IsFalse(td.Next());
            Assert.IsTrue(te.Next());
            Assert.AreEqual("field2:quick", te.Term().ToString());
            td.Seek(te.Term());
            Assert.IsTrue(td.Next());
            Assert.AreEqual(0, td.Doc());
            Assert.IsFalse(td.Next());
            Assert.IsTrue(te.Next());
            Assert.AreEqual("field2:the", te.Term().ToString());
            td.Seek(te.Term());
            Assert.IsTrue(td.Next());
            Assert.AreEqual(0, td.Doc());
            Assert.IsFalse(td.Next());
            Assert.IsTrue(te.Next());
            Assert.AreEqual("field3:dog", te.Term().ToString());
            td.Seek(te.Term());
            Assert.IsTrue(td.Next());
            Assert.AreEqual(0, td.Doc());
            Assert.IsFalse(td.Next());
            Assert.IsTrue(te.Next());
            Assert.AreEqual("field3:fox", te.Term().ToString());
            td.Seek(te.Term());
            Assert.IsTrue(td.Next());
            Assert.AreEqual(0, td.Doc());
            Assert.IsFalse(td.Next());
            Assert.IsTrue(te.Next());
            Assert.AreEqual("field3:jumps", te.Term().ToString());
            td.Seek(te.Term());
            Assert.IsTrue(td.Next());
            Assert.AreEqual(0, td.Doc());
            Assert.IsFalse(td.Next());
            Assert.IsTrue(te.Next());
            Assert.AreEqual("field3:lazy", te.Term().ToString());
            td.Seek(te.Term());
            Assert.IsTrue(td.Next());
            Assert.AreEqual(0, td.Doc());
            Assert.IsFalse(td.Next());
            Assert.IsTrue(te.Next());
            Assert.AreEqual("field3:over", te.Term().ToString());
            td.Seek(te.Term());
            Assert.IsTrue(td.Next());
            Assert.AreEqual(0, td.Doc());
            Assert.IsFalse(td.Next());
            Assert.IsTrue(te.Next());
            Assert.AreEqual("field3:the", te.Term().ToString());
            td.Seek(te.Term());
            Assert.IsTrue(td.Next());
            Assert.AreEqual(0, td.Doc());
            Assert.IsFalse(td.Next());
            Assert.IsFalse(te.Next());
        }
Ejemplo n.º 28
0
			/// <summary>
			/// Positions this instance on the term the supplied enumeration currently
			/// points at, by delegating to the Seek(Term) overload.
			/// </summary>
			/// <param name="termEnum">Enumeration whose current term is used as the seek target.</param>
			public virtual void  Seek(TermEnum termEnum)
			{
				Term target = termEnum.Term();
				Seek(target);
			}
Ejemplo n.º 29
0
			/// <summary>
			/// Advances to the next term. While the current field still has terms the
			/// current enumeration is simply stepped; once it runs out, the enumeration
			/// is closed and the following fields (in key order of
			/// Enclosing_Instance.fieldToReader) are tried until one yields a term.
			/// </summary>
			/// <returns>
			/// true if positioned on a term; false if there is no current enumeration
			/// or no remaining field produces a term.
			/// </returns>
			public override bool Next()
			{
				if (termEnum == null)
					return false;
				
				// another term in this field?
				// (field names are compared by reference, not by value)
				if (termEnum.Next() && (System.Object) termEnum.Term().Field() == (System.Object) field)
					return true; // yes, keep going
				
				termEnum.Close(); // close old termEnum
				
				// find the next field with terms, if any
				if (fieldIterator == null)
				{
                    // Build a snapshot of every entry whose key is >= the current field
                    // (the equivalent of a "tail map" starting at the current field).
                    System.Collections.Comparer comparer = System.Collections.Comparer.Default;
                    System.Collections.SortedList newList = new System.Collections.SortedList();
                    if (Enclosing_Instance.fieldToReader != null)
                    {
                        int count = Enclosing_Instance.fieldToReader.Count;
                        int index = 0;
                        // Bounded scan: stop at the end of the key list instead of
                        // indexing past it when every key sorts before the current field.
                        while (index < count && comparer.Compare(Enclosing_Instance.fieldToReader.GetKey(index), field) < 0)
                            index++;
                        for (; index < count; index++)
                        {
                            System.Object key = Enclosing_Instance.fieldToReader.GetKey(index);
                            newList.Add(key, Enclosing_Instance.fieldToReader[key]);
                        }
                    }

                    fieldIterator = newList.Keys.GetEnumerator();
                    // Skip the current field to get to the next one. The result is
                    // deliberately ignored: on an empty snapshot the loop below simply
                    // finds nothing, and Current is not read because it would throw.
                    fieldIterator.MoveNext();
				}
				while (fieldIterator.MoveNext())
				{
					field = ((System.String) fieldIterator.Current);
					termEnum = ((IndexReader) Enclosing_Instance.fieldToReader[field]).Terms(new Term(field));
					Term term = termEnum.Term();
					if (term != null && (System.Object) term.Field() == (System.Object) field)
						return true;
					else
						termEnum.Close(); // this reader has no term in the field; try the next field
				}
				
				return false; // no more fields
			}
Ejemplo n.º 30
0
 /// <summary>Returns whatever term the wrapped enumeration (in_Renamed) reports.</summary>
 public override Term Term()
 {
     Term current = in_Renamed.Term();
     return current;
 }
Ejemplo n.º 31
0
        // FIXME: OG: remove hard-coded file names
        /// <summary>
        /// Manual round-trip check and timing run for the term-info table:
        /// reads one word per line from "words.txt", writes a table with
        /// pseudo-random (fixed-seed) values to the "test.store" directory, reads it
        /// back both sequentially and by lookup, and throws System.Exception on the
        /// first mismatch. Timings are written to the console in milliseconds.
        /// </summary>
        public static void  Test()
        {
            System.IO.FileInfo file = new System.IO.FileInfo("words.txt");
            System.Console.Out.WriteLine(" reading word file containing " + file.Length + " bytes");

            System.DateTime start = System.DateTime.Now;

            System.Collections.ArrayList keys = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
            System.IO.FileStream         ws   = new System.IO.FileStream(file.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
            // One reader over the stream is enough; it uses the system default encoding.
            System.IO.StreamReader       wr   = new System.IO.StreamReader(ws, System.Text.Encoding.Default);

            try
            {
                for (System.String key = wr.ReadLine(); key != null; key = wr.ReadLine())
                {
                    keys.Add(new Term("word", key));
                }
            }
            finally
            {
                // Closing the reader also closes the underlying file stream.
                wr.Close();
            }

            System.DateTime end = System.DateTime.Now;

            System.Console.Out.Write(ElapsedMilliseconds(start, end));
            System.Console.Out.WriteLine(" milliseconds to read " + keys.Count + " words");

            start = System.DateTime.Now;

            // Fixed seed so that every run generates the same values.
            System.Random gen = new System.Random((System.Int32) 1251971);
            long          fp  = (gen.Next() & 0xF) + 1;
            long          pp  = (gen.Next() & 0xF) + 1;

            int[]  docFreqs     = new int[keys.Count];
            long[] freqPointers = new long[keys.Count];
            long[] proxPointers = new long[keys.Count];
            for (int i = 0; i < keys.Count; i++)
            {
                docFreqs[i]     = (gen.Next() & 0xF) + 1;
                freqPointers[i] = fp;
                proxPointers[i] = pp;
                // Pointers grow by 1..16 per entry, so both sequences are strictly increasing.
                fp += (gen.Next() & 0xF) + 1;
                pp += (gen.Next() & 0xF) + 1;
            }

            end = System.DateTime.Now;

            System.Console.Out.Write(ElapsedMilliseconds(start, end));
            System.Console.Out.WriteLine(" milliseconds to generate values");

            start = System.DateTime.Now;

            Directory  store = FSDirectory.GetDirectory("test.store", true);
            FieldInfos fis   = new FieldInfos();

            TermInfosWriter writer = new TermInfosWriter(store, "words", fis);

            fis.Add("word", false);

            for (int i = 0; i < keys.Count; i++)
            {
                writer.Add((Term)keys[i], new TermInfo(docFreqs[i], freqPointers[i], proxPointers[i]));
            }

            writer.Close();

            end = System.DateTime.Now;

            System.Console.Out.Write(ElapsedMilliseconds(start, end));
            System.Console.Out.WriteLine(" milliseconds to write table");

            System.Console.Out.WriteLine(" table occupies " + store.FileLength("words.tis") + " bytes");

            start = System.DateTime.Now;

            TermInfosReader reader = new TermInfosReader(store, "words", fis);

            end = System.DateTime.Now;

            System.Console.Out.Write(ElapsedMilliseconds(start, end));
            System.Console.Out.WriteLine(" milliseconds to open table");

            start = System.DateTime.Now;

            // Pass 1: sequential enumeration must return the terms in the order written.
            SegmentTermEnum enumerator = reader.Terms();

            for (int i = 0; i < keys.Count; i++)
            {
                enumerator.Next();
                Term key = (Term)keys[i];
                if (!key.Equals(enumerator.Term()))
                {
                    throw new System.Exception("wrong term: " + enumerator.Term() + ", expected: " + key + " at " + i);
                }
                TermInfo ti = enumerator.TermInfo();
                if (ti.docFreq != docFreqs[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
                }
                if (ti.freqPointer != freqPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
                }
                if (ti.proxPointer != proxPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
                }
            }

            end = System.DateTime.Now;

            System.Console.Out.Write(ElapsedMilliseconds(start, end));
            System.Console.Out.WriteLine(" milliseconds to iterate over " + keys.Count + " words");

            start = System.DateTime.Now;

            // Pass 2: look every term up individually.
            for (int i = 0; i < keys.Count; i++)
            {
                Term     key = (Term)keys[i];
                TermInfo ti  = reader.Get(key);
                if (ti.docFreq != docFreqs[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
                }
                if (ti.freqPointer != freqPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
                }
                if (ti.proxPointer != proxPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
                }
            }

            end = System.DateTime.Now;

            System.Console.Out.Write(ElapsedMilliseconds(start, end) / (float)keys.Count);
            System.Console.Out.WriteLine(" average milliseconds per lookup");

            TermEnum e = reader.Terms(new Term("word", "azz"));

            System.Console.Out.WriteLine("Word after azz is " + e.Term().text);

            e.Close();

            reader.Close();

            store.Close();
        }

        /// <summary>
        /// Whole milliseconds between two timestamps. DateTime.Ticks counts 100 ns
        /// units, so the raw tick difference must be scaled before it is reported
        /// as milliseconds.
        /// </summary>
        private static long ElapsedMilliseconds(System.DateTime start, System.DateTime end)
        {
            return (end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond;
        }
Ejemplo n.º 32
0
			/// <summary>
			/// Seeks to the term the supplied enumeration is positioned on. When the
			/// enumeration is a MultiTermEnum it is remembered in tenum, but only if it
			/// shares this instance's topReader; otherwise tenum is cleared.
			/// </summary>
			/// <param name="termEnum">Enumeration whose current term is the seek target.</param>
			public virtual void  Seek(TermEnum termEnum)
			{
				Seek(termEnum.Term());

				MultiTermEnum candidate = termEnum as MultiTermEnum;
				if (candidate == null)
				{
					return; // not a multi-reader enumeration: leave tenum as Seek(Term) left it
				}

				tenum = (topReader == candidate.topReader) ? candidate : null;
			}
Ejemplo n.º 33
0
        /// <summary>
        /// Test the term index: walks every term of the segment and verifies that
        /// document numbers are strictly increasing and below MaxDoc, that each
        /// frequency is positive, that positions are not below -1 and never
        /// decrease within a document, and that the docFreq stored for the term
        /// equals the documents seen plus the deleted documents counted.
        /// </summary>
        /// <param name="info">Segment description (not read by this method).</param>
        /// <param name="reader">Open reader over the segment under test.</param>
        /// <returns>
        /// Status with the term / frequency / position totals; when a check fails
        /// or any exception is thrown, the exception is stored in status.error
        /// instead of being propagated.
        /// </returns>
        private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
        {
            Status.TermIndexStatus status = new Status.TermIndexStatus();

            try
            {
                if (infoStream != null)
                {
                    infoStream.Write("    test: terms, freq, prox...");
                }

                TermEnum      termEnum      = reader.Terms();
                TermPositions termPositions = reader.TermPositions();

                // Used only to count up # deleted docs for this term
                MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

                int maxDoc = reader.MaxDoc();

                while (termEnum.Next())
                {
                    status.termCount++;
                    Term term    = termEnum.Term();
                    int  docFreq = termEnum.DocFreq();
                    termPositions.Seek(term);
                    int lastDoc = -1;
                    int freq0   = 0; // number of documents actually enumerated for this term
                    status.totFreq += docFreq;
                    while (termPositions.Next())
                    {
                        freq0++;
                        int doc  = termPositions.Doc();
                        int freq = termPositions.Freq();
                        if (doc <= lastDoc)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                        }
                        if (doc >= maxDoc)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                        }

                        lastDoc = doc;
                        if (freq <= 0)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                        }

                        int lastPos = -1;
                        status.totPos += freq;
                        for (int j = 0; j < freq; j++)
                        {
                            int pos = termPositions.NextPosition();
                            if (pos < -1)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                            }
                            if (pos < lastPos)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                            }
                            // Remember this position; without the update the ordering
                            // check above could never fail. Equal positions stay legal.
                            lastPos = pos;
                        }
                    }

                    // Now count how many deleted docs occurred in
                    // this term:
                    int delCount;
                    if (reader.HasDeletions())
                    {
                        myTermDocs.Seek(term);
                        // Drain the enumeration; only the delCount it accumulates is of interest.
                        while (myTermDocs.Next())
                        {
                        }
                        delCount = myTermDocs.delCount;
                    }
                    else
                    {
                        delCount = 0;
                    }

                    if (freq0 + delCount != docFreq)
                    {
                        throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
                    }
                }

                Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
            }
            catch (System.Exception e)
            {
                // Any failure is reported through the status object rather than rethrown.
                Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
                status.error = e;
                if (infoStream != null)
                {
                    infoStream.WriteLine(e.StackTrace);
                }
            }

            return(status);
        }
Ejemplo n.º 34
0
        /// <summary>
        /// Returns true if index is clean, else false. Every segment listed in the
        /// current segments file is opened and its norms, terms/frequencies/positions,
        /// stored fields and term vectors are tested; progress is written to
        /// out_Renamed. A segment that throws during testing is counted as broken.
        /// </summary>
        /// <param name="dir">Directory holding the index to check.</param>
        /// <param name="doFix">
        /// When true and at least one segment is broken, a new segments file that
        /// references only the healthy segments is written after a five second
        /// countdown; the documents of the broken segments are lost.
        /// </param>
        /// <returns>
        /// true only when no problem was detected. false when the segments file
        /// cannot be read, the format is newer than this tool supports, or any
        /// segment is broken (including the case where the fix was written).
        /// </returns>
        public static bool Check(Directory dir, bool doFix)
        {
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis = new SegmentInfos();

            try
            {
                sis.Read(dir);
            }
            catch (System.Exception t)
            {
                out_Renamed.WriteLine("ERROR: could not read any segments file in directory");
                out_Renamed.Write(t.StackTrace);
                out_Renamed.Flush();
                return(false);
            }

            int numSegments = sis.Count;

            System.String segmentsFileName = sis.GetCurrentSegmentFileName();
            IndexInput    input            = null;

            try
            {
                input = dir.OpenInput(segmentsFileName);
            }
            catch (System.Exception t)
            {
                out_Renamed.WriteLine("ERROR: could not open segments file in directory");
                out_Renamed.Write(t.StackTrace);
                out_Renamed.Flush();
                return(false);
            }
            int format = 0;

            try
            {
                format = input.ReadInt();
            }
            catch (System.Exception t)
            {
                out_Renamed.WriteLine("ERROR: could not read segment file version in directory");
                out_Renamed.Write(t.StackTrace);
                out_Renamed.Flush();
                return(false);
            }
            finally
            {
                if (input != null)
                {
                    input.Close();
                }
            }

            System.String sFormat = "";
            bool          skip    = false;

            // One single chain: without the "else" on the second test, an index in
            // SegmentInfos.FORMAT fell through to the final branch and was reported
            // as "[Lucene 1.3 or prior]".
            if (format == SegmentInfos.FORMAT)
            {
                sFormat = "FORMAT [Lucene Pre-2.1]";
            }
            else if (format == SegmentInfos.FORMAT_LOCKLESS)
            {
                sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
            }
            else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
            {
                sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
            }
            else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
            }
            else if (format < SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                // A value below the newest known constant is treated as a newer format.
                sFormat = "int=" + format + " [newer version of Lucene than this tool]";
                skip    = true;
            }
            else
            {
                sFormat = format + " [Lucene 1.3 or prior]";
            }

            out_Renamed.WriteLine("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat);

            if (skip)
            {
                out_Renamed.WriteLine("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                return(false);
            }

            // newSIS collects the segments that pass every test ("keepers").
            SegmentInfos newSIS = (SegmentInfos)sis.Clone();

            newSIS.Clear();
            bool changed         = false;
            int  totLoseDocCount = 0;
            int  numBadSegments  = 0;

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = sis.Info(i);
                out_Renamed.WriteLine("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
                // Documents lost if this segment is dropped; narrowed to the live
                // document count once the reader has been opened.
                int toLoseDocCount = info.docCount;

                SegmentReader reader = null;

                try
                {
                    out_Renamed.WriteLine("    compound=" + info.GetUseCompoundFile());
                    out_Renamed.WriteLine("    numFiles=" + info.Files().Count);
                    out_Renamed.WriteLine(String.Format(nf, "    size (MB)={0:f}", new Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
                    int docStoreOffset = info.GetDocStoreOffset();
                    if (docStoreOffset != -1)
                    {
                        out_Renamed.WriteLine("    docStoreOffset=" + docStoreOffset);
                        out_Renamed.WriteLine("    docStoreSegment=" + info.GetDocStoreSegment());
                        out_Renamed.WriteLine("    docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
                    }
                    System.String delFileName = info.GetDelFileName();
                    if (delFileName == null)
                    {
                        out_Renamed.WriteLine("    no deletions");
                    }
                    else
                    {
                        out_Renamed.WriteLine("    has deletions [delFileName=" + delFileName + "]");
                    }
                    out_Renamed.Write("    test: open reader.........");
                    reader = SegmentReader.Get(info);
                    int numDocs = reader.NumDocs();
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions())
                    {
                        out_Renamed.WriteLine("OK [" + (info.docCount - numDocs) + " deleted docs]");
                    }
                    else
                    {
                        out_Renamed.WriteLine("OK");
                    }

                    out_Renamed.Write("    test: fields, norms.......");
                    System.Collections.IDictionary fieldNames = (System.Collections.IDictionary)reader.GetFieldNames(IndexReader.FieldOption.ALL);
                    System.Collections.IEnumerator it         = fieldNames.Keys.GetEnumerator();
                    while (it.MoveNext())
                    {
                        System.String fieldName = (System.String)it.Current;
                        byte[]        b         = reader.Norms(fieldName);
                        if (b.Length != info.docCount)
                        {
                            throw new System.SystemException("norms for field \"" + fieldName + "\" is length " + b.Length + " != maxDoc " + info.docCount);
                        }
                    }
                    out_Renamed.WriteLine("OK [" + fieldNames.Count + " fields]");

                    out_Renamed.Write("    test: terms, freq, prox...");
                    TermEnum      termEnum      = reader.Terms();
                    TermPositions termPositions = reader.TermPositions();

                    // Used only to count up # deleted docs for this
                    // term
                    MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

                    long termCount = 0;
                    long totFreq   = 0;
                    long totPos    = 0;
                    while (termEnum.Next())
                    {
                        termCount++;
                        Term term    = termEnum.Term();
                        int  docFreq = termEnum.DocFreq();
                        termPositions.Seek(term);
                        int lastDoc = -1;
                        int freq0   = 0; // number of documents actually enumerated for this term
                        totFreq += docFreq;
                        while (termPositions.Next())
                        {
                            freq0++;
                            int doc  = termPositions.Doc();
                            int freq = termPositions.Freq();
                            if (doc <= lastDoc)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                            }
                            lastDoc = doc;
                            if (freq <= 0)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                            }

                            int lastPos = -1;
                            totPos += freq;
                            for (int j = 0; j < freq; j++)
                            {
                                int pos = termPositions.NextPosition();
                                if (pos < 0)
                                {
                                    throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                                }
                                // Positions may repeat but must never go backwards.
                                if (pos < lastPos)
                                {
                                    throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                                }
                                // Track the previous position; without this update the
                                // ordering check above could never fail.
                                lastPos = pos;
                            }
                        }

                        // Now count how many deleted docs occurred in
                        // this term:
                        int delCount;
                        if (reader.HasDeletions())
                        {
                            myTermDocs.Seek(term);
                            // Drain the enumeration; only the delCount it accumulates is of interest.
                            while (myTermDocs.Next())
                            {
                            }
                            delCount = myTermDocs.delCount;
                        }
                        else
                        {
                            delCount = 0;
                        }

                        if (freq0 + delCount != docFreq)
                        {
                            throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
                        }
                    }

                    out_Renamed.WriteLine("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");

                    out_Renamed.Write("    test: stored fields.......");
                    int  docCount  = 0;
                    long totFields = 0;
                    for (int j = 0; j < info.docCount; j++)
                    {
                        if (!reader.IsDeleted(j))
                        {
                            docCount++;
                            Document doc = reader.Document(j);
                            totFields += doc.GetFields().Count;
                        }
                    }

                    if (docCount != reader.NumDocs())
                    {
                        // Report the reader's count against the count actually seen
                        // (previously the same number was printed twice).
                        throw new System.SystemException("docCount=" + reader.NumDocs() + " but saw " + docCount + " undeleted docs");
                    }

                    out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total field count; avg {1:f} fields per doc]", new Object[] { totFields, (((float)totFields) / docCount) }));

                    out_Renamed.Write("    test: term vectors........");
                    int totVectors = 0;
                    for (int j = 0; j < info.docCount; j++)
                    {
                        if (!reader.IsDeleted(j))
                        {
                            TermFreqVector[] tfv = reader.GetTermFreqVectors(j);
                            if (tfv != null)
                            {
                                totVectors += tfv.Length;
                            }
                        }
                    }

                    out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new Object[] { totVectors, (((float)totVectors) / docCount) }));
                    out_Renamed.WriteLine("");
                }
                catch (System.Exception t)
                {
                    // Any exception while testing marks the whole segment as broken.
                    out_Renamed.WriteLine("FAILED");
                    System.String comment;
                    if (doFix)
                    {
                        comment = "will remove reference to this segment (-fix is specified)";
                    }
                    else
                    {
                        comment = "would remove reference to this segment (-fix was not specified)";
                    }
                    out_Renamed.WriteLine("    WARNING: " + comment + "; full exception:");
                    out_Renamed.Write(t.StackTrace);
                    out_Renamed.Flush();
                    out_Renamed.WriteLine("");
                    totLoseDocCount += toLoseDocCount;
                    numBadSegments++;
                    changed = true;
                    continue;
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.Close();
                    }
                }

                // Keeper
                newSIS.Add(info.Clone());
            }

            if (!changed)
            {
                out_Renamed.WriteLine("No problems were detected with this index.\n");
                return(true);
            }
            else
            {
                out_Renamed.WriteLine("WARNING: " + numBadSegments + " broken segments detected");
                if (doFix)
                {
                    out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents will be lost");
                }
                else
                {
                    out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents would be lost if -fix were specified");
                }
                out_Renamed.WriteLine();
            }

            if (doFix)
            {
                out_Renamed.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
                for (int i = 0; i < 5; i++)
                {
                    try
                    {
                        // 10000 * 1000 ticks of 100 ns each = one second per iteration.
                        System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000));
                    }
                    catch (System.Threading.ThreadInterruptedException)
                    {
                        // Re-assert the interrupt and repeat this second of the countdown.
                        SupportClass.ThreadClass.Current().Interrupt();
                        i--;
                        continue;
                    }

                    out_Renamed.WriteLine("  " + (5 - i) + "...");
                }
                out_Renamed.Write("Writing...");
                try
                {
                    newSIS.Write(dir);
                }
                catch (System.Exception t)
                {
                    out_Renamed.WriteLine("FAILED; exiting");
                    out_Renamed.Write(t.StackTrace);
                    out_Renamed.Flush();
                    return(false);
                }
                out_Renamed.WriteLine("OK");
                out_Renamed.WriteLine("Wrote new segments file \"" + newSIS.GetCurrentSegmentFileName() + "\"");
            }
            else
            {
                out_Renamed.WriteLine("NOTE: would write new segments file [-fix was not specified]");
            }
            out_Renamed.WriteLine("");

            // The index was not clean when checked, even if it has just been repaired.
            return(false);
        }
Ejemplo n.º 35
0
        /// <summary>
        /// Asserts that two index readers expose equivalent content.
        /// The check runs in two phases: first the terms of <paramref name="idField"/>
        /// are used to pair each document of <paramref name="r2"/> with a document of
        /// <paramref name="r1"/> (comparing stored fields and term vectors of each pair
        /// along the way), then the postings (doc id + frequency) of every term are compared.
        /// </summary>
        /// <param name="r1">First reader; its document ids are the reference id space.</param>
        /// <param name="r2">Second reader; its document ids are translated into r1's id space before comparison.</param>
        /// <param name="idField">
        /// Name of the field used to pair documents. The method asserts that each term of this
        /// field matches at most one document in each reader.
        /// </param>
        public static void  VerifyEquals(IndexReader r1, IndexReader r2, System.String idField)
        {
            Assert.AreEqual(r1.NumDocs(), r2.NumDocs());
            bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc());

            int[] r2r1 = new int[r2.MaxDoc()];             // r2 id to r1 id mapping

            TermDocs termDocs1 = null;
            TermDocs termDocs2 = null;
            TermEnum termEnum1 = null;
            TermEnum termEnum2 = null;

            try
            {
                termDocs1 = r1.TermDocs();
                termDocs2 = r2.TermDocs();

                // create mapping from id2 space to id1 space based on idField
                // (the field name is interned because it is compared by reference below)
                idField = StringHelper.Intern(idField);
                TermEnum termEnum = r1.Terms(new Term(idField, ""));

                try
                {
                    do
                    {
                        Term term = termEnum.Term();
                        if (term == null || (System.Object)term.Field() != (System.Object)idField)
                        {
                            break;
                        }

                        termDocs1.Seek(termEnum);
                        if (!termDocs1.Next())
                        {
                            // This doc is deleted and wasn't replaced
                            termDocs2.Seek(termEnum);
                            Assert.IsFalse(termDocs2.Next());
                            continue;
                        }

                        // exactly one document per id term is expected in each reader
                        int id1 = termDocs1.Doc();
                        Assert.IsFalse(termDocs1.Next());

                        termDocs2.Seek(termEnum);
                        Assert.IsTrue(termDocs2.Next());
                        int id2 = termDocs2.Doc();
                        Assert.IsFalse(termDocs2.Next());

                        r2r1[id2] = id1;

                        // verify stored fields are equivalent
                        try
                        {
                            VerifyEquals(r1.Document(id1), r2.Document(id2));
                        }
                        catch (System.Exception)
                        {
                            System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
                            System.Console.Out.WriteLine("  d1=" + r1.Document(id1));
                            System.Console.Out.WriteLine("  d2=" + r2.Document(id2));
                            // bare rethrow keeps the original stack trace of the failure
                            throw;
                        }

                        try
                        {
                            // verify term vectors are equivalent
                            VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2));
                        }
                        catch (System.Exception)
                        {
                            System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
                            TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1);
                            System.Console.Out.WriteLine("  d1=" + tv1);
                            if (tv1 != null)
                            {
                                for (int i = 0; i < tv1.Length; i++)
                                {
                                    System.Console.Out.WriteLine("    " + i + ": " + tv1[i]);
                                }
                            }

                            TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2);
                            System.Console.Out.WriteLine("  d2=" + tv2);
                            if (tv2 != null)
                            {
                                for (int i = 0; i < tv2.Length; i++)
                                {
                                    System.Console.Out.WriteLine("    " + i + ": " + tv2[i]);
                                }
                            }

                            // bare rethrow keeps the original stack trace of the failure
                            throw;
                        }
                    }while (termEnum.Next());
                }
                finally
                {
                    // release the id-term enumeration even when an assertion fails
                    termEnum.Close();
                }

                // Verify postings
                termEnum1 = r1.Terms(new Term("", ""));
                termEnum2 = r2.Terms(new Term("", ""));

                // pack both doc and freq into single element for easy sorting
                long[] info1 = new long[r1.NumDocs()];
                long[] info2 = new long[r2.NumDocs()];

                for (; ;)
                {
                    Term term1, term2;

                    // iterate until we get some docs
                    int len1;
                    for (; ;)
                    {
                        len1  = 0;
                        term1 = termEnum1.Term();
                        if (term1 == null)
                        {
                            break;
                        }
                        termDocs1.Seek(termEnum1);
                        while (termDocs1.Next())
                        {
                            int d1 = termDocs1.Doc();
                            int f1 = termDocs1.Freq();
                            // doc id in the high 32 bits, frequency in the low 32 bits
                            info1[len1] = (((long)d1) << 32) | f1;
                            len1++;
                        }
                        if (len1 > 0)
                        {
                            break;
                        }
                        if (!termEnum1.Next())
                        {
                            break;
                        }
                    }

                    // iterate until we get some docs
                    int len2;
                    for (; ;)
                    {
                        len2  = 0;
                        term2 = termEnum2.Term();
                        if (term2 == null)
                        {
                            break;
                        }
                        termDocs2.Seek(termEnum2);
                        while (termDocs2.Next())
                        {
                            int d2 = termDocs2.Doc();
                            int f2 = termDocs2.Freq();
                            // translate the r2 doc id into r1's id space before packing
                            info2[len2] = (((long)r2r1[d2]) << 32) | f2;
                            len2++;
                        }
                        if (len2 > 0)
                        {
                            break;
                        }
                        if (!termEnum2.Next())
                        {
                            break;
                        }
                    }

                    if (!hasDeletes)
                    {
                        Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq());
                    }

                    Assert.AreEqual(len1, len2);
                    if (len1 == 0)
                    {
                        break;                     // no more terms
                    }
                    Assert.AreEqual(term1, term2);

                    // sort info2 to get it into ascending docid
                    // (after remapping, r2's postings are no longer guaranteed to be in r1 doc order)
                    System.Array.Sort(info2, 0, len2);

                    // now compare
                    for (int i = 0; i < len1; i++)
                    {
                        Assert.AreEqual(info1[i], info2[i]);
                    }

                    termEnum1.Next();
                    termEnum2.Next();
                }
            }
            finally
            {
                // release enumerators and postings cursors regardless of assertion outcome
                if (termEnum1 != null)
                {
                    termEnum1.Close();
                }
                if (termEnum2 != null)
                {
                    termEnum2.Close();
                }
                if (termDocs1 != null)
                {
                    termDocs1.Close();
                }
                if (termDocs2 != null)
                {
                    termDocs2.Close();
                }
            }
        }