Ejemplo n.º 1
0
        public virtual void TestBasic()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document          doc = new Document();
            FieldType         ft  = new FieldType(TextField.TYPE_NOT_STORED);

            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            Field f = NewField("foo", "this is a test test", ft);

            doc.Add(f);
            for (int i = 0; i < 100; i++)
            {
                w.AddDocument(doc);
            }

            IndexReader reader = w.Reader;

            w.Dispose();

            Assert.IsNull(MultiFields.GetTermPositionsEnum(reader, null, "foo", new BytesRef("test")));

            DocsEnum de = TestUtil.Docs(Random(), reader, "foo", new BytesRef("test"), null, null, DocsEnum.FLAG_FREQS);

            while (de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                Assert.AreEqual(2, de.Freq());
            }

            reader.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 2
0
        public virtual void TestDocsOnlyFreq()
        {
            // tests that when fields are indexed with DOCS_ONLY, the Codec
            // returns 1 in docsEnum.Freq()
            Directory   dir    = NewDirectory();
            Random      random = Random();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
            // we don't need many documents to assert this, but don't use one document either
            int numDocs = AtLeast(random, 50);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("f", "doc", Store.NO));
                writer.AddDocument(doc);
            }
            writer.Dispose();

            Term            term   = new Term("f", new BytesRef("doc"));
            DirectoryReader reader = DirectoryReader.Open(dir);

            foreach (AtomicReaderContext ctx in reader.Leaves)
            {
                DocsEnum de = ((AtomicReader)ctx.Reader).TermDocsEnum(term);
                while (de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                {
                    Assert.AreEqual(1, de.Freq(), "wrong freq for doc " + de.DocID());
                }
            }
            reader.Dispose();

            dir.Dispose();
        }
Ejemplo n.º 3
0
        public virtual void TestTermDocs(int indexDivisor)
        {
            //After adding the document, we should be able to read it back in
            SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random()));

            Assert.IsTrue(reader != null);
            Assert.AreEqual(indexDivisor, reader.TermInfosIndexDivisor);

            TermsEnum terms = reader.Fields.Terms(DocHelper.TEXT_FIELD_2_KEY).Iterator(null);

            terms.SeekCeil(new BytesRef("field"));
            DocsEnum termDocs = TestUtil.Docs(Random(), terms, reader.LiveDocs, null, DocsEnum.FLAG_FREQS);

            if (termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                int docId = termDocs.DocID();
                Assert.IsTrue(docId == 0);
                int freq = termDocs.Freq();
                Assert.IsTrue(freq == 3);
            }
            reader.Dispose();
        }
Ejemplo n.º 4
0
        /// <summary>
        /// checks advancing docs
        /// </summary>
        public void AssertDocsSkippingEquals(string info, IndexReader leftReader, int docFreq, DocsEnum leftDocs, DocsEnum rightDocs, bool hasFreqs)
        {
            if (leftDocs == null)
            {
                Assert.IsNull(rightDocs);
                return;
            }
            int docid = -1;
            int averageGap = leftReader.MaxDoc / (1 + docFreq);
            int skipInterval = 16;

            while (true)
            {
                if (Random().NextBoolean())
                {
                    // nextDoc()
                    docid = leftDocs.NextDoc();
                    Assert.AreEqual(docid, rightDocs.NextDoc(), info);
                }
                else
                {
                    // advance()
                    int skip = docid + (int)Math.Ceiling(Math.Abs(skipInterval + Random().NextDouble() * averageGap));
                    docid = leftDocs.Advance(skip);
                    Assert.AreEqual(docid, rightDocs.Advance(skip), info);
                }

                if (docid == DocIdSetIterator.NO_MORE_DOCS)
                {
                    return;
                }
                if (hasFreqs)
                {
                    Assert.AreEqual(leftDocs.Freq(), rightDocs.Freq(), info);
                }
            }
        }
Ejemplo n.º 5
0
 /// <summary>
 /// checks docs + freqs, sequentially
 /// </summary>
 public void AssertDocsEnumEquals(string info, DocsEnum leftDocs, DocsEnum rightDocs, bool hasFreqs)
 {
     if (leftDocs == null)
     {
         Assert.IsNull(rightDocs);
         return;
     }
     Assert.AreEqual(-1, leftDocs.DocID(), info);
     Assert.AreEqual(-1, rightDocs.DocID(), info);
     int docid;
     while ((docid = leftDocs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
     {
         Assert.AreEqual(docid, rightDocs.NextDoc(), info);
         if (hasFreqs)
         {
             Assert.AreEqual(leftDocs.Freq(), rightDocs.Freq(), info);
         }
     }
     Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, rightDocs.NextDoc(), info);
 }
Ejemplo n.º 6
0
        public virtual void TestRandom()
        {
            // token -> docID -> tokens
            IDictionary <string, IDictionary <int?, IList <Token> > > actualTokens = new Dictionary <string, IDictionary <int?, IList <Token> > >();

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, Iwc);

            int numDocs = AtLeast(20);
            //final int numDocs = AtLeast(5);

            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);

            // TODO: randomize what IndexOptions we use; also test
            // changing this up in one IW buffered segment...:
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            if (Random().NextBoolean())
            {
                ft.StoreTermVectors         = true;
                ft.StoreTermVectorOffsets   = Random().NextBoolean();
                ft.StoreTermVectorPositions = Random().NextBoolean();
            }

            for (int docCount = 0; docCount < numDocs; docCount++)
            {
                Document doc = new Document();
                doc.Add(new IntField("id", docCount, Field.Store.NO));
                IList <Token> tokens    = new List <Token>();
                int           numTokens = AtLeast(100);
                //final int numTokens = AtLeast(20);
                int pos    = -1;
                int offset = 0;
                //System.out.println("doc id=" + docCount);
                for (int tokenCount = 0; tokenCount < numTokens; tokenCount++)
                {
                    string text;
                    if (Random().NextBoolean())
                    {
                        text = "a";
                    }
                    else if (Random().NextBoolean())
                    {
                        text = "b";
                    }
                    else if (Random().NextBoolean())
                    {
                        text = "c";
                    }
                    else
                    {
                        text = "d";
                    }

                    int posIncr = Random().NextBoolean() ? 1 : Random().Next(5);
                    if (tokenCount == 0 && posIncr == 0)
                    {
                        posIncr = 1;
                    }
                    int offIncr     = Random().NextBoolean() ? 0 : Random().Next(5);
                    int tokenOffset = Random().Next(5);

                    Token token = MakeToken(text, posIncr, offset + offIncr, offset + offIncr + tokenOffset);
                    if (!actualTokens.ContainsKey(text))
                    {
                        actualTokens[text] = new Dictionary <int?, IList <Token> >();
                    }
                    IDictionary <int?, IList <Token> > postingsByDoc = actualTokens[text];
                    if (!postingsByDoc.ContainsKey(docCount))
                    {
                        postingsByDoc[docCount] = new List <Token>();
                    }
                    postingsByDoc[docCount].Add(token);
                    tokens.Add(token);
                    pos += posIncr;
                    // stuff abs position into type:
                    token.Type = "" + pos;
                    offset    += offIncr + tokenOffset;
                    //System.out.println("  " + token + " posIncr=" + token.getPositionIncrement() + " pos=" + pos + " off=" + token.StartOffset() + "/" + token.EndOffset() + " (freq=" + postingsByDoc.Get(docCount).Size() + ")");
                }
                doc.Add(new Field("content", new CannedTokenStream(tokens.ToArray()), ft));
                w.AddDocument(doc);
            }
            DirectoryReader r = w.Reader;

            w.Dispose();

            string[] terms = new string[] { "a", "b", "c", "d" };
            foreach (AtomicReaderContext ctx in r.Leaves)
            {
                // TODO: improve this
                AtomicReader sub = (AtomicReader)ctx.Reader;
                //System.out.println("\nsub=" + sub);
                TermsEnum            termsEnum                  = sub.Fields.Terms("content").Iterator(null);
                DocsEnum             docs                       = null;
                DocsAndPositionsEnum docsAndPositions           = null;
                DocsAndPositionsEnum docsAndPositionsAndOffsets = null;
                FieldCache.Ints      docIDToID                  = FieldCache.DEFAULT.GetInts(sub, "id", false);
                foreach (string term in terms)
                {
                    //System.out.println("  term=" + term);
                    if (termsEnum.SeekExact(new BytesRef(term)))
                    {
                        docs = termsEnum.Docs(null, docs);
                        Assert.IsNotNull(docs);
                        int doc;
                        //System.out.println("    doc/freq");
                        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " docID=" + doc + " " + expected.Size() + " freq");
                            Assert.IsNotNull(expected);
                            Assert.AreEqual(expected.Count, docs.Freq());
                        }

                        // explicitly exclude offsets here
                        docsAndPositions = termsEnum.DocsAndPositions(null, docsAndPositions, DocsAndPositionsEnum.FLAG_PAYLOADS);
                        Assert.IsNotNull(docsAndPositions);
                        //System.out.println("    doc/freq/pos");
                        while ((doc = docsAndPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq");
                            Assert.IsNotNull(expected);
                            Assert.AreEqual(expected.Count, docsAndPositions.Freq());
                            foreach (Token token in expected)
                            {
                                int pos = Convert.ToInt32(token.Type);
                                //System.out.println("        pos=" + pos);
                                Assert.AreEqual(pos, docsAndPositions.NextPosition());
                            }
                        }

                        docsAndPositionsAndOffsets = termsEnum.DocsAndPositions(null, docsAndPositions);
                        Assert.IsNotNull(docsAndPositionsAndOffsets);
                        //System.out.println("    doc/freq/pos/offs");
                        while ((doc = docsAndPositionsAndOffsets.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq");
                            Assert.IsNotNull(expected);
                            Assert.AreEqual(expected.Count, docsAndPositionsAndOffsets.Freq());
                            foreach (Token token in expected)
                            {
                                int pos = Convert.ToInt32(token.Type);
                                //System.out.println("        pos=" + pos);
                                Assert.AreEqual(pos, docsAndPositionsAndOffsets.NextPosition());
                                Assert.AreEqual(token.StartOffset(), docsAndPositionsAndOffsets.StartOffset());
                                Assert.AreEqual(token.EndOffset(), docsAndPositionsAndOffsets.EndOffset());
                            }
                        }
                    }
                }
                // TODO: test advance:
            }
            r.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 7
0
        public virtual void TestRandomDocs()
        {
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
            int numDocs = AtLeast(49);
            int max     = 15678;
            int term    = Random().Next(max);

            int[]     freqInDoc  = new int[numDocs];
            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);

            customType.OmitNorms = true;
            for (int i = 0; i < numDocs; i++)
            {
                Document      doc     = new Document();
                StringBuilder builder = new StringBuilder();
                for (int j = 0; j < 199; j++)
                {
                    int nextInt = Random().Next(max);
                    builder.Append(nextInt).Append(' ');
                    if (nextInt == term)
                    {
                        freqInDoc[i]++;
                    }
                }
                doc.Add(NewField(FieldName, builder.ToString(), customType));
                writer.AddDocument(doc);
            }

            IndexReader reader = writer.Reader;

            writer.Dispose();

            int num = AtLeast(13);

            for (int i = 0; i < num; i++)
            {
                BytesRef           bytes            = new BytesRef("" + term);
                IndexReaderContext topReaderContext = reader.Context;
                foreach (AtomicReaderContext context in topReaderContext.Leaves)
                {
                    int      maxDoc   = context.AtomicReader.MaxDoc;
                    DocsEnum docsEnum = TestUtil.Docs(Random(), context.Reader, FieldName, bytes, null, null, DocsEnum.FLAG_FREQS);
                    if (FindNext(freqInDoc, context.DocBase, context.DocBase + maxDoc) == int.MaxValue)
                    {
                        Assert.IsNull(docsEnum);
                        continue;
                    }
                    Assert.IsNotNull(docsEnum);
                    docsEnum.NextDoc();
                    for (int j = 0; j < maxDoc; j++)
                    {
                        if (freqInDoc[context.DocBase + j] != 0)
                        {
                            Assert.AreEqual(j, docsEnum.DocID());
                            Assert.AreEqual(docsEnum.Freq(), freqInDoc[context.DocBase + j]);
                            if (i % 2 == 0 && Random().Next(10) == 0)
                            {
                                int next       = FindNext(freqInDoc, context.DocBase + j + 1, context.DocBase + maxDoc) - context.DocBase;
                                int advancedTo = docsEnum.Advance(next);
                                if (next >= maxDoc)
                                {
                                    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, advancedTo);
                                }
                                else
                                {
                                    Assert.IsTrue(next >= advancedTo, "advanced to: " + advancedTo + " but should be <= " + next);
                                }
                            }
                            else
                            {
                                docsEnum.NextDoc();
                            }
                        }
                    }
                    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.DocID(), "DocBase: " + context.DocBase + " maxDoc: " + maxDoc + " " + docsEnum.GetType());
                }
            }

            reader.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Default merge impl: append documents, mapping around
        ///  deletes
        /// </summary>
        public virtual TermStats Merge(MergeState mergeState, FieldInfo.IndexOptions? indexOptions, DocsEnum postings, FixedBitSet visitedDocs)
        {
            int df = 0;
            long totTF = 0;

            if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY)
            {
                while (true)
                {
                    int doc = postings.NextDoc();
                    if (doc == DocIdSetIterator.NO_MORE_DOCS)
                    {
                        break;
                    }
                    visitedDocs.Set(doc);
                    this.StartDoc(doc, -1);
                    this.FinishDoc();
                    df++;
                }
                totTF = -1;
            }
            else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS)
            {
                while (true)
                {
                    int doc = postings.NextDoc();
                    if (doc == DocIdSetIterator.NO_MORE_DOCS)
                    {
                        break;
                    }
                    visitedDocs.Set(doc);
                    int freq = postings.Freq();
                    this.StartDoc(doc, freq);
                    this.FinishDoc();
                    df++;
                    totTF += freq;
                }
            }
            else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
            {
                var postingsEnum = (DocsAndPositionsEnum)postings;
                while (true)
                {
                    int doc = postingsEnum.NextDoc();
                    if (doc == DocIdSetIterator.NO_MORE_DOCS)
                    {
                        break;
                    }
                    visitedDocs.Set(doc);
                    int freq = postingsEnum.Freq();
                    this.StartDoc(doc, freq);
                    totTF += freq;
                    for (int i = 0; i < freq; i++)
                    {
                        int position = postingsEnum.NextPosition();
                        BytesRef payload = postingsEnum.Payload;
                        this.AddPosition(position, payload, -1, -1);
                    }
                    this.FinishDoc();
                    df++;
                }
            }
            else
            {
                Debug.Assert(indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
                var postingsEnum = (DocsAndPositionsEnum)postings;
                while (true)
                {
                    int doc = postingsEnum.NextDoc();
                    if (doc == DocIdSetIterator.NO_MORE_DOCS)
                    {
                        break;
                    }
                    visitedDocs.Set(doc);
                    int freq = postingsEnum.Freq();
                    this.StartDoc(doc, freq);
                    totTF += freq;
                    for (int i = 0; i < freq; i++)
                    {
                        int position = postingsEnum.NextPosition();
                        BytesRef payload = postingsEnum.Payload;
                        this.AddPosition(position, payload, postingsEnum.StartOffset(), postingsEnum.EndOffset());
                    }
                    this.FinishDoc();
                    df++;
                }
            }
            return new TermStats(df, indexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : totTF);
        }
Ejemplo n.º 9
0
        public virtual void TestSkipTo(int indexDivisor)
        {
            Directory   dir    = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

            Term ta = new Term("content", "aaa");

            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer, "aaa aaa aaa aaa");
            }

            Term tb = new Term("content", "bbb");

            for (int i = 0; i < 16; i++)
            {
                AddDoc(writer, "bbb bbb bbb bbb");
            }

            Term tc = new Term("content", "ccc");

            for (int i = 0; i < 50; i++)
            {
                AddDoc(writer, "ccc ccc ccc ccc");
            }

            // assure that we deal with a single segment
            writer.ForceMerge(1);
            writer.Dispose();

            IndexReader reader = DirectoryReader.Open(dir, indexDivisor);

            DocsEnum tdocs = TestUtil.Docs(Random(), reader, ta.Field(), new BytesRef(ta.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);

            // without optimization (assumption skipInterval == 16)

            // with next
            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(0, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(1, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Advance(2) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(2, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(4) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(4, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(9) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(9, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(10) != DocIdSetIterator.NO_MORE_DOCS);

            // without next
            tdocs = TestUtil.Docs(Random(), reader, ta.Field(), new BytesRef(ta.Text()), MultiFields.GetLiveDocs(reader), null, 0);

            Assert.IsTrue(tdocs.Advance(0) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(0, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(4) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(4, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(9) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(9, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(10) != DocIdSetIterator.NO_MORE_DOCS);

            // exactly skipInterval documents and therefore with optimization

            // with next
            tdocs = TestUtil.Docs(Random(), reader, tb.Field(), new BytesRef(tb.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);

            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(10, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(11, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Advance(12) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(12, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(15) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(15, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(24) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(24, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(25) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(25, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(26) != DocIdSetIterator.NO_MORE_DOCS);

            // without next
            tdocs = TestUtil.Docs(Random(), reader, tb.Field(), new BytesRef(tb.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);

            Assert.IsTrue(tdocs.Advance(5) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(10, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(15) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(15, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(24) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(24, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(25) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(25, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(26) != DocIdSetIterator.NO_MORE_DOCS);

            // much more than skipInterval documents and therefore with optimization

            // with next
            tdocs = TestUtil.Docs(Random(), reader, tc.Field(), new BytesRef(tc.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);

            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(26, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(27, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Advance(28) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(28, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(40) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(40, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(57) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(57, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(74) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(74, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(75) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(75, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(76) != DocIdSetIterator.NO_MORE_DOCS);

            //without next
            tdocs = TestUtil.Docs(Random(), reader, tc.Field(), new BytesRef(tc.Text()), MultiFields.GetLiveDocs(reader), null, 0);
            Assert.IsTrue(tdocs.Advance(5) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(26, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(40) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(40, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(57) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(57, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(74) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(74, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(75) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(75, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(76) != DocIdSetIterator.NO_MORE_DOCS);

            reader.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 10
0
        public static void VerifyEquals(Fields d1, Fields d2)
        {
            if (d1 == null)
            {
                Assert.IsTrue(d2 == null || d2.Size == 0);
                return;
            }
            Assert.IsTrue(d2 != null);

            IEnumerator <string> fieldsEnum2 = d2.GetEnumerator();

            foreach (string field1 in d1)
            {
                fieldsEnum2.MoveNext();
                string field2 = fieldsEnum2.Current;
                Assert.AreEqual(field1, field2);

                Terms terms1 = d1.Terms(field1);
                Assert.IsNotNull(terms1);
                TermsEnum termsEnum1 = terms1.Iterator(null);

                Terms terms2 = d2.Terms(field2);
                Assert.IsNotNull(terms2);
                TermsEnum termsEnum2 = terms2.Iterator(null);

                DocsAndPositionsEnum dpEnum1 = null;
                DocsAndPositionsEnum dpEnum2 = null;
                DocsEnum             dEnum1  = null;
                DocsEnum             dEnum2  = null;

                BytesRef term1;
                while ((term1 = termsEnum1.Next()) != null)
                {
                    BytesRef term2 = termsEnum2.Next();
                    Assert.AreEqual(term1, term2);
                    Assert.AreEqual(termsEnum1.TotalTermFreq(), termsEnum2.TotalTermFreq());

                    dpEnum1 = termsEnum1.DocsAndPositions(null, dpEnum1);
                    dpEnum2 = termsEnum2.DocsAndPositions(null, dpEnum2);
                    if (dpEnum1 != null)
                    {
                        Assert.IsNotNull(dpEnum2);
                        int docID1 = dpEnum1.NextDoc();
                        dpEnum2.NextDoc();
                        // docIDs are not supposed to be equal
                        //int docID2 = dpEnum2.NextDoc();
                        //Assert.AreEqual(docID1, docID2);
                        Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS);

                        int freq1 = dpEnum1.Freq();
                        int freq2 = dpEnum2.Freq();
                        Assert.AreEqual(freq1, freq2);
                        IOffsetAttribute offsetAtt1 = dpEnum1.Attributes().HasAttribute <IOffsetAttribute>() ? dpEnum1.Attributes().GetAttribute <IOffsetAttribute>() : null;
                        IOffsetAttribute offsetAtt2 = dpEnum2.Attributes().HasAttribute <IOffsetAttribute>() ? dpEnum2.Attributes().GetAttribute <IOffsetAttribute>() : null;

                        if (offsetAtt1 != null)
                        {
                            Assert.IsNotNull(offsetAtt2);
                        }
                        else
                        {
                            Assert.IsNull(offsetAtt2);
                        }

                        for (int posUpto = 0; posUpto < freq1; posUpto++)
                        {
                            int pos1 = dpEnum1.NextPosition();
                            int pos2 = dpEnum2.NextPosition();
                            Assert.AreEqual(pos1, pos2);
                            if (offsetAtt1 != null)
                            {
                                Assert.AreEqual(offsetAtt1.StartOffset(), offsetAtt2.StartOffset());
                                Assert.AreEqual(offsetAtt1.EndOffset(), offsetAtt2.EndOffset());
                            }
                        }
                        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum1.NextDoc());
                        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum2.NextDoc());
                    }
                    else
                    {
                        dEnum1 = TestUtil.Docs(Random(), termsEnum1, null, dEnum1, DocsEnum.FLAG_FREQS);
                        dEnum2 = TestUtil.Docs(Random(), termsEnum2, null, dEnum2, DocsEnum.FLAG_FREQS);
                        Assert.IsNotNull(dEnum1);
                        Assert.IsNotNull(dEnum2);
                        int docID1 = dEnum1.NextDoc();
                        dEnum2.NextDoc();
                        // docIDs are not supposed to be equal
                        //int docID2 = dEnum2.NextDoc();
                        //Assert.AreEqual(docID1, docID2);
                        Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS);
                        int freq1 = dEnum1.Freq();
                        int freq2 = dEnum2.Freq();
                        Assert.AreEqual(freq1, freq2);
                        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum1.NextDoc());
                        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum2.NextDoc());
                    }
                }

                Assert.IsNull(termsEnum2.Next());
            }
            Assert.IsFalse(fieldsEnum2.MoveNext());
        }
Ejemplo n.º 11
0
        public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField)
        {
            if (VERBOSE)
            {
                Console.WriteLine("\nr1 docs:");
                PrintDocs(r1);
                Console.WriteLine("\nr2 docs:");
                PrintDocs(r2);
            }
            if (r1.NumDocs != r2.NumDocs)
            {
                Debug.Assert(false, "r1.NumDocs=" + r1.NumDocs + " vs r2.NumDocs=" + r2.NumDocs);
            }
            bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc);

            int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping

            // create mapping from id2 space to id2 based on idField
            Fields f1 = MultiFields.GetFields(r1);

            if (f1 == null)
            {
                // make sure r2 is empty
                Assert.IsNull(MultiFields.GetFields(r2));
                return;
            }
            Terms terms1 = f1.Terms(idField);

            if (terms1 == null)
            {
                Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).Terms(idField) == null);
                return;
            }
            TermsEnum termsEnum = terms1.Iterator(null);

            Bits liveDocs1 = MultiFields.GetLiveDocs(r1);
            Bits liveDocs2 = MultiFields.GetLiveDocs(r2);

            Fields fields = MultiFields.GetFields(r2);

            if (fields == null)
            {
                // make sure r1 is in fact empty (eg has only all
                // deleted docs):
                Bits     liveDocs = MultiFields.GetLiveDocs(r1);
                DocsEnum docs     = null;
                while (termsEnum.Next() != null)
                {
                    docs = TestUtil.Docs(Random(), termsEnum, liveDocs, docs, DocsEnum.FLAG_NONE);
                    while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        Assert.Fail("r1 is not empty but r2 is");
                    }
                }
                return;
            }
            Terms     terms2     = fields.Terms(idField);
            TermsEnum termsEnum2 = terms2.Iterator(null);

            DocsEnum termDocs1 = null;
            DocsEnum termDocs2 = null;

            while (true)
            {
                BytesRef term = termsEnum.Next();
                //System.out.println("TEST: match id term=" + term);
                if (term == null)
                {
                    break;
                }

                termDocs1 = TestUtil.Docs(Random(), termsEnum, liveDocs1, termDocs1, DocsEnum.FLAG_NONE);
                if (termsEnum2.SeekExact(term))
                {
                    termDocs2 = TestUtil.Docs(Random(), termsEnum2, liveDocs2, termDocs2, DocsEnum.FLAG_NONE);
                }
                else
                {
                    termDocs2 = null;
                }

                if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
                {
                    // this doc is deleted and wasn't replaced
                    Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS);
                    continue;
                }

                int id1 = termDocs1.DocID();
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc());

                Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                int id2 = termDocs2.DocID();
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc());

                r2r1[id2] = id1;

                // verify stored fields are equivalent
                try
                {
                    VerifyEquals(r1.Document(id1), r2.Document(id2));
                }
                catch (Exception t)
                {
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
                    Console.WriteLine("  d1=" + r1.Document(id1));
                    Console.WriteLine("  d2=" + r2.Document(id2));
                    throw t;
                }

                try
                {
                    // verify term vectors are equivalent
                    VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2));
                }
                catch (Exception e)
                {
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
                    Fields tv1 = r1.GetTermVectors(id1);
                    Console.WriteLine("  d1=" + tv1);
                    if (tv1 != null)
                    {
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv1)
                        {
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv1.Terms(field);
                            Assert.IsNotNull(terms3);
                            TermsEnum termsEnum3 = terms3.Iterator(null);
                            BytesRef  term2;
                            while ((term2 = termsEnum3.Next()) != null)
                            {
                                Console.WriteLine("      " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq());
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                {
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq();
                                    Console.WriteLine("        doc=" + dpEnum.DocID() + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                    {
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    }
                                }
                                else
                                {
                                    dEnum = TestUtil.Docs(Random(), termsEnum3, null, dEnum, DocsEnum.FLAG_FREQS);
                                    Assert.IsNotNull(dEnum);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq();
                                    Console.WriteLine("        doc=" + dEnum.DocID() + " freq=" + freq);
                                }
                            }
                        }
                    }

                    Fields tv2 = r2.GetTermVectors(id2);
                    Console.WriteLine("  d2=" + tv2);
                    if (tv2 != null)
                    {
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv2)
                        {
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv2.Terms(field);
                            Assert.IsNotNull(terms3);
                            TermsEnum termsEnum3 = terms3.Iterator(null);
                            BytesRef  term2;
                            while ((term2 = termsEnum3.Next()) != null)
                            {
                                Console.WriteLine("      " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq());
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                {
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq();
                                    Console.WriteLine("        doc=" + dpEnum.DocID() + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                    {
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    }
                                }
                                else
                                {
                                    dEnum = TestUtil.Docs(Random(), termsEnum3, null, dEnum, DocsEnum.FLAG_FREQS);
                                    Assert.IsNotNull(dEnum);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq();
                                    Console.WriteLine("        doc=" + dEnum.DocID() + " freq=" + freq);
                                }
                            }
                        }
                    }

                    throw e;
                }
            }

            //System.out.println("TEST: done match id");

            // Verify postings
            //System.out.println("TEST: create te1");
            Fields fields1 = MultiFields.GetFields(r1);
            IEnumerator <string> fields1Enum = fields1.GetEnumerator();
            Fields fields2 = MultiFields.GetFields(r2);
            IEnumerator <string> fields2Enum = fields2.GetEnumerator();

            string    field1 = null, field2 = null;
            TermsEnum termsEnum1 = null;

            termsEnum2 = null;
            DocsEnum docs1 = null, docs2 = null;

            // pack both doc and freq into single element for easy sorting
            long[] info1 = new long[r1.NumDocs];
            long[] info2 = new long[r2.NumDocs];

            for (; ;)
            {
                BytesRef term1 = null, term2 = null;

                // iterate until we get some docs
                int len1;
                for (; ;)
                {
                    len1 = 0;
                    if (termsEnum1 == null)
                    {
                        if (!fields1Enum.MoveNext())
                        {
                            break;
                        }
                        field1 = fields1Enum.Current;
                        Terms terms = fields1.Terms(field1);
                        if (terms == null)
                        {
                            continue;
                        }
                        termsEnum1 = terms.Iterator(null);
                    }
                    term1 = termsEnum1.Next();
                    if (term1 == null)
                    {
                        // no more terms in this field
                        termsEnum1 = null;
                        continue;
                    }

                    //System.out.println("TEST: term1=" + term1);
                    docs1 = TestUtil.Docs(Random(), termsEnum1, liveDocs1, docs1, DocsEnum.FLAG_FREQS);
                    while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        int d = docs1.DocID();
                        int f = docs1.Freq();
                        info1[len1] = (((long)d) << 32) | f;
                        len1++;
                    }
                    if (len1 > 0)
                    {
                        break;
                    }
                }

                // iterate until we get some docs
                int len2;
                for (; ;)
                {
                    len2 = 0;
                    if (termsEnum2 == null)
                    {
                        if (!fields2Enum.MoveNext())
                        {
                            break;
                        }
                        field2 = fields2Enum.Current;
                        Terms terms = fields2.Terms(field2);
                        if (terms == null)
                        {
                            continue;
                        }
                        termsEnum2 = terms.Iterator(null);
                    }
                    term2 = termsEnum2.Next();
                    if (term2 == null)
                    {
                        // no more terms in this field
                        termsEnum2 = null;
                        continue;
                    }

                    //System.out.println("TEST: term1=" + term1);
                    docs2 = TestUtil.Docs(Random(), termsEnum2, liveDocs2, docs2, DocsEnum.FLAG_FREQS);
                    while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        int d = r2r1[docs2.DocID()];
                        int f = docs2.Freq();
                        info2[len2] = (((long)d) << 32) | f;
                        len2++;
                    }
                    if (len2 > 0)
                    {
                        break;
                    }
                }

                Assert.AreEqual(len1, len2);
                if (len1 == 0) // no more terms
                {
                    break;
                }

                Assert.AreEqual(field1, field2);
                Assert.IsTrue(term1.BytesEquals(term2));

                if (!hasDeletes)
                {
                    Assert.AreEqual(termsEnum1.DocFreq(), termsEnum2.DocFreq());
                }

                Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes);

                // sort info2 to get it into ascending docid
                Array.Sort(info2, 0, len2);

                // now compare
                for (int i = 0; i < len1; i++)
                {
                    Assert.AreEqual(info1[i], info2[i], "i=" + i + " len=" + len1 + " d1=" + ((long)((ulong)info1[i] >> 32)) + " f1=" + (info1[i] & int.MaxValue) + " d2=" + ((long)((ulong)info2[i] >> 32)) + " f2=" + (info2[i] & int.MaxValue) + " field=" + field1 + " term=" + term1.Utf8ToString());
                }
            }
        }
Ejemplo n.º 12
0
 public override int Freq()
 {
     return(Current.Freq());
 }
Ejemplo n.º 13
0
 public override int Freq()
 {
     return(@in.Freq());
 }