Пример #1
0
        /// <summary>
        /// Returns the number of documents containing the term
        /// <code>t</code>.  this method returns 0 if the term or
        /// field does not exists.  this method does not take into
        /// account deleted documents that have not yet been merged
        /// away.
        /// </summary>
        public override sealed long TotalTermFreq(Term term)
        {
            Fields fields = Fields;

            if (fields == null)
            {
                return(0);
            }
            Terms terms = fields.Terms(term.Field());

            if (terms == null)
            {
                return(0);
            }
            TermsEnum termsEnum = terms.Iterator(null);

            if (termsEnum.SeekExact(term.Bytes()))
            {
                return(termsEnum.TotalTermFreq());
            }
            else
            {
                return(0);
            }
        }
Пример #2
0
        public virtual void TestEndOffsetPositionWithCachingTokenFilter()
        {
            Directory   dir            = NewDirectory();
            Analyzer    analyzer       = new MockAnalyzer(Random());
            IndexWriter w              = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document    doc            = new Document();
            IOException priorException = null;
            TokenStream stream         = analyzer.TokenStream("field", new StringReader("abcd   "));

            try
            {
                stream.Reset(); // TODO: weird to reset before wrapping with CachingTokenFilter... correct?
                TokenStream cachedStream = new CachingTokenFilter(stream);
                FieldType   customType   = new FieldType(TextField.TYPE_NOT_STORED);
                customType.StoreTermVectors         = true;
                customType.StoreTermVectorPositions = true;
                customType.StoreTermVectorOffsets   = true;
                Field f = new Field("field", cachedStream, customType);
                doc.Add(f);
                doc.Add(f);
                w.AddDocument(doc);
            }
            catch (IOException e)
            {
                priorException = e;
            }
            finally
            {
                IOUtils.CloseWhileHandlingException(priorException, stream);
            }
            w.Dispose();

            IndexReader r         = DirectoryReader.Open(dir);
            TermsEnum   termsEnum = r.GetTermVectors(0).Terms("field").Iterator(null);

            Assert.IsNotNull(termsEnum.Next());
            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);

            Assert.AreEqual(2, termsEnum.TotalTermFreq());

            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(0, dpEnum.StartOffset());
            Assert.AreEqual(4, dpEnum.EndOffset());

            dpEnum.NextPosition();
            Assert.AreEqual(8, dpEnum.StartOffset());
            Assert.AreEqual(12, dpEnum.EndOffset());
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

            r.Dispose();
            dir.Dispose();
        }
Пример #3
0
        public virtual void TestEndOffsetPositionStandardEmptyField2()
        {
            Directory   dir        = NewDirectory();
            IndexWriter w          = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            Document    doc        = new Document();
            FieldType   customType = new FieldType(TextField.TYPE_NOT_STORED);

            customType.StoreTermVectors         = true;
            customType.StoreTermVectorPositions = true;
            customType.StoreTermVectorOffsets   = true;

            Field f = NewField("field", "abcd", customType);

            doc.Add(f);
            doc.Add(NewField("field", "", customType));

            Field f2 = NewField("field", "crunch", customType);

            doc.Add(f2);

            w.AddDocument(doc);
            w.Dispose();

            IndexReader r         = DirectoryReader.Open(dir);
            TermsEnum   termsEnum = r.GetTermVectors(0).Terms("field").Iterator(null);

            Assert.IsNotNull(termsEnum.Next());
            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);

            Assert.AreEqual(1, (int)termsEnum.TotalTermFreq());
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(0, dpEnum.StartOffset());
            Assert.AreEqual(4, dpEnum.EndOffset());

            Assert.IsNotNull(termsEnum.Next());
            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(6, dpEnum.StartOffset());
            Assert.AreEqual(12, dpEnum.EndOffset());

            r.Dispose();
            dir.Dispose();
        }
Пример #4
0
        public virtual void TestTermVectors()
        {
            Terms result = Reader.GetTermVectors(0).Terms(DocHelper.TEXT_FIELD_2_KEY);

            Assert.IsNotNull(result);
            Assert.AreEqual(3, result.Size());
            TermsEnum termsEnum = result.Iterator(null);

            while (termsEnum.Next() != null)
            {
                string term = termsEnum.Term().Utf8ToString();
                int    freq = (int)termsEnum.TotalTermFreq();
                Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != -1);
                Assert.IsTrue(freq > 0);
            }

            Fields results = Reader.GetTermVectors(0);

            Assert.IsTrue(results != null);
            Assert.AreEqual(3, results.Size, "We do not have 3 term freq vectors");
        }
Пример #5
0
 /// <summary>
 /// checks term-level statistics
 /// </summary>
 public void AssertTermStatsEquals(string info, TermsEnum leftTermsEnum, TermsEnum rightTermsEnum)
 {
     Assert.AreEqual(leftTermsEnum.DocFreq(), rightTermsEnum.DocFreq(), info);
     if (leftTermsEnum.TotalTermFreq() != -1 && rightTermsEnum.TotalTermFreq() != -1)
     {
         Assert.AreEqual(leftTermsEnum.TotalTermFreq(), rightTermsEnum.TotalTermFreq(), info);
     }
 }
Пример #6
0
 public override long TotalTermFreq()
 {
     return(TermsEnum.TotalTermFreq());
 }
Пример #7
0
        public virtual void Test()
        {
            IList <string> postingsList   = new List <string>();
            int            numTerms       = AtLeast(300);
            int            maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20);
            bool           isSimpleText   = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"));

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1))
            {
                // Otherwise test can take way too long (> 2 hours)
                numTerms /= 2;
            }
            if (VERBOSE)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + numTerms);
            }
            for (int i = 0; i < numTerms; i++)
            {
                string term = Convert.ToString(i);
                for (int j = 0; j < i; j++)
                {
                    postingsList.Add(term);
                }
            }

            postingsList = CollectionsHelper.Shuffle(postingsList);

            ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList);

            Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName()));

            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int threadCount = TestUtil.NextInt(Random(), 1, 5);

            if (VERBOSE)
            {
                Console.WriteLine("config: " + iw.w.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            Field     prototype = NewTextField("field", "", Field.Store.NO);
            FieldType fieldType = new FieldType((FieldType)prototype.FieldType);

            if (Random().NextBoolean())
            {
                fieldType.OmitNorms = true;
            }
            int options = Random().Next(3);

            if (options == 0)
            {
                fieldType.IndexOptions     = FieldInfo.IndexOptions.DOCS_AND_FREQS; // we dont actually need positions
                fieldType.StoreTermVectors = true;                                  // but enforce term vectors when we do this so we check SOMETHING
            }
            else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field")))
            {
                fieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            }
            // else just positions

            ThreadClass[]  threads     = new ThreadClass[threadCount];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                Random   threadRandom = new Random(Random().Next());
                Document document     = new Document();
                Field    field        = new Field("field", "", fieldType);
                document.Add(field);
                threads[threadID] = new ThreadAnonymousInnerClassHelper(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field);
                threads[threadID].Start();
            }
            startingGun.Signal();
            foreach (ThreadClass t in threads)
            {
                t.Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.Reader;

            Assert.AreEqual(1, ir.Leaves.Count);
            AtomicReader air   = (AtomicReader)ir.Leaves[0].Reader;
            Terms        terms = air.Terms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
            Assert.AreEqual(numTerms - 1, terms.Size());
            TermsEnum termsEnum = terms.Iterator(null);
            BytesRef  termBR;

            while ((termBR = termsEnum.Next()) != null)
            {
                int value = Convert.ToInt32(termBR.Utf8ToString());
                Assert.AreEqual(value, termsEnum.TotalTermFreq());
                // don't really need to check more than this, as CheckIndex
                // will verify that totalTermFreq == total number of positions seen
                // from a docsAndPositionsEnum.
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
 /// <summary>
 /// checks term-level statistics
 /// </summary>
 public virtual void AssertTermStats(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum)
 {
     Assert.AreEqual(leftTermsEnum.DocFreq(), rightTermsEnum.DocFreq());
     if (leftTermsEnum.TotalTermFreq() != -1 && rightTermsEnum.TotalTermFreq() != -1)
     {
         Assert.AreEqual(leftTermsEnum.TotalTermFreq(), rightTermsEnum.TotalTermFreq());
     }
 }
Пример #9
0
        public virtual void TestDoubleOffsetCounting()
        {
            Directory   dir        = NewDirectory();
            IndexWriter w          = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            Document    doc        = new Document();
            FieldType   customType = new FieldType(StringField.TYPE_NOT_STORED);

            customType.StoreTermVectors         = true;
            customType.StoreTermVectorPositions = true;
            customType.StoreTermVectorOffsets   = true;
            Field f = NewField("field", "abcd", customType);

            doc.Add(f);
            doc.Add(f);
            Field f2 = NewField("field", "", customType);

            doc.Add(f2);
            doc.Add(f);
            w.AddDocument(doc);
            w.Dispose();

            IndexReader r      = DirectoryReader.Open(dir);
            Terms       vector = r.GetTermVectors(0).Terms("field");

            Assert.IsNotNull(vector);
            TermsEnum termsEnum = vector.Iterator(null);

            Assert.IsNotNull(termsEnum.Next());
            Assert.AreEqual("", termsEnum.Term().Utf8ToString());

            // Token "" occurred once
            Assert.AreEqual(1, termsEnum.TotalTermFreq());

            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);

            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(8, dpEnum.StartOffset());
            Assert.AreEqual(8, dpEnum.EndOffset());
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

            // Token "abcd" occurred three times
            Assert.AreEqual(new BytesRef("abcd"), termsEnum.Next());
            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
            Assert.AreEqual(3, termsEnum.TotalTermFreq());

            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(0, dpEnum.StartOffset());
            Assert.AreEqual(4, dpEnum.EndOffset());

            dpEnum.NextPosition();
            Assert.AreEqual(4, dpEnum.StartOffset());
            Assert.AreEqual(8, dpEnum.EndOffset());

            dpEnum.NextPosition();
            Assert.AreEqual(8, dpEnum.StartOffset());
            Assert.AreEqual(12, dpEnum.EndOffset());

            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
            Assert.IsNull(termsEnum.Next());
            r.Dispose();
            dir.Dispose();
        }
Пример #10
0
        /// <summary>
        /// Creates a <seealso cref="TermContext"/> from a top-level <seealso cref="IndexReaderContext"/> and the
        /// given <seealso cref="Term"/>. this method will lookup the given term in all context's leaf readers
        /// and register each of the readers containing the term in the returned <seealso cref="TermContext"/>
        /// using the leaf reader's ordinal.
        /// <p>
        /// Note: the given context must be a top-level context.
        /// </summary>
        public static TermContext Build(IndexReaderContext context, Term term)
        {
            Debug.Assert(context != null && context.IsTopLevel);
            string      field = term.Field();
            BytesRef    bytes = term.Bytes();
            TermContext perReaderTermState = new TermContext(context);

            //if (DEBUG) System.out.println("prts.build term=" + term);
            foreach (AtomicReaderContext ctx in context.Leaves)
            {
                //if (DEBUG) System.out.println("  r=" + leaves[i].reader);
                Fields fields = ctx.AtomicReader.Fields;
                if (fields != null)
                {
                    Terms terms = fields.Terms(field);
                    if (terms != null)
                    {
                        TermsEnum termsEnum = terms.Iterator(null);
                        if (termsEnum.SeekExact(bytes))
                        {
                            TermState termState = termsEnum.TermState();
                            //if (DEBUG) System.out.println("    found");
                            perReaderTermState.Register(termState, ctx.Ord, termsEnum.DocFreq(), termsEnum.TotalTermFreq());
                        }
                    }
                }
            }
            return(perReaderTermState);
        }
Пример #11
0
        public virtual void TestArbitraryFields()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir);

            int NUM_DOCS = AtLeast(27);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + NUM_DOCS + " docs");
            }
            int[] fieldsPerDoc = new int[NUM_DOCS];
            int   baseCount    = 0;

            for (int docCount = 0; docCount < NUM_DOCS; docCount++)
            {
                int fieldCount = TestUtil.NextInt(Random(), 1, 17);
                fieldsPerDoc[docCount] = fieldCount - 1;

                int finalDocCount = docCount;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: " + fieldCount + " fields in doc " + docCount);
                }

                int finalBaseCount = baseCount;
                baseCount += fieldCount - 1;

                w.AddDocument(new IterableAnonymousInnerClassHelper(this, fieldCount, finalDocCount, finalBaseCount));
            }

            IndexReader r = w.Reader;

            w.Dispose();

            IndexSearcher s       = NewSearcher(r);
            int           counter = 0;

            for (int id = 0; id < NUM_DOCS; id++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: verify doc id=" + id + " (" + fieldsPerDoc[id] + " fields) counter=" + counter);
                }
                TopDocs hits = s.Search(new TermQuery(new Term("id", "" + id)), 1);
                Assert.AreEqual(1, hits.TotalHits);
                int      docID      = hits.ScoreDocs[0].Doc;
                Document doc        = s.Doc(docID);
                int      endCounter = counter + fieldsPerDoc[id];
                while (counter < endCounter)
                {
                    string name    = "f" + counter;
                    int    fieldID = counter % 10;

                    bool stored  = (counter & 1) == 0 || fieldID == 3;
                    bool binary  = fieldID == 3;
                    bool indexed = fieldID != 3;

                    string stringValue;
                    if (fieldID != 3 && fieldID != 9)
                    {
                        stringValue = "text " + counter;
                    }
                    else
                    {
                        stringValue = null;
                    }

                    // stored:
                    if (stored)
                    {
                        IndexableField f = doc.GetField(name);
                        Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter);
                        if (binary)
                        {
                            Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter);
                            BytesRef b = f.BinaryValue();
                            Assert.IsNotNull(b);
                            Assert.AreEqual(10, b.Length);
                            for (int idx = 0; idx < 10; idx++)
                            {
                                Assert.AreEqual((byte)(idx + counter), b.Bytes[b.Offset + idx]);
                            }
                        }
                        else
                        {
                            Debug.Assert(stringValue != null);
                            Assert.AreEqual(stringValue, f.StringValue);
                        }
                    }

                    if (indexed)
                    {
                        bool tv = counter % 2 == 1 && fieldID != 9;
                        if (tv)
                        {
                            Terms tfv = r.GetTermVectors(docID).Terms(name);
                            Assert.IsNotNull(tfv);
                            TermsEnum termsEnum = tfv.Iterator(null);
                            Assert.AreEqual(new BytesRef("" + counter), termsEnum.Next());
                            Assert.AreEqual(1, termsEnum.TotalTermFreq());
                            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
                            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            Assert.AreEqual(1, dpEnum.Freq());
                            Assert.AreEqual(1, dpEnum.NextPosition());

                            Assert.AreEqual(new BytesRef("text"), termsEnum.Next());
                            Assert.AreEqual(1, termsEnum.TotalTermFreq());
                            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            Assert.AreEqual(1, dpEnum.Freq());
                            Assert.AreEqual(0, dpEnum.NextPosition());

                            Assert.IsNull(termsEnum.Next());

                            // TODO: offsets
                        }
                        else
                        {
                            Fields vectors = r.GetTermVectors(docID);
                            Assert.IsTrue(vectors == null || vectors.Terms(name) == null);
                        }

                        BooleanQuery bq = new BooleanQuery();
                        bq.Add(new TermQuery(new Term("id", "" + id)), BooleanClause.Occur.MUST);
                        bq.Add(new TermQuery(new Term(name, "text")), BooleanClause.Occur.MUST);
                        TopDocs hits2 = s.Search(bq, 1);
                        Assert.AreEqual(1, hits2.TotalHits);
                        Assert.AreEqual(docID, hits2.ScoreDocs[0].Doc);

                        bq = new BooleanQuery();
                        bq.Add(new TermQuery(new Term("id", "" + id)), BooleanClause.Occur.MUST);
                        bq.Add(new TermQuery(new Term(name, "" + counter)), BooleanClause.Occur.MUST);
                        TopDocs hits3 = s.Search(bq, 1);
                        Assert.AreEqual(1, hits3.TotalHits);
                        Assert.AreEqual(docID, hits3.ScoreDocs[0].Doc);
                    }

                    counter++;
                }
            }

            r.Dispose();
            dir.Dispose();
        }
Пример #12
0
        public virtual void TestMerge()
        {
            Codec       codec = Codec.Default;
            SegmentInfo si    = new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, -1, false, codec, null);

            SegmentMerger merger     = new SegmentMerger(Arrays.AsList <AtomicReader>(Reader1, Reader2), si, InfoStream.Default, MergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), NewIOContext(Random()), true);
            MergeState    mergeState = merger.Merge();
            int           docsMerged = mergeState.SegmentInfo.DocCount;

            Assert.IsTrue(docsMerged == 2);
            //Should be able to open a new SegmentReader against the new directory
            SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo(new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, docsMerged, false, codec, null), 0, -1L, -1L), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));

            Assert.IsTrue(mergedReader != null);
            Assert.IsTrue(mergedReader.NumDocs == 2);
            Document newDoc1 = mergedReader.Document(0);

            Assert.IsTrue(newDoc1 != null);
            //There are 2 unstored fields on the document
            Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(Doc1) - DocHelper.Unstored.Count);
            Document newDoc2 = mergedReader.Document(1);

            Assert.IsTrue(newDoc2 != null);
            Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(Doc2) - DocHelper.Unstored.Count);

            DocsEnum termDocs = TestUtil.Docs(Random(), mergedReader, DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field"), MultiFields.GetLiveDocs(mergedReader), null, 0);

            Assert.IsTrue(termDocs != null);
            Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

            int tvCount = 0;

            foreach (FieldInfo fieldInfo in mergedReader.FieldInfos)
            {
                if (fieldInfo.HasVectors())
                {
                    tvCount++;
                }
            }

            //System.out.println("stored size: " + stored.Size());
            Assert.AreEqual(3, tvCount, "We do not have 3 fields that were indexed with term vector");

            Terms vector = mergedReader.GetTermVectors(0).Terms(DocHelper.TEXT_FIELD_2_KEY);

            Assert.IsNotNull(vector);
            Assert.AreEqual(3, vector.Size());
            TermsEnum termsEnum = vector.Iterator(null);

            int i = 0;

            while (termsEnum.Next() != null)
            {
                string term = termsEnum.Term().Utf8ToString();
                int    freq = (int)termsEnum.TotalTermFreq();
                //System.out.println("Term: " + term + " Freq: " + freq);
                Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != -1);
                Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq);
                i++;
            }

            TestSegmentReader.CheckNorms(mergedReader);
            mergedReader.Dispose();
        }
Пример #13
0
        public static void VerifyEquals(Fields d1, Fields d2)
        {
            if (d1 == null)
            {
                Assert.IsTrue(d2 == null || d2.Size == 0);
                return;
            }
            Assert.IsTrue(d2 != null);

            IEnumerator <string> fieldsEnum2 = d2.GetEnumerator();

            foreach (string field1 in d1)
            {
                fieldsEnum2.MoveNext();
                string field2 = fieldsEnum2.Current;
                Assert.AreEqual(field1, field2);

                Terms terms1 = d1.Terms(field1);
                Assert.IsNotNull(terms1);
                TermsEnum termsEnum1 = terms1.Iterator(null);

                Terms terms2 = d2.Terms(field2);
                Assert.IsNotNull(terms2);
                TermsEnum termsEnum2 = terms2.Iterator(null);

                DocsAndPositionsEnum dpEnum1 = null;
                DocsAndPositionsEnum dpEnum2 = null;
                DocsEnum             dEnum1  = null;
                DocsEnum             dEnum2  = null;

                BytesRef term1;
                while ((term1 = termsEnum1.Next()) != null)
                {
                    BytesRef term2 = termsEnum2.Next();
                    Assert.AreEqual(term1, term2);
                    Assert.AreEqual(termsEnum1.TotalTermFreq(), termsEnum2.TotalTermFreq());

                    dpEnum1 = termsEnum1.DocsAndPositions(null, dpEnum1);
                    dpEnum2 = termsEnum2.DocsAndPositions(null, dpEnum2);
                    if (dpEnum1 != null)
                    {
                        Assert.IsNotNull(dpEnum2);
                        int docID1 = dpEnum1.NextDoc();
                        dpEnum2.NextDoc();
                        // docIDs are not supposed to be equal
                        //int docID2 = dpEnum2.NextDoc();
                        //Assert.AreEqual(docID1, docID2);
                        Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS);

                        int freq1 = dpEnum1.Freq();
                        int freq2 = dpEnum2.Freq();
                        Assert.AreEqual(freq1, freq2);
                        IOffsetAttribute offsetAtt1 = dpEnum1.Attributes().HasAttribute <IOffsetAttribute>() ? dpEnum1.Attributes().GetAttribute <IOffsetAttribute>() : null;
                        IOffsetAttribute offsetAtt2 = dpEnum2.Attributes().HasAttribute <IOffsetAttribute>() ? dpEnum2.Attributes().GetAttribute <IOffsetAttribute>() : null;

                        if (offsetAtt1 != null)
                        {
                            Assert.IsNotNull(offsetAtt2);
                        }
                        else
                        {
                            Assert.IsNull(offsetAtt2);
                        }

                        for (int posUpto = 0; posUpto < freq1; posUpto++)
                        {
                            int pos1 = dpEnum1.NextPosition();
                            int pos2 = dpEnum2.NextPosition();
                            Assert.AreEqual(pos1, pos2);
                            if (offsetAtt1 != null)
                            {
                                Assert.AreEqual(offsetAtt1.StartOffset(), offsetAtt2.StartOffset());
                                Assert.AreEqual(offsetAtt1.EndOffset(), offsetAtt2.EndOffset());
                            }
                        }
                        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum1.NextDoc());
                        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum2.NextDoc());
                    }
                    else
                    {
                        dEnum1 = TestUtil.Docs(Random(), termsEnum1, null, dEnum1, DocsEnum.FLAG_FREQS);
                        dEnum2 = TestUtil.Docs(Random(), termsEnum2, null, dEnum2, DocsEnum.FLAG_FREQS);
                        Assert.IsNotNull(dEnum1);
                        Assert.IsNotNull(dEnum2);
                        int docID1 = dEnum1.NextDoc();
                        dEnum2.NextDoc();
                        // docIDs are not supposed to be equal
                        //int docID2 = dEnum2.NextDoc();
                        //Assert.AreEqual(docID1, docID2);
                        Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS);
                        int freq1 = dEnum1.Freq();
                        int freq2 = dEnum2.Freq();
                        Assert.AreEqual(freq1, freq2);
                        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum1.NextDoc());
                        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum2.NextDoc());
                    }
                }

                Assert.IsNull(termsEnum2.Next());
            }
            Assert.IsFalse(fieldsEnum2.MoveNext());
        }
Пример #14
0
        public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField)
        {
            if (VERBOSE)
            {
                Console.WriteLine("\nr1 docs:");
                PrintDocs(r1);
                Console.WriteLine("\nr2 docs:");
                PrintDocs(r2);
            }
            if (r1.NumDocs != r2.NumDocs)
            {
                Debug.Assert(false, "r1.NumDocs=" + r1.NumDocs + " vs r2.NumDocs=" + r2.NumDocs);
            }
            bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc);

            int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping

            // create mapping from id2 space to id2 based on idField
            Fields f1 = MultiFields.GetFields(r1);

            if (f1 == null)
            {
                // make sure r2 is empty
                Assert.IsNull(MultiFields.GetFields(r2));
                return;
            }
            Terms terms1 = f1.Terms(idField);

            if (terms1 == null)
            {
                Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).Terms(idField) == null);
                return;
            }
            TermsEnum termsEnum = terms1.Iterator(null);

            Bits liveDocs1 = MultiFields.GetLiveDocs(r1);
            Bits liveDocs2 = MultiFields.GetLiveDocs(r2);

            Fields fields = MultiFields.GetFields(r2);

            if (fields == null)
            {
                // make sure r1 is in fact empty (eg has only all
                // deleted docs):
                Bits     liveDocs = MultiFields.GetLiveDocs(r1);
                DocsEnum docs     = null;
                while (termsEnum.Next() != null)
                {
                    docs = TestUtil.Docs(Random(), termsEnum, liveDocs, docs, DocsEnum.FLAG_NONE);
                    while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        Assert.Fail("r1 is not empty but r2 is");
                    }
                }
                return;
            }
            Terms     terms2     = fields.Terms(idField);
            TermsEnum termsEnum2 = terms2.Iterator(null);

            DocsEnum termDocs1 = null;
            DocsEnum termDocs2 = null;

            while (true)
            {
                BytesRef term = termsEnum.Next();
                //System.out.println("TEST: match id term=" + term);
                if (term == null)
                {
                    break;
                }

                termDocs1 = TestUtil.Docs(Random(), termsEnum, liveDocs1, termDocs1, DocsEnum.FLAG_NONE);
                if (termsEnum2.SeekExact(term))
                {
                    termDocs2 = TestUtil.Docs(Random(), termsEnum2, liveDocs2, termDocs2, DocsEnum.FLAG_NONE);
                }
                else
                {
                    termDocs2 = null;
                }

                if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
                {
                    // this doc is deleted and wasn't replaced
                    Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS);
                    continue;
                }

                int id1 = termDocs1.DocID();
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc());

                Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                int id2 = termDocs2.DocID();
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc());

                r2r1[id2] = id1;

                // verify stored fields are equivalent
                try
                {
                    VerifyEquals(r1.Document(id1), r2.Document(id2));
                }
                catch (Exception t)
                {
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
                    Console.WriteLine("  d1=" + r1.Document(id1));
                    Console.WriteLine("  d2=" + r2.Document(id2));
                    throw t;
                }

                try
                {
                    // verify term vectors are equivalent
                    VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2));
                }
                catch (Exception e)
                {
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
                    Fields tv1 = r1.GetTermVectors(id1);
                    Console.WriteLine("  d1=" + tv1);
                    if (tv1 != null)
                    {
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv1)
                        {
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv1.Terms(field);
                            Assert.IsNotNull(terms3);
                            TermsEnum termsEnum3 = terms3.Iterator(null);
                            BytesRef  term2;
                            while ((term2 = termsEnum3.Next()) != null)
                            {
                                Console.WriteLine("      " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq());
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                {
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq();
                                    Console.WriteLine("        doc=" + dpEnum.DocID() + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                    {
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    }
                                }
                                else
                                {
                                    dEnum = TestUtil.Docs(Random(), termsEnum3, null, dEnum, DocsEnum.FLAG_FREQS);
                                    Assert.IsNotNull(dEnum);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq();
                                    Console.WriteLine("        doc=" + dEnum.DocID() + " freq=" + freq);
                                }
                            }
                        }
                    }

                    Fields tv2 = r2.GetTermVectors(id2);
                    Console.WriteLine("  d2=" + tv2);
                    if (tv2 != null)
                    {
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv2)
                        {
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv2.Terms(field);
                            Assert.IsNotNull(terms3);
                            TermsEnum termsEnum3 = terms3.Iterator(null);
                            BytesRef  term2;
                            while ((term2 = termsEnum3.Next()) != null)
                            {
                                Console.WriteLine("      " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq());
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                {
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq();
                                    Console.WriteLine("        doc=" + dpEnum.DocID() + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                    {
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    }
                                }
                                else
                                {
                                    dEnum = TestUtil.Docs(Random(), termsEnum3, null, dEnum, DocsEnum.FLAG_FREQS);
                                    Assert.IsNotNull(dEnum);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq();
                                    Console.WriteLine("        doc=" + dEnum.DocID() + " freq=" + freq);
                                }
                            }
                        }
                    }

                    throw e;
                }
            }

            //System.out.println("TEST: done match id");

            // Verify postings
            //System.out.println("TEST: create te1");
            Fields fields1 = MultiFields.GetFields(r1);
            IEnumerator <string> fields1Enum = fields1.GetEnumerator();
            Fields fields2 = MultiFields.GetFields(r2);
            IEnumerator <string> fields2Enum = fields2.GetEnumerator();

            string    field1 = null, field2 = null;
            TermsEnum termsEnum1 = null;

            termsEnum2 = null;
            DocsEnum docs1 = null, docs2 = null;

            // pack both doc and freq into single element for easy sorting
            long[] info1 = new long[r1.NumDocs];
            long[] info2 = new long[r2.NumDocs];

            for (; ;)
            {
                BytesRef term1 = null, term2 = null;

                // iterate until we get some docs
                int len1;
                for (; ;)
                {
                    len1 = 0;
                    if (termsEnum1 == null)
                    {
                        if (!fields1Enum.MoveNext())
                        {
                            break;
                        }
                        field1 = fields1Enum.Current;
                        Terms terms = fields1.Terms(field1);
                        if (terms == null)
                        {
                            continue;
                        }
                        termsEnum1 = terms.Iterator(null);
                    }
                    term1 = termsEnum1.Next();
                    if (term1 == null)
                    {
                        // no more terms in this field
                        termsEnum1 = null;
                        continue;
                    }

                    //System.out.println("TEST: term1=" + term1);
                    docs1 = TestUtil.Docs(Random(), termsEnum1, liveDocs1, docs1, DocsEnum.FLAG_FREQS);
                    while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        int d = docs1.DocID();
                        int f = docs1.Freq();
                        info1[len1] = (((long)d) << 32) | f;
                        len1++;
                    }
                    if (len1 > 0)
                    {
                        break;
                    }
                }

                // iterate until we get some docs
                int len2;
                for (; ;)
                {
                    len2 = 0;
                    if (termsEnum2 == null)
                    {
                        if (!fields2Enum.MoveNext())
                        {
                            break;
                        }
                        field2 = fields2Enum.Current;
                        Terms terms = fields2.Terms(field2);
                        if (terms == null)
                        {
                            continue;
                        }
                        termsEnum2 = terms.Iterator(null);
                    }
                    term2 = termsEnum2.Next();
                    if (term2 == null)
                    {
                        // no more terms in this field
                        termsEnum2 = null;
                        continue;
                    }

                    //System.out.println("TEST: term1=" + term1);
                    docs2 = TestUtil.Docs(Random(), termsEnum2, liveDocs2, docs2, DocsEnum.FLAG_FREQS);
                    while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        int d = r2r1[docs2.DocID()];
                        int f = docs2.Freq();
                        info2[len2] = (((long)d) << 32) | f;
                        len2++;
                    }
                    if (len2 > 0)
                    {
                        break;
                    }
                }

                Assert.AreEqual(len1, len2);
                if (len1 == 0) // no more terms
                {
                    break;
                }

                Assert.AreEqual(field1, field2);
                Assert.IsTrue(term1.BytesEquals(term2));

                if (!hasDeletes)
                {
                    Assert.AreEqual(termsEnum1.DocFreq(), termsEnum2.DocFreq());
                }

                Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes);

                // sort info2 to get it into ascending docid
                Array.Sort(info2, 0, len2);

                // now compare
                for (int i = 0; i < len1; i++)
                {
                    Assert.AreEqual(info1[i], info2[i], "i=" + i + " len=" + len1 + " d1=" + ((long)((ulong)info1[i] >> 32)) + " f1=" + (info1[i] & int.MaxValue) + " d2=" + ((long)((ulong)info2[i] >> 32)) + " f2=" + (info2[i] & int.MaxValue) + " field=" + field1 + " term=" + term1.Utf8ToString());
                }
            }
        }
Пример #15
0
 public override long TotalTermFreq()
 {
     return(@in.TotalTermFreq());
 }