示例#1
0
        public override void VisitMatchingTerms(IndexReader reader, string fieldName, SimpleTerm.IMatchingTermVisitor mtv)
        {
            int   prefixLength = prefix.Length;
            Terms terms        = MultiFields.GetTerms(reader, fieldName);

            if (terms != null)
            {
                TermsEnum termsEnum = terms.GetEnumerator();

                TermsEnum.SeekStatus status = termsEnum.SeekCeil(prefixRef);
                BytesRef             text;
                if (status == TermsEnum.SeekStatus.FOUND)
                {
                    text = prefixRef;
                }
                else if (status == TermsEnum.SeekStatus.NOT_FOUND)
                {
                    text = termsEnum.Term;
                }
                else
                {
                    text = null;
                }

                while (true)
                {
                    if (text != null && StringHelper.StartsWith(text, prefixRef))
                    {
                        string textString = text.Utf8ToString();
                        Match  matcher    = pattern.Match(textString.Substring(prefixLength));
                        if (matcher.Success)
                        {
                            mtv.VisitMatchingTerm(new Term(fieldName, textString));
                        }
                    }
                    else
                    {
                        break;
                    }
                    if (termsEnum.MoveNext())
                    {
                        text = termsEnum.Term;
                    }
                    else
                    {
                        break;
                    }
                }
            }
        }
示例#2
0
        public virtual void TestDocsAndPositionsEnum()
        {
            TermsEnum termsEnum = reader.GetTerms(DOC_POSITIONS_FIELD).GetIterator(null);

            assertEquals(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(DOC_POSITIONS_TERM)));
            DocsAndPositionsEnum sortedPositions = termsEnum.DocsAndPositions(null, null);
            int doc;

            // test nextDoc()
            while ((doc = sortedPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
            {
                int freq = sortedPositions.Freq;
                assertEquals("incorrect freq for doc=" + doc, sortedValues[doc] / 10 + 1, freq);
                for (int i = 0; i < freq; i++)
                {
                    assertEquals("incorrect position for doc=" + doc, i, sortedPositions.NextPosition());
                    if (!DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat(DOC_POSITIONS_FIELD)))
                    {
                        assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.StartOffset);
                        assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.EndOffset);
                    }
                    assertEquals("incorrect payload for doc=" + doc, freq - i, int.Parse(sortedPositions.GetPayload().Utf8ToString(), CultureInfo.InvariantCulture));
                }
            }

            // test advance()
            DocsAndPositionsEnum reuse = sortedPositions;

            sortedPositions = termsEnum.DocsAndPositions(null, reuse);
            if (sortedPositions is SortingAtomicReader.SortingDocsAndPositionsEnum)
            {
                assertTrue(((SortingAtomicReader.SortingDocsAndPositionsEnum)sortedPositions).Reused(reuse)); // make sure reuse worked
            }
            doc = 0;
            while ((doc = sortedPositions.Advance(doc + TestUtil.NextInt32(Random, 1, 5))) != DocIdSetIterator.NO_MORE_DOCS)
            {
                int freq = sortedPositions.Freq;
                assertEquals("incorrect freq for doc=" + doc, sortedValues[doc] / 10 + 1, freq);
                for (int i = 0; i < freq; i++)
                {
                    assertEquals("incorrect position for doc=" + doc, i, sortedPositions.NextPosition());
                    if (!DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat(DOC_POSITIONS_FIELD)))
                    {
                        assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.StartOffset);
                        assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.EndOffset);
                    }
                    assertEquals("incorrect payload for doc=" + doc, freq - i, int.Parse(sortedPositions.GetPayload().Utf8ToString(), CultureInfo.InvariantCulture));
                }
            }
        }
示例#3
0
        public override void VisitMatchingTerms(IndexReader reader, string fieldName, IMatchingTermVisitor mtv)
        {
            /* inspired by PrefixQuery.rewrite(): */
            Terms terms = MultiFields.GetTerms(reader, fieldName);

            if (terms != null)
            {
                TermsEnum termsEnum = terms.GetIterator(null);

                bool skip = false;
                TermsEnum.SeekStatus status = termsEnum.SeekCeil(new BytesRef(Prefix));
                if (status == TermsEnum.SeekStatus.FOUND)
                {
                    mtv.VisitMatchingTerm(GetLucenePrefixTerm(fieldName));
                }
                else if (status == TermsEnum.SeekStatus.NOT_FOUND)
                {
                    if (StringHelper.StartsWith(termsEnum.Term, prefixRef))
                    {
                        mtv.VisitMatchingTerm(new Term(fieldName, termsEnum.Term.Utf8ToString()));
                    }
                    else
                    {
                        skip = true;
                    }
                }
                else
                {
                    // EOF
                    skip = true;
                }

                if (!skip)
                {
                    while (true)
                    {
                        BytesRef text = termsEnum.Next();
                        if (text != null && StringHelper.StartsWith(text, prefixRef))
                        {
                            mtv.VisitMatchingTerm(new Term(fieldName, text.Utf8ToString()));
                        }
                        else
                        {
                            break;
                        }
                    }
                }
            }
        }
示例#4
0
        public virtual void TestDocsEnum()
        {
            IBits     mappedLiveDocs = RandomLiveDocs(reader.MaxDoc);
            TermsEnum termsEnum      = reader.GetTerms(DOCS_ENUM_FIELD).GetIterator(null);

            assertEquals(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(DOCS_ENUM_TERM)));
            DocsEnum docs = termsEnum.Docs(mappedLiveDocs, null);

            int doc;
            int prev = -1;

            while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
            {
                assertTrue("document " + doc + " marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(doc));
                assertEquals("incorrect value; doc " + doc, sortedValues[doc], int.Parse(reader.Document(doc).Get(ID_FIELD)));
                while (++prev < doc)
                {
                    assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(prev));
                }
            }
            while (++prev < reader.MaxDoc)
            {
                assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(prev));
            }

            DocsEnum reuse = docs;

            docs = termsEnum.Docs(mappedLiveDocs, reuse);
            if (docs is SortingAtomicReader.SortingDocsEnum)
            {
                assertTrue(((SortingAtomicReader.SortingDocsEnum)docs).Reused(reuse)); // make sure reuse worked
            }
            doc  = -1;
            prev = -1;
            while ((doc = docs.Advance(doc + 1)) != DocIdSetIterator.NO_MORE_DOCS)
            {
                assertTrue("document " + doc + " marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(doc));
                assertEquals("incorrect value; doc " + doc, sortedValues[doc], int.Parse(reader.Document(doc).Get(ID_FIELD)));
                while (++prev < doc)
                {
                    assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(prev));
                }
            }
            while (++prev < reader.MaxDoc)
            {
                assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(prev));
            }
        }
示例#5
0
        public virtual void TestNoOrds()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter iw  = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            Document  doc = new Document();
            FieldType ft  = new FieldType(TextField.TYPE_NOT_STORED);

            ft.StoreTermVectors = true;
            doc.Add(new Field("foo", "this is a test", ft));
            iw.AddDocument(doc);
            AtomicReader ir    = GetOnlySegmentReader(iw.GetReader());
            Terms        terms = ir.GetTermVector(0, "foo");

            Assert.IsNotNull(terms);
            TermsEnum termsEnum = terms.GetEnumerator();

            Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("this")));
            try
            {
                var _ = termsEnum.Ord;
                Assert.Fail();
            }
#pragma warning disable 168
            catch (NotSupportedException expected)
#pragma warning restore 168
            {
                // expected exception
            }

            try
            {
                termsEnum.SeekExact(0);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (NotSupportedException expected)
#pragma warning restore 168
            {
                // expected exception
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
示例#6
0
        public override void VisitMatchingTerms(IndexReader reader, string fieldName, IMatchingTermVisitor mtv)
        {
            /* check term presence in index here for symmetry with other SimpleTerm's */
            Terms terms = MultiFields.GetTerms(reader, fieldName);

            if (terms != null)
            {
                TermsEnum termsEnum = terms.GetEnumerator();

                TermsEnum.SeekStatus status = termsEnum.SeekCeil(new BytesRef(TermText));
                if (status == TermsEnum.SeekStatus.FOUND)
                {
                    mtv.VisitMatchingTerm(GetLuceneTerm(fieldName));
                }
            }
        }
示例#7
0
        public virtual void TestNoOrds()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter iw  = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document          doc = new Document();
            FieldType         ft  = new FieldType(TextField.TYPE_NOT_STORED);

            ft.StoreTermVectors = true;
            doc.Add(new Field("foo", "this is a test", ft));
            iw.AddDocument(doc);
            AtomicReader ir    = GetOnlySegmentReader(iw.Reader);
            Terms        terms = ir.GetTermVector(0, "foo");

            Assert.IsNotNull(terms);
            TermsEnum termsEnum = terms.Iterator(null);

            Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("this")));
            try
            {
                termsEnum.Ord();
                Assert.Fail();
            }
            catch (System.NotSupportedException expected)
            {
                // expected exception
            }

            try
            {
                termsEnum.SeekExact(0);
                Assert.Fail();
            }
            catch (System.NotSupportedException expected)
            {
                // expected exception
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
示例#8
0
        private void AssertTermsSeeking(Terms leftTerms, Terms rightTerms)
        {
            TermsEnum leftEnum  = null;
            TermsEnum rightEnum = null;

            // just an upper bound
            int    numTests = AtLeast(20);
            Random random   = Random;

            // collect this number of terms from the left side
            ISet <BytesRef> tests     = new JCG.HashSet <BytesRef>();
            int             numPasses = 0;

            while (numPasses < 10 && tests.Count < numTests)
            {
                leftEnum = leftTerms.GetIterator(leftEnum);
                BytesRef term = null;
                while ((term = leftEnum.Next()) != null)
                {
                    int code = random.Next(10);
                    if (code == 0)
                    {
                        // the term
                        tests.Add(BytesRef.DeepCopyOf(term));
                    }
                    else if (code == 1)
                    {
                        // truncated subsequence of term
                        term = BytesRef.DeepCopyOf(term);
                        if (term.Length > 0)
                        {
                            // truncate it
                            term.Length = random.Next(term.Length);
                        }
                    }
                    else if (code == 2)
                    {
                        // term, but ensure a non-zero offset
                        var newbytes = new byte[term.Length + 5];
                        Array.Copy(term.Bytes, term.Offset, newbytes, 5, term.Length);
                        tests.Add(new BytesRef(newbytes, 5, term.Length));
                    }
                }
                numPasses++;
            }

            List <BytesRef> shuffledTests = new List <BytesRef>(tests);

            shuffledTests.Shuffle(Random);

            foreach (BytesRef b in shuffledTests)
            {
                leftEnum  = leftTerms.GetIterator(leftEnum);
                rightEnum = rightTerms.GetIterator(rightEnum);

                Assert.AreEqual(leftEnum.SeekExact(b), rightEnum.SeekExact(b));
                Assert.AreEqual(leftEnum.SeekExact(b), rightEnum.SeekExact(b));

                SeekStatus leftStatus;
                SeekStatus rightStatus;

                leftStatus  = leftEnum.SeekCeil(b);
                rightStatus = rightEnum.SeekCeil(b);
                Assert.AreEqual(leftStatus, rightStatus);
                if (leftStatus != SeekStatus.END)
                {
                    Assert.AreEqual(leftEnum.Term, rightEnum.Term);
                }

                leftStatus  = leftEnum.SeekCeil(b);
                rightStatus = rightEnum.SeekCeil(b);
                Assert.AreEqual(leftStatus, rightStatus);
                if (leftStatus != SeekStatus.END)
                {
                    Assert.AreEqual(leftEnum.Term, rightEnum.Term);
                }
            }
        }
示例#9
0
        public virtual void TestPhrasePrefix()
        {
            Directory         indexStore = NewDirectory();
            RandomIndexWriter writer     = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, indexStore);
            Document doc1 = new Document();
            Document doc2 = new Document();
            Document doc3 = new Document();
            Document doc4 = new Document();
            Document doc5 = new Document();

            doc1.Add(NewTextField("body", "blueberry pie", Field.Store.YES));
            doc2.Add(NewTextField("body", "blueberry strudel", Field.Store.YES));
            doc3.Add(NewTextField("body", "blueberry pizza", Field.Store.YES));
            doc4.Add(NewTextField("body", "blueberry chewing gum", Field.Store.YES));
            doc5.Add(NewTextField("body", "piccadilly circus", Field.Store.YES));
            writer.AddDocument(doc1);
            writer.AddDocument(doc2);
            writer.AddDocument(doc3);
            writer.AddDocument(doc4);
            writer.AddDocument(doc5);
            IndexReader reader = writer.GetReader();

            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            // PhrasePrefixQuery query1 = new PhrasePrefixQuery();
            MultiPhraseQuery query1 = new MultiPhraseQuery();
            // PhrasePrefixQuery query2 = new PhrasePrefixQuery();
            MultiPhraseQuery query2 = new MultiPhraseQuery();

            query1.Add(new Term("body", "blueberry"));
            query2.Add(new Term("body", "strawberry"));

            LinkedList <Term> termsWithPrefix = new LinkedList <Term>();

            // this TermEnum gives "piccadilly", "pie" and "pizza".
            string    prefix = "pi";
            TermsEnum te     = MultiFields.GetFields(reader).GetTerms("body").GetIterator(null);

            te.SeekCeil(new BytesRef(prefix));
            do
            {
                string s = te.Term.Utf8ToString();
                if (s.StartsWith(prefix, StringComparison.Ordinal))
                {
                    termsWithPrefix.AddLast(new Term("body", s));
                }
                else
                {
                    break;
                }
            } while (te.Next() != null);

            query1.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            query2.Add(termsWithPrefix.ToArray(/*new Term[0]*/));

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000).ScoreDocs;
            Assert.AreEqual(2, result.Length);

            result = searcher.Search(query2, null, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);
            reader.Dispose();
            indexStore.Dispose();
        }
示例#10
0
        public virtual void Test()
        {
#pragma warning disable 612, 618
            IFieldCache        cache   = FieldCache.DEFAULT;
            FieldCache.Doubles doubles = cache.GetDoubles(Reader, "theDouble", Random.NextBoolean());
            Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", FieldCache.DEFAULT_DOUBLE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(doubles.Get(i) == (double.MaxValue - i), doubles.Get(i) + " does not equal: " + (double.MaxValue - i));
            }

            FieldCache.Int64s longs = cache.GetInt64s(Reader, "theLong", Random.NextBoolean());
            Assert.AreSame(longs, cache.GetInt64s(Reader, "theLong", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(longs, cache.GetInt64s(Reader, "theLong", FieldCache.DEFAULT_INT64_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(longs.Get(i) == (long.MaxValue - i), longs.Get(i) + " does not equal: " + (long.MaxValue - i) + " i=" + i);
            }

            FieldCache.Bytes bytes = cache.GetBytes(Reader, "theByte", Random.NextBoolean());
            Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", FieldCache.DEFAULT_BYTE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue((sbyte)bytes.Get(i) == (sbyte)(sbyte.MaxValue - i), (sbyte)bytes.Get(i) + " does not equal: " + (sbyte.MaxValue - i));
            }

            FieldCache.Int16s shorts = cache.GetInt16s(Reader, "theShort", Random.NextBoolean());
            Assert.AreSame(shorts, cache.GetInt16s(Reader, "theShort", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(shorts, cache.GetInt16s(Reader, "theShort", FieldCache.DEFAULT_INT16_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(shorts.Get(i) == (short)(short.MaxValue - i), shorts.Get(i) + " does not equal: " + (short.MaxValue - i));
            }

            FieldCache.Int32s ints = cache.GetInt32s(Reader, "theInt", Random.NextBoolean());
            Assert.AreSame(ints, cache.GetInt32s(Reader, "theInt", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(ints, cache.GetInt32s(Reader, "theInt", FieldCache.DEFAULT_INT32_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(ints.Get(i) == (int.MaxValue - i), ints.Get(i) + " does not equal: " + (int.MaxValue - i));
            }

            FieldCache.Singles floats = cache.GetSingles(Reader, "theFloat", Random.NextBoolean());
            Assert.AreSame(floats, cache.GetSingles(Reader, "theFloat", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(floats, cache.GetSingles(Reader, "theFloat", FieldCache.DEFAULT_SINGLE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(floats.Get(i) == (float.MaxValue - i), floats.Get(i) + " does not equal: " + (float.MaxValue - i));
            }
#pragma warning restore 612, 618

            IBits docsWithField = cache.GetDocsWithField(Reader, "theLong");
            Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "theLong"), "Second request to cache return same array");
            Assert.IsTrue(docsWithField is Bits.MatchAllBits, "docsWithField(theLong) must be class Bits.MatchAllBits");
            Assert.IsTrue(docsWithField.Length == NUM_DOCS, "docsWithField(theLong) Size: " + docsWithField.Length + " is not: " + NUM_DOCS);
            for (int i = 0; i < docsWithField.Length; i++)
            {
                Assert.IsTrue(docsWithField.Get(i));
            }

            docsWithField = cache.GetDocsWithField(Reader, "sparse");
            Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "sparse"), "Second request to cache return same array");
            Assert.IsFalse(docsWithField is Bits.MatchAllBits, "docsWithField(sparse) must not be class Bits.MatchAllBits");
            Assert.IsTrue(docsWithField.Length == NUM_DOCS, "docsWithField(sparse) Size: " + docsWithField.Length + " is not: " + NUM_DOCS);
            for (int i = 0; i < docsWithField.Length; i++)
            {
                Assert.AreEqual(i % 2 == 0, docsWithField.Get(i));
            }

            // getTermsIndex
            SortedDocValues termsIndex = cache.GetTermsIndex(Reader, "theRandomUnicodeString");
            Assert.AreSame(termsIndex, cache.GetTermsIndex(Reader, "theRandomUnicodeString"), "Second request to cache return same array");
            BytesRef br = new BytesRef();
            for (int i = 0; i < NUM_DOCS; i++)
            {
                BytesRef term;
                int      ord = termsIndex.GetOrd(i);
                if (ord == -1)
                {
                    term = null;
                }
                else
                {
                    termsIndex.LookupOrd(ord, br);
                    term = br;
                }
                string s = term == null ? null : term.Utf8ToString();
                Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s, StringComparison.Ordinal), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
            }

            int nTerms = termsIndex.ValueCount;

            TermsEnum tenum = termsIndex.GetTermsEnum();
            BytesRef  val   = new BytesRef();
            for (int i = 0; i < nTerms; i++)
            {
                BytesRef val1 = tenum.Next();
                termsIndex.LookupOrd(i, val);
                // System.out.println("i="+i);
                Assert.AreEqual(val, val1);
            }

            // seek the enum around (note this isn't a great test here)
            int num = AtLeast(100);
            for (int i = 0; i < num; i++)
            {
                int k = Random.Next(nTerms);
                termsIndex.LookupOrd(k, val);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
                Assert.AreEqual(val, tenum.Term);
            }

            for (int i = 0; i < nTerms; i++)
            {
                termsIndex.LookupOrd(i, val);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
                Assert.AreEqual(val, tenum.Term);
            }

            // test bad field
            termsIndex = cache.GetTermsIndex(Reader, "bogusfield");

            // getTerms
            BinaryDocValues terms = cache.GetTerms(Reader, "theRandomUnicodeString", true);
            Assert.AreSame(terms, cache.GetTerms(Reader, "theRandomUnicodeString", true), "Second request to cache return same array");
            IBits bits = cache.GetDocsWithField(Reader, "theRandomUnicodeString");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                terms.Get(i, br);
                BytesRef term;
                if (!bits.Get(i))
                {
                    term = null;
                }
                else
                {
                    term = br;
                }
                string s = term == null ? null : term.Utf8ToString();
                Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s, StringComparison.Ordinal), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
            }

            // test bad field
            terms = cache.GetTerms(Reader, "bogusfield", false);

            // getDocTermOrds
            SortedSetDocValues termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
            int numEntries = cache.GetCacheEntries().Length;
            // ask for it again, and check that we didnt create any additional entries:
            termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
            Assert.AreEqual(numEntries, cache.GetCacheEntries().Length);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                termOrds.SetDocument(i);
                // this will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
                IList <BytesRef> values = MultiValued[i].Distinct().ToList();
                foreach (BytesRef v in values)
                {
                    if (v == null)
                    {
                        // why does this test use null values... instead of an empty list: confusing
                        break;
                    }
                    long ord = termOrds.NextOrd();
                    Debug.Assert(ord != SortedSetDocValues.NO_MORE_ORDS);
                    BytesRef scratch = new BytesRef();
                    termOrds.LookupOrd(ord, scratch);
                    Assert.AreEqual(v, scratch);
                }
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, termOrds.NextOrd());
            }

            // test bad field
            termOrds = cache.GetDocTermOrds(Reader, "bogusfield");
            Assert.IsTrue(termOrds.ValueCount == 0);

            FieldCache.DEFAULT.PurgeByCacheKey(Reader.CoreCacheKey);
        }
示例#11
0
        public virtual void TestPhrasePrefix()
        {
            Directory         indexStore = NewDirectory();
            RandomIndexWriter writer     = new RandomIndexWriter(Random(), indexStore);

            Add("blueberry pie", writer);
            Add("blueberry strudel", writer);
            Add("blueberry pizza", writer);
            Add("blueberry chewing gum", writer);
            Add("bluebird pizza", writer);
            Add("bluebird foobar pizza", writer);
            Add("piccadilly circus", writer);

            IndexReader   reader   = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);

            // search for "blueberry pi*":
            MultiPhraseQuery query1 = new MultiPhraseQuery();
            // search for "strawberry pi*":
            MultiPhraseQuery query2 = new MultiPhraseQuery();

            query1.Add(new Term("body", "blueberry"));
            query2.Add(new Term("body", "strawberry"));

            LinkedList <Term> termsWithPrefix = new LinkedList <Term>();

            // this TermEnum gives "piccadilly", "pie" and "pizza".
            string    prefix = "pi";
            TermsEnum te     = MultiFields.GetFields(reader).Terms("body").Iterator(null);

            te.SeekCeil(new BytesRef(prefix));
            do
            {
                string s = te.Term().Utf8ToString();
                if (s.StartsWith(prefix))
                {
                    termsWithPrefix.AddLast(new Term("body", s));
                }
                else
                {
                    break;
                }
            } while (te.Next() != null);

            query1.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            Assert.AreEqual("body:\"blueberry (piccadilly pie pizza)\"", query1.ToString());
            query2.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            Assert.AreEqual("body:\"strawberry (piccadilly pie pizza)\"", query2.ToString());

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000).ScoreDocs;
            Assert.AreEqual(2, result.Length);
            result = searcher.Search(query2, null, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);

            // search for "blue* pizza":
            MultiPhraseQuery query3 = new MultiPhraseQuery();

            termsWithPrefix.Clear();
            prefix = "blue";
            te.SeekCeil(new BytesRef(prefix));

            do
            {
                if (te.Term().Utf8ToString().StartsWith(prefix))
                {
                    termsWithPrefix.AddLast(new Term("body", te.Term().Utf8ToString()));
                }
            } while (te.Next() != null);

            query3.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            query3.Add(new Term("body", "pizza"));

            result = searcher.Search(query3, null, 1000).ScoreDocs;
            Assert.AreEqual(2, result.Length); // blueberry pizza, bluebird pizza
            Assert.AreEqual("body:\"(blueberry bluebird) pizza\"", query3.ToString());

            // test slop:
            query3.Slop = 1;
            result      = searcher.Search(query3, null, 1000).ScoreDocs;

            // just make sure no exc:
            searcher.Explain(query3, 0);

            Assert.AreEqual(3, result.Length); // blueberry pizza, bluebird pizza, bluebird
            // foobar pizza

            MultiPhraseQuery query4 = new MultiPhraseQuery();

            try
            {
                query4.Add(new Term("field1", "foo"));
                query4.Add(new Term("field2", "foobar"));
                Assert.Fail();
            }
            catch (System.ArgumentException e)
            {
                // okay, all terms must belong to the same field
            }

            writer.Dispose();
            reader.Dispose();
            indexStore.Dispose();
        }