private void VerifyVector(TermsEnum vector, int num)
 {
     StringBuilder temp = new StringBuilder();
     while (vector.Next() != null)
     {
         temp.Append(vector.Term().Utf8ToString());
     }
     if (!English.IntToEnglish(num).Trim().Equals(temp.ToString().Trim()))
     {
         Console.WriteLine("wrong term result");
     }
 }
예제 #2
0
        public virtual void Test()
        {
            Directory    dir      = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random());

            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w    = new RandomIndexWriter(Random(), dir, analyzer);
            LineFileDocs      docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues());
            int charsToIndex       = AtLeast(100000);
            int charsIndexed       = 0;

            //System.out.println("bytesToIndex=" + charsToIndex);
            while (charsIndexed < charsToIndex)
            {
                Document doc = docs.NextDoc();
                charsIndexed += doc.Get("body").Length;
                w.AddDocument(doc);
                //System.out.println("  bytes=" + charsIndexed + " add: " + doc);
            }
            IndexReader r = w.Reader;

            //System.out.println("numDocs=" + r.NumDocs);
            w.Dispose();

            IndexSearcher s         = NewSearcher(r);
            Terms         terms     = MultiFields.GetFields(r).Terms("body");
            int           termCount = 0;
            TermsEnum     termsEnum = terms.Iterator(null);

            while (termsEnum.Next() != null)
            {
                termCount++;
            }
            Assert.IsTrue(termCount > 0);

            // Target ~10 terms to search:
            double chance = 10.0 / termCount;

            termsEnum = terms.Iterator(termsEnum);
            IDictionary <BytesRef, TopDocs> answers = new Dictionary <BytesRef, TopDocs>();

            while (termsEnum.Next() != null)
            {
                if (Random().NextDouble() <= chance)
                {
                    BytesRef term = BytesRef.DeepCopyOf(termsEnum.Term());
                    answers[term] = s.Search(new TermQuery(new Term("body", term)), 100);
                }
            }

            if (answers.Count > 0)
            {
                CountdownEvent startingGun = new CountdownEvent(1);
                int            numThreads  = TestUtil.NextInt(Random(), 2, 5);
                ThreadClass[]  threads     = new ThreadClass[numThreads];
                for (int threadID = 0; threadID < numThreads; threadID++)
                {
                    ThreadClass thread = new ThreadAnonymousInnerClassHelper(this, s, answers, startingGun);
                    threads[threadID] = thread;
                    thread.Start();
                }
                startingGun.Signal();
                foreach (ThreadClass thread in threads)
                {
                    thread.Join();
                }
            }
            r.Dispose();
            dir.Dispose();
        }
예제 #3
0
        private void DuellReaders(CompositeReader other, AtomicReader memIndexReader)
        {
            AtomicReader competitor = SlowCompositeReaderWrapper.Wrap(other);
            Fields       memFields  = memIndexReader.Fields;

            foreach (string field in competitor.Fields)
            {
                Terms memTerms = memFields.Terms(field);
                Terms iwTerms  = memIndexReader.Terms(field);
                if (iwTerms == null)
                {
                    assertNull(memTerms);
                }
                else
                {
                    NumericDocValues normValues    = competitor.GetNormValues(field);
                    NumericDocValues memNormValues = memIndexReader.GetNormValues(field);
                    if (normValues != null)
                    {
                        // mem idx always computes norms on the fly
                        assertNotNull(memNormValues);
                        assertEquals(normValues.Get(0), memNormValues.Get(0));
                    }

                    assertNotNull(memTerms);
                    assertEquals(iwTerms.DocCount, memTerms.DocCount);
                    assertEquals(iwTerms.SumDocFreq, memTerms.SumDocFreq);
                    assertEquals(iwTerms.SumTotalTermFreq, memTerms.SumTotalTermFreq);
                    TermsEnum iwTermsIter  = iwTerms.Iterator(null);
                    TermsEnum memTermsIter = memTerms.Iterator(null);
                    if (iwTerms.HasPositions())
                    {
                        bool offsets = iwTerms.HasOffsets() && memTerms.HasOffsets();

                        while (iwTermsIter.Next() != null)
                        {
                            assertNotNull(memTermsIter.Next());
                            assertEquals(iwTermsIter.Term(), memTermsIter.Term());
                            DocsAndPositionsEnum iwDocsAndPos  = iwTermsIter.DocsAndPositions(null, null);
                            DocsAndPositionsEnum memDocsAndPos = memTermsIter.DocsAndPositions(null, null);
                            while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                            {
                                assertEquals(iwDocsAndPos.DocID(), memDocsAndPos.NextDoc());
                                assertEquals(iwDocsAndPos.Freq(), memDocsAndPos.Freq());
                                for (int i = 0; i < iwDocsAndPos.Freq(); i++)
                                {
                                    assertEquals("term: " + iwTermsIter.Term().Utf8ToString(), iwDocsAndPos.NextPosition(), memDocsAndPos.NextPosition());
                                    if (offsets)
                                    {
                                        assertEquals(iwDocsAndPos.StartOffset(), memDocsAndPos.StartOffset());
                                        assertEquals(iwDocsAndPos.EndOffset(), memDocsAndPos.EndOffset());
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        while (iwTermsIter.Next() != null)
                        {
                            assertEquals(iwTermsIter.Term(), memTermsIter.Term());
                            DocsEnum iwDocsAndPos  = iwTermsIter.Docs(null, null);
                            DocsEnum memDocsAndPos = memTermsIter.Docs(null, null);
                            while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                            {
                                assertEquals(iwDocsAndPos.DocID(), memDocsAndPos.NextDoc());
                                assertEquals(iwDocsAndPos.Freq(), memDocsAndPos.Freq());
                            }
                        }
                    }
                }
            }
        }
        // we need to guarantee that if several threads call this concurrently, only
        // one executes it, and after it returns, the cache is updated and is either
        // complete or not.
        private void PerhapsFillCache()
        {
            lock (this)
            {
                if (cacheMisses.Get() < cacheMissesUntilFill)
                {
                    return;
                }

                if (!shouldFillCache)
                {
                    // we already filled the cache once, there's no need to re-fill it
                    return;
                }
                shouldFillCache = false;

                InitReaderManager();

                bool            aborted = false;
                DirectoryReader reader  = readerManager.Acquire();
                try
                {
                    TermsEnum termsEnum = null;
                    DocsEnum  docsEnum  = null;
                    foreach (AtomicReaderContext ctx in reader.Leaves)
                    {
                        Terms terms = ctx.AtomicReader.Terms(Consts.FULL);
                        if (terms != null) // cannot really happen, but be on the safe side
                        {
                            termsEnum = terms.Iterator(termsEnum);
                            while (termsEnum.Next() != null)
                            {
                                if (!cache.Full)
                                {
                                    BytesRef t = termsEnum.Term();
                                    // Since we guarantee uniqueness of categories, each term has exactly
                                    // one document. Also, since we do not allow removing categories (and
                                    // hence documents), there are no deletions in the index. Therefore, it
                                    // is sufficient to call next(), and then doc(), exactly once with no
                                    // 'validation' checks.
                                    FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(t.Utf8ToString()));
                                    docsEnum = termsEnum.Docs(null, docsEnum, DocsEnum.FLAG_NONE);
                                    bool res = cache.Put(cp, docsEnum.NextDoc() + ctx.DocBase);
                                    Debug.Assert(!res, "entries should not have been evicted from the cache");
                                }
                                else
                                {
                                    // the cache is full and the next put() will evict entries from it, therefore abort the iteration.
                                    aborted = true;
                                    break;
                                }
                            }
                        }
                        if (aborted)
                        {
                            break;
                        }
                    }
                }
                finally
                {
                    readerManager.Release(reader);
                }

                cacheIsComplete = !aborted;
                if (cacheIsComplete)
                {
                    lock (this)
                    {
                        // everything is in the cache, so no need to keep readerManager open.
                        // this block is executed in a sync block so that it works well with
                        // initReaderManager called in parallel.
                        readerManager.Dispose();
                        readerManager            = null;
                        initializedReaderManager = false;
                    }
                }
            }
        }
        /// <summary>
        /// Takes the categories from the given taxonomy directory, and adds the
        /// missing ones to this taxonomy. Additionally, it fills the given
        /// <seealso cref="OrdinalMap"/> with a mapping from the original ordinal to the new
        /// ordinal.
        /// </summary>
        public virtual void AddTaxonomy(Directory taxoDir, OrdinalMap map)
        {
            EnsureOpen();
            DirectoryReader r = DirectoryReader.Open(taxoDir);

            try
            {
                int        size       = r.NumDocs;
                OrdinalMap ordinalMap = map;
                ordinalMap.Size = size;
                int       @base = 0;
                TermsEnum te    = null;
                DocsEnum  docs  = null;
                foreach (AtomicReaderContext ctx in r.Leaves)
                {
                    AtomicReader ar    = ctx.AtomicReader;
                    Terms        terms = ar.Terms(Consts.FULL);
                    te = terms.Iterator(te);
                    while (te.Next() != null)
                    {
                        FacetLabel cp      = new FacetLabel(FacetsConfig.StringToPath(te.Term().Utf8ToString()));
                        int        ordinal = AddCategory(cp);
                        docs = te.Docs(null, docs, DocsEnum.FLAG_NONE);
                        ordinalMap.AddMapping(docs.NextDoc() + @base, ordinal);
                    }
                    @base += ar.MaxDoc; // no deletions, so we're ok
                }
                ordinalMap.AddDone();
            }
            finally
            {
                r.Dispose();
            }
        }
예제 #6
0
        public virtual void TestPhrasePrefix()
        {
            Directory         indexStore = NewDirectory();
            RandomIndexWriter writer     = new RandomIndexWriter(Random(), indexStore);

            Add("blueberry pie", writer);
            Add("blueberry strudel", writer);
            Add("blueberry pizza", writer);
            Add("blueberry chewing gum", writer);
            Add("bluebird pizza", writer);
            Add("bluebird foobar pizza", writer);
            Add("piccadilly circus", writer);

            IndexReader   reader   = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);

            // search for "blueberry pi*":
            MultiPhraseQuery query1 = new MultiPhraseQuery();
            // search for "strawberry pi*":
            MultiPhraseQuery query2 = new MultiPhraseQuery();

            query1.Add(new Term("body", "blueberry"));
            query2.Add(new Term("body", "strawberry"));

            LinkedList <Term> termsWithPrefix = new LinkedList <Term>();

            // this TermEnum gives "piccadilly", "pie" and "pizza".
            string    prefix = "pi";
            TermsEnum te     = MultiFields.GetFields(reader).Terms("body").Iterator(null);

            te.SeekCeil(new BytesRef(prefix));
            do
            {
                string s = te.Term().Utf8ToString();
                if (s.StartsWith(prefix))
                {
                    termsWithPrefix.AddLast(new Term("body", s));
                }
                else
                {
                    break;
                }
            } while (te.Next() != null);

            query1.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            Assert.AreEqual("body:\"blueberry (piccadilly pie pizza)\"", query1.ToString());
            query2.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            Assert.AreEqual("body:\"strawberry (piccadilly pie pizza)\"", query2.ToString());

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000).ScoreDocs;
            Assert.AreEqual(2, result.Length);
            result = searcher.Search(query2, null, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);

            // search for "blue* pizza":
            MultiPhraseQuery query3 = new MultiPhraseQuery();

            termsWithPrefix.Clear();
            prefix = "blue";
            te.SeekCeil(new BytesRef(prefix));

            do
            {
                if (te.Term().Utf8ToString().StartsWith(prefix))
                {
                    termsWithPrefix.AddLast(new Term("body", te.Term().Utf8ToString()));
                }
            } while (te.Next() != null);

            query3.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            query3.Add(new Term("body", "pizza"));

            result = searcher.Search(query3, null, 1000).ScoreDocs;
            Assert.AreEqual(2, result.Length); // blueberry pizza, bluebird pizza
            Assert.AreEqual("body:\"(blueberry bluebird) pizza\"", query3.ToString());

            // test slop:
            query3.Slop = 1;
            result      = searcher.Search(query3, null, 1000).ScoreDocs;

            // just make sure no exc:
            searcher.Explain(query3, 0);

            Assert.AreEqual(3, result.Length); // blueberry pizza, bluebird pizza, bluebird
            // foobar pizza

            MultiPhraseQuery query4 = new MultiPhraseQuery();

            try
            {
                query4.Add(new Term("field1", "foo"));
                query4.Add(new Term("field2", "foobar"));
                Assert.Fail();
            }
            catch (System.ArgumentException e)
            {
                // okay, all terms must belong to the same field
            }

            writer.Dispose();
            reader.Dispose();
            indexStore.Dispose();
        }
예제 #7
0
        public virtual void TestPhrasePrefix()
        {
            Directory         indexStore = NewDirectory();
            RandomIndexWriter writer     = new RandomIndexWriter(Random(), indexStore);
            Document          doc1       = new Document();
            Document          doc2       = new Document();
            Document          doc3       = new Document();
            Document          doc4       = new Document();
            Document          doc5       = new Document();

            doc1.Add(NewTextField("body", "blueberry pie", Field.Store.YES));
            doc2.Add(NewTextField("body", "blueberry strudel", Field.Store.YES));
            doc3.Add(NewTextField("body", "blueberry pizza", Field.Store.YES));
            doc4.Add(NewTextField("body", "blueberry chewing gum", Field.Store.YES));
            doc5.Add(NewTextField("body", "piccadilly circus", Field.Store.YES));
            writer.AddDocument(doc1);
            writer.AddDocument(doc2);
            writer.AddDocument(doc3);
            writer.AddDocument(doc4);
            writer.AddDocument(doc5);
            IndexReader reader = writer.Reader;

            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            // PhrasePrefixQuery query1 = new PhrasePrefixQuery();
            MultiPhraseQuery query1 = new MultiPhraseQuery();
            // PhrasePrefixQuery query2 = new PhrasePrefixQuery();
            MultiPhraseQuery query2 = new MultiPhraseQuery();

            query1.Add(new Term("body", "blueberry"));
            query2.Add(new Term("body", "strawberry"));

            LinkedList <Term> termsWithPrefix = new LinkedList <Term>();

            // this TermEnum gives "piccadilly", "pie" and "pizza".
            string    prefix = "pi";
            TermsEnum te     = MultiFields.GetFields(reader).Terms("body").Iterator(null);

            te.SeekCeil(new BytesRef(prefix));
            do
            {
                string s = te.Term().Utf8ToString();
                if (s.StartsWith(prefix))
                {
                    termsWithPrefix.AddLast(new Term("body", s));
                }
                else
                {
                    break;
                }
            } while (te.Next() != null);

            query1.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            query2.Add(termsWithPrefix.ToArray(/*new Term[0]*/));

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000).ScoreDocs;
            Assert.AreEqual(2, result.Length);

            result = searcher.Search(query2, null, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);
            reader.Dispose();
            indexStore.Dispose();
        }
예제 #8
0
        public virtual void Test()
        {
            IFieldCache cache = FieldCache.DEFAULT;

            FieldCache.Doubles doubles = cache.GetDoubles(Reader, "theDouble", Random().NextBoolean());
            Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", Random().NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", FieldCache.DEFAULT_DOUBLE_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(doubles.Get(i) == (double.MaxValue - i), doubles.Get(i) + " does not equal: " + (double.MaxValue - i));
            }

            FieldCache.Longs longs = cache.GetLongs(Reader, "theLong", Random().NextBoolean());
            Assert.AreSame(longs, cache.GetLongs(Reader, "theLong", Random().NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(longs, cache.GetLongs(Reader, "theLong", FieldCache.DEFAULT_LONG_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(longs.Get(i) == (long.MaxValue - i), longs.Get(i) + " does not equal: " + (long.MaxValue - i) + " i=" + i);
            }

            FieldCache.Bytes bytes = cache.GetBytes(Reader, "theByte", Random().NextBoolean());
            Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", Random().NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", FieldCache.DEFAULT_BYTE_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(bytes.Get(i) == (sbyte)(sbyte.MaxValue - i), bytes.Get(i) + " does not equal: " + (sbyte.MaxValue - i));
            }

            FieldCache.Shorts shorts = cache.GetShorts(Reader, "theShort", Random().NextBoolean());
            Assert.AreSame(shorts, cache.GetShorts(Reader, "theShort", Random().NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(shorts, cache.GetShorts(Reader, "theShort", FieldCache.DEFAULT_SHORT_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(shorts.Get(i) == (short)(short.MaxValue - i), shorts.Get(i) + " does not equal: " + (short.MaxValue - i));
            }

            FieldCache.Ints ints = cache.GetInts(Reader, "theInt", Random().NextBoolean());
            Assert.AreSame(ints, cache.GetInts(Reader, "theInt", Random().NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(ints, cache.GetInts(Reader, "theInt", FieldCache.DEFAULT_INT_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(ints.Get(i) == (int.MaxValue - i), ints.Get(i) + " does not equal: " + (int.MaxValue - i));
            }

            FieldCache.Floats floats = cache.GetFloats(Reader, "theFloat", Random().NextBoolean());
            Assert.AreSame(floats, cache.GetFloats(Reader, "theFloat", Random().NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(floats, cache.GetFloats(Reader, "theFloat", FieldCache.DEFAULT_FLOAT_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(floats.Get(i) == (float.MaxValue - i), floats.Get(i) + " does not equal: " + (float.MaxValue - i));
            }

            Bits docsWithField = cache.GetDocsWithField(Reader, "theLong");

            Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "theLong"), "Second request to cache return same array");
            Assert.IsTrue(docsWithField is Bits_MatchAllBits, "docsWithField(theLong) must be class Bits.MatchAllBits");
            Assert.IsTrue(docsWithField.Length() == NUM_DOCS, "docsWithField(theLong) Size: " + docsWithField.Length() + " is not: " + NUM_DOCS);
            for (int i = 0; i < docsWithField.Length(); i++)
            {
                Assert.IsTrue(docsWithField.Get(i));
            }

            docsWithField = cache.GetDocsWithField(Reader, "sparse");
            Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "sparse"), "Second request to cache return same array");
            Assert.IsFalse(docsWithField is Bits_MatchAllBits, "docsWithField(sparse) must not be class Bits.MatchAllBits");
            Assert.IsTrue(docsWithField.Length() == NUM_DOCS, "docsWithField(sparse) Size: " + docsWithField.Length() + " is not: " + NUM_DOCS);
            for (int i = 0; i < docsWithField.Length(); i++)
            {
                Assert.AreEqual(i % 2 == 0, docsWithField.Get(i));
            }

            // getTermsIndex
            SortedDocValues termsIndex = cache.GetTermsIndex(Reader, "theRandomUnicodeString");

            Assert.AreSame(termsIndex, cache.GetTermsIndex(Reader, "theRandomUnicodeString"), "Second request to cache return same array");
            BytesRef br = new BytesRef();

            for (int i = 0; i < NUM_DOCS; i++)
            {
                BytesRef term;
                int      ord = termsIndex.GetOrd(i);
                if (ord == -1)
                {
                    term = null;
                }
                else
                {
                    termsIndex.LookupOrd(ord, br);
                    term = br;
                }
                string s = term == null ? null : term.Utf8ToString();
                Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
            }

            int nTerms = termsIndex.ValueCount;

            TermsEnum tenum = termsIndex.TermsEnum();
            BytesRef  val   = new BytesRef();

            for (int i = 0; i < nTerms; i++)
            {
                BytesRef val1 = tenum.Next();
                termsIndex.LookupOrd(i, val);
                // System.out.println("i="+i);
                Assert.AreEqual(val, val1);
            }

            // seek the enum around (note this isn't a great test here)
            int num = AtLeast(100);

            for (int i = 0; i < num; i++)
            {
                int k = Random().Next(nTerms);
                termsIndex.LookupOrd(k, val);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
                Assert.AreEqual(val, tenum.Term());
            }

            for (int i = 0; i < nTerms; i++)
            {
                termsIndex.LookupOrd(i, val);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
                Assert.AreEqual(val, tenum.Term());
            }

            // test bad field
            termsIndex = cache.GetTermsIndex(Reader, "bogusfield");

            // getTerms
            BinaryDocValues terms = cache.GetTerms(Reader, "theRandomUnicodeString", true);

            Assert.AreSame(terms, cache.GetTerms(Reader, "theRandomUnicodeString", true), "Second request to cache return same array");
            Bits bits = cache.GetDocsWithField(Reader, "theRandomUnicodeString");

            for (int i = 0; i < NUM_DOCS; i++)
            {
                terms.Get(i, br);
                BytesRef term;
                if (!bits.Get(i))
                {
                    term = null;
                }
                else
                {
                    term = br;
                }
                string s = term == null ? null : term.Utf8ToString();
                Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
            }

            // test bad field
            terms = cache.GetTerms(Reader, "bogusfield", false);

            // getDocTermOrds
            SortedSetDocValues termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
            int numEntries = cache.CacheEntries.Length;

            // ask for it again, and check that we didnt create any additional entries:
            termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
            Assert.AreEqual(numEntries, cache.CacheEntries.Length);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                termOrds.Document = i;
                // this will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
                IList <BytesRef> values = new List <BytesRef>(new /*Linked*/ HashSet <BytesRef>(Arrays.AsList(MultiValued[i])));
                foreach (BytesRef v in values)
                {
                    if (v == null)
                    {
                        // why does this test use null values... instead of an empty list: confusing
                        break;
                    }
                    long ord = termOrds.NextOrd();
                    Debug.Assert(ord != SortedSetDocValues.NO_MORE_ORDS);
                    BytesRef scratch = new BytesRef();
                    termOrds.LookupOrd(ord, scratch);
                    Assert.AreEqual(v, scratch);
                }
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, termOrds.NextOrd());
            }

            // test bad field
            termOrds = cache.GetDocTermOrds(Reader, "bogusfield");
            Assert.IsTrue(termOrds.ValueCount == 0);

            FieldCache.DEFAULT.PurgeByCacheKey(Reader.CoreCacheKey);
        }
예제 #9
0
        private void AssertTermsSeeking(Terms leftTerms, Terms rightTerms)
        {
            TermsEnum leftEnum  = null;
            TermsEnum rightEnum = null;

            // just an upper bound
            int    numTests = AtLeast(20);
            Random random   = Random();

            // collect this number of terms from the left side
            HashSet <BytesRef> tests = new HashSet <BytesRef>();
            int numPasses            = 0;

            while (numPasses < 10 && tests.Count < numTests)
            {
                leftEnum = leftTerms.Iterator(leftEnum);
                BytesRef term = null;
                while ((term = leftEnum.Next()) != null)
                {
                    int code = random.Next(10);
                    if (code == 0)
                    {
                        // the term
                        tests.Add(BytesRef.DeepCopyOf(term));
                    }
                    else if (code == 1)
                    {
                        // truncated subsequence of term
                        term = BytesRef.DeepCopyOf(term);
                        if (term.Length > 0)
                        {
                            // truncate it
                            term.Length = random.Next(term.Length);
                        }
                    }
                    else if (code == 2)
                    {
                        // term, but ensure a non-zero offset
                        sbyte[] newbytes = new sbyte[term.Length + 5];
                        Array.Copy(term.Bytes, term.Offset, newbytes, 5, term.Length);
                        tests.Add(new BytesRef(newbytes, 5, term.Length));
                    }
                }
                numPasses++;
            }

            List <BytesRef> shuffledTests = new List <BytesRef>(tests);

            shuffledTests = (List <BytesRef>)CollectionsHelper.Shuffle(shuffledTests);

            foreach (BytesRef b in shuffledTests)
            {
                leftEnum  = leftTerms.Iterator(leftEnum);
                rightEnum = rightTerms.Iterator(rightEnum);

                Assert.AreEqual(leftEnum.SeekExact(b), rightEnum.SeekExact(b));
                Assert.AreEqual(leftEnum.SeekExact(b), rightEnum.SeekExact(b));

                SeekStatus leftStatus;
                SeekStatus rightStatus;

                leftStatus  = leftEnum.SeekCeil(b);
                rightStatus = rightEnum.SeekCeil(b);
                Assert.AreEqual(leftStatus, rightStatus);
                if (leftStatus != SeekStatus.END)
                {
                    Assert.AreEqual(leftEnum.Term(), rightEnum.Term());
                }

                leftStatus  = leftEnum.SeekCeil(b);
                rightStatus = rightEnum.SeekCeil(b);
                Assert.AreEqual(leftStatus, rightStatus);
                if (leftStatus != SeekStatus.END)
                {
                    Assert.AreEqual(leftEnum.Term(), rightEnum.Term());
                }
            }
        }
예제 #10
0
        public virtual void TestSimple()
        {
            int numNodes = TestUtil.NextInt(Random(), 1, 10);

            double runTimeSec = AtLeast(3);

            int minDocsToMakeTerms = TestUtil.NextInt(Random(), 5, 20);

            int maxSearcherAgeSeconds = TestUtil.NextInt(Random(), 1, 3);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: numNodes=" + numNodes + " runTimeSec=" + runTimeSec + " maxSearcherAgeSeconds=" + maxSearcherAgeSeconds);
            }

            Start(numNodes, runTimeSec, maxSearcherAgeSeconds);

            List <PreviousSearchState> priorSearches = new List <PreviousSearchState>();
            List <BytesRef>            terms         = null;

            while (TimeHelper.NanoTime() < EndTimeNanos)
            {
                bool doFollowon = priorSearches.Count > 0 && Random().Next(7) == 1;

                // Pick a random node; we will run the query on this node:
                int myNodeID = Random().Next(numNodes);

                NodeState.ShardIndexSearcher localShardSearcher;

                PreviousSearchState prevSearchState;

                if (doFollowon)
                {
                    // Pretend user issued a followon query:
                    prevSearchState = priorSearches[Random().Next(priorSearches.Count)];

                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: follow-on query age=" + ((TimeHelper.NanoTime() - prevSearchState.SearchTimeNanos) / 1000000000.0));
                    }

                    try
                    {
                        localShardSearcher = Nodes[myNodeID].Acquire(prevSearchState.Versions);
                    }
                    catch (SearcherExpiredException see)
                    {
                        // Expected, sometimes; in a "real" app we would
                        // either forward this error to the user ("too
                        // much time has passed; please re-run your
                        // search") or sneakily just switch to newest
                        // searcher w/o telling them...
                        if (VERBOSE)
                        {
                            Console.WriteLine("  searcher expired during local shard searcher init: " + see);
                        }
                        priorSearches.Remove(prevSearchState);
                        continue;
                    }
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: fresh query");
                    }
                    // Do fresh query:
                    localShardSearcher = Nodes[myNodeID].Acquire();
                    prevSearchState    = null;
                }

                IndexReader[] subs = new IndexReader[numNodes];

                PreviousSearchState searchState = null;

                try
                {
                    // Mock: now make a single reader (MultiReader) from all node
                    // searchers.  In a real shard env you can't do this... we
                    // do it to confirm results from the shard searcher
                    // are correct:
                    int docCount = 0;
                    try
                    {
                        for (int nodeID = 0; nodeID < numNodes; nodeID++)
                        {
                            long          subVersion = localShardSearcher.NodeVersions[nodeID];
                            IndexSearcher sub        = Nodes[nodeID].Searchers.Acquire(subVersion);
                            if (sub == null)
                            {
                                nodeID--;
                                while (nodeID >= 0)
                                {
                                    subs[nodeID].DecRef();
                                    subs[nodeID] = null;
                                    nodeID--;
                                }
                                throw new SearcherExpiredException("nodeID=" + nodeID + " version=" + subVersion);
                            }
                            subs[nodeID] = sub.IndexReader;
                            docCount    += subs[nodeID].MaxDoc;
                        }
                    }
                    catch (SearcherExpiredException see)
                    {
                        // Expected
                        if (VERBOSE)
                        {
                            Console.WriteLine("  searcher expired during mock reader init: " + see);
                        }
                        continue;
                    }

                    IndexReader   mockReader   = new MultiReader(subs);
                    IndexSearcher mockSearcher = new IndexSearcher(mockReader);

                    Query query;
                    Sort  sort;

                    if (prevSearchState != null)
                    {
                        query = prevSearchState.Query;
                        sort  = prevSearchState.Sort;
                    }
                    else
                    {
                        if (terms == null && docCount > minDocsToMakeTerms)
                        {
                            // TODO: try to "focus" on high freq terms sometimes too
                            // TODO: maybe also periodically reset the terms...?
                            TermsEnum termsEnum = MultiFields.GetTerms(mockReader, "body").Iterator(null);
                            terms = new List <BytesRef>();
                            while (termsEnum.Next() != null)
                            {
                                terms.Add(BytesRef.DeepCopyOf(termsEnum.Term()));
                            }
                            if (VERBOSE)
                            {
                                Console.WriteLine("TEST: init terms: " + terms.Count + " terms");
                            }
                            if (terms.Count == 0)
                            {
                                terms = null;
                            }
                        }

                        if (VERBOSE)
                        {
                            Console.WriteLine("  maxDoc=" + mockReader.MaxDoc);
                        }

                        if (terms != null)
                        {
                            if (Random().NextBoolean())
                            {
                                query = new TermQuery(new Term("body", terms[Random().Next(terms.Count)]));
                            }
                            else
                            {
                                string t = terms[Random().Next(terms.Count)].Utf8ToString();
                                string prefix;
                                if (t.Length <= 1)
                                {
                                    prefix = t;
                                }
                                else
                                {
                                    prefix = t.Substring(0, TestUtil.NextInt(Random(), 1, 2));
                                }
                                query = new PrefixQuery(new Term("body", prefix));
                            }

                            if (Random().NextBoolean())
                            {
                                sort = null;
                            }
                            else
                            {
                                // TODO: sort by more than 1 field
                                int what = Random().Next(3);
                                if (what == 0)
                                {
                                    sort = new Sort(SortField.FIELD_SCORE);
                                }
                                else if (what == 1)
                                {
                                    // TODO: this sort doesn't merge
                                    // correctly... it's tricky because you
                                    // could have > 2.1B docs across all shards:
                                    //sort = new Sort(SortField.FIELD_DOC);
                                    sort = null;
                                }
                                else if (what == 2)
                                {
                                    sort = new Sort(new SortField[] { new SortField("docid", SortField.Type_e.INT, Random().NextBoolean()) });
                                }
                                else
                                {
                                    sort = new Sort(new SortField[] { new SortField("title", SortField.Type_e.STRING, Random().NextBoolean()) });
                                }
                            }
                        }
                        else
                        {
                            query = null;
                            sort  = null;
                        }
                    }

                    if (query != null)
                    {
                        try
                        {
                            searchState = AssertSame(mockSearcher, localShardSearcher, query, sort, prevSearchState);
                        }
                        catch (SearcherExpiredException see)
                        {
                            // Expected; in a "real" app we would
                            // either forward this error to the user ("too
                            // much time has passed; please re-run your
                            // search") or sneakily just switch to newest
                            // searcher w/o telling them...
                            if (VERBOSE)
                            {
                                Console.WriteLine("  searcher expired during search: " + see);
                                Console.Out.Write(see.StackTrace);
                            }
                            // We can't do this in general: on a very slow
                            // computer it's possible the local searcher
                            // expires before we can finish our search:
                            // assert prevSearchState != null;
                            if (prevSearchState != null)
                            {
                                priorSearches.Remove(prevSearchState);
                            }
                        }
                    }
                }
                finally
                {
                    Nodes[myNodeID].Release(localShardSearcher);
                    foreach (IndexReader sub in subs)
                    {
                        if (sub != null)
                        {
                            sub.DecRef();
                        }
                    }
                }

                if (searchState != null && searchState.SearchAfterLocal != null && Random().Next(5) == 3)
                {
                    priorSearches.Add(searchState);
                    if (priorSearches.Count > 200)
                    {
                        priorSearches = (List <PreviousSearchState>)CollectionsHelper.Shuffle(priorSearches);
                        priorSearches.SubList(100, priorSearches.Count).Clear();
                    }
                }
            }

            Finish();
        }
예제 #11
0
        public virtual void TestMixedVectrosVectors()
        {
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true)).SetOpenMode(OpenMode.CREATE));
            Document          doc    = new Document();

            FieldType ft2 = new FieldType(TextField.TYPE_STORED);

            ft2.StoreTermVectors = true;

            FieldType ft3 = new FieldType(TextField.TYPE_STORED);

            ft3.StoreTermVectors         = true;
            ft3.StoreTermVectorPositions = true;

            FieldType ft4 = new FieldType(TextField.TYPE_STORED);

            ft4.StoreTermVectors       = true;
            ft4.StoreTermVectorOffsets = true;

            FieldType ft5 = new FieldType(TextField.TYPE_STORED);

            ft5.StoreTermVectors         = true;
            ft5.StoreTermVectorOffsets   = true;
            ft5.StoreTermVectorPositions = true;

            doc.Add(NewTextField("field", "one", Field.Store.YES));
            doc.Add(NewField("field", "one", ft2));
            doc.Add(NewField("field", "one", ft3));
            doc.Add(NewField("field", "one", ft4));
            doc.Add(NewField("field", "one", ft5));
            writer.AddDocument(doc);
            IndexReader reader = writer.Reader;

            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            Query query = new TermQuery(new Term("field", "one"));

            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            Fields vectors = searcher.IndexReader.GetTermVectors(hits[0].Doc);

            Assert.IsNotNull(vectors);
            Assert.AreEqual(1, vectors.Size);
            Terms vector = vectors.Terms("field");

            Assert.IsNotNull(vector);
            Assert.AreEqual(1, vector.Size());
            TermsEnum termsEnum = vector.Iterator(null);

            Assert.IsNotNull(termsEnum.Next());
            Assert.AreEqual("one", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(5, termsEnum.TotalTermFreq());
            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);

            Assert.IsNotNull(dpEnum);
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(5, dpEnum.Freq());
            for (int i = 0; i < 5; i++)
            {
                Assert.AreEqual(i, dpEnum.NextPosition());
            }

            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
            Assert.IsNotNull(dpEnum);
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(5, dpEnum.Freq());
            for (int i = 0; i < 5; i++)
            {
                dpEnum.NextPosition();
                Assert.AreEqual(4 * i, dpEnum.StartOffset());
                Assert.AreEqual(4 * i + 3, dpEnum.EndOffset());
            }
            reader.Dispose();
        }
예제 #12
0
        /// <summary>
        /// Safe (but, slowish) default method to write every
        ///  vector field in the document.
        /// </summary>
        protected internal void AddAllDocVectors(Fields vectors, MergeState mergeState)
        {
            if (vectors == null)
            {
                StartDocument(0);
                FinishDocument();
                return;
            }

            int numFields = vectors.Size;

            if (numFields == -1)
            {
                // count manually! TODO: Maybe enforce that Fields.size() returns something valid?
                numFields = 0;
                //for (IEnumerator<string> it = vectors.Iterator(); it.hasNext();)
                foreach (string it in vectors)
                {
                    numFields++;
                }
            }
            StartDocument(numFields);

            string lastFieldName = null;

            TermsEnum            termsEnum            = null;
            DocsAndPositionsEnum docsAndPositionsEnum = null;

            int fieldCount = 0;

            foreach (string fieldName in vectors)
            {
                fieldCount++;
                FieldInfo fieldInfo = mergeState.FieldInfos.FieldInfo(fieldName);

                Debug.Assert(lastFieldName == null || fieldName.CompareTo(lastFieldName) > 0, "lastFieldName=" + lastFieldName + " fieldName=" + fieldName);
                lastFieldName = fieldName;

                Terms terms = vectors.Terms(fieldName);
                if (terms == null)
                {
                    // FieldsEnum shouldn't lie...
                    continue;
                }

                bool hasPositions = terms.HasPositions();
                bool hasOffsets   = terms.HasOffsets();
                bool hasPayloads  = terms.HasPayloads();
                Debug.Assert(!hasPayloads || hasPositions);

                int numTerms = (int)terms.Size();
                if (numTerms == -1)
                {
                    // count manually. It is stupid, but needed, as Terms.size() is not a mandatory statistics function
                    numTerms  = 0;
                    termsEnum = terms.Iterator(termsEnum);
                    while (termsEnum.Next() != null)
                    {
                        numTerms++;
                    }
                }

                StartField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
                termsEnum = terms.Iterator(termsEnum);

                int termCount = 0;
                while (termsEnum.Next() != null)
                {
                    termCount++;

                    int freq = (int)termsEnum.TotalTermFreq();

                    StartTerm(termsEnum.Term(), freq);

                    if (hasPositions || hasOffsets)
                    {
                        docsAndPositionsEnum = termsEnum.DocsAndPositions(null, docsAndPositionsEnum);
                        Debug.Assert(docsAndPositionsEnum != null);

                        int docID = docsAndPositionsEnum.NextDoc();
                        Debug.Assert(docID != DocIdSetIterator.NO_MORE_DOCS);
                        Debug.Assert(docsAndPositionsEnum.Freq() == freq);

                        for (int posUpto = 0; posUpto < freq; posUpto++)
                        {
                            int pos         = docsAndPositionsEnum.NextPosition();
                            int startOffset = docsAndPositionsEnum.StartOffset();
                            int endOffset   = docsAndPositionsEnum.EndOffset();

                            BytesRef payload = docsAndPositionsEnum.Payload;

                            Debug.Assert(!hasPositions || pos >= 0);
                            AddPosition(pos, startOffset, endOffset, payload);
                        }
                    }
                    FinishTerm();
                }
                Debug.Assert(termCount == numTerms);
                FinishField();
            }
            Debug.Assert(fieldCount == numFields);
            FinishDocument();
        }
예제 #13
0
				/// <exception cref="System.IO.IOException"></exception>
				internal SegmentResult(int[] counts, int total, int missingCountIndex, TermsEnum 
					tenum, int startFacetOrd, int endFacetOrd) : base(counts, total - counts[missingCountIndex
					], counts[missingCountIndex], endFacetOrd == missingCountIndex + 1 ? missingCountIndex
					 : endFacetOrd)
				{
					this.tenum = tenum;
					this.mergePos = startFacetOrd;
					if (tenum != null)
					{
						tenum.SeekExact(mergePos);
						mergeTerm = tenum.Term();
					}
				}
예제 #14
0
				/// <exception cref="System.IO.IOException"></exception>
				internal SegmentResult(int[] counts, int total, TermsEnum tenum, int startFacetOrd
					, int endFacetOrd) : base(counts, total - counts[0], counts[0], endFacetOrd + 1)
				{
					this.tenum = tenum;
					this.mergePos = startFacetOrd == -1 ? 1 : startFacetOrd + 1;
					if (mergePos < maxTermPos)
					{
						tenum != null.SeekExact(startFacetOrd == -1 ? 0 : startFacetOrd);
						mergeTerm = tenum.Term();
					}
				}