コード例 #1
0
        public override void SetUp()
        {
            base.SetUp();
            // we generate aweful regexps: good for testing.
            // but for preflex codec, the test can be very slow, so use less iterations.
            numIterations = Codec.Default.Name.Equals("Lucene3x", StringComparison.Ordinal) ? 10 * RandomMultiplier : AtLeast(50);
            dir           = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt32(Random, 50, 1000)));
            Document          doc    = new Document();
            Field             field  = NewStringField("field", "", Field.Store.YES);

            doc.Add(field);
            terms = new JCG.SortedSet <BytesRef>();

            int num = AtLeast(200);

            for (int i = 0; i < num; i++)
            {
                string s = TestUtil.RandomUnicodeString(Random);
                field.SetStringValue(s);
                terms.Add(new BytesRef(s));
                writer.AddDocument(doc);
            }

            termsAutomaton = BasicAutomata.MakeStringUnion(terms);

            reader   = writer.GetReader();
            searcher = NewSearcher(reader);
            writer.Dispose();
        }
コード例 #2
0
        public virtual void TestIntersect()
        {
            for (int i = 0; i < numIterations; i++)
            {
                string                   reg       = AutomatonTestUtil.RandomRegexp(Random);
                Automaton                automaton = (new RegExp(reg, RegExpSyntax.NONE)).ToAutomaton();
                CompiledAutomaton        ca        = new CompiledAutomaton(automaton, SpecialOperations.IsFinite(automaton), false);
                TermsEnum                te        = MultiFields.GetTerms(reader, "field").Intersect(ca, null);
                Automaton                expected  = BasicOperations.Intersection(termsAutomaton, automaton);
                JCG.SortedSet <BytesRef> found     = new JCG.SortedSet <BytesRef>();
                while (te.Next() != null)
                {
                    found.Add(BytesRef.DeepCopyOf(te.Term));
                }

                Automaton actual = BasicAutomata.MakeStringUnion(found);
                Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
            }
        }
コード例 #3
0
        public virtual void TestFiniteVersusInfinite()
        {
            for (int i = 0; i < numIterations; i++)
            {
                string           reg          = AutomatonTestUtil.RandomRegexp(Random);
                Automaton        automaton    = (new RegExp(reg, RegExpSyntax.NONE)).ToAutomaton();
                IList <BytesRef> matchedTerms = new List <BytesRef>();
                foreach (BytesRef t in terms)
                {
                    if (BasicOperations.Run(automaton, t.Utf8ToString()))
                    {
                        matchedTerms.Add(t);
                    }
                }

                Automaton alternate = BasicAutomata.MakeStringUnion(matchedTerms);
                //System.out.println("match " + matchedTerms.Size() + " " + alternate.getNumberOfStates() + " states, sigma=" + alternate.getStartPoints().length);
                //AutomatonTestUtil.minimizeSimple(alternate);
                //System.out.println("minmize done");
                AutomatonQuery a1 = new AutomatonQuery(new Term("field", ""), automaton);
                AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate);
                CheckHits.CheckEqual(a1, searcher.Search(a1, 25).ScoreDocs, searcher.Search(a2, 25).ScoreDocs);
            }
        }
コード例 #4
0
        public virtual void TestIntersectRandom()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            int numTerms = AtLeast(300);
            //final int numTerms = 50;

            HashSet <string>             terms        = new HashSet <string>();
            ICollection <string>         pendingTerms = new List <string>();
            IDictionary <BytesRef, int?> termToID     = new Dictionary <BytesRef, int?>();
            int id = 0;

            while (terms.Count != numTerms)
            {
                string s = RandomString;
                if (!terms.Contains(s))
                {
                    terms.Add(s);
                    pendingTerms.Add(s);
                    if (Random.Next(20) == 7)
                    {
                        AddDoc(w, pendingTerms, termToID, id++);
                    }
                }
            }
            AddDoc(w, pendingTerms, termToID, id++);

            BytesRef[]         termsArray = new BytesRef[terms.Count];
            HashSet <BytesRef> termsSet   = new HashSet <BytesRef>();

            {
                int upto = 0;
                foreach (string s in terms)
                {
                    BytesRef b = new BytesRef(s);
                    termsArray[upto++] = b;
                    termsSet.Add(b);
                }
                Array.Sort(termsArray);
            }

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: indexed terms (unicode order):");
                foreach (BytesRef t in termsArray)
                {
                    Console.WriteLine("  " + t.Utf8ToString() + " -> id:" + termToID[t]);
                }
            }

            IndexReader r = w.GetReader();

            w.Dispose();

            // NOTE: intentional insanity!!
            FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false);

            for (int iter = 0; iter < 10 * RANDOM_MULTIPLIER; iter++)
            {
                // TODO: can we also test infinite As here...?

                // From the random terms, pick some ratio and compile an
                // automaton:
                HashSet <string>     acceptTerms       = new HashSet <string>();
                SortedSet <BytesRef> sortedAcceptTerms = new SortedSet <BytesRef>();
                double    keepPct = Random.NextDouble();
                Automaton a;
                if (iter == 0)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: empty automaton");
                    }
                    a = BasicAutomata.MakeEmpty();
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: keepPct=" + keepPct);
                    }
                    foreach (string s in terms)
                    {
                        string s2;
                        if (Random.NextDouble() <= keepPct)
                        {
                            s2 = s;
                        }
                        else
                        {
                            s2 = RandomString;
                        }
                        acceptTerms.Add(s2);
                        sortedAcceptTerms.Add(new BytesRef(s2));
                    }
                    a = BasicAutomata.MakeStringUnion(sortedAcceptTerms);
                }

                if (Random.NextBoolean())
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: reduce the automaton");
                    }
                    a.Reduce();
                }

                CompiledAutomaton c = new CompiledAutomaton(a, true, false);

                BytesRef[]         acceptTermsArray = new BytesRef[acceptTerms.Count];
                HashSet <BytesRef> acceptTermsSet   = new HashSet <BytesRef>();
                int upto = 0;
                foreach (string s in acceptTerms)
                {
                    BytesRef b = new BytesRef(s);
                    acceptTermsArray[upto++] = b;
                    acceptTermsSet.Add(b);
                    Assert.IsTrue(Accepts(c, b));
                }
                Array.Sort(acceptTermsArray);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: accept terms (unicode order):");
                    foreach (BytesRef t in acceptTermsArray)
                    {
                        Console.WriteLine("  " + t.Utf8ToString() + (termsSet.Contains(t) ? " (exists)" : ""));
                    }
                    Console.WriteLine(a.ToDot());
                }

                for (int iter2 = 0; iter2 < 100; iter2++)
                {
                    BytesRef startTerm = acceptTermsArray.Length == 0 || Random.NextBoolean() ? null : acceptTermsArray[Random.Next(acceptTermsArray.Length)];

                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.Utf8ToString()));

                        if (startTerm != null)
                        {
                            int state = c.RunAutomaton.InitialState;
                            for (int idx = 0; idx < startTerm.Length; idx++)
                            {
                                int label = startTerm.Bytes[startTerm.Offset + idx] & 0xff;
                                Console.WriteLine("  state=" + state + " label=" + label);
                                state = c.RunAutomaton.Step(state, label);
                                Assert.IsTrue(state != -1);
                            }
                            Console.WriteLine("  state=" + state);
                        }
                    }

                    TermsEnum te = MultiFields.GetTerms(r, "f").Intersect(c, startTerm);

                    int loc;
                    if (startTerm == null)
                    {
                        loc = 0;
                    }
                    else
                    {
                        loc = Array.BinarySearch(termsArray, BytesRef.DeepCopyOf(startTerm));
                        if (loc < 0)
                        {
                            loc = -(loc + 1);
                        }
                        else
                        {
                            // startTerm exists in index
                            loc++;
                        }
                    }
                    while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]))
                    {
                        loc++;
                    }

                    DocsEnum docsEnum = null;
                    while (loc < termsArray.Length)
                    {
                        BytesRef expected = termsArray[loc];
                        BytesRef actual   = te.Next();
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST:   next() expected=" + expected.Utf8ToString() + " actual=" + (actual == null ? "null" : actual.Utf8ToString()));
                        }
                        Assert.AreEqual(expected, actual);
                        Assert.AreEqual(1, te.DocFreq);
                        docsEnum = TestUtil.Docs(Random, te, null, docsEnum, DocsFlags.NONE);
                        int docID = docsEnum.NextDoc();
                        Assert.IsTrue(docID != DocIdSetIterator.NO_MORE_DOCS);
                        Assert.AreEqual(docIDToID.Get(docID), (int)termToID[expected]);
                        do
                        {
                            loc++;
                        } while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]));
                    }
                    Assert.IsNull(te.Next());
                }
            }

            r.Dispose();
            dir.Dispose();
        }