Ejemplo n.º 1
0
        /// <summary>
        /// For <c>numIterations</c> random regular expressions, builds one <see cref="AutomatonQuery"/>
        /// from the regexp's automaton and a second one from the string union of the entries in
        /// <c>terms</c> that the automaton accepts, then checks that both queries produce equal
        /// top-25 hits.
        /// </summary>
        public virtual void TestFiniteVersusInfinite()
        {
            for (int iteration = 0; iteration < numIterations; iteration++)
            {
                string pattern = AutomatonTestUtil.RandomRegexp(Random);
                RegExp parsedPattern = new RegExp(pattern, RegExpSyntax.NONE);
                Automaton regexAutomaton = parsedPattern.ToAutomaton();

                // Keep only the terms whose UTF-8 decoded text is accepted by the regexp automaton.
                IList<BytesRef> accepted = new List<BytesRef>();
                foreach (BytesRef term in terms)
                {
                    if (!BasicOperations.Run(regexAutomaton, term.Utf8ToString()))
                    {
                        continue;
                    }

                    accepted.Add(term);
                }

                // Second automaton: the union of exactly the accepted terms.
                Automaton unionAutomaton = BasicAutomata.MakeStringUnion(accepted);

                AutomatonQuery regexQuery = new AutomatonQuery(new Term("field", ""), regexAutomaton);
                AutomatonQuery unionQuery = new AutomatonQuery(new Term("field", ""), unionAutomaton);

                // Search order (regexp query first, union query second) matches the argument order below.
                var regexHits = searcher.Search(regexQuery, 25).ScoreDocs;
                var unionHits = searcher.Search(unionQuery, 25).ScoreDocs;
                CheckHits.CheckEqual(regexQuery, regexHits, unionHits);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Collects a <see cref="CharacterRunAutomaton"/> for every multi-term query found inside
        /// <paramref name="query"/> whose field equals <paramref name="field"/>.
        /// </summary>
        /// <remarks>
        /// Container queries are searched recursively: <c>BooleanQuery</c> (prohibited clauses are
        /// skipped), <c>DisjunctionMaxQuery</c>, <c>SpanOrQuery</c>, <c>SpanNearQuery</c>,
        /// <c>SpanNotQuery</c> (only its <c>Include</c> side), <c>SpanPositionCheckQuery</c> and
        /// <c>ISpanMultiTermQueryWrapper</c>. Automata are produced for <c>AutomatonQuery</c>,
        /// <c>PrefixQuery</c>, <c>FuzzyQuery</c> and <c>TermRangeQuery</c>. The type tests are
        /// applied in that order and the first match wins; any other query type contributes nothing.
        /// </remarks>
        /// <param name="query">The query to inspect.</param>
        /// <param name="field">Field name to match; compared with <see cref="StringComparison.Ordinal"/>.</param>
        /// <returns>The automata that were found; an empty array when none apply.</returns>
        internal static CharacterRunAutomaton[] ExtractAutomata(Query query, string field)
        {
            List<CharacterRunAutomaton> found = new List<CharacterRunAutomaton>();

            BooleanQuery booleanQuery = query as BooleanQuery;
            if (booleanQuery != null)
            {
                foreach (BooleanClause clause in booleanQuery.GetClauses())
                {
                    if (clause.IsProhibited)
                    {
                        continue;
                    }

                    found.AddAll(Arrays.AsList(ExtractAutomata(clause.Query, field)));
                }

                return found.ToArray();
            }

            DisjunctionMaxQuery disjunctionQuery = query as DisjunctionMaxQuery;
            if (disjunctionQuery != null)
            {
                foreach (Query disjunct in disjunctionQuery.Disjuncts)
                {
                    found.AddAll(Arrays.AsList(ExtractAutomata(disjunct, field)));
                }

                return found.ToArray();
            }

            SpanOrQuery spanOrQuery = query as SpanOrQuery;
            if (spanOrQuery != null)
            {
                foreach (Query spanClause in spanOrQuery.GetClauses())
                {
                    found.AddAll(Arrays.AsList(ExtractAutomata(spanClause, field)));
                }

                return found.ToArray();
            }

            SpanNearQuery spanNearQuery = query as SpanNearQuery;
            if (spanNearQuery != null)
            {
                foreach (Query spanClause in spanNearQuery.GetClauses())
                {
                    found.AddAll(Arrays.AsList(ExtractAutomata(spanClause, field)));
                }

                return found.ToArray();
            }

            SpanNotQuery spanNotQuery = query as SpanNotQuery;
            if (spanNotQuery != null)
            {
                // Only the include side is followed.
                found.AddAll(Arrays.AsList(ExtractAutomata(spanNotQuery.Include, field)));
                return found.ToArray();
            }

            SpanPositionCheckQuery positionCheckQuery = query as SpanPositionCheckQuery;
            if (positionCheckQuery != null)
            {
                found.AddAll(Arrays.AsList(ExtractAutomata(positionCheckQuery.Match, field)));
                return found.ToArray();
            }

            ISpanMultiTermQueryWrapper spanWrapper = query as ISpanMultiTermQueryWrapper;
            if (spanWrapper != null)
            {
                found.AddAll(Arrays.AsList(ExtractAutomata(spanWrapper.WrappedQuery, field)));
                return found.ToArray();
            }

            AutomatonQuery automatonQuery = query as AutomatonQuery;
            if (automatonQuery != null)
            {
                if (automatonQuery.Field.Equals(field, StringComparison.Ordinal))
                {
                    found.Add(new CharacterRunAutomatonToStringAnonymousHelper(
                                  automatonQuery.Automaton,
                                  () => automatonQuery.ToString()));
                }

                return found.ToArray();
            }

            PrefixQuery prefixQuery = query as PrefixQuery;
            if (prefixQuery != null)
            {
                Term prefixTerm = prefixQuery.Prefix;
                if (prefixTerm.Field.Equals(field, StringComparison.Ordinal))
                {
                    // The literal prefix text followed by any string.
                    Automaton literalPrefix = BasicAutomata.MakeString(prefixTerm.Text());
                    Automaton anySuffix = BasicAutomata.MakeAnyString();
                    found.Add(new CharacterRunAutomatonToStringAnonymousHelper(
                                  BasicOperations.Concatenate(literalPrefix, anySuffix),
                                  () => prefixQuery.ToString()));
                }

                return found.ToArray();
            }

            FuzzyQuery fuzzyQuery = query as FuzzyQuery;
            if (fuzzyQuery != null)
            {
                if (fuzzyQuery.Field.Equals(field, StringComparison.Ordinal))
                {
                    // Decode the term text into an array of code points.
                    string text = fuzzyQuery.Term.Text();
                    int[] codePoints = new int[text.CodePointCount(0, text.Length)];
                    int written = 0;
                    int charIndex = 0;
                    while (charIndex < text.Length)
                    {
                        int codePoint = text.CodePointAt(charIndex);
                        codePoints[written++] = codePoint;
                        charIndex += Character.CharCount(codePoint);
                    }

                    // The first literalLength code points become a literal-string automaton; the
                    // Levenshtein automaton is built from the remaining code points only.
                    int literalLength = Math.Min(fuzzyQuery.PrefixLength, codePoints.Length);
                    string fuzzyPart = UnicodeUtil.NewString(codePoints, literalLength, codePoints.Length - literalLength);
                    LevenshteinAutomata levenshtein = new LevenshteinAutomata(fuzzyPart, fuzzyQuery.Transpositions);
                    Automaton fuzzyAutomaton = levenshtein.ToAutomaton(fuzzyQuery.MaxEdits);
                    if (literalLength > 0)
                    {
                        Automaton literalPart = BasicAutomata.MakeString(UnicodeUtil.NewString(codePoints, 0, literalLength));
                        fuzzyAutomaton = BasicOperations.Concatenate(literalPart, fuzzyAutomaton);
                    }

                    found.Add(new CharacterRunAutomatonToStringAnonymousHelper(
                                  fuzzyAutomaton,
                                  () => fuzzyQuery.ToString()));
                }

                return found.ToArray();
            }

            TermRangeQuery rangeQuery = query as TermRangeQuery;
            if (rangeQuery != null)
            {
                if (rangeQuery.Field.Equals(field, StringComparison.Ordinal))
                {
                    // Not a real automaton: the helper is given an empty automaton together with
                    // the range query itself.
                    found.Add(new SimpleCharacterRunAutomatonAnonymousHelper(BasicAutomata.MakeEmpty(), rangeQuery));
                }

                return found.ToArray();
            }

            // Any other query type contributes no automata.
            return found.ToArray();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Repeats <c>NumIterations</c> times: generates a random regular expression, gathers the
        /// entries of <c>Terms</c> that its automaton accepts, and verifies that an
        /// <see cref="AutomatonQuery"/> over the regexp automaton and one over the string union of
        /// those accepted terms return equal top-25 hits.
        /// </summary>
        public virtual void TestFiniteVersusInfinite()
        {
            for (int round = 0; round < NumIterations; round++)
            {
                string randomRegexp = AutomatonTestUtil.RandomRegexp(Random());
                RegExp compiled = new RegExp(randomRegexp, RegExp.NONE);
                Automaton infinite = compiled.ToAutomaton();

                // Collect every term whose UTF-8 decoded text the regexp automaton accepts.
                IList<BytesRef> hitsInTerms = new List<BytesRef>();
                foreach (BytesRef candidate in Terms)
                {
                    string candidateText = candidate.Utf8ToString();
                    bool isAccepted = BasicOperations.Run(infinite, candidateText);
                    if (isAccepted)
                    {
                        hitsInTerms.Add(candidate);
                    }
                }

                // Automaton that is the union of exactly the collected terms.
                Automaton finite = BasicAutomata.MakeStringUnion(hitsInTerms);

                AutomatonQuery infiniteQuery = new AutomatonQuery(new Term("field", ""), infinite);
                AutomatonQuery finiteQuery = new AutomatonQuery(new Term("field", ""), finite);

                // Run the regexp-based search first, then the union-based one, and compare.
                var infiniteDocs = Searcher.Search(infiniteQuery, 25).ScoreDocs;
                var finiteDocs = Searcher.Search(finiteQuery, 25).ScoreDocs;
                CheckHits.CheckEqual(infiniteQuery, infiniteDocs, finiteDocs);
            }
        }