Ejemplo n.º 1
0
        /// <summary>
        /// Exercises <c>Equals</c> and <c>GetHashCode</c> on <see cref="AutomatonQuery"/>:
        /// the same reference and an independently built query with the same term and
        /// language must compare equal; a different language, a different term, a
        /// different query class, or <c>null</c> must not.
        /// </summary>
        public virtual void TestEquals()
        {
            var baseline = new AutomatonQuery(NewTerm("foobar"), BasicAutomata.MakeString("foobar"));
            // alias of the very same instance
            var alias = baseline;
            // built separately, yet same term and same accepted language ("foo" + "bar")
            var sameLanguage = new AutomatonQuery(
                NewTerm("foobar"),
                BasicOperations.Concatenate(BasicAutomata.MakeString("foo"), BasicAutomata.MakeString("bar")));
            // same term, but the automaton accepts another string
            var otherLanguage = new AutomatonQuery(NewTerm("foobar"), BasicAutomata.MakeString("different"));
            // same automaton, but attached to another term
            var otherTerm = new AutomatonQuery(NewTerm("blah"), BasicAutomata.MakeString("foobar"));

            // identical reference: hash and equality agree
            Assert.AreEqual(baseline.GetHashCode(), alias.GetHashCode());
            Assert.AreEqual(baseline, alias);

            // equivalent query: hash and equality agree
            Assert.AreEqual(baseline.GetHashCode(), sameLanguage.GetHashCode());
            Assert.AreEqual(baseline, sameLanguage);

            // subclasses of AutomatonQuery built from the same term text
            AutomatonQuery wildcard = new WildcardQuery(NewTerm("foobar"));
            AutomatonQuery regexp = new RegexpQuery(NewTerm("foobar"));

            Assert.IsFalse(baseline.Equals(wildcard));
            Assert.IsFalse(baseline.Equals(regexp));
            Assert.IsFalse(wildcard.Equals(regexp));
            Assert.IsFalse(baseline.Equals(otherLanguage));
            Assert.IsFalse(baseline.Equals(otherTerm));
            Assert.IsFalse(baseline.Equals(null));
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Feeds the canned tokens "a", "X" and "b" through
        /// <see cref="TokenStreamToAutomaton"/> and asserts that the result accepts the
        /// same language as a hand-built automaton: ("a" SEP HOLE | "X") followed by SEP "b".
        /// </summary>
        public virtual void TestSynOverHole()
        {
            Token[] tokens = new Token[] { Token("a", 1, 1), Token("X", 0, 2), Token("b", 2, 1) };
            TokenStream stream = new CannedTokenStream(tokens);
            Automaton produced = new TokenStreamToAutomaton().ToAutomaton(stream);

            // Either "a" followed by a separator and a hole, or "X".
            Automaton head = BasicOperations.Union(Join(S2a("a"), SEP_A, HOLE_A), BasicAutomata.MakeString("X"));
            // Both alternatives are then followed by a separator and "b".
            Automaton wanted = BasicOperations.Concatenate(head, Join(SEP_A, S2a("b")));

            //toDot(produced);
            Assert.IsTrue(BasicOperations.SameLanguage(wanted, produced));
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds an automaton that accepts fuzzy variants of every finite string
        /// accepted by <paramref name="automaton"/>.
        /// <para/>
        /// Paths whose length is at most <c>nonFuzzyPrefix</c> or below
        /// <c>minFuzzyLength</c> are kept as exact strings. For every other path the
        /// first <c>nonFuzzyPrefix</c> ints are kept exact and the remainder is replaced
        /// by a Levenshtein automaton built with <c>maxEdits</c>.
        /// </summary>
        /// <param name="automaton">Automaton whose finite strings are enumerated.</param>
        /// <returns>
        /// The empty automaton when there are no paths, the single sub-automaton when
        /// there is exactly one path, otherwise the determinized union of all of them.
        /// </returns>
        internal Automaton ToLevenshteinAutomata(Automaton automaton)
        {
            // Highest Unicode code point (U+10FFFF). System.Char has no MAX_CODE_POINT
            // member, so the value is spelled out here.
            const int MaxCodePoint = 0x10FFFF;

            var paths = SpecialOperations.GetFiniteStrings(automaton, -1);

            Automaton[] subs = new Automaton[paths.Count];
            int upto = 0;

            foreach (IntsRef path in paths)
            {
                if (path.Length <= nonFuzzyPrefix || path.Length < minFuzzyLength)
                {
                    // Too short to be fuzzed: accept the path exactly as-is.
                    subs[upto] = BasicAutomata.MakeString(path.Ints, path.Offset, path.Length);
                    upto++;
                }
                else
                {
                    Automaton prefix = BasicAutomata.MakeString(path.Ints, path.Offset, nonFuzzyPrefix);

                    // Copy everything after the exact prefix; only this tail may be edited.
                    int[] ints = new int[path.Length - nonFuzzyPrefix];
                    Array.Copy(path.Ints, path.Offset + nonFuzzyPrefix, ints, 0, ints.Length);

                    // TODO: maybe add alphaMin to LevenshteinAutomata,
                    // and pass 1 instead of 0?  We probably don't want
                    // to allow the trailing dedup bytes to be
                    // edited... but then 0 byte is "in general" allowed
                    // on input (but not in UTF8).
                    LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? MaxCodePoint : 255, transpositions);
                    Automaton levAutomaton = lev.ToAutomaton(maxEdits);
                    Automaton combined = BasicOperations.Concatenate(Arrays.AsList(prefix, levAutomaton));
                    combined.Deterministic = true; // its like the special case in concatenate itself, except we cloneExpanded already
                    subs[upto] = combined;
                    upto++;
                }
            }

            if (subs.Length == 0)
            {
                // automaton is empty, there is no accepted paths through it
                return BasicAutomata.MakeEmpty(); // matches nothing
            }

            if (subs.Length == 1)
            {
                // no synonyms or anything: just a single path through the tokenstream
                return subs[0];
            }

            // multiple paths: this is really scary! is it slow?
            // maybe we should not do this and throw UOE?
            Automaton a = BasicOperations.Union(Arrays.AsList(subs));
            // TODO: we could call toLevenshteinAutomata() before det?
            // this only happens if you have multiple paths anyway (e.g. synonyms)
            BasicOperations.Determinize(a);

            return a;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Concatenates one string automaton per element of <paramref name="strings"/>,
        /// inserting <c>SEP_A</c> between consecutive elements.
        /// </summary>
        /// <param name="strings">Strings to join, in order.</param>
        /// <returns>
        /// The result of <c>BasicOperations.Concatenate</c> over the assembled list.
        /// With no arguments the list is empty and is passed through unchanged
        /// (the earlier add-then-remove approach threw from <c>RemoveAt(-1)</c> in that case).
        /// </returns>
        private Automaton Join(params string[] strings)
        {
            IList<Automaton> parts = new List<Automaton>();

            foreach (string s in strings)
            {
                // Separator goes only between elements, so nothing has to be
                // trimmed off the end afterwards.
                if (parts.Count > 0)
                {
                    parts.Add(SEP_A);
                }
                parts.Add(BasicAutomata.MakeString(s));
            }

            return BasicOperations.Concatenate(parts);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds the automaton "do" followed by any string and asserts that an
        /// <see cref="AutomatonQuery"/> over it yields a <see cref="PrefixTermsEnum"/>
        /// and produces 3 hits.
        /// </summary>
        public virtual void TestRewritePrefix()
        {
            Automaton literalDo = BasicAutomata.MakeString("do");
            // expand singleton representation for testing
            literalDo.ExpandSingleton();

            Automaton doThenAnything = BasicOperations.Concatenate(literalDo, BasicAutomata.MakeAnyString());
            AutomatonQuery query = new AutomatonQuery(NewTerm("bogus"), doThenAnything);
            Terms fieldTerms = MultiFields.GetTerms(Searcher.IndexReader, FN);

            Assert.IsTrue(query.GetTermsEnum(fieldTerms) is PrefixTermsEnum);
            Assert.AreEqual(3, AutomatonQueryNrHits(query));
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Translates the text of a wildcard term into an automaton, one code point at a time.
        /// <para/>
        /// <c>WILDCARD_STRING</c> becomes "any string", <c>WILDCARD_CHAR</c> becomes "any char",
        /// and <c>WILDCARD_ESCAPE</c> makes the following code point literal. A trailing
        /// escape with nothing after it is treated as a literal character.
        /// <para/>
        /// @lucene.internal
        /// </summary>
        /// <param name="wildcardquery">Term whose text holds the wildcard pattern.</param>
        /// <returns>The concatenation of the automata built for each pattern element.</returns>
        public static Automaton ToAutomaton(Term wildcardquery)
        {
            IList<Automaton> pieces = new JCG.List<Automaton>();
            string pattern = wildcardquery.Text;

            int pos = 0;
            while (pos < pattern.Length)
            {
                int codePoint = Character.CodePointAt(pattern, pos);
                // number of UTF-16 chars consumed by this step
                int consumed = Character.CharCount(codePoint);

                if (codePoint == WILDCARD_STRING)
                {
                    pieces.Add(BasicAutomata.MakeAnyString());
                }
                else if (codePoint == WILDCARD_CHAR)
                {
                    pieces.Add(BasicAutomata.MakeAnyChar());
                }
                else if (codePoint == WILDCARD_ESCAPE && pos + consumed < pattern.Length)
                {
                    // escape: take the next code point literally and skip over it too
                    int escaped = Character.CodePointAt(pattern, pos + consumed);
                    consumed += Character.CharCount(escaped);
                    pieces.Add(BasicAutomata.MakeChar(escaped));
                }
                else
                {
                    // ordinary character, or a trailing escape (lenient parsing)
                    pieces.Add(BasicAutomata.MakeChar(codePoint));
                }

                pos += consumed;
            }

            return BasicOperations.Concatenate(pieces);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Ensures the cached list of Levenshtein DFAs covers every edit distance up to
        /// <paramref name="maxDistance"/>, building the missing ones when
        /// <paramref name="maxDistance"/> does not exceed
        /// <c>LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE</c>.
        /// </summary>
        /// <param name="maxDistance">Highest edit distance an automaton is wanted for.</param>
        /// <returns>The list held by <c>dfaAtt.Automata</c>, possibly extended in place.</returns>
        private IList<CompiledAutomaton> InitAutomata(int maxDistance)
        {
            IList<CompiledAutomaton> cached = dfaAtt.Automata;

            //System.out.println("cached automata size: " + cached.size());
            if (cached.Count > maxDistance || maxDistance > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)
            {
                // already covered, or the distance is not supported: nothing to build
                return cached;
            }

            // Only the part of the term after the constant prefix is subject to edits.
            string fuzzyPart = UnicodeUtil.NewString(m_termText, m_realPrefixLength, m_termText.Length - m_realPrefixLength);
            LevenshteinAutomata levBuilder = new LevenshteinAutomata(fuzzyPart, transpositions);

            int distance = cached.Count;
            while (distance <= maxDistance)
            {
                //System.out.println("compute automaton n=" + distance);
                Automaton dfa = levBuilder.ToAutomaton(distance);

                if (m_realPrefixLength > 0)
                {
                    // constant prefix
                    Automaton exactPrefix = BasicAutomata.MakeString(UnicodeUtil.NewString(m_termText, 0, m_realPrefixLength));
                    dfa = BasicOperations.Concatenate(exactPrefix, dfa);
                }

                cached.Add(new CompiledAutomaton(dfa, true, false));
                distance++;
            }

            return cached;
        }
Ejemplo n.º 8
0
 /// <summary>
 /// Concatenates the given automata, in order, via <c>BasicOperations.Concatenate</c>.
 /// </summary>
 private Automaton Join(params Automaton[] @as) => BasicOperations.Concatenate(@as);
Ejemplo n.º 9
0
        /// <summary>
        /// Walks <paramref name="query"/> and returns one <see cref="CharacterRunAutomaton"/>
        /// per <see cref="MultiTermQuery"/>-style leaf that targets <paramref name="field"/>.
        /// <para/>
        /// Boolean, disjunction-max and span container queries are traversed recursively.
        /// Prohibited boolean clauses are skipped; for <c>SpanNotQuery</c> only the
        /// <c>Include</c> side is visited. Automaton, prefix, fuzzy and term-range queries
        /// contribute an automaton when their field equals <paramref name="field"/>
        /// (ordinal comparison). Any other query type contributes nothing.
        /// </summary>
        /// <param name="query">Root of the query tree to inspect.</param>
        /// <param name="field">Field name the leaves must match.</param>
        /// <returns>The collected automata; empty when nothing matched.</returns>
        internal static CharacterRunAutomaton[] ExtractAutomata(Query query, string field)
        {
            List<CharacterRunAutomaton> found = new List<CharacterRunAutomaton>();

            if (query is BooleanQuery booleanQuery)
            {
                foreach (BooleanClause clause in booleanQuery.GetClauses())
                {
                    // prohibited (MUST_NOT) clauses are ignored
                    if (clause.IsProhibited)
                    {
                        continue;
                    }
                    found.AddRange(ExtractAutomata(clause.Query, field));
                }
            }
            else if (query is DisjunctionMaxQuery disMax)
            {
                foreach (Query disjunct in disMax.Disjuncts)
                {
                    found.AddRange(ExtractAutomata(disjunct, field));
                }
            }
            else if (query is SpanOrQuery spanOr)
            {
                foreach (Query child in spanOr.GetClauses())
                {
                    found.AddRange(ExtractAutomata(child, field));
                }
            }
            else if (query is SpanNearQuery spanNear)
            {
                foreach (Query child in spanNear.GetClauses())
                {
                    found.AddRange(ExtractAutomata(child, field));
                }
            }
            else if (query is SpanNotQuery spanNot)
            {
                // only the included side is inspected
                found.AddRange(ExtractAutomata(spanNot.Include, field));
            }
            else if (query is SpanPositionCheckQuery positionCheck)
            {
                found.AddRange(ExtractAutomata(positionCheck.Match, field));
            }
            else if (query is ISpanMultiTermQueryWrapper spanWrapper)
            {
                found.AddRange(ExtractAutomata(spanWrapper.WrappedQuery, field));
            }
            else if (query is AutomatonQuery automatonQuery)
            {
                if (automatonQuery.Field.Equals(field, StringComparison.Ordinal))
                {
                    found.Add(new CharacterRunAutomatonToStringAnonymousHelper(
                                  automatonQuery.Automaton,
                                  () => automatonQuery.ToString()));
                }
            }
            else if (query is PrefixQuery prefixQuery)
            {
                Term prefixTerm = prefixQuery.Prefix;
                if (prefixTerm.Field.Equals(field, StringComparison.Ordinal))
                {
                    // the prefix text followed by any string
                    Automaton prefixThenAnything = BasicOperations.Concatenate(
                        BasicAutomata.MakeString(prefixTerm.Text()),
                        BasicAutomata.MakeAnyString());
                    found.Add(new CharacterRunAutomatonToStringAnonymousHelper(
                                  prefixThenAnything,
                                  () => prefixQuery.ToString()));
                }
            }
            else if (query is FuzzyQuery fuzzyQuery)
            {
                if (fuzzyQuery.Field.Equals(field, StringComparison.Ordinal))
                {
                    // Decode the UTF-16 term text into an array of code points.
                    string text = fuzzyQuery.Term.Text();
                    int[] codePoints = new int[text.CodePointCount(0, text.Length)];
                    int charIndex = 0;
                    int codePointIndex = 0;
                    while (charIndex < text.Length)
                    {
                        int codePoint = text.CodePointAt(charIndex);
                        codePoints[codePointIndex++] = codePoint;
                        charIndex += Character.CharCount(codePoint);
                    }

                    // The exact prefix can never be longer than the term itself.
                    int exactLength = Math.Min(fuzzyQuery.PrefixLength, codePoints.Length);
                    string fuzzyTail = UnicodeUtil.NewString(codePoints, exactLength, codePoints.Length - exactLength);
                    LevenshteinAutomata levBuilder = new LevenshteinAutomata(fuzzyTail, fuzzyQuery.Transpositions);
                    Automaton fuzzyAutomaton = levBuilder.ToAutomaton(fuzzyQuery.MaxEdits);

                    if (exactLength > 0)
                    {
                        Automaton exactPrefix = BasicAutomata.MakeString(UnicodeUtil.NewString(codePoints, 0, exactLength));
                        fuzzyAutomaton = BasicOperations.Concatenate(exactPrefix, fuzzyAutomaton);
                    }

                    found.Add(new CharacterRunAutomatonToStringAnonymousHelper(
                                  fuzzyAutomaton,
                                  () => fuzzyQuery.ToString()));
                }
            }
            else if (query is TermRangeQuery rangeQuery)
            {
                if (rangeQuery.Field.Equals(field, StringComparison.Ordinal))
                {
                    // this is *not* an automaton, but its very simple
                    found.Add(new SimpleCharacterRunAutomatonAnonymousHelper(BasicAutomata.MakeEmpty(), rangeQuery));
                }
            }

            return found.ToArray();
        }