示例#1
0
        internal Automaton ToLevenshteinAutomata(Automaton automaton)
        {
            var @ref = SpecialOperations.GetFiniteStrings(automaton, -1);

            Automaton[] subs = new Automaton[@ref.Count];
            int         upto = 0;

            foreach (IntsRef path in @ref)
            {
                if (path.Length <= nonFuzzyPrefix || path.Length < minFuzzyLength)
                {
                    subs[upto] = BasicAutomata.MakeString(path.Ints, path.Offset, path.Length);
                    upto++;
                }
                else
                {
                    Automaton prefix = BasicAutomata.MakeString(path.Ints, path.Offset, nonFuzzyPrefix);
                    int[]     ints   = new int[path.Length - nonFuzzyPrefix];
                    Array.Copy(path.Ints, path.Offset + nonFuzzyPrefix, ints, 0, ints.Length);
                    // TODO: maybe add alphaMin to LevenshteinAutomata,
                    // and pass 1 instead of 0?  We probably don't want
                    // to allow the trailing dedup bytes to be
                    // edited... but then 0 byte is "in general" allowed
                    // on input (but not in UTF8).
                    LevenshteinAutomata lev          = new LevenshteinAutomata(ints, unicodeAware ? char.MAX_CODE_POINT : 255, transpositions);
                    Automaton           levAutomaton = lev.ToAutomaton(maxEdits);
                    Automaton           combined     = BasicOperations.Concatenate(Arrays.AsList(prefix, levAutomaton));
                    combined.Deterministic = true; // its like the special case in concatenate itself, except we cloneExpanded already
                    subs[upto]             = combined;
                    upto++;
                }
            }

            if (subs.Length == 0)
            {
                // automaton is empty, there is no accepted paths through it
                return(BasicAutomata.MakeEmpty()); // matches nothing
            }
            else if (subs.Length == 1)
            {
                // no synonyms or anything: just a single path through the tokenstream
                return(subs[0]);
            }
            else
            {
                // multiple paths: this is really scary! is it slow?
                // maybe we should not do this and throw UOE?
                Automaton a = BasicOperations.Union(Arrays.AsList(subs));
                // TODO: we could call toLevenshteinAutomata() before det?
                // this only happens if you have multiple paths anyway (e.g. synonyms)
                BasicOperations.Determinize(a);

                return(a);
            }
        }
示例#2
0
        /// <summary>
        /// Initialize levenshtein DFAs up to maxDistance, if possible </summary>
        private IList <CompiledAutomaton> InitAutomata(int maxDistance)
        {
            IList <CompiledAutomaton> runAutomata = dfaAtt.Automata;

            //System.out.println("cached automata size: " + runAutomata.size());
            if (runAutomata.Count <= maxDistance && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)
            {
                LevenshteinAutomata builder = new LevenshteinAutomata(UnicodeUtil.NewString(m_termText, m_realPrefixLength, m_termText.Length - m_realPrefixLength), transpositions);

                for (int i = runAutomata.Count; i <= maxDistance; i++)
                {
                    Automaton a = builder.ToAutomaton(i);
                    //System.out.println("compute automaton n=" + i);
                    // constant prefix
                    if (m_realPrefixLength > 0)
                    {
                        Automaton prefix = BasicAutomata.MakeString(UnicodeUtil.NewString(m_termText, 0, m_realPrefixLength));
                        a = BasicOperations.Concatenate(prefix, a);
                    }
                    runAutomata.Add(new CompiledAutomaton(a, true, false));
                }
            }
            return(runAutomata);
        }
示例#3
0
        /// <summary>
        /// initialize levenshtein DFAs up to maxDistance, if possible </summary>
        private IList<CompiledAutomaton> InitAutomata(int maxDistance)
        {
            IList<CompiledAutomaton> runAutomata = DfaAtt.Automata();
            //System.out.println("cached automata size: " + runAutomata.size());
            if (runAutomata.Count <= maxDistance && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)
            {
                LevenshteinAutomata builder = new LevenshteinAutomata(UnicodeUtil.NewString(TermText, RealPrefixLength, TermText.Length - RealPrefixLength), Transpositions);

                for (int i = runAutomata.Count; i <= maxDistance; i++)
                {
                    Automaton a = builder.ToAutomaton(i);
                    //System.out.println("compute automaton n=" + i);
                    // constant prefix
                    if (RealPrefixLength > 0)
                    {
                        Automaton prefix = BasicAutomata.MakeString(UnicodeUtil.NewString(TermText, 0, RealPrefixLength));
                        a = BasicOperations.Concatenate(prefix, a);
                    }
                    runAutomata.Add(new CompiledAutomaton(a, true, false));
                }
            }
            return runAutomata;
        }
示例#4
0
        /// <summary>
        /// Extracts all <see cref="MultiTermQuery"/>s for <paramref name="field"/>, and returns equivalent
        /// automata that will match terms.
        /// </summary>
        internal static CharacterRunAutomaton[] ExtractAutomata(Query query, string field)
        {
            List <CharacterRunAutomaton> list = new List <CharacterRunAutomaton>();

            if (query is BooleanQuery)
            {
                BooleanClause[] clauses = ((BooleanQuery)query).GetClauses();
                foreach (BooleanClause clause in clauses)
                {
                    if (!clause.IsProhibited)
                    {
                        list.AddAll(Arrays.AsList(ExtractAutomata(clause.Query, field)));
                    }
                }
            }
            else if (query is DisjunctionMaxQuery)
            {
                foreach (Query sub in ((DisjunctionMaxQuery)query).Disjuncts)
                {
                    list.AddAll(Arrays.AsList(ExtractAutomata(sub, field)));
                }
            }
            else if (query is SpanOrQuery)
            {
                foreach (Query sub in ((SpanOrQuery)query).GetClauses())
                {
                    list.AddAll(Arrays.AsList(ExtractAutomata(sub, field)));
                }
            }
            else if (query is SpanNearQuery)
            {
                foreach (Query sub in ((SpanNearQuery)query).GetClauses())
                {
                    list.AddAll(Arrays.AsList(ExtractAutomata(sub, field)));
                }
            }
            else if (query is SpanNotQuery)
            {
                list.AddAll(Arrays.AsList(ExtractAutomata(((SpanNotQuery)query).Include, field)));
            }
            else if (query is SpanPositionCheckQuery)
            {
                list.AddAll(Arrays.AsList(ExtractAutomata(((SpanPositionCheckQuery)query).Match, field)));
            }
            else if (query is ISpanMultiTermQueryWrapper)
            {
                list.AddAll(Arrays.AsList(ExtractAutomata(((ISpanMultiTermQueryWrapper)query).WrappedQuery, field)));
            }
            else if (query is AutomatonQuery)
            {
                AutomatonQuery aq = (AutomatonQuery)query;
                if (aq.Field.Equals(field, StringComparison.Ordinal))
                {
                    list.Add(new CharacterRunAutomatonToStringAnonymousHelper(aq.Automaton, () => aq.ToString()));
                }
            }
            else if (query is PrefixQuery)
            {
                PrefixQuery pq     = (PrefixQuery)query;
                Term        prefix = pq.Prefix;
                if (prefix.Field.Equals(field, StringComparison.Ordinal))
                {
                    list.Add(new CharacterRunAutomatonToStringAnonymousHelper(
                                 BasicOperations.Concatenate(BasicAutomata.MakeString(prefix.Text()), BasicAutomata.MakeAnyString()),
                                 () => pq.ToString()));
                }
            }
            else if (query is FuzzyQuery)
            {
                FuzzyQuery fq = (FuzzyQuery)query;
                if (fq.Field.Equals(field, StringComparison.Ordinal))
                {
                    string utf16    = fq.Term.Text();
                    int[]  termText = new int[utf16.CodePointCount(0, utf16.Length)];
                    for (int cp, i = 0, j = 0; i < utf16.Length; i += Character.CharCount(cp))
                    {
                        termText[j++] = cp = utf16.CodePointAt(i);
                    }
                    int    termLength             = termText.Length;
                    int    prefixLength           = Math.Min(fq.PrefixLength, termLength);
                    string suffix                 = UnicodeUtil.NewString(termText, prefixLength, termText.Length - prefixLength);
                    LevenshteinAutomata builder   = new LevenshteinAutomata(suffix, fq.Transpositions);
                    Automaton           automaton = builder.ToAutomaton(fq.MaxEdits);
                    if (prefixLength > 0)
                    {
                        Automaton prefix = BasicAutomata.MakeString(UnicodeUtil.NewString(termText, 0, prefixLength));
                        automaton = BasicOperations.Concatenate(prefix, automaton);
                    }
                    list.Add(new CharacterRunAutomatonToStringAnonymousHelper(automaton, () => fq.ToString()));
                }
            }
            else if (query is TermRangeQuery)
            {
                TermRangeQuery tq = (TermRangeQuery)query;
                if (tq.Field.Equals(field, StringComparison.Ordinal))
                {
                    // this is *not* an automaton, but its very simple
                    list.Add(new SimpleCharacterRunAutomatonAnonymousHelper(BasicAutomata.MakeEmpty(), tq));
                }
            }
            return(list.ToArray(/*new CharacterRunAutomaton[list.size()]*/));
        }