Beispiel #1
0
        /// <summary>
        /// Returns an automaton that accepts the concatenation of the languages of the
        /// given automata.
        /// <para/>
        /// Complexity: linear in number of states.
        /// </summary>
        public static Automaton Concatenate(Automaton a1, Automaton a2)
        {
            if (a1.IsSingleton && a2.IsSingleton)
            {
                return(BasicAutomata.MakeString(a1.singleton + a2.singleton));
            }
            if (IsEmpty(a1) || IsEmpty(a2))
            {
                return(BasicAutomata.MakeEmpty());
            }
            // adding epsilon transitions with the NFA concatenation algorithm
            // in this case always produces a resulting DFA, preventing expensive
            // redundant determinize() calls for this common case.
            bool deterministic = a1.IsSingleton && a2.IsDeterministic;

            if (a1 == a2)
            {
                a1 = a1.CloneExpanded();
                a2 = a2.CloneExpanded();
            }
            else
            {
                a1 = a1.CloneExpandedIfRequired();
                a2 = a2.CloneExpandedIfRequired();
            }
            foreach (State s in a1.GetAcceptStates())
            {
                s.accept = false;
                s.AddEpsilon(a2.initial);
            }
            a1.deterministic = deterministic;
            //a1.clearHashCode();
            a1.ClearNumberedStates();
            a1.CheckMinimizeAlways();
            return(a1);
        }
Beispiel #2
0
        /// <summary>
        /// Returns an automaton that accepts the concatenation of the languages of the
        /// given automata.
        /// <para/>
        /// Complexity: linear in total number of states.
        /// </summary>
        public static Automaton Concatenate(IList <Automaton> l)
        {
            if (l.Count == 0)
            {
                return(BasicAutomata.MakeEmptyString());
            }
            bool all_singleton = true;

            foreach (Automaton a in l)
            {
                if (!a.IsSingleton)
                {
                    all_singleton = false;
                    break;
                }
            }
            if (all_singleton)
            {
                StringBuilder b = new StringBuilder();
                foreach (Automaton a in l)
                {
                    b.Append(a.singleton);
                }
                return(BasicAutomata.MakeString(b.ToString()));
            }
            else
            {
                foreach (Automaton a in l)
                {
                    if (BasicOperations.IsEmpty(a))
                    {
                        return(BasicAutomata.MakeEmpty());
                    }
                }
                JCG.HashSet <int> ids = new JCG.HashSet <int>();
                foreach (Automaton a in l)
                {
                    ids.Add(a.GetHashCode());
                }
                bool      has_aliases = ids.Count != l.Count;
                Automaton b           = l[0];
                if (has_aliases)
                {
                    b = b.CloneExpanded();
                }
                else
                {
                    b = b.CloneExpandedIfRequired();
                }
                ISet <State> ac    = b.GetAcceptStates();
                bool         first = true;
                foreach (Automaton a in l)
                {
                    if (first)
                    {
                        first = false;
                    }
                    else
                    {
                        if (a.IsEmptyString)
                        {
                            continue;
                        }
                        Automaton aa = a;
                        if (has_aliases)
                        {
                            aa = aa.CloneExpanded();
                        }
                        else
                        {
                            aa = aa.CloneExpandedIfRequired();
                        }
                        ISet <State> ns = aa.GetAcceptStates();
                        foreach (State s in ac)
                        {
                            s.accept = false;
                            s.AddEpsilon(aa.initial);
                            if (s.accept)
                            {
                                ns.Add(s);
                            }
                        }
                        ac = ns;
                    }
                }
                b.deterministic = false;
                //b.clearHashCode();
                b.ClearNumberedStates();
                b.CheckMinimizeAlways();
                return(b);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Compute a DFA that accepts all strings within an edit distance of <paramref name="n"/>.
        /// <para>
        /// All automata have the following properties:
        /// <list type="bullet">
        ///     <item><description>They are deterministic (DFA).</description></item>
        ///     <item><description>There are no transitions to dead states.</description></item>
        ///     <item><description>They are not minimal (some transitions could be combined).</description></item>
        /// </list>
        /// </para>
        /// </summary>
        public virtual Automaton ToAutomaton(int n)
        {
            if (n == 0)
            {
                return(BasicAutomata.MakeString(word, 0, word.Length));
            }

            if (n >= descriptions.Length)
            {
                return(null);
            }

            int range = 2 * n + 1;
            ParametricDescription description = descriptions[n];

            // the number of states is based on the length of the word and n
            State[] states = new State[description.Count];
            // create all states, and mark as accept states if appropriate
            for (int i = 0; i < states.Length; i++)
            {
                states[i]        = new State();
                states[i].number = i;
                states[i].Accept = description.IsAccept(i);
            }
            // create transitions from state to state
            for (int k = 0; k < states.Length; k++)
            {
                int xpos = description.GetPosition(k);
                if (xpos < 0)
                {
                    continue;
                }
                int end = xpos + Math.Min(word.Length - xpos, range);

                for (int x = 0; x < alphabet.Length; x++)
                {
                    int ch = alphabet[x];
                    // get the characteristic vector at this position wrt ch
                    int cvec = GetVector(ch, xpos, end);
                    int dest = description.Transition(k, xpos, cvec);
                    if (dest >= 0)
                    {
                        states[k].AddTransition(new Transition(ch, states[dest]));
                    }
                }
                // add transitions for all other chars in unicode
                // by definition, their characteristic vectors are always 0,
                // because they do not exist in the input string.
                int dest_ = description.Transition(k, xpos, 0); // by definition
                if (dest_ >= 0)
                {
                    for (int r = 0; r < numRanges; r++)
                    {
                        states[k].AddTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest_]));
                    }
                }
            }

            Automaton a = new Automaton(states[0]);

            a.IsDeterministic = true;
            // we create some useless unconnected states, and its a net-win overall to remove these,
            // as well as to combine any adjacent transitions (it makes later algorithms more efficient).
            // so, while we could set our numberedStates here, its actually best not to, and instead to
            // force a traversal in reduce, pruning the unconnected states while we combine adjacent transitions.
            //a.setNumberedStates(states);
            a.Reduce();
            // we need not trim transitions to dead states, as they are not created.
            //a.restoreInvariant();
            return(a);
        }
Beispiel #4
0
        private Automaton ToAutomaton(IDictionary <string, Automaton> automata, IAutomatonProvider automaton_provider)
        {
            IList <Automaton> list;
            Automaton         a = null;

            switch (kind)
            {
            case Kind.REGEXP_UNION:
                list = new List <Automaton>();
                FindLeaves(exp1, Kind.REGEXP_UNION, list, automata, automaton_provider);
                FindLeaves(exp2, Kind.REGEXP_UNION, list, automata, automaton_provider);
                a = BasicOperations.Union(list);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_CONCATENATION:
                list = new List <Automaton>();
                FindLeaves(exp1, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
                FindLeaves(exp2, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
                a = BasicOperations.Concatenate(list);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_INTERSECTION:
                a = exp1.ToAutomaton(automata, automaton_provider).Intersection(exp2.ToAutomaton(automata, automaton_provider));
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_OPTIONAL:
                a = exp1.ToAutomaton(automata, automaton_provider).Optional();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT:
                a = exp1.ToAutomaton(automata, automaton_provider).Repeat();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT_MIN:
                a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT_MINMAX:
                a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min, max);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_COMPLEMENT:
                a = exp1.ToAutomaton(automata, automaton_provider).Complement();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_CHAR:
                a = BasicAutomata.MakeChar(c);
                break;

            case Kind.REGEXP_CHAR_RANGE:
                a = BasicAutomata.MakeCharRange(from, to);
                break;

            case Kind.REGEXP_ANYCHAR:
                a = BasicAutomata.MakeAnyChar();
                break;

            case Kind.REGEXP_EMPTY:
                a = BasicAutomata.MakeEmpty();
                break;

            case Kind.REGEXP_STRING:
                a = BasicAutomata.MakeString(s);
                break;

            case Kind.REGEXP_ANYSTRING:
                a = BasicAutomata.MakeAnyString();
                break;

            case Kind.REGEXP_AUTOMATON:
                Automaton aa = null;
                if (automata != null)
                {
                    aa = automata[s];
                }
                if (aa == null && automaton_provider != null)
                {
                    try
                    {
                        aa = automaton_provider.GetAutomaton(s);
                    }
                    catch (System.IO.IOException e)
                    {
                        throw new System.ArgumentException(e.ToString(), e);
                    }
                }
                if (aa == null)
                {
                    throw new System.ArgumentException("'" + s + "' not found");
                }
                a = (Automaton)aa.Clone();     // always clone here (ignore allow_mutate)
                break;

            case Kind.REGEXP_INTERVAL:
                a = BasicAutomata.MakeInterval(min, max, digits);
                break;
            }
            return(a);
        }
Beispiel #5
0
        public CompiledAutomaton(Automaton automaton, bool?finite, bool simplify)
        {
            if (simplify)
            {
                // Test whether the automaton is a "simple" form and
                // if so, don't create a runAutomaton.  Note that on a
                // large automaton these tests could be costly:
                if (BasicOperations.IsEmpty(automaton))
                {
                    // matches nothing
                    Type              = AUTOMATON_TYPE.NONE;
                    Term              = null;
                    CommonSuffixRef   = null;
                    RunAutomaton      = null;
                    sortedTransitions = null;
                    this.Finite       = null;
                    return;
                }
                else if (BasicOperations.IsTotal(automaton))
                {
                    // matches all possible strings
                    Type              = AUTOMATON_TYPE.ALL;
                    Term              = null;
                    CommonSuffixRef   = null;
                    RunAutomaton      = null;
                    sortedTransitions = null;
                    this.Finite       = null;
                    return;
                }
                else
                {
                    string commonPrefix;
                    string singleton;
                    if (automaton.Singleton == null)
                    {
                        commonPrefix = SpecialOperations.GetCommonPrefix(automaton);
                        if (commonPrefix.Length > 0 && BasicOperations.SameLanguage(automaton, BasicAutomata.MakeString(commonPrefix)))
                        {
                            singleton = commonPrefix;
                        }
                        else
                        {
                            singleton = null;
                        }
                    }
                    else
                    {
                        commonPrefix = null;
                        singleton    = automaton.Singleton;
                    }

                    if (singleton != null)
                    {
                        // matches a fixed string in singleton or expanded
                        // representation
                        Type              = AUTOMATON_TYPE.SINGLE;
                        Term              = new BytesRef(singleton);
                        CommonSuffixRef   = null;
                        RunAutomaton      = null;
                        sortedTransitions = null;
                        this.Finite       = null;
                        return;
                    }
                    else if (BasicOperations.SameLanguage(automaton, BasicOperations.Concatenate(BasicAutomata.MakeString(commonPrefix), BasicAutomata.MakeAnyString())))
                    {
                        // matches a constant prefix
                        Type              = AUTOMATON_TYPE.PREFIX;
                        Term              = new BytesRef(commonPrefix);
                        CommonSuffixRef   = null;
                        RunAutomaton      = null;
                        sortedTransitions = null;
                        this.Finite       = null;
                        return;
                    }
                }
            }

            Type = AUTOMATON_TYPE.NORMAL;
            Term = null;
            if (finite == null)
            {
                this.Finite = SpecialOperations.IsFinite(automaton);
            }
            else
            {
                this.Finite = finite;
            }
            Automaton utf8 = (new UTF32ToUTF8()).Convert(automaton);

            if (this.Finite == true)
            {
                CommonSuffixRef = null;
            }
            else
            {
                CommonSuffixRef = SpecialOperations.GetCommonSuffixBytesRef(utf8);
            }
            RunAutomaton      = new ByteRunAutomaton(utf8, true);
            sortedTransitions = utf8.GetSortedTransitions();
        }