Ejemplo n.º 1
0
        /// <summary>
        /// Returns an automaton that accepts the union of the languages of the given
        /// automata.
        /// <para/>
        /// Complexity: linear in number of states.
        /// </summary>
        public static Automaton Union(ICollection <Automaton> l)
        {
            JCG.HashSet <int> ids = new JCG.HashSet <int>();
            foreach (Automaton a in l)
            {
                ids.Add(a.GetHashCode());
            }
            bool  has_aliases = ids.Count != l.Count;
            State s           = new State();

            foreach (Automaton b in l)
            {
                if (BasicOperations.IsEmpty(b))
                {
                    continue;
                }
                Automaton bb = b;
                if (has_aliases)
                {
                    bb = bb.CloneExpanded();
                }
                else
                {
                    bb = bb.CloneExpandedIfRequired();
                }
                s.AddEpsilon(bb.initial);
            }
            Automaton a_ = new Automaton
            {
                initial       = s,
                deterministic = false
            };

            //a.clearHashCode();
            a_.ClearNumberedStates();
            a_.CheckMinimizeAlways();
            return(a_);
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Returns a (deterministic) automaton that accepts the intersection of the
 /// language of <paramref name="a1"/> and the complement of the language of
 /// <paramref name="a2"/>. As a side-effect, the automata may be determinized, if not
 /// already deterministic.
 /// <para/>
 /// Complexity: quadratic in number of states (if already deterministic).
 /// </summary>
 public static Automaton Minus(Automaton a1, Automaton a2)
 {
     if (BasicOperations.IsEmpty(a1) || a1 == a2)
     {
         return(BasicAutomata.MakeEmpty());
     }
     if (BasicOperations.IsEmpty(a2))
     {
         return(a1.CloneIfRequired());
     }
     if (a1.IsSingleton)
     {
         if (BasicOperations.Run(a2, a1.singleton))
         {
             return(BasicAutomata.MakeEmpty());
         }
         else
         {
             return(a1.CloneIfRequired());
         }
     }
     return(Intersection(a1, a2.Complement()));
 }
Ejemplo n.º 3
0
        private Automaton ToAutomaton(IDictionary <string, Automaton> automata, IAutomatonProvider automaton_provider)
        {
            IList <Automaton> list;
            Automaton         a = null;

            switch (kind)
            {
            case Kind.REGEXP_UNION:
                list = new List <Automaton>();
                FindLeaves(exp1, Kind.REGEXP_UNION, list, automata, automaton_provider);
                FindLeaves(exp2, Kind.REGEXP_UNION, list, automata, automaton_provider);
                a = BasicOperations.Union(list);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_CONCATENATION:
                list = new List <Automaton>();
                FindLeaves(exp1, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
                FindLeaves(exp2, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
                a = BasicOperations.Concatenate(list);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_INTERSECTION:
                a = exp1.ToAutomaton(automata, automaton_provider).Intersection(exp2.ToAutomaton(automata, automaton_provider));
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_OPTIONAL:
                a = exp1.ToAutomaton(automata, automaton_provider).Optional();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT:
                a = exp1.ToAutomaton(automata, automaton_provider).Repeat();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT_MIN:
                a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT_MINMAX:
                a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min, max);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_COMPLEMENT:
                a = exp1.ToAutomaton(automata, automaton_provider).Complement();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_CHAR:
                a = BasicAutomata.MakeChar(c);
                break;

            case Kind.REGEXP_CHAR_RANGE:
                a = BasicAutomata.MakeCharRange(from, to);
                break;

            case Kind.REGEXP_ANYCHAR:
                a = BasicAutomata.MakeAnyChar();
                break;

            case Kind.REGEXP_EMPTY:
                a = BasicAutomata.MakeEmpty();
                break;

            case Kind.REGEXP_STRING:
                a = BasicAutomata.MakeString(s);
                break;

            case Kind.REGEXP_ANYSTRING:
                a = BasicAutomata.MakeAnyString();
                break;

            case Kind.REGEXP_AUTOMATON:
                Automaton aa = null;
                if (automata != null)
                {
                    aa = automata[s];
                }
                if (aa == null && automaton_provider != null)
                {
                    try
                    {
                        aa = automaton_provider.GetAutomaton(s);
                    }
                    catch (System.IO.IOException e)
                    {
                        throw new System.ArgumentException(e.ToString(), e);
                    }
                }
                if (aa == null)
                {
                    throw new System.ArgumentException("'" + s + "' not found");
                }
                a = (Automaton)aa.Clone();     // always clone here (ignore allow_mutate)
                break;

            case Kind.REGEXP_INTERVAL:
                a = BasicAutomata.MakeInterval(min, max, digits);
                break;
            }
            return(a);
        }
Ejemplo n.º 4
0
        public CompiledAutomaton(Automaton automaton, bool?finite, bool simplify)
        {
            if (simplify)
            {
                // Test whether the automaton is a "simple" form and
                // if so, don't create a runAutomaton.  Note that on a
                // large automaton these tests could be costly:
                if (BasicOperations.IsEmpty(automaton))
                {
                    // matches nothing
                    Type              = AUTOMATON_TYPE.NONE;
                    Term              = null;
                    CommonSuffixRef   = null;
                    RunAutomaton      = null;
                    sortedTransitions = null;
                    this.Finite       = null;
                    return;
                }
                else if (BasicOperations.IsTotal(automaton))
                {
                    // matches all possible strings
                    Type              = AUTOMATON_TYPE.ALL;
                    Term              = null;
                    CommonSuffixRef   = null;
                    RunAutomaton      = null;
                    sortedTransitions = null;
                    this.Finite       = null;
                    return;
                }
                else
                {
                    string commonPrefix;
                    string singleton;
                    if (automaton.Singleton == null)
                    {
                        commonPrefix = SpecialOperations.GetCommonPrefix(automaton);
                        if (commonPrefix.Length > 0 && BasicOperations.SameLanguage(automaton, BasicAutomata.MakeString(commonPrefix)))
                        {
                            singleton = commonPrefix;
                        }
                        else
                        {
                            singleton = null;
                        }
                    }
                    else
                    {
                        commonPrefix = null;
                        singleton    = automaton.Singleton;
                    }

                    if (singleton != null)
                    {
                        // matches a fixed string in singleton or expanded
                        // representation
                        Type              = AUTOMATON_TYPE.SINGLE;
                        Term              = new BytesRef(singleton);
                        CommonSuffixRef   = null;
                        RunAutomaton      = null;
                        sortedTransitions = null;
                        this.Finite       = null;
                        return;
                    }
                    else if (BasicOperations.SameLanguage(automaton, BasicOperations.Concatenate(BasicAutomata.MakeString(commonPrefix), BasicAutomata.MakeAnyString())))
                    {
                        // matches a constant prefix
                        Type              = AUTOMATON_TYPE.PREFIX;
                        Term              = new BytesRef(commonPrefix);
                        CommonSuffixRef   = null;
                        RunAutomaton      = null;
                        sortedTransitions = null;
                        this.Finite       = null;
                        return;
                    }
                }
            }

            Type = AUTOMATON_TYPE.NORMAL;
            Term = null;
            if (finite == null)
            {
                this.Finite = SpecialOperations.IsFinite(automaton);
            }
            else
            {
                this.Finite = finite;
            }
            Automaton utf8 = (new UTF32ToUTF8()).Convert(automaton);

            if (this.Finite == true)
            {
                CommonSuffixRef = null;
            }
            else
            {
                CommonSuffixRef = SpecialOperations.GetCommonSuffixBytesRef(utf8);
            }
            RunAutomaton      = new ByteRunAutomaton(utf8, true);
            sortedTransitions = utf8.GetSortedTransitions();
        }
Ejemplo n.º 5
0
 /// <summary>
 /// See <see cref="BasicOperations.Determinize(Automaton)"/>.
 /// </summary>
 public virtual void Determinize()
 {
     BasicOperations.Determinize(this);
 }
Ejemplo n.º 6
0
 /// <summary>
 /// See <see cref="BasicOperations.Union(ICollection{Automaton})"/>.
 /// </summary>
 public static Automaton Union(ICollection <Automaton> l)
 {
     return(BasicOperations.Union(l));
 }
Ejemplo n.º 7
0
 /// <summary>
 /// See <see cref="BasicOperations.Concatenate(Automaton, Automaton)"/>.
 /// </summary>
 public virtual Automaton Concatenate(Automaton a)
 {
     return(BasicOperations.Concatenate(this, a));
 }
Ejemplo n.º 8
0
 /// <summary>
 /// See <see cref="BasicOperations.Minus(Automaton, Automaton)"/>.
 /// </summary>
 public virtual Automaton Minus(Automaton a)
 {
     return(BasicOperations.Minus(this, a));
 }
Ejemplo n.º 9
0
 /// <summary>
 /// See <see cref="BasicOperations.Intersection(Automaton, Automaton)"/>.
 /// </summary>
 public virtual Automaton Intersection(Automaton a)
 {
     return(BasicOperations.Intersection(this, a));
 }
Ejemplo n.º 10
0
 /// <summary>
 /// See <see cref="BasicOperations.Repeat(Automaton, int, int)"/>.
 /// </summary>
 public virtual Automaton Repeat(int min, int max)
 {
     return(BasicOperations.Repeat(this, min, max));
 }
Ejemplo n.º 11
0
 /// <summary>
 /// See <see cref="BasicOperations.Complement(Automaton)"/>.
 /// </summary>
 public virtual Automaton Complement()
 {
     return(BasicOperations.Complement(this));
 }
Ejemplo n.º 12
0
 /// <summary>
 /// See <see cref="BasicOperations.Repeat(Automaton)"/>.
 /// </summary>
 public virtual Automaton Repeat()
 {
     return(BasicOperations.Repeat(this));
 }
Ejemplo n.º 13
0
 /// <summary>
 /// See <see cref="BasicOperations.Optional(Automaton)"/>.
 /// </summary>
 public virtual Automaton Optional()
 {
     return(BasicOperations.Optional(this));
 }
Ejemplo n.º 14
0
 /// <summary>
 /// See <see cref="BasicOperations.Concatenate(IList{Automaton})"/>.
 /// </summary>
 public static Automaton Concatenate(IList <Automaton> l)
 {
     return(BasicOperations.Concatenate(l));
 }
Ejemplo n.º 15
0
        /// <summary>
        /// Returns an automaton that accepts the intersection of the languages of the
        /// given automata. Never modifies the input automata languages.
        /// <para/>
        /// Complexity: quadratic in number of states.
        /// </summary>
        public static Automaton Intersection(Automaton a1, Automaton a2)
        {
            if (a1.IsSingleton)
            {
                if (BasicOperations.Run(a2, a1.singleton))
                {
                    return(a1.CloneIfRequired());
                }
                else
                {
                    return(BasicAutomata.MakeEmpty());
                }
            }
            if (a2.IsSingleton)
            {
                if (BasicOperations.Run(a1, a2.singleton))
                {
                    return(a2.CloneIfRequired());
                }
                else
                {
                    return(BasicAutomata.MakeEmpty());
                }
            }
            if (a1 == a2)
            {
                return(a1.CloneIfRequired());
            }
            Transition[][]    transitions1 = a1.GetSortedTransitions();
            Transition[][]    transitions2 = a2.GetSortedTransitions();
            Automaton         c            = new Automaton();
            Queue <StatePair> worklist     = new Queue <StatePair>(); // LUCENENET specific - Queue is much more performant than LinkedList
            Dictionary <StatePair, StatePair> newstates = new Dictionary <StatePair, StatePair>();
            StatePair p = new StatePair(c.initial, a1.initial, a2.initial);

            worklist.Enqueue(p);
            newstates[p] = p;
            while (worklist.Count > 0)
            {
                p          = worklist.Dequeue();
                p.s.accept = p.s1.accept && p.s2.accept;
                Transition[] t1 = transitions1[p.s1.number];
                Transition[] t2 = transitions2[p.s2.number];
                for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
                {
                    while (b2 < t2.Length && t2[b2].max < t1[n1].min)
                    {
                        b2++;
                    }
                    for (int n2 = b2; n2 < t2.Length && t1[n1].max >= t2[n2].min; n2++)
                    {
                        if (t2[n2].max >= t1[n1].min)
                        {
                            StatePair q = new StatePair(t1[n1].to, t2[n2].to);
                            if (!newstates.TryGetValue(q, out StatePair r) || r is null)
                            {
                                q.s = new State();
                                worklist.Enqueue(q);
                                newstates[q] = q;
                                r            = q;
                            }
                            int min = t1[n1].min > t2[n2].min ? t1[n1].min : t2[n2].min;
                            int max = t1[n1].max < t2[n2].max ? t1[n1].max : t2[n2].max;
                            p.s.AddTransition(new Transition(min, max, r.s));
                        }
                    }
                }
            }
            c.deterministic = a1.deterministic && a2.deterministic;
            c.RemoveDeadTransitions();
            c.CheckMinimizeAlways();
            return(c);
        }
Ejemplo n.º 16
0
 /// <summary>
 /// See <see cref="BasicOperations.SubsetOf(Automaton, Automaton)"/>.
 /// </summary>
 public virtual bool SubsetOf(Automaton a)
 {
     return(BasicOperations.SubsetOf(this, a));
 }
Ejemplo n.º 17
0
        /// <summary>
        /// Returns true if the language of <paramref name="a1"/> is a subset of the language
        /// of <paramref name="a2"/>. As a side-effect, <paramref name="a2"/> is determinized if
        /// not already marked as deterministic.
        /// <para/>
        /// Complexity: quadratic in number of states.
        /// </summary>
        public static bool SubsetOf(Automaton a1, Automaton a2)
        {
            if (a1 == a2)
            {
                return(true);
            }
            if (a1.IsSingleton)
            {
                if (a2.IsSingleton)
                {
                    return(a1.singleton.Equals(a2.singleton, StringComparison.Ordinal));
                }
                return(BasicOperations.Run(a2, a1.singleton));
            }
            a2.Determinize();
            Transition[][]    transitions1 = a1.GetSortedTransitions();
            Transition[][]    transitions2 = a2.GetSortedTransitions();
            Queue <StatePair> worklist     = new Queue <StatePair>(); // LUCENENET specific - Queue is much more performant than LinkedList

            JCG.HashSet <StatePair> visited = new JCG.HashSet <StatePair>();
            StatePair p = new StatePair(a1.initial, a2.initial);

            worklist.Enqueue(p);
            visited.Add(p);
            while (worklist.Count > 0)
            {
                p = worklist.Dequeue();
                if (p.s1.accept && !p.s2.accept)
                {
                    return(false);
                }
                Transition[] t1 = transitions1[p.s1.number];
                Transition[] t2 = transitions2[p.s2.number];
                for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
                {
                    while (b2 < t2.Length && t2[b2].max < t1[n1].min)
                    {
                        b2++;
                    }
                    int min1 = t1[n1].min, max1 = t1[n1].max;

                    for (int n2 = b2; n2 < t2.Length && t1[n1].max >= t2[n2].min; n2++)
                    {
                        if (t2[n2].min > min1)
                        {
                            return(false);
                        }
                        if (t2[n2].max < Character.MaxCodePoint)
                        {
                            min1 = t2[n2].max + 1;
                        }
                        else
                        {
                            min1 = Character.MaxCodePoint;
                            max1 = Character.MinCodePoint;
                        }
                        StatePair q = new StatePair(t1[n1].to, t2[n2].to);
                        if (!visited.Contains(q))
                        {
                            worklist.Enqueue(q);
                            visited.Add(q);
                        }
                    }
                    if (min1 <= max1)
                    {
                        return(false);
                    }
                }
            }
            return(true);
        }
Ejemplo n.º 18
0
 /// <summary>
 /// See <see cref="BasicOperations.Union(Automaton, Automaton)"/>.
 /// </summary>
 public virtual Automaton Union(Automaton a)
 {
     return(BasicOperations.Union(this, a));
 }
Ejemplo n.º 19
0
        /// <summary>
        /// Returns an automaton that accepts the concatenation of the languages of the
        /// given automata.
        /// <para/>
        /// Complexity: linear in total number of states.
        /// </summary>
        public static Automaton Concatenate(IList <Automaton> l)
        {
            if (l.Count == 0)
            {
                return(BasicAutomata.MakeEmptyString());
            }
            bool all_singleton = true;

            foreach (Automaton a in l)
            {
                if (!a.IsSingleton)
                {
                    all_singleton = false;
                    break;
                }
            }
            if (all_singleton)
            {
                StringBuilder b = new StringBuilder();
                foreach (Automaton a in l)
                {
                    b.Append(a.singleton);
                }
                return(BasicAutomata.MakeString(b.ToString()));
            }
            else
            {
                foreach (Automaton a in l)
                {
                    if (BasicOperations.IsEmpty(a))
                    {
                        return(BasicAutomata.MakeEmpty());
                    }
                }
                JCG.HashSet <int> ids = new JCG.HashSet <int>();
                foreach (Automaton a in l)
                {
                    ids.Add(a.GetHashCode());
                }
                bool      has_aliases = ids.Count != l.Count;
                Automaton b           = l[0];
                if (has_aliases)
                {
                    b = b.CloneExpanded();
                }
                else
                {
                    b = b.CloneExpandedIfRequired();
                }
                ISet <State> ac    = b.GetAcceptStates();
                bool         first = true;
                foreach (Automaton a in l)
                {
                    if (first)
                    {
                        first = false;
                    }
                    else
                    {
                        if (a.IsEmptyString)
                        {
                            continue;
                        }
                        Automaton aa = a;
                        if (has_aliases)
                        {
                            aa = aa.CloneExpanded();
                        }
                        else
                        {
                            aa = aa.CloneExpandedIfRequired();
                        }
                        ISet <State> ns = aa.GetAcceptStates();
                        foreach (State s in ac)
                        {
                            s.accept = false;
                            s.AddEpsilon(aa.initial);
                            if (s.accept)
                            {
                                ns.Add(s);
                            }
                        }
                        ac = ns;
                    }
                }
                b.deterministic = false;
                //b.clearHashCode();
                b.ClearNumberedStates();
                b.CheckMinimizeAlways();
                return(b);
            }
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Returns true if the language of <paramref name="a1"/> is a subset of the language
        /// of <paramref name="a2"/>. As a side-effect, <paramref name="a2"/> is determinized if
        /// not already marked as deterministic.
        /// <para/>
        /// Complexity: quadratic in number of states.
        /// </summary>
        public static bool SubsetOf(Automaton a1, Automaton a2)
        {
            if (a1 == a2)
            {
                return(true);
            }
            if (a1.IsSingleton)
            {
                if (a2.IsSingleton)
                {
                    return(a1.singleton.Equals(a2.singleton, StringComparison.Ordinal));
                }
                return(BasicOperations.Run(a2, a1.singleton));
            }
            a2.Determinize();
            Transition[][]         transitions1 = a1.GetSortedTransitions();
            Transition[][]         transitions2 = a2.GetSortedTransitions();
            LinkedList <StatePair> worklist     = new LinkedList <StatePair>();
            HashSet <StatePair>    visited      = new HashSet <StatePair>();
            StatePair p = new StatePair(a1.initial, a2.initial);

            worklist.AddLast(p);
            visited.Add(p);
            while (worklist.Count > 0)
            {
                p = worklist.First.Value;
                worklist.Remove(p);
                if (p.S1.accept && !p.S2.accept)
                {
                    return(false);
                }
                Transition[] t1 = transitions1[p.S1.number];
                Transition[] t2 = transitions2[p.S2.number];
                for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
                {
                    while (b2 < t2.Length && t2[b2].max < t1[n1].min)
                    {
                        b2++;
                    }
                    int min1 = t1[n1].min, max1 = t1[n1].max;

                    for (int n2 = b2; n2 < t2.Length && t1[n1].max >= t2[n2].min; n2++)
                    {
                        if (t2[n2].min > min1)
                        {
                            return(false);
                        }
                        if (t2[n2].max < Character.MAX_CODE_POINT)
                        {
                            min1 = t2[n2].max + 1;
                        }
                        else
                        {
                            min1 = Character.MAX_CODE_POINT;
                            max1 = Character.MIN_CODE_POINT;
                        }
                        StatePair q = new StatePair(t1[n1].to, t2[n2].to);
                        if (!visited.Contains(q))
                        {
                            worklist.AddLast(q);
                            visited.Add(q);
                        }
                    }
                    if (min1 <= max1)
                    {
                        return(false);
                    }
                }
            }
            return(true);
        }