Пример #1
0
        /// <summary>
        /// Builds a machine consisting of a fresh start state with a single input transition, on the given
        /// ranges, to a state constructed with <c>new FA(true, accept)</c>.
        /// </summary>
        /// <param name="ranges">The input ranges handed to <c>AddInpTrans</c> for the transition</param>
        /// <param name="accept">The accept symbol given to the destination state</param>
        /// <returns>The start state of the new machine</returns>
        public static FA Set(int[] ranges, int accept = -1)
        {
            var start = new FA();
            var end   = new FA(true, accept);

            start.AddInpTrans(ranges, end);
            return start;
        }
Пример #2
0
        /// <summary>
        /// Returns a copy of <paramref name="expr"/> whose start state also has an epsilon transition to the
        /// copy's first accepting state.
        /// </summary>
        /// <param name="expr">The machine to copy; it is cloned, not modified</param>
        /// <param name="accept">The accept symbol written to the copy's first accepting state</param>
        /// <returns>The start state of the copy</returns>
        public static FA Optional(FA expr, int accept = -1)
        {
            var copy      = expr.Clone();
            var accepting = copy.FirstAcceptingState;

            // only the first accepting state is relabelled and linked
            accepting.AcceptSymbol = accept;
            copy.EpsilonTransitions.Add(accepting);
            return copy;
        }
Пример #3
0
        /// <summary>
        /// Converts each expression to a machine by calling its <c>ToFA</c> method with the same match value.
        /// </summary>
        /// <param name="asts">The expressions to convert</param>
        /// <param name="match">The value passed to every <c>ToFA</c> call</param>
        /// <returns>An array with one machine per expression, in the same order</returns>
        static FA[] _ToFAs(Ast[] asts, int match = 0)
        {
            var machines = new FA[asts.Length];
            var index    = 0;

            while (index < machines.Length)
            {
                machines[index] = asts[index].ToFA(match);
                ++index;
            }
            return machines;
        }
Пример #4
0
        /// <summary>
        /// Splices <paramref name="rhs"/> onto <paramref name="lhs"/> in place: the first accepting state of
        /// <paramref name="lhs"/> stops accepting and gains an epsilon transition to <paramref name="rhs"/>.
        /// </summary>
        /// <param name="lhs">The machine that is modified</param>
        /// <param name="rhs">The machine that is linked in; it is referenced, not copied</param>
        static void _Concat(FA lhs, FA rhs)
        {
            var tail = lhs.FirstAcceptingState;

            tail.IsAccepting = false;
            tail.EpsilonTransitions.Add(rhs);
        }
Пример #5
0
        /// <summary>
        /// Converts each expression to a machine through <c>FromAst</c>, using the same match value for all.
        /// </summary>
        /// <param name="asts">The expressions to convert</param>
        /// <param name="match">The value passed to every <c>FromAst</c> call</param>
        /// <returns>An array with one machine per expression, in the same order</returns>
        static FA[] _FromAsts(Ast[] asts, int match = 0)
        {
            var machines = new FA[asts.Length];
            var slot     = 0;

            foreach (var ast in asts)
            {
                machines[slot] = FromAst(ast, match);
                ++slot;
            }
            return machines;
        }
Пример #6
0
        /// <summary>
        /// Builds a chain of states with one single-value transition per element of <paramref name="string"/>.
        /// </summary>
        /// <param name="string">The sequence of input values to match, in order</param>
        /// <param name="accept">The accept symbol given to each newly created state</param>
        /// <returns>
        /// The start state of the chain. For an empty sequence this is just a state created with
        /// <c>new FA()</c>.
        /// </returns>
        public static FA Literal(IEnumerable <int> @string, int accept = -1)
        {
            var start = new FA();
            var tail  = start;

            using (var values = @string.GetEnumerator())
            {
                while (values.MoveNext())
                {
                    var value = values.Current;

                    // the previous end of the chain is no longer the end, so it must not accept
                    tail.IsAccepting = false;
                    var next = new FA(true, accept);
                    tail.AddInpTrans(new int[] { value, value }, next);
                    tail = next;
                }
            }
            return start;
        }
Пример #7
0
 /// <summary>
 /// Attempts to step from a neutral state to the first target of its epsilon transitions.
 /// </summary>
 /// <param name="fa">The state to step from</param>
 /// <param name="result">
 /// The first epsilon target when <paramref name="fa"/> is neutral and has one; otherwise
 /// <paramref name="fa"/> itself
 /// </param>
 /// <returns>
 /// True when <paramref name="result"/> differs from <paramref name="fa"/>; false when the state is not
 /// neutral, has no epsilon target, or its first epsilon target is itself.
 /// </returns>
 static bool _TryForwardNeutral(FA fa, out FA result)
 {
     if (fa.IsNeutral)
     {
         result = fa;
         foreach (var target in fa.EpsilonTransitions)
         {
             // only the first epsilon transition is followed
             result = target;
             break;
         }
         return(fa != result);            // false if circular
     }
     result = fa;
     return(false);
 }
Пример #8
0
        /// <summary>
        /// Follows a chain of neutral states, one <c>_TryForwardNeutral</c> step at a time, and returns the
        /// state where the chain stops.
        /// </summary>
        /// <param name="fa">The state to start from</param>
        /// <returns>
        /// The first state from which no further step is possible. If the chain loops back onto a state
        /// that was already visited, the last state reached before the loop closes is returned.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="fa"/> is null</exception>
        static FA _ForwardNeutrals(FA fa)
        {
            if (null == fa)
            {
                throw new ArgumentNullException(nameof(fa));
            }
            // _TryForwardNeutral only detects a state that points at itself. A longer cycle of neutral
            // states (A -> B -> A) would otherwise keep this loop running forever, so remember what we saw.
            // NOTE(review): the set uses FA's own equality, the same as the Dictionary<FA, int> lookups
            // elsewhere in this file - confirm FA does not override Equals structurally.
            var visited = new HashSet <FA>();
            var current = fa;
            FA  next;

            visited.Add(current);
            while (_TryForwardNeutral(current, out next))
            {
                if (!visited.Add(next))
                {
                    // already been here: the chain is circular, stop where we are
                    break;
                }
                current = next;
            }
            return(current);
        }
Пример #9
0
        /// <summary>
        /// Builds a machine that matches the given machines one after another.
        /// </summary>
        /// <remarks>
        /// Every operand is cloned once, so the caller's machines are left untouched. Null entries are skipped.
        /// Each clone is attached to the previous one with <c>_Concat</c>, which clears the accepting flag of the
        /// previous clone's first accepting state and links it by epsilon transition to the next clone.
        /// </remarks>
        /// <param name="exprs">The machines to concatenate, in order</param>
        /// <param name="accept">The accept symbol written to the first accepting state of the result</param>
        /// <returns>
        /// The start state of the concatenation. When <paramref name="exprs"/> yields no non-null machine,
        /// a single state created with <c>new FA(true, accept)</c> is returned instead of failing.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="exprs"/> is null</exception>
        public static FA Concat(IEnumerable <FA> exprs, int accept = -1)
        {
            if (null == exprs)
            {
                throw new ArgumentNullException(nameof(exprs));
            }
            FA result = null, tail = null;

            foreach (var val in exprs)
            {
                if (null == val)
                {
                    continue;
                }
                var nval = val.Clone();
                if (null == result)
                {
                    // the first operand becomes the start of the result
                    result = nval;
                }
                else
                {
                    // hang this operand off the end of the one before it
                    _Concat(tail, nval);
                }
                tail = nval;
            }
            if (null == result)
            {
                // nothing to concatenate: previously this dereferenced null
                return(new FA(true, accept));
            }
            result.FirstAcceptingState.AcceptSymbol = accept;
            return(result);
        }
Пример #10
0
        /// <summary>
        /// Recursively emits one instruction for <paramref name="fa"/> and for every state reachable from it
        /// that is not yet present in <paramref name="rendered"/>.
        /// </summary>
        /// <remarks>
        /// The instruction starts as a <c>Switch</c>. Each input transition contributes its two key values,
        /// a -1 marker and the destination address. When there are input transitions as well as epsilon
        /// transitions, a -2 marker precedes the epsilon destination addresses. When there are no input
        /// transitions at all, the opcode is changed to <c>Jmp</c>. States whose <c>IsFinal</c> is true emit nothing.
        /// </remarks>
        /// <param name="fa">The state to emit</param>
        /// <param name="rendered">Maps states that already have an address to that address; updated as states are emitted</param>
        /// <param name="prog">The program being built; instructions are appended to it</param>
        static void EmitAstInnerPart(FA fa, IDictionary <FA, int> rendered, IList <int[]> prog)
        {
            if (fa.IsFinal)
            {
                return;
            }
            // reserve this state's slot before recursing so nested emissions land after it
            var slot        = prog.Count;
            var instruction = new List <int> { Switch };

            prog.Add(null);
            foreach (var edge in fa.InputTransitions)
            {
                int target;
                if (!rendered.TryGetValue(edge.Value, out target))
                {
                    // register the address first so cycles resolve to it instead of recursing again
                    target = prog.Count;
                    rendered.Add(edge.Value, target);
                    EmitAstInnerPart(edge.Value, rendered, prog);
                }
                instruction.Add(edge.Key.Key);
                instruction.Add(edge.Key.Value);
                instruction.Add(-1);
                instruction.Add(target);
            }
            if (0 == fa.InputTransitions.Count)
            {
                instruction[0] = Jmp;
            }
            else if (0 < fa.EpsilonTransitions.Count)
            {
                instruction.Add(-2);
            }
            foreach (var next in fa.EpsilonTransitions)
            {
                int target;
                if (!rendered.TryGetValue(next, out target))
                {
                    target = prog.Count;
                    rendered.Add(next, target);
                    EmitAstInnerPart(next, rendered, prog);
                }
                instruction.Add(target);
            }
            prog[slot] = instruction.ToArray();
        }
Пример #11
0
        /// <summary>
        /// Builds a machine that branches by epsilon transition into a copy of each given machine.
        /// </summary>
        /// <remarks>
        /// Each non-null machine is cloned. The clone's first accepting state stops accepting and is linked by
        /// epsilon transition to one shared end state. A null entry links the start state directly to that
        /// end state, at most once.
        /// </remarks>
        /// <param name="exprs">The alternatives; null entries are allowed</param>
        /// <param name="accept">The accept symbol given to the shared end state</param>
        /// <returns>The start state of the new machine</returns>
        public static FA Or(IEnumerable <FA> exprs, int accept = -1)
        {
            var start = new FA();
            var end   = new FA(true, accept);

            foreach (var alternative in exprs)
            {
                if (null != alternative)
                {
                    var branch = alternative.Clone();
                    start.EpsilonTransitions.Add(branch);

                    var branchAccept = branch.FirstAcceptingState;
                    branchAccept.IsAccepting = false;
                    branchAccept.EpsilonTransitions.Add(end);
                    continue;
                }
                // a null alternative: let the start state reach the end state directly
                if (!start.EpsilonTransitions.Contains(end))
                {
                    start.EpsilonTransitions.Add(end);
                }
            }
            return start;
        }
Пример #12
0
 /// <summary>
 /// Adds an input transition from this state to <paramref name="dst"/> on the given ranges.
 /// </summary>
 /// <remarks>
 /// If a transition to <paramref name="dst"/> already exists, its ranges are replaced by the result of
 /// <c>RangeUtility.Merge</c>; otherwise the <paramref name="ranges"/> array itself is stored.
 /// </remarks>
 /// <param name="ranges">The input ranges for the transition</param>
 /// <param name="dst">The destination state</param>
 /// <exception cref="ArgumentException">
 /// A transition to a different state already covers part of <paramref name="ranges"/>
 /// </exception>
 public void AddInpTrans(int[] ranges, FA dst)
 {
     // reject input that would send the same value to two different states
     foreach (var existing in InputTransitions)
     {
         if (dst != existing.Key && RangeUtility.Intersects(existing.Value, ranges))
         {
             throw new ArgumentException("There already is a transition to a different state on at least part of the specified input ranges");
         }
     }
     int[] known;
     if (!InputTransitions.TryGetValue(dst, out known))
     {
         InputTransitions.Add(dst, ranges);
         return;
     }
     InputTransitions[dst] = RangeUtility.Merge(known, ranges);
 }
Пример #13
0
        /// <summary>
        /// Parses one regular expression and compiles it into a program segment.
        /// </summary>
        /// <remarks>
        /// With <paramref name="optimize"/> set, the expression is first converted to a state machine and
        /// emitted from that, followed by dead code removal. If the conversion throws
        /// <see cref="NotSupportedException"/>, or yields null, the expression is emitted directly from its
        /// syntax tree instead.
        /// </remarks>
        /// <param name="input">The expression to compile</param>
        /// <param name="optimize">Indicates whether or not to optimize the code</param>
        /// <returns>A part of a program</returns>
        public static int[][] CompileRegexPart(LexContext input, bool optimize = true)
        {
            var ast     = Ast.Parse(input);
            var code    = new List <int[]>();
            FA  machine = null;

            if (optimize)
            {
                try
                {
                    machine = ast.ToFA(0);
                }
                catch (NotSupportedException)
                {
                    // we can't do lazy expressions; fall through to the syntax tree path
                }
            }
            if (null != machine)
            {
                Compiler.EmitFAPart(machine, code);
                var trimmed = new List <int[]>(Compiler.RemoveDeadCode(code));
                return trimmed.ToArray();
            }
            Compiler.EmitAstInnerPart(ast, code);
            return code.ToArray();
        }
Пример #14
0
        /// <summary>
        /// Compiles each expression into its own program part and combines the parts into a lexer program.
        /// </summary>
        /// <remarks>
        /// The index of an expression in <paramref name="expressions"/> is used as the key of its part.
        /// With <paramref name="optimize"/> set, each expression is emitted from a state machine when
        /// <c>FA.FromAst</c> can produce one, and dead code is removed from the combined program.
        /// </remarks>
        /// <param name="optimize">Whether to try the state machine path and remove dead code</param>
        /// <param name="expressions">The expressions to compile</param>
        /// <returns>The combined program</returns>
        internal static int[][] EmitLexer(bool optimize, params Ast[] expressions)
        {
            var parts = new KeyValuePair <int, int[][]> [expressions.Length];
            var index = 0;

            while (index < expressions.Length)
            {
                var code    = new List <int[]>();
                FA  machine = null;

                if (optimize)
                {
                    try
                    {
                        machine = FA.FromAst(expressions[index]);
                    }
                    catch (NotSupportedException)
                    {
                        // we can't do lazy expressions; emit from the syntax tree below
                    }
                }
                if (null != machine)
                {
                    EmitPart(machine, code);
                }
                else
                {
                    EmitPart(expressions[index], code);
                }
                parts[index] = new KeyValuePair <int, int[][]>(index, code.ToArray());
                ++index;
            }
            var program = EmitLexer(parts);

            if (optimize)
            {
                program = _RemoveDeadCode(program);
            }
            return program;
        }
Пример #15
0
        /// <summary>
        /// Builds a machine that matches <paramref name="expr"/> repeated between
        /// <paramref name="minOccurs"/> and <paramref name="maxOccurs"/> times.
        /// </summary>
        /// <remarks>
        /// For both bounds the values -1 and 0 share the same case labels and are handled identically.
        /// For <paramref name="maxOccurs"/> those values select the forms that loop back on themselves.
        /// The method works on a clone of <paramref name="expr"/> and recurses into itself, <c>Optional</c>
        /// and <c>Concat</c> to assemble the bounded forms.
        /// </remarks>
        /// <param name="expr">The machine to repeat; it is cloned before use</param>
        /// <param name="minOccurs">The minimum number of occurrences</param>
        /// <param name="maxOccurs">The maximum number of occurrences</param>
        /// <param name="accept">The accept symbol passed on to the machines that are built</param>
        /// <returns>The start state of the new machine</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Both bounds are positive and <paramref name="minOccurs"/> exceeds <paramref name="maxOccurs"/>, or
        /// <paramref name="maxOccurs"/> is 1 while <paramref name="minOccurs"/> is not -1, 0 or 1
        /// </exception>
        public static FA Repeat(FA expr, int minOccurs = -1, int maxOccurs = -1, int accept = -1)
        {
            // work on a copy: some branches below change the accepting states of the machine they hold
            expr = expr.Clone();
            if (minOccurs > 0 && maxOccurs > 0 && minOccurs > maxOccurs)
            {
                throw new ArgumentOutOfRangeException(nameof(maxOccurs));
            }
            FA result;

            switch (minOccurs)
            {
            case -1:
            case 0:
                switch (maxOccurs)
                {
                case -1:
                case 0:
                    // make the expression optional, then apply the "minimum 1, looping" form to that
                    return(Repeat(Optional(expr, accept), 1, 0, accept));

                /*result = new FA();
                 * var final = new FA(true, accept);
                 * final.EpsilonTransitions.Add(result);
                 * foreach (var afa in expr.FillAcceptingStates())
                 * {
                 *      afa.IsAccepting = false;
                 *      afa.EpsilonTransitions.Add(final);
                 * }
                 * result.EpsilonTransitions.Add(expr);
                 * result.EpsilonTransitions.Add(final);
                 * //Debug.Assert(null != result.FirstAcceptingState);
                 * return result;*/
                case 1:
                    // at most one occurrence
                    result = Optional(expr, accept);
                    //Debug.Assert(null != result.FirstAcceptingState);
                    return(result);

                default:
                    // concatenate maxOccurs optional copies
                    var l = new List <FA>();
                    expr = Optional(expr);
                    l.Add(expr);
                    for (int i = 1; i < maxOccurs; ++i)
                    {
                        l.Add(expr.Clone());
                    }
                    result = Concat(l, accept);
                    //Debug.Assert(null != result.FirstAcceptingState);
                    return(result);
                }

            case 1:
                switch (maxOccurs)
                {
                case -1:
                case 0:
                    // new start state -> expr; every accepting state of expr hands off to a new final
                    // state, and that final state links back to the new start state
                    result = new FA();
                    var final = new FA(true, accept);
                    final.EpsilonTransitions.Add(result);
                    foreach (var afa in expr.FillAcceptingStates())
                    {
                        afa.IsAccepting = false;
                        afa.EpsilonTransitions.Add(final);
                    }
                    result.EpsilonTransitions.Add(expr);
                    //Debug.Assert(null != result.FirstAcceptingState);
                    return(result);

                case 1:
                    // NOTE(review): the clone is returned as is; accept is not applied on this path,
                    // unlike the other branches - confirm this is intended
                    //Debug.Assert(null != expr.FirstAcceptingState);
                    return(expr);

                default:
                    // one mandatory occurrence followed by up to maxOccurs - 1 more
                    result = Concat(new FA[] { expr, Repeat(expr.Clone(), 0, maxOccurs - 1) }, accept);
                    //Debug.Assert(null != result.FirstAcceptingState);
                    return(result);
                }

            default:
                switch (maxOccurs)
                {
                case -1:
                case 0:
                    // exactly minOccurs occurrences followed by the (0, 0) form
                    result = Concat(new FA[] { Repeat(expr, minOccurs, minOccurs, accept), Repeat(expr, 0, 0, accept) }, accept);
                    //Debug.Assert(null != result.FirstAcceptingState);
                    return(result);

                case 1:
                    throw new ArgumentOutOfRangeException(nameof(maxOccurs));

                default:
                    if (minOccurs == maxOccurs)
                    {
                        // an exact count: concatenate minOccurs copies
                        var l = new List <FA>();
                        l.Add(expr);
                        //Debug.Assert(null != expr.FirstAcceptingState);
                        for (int i = 1; i < minOccurs; ++i)
                        {
                            var e = expr.Clone();
                            //Debug.Assert(null != e.FirstAcceptingState);
                            l.Add(e);
                        }
                        result = Concat(l, accept);
                        //Debug.Assert(null != result.FirstAcceptingState);
                        return(result);
                    }
                    // minOccurs mandatory copies followed by (maxOccurs - minOccurs) optional copies
                    result = Concat(new FA[] { Repeat(expr.Clone(), minOccurs, minOccurs, accept), Repeat(Optional(expr.Clone()), maxOccurs - minOccurs, maxOccurs - minOccurs, accept) }, accept);
                    //Debug.Assert(null != result.FirstAcceptingState);
                    return(result);
                }
            }
            // should never get here
            throw new NotImplementedException();
        }
Пример #16
0
        // hate taking object but have no choice
        // expressions can be KeyValuePair<int,object> where object is Ast or string or int[][] and int is the symbol id
        /// <summary>
        /// Compiles a mixed list of lexer parts into a single program.
        /// </summary>
        /// <remarks>
        /// With <paramref name="optimize"/> set, consecutive parts that can be turned into state machines are
        /// collected into runs; each run is joined under one root state, converted with <c>ToDfa</c>, passed
        /// through <c>TrimDuplicates</c> and emitted with <c>EmitFAPart</c>. Parts that cannot be converted
        /// (<c>ToFA</c> throws <see cref="NotSupportedException"/> or returns null, or the part is a precompiled
        /// fragment) keep their position between the runs and are emitted individually, followed by a
        /// <c>Save</c> and a <c>Match</c> instruction carrying the part's symbol id.
        /// String parts are converted with <c>Ast.FromLiteral</c> on both the optimized and the plain path.
        /// </remarks>
        /// <param name="optimize">Whether to merge parts into state machines and post-process the result</param>
        /// <param name="parts">
        /// The parts to compile. The key is the symbol id; the value is an <c>Ast</c>, a string, or an
        /// <c>int[][]</c> fragment. NOTE(review): fragments are handed to <c>Fixup</c> directly, which
        /// presumably adjusts the caller's array in place - confirm.
        /// </param>
        /// <returns>The combined program</returns>
        internal static int[][] EmitLexer(bool optimize, params KeyValuePair <int, object>[] parts)
        {
            var fragments = new List <int[][]>(parts.Length);
            var ordered   = new List <object>();          // i wish C# had proper unions
            var i         = 0;

            if (optimize)
            {
                var workingFA = new List <FA>();
                while (i < parts.Length)
                {
                    // phase 1: extend the current run for as long as parts convert to state machines
                    while (i < parts.Length)
                    {
                        var ast = _PartToAst(parts[i].Value);
                        if (null == ast)
                        {
                            break;
                        }
                        FA fa = null;
                        try
                        {
                            fa = ast.ToFA(parts[i].Key);
                        }
                        // we can't do lazy expressions
                        catch (NotSupportedException) { }
                        if (null == fa)
                        {
                            // close the run; the part at i is handled by phase 2
                            if (0 < workingFA.Count)
                            {
                                ordered.Add(workingFA);
                                workingFA = new List <FA>();
                            }
                            break;
                        }
                        workingFA.Add(fa);
                        ++i;
                    }
                    if (i == parts.Length)
                    {
                        if (0 < workingFA.Count)
                        {
                            ordered.Add(workingFA);
                            workingFA = new List <FA>();
                        }
                    }
                    // phase 2: pass unconvertible parts through until one converts again, which starts the next run
                    while (i < parts.Length)
                    {
                        var ast = _PartToAst(parts[i].Value);
                        if (null != ast)
                        {
                            FA fa = null;
                            try
                            {
                                fa = ast.ToFA(parts[i].Key);
                            }
                            // same condition as phase 1; anything else is a real error and must surface
                            catch (NotSupportedException) { }
                            if (null != fa)
                            {
                                workingFA.Add(fa);
                                ++i;
                                if (i == parts.Length)
                                {
                                    ordered.Add(workingFA);
                                }
                                break;
                            }
                        }
                        ordered.Add(parts[i]);
                        ++i;
                    }
                }
                // replace every run with a single deterministic machine
                i = 0;
                for (int ic = ordered.Count; i < ic; ++i)
                {
                    var l = ordered[i] as List <FA>;
                    if (null != l)
                    {
                        var root = new FA();
                        for (int jc = l.Count, j = 0; j < jc; ++j)
                        {
                            root.EpsilonTransitions.Add(l[j]);
                        }
                        root = root.ToDfa();
                        root.TrimDuplicates();
                        ordered[i] = root;
                    }
                }
            }
            else
            {
                for (i = 0; i < parts.Length; ++i)
                {
                    ordered.Add(parts[i]);
                }
            }
            i = 0;
            for (var ic = ordered.Count; i < ic; ++i)
            {
                var l  = new List <int[]>();
                var fa = ordered[i] as FA;
                if (null != fa)
                {
                    EmitFAPart(fa, l);
                }
                else
                {
                    if (ordered[i] is KeyValuePair <int, object> )
                    {
                        var kvp = (KeyValuePair <int, object>)ordered[i];
                        // strings must become an Ast here too, otherwise they fall into the fragment branch as null
                        var ast = _PartToAst(kvp.Value);
                        if (null != ast)
                        {
                            EmitAstInnerPart(ast, l);
                        }
                        else
                        {
                            var frag = kvp.Value as int[][];
                            Fixup(frag, l.Count);
                            l.AddRange(frag);
                            // TODO: add check for linker attribute (somehow?)
                        }
                        // both kinds of part end by saving and reporting the part's symbol id
                        l.Add(new int[] { Save, 1 });
                        l.Add(new int[] { Match, kvp.Key });
                    }
                }
                fragments.Add(l.ToArray());
            }
            var result = _EmitLexer(fragments);

            if (optimize)
            {
                // remove dead code
                var code = new List <int[]>(RemoveDeadCode(result));
                var pc   = code[1];
                // remove initial jmp for error handling, if we can, replacing it with the switch's default
                // from the next line
                if (3 == pc.Length && Jmp == pc[0])
                {
                    if (2 == pc[1] && result.Length - 3 == pc[2] && Switch == code[2][0])
                    {
                        pc = code[2];
                        var idef = Array.IndexOf(pc, -2);
                        if (0 > idef)
                        {
                            // the switch has no default yet: append one that targets what the jmp targeted
                            var nsw = new int[pc.Length + 2];
                            Array.Copy(pc, 0, nsw, 0, pc.Length);
                            nsw[nsw.Length - 2] = -2;
                            nsw[nsw.Length - 1] = result.Length - 3;
                            code[2]             = nsw;
                            code.RemoveAt(1);
                            result = code.ToArray();
                            Fixup(result, -1);
                        }
                    }
                }
            }
            return(result);
        }

        // Returns the part value as an Ast: the value itself when it is one, a literal expression when it
        // is a string, and null for anything else (such as a precompiled int[][] fragment).
        static Ast _PartToAst(object value)
        {
            var ast = value as Ast;
            if (null == ast)
            {
                var str = value as string;
                if (null != str)
                {
                    ast = Ast.FromLiteral(str);
                }
            }
            return(ast);
        }
Пример #17
0
        /// <summary>
        /// Emits the instructions for a state machine into <paramref name="prog"/>.
        /// </summary>
        /// <remarks>
        /// The machine is first passed through <c>ToGnfa</c> and <c>TrimNeutrals</c>. Emission then runs in two
        /// passes over the closure of the result: the first assigns an address to every state and reserves a
        /// slot for each state that is not final, the second fills the reserved slots once all addresses are known.
        /// </remarks>
        /// <param name="gnfa">The machine to emit</param>
        /// <param name="prog">The program being built; instructions are appended to it</param>
        internal static void EmitPart(FA gnfa, IList <int[]> prog)
        {
            // TODO: Make sure this is an actual GNFA and not just an NFA
            // NFA that is not a GNFA will not work
            gnfa = gnfa.ToGnfa();
            gnfa.TrimNeutrals();
            var rendered  = new Dictionary <FA, int>();
            var swFixups  = new Dictionary <FA, int>();
            // NOTE(review): nothing in this method adds to jmpFixups (the only code that did is commented
            // out below), so the lookup at the end of the second pass never succeeds
            var jmpFixups = new Dictionary <FA, int>();
            var l         = new List <FA>();

            gnfa.FillClosure(l);
            // move the accepting state to the end
            var fas  = gnfa.FirstAcceptingState;
            var afai = l.IndexOf(fas);

            l.RemoveAt(afai);
            l.Add(fas);
            // pass 1: record each state's address; only non-final states get a slot reserved
            for (int ic = l.Count, i = 0; i < ic; ++i)
            {
                var fa = l[i];
                rendered.Add(fa, prog.Count);
                if (!fa.IsFinal)
                {
                    int swfixup = prog.Count;
                    prog.Add(null);
                    swFixups.Add(fa, swfixup);
                }

                /*if(ic-1!=i)
                 * {
                 *      if (0==fa.EpsilonTransitions.Count)
                 *      {
                 *              jmpFixups.Add(fa, prog.Count);
                 *              prog.Add(null);
                 *      }
                 * }*/
            }
            // pass 2: every address is known now, so fill in the reserved slots
            for (int ic = l.Count, i = 0; i < ic; ++i)
            {
                var fa = l[i];
                if (!fa.IsFinal)
                {
                    var sw = new List <int>();
                    sw.Add(Switch);
                    // "simple" case: exactly one input transition, no epsilon transitions, and the
                    // destination is the very next state in the list
                    int[] simple = null;
                    if (1 == fa.InputTransitions.Count && 0 == fa.EpsilonTransitions.Count)
                    {
                        foreach (var trns in fa.InputTransitions)
                        {
                            if (l.IndexOf(trns.Key) == i + 1)
                            {
                                simple = trns.Value;
                            }
                        }
                    }
                    if (null != simple)
                    {
                        // no destination address is emitted for the simple forms
                        if (2 < simple.Length || simple[0] != simple[1])
                        {
                            sw[0] = Set;
                            sw.AddRange(simple);
                        }
                        else
                        {
                            // a single range whose two bounds are equal, i.e. one value
                            sw[0] = Char;
                            sw.Add(simple[0]);
                        }
                    }
                    else
                    {
                        // general form: ranges, a -1 marker and the destination address per transition
                        foreach (var trns in fa.InputTransitions)
                        {
                            var dst = rendered[trns.Key];
                            sw.AddRange(trns.Value);
                            sw.Add(-1);
                            sw.Add(dst);
                        }
                        if (0 < fa.InputTransitions.Count && 0 < fa.EpsilonTransitions.Count)
                        {
                            // -2 separates the input cases from the epsilon destinations
                            sw.Add(-2);
                        }
                        else if (0 == fa.InputTransitions.Count)
                        {
                            // epsilon transitions only: the instruction becomes a jmp
                            sw[0] = Jmp;
                        }
                        foreach (var efa in fa.EpsilonTransitions)
                        {
                            var dst = rendered[efa];
                            sw.Add(dst);
                        }
                    }
                    prog[swFixups[fa]] = sw.ToArray();
                }

                var jfi = -1;
                if (jmpFixups.TryGetValue(fa, out jfi))
                {
                    var jmp = new int[2];
                    jmp[0]    = Jmp;
                    jmp[1]    = prog.Count;
                    prog[jfi] = jmp;
                }
            }
        }
Пример #18
0
        /// <summary>
        /// Emits the instructions for a state machine, including the <c>Save</c>/<c>Match</c> pair for each
        /// distinct accept symbol, into <paramref name="prog"/>.
        /// </summary>
        /// <remarks>
        /// The machine is modified in place: every accepting state that is not final stops accepting and gets an
        /// epsilon transition to a new final state carrying the same accept symbol. Emission then runs in two
        /// passes over the closure: the first assigns addresses and reserves slots, the second fills the reserved
        /// slots once all addresses are known.
        /// </remarks>
        /// <param name="fa">The machine to emit. NOTE(review): the commented-out calls below suggest a deterministic machine is expected - confirm against callers.</param>
        /// <param name="prog">The program being built; instructions are appended to it</param>
        internal static void EmitFAPart(FA fa, IList <int[]> prog)
        {
            //fa = fa.ToDfa();
            //fa.TrimDuplicates();
            //fa = fa.ToGnfa();
            if (fa.IsNeutral)
            {
                // skip a neutral start state; if it has several epsilon targets the last one enumerated wins
                foreach (var efa in fa.EpsilonTransitions)
                {
                    fa = efa;
                }
            }
            var acc = fa.FillAcceptingStates();

            // give every accepting state that still has outgoing transitions a dedicated final state
            foreach (var afa in acc)
            {
                if (!afa.IsFinal)
                {
                    var ffa = new FA(true, afa.AcceptSymbol);
                    afa.EpsilonTransitions.Add(ffa);
                    afa.IsAccepting = false;
                }
            }
            var rendered  = new Dictionary <FA, int>();
            var swFixups  = new Dictionary <FA, int>();
            // NOTE(review): nothing in this method adds to jmpFixups, so the lookup at the end of the
            // second pass never succeeds
            var jmpFixups = new Dictionary <FA, int>();
            var l         = new List <FA>();

            fa.FillClosure(l);
            // pass 1: assign addresses; non-final states reserve a slot, final states emit save + match
            for (int ic = l.Count, i = 0; i < ic; ++i)
            {
                var reused = false;
                var cfa    = l[i];
                if (!cfa.IsFinal)
                {
                    rendered.Add(cfa, prog.Count);
                }
                else
                {
                    // look for an already rendered final state with the same accept symbol
                    foreach (var r in rendered)
                    {
                        if (r.Key.IsFinal)
                        {
                            if (r.Key.IsAccepting && cfa.AcceptSymbol == r.Key.AcceptSymbol)
                            {
                                // we can reuse this
                                // (adding while enumerating is only safe because of the immediate break)
                                rendered.Add(cfa, r.Value);
                                reused = true;
                                break;
                            }
                        }
                    }
                    if (!reused)
                    {
                        rendered.Add(cfa, prog.Count);
                    }
                }

                if (!cfa.IsFinal)
                {
                    int swfixup = prog.Count;
                    prog.Add(null);                     // switch
                    swFixups.Add(cfa, swfixup);
                }
                else
                {
#if DEBUG
                    System.Diagnostics.Debug.Assert(cfa.IsAccepting);
#endif
                    if (!reused)
                    {
                        prog.Add(new int[] { Save, 1 });                         // save
                        prog.Add(new int[] { Match, cfa.AcceptSymbol });
                    }
                }
            }
            // pass 2: every address is known now, so fill in the reserved slots
            for (int ic = l.Count, i = 0; i < ic; ++i)
            {
                var cfa = l[i];

                if (!cfa.IsFinal)
                {
                    var sw = new List <int>();
                    sw.Add(Switch);

                    // one group per destination state: its ranges, a -1 marker, then the destination address
                    var rngGrps = cfa.FillInputTransitionRangesGroupedByState();
                    foreach (var grp in rngGrps)
                    {
                        var dst = rendered[grp.Key];
                        sw.AddRange(grp.Value);
                        sw.Add(-1);
                        sw.Add(dst);
                    }
                    if (1 < sw.Count)
                    {
                        if (0 < cfa.EpsilonTransitions.Count)
                        {
                            // -2 separates the input cases from the epsilon destinations
                            sw.Add(-2);
                            foreach (var efa in cfa.EpsilonTransitions)
                            {
                                var dst = rendered[efa];
                                sw.Add(dst);
                            }
                        }
                    }
                    else
                    {
                        // basically a NOP. Will get removed
                        // NOTE(review): this path jumps to the next instruction and does not emit the
                        // state's epsilon transitions, if it has any - confirm that is intended
                        sw[0] = Jmp;
                        sw.Add(swFixups[cfa] + 1);
                    }
                    prog[swFixups[cfa]] = sw.ToArray();
                }

                var jfi = -1;
                if (jmpFixups.TryGetValue(cfa, out jfi))
                {
                    var jmp = new int[2];
                    jmp[0]    = Jmp;
                    jmp[1]    = prog.Count;
                    prog[jfi] = jmp;
                }
            }
        }