/// <summary>
/// Builds a two-state machine whose start state moves to a single target state on the given input ranges.
/// </summary>
/// <param name="ranges">The input ranges, passed unchanged to AddInpTrans (presumably flattened first/last pairs, as Literal passes { ch, ch } - confirm).</param>
/// <param name="accept">The accept symbol handed to the target state's constructor.</param>
/// <returns>The start state of the new machine.</returns>
public static FA Set(int[] ranges, int accept = -1)
{
    var start = new FA();
    var target = new FA(true, accept);
    start.AddInpTrans(ranges, target);
    return start;
}
/// <summary>
/// Returns a copy of <paramref name="expr"/> that can also be skipped entirely.
/// </summary>
/// <param name="expr">The expression to make optional. It is cloned; the clone is what gets modified.</param>
/// <param name="accept">The accept symbol assigned to the clone's first accepting state.</param>
/// <returns>The start state of the clone, with an added epsilon transition to its first accepting state.</returns>
public static FA Optional(FA expr, int accept = -1)
{
    var copy = expr.Clone();
    var accepting = copy.FirstAcceptingState;
    accepting.AcceptSymbol = accept;
    // the bypass: start state jumps straight to the accepting state
    copy.EpsilonTransitions.Add(accepting);
    return copy;
}
/// <summary>
/// Converts each AST in <paramref name="asts"/> to a state machine by calling its ToFA method.
/// </summary>
/// <param name="asts">The ASTs to convert.</param>
/// <param name="match">The value forwarded to every ToFA call.</param>
/// <returns>An array of the same length holding the converted machines, in input order.</returns>
static FA[] _ToFAs(Ast[] asts, int match = 0)
{
    var count = asts.Length;
    var machines = new FA[count];
    var index = 0;
    while (index < count)
    {
        machines[index] = asts[index].ToFA(match);
        ++index;
    }
    return machines;
}
/// <summary>
/// Links <paramref name="rhs"/> onto the end of <paramref name="lhs"/> in place.
/// </summary>
/// <remarks>
/// The first accepting state of <paramref name="lhs"/> stops being accepting and gains an
/// epsilon transition to <paramref name="rhs"/>. Neither machine is cloned here.
/// </remarks>
/// <param name="lhs">The machine to extend; it is modified.</param>
/// <param name="rhs">The machine to append.</param>
static void _Concat(FA lhs, FA rhs)
{
    var tail = lhs.FirstAcceptingState;
    tail.IsAccepting = false;
    tail.EpsilonTransitions.Add(rhs);
}
/// <summary>
/// Converts each AST in <paramref name="asts"/> to a state machine via FromAst.
/// </summary>
/// <param name="asts">The ASTs to convert.</param>
/// <param name="match">The value forwarded to every FromAst call.</param>
/// <returns>An array of the same length holding the converted machines, in input order.</returns>
static FA[] _FromAsts(Ast[] asts, int match = 0)
{
    var converted = new FA[asts.Length];
    var slot = 0;
    foreach (var ast in asts)
    {
        converted[slot] = FromAst(ast, match);
        ++slot;
    }
    return converted;
}
/// <summary>
/// Builds a chain of states that matches the given sequence of input values one after another.
/// </summary>
/// <param name="string">The input values to match, in order.</param>
/// <param name="accept">The accept symbol handed to the constructor of every state created along the chain.</param>
/// <returns>
/// The start state. When <paramref name="string"/> is empty this is a bare new FA() with no transitions.
/// </returns>
public static FA Literal(IEnumerable<int> @string, int accept = -1)
{
    var start = new FA();
    var tail = start;
    foreach (var value in @string)
    {
        var next = new FA(true, accept);
        // only the last state of the chain keeps its accepting flag
        tail.IsAccepting = false;
        tail.AddInpTrans(new[] { value, value }, next);
        tail = next;
    }
    return start;
}
/// <summary>
/// Steps past <paramref name="fa"/> when it is a neutral state, yielding the first state in its epsilon transitions.
/// </summary>
/// <param name="fa">The state to inspect.</param>
/// <param name="result">
/// The first epsilon target when <paramref name="fa"/> is neutral and has one; otherwise <paramref name="fa"/> itself.
/// </param>
/// <returns>
/// true when <paramref name="result"/> is a different state than <paramref name="fa"/>;
/// false when the state is not neutral, has no epsilon target, or points back at itself.
/// </returns>
static bool _TryForwardNeutral(FA fa, out FA result)
{
    result = fa;
    if (!fa.IsNeutral)
    {
        return false;
    }
    foreach (var target in fa.EpsilonTransitions)
    {
        // only the first target is considered
        result = target;
        return fa != target; // false if circular
    }
    return false;
}
/// <summary>
/// Follows neutral states starting at <paramref name="fa"/> until _TryForwardNeutral reports no further progress.
/// </summary>
/// <param name="fa">The state to start from.</param>
/// <returns>The state at which forwarding stopped.</returns>
/// <exception cref="ArgumentNullException"><paramref name="fa"/> is null.</exception>
static FA _ForwardNeutrals(FA fa)
{
    if (fa == null)
    {
        throw new ArgumentNullException(nameof(fa));
    }
    var current = fa;
    for (;;)
    {
        FA next;
        var moved = _TryForwardNeutral(current, out next);
        current = next;
        if (!moved)
        {
            return current;
        }
    }
}
/// <summary>
/// Builds a machine that matches the given expressions one after another.
/// </summary>
/// <remarks>
/// Every non-null expression is cloned once and the clones are chained with _Concat, so the
/// caller's machines are not linked together. Null entries are skipped.
/// </remarks>
/// <param name="exprs">The expressions to chain, in order.</param>
/// <param name="accept">The accept symbol assigned to the first accepting state of the finished chain.</param>
/// <returns>
/// The start state of the chain. When <paramref name="exprs"/> contains no non-null entry, a
/// lone state created with new FA(true, accept) is returned instead of failing.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="exprs"/> is null.</exception>
public static FA Concat(IEnumerable<FA> exprs, int accept = -1)
{
    if (null == exprs)
    {
        throw new ArgumentNullException(nameof(exprs));
    }
    FA result = null; // start of the whole chain
    FA left = null;   // most recently appended piece
    foreach (var val in exprs)
    {
        if (null == val)
        {
            continue;
        }
        var nval = val.Clone();
        if (null == left)
        {
            result = nval;
        }
        else
        {
            _Concat(left, nval);
        }
        left = nval;
    }
    if (null == result)
    {
        // nothing to concatenate: previously this dereferenced null
        return new FA(true, accept);
    }
    result.FirstAcceptingState.AcceptSymbol = accept;
    return result;
}
/// <summary>
/// Recursively emits one instruction per non-final state reachable from <paramref name="fa"/>.
/// </summary>
/// <remarks>
/// A slot is reserved in <paramref name="prog"/> before the state's targets are emitted, and is filled in
/// once all target addresses are known. Final states emit nothing. The state passed in is not itself
/// added to <paramref name="rendered"/> here; only its targets are.
/// </remarks>
/// <param name="fa">The state to emit.</param>
/// <param name="rendered">Maps states that already have an address to that address; updated as targets are emitted.</param>
/// <param name="prog">The program being built; instructions are appended to it.</param>
static void EmitAstInnerPart(FA fa, IDictionary<FA, int> rendered, IList<int[]> prog)
{
    if (fa.IsFinal)
    {
        return;
    }

    // reserve this state's slot so targets emitted below land after it
    var slot = prog.Count;
    prog.Add(null);
    var code = new List<int> { Switch };

    foreach (var edge in fa.InputTransitions)
    {
        var target = edge.Value;
        int address;
        if (!rendered.TryGetValue(target, out address))
        {
            address = prog.Count;
            rendered.Add(target, address);
            EmitAstInnerPart(target, rendered, prog);
        }
        code.Add(edge.Key.Key);
        code.Add(edge.Key.Value);
        code.Add(-1);
        code.Add(address);
    }

    if (fa.InputTransitions.Count == 0)
    {
        // no input transitions: the instruction becomes a jump
        code[0] = Jmp;
    }
    else if (fa.EpsilonTransitions.Count > 0)
    {
        // -2 separates the input cases from the epsilon destinations
        code.Add(-2);
    }

    foreach (var target in fa.EpsilonTransitions)
    {
        int address;
        if (!rendered.TryGetValue(target, out address))
        {
            address = prog.Count;
            rendered.Add(target, address);
            EmitAstInnerPart(target, rendered, prog);
        }
        code.Add(address);
    }

    prog[slot] = code.ToArray();
}
/// <summary>
/// Builds a machine that matches any one of the given expressions.
/// </summary>
/// <remarks>
/// Each non-null expression is cloned, reached from the new start state by an epsilon transition,
/// and its first accepting state is redirected to one shared end state. A null entry adds a direct
/// epsilon transition from the start state to the shared end state (at most once).
/// </remarks>
/// <param name="exprs">The alternatives; null entries are allowed.</param>
/// <param name="accept">The accept symbol handed to the shared end state's constructor.</param>
/// <returns>The new start state.</returns>
public static FA Or(IEnumerable<FA> exprs, int accept = -1)
{
    var start = new FA();
    var exit = new FA(true, accept);
    foreach (var option in exprs)
    {
        if (null == option)
        {
            if (!start.EpsilonTransitions.Contains(exit))
            {
                start.EpsilonTransitions.Add(exit);
            }
            continue;
        }
        var branch = option.Clone();
        start.EpsilonTransitions.Add(branch);
        var branchEnd = branch.FirstAcceptingState;
        branchEnd.IsAccepting = false;
        branchEnd.EpsilonTransitions.Add(exit);
    }
    return start;
}
/// <summary>
/// Adds an input transition from this state to <paramref name="dst"/> on the given ranges.
/// </summary>
/// <remarks>
/// When a transition to <paramref name="dst"/> already exists, the new ranges are merged into it with
/// RangeUtility.Merge; otherwise <paramref name="ranges"/> is stored as given.
/// </remarks>
/// <param name="ranges">The input ranges for the transition.</param>
/// <param name="dst">The destination state.</param>
/// <exception cref="ArgumentException">
/// RangeUtility.Intersects reports an overlap between <paramref name="ranges"/> and the ranges of a
/// transition to a different state.
/// </exception>
public void AddInpTrans(int[] ranges, FA dst)
{
    foreach (var existing in InputTransitions)
    {
        if (dst != existing.Key && RangeUtility.Intersects(existing.Value, ranges))
        {
            throw new ArgumentException("There already is a transition to a different state on at least part of the specified input ranges");
        }
    }

    int[] known;
    if (!InputTransitions.TryGetValue(dst, out known))
    {
        InputTransitions.Add(dst, ranges);
        return;
    }
    InputTransitions[dst] = RangeUtility.Merge(known, ranges);
}
/// <summary>
/// Parses one regular expression and compiles it into a program fragment.
/// </summary>
/// <param name="input">The source to parse the expression from.</param>
/// <param name="optimize">
/// When true, the expression is first converted to a state machine and emitted from that, with dead
/// code removed. If the conversion throws NotSupportedException (or yields null), or when false, the
/// fragment is emitted directly from the parsed expression.
/// </param>
/// <returns>The instructions of the compiled fragment.</returns>
public static int[][] CompileRegexPart(LexContext input, bool optimize = true)
{
    var ast = Ast.Parse(input);
    var prog = new List<int[]>();

    FA machine = null;
    if (optimize)
    {
        try
        {
            machine = ast.ToFA(0);
        }
        catch (NotSupportedException)
        {
            // we can't do lazy expressions; fall through to the direct path
        }
    }

    if (machine != null)
    {
        Compiler.EmitFAPart(machine, prog);
        var trimmed = new List<int[]>(Compiler.RemoveDeadCode(prog));
        return trimmed.ToArray();
    }

    Compiler.EmitAstInnerPart(ast, prog);
    return prog.ToArray();
}
/// <summary>
/// Compiles each expression into its own fragment and combines the fragments into one lexer program.
/// </summary>
/// <param name="optimize">
/// When true, each expression is emitted from a state machine built with FA.FromAst where that does not
/// throw NotSupportedException, and the combined program is passed through _RemoveDeadCode.
/// </param>
/// <param name="expressions">The expressions to compile; the index of each one is used as its key.</param>
/// <returns>The combined program.</returns>
internal static int[][] EmitLexer(bool optimize, params Ast[] expressions)
{
    var parts = new KeyValuePair<int, int[][]>[expressions.Length];
    var index = 0;
    foreach (var expression in expressions)
    {
        var code = new List<int[]>();
        FA machine = null;
        if (optimize)
        {
            try
            {
                machine = FA.FromAst(expression);
            }
            catch (NotSupportedException)
            {
                // we can't do lazy expressions; emit from the expression instead
            }
        }

        if (machine == null)
        {
            EmitPart(expression, code);
        }
        else
        {
            EmitPart(machine, code);
        }

        parts[index] = new KeyValuePair<int, int[][]>(index, code.ToArray());
        ++index;
    }

    var program = EmitLexer(parts);
    if (optimize)
    {
        program = _RemoveDeadCode(program);
    }
    return program;
}
/// <summary>
/// Builds a machine that matches <paramref name="expr"/> repeated between
/// <paramref name="minOccurs"/> and <paramref name="maxOccurs"/> times.
/// </summary>
/// <param name="expr">The expression to repeat. It is cloned before anything else is done with it.</param>
/// <param name="minOccurs">The minimum count; -1 and 0 are handled alike.</param>
/// <param name="maxOccurs">The maximum count; -1 and 0 are handled alike.</param>
/// <param name="accept">The accept symbol passed to the builders used along the way.</param>
/// <returns>The start state of the resulting machine.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Both bounds are positive and <paramref name="minOccurs"/> exceeds <paramref name="maxOccurs"/>,
/// or <paramref name="maxOccurs"/> is 1 while <paramref name="minOccurs"/> is not -1, 0 or 1.
/// </exception>
public static FA Repeat(FA expr, int minOccurs = -1, int maxOccurs = -1, int accept = -1)
{
    expr = expr.Clone();
    if (minOccurs > 0 && maxOccurs > 0 && minOccurs > maxOccurs)
    {
        throw new ArgumentOutOfRangeException(nameof(maxOccurs));
    }

    var noMin = minOccurs == -1 || minOccurs == 0;
    var noMax = maxOccurs == -1 || maxOccurs == 0;

    if (noMin)
    {
        if (noMax)
        {
            // an optional expression repeated one or more times
            return Repeat(Optional(expr, accept), 1, 0, accept);
        }
        if (maxOccurs == 1)
        {
            return Optional(expr, accept);
        }
        // chain of maxOccurs optional copies
        var optional = Optional(expr);
        var pieces = new List<FA>();
        pieces.Add(optional);
        for (var n = 1; n < maxOccurs; ++n)
        {
            pieces.Add(optional.Clone());
        }
        return Concat(pieces, accept);
    }

    if (minOccurs == 1)
    {
        if (noMax)
        {
            // one or more: every accepting state feeds a new end state that loops back to the start
            var loopStart = new FA();
            var loopEnd = new FA(true, accept);
            loopEnd.EpsilonTransitions.Add(loopStart);
            foreach (var accepting in expr.FillAcceptingStates())
            {
                accepting.IsAccepting = false;
                accepting.EpsilonTransitions.Add(loopEnd);
            }
            loopStart.EpsilonTransitions.Add(expr);
            return loopStart;
        }
        if (maxOccurs == 1)
        {
            // NOTE(review): accept is not applied on this path; the clone keeps the accept symbol it already had - confirm intended
            return expr;
        }
        return Concat(new FA[] { expr, Repeat(expr.Clone(), 0, maxOccurs - 1) }, accept);
    }

    // remaining values of minOccurs
    if (noMax)
    {
        return Concat(new FA[] { Repeat(expr, minOccurs, minOccurs, accept), Repeat(expr, 0, 0, accept) }, accept);
    }
    if (maxOccurs == 1)
    {
        throw new ArgumentOutOfRangeException(nameof(maxOccurs));
    }
    if (minOccurs == maxOccurs)
    {
        // exact count: the expression followed by minOccurs - 1 clones
        var copies = new List<FA>();
        copies.Add(expr);
        for (var n = 1; n < minOccurs; ++n)
        {
            copies.Add(expr.Clone());
        }
        return Concat(copies, accept);
    }
    // the required part followed by (maxOccurs - minOccurs) optional copies
    return Concat(
        new FA[]
        {
            Repeat(expr.Clone(), minOccurs, minOccurs, accept),
            Repeat(Optional(expr.Clone()), maxOccurs - minOccurs, maxOccurs - minOccurs, accept)
        },
        accept);
}
// hate taking object but have no choice
// expressions can be KeyValuePair<int,object> where object is Ast or string or int[][] and int is the symbol id
/// <summary>
/// Compiles a mixed list of parts into one lexer program.
/// </summary>
/// <remarks>
/// With <paramref name="optimize"/> set, consecutive parts that convert to a state machine are grouped,
/// merged under a common root, run through ToDfa/TrimDuplicates and emitted with EmitFAPart. Any other
/// part closes the current group and is emitted on its own, so the emitted order follows the order of
/// <paramref name="parts"/>. Parts emitted on their own get a Save and a Match instruction appended.
/// </remarks>
/// <param name="optimize">Whether to group parts into state machines and post-process the program.</param>
/// <param name="parts">The parts; the key is the symbol id, the value an Ast, a string literal or a precompiled fragment.</param>
/// <returns>The combined program.</returns>
/// <exception cref="ArgumentNullException"><paramref name="parts"/> is null.</exception>
/// <exception cref="ArgumentException">A part value is not an Ast, a string or an int[][].</exception>
internal static int[][] EmitLexer(bool optimize, params KeyValuePair<int, object>[] parts)
{
    if (null == parts)
    {
        throw new ArgumentNullException(nameof(parts));
    }
    var fragments = new List<int[][]>(parts.Length);
    // holds List<FA> groups (later replaced by their merged FA) and raw parts
    var ordered = new List<object>();
    if (optimize)
    {
        var workingFA = new List<FA>();
        for (var i = 0; i < parts.Length; ++i)
        {
            FA fa = null;
            var ast = _AstFromPartValue(parts[i].Value);
            if (null != ast)
            {
                try
                {
                    fa = ast.ToFA(parts[i].Key);
                }
                // we can't do lazy expressions
                catch (NotSupportedException) { }
            }
            if (null != fa)
            {
                workingFA.Add(fa);
                continue;
            }
            // this part can't join a group: close the pending group first so the
            // part is not emitted ahead of the expressions that came before it
            if (0 < workingFA.Count)
            {
                ordered.Add(workingFA);
                workingFA = new List<FA>();
            }
            ordered.Add(parts[i]);
        }
        if (0 < workingFA.Count)
        {
            ordered.Add(workingFA);
        }

        // merge each group into a single machine
        for (int ic = ordered.Count, i = 0; i < ic; ++i)
        {
            var l = ordered[i] as List<FA>;
            if (null != l)
            {
                var root = new FA();
                for (int jc = l.Count, j = 0; j < jc; ++j)
                {
                    root.EpsilonTransitions.Add(l[j]);
                }
                root = root.ToDfa();
                root.TrimDuplicates();
                ordered[i] = root;
            }
        }
    }
    else
    {
        for (var i = 0; i < parts.Length; ++i)
        {
            ordered.Add(parts[i]);
        }
    }

    for (int ic = ordered.Count, i = 0; i < ic; ++i)
    {
        var l = new List<int[]>();
        var fa = ordered[i] as FA;
        if (null != fa)
        {
            EmitFAPart(fa, l);
        }
        else if (ordered[i] is KeyValuePair<int, object>)
        {
            var kvp = (KeyValuePair<int, object>)ordered[i];
            // string literals are converted here too; previously they fell through
            // to the fragment branch and were used as a null int[][]
            var ast = _AstFromPartValue(kvp.Value);
            if (null != ast)
            {
                EmitAstInnerPart(ast, l);
            }
            else
            {
                var frag = kvp.Value as int[][];
                if (null == frag)
                {
                    throw new ArgumentException("Each part must be an Ast, a string or an int[][]", nameof(parts));
                }
                Fixup(frag, l.Count);
                l.AddRange(frag);
                // TODO: add check for linker attribute (somehow?)
            }
            l.Add(new int[] { Save, 1 });
            l.Add(new int[] { Match, kvp.Key });
        }
        fragments.Add(l.ToArray());
    }

    var result = _EmitLexer(fragments);
    if (optimize)
    {
        // remove dead code, and keep the trimmed program even when the jmp below can't be folded
        var code = new List<int[]>(RemoveDeadCode(result));
        result = code.ToArray();
        if (2 < code.Count)
        {
            var pc = code[1];
            // remove initial jmp for error handling, if we can, replacing it with the switch's default
            // from the next line
            if (3 == pc.Length && Jmp == pc[0] &&
                2 == pc[1] && result.Length - 3 == pc[2] && Switch == code[2][0])
            {
                var sw = code[2];
                if (0 > Array.IndexOf(sw, -2))
                {
                    var nsw = new int[sw.Length + 2];
                    Array.Copy(sw, 0, nsw, 0, sw.Length);
                    nsw[nsw.Length - 2] = -2;
                    nsw[nsw.Length - 1] = result.Length - 3;
                    code[2] = nsw;
                    code.RemoveAt(1);
                    result = code.ToArray();
                    Fixup(result, -1);
                }
            }
        }
    }
    return result;
}

// Returns the Ast carried by a part value: the value itself when it is an Ast,
// Ast.FromLiteral(value) when it is a string, otherwise null.
static Ast _AstFromPartValue(object value)
{
    var ast = value as Ast;
    if (null == ast)
    {
        var str = value as string;
        if (null != str)
        {
            ast = Ast.FromLiteral(str);
        }
    }
    return ast;
}
/// <summary>
/// Emits the instructions for a state machine into <paramref name="prog"/>.
/// </summary>
/// <remarks>
/// The machine is converted with ToGnfa() and TrimNeutrals() first. Its first accepting state is moved
/// to the end of the emission order. Every non-final state gets exactly one instruction; final states
/// emit nothing and resolve to the address that follows the previously emitted instruction
/// (presumably the caller appends the match code there - confirm against callers).
/// </remarks>
/// <param name="gnfa">The machine to emit.</param>
/// <param name="prog">The program being built; instructions are appended to it.</param>
internal static void EmitPart(FA gnfa, IList<int[]> prog)
{
    // TODO: Make sure this is an actual GNFA and not just an NFA
    // NFA that is not a GNFA will not work
    gnfa = gnfa.ToGnfa();
    gnfa.TrimNeutrals();
    var rendered = new Dictionary<FA, int>();
    var l = new List<FA>();
    gnfa.FillClosure(l);
    // move the accepting state to the end
    var fas = gnfa.FirstAcceptingState;
    var afai = l.IndexOf(fas);
    l.RemoveAt(afai);
    l.Add(fas);

    // first pass: assign every state its address and reserve a slot for each non-final state
    for (int ic = l.Count, i = 0; i < ic; ++i)
    {
        var fa = l[i];
        rendered.Add(fa, prog.Count);
        if (!fa.IsFinal)
        {
            prog.Add(null);
        }
    }

    // second pass: fill in the reserved slots now that all addresses are known
    for (int ic = l.Count, i = 0; i < ic; ++i)
    {
        var fa = l[i];
        if (fa.IsFinal)
        {
            continue;
        }
        var sw = new List<int>();
        sw.Add(Switch);
        // a lone input transition to the state emitted directly after this one
        // needs no destination and is emitted as Set or Char instead of Switch
        int[] simple = null;
        if (1 == fa.InputTransitions.Count && 0 == fa.EpsilonTransitions.Count)
        {
            foreach (var trns in fa.InputTransitions)
            {
                if (l.IndexOf(trns.Key) == i + 1)
                {
                    simple = trns.Value;
                }
            }
        }
        if (null != simple)
        {
            if (2 < simple.Length || simple[0] != simple[1])
            {
                sw[0] = Set;
                sw.AddRange(simple);
            }
            else
            {
                sw[0] = Char;
                sw.Add(simple[0]);
            }
        }
        else
        {
            foreach (var trns in fa.InputTransitions)
            {
                var dst = rendered[trns.Key];
                sw.AddRange(trns.Value);
                sw.Add(-1);
                sw.Add(dst);
            }
            if (0 < fa.InputTransitions.Count && 0 < fa.EpsilonTransitions.Count)
            {
                // -2 separates the input cases from the epsilon destinations
                sw.Add(-2);
            }
            else if (0 == fa.InputTransitions.Count)
            {
                sw[0] = Jmp;
            }
            foreach (var efa in fa.EpsilonTransitions)
            {
                sw.Add(rendered[efa]);
            }
        }
        prog[rendered[fa]] = sw.ToArray();
    }
}
/// <summary>
/// Emits the instructions for a state machine, including the Save/Match code for its final states.
/// </summary>
/// <remarks>
/// The machine is modified: every accepting state that is not final loses its accepting flag and gains
/// an epsilon transition to a new final state carrying the same accept symbol. Final states that share
/// an accept symbol share one Save/Match pair in the output.
/// </remarks>
/// <param name="fa">The machine to emit. If it is neutral, emission starts from the last of its epsilon targets.</param>
/// <param name="prog">The program being built; instructions are appended to it.</param>
internal static void EmitFAPart(FA fa, IList<int[]> prog)
{
    //fa = fa.ToDfa();
    //fa.TrimDuplicates();
    //fa = fa.ToGnfa();
    if (fa.IsNeutral)
    {
        foreach (var efa in fa.EpsilonTransitions)
        {
            fa = efa;
        }
    }
    var acc = fa.FillAcceptingStates();
    foreach (var afa in acc)
    {
        if (!afa.IsFinal)
        {
            var ffa = new FA(true, afa.AcceptSymbol);
            afa.EpsilonTransitions.Add(ffa);
            afa.IsAccepting = false;
        }
    }
    var rendered = new Dictionary<FA, int>();
    var l = new List<FA>();
    fa.FillClosure(l);

    // first pass: assign addresses; reserve a slot per non-final state, emit Save/Match per distinct final
    for (int ic = l.Count, i = 0; i < ic; ++i)
    {
        var cfa = l[i];
        if (!cfa.IsFinal)
        {
            rendered.Add(cfa, prog.Count);
            prog.Add(null); // switch
            continue;
        }
#if DEBUG
        System.Diagnostics.Debug.Assert(cfa.IsAccepting);
#endif
        // look for an already emitted final state with the same accept symbol;
        // the dictionary is only modified after the enumeration has finished
        var reused = false;
        var reusedAddress = 0;
        foreach (var r in rendered)
        {
            if (r.Key.IsFinal && r.Key.IsAccepting && cfa.AcceptSymbol == r.Key.AcceptSymbol)
            {
                reusedAddress = r.Value;
                reused = true;
                break;
            }
        }
        if (reused)
        {
            rendered.Add(cfa, reusedAddress);
        }
        else
        {
            rendered.Add(cfa, prog.Count);
            prog.Add(new int[] { Save, 1 }); // save
            prog.Add(new int[] { Match, cfa.AcceptSymbol });
        }
    }

    // second pass: fill in the reserved slots now that all addresses are known
    for (int ic = l.Count, i = 0; i < ic; ++i)
    {
        var cfa = l[i];
        if (cfa.IsFinal)
        {
            continue;
        }
        var sw = new List<int>();
        sw.Add(Switch);
        var rngGrps = cfa.FillInputTransitionRangesGroupedByState();
        foreach (var grp in rngGrps)
        {
            var dst = rendered[grp.Key];
            sw.AddRange(grp.Value);
            sw.Add(-1);
            sw.Add(dst);
        }
        var hasEpsilons = 0 < cfa.EpsilonTransitions.Count;
        if (1 < sw.Count)
        {
            if (hasEpsilons)
            {
                // -2 separates the input cases from the epsilon destinations
                sw.Add(-2);
                foreach (var efa in cfa.EpsilonTransitions)
                {
                    sw.Add(rendered[efa]);
                }
            }
        }
        else if (hasEpsilons)
        {
            // epsilon-only state: jump to its targets rather than falling through
            // to whatever instruction happens to come next
            sw[0] = Jmp;
            foreach (var efa in cfa.EpsilonTransitions)
            {
                sw.Add(rendered[efa]);
            }
        }
        else
        {
            // basically a NOP. Will get removed
            sw[0] = Jmp;
            sw.Add(rendered[cfa] + 1);
        }
        prog[rendered[cfa]] = sw.ToArray();
    }
}