/// <summary>
/// Builds the <c>FA</c> that corresponds to this AST node, recursing into child expressions as needed.
/// </summary>
/// <param name="match">Value forwarded unchanged to every <c>FA</c> factory call and to every recursive <c>ToFA</c> call.</param>
/// <returns>The <c>FA</c> produced by the factory method selected by this node's <c>Kind</c>.</returns>
/// <exception cref="NotSupportedException">This node is flagged as lazy.</exception>
/// <exception cref="NotImplementedException">This node's <c>Kind</c> is not one of the handled kinds.</exception>
public FA ToFA(int match)
{
    // Lazy nodes are rejected up front, before the node kind is even inspected.
    if (IsLazy)
    {
        throw new NotSupportedException("The AST node cannot be lazy");
    }

    switch (Kind)
    {
        case Ast.Alt:
            return FA.Or(_ToFAs(Exprs, match), match);

        case Ast.Cat:
            // A concatenation with exactly one child is just that child.
            return 1 == Exprs.Length
                ? Exprs[0].ToFA(match)
                : FA.Concat(_ToFAs(Exprs, match), match);

        case Ast.Dot:
            // Two ranges: 0..0xD7FF and 0xE000..0x10FFFF (0xD800..0xDFFF is left out).
            return FA.Set(new int[] { 0, 0xd7ff, 0xe000, 0x10ffff }, match);

        case Ast.Lit:
            return FA.Literal(new int[] { Value }, match);

        case Ast.NSet:
        {
            // Normalize the node's own ranges, then build the set from their complement.
            var listed = RangeUtility.ToPairs(Ranges);
            RangeUtility.NormalizeRangeList(listed);
            var complement = new List<KeyValuePair<int, int>>(RangeUtility.NotRanges(listed));
            return FA.Set(RangeUtility.FromPairs(complement), match);
        }

        case Ast.NUCode:
        {
            // Same as NSet, but the ranges come from the Unicode category table entry selected by Value.
            var category = RangeUtility.ToPairs(CharacterClasses.UnicodeCategories[Value]);
            RangeUtility.NormalizeRangeList(category);
            var complement = new List<KeyValuePair<int, int>>(RangeUtility.NotRanges(category));
            return FA.Set(RangeUtility.FromPairs(complement), match);
        }

        case Ast.Opt:
        {
            FA inner = Exprs[0].ToFA(match);
            return FA.Optional(inner, match);
        }

        case Ast.Plus:
        {
            FA inner = Exprs[0].ToFA(match);
            return FA.Repeat(inner, 1, 0, match);
        }

        case Ast.Rep:
        {
            FA inner = Exprs[0].ToFA(match);
            return FA.Repeat(inner, Min, Max, match);
        }

        case Ast.Set:
            return FA.Set(Ranges, match);

        case Ast.Star:
        {
            FA inner = Exprs[0].ToFA(match);
            return FA.Repeat(inner, 0, 0, match);
        }

        case Ast.UCode:
            return FA.Set(CharacterClasses.UnicodeCategories[Value], match);

        default:
            throw new NotImplementedException();
    }
}
/// <summary>
/// Collects the values that can be consumed first when execution starts at the
/// instruction at <paramref name="index"/> in <paramref name="part"/>.
/// </summary>
/// <param name="part">The program: one <c>int[]</c> per instruction, with the opcode in element 0 and operands after it.</param>
/// <param name="index">Index of the instruction to start from.</param>
/// <returns>
/// A flat array of values (a <c>Char</c> instruction yields its operand twice, i.e. a single-value range).
/// Empty when <paramref name="index"/> is past the end of the program or the instruction is <c>Match</c>.
/// For <c>UCode</c> the returned array is the <c>CharacterClasses.UnicodeCategories</c> entry itself, not a copy.
/// </returns>
/// <exception cref="NotImplementedException">The opcode is not one of the handled opcodes.</exception>
static int[] _GetFirsts(int[][] part, int index)
{
    if (part.Length <= index)
    {
        return new int[0];
    }

    var pc = part[index];
    switch (pc[0])
    {
        case Char:
            return new int[] { pc[1], pc[1] };

        case Set:
        {
            // Everything after the opcode is the range data; hand back a private copy.
            var ranges = new int[pc.Length - 1];
            Array.Copy(pc, 1, ranges, 0, ranges.Length);
            return ranges;
        }

        case NSet:
        {
            // Copy the operands, then return the complement of those ranges.
            var ranges = new int[pc.Length - 1];
            Array.Copy(pc, 1, ranges, 0, ranges.Length);
            return RangeUtility.FromPairs(new List<KeyValuePair<int, int>>(RangeUtility.NotRanges(RangeUtility.ToPairs(ranges))));
        }

        case Any:
            return new int[] { 0, 0x10ffff };

        case UCode:
            return CharacterClasses.UnicodeCategories[pc[1]];

        case NUCode:
        {
            // Negate the category's ranges. The table entry is only read here: the previous
            // Array.Copy into it (left over from the NSet case) wrote instruction operands
            // into the shared table, or threw because the instruction is shorter than the entry.
            var category = CharacterClasses.UnicodeCategories[pc[1]];
            return RangeUtility.FromPairs(new List<KeyValuePair<int, int>>(RangeUtility.NotRanges(RangeUtility.ToPairs(category))));
        }

        case Switch:
        {
            var firsts = new List<int>();
            var idx = 1;

            // Operands up to a -2 marker (or the end) are collected as-is, except that each
            // -1 marker and the single operand following it are skipped
            // (presumably the branch destination for the preceding ranges; confirm against the emitter).
            while (pc.Length > idx && -2 != pc[idx])
            {
                if (-1 == pc[idx])
                {
                    idx += 2;
                    continue;
                }
                firsts.Add(pc[idx]);
                ++idx; // advance past the consumed operand; without this the loop never terminates
            }

            // After a -2 marker every remaining operand is an instruction index to follow.
            if (pc.Length > idx && -2 == pc[idx])
            {
                ++idx;
                while (pc.Length > idx)
                {
                    firsts.AddRange(_GetFirsts(part, pc[idx]));
                    ++idx;
                }
            }
            return firsts.ToArray();
        }

        case Jmp:
        {
            // Every operand is an instruction index; gather the firsts of each target.
            var firsts = new List<int>();
            for (var idx = 1; pc.Length > idx; ++idx)
            {
                firsts.AddRange(_GetFirsts(part, pc[idx]));
            }
            return firsts.ToArray();
        }

        case Match:
            return new int[0];

        case Save:
            // Save contributes nothing itself; the answer is that of the next instruction.
            return _GetFirsts(part, index + 1);

        default:
            // should never get here
            throw new NotImplementedException();
    }
}