/// <summary>
/// Parses <paramref name="expression"/> as a charset expression, computes it against a
/// <c>UnicodeCharSetProvider</c> created from <c>UnicodeRanges.FromUnicodeName</c>, and writes
/// the string form of the computed result to <c>this.output</c>.
/// </summary>
/// <param name="expression">The charset expression text handed to <c>CharsetParser.Parse</c>.</param>
public void ParseTest(string expression)
{
    var namedRanges = new UnicodeCharSetProvider(UnicodeRanges.FromUnicodeName);
    var parsedCharset = CharsetParser.Parse(expression);
    var computed = parsedCharset.Compute(namedRanges);

    this.output.WriteLine(computed.ToString());
}
/// <summary>
/// Parses <paramref name="pattern"/>, converts it to a <typeparamref name="TLetter"/> regex,
/// builds an <c>AlphabetBuilder</c> from it and writes a diagnostic dump to <c>this.output</c>:
/// the input pattern, the converted regex, the generated letter mapping, the letter regex and
/// a pseudocode rendering of the mapping function.
/// </summary>
/// <typeparam name="TLetter">The letter type of the regex and of the alphabet.</typeparam>
/// <param name="pattern">The regex text passed to <c>RegexParser.Parse</c>.</param>
/// <param name="caseSensitive">Forwarded to <c>ToInvariant</c>; also written to the output.</param>
/// <param name="mapper">The Unicode mapper forwarded to <c>ToInvariant</c>.</param>
/// <param name="eof">Optional EOF letter forwarded to the alphabet builder; only its presence is written to the output.</param>
/// <param name="validRanges">The valid letter ranges forwarded to the alphabet builder.</param>
/// <param name="builder">Receives the alphabet builder created from the converted regex.</param>
/// <returns>The number of entries in <c>builder.AlphabetById</c>.</returns>
private int Test<TLetter>(string pattern, bool caseSensitive, IUnicodeMapper<TLetter> mapper, TLetter? eof, RangeSet<TLetter> validRanges, out AlphabetBuilder<TLetter> builder)
    where TLetter : struct, IEquatable<TLetter>, IComparable<TLetter>
{
    // Writes an empty line followed by a section title.
    void Heading(string title)
    {
        this.output.WriteLine("");
        this.output.WriteLine(title);
    }

    // Renders one range as an equality test (single letter) or as a bounds test.
    string RangeCondition(Range<TLetter> range)
    {
        if (range.From.CompareTo(range.To) == 0)
        {
            return $"(v == '{range.From}')";
        }

        return $"(v >= '{range.From}' && v <= '{range.To}')";
    }

    this.output.WriteLine("Input regex (Case Sensitive: {0}, EOF letter: {1}):", caseSensitive, eof.HasValue);
    this.output.WriteLine(pattern);

    var provider = new UnicodeCharSetProvider();
    var parsedPattern = RegexParser.Parse(pattern, null);
    var regex = parsedPattern.ToInvariant(mapper, provider, caseSensitive);

    this.output.WriteLine("");
    this.output.WriteLine("{0} regex:", typeof(TLetter).Name);
    this.output.WriteLine(regex.ToString());

    builder = new AlphabetBuilder<TLetter>(regex, eof, validRanges);

    Heading("Generated letter mapping:");
    foreach (var entry in builder.AlphabetById)
    {
        this.output.WriteLine("{0}: {1} ({2})", entry.Key, entry.Value, entry.Value.Count);
    }

    Heading("Letter Regex:");
    this.output.WriteLine(builder.Expression.ToString());

    Heading("Mapping function pseudocode:");

    // For non-primitive letter types every range is rendered as a condition; for primitive
    // letter types only ranges that expand to more than two letters are.
    var letterIsPrimitive = typeof(TLetter).IsPrimitive;

    var rangeToLetter = builder
        .AlphabetById
        .SelectMany(entry => entry.Value.Select(range => new KeyValuePair<Range<TLetter>, LetterId>(range, entry.Key)));

    var groups = rangeToLetter
        .GroupBy(
            item => new { AsCondition = !letterIsPrimitive || item.Key.Expand().Skip(2).Any(), LetterId = item.Value },
            item => item.Key)
        .OrderBy(g => g.Key.AsCondition)
        .ThenBy(g => g.Key.LetterId);

    var switchOpen = false;
    foreach (var group in groups)
    {
        if (!group.Key.AsCondition)
        {
            // Small ranges are listed letter by letter as case labels of a switch.
            if (!switchOpen)
            {
                this.output.WriteLine("switch (v) {");
                switchOpen = true;
            }

            var caseLabels = group
                .SelectMany(range => range.Expand())
                .OrderBy(letter => letter)
                .Select(letter => $" case '{letter}':");
            this.output.WriteLine("{0}" + Environment.NewLine + " return {1}", string.Join(Environment.NewLine, caseLabels), group.Key.LetterId);
            continue;
        }

        // A condition group closes a still-open switch before emitting its if-statement.
        if (switchOpen)
        {
            this.output.WriteLine("}");
            switchOpen = false;
        }

        var conditions = group
            .OrderBy(range => range.From)
            .Select(range => RangeCondition(range));
        this.output.WriteLine("if ({0}) return {1}", string.Join(" ||" + Environment.NewLine + " ", conditions), group.Key.LetterId);
    }

    if (switchOpen)
    {
        this.output.WriteLine("}");
    }

    return builder.AlphabetById.Count;
}
/// <summary>
/// Builds the two artefacts this grammar stores: the compiled lexer state machine
/// (<c>this.stateMachine</c>) and the LALR table (<c>this.table</c>) for charset expressions.
/// </summary>
/// <exception cref="InvalidOperationException">
/// The generated DFA has a start state id different from the default id.
/// </exception>
private CharsetGrammar()
{
    var unicodeSets = new UnicodeCharSetProvider();
    var utf16 = new UnicodeUtf16Mapper(false, false);

    // Whitespace and the two charset tokens are taken from the regex lexer's own expression.
    var lexerRx = RegexLexer.CreateRx(utf16);
    var acceptWhitespace = new RxAccept<char>(RxOfSymbol<char>.Extract(lexerRx, RegexLexer.SymWhitespace), SymWhitespace, 0);
    var acceptCharset = new RxAccept<char>(RxOfSymbol<char>.Extract(lexerRx, RegexLexer.SymCharset), SymCharset, 0);
    var acceptRegexCharset = new RxAccept<char>(RxOfSymbol<char>.Extract(lexerRx, RegexLexer.SymRegexCharset), SymRegexCharset, 0);

    // Operators and parentheses are single-character tokens.
    var acceptUnion = new RxAccept<char>(RegexMatchSet.FromChars('|', '+').ToInvariant(utf16, unicodeSets, true), SymUnion, 0);
    var acceptSubtract = new RxAccept<char>(RegexMatchSet.FromChars('-').ToInvariant(utf16, unicodeSets, true), SymSubtract, 0);
    var acceptIntersect = new RxAccept<char>(RegexMatchSet.FromChars('&').ToInvariant(utf16, unicodeSets, true), SymIntersect, 0);
    var acceptDifference = new RxAccept<char>(RegexMatchSet.FromChars('^').ToInvariant(utf16, unicodeSets, true), SymDifference, 0);
    var acceptNegate = new RxAccept<char>(RegexMatchSet.FromChars('~').ToInvariant(utf16, unicodeSets, true), SymNegate, 0);
    var acceptParensOpen = new RxAccept<char>(RegexMatchSet.FromChars('(').ToInvariant(utf16, unicodeSets, true), SymParensOpen, 0);
    var acceptParensClose = new RxAccept<char>(RegexMatchSet.FromChars(')').ToInvariant(utf16, unicodeSets, true), SymParensClose, 0);

    // Right-nested alternation of all tokens, assembled from the innermost pair outwards.
    var anyToken = new RxAlternation<char>(acceptParensOpen, acceptParensClose);
    anyToken = new RxAlternation<char>(acceptNegate, anyToken);
    anyToken = new RxAlternation<char>(acceptDifference, anyToken);
    anyToken = new RxAlternation<char>(acceptIntersect, anyToken);
    anyToken = new RxAlternation<char>(acceptSubtract, anyToken);
    anyToken = new RxAlternation<char>(acceptUnion, anyToken);
    anyToken = new RxAlternation<char>(acceptRegexCharset, anyToken);
    anyToken = new RxAlternation<char>(acceptCharset, anyToken);
    anyToken = new RxAlternation<char>(acceptWhitespace, anyToken);

    var alphabet = new AlphabetBuilder<char>(anyToken, Utf16Chars.EOF, Utf16Chars.ValidBmp);
    var nfa = NfaBuilder<LetterId>.Build(alphabet.Expression);
    var dfa = DfaBuilder<LetterId>.Build(nfa, LetterId.Eof);
    if (dfa.StartState.Id != default(Id<DfaState<LetterId>>))
    {
        throw new InvalidOperationException($"Internal error: Unexpected DFA start state {dfa.StartState.Id}");
    }

    var letterMapper = AlphabetMapperEmitter<char>.CreateExpression(alphabet);
    var machineExpression = DfaStateMachineEmitter.CreateExpression(dfa, letterMapper);
    this.stateMachine = machineExpression.Compile();

    // Each entry is one production: the first symbol is the rule's left-hand side.
    var grammar = new GrammarBuilder(-2, -1, SymExpression)
    {
        { SymUnionExpression, SymExpression, SymUnion, SymNegateExpression },
        { SymExpression, SymUnionExpression },
        { SymSubtractExpression, SymExpression, SymSubtract, SymNegateExpression },
        { SymExpression, SymSubtractExpression },
        { SymIntersectExpression, SymExpression, SymIntersect, SymNegateExpression },
        { SymExpression, SymIntersectExpression },
        { SymDifferenceExpression, SymExpression, SymDifference, SymNegateExpression },
        { SymExpression, SymDifferenceExpression },
        { SymExpression, SymNegateExpression },
        { SymNegateExpression, SymNegate, SymValueExpression },
        { SymNegateExpression, SymValueExpression },
        { SymValueExpression, SymParensOpen, SymExpression, SymParensClose },
        { SymValueExpression, SymCharset },
        { SymValueExpression, SymRegexCharset }
    };
    this.table = new LalrTableGenerator(grammar).ComputeTable();
}
// Constructor. Only its opening portion is visible in this excerpt: the try block opened below and
// the constructor body itself are still open where the excerpt ends, so the notes cover the visible steps only.
//
// Visible steps:
//  1. Fetches the grammar parts from SemanticParserGrammar<TAstNode, TInput, TPosition>.FindGrammarParts(),
//     ordered by the key's type name (descending) and then by its SymbolName / CharsetName (empty string otherwise).
//  2. Charsets: parses every CharsetAttribute expression into a queue. Names that are referenced but not declared
//     by an attribute are resolved up front through UnicodeRanges.FromUnicodeName (case-insensitive keys).
//     Declared charsets are then computed once all names they reference are available; entries that cannot be
//     computed yet are re-queued, and an error is recorded when skipCount exceeds the queue length.
//  3. Symbols: reads the StartSymbolAttribute via SingleOrDefault and records an error when there is none.
//     Every GrammarSymbolAttribute name (grouped case-insensitively) is added to this.symbolsByName with the
//     next sequential id; conflicting symbol kinds and a start symbol that is not a nonterminal are recorded as errors.
//  4. Local helper GetSymbol: returns the id for a symbol name; for an unknown name it records an error and
//     returns SymbolId.Eof.
//  5. Local helper PopulateGenericArguments: when the declaring type is a generic type definition, closes it with
//     the leading entries of attribute.GenericTypeParameters and re-resolves the constructor / public static method
//     on the closed type; the remaining entries are used to close a generic method definition.
//
// Problems found along the way are appended to the local `errors` list rather than thrown where they are detected.
//
// NOTE(review): in PopulateGenericArguments, MakeGenericMethod is still called after a generic-argument count
// mismatch has been recorded; per the .NET documentation that call throws ArgumentException when the counts
// differ - confirm whether a handler outside this excerpt covers it.
public SemanticParserGrammarBuilder(IUnicodeMapper <TInput> mapper, TInput?eof) { string GetGrammarKeyForDisplay() { return($"typeof({typeof(TAstNode).FullName})"); } string MemberInfoForDisplay(MethodBase member) { return(member == null ? "(assembly)" : $"{member.DeclaringType.FullName}.{member.Name}"); } var errors = new List <Exception>(); try { var parts = SemanticParserGrammar <TAstNode, TInput, TPosition> .FindGrammarParts() .OrderByDescending(p => p.Key.GetType().Name) .ThenBy(p => (p.Key as GrammarSymbolAttribute)?.SymbolName ?? (p.Key as CharsetAttribute)?.CharsetName ?? "") .ToList(); // Compute charsets var charsetQueue = new Queue <KeyValuePair <string, CharsetNode> >(parts .Select(p => p.Key) .OfType <CharsetAttribute>() .Select(a => new KeyValuePair <string, CharsetNode>(a.CharsetName, CharsetParser.Parse(a.CharsetExpression)))); var charsets = charsetQueue .SelectMany(p => p.Value.GetCharsetNames()) .Except(charsetQueue.Select(p => p.Key), StringComparer.OrdinalIgnoreCase) .ToDictionary(n => n, UnicodeRanges.FromUnicodeName, StringComparer.OrdinalIgnoreCase); var provider = new UnicodeCharSetProvider(charsets); var skipCount = 0; while (charsetQueue.Count > 0) { var current = charsetQueue.Dequeue(); if (current.Value.GetCharsetNames().All(charsets.ContainsKey)) { charsets.Add(current.Key, current.Value.Compute(provider)); skipCount = 0; } else { charsetQueue.Enqueue(current); if (skipCount++ > charsetQueue.Count) { errors.Add(new InvalidOperationException($"The charsets cannot be computed because {String.Join(", ", charsetQueue.Select(p => p.Key))} contain circular references")); break; } } } // Gather symbol information var startsymbol = parts.Select(p => p.Key).OfType <StartSymbolAttribute>().SingleOrDefault(); if (startsymbol == null) { errors.Add(new InvalidOperationException($"Start symbol has not been defined: [assembly: StartSymbol({GetGrammarKeyForDisplay()}, ...)]")); } foreach (var symbol in parts .Select(p => p.Key) .OfType 
<GrammarSymbolAttribute>() .GroupBy(a => a.SymbolName, a => a.SymbolKind, StringComparer.OrdinalIgnoreCase)) { if (symbol.Distinct().Skip(1).Any()) { errors.Add(new InvalidOperationException($"The symbol {symbol.Key} must not be defined as both terminal and nonterminal")); } else if (StringComparer.OrdinalIgnoreCase.Equals(symbol.Key, startsymbol?.SymbolName) && (symbol.First() != SymbolKind.Nonterminal)) { errors.Add(new InvalidOperationException($"The start symbol {symbol.Key} must be a nonterminal")); } this.symbolsByName.Add(symbol.Key, this.symbolsByName.Count + 1); } SymbolId GetSymbol(string symbolName) { if (this.symbolsByName.TryGetValue(symbolName, out var id)) { return(id); } errors.Add(new InvalidOperationException($"The symbol {symbolName} has not been defined. If the symbol name is correct, define it as virtual: [assembly: VirtualSymbol({GetGrammarKeyForDisplay()}, ...)]")); return(SymbolId.Eof); } MethodBase PopulateGenericArguments(MethodBase methodBase, GrammarSymbolAttribute attribute) { var genericTypeParameters = attribute.GenericTypeParameters; if (methodBase?.DeclaringType.IsGenericTypeDefinition == true) { var typeGenericArguments = methodBase.DeclaringType.GetGenericArguments(); if (genericTypeParameters.Length < typeGenericArguments.Length) { errors.Add(new InvalidOperationException($"Missing type generic arguments for {attribute} on {MemberInfoForDisplay(methodBase)}")); return(methodBase); } var genericType = methodBase.DeclaringType.MakeGenericType(genericTypeParameters.Take(typeGenericArguments.Length).ToArray()); genericTypeParameters = genericTypeParameters.Skip(typeGenericArguments.Length).ToArray(); IReadOnlyDictionary <Type, Type> genericArgumentMap = genericType.GetGenericArguments().Select((t, ix) => new KeyValuePair <Type, Type>(typeGenericArguments[ix], t)).ToDictionary(p => p.Key, p => p.Value); var mappedParameters = methodBase.GetParameters().Select(p => genericArgumentMap.GetValueOrDefault(p.ParameterType, 
p.ParameterType)).ToArray(); if (methodBase is ConstructorInfo) { methodBase = genericType.GetConstructor(mappedParameters); } else { methodBase = genericType.GetMethod(methodBase.Name, BindingFlags.Static | BindingFlags.Public, null, mappedParameters, null); } } if (methodBase is MethodInfo method && method.IsGenericMethodDefinition) { if (method.GetGenericArguments().Length != genericTypeParameters.Length) { errors.Add(new InvalidOperationException($"Invalid number of method generic arguments for {attribute} on {MemberInfoForDisplay(methodBase)}")); } methodBase = method.MakeGenericMethod(genericTypeParameters); } else if (genericTypeParameters.Length > 0) { errors.Add(new InvalidOperationException($"Excess generic arguments for {attribute} on {MemberInfoForDisplay(methodBase)}")); } return(methodBase); }
/// <summary>
/// Initializes the grammar: the symbol-name resolver (<c>Resolve</c>), the compiled lexer
/// state machine with its start state (<c>DfaStateMachine</c>, <c>DfaStartState</c>) and the
/// LALR table (<c>LalrTable</c>).
/// </summary>
public GrmGrammar()
{
    // Display names for every terminal and nonterminal symbol.
    IReadOnlyDictionary<SymbolId, string> symbolNames = new Dictionary<SymbolId, string>()
    {
        { SymbolId.Eof, "(EOF)" },
        { SymUnknown, "(Unknown)" },
        { SymWhitespace, "(Whitespace)" },
        { SymNewline, "(Newline)" },
        { SymLineComment, "(LineComment)" },
        { SymBlockComment, "(BlockComment)" },
        { SymParameterName, "ParameterName" },
        { SymNonterminal, "Nonterminal" },
        { SymTerminal, "Terminal" },
        { SymAssign, "=" },
        { SymDefine, "::=" },
        { SymQuestion, "?" },
        { SymStar, "*" },
        { SymParensOpen, "(" },
        { SymParensClose, ")" },
        { SymPlus, "+" },
        { SymMinus, "-" },
        { SymOr, "|" },
        { SymSetLiteral, "SetLiteral" },
        { SymSetName, "SetName" },
        { SymInit, "<Init>" },
        { SymGrammar, "<Grammar>" },
        { SymContent, "<Content>" },
        { SymDefinition, "<Definition>" },
        { SymNlOpt, "<NlOpt>" },
        { SymNl, "<Nl>" },
        { SymParameter, "<Parameter>" },
        { SymParameterBody, "<ParameterBody>" },
        { SymParameterItems, "<ParameterItems>" },
        { SymParameterItem, "<ParameterItem>" },
        { SymSetDecl, "<SetDecl>" },
        { SymSetExp, "<SetExp>" },
        { SymSetItem, "<SetItem>" },
        { SymTerminalDecl, "<TerminalDecl>" },
        { SymTerminalName, "<TerminalName>" },
        { SymRegExp, "<RegExp>" },
        { SymRegExpSeq, "<RegExpSeq>" },
        { SymRegExpItem, "<RegExpItem>" },
        { SymRegExp2, "<RegExp2>" },
        { SymKleeneOpt, "<KleeneOpt>" },
        { SymRuleDecl, "<RuleDecl>" },
        { SymHandles, "<Handles>" },
        { SymHandle, "<Handle>" },
        { SymSymbol, "<Symbol>" },
    };
    this.Resolve = symbolNames.CreateGetter();

    var mapper = new UnicodeUtf16Mapper(false, false);

    // Named character sets referenced as {Name} by the lexer patterns below.
    var charsetPrintable = Codepoints.ValidBmp - UnicodeRanges.FromUnicodeCategory(UnicodeCategory.Control) - UnicodeRanges.InCombiningDiacriticalMarks;
    var charsetAlphanumeric = UnicodeRanges.Letter | UnicodeRanges.Number;
    var charset = new UnicodeCharSetProvider(new Dictionary<string, RangeSet<Codepoint>>()
    {
        { "Parameter Ch", charsetPrintable - (Codepoint)'\'' - (Codepoint)'"' },
        { "Nonterminal Ch", charsetAlphanumeric | '_' | '-' | '.' | ' ' },
        { "Terminal Ch", charsetAlphanumeric | '_' | '-' | '.' },
        { "Literal Ch", charsetPrintable - (Codepoint)'\'' },
        { "Set Literal Ch", charsetPrintable - (Codepoint)'[' - (Codepoint)']' - (Codepoint)'\'' },
        { "Set Name Ch", charsetPrintable - (Codepoint)'{' - (Codepoint)'}' },
        { "Whitespace Ch", UnicodeRanges.SpaceSeparator | '\t' | '\v' }
    });

    this.DfaStateMachine = new LexerBuilder<char>(mapper, Utf16Chars.EOF, charset)
        {
            { SymParameterName, @"""{Parameter Ch}+""" },
            { SymNonterminal, @"<{Nonterminal Ch}+>" },
            { SymTerminal, @"{Terminal Ch}+|'{Literal Ch}*'" },
            { SymSetLiteral, @"\[({Set Literal Ch}+|'{Literal Ch}*')+\]" },
            { SymSetName, @"\{{Set Name Ch}+\}" },
            { SymWhitespace, @"{Whitespace Ch}+" },
            { SymNewline, @"\r\n?|\n\r?" },
            { SymLineComment, @"![^\r\n]*" },
            // Block comment: "!*" ... "*!". Inside the body a run of stars may only be consumed
            // when it is followed by a character that is neither '*' nor '!'. The earlier pattern
            // (\*[^!]) consumed a star together with the next character, so in text such as
            // "!* x **!" it swallowed the star that belongs to the closing "*!".
            { SymBlockComment, @"!\*([^\*]|\*+[^\*!])*\*+!" },
            { SymAssign, @"=" },
            { SymDefine, @"::=" },
            { SymPlus, @"\+" },
            { SymMinus, @"\-" },
            { SymOr, @"\|" },
            { SymQuestion, @"\?" },
            { SymStar, @"\*" },
            { SymParensOpen, @"\(" },
            { SymParensClose, @"\)" }
        }
        .CreateStateMachine(out var dfaStartState)
        .Compile();
    this.DfaStartState = dfaStartState;

    // Each entry is one production: the first symbol is the rule's left-hand side,
    // an entry with a single symbol is an empty production.
    this.LalrTable = new LalrTableGenerator(new GrammarBuilder(SymUnknown, SymInit, SymGrammar)
        {
            { SymGrammar, SymNlOpt, SymContent },
            { SymContent, SymContent, SymDefinition },
            { SymContent, SymDefinition },
            { SymDefinition, SymParameter },
            { SymDefinition, SymSetDecl },
            { SymDefinition, SymTerminalDecl },
            { SymDefinition, SymRuleDecl },
            { SymNlOpt, SymNewline, SymNlOpt },
            { SymNlOpt },
            { SymNl, SymNewline, SymNl },
            { SymNl, SymNewline },
            { SymParameter, SymParameterName, SymNlOpt, SymAssign, SymParameterBody, SymNl },
            { SymParameterBody, SymParameterBody, SymNlOpt, SymOr, SymParameterItems },
            { SymParameterBody, SymParameterItems },
            { SymParameterItems, SymParameterItems, SymParameterItem },
            { SymParameterItems, SymParameterItem },
            { SymParameterItem, SymParameterName },
            { SymParameterItem, SymTerminal },
            { SymParameterItem, SymSetLiteral },
            { SymParameterItem, SymSetName },
            { SymParameterItem, SymNonterminal },
            { SymSetDecl, SymSetName, SymNlOpt, SymAssign, SymSetExp, SymNl },
            { SymSetExp, SymSetExp, SymNlOpt, SymPlus, SymSetItem },
            { SymSetExp, SymSetExp, SymNlOpt, SymMinus, SymSetItem },
            { SymSetExp, SymSetItem },
            { SymSetItem, SymSetLiteral },
            { SymSetItem, SymSetName },
            { SymTerminalDecl, SymTerminalName, SymNlOpt, SymAssign, SymRegExp, SymNl },
            { SymTerminalName, SymTerminalName, SymTerminal },
            { SymTerminalName, SymTerminal },
            { SymRegExp, SymRegExp, SymNlOpt, SymOr, SymRegExpSeq },
            { SymRegExp, SymRegExpSeq },
            { SymRegExpSeq, SymRegExpSeq, SymRegExpItem },
            { SymRegExpSeq, SymRegExpItem },
            { SymRegExpItem, SymSetLiteral, SymKleeneOpt },
            { SymRegExpItem, SymSetName, SymKleeneOpt },
            { SymRegExpItem, SymTerminal, SymKleeneOpt },
            { SymRegExpItem, SymParensOpen, SymRegExp2, SymParensClose, SymKleeneOpt },
            { SymRegExp2, SymRegExp2, SymOr, SymRegExpSeq },
            { SymRegExp2, SymRegExpSeq },
            { SymKleeneOpt, SymPlus },
            { SymKleeneOpt, SymQuestion },
            { SymKleeneOpt, SymStar },
            { SymKleeneOpt },
            { SymRuleDecl, SymNonterminal, SymNlOpt, SymDefine, SymHandles, SymNl },
            { SymHandles, SymHandles, SymNlOpt, SymOr, SymHandle },
            { SymHandles, SymHandle },
            { SymHandle, SymHandle, SymSymbol },
            { SymHandle },
            { SymSymbol, SymTerminal },
            { SymSymbol, SymNonterminal }
        })
        .ComputeTable();
}