Esempio n. 1
0
        /// <summary>
        /// Parses <paramref name="expression"/> with <c>CharsetParser</c>, computes the parsed node
        /// against a provider constructed from <c>UnicodeRanges.FromUnicodeName</c>, and writes the
        /// string form of the computed value to the test output.
        /// </summary>
        /// <param name="expression">The charset expression text handed to <c>CharsetParser.Parse</c>.</param>
        public void ParseTest(string expression)
        {
            // The provider is constructed first, then the expression is parsed and evaluated.
            var namedCharsets = new UnicodeCharSetProvider(UnicodeRanges.FromUnicodeName);
            var parsed        = CharsetParser.Parse(expression);
            var computed      = parsed.Compute(namedCharsets);
            var text          = computed.ToString();

            this.output.WriteLine(text);
        }
Esempio n. 2
0
        /// <summary>
        /// Parses <paramref name="pattern"/>, converts it to a <typeparamref name="TLetter"/> regex, builds an
        /// <c>AlphabetBuilder</c> for it and writes a diagnostic dump to the test output: the input pattern,
        /// the converted regex, the generated letter mapping, the letter regex, and pseudocode for the
        /// mapping function.
        /// </summary>
        /// <typeparam name="TLetter">The letter type the pattern is mapped to.</typeparam>
        /// <param name="pattern">The regex pattern text to parse.</param>
        /// <param name="caseSensitive">Passed to <c>ToInvariant</c>; also echoed in the output header.</param>
        /// <param name="mapper">The mapper passed to <c>ToInvariant</c>.</param>
        /// <param name="eof">Optional EOF letter passed to the alphabet builder; only its presence is printed.</param>
        /// <param name="validRanges">The valid ranges passed to the alphabet builder.</param>
        /// <param name="builder">Receives the alphabet builder created for the converted regex.</param>
        /// <returns>The number of entries in <c>builder.AlphabetById</c>.</returns>
        private int Test <TLetter>(string pattern, bool caseSensitive, IUnicodeMapper <TLetter> mapper, TLetter?eof, RangeSet <TLetter> validRanges, out AlphabetBuilder <TLetter> builder)
            where TLetter : struct, IEquatable <TLetter>, IComparable <TLetter>
        {
            this.output.WriteLine("Input regex (Case Sensitive: {0}, EOF letter: {1}):", caseSensitive, eof.HasValue);
            this.output.WriteLine(pattern);

            var charsetProvider = new UnicodeCharSetProvider();
            var parsedPattern   = RegexParser.Parse(pattern, null);
            var convertedRegex  = parsedPattern.ToInvariant(mapper, charsetProvider, caseSensitive);

            this.output.WriteLine("");
            this.output.WriteLine("{0} regex:", typeof(TLetter).Name);
            this.output.WriteLine(convertedRegex.ToString());

            builder = new AlphabetBuilder <TLetter>(convertedRegex, eof, validRanges);

            this.output.WriteLine("");
            this.output.WriteLine("Generated letter mapping:");
            foreach (var mapping in builder.AlphabetById)
            {
                this.output.WriteLine("{0}: {1} ({2})", mapping.Key, mapping.Value, mapping.Value.Count);
            }

            this.output.WriteLine("");
            this.output.WriteLine("Letter Regex:");
            this.output.WriteLine(builder.Expression.ToString());
            this.output.WriteLine("");
            this.output.WriteLine("Mapping function pseudocode:");

            // Renders one range as a pseudocode condition: equality when From and To compare equal,
            // otherwise an inclusive bounds check.
            string RangeCondition(Range <TLetter> span)
            {
                if (span.From.CompareTo(span.To) == 0)
                {
                    return $"(v == '{span.From}')";
                }
                return $"(v >= '{span.From}' && v <= '{span.To}')";
            }

            var letterIsPrimitive = typeof(TLetter).IsPrimitive;

            // Flatten the alphabet into (range, letter id) pairs.
            var rangeLetterPairs = builder
                                   .AlphabetById
                                   .SelectMany(letter => letter.Value.Select(r => new KeyValuePair <Range <TLetter>, LetterId>(r, letter.Key)));

            // Group per letter id, split by output form: a range goes to the "if" form when TLetter is
            // not primitive or the range expands to more than two values, otherwise to "case" labels.
            // Ordering by the boolean puts the "case" groups (false) ahead of the "if" groups (true).
            var orderedGroups = rangeLetterPairs
                                .GroupBy(
                                    pair => new
                                    {
                                        UseRangeCheck = !letterIsPrimitive || pair.Key.Expand().Skip(2).Any(),
                                        Letter        = pair.Value
                                    },
                                    pair => pair.Key)
                                .OrderBy(g => g.Key.UseRangeCheck)
                                .ThenBy(g => g.Key.Letter);

            var orSeparator  = " ||" + Environment.NewLine + "    ";
            var caseTemplate = "{0}" + Environment.NewLine + "        return {1}";
            var switchOpen   = false;

            foreach (var letterGroup in orderedGroups)
            {
                if (!letterGroup.Key.UseRangeCheck)
                {
                    // Small ranges are listed value by value inside a single switch block.
                    if (!switchOpen)
                    {
                        this.output.WriteLine("switch (v) {");
                        switchOpen = true;
                    }
                    var caseLabels = letterGroup
                                     .SelectMany(span => span.Expand())
                                     .OrderBy(value => value)
                                     .Select(value => $"    case '{value}':");
                    this.output.WriteLine(caseTemplate, string.Join(Environment.NewLine, caseLabels), letterGroup.Key.Letter);
                    continue;
                }

                // A range check ends any switch block that is still open.
                if (switchOpen)
                {
                    this.output.WriteLine("}");
                    switchOpen = false;
                }
                var conditions = letterGroup
                                 .OrderBy(span => span.From)
                                 .Select(span => RangeCondition(span));
                this.output.WriteLine("if ({0}) return {1}", string.Join(orSeparator, conditions), letterGroup.Key.Letter);
            }

            if (switchOpen)
            {
                this.output.WriteLine("}");
            }
            return builder.AlphabetById.Count;
        }
Esempio n. 3
0
        /// <summary>
        /// Sets up the charset-expression grammar: compiles the lexer state machine into
        /// <c>stateMachine</c> and computes the LALR table into <c>table</c>.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The start state of the generated DFA does not have the default id.
        /// </exception>
        private CharsetGrammar()
        {
            var provider = new UnicodeCharSetProvider();
            var mapper   = new UnicodeUtf16Mapper(false, false);

            // Whitespace and the two charset token forms are extracted from the regex lexer's expression.
            var lexerRx            = RegexLexer.CreateRx(mapper);
            var acceptWhitespace   = new RxAccept<char>(RxOfSymbol<char>.Extract(lexerRx, RegexLexer.SymWhitespace), SymWhitespace, 0);
            var acceptCharset      = new RxAccept<char>(RxOfSymbol<char>.Extract(lexerRx, RegexLexer.SymCharset), SymCharset, 0);
            var acceptRegexCharset = new RxAccept<char>(RxOfSymbol<char>.Extract(lexerRx, RegexLexer.SymRegexCharset), SymRegexCharset, 0);

            // Operator and parenthesis tokens are single-character match sets.
            var acceptUnion       = new RxAccept<char>(RegexMatchSet.FromChars('|', '+').ToInvariant(mapper, provider, true), SymUnion, 0);
            var acceptSubtract    = new RxAccept<char>(RegexMatchSet.FromChars('-').ToInvariant(mapper, provider, true), SymSubtract, 0);
            var acceptIntersect   = new RxAccept<char>(RegexMatchSet.FromChars('&').ToInvariant(mapper, provider, true), SymIntersect, 0);
            var acceptDifference  = new RxAccept<char>(RegexMatchSet.FromChars('^').ToInvariant(mapper, provider, true), SymDifference, 0);
            var acceptNegate      = new RxAccept<char>(RegexMatchSet.FromChars('~').ToInvariant(mapper, provider, true), SymNegate, 0);
            var acceptParensOpen  = new RxAccept<char>(RegexMatchSet.FromChars('(').ToInvariant(mapper, provider, true), SymParensOpen, 0);
            var acceptParensClose = new RxAccept<char>(RegexMatchSet.FromChars(')').ToInvariant(mapper, provider, true), SymParensClose, 0);

            // Right-nested alternation over all tokens, assembled from the innermost pair outwards.
            var altParens          = new RxAlternation<char>(acceptParensOpen, acceptParensClose);
            var altFromNegate      = new RxAlternation<char>(acceptNegate, altParens);
            var altFromDifference  = new RxAlternation<char>(acceptDifference, altFromNegate);
            var altFromIntersect   = new RxAlternation<char>(acceptIntersect, altFromDifference);
            var altFromSubtract    = new RxAlternation<char>(acceptSubtract, altFromIntersect);
            var altFromUnion       = new RxAlternation<char>(acceptUnion, altFromSubtract);
            var altFromRegexSet    = new RxAlternation<char>(acceptRegexCharset, altFromUnion);
            var altFromCharset     = new RxAlternation<char>(acceptCharset, altFromRegexSet);
            var allTokens          = new RxAlternation<char>(acceptWhitespace, altFromCharset);

            var alpha = new AlphabetBuilder<char>(allTokens, Utf16Chars.EOF, Utf16Chars.ValidBmp);
            var nfa   = NfaBuilder<LetterId>.Build(alpha.Expression);
            var dfa   = DfaBuilder<LetterId>.Build(nfa, LetterId.Eof);

            // Guard: the DFA start state must carry the default id.
            if (dfa.StartState.Id != default(Id<DfaState<LetterId>>))
            {
                throw new InvalidOperationException($"Internal error: Unexpected DFA start state {dfa.StartState.Id}");
            }

            this.stateMachine = DfaStateMachineEmitter.CreateExpression(dfa, AlphabetMapperEmitter<char>.CreateExpression(alpha)).Compile();

            // Each entry is a production: the first symbol is the rule, the rest is its handle.
            var grammar = new GrammarBuilder(-2, -1, SymExpression)
            {
                { SymUnionExpression, SymExpression, SymUnion, SymNegateExpression },
                { SymExpression, SymUnionExpression },
                { SymSubtractExpression, SymExpression, SymSubtract, SymNegateExpression },
                { SymExpression, SymSubtractExpression },
                { SymIntersectExpression, SymExpression, SymIntersect, SymNegateExpression },
                { SymExpression, SymIntersectExpression },
                { SymDifferenceExpression, SymExpression, SymDifference, SymNegateExpression },
                { SymExpression, SymDifferenceExpression },
                { SymExpression, SymNegateExpression },
                { SymNegateExpression, SymNegate, SymValueExpression },
                { SymNegateExpression, SymValueExpression },
                { SymValueExpression, SymParensOpen, SymExpression, SymParensClose },
                { SymValueExpression, SymCharset },
                { SymValueExpression, SymRegexCharset }
            };
            this.table = new LalrTableGenerator(grammar).ComputeTable();
        }
        public SemanticParserGrammarBuilder(IUnicodeMapper <TInput> mapper, TInput?eof)
        {
            // Renders the AST node type as a typeof(...) expression for use in error messages.
            string GetGrammarKeyForDisplay() => $"typeof({typeof(TAstNode).FullName})";

            // Renders a member as "DeclaringType.Name" for error messages; a null member is shown as "(assembly)".
            string MemberInfoForDisplay(MethodBase member)
            {
                if (member == null)
                {
                    return "(assembly)";
                }
                return $"{member.DeclaringType.FullName}.{member.Name}";
            }

            var errors = new List <Exception>();

            try {
                var parts = SemanticParserGrammar <TAstNode, TInput, TPosition> .FindGrammarParts()
                            .OrderByDescending(p => p.Key.GetType().Name)
                            .ThenBy(p => (p.Key as GrammarSymbolAttribute)?.SymbolName ?? (p.Key as CharsetAttribute)?.CharsetName ?? "")
                            .ToList();

                // Compute charsets
                var charsetQueue = new Queue <KeyValuePair <string, CharsetNode> >(parts
                                                                                   .Select(p => p.Key)
                                                                                   .OfType <CharsetAttribute>()
                                                                                   .Select(a => new KeyValuePair <string, CharsetNode>(a.CharsetName, CharsetParser.Parse(a.CharsetExpression))));
                var charsets = charsetQueue
                               .SelectMany(p => p.Value.GetCharsetNames())
                               .Except(charsetQueue.Select(p => p.Key), StringComparer.OrdinalIgnoreCase)
                               .ToDictionary(n => n, UnicodeRanges.FromUnicodeName, StringComparer.OrdinalIgnoreCase);
                var provider  = new UnicodeCharSetProvider(charsets);
                var skipCount = 0;
                while (charsetQueue.Count > 0)
                {
                    var current = charsetQueue.Dequeue();
                    if (current.Value.GetCharsetNames().All(charsets.ContainsKey))
                    {
                        charsets.Add(current.Key, current.Value.Compute(provider));
                        skipCount = 0;
                    }
                    else
                    {
                        charsetQueue.Enqueue(current);
                        if (skipCount++ > charsetQueue.Count)
                        {
                            errors.Add(new InvalidOperationException($"The charsets cannot be computed because {String.Join(", ", charsetQueue.Select(p => p.Key))} contain circular references"));
                            break;
                        }
                    }
                }

                // Gather symbol information
                var startsymbol = parts.Select(p => p.Key).OfType <StartSymbolAttribute>().SingleOrDefault();
                if (startsymbol == null)
                {
                    errors.Add(new InvalidOperationException($"Start symbol has not been defined: [assembly: StartSymbol({GetGrammarKeyForDisplay()}, ...)]"));
                }
                foreach (var symbol in parts
                         .Select(p => p.Key)
                         .OfType <GrammarSymbolAttribute>()
                         .GroupBy(a => a.SymbolName, a => a.SymbolKind, StringComparer.OrdinalIgnoreCase))
                {
                    if (symbol.Distinct().Skip(1).Any())
                    {
                        errors.Add(new InvalidOperationException($"The symbol {symbol.Key} must not be defined as both terminal and nonterminal"));
                    }
                    else if (StringComparer.OrdinalIgnoreCase.Equals(symbol.Key, startsymbol?.SymbolName) && (symbol.First() != SymbolKind.Nonterminal))
                    {
                        errors.Add(new InvalidOperationException($"The start symbol {symbol.Key} must be a nonterminal"));
                    }
                    this.symbolsByName.Add(symbol.Key, this.symbolsByName.Count + 1);
                }

                // Looks up a symbol id by name. An unknown name is recorded in the error list and
                // SymbolId.Eof is returned in its place.
                SymbolId GetSymbol(string symbolName)
                {
                    if (!this.symbolsByName.TryGetValue(symbolName, out var knownId))
                    {
                        errors.Add(new InvalidOperationException($"The symbol {symbolName} has not been defined. If the symbol name is correct, define it as virtual: [assembly: VirtualSymbol({GetGrammarKeyForDisplay()}, ...)]"));
                        return SymbolId.Eof;
                    }
                    return knownId;
                }

                // Closes an open generic declaring type and/or a generic method definition using the
                // attribute's GenericTypeParameters. The leading parameters are consumed by the declaring
                // type's generic arguments, the remainder by the method's. Problems are recorded in the
                // error list instead of being thrown; in that case the member is returned as resolved so far.
                MethodBase PopulateGenericArguments(MethodBase methodBase, GrammarSymbolAttribute attribute)
                {
                    var genericTypeParameters = attribute.GenericTypeParameters;

                    // DeclaringType is accessed null-safely: a member without a declaring type is not generic-typed.
                    if (methodBase?.DeclaringType?.IsGenericTypeDefinition == true)
                    {
                        var typeGenericArguments = methodBase.DeclaringType.GetGenericArguments();
                        if (genericTypeParameters.Length < typeGenericArguments.Length)
                        {
                            errors.Add(new InvalidOperationException($"Missing type generic arguments for {attribute} on {MemberInfoForDisplay(methodBase)}"));
                            return(methodBase);
                        }
                        var genericType = methodBase.DeclaringType.MakeGenericType(genericTypeParameters.Take(typeGenericArguments.Length).ToArray());
                        genericTypeParameters = genericTypeParameters.Skip(typeGenericArguments.Length).ToArray();
                        // Map each open type parameter to its closed counterpart so the member can be
                        // looked up again on the closed type by its substituted parameter types.
                        IReadOnlyDictionary <Type, Type> genericArgumentMap = genericType.GetGenericArguments().Select((t, ix) => new KeyValuePair <Type, Type>(typeGenericArguments[ix], t)).ToDictionary(p => p.Key, p => p.Value);
                        var mappedParameters = methodBase.GetParameters().Select(p => genericArgumentMap.GetValueOrDefault(p.ParameterType, p.ParameterType)).ToArray();
                        // NOTE(review): both lookups return null when no exact match exists on the closed
                        // type; that case is not reported here - confirm callers handle a null result.
                        if (methodBase is ConstructorInfo)
                        {
                            methodBase = genericType.GetConstructor(mappedParameters);
                        }
                        else
                        {
                            methodBase = genericType.GetMethod(methodBase.Name, BindingFlags.Static | BindingFlags.Public, null, mappedParameters, null);
                        }
                    }
                    if (methodBase is MethodInfo method && method.IsGenericMethodDefinition)
                    {
                        if (method.GetGenericArguments().Length != genericTypeParameters.Length)
                        {
                            errors.Add(new InvalidOperationException($"Invalid number of method generic arguments for {attribute} on {MemberInfoForDisplay(methodBase)}"));
                            // MakeGenericMethod throws ArgumentException on an arity mismatch, which would
                            // bypass the collected error; stop here like the type-argument check above does.
                            return(methodBase);
                        }
                        methodBase = method.MakeGenericMethod(genericTypeParameters);
                    }
                    else if (genericTypeParameters.Length > 0)
                    {
                        errors.Add(new InvalidOperationException($"Excess generic arguments for {attribute} on {MemberInfoForDisplay(methodBase)}"));
                    }
                    return(methodBase);
                }
Esempio n. 5
0
            /// <summary>
            /// Initializes the grammar: <c>Resolve</c> (getter over the symbol display names), the compiled
            /// lexer state machine with its start state, and the LALR table computed from the productions
            /// listed in this constructor.
            /// </summary>
            public GrmGrammar()
            {
                // Display names: "(...)" for special/noise symbols, plain text for terminals, "<...>" for nonterminals.
                IReadOnlyDictionary<SymbolId, string> symbolNames = new Dictionary<SymbolId, string>()
                {
                    { SymbolId.Eof, "(EOF)" },
                    { SymUnknown, "(Unknown)" },
                    { SymWhitespace, "(Whitespace)" },
                    { SymNewline, "(Newline)" },
                    { SymLineComment, "(LineComment)" },
                    { SymBlockComment, "(BlockComment)" },
                    { SymParameterName, "ParameterName" },
                    { SymNonterminal, "Nonterminal" },
                    { SymTerminal, "Terminal" },
                    { SymAssign, "=" },
                    { SymDefine, "::=" },
                    { SymQuestion, "?" },
                    { SymStar, "*" },
                    { SymParensOpen, "(" },
                    { SymParensClose, ")" },
                    { SymPlus, "+" },
                    { SymMinus, "-" },
                    { SymOr, "|" },
                    { SymSetLiteral, "SetLiteral" },
                    { SymSetName, "SetName" },
                    { SymInit, "<Init>" },
                    { SymGrammar, "<Grammar>" },
                    { SymContent, "<Content>" },
                    { SymDefinition, "<Definition>" },
                    { SymNlOpt, "<NlOpt>" },
                    { SymNl, "<Nl>" },
                    { SymParameter, "<Parameter>" },
                    { SymParameterBody, "<ParameterBody>" },
                    { SymParameterItems, "<ParameterItems>" },
                    { SymParameterItem, "<ParameterItem>" },
                    { SymSetDecl, "<SetDecl>" },
                    { SymSetExp, "<SetExp>" },
                    { SymSetItem, "<SetItem>" },
                    { SymTerminalDecl, "<TerminalDecl>" },
                    { SymTerminalName, "<TerminalName>" },
                    { SymRegExp, "<RegExp>" },
                    { SymRegExpSeq, "<RegExpSeq>" },
                    { SymRegExpItem, "<RegExpItem>" },
                    { SymRegExp2, "<RegExp2>" },
                    { SymKleeneOpt, "<KleeneOpt>" },
                    { SymRuleDecl, "<RuleDecl>" },
                    { SymHandles, "<Handles>" },
                    { SymHandle, "<Handle>" },
                    { SymSymbol, "<Symbol>" },
                };
                this.Resolve = symbolNames.CreateGetter();

                var mapper = new UnicodeUtf16Mapper(false, false);

                // Base character classes from which the named lexer charsets are derived.
                var printable    = Codepoints.ValidBmp - UnicodeRanges.FromUnicodeCategory(UnicodeCategory.Control) - UnicodeRanges.InCombiningDiacriticalMarks;
                var alphanumeric = UnicodeRanges.Letter | UnicodeRanges.Number;
                var apostrophe   = (Codepoint)'\'';

                var namedCharsets = new Dictionary<string, RangeSet<Codepoint>>()
                {
                    { "Parameter Ch", printable - apostrophe - (Codepoint)'"' },
                    { "Nonterminal Ch", alphanumeric | '_' | '-' | '.' | ' ' },
                    { "Terminal Ch", alphanumeric | '_' | '-' | '.' },
                    { "Literal Ch", printable - apostrophe },
                    { "Set Literal Ch", printable - (Codepoint)'[' - (Codepoint)']' - apostrophe },
                    { "Set Name Ch", printable - (Codepoint)'{' - (Codepoint)'}' },
                    { "Whitespace Ch", UnicodeRanges.SpaceSeparator | '\t' | '\v' }
                };
                var charset = new UnicodeCharSetProvider(namedCharsets);

                // Token patterns; {Name} refers to a charset registered above.
                var lexer = new LexerBuilder<char>(mapper, Utf16Chars.EOF, charset)
                {
                    { SymParameterName, @"""{Parameter Ch}+""" },
                    { SymNonterminal, @"<{Nonterminal Ch}+>" },
                    { SymTerminal, @"{Terminal Ch}+|'{Literal Ch}*'" },
                    { SymSetLiteral, @"\[({Set Literal Ch}+|'{Literal Ch}*')+\]" },
                    { SymSetName, @"\{{Set Name Ch}+\}" },
                    { SymWhitespace, @"{Whitespace Ch}+" },
                    { SymNewline, @"\r\n?|\n\r?" },
                    { SymLineComment, @"![^\r\n]*" },
                    { SymBlockComment, @"!\*([^\*]|\*[^!])*\*!" },
                    { SymAssign, @"=" },
                    { SymDefine, @"::=" },
                    { SymPlus, @"\+" },
                    { SymMinus, @"\-" },
                    { SymOr, @"\|" },
                    { SymQuestion, @"\?" },
                    { SymStar, @"\*" },
                    { SymParensOpen, @"\(" },
                    { SymParensClose, @"\)" }
                };
                this.DfaStateMachine = lexer.CreateStateMachine(out var dfaStartState).Compile();
                this.DfaStartState   = dfaStartState;

                // Productions: the first symbol of each entry is the rule, the remaining symbols its handle
                // (an entry with only the rule symbol is an empty production).
                var grammar = new GrammarBuilder(SymUnknown, SymInit, SymGrammar)
                {
                    { SymGrammar, SymNlOpt, SymContent },
                    { SymContent, SymContent, SymDefinition },
                    { SymContent, SymDefinition },
                    { SymDefinition, SymParameter },
                    { SymDefinition, SymSetDecl },
                    { SymDefinition, SymTerminalDecl },
                    { SymDefinition, SymRuleDecl },
                    { SymNlOpt, SymNewline, SymNlOpt },
                    { SymNlOpt },
                    { SymNl, SymNewline, SymNl },
                    { SymNl, SymNewline },
                    { SymParameter, SymParameterName, SymNlOpt, SymAssign, SymParameterBody, SymNl },
                    { SymParameterBody, SymParameterBody, SymNlOpt, SymOr, SymParameterItems },
                    { SymParameterBody, SymParameterItems },
                    { SymParameterItems, SymParameterItems, SymParameterItem },
                    { SymParameterItems, SymParameterItem },
                    { SymParameterItem, SymParameterName },
                    { SymParameterItem, SymTerminal },
                    { SymParameterItem, SymSetLiteral },
                    { SymParameterItem, SymSetName },
                    { SymParameterItem, SymNonterminal },
                    { SymSetDecl, SymSetName, SymNlOpt, SymAssign, SymSetExp, SymNl },
                    { SymSetExp, SymSetExp, SymNlOpt, SymPlus, SymSetItem },
                    { SymSetExp, SymSetExp, SymNlOpt, SymMinus, SymSetItem },
                    { SymSetExp, SymSetItem },
                    { SymSetItem, SymSetLiteral },
                    { SymSetItem, SymSetName },
                    { SymTerminalDecl, SymTerminalName, SymNlOpt, SymAssign, SymRegExp, SymNl },
                    { SymTerminalName, SymTerminalName, SymTerminal },
                    { SymTerminalName, SymTerminal },
                    { SymRegExp, SymRegExp, SymNlOpt, SymOr, SymRegExpSeq },
                    { SymRegExp, SymRegExpSeq },
                    { SymRegExpSeq, SymRegExpSeq, SymRegExpItem },
                    { SymRegExpSeq, SymRegExpItem },
                    { SymRegExpItem, SymSetLiteral, SymKleeneOpt },
                    { SymRegExpItem, SymSetName, SymKleeneOpt },
                    { SymRegExpItem, SymTerminal, SymKleeneOpt },
                    { SymRegExpItem, SymParensOpen, SymRegExp2, SymParensClose, SymKleeneOpt },
                    { SymRegExp2, SymRegExp2, SymOr, SymRegExpSeq },
                    { SymRegExp2, SymRegExpSeq },
                    { SymKleeneOpt, SymPlus },
                    { SymKleeneOpt, SymQuestion },
                    { SymKleeneOpt, SymStar },
                    { SymKleeneOpt },
                    { SymRuleDecl, SymNonterminal, SymNlOpt, SymDefine, SymHandles, SymNl },
                    { SymHandles, SymHandles, SymNlOpt, SymOr, SymHandle },
                    { SymHandles, SymHandle },
                    { SymHandle, SymHandle, SymSymbol },
                    { SymHandle },
                    { SymSymbol, SymTerminal },
                    { SymSymbol, SymNonterminal }
                };
                this.LalrTable = new LalrTableGenerator(grammar).ComputeTable();
            }