/// <summary> Create a lexer rule for the provided token definition. </summary>
public LexerRule(
    TokenDefinition definition,
    TTok token,
    bool publicChild = false,
    InfixAttribute infix = null,
    IReadOnlyList<LexerRule<TTok>> subTokens = null)
{
    Definition = definition;
    Token = token;
    SubTokens = subTokens;
    CanBePrefix = infix?.CanBePrefix ?? true;
    CanBePostfix = infix?.CanBePostfix ?? true;
    IsPublicChild = publicChild;
}
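// A minimal usage sketch for this constructor, assuming a hypothetical 'Tok'
// enum with a 'Plus' member; the TokenDefinition constructor is used the same
// way as elsewhere in this file (a \G-anchored regex plus a one-character
// 'startsWith' hint):
//
//   var plusRule = new LexerRule<Tok>(
//       new TokenDefinition(new Regex(@"\G\+"), startsWith: "+"),
//       Tok.Plus);
//
// With no InfixAttribute, CanBePrefix and CanBePostfix both default to true,
// the rule is not a public child, and it has no sub-tokens.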
/// <summary> Create a token reader for a specified set of rules. </summary> /// <remarks> /// Rules are applied in order. Longest match is kept, first match is kept /// if two matches are of equal length. The rule set should be designed so /// that at least one /// </remarks> public TokenReader( IEnumerable <LexerRule <TTok> > rules, TTok error, TTok endOfStream, TTok?indent = null, TTok?dedent = null, TTok?newline = null, bool escapeNewlines = false, TokenDefinition comments = null) { Rules = rules.ToArray(); Error = error; EndOfStream = endOfStream; Indent = indent ?? endOfStream; Dedent = dedent ?? endOfStream; EndOfLine = newline ?? endOfStream; if (indent != null || dedent != null) { _indents = new Stack <int>(); if (indent == null) { throw new ArgumentNullException(nameof(indent)); } if (dedent == null) { throw new ArgumentException(nameof(dedent)); } } EscapeNewlines = escapeNewlines; Comments = comments; var publicChildren = new Dictionary <TTok, IReadOnlyList <TTok> >(); foreach (var r in Rules) { GetPublicChildren(r, publicChildren); } PublicChildren = publicChildren; }
/// <summary> /// Initializes all static readonly members of this class from the attributes /// on the <typeparamref name="TTok"/> enumeration. /// </summary> static ReflectionTokenReader() { var t = typeof(TTok); if (!t.IsEnum) { throw new ArgumentException($"Type {t} is not an enum.", nameof(TTok)); } // The TokensAttribute itself var tokensAttribute = t.GetCustomAttribute <TokensAttribute>(); if (tokensAttribute == null) { throw new ArgumentException($"Enum {t} does not carry {nameof(TokensAttribute)}.", nameof(TTok)); } var commentStart = tokensAttribute.Comments[0]; var commentStartsWith = "[]\\(.<".IndexOf(commentStart) > -1 ? null : new string(commentStart, 1); var csComments = tokensAttribute.Comments.StartsWith("\\G") ? tokensAttribute.Comments : $"\\G({tokensAttribute.Comments})"; StaticComments = new TokenDefinition(new Regex(csComments), startsWith: commentStartsWith); StaticEscapeNewlines = tokensAttribute.EscapeNewlines; // The enumeration contents var names = t.GetEnumNames(); var values = t.GetEnumValues(); var pairs = names.Select((n, i) => new KeyValuePair <string, TTok>(n, (TTok)values.GetValue(i))).ToArray(); // Detect 'end', 'indent', 'dedent' and 'error', and extract the structure and // definition for the other rules. var infix = new Dictionary <TTok, InfixAttribute>(); var definitions = new Dictionary <TTok, TokenDefinition>(); var parent = new Dictionary <TTok, TTok>(); var publicChild = new HashSet <TTok>(); foreach (var kv in pairs) { var name = kv.Key; var tok = kv.Value; var mbr = t.GetMember(name)[0]; var endAttribute = mbr.GetCustomAttribute <EndAttribute>(); if (endAttribute != null) { StaticEnd = tok; continue; } var errorAttribute = mbr.GetCustomAttribute <ErrorAttribute>(); if (errorAttribute != null) { StaticError = tok; continue; } var indentAttribute = mbr.GetCustomAttribute <IndentAttribute>(); if (indentAttribute != null) { StaticIndent = tok; continue; } var endOfLineAttribute = mbr.GetCustomAttribute <EndOfLineAttribute>(); if (endOfLineAttribute != null) { StaticEndOfLine = tok; continue; } var dedentAttribute = mbr.GetCustomAttribute <DedentAttribute>(); if (dedentAttribute != null) { StaticDedent = tok; continue; } var infixAttribute = mbr.GetCustomAttribute <InfixAttribute>(); if (infixAttribute != null) { infix.Add(tok, infixAttribute); } var fromAttribute = mbr.GetCustomAttribute <FromAttribute>(); if (fromAttribute != null) { parent.Add(tok, (TTok)(object)fromAttribute.Parent); if (!fromAttribute.IsPrivate) { publicChild.Add(tok); } // No continue: still need to determine definition } var patternAttribute = mbr.GetCustomAttribute <PatternAttribute>(); if (patternAttribute != null) { definitions.Add(tok, patternAttribute.ToDefinition()); continue; } var anyAttribute = mbr.GetCustomAttribute <AnyAttribute>(); if (anyAttribute != null) { definitions.Add(tok, anyAttribute.ToDefinition()); continue; } var ciAttribute = mbr.GetCustomAttribute <CiAttribute>(); if (ciAttribute != null) { definitions.Add(tok, new AnyAttribute(name) { CaseSensitive = false }.ToDefinition()); } } // Construct the actual rules. This algorithm is NOT optimal, but most // languages have a small enough number of tokens that this des not matter. var seen = new HashSet <TTok>(); StaticRules = definitions .Where(kv => !parent.ContainsKey(kv.Key)) .Select(kv => new LexerRule <TTok>( kv.Value, kv.Key, publicChild.Contains(kv.Key), infix.TryGetValue(kv.Key, out var inf) ? inf : null, SubRules(kv.Key, definitions, infix, parent, publicChild, seen))) .ToArray(); }