Exemple #1
0
 /// <summary>
 ///     Initializes a lexer rule from its definition, its token and optional settings.
 /// </summary>
 /// <param name="definition">Stored in <c>Definition</c>.</param>
 /// <param name="token">Stored in <c>Token</c>.</param>
 /// <param name="publicChild">Stored in <c>IsPublicChild</c>.</param>
 /// <param name="infix">
 ///     Optional source of the <c>CanBePrefix</c> and <c>CanBePostfix</c> flags.
 ///     When no value is available from it, both flags are set to <c>true</c>.
 /// </param>
 /// <param name="subTokens">Stored as-is in <c>SubTokens</c>; may be <c>null</c>.</param>
 public LexerRule(
     TokenDefinition definition,
     TTok token,
     bool publicChild = false,
     InfixAttribute infix = null,
     IReadOnlyList<LexerRule<TTok>> subTokens = null)
 {
     // Values that are copied through unchanged.
     Definition = definition;
     Token = token;
     SubTokens = subTokens;

     // Prefix/postfix permissions come from the infix attribute when one is
     // supplied; otherwise the rule is allowed in both positions.
     var prefixAllowed = infix?.CanBePrefix;
     CanBePrefix = prefixAllowed ?? true;

     var postfixAllowed = infix?.CanBePostfix;
     CanBePostfix = postfixAllowed ?? true;

     IsPublicChild = publicChild;
 }
Exemple #2
0
        /// <summary> Create a token reader for a specified set of rules. </summary>
        /// <remarks>
        /// Rules are applied in order. Longest match is kept, first match is kept
        /// if two matches are of equal length. The rule set should be designed so
        /// that at least one
        /// (NOTE(review): the original remark stops mid-sentence here; presumably it
        /// meant "at least one rule matches any input" - TODO confirm with the author.)
        /// </remarks>
        /// <param name="rules">The rules to read tokens with; copied into an array.</param>
        /// <param name="error">Stored in <c>Error</c>.</param>
        /// <param name="endOfStream">
        ///     Stored in <c>EndOfStream</c>; also used as the value of <c>Indent</c>,
        ///     <c>Dedent</c> and <c>EndOfLine</c> when those are not provided.
        /// </param>
        /// <param name="indent">Optional indent token. Must be provided together with <paramref name="dedent"/>.</param>
        /// <param name="dedent">Optional dedent token. Must be provided together with <paramref name="indent"/>.</param>
        /// <param name="newline">Optional token stored in <c>EndOfLine</c>.</param>
        /// <param name="escapeNewlines">Stored in <c>EscapeNewlines</c>.</param>
        /// <param name="comments">Stored in <c>Comments</c>; may be <c>null</c>.</param>
        /// <exception cref="ArgumentNullException">
        ///     <paramref name="rules"/> is <c>null</c>, or exactly one of
        ///     <paramref name="indent"/> and <paramref name="dedent"/> was provided.
        /// </exception>
        public TokenReader(
            IEnumerable<LexerRule<TTok>> rules,
            TTok error,
            TTok endOfStream,
            TTok? indent = null,
            TTok? dedent = null,
            TTok? newline = null,
            bool escapeNewlines = false,
            TokenDefinition comments = null)
        {
            if (rules == null)
            {
                throw new ArgumentNullException(nameof(rules));
            }

            // Indent and dedent tokens only make sense as a pair: validate before
            // allocating any state so that a failed construction does no work.
            var tracksIndentation = indent != null || dedent != null;

            if (tracksIndentation)
            {
                if (indent == null)
                {
                    throw new ArgumentNullException(nameof(indent));
                }

                if (dedent == null)
                {
                    // ArgumentNullException (not ArgumentException) so that the
                    // argument is reported as the parameter name, not as the message.
                    throw new ArgumentNullException(nameof(dedent));
                }
            }

            Rules       = rules.ToArray();
            Error       = error;
            EndOfStream = endOfStream;

            // Missing optional tokens fall back to the end-of-stream token.
            Indent    = indent ?? endOfStream;
            Dedent    = dedent ?? endOfStream;
            EndOfLine = newline ?? endOfStream;

            if (tracksIndentation)
            {
                // The indentation stack only exists when indent/dedent tokens were requested.
                _indents = new Stack<int>();
            }

            EscapeNewlines = escapeNewlines;
            Comments       = comments;

            // Collect the public children of every rule into a single lookup.
            var publicChildren = new Dictionary<TTok, IReadOnlyList<TTok>>();

            foreach (var rule in Rules)
            {
                GetPublicChildren(rule, publicChildren);
            }

            PublicChildren = publicChildren;
        }
        /// <summary>
        ///     Initializes all static readonly members of this class from the attributes
        ///     on the <typeparamref name="TTok"/> enumeration.
        /// </summary>
        /// <remarks>
        ///     Each enumeration member is classified by the first matching attribute, in
        ///     this order: <c>End</c>, <c>Error</c>, <c>Indent</c>, <c>EndOfLine</c> and
        ///     <c>Dedent</c> mark special tokens and end the processing of that member.
        ///     Otherwise an optional <c>Infix</c> and an optional <c>From</c> attribute are
        ///     recorded, and the first of <c>Pattern</c>, <c>Any</c> or <c>Ci</c> provides
        ///     the token definition. Members with none of these get no definition and
        ///     therefore no rule.
        /// </remarks>
        /// <exception cref="ArgumentException">
        ///     <typeparamref name="TTok"/> is not an enum, or does not carry a
        ///     <c>TokensAttribute</c>. Because this is a static constructor, the runtime
        ///     surfaces the failure wrapped in a <see cref="TypeInitializationException"/>.
        /// </exception>
        static ReflectionTokenReader()
        {
            var t = typeof(TTok);

            if (!t.IsEnum)
            {
                throw new ArgumentException($"Type {t} is not an enum.", nameof(TTok));
            }

            // The TokensAttribute itself

            var tokensAttribute = t.GetCustomAttribute<TokensAttribute>();

            if (tokensAttribute == null)
            {
                throw new ArgumentException($"Enum {t} does not carry {nameof(TokensAttribute)}.", nameof(TTok));
            }

            // NOTE(review): a null or empty Comments pattern fails on the indexer below;
            // the intended behaviour for "no comments" is not visible here - confirm.
            var commentsPattern = tokensAttribute.Comments;
            var commentStart    = commentsPattern[0];

            // Only keep the first character as a 'starts with' hint when it is not one
            // of these characters (presumably because they open regex constructs and
            // so are not a literal first character - TODO confirm).
            var commentStartsWith = "[]\\(.<".IndexOf(commentStart) > -1 ? null : new string(commentStart, 1);

            // Make sure the pattern is anchored with \G. The comparison is ordinal:
            // this is a check on regex syntax, not on linguistic text.
            var csComments = commentsPattern.StartsWith("\\G", StringComparison.Ordinal)
                ? commentsPattern
                : $"\\G({commentsPattern})";

            StaticComments       = new TokenDefinition(new Regex(csComments), startsWith: commentStartsWith);
            StaticEscapeNewlines = tokensAttribute.EscapeNewlines;

            // The enumeration contents
            var names  = t.GetEnumNames();
            var values = t.GetEnumValues();

            var pairs = names.Select((n, i) => new KeyValuePair<string, TTok>(n, (TTok)values.GetValue(i))).ToArray();

            // Detect 'end', 'error', 'indent', 'end of line' and 'dedent', and extract
            // the structure and definition for the other rules.

            var infix       = new Dictionary<TTok, InfixAttribute>();
            var definitions = new Dictionary<TTok, TokenDefinition>();
            var parent      = new Dictionary<TTok, TTok>();
            var publicChild = new HashSet<TTok>();

            foreach (var kv in pairs)
            {
                var name = kv.Key;
                var tok  = kv.Value;
                var mbr  = t.GetMember(name)[0];

                // Special tokens: remembered in a static member, never turned into a rule.

                var endAttribute = mbr.GetCustomAttribute<EndAttribute>();
                if (endAttribute != null)
                {
                    StaticEnd = tok;
                    continue;
                }

                var errorAttribute = mbr.GetCustomAttribute<ErrorAttribute>();
                if (errorAttribute != null)
                {
                    StaticError = tok;
                    continue;
                }

                var indentAttribute = mbr.GetCustomAttribute<IndentAttribute>();
                if (indentAttribute != null)
                {
                    StaticIndent = tok;
                    continue;
                }

                var endOfLineAttribute = mbr.GetCustomAttribute<EndOfLineAttribute>();
                if (endOfLineAttribute != null)
                {
                    StaticEndOfLine = tok;
                    continue;
                }

                var dedentAttribute = mbr.GetCustomAttribute<DedentAttribute>();
                if (dedentAttribute != null)
                {
                    StaticDedent = tok;
                    continue;
                }

                // Modifiers: recorded, but the member still needs a definition below.

                var infixAttribute = mbr.GetCustomAttribute<InfixAttribute>();
                if (infixAttribute != null)
                {
                    infix.Add(tok, infixAttribute);
                }

                var fromAttribute = mbr.GetCustomAttribute<FromAttribute>();
                if (fromAttribute != null)
                {
                    parent.Add(tok, (TTok)(object)fromAttribute.Parent);
                    if (!fromAttribute.IsPrivate)
                    {
                        publicChild.Add(tok);
                    }
                    // No continue: still need to determine definition
                }

                // Definition: the first of Pattern, Any, Ci that is present wins.

                var patternAttribute = mbr.GetCustomAttribute<PatternAttribute>();
                if (patternAttribute != null)
                {
                    definitions.Add(tok, patternAttribute.ToDefinition());
                    continue;
                }

                var anyAttribute = mbr.GetCustomAttribute<AnyAttribute>();
                if (anyAttribute != null)
                {
                    definitions.Add(tok, anyAttribute.ToDefinition());
                    continue;
                }

                var ciAttribute = mbr.GetCustomAttribute<CiAttribute>();
                if (ciAttribute != null)
                {
                    // Case-insensitive token whose text is the enum member's own name.
                    definitions.Add(tok, new AnyAttribute(name)
                    {
                        CaseSensitive = false
                    }.ToDefinition());
                }
            }

            // Construct the actual rules. This algorithm is NOT optimal, but most
            // languages have a small enough number of tokens that this does not matter.

            var seen = new HashSet<TTok>();

            // Top-level rules are the definitions without a parent; their children
            // are attached through SubRules.
            StaticRules = definitions
                          .Where(kv => !parent.ContainsKey(kv.Key))
                          .Select(kv => new LexerRule<TTok>(
                                      kv.Value,
                                      kv.Key,
                                      publicChild.Contains(kv.Key),
                                      infix.TryGetValue(kv.Key, out var inf) ? inf : null,
                                      SubRules(kv.Key, definitions, infix, parent, publicChild, seen)))
                          .ToArray();
        }