Exemplo n.º 1
0
        /// <summary>
        /// Convenience helper that lexes a grammar string and parses the resulting tokens in a
        /// single call.
        /// </summary>
        /// <param name="grammarText">The grammar source text to lex and parse.</param>
        /// <returns>The grammar symbol produced by parsing <paramref name="grammarText"/>.</returns>
        public static GrammarSymbol Parse(string grammarText)
        {
            // Step 1: wrap the text in an index and turn it into tokens.
            ImmutableArray<Token> lexed = Lexer.Lex(new TokenTextIndex(grammarText));

            // Step 2: hand the tokens to a parser and build the grammar.
            var grammarParser = new Parser(lexed);
            return grammarParser.ParseGrammar();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Initializes a new instance of the <see cref="Token"/> class. Not meant to be called
        /// directly; create tokens through <see cref="TokenTextIndex.Token(int, int, TokenKind)"/>.
        /// </summary>
        /// <param name="factory">The token text index whose text this token points into.</param>
        /// <param name="offset">The position within the text of <paramref name="factory"/> at which
        /// this token begins.</param>
        /// <param name="length">The number of characters this token covers.</param>
        /// <param name="kind">The kind of this token.</param>
        internal Token(TokenTextIndex factory, int offset, int length, TokenKind kind)
        {
            // Fetch the text outside the assert so the access is performed in every build flavor.
            string sourceText = factory.Text;

            // Debug-only sanity check: the token must not run past the end of the text.
            Debug.Assert(sourceText.Length >= offset + length);

            _tokenFactory = factory;
            Kind          = kind;
            Length        = length;
            Offset        = offset;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Lexes the text of <paramref name="tokenFactory"/> with a single-pass,
        /// character-at-a-time state machine, yielding each token as soon as it is recognized.
        /// </summary>
        /// <remarks>
        /// This is an iterator method: no lexing happens, and no exception is thrown, until the
        /// returned sequence is enumerated. Annotations (<c>@name</c>) and annotation values
        /// (<c>{...}</c>, with nested braces) are only recognized inside multi-line comments.
        /// </remarks>
        /// <param name="tokenFactory">Supplies the text to lex and creates tokens and error locations.</param>
        /// <returns>The tokens found in the text, in the order they appear.</returns>
        /// <exception cref="G4ParseFailureException">
        /// Thrown during enumeration for a lone <c>/</c>, a single <c>.</c>, an <c>@</c> inside an
        /// annotation name, a <c>*/</c> inside an annotation value, or an unterminated string or
        /// multi-line comment at the end of the text.
        /// </exception>
        private static IEnumerable<Token> LexImpl(TokenTextIndex tokenFactory)
        {
            string     text                  = tokenFactory.Text;
            LexerState state                 = LexerState.SkipWhitespace;
            int        tokenStart            = 0; // Where the token currently being collected began.
            int        multiLineCommentStart = 0; // Position of the '/' opening the current /* comment.
            int        valueLeftBraceDepth   = 0; // Count of unmatched nested '{' inside an annotation value.

            for (int idx = 0; idx < text.Length; ++idx)
            {
                char ch = text[idx];
                // Note: The "error detection" cases are later in the switch because we
                // expect them to be visited less often. (and the C# compiler emits the branches
                // in order)
                switch (state)
                {
                case LexerState.SkipWhitespace:
                    // Any token begins in this state, so remember the current position as its
                    // potential start before looking at the character.
                    tokenStart = idx;
                    switch (ch)
                    {
                    case ' ':
                    case '\t':
                    case '\r':
                    case '\n':
                    case '\u2028':
                    case '\u2029':
                        // Skip whitespace
                        break;

                    case '\'':
                        state = LexerState.CollectingString;
                        break;

                    case '/':
                        state = LexerState.CommentCandidate;
                        break;

                    case '|':
                        yield return tokenFactory.Token(idx, TokenKind.Pipe);
                        break;

                    case ':':
                        yield return tokenFactory.Token(idx, TokenKind.Colon);
                        break;

                    case ';':
                        yield return tokenFactory.Token(idx, TokenKind.Semicolon);
                        break;

                    case '.':
                        state = LexerState.DotsCandidate;
                        break;

                    case '(':
                        yield return tokenFactory.Token(idx, TokenKind.Lparen);
                        break;

                    case ')':
                        yield return tokenFactory.Token(idx, TokenKind.Rparen);
                        break;

                    case '*':
                        yield return tokenFactory.Token(idx, TokenKind.Star);
                        break;

                    case '+':
                        yield return tokenFactory.Token(idx, TokenKind.Plus);
                        break;

                    case '?':
                        yield return tokenFactory.Token(idx, TokenKind.Question);
                        break;

                    default:
                        // Anything else starts an identifier.
                        state = LexerState.CollectingIdentifier;
                        break;
                    }
                    break;

                case LexerState.CollectingString:
                    // The string token includes both quote characters.
                    if (ch == '\'')
                    {
                        yield return tokenFactory.Token(tokenStart, idx + 1, TokenKind.String);
                        state = LexerState.SkipWhitespace;
                    }
                    break;

                case LexerState.SkipSingleLineComment:
                    // A single-line comment runs until the next line terminator.
                    switch (ch)
                    {
                    case '\r':
                    case '\n':
                    case '\u2028':
                    case '\u2029':
                        state = LexerState.SkipWhitespace;
                        break;
                    }
                    break;

                case LexerState.CommentCandidate:
                    // The previous character was '/'; decide which kind of comment follows.
                    switch (ch)
                    {
                    case '/':
                        state = LexerState.SkipSingleLineComment;
                        break;

                    case '*':
                        state = LexerState.MultiLineComment;
                        multiLineCommentStart = idx - 1;
                        break;

                    default:
                        throw new G4ParseFailureException(tokenFactory.Location(idx - 1), Strings.UnrecognizedForwardSlash);
                    }
                    break;

                case LexerState.MultiLineComment:
                    switch (ch)
                    {
                    case '*':
                        state = LexerState.MultiLineCommentStar;
                        break;

                    case '@':
                        state      = LexerState.CollectingAnnotation;
                        tokenStart = idx;
                        break;

                    case '{':
                        // A fresh annotation value always starts with no nested braces open.
                        valueLeftBraceDepth = 0;
                        state      = LexerState.CollectingAnnotationValue;
                        tokenStart = idx;
                        break;
                    }
                    break;

                case LexerState.MultiLineCommentStar:
                    // The previous character was '*' inside a multi-line comment.
                    switch (ch)
                    {
                    case '*':
                        // Do nothing, e.g. in case *****/
                        break;

                    case '@':
                        state      = LexerState.CollectingAnnotation;
                        tokenStart = idx;
                        break;

                    case '{':
                        // A fresh annotation value always starts with no nested braces open.
                        valueLeftBraceDepth = 0;
                        state      = LexerState.CollectingAnnotationValue;
                        tokenStart = idx;
                        break;

                    case '/':
                        state = LexerState.SkipWhitespace;
                        break;

                    default:
                        state = LexerState.MultiLineComment;
                        break;
                    }
                    break;

                case LexerState.CollectingAnnotation:
                    switch (ch)
                    {
                    case ' ':
                    case '\t':
                    case '\r':
                    case '\n':
                    case '\u2028':
                    case '\u2029':
                        yield return tokenFactory.Token(tokenStart, idx, TokenKind.Annotation);
                        state = LexerState.MultiLineComment;
                        break;

                    case '*':
                        yield return tokenFactory.Token(tokenStart, idx, TokenKind.Annotation);
                        state = LexerState.MultiLineCommentStar;
                        break;

                    case '{':
                        yield return tokenFactory.Token(tokenStart, idx, TokenKind.Annotation);
                        valueLeftBraceDepth = 0;
                        state      = LexerState.CollectingAnnotationValue;
                        tokenStart = idx;
                        break;

                    case '@':
                        throw new G4ParseFailureException(tokenFactory.Location(tokenStart), Strings.UnrecognizedAtInAnnotation);
                    }
                    break;

                case LexerState.CollectingAnnotationValue:
                    switch (ch)
                    {
                    case '{':
                        valueLeftBraceDepth++;
                        break;

                    case '}':
                        if (valueLeftBraceDepth > 0)
                        {
                            // Closes a nested brace; the value continues.
                            valueLeftBraceDepth--;
                        }
                        else
                        {
                            // Closes the outermost brace; the token includes both braces.
                            yield return tokenFactory.Token(tokenStart, idx + 1, TokenKind.AnnotationValue);
                            state = LexerState.MultiLineComment;
                        }
                        break;

                    case '*':
                        state = LexerState.CollectingAnnotationValueStar;
                        break;
                    }
                    break;

                case LexerState.CollectingIdentifier:
                    // Any delimiter ends the identifier; the delimiter itself is then handled
                    // exactly as it would have been in SkipWhitespace.
                    switch (ch)
                    {
                    case ' ':
                    case '\t':
                    case '\r':
                    case '\n':
                    case '\u2028':
                    case '\u2029':
                        yield return tokenFactory.Token(tokenStart, idx, TokenKind.Identifier);
                        state = LexerState.SkipWhitespace;
                        break;

                    case '\'':
                        yield return tokenFactory.Token(tokenStart, idx, TokenKind.Identifier);
                        tokenStart = idx;
                        state      = LexerState.CollectingString;
                        break;

                    case '/':
                        yield return tokenFactory.Token(tokenStart, idx, TokenKind.Identifier);
                        state = LexerState.CommentCandidate;
                        break;

                    case '|':
                        yield return tokenFactory.Token(tokenStart, idx, TokenKind.Identifier);
                        yield return tokenFactory.Token(idx, TokenKind.Pipe);
                        state = LexerState.SkipWhitespace;
                        break;

                    case ':':
                        yield return tokenFactory.Token(tokenStart, idx, TokenKind.Identifier);
                        yield return tokenFactory.Token(idx, TokenKind.Colon);
                        state = LexerState.SkipWhitespace;
                        break;

                    case ';':
                        yield return tokenFactory.Token(tokenStart, idx, TokenKind.Identifier);
                        yield return tokenFactory.Token(idx, TokenKind.Semicolon);
                        state = LexerState.SkipWhitespace;
                        break;

                    case '.':
                        yield return tokenFactory.Token(tokenStart, idx, TokenKind.Identifier);
                        tokenStart = idx;
                        state      = LexerState.DotsCandidate;
                        break;

                    case '(':
                        yield return tokenFactory.Token(tokenStart, idx, TokenKind.Identifier);
                        yield return tokenFactory.Token(idx, TokenKind.Lparen);
                        state = LexerState.SkipWhitespace;
                        break;

                    case ')':
                        yield return tokenFactory.Token(tokenStart, idx, TokenKind.Identifier);
                        yield return tokenFactory.Token(idx, TokenKind.Rparen);
                        state = LexerState.SkipWhitespace;
                        break;

                    case '*':
                        yield return tokenFactory.Token(tokenStart, idx, TokenKind.Identifier);
                        yield return tokenFactory.Token(idx, TokenKind.Star);
                        state = LexerState.SkipWhitespace;
                        break;

                    case '+':
                        yield return tokenFactory.Token(tokenStart, idx, TokenKind.Identifier);
                        yield return tokenFactory.Token(idx, TokenKind.Plus);
                        state = LexerState.SkipWhitespace;
                        break;

                    case '?':
                        yield return tokenFactory.Token(tokenStart, idx, TokenKind.Identifier);
                        yield return tokenFactory.Token(idx, TokenKind.Question);
                        state = LexerState.SkipWhitespace;
                        break;
                    }
                    break;

                case LexerState.CollectingAnnotationValueStar:
                    // The previous character was '*' inside an annotation value. This state only
                    // exists to catch "*/" (the comment ending while the value is still open);
                    // braces must be tracked here exactly as in CollectingAnnotationValue so the
                    // nesting depth stays correct.
                    switch (ch)
                    {
                    case '/':
                        throw new G4ParseFailureException(tokenFactory.Location(tokenStart), Strings.UnclosedAnnotation);

                    case '*':
                        // Stay in this state so that a run such as "**/" is still detected.
                        break;

                    case '{':
                        valueLeftBraceDepth++;
                        state = LexerState.CollectingAnnotationValue;
                        break;

                    case '}':
                        if (valueLeftBraceDepth > 0)
                        {
                            // Closes a nested brace; the value continues.
                            valueLeftBraceDepth--;
                            state = LexerState.CollectingAnnotationValue;
                        }
                        else
                        {
                            yield return tokenFactory.Token(tokenStart, idx + 1, TokenKind.AnnotationValue);
                            state = LexerState.MultiLineComment;
                        }
                        break;

                    default:
                        state = LexerState.CollectingAnnotationValue;
                        break;
                    }
                    break;

                case LexerState.DotsCandidate:
                    // The previous character was '.'; only ".." forms a valid token.
                    switch (ch)
                    {
                    case '.':
                        yield return tokenFactory.Token(tokenStart, idx + 1, TokenKind.Dots);
                        state = LexerState.SkipWhitespace;
                        break;

                    default:
                        throw new G4ParseFailureException(tokenFactory.Location(tokenStart), Strings.SingleDot);
                    }
                    break;
                }
            }

            // The text is exhausted: flush or reject whatever was still in progress.
            switch (state)
            {
            case LexerState.CollectingIdentifier:
                yield return tokenFactory.Token(tokenStart, text.Length, TokenKind.Identifier);
                break;

            case LexerState.MultiLineComment:
            case LexerState.MultiLineCommentStar:
            case LexerState.CollectingAnnotation:
            case LexerState.CollectingAnnotationValue:
            case LexerState.CollectingAnnotationValueStar:
                throw new G4ParseFailureException(tokenFactory.Location(multiLineCommentStart), Strings.UnclosedMultiLineComment);

            case LexerState.CommentCandidate:
                // NOTE(review): reports the position one past the '/', whereas the in-loop check
                // reports the '/' itself (idx - 1) - confirm which location is intended.
                throw new G4ParseFailureException(tokenFactory.Location(text.Length), Strings.UnrecognizedForwardSlash);

            case LexerState.CollectingString:
                throw new G4ParseFailureException(tokenFactory.Location(tokenStart), Strings.UnclosedString);

            case LexerState.DotsCandidate:
                // NOTE(review): reports the position one past the '.', whereas the in-loop check
                // reports tokenStart - confirm which location is intended.
                throw new G4ParseFailureException(tokenFactory.Location(text.Length), Strings.SingleDot);

            case LexerState.SkipWhitespace:
            case LexerState.SkipSingleLineComment:
                // OK (do nothing)
                break;
            }
        }
Exemplo n.º 4
0
 /// <summary>Performs lexical analysis of the text held by a <see cref="TokenTextIndex"/>.</summary>
 /// <exception cref="G4ParseFailureException">Thrown when the input is not lexically valid.</exception>
 /// <param name="tokenFactory">The token factory whose text is lexed and which creates the tokens.</param>
 /// <returns>An <see cref="ImmutableArray{Token}"/> holding the generated tokens.</returns>
 public static ImmutableArray<Token> Lex(TokenTextIndex tokenFactory)
 {
     // Materialize the token sequence into an immutable array before returning it.
     var tokenSequence = LexImpl(tokenFactory);
     return tokenSequence.ToImmutableArray();
 }
Exemplo n.º 5
0
        //private static IDisposable sw;

        /// <summary>Main entry-point for this application.</summary>
        /// <remarks>
        /// Reads the grammar text from the configured input stream, lexes and parses it, compiles
        /// each production, and then writes the generated C# to the configured output stream and
        /// the generated JSON schema to a <c>.schema.json</c> file next to the output file.
        /// A production that fails to compile is reported on standard error and skipped; it does
        /// not by itself change the exit code.
        /// </remarks>
        /// <param name="args">Array of command-line argument strings.</param>
        /// <returns>
        /// Exit-code for the process - 0 for success; 1 when the arguments cannot be parsed or when
        /// lexing/parsing, linking, or code generation throws a <see cref="G4ParseFailureException"/>.
        /// </returns>
        public static int Main(string[] args)
        {
            using (var config = new DataModelGeneratorConfiguration())
            {
                if (!config.ParseArgs(args))
                {
                    return 1;
                }

                string inputText;
                using (var sr = new StreamReader(config.InputStream))
                {
                    inputText = sr.ReadToEnd();
                }

                // Lex and parse the whole grammar; any lexical or syntax error aborts the run.
                GrammarSymbol grammar;
                try
                {
                    var factory = new TokenTextIndex(inputText);
                    System.Collections.Immutable.ImmutableArray<Token> tokens = Lexer.Lex(factory);
                    var parser = new Parser(tokens);
                    grammar = parser.ParseGrammar();
                }
                catch (G4ParseFailureException ex)
                {
                    Console.Error.WriteLine(ex.Message);
                    return 1;
                }

                // Compile productions one by one so that a single bad production does not stop the
                // others from being processed.
                // NOTE(review): child 0 is skipped - presumably it is the grammar declaration
                // rather than a production; confirm against the parser.
                var builder = new DataModelBuilder();
                for (int idx = 1; idx < grammar.Children.Length; ++idx)
                {
                    try
                    {
                        GrammarSymbol prod = grammar.Children[idx];
                        builder.CompileProduction(prod);
                    }
                    catch (G4ParseFailureException ex)
                    {
                        // Diagnostics go to standard error, like every other failure in this
                        // method, so they never mix with generated code echoed to standard output.
                        Console.Error.WriteLine(Strings.FailedToCompileProduction + ex.Message);
                    }
                }

                // Link the compiled productions into a model and render both outputs in memory
                // before touching any output file.
                string generatedCSharp;
                string generatedJsonSchema;
                try
                {
                    DataModel model = builder.Link(config.InputFilePath, DataModelMetadata.FromGrammar(grammar));
                    model = MemberHoister.HoistTypes(model);
                    var cr = new CodeWriter();
                    CodeGenerator.WriteDataModel(cr, model);
                    generatedCSharp = cr.ToString();

                    cr = new CodeWriter(2, ' ');
                    JsonSchemaGenerator.WriteSchema(cr, model);
                    generatedJsonSchema = cr.ToString();
                }
                catch (G4ParseFailureException ex)
                {
                    Console.Error.WriteLine(ex.Message);
                    return 1;
                }

                if (config.ToConsole)
                {
                    Console.WriteLine(generatedCSharp);
                }

                using (var sw = new StreamWriter(config.OutputStream))
                {
                    sw.Write(generatedCSharp);
                }

                // The schema is written beside the output file: same directory and base name,
                // with the extension replaced by ".schema.json".
                string jsonSchemaPath = Path.GetFileNameWithoutExtension(config.OutputFilePath);
                jsonSchemaPath = Path.Combine(Path.GetDirectoryName(config.OutputFilePath), jsonSchemaPath + ".schema.json");

                File.WriteAllText(jsonSchemaPath, generatedJsonSchema);

                return 0;
            }
        }