/// <summary>
/// A convenience wrapper that performs the tokenize and parse steps together from a string.
/// </summary>
/// <param name="grammarText">The text to parse.</param>
/// <returns>The root symbol of the parsed grammar.</returns>
public static GrammarSymbol Parse(string grammarText)
{
    var index = new TokenTextIndex(grammarText);
    ImmutableArray<Token> lexed = Lexer.Lex(index);
    var parser = new Parser(lexed);
    return parser.ParseGrammar();
}
/// <summary>
/// Initializes a new instance of the <see cref="Token"/> class. Do not call this
/// constructor directly; call <see cref="TokenTextIndex.Token(int, int, TokenKind)"/> instead.
/// </summary>
/// <param name="factory">The token text index into which this token forms a pointer to.</param>
/// <param name="offset">The offset in <paramref name="factory"/> from which this token
/// starts.</param>
/// <param name="length">The length of text represented by this token.</param>
/// <param name="kind">The kind of token this is.</param>
internal Token(TokenTextIndex factory, int offset, int length, TokenKind kind)
{
    // The token must lie entirely within the text owned by its factory.
    Debug.Assert(offset + length <= factory.Text.Length);

    _tokenFactory = factory;
    this.Offset = offset;
    this.Length = length;
    this.Kind = kind;
}
/// <summary>
/// Core lexer: a single-pass, character-at-a-time state machine over the text held by
/// <paramref name="tokenFactory"/>, lazily yielding tokens as they are completed.
/// </summary>
/// <param name="tokenFactory">The token text index whose <c>Text</c> is lexed; also used
/// to construct tokens and to produce source locations for error reporting.</param>
/// <returns>A lazily-evaluated sequence of tokens.</returns>
/// <exception cref="G4ParseFailureException">Thrown when the input is not lexically valid
/// (unclosed string, comment or annotation; a lone '.'; or an unrecognized '/').</exception>
private static IEnumerable<Token> LexImpl(TokenTextIndex tokenFactory)
{
    string text = tokenFactory.Text;
    LexerState state = LexerState.SkipWhitespace;
    // Index in 'text' where the token currently being collected began.
    int tokenStart = 0;
    // Index of the '/' that opened the current multi-line comment; used only for
    // the unclosed-comment error at end of input.
    int multiLineCommentStart = 0;
    // Nesting depth of '{' seen inside an annotation value, so balanced braces
    // within the value do not terminate it early.
    int valueLeftBraceDepth = 0;
    for (int idx = 0; idx < text.Length; ++idx)
    {
        char ch = text[idx];
        // Note: The "error detection" cases are later in the switch because we
        // expect them to be visited less often. (and the C# compiler emits the branches
        // in order)
        switch (state)
        {
            case LexerState.SkipWhitespace:
                // Putting Default first because we expect most of the time to be skipping
                // whitespace.
                // Every character here potentially starts a new token.
                tokenStart = idx;
                switch (ch)
                {
                    case ' ':
                    case '\t':
                    case '\r':
                    case '\n':
                    case '\u2028':
                    case '\u2029':
                        // Skip whitespace
                        break;
                    case '\'':
                        state = LexerState.CollectingString;
                        break;
                    case '/':
                        state = LexerState.CommentCandidate;
                        break;
                    case '|':
                        // Single-character punctuation tokens are emitted immediately.
                        yield return (tokenFactory.Token(idx, TokenKind.Pipe));
                        break;
                    case ':':
                        yield return (tokenFactory.Token(idx, TokenKind.Colon));
                        break;
                    case ';':
                        yield return (tokenFactory.Token(idx, TokenKind.Semicolon));
                        break;
                    case '.':
                        // '.' is only valid as part of '..'; defer until the next character.
                        state = LexerState.DotsCandidate;
                        break;
                    case '(':
                        yield return (tokenFactory.Token(idx, TokenKind.Lparen));
                        break;
                    case ')':
                        yield return (tokenFactory.Token(idx, TokenKind.Rparen));
                        break;
                    case '*':
                        yield return (tokenFactory.Token(idx, TokenKind.Star));
                        break;
                    case '+':
                        yield return (tokenFactory.Token(idx, TokenKind.Plus));
                        break;
                    case '?':
                        yield return (tokenFactory.Token(idx, TokenKind.Question));
                        break;
                    default:
                        // Any other character begins an identifier.
                        state = LexerState.CollectingIdentifier;
                        break;
                }
                break;
            case LexerState.CollectingString:
                // Strings are single-quote delimited; the token includes both quotes.
                // NOTE(review): no escape handling is visible here — an embedded '\''
                // cannot appear inside a string. Presumably intentional for g4 literals.
                if (ch == '\'')
                {
                    yield return (tokenFactory.Token(tokenStart, idx + 1, TokenKind.String));
                    state = LexerState.SkipWhitespace;
                }
                break;
            case LexerState.SkipSingleLineComment:
                // '//' comments run to any line terminator (including U+2028/U+2029).
                switch (ch)
                {
                    case '\r':
                    case '\n':
                    case '\u2028':
                    case '\u2029':
                        state = LexerState.SkipWhitespace;
                        break;
                }
                break;
            case LexerState.CommentCandidate:
                // We have seen a single '/'; it must be followed by '/' or '*'.
                switch (ch)
                {
                    case '/':
                        state = LexerState.SkipSingleLineComment;
                        break;
                    case '*':
                        state = LexerState.MultiLineComment;
                        // Remember where the comment opened (idx - 1 is the '/')
                        // for the unclosed-comment diagnostic.
                        multiLineCommentStart = idx - 1;
                        break;
                    default:
                        throw new G4ParseFailureException(tokenFactory.Location(idx - 1), Strings.UnrecognizedForwardSlash);
                }
                break;
            case LexerState.MultiLineComment:
                // Inside '/* ... */'; '@annotations' and '{values}' are recognized
                // within comments and emitted as tokens.
                switch (ch)
                {
                    case '*':
                        state = LexerState.MultiLineCommentStar;
                        break;
                    case '@':
                        state = LexerState.CollectingAnnotation;
                        tokenStart = idx;
                        break;
                    case '{':
                        state = LexerState.CollectingAnnotationValue;
                        tokenStart = idx;
                        break;
                }
                break;
            case LexerState.MultiLineCommentStar:
                // Saw '*' inside a comment; decide whether it closes the comment.
                switch (ch)
                {
                    case '*':
                        // Do nothing, e.g. in case *****/
                        break;
                    case '@':
                        state = LexerState.CollectingAnnotation;
                        tokenStart = idx;
                        break;
                    case '{':
                        state = LexerState.CollectingAnnotationValue;
                        tokenStart = idx;
                        break;
                    case '/':
                        state = LexerState.SkipWhitespace;
                        break;
                    default:
                        state = LexerState.MultiLineComment;
                        break;
                }
                break;
            case LexerState.CollectingAnnotation:
                // Collecting an '@name' annotation; it is terminated by whitespace,
                // a '*' (possible comment close), or the start of a '{value}'.
                switch (ch)
                {
                    case ' ':
                    case '\t':
                    case '\r':
                    case '\n':
                    case '\u2028':
                    case '\u2029':
                        yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Annotation));
                        state = LexerState.MultiLineComment;
                        break;
                    case '*':
                        yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Annotation));
                        state = LexerState.MultiLineCommentStar;
                        break;
                    case '{':
                        yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Annotation));
                        valueLeftBraceDepth = 0;
                        state = LexerState.CollectingAnnotationValue;
                        tokenStart = idx;
                        break;
                    case '@':
                        throw new G4ParseFailureException(tokenFactory.Location(tokenStart), Strings.UnrecognizedAtInAnnotation);
                }
                break;
            case LexerState.CollectingAnnotationValue:
                // Collecting a '{...}' value; nested balanced braces are part of the
                // value, tracked via valueLeftBraceDepth.
                switch (ch)
                {
                    case '{':
                        valueLeftBraceDepth++;
                        break;
                    case '}':
                        if (valueLeftBraceDepth > 0)
                        {
                            valueLeftBraceDepth--;
                        }
                        else
                        {
                            // Token includes both delimiting braces.
                            yield return (tokenFactory.Token(tokenStart, idx + 1, TokenKind.AnnotationValue));
                            state = LexerState.MultiLineComment;
                        }
                        break;
                    case '*':
                        // A '*' might be the start of the enclosing comment's '*/'.
                        state = LexerState.CollectingAnnotationValueStar;
                        break;
                }
                break;
            case LexerState.CollectingIdentifier:
                // Identifiers are terminated by whitespace or any punctuation; the
                // terminating punctuation is then handled (often emitted) here too.
                switch (ch)
                {
                    case ' ':
                    case '\t':
                    case '\r':
                    case '\n':
                    case '\u2028':
                    case '\u2029':
                        yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                        state = LexerState.SkipWhitespace;
                        break;
                    case '\'':
                        yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                        tokenStart = idx;
                        state = LexerState.CollectingString;
                        break;
                    case '/':
                        yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                        state = LexerState.CommentCandidate;
                        break;
                    case '|':
                        yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                        yield return (tokenFactory.Token(idx, TokenKind.Pipe));
                        state = LexerState.SkipWhitespace;
                        break;
                    case ':':
                        yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                        yield return (tokenFactory.Token(idx, TokenKind.Colon));
                        state = LexerState.SkipWhitespace;
                        break;
                    case ';':
                        yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                        yield return (tokenFactory.Token(idx, TokenKind.Semicolon));
                        state = LexerState.SkipWhitespace;
                        break;
                    case '.':
                        yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                        tokenStart = idx;
                        state = LexerState.DotsCandidate;
                        break;
                    case '(':
                        yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                        yield return (tokenFactory.Token(idx, TokenKind.Lparen));
                        state = LexerState.SkipWhitespace;
                        break;
                    case ')':
                        yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                        yield return (tokenFactory.Token(idx, TokenKind.Rparen));
                        state = LexerState.SkipWhitespace;
                        break;
                    case '*':
                        yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                        yield return (tokenFactory.Token(idx, TokenKind.Star));
                        state = LexerState.SkipWhitespace;
                        break;
                    case '+':
                        yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                        yield return (tokenFactory.Token(idx, TokenKind.Plus));
                        state = LexerState.SkipWhitespace;
                        break;
                    case '?':
                        yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                        yield return (tokenFactory.Token(idx, TokenKind.Question));
                        state = LexerState.SkipWhitespace;
                        break;
                }
                break;
            case LexerState.CollectingAnnotationValueStar:
                // Inside a '{...}' value, immediately after a '*'.
                // NOTE(review): here '}' closes the value without consulting
                // valueLeftBraceDepth, and the default '{' path returns to
                // CollectingAnnotationValue without incrementing the depth — so
                // '*}' or '*{' sequences bypass the brace balancing done in
                // CollectingAnnotationValue. Confirm whether this asymmetry is
                // intentional.
                switch (ch)
                {
                    case '}':
                        yield return (tokenFactory.Token(tokenStart, idx + 1, TokenKind.AnnotationValue));
                        state = LexerState.MultiLineComment;
                        break;
                    case '/':
                        // '*/' inside an unterminated value closes the comment
                        // around an unclosed annotation — report it as such.
                        throw new G4ParseFailureException(tokenFactory.Location(tokenStart), Strings.UnclosedAnnotation);
                    default:
                        state = LexerState.CollectingAnnotationValue;
                        break;
                }
                break;
            case LexerState.DotsCandidate:
                // A '.' is only legal as the first half of a '..' token.
                switch (ch)
                {
                    case '.':
                        yield return (tokenFactory.Token(tokenStart, idx + 1, TokenKind.Dots));
                        state = LexerState.SkipWhitespace;
                        break;
                    default:
                        throw new G4ParseFailureException(tokenFactory.Location(tokenStart), Strings.SingleDot);
                }
                break;
        }
    }
    // End of input: flush a trailing identifier, or report constructs that were
    // left unterminated when the text ran out.
    switch (state)
    {
        case LexerState.CollectingIdentifier:
            yield return (tokenFactory.Token(tokenStart, text.Length, TokenKind.Identifier));
            break;
        case LexerState.MultiLineComment:
        case LexerState.MultiLineCommentStar:
        case LexerState.CollectingAnnotation:
        case LexerState.CollectingAnnotationValue:
        case LexerState.CollectingAnnotationValueStar:
            throw new G4ParseFailureException(tokenFactory.Location(multiLineCommentStart), Strings.UnclosedMultiLineComment);
        case LexerState.CommentCandidate:
            throw new G4ParseFailureException(tokenFactory.Location(text.Length), Strings.UnrecognizedForwardSlash);
        case LexerState.CollectingString:
            throw new G4ParseFailureException(tokenFactory.Location(tokenStart), Strings.UnclosedString);
        case LexerState.DotsCandidate:
            throw new G4ParseFailureException(tokenFactory.Location(text.Length), Strings.SingleDot);
        case LexerState.SkipWhitespace:
        case LexerState.SkipSingleLineComment:
            // OK (do nothing)
            break;
    }
}
/// <summary>Lexes text in the given <see cref="TokenTextIndex"/>.</summary>
/// <exception cref="G4ParseFailureException">Thrown when the input is not lexically valid.</exception>
/// <param name="tokenFactory">The token factory from which lexical analysis will be performed.</param>
/// <returns>An <see cref="ImmutableArray{Token}"/> containing the generated tokens.</returns>
public static ImmutableArray<Token> Lex(TokenTextIndex tokenFactory)
{
    // Materialize the lazy token stream so lexical errors surface here rather
    // than at first enumeration by the caller.
    IEnumerable<Token> lexedTokens = LexImpl(tokenFactory);
    return lexedTokens.ToImmutableArray();
}
/// <summary>Main entry-point for this application: reads a g4 grammar, compiles it
/// to a data model, and emits generated C# plus a companion JSON schema.</summary>
/// <param name="args">Array of command-line argument strings.</param>
/// <returns>Exit-code for the process - 0 for success, else an error code.</returns>
public static int Main(string[] args)
{
    using (var config = new DataModelGeneratorConfiguration())
    {
        if (!config.ParseArgs(args))
        {
            return 1;
        }

        // Read the entire input grammar into memory.
        string inputText;
        using (var sr = new StreamReader(config.InputStream))
        {
            inputText = sr.ReadToEnd();
        }

        // Lex and parse; any lexical or syntactic failure aborts the run.
        GrammarSymbol grammar;
        try
        {
            var factory = new TokenTextIndex(inputText);
            System.Collections.Immutable.ImmutableArray<Token> tokens = Lexer.Lex(factory);
            var parser = new Parser(tokens);
            grammar = parser.ParseGrammar();
        }
        catch (G4ParseFailureException ex)
        {
            Console.Error.WriteLine(ex.Message);
            return 1;
        }

        // Compile productions one at a time so a failure in one production is
        // reported but does not prevent compiling the rest.
        // NOTE(review): child 0 is deliberately skipped — presumably the grammar
        // declaration rather than a production; confirm against ParseGrammar.
        var builder = new DataModelBuilder();
        for (int idx = 1; idx < grammar.Children.Length; ++idx)
        {
            try
            {
                GrammarSymbol prod = grammar.Children[idx];
                builder.CompileProduction(prod);
            }
            catch (G4ParseFailureException ex)
            {
                // FIX: write to stderr, consistent with every other error path in
                // this method (was Console.WriteLine, which sent errors to stdout
                // and could corrupt generated output when -ToConsole is used).
                Console.Error.WriteLine(Strings.FailedToCompileProduction + ex.Message);
            }
        }

        // Link the compiled productions and generate both output artifacts.
        string generatedCSharp;
        string generatedJsonSchema;
        try
        {
            DataModel model = builder.Link(config.InputFilePath, DataModelMetadata.FromGrammar(grammar));
            model = MemberHoister.HoistTypes(model);

            var cr = new CodeWriter();
            CodeGenerator.WriteDataModel(cr, model);
            generatedCSharp = cr.ToString();

            // JSON schema convention: 2-space indentation.
            cr = new CodeWriter(2, ' ');
            JsonSchemaGenerator.WriteSchema(cr, model);
            generatedJsonSchema = cr.ToString();
        }
        catch (G4ParseFailureException ex)
        {
            Console.Error.WriteLine(ex.Message);
            return 1;
        }

        if (config.ToConsole)
        {
            Console.WriteLine(generatedCSharp);
        }

        using (var sw = new StreamWriter(config.OutputStream))
        {
            sw.Write(generatedCSharp);
        }

        // The schema is written alongside the output file as <name>.schema.json.
        string jsonSchemaPath = Path.GetFileNameWithoutExtension(config.OutputFilePath);
        jsonSchemaPath = Path.Combine(Path.GetDirectoryName(config.OutputFilePath), jsonSchemaPath + ".schema.json");
        File.WriteAllText(jsonSchemaPath, generatedJsonSchema);

        return 0;
    }
}