/// <summary>
/// Creates a parser exception whose message starts with the source name, line number and line position
/// taken from <paramref name="source"/>, followed by the formatted error text.
/// </summary>
/// <param name="source">Position in the source that the error is attributed to.</param>
/// <param name="messageFormat">Composite format string describing the error.</param>
/// <param name="args">Arguments substituted into <paramref name="messageFormat"/>.</param>
internal HsonParserException(SourcePosition source, string messageFormat, params object[] args)
    : base(
        String.Concat(
            // Location prefix:
            String.Format("HSON parser error in '{0}' at line {1}({2})", source.Name, source.LineNumber, source.LinePosition),
            ": ",
            // Caller-supplied description:
            String.Format(messageFormat, args)))
{
    // Keep the position so that handlers can inspect it without parsing the message.
    this.SourcePosition = source;
}
/// <summary>
/// Creates a token of the given type at the given source position, carrying the given text.
/// </summary>
/// <param name="source">Position in the source that the token is attributed to.</param>
/// <param name="type">Kind of token.</param>
/// <param name="text">Text assigned to <c>Text</c>.</param>
public Token(SourcePosition source, TokenType type, string text)
    : this(source, type)
{
    // The chained constructor has already stored the position and type; only the text remains.
    this.Text = text;
}
/// <summary>
/// Reads the HSON file at <c>this.path</c> and streams its tokens to the caller. Comments are dropped,
/// <c>@"..."</c> multi-line string literals are re-escaped and emitted as ordinary string literal tokens, and
/// <c>@import("path")</c> directives are replaced by the tokens produced by <c>Import(path)</c>.
/// The JSON subset of HSON is only superficially tokenized, so the resulting token stream does not
/// necessarily describe well-formed JSON.
/// </summary>
/// <returns>An enumerator over the tokens found in the source, including those of imported sources.</returns>
/// <exception cref="HsonParserException">
/// Thrown on an unterminated comment, string literal or directive, an unknown comment type or directive,
/// or a character that cannot start any token.
/// </exception>
public IEnumerator<Token> Read()
{
    using (var fs = new FileStream(this.path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize))
    using (var sr = new StreamReader(fs, encoding, detectEncodingFromByteOrderMarks, bufferSize))
    {
        // Track the last read position and next position in the source:
        SourcePosition posRead = new SourcePosition(this.path, 1, 1), posNext = new SourcePosition(this.path, 1, 1);

        // Reads the next character and keeps track of current position in the source:
        Func<int> readNext = () =>
        {
            posRead = posNext;
            // Read single chars at a time, relying on buffered reads for performance.
            int x = sr.Read();
            // EOF?
            if (x == -1) return x;
            // CR does not affect output position:
            else if (x == '\r') return x;
            // LF affects output position:
            else if (x == '\n') posNext = new SourcePosition(posRead.Name, posRead.LineNumber + 1, 1);
            // TODO: How to treat '\t'?
            else posNext = new SourcePosition(posRead.Name, posRead.LineNumber, posRead.LinePosition + 1);
            return x;
        };

        // Characters that make up a raw (unquoted) run: identifiers/keywords and numbers.
        // '-' and '+' are included so that signed numbers and exponents ("-1", "1e-5", "2E+3") are
        // tokenized instead of being rejected as unexpected characters.
        // FIXME: what's '_' doing here?
        Func<int, bool> isRawChar = ch =>
            (ch != -1) && (Char.IsLetterOrDigit((char)ch) || ch == '_' || ch == '.' || ch == '-' || ch == '+');

        int c, c2;

        c = readNext();
        while (c != -1)
        {
            // Parse comments and don't emit them:
            if (c == '/')
            {
                c2 = readNext();
                if (c2 == -1) throw new HsonParserException(posRead, "Unexpected end of stream");

                if (c2 == '/')
                {
                    // single line comment
                    c = readNext();
                    while (c != -1)
                    {
                        // Presence of an '\r' is irrelevant since we're not consuming it for storage.
                        // Stop at '\n' and leave `c` holding the first char of the next line:
                        if (c == '\n')
                        {
                            c = readNext();
                            break;
                        }
                        c = readNext();
                    }
                }
                else if (c2 == '*')
                {
                    // block comment
                    c = readNext();
                    while (c != -1)
                    {
                        // Read up until '*/':
                        if (c == '*')
                        {
                            c = readNext();
                            if (c == -1) throw new HsonParserException(posRead, "Unexpected end of stream");
                            if (c == '/') break;
                            // Not a terminator. Do not read again here: the char just read may itself be
                            // the '*' of the real "*/" (e.g. "/***/"), so let the loop re-examine it.
                        }
                        else
                        {
                            c = readNext();
                        }
                    }
                    if (c == -1) throw new HsonParserException(posRead, "Unexpected end of stream");
                    // Step past the closing '/':
                    c = readNext();
                    continue;
                }
                // Not either comment type:
                else
                {
                    throw new HsonParserException(posRead, "Unknown comment type");
                }
            }
            else if (c == '@')
            {
                c = readNext();
                if (c == -1) throw new HsonParserException(posRead, "Unexpected end of stream");

                SourcePosition emitSource = posRead;

                // @"multi-line string literal":
                if (c == '"')
                {
                    // Parse the multiline string and emit a string literal token:
                    StringBuilder emit = new StringBuilder();

                    c = readNext();
                    if (c == -1) throw new HsonParserException(posRead, "Unexpected end of stream");
                    while (c != -1)
                    {
                        // Is it a terminating '"' or a double '""'?
                        if (c == '"')
                        {
                            c = readNext();
                            if (c == '"')
                            {
                                // Double quote chars are emitted as a single escaped quote char:
                                emit.Append('\\');
                                emit.Append('"');
                                c = readNext();
                            }
                            else
                            {
                                // Exit; `c` already holds the char following the literal (possibly EOF):
                                break;
                            }
                        }
                        else if (c == '\\')
                        {
                            // Backslashes have no special meaning in multiline strings, pass them through as escaped:
                            emit.Append('\\');
                            emit.Append('\\');
                            c = readNext();
                        }
                        else if (c == '\r')
                        {
                            emit.Append('\\');
                            emit.Append('r');
                            c = readNext();
                        }
                        else if (c == '\n')
                        {
                            emit.Append('\\');
                            emit.Append('n');
                            c = readNext();
                        }
                        else
                        {
                            // Emit any other regular char:
                            emit.Append((char)c);
                            c = readNext();
                        }

                        if (c == -1) throw new HsonParserException(posRead, "Unexpected end of stream");
                    }

                    // Yield the string literal token:
                    yield return new Token(emitSource, TokenType.StringLiteral, emit.ToString());
                }
                // @directive...
                else if (Char.IsLetter((char)c))
                {
                    // Read the word up to the next non-word char:
                    StringBuilder sbDirective = new StringBuilder("import".Length);
                    sbDirective.Append((char)c);
                    while ((c = readNext()) != -1)
                    {
                        if (!Char.IsLetter((char)c)) break;
                        sbDirective.Append((char)c);
                    }
                    if (c == -1) throw new HsonParserException(posRead, "Unexpected end of directive");

                    string directive = sbDirective.ToString();
                    if (directive == "import")
                    {
                        // @import directive
                        if (c != '(') throw new HsonParserException(posRead, "Expected '('");
                        c = readNext();

                        // Parse a string argument:
                        if (c != '"') throw new HsonParserException(posRead, "Expected '\"'");
                        StringBuilder sbValue = new StringBuilder(80);
                        while ((c = readNext()) != -1)
                        {
                            if (c == '"') break;
                            sbValue.Append((char)c);
                        }
                        if (c != '"') throw new HsonParserException(posRead, "Expected '\"'");

                        c = readNext();
                        if (c != ')') throw new HsonParserException(posRead, "Expected ')'");
                        c = readNext();

                        // Call the import function to get an IEnumerator<Token> to stream its output through to our caller:
                        string path = sbValue.ToString();
                        using (var imported = Import(path))
                        {
                            while (imported.MoveNext())
                            {
                                yield return imported.Current;
                            }
                        }
                    }
                    else
                    {
                        throw new HsonParserException(posRead, "Unknown directive, '@{0}'", directive);
                    }
                }
                else
                {
                    throw new HsonParserException(posRead, "Unknown @directive");
                }
            }
            else if (c == '"')
            {
                // Parse the string literal:
                SourcePosition emitSource = posRead;

                c = readNext();
                if (c == -1) throw new HsonParserException(posRead, "Unexpected end of stream");

                StringBuilder emit = new StringBuilder(80);
                while (c != -1)
                {
                    if (c == '"')
                    {
                        // Exit:
                        break;
                    }
                    else if (c == '\\')
                    {
                        // We don't care what escape sequence it is so long as we handle the '\"' case properly.
                        // Emit the '\':
                        emit.Append((char)c);

                        // An early-terminated escape sequence is an error:
                        c = readNext();
                        if (c == -1) throw new HsonParserException(posRead, "Unexpected end of stream");

                        // Emit the escaped char too:
                        emit.Append((char)c);
                        c = readNext();
                    }
                    else
                    {
                        // Emit regular characters:
                        emit.Append((char)c);
                        c = readNext();
                    }

                    if (c == -1) throw new HsonParserException(posRead, "Unexpected end of stream");
                }

                yield return new Token(emitSource, TokenType.StringLiteral, emit.ToString());
                // Step past the closing '"':
                c = readNext();
            }
            // Don't actually parse the underlying JSON, just recognize its basic tokens:
            else if (Char.IsWhiteSpace((char)c))
            {
                // Don't emit whitespace runs as a token.
                c = readNext();
            }
            else if (c == '{')
            {
                yield return new Token(posRead, TokenType.OpenCurly);
                c = readNext();
            }
            else if (c == '[')
            {
                yield return new Token(posRead, TokenType.OpenBracket);
                c = readNext();
            }
            else if (c == ',')
            {
                yield return new Token(posRead, TokenType.Comma);
                c = readNext();
            }
            else if (c == ':')
            {
                yield return new Token(posRead, TokenType.Colon);
                c = readNext();
            }
            else if (c == ']')
            {
                yield return new Token(posRead, TokenType.CloseBracket);
                c = readNext();
            }
            else if (c == '}')
            {
                yield return new Token(posRead, TokenType.CloseCurly);
                c = readNext();
            }
            else if (isRawChar(c))
            {
                // Collect the whole run of raw characters into a single token:
                SourcePosition runStart = posRead;

                StringBuilder emit = new StringBuilder();
                while (isRawChar(c))
                {
                    emit.Append((char)c);
                    c = readNext();
                }

                yield return new Token(runStart, TokenType.Raw, emit.ToString());
            }
            else
            {
                throw new HsonParserException(posRead, "Unexpected character '{0}'", (char)c);
            }
        }
    }
}
/// <summary>
/// Creates a token of the given type at the given source position.
/// </summary>
/// <param name="source">Position in the source that the token is attributed to.</param>
/// <param name="type">Kind of token.</param>
public Token(SourcePosition source, TokenType type)
    : this()
{
    // The parameterless constructor runs first; the members below are then assigned explicitly.
    this.Source = source;
    this.TokenType = type;
}