// Reads a symbol from the scanner: if isInitial() holds, one character is
// read, and reading then continues for as long as isSubsequent() holds.
// Nothing is read when isInitial() does not hold.
private static void matchSymbol(Scanner s)
{
    if (s.isInitial())
    {
        s.readChar();
        while (s.isSubsequent())
        {
            s.readChar();
        }
    }
}
// Turns a raw string token from the lexer into its value: the first and
// last characters (the '"' delimiters) are dropped, and backslash escape
// sequences in what remains are resolved.
private static string unescape(string s)
{
    // Everything between the opening and the closing quote.
    var contentLength = s.Length - 2;
    var content = s.Substring(1, contentLength);

    // Regex.Unescape already converts \n, \t etc. into the characters
    // they stand for, so it is reused here instead of a hand-rolled loop.
    return System.Text.RegularExpressions.Regex.Unescape(content);
}
// Reads a symbol from the scanner and reports whether one was found.
// If isInitial() holds, one character is read and reading continues for
// as long as isSubsequent() holds; the result is then true. Otherwise
// nothing is read and the result is false.
private static bool matchSymbol(Scanner s)
{
    var startsSymbol = s.isInitial();
    if (startsSymbol)
    {
        s.readChar();
        while (s.isSubsequent())
        {
            s.readChar();
        }
    }
    return startsSymbol;
}
// Lazily parses every datum found in an embedded resource.
//
// assembly:     the assembly whose manifest resources are searched.
// resourceFile: the manifest resource name; also recorded as the
//               scanner's Filename.
//
// Returns the datums produced by Parser.parse(), in order, stopping at
// the first null result.
//
// Throws Exception when the resource cannot be found. Because this is an
// iterator, nothing (including that check) runs until the result is
// enumerated.
//
// The resource stream and its reader are disposed once enumeration
// finishes or the enumerator is disposed early (e.g. a foreach that
// breaks out), so the resource is no longer leaked.
public static IEnumerable<Datum> ReadDatums(Assembly assembly, string resourceFile)
{
    using (var stream = assembly.GetManifestResourceStream(resourceFile))
    {
        if (stream == null)
            throw new Exception(string.Format("Unable to find '{0}' embedded resource", resourceFile));

        using (var reader = new StreamReader(stream))
        {
            var s = new Scanner(reader) { Filename = resourceFile };
            var p = new Parser(s);
            Datum d;
            while ((d = p.parse()) != null)
            {
                yield return d;
            }
        }
    }
}
// Creates a parser over the given scanner. The scanner is stored in the
// 's' field and the result of its Scan() call is passed to initTokens.
public Parser(Scanner s)
{
    this.s = s;

    var scanned = s.Scan();
    initTokens(scanned);
}
// .NET method support.
// Tells a lone "." apart from a ".symbol" and gives them different token
// types so that the parser can expand each one appropriately.
// Returns null, reading nothing, when the next character is not '.'.
private static TokenType? matchDot(Scanner s)
{
    if (s.peek() != '.')
    {
        return null;
    }

    s.readChar();
    matchSymbol(s);

    // More than one character in the buffer means something was read
    // after the '.', so the token is a symbol rather than a bare dot.
    if (s.sb.Length > 1)
    {
        return TokenType.Symbol;
    }
    return TokenType.Dot;
}
// Matches tokens introduced by '#': "#(" yields VectorOpen, and '#'
// followed by one of t, f, T, F yields Boolean. Anything else after the
// '#' is reported through s.fail. Returns null, reading nothing, when
// the next character is not '#'.
//
// TODO:
// We should implement actual reader macros for this part of things.
private static TokenType? matchHash(Scanner s)
{
    if (s.peek() != '#')
    {
        return null;
    }
    s.readChar();

    if (s.peek() == '(')
    {
        s.readChar();
        return TokenType.VectorOpen;
    }

    if (!s.isOneOf("tfTF"))
    {
        throw s.fail("Unrecognized token");
    }

    s.readChar();
    return TokenType.Boolean;
}
// This code is pretty hairy - the difficulty is telling apart:
//   1. a floating point number, which may be in scientific notation
//      and may simply begin with "."
//   2. a symbol with a dot in it
//   3. a lone "." used for separating lists
// It is doubtful that it is entirely correct - if you find a bug, add it
// as a test to ScannerTest and amend this logic.
//
// Returns null when unsignedNumber finds no number and no sign or '.'
// came first. A sign that is not followed by a number is read as the
// start of a symbol.
private static TokenType? matchNumber(Scanner s)
{
    if (s.peek() == '.')
    {
        return leadingFloat(s);
    }

    if (!s.isOneOf("+-"))
    {
        return unsignedNumber(s);
    }

    // Consume the sign, then decide what it belongs to.
    s.readChar();
    if (s.peek() == '.')
    {
        return leadingFloat(s);
    }

    var token = unsignedNumber(s);
    if (token == null)
    {
        // Not a number after all: the sign begins a symbol.
        matchSymbol(s);
        token = TokenType.Symbol;
    }
    return token;
}
// Handles a token whose next character is '.', optionally preceded by a
// sign that the caller has already read into the buffer.
//
// remainingFloat reads the '.' and whatever float characters follow it.
// If nothing but the '.' was added to the buffer, the token is not a
// number: the rest is read as a symbol, and the result is Dot when the
// buffer holds only the '.' itself, Symbol otherwise. In every other
// case the result of remainingFloat is returned.
//
// The "only the '.' was read" test is made relative to the buffer length
// on entry. Comparing against a fixed length of 1 misses the case where
// a sign is already in the buffer, so input such as "-." or "+.foo" was
// reported as a Double.
private static TokenType? leadingFloat(Scanner s)
{
    // Characters already in the buffer for this token (a sign, if any).
    var prefixLength = s.sb.Length;

    var floatToken = remainingFloat(s);
    if (s.sb.Length == prefixLength + 1)
    {
        // No digits or exponent followed the '.'.
        matchSymbol(s);
        return s.sb.Length == 1 ? TokenType.Dot : TokenType.Symbol;
    }
    return floatToken;
}
// Reads a number that starts with a digit. Returns null, reading
// nothing, when isDigit() does not hold. Otherwise the run of digits is
// read and the result depends on what follows it: an exponent is handed
// to readExponent, a '.' is handed to remainingFloat, and anything else
// makes the token an Integer.
private static TokenType? unsignedNumber(Scanner s)
{
    if (!s.isDigit())
    {
        return null;
    }

    while (s.isDigit())
    {
        s.readChar();
    }

    if (s.isExponent())
    {
        return readExponent(s);
    }

    if (s.peek() == '.')
    {
        return remainingFloat(s);
    }
    return TokenType.Integer;
}
// Reads the rest of a floating point number. The first character (the
// '.') is read unconditionally, then digits are read until something
// else is found. If an exponent is found, the result of readExponent is
// returned; otherwise the token is a Double.
private static TokenType? remainingFloat(Scanner s)
{
    // Skip the '.'
    s.readChar();

    while (s.isDigit() || s.isExponent())
    {
        if (!s.isExponent())
        {
            s.readChar();
            continue;
        }
        return readExponent(s);
    }

    return TokenType.Double;
}
// Reads the exponent part of a floating point number: the exponent
// marker, an optional sign, and a run of digits.
//
// Returns Double when at least one digit follows the marker (and
// optional sign). Otherwise the text is not a number in exponent format:
// the rest is read as a symbol and Symbol is returned.
//
// A sign on its own no longer counts as an exponent, so input such as
// "1e+" is a Symbol (like "1e") rather than a Double.
private static TokenType? readExponent(Scanner s)
{
    s.readChar(); // Skip exponent marker

    // The sign is optional and does not by itself make the exponent valid.
    if (s.isOneOf("+-"))
    {
        s.readChar();
    }

    var sawDigit = false;
    while (s.isDigit())
    {
        sawDigit = true;
        s.readChar();
    }

    if (sawDigit)
    {
        return TokenType.Double;
    }

    // It's not a floating point number in exponent format.
    // Read it as a symbol instead.
    matchSymbol(s);
    return TokenType.Symbol;
}
// Creates a parser over the given scanner. The scanner is stored in the
// 's' field, and 'tokens' becomes an enumerator over the scanner's
// tokens with Space and Comment tokens filtered out.
public Parser(Scanner s)
{
    this.s = s;

    // Whitespace and comments are skipped.
    var significant = s.Scan().Where(
        token => !(token.Type == TokenType.Space || token.Type == TokenType.Comment));
    tokens = significant.GetEnumerator();
}