/// <summary>
/// Consumes a run of whitespace items and produces a WhiteSpace token, or returns
/// null when the stream does not start with whitespace. On success, streamPos is
/// moved past the consumed run, with the column reset after each line break.
/// </summary>
protected override Token<String> Match(TokenizableDataStream<String> data, ref SourcePosition streamPos, MessageLog log)
{
    SourcePosition start = new SourcePosition(streamPos);
    int newlines = 0;
    int column = streamPos.Column;
    bool consumedAny = false;
    while (!data.AtEnd() && String.IsNullOrWhiteSpace(data.CurrentItem))
    {
        consumedAny = true;
        if (data.CurrentItem == "\n")
        {
            // A line break resets the column counter for the following items.
            newlines++;
            column = 0;
        }
        else
        {
            column++;
        }
        data.Advance();
    }
    if (!consumedAny)
    {
        return null;
    }
    streamPos = new SourcePosition(start.Line + newlines, column, data.CurrentIndex);
    SourcePosition end = new SourcePosition(streamPos);
    return new Token<String>(TokenType.WhiteSpace, null, start, end);
}
/// <summary>
/// Matches a single-line comment ("//" up to, but not including, the next line break)
/// and returns a SingleLineComment token, or null if the stream does not start with "//".
/// On success, streamPos is advanced past the comment.
/// </summary>
protected override Token<string> Match(TokenizableDataStream<string> data, ref SourcePosition streamPos, MessageLog log)
{
    SourcePosition start = new SourcePosition(streamPos);
    if (data.CurrentItem != "/")
    {
        return null;
    }
    data.Advance();
    if (data.CurrentItem != "/")
    {
        // Not a comment; the caller (MatchNext) rewinds via its snapshot.
        return null;
    }
    data.Advance();
    // FIX: accumulate in a StringBuilder instead of repeated string concatenation,
    // which is O(n^2) for long comments. Empty comments yield "" just like the
    // original 'comment ??= ""'.
    var text = new System.Text.StringBuilder();
    while (!data.AtEnd() && data.CurrentItem != "\n")
    {
        text.Append(data.CurrentItem);
        data.Advance();
    }
    streamPos = streamPos.GetModifiedPosition(0, data.CurrentIndex - start.CharIndex, data.CurrentIndex - start.CharIndex);
    SourcePosition end = new SourcePosition(streamPos);
    return new Token<string>(TokenType.SingleLineComment, text.ToString(), start, end);
}
/// <summary>
/// Matches a delimited name literal with backslash escape support ("\\" and an
/// escaped delimiter). Returns null (with a logged error where appropriate) on
/// unknown escapes, embedded line breaks, or a missing closing delimiter.
/// An empty literal is interpreted as 'None'.
/// </summary>
protected override Token<string> Match(TokenizableDataStream<string> data, ref SourcePosition streamPos, MessageLog log)
{
    SourcePosition start = new SourcePosition(streamPos);
    string value = null;
    if (data.CurrentItem != Delimiter)
    {
        return null;
    }
    data.Advance();
    bool inEscape = false;
    for (; !data.AtEnd(); data.Advance())
    {
        if (inEscape)
        {
            inEscape = false;
            switch (data.CurrentItem)
            {
                case "\\":
                case Delimiter:
                    value += data.CurrentItem;
                    continue;
                default:
                    log.LogError(@$"Unrecognized escape sequence: '\{data.CurrentItem}'", new SourcePosition(streamPos));
                    return null;
            }
        }
        if (data.CurrentItem == "\\")
        {
            inEscape = true;
            continue;
        }
        if (data.CurrentItem == Delimiter)
        {
            break;
        }
        if (data.CurrentItem == "\n")
        {
            streamPos = streamPos.GetModifiedPosition(0, data.CurrentIndex - start.CharIndex, data.CurrentIndex - start.CharIndex);
            log.LogError("Name Literals can not contain line breaks!", start, new SourcePosition(streamPos));
            return null;
        }
        value += data.CurrentItem;
    }
    if (data.CurrentItem != Delimiter)
    {
        streamPos = streamPos.GetModifiedPosition(0, data.CurrentIndex - start.CharIndex, data.CurrentIndex - start.CharIndex);
        log.LogError("Name Literal was not terminated properly!", start, new SourcePosition(streamPos));
        return null;
    }
    data.Advance();
    value ??= "None"; //empty name literals should be interpreted as 'None'
    // FIX: the original body ended here without ever returning the matched token,
    // so every successful match fell off the end of the method. Emit the token,
    // mirroring the other matchers in this file.
    // TODO(review): confirm the intended TokenType (Name vs. a dedicated NameLiteral member).
    streamPos = streamPos.GetModifiedPosition(0, data.CurrentIndex - start.CharIndex, data.CurrentIndex - start.CharIndex);
    SourcePosition end = new SourcePosition(streamPos);
    return new Token<string>(TokenType.Name, value, start, end);
}
/// <summary>
/// Matches a numeric literal: decimal integer, "0x" hexadecimal (normalized to
/// decimal text), or a simple "a.b" float. Returns null on malformed numbers.
/// </summary>
protected override Token<String> Match(TokenizableDataStream<String> data, ref SourcePosition streamPos, MessageLog log)
{
    SourcePosition start = new SourcePosition(streamPos);
    String first = SubNumber(data, new Regex("[0-9]"));
    if (first == null)
    {
        return null;
    }
    TokenType type;
    String value;
    if (data.CurrentItem == "x")
    {
        // Hexadecimal form: only "0x..." is accepted.
        if (first != "0")
        {
            return null;
        }
        data.Advance();
        String hex = SubNumber(data, new Regex("[0-9a-fA-F]"));
        if (hex == null || data.CurrentItem == "." || data.CurrentItem == "x")
        {
            return null;
        }
        // Normalize the hex digits to decimal text so downstream code sees one format.
        // NOTE(review): Convert.ToInt32 throws for values above int.MaxValue — confirm
        // whether oversized hex literals are expected to surface as exceptions.
        type = TokenType.IntegerNumber;
        value = Convert.ToInt32(hex, 16).ToString("D");
    }
    else if (data.CurrentItem == ".")
    {
        data.Advance();
        String fraction = SubNumber(data, new Regex("[0-9]"));
        if (fraction == null || data.CurrentItem == "." || data.CurrentItem == "x")
        {
            return null;
        }
        type = TokenType.FloatingNumber;
        value = first + "." + fraction;
    }
    else
    {
        type = TokenType.IntegerNumber;
        value = first;
    }
    streamPos = streamPos.GetModifiedPosition(0, data.CurrentIndex - start.CharIndex, data.CurrentIndex - start.CharIndex);
    SourcePosition end = new SourcePosition(streamPos);
    return new Token<String>(type, value, start, end);
}
/// <summary>
/// Greedily consumes consecutive stream items matching <paramref name="regex"/>.
/// Returns the concatenated run, or null when no item matched.
/// </summary>
private static string SubNumber(TokenizableDataStream<string> data, Regex regex)
{
    string run = null;
    while (!data.AtEnd() && regex.IsMatch(data.CurrentItem))
    {
        run += data.CurrentItem;
        data.Advance();
    }
    return run;
}
/// <summary>
/// Matches a double-quoted string literal. Escape sequences are kept verbatim in
/// the token value; an escaped quote does not terminate the literal. Logs an error
/// and returns null on embedded line breaks or a missing closing quote.
/// </summary>
protected override Token<string> Match(TokenizableDataStream<string> data, ref SourcePosition streamPos, MessageLog log)
{
    SourcePosition start = new SourcePosition(streamPos);
    if (data.CurrentItem != "\"")
    {
        return null;
    }
    data.Advance();
    String value = null;
    // FIX: the original tested 'prev != "\\"' to decide whether a quote was escaped,
    // which mis-handles an escaped backslash before the closing quote (e.g. "C:\\"):
    // prev is "\\" so the real terminator was treated as escaped and the literal
    // could never be closed. Track escape state explicitly instead.
    bool inEscape = false;
    while (!data.AtEnd())
    {
        string cur = data.CurrentItem;
        if (cur == "\n")
        {
            streamPos = streamPos.GetModifiedPosition(0, data.CurrentIndex - start.CharIndex, data.CurrentIndex - start.CharIndex);
            log.LogError("String Literals can not contain line breaks!", start, new SourcePosition(streamPos));
            return null;
        }
        if (cur == "\"" && !inEscape)
        {
            break;
        }
        // A backslash starts an escape only when not itself escaped.
        inEscape = !inEscape && cur == "\\";
        value += cur;
        data.Advance();
    }
    if (data.CurrentItem != "\"")
    {
        streamPos = streamPos.GetModifiedPosition(0, data.CurrentIndex - start.CharIndex, data.CurrentIndex - start.CharIndex);
        log.LogError("String Literal was not terminated properly!", start, new SourcePosition(streamPos));
        return null;
    }
    data.Advance();
    value ??= ""; // empty literal "" is a valid string
    streamPos = streamPos.GetModifiedPosition(0, data.CurrentIndex - start.CharIndex, data.CurrentIndex - start.CharIndex);
    SourcePosition end = new SourcePosition(streamPos);
    return new Token<String>(TokenType.String, value, start, end);
}
/// <summary>
/// Runs Match under a stream snapshot: the stream position is committed when a
/// token is produced and rewound when the match fails.
/// </summary>
public Token<T> MatchNext(TokenizableDataStream<T> data, ref SourcePosition streamPos, MessageLog log)
{
    data.PushSnapshot();
    Token<T> token = Match(data, ref streamPos, log);
    if (token != null)
    {
        data.DiscardSnapshot();
    }
    else
    {
        data.PopSnapshot();
    }
    return token;
}
/// <summary>
/// Matches a string reference literal of the form "$123" or "$-123" and returns a
/// StringRefLiteral token whose value is the (possibly negated) digit run.
/// Returns null when the "$" is not followed by at least one digit.
/// </summary>
protected override Token<string> Match(TokenizableDataStream<string> data, ref SourcePosition streamPos, MessageLog log)
{
    SourcePosition start = new SourcePosition(streamPos);
    if (data.CurrentItem != "$")
    {
        return null;
    }
    data.Advance();
    bool negative = data.CurrentItem == "-";
    if (negative)
    {
        data.Advance();
    }
    var digit = new Regex("[0-9]");
    string digitRun = null;
    while (!data.AtEnd() && digit.IsMatch(data.CurrentItem))
    {
        digitRun += data.CurrentItem;
        data.Advance();
    }
    if (digitRun == null)
    {
        return null;
    }
    string number = negative ? "-" + digitRun : digitRun;
    streamPos = streamPos.GetModifiedPosition(0, data.CurrentIndex - start.CharIndex, data.CurrentIndex - start.CharIndex);
    SourcePosition end = new SourcePosition(streamPos);
    return new Token<string>(TokenType.StringRefLiteral, number, start, end);
}
/// <summary>
/// Greedily consumes consecutive items matching <paramref name="regex"/>, then
/// requires the next item to be a valid terminator (whitespace/EOF, a registered
/// delimiter, or the "x"/"." continuations handled by the number matcher).
/// Returns the run, or null when nothing matched or the terminator is invalid.
/// </summary>
private String SubNumber(TokenizableDataStream<String> data, Regex regex)
{
    String run = null;
    while (!data.AtEnd() && regex.IsMatch(data.CurrentItem))
    {
        run += data.CurrentItem;
        data.Advance();
    }
    String next = data.CurrentItem;
    bool delimited = String.IsNullOrWhiteSpace(next)
                     || Delimiters.Any(d => d.Keyword == next)
                     || next == "x"
                     || next == ".";
    if (run == null || !delimited)
    {
        return null;
    }
    return run;
}
/// <summary>
/// Exercises TokenizableDataStream: CurrentItem, Advance, snapshot push/pop/discard,
/// AtEnd, out-of-range behavior, and LookAhead.
/// FIX: MSTest's Assert.AreEqual signature is (expected, actual); the original passed
/// them reversed, which produces misleading failure messages. No pass/fail behavior
/// changes while the assertions hold.
/// </summary>
public void TestStreamTokenizer()
{
    String source = "123456789ABCDEF";
    Func<List<String>> input = () => source.ToCharArray()
        .Select(c => c.ToString(CultureInfo.InvariantCulture))
        .ToList();
    TokenizableDataStream<String> data = new TokenizableDataStream<String>(input);

    // Take base snapshot so we can return to the start later.
    data.PushSnapshot();

    // Initial position.
    Assert.AreEqual("1", data.CurrentItem);

    // Advance by a count.
    data.Advance(3);
    Assert.AreEqual("4", data.CurrentItem);

    // Snapshot + advance + pop restores the position.
    data.PushSnapshot();
    data.Advance();
    data.PopSnapshot();
    Assert.AreEqual("4", data.CurrentItem);

    // Snapshot + advance + discard keeps the new position.
    data.PushSnapshot();
    data.Advance(3);
    data.DiscardSnapshot();
    Assert.AreEqual("7", data.CurrentItem);

    // Pop the base snapshot: back at the start.
    data.PopSnapshot();
    Assert.AreEqual("1", data.CurrentItem);

    // AtEnd is false mid-stream.
    Assert.IsFalse(data.AtEnd());

    // Advancing past the end flips AtEnd and nulls CurrentItem.
    data.PushSnapshot();
    data.Advance(23);
    Assert.IsTrue(data.AtEnd());
    Assert.IsNull(data.CurrentItem);

    // LookAhead from the start; null once out of range.
    data.PopSnapshot();
    Assert.AreEqual("5", data.LookAhead(4));
    Assert.IsNull(data.LookAhead(42));
}
/// <summary>
/// Exercises TokenizableDataStream: CurrentItem, Advance, snapshot push/pop/discard,
/// AtEnd, out-of-range behavior, and LookAhead.
/// FIX: MSTest's Assert.AreEqual signature is (expected, actual); the original passed
/// them reversed, which produces misleading failure messages. No pass/fail behavior
/// changes while the assertions hold.
/// </summary>
public void TestStreamTokenizer()
{
    String source = "123456789ABCDEF";
    Func<List<String>> input = () => source.ToCharArray()
        .Select(c => c.ToString(CultureInfo.InvariantCulture))
        .ToList();
    TokenizableDataStream<String> data = new TokenizableDataStream<String>(input);

    // Take base snapshot so we can return to the start later.
    data.PushSnapshot();

    // Initial position.
    Assert.AreEqual("1", data.CurrentItem);

    // Advance by a count.
    data.Advance(3);
    Assert.AreEqual("4", data.CurrentItem);

    // Snapshot + advance + pop restores the position.
    data.PushSnapshot();
    data.Advance();
    data.PopSnapshot();
    Assert.AreEqual("4", data.CurrentItem);

    // Snapshot + advance + discard keeps the new position.
    data.PushSnapshot();
    data.Advance(3);
    data.DiscardSnapshot();
    Assert.AreEqual("7", data.CurrentItem);

    // Pop the base snapshot: back at the start.
    data.PopSnapshot();
    Assert.AreEqual("1", data.CurrentItem);

    // AtEnd is false mid-stream.
    Assert.IsFalse(data.AtEnd());

    // Advancing past the end flips AtEnd and nulls CurrentItem.
    data.PushSnapshot();
    data.Advance(23);
    Assert.IsTrue(data.AtEnd());
    Assert.IsNull(data.CurrentItem);

    // LookAhead from the start; null once out of range.
    data.PopSnapshot();
    Assert.AreEqual("5", data.LookAhead(4));
    Assert.IsNull(data.LookAhead(42));
}
/// <summary>
/// Matches a single-quoted name literal made of [0-9a-zA-Z_] characters and returns
/// a Name token (empty value for ''). Logs an error and returns null when the
/// closing quote is missing.
/// </summary>
protected override Token<string> Match(TokenizableDataStream<string> data, ref SourcePosition streamPos, MessageLog log)
{
    SourcePosition start = new SourcePosition(streamPos);
    if (data.CurrentItem != "'")
    {
        return null;
    }
    data.Advance();
    Regex nameChar = new Regex("[0-9a-zA-Z_]");
    String value = null;
    while (!data.AtEnd() && nameChar.IsMatch(data.CurrentItem))
    {
        value += data.CurrentItem;
        data.Advance();
    }
    if (data.CurrentItem != "'")
    {
        streamPos = streamPos.GetModifiedPosition(0, data.CurrentIndex - start.CharIndex, data.CurrentIndex - start.CharIndex);
        log.LogError("Name Literal was not terminated properly!", start, new SourcePosition(streamPos));
        return null;
    }
    data.Advance();
    value ??= ""; // '' is a valid, empty name
    streamPos = streamPos.GetModifiedPosition(0, data.CurrentIndex - start.CharIndex, data.CurrentIndex - start.CharIndex);
    SourcePosition end = new SourcePosition(streamPos);
    return new Token<String>(TokenType.Name, value, start, end);
}
/// <summary>
/// Matches a bare word: a maximal run of items that are not whitespace, not a
/// registered delimiter, and not a quote character. Returns null when no item
/// qualifies.
/// </summary>
protected override Token<String> Match(TokenizableDataStream<String> data, ref SourcePosition streamPos, MessageLog log)
{
    SourcePosition start = new SourcePosition(streamPos);
    String word = null;
    for (String peek = data.CurrentItem;
         !data.AtEnd()
             && !String.IsNullOrWhiteSpace(peek)
             && Delimiters.All(d => d.Keyword != peek)
             && peek != "\""
             && peek != "'";
         peek = data.CurrentItem)
    {
        word += peek;
        data.Advance();
    }
    if (word == null)
    {
        return null;
    }
    streamPos = streamPos.GetModifiedPosition(0, data.CurrentIndex - start.CharIndex, data.CurrentIndex - start.CharIndex);
    SourcePosition end = new SourcePosition(streamPos);
    return new Token<String>(TokenType.Word, word, start, end);
}
/// <summary>
/// Matches a bare word: a maximal run of items that are not whitespace, not a
/// registered delimiter, and not a quote character. A "::" pair is accepted inside
/// a word (some variable names include the C++ scope operator) and word scanning
/// resumes after it. Returns null when no item qualifies.
/// FIX: replaced the 'goto loopStart' with a structured outer loop; behavior is
/// unchanged.
/// </summary>
protected override Token<string> Match(TokenizableDataStream<string> data, ref SourcePosition streamPos, MessageLog log)
{
    SourcePosition start = new SourcePosition(streamPos);
    string peek = data.CurrentItem;
    string word = null;
    while (true)
    {
        // Consume ordinary word characters.
        while (!data.AtEnd()
               && !string.IsNullOrWhiteSpace(peek)
               && Delimiters.All(d => d.Keyword != peek)
               && peek != "\""
               && peek != "'")
        {
            word += peek;
            data.Advance();
            peek = data.CurrentItem;
        }
        //HACK: there are variable names that include the c++ scope operator '::' for some godforsaken reason
        if (peek != ":" || data.LookAhead(1) != ":")
        {
            break;
        }
        word += "::"; // both peeked items are ":", so append the pair directly
        data.Advance();
        data.Advance();
        peek = data.CurrentItem;
    }
    if (word == null)
    {
        return null;
    }
    streamPos = streamPos.GetModifiedPosition(0, data.CurrentIndex - start.CharIndex, data.CurrentIndex - start.CharIndex);
    SourcePosition end = new SourcePosition(streamPos);
    return new Token<string>(TokenType.Word, word, start, end);
}
/// <summary>
/// Matches this matcher's fixed Keyword, case-insensitively, item by item.
/// When SubString is false, the keyword must be followed by whitespace/EOF or a
/// registered delimiter so that e.g. "if" does not match inside "iffy".
/// FIX: simplified the redundant 'SubString || (!SubString && hasDelimiter)' to
/// 'SubString || hasDelimiter', and switched the comparison from
/// CurrentCultureIgnoreCase to OrdinalIgnoreCase — keywords are non-linguistic
/// tokens, and culture-sensitive matching misbehaves under e.g. the Turkish 'i'
/// casing rules (CA1309).
/// </summary>
protected override Token<string> Match(TokenizableDataStream<string> data, ref SourcePosition streamPos, MessageLog log)
{
    SourcePosition start = new SourcePosition(streamPos);
    foreach (char c in Keyword)
    {
        if (!string.Equals(data.CurrentItem, c.ToString(CultureInfo.InvariantCulture), StringComparison.OrdinalIgnoreCase))
        {
            return null;
        }
        data.Advance();
    }
    string peek = data.CurrentItem;
    bool hasDelimiter = string.IsNullOrWhiteSpace(peek) || Delimiters.Any(d => d.Keyword == peek);
    if (!SubString && !hasDelimiter)
    {
        return null;
    }
    streamPos = streamPos.GetModifiedPosition(0, data.CurrentIndex - start.CharIndex, data.CurrentIndex - start.CharIndex);
    SourcePosition end = new SourcePosition(streamPos);
    return new Token<string>(Type, Keyword, start, end);
}
/// <summary>Creates a lexer over the given token stream.</summary>
public LexerBase(TokenizableDataStream<T> data) => Data = data;
/// <summary>Initializes the lexer over the given token stream; callable by derived types only.</summary>
protected LexerBase(TokenizableDataStream<T> data) => Data = data;
/// <summary>
/// Matches a numeric literal: decimal integer, "0x" hexadecimal (normalized to
/// decimal text), or a float with optional fraction, optional e/d exponent, and
/// optional "f" suffix. The literal must be followed by whitespace/EOF or a
/// registered delimiter. Returns null on malformed numbers.
/// FIX: removed the dead 'if (second == null &amp;&amp; data.CurrentItem == "f")' inside
/// the final else — that exact condition is already consumed by the preceding
/// 'else if' branch, so it could never be true there.
/// </summary>
protected override Token<string> Match(TokenizableDataStream<string> data, ref SourcePosition streamPos, MessageLog log)
{
    SourcePosition start = new SourcePosition(streamPos);
    TokenType type;
    string value;
    string first = SubNumber(data, digits);
    if (first == null)
    {
        return null;
    }
    if (data.CurrentItem == "x")
    {
        // Hexadecimal form: only "0x..." is accepted.
        if (first != "0")
        {
            return null;
        }
        data.Advance();
        string hex = SubNumber(data, hexDigits);
        if (hex == null || data.CurrentItem == "." || data.CurrentItem == "x")
        {
            return null;
        }
        // NOTE(review): Convert.ToInt32 throws for values above int.MaxValue —
        // confirm intended handling of oversized hex literals.
        hex = Convert.ToInt32(hex, 16).ToString("D");
        type = TokenType.IntegerNumber;
        value = hex;
    }
    else if (data.CurrentItem == "." || data.CurrentItem.CaseInsensitiveEquals("e") || data.CurrentItem.CaseInsensitiveEquals("d"))
    {
        type = TokenType.FloatingNumber;
        string second = null;
        if (data.CurrentItem == ".")
        {
            data.Advance();
            second = SubNumber(data, digits);
        }
        if (data.CurrentItem.CaseInsensitiveEquals("e") || data.CurrentItem.CaseInsensitiveEquals("d"))
        {
            // Scientific notation; a missing fraction defaults to ".0".
            data.Advance();
            string exponent = SubNumber(data, digits);
            if (exponent == null || data.CurrentItem == "." || data.CurrentItem == "x")
            {
                return null;
            }
            value = $"{first}.{second ?? "0"}e{exponent}";
        }
        else if (second == null && data.CurrentItem == "f")
        {
            // "1f" style: no fraction, explicit float suffix.
            data.Advance();
            value = $"{first}.0";
        }
        else
        {
            if (second == null || data.CurrentItem == "." || data.CurrentItem == "x")
            {
                return null;
            }
            value = $"{first}.{second}";
        }
        if (data.CurrentItem == "f")
        {
            data.Advance(); // optional trailing float suffix
        }
    }
    else
    {
        type = TokenType.IntegerNumber;
        value = first;
    }
    string peek = data.CurrentItem;
    bool hasDelimiter = string.IsNullOrWhiteSpace(peek) || Delimiters.Any(c => c.Keyword == peek);
    if (!hasDelimiter)
    {
        return null;
    }
    streamPos = streamPos.GetModifiedPosition(0, data.CurrentIndex - start.CharIndex, data.CurrentIndex - start.CharIndex);
    SourcePosition end = new SourcePosition(streamPos);
    return new Token<string>(type, value, start, end);
}
/// <summary>
/// Attempts to produce a token from the current position of <paramref name="data"/>.
/// Returns the matched token, or null when the input does not match; errors may be
/// reported through <paramref name="log"/>. Implementations may consume stream items
/// even on a failed match — callers go through MatchNext, which rewinds the stream
/// via snapshots on failure. <paramref name="streamPos"/> is updated to the position
/// past the matched token on success.
/// </summary>
protected abstract Token <T> Match(TokenizableDataStream <T> data, ref SourcePosition streamPos, MessageLog log);