// Consumes a nestable block comment of the form /* ... /* ... */ ... */.
private static Token BlockComment(Tokenizer t)
{
    int i = 0, nest = 1;
    if (!t.IsReadable(i + 1) || t.Read(i, 2) != "/*")
    {
        return Token.Empty;
    }
    for (i = 2; t.IsReadable(i + 1); i++)
    {
        if (t.Read(i, 2) == "*/")
        {
            ++i;
            if (--nest == 0)
            {
                break;
            }
        }
        if (t.Read(i, 2) == "/*")
        {
            ++i;
            ++nest;
        }
    }
    return t.TakeToken(++i, TokenType.BlockComment);
}
public void DigitStartString(string text, string eText, TokenType eType)
{
    var t = new Tokenizer(text, string.Empty);
    var token = (Token)typeof(Lexer).Invoke("DigitStartString", t);
    Assert.That(token.Text, Is.EqualTo(eText));
    Assert.That(token.TokenType, Is.EqualTo(eType));
}
public void BlockComment(string text, string eText, TokenType eType)
{
    var t = new Tokenizer(text, string.Empty);
    var token = (Token)typeof(Lexer).Invoke("BlockComment", t);
    Assert.That(token.Text, Is.EqualTo(eText));
    Assert.That(token.TokenType, Is.EqualTo(eType));
}
// Entry point: tokenizes the whole source text, collecting regular tokens and error tokens separately.
public static TokenCollection Lex(string text, string fileName)
{
    var tokenList = new List<Token>();
    var errorToken = new List<Token>();
    var t = new Tokenizer(text, fileName);
    while (t.IsReadable())
    {
        LexPartion(t, tokenList, errorToken);
    }
    return new TokenCollection(text, fileName, tokenList, errorToken, t.Position);
}
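// Usage sketch (assumption: the containing class is Lexer, as the unit tests below
// imply via typeof(Lexer); "example.src" is only a placeholder file name):
//
//     TokenCollection tokens = Lexer.Lex("a + b;", "example.src");
//
// Input that no rule recognizes is recorded as OtherString error tokens rather than
// aborting the scan.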
public void BuiltInExpression(string text, string[] eTexts, TokenType[] eTypes)
{
    var t = new Tokenizer(text, string.Empty);
    var tokenList = new List<Token>();
    var errorToken = new List<Token>();
    typeof(Lexer).Invoke("BuiltInExpression", t, tokenList, errorToken);
    Assert.That(tokenList, Is.All.Not.Null);
    Assert.That(List.Map(tokenList).Property("Text"), Is.EqualTo(eTexts));
    Assert.That(List.Map(tokenList).Property("TokenType"), Is.EqualTo(eTypes));
    Assert.That(errorToken.Count, Is.EqualTo(0));
}
// Consumes a run that starts with a decimal digit and may continue with digits,
// ASCII letters, or underscores.
private static Token DigitStartString(Tokenizer t)
{
    int i;
    for (i = 0; t.IsReadable(i); i++)
    {
        if (t.MatchRange(i, '0', '9'))
        {
            continue;
        }
        if (i > 0 && (t.MatchRange(i, 'a', 'z') || t.MatchRange(i, 'A', 'Z') || t.MatchAny(i, "_")))
        {
            continue;
        }
        break;
    }
    return t.TakeToken(i, TokenType.DigitStartString);
}
public void TakeToken(string text, int length, TokenType type, string eText, bool eLine)
{
    Tokenizer t = new Tokenizer(text, "file");
    var token = t.TakeToken(length, type);
    if (!token)
    {
        Assert.That(token, Is.EqualTo(Token.Empty));
    }
    else
    {
        Assert.That(token.Text, Is.EqualTo(eText));
        Assert.That(token.TokenType, Is.EqualTo(type));
        Assert.That(token.Position.File, Is.EqualTo("file"));
        Assert.That(token.Position.Total, Is.EqualTo(0));
        Assert.That(token.Position.Row, Is.EqualTo(0));
        Assert.That(token.Position.Length, Is.EqualTo(length));
        Assert.That(token.Position.Line, Is.EqualTo(1));
        Assert.That(t.Position.Total, Is.EqualTo(length));
        Assert.That(t.Position.Row, Is.EqualTo(eLine ? 0 : length));
        Assert.That(t.Position.Length, Is.EqualTo(0));
        Assert.That(t.Position.Line, Is.EqualTo(eLine ? 2 : 1));
    }
}
// Consumes a brace-delimited embedded expression inside a string literal by
// repeatedly delegating to LexPartion until the matching right brace is reached.
private static void BuiltInExpression(Tokenizer t, List<Token> tokenList, List<Token> errorToken)
{
    if (!t.IsReadable(0) || !t.MatchAny(0, "{"))
    {
        return;
    }
    int nest = 0;
    while (t.IsReadable())
    {
        var tt = LexPartion(t, tokenList, errorToken);
        if (tt == TokenType.LeftBrace)
        {
            ++nest;
        }
        if (tt == TokenType.RightBrace)
        {
            if (--nest == 0)
            {
                break;
            }
        }
    }
}
// Consumes a quoted literal (', ", or `) into quote-separator, plain-text, and
// embedded-expression tokens. A backslash escapes the following character; an
// unterminated literal is emitted as plain text up to the end of input.
private static bool StringLiteral(Tokenizer t, List<Token> tokenList, List<Token> errorToken)
{
    if (!t.IsReadable(0) || !t.MatchAny(0, "\'\"`"))
    {
        return false;
    }
    string quote = t.Read(0, 1);
    bool escape = false;
    tokenList.Add(t.TakeToken(1, TokenType.QuoteSeparator));
    int i;
    for (i = 0; t.IsReadable(i); i++)
    {
        if (!escape && t.MatchAny(i, quote))
        {
            if (i > 0)
            {
                tokenList.Add(t.TakeToken(i, TokenType.PlainText));
            }
            tokenList.Add(t.TakeToken(1, TokenType.QuoteSeparator));
            return true;
        }
        if (!escape && t.MatchAny(i, "{"))
        {
            if (i > 0)
            {
                tokenList.Add(t.TakeToken(i, TokenType.PlainText));
            }
            BuiltInExpression(t, tokenList, errorToken);
            i = -1; // restart scanning from the tokenizer's new position
        }
        else if (t.MatchAny(i, "\\"))
        {
            escape = !escape;
            continue;
        }
        escape = false;
    }
    tokenList.Add(t.TakeToken(i, TokenType.PlainText));
    return true;
}
private static Token TriplePunctuator(Tokenizer t)
{
    TokenType type = TokenType.Unknoun;
    string sub = t.Read(0, 3);
    switch (sub)
    {
        case ":=:": type = TokenType.Swap; break;
        case "=<>": type = TokenType.Incomparable; break;
        case "=><": type = TokenType.Incomparable; break;
        case "<=>": type = TokenType.Incomparable; break;
        case ">=<": type = TokenType.Incomparable; break;
        case "<>=": type = TokenType.Incomparable; break;
        case "><=": type = TokenType.Incomparable; break;
        case "<<=": type = TokenType.LeftCompose | TokenType.LeftPipeline; break;
        case "=<<": type = TokenType.LeftCompose | TokenType.RightPipeline; break;
        case ">>=": type = TokenType.RightCompose | TokenType.LeftPipeline; break;
        case "=>>": type = TokenType.RightCompose | TokenType.RightPipeline; break;
        default: return Token.Empty;
    }
    return t.TakeToken(3, type);
}
public void DisjunctionLexer1()
{
    var tokenizer = new Tokenizer("abc", string.Empty);
    var token = (Token)typeof(Lexer).Invoke("DisjunctionLexer", tokenizer, new Lexer.LexerFunction[]
    {
        t => Token.Empty,
        t => t.TakeToken(2, TokenType.Unknoun)
    });
    Assert.That(token.Text, Is.EqualTo("ab"));
}
private static Token SinglePunctuator(Tokenizer t)
{
    TokenType type = TokenType.Unknoun;
    string sub = t.Read(0, 1);
    switch (sub)
    {
        case ";": type = TokenType.EndExpression; break;
        case ":": type = TokenType.Pair; break;
        case ",": type = TokenType.List; break;
        case ".": type = TokenType.Access; break;
        case "#": type = TokenType.Zone; break;
        case "@": type = TokenType.Attribute; break;
        case "$": type = TokenType.Lambda; break;
        case "?": type = TokenType.Reject; break;
        case "!": type = TokenType.Template; break;
        case "|": type = TokenType.Typeof; break;
        case "&": type = TokenType.Refer; break;
        case "=": type = TokenType.Equal; break;
        case "<": type = TokenType.LessThan; break;
        case ">": type = TokenType.GreaterThan; break;
        case "~": type = TokenType.Combine; break;
        case "+": type = TokenType.Add; break;
        case "-": type = TokenType.Subtract; break;
        case "*": type = TokenType.Multiply; break;
        case "/": type = TokenType.Divide; break;
        case "%": type = TokenType.Modulo; break;
        case "(": type = TokenType.LeftParenthesis; break;
        case ")": type = TokenType.RightParenthesis; break;
        case "[": type = TokenType.LeftBracket; break;
        case "]": type = TokenType.RightBracket; break;
        case "{": type = TokenType.LeftBrace; break;
        case "}": type = TokenType.RightBrace; break;
        default: return Token.Empty;
    }
    return t.TakeToken(1, type);
}
// Consumes one line terminator, treating LF, CR, LF+CR, and CR+LF each as a single token.
private static Token LineTerminator(Tokenizer t)
{
    int i = 0;
    if (t.IsReadable(i) && t.MatchAny(i, "\x0A"))
    {
        i++;
        if (t.IsReadable(i) && t.MatchAny(i, "\x0D"))
        {
            i++;
        }
    }
    else if (t.IsReadable(i) && t.MatchAny(i, "\x0D"))
    {
        i++;
        if (t.IsReadable(i) && t.MatchAny(i, "\x0A"))
        {
            i++;
        }
    }
    return t.TakeToken(i, TokenType.LineTerminator);
}
// Consumes a run of characters outside the ASCII range; used as the fallback for
// input that no other rule recognizes.
private static Token OtherString(Tokenizer t)
{
    int i;
    for (i = 0; t.IsReadable(i); i++)
    {
        if (!t.MatchRange(i, '\x00', '\x7F'))
        {
            continue;
        }
        break;
    }
    return t.TakeToken(i, TokenType.OtherString);
}
// Lexes one token at the current position: line terminators, string literals, and
// punctuator/identifier/number tokens are added to the token list, white space and
// comments are skipped, and anything unrecognized is recorded as an error token.
private static TokenType LexPartion(Tokenizer t, List<Token> tokenList, List<Token> errorToken)
{
    Token temp = LineTerminator(t);
    if (temp)
    {
        tokenList.Add(temp);
        return temp.TokenType;
    }
    temp = DisjunctionLexer(t, WhiteSpace, BlockComment, LineCommnet);
    if (temp)
    {
        return temp.TokenType;
    }
    if (StringLiteral(t, tokenList, errorToken))
    {
        return TokenType.PlainText;
    }
    temp = DisjunctionLexer(t, TriplePunctuator, DoublePunctuator, SinglePunctuator, LetterStartString, DigitStartString);
    if (temp)
    {
        tokenList.Add(temp);
        return temp.TokenType;
    }
    errorToken.Add(OtherString(t));
    return TokenType.OtherString;
}
// Consumes a line comment introduced by // or #!, up to but not including the line terminator.
private static Token LineCommnet(Tokenizer t)
{
    int i = 0;
    if (!t.IsReadable(i + 1))
    {
        return Token.Empty;
    }
    if (t.Read(i, 2) != "//" && t.Read(i, 2) != "#!")
    {
        return Token.Empty;
    }
    for (i = 2; t.IsReadable(i); i++)
    {
        if (t.MatchAny(i, "\x0A\x0D"))
        {
            break;
        }
    }
    return t.TakeToken(i, TokenType.LineCommnet);
}
public void StringLiteral(string text, string[] eTexts, TokenType[] eTypes)
{
    var t = new Tokenizer(text, string.Empty);
    var tokenList = new List<Token>();
    var errorToken = new List<Token>();
    var result = (bool)typeof(Lexer).Invoke("StringLiteral", t, tokenList, errorToken);
    Assert.That(result, Is.EqualTo(eTexts.Length > 0));
    Assert.That(tokenList, Is.All.Not.Null);
    Assert.That(List.Map(tokenList).Property("Text"), Is.EqualTo(eTexts));
    Assert.That(List.Map(tokenList).Property("TokenType"), Is.EqualTo(eTypes));
    Assert.That(errorToken.Count, Is.EqualTo(0));
}
public void DisjunctionLexer2()
{
    var tokenizer = new Tokenizer("abc", string.Empty);
    var token = (Token)typeof(Lexer).Invoke("DisjunctionLexer", tokenizer, new Lexer.LexerFunction[]
    {
        t => Token.Empty,
        t => Token.Empty
    });
    Assert.That(token, Is.EqualTo(Token.Empty));
}
public void IsReadable(string text, int index, bool expected)
{
    Tokenizer t = new Tokenizer(text, string.Empty);
    Assert.That(t.IsReadable(index), Is.EqualTo(expected));
}
// Consumes a run of white space and ASCII control characters, excluding the line
// terminators LF and CR.
private static Token WhiteSpace(Tokenizer t)
{
    int i;
    for (i = 0; t.IsReadable(i); i++)
    {
        if (t.MatchRange(i, '\x00', '\x09') ||
            t.MatchRange(i, '\x0B', '\x0C') ||
            t.MatchRange(i, '\x0E', '\x20') ||
            t.MatchAny(i, "\x7F"))
        {
            continue;
        }
        break;
    }
    return t.TakeToken(i, TokenType.WhiteSpace);
}
public void Read(string text, int index, int length, string expected)
{
    Tokenizer t = new Tokenizer(text, string.Empty);
    Assert.That(t.Read(index, length), Is.EqualTo(expected));
}
public void MatchRange(string text, char start, char end, bool expected)
{
    Tokenizer t = new Tokenizer(text, string.Empty);
    Assert.That(t.MatchRange(0, start, end), Is.EqualTo(expected));
}
public void MatchAny(string text, string list, bool expected)
{
    Tokenizer t = new Tokenizer(text, string.Empty);
    Assert.That(t.MatchAny(0, list), Is.EqualTo(expected));
}
private static Token DoublePunctuator(Tokenizer t)
{
    TokenType type = TokenType.Unknoun;
    string sub = t.Read(0, 2);
    switch (sub)
    {
        case "->": type = TokenType.ReturnArrow; break;
        case "::": type = TokenType.Separator; break;
        case "..": type = TokenType.Range; break;
        case "@@": type = TokenType.Pragma; break;
        case "##": type = TokenType.Macro; break;
        case "??": type = TokenType.Nullable; break;
        case "||": type = TokenType.Or; break;
        case "&&": type = TokenType.And; break;
        case "!!": type = TokenType.Not; break;
        case "++": type = TokenType.Plus; break;
        case "--": type = TokenType.Minus; break;
        case "==": type = TokenType.Equal; break;
        case "<>": type = TokenType.NotEqual; break;
        case "><": type = TokenType.NotEqual; break;
        case "<=": type = TokenType.LessThanOrEqual; break;
        case "=<": type = TokenType.LessThanOrEqual; break;
        case ">=": type = TokenType.GreaterThanOrEqual; break;
        case "=>": type = TokenType.GreaterThanOrEqual; break;
        case "<<": type = TokenType.LeftCompose; break;
        case ">>": type = TokenType.RightCompose; break;
        case ":=": type = TokenType.LeftPipeline; break;
        case "=:": type = TokenType.RightPipeline; break;
        case "+=": type = TokenType.Add | TokenType.LeftPipeline; break;
        case "=+": type = TokenType.Add | TokenType.RightPipeline; break;
        case "-=": type = TokenType.Subtract | TokenType.LeftPipeline; break;
        case "=-": type = TokenType.Subtract | TokenType.RightPipeline; break;
        case "*=": type = TokenType.Multiply | TokenType.LeftPipeline; break;
        case "=*": type = TokenType.Multiply | TokenType.RightPipeline; break;
        case "/=": type = TokenType.Divide | TokenType.LeftPipeline; break;
        case "=/": type = TokenType.Divide | TokenType.RightPipeline; break;
        case "%=": type = TokenType.Modulo | TokenType.LeftPipeline; break;
        case "=%": type = TokenType.Modulo | TokenType.RightPipeline; break;
        default: return Token.Empty;
    }
    return t.TakeToken(2, type);
}
// Consumes an identifier-like run: letters, underscores, and (after the first
// character) digits. A backslash escapes the next printable character so that it
// is accepted as part of the token.
private static Token LetterStartString(Tokenizer t)
{
    int i;
    bool escape = false;
    for (i = 0; t.IsReadable(i); i++)
    {
        if (escape && t.MatchRange(i, '!', '~'))
        {
            escape = false;
            continue;
        }
        if (t.MatchRange(i, 'a', 'z') || t.MatchRange(i, 'A', 'Z') || t.MatchAny(i, "_"))
        {
            escape = false;
            continue;
        }
        if (i > 0 && t.MatchRange(i, '0', '9'))
        {
            escape = false;
            continue;
        }
        if (t.MatchAny(i, "\\"))
        {
            escape = !escape;
            continue;
        }
        break;
    }
    return t.TakeToken(i, TokenType.LetterStartString);
}
// Tries each lexer function in order and returns the first non-empty token.
private static Token DisjunctionLexer(Tokenizer t, params LexerFunction[] func)
{
    foreach (var f in func)
    {
        var token = f(t);
        if (token)
        {
            return token;
        }
    }
    return Token.Empty;
}
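// The LexerFunction delegate itself is not shown in this section; from its call
// sites here and the lambdas in the unit tests, it presumably takes a Tokenizer and
// returns a Token, along the lines of:
//
//     public delegate Token LexerFunction(Tokenizer t);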
public void LineTerminator(string text, string eText, TokenType eType)
{
    var t = new Tokenizer(text, string.Empty);
    var token = (Token)typeof(Lexer).Invoke("LineTerminator", t);
    Assert.That(token.Text, Is.EqualTo(eText));
    Assert.That(token.TokenType, Is.EqualTo(eType));
}