/// <summary>
/// Appends the RTF colour-selection prefix that belongs to the type of
/// <paramref name="token"/>. Only the prefix is written here; the token
/// text itself is not appended by this method.
/// </summary>
/// <param name="token">the token whose type selects the colour index</param>
/// <param name="sb">the builder that receives the RTF output</param>
private void HighlightToken(Token token, StringBuilder sb)
{
    string colorPrefix;

    switch (token.Type)
    {
        case TokenType.GRAMMARCOMMENTLINE: colorPrefix = @"{{\cf1 "; break;
        case TokenType.GRAMMARCOMMENTBLOCK: colorPrefix = @"{{\cf2 "; break;
        case TokenType.DIRECTIVESTRING: colorPrefix = @"{{\cf3 "; break;
        case TokenType.DIRECTIVEKEYWORD: colorPrefix = @"{{\cf4 "; break;
        case TokenType.DIRECTIVEOPEN: colorPrefix = @"{{\cf5 "; break;
        case TokenType.DIRECTIVECLOSE: colorPrefix = @"{{\cf6 "; break;
        case TokenType.ATTRIBUTEKEYWORD: colorPrefix = @"{{\cf7 "; break;
        case TokenType.CS_KEYWORD: colorPrefix = @"{{\cf8 "; break;
        case TokenType.VB_KEYWORD: colorPrefix = @"{{\cf9 "; break;
        case TokenType.DOTNET_KEYWORD: colorPrefix = @"{{\cf10 "; break;
        case TokenType.DOTNET_TYPES: colorPrefix = @"{{\cf11 "; break;
        case TokenType.CS_COMMENTLINE: colorPrefix = @"{{\cf12 "; break;
        case TokenType.CS_COMMENTBLOCK: colorPrefix = @"{{\cf13 "; break;
        case TokenType.CS_STRING: colorPrefix = @"{{\cf14 "; break;
        case TokenType.VB_COMMENTLINE: colorPrefix = @"{{\cf15 "; break;
        case TokenType.VB_COMMENTBLOCK: colorPrefix = @"{{\cf16 "; break;
        case TokenType.VB_STRING: colorPrefix = @"{{\cf17 "; break;
        case TokenType.DOTNET_COMMENTLINE: colorPrefix = @"{{\cf18 "; break;
        case TokenType.DOTNET_COMMENTBLOCK: colorPrefix = @"{{\cf19 "; break;
        case TokenType.DOTNET_STRING: colorPrefix = @"{{\cf20 "; break;
        case TokenType.CODEBLOCKOPEN: colorPrefix = @"{{\cf21 "; break;
        case TokenType.CODEBLOCKCLOSE: colorPrefix = @"{{\cf22 "; break;
        case TokenType.GRAMMARKEYWORD: colorPrefix = @"{{\cf23 "; break;
        case TokenType.GRAMMARARROW: colorPrefix = @"{{\cf24 "; break;
        case TokenType.GRAMMARSTRING: colorPrefix = @"{{\cf25 "; break;
        // every other token type falls back to colour index 0
        default: colorPrefix = @"{{\cf0 "; break;
    }

    sb.Append(colorPrefix);
}
/// <summary>
/// Creates the scanner and registers one compiled regular expression per
/// token type. Registration order is preserved in <c>Tokens</c>, which is
/// the list scanned when a caller does not pass any expected token types.
/// WHITESPACE is the only token type placed on the skip list.
/// </summary>
public Scanner()
{
    Patterns = new Dictionary<TokenType, Regex>();
    Tokens = new List<TokenType>();
    LookAheadToken = null;
    Skipped = new List<Token>();

    SkipList = new List<TokenType>();
    SkipList.Add(TokenType.WHITESPACE);

    // general
    AddPattern(TokenType.WHITESPACE, @"\s+");
    AddPattern(TokenType.EOF, @"^$");

    // grammar comments
    AddPattern(TokenType.GRAMMARCOMMENTLINE, @"//[^\n]*\n?");
    AddPattern(TokenType.GRAMMARCOMMENTBLOCK, @"/\*([^*]+|\*[^/])+(\*/)?");

    // directives
    AddPattern(TokenType.DIRECTIVESTRING, @"@?\""(\""\""|[^\""])*(""|\n)");
    AddPattern(TokenType.DIRECTIVEKEYWORD, @"^(@TinyPG|@Parser|@Scanner|@Grammar|@ParseTree|@TextHighlighter)");
    AddPattern(TokenType.DIRECTIVESYMBOL, @"^(@|(%[^>])|=|"")+?");
    AddPattern(TokenType.DIRECTIVENONKEYWORD, @"[^%@=""]+");
    AddPattern(TokenType.DIRECTIVEOPEN, @"<%");
    AddPattern(TokenType.DIRECTIVECLOSE, @"%>");

    // attributes
    AddPattern(TokenType.ATTRIBUTESYMBOL, @"[^\[\]]");
    AddPattern(TokenType.ATTRIBUTEKEYWORD, @"^(Skip|Color|IgnoreCase|FileAndLine)");
    AddPattern(TokenType.ATTRIBUTENONKEYWORD, @"[^\(\)\]\n\s]+");
    AddPattern(TokenType.ATTRIBUTEOPEN, @"\[\s*");
    AddPattern(TokenType.ATTRIBUTECLOSE, @"\s*\]\s*");

    // keyword and type-name lists
    // NOTE(review): .NET tries alternatives left to right, so a name listed
    // after one of its own prefixes (e.g. "CharEnumerator" after "Char",
    // "WithEvents" after "With") can only ever match as the shorter name.
    // Left untouched here; confirm whether the lists should be reordered.
    AddPattern(TokenType.CS_KEYWORD, @"^(abstract|as|base|break|case|catch|checked|class|const|continue|decimal|default|delegate|double|do|else|enum|event|explicit|extern|false|finally|fixed|float|foreach|for|get|goto|if|implicit|interface|internal|int|in|is|lock|namespace|new|null|object|operator|out|override|params|partial|private|protected|public|readonly|ref|return|sealed|set|sizeof|stackalloc|static|struct|switch|throw|this|true|try|typeof|unchecked|unsafe|ushort|using|var|virtual|void|volatile|while)");
    AddPattern(TokenType.VB_KEYWORD, @"^(AddHandler|AddressOf|Alias|AndAlso|And|Ansi|Assembly|As|Auto|Boolean|ByRef|Byte|ByVal|Call|Case|Catch|CBool|CByte|CChar|CDate|CDec|CDbl|Char|CInt|Class|CLng|CObj|Const|CShort|CSng|CStr|CType|Date|Decimal|Declare|Default|Delegate|Dim|DirectCast|Double|Do|Each|ElseIf|Else|End|Enum|Erase|Error|Event|Exit|False|Finally|For|Friend|Function|GetType|Get|GoSub|GoTo|Handles|If|Implements|Imports|Inherits|Integer|Interface|In|Is|Let|Lib|Like|Long|Loop|Me|Mod|Module|MustInherit|MustOverride|MyBase|MyClass|Namespace|New|Next|Nothing|NotInheritable|NotOverridable|Not|Object|On|Optional|Option|OrElse|Or|Overloads|Overridable|Overrides|ParamArray|Preserve|Private|Property|Protected|Public|RaiseEvent|ReadOnly|ReDim|REM|RemoveHandler|Resume|Return|Select|Set|Shadows|Shared|Short|Single|Static|Step|Stop|String|Structure|Sub|SyncLock|Then|Throw|To|True|Try|TypeOf|Unicode|Until|Variant|When|While|With|WithEvents|WriteOnly|Xor|Source)");
    AddPattern(TokenType.DOTNET_KEYWORD, @"^(abstract|as|base|break|case|catch|checked|class|const|continue|decimal|default|delegate|double|do|else|enum|event|explicit|extern|false|finally|fixed|float|foreach|for|get|goto|if|implicit|interface|internal|int|in|is|lock|namespace|new|null|object|operator|out|override|params|partial|private|protected|public|readonly|ref|return|sealed|set|sizeof|stackalloc|static|struct|switch|throw|this|true|try|typeof|unchecked|unsafe|ushort|using|var|virtual|void|volatile|while)");

    // The literal below must stay on one physical line: it is a verbatim
    // string, so a line break inside it becomes part of the pattern (the
    // previous text was broken in the middle of "Encoding").
    AddPattern(TokenType.DOTNET_TYPES, @"^(Array|AttributeTargets|AttributeUsageAttribute|Attribute|BitConverter|Boolean|Buffer|Byte|Char|CharEnumerator|CLSCompliantAttribute|ConsoleColor|ConsoleKey|ConsoleKeyInfo|ConsoleModifiers|ConsoleSpecialKey|Console|ContextBoundObject|ContextStaticAttribute|Converter|Convert|DateTimeKind|DateTimeOffset|DateTime|DayOfWeek|DBNull|Decimal|Delegate|Double|Enum|Environment.SpecialFolder|EnvironmentVariableTarget|Environment|EventArgs|EventHandler|Exception|FlagsAttribute|GCCollectionMode|GC|Guid|ICloneable|IComparable|IConvertible|ICustomFormatter|IDisposable|IEquatable|IFormatProvider|IFormattable|IndexOutOfRangeException|InsufficientMemoryException|Int16|Int32|Int64|IntPtr|InvalidCastException|InvalidOperationException|InvalidProgramException|MarshalByRefObject|Math|MidpointRounding|NotFiniteNumberException|NotImplementedException|NotSupportedException|Nullable|NullReferenceException|ObjectDisposedException|Object|ObsoleteAttribute|OperatingSystem|OutOfMemoryException|OverflowException|ParamArrayAttribute|PlatformID|PlatformNotSupportedException|Predicate|Random|SByte|SerializableAttribute|Single|StackOverflowException|StringComparer|StringComparison|StringSplitOptions|String|SystemException|TimeSpan|TimeZone|TypeCode|TypedReference|TypeInitializationException|Type|UInt16|UInt32|UInt64|UIntPtr|UnauthorizedAccessException|UnhandledExceptionEventArgs|UnhandledExceptionEventHandler|ValueType|Void|WeakReference|Comparer|Dictionary|EqualityComparer|ICollection|IComparer|IDictionary|IEnumerable|IEnumerator|IEqualityComparer|IList|KeyNotFoundException|KeyValuePair|List|ASCIIEncoding|Decoder|DecoderExceptionFallback|DecoderExceptionFallbackBuffer|DecoderFallback|DecoderFallbackBuffer|DecoderFallbackException|DecoderReplacementFallback|DecoderReplacementFallbackBuffer|EncoderExceptionFallback|EncoderExceptionFallbackBuffer|EncoderFallback|EncoderFallbackBuffer|EncoderFallbackException|EncoderReplacementFallback|EncoderReplacementFallbackBuffer|Encoder|EncodingInfo|Encoding|NormalizationForm|StringBuilder|UnicodeEncoding|UTF32Encoding|UTF7Encoding|UTF8Encoding)");

    // C# code blocks
    AddPattern(TokenType.CS_COMMENTLINE, @"//[^\n]*\n?");
    AddPattern(TokenType.CS_COMMENTBLOCK, @"/\*([^*]+|\*[^/])+(\*/)?");
    AddPattern(TokenType.CS_SYMBOL, @"[^}]");
    AddPattern(TokenType.CS_NONKEYWORD, @"([^""\n\s/;.}\(\)\[\]]|/[^/*]|}[^;])+");
    AddPattern(TokenType.CS_STRING, @"@?[""]([""][""]|[^\""\n])*[""]?");

    // VB code blocks
    AddPattern(TokenType.VB_COMMENTLINE, @"'[^\n]*\n?");
    AddPattern(TokenType.VB_COMMENTBLOCK, @"REM[^\n]*\n?");
    AddPattern(TokenType.VB_SYMBOL, @"[^}]");
    AddPattern(TokenType.VB_NONKEYWORD, @"([^""\n\s/;.}\(\)\[\]]|/[^/*]|}[^;])+");
    AddPattern(TokenType.VB_STRING, @"@?[""]([""][""]|[^\""\n])*[""]?");

    // generic .NET code blocks
    AddPattern(TokenType.DOTNET_COMMENTLINE, @"//[^\n]*\n?");
    AddPattern(TokenType.DOTNET_COMMENTBLOCK, @"/\*([^*]+|\*[^/])+(\*/)?");
    AddPattern(TokenType.DOTNET_SYMBOL, @"[^}]");
    AddPattern(TokenType.DOTNET_NONKEYWORD, @"([^""\n\s/;.}\[\]\(\)]|/[^/*]|}[^;])+");
    AddPattern(TokenType.DOTNET_STRING, @"@?[""]([""][""]|[^\""\n])*[""]?");

    // code block delimiters
    AddPattern(TokenType.CODEBLOCKOPEN, @"\{");
    AddPattern(TokenType.CODEBLOCKCLOSE, @"\};");

    // grammar rules
    AddPattern(TokenType.GRAMMARKEYWORD, @"(Start)");
    AddPattern(TokenType.GRAMMARARROW, @"->");
    AddPattern(TokenType.GRAMMARSYMBOL, @"[^{}\[\]/<>]|[</]$");
    AddPattern(TokenType.GRAMMARNONKEYWORD, @"([^;""\[\n\s/<{\(\)]|/[^/*]|<[^%])+");
    AddPattern(TokenType.GRAMMARSTRING, @"@?[""]([""][""]|[^\""\n])*[""]?");
}

/// <summary>
/// Compiles <paramref name="pattern"/> and registers it for
/// <paramref name="type"/> in <c>Patterns</c>, then appends the type to <c>Tokens</c>.
/// </summary>
/// <param name="type">the token type the pattern recognises</param>
/// <param name="pattern">the regular expression source text</param>
private void AddPattern(TokenType type, string pattern)
{
    Patterns.Add(type, new Regex(pattern, RegexOptions.Compiled));
    Tokens.Add(type);
}
/// <summary>
/// Widens the stored start/end range so that it also covers the range of
/// <paramref name="token"/>. The range only ever grows.
/// </summary>
/// <param name="token">the token whose start and end positions are merged in</param>
public void UpdateRange(Token token)
{
    int candidateStart = token.StartPos;
    if (candidateStart < startpos)
    {
        startpos = candidateStart;
    }

    int candidateEnd = token.EndPos;
    if (candidateEnd > endpos)
    {
        endpos = candidateEnd;
    }
}
/// <summary>
/// Looks ahead for the next token and then consumes it: the cached
/// lookahead is cleared and the scan position, current line and current
/// file are moved to just behind the returned token.
/// </summary>
/// <param name="expectedtokens">token types passed on to <c>LookAhead</c></param>
/// <returns>the token that was consumed</returns>
public Token Scan(params TokenType[] expectedtokens)
{
    Token consumed = LookAhead(expectedtokens);

    // drop the cached lookahead so the next call scans again
    LookAheadToken = null;

    // continue scanning right after the consumed token
    StartPos = consumed.EndPos;
    EndPos = consumed.EndPos;

    // advance the line counter by the number of line feeds in the token text
    int lineFeeds = 0;
    foreach (char c in consumed.Text)
    {
        if (c == '\n')
        {
            lineFeeds++;
        }
    }

    CurrentLine = consumed.Line + lineFeeds;
    CurrentFile = consumed.File;
    return consumed;
}
/// <summary>
/// Determines the next non-skipped token without moving the scan position.
/// Of all candidate patterns that match at the current position the
/// longest match wins; on equal length the smaller token type value wins.
/// Tokens on the skip list are collected and attached to the returned
/// token. The result is cached in <c>LookAheadToken</c>.
/// </summary>
/// <param name="expectedtokens">
/// token types to try; when empty, every registered token type is tried
/// </param>
/// <returns>the best matching token, or an undetermined token when nothing matched</returns>
public Token LookAhead(params TokenType[] expectedtokens)
{
    int scanStart = StartPos;
    int scanEnd = EndPos;
    int lineNumber = CurrentLine;
    string fileName = CurrentFile;

    // reuse an already determined lookahead instead of scanning twice
    if (LookAheadToken != null
        && LookAheadToken.Type != TokenType._UNDETERMINED_
        && LookAheadToken.Type != TokenType._NONE_)
    {
        return LookAheadToken;
    }

    // no expected tokens given: scan for all of them (backward compatible)
    List<TokenType> candidates;
    if (expectedtokens.Length == 0)
    {
        candidates = Tokens;
    }
    else
    {
        candidates = new List<TokenType>(expectedtokens);
        candidates.AddRange(SkipList);
    }

    Token tok;
    bool isSkipped;
    do
    {
        int bestLength = -1;
        TokenType bestType = (TokenType)int.MaxValue;
        string remaining = Input.Substring(scanStart);

        tok = new Token(scanStart, scanEnd);

        foreach (TokenType candidate in candidates)
        {
            Match m = Patterns[candidate].Match(remaining);
            if (!m.Success || m.Index != 0)
            {
                continue; // only matches at the current position count
            }

            bool isLonger = m.Length > bestLength;
            bool isTieWithSmallerType = m.Length == bestLength && candidate < bestType;
            if (isLonger || isTieWithSmallerType)
            {
                bestLength = m.Length;
                bestType = candidate;
            }
        }

        if (bestType >= 0 && bestLength >= 0)
        {
            tok.EndPos = scanStart + bestLength;
            tok.Text = Input.Substring(tok.StartPos, bestLength);
            tok.Type = bestType;
        }
        else if (tok.StartPos == tok.EndPos)
        {
            // nothing matched: report the offending character, or EOF at the end
            tok.Text = tok.StartPos < Input.Length
                ? Input.Substring(tok.StartPos, 1)
                : "EOF";
        }

        // record file, line and column for error reporting
        tok.File = fileName;
        tok.Line = lineNumber;
        if (tok.StartPos < Input.Length)
        {
            tok.Column = tok.StartPos - Input.LastIndexOf('\n', tok.StartPos);
        }

        isSkipped = SkipList.Contains(tok.Type);
        if (isSkipped)
        {
            // step over the skipped token and remember it
            scanStart = tok.EndPos;
            scanEnd = tok.EndPos;
            lineNumber = tok.Line + (tok.Text.Length - tok.Text.Replace("\n", "").Length);
            fileName = tok.File;
            Skipped.Add(tok);
        }
        else
        {
            // hand the skips collected so far to the first non-skipped token
            tok.Skipped = Skipped;
            Skipped = new List<Token>();
        }

        // a file-and-line token may override the tracked file name and line number
        if (tok.Type == FileAndLine)
        {
            Match directive = Patterns[tok.Type].Match(tok.Text);

            Group fileGroup = directive.Groups["File"];
            if (fileGroup.Success)
            {
                fileName = fileGroup.Value;
            }

            Group lineGroup = directive.Groups["Line"];
            if (lineGroup.Success)
            {
                lineNumber = int.Parse(lineGroup.Value);
            }
        }
    }
    while (isSkipped);

    LookAheadToken = tok;
    return tok;
}
/// <summary>
/// Creates a token of the given type that spans the scanner's current
/// start and end positions.
/// </summary>
/// <param name="type">the type assigned to the new token</param>
/// <returns>the newly created token</returns>
public Token GetToken(TokenType type)
{
    return new Token(this.StartPos, this.EndPos) { Type = type };
}
/// <summary>
/// Resets the scanner so that it reads <paramref name="input"/> from the
/// beginning. Line and column counters restart at 1 and any cached
/// lookahead token is discarded.
/// </summary>
/// <param name="input">the text to scan</param>
/// <param name="fileName">the file name stored as the current file; defaults to an empty string</param>
public void Init(string input, string fileName = "")
{
    // text to scan
    this.Input = input;

    // scan window back at the start of the input
    this.StartPos = 0;
    this.EndPos = 0;

    // position bookkeeping
    this.CurrentFile = fileName;
    this.CurrentLine = 1;
    this.CurrentColumn = 1;
    this.CurrentPosition = 0;

    // forget any token from a previous run
    this.LookAheadToken = null;
}
/// <summary>
/// Determines the next non-skipped token without moving the scan position.
/// Of all candidate patterns that match at the current position the
/// longest match wins; on equal length the smaller token type value wins.
/// Tokens on the skip list are collected and attached to the returned
/// token. The result is cached in <c>LookAheadToken</c>.
/// </summary>
/// <param name="expectedtokens">
/// token types to try; when empty, every registered token type is tried
/// </param>
/// <returns>the best matching token, or an undetermined token when nothing matched</returns>
public Token LookAhead(params TokenType[] expectedtokens)
{
    int scanStart = StartPos;

    // reuse an already determined lookahead instead of scanning twice
    if (LookAheadToken != null
        && LookAheadToken.Type != TokenType._UNDETERMINED_
        && LookAheadToken.Type != TokenType._NONE_)
    {
        return LookAheadToken;
    }

    // no expected tokens given: scan for all of them (backward compatible)
    List<TokenType> candidates;
    if (expectedtokens.Length == 0)
    {
        candidates = Tokens;
    }
    else
    {
        candidates = new List<TokenType>(expectedtokens);
        candidates.AddRange(SkipList);
    }

    Token tok;
    bool isSkipped;
    do
    {
        int bestLength = -1;
        TokenType bestType = (TokenType)int.MaxValue;
        string remaining = Input.Substring(scanStart);

        tok = new Token(scanStart, EndPos);

        foreach (TokenType candidate in candidates)
        {
            Match m = Patterns[candidate].Match(remaining);
            if (!m.Success || m.Index != 0)
            {
                continue; // only matches at the current position count
            }

            bool isLonger = m.Length > bestLength;
            bool isTieWithSmallerType = m.Length == bestLength && candidate < bestType;
            if (isLonger || isTieWithSmallerType)
            {
                bestLength = m.Length;
                bestType = candidate;
            }
        }

        if (bestType >= 0 && bestLength >= 0)
        {
            tok.EndPos = scanStart + bestLength;
            tok.Text = Input.Substring(tok.StartPos, bestLength);
            tok.Type = bestType;
        }
        else if (tok.StartPos < tok.EndPos - 1)
        {
            // nothing matched: keep the first character of the range as the text
            tok.Text = Input.Substring(tok.StartPos, 1);
        }

        isSkipped = SkipList.Contains(tok.Type);
        if (isSkipped)
        {
            // step over the skipped token and remember it
            scanStart = tok.EndPos;
            Skipped.Add(tok);
        }
        else
        {
            // hand the skips collected so far to the first non-skipped token
            tok.Skipped = Skipped;
            Skipped = new List<Token>();
        }
    }
    while (isSkipped);

    LookAheadToken = tok;
    return tok;
}
/// <summary>
/// Looks ahead for the next token and then consumes it: the cached
/// lookahead is cleared and the scan position is moved to the end of the
/// returned token.
/// </summary>
/// <param name="expectedtokens">token types passed on to <c>LookAhead</c></param>
/// <returns>the token that was consumed</returns>
public Token Scan(params TokenType[] expectedtokens)
{
    Token consumed = LookAhead(expectedtokens);

    // drop the cached lookahead so the next call scans again
    LookAheadToken = null;

    // continue scanning right after the consumed token
    int resumeAt = consumed.EndPos;
    StartPos = resumeAt;
    EndPos = resumeAt;

    return consumed;
}
/// <summary>
/// Resets the scanner so that it reads <paramref name="input"/> from the
/// beginning. All position counters are set to 0 and any cached lookahead
/// token is discarded.
/// </summary>
/// <param name="input">the text to scan</param>
public void Init(string input)
{
    // text to scan
    this.Input = input;

    // scan window back at the start of the input
    this.StartPos = 0;
    this.EndPos = 0;

    // position bookkeeping
    this.CurrentLine = 0;
    this.CurrentColumn = 0;
    this.CurrentPosition = 0;

    // forget any token from a previous run
    this.LookAheadToken = null;
}