/// <summary>
/// Consumes optional whitespace followed by a single line terminator and produces the output token.
/// </summary>
/// <param name="source">The stream whose preview position is advanced while matching.</param>
/// <returns>
/// Null when the stream is already at EOF or when a character that is neither whitespace nor a
/// line terminator is met; otherwise the token created from the consumed characters.
/// </returns>
private Token CompleteMatch(ISourceStream source)
{
    if (source.EOF())
    {
        return null;
    }

    bool finished = false;
    while (!finished)
    {
        var ch = source.PreviewChar;
        bool isLineTerminator = LineTerminators.IndexOf(ch) >= 0;

        // Anything that is neither a line terminator nor grammar whitespace fails the match.
        if (!isLineTerminator && GrammarData.Grammar.WhitespaceChars.IndexOf(ch) < 0)
        {
            return null;
        }

        source.PreviewPosition++;
        if (isLineTerminator)
        {
            // "\r\n" counts as one line break.
            if (!source.EOF() && ch == '\r' && source.PreviewChar == '\n')
            {
                source.PreviewPosition++;
            }
            finished = true;
        }
        else
        {
            // Whitespace running up to EOF also ends the scan.
            finished = source.EOF();
        }
    }

    return source.CreateToken(this.OutputTerminal);
}
/// <summary>
/// Returns the next token: a buffered one if available, otherwise one scanned from the source.
/// </summary>
/// <returns>
/// A buffered token, the EOF token, the token matched by the selected terminals, or a syntax-error
/// token when no terminal matches the current character.
/// </returns>
private TokenAst ReadToken()
{
    // Serve previously buffered tokens first, in FIFO order.
    if (_bufferedTokens.Count > 0)
    {
        TokenAst buffered = _bufferedTokens[0];
        _bufferedTokens.RemoveAt(0);
        return buffered;
    }

    SkipWhiteSpaces();
    SetTokenStartLocation();

    if (_source.EOF())
    {
        return TokenAst.Create(LRParser.Eof, _context, _source.TokenStart, string.Empty, LRParser.Eof.Name);
    }

    IEnumerable<ITerminal> candidates = SelectTerminals(_source.CurrentChar);
    TokenAst matched = MatchTerminals(candidates);

    if (matched == null)
    {
        return LRParser.CreateSyntaxErrorToken(_context, _source.TokenStart, "Invalid character: '{0}'", _source.CurrentChar);
    }

    // Only a non-error token moves the source position past the token text.
    if (!matched.IsError())
    {
        _source.Position = _source.TokenStart.Position + matched.Length;
    }

    return matched;
}
/// <summary>
/// Scans the <paramref name="source"/> forward until a closing brace or a whitespace character is
/// found and creates a token for this terminal from the characters scanned so far.
/// </summary>
/// <param name="context">
/// The context in which the match is occurring; used to create error tokens.
/// </param>
/// <param name="source">
/// The source to try to match.
/// </param>
/// <returns>
/// An error token if the end of the source is reached before a closing brace or whitespace, or if
/// the scanned text is null, empty or whitespace; the created token otherwise.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="context"/> or <paramref name="source"/> is null.
/// </exception>
public override Token TryMatch([NotNull] ParsingContext context, [NotNull] ISourceStream source)
{
    if (context == null)
    {
        throw new ArgumentNullException(nameof(context));
    }

    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    for (; !source.EOF(); ++source.PreviewPosition)
    {
        var c = source.PreviewChar;
        if (c != '}' && !char.IsWhiteSpace(c))
        {
            continue;
        }

        // Terminator found: everything scanned so far becomes the token text.
        var token = source.CreateToken(this);
        if (string.IsNullOrWhiteSpace(token.ValueString))
        {
            return context.CreateErrorToken("Malformed MEMBERNAME or STRING: no terminal encountered");
        }

        return token;
    }

    return context.CreateErrorToken("Malformed MEMBERNAME or STRING: no terminal encountered");
}
/// <summary>
/// Fast path for simple identifiers: one allowed first character followed by allowed characters
/// and then a whitespace or delimiter character.
/// </summary>
/// <param name="context">The parsing context (not used by this method).</param>
/// <param name="source">The stream whose preview position is advanced while scanning.</param>
/// <returns>
/// The identifier token, or null when the fast path does not apply and the full algorithm
/// should be used instead.
/// </returns>
protected override Token QuickParse(ParsingContext context, ISourceStream source)
{
    if (!_allFirstCharsSet.Contains(source.PreviewChar))
    {
        return null;
    }

    // Consume the first character, then every following character from the allowed set.
    do
    {
        source.PreviewPosition++;
    }
    while (_allCharsSet.Contains(source.PreviewChar) && !source.EOF());

    // Not followed by a terminator: give up and let the full algorithm decide.
    if (!this.Grammar.IsWhitespaceOrDelimiter(source.PreviewChar))
    {
        return null;
    }

    var token = source.CreateToken(this.OutputTerminal);
    bool violatesCaseRestriction = CaseRestriction != CaseRestriction.None
        && !CheckCaseRestriction(token.ValueString);
    if (violatesCaseRestriction)
    {
        return null;
    }

    // The token value is deliberately NOT converted to a common case for case-insensitive
    // grammars; identifiers stay as written and the interpreter is responsible for
    // case-insensitive lookups.
    CheckReservedWord(token);
    return token;
}
/// <summary>
/// Advances the preview position over whitespace. A line feed is skipped only when the character
/// after it is a space, tab, line feed, carriage return, vertical tab or semicolon.
/// </summary>
/// <param name="source">The stream whose preview position is advanced.</param>
public override void SkipWhitespace(ISourceStream source)
{
    for (; !source.EOF(); source.PreviewPosition++)
    {
        var current = source.PreviewChar;
        if (current == '\r' || current == '\v' || current == ' ' || current == '\t')
        {
            continue;
        }

        if (current != '\n')
        {
            return;
        }

        // A line feed followed by anything else is left in the stream for the caller.
        var following = source.NextPreviewChar;
        bool lineFeedIsSkippable = following == ' ' || following == '\t' || following == '\n'
            || following == '\r' || following == '\v' || following == ';';
        if (!lineFeedIsSkippable)
        {
            return;
        }
    }
}
/// <summary>
/// Matches a comment that begins with the start symbol and runs up to and including the end symbol.
/// </summary>
/// <remarks>
/// Reaching the end of the text without an end symbol yields a comment token for line comments
/// and an "Unclosed comment block" error token otherwise. This also covers a start symbol that is
/// the very last thing in the text; previously that case skipped the loop and threw
/// <see cref="NotSupportedException"/>. The search for the end symbol is ordinal so that it does
/// not depend on the current culture.
/// </remarks>
/// <param name="context">The compiler context passed to the created tokens.</param>
/// <param name="source">The stream whose position is advanced while matching.</param>
/// <returns>Null if the source does not start with the start symbol; otherwise a comment or error token.</returns>
public override TokenAst TryMatch(CompilerContext context, ISourceStream source)
{
    if (!source.MatchSymbol(_startSymbol, false))
    {
        return null;
    }

    source.Position += _startSymbol.Length;
    while (!source.EOF())
    {
        int firstCharPos = source.Text.IndexOf(_endSymbol, source.Position, StringComparison.Ordinal);
        if (firstCharPos < 0)
        {
            break; // no end symbol in the rest of the text
        }

        source.Position = firstCharPos;
        if (source.MatchSymbol(_endSymbol, false))
        {
            source.Position += _endSymbol.Length;
            return TokenAst.Create(this, context, source.TokenStart, source.GetLexeme());
        }

        source.Position++;
    }

    // End of text reached without an end symbol.
    source.Position = source.Text.Length;
    if (_isLineComment)
    {
        return TokenAst.Create(this, context, source.TokenStart, source.GetLexeme());
    }

    return Grammar.CreateSyntaxErrorToken(context, source.TokenStart, "Unclosed comment block");
}
/// <summary>
/// Matches a string delimited by single or double quotes; a backslash skips the character after it.
/// </summary>
/// <param name="context">The parsing context used to create the error token.</param>
/// <param name="source">The stream whose preview position is advanced while matching.</param>
/// <returns>
/// Null if the current character is not a quote, the string token when the matching closing quote
/// is found, or an error token when the end of the source is reached first.
/// </returns>
private Token MatchQuoted(ParsingContext context, ISourceStream source)
{
    char quoteChar = source.PreviewChar;
    if (quoteChar != '\'' && quoteChar != '"')
    {
        return null;
    }

    // Step over the opening quote, then walk one character per iteration.
    for (source.PreviewPosition++; !source.EOF(); source.PreviewPosition++)
    {
        var c = source.PreviewChar;
        if (c == quoteChar)
        {
            source.PreviewPosition++;
            return source.CreateToken(this.OutputTerminal);
        }

        if (c == '\\')
        {
            // Escape: skip the backslash here; the loop increment skips the escaped character.
            ++source.PreviewPosition;
        }
    }

    return context.CreateErrorToken("Unbalanced quoted string");
}
/// <summary>
/// Advances the preview position over whitespace: the explicitly listed characters plus any
/// character in Unicode category Zs (space separator).
/// </summary>
/// <remarks>
/// Previously a Zs character other than ' ' hit <c>continue</c> without advancing the position,
/// so the loop never terminated. Every whitespace character now advances the position.
/// </remarks>
/// <param name="source">The stream whose preview position is advanced.</param>
public override void SkipWhitespace(ISourceStream source)
{
    while (!source.EOF())
    {
        var ch = source.PreviewChar;
        switch (ch)
        {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
            case '\v':
            // NOTE(review): '\u2085' is SUBSCRIPT FIVE; possibly '\u0085' (NEXT LINE) was intended - confirm.
            case '\u2085':
            case '\u2028':
            case '\u2029':
                break;
            default:
                // Anything outside Unicode class Zs ends the whitespace run.
                if (char.GetUnicodeCategory(ch) != UnicodeCategory.SpaceSeparator)
                {
                    return;
                }
                break;
        }

        source.PreviewPosition++;
    }
}
/// <summary>
/// Matches a single- or double-quoted string, treating a backslash as an escape for the next character.
/// </summary>
/// <param name="context">The parsing context used to create the error token.</param>
/// <param name="source">The stream whose preview position is advanced while matching.</param>
/// <returns>
/// Null when the current character is not a quote; the string token once the same quote character
/// is met again; an error token if the source ends before that.
/// </returns>
private Token MatchQuoted(ParsingContext context, ISourceStream source)
{
    char quoteChar = source.PreviewChar;
    bool startsWithQuote = quoteChar == '\'' || quoteChar == '"';
    if (!startsWithQuote)
    {
        return null;
    }

    source.PreviewPosition++; // opening quote
    while (!source.EOF())
    {
        var current = source.PreviewChar;
        if (current == quoteChar)
        {
            source.PreviewPosition++; // closing quote
            return source.CreateToken(this.OutputTerminal);
        }

        if (current == '\\')
        {
            // Skip the backslash; the escaped character is skipped by the increment below.
            ++source.PreviewPosition;
        }

        source.PreviewPosition++;
    }

    return context.CreateErrorToken("Unbalanced quoted string");
}
/// <summary>
/// Advances the preview position over spaces, tabs and non-breaking spaces, and over line breaks
/// unless the language is line-based.
/// </summary>
/// <param name="source">The stream whose preview position is advanced.</param>
public override void SkipWhitespace(ISourceStream source)
{
    // This method is a copy of the base one with the addition of the non-breaking space
    // c.f. https://github.com/sebastienros/fluid/issues/138
    const char NonBreakingSpace = (char)160;

    for (; !source.EOF(); source.PreviewPosition++)
    {
        var c = source.PreviewChar;
        if (c == ' ' || c == '\t' || c == NonBreakingSpace)
        {
            continue;
        }

        bool isLineBreak = c == '\r' || c == '\n' || c == '\v';

        // Stop on any other character, and on line breaks when the language is line-based.
        if (!isLineBreak || UsesNewLine)
        {
            return;
        }
    }
}
/// <summary>
/// Searches forward from the preview position for one of the end symbols and creates the comment token.
/// </summary>
/// <param name="context">The parsing context (not used by this method).</param>
/// <param name="source">The stream whose preview position is advanced while searching.</param>
/// <returns>
/// The token ending at the end symbol (the symbol itself is consumed only when this is not a line
/// comment), or null when no end symbol is found.
/// </returns>
private Token CompleteMatch(ParsingContext context, ISourceStream source)
{
    while (!source.EOF())
    {
        // With one end symbol search for it directly; otherwise search for any first character.
        int candidatePos = EndSymbols.Count == 1
            ? source.Text.IndexOf(EndSymbols[0], source.PreviewPosition)
            : source.Text.IndexOfAny(_endSymbolsFirsts, source.PreviewPosition);

        if (candidatePos < 0)
        {
            source.PreviewPosition = source.Text.Length;
            return null; // indicating error
        }

        // The candidate may or may not start a complete end symbol; verify each one.
        source.PreviewPosition = candidatePos;
        foreach (string endSymbol in EndSymbols)
        {
            if (!source.MatchSymbol(endSymbol))
            {
                continue;
            }

            // For a line comment the line break is left in the stream; it might be important
            // to have a separate LF token.
            if (!_isLineComment)
            {
                source.PreviewPosition += endSymbol.Length;
            }

            return source.CreateToken(this.OutputTerminal);
        }

        source.PreviewPosition++; // false alarm, retry from the next character
    }

    // Reached when a first character was found but never the full end symbol.
    return null;
}
/// <summary>
/// Reads the body of a string literal: everything between the start symbol and the terminating
/// occurrence of the same symbol.
/// </summary>
/// <remarks>
/// Two defects of the earlier implementation are fixed here:
/// 1. An end symbol counts as escaped only when preceded by an ODD number of escape characters,
///    so a literal ending in an escaped escape character is terminated correctly.
/// 2. The search loop no longer stops silently at end of text. A start symbol at the very end
///    of the text, or an escaped/doubled end symbol as its last characters, is now reported as
///    a malformed literal instead of returning false.
/// </remarks>
/// <param name="source">The stream whose position is advanced while reading.</param>
/// <param name="details">Scan details; receives <c>Body</c> on success or <c>Error</c> when malformed.</param>
/// <returns>False if the start symbol could not be read; true otherwise, including malformed literals.</returns>
protected override bool ReadBody(ISourceStream source, ScanDetails details)
{
    if (!ReadStartSymbol(source, details))
    {
        return false;
    }

    bool escapeEnabled = !details.IsSet(ScanFlags.DisableEscapes);
    bool ignoreCase = IsSet(TermOptions.SpecialIgnoreCase);
    int start = source.Position;
    string startS = details.ControlSymbol;
    string startS2 = startS + startS; // doubled start symbol

    // The next line break only matters when line breaks are NOT allowed inside the literal;
    // -1 means "don't care".
    int nlPos = details.IsSet(ScanFlags.AllowLineBreak) ? -1 : source.Text.IndexOf('\n', source.Position);

    while (true)
    {
        int endPos = source.Text.IndexOf(startS, source.Position);

        // Malformed: either no end symbol at all, or a line break comes before it.
        bool malformed = endPos < 0 || (nlPos >= 0 && nlPos < endPos);
        if (malformed)
        {
            // Set source position for recovery: move to the next line if line break is not allowed.
            if (nlPos > 0)
            {
                endPos = nlPos;
            }
            if (endPos > 0)
            {
                source.Position = endPos + 1;
            }
            details.Error = "Mal-formed string literal - cannot find termination symbol.";
            return true;
        }

        // Count the run of escape characters directly before the end symbol (within the body);
        // only an odd count escapes the end symbol itself.
        if (escapeEnabled)
        {
            bool escaped = false;
            for (int p = endPos - 1; p >= start && source.Text[p] == EscapeChar; p--)
            {
                escaped = !escaped;
            }
            if (escaped)
            {
                source.Position = endPos + startS.Length;
                continue; // keep searching for the real end symbol
            }
        }

        // A doubled end symbol is part of the body, not the terminator.
        source.Position = endPos;
        if (details.IsSet(ScanFlags.AllowDoubledQuote) && source.MatchSymbol(startS2, ignoreCase))
        {
            source.Position = endPos + startS.Length * 2;
            continue;
        }

        // Normal end symbol: capture the body and step past the terminator.
        details.Body = source.Text.Substring(start, endPos - start);
        source.Position = endPos + startS.Length;
        return true;
    }
}
/// <summary>
/// Matches a comment that begins with <c>StartSymbol</c> and ends with one of the <c>EndSymbols</c>.
/// </summary>
/// <remarks>
/// Reaching the end of the text without an end symbol yields a comment token for line comments
/// and an "Unclosed comment block" error token otherwise. This now also applies when the start
/// symbol is the very last thing in the text; previously that case skipped the loop and
/// returned null.
/// </remarks>
/// <param name="context">The compiler context passed to the created tokens.</param>
/// <param name="source">The stream whose position is advanced while matching.</param>
/// <returns>Null if the source does not start with the start symbol; otherwise a comment or error token.</returns>
public override Token TryMatch(CompilerContext context, ISourceStream source)
{
    bool ignoreCase = !Grammar.CaseSensitive;

    // Check starting symbol
    if (!source.MatchSymbol(StartSymbol, ignoreCase))
    {
        return null;
    }

    // Find end symbol
    source.Position += StartSymbol.Length;
    while (!source.EOF())
    {
        int firstCharPos;
        if (EndSymbols.Count == 1)
        {
            firstCharPos = source.Text.IndexOf(EndSymbols[0], source.Position);
        }
        else
        {
            firstCharPos = source.Text.IndexOfAny(_endSymbolsFirsts, source.Position);
        }

        if (firstCharPos < 0)
        {
            break; // nothing that could start an end symbol is left
        }

        // We found a character that might start an end symbol; let's see if it is true.
        source.Position = firstCharPos;
        foreach (string endSymbol in EndSymbols)
        {
            if (source.MatchSymbol(endSymbol, ignoreCase))
            {
                source.Position += endSymbol.Length;
                return Token.Create(this, context, source.TokenStart, source.GetLexeme());
            }
        }

        source.Position++; // move to the next char and try again
    }

    // End of text reached without an end symbol. For a line comment it is fine to hit EOF
    // without a final line break; a block comment is unclosed.
    source.Position = source.Text.Length;
    if (_isLineComment)
    {
        return Token.Create(this, context, source.TokenStart, source.GetLexeme());
    }

    return Grammar.CreateSyntaxErrorToken(context, source.TokenStart, "Unclosed comment block");
}
/// <summary>
/// Reads the body of a number literal: digits, at most one decimal separator and, for decimal
/// numbers, at most one exponent symbol with an optional sign.
/// </summary>
/// <param name="source">The stream whose position is advanced while reading.</param>
/// <param name="details">
/// Scan details; receives <c>Body</c>, the <c>HasDot</c>/<c>HasExp</c> flags and, for an exponent,
/// the exponent character in <c>ControlSymbol</c>.
/// </param>
/// <returns>Always true.</returns>
protected override bool ReadBody(ISourceStream source, ScanDetails details)
{
    // The body may start after source.TokenStart if something was skipped before it.
    int bodyStart = source.Position;
    string digits = GetDigits(details);
    bool isDecimal = !details.IsSet(ScanFlags.NonDecimal);
    bool allowFloat = !IsSet(TermOptions.NumberIntOnly);

    bool scanning = true;
    while (scanning && !source.EOF())
    {
        char current = source.CurrentChar;

        if (digits.IndexOf(current) >= 0)
        {
            // 1. Plain digit.
            source.Position++;
        }
        else if (current == DecimalSeparator && allowFloat)
        {
            // 2. Decimal separator. Rejected after an earlier dot/exponent, and - unless the
            //    number may start or end with a dot - when it is not followed by a digit.
            bool rejectDot = details.IsSet(ScanFlags.HasDotOrExp)
                || (digits.IndexOf(source.NextChar) < 0 && !IsSet(TermOptions.NumberAllowStartEndDot));
            if (rejectDot)
            {
                scanning = false;
            }
            else
            {
                details.Flags |= ScanFlags.HasDot;
                source.Position++;
            }
        }
        else if (allowFloat && isDecimal && details.ControlSymbol == null && ExponentSymbols.IndexOf(current) >= 0)
        {
            // 3. First exponent symbol (decimal numbers only); must be followed by a sign or a digit.
            char next = source.NextChar;
            bool signFollows = next == '-' || next == '+';
            bool digitFollows = digits.IndexOf(next) >= 0;
            if (!signFollows && !digitFollows)
            {
                scanning = false;
            }
            else
            {
                details.ControlSymbol = current.ToString(); // remember the exponent character
                details.Flags |= ScanFlags.HasExp;
                source.Position++;
                if (signFollows)
                {
                    source.Position++; // consume the sign here so the next iteration sees a digit
                }
            }
        }
        else
        {
            // 4. Anything else ends the number.
            scanning = false;
        }
    }

    details.Body = source.Text.Substring(bodyStart, source.Position - bodyStart);
    return true;
}
/// <summary>Skips whitespace characters in the input stream.</summary>
/// <remarks>
/// Spaces and tabs are always skipped; carriage return, line feed and vertical tab are skipped
/// only when <c>UsesNewLine</c> is false. Override this method if your language has non-standard
/// whitespace characters.
/// </remarks>
/// <param name="source">Source stream.</param>
public virtual void SkipWhitespace(ISourceStream source)
{
    while (!source.EOF())
    {
        var candidate = source.PreviewChar;
        if (candidate == '\r' || candidate == '\n' || candidate == '\v')
        {
            // Line breaks are significant in line-based languages and must stay in the stream.
            if (UsesNewLine)
            {
                return;
            }
        }
        else if (candidate != ' ' && candidate != '\t')
        {
            return;
        }

        source.PreviewPosition++;
    }
}
/// <summary>
/// Matches a quoted string literal, optionally prefixed with '@' for a verbatim string.
/// </summary>
/// <remarks>
/// In a non-verbatim string a backslash skips the following character and a line terminator
/// aborts the match. The token value is the text between the quotes; for non-verbatim strings the
/// sequences \', \" and \\ are replaced with the characters they stand for.
/// </remarks>
/// <param name="context">The compiler context passed to the created token.</param>
/// <param name="source">The stream whose position is advanced while matching.</param>
/// <returns>The string token, or null when the source does not hold a complete string literal here.</returns>
public override TokenAst TryMatch(CompilerContext context, ISourceStream source)
{
    bool isVerbatim = source.CurrentChar == '@';
    if (isVerbatim)
    {
        source.Position++;
    }

    if (!IsCurrentQuote(source))
    {
        return null;
    }

    source.Position++;
    int bodyStart = source.Position; // first character after the opening quote

    while (!source.EOF())
    {
        if (!isVerbatim)
        {
            var c = source.CurrentChar;
            if (c == '\\')
            {
                // TODO: Escape processing
                source.Position += 2;
                continue;
            }

            // A single-line string may not contain a line terminator.
            if (ParserData.LineTerminators.IndexOf(c) >= 0)
            {
                return null;
            }
        }

        if (IsCurrentQuote(source))
        {
            break;
        }

        source.Position++;
    }

    if (!IsCurrentQuote(source))
    {
        return null;
    }

    source.Position++; // closing quote

    string lexeme = source.GetLexeme();
    string body = source.Text.Substring(bodyStart, source.Position - bodyStart - 1);

    // TODO: handle this in escape processing
    if (!isVerbatim)
    {
        body = body.Replace("\\'", "'").Replace("\\\"", "\"").Replace("\\\\", "\\");
    }

    return TokenAst.Create(this, context, source.TokenStart, lexeme, body);
}
/// <summary>
/// Tries to match an unquoted string from the source, collecting characters until a terminating
/// '=', ',' or '}' is met outside of any nested braces.
/// </summary>
/// <param name="context">
/// The context in which the match is occurring.
/// </param>
/// <param name="source">
/// The source to try to match.
/// </param>
/// <returns>
/// An error token if the source ends first or an unescaped single quote is met; otherwise the
/// member-name token (terminated by '=') or the string token (terminated by ',' or '}').
/// </returns>
private Token TryMatchUnquoted(ParsingContext context, ISourceStream source)
{
    int braceDepth = 0;
    var text = new StringBuilder();

    for (; !source.EOF(); ++source.PreviewPosition)
    {
        var c = source.PreviewChar;
        bool atTopLevel = braceDepth <= 0;

        if (c == '\\')
        {
            // Escape: step onto the escaped character so that it is the one appended below.
            ++source.PreviewPosition;
        }
        else if (c == '\'')
        {
            return context.CreateErrorToken("Malformed MEMBERNAME or STRING: unescaped quote encountered");
        }
        else if (c == '=' && atTopLevel)
        {
            return this.CreateMemberNameToken(source, text);
        }
        else if ((c == ',' || c == '}') && atTopLevel)
        {
            return this.CreateStringToken(context, source, text);
        }
        else if (c == '{')
        {
            ++braceDepth;
        }
        else if (c == '}')
        {
            --braceDepth;
        }

        text.Append(source.PreviewChar);
    }

    return context.CreateErrorToken("Malformed MEMBERNAME or STRING: no terminal encountered");
}
/// <summary>
/// Reads an identifier body up to the first whitespace/delimiter or disallowed character.
/// </summary>
/// <param name="source">The stream whose preview position is advanced while reading.</param>
/// <param name="details">Token details; receives <c>Body</c> on success.</param>
/// <returns>
/// False when an escape sequence reports an error, when no characters were collected, or when the
/// case restriction check fails; true otherwise.
/// </returns>
protected override bool ReadBody(ISourceStream source, CompoundTokenDetails details)
{
    int firstPosition = source.PreviewPosition;
    bool escapesAllowed = details.IsSet((short)IdOptions.AllowsEscapes);
    CharList collected = new CharList();

    for (; !source.EOF(); source.PreviewPosition++)
    {
        char ch = source.PreviewChar;
        if (Grammar.IsWhitespaceOrDelimiter(ch))
        {
            break;
        }

        if (escapesAllowed && ch == this.EscapeChar)
        {
            ch = ReadUnicodeEscape(source, details);

            // ReadUnicodeEscape leaves the position right after the escape digits. Back up one
            // character so that the escaped char appears to sit at the last digit of the sequence;
            // the loop increment then moves on to the character that follows it.
            source.PreviewPosition--;
            if (details.Error != null)
            {
                return false;
            }
        }

        if (!CharOk(ch, source.PreviewPosition == firstPosition))
        {
            break;
        }

        // Characters from the "remove" categories are consumed but not added to the identifier.
#if NETSTANDARD
        UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(ch);
#else
        UnicodeCategory category = char.GetUnicodeCategory(ch);
#endif
        if (!this.CharsToRemoveCategories.Contains(category))
        {
            collected.Add(ch);
        }
    }

    if (collected.Count == 0)
    {
        return false;
    }

    details.Body = new string(collected.ToArray());
    return CheckCaseRestriction(details.Body) && !string.IsNullOrEmpty(details.Body);
}
/// <summary>
/// Completes a comment match. Level 0 is a single-line comment; any other level is closed by
/// "]" + (level - 1) '=' characters + "]".
/// </summary>
/// <param name="context">The parsing context; its <c>VsLineScanState</c> is read and updated.</param>
/// <param name="source">The stream whose preview position is advanced while matching.</param>
/// <param name="commentLevel">0 for a single-line comment, otherwise the level of the long comment.</param>
/// <returns>
/// The comment token, or null when no closing bracket of the required level is found (the level
/// is then stored in <c>VsLineScanState.TokenSubType</c>).
/// </returns>
private Token CompleteMatch(ParsingContext context, ISourceStream source, byte commentLevel)
{
    if (commentLevel == 0)
    {
        // Single-line comment: runs to the next line break or to the end of the text.
        int lineEnd = source.Text.IndexOfAny(new char[] { '\n', '\r', '\v' }, source.PreviewPosition);
        source.PreviewPosition = lineEnd > 0 ? lineEnd : source.Text.Length;
        return source.CreateToken(this.OutputTerminal);
    }

    int requiredEquals = (int)commentLevel - 1;
    for (; !source.EOF(); source.PreviewPosition++)
    {
        string remainder = source.Text.Substring(source.PreviewPosition);
        foreach (Match closer in Regex.Matches(remainder, @"\](=*)\]"))
        {
            if (closer.Groups[1].Value.Length != requiredEquals)
            {
                continue;
            }

            source.PreviewPosition += closer.Index + closer.Length;
            if (context.VsLineScanState.Value == 0)
            {
                return source.CreateToken(this.OutputTerminal);
            }

            // We are using line-mode and the begin terminal was on a previous line.
            SourceLocation tokenStart = new SourceLocation();
            tokenStart.Position = 0;
            string lexeme = source.Text.Substring(0, source.PreviewPosition);
            context.VsLineScanState.Value = 0;
            return new Token(this, tokenStart, lexeme, null);
        }
    }

    // The full match wasn't found, store the state for future parsing.
    context.VsLineScanState.TokenSubType = commentLevel;
    return null;
}
/// <summary>
/// Tries to match a quoted string from the source. The first character (the opening quote) is
/// skipped; characters are collected until the closing single quote, after which whitespace is
/// skipped and the next character decides the token kind.
/// </summary>
/// <param name="context">
/// The context in which the match is occurring.
/// </param>
/// <param name="source">
/// The source to try to match.
/// </param>
/// <returns>
/// An error token if the source ends before a token is produced; otherwise the member-name token
/// when the character after the closing quote is '=', or the string token for any other character.
/// </returns>
private Token TryMatchQuoted(ParsingContext context, ISourceStream source)
{
    ++source.PreviewPosition; // step over the opening quote

    bool closed = false;
    var text = new StringBuilder();
    Token result = null;

    do
    {
        if (source.EOF())
        {
            return context.CreateErrorToken("Malformed MEMBERNAME or STRING: no terminal encountered");
        }

        var c = source.PreviewChar;
        if (c == '\\')
        {
            // Escape: the character after the backslash is taken literally.
            ++source.PreviewPosition;
            text.Append(source.PreviewChar);
        }
        else if (c == '\'')
        {
            closed = true;
        }
        else if (!closed)
        {
            text.Append(c);
        }
        else if (!char.IsWhiteSpace(c))
        {
            // First non-whitespace character after the closing quote decides the token kind.
            if (c == '=')
            {
                result = this.CreateMemberNameToken(source, text);
            }
            else
            {
                result = this.CreateStringToken(context, source, text);
            }
        }

        source.PreviewPosition++;
    }
    while (result == null);

    return result;
}
/// <summary>
/// Reads a run of ASCII decimal digits at the preview position and interprets it as a length.
/// </summary>
/// <remarks>
/// Only '0'..'9' are accepted. The earlier <c>char.IsDigit</c> test also accepted other Unicode
/// decimal digits, which <c>int.Parse</c> then rejected with a <see cref="System.FormatException"/>.
/// The value is accumulated in a saturating 64-bit counter so that an over-long digit run cannot
/// overflow into a bogus (possibly negative) length.
/// </remarks>
/// <param name="source">The stream whose preview position is advanced past the digits.</param>
/// <returns>
/// The parsed length, or null when it is zero (including "no digits") or when it would reach
/// past the end of the source text.
/// </returns>
private int? GetLength(ISourceStream source)
{
    long length = 0;
    for (; !source.EOF(); ++source.PreviewPosition)
    {
        var c = source.PreviewChar;
        if (c < '0' || c > '9')
        {
            break;
        }

        // Stop accumulating once the value exceeds int range; it is rejected below anyway,
        // but the remaining digits are still consumed.
        if (length <= int.MaxValue)
        {
            length = length * 10 + (c - '0');
        }
    }

    if (length == 0 || source.PreviewPosition + length > source.Text.Length)
    {
        return null;
    }

    return (int)length;
}
/// <summary>
/// Matches a comment that begins with <c>StartSymbol</c> and ends with one of the <c>EndSymbols</c>.
/// </summary>
/// <remarks>
/// When the end of the text is reached without an end symbol, a line comment still produces a
/// token and a block comment produces an "Unclosed comment block" error token. A start symbol
/// that is the very last thing in the text is handled the same way; previously that case skipped
/// the search loop and returned null.
/// </remarks>
/// <param name="context">The compiler context passed to the created tokens.</param>
/// <param name="source">The stream whose position is advanced while matching.</param>
/// <returns>Null if the source does not start with the start symbol; otherwise a comment or error token.</returns>
public override Token TryMatch(CompilerContext context, ISourceStream source)
{
    bool ignoreCase = !Grammar.CaseSensitive;

    // Check starting symbol
    if (!source.MatchSymbol(StartSymbol, ignoreCase))
    {
        return null;
    }

    // Find end symbol
    source.Position += StartSymbol.Length;
    bool moreCandidates = true;
    while (moreCandidates && !source.EOF())
    {
        int firstCharPos = EndSymbols.Count == 1
            ? source.Text.IndexOf(EndSymbols[0], source.Position)
            : source.Text.IndexOfAny(_endSymbolsFirsts, source.Position);

        if (firstCharPos < 0)
        {
            moreCandidates = false; // nothing that could start an end symbol is left
            continue;
        }

        // We found a character that might start an end symbol; let's see if it is true.
        source.Position = firstCharPos;
        foreach (string endSymbol in EndSymbols)
        {
            if (source.MatchSymbol(endSymbol, ignoreCase))
            {
                // We found end symbol
                source.Position += endSymbol.Length;
                return Token.Create(this, context, source.TokenStart, source.GetLexeme());
            }
        }

        source.Position++; // move to the next char and try again
    }

    // End of text reached without an end symbol.
    source.Position = source.Text.Length;
    if (_isLineComment)
    {
        // For a line comment it is ok to hit EOF without a final line break; return all until end.
        return Token.Create(this, context, source.TokenStart, source.GetLexeme());
    }

    return Grammar.CreateSyntaxErrorToken(context, source.TokenStart, "Unclosed comment block");
}
/// <summary>
/// Advances the preview position to just after the next occurrence of <c>Terminator</c>, or to
/// the end of the source when the terminator does not occur.
/// </summary>
/// <remarks>
/// The earlier implementation could loop forever in two ways: its inner scan for the first
/// terminator character had no end-of-source check, and a position where the first character
/// matched but the full terminator did not was never stepped over. Each iteration now either
/// matches the terminator or advances by one character.
/// </remarks>
/// <param name="source">The stream whose preview position is advanced.</param>
private void MoveSourcePositionAfterTerminator(ISourceStream source)
{
    while (!source.EOF())
    {
        // Cheap first-character test before the full symbol comparison.
        // NOTE(review): this test is case-sensitive even when the grammar is not, as before - confirm intent.
        if (source.PreviewChar == Terminator[0] && source.MatchSymbol(Terminator, !Grammar.CaseSensitive))
        {
            source.PreviewPosition += Terminator.Length;
            return;
        }

        source.PreviewPosition++;
    }
}
/// <summary>
/// Skips grammar whitespace and then one line terminator, and creates the output token from what
/// was consumed.
/// </summary>
/// <param name="source">The stream whose preview position is advanced while matching.</param>
/// <returns>
/// Null if the stream is at EOF on entry or a character that is neither whitespace nor a line
/// terminator is found; otherwise the created token.
/// </returns>
private Token CompleteMatch(ISourceStream source)
{
    if (source.EOF())
    {
        return null;
    }

    do
    {
        var next = source.PreviewChar;
        if (LineTerminators.IndexOf(next) < 0)
        {
            // Not a line break: it has to be whitespace, otherwise the match fails.
            if (GrammarData.Grammar.WhitespaceChars.IndexOf(next) < 0)
            {
                return null;
            }

            source.PreviewPosition++;
        }
        else
        {
            source.PreviewPosition++;

            // Treat \r\n as a single line break.
            if (!source.EOF() && next == '\r' && source.PreviewChar == '\n')
            {
                source.PreviewPosition++;
            }

            break;
        }
    }
    while (!source.EOF());

    return source.CreateToken(this.OutputTerminal);
}
/// <summary>
/// Completes a comment match: level 0 runs to the end of the line, any other level runs to a
/// closing "]" + (level - 1) '=' characters + "]".
/// </summary>
/// <param name="context">The parsing context; its <c>VsLineScanState</c> is read and updated.</param>
/// <param name="source">The stream whose preview position is advanced while matching.</param>
/// <param name="commentLevel">0 for a single-line comment, otherwise the level of the long comment.</param>
/// <returns>
/// The comment token, or null when no closing bracket of the required level exists (the level is
/// then stored in <c>VsLineScanState.TokenSubType</c>).
/// </returns>
private Token CompleteMatch(ParsingContext context, ISourceStream source, byte commentLevel)
{
    if (commentLevel == 0)
    {
        char[] lineBreaks = new char[] { '\n', '\r', '\v' };
        int lineEnd = source.Text.IndexOfAny(lineBreaks, source.PreviewPosition);
        if (lineEnd <= 0)
        {
            lineEnd = source.Text.Length;
        }

        source.PreviewPosition = lineEnd;
        return source.CreateToken(base.OutputTerminal);
    }

    int expectedEquals = (int)(commentLevel - 1);
    while (!source.EOF())
    {
        MatchCollection closers = Regex.Matches(source.Text.Substring(source.PreviewPosition), "\\](=*)\\]");
        foreach (Match closer in closers)
        {
            if (closer.Groups[1].Value.Length != expectedEquals)
            {
                continue;
            }

            source.PreviewPosition += closer.Index + closer.Length;

            if (context.VsLineScanState.Value != 0)
            {
                // A scan state is pending: the token is built from the start of the text.
                SourceLocation location = default(SourceLocation);
                location.Position = 0;
                string text = source.Text.Substring(0, source.PreviewPosition);
                context.VsLineScanState.Value = 0;
                return new Token(this, location, text, null);
            }

            return source.CreateToken(base.OutputTerminal);
        }

        // No closer of the right level from here; retry one character further on.
        source.PreviewPosition = source.PreviewPosition + 1;
    }

    context.VsLineScanState.TokenSubType = commentLevel;
    return null;
}
/// <summary>
/// Reads an identifier body up to the first terminator or disallowed character.
/// </summary>
/// <param name="source">The stream whose position is advanced while reading.</param>
/// <param name="details">Scan details; receives <c>Body</c> on success.</param>
/// <returns>
/// False when an escape sequence reports an error or no characters were collected; true otherwise.
/// </returns>
protected override bool ReadBody(ISourceStream source, ScanDetails details)
{
    int bodyStart = source.Position;
    bool escapesEnabled = !details.IsSet(ScanFlags.DisableEscapes);
    CharList identifierChars = new CharList();

    while (!source.EOF())
    {
        char ch = source.CurrentChar;
        bool isTerminator = _terminators.IndexOf(ch) >= 0;
        if (isTerminator)
        {
            break;
        }

        if (escapesEnabled && ch == this.EscapeChar)
        {
            ch = ReadUnicodeEscape(source, details);

            // ReadUnicodeEscape stops right after the escape digits. Step back one character so
            // the escaped char is treated as sitting on the last digit; the increment at the
            // bottom of the loop then lands on the character that follows the escape.
            source.Position--;
            if (details.HasError())
            {
                return false;
            }
        }

        bool isFirstChar = source.Position == bodyStart;
        if (!CharOk(ch, isFirstChar))
        {
            break;
        }

        // Characters in the "remove" categories are consumed but left out of the identifier.
        UnicodeCategory category = char.GetUnicodeCategory(ch);
        if (!this.CharsToRemoveCategories.Contains(category))
        {
            identifierChars.Add(ch);
        }

        source.Position++;
    }

    if (identifierChars.Count == 0)
    {
        return false;
    }

    details.Body = new string(identifierChars.ToArray());
    return !string.IsNullOrEmpty(details.Body);
}
/// <summary>
/// Scans forward to the first line feed, carriage return, space or closing brace and creates a
/// token from the characters before it.
/// </summary>
/// <param name="context">The parsing context used to create error tokens.</param>
/// <param name="source">The stream whose preview position is advanced while scanning.</param>
/// <returns>
/// The token when at least one character precedes the stop character; an error token when none
/// does or when the source ends before a stop character is found.
/// </returns>
public override Token TryMatch(ParsingContext context, ISourceStream source)
{
    for (; !source.EOF(); source.PreviewPosition++)
    {
        var c = source.PreviewChar;
        if (c != '\n' && c != '\r' && c != ' ' && c != '}')
        {
            continue;
        }

        // Stop character reached: a token needs at least one consumed character.
        if (source.PreviewPosition > source.Position)
        {
            return source.CreateToken(this.OutputTerminal);
        }

        return context.CreateErrorToken(Name + " was expected");
    }

    return context.CreateErrorToken("Unbalanced " + Name);
}
/// <summary>
/// Fast path for simple identifiers: an allowed first character, a run of allowed characters and
/// then a terminator character.
/// </summary>
/// <param name="context">The compiler context passed to the created token.</param>
/// <param name="source">The stream whose position is advanced while scanning.</param>
/// <returns>The identifier token, or null when the full algorithm has to be used instead.</returns>
protected override Token QuickParse(CompilerContext context, ISourceStream source)
{
    if (AllFirstChars.IndexOf(source.CurrentChar) < 0)
    {
        return null;
    }

    // Consume the first character and then every following allowed character.
    do
    {
        source.Position++;
    }
    while (AllChars.IndexOf(source.CurrentChar) >= 0 && !source.EOF());

    // If the identifier is not followed by a terminator, cancel and go through the full algorithm.
    bool endsAtTerminator = _terminators.IndexOf(source.CurrentChar) >= 0;
    if (!endsAtTerminator)
    {
        return null;
    }

    return Token.Create(this, context, source.TokenStart, source.GetLexeme());
}
/// <summary>
/// Matches a run of characters accepted by <c>IsValid</c>, starting at the preview position.
/// </summary>
/// <param name="context">The parsing context (not used by this method).</param>
/// <param name="source">The stream whose preview position is advanced over the run.</param>
/// <returns>
/// Null when the first character is not valid; otherwise a token whose value is the preview string.
/// </returns>
public override Token TryMatch(ParsingContext context, ISourceStream source)
{
    if (!IsValid(source.PreviewChar))
    {
        return null;
    }

    source.PreviewPosition++;

    // Keep consuming valid characters; the end-of-source check comes after each step.
    while (IsValid(source.PreviewChar))
    {
        source.PreviewPosition++;
        if (source.EOF())
        {
            break;
        }
    }

    return source.CreateToken(this.OutputTerminal, source.PreviewString());
}
/// <summary>
/// Consumes characters up to the first line feed, carriage return, space or closing brace and
/// turns them into a token.
/// </summary>
/// <param name="context">The parsing context used to create error tokens.</param>
/// <param name="source">The stream whose preview position is advanced while scanning.</param>
/// <returns>
/// The token if at least one character was consumed before the stop character; otherwise an error
/// token. An error token is also returned when the source ends without a stop character.
/// </returns>
public override Token TryMatch(ParsingContext context, ISourceStream source)
{
    while (!source.EOF())
    {
        var current = source.PreviewChar;
        bool isStopChar = current == '\n' || current == '\r' || current == ' ' || current == '}';
        if (isStopChar)
        {
            bool consumedAnything = source.PreviewPosition > source.Position;
            if (!consumedAnything)
            {
                return context.CreateErrorToken(Name + " was expected");
            }

            return source.CreateToken(this.OutputTerminal);
        }

        source.PreviewPosition++;
    }

    return context.CreateErrorToken("Unbalanced " + Name);
}
/// <summary>
/// Advances the preview position to the next "{{" or "{%" (or to the end of the text when neither
/// occurs) and creates the output token from the text before it.
/// </summary>
/// <param name="context">The parsing context (not used by this method).</param>
/// <param name="source">The stream whose preview position is advanced while searching.</param>
/// <returns>The token covering the text up to the stopping position.</returns>
private Token CompleteMatch(ParsingContext context, ISourceStream source)
{
    while (!source.EOF())
    {
        int bracePos = source.Text.IndexOf('{', source.PreviewPosition);
        if (bracePos < 0)
        {
            // No brace left: everything up to the end of the text belongs to the token.
            source.PreviewPosition = source.Text.Length;
            break;
        }

        // A single '{' is ordinary text; only "{{" and "{%" end the token.
        source.PreviewPosition = bracePos;
        if (source.MatchSymbol("{{") || source.MatchSymbol("{%"))
        {
            break;
        }

        source.PreviewPosition++;
    }

    return source.CreateToken(this.OutputTerminal);
}
/// <summary>
/// Matches a comment delimited by the start symbol and the end symbol.
/// </summary>
/// <remarks>
/// When the text ends before an end symbol is found, a line comment still yields a token and any
/// other comment yields an "Unclosed comment block" error token. A start symbol placed at the
/// very end of the text is handled the same way; previously that case fell out of the loop into
/// <c>throw new NotSupportedException()</c>. The end symbol is searched with ordinal comparison
/// so that the result does not depend on the current culture.
/// </remarks>
/// <param name="context">The compiler context passed to the created tokens.</param>
/// <param name="source">The stream whose position is advanced while matching.</param>
/// <returns>Null if the source does not start with the start symbol; otherwise a comment or error token.</returns>
public override TokenAst TryMatch(CompilerContext context, ISourceStream source)
{
    if (!source.MatchSymbol(_startSymbol, false))
    {
        return null;
    }

    source.Position += _startSymbol.Length;

    bool endSymbolPossible = true;
    while (endSymbolPossible && !source.EOF())
    {
        int firstCharPos = source.Text.IndexOf(_endSymbol, source.Position, StringComparison.Ordinal);
        if (firstCharPos < 0)
        {
            endSymbolPossible = false;
            continue;
        }

        source.Position = firstCharPos;
        if (source.MatchSymbol(_endSymbol, false))
        {
            source.Position += _endSymbol.Length;
            return TokenAst.Create(this, context, source.TokenStart, source.GetLexeme());
        }

        source.Position++;
    }

    // The text ended without an end symbol.
    source.Position = source.Text.Length;
    if (_isLineComment)
    {
        return TokenAst.Create(this, context, source.TokenStart, source.GetLexeme());
    }

    return Grammar.CreateSyntaxErrorToken(context, source.TokenStart, "Unclosed comment block");
}
/// <summary>
/// Searches forward from the current position for one of the end symbols and creates the comment token.
/// </summary>
/// <param name="context">The compiler context (not used by this method).</param>
/// <param name="source">The stream whose position is advanced while searching.</param>
/// <returns>
/// The token ending at the end symbol (the symbol itself is consumed only when this is not a line
/// comment), or null when no end symbol is found.
/// </returns>
private Token CompleteMatch(CompilerContext context, ISourceStream source)
{
    while (!source.EOF())
    {
        // With one end symbol search for it directly; otherwise search for any first character.
        int candidatePos = EndSymbols.Count == 1
            ? source.Text.IndexOf(EndSymbols[0], source.Position)
            : source.Text.IndexOfAny(_endSymbolsFirsts, source.Position);

        if (candidatePos < 0)
        {
            source.Position = source.Text.Length;
            return null; // indicating error
        }

        // The candidate might start an end symbol; check every end symbol at this position.
        source.Position = candidatePos;
        foreach (string endSymbol in EndSymbols)
        {
            if (!source.MatchSymbol(endSymbol, !OwnerGrammar.CaseSensitive))
            {
                continue;
            }

            // For a line comment the line break is left in place; it might be important to
            // have a separate LF token.
            if (!_isLineComment)
            {
                source.Position += endSymbol.Length;
            }

            return new Token(this, source.TokenStart, source.GetLexeme(), null);
        }

        source.Position++; // not an end symbol after all; try the next character
    }

    // Reached when a first character was found but never the full end symbol.
    return null;
}
/// <summary>
/// Reads the body of a string literal from the preview position up to the terminating end symbol.
/// </summary>
/// <param name="source">The stream whose preview position is advanced while reading.</param>
/// <param name="details">
/// Token details; supplies the end symbol and option flags, and receives <c>Body</c> on success or
/// <c>Error</c> when the literal is malformed.
/// </param>
/// <returns>Always true; a malformed literal is reported through <c>details.Error</c>.</returns>
private bool CompleteReadBody(ISourceStream source, CompoundTokenDetails details)
{
    bool escapeEnabled = !details.IsSet((short)StringOptions.NoEscapes);
    int bodyStart = source.PreviewPosition;
    string endQuote = details.EndSymbol;
    string doubledEndQuote = endQuote + endQuote;
    bool lineBreakAllowed = details.IsSet((short)StringOptions.AllowsLineBreak);

    // The next line break matters only when line breaks are NOT allowed; -1 means "don't care".
    int lineBreakPos = lineBreakAllowed ? -1 : source.Text.IndexOf('\n', source.PreviewPosition);

    // An unconditional loop, so that EOF right after the opening symbol is also examined.
    for (;;)
    {
        int endPos = source.Text.IndexOf(endQuote, source.PreviewPosition);

        // Partial token in line-scanning mode.
        if (endPos < 0 && details.PartialOk && lineBreakAllowed)
        {
            ProcessPartialBody(source, details);
            return true;
        }

        // Malformed: no end symbol, or a line break before the end symbol.
        if (endPos < 0 || (lineBreakPos >= 0 && lineBreakPos < endPos))
        {
            // Recovery position: just after the line break if there is one, else after the end symbol.
            int recoverAt = lineBreakPos > 0 ? lineBreakPos : endPos;
            if (recoverAt > 0)
            {
                source.PreviewPosition = recoverAt + 1;
            }

            details.Error = Resources.ErrBadStrLiteral; // "Mal-formed string literal - cannot find termination symbol."
            return true; // the start symbol was found, so it is a string, only malformed
        }

        if (source.EOF())
        {
            return true;
        }

        // An escaped end symbol is part of the body: skip it and keep searching.
        if (escapeEnabled && IsEndQuoteEscaped(source.Text, endPos))
        {
            source.PreviewPosition = endPos + endQuote.Length;
            continue;
        }

        // A doubled end symbol is part of the body too.
        source.PreviewPosition = endPos;
        if (details.IsSet((short)StringOptions.AllowsDoubledQuote) && source.MatchSymbol(doubledEndQuote, !CaseSensitive))
        {
            source.PreviewPosition = endPos + doubledEndQuote.Length;
            continue;
        }

        // Normal end symbol: capture the body and step past the terminator.
        details.Body = source.Text.Substring(bodyStart, endPos - bodyStart);
        source.PreviewPosition = endPos + endQuote.Length;
        return true;
    }
}
/// <summary>
/// Produces the next token: a previously buffered token if any, otherwise a token matched at the
/// current source position after skipping grammar whitespace. Yields an EOF token at end of input
/// and a syntax-error token (after calling Recover) when nothing matches.
/// </summary>
/// <returns>The next token; may be an error token.</returns>
private Token ReadToken()
{
    // Tokens left over from an earlier multi-token match are served first.
    if (_bufferedTokens.Count > 0)
    {
        Token queued = _bufferedTokens[0];
        _bufferedTokens.RemoveAt(0);
        return queued;
    }

    // Skip whitespace. No explicit EOF test here: the original relies on the stream returning a
    // 0-char at EOF, which is not a whitespace char, so the loop ends by itself.
    for (; _data.Grammar.WhitespaceChars.IndexOf(_source.CurrentChar) >= 0; _source.Position++)
    {
    }

    // The token starts here; compute its line/column.
    SetTokenStartLocation();

    if (_source.EOF())
    {
        return Token.Create(Grammar.Eof, _context, _source.TokenStart, string.Empty, Grammar.Eof.Name);
    }

    // Matching order: terminals selected by the current char, then fallback terminals,
    // then the grammar's own TryMatch.
    Token result = MatchTerminals(SelectTerminals(_source.CurrentChar));
    if (result == null && _data.FallbackTerminals.Count > 0)
    {
        result = MatchTerminals(_data.FallbackTerminals);
    }
    if (result == null)
    {
        result = _data.Grammar.TryMatch(_context, _source);
    }

    // A multi-token is unpacked: all children are queued and the first one becomes the result.
    if (result != null && result.IsMultiToken())
    {
        foreach (Token child in result.ChildNodes)
        {
            _bufferedTokens.Add(child);
        }
        result = _bufferedTokens[0];
        _bufferedTokens.RemoveAt(0);
    }

    // No token at all: report the offending character.
    if (result == null)
    {
        result = Grammar.CreateSyntaxErrorToken(_context, _source.TokenStart, "Invalid character: '{0}'", _source.CurrentChar);
        Recover();
        return result;
    }

    // A terminal produced an error token: recover and pass it on.
    if (result.IsError())
    {
        Recover();
        return result;
    }

    // Normal token: move the source position to just after it.
    _source.Position = _source.TokenStart.Position + result.Length;
    return result;
}
/// <summary>
/// Advances the preview position past whitespace: space, tab, CR, LF, vertical tab, the Unicode
/// line/paragraph separators, NEL (U+0085) and any character of Unicode category Zs.
/// Stops at the first other character or at EOF.
/// </summary>
/// <param name="source">Source stream whose preview position is advanced.</param>
public override void SkipWhitespace(ISourceStream source)
{
    while (!source.EOF())
    {
        var ch = source.PreviewChar;
        switch (ch)
        {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
            case '\v':
            case '\u0085': // NEL; the previous '\u2085' (SUBSCRIPT FIVE) was a typo
            case '\u2028':
            case '\u2029':
                break;
            default:
                // Anything else counts only if it is in Unicode class Zs (space separator).
                if (char.GetUnicodeCategory(ch) != UnicodeCategory.SpaceSeparator)
                {
                    return;
                }
                break;
        }

        // Advance for every whitespace character, including Zs ones. The earlier code hit
        // 'continue' for Zs characters without advancing and therefore never terminated.
        source.PreviewPosition++;
    }
}
/// <summary>
/// Repeatedly asks SkipSingleWhitespace how many characters to skip at the current preview
/// position and advances by that amount, stopping at EOF or when it reports 0.
/// </summary>
/// <param name="source">Source stream whose preview position is advanced.</param>
public override void SkipWhitespace(ISourceStream source)
{
    for (;;)
    {
        if (source.EOF())
        {
            return;
        }

        int skipped = SkipSingleWhitespace(source);
        if (skipped == 0)
        {
            return;
        }

        source.PreviewPosition += skipped;
    }
}
/*
 * private static List<string> _firsts = new List<string>() { "'", "\"", "@" };
 */
#endregion

#region Init
/// <summary>
/// Tries to match a quoted string literal at the current source position. An optional leading
/// '@' marks the literal as verbatim: no backslash escapes, and line terminators are allowed.
/// </summary>
/// <param name="context">Compiler context passed through to the created token.</param>
/// <param name="source">Source stream positioned at the candidate literal.</param>
/// <returns>
/// A token whose value is the literal's body, or null when there is no opening quote, a
/// non-verbatim literal runs into a line terminator, or no closing quote is found.
/// </returns>
public override TokenAst TryMatch(CompilerContext context, ISourceStream source)
{
    bool isVerbatim = source.CurrentChar == '@';
    if (isVerbatim)
    {
        source.Position++;
    }

    if (!IsCurrentQuote(source))
    {
        return null;
    }
    source.Position++;

    // First character after the (optional) '@' and the opening quote.
    int bodyStart = source.Position;

    while (!source.EOF())
    {
        if (!isVerbatim)
        {
            char ch = source.CurrentChar;
            if (ch == '\\')
            {
                //TODO: Escape processing
                // Step over the backslash together with the character it escapes.
                source.Position += 2;
                continue;
            }
            if (LRParser.LineTerminators.IndexOf(source.CurrentChar) >= 0)
            {
                return null;
            }
        }

        if (IsCurrentQuote(source))
        {
            break;
        }
        source.Position++;
    }

    // The loop may also have ended at EOF, so the closing quote is verified again.
    if (!IsCurrentQuote(source))
    {
        return null;
    }
    source.Position++;

    string lexeme = source.GetLexeme();
    // Everything between the quotes; the -1 drops the closing quote.
    string body = source.Text.Substring(bodyStart, source.Position - bodyStart - 1);

    //TODO: handle this in escape processing
    if (!isVerbatim)
    {
        body = body.Replace("\\'", "'");
        body = body.Replace("\\\"", "\"");
        body = body.Replace("\\\\", "\\");
    }

    return TokenAst.Create(this, context, source.TokenStart, lexeme, body);
}
/// <summary>
/// Matches an unquoted terminal, reading until a top-level ',' or '=', or an unbalanced '}'.
/// Trailing whitespace is excluded by passing the position after the last non-whitespace
/// character to CreateToken.
/// </summary>
/// <param name="context">Parsing context, used to create the error token.</param>
/// <param name="source">Source stream to read from.</param>
/// <returns>
/// The matched token; null when the text cannot be this terminal; an error token when EOF is
/// reached before a delimiter.
/// </returns>
private Token MatchUnquoted(ParsingContext context, ISourceStream source)
{
    if (source.PreviewChar == '{')
    {
        // A leading '{' is only valid as the special "{}" prefix of a STRING token;
        // member names can never start with '{'.
        if (this.IsMemberName || source.NextPreviewChar != '}')
        {
            return null;
        }
        source.PreviewPosition += 2;
    }

    int braceDepth = 0;
    // Position just after the last significant (non-whitespace) character seen so far.
    int significantEnd = source.PreviewPosition;

    while (!source.EOF())
    {
        char ch = source.PreviewChar;
        bool whitespace = false;

        if (ch == '{')
        {
            braceDepth++;
        }
        else if (ch == '}')
        {
            // A closing brace with no matching opener ends the token.
            if (--braceDepth < 0)
            {
                return this.CreateToken(source, significantEnd);
            }
        }
        else if (ch == ',')
        {
            if (braceDepth == 0)
            {
                return this.CreateToken(source, significantEnd);
            }
        }
        else if (ch == '=')
        {
            if (braceDepth == 0)
            {
                // A top-level equal sign is only allowed after member names.
                return this.IsMemberName ? this.CreateToken(source, significantEnd) : null;
            }
        }
        else if (ch == '\\')
        {
            // Skip the backslash here; the shared increment below skips the escaped character.
            source.PreviewPosition++;
        }
        else
        {
            whitespace = Char.IsWhiteSpace(source.PreviewChar);
        }

        source.PreviewPosition++;
        if (!whitespace)
        {
            significantEnd = source.PreviewPosition;
        }
    }

    return context.CreateErrorToken("Unterminated string terminal");
}
/// <summary>
/// Completes a comment token. Level 0 is a line comment that runs to the next line break (or to
/// the end of the text). A level of N &gt; 0 is a block comment closed by <c>]</c>, N-1 <c>=</c>
/// signs and <c>]</c>.
/// </summary>
/// <param name="context">Parsing context; its VsLineScanState is read and updated.</param>
/// <param name="source">Source stream; the preview position is moved past the comment.</param>
/// <param name="commentLevel">0 for a line comment, otherwise number of '=' signs in the bracket plus one.</param>
/// <returns>
/// The comment token, or null when a block comment is not terminated in the available text
/// (in that case the level is stored in <c>VsLineScanState.TokenSubType</c>).
/// </returns>
private Token CompleteMatch(ParsingContext context, ISourceStream source, byte commentLevel)
{
    if (commentLevel == 0)
    {
        var lineBreaks = new[] { '\n', '\r', '\v' };
        int lineEnd = source.Text.IndexOfAny(lineBreaks, source.PreviewPosition);
        // -1 means "not found"; index 0 is a legitimate hit (previously tested with '> 0').
        source.PreviewPosition = lineEnd >= 0 ? lineEnd : source.Text.Length;
        return source.CreateToken(this.OutputTerminal);
    }

    // Search for the exact closing bracket once. The earlier implementation re-ran a regex over
    // a fresh substring at every character (quadratic), and its non-overlapping matches could
    // swallow the ']' that begins the real terminator (e.g. "]]=]" for level 1).
    string terminator = "]" + new string('=', commentLevel - 1) + "]";
    int closeAt = source.EOF()
        ? -1
        : source.Text.IndexOf(terminator, source.PreviewPosition, System.StringComparison.Ordinal);

    if (closeAt >= 0)
    {
        source.PreviewPosition = closeAt + terminator.Length;

        if (context.VsLineScanState.Value != 0)
        {
            // Non-zero line-scan state: the token is built from the start of the text rather
            // than from the stream's token start, and the state is reset.
            // NOTE(review): presumably this is the continuation of a comment opened on an
            // earlier line in line-scanning mode - confirm against the caller.
            SourceLocation tokenStart = new SourceLocation();
            tokenStart.Position = 0;
            string lexeme = source.Text.Substring(0, source.PreviewPosition);
            context.VsLineScanState.Value = 0;
            return new Token(this, tokenStart, lexeme, null);
        }

        return source.CreateToken(this.OutputTerminal);
    }

    // Unterminated: consume the rest of the text and remember the level for the next scan.
    if (!source.EOF())
    {
        source.PreviewPosition = source.Text.Length;
    }
    context.VsLineScanState.TokenSubType = commentLevel;
    return null;
}
}//method

/// <summary>
/// Reads the body of a number literal (optional sign, digits, optional dot and exponent) starting
/// at the current preview position and stores the scanned text in <c>details.Body</c>.
/// </summary>
/// <param name="source">Source stream; the preview position is advanced over the number.</param>
/// <param name="details">Token details; receives Sign, Flags, ExponentSymbol and Body.</param>
/// <returns>
/// True when at least one digit (or allowed underscore) was read; false otherwise, or when an
/// int-only number is followed by a dot/exponent and NoDotAfterInt is set.
/// </returns>
protected override bool ReadBody(ISourceStream source, CompoundTokenDetails details)
{
    // Remember the start - it may differ from source.TokenStart if a prefix was skipped.
    int bodyStart = source.PreviewPosition;
    char ch = source.PreviewChar;
    if (IsSet(NumberOptions.AllowSign) && (ch == '-' || ch == '+'))
    {
        details.Sign = ch.ToString();
        source.PreviewPosition++;
    }

    // Digit set for the number's radix.
    string digits = GetDigits(details);
    bool isDecimal = !details.IsSet((short)(NumberOptions.Binary | NumberOptions.Octal | NumberOptions.Hex));
    bool allowFloat = !IsSet(NumberOptions.IntOnly);
    bool sawDigits = false;

    while (!source.EOF())
    {
        ch = source.PreviewChar;

        // 1. Digits (and '_' when allowed) are simply consumed.
        if (digits.IndexOf(ch) >= 0 || IsSet(NumberOptions.AllowUnderscore) && ch == '_')
        {
            source.PreviewPosition++;
            sawDigits = true;
            continue;
        }

        // 2. Dot inside a float number.
        bool isDot = ch == DecimalSeparator;
        if (allowFloat && isDot)
        {
            // A second dot, or a dot after the exponent, ends the number.
            if (details.IsSet((short)(NumberFlagsInternal.HasDot | NumberFlagsInternal.HasExp)))
            {
                break;
            }
            // The dot must be followed by a digit unless start/end dots are allowed
            // (as in Python number literals).
            bool digitFollows = digits.IndexOf(source.NextPreviewChar) >= 0;
            if (!digitFollows && !IsSet(NumberOptions.AllowStartEndDot))
            {
                break;
            }
            details.Flags |= (int)NumberFlagsInternal.HasDot;
            source.PreviewPosition++;
            continue;
        }

        // 3. Int-only number followed by a dot or an exponent symbol.
        bool isExpSymbol = (details.ExponentSymbol == null) && _exponentsTable.ContainsKey(ch);
        if (!allowFloat && sawDigits && (isDot || isExpSymbol))
        {
            // With NoDotAfterInt this is not an integer; let a float terminal take it.
            if (IsSet(NumberOptions.NoDotAfterInt))
            {
                return false;
            }
            // Otherwise the integer simply ends here.
            break;
        }

        // 4. Decimals only: the first exponent symbol.
        if (allowFloat && isDecimal && isExpSymbol)
        {
            char following = source.NextPreviewChar;
            bool signFollows = following == '-' || following == '+';
            bool digitFollows = digits.IndexOf(following) >= 0;
            // The exponent symbol must be followed by a sign or a digit.
            if (!(signFollows || digitFollows))
            {
                break;
            }
            details.ExponentSymbol = ch.ToString(); // remember the exponent char
            details.Flags |= (int)NumberFlagsInternal.HasExp;
            source.PreviewPosition++;
            if (signFollows)
            {
                // Skip +/- now so the next iteration does not have to deal with it.
                source.PreviewPosition++;
            }
            continue;
        }

        // 5. Anything else ends the number.
        break;
    }

    int bodyEnd = source.PreviewPosition;
    if (!sawDigits)
    {
        return false;
    }
    details.Body = source.Text.Substring(bodyStart, bodyEnd - bodyStart);
    return true;
}
/// <summary>Advances the source stream past whitespace characters.</summary>
/// <remarks>
/// Spaces and tabs are always skipped. CR, LF and vertical tab are skipped only when
/// <c>UsesNewLine</c> is false. Override this method if your language has non-standard
/// whitespace characters.
/// </remarks>
/// <param name="source">Source stream.</param>
public virtual void SkipWhitespace(ISourceStream source)
{
    for (; !source.EOF(); source.PreviewPosition++)
    {
        var ch = source.PreviewChar;
        if (ch == ' ' || ch == '\t')
        {
            continue;
        }

        bool isLineBreak = ch == '\r' || ch == '\n' || ch == '\v';
        // Stop at any non-whitespace char, and at line breaks when the language is line-based.
        if (!isLineBreak || UsesNewLine)
        {
            return;
        }
    }
}
/// <summary>
/// Advances the preview position to just after the next occurrence of <c>Terminator</c>.
/// If the terminator does not occur, the position ends up at EOF.
/// </summary>
/// <param name="source">Source stream whose preview position is advanced.</param>
private void MoveSourcePositionAfterTerminator(ISourceStream source)
{
    while (!source.EOF())
    {
        // Cheap first-character test before the full symbol comparison.
        // NOTE(review): this pre-check is case-sensitive even when MatchSymbol is asked to
        // ignore case - confirm that is intended for case-insensitive grammars.
        if (source.PreviewChar == Terminator[0] && source.MatchSymbol(Terminator, !Grammar.CaseSensitive))
        {
            source.PreviewPosition += Terminator.Length;
            return;
        }

        // Always move forward. The earlier version scanned for the first character without an
        // EOF check (running past the end of the text) and did not advance when the first
        // character matched but the whole terminator did not, which looped forever.
        source.PreviewPosition++;
    }
}
/// <summary>
/// Skips spaces, tabs, CR and vertical tab. A '\n' is skipped only when the character after it
/// is a space, tab, '\n', '\r', '\v' or ';'; otherwise scanning stops at the '\n'.
/// </summary>
/// <param name="source">Source stream whose preview position is advanced.</param>
public override void SkipWhitespace(ISourceStream source)
{
    // Characters that, when they follow a '\n', make that '\n' skippable.
    const string skippableFollowers = " \t\n\r\v;";

    while (!source.EOF())
    {
        var ch = source.PreviewChar;
        if (ch == '\n')
        {
            if (skippableFollowers.IndexOf(source.NextPreviewChar) < 0)
            {
                return;
            }
        }
        else if (ch != '\r' && ch != '\v' && ch != ' ' && ch != '\t')
        {
            return;
        }

        source.PreviewPosition++;
    }
}
/// <summary>
/// Reads the body of a number literal (digits, optional dot and exponent) from the current
/// source position and stores the scanned text in <c>details.Body</c>.
/// </summary>
/// <param name="source">Source stream; its position is advanced over the number.</param>
/// <param name="details">Scan details; receives Flags, ControlSymbol (exponent char) and Body.</param>
/// <returns>Always true; the body may be empty if no digit was read.</returns>
protected override bool ReadBody(ISourceStream source, ScanDetails details)
{
    // Remember the start - it may differ from source.TokenStart if something was skipped.
    int bodyStart = source.Position;

    // Digit set for the number's radix.
    string digits = GetDigits(details);
    bool isDecimal = !details.IsSet(ScanFlags.NonDecimal);
    bool allowFloat = !IsSet(TermOptions.NumberIntOnly);

    while (!source.EOF())
    {
        char ch = source.CurrentChar;

        // 1. Digits are simply consumed.
        if (digits.IndexOf(ch) >= 0)
        {
            source.Position++;
            continue;
        }

        // 2. Decimal dot.
        if (ch == DecimalSeparator && allowFloat)
        {
            // Reject a dot after an earlier dot/exponent. Otherwise it must be followed by a
            // digit unless start/end dots are allowed (as in Python number literals).
            if (details.IsSet(ScanFlags.HasDotOrExp))
            {
                break;
            }
            if (digits.IndexOf(source.NextChar) < 0 && !IsSet(TermOptions.NumberAllowStartEndDot))
            {
                break;
            }
            details.Flags |= ScanFlags.HasDot;
            source.Position++;
            continue;
        }

        // 3. Decimals only: the first exponent symbol.
        if (allowFloat && isDecimal && (details.ControlSymbol == null) && (ExponentSymbols.IndexOf(ch) >= 0))
        {
            char following = source.NextChar;
            bool signFollows = following == '-' || following == '+';
            bool digitFollows = digits.IndexOf(following) >= 0;
            // The exponent symbol must be followed by a sign or a digit.
            if (!(signFollows || digitFollows))
            {
                break;
            }
            details.ControlSymbol = ch.ToString(); // remember the exponent char
            details.Flags |= ScanFlags.HasExp;
            source.Position++;
            if (signFollows)
            {
                // Skip +/- now so the next iteration does not have to deal with it.
                source.Position++;
            }
            continue;
        }

        // 4. Anything else ends the number.
        break;
    }

    int bodyEnd = source.Position;
    details.Body = source.Text.Substring(bodyStart, bodyEnd - bodyStart);
    return true;
}
/// <summary>
/// Reads the body of a number literal (optional sign, digits, optional dot and exponent) from the
/// current source position and stores the scanned text in <c>details.Body</c>.
/// </summary>
/// <param name="source">Source stream; its position is advanced over the number.</param>
/// <param name="details">Token details; receives Sign, Flags, ExponentSymbol and Body.</param>
/// <returns>
/// True when at least one digit was read; false otherwise, or when an int-only number is
/// followed by a dot/exponent and AvoidPartialFloat is set.
/// </returns>
protected override bool ReadBody(ISourceStream source, CompoundTokenDetails details)
{
    // Remember the start - it may differ from source.TokenStart if a prefix was skipped.
    int bodyStart = source.Position;
    char ch = source.CurrentChar;
    if (ch == '-' || ch == '+')
    {
        details.Sign = ch.ToString();
        source.Position++;
    }

    // Digit set for the number's radix.
    string digits = GetDigits(details);
    bool isDecimal = !details.IsSet((short) (NumberFlags.Binary | NumberFlags.Octal | NumberFlags.Hex));
    bool allowFloat = !IsSet(NumberFlags.IntOnly);
    bool sawDigits = false;

    while (!source.EOF())
    {
        ch = source.CurrentChar;

        // 1. Digits are simply consumed.
        if (digits.IndexOf(ch) >= 0)
        {
            source.Position++;
            sawDigits = true;
            continue;
        }

        // 2. Dot inside a float number.
        bool isDot = ch == DecimalSeparator;
        if (allowFloat && isDot)
        {
            // A second dot, or a dot after the exponent, ends the number.
            if (details.IsSet((short) (NumberFlags.HasDot | NumberFlags.HasExp)))
            {
                break;
            }
            // The dot must be followed by a digit unless start/end dots are allowed
            // (as in Python number literals).
            bool digitFollows = digits.IndexOf(source.NextChar) >= 0;
            if (!digitFollows && !IsSet(NumberFlags.AllowStartEndDot))
            {
                break;
            }
            details.Flags |= (int) NumberFlags.HasDot;
            source.Position++;
            continue;
        }

        // 3. Int-only number followed by a dot or an exponent symbol.
        bool isExpSymbol = (details.ExponentSymbol == null) && ExponentSymbols.IndexOf(ch) >= 0;
        if (!allowFloat && sawDigits && (isDot || isExpSymbol))
        {
            // With AvoidPartialFloat this is not an integer; let a float terminal take it.
            if (IsSet(NumberFlags.AvoidPartialFloat))
            {
                return false;
            }
            // Otherwise the integer simply ends here.
            break;
        }

        // 4. Decimals only: the first exponent symbol.
        if (allowFloat && isDecimal && isExpSymbol)
        {
            char following = source.NextChar;
            bool signFollows = following == '-' || following == '+';
            bool digitFollows = digits.IndexOf(following) >= 0;
            // The exponent symbol must be followed by a sign or a digit.
            if (!(signFollows || digitFollows))
            {
                break;
            }
            details.ExponentSymbol = ch.ToString(); // remember the exponent char
            details.Flags |= (int) NumberFlags.HasExp;
            source.Position++;
            if (signFollows)
            {
                // Skip +/- now so the next iteration does not have to deal with it.
                source.Position++;
            }
            continue;
        }

        // 5. Anything else ends the number.
        break;
    }

    int bodyEnd = source.Position;
    if (!sawDigits)
    {
        return false;
    }
    details.Body = source.Text.Substring(bodyStart, bodyEnd - bodyStart);
    return true;
}
/// <summary>
/// Reads a string literal: consumes the start symbol, then searches for the matching end symbol
/// (the same text as the start symbol), skipping escaped and doubled end symbols. On success
/// stores the literal's content in <c>details.Body</c> and positions the source after the end symbol.
/// </summary>
/// <param name="source">Source stream; its position is advanced over the literal.</param>
/// <param name="details">Scan details; supplies the control (quote) symbol and flags, receives Body or Error.</param>
/// <returns>
/// False when no start symbol is present or the source is already at EOF after it; true otherwise,
/// with <c>details.Error</c> set when the literal is malformed.
/// </returns>
protected override bool ReadBody(ISourceStream source, ScanDetails details)
{
    if (!ReadStartSymbol(source, details))
    {
        return false;
    }

    bool escapeEnabled = !details.IsSet(ScanFlags.DisableEscapes);
    bool ignoreCase = IsSet(TermOptions.SpecialIgnoreCase);
    int start = source.Position;
    string startS = details.ControlSymbol;
    string startS2 = startS + startS; //doubled start symbol

    // Position of the next line break, used to detect a malformed string. Only needed when line
    // breaks are NOT allowed; -1 means "ignore line breaks".
    int nlPos = details.IsSet(ScanFlags.AllowLineBreak) ? -1 : source.Text.IndexOf('\n', source.Position);

    while (!source.EOF())
    {
        int endPos = source.Text.IndexOf(startS, source.Position);

        // Malformed: end symbol not found, or a line break occurs before it.
        bool malformed = endPos < 0 || nlPos >= 0 && nlPos < endPos;
        if (malformed)
        {
            // Recovery position: the line break if there is one, otherwise the end symbol.
            if (nlPos > 0)
            {
                endPos = nlPos;
            }
            if (endPos > 0)
            {
                source.Position = endPos + 1;
            }
            details.Error = "Mal-formed string literal - cannot find termination symbol.";
            return true;
        }

        // The end symbol is escaped only when preceded by an ODD number of escape characters:
        // in "abc\\" the two backslashes form an escaped backslash and the quote is a real
        // terminator. Looking at a single preceding character (as before) got this wrong.
        // The scan is bounded by the body start so it never reads the opening symbol or
        // indexes before the beginning of the text.
        bool endSymbolEscaped = false;
        if (escapeEnabled)
        {
            for (int p = endPos - 1; p >= start && source.Text[p] == EscapeChar; p--)
            {
                endSymbolEscaped = !endSymbolEscaped;
            }
        }
        if (endSymbolEscaped)
        {
            source.Position = endPos + startS.Length;
            continue; //searching for end symbol
        }

        // A doubled end symbol (when allowed) is part of the body.
        source.Position = endPos;
        if (details.IsSet(ScanFlags.AllowDoubledQuote) && source.MatchSymbol(startS2, ignoreCase))
        {
            source.Position = endPos + startS.Length * 2;
            continue;
        }

        // Normal end symbol: capture the body and step over the terminator.
        details.Body = source.Text.Substring(start, endPos - start);
        source.Position = endPos + startS.Length;
        return true;
    }

    return false;
}