//helper: undo the above Escape() effect and restores to its intended value, when an escaped string is retrieved.
public string UnEscape(string escapedSectionValue)
{
    if (string.IsNullOrEmpty(escapedSectionValue))
    {
        return string.Empty;
    }

    var result = new StringBuilder();
    int length = escapedSectionValue.Length;
    for (int position = 0; position < length; position++)
    {
        char current = escapedSectionValue[position];

        // An escape char that precedes a delimiter or another escape char is
        // dropped; the protected character itself is emitted on the next pass.
        if (current == EscapeChar && position < length - 1)
        {
            char following = escapedSectionValue[position + 1];
            if (Delimiters.Contains(following) || following == EscapeChar)
            {
                continue; //skip the escape-char
            }
        }
        result.Append(current);
    }
    return result.ToString(); //un-escaped section value
}
/// <summary>
/// Tokenizes the given string into a list of tokens
/// </summary>
/// <param name="input">The string to tokenize</param>
/// <returns>The list of tokens</returns>
public List <T> Tokenize(string input)
{
    List <T> tokens = new List <T>();
    Token currentToken = null;
    // Reset string-literal state for each run; Tokenize_Plain toggles it on quotes.
    insideStringLiteral = false;
    char currentCharacter;
    char?nextCharacter;
    int currentIndex = -1;
    int currentLine = 1;
    int currentColumn = 0;
    while (TryReadCharacter(input, ref currentIndex, out currentCharacter))
    {
        // One character of lookahead, used only by the escape-sequence check.
        char peeked;
        if (TryPeekCharacter(input, currentIndex, out peeked))
        {
            nextCharacter = peeked;
        }
        else
        {
            nextCharacter = null;
        }
        currentColumn++;
        // Dispatch order matters: newline, escape, string-literal mode,
        // delimiter, whitespace, then plain text.
        if (currentCharacter == '\n')
        {
            // Newlines are treated as whitespace, then the position counters roll over.
            Tokenize_Whitespace(input, ref currentIndex, ref currentCharacter, ref currentLine, ref currentColumn, ref currentToken, tokens);
            currentLine++;
            currentColumn = 0;
        }
        else if (IsEscapeSequence(currentCharacter, nextCharacter))
        {
            Tokenize_EscapeCharacter(input, ref currentIndex, ref currentCharacter, ref currentLine, ref currentColumn, ref currentToken, tokens);
        }
        else if (insideStringLiteral)
        {
            // Inside a quoted literal, delimiters and whitespace lose their meaning.
            Tokenize_Plain(input, ref currentIndex, ref currentCharacter, ref currentLine, ref currentColumn, ref currentToken, tokens);
        }
        else if (Delimiters.Contains("" + currentCharacter))
        {
            Tokenize_DelimiterCharacter(input, ref currentIndex, ref currentCharacter, ref currentLine, ref currentColumn, ref currentToken, tokens);
        }
        else if (char.IsWhiteSpace(currentCharacter))
        {
            Tokenize_Whitespace(input, ref currentIndex, ref currentCharacter, ref currentLine, ref currentColumn, ref currentToken, tokens);
        }
        else
        {
            Tokenize_Plain(input, ref currentIndex, ref currentCharacter, ref currentLine, ref currentColumn, ref currentToken, tokens);
        }
    }
    // Flush whatever token was still being built when input ran out.
    FinalizeTokenIfNotNull(ref currentToken, tokens);
    return(tokens);
}
/// <summary>
/// Reads and yields tokens until the current scope (whose opening bracket was
/// already consumed by the caller) is closed by <paramref name="close"/>.
/// Nested bracket pairs from <paramref name="bracketPairs"/> are tracked on
/// <c>_stack</c> and validated for matching closures.
/// </summary>
/// <param name="open">The token ID that opened the current scope.</param>
/// <param name="close">The token ID that closes the current scope.</param>
/// <param name="bracketPairs">The bracket pairs permitted to nest inside the scope.</param>
/// <exception cref="RantException">On a mismatched closing bracket or end of file.</exception>
public IEnumerable <Token <R> > ReadToScopeClose(R open, R close, Delimiters bracketPairs)
{
    SkipSpace();
    _stack.Clear();
    Token <R> token = null;
    while (!End)
    {
        token = ReadToken();
        // Contains(id, id) appears to detect pairs whose opener equals their closer
        // (quote-like "literal" brackets) — TODO confirm against Delimiters.Contains.
        // For such tokens, only treat this one as a closer when it matches the
        // innermost open bracket (or the scope opener when the stack is empty).
        bool literalCheck = !bracketPairs.Contains(token.ID, token.ID) || (_stack.Any() ? _stack.Peek().ID == token.ID : open == token.ID);
        if (literalCheck && (bracketPairs.ContainsClosing(token.ID) || token.ID == close)) // Allows nesting
        {
            // Since this method assumes that the first opening bracket was already read, an empty _stack indicates main scope closure.
            if (!_stack.Any() && token.ID == close)
            {
                yield break;
            }
            var lastOpening = _stack.Pop();
            if (!bracketPairs.Contains(lastOpening.ID, token.ID))
            {
                throw new RantException(_source, token, "Invalid closure '" + lastOpening.Value + " ... " + token.Value + "' - expected '" + RantLexer.Rules.GetSymbolForId(bracketPairs.GetClosing(lastOpening.ID)) + "'");
            }
        }
        else if (bracketPairs.ContainsOpening(token.ID) || open == token.ID) // Allows nesting
        {
            _stack.Push(token);
        }
        yield return(token);
    }
    // Ran off the end of the token stream without seeing the scope's closer.
    throw new RantException(_source, null, "Unexpected end of file; expected '" + RantLexer.Rules.GetSymbolForId(close) + "'.");
}
// Handles a "plain" character: either manages double-quoted string-literal state
// (per DoubleQuoteBehavior), or appends the character to the current token and,
// when the token now ends with a delimiter, splits that delimiter into its own token.
// All position parameters are passed by ref so the caller's cursor stays in sync.
private void Tokenize_Plain(string input, ref int currentIndex, ref char currentCharacter, ref int line, ref int col, ref Token currentToken, List <T> tokens)
{
    if (currentCharacter == '"' && DoubleQuoteBehavior != PowerArgs.DoubleQuoteBehavior.NoSpecialHandling)
    {
        if (DoubleQuoteBehavior == PowerArgs.DoubleQuoteBehavior.IncludeQuotedTokensAsStringLiterals)
        {
            if (insideStringLiteral == false)
            {
                // Opening quote: close any token in progress and start the literal with the quote.
                FinalizeTokenIfNotNull(ref currentToken, tokens);
                AppendToTokenSafe(ref currentToken, currentCharacter, currentIndex, line, col);
                insideStringLiteral = true;
            }
            else
            {
                // Closing quote: append it and emit the completed literal token.
                AppendToTokenSafe(ref currentToken, currentCharacter, currentIndex, line, col);
                FinalizeTokenIfNotNull(ref currentToken, tokens);
                insideStringLiteral = false;
            }
        }
        else
        {
            throw new TokenizerException("Unknown double quote option: " + DoubleQuoteBehavior);
        }
    }
    else
    {
        AppendToTokenSafe(ref currentToken, currentCharacter, currentIndex, line, col);
        if (insideStringLiteral == false)
        {
            var t = currentToken;
            // Find the LONGEST delimiter the current token ends with, materialized
            // once. The previous code built a deferred query and enumerated it
            // twice (Count() then First()), re-running the whole search.
            var delimiter = Delimiters
                .Where(d => t.EndsWithDelimiter(d))
                .OrderByDescending(d => d.Length)
                .FirstOrDefault();
            if (delimiter != null)
            {
                if (Delimiters.Contains(currentToken.Value))
                {
                    // The whole token IS a delimiter - emit it as-is.
                    FinalizeTokenIfNotNull(ref currentToken, tokens);
                }
                else
                {
                    // The token ends with a delimiter: trim it off, emit the prefix,
                    // then emit the delimiter as a separate token positioned right after it.
                    currentToken.Value = currentToken.Value.Substring(0, currentToken.Value.Length - delimiter.Length);
                    var prevToken = currentToken;
                    FinalizeTokenIfNotNull(ref currentToken, tokens);
                    currentToken = CreateTokenForTokenizer(delimiter, prevToken.StartIndex + prevToken.Value.Length, prevToken.Line, prevToken.Column + prevToken.Value.Length);
                    FinalizeTokenIfNotNull(ref currentToken, tokens);
                }
            }
        }
    }
}
//helper: used for storing a section-value that contains delimiter chars and/or the escape char
//but you want these chars to be ignored during encoding and decoding, so the stored value
//can be retrieved as one-piece rather than being fragmented through child-indexing
public string Escape(string sectionValue)
{
    var escaped = new StringBuilder();
    foreach (char c in sectionValue)
    {
        // Protect delimiters and the escape char itself by prefixing the escape char.
        bool needsEscaping = Delimiters.Contains(c) || c == EscapeChar;
        if (needsEscaping)
        {
            escaped.Append(EscapeChar);
        }
        escaped.Append(c);
    }
    return escaped.ToString(); //escaped section value
}
/// <summary>
/// Reads and yields tokens until a token of <paramref name="tokenType"/> is found
/// in the parent scope (i.e. with no unmatched brackets open). Nested bracket
/// pairs are tracked on <c>_stack</c> and validated for matching closures.
/// </summary>
/// <param name="tokenType">The token ID that terminates the read.</param>
/// <param name="bracketPairs">The bracket pairs permitted to nest inside the scope.</param>
/// <exception cref="RantException">On an unmatched or mismatched closing bracket, or end of file.</exception>
public IEnumerable <Token <R> > ReadToTokenInParentScope(R tokenType, Delimiters bracketPairs)
{
    SkipSpace();
    _stack.Clear();
    Token <R> token = null;
    while (!End)
    {
        token = ReadToken();
        if (token.ID == tokenType && !_stack.Any())
        {
            yield break;
        }
        if (bracketPairs.ContainsOpening(token.ID)) // Allows nesting
        {
            _stack.Push(token);
        }
        else if (bracketPairs.ContainsClosing(token.ID)) // Allows nesting
        {
            // Since this method assumes that the first opening bracket was already read, an empty _stack indicates main scope closure.
            if (!_stack.Any())
            {
                // FIX: the original used the abandoned "\{expr}" interpolation syntax
                // (a VS14-CTP experiment) which is not valid C#; use $"{...}".
                throw new RantException(_source, token, $"Unexpected token '{token.Value}'");
            }
            var lastOpening = _stack.Pop();
            if (!bracketPairs.Contains(lastOpening.ID, token.ID))
            {
                throw new RantException(_source, token, "Invalid closure '" + lastOpening.Value + " ... " + token.Value + "' - expected '" + RantLexer.Rules.GetSymbolForId(bracketPairs.GetClosing(lastOpening.ID)) + "'");
            }
        }
        yield return(token);
    }
    // End of stream: report the symbol we were still waiting for - the innermost
    // unclosed bracket's closer if any, otherwise the requested token type.
    throw new RantException(_source, null, $"Unexpected end of file; expected '{RantLexer.Rules.GetSymbolForId(_stack.Any() ? bracketPairs.GetClosing(_stack.Peek().ID) : tokenType)}'.");
}
// Returns true when 'current' starts an escape sequence: it must be the escape
// indicator and be followed by a character that needs escaping (a double quote
// under string-literal handling, or any configured delimiter).
private bool IsEscapeSequence(char current, char?next)
{
    if (current != EscapeSequenceIndicator || !next.HasValue)
    {
        return(false);
    }

    char following = next.Value;
    if (DoubleQuoteBehavior == PowerArgs.DoubleQuoteBehavior.IncludeQuotedTokensAsStringLiterals && following == '"')
    {
        return(true);
    }

    return(Delimiters.Contains(following + ""));
}
/// <summary>
/// Reads a bracketed, separator-delimited scope (whose opening bracket was already
/// consumed) and yields each item as its own token sequence, with leading and
/// trailing whitespace tokens trimmed off.
/// </summary>
/// <param name="open">The token ID that opened the scope.</param>
/// <param name="close">The token ID that closes the scope.</param>
/// <param name="separator">The token ID that separates items at the top nesting level.</param>
/// <param name="bracketPairs">The bracket pairs permitted to nest inside the scope.</param>
/// <exception cref="RantException">On a mismatched closure or end of file.</exception>
public IEnumerable <IEnumerable <Token <R> > > ReadMultiItemScope(R open, R close, R separator, Delimiters bracketPairs)
{
    SkipSpace();
    _stack.Clear();
    // Assumes first bracket was already read.
    _stack.Push(new Token <R>(open, RantLexer.Rules.GetSymbolForId(open)));
    int start = _pos;
    Token <R> token = null;
    while (!End)
    {
        // Peek but don't consume - this saves some calculations later on.
        token = PeekToken();
        // Contains(id, id) appears to flag quote-like pairs whose opener equals their
        // closer — TODO confirm; for those, only close when matching the innermost bracket.
        bool literalCheck = !bracketPairs.Contains(token.ID, token.ID) || _stack.Peek().ID == token.ID;
        // Closing bracket
        if (literalCheck && (bracketPairs.ContainsClosing(token.ID) || close == token.ID)) // Previous bracket allows nesting
        {
            var lastOpening = _stack.Pop();
            // Handle invalid closures
            if (!bracketPairs.Contains(lastOpening.ID, token.ID)) // Not in main pair
            {
                throw new RantException(_source, token, "Invalid closure '" + lastOpening.Value + " ... " + token.Value + "' - expected '" + RantLexer.Rules.GetSymbolForId(bracketPairs.GetClosing(lastOpening.ID)) + "'");
            }
            // If the _stack is empty, this is the last item. Stop the iterator.
            if (!_stack.Any())
            {
                yield return(_tokens
                    .SkipWhile((t, i) => i < start) // Cut to start of section
                    .TakeWhile((t, i) => i < _pos - start) // Cut to end of section
                    .SkipWhile(t => t.ID == R.Whitespace) // Remove leading whitespace
                    .Reverse() // Reverse to trim end
                    .SkipWhile(t => t.ID == R.Whitespace) // Remove trailing whitespace
                    .Reverse() // Reverse back
                    .ToArray());
                _pos++;
                yield break;
            }
        }
        // Opening bracket
        else if (bracketPairs.ContainsOpening(token.ID) || open == token.ID) // Previous bracket allows nesting
        {
            _stack.Push(token);
        }
        // Separator at the top nesting level (only the scope's own bracket on the stack)
        else if (token.ID == separator && _stack.Count == 1)
        {
            yield return(_tokens
                .SkipWhile((t, i) => i < start) // Cut to start of section
                .TakeWhile((t, i) => i < _pos - start) // Cut to end of section
                .SkipWhile(t => t.ID == R.Whitespace) // Remove leading whitespace
                .Reverse() // Reverse to trim end
                .SkipWhile(t => t.ID == R.Whitespace) // Remove trailing whitespace
                .Reverse() // Reverse back
                .ToArray());
            _pos++;
            start = _pos;
            continue;
        }
        // Move to next position
        _pos++;
    }
    throw new RantException(_source, null, "Unexpected end of file - expected '" + RantLexer.Rules.GetSymbolForId(close) + "'.");
}
/// <summary>
/// Determines whether the specified item is present in the delimiter collection.
/// </summary>
/// <param name="item">The item to look for.</param>
/// <returns>True if the item is contained; otherwise false.</returns>
public bool Contains(T item) => Delimiters.Contains(item);
/// <summary>
/// Lexes the source text one character at a time, accumulating characters into
/// lexemes that are flushed whenever a delimiter or trim-delimiter is reached.
/// Classifies each flushed lexeme as a keyword/operator, identifier, or constant,
/// recording errors for unknown lexemes and for identifier declaration/usage problems.
/// </summary>
/// <param name="code">The source text to lex.</param>
/// <returns>The accumulated identifiers, constants, lexemes, errors and grammar lexemes.</returns>
public OuterLexemes Parse(string code)
{
    int lineNumber = 1;
    var lexemeBuilder = new StringBuilder();
    foreach (string symbol in code.Select(c => c.ToString()))
    {
        // Remember the line the current lexeme belongs to before a newline bumps the counter.
        int realLineNumber = lineNumber;
        if (symbol.Equals("\n"))
        {
            lineNumber++;
        }
        // Any delimiter ends the lexeme being built.
        if (TrimDelimiters.Contains(symbol) || Delimiters.Contains(symbol))
        {
            string lexeme = lexemeBuilder.ToString();
            if (PredefinedWords.Contains(lexeme) || Operators.Contains(lexeme))
            {
                AddLexeme(lexeme, realLineNumber);
            }
            else if (IsIdentifier(lexeme))
            {
                // Duplicated identifier: declared via 'var' but already known.
                if (Lexemes.Any() && Lexemes.Last().SubString.Equals("var") && Identifiers.Any(e => e.Name.Equals(lexeme)))
                {
                    AddError($"Duplicate declaration of {lexeme} identifier", realLineNumber);
                    lexemeBuilder.Clear();
                    continue;
                }
                // Usage of undeclared identifier: not preceded by 'var'/'program' and not yet known.
                if (Lexemes.Any() && !Lexemes.Last().SubString.Equals("var") && !Lexemes.Last().SubString.Equals("program") && !Identifiers.Any(e => e.Name.Equals(lexeme)))
                {
                    AddError($"Usage of undeclared identifier: {lexeme}", realLineNumber);
                    lexemeBuilder.Clear();
                    continue;
                }
                AddIdentifier(lexeme);
                AddLexeme(lexeme, realLineNumber, IdentifierType.Identifier);
            }
            else if (IsConstant(lexeme))
            {
                AddConstant(lexeme);
                AddLexeme(lexeme, realLineNumber, IdentifierType.Constant);
            }
            else if (!string.IsNullOrEmpty(lexeme))
            {
                // Non-empty lexeme that matched no category at all.
                AddError($"Unknown lexeme: {lexeme}", realLineNumber);
                lexemeBuilder.Clear();
                continue;
            }
            // Real delimiters (unlike trim-delimiters such as whitespace) are lexemes themselves.
            if (Delimiters.Contains(symbol))
            {
                AddLexeme(symbol, realLineNumber);
            }
            lexemeBuilder.Clear();
            continue;
        }
        // Ordinary character: keep accumulating (trim-delimiters are silently dropped).
        if (!TrimDelimiters.Contains(symbol))
        {
            lexemeBuilder.Append(symbol);
        }
    }
    return(new OuterLexemes() { Identifiers = Identifiers, Constants = Constants, Lexemes = Lexemes, Errors = Errors, Grammar = GrammarLexemes });
}
// Handles a "plain" character: either manages double-quoted string-literal state
// (per DoubleQuoteBehavior), or appends the character to the current token and,
// when the token now ends with a delimiter, splits the delimiter off into its own token.
// NOTE(review): unlike the sibling overload, 'line' is passed by value here and
// this variant mutates 'currentTokenIndex' - presumably a shared write cursor for
// the token's character buffer; confirm against the enclosing class.
private void Tokenize_Plain(string input, ref int currentIndex, ref char currentCharacter, int line, ref int col, ref T currentToken, List <T> tokens)
{
    if (currentCharacter == '"' && DoubleQuoteBehavior != PowerArgs.DoubleQuoteBehavior.NoSpecialHandling)
    {
        if (DoubleQuoteBehavior == PowerArgs.DoubleQuoteBehavior.IncludeQuotedTokensAsStringLiterals)
        {
            if (insideStringLiteral == false)
            {
                // Opening quote: close any token in progress, start the literal with the quote.
                FinalizeTokenIfNotNull(ref currentToken, tokens);
                AppendToTokenSafe(ref currentToken, currentCharacter, currentIndex, line, col);
                insideStringLiteral = true;
            }
            else
            {
                // Closing quote: append it and emit the completed literal token.
                AppendToTokenSafe(ref currentToken, currentCharacter, currentIndex, line, col);
                FinalizeTokenIfNotNull(ref currentToken, tokens);
                insideStringLiteral = false;
            }
        }
        else
        {
            throw new TokenizerException("Unknown double quote option: " + DoubleQuoteBehavior);
        }
    }
    else
    {
        AppendToTokenSafe(ref currentToken, currentCharacter, currentIndex, line, col);
        if (insideStringLiteral == false)
        {
            var t = currentToken;
            // Manual scan for the LONGEST delimiter the current token now ends with.
            string bestDelimiter = null;
            for (var i = 0; i < Delimiters.Count; i++)
            {
                var d = Delimiters[i];
                if (CurrentTokenEndsWith(currentToken, d))
                {
                    if (bestDelimiter == null || d.Length > bestDelimiter.Length)
                    {
                        bestDelimiter = d;
                    }
                }
            }
            if (bestDelimiter == null)
            {
                // do nothing
            }
            else
            {
                if (Delimiters.Contains(currentToken.Value))
                {
                    // The whole token IS a delimiter - emit it as-is.
                    FinalizeTokenIfNotNull(ref currentToken, tokens);
                }
                else
                {
                    var delimiter = bestDelimiter;
                    // Rewind the shared character cursor past the delimiter so the
                    // finalized token excludes it - TODO confirm this matches how
                    // FinalizeTokenIfNotNull consumes currentTokenIndex.
                    currentTokenIndex -= delimiter.Length;
                    var prevToken = currentToken;
                    FinalizeTokenIfNotNull(ref currentToken, tokens);
                    // Re-emit the delimiter as its own token positioned right after the prefix.
                    currentToken = TokenFactory(prevToken.StartIndex + prevToken.Value.Length, prevToken.Line, prevToken.Column + prevToken.Value.Length);
                    currentToken.SourceFileLocation = this.SourceFileLocation;
                    for (var i = 0; i < delimiter.Length; i++)
                    {
                        AppendToTokenSafe(ref currentToken, delimiter[i], prevToken.StartIndex + prevToken.Value.Length + i, prevToken.Line, prevToken.Column + prevToken.Value.Length + i);
                    }
                    FinalizeTokenIfNotNull(ref currentToken, tokens);
                }
            }
        }
    }
}