/// <summary>
/// Scans a numeric literal whose first digit is the current character.
/// Digits are consumed for as long as the next character is recognised as a digit.
/// A period (.) that is not directly followed by a second period is treated as a decimal point,
/// and the digits after it are consumed as the fractional part.
/// If no digit follows the period, a 0 is appended so the value is a float in the target language.
/// <example>The following sequences are recognised as numeric:
/// <code>
/// 2
/// 42
/// 42.1
/// 54. # Will become 54.0
/// </code>
/// </example>
/// </summary>
private void ScanNumeric()
{
    string digits = CurrentChar.ToString();
    while (recogniser.IsDigit(Peek()))
    {
        Pop();
        digits += CurrentChar;
    }

    // Two consecutive periods form a range operator, so only a lone period starts a fraction.
    bool hasFraction = Peek() == '.' && Peek(2) != '.';
    if (hasFraction)
    {
        Pop();
        digits += CurrentChar;
        while (recogniser.IsDigit(Peek()))
        {
            Pop();
            digits += CurrentChar;
        }

        // "54." carries no fractional digits; pad it to "54.0".
        if (digits[digits.Length - 1] == '.')
        {
            digits += "0";
        }
    }

    ScannerToken numericToken = Token(TokenType.NUMERIC, digits);
    numericToken.SymbolicType = new TypeContext(TokenType.NUMERIC);
    numericToken.SymbolicType.IsFloat = hasFraction;
    Tokens.AddLast(numericToken);
}
/// <summary>
/// Translates the current character into a key press.
/// The keyboard layout is asked first; when it has no gesture for the character,
/// the character is converted through a <c>KeyConverter</c>, with upper-case characters
/// lower-cased and combined with the Shift modifier.
/// When a key other than <c>Key.None</c> results, the key press is added and <c>current</c> is incremented.
/// </summary>
private void ParseChar()
{
    string text = CurrentChar.ToString();
    Key resolvedKey;
    ModifierKeys resolvedModifiers = ModifierKeys.None;

    KeyPressInfo layoutGesture = KeyboardLayout.Instance.GetKeyGestureForChar(text[0]);
    if (layoutGesture == null)
    {
        // No layout entry: fall back to the converter, mapping upper case to Shift + lower case.
        if (char.IsUpper(text, 0))
        {
            text = text.ToLower();
            resolvedModifiers = ModifierKeys.Shift;
        }

        resolvedKey = (Key) new KeyConverter().ConvertFromInvariantString(text);
    }
    else
    {
        resolvedKey = layoutGesture.Key;
        resolvedModifiers = layoutGesture.Modifiers;
    }

    if (resolvedKey == Key.None)
    {
        return;
    }

    Add(new KeyPressInfo(resolvedKey, resolvedModifiers));
    current++;
}
/// <summary>
/// Advances through the file with GetNextChar() until the current character followed by the
/// next (peeked) character matches <paramref name="commentEndRegex"/>, or the end of file is reached.
/// </summary>
/// <param name="commentEndRegex">Pattern that recognises the end of the comment.</param>
public void SkipComment(Regex commentEndRegex)
{
    while (true)
    {
        // The end marker is tested before the end-of-file check.
        if (commentEndRegex.IsMatch(CurrentChar.ToString() + JavaFile.PeekNextChar()))
        {
            break;
        }

        if (JavaFile.EndOfFile())
        {
            break;
        }

        JavaFile.GetNextChar();
    }
}
/// <summary>
/// Extracts a token that starts at the current character.
/// A "/&gt;" pair yields a short-close token; a "&lt;/" pair yields a close token whose text is
/// everything up to the next '&gt;' (or an EOF token when that text is "w:document");
/// any other symbol yields a token based on the symbol itself.
/// One further character is always consumed before returning.
/// </summary>
protected override void Extract()
{
    if (!CurrentChar.IsSymbol())
    {
        NextChar();
        return;
    }

    if (CurrentChar == '/' && PeekChar == '>')
    {
        type = KeywordToken.ShortClose;
        text = KeywordToken.ShortClose.GetText();
        NextChar();
    }
    else if (CurrentChar == '<' && PeekChar == '/')
    {
        NextChar(2);

        // Collect the tag name up to, but not including, the closing '>'.
        var tagName = new StringBuilder();
        for (; CurrentChar != '>'; NextChar())
        {
            tagName.Append(CurrentChar);
        }

        text = tagName.ToString();

        // Closing w:document forces the end of the file.
        if (text == "w:document")
        {
            type = KeywordToken.EOF;
        }
        else
        {
            type = KeywordToken.Close;
        }
    }
    else
    {
        type = CurrentChar.ToTokenType();
        text = CurrentChar.ToString();
    }

    NextChar();
}
/// <summary>
/// Scans a single-line comment and appends a COMMENT token to the token list.
/// Starting with the current character, characters are consumed until the next character
/// is an end-of-line or end-of-file character.
/// </summary>
private void ScanComment()
{
    string commentText = CurrentChar.ToString();

    while (!(IsEOL(Peek()) || IsEOF(Peek())))
    {
        Pop();
        commentText += CurrentChar;
    }

    var commentToken = Token(TokenType.COMMENT, commentText);
    Tokens.AddLast(commentToken);
}
/// <summary>
/// Reads the next token. The token description is chosen from the current character;
/// following characters are appended for as long as the description matches them
/// and the current character is not EOF.
/// </summary>
/// <returns>A token with the description's type and the collected text.</returns>
public Token Next()
{
    var description = GetTokenDescriptionFromChar(CurrentChar);

    var lexeme = new StringBuilder();
    lexeme.Append(CurrentChar.ToString());

    while (true)
    {
        // NextChar() advances; its result is what the description is matched against.
        if (!description.Match(NextChar()))
        {
            break;
        }

        if (CurrentChar == EOF)
        {
            break;
        }

        lexeme.Append(CurrentChar);
    }

    return new Token(description.Type, lexeme.ToString());
}
/// <summary>
/// Tries the keyword test first; when it does not succeed, emits the current character
/// as an identifier token and advances.
/// </summary>
private void HandleKeyword()
{
    bool keywordHandled = TestKeywords();
    if (!keywordHandled)
    {
        EmitToken(TokenKind.IdentifierToken, CurrentChar.ToString());
        Next();
    }
}
/// <summary>
/// Scans a multiline comment and appends a MULT_COMNT token to the token list.
/// Starting with the current character, characters are consumed until the scanned text
/// contains the closing sequence <c>&gt;#</c> or the end of file is reached.
/// </summary>
/// <exception cref="Lexer.Exceptions.InvalidSyntaxException">
/// When the end of file is reached before the closing sequence was seen.
/// </exception>
private void ScanMultiLineComment()
{
    // NOTE(review): earlier documentation showed comments written as "<# ... #>", but this
    // code terminates on ">#"; confirm the intended delimiters against the language grammar.
    string subString = CurrentChar.ToString();
    while (!IsEOF(Peek()) && !subString.Contains(">#"))
    {
        Pop();
        subString += CurrentChar;
    }

    if (!subString.Contains(">#"))
    {
        // The exception used to be constructed without being thrown, so unterminated
        // comments were silently turned into tokens.
        throw new InvalidSyntaxException($"Multiline comments must be closed before reaching end of file. Error at line {Line}:{Offset}.");
    }

    Tokens.AddLast(Token(TokenType.MULT_COMNT, subString));
}
/// <summary>
/// Scans a range operator and appends an OP_RANGE token to the token list.
/// Starting with the current character, further characters are consumed while the next character is a period (.).
/// </summary>
/// <exception cref="Lexer.Exceptions.InvalidSyntaxException">
/// When the scanned sequence is not exactly two characters long, i.e. is not '..'.
/// </exception>
private void ScanRange()
{
    string subString = CurrentChar.ToString();

    // For a well-formed range this loop runs exactly once.
    while (Peek() == '.')
    {
        Pop();
        subString += CurrentChar;
    }

    if (subString.Length != 2)
    {
        // The exception used to be constructed without being thrown, so sequences such as
        // "..." were silently accepted as a range.
        throw new InvalidSyntaxException($"Invalid range symbol. Range symbol must be '..' but was '{subString}'. Error at line {Line}:{Offset}.");
    }

    Tokens.AddLast(Token(TokenType.OP_RANGE));
}
/// <summary>
/// Reads a literal into Lexeme, starting with the current character.
/// Characters are consumed until the next character is a double quote or a newline,
/// or the end of file is reached. When the next character is a double quote it is consumed
/// and appended as well. Token is set to the literal token.
/// </summary>
/// <example>
/// "literal"
/// </example>
public void ProcessLiteral()
{
    Token = Tokens.LiteralT;
    Lexeme = CurrentChar.ToString();

    while (!(JavaFile.PeekNextChar() == '\"' || JavaFile.PeekNextChar() == '\n' || JavaFile.EndOfFile()))
    {
        JavaFile.GetNextChar();
        Lexeme += CurrentChar;
    }

    // Without a closing quote there is nothing more to take.
    if (JavaFile.PeekNextChar() != '\"')
    {
        return;
    }

    JavaFile.GetNextChar();
    Lexeme += CurrentChar.ToString();
}
/// <summary>
/// Picks a name at the current position.
/// A name may begin with '-' when the following character is a letter, '-' or '_';
/// it then continues over letters, digits, '-' and '_'.
/// </summary>
/// <returns>
/// The name, or null when the input has ended, no name starts here, or no character was consumed.
/// </returns>
protected string PickName()
{
    if (End)
    {
        return null;
    }

    Skip();
    var start = Index;

    if (!IsNameStart)
    {
        return null;
    }

    if (CurrentChar == '-')
    {
        var following = NextChar;
        if (!following.HasValue)
        {
            return null;
        }

        if (!Regex.IsMatch(following.ToString(), @"[A-Za-zÀ-ÿ\-_]"))
        {
            return null;
        }

        _index += 2; // skip the first two
    }

    while (!End && Regex.IsMatch(CurrentChar.ToString(), @"[A-Za-zÀ-ÿ\-_0-9]"))
    {
        _index++;
    }

    if (start == Index)
    {
        return null;
    }

    var stop = Index;
    wasWhitespace = false;
    Skip();
    return css.Substring(start, stop - start);
}
/// <summary>
/// Scans a string literal and appends a STRING token to the token list.
/// Starting with the current character, characters are consumed until the next character
/// is a quotation (") symbol or the end of file; a closing quotation symbol is consumed as well.
/// <example>An example of a valid string:
/// <c>
/// "Hello, World!"
/// "I am a valid string,.!-/()*?#$"
/// </c>
/// </example>
/// </summary>
/// <exception cref="Lexer.Exceptions.InvalidSyntaxException">
/// When the end of file is reached before a closing quotation symbol was consumed.
/// </exception>
private void ScanString()
{
    string subString = CurrentChar.ToString();
    while (Peek() != '"' && !IsEOF(Peek()))
    {
        Pop();
        subString += CurrentChar;
    }

    // Track the closing quote explicitly: checking whether the text ends with '"' would be
    // satisfied by the opening quote alone when the file ends right after it.
    bool closed = false;
    if (NextChar == '"')
    {
        Pop();
        subString += CurrentChar;
        closed = true;
    }

    if (!closed)
    {
        // The exception used to be constructed without being thrown.
        throw new InvalidSyntaxException($"Strings must be closed. Error at line {Line}:{Offset}.");
    }

    Tokens.AddLast(Token(TokenType.STRING, subString));
}
/// <summary>
/// Checks whether a file name consists only of the letters a-z (in either case),
/// the digits 0-9, underscores and spaces.
/// </summary>
/// <param name="fileName">The file name to check; may be null.</param>
/// <returns>
/// true when <paramref name="fileName"/> is non-empty and every character is allowed;
/// false otherwise, including for null and the empty string.
/// </returns>
public static bool IsValidFileName(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return false;
    }

    const string validChars = "abcdefghijklmnopqrstuvwxyz0123456789_ ";

    foreach (char c in fileName)
    {
        // The char overload of IndexOf compares ordinally. The string overload is
        // culture-sensitive and reports a match for ignorable characters such as the
        // soft hyphen, which would let them through.
        if (validChars.IndexOf(char.ToLowerInvariant(c)) < 0)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Parses an identifier that starts at the current character (documented by the original
/// author as following the ECMA-55 rules). Letters and digits after the first character are
/// collected, and the result is converted to upper case.
/// </summary>
/// <returns>
/// A <c>SimpleToken</c> when the upper-cased text is a known keyword; otherwise an <c>IdentifierToken</c>.
/// </returns>
private IToken ParseIdent()
{
    var buffer = new StringBuilder(CurrentChar.ToString());

    for (NextChar(); IsDigit(CurrentChar) || IsLetter(CurrentChar); NextChar())
    {
        buffer.Append(CurrentChar);
    }

    var name = buffer.ToString().ToUpperInvariant();

    if (!_keyWordsMap.ContainsKey(name))
    {
        return new IdentifierToken(name);
    }

    return new SimpleToken(_keyWordsMap[name]);
}
/// <summary>
/// Reads the first character of a token into Lexeme and dispatches to the processing
/// routine that matches it: comment, word, number, two-character operator, single special
/// character or literal. Anything else is marked as an unknown token.
/// </summary>
public void ProcessToken()
{
    JavaFile.GetNextChar();
    Lexeme = CurrentChar.ToString();

    if (commentStartRegex.IsMatch(Lexeme + JavaFile.PeekNextChar()))
    {
        // Skip the comment, then continue with the token that follows it.
        ProcessComment();
        GetNextToken();
        return;
    }

    char first = Lexeme[0];

    if (char.IsLetter(first))
    {
        ProcessWordToken();
        return;
    }

    if (char.IsDigit(first))
    {
        ProcessNumToken();
        return;
    }

    if (comparisonRegex.IsMatch(Lexeme) && lookAheadCharRegex.IsMatch(JavaFile.PeekNextChar().ToString()))
    {
        ProcessDoubleToken();
        return;
    }

    if (specialCharRegex.IsMatch(Lexeme))
    {
        ProcessSingleToken();
        return;
    }

    if (Lexeme == "\"")
    {
        ProcessLiteral();
        return;
    }

    Token = Tokens.UnknownT;
}
/// <summary>
/// Parses an attribute condition such as <c>[attr]</c>, <c>[attr=value]</c> or
/// <c>[attr~=value]</c> (operators '=', '~=', '|=', '^=', '$=' and '*=').
/// Errors are reported through AddError; after an error an empty condition is returned.
/// </summary>
/// <returns>
/// null when the current character is not '['; otherwise the parsed condition,
/// or an empty <c>AttributeCondition</c> when parsing failed.
/// </returns>
internal override AttributeCondition DoParse()
{
    // http://www.w3schools.com/cssref/css_selectors.asp
    if (CurrentChar != '[')
    {
        return null;
    }

    Index++; // skip '['

    var attributeName = PickName();
    if (attributeName == null)
    {
        AddError(ErrorCode.ExpectingToken, "condition");
        SkipTillEnd();
        return new AttributeCondition();
    }

    // "[attr" at the end of input, or the complete "[attr]" form.
    if (End)
    {
        AddError(ErrorCode.UnexpectedEnd, "]");
        return new AttributeCondition
        {
            Attribute = attributeName,
            ConditionType = AttributeCondition.Type.HavingAttribute,
        };
    }

    if (CurrentChar == ']')
    {
        Index++; // skip ']'
        return new AttributeCondition
        {
            Attribute = attributeName,
            ConditionType = AttributeCondition.Type.HavingAttribute,
        };
    }

    AttributeCondition.Type conditionType;
    switch (CurrentChar)
    {
        case '=':
            conditionType = AttributeCondition.Type.Equals;
            Index++; // skip the '='
            break;

        case '~':
        case '|':
        case '^':
        case '$':
        case '*':
            if (NextChar != '=')
            {
                AddError(ErrorCode.ExpectingToken, "=");
                SkipTillEnd();
                return new AttributeCondition();
            }

            // The condition type is obtained by casting the operator character.
            conditionType = (AttributeCondition.Type)CurrentChar;
            Index += 2; // skip the token
            break;

        default:
            AddError(ErrorCode.UnexpectedToken, CurrentChar.ToString());
            SkipTillEnd();
            return new AttributeCondition();
    }

    var conditionValue = PickString();
    if (conditionValue == null)
    {
        AddError(ErrorCode.ExpectingValue, SkipTillEnd());
        SkipTillEnd();
        return new AttributeCondition();
    }

    if (!End && CurrentChar != ']')
    {
        AddError(ErrorCode.ExpectingToken, "]");
        SkipTillEnd();
        return new AttributeCondition();
    }

    if (End)
    {
        AddError(ErrorCode.ExpectingToken, "]");
    }

    Index++; // skip ']'

    return new AttributeCondition
    {
        Attribute = attributeName,
        Value = conditionValue,
        ConditionType = conditionType,
    };
}
/// <summary>
/// Converts a string to proper case. A character is upper-cased when it is the first one or
/// follows a space, period, hyphen, comma, double quote or apostrophe. Otherwise it is
/// lower-cased, except that the character following an output of exactly "Mc" is upper-cased
/// and the character following an output of exactly "Mac" is copied unchanged.
/// </summary>
/// <param name="strInput">The text to convert.</param>
/// <returns>The converted text.</returns>
public static string PCase(string strInput)
{
    string result = "";
    string previous = "";

    for (int i = 0; i < strInput.Length; i++)
    {
        string letter = strInput[i].ToString();
        string cased;

        switch (previous)
        {
            case "":
            case " ":
            case ".":
            case "-":
            case ",":
            case "\"":
            case "'":
                cased = letter.ToUpper();
                break;

            default:
                // The prefix rules look at everything produced so far, trimmed and upper-cased.
                switch (result.ToUpper().Trim())
                {
                    case "MC":
                    case "O'":
                        cased = letter.ToUpper();
                        break;

                    case "MAC":
                        cased = letter;
                        break;

                    default:
                        cased = letter.ToLower();
                        break;
                }

                break;
        }

        result += cased;
        previous = letter;
    }

    return result;
}
/// <summary>
/// Reads the next token from the input. Leading whitespace is skipped and the line
/// information is saved, then CurrentToken and StringValue (and, for numbers, NumberValue)
/// are set according to the current character.
/// </summary>
/// <returns>false when the current character is '\0' (the Eof token is set); otherwise true.</returns>
/// <exception cref="ParserException">When '&amp;' or '|' is not followed by the same character.</exception>
public bool NextToken()
{
    bool hasWildcard;
    bool field;
    SkipWhiteSpaces();
    SaveLineInfo();
    IsPhrase = false;
    switch (CurrentChar)
    {
        // End of input.
        case '\0': CurrentToken = Token.Eof; StringValue = string.Empty; return(false);

        // Single-character tokens: the text is the character itself, then advance by one.
        case '(': CurrentToken = Token.LParen; StringValue = CurrentChar.ToString(); NextChar(); break;
        case ')': CurrentToken = Token.RParen; StringValue = CurrentChar.ToString(); NextChar(); break;
        case '[': CurrentToken = Token.LBracket; StringValue = CurrentChar.ToString(); NextChar(); break;
        case ']': CurrentToken = Token.RBracket; StringValue = CurrentChar.ToString(); NextChar(); break;
        case '{': CurrentToken = Token.LBrace; StringValue = CurrentChar.ToString(); NextChar(); break;
        case '}': CurrentToken = Token.RBrace; StringValue = CurrentChar.ToString(); NextChar(); break;
        case ',': CurrentToken = Token.Comma; StringValue = CurrentChar.ToString(); NextChar(); break;
        case ':': CurrentToken = Token.Colon; StringValue = CurrentChar.ToString(); NextChar(); break;
        case '+': CurrentToken = Token.Plus; StringValue = CurrentChar.ToString(); NextChar(); break;
        case '-': CurrentToken = Token.Minus; StringValue = CurrentChar.ToString(); NextChar(); break;
        case '^': CurrentToken = Token.Circ; StringValue = CurrentChar.ToString(); NextChar(); break;
        case '~': CurrentToken = Token.Tilde; StringValue = CurrentChar.ToString(); NextChar(); break;
        case '!': CurrentToken = Token.Not; StringValue = CurrentChar.ToString(); NextChar(); break;

        // Quoted string: a wildcard takes precedence over a field, which takes precedence over a plain string.
        case '"':
        case '\'':
            StringValue = ScanQuotedString(out hasWildcard, out field, out var isPhrase);
            CurrentToken = hasWildcard ? Token.WildcardString : field ? 
                Token.Field : Token.String;
            IsPhrase = isPhrase;
            break;

        // "&&" - a single '&' is an error.
        case '&':
            NextChar();
            if (CurrentChar != '&')
            {
                throw new ParserException("Invalid operator: &", CreateLastLineInfo());
            }
            CurrentToken = Token.And;
            StringValue = "&&";
            NextChar();
            SkipWhiteSpaces();
            break;

        // "||" - a single '|' is an error.
        case '|':
            NextChar();
            if (CurrentChar != '|')
            {
                throw new ParserException("Invalid operator: |", CreateLastLineInfo());
            }
            CurrentToken = Token.Or;
            StringValue = "||";
            NextChar();
            SkipWhiteSpaces();
            break;

        // "<=", "<>" or "<". For a lone '<' the character after it has already been
        // reached by the NextChar() call above, so no further advance happens.
        case '<':
            NextChar();
            if (CurrentChar == '=')
            {
                CurrentToken = Token.LTE;
                StringValue = "<=";
                NextChar();
                SkipWhiteSpaces();
            }
            else if (CurrentChar == '>')
            {
                CurrentToken = Token.NEQ;
                StringValue = "<>";
                NextChar();
                SkipWhiteSpaces();
            }
            else
            {
                CurrentToken = Token.LT;
                StringValue = "<";
            }
            break;

        // ">=" or ">", handled like the '<' case.
        case '>':
            NextChar();
            if (CurrentChar == '=')
            {
                CurrentToken = Token.GTE;
                StringValue = ">=";
                NextChar();
                SkipWhiteSpaces();
            }
            else
            {
                CurrentToken = Token.GT;
                StringValue = ">";
            }
            break;

        // -----------------------------------
        default:
            if (CurrentCharType == CharType.Digit)
            {
                // Text starting with a digit: a number when ScanNumber succeeds,
                // otherwise a (possibly wildcard) string.
                if (ScanNumber(out var numberValue, out var stringValue, out hasWildcard, out field))
                {
                    CurrentToken = Token.Number;
                    StringValue = stringValue;
                    NumberValue = numberValue;
                }
                else
                {
                    CurrentToken = hasWildcard ? Token.WildcardString : Token.String;
                    StringValue = stringValue;
                }
            }
            else
            {
                // Unquoted text: control keyword, wildcard string, field, or whatever
                // Keywords.ScanKeyword returns for the text - checked in that order.
                StringValue = ScanNonQuotedString(out hasWildcard, out field, out var keyword);
                if (keyword)
                {
                    CurrentToken = Keywords.ScanControl(StringValue, CreateLastLineInfo());
                }
                else if (hasWildcard)
                {
                    CurrentToken = Token.WildcardString;
                }
                else if (field)
                {
                    CurrentToken = Token.Field;
                }
                else
                {
                    CurrentToken = Keywords.ScanKeyword(StringValue);
                }
                SkipWhiteSpaces();
            }
            break;
    }
    return(true);
}
/// <summary>
/// Parses one selector or a comma-separated list of selectors.
/// Parsing stops at ';', '{', '}' or ')', at the end of input, when a selector cannot be
/// parsed, or when no ',' follows the last selector.
/// </summary>
/// <returns>
/// null when the input has ended or no selector was parsed, the single selector when exactly
/// one was parsed, otherwise the list of parsed selectors.
/// </returns>
internal override Selector DoParse()
{
    if (End)
    {
        return null;
    }

    var parsed = new SelectorList();
    bool afterComma = false;

    for (;;)
    {
        bool stop;
        switch (CurrentChar)
        {
            case ';':
            case '{':
            case '}':
            case ')':
                // A terminator character ends the list.
                stop = true;
                break;

            default:
                var single = DoParseOne();
                stop = single == null; // that's a warning
                if (!stop)
                {
                    parsed.Selectors.Add(single);
                }

                break;
        }

        // A ',' was consumed but no further selector followed it.
        if (afterComma && stop && !End)
        {
            AddError(ErrorCode.UnexpectedToken, CurrentChar.ToString());
        }

        afterComma = !End && CurrentChar == ',';
        if (afterComma)
        {
            Index++;
        }

        if (End || !afterComma)
        {
            stop = true;
        }

        if (!stop)
        {
            continue;
        }

        int count = parsed.Selectors.Count;
        if (count == 0)
        {
            return null;
        }

        if (count == 1)
        {
            return parsed.Selectors[0];
        }

        return parsed;
    }
}
/// <summary>
/// Default extraction: the token text is the current character, the value is cleared,
/// and the current character is consumed.
/// </summary>
protected virtual void Extract()
{
    value = null;
    text = CurrentChar.ToString();

    NextChar();
}
/// <summary>
/// Passes the two-character lexeme formed by the current and the peeked character,
/// together with the multi-character operator table, to OperatorToken.
/// </summary>
/// <returns>The result of OperatorToken.</returns>
private bool MultiCharacterOperatorToken()
{
    string first = CurrentChar.ToString();
    string second = PeekChar.ToString();
    return OperatorToken(_multiCharacterOperators, string.Concat(first, second));
}
/// <summary>
/// Passes the current character as a one-character lexeme, together with the
/// single-character operator table, to OperatorToken.
/// </summary>
/// <returns>The result of OperatorToken.</returns>
private bool SingleCharacterOperatorToken()
{
    var lexeme = CurrentChar.ToString();
    bool matched = OperatorToken(_singleCharacterOperators, lexeme);
    return matched;
}
/// <summary>
/// Adds the current character as a SEPARATOR token and advances.
/// </summary>
private void ScanSeparator()
{
    string separatorText = CurrentChar.ToString();
    AddToken(separatorText, TokenType.SEPARATOR);

    Advance();
}
/// <summary>
/// Scans a word and appends the matching token to the token list: an APIN or DPIN token for
/// pin literals, a keyword token when the word is found in the keyword table, and otherwise
/// a VAR token (turned into an ARRAYINDEX token when the word is directly followed by '@').
/// Characters are consumed while the next character is an accepted character or a digit.
/// <example> Examples of accepted words:
/// <c>
/// a       # Variable name
/// is      # Keyword
/// for     # Keyword
/// apin3   # Analog pin, token value "A3"
/// dpin13  # Digital pin, token value "13"
/// a@2     # The 2 is not scanned here; the word becomes an ARRAYINDEX token
/// </c>
/// </example>
/// </summary>
private void ScanWord()
{
    string subString = CurrentChar.ToString();
    ScannerToken token;
    while (recogniser.IsAcceptedCharacter(Peek()) || recogniser.IsDigit(Peek()))
    {
        subString += Pop();
    }

    // A pin literal is the WHOLE word "apin<digits>" or "dpin<digits>", in any letter case.
    // The pattern is anchored because the pin number is taken with Substring(4), which is
    // only meaningful when the four-letter prefix sits at the very start of the word.
    if (Regex.IsMatch(subString, "^(a|d)pin\\d+$", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant))
    {
        string pinNumber = subString.Substring(4);

        // Decide analog/digital case-insensitively, consistent with the case-insensitive match.
        if (subString[0] == 'a' || subString[0] == 'A')
        {
            token = Token(TokenType.APIN, "A" + pinNumber);
            token.SymbolicType = new TypeContext(TokenType.APIN);
        }
        else
        {
            token = Token(TokenType.DPIN, pinNumber);
            token.SymbolicType = new TypeContext(TokenType.DPIN);
        }

        Tokens.AddLast(token);
        return;
    }

    if (Keywords.Keys.TryGetValue(subString, out TokenType tokenType))
    {
        if (tokenType == TokenType.BOOL)
        {
            // "on"/"off" are normalised to "true"/"false"; other boolean words are kept as written.
            string lowered = subString.ToLowerInvariant();
            string boolText = lowered == "on" ? "true" : lowered == "off" ? "false" : subString;
            token = Token(tokenType, boolText);
            token.SymbolicType = new TypeContext(TokenType.BOOL);
        }
        else
        {
            token = Token(tokenType);
            if (TokenTypeExpressions.IsOperator(tokenType))
            {
                token.SymbolicType = new TypeContext(tokenType);
            }
        }

        Tokens.AddLast(token);
        return;
    }

    token = Token(TokenType.VAR, subString);

    // A name directly after a FUNC or CALL token is a function name.
    if (Tokens.Any() && (Tokens.Last().Type == TokenType.FUNC || Tokens.Last().Type == TokenType.CALL))
    {
        token.SymbolicType = new TypeContext(TokenType.FUNC);
    }
    else
    {
        token.SymbolicType = new TypeContext(TokenType.VAR);
    }

    // "name@" marks an array index access; the '@' is consumed here.
    if (Peek() == '@')
    {
        Pop();
        token.SymbolicType = new TypeContext(TokenType.ARRAYINDEX);
        token.Type = TokenType.ARRAYINDEX;
    }

    Tokens.AddLast(token);
}