} //method

/// <summary>
/// Looks for one of the registered suffixes at the current source position.
/// When one matches, records it in <paramref name="details"/>, advances the source past it,
/// and replaces the type codes with those registered for that suffix (if any).
/// </summary>
/// <param name="source">Source stream positioned where a suffix may start.</param>
/// <param name="details">Scan state that receives the suffix and type codes.</param>
protected virtual void ReadSuffix(ISourceStream source, ScanDetails details) {
    // Cheap pre-filter: the current char must be the first char of some suffix.
    bool mayStartSuffix = _suffixesFirsts.IndexOf(source.CurrentChar) >= 0;
    if (!mayStartSuffix) {
        return;
    }
    bool caseInsensitive = IsSet(TermOptions.SpecialIgnoreCase);
    foreach (string candidate in Suffixes) {
        if (source.MatchSymbol(candidate, caseInsensitive)) {
            // First matching suffix wins.
            details.Suffix = candidate;
            source.Position += candidate.Length;
            // Derive the type codes from the suffix when it has an entry in the table.
            TypeCode[] suffixTypes;
            if (!string.IsNullOrEmpty(details.Suffix) && SuffixTypeCodes.TryGetValue(details.Suffix, out suffixTypes)) {
                details.TypeCodes = suffixTypes;
            }
            return;
        }
    }
} //method
/// <summary>
/// Reads a \uXXXX or \UXXXXXXXX escape starting at the escape character and returns the decoded char.
/// On success the source position is left on the character right after the last hex digit and
/// <c>ScanFlags.HasEscapes</c> is set. On failure <c>details.Error</c> is set and '\0' is returned.
/// </summary>
/// <param name="source">Source stream positioned on the escape character.</param>
/// <param name="details">Scan state that receives the error message or the HasEscapes flag.</param>
/// <returns>The decoded character, or '\0' when the escape is invalid.</returns>
private char ReadUnicodeEscape(ISourceStream source, ScanDetails details) {
    //Position is currently at "\" symbol
    source.Position++; //move to U/u char
    int len;
    switch (source.CurrentChar) {
        case 'u':
            len = 4;
            break;
        case 'U':
            len = 8;
            break;
        default:
            details.Error = "Invalid escape symbol, expected 'u' or 'U' only.";
            return '\0';
    }
    // The digits start one char AFTER the u/U marker, so they occupy
    // [Position + 1, Position + 1 + len). The check must account for that extra char,
    // otherwise Substring below can run past the end of the text.
    if (source.Position + 1 + len > source.Text.Length) {
        details.Error = "Invalid escape symbol";
        return '\0';
    }
    source.Position++; //move to the first digit
    string digits = source.Text.Substring(source.Position, len);
    // Validate up front so that malformed input is reported as a scan error
    // instead of a FormatException thrown from Convert.
    foreach (char d in digits) {
        bool isHex = (d >= '0' && d <= '9') || (d >= 'a' && d <= 'f') || (d >= 'A' && d <= 'F');
        if (!isHex) {
            details.Error = "Invalid escape symbol, expected " + len + " hex digits.";
            return '\0';
        }
    }
    // NOTE(review): for \U values above 0xFFFF this cast keeps only the low 16 bits;
    // the char return type cannot represent such code points.
    char result = (char)Convert.ToUInt32(digits, 16);
    source.Position += len;
    details.Flags |= ScanFlags.HasEscapes;
    return result;
}
/// <summary>
/// Looks for one of the registered prefixes at the current source position.
/// When one matches, records it in <paramref name="details"/>, advances the source past it,
/// and ORs in the scan flags registered for that prefix (if any).
/// </summary>
/// <param name="source">Source stream positioned where a prefix may start.</param>
/// <param name="details">Scan state that receives the prefix and flags.</param>
protected virtual void ReadPrefix(ISourceStream source, ScanDetails details) {
    // Cheap pre-filter: the current char must be the first char of some prefix.
    bool mayStartPrefix = _prefixesFirsts.IndexOf(source.CurrentChar) >= 0;
    if (!mayStartPrefix) {
        return;
    }
    bool caseInsensitive = IsSet(TermOptions.SpecialIgnoreCase);
    foreach (string candidate in Prefixes) {
        if (source.MatchSymbol(candidate, caseInsensitive)) {
            // First matching prefix wins.
            details.Prefix = candidate;
            source.Position += candidate.Length;
            // Add the flags associated with the prefix when it has an entry in the table.
            ScanFlags extraFlags;
            if (!string.IsNullOrEmpty(details.Prefix) && PrefixFlags.TryGetValue(details.Prefix, out extraFlags)) {
                details.Flags |= extraFlags;
            }
            return;
        }
    }
} //method
/// <summary>
/// Reads a string literal: consumes the start symbol, then searches for the matching end symbol,
/// skipping escaped end symbols and (when allowed) doubled end symbols.
/// </summary>
/// <param name="source">Source stream positioned on the start symbol.</param>
/// <param name="details">Scan state; receives the body text, or an error for a malformed literal.</param>
/// <returns>
/// false when no start symbol is found (or the text ends right after it);
/// true otherwise - either details.Body is set, or details.Error describes a malformed literal.
/// </returns>
protected override bool ReadBody(ISourceStream source, ScanDetails details) {
    if (!ReadStartSymbol(source, details)) {
        return false;
    }

    bool escapeEnabled = !details.IsSet(ScanFlags.DisableEscapes);
    bool ignoreCase = IsSet(TermOptions.SpecialIgnoreCase);
    int start = source.Position;
    string startS = details.ControlSymbol;
    string startS2 = startS + startS; //doubled start symbol

    //1. Find the string end
    // First get the position of the next line break; we need it to detect a malformed string,
    // so do it only if a line break is NOT allowed; otherwise set it to -1 (we don't care).
    int nlPos = details.IsSet(ScanFlags.AllowLineBreak) ? -1 : source.Text.IndexOf('\n', source.Position);
    while (!source.EOF()) {
        int endPos = source.Text.IndexOf(startS, source.Position);
        //Malformed string: either the end symbol is not found, or a line break comes before it
        bool malformed = endPos < 0 || (nlPos >= 0 && nlPos < endPos);
        if (malformed) {
            //Set source position for recovery: move to the next line if a line break is not allowed.
            if (nlPos > 0) {
                endPos = nlPos;
            }
            if (endPos > 0) {
                source.Position = endPos + 1;
            }
            details.Error = "Mal-formed string literal - cannot find termination symbol.";
            return true;
        }

        //We found the end symbol - check if it is escaped; if yes, skip it and continue the search.
        // A single look-behind char is not enough: in "abc\\" the final quote is preceded by an
        // escape char that is itself escaped. Count the run of escape chars directly before the
        // symbol (never reaching back past the body start); the symbol is escaped only when the
        // run length is odd.
        if (escapeEnabled) {
            int escapeRun = 0;
            for (int i = endPos - 1; i >= start && source.Text[i] == EscapeChar; i--) {
                escapeRun++;
            }
            if (escapeRun % 2 == 1) {
                source.Position = endPos + startS.Length;
                continue; //searching for end symbol
            }
        }

        //Check if it is a doubled end symbol
        source.Position = endPos;
        if (details.IsSet(ScanFlags.AllowDoubledQuote) && source.MatchSymbol(startS2, ignoreCase)) {
            source.Position = endPos + startS.Length * 2;
            continue;
        }

        //Ok, this is a normal end symbol that terminates the string.
        // Capture the body, advance past the end symbol and leave.
        details.Body = source.Text.Substring(start, endPos - start);
        source.Position = endPos + startS.Length;
        return true;
    } //end of loop to find string end
    return false;
}
}//method

/// <summary>
/// Fast path for plain decimal literals: parses details.Body as a double using the invariant culture.
/// Declines (returns false) for binary/octal/hex literals, literals with an exponent,
/// or when the configured decimal separator is not '.'.
/// </summary>
/// <param name="details">Scan state; details.Value receives the parsed double on success.</param>
/// <returns>true when the value was converted; false when the caller must use the full conversion.</returns>
private bool QuickConvertToDouble(ScanDetails details) {
    bool needsFullConversion =
        details.IsSet(ScanFlags.Binary | ScanFlags.Octal | ScanFlags.Hex | ScanFlags.HasExp)
        || DecimalSeparator != '.';
    if (needsFullConversion) {
        return false;
    }

    double parsed;
#if PocketPC || SILVERLIGHT
    // These builds use Convert.ToDouble and treat any exception as "not converted".
    try {
        parsed = Convert.ToDouble(details.Body, CultureInfo.InvariantCulture);
    } catch {
        return false;
    }
#else
    bool parsedOk = double.TryParse(details.Body, NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out parsed);
    if (!parsedOk) {
        return false;
    }
#endif
    details.Value = parsed;
    return true;
}
/// <summary>
/// Reads the body of a number literal: digits of the current base, at most one decimal separator,
/// and (for decimal numbers only) at most one exponent marker with an optional sign.
/// The scanned text is stored in details.Body.
/// </summary>
/// <param name="source">Source stream positioned at the first body character.</param>
/// <param name="details">Scan state; receives HasDot/HasExp flags, the exponent symbol and the body.</param>
/// <returns>Always true; an empty body is possible.</returns>
protected override bool ReadBody(ISourceStream source, ScanDetails details) {
    // Remember where the body starts - it may differ from source.TokenStart.
    int bodyStart = source.Position;
    string digitSet = GetDigits(details);
    bool decimalBase = !details.IsSet(ScanFlags.NonDecimal);
    bool floatsAllowed = !IsSet(TermOptions.NumberIntOnly);

    while (!source.EOF()) {
        char ch = source.CurrentChar;

        // 1. A digit of the current base: consume it.
        bool isDigit = digitSet.IndexOf(ch) >= 0;
        if (isDigit) {
            source.Position++;
            continue;
        }

        // 2. Decimal separator.
        if (ch == DecimalSeparator && floatsAllowed) {
            // A second dot, or a dot after an exponent, ends the number.
            if (details.IsSet(ScanFlags.HasDotOrExp)) {
                break;
            }
            // Unless NumberAllowStartEndDot is set, the dot must be followed by a digit.
            bool digitFollows = digitSet.IndexOf(source.NextChar) >= 0;
            if (!digitFollows && !IsSet(TermOptions.NumberAllowStartEndDot)) {
                break;
            }
            details.Flags |= ScanFlags.HasDot;
            source.Position++;
            continue;
        }

        // 3. Exponent marker: decimal numbers only, and only the first one
        //    (ControlSymbol is used to remember that an exponent was already seen).
        bool exponentCandidate = floatsAllowed
            && decimalBase
            && (details.ControlSymbol == null)
            && (ExponentSymbols.IndexOf(ch) >= 0);
        if (!exponentCandidate) {
            // 4. Anything else terminates the number.
            break;
        }
        char following = source.NextChar;
        bool signFollows = following == '-' || following == '+';
        bool digitAfterExp = digitSet.IndexOf(following) >= 0;
        if (!signFollows && !digitAfterExp) {
            break; // an exponent must be followed by a sign or a digit
        }
        details.ControlSymbol = ch.ToString(); // remember the exponent char
        details.Flags |= ScanFlags.HasExp;
        source.Position++;
        if (signFollows) {
            source.Position++; // consume +/- here so the next iteration sees a digit
        }
    }

    int bodyEnd = source.Position;
    details.Body = source.Text.Substring(bodyStart, bodyEnd - bodyStart);
    return true;
}
}//method

/// <summary>
/// Reads an optional suffix via the base implementation; when no suffix was found,
/// assigns the default type codes: float defaults if the literal has a dot or exponent,
/// integer defaults otherwise.
/// </summary>
/// <param name="source">Source stream positioned right after the number body.</param>
/// <param name="details">Scan state; receives the suffix and/or the default type codes.</param>
protected override void ReadSuffix(ISourceStream source, ScanDetails details) {
    base.ReadSuffix(source, details);
    if (!string.IsNullOrEmpty(details.Suffix)) {
        return; // suffix present - the default type codes are not applied
    }
    if (details.IsSet(ScanFlags.HasDotOrExp)) {
        details.TypeCodes = _defaultFloatTypes;
    } else {
        details.TypeCodes = DefaultIntTypes;
    }
}
/// <summary>
/// Reads an optional number prefix, except when the text starts with "0." - in that case the
/// leading zero is part of the number (e.g. "0.123") and must not be taken as an octal prefix.
/// </summary>
/// <param name="source">Source stream positioned at the start of the number.</param>
/// <param name="details">Scan state passed on to the base implementation.</param>
protected override void ReadPrefix(ISourceStream source, ScanDetails details) {
    bool zeroFollowedByDot = source.CurrentChar == '0' && source.NextChar == '.';
    if (!zeroFollowedByDot) {
        base.ReadPrefix(source, details);
    }
}//method
/// <summary>
/// Default conversion: stores the raw body as the value, then gives ConvertingValue
/// subscribers (if any) a chance to convert it.
/// </summary>
/// <param name="details">Scan state; details.Value is set to details.Body before the event fires.</param>
/// <returns>The result of OnConvertingValue when a handler is attached; otherwise false.</returns>
protected virtual bool ConvertValue(ScanDetails details) {
    details.Value = details.Body;
    // Only raise the event when somebody is listening.
    return ConvertingValue != null && OnConvertingValue(details);
}
/// <summary>
/// Raises the ConvertingValue event for the given scan details.
/// </summary>
/// <param name="details">Scan state handed to the subscribers through the event args.</param>
/// <returns>The Converted value of the event args after the handlers ran; false when there is no handler.</returns>
protected virtual bool OnConvertingValue(ScanDetails details) {
    if (ConvertingValue != null) {
        ScannerConvertingValueEventArgs eventArgs = new ScannerConvertingValueEventArgs(details);
        ConvertingValue(this, eventArgs);
        return eventArgs.Converted;
    }
    return false;
}
/// <summary>
/// Sets the value to the body text, prepended with the prefix when ScanFlags.IncludePrefix is set.
/// </summary>
/// <param name="details">Scan state; details.Value receives the resulting text.</param>
/// <returns>Always true.</returns>
protected override bool ConvertValue(ScanDetails details) {
    details.Value = details.IsSet(ScanFlags.IncludePrefix)
        ? details.Prefix + details.Body
        : details.Body;
    return true;
}
/// <summary>
/// Returns radix^safeWordLength for the literal's base, i.e. the multiplier used when
/// combining the sections produced with GetSafeWordLength:
/// 16^15 for hex, 8^21 for octal, 2^63 for binary and 10^19 for decimal.
/// </summary>
/// <param name="details">Scan state whose Hex/Octal/Binary flags select the base.</param>
private ulong GetSafeWordRadix(ScanDetails details) {
    ulong wordRadix = 10000000000000000000UL; // 10^19 (decimal)
    if (details.IsSet(ScanFlags.Hex)) {
        wordRadix = 1152921504606846976UL; // 16^15
    } else if (details.IsSet(ScanFlags.Octal)) {
        wordRadix = 9223372036854775808UL; // 8^21
    } else if (details.IsSet(ScanFlags.Binary)) {
        wordRadix = 9223372036854775808UL; // 2^63
    }
    return wordRadix;
}
/// <summary>
/// Returns the number of digits per section used when splitting a long literal:
/// 15 for hex, 21 for octal, 63 for binary and 19 for decimal.
/// </summary>
/// <param name="details">Scan state whose Hex/Octal/Binary flags select the base.</param>
private int GetSafeWordLength(ScanDetails details) {
    int digitsPerWord = 19; // decimal; maxWordLength 20
    if (details.IsSet(ScanFlags.Hex)) {
        digitsPerWord = 15;
    } else if (details.IsSet(ScanFlags.Octal)) {
        digitsPerWord = 21; // maxWordLength 22
    } else if (details.IsSet(ScanFlags.Binary)) {
        digitsPerWord = 63;
    }
    return digitsPerWord;
}
/// <summary>
/// Returns the set of characters accepted as digits for the literal's base:
/// the hex, octal or binary digit string from TextUtils when the matching flag is set,
/// the decimal digit string otherwise.
/// </summary>
/// <param name="details">Scan state whose Hex/Octal/Binary flags select the base.</param>
private string GetDigits(ScanDetails details) {
    string digitSet = TextUtils.DecimalDigits;
    if (details.IsSet(ScanFlags.Hex)) {
        digitSet = TextUtils.HexDigits;
    } else if (details.IsSet(ScanFlags.Octal)) {
        digitSet = TextUtils.OctalDigits;
    } else if (details.IsSet(ScanFlags.Binary)) {
        digitSet = TextUtils.BinaryDigits;
    }
    return digitSet;
}
/// <summary>
/// Returns the numeric base of the literal: 16, 8 or 2 when the Hex, Octal or Binary flag is set
/// (checked in that order), 10 otherwise.
/// </summary>
/// <param name="details">Scan state whose flags select the base.</param>
private int GetRadix(ScanDetails details) {
    int radix = 10;
    if (details.IsSet(ScanFlags.Hex)) {
        radix = 16;
    } else if (details.IsSet(ScanFlags.Octal)) {
        radix = 8;
    } else if (details.IsSet(ScanFlags.Binary)) {
        radix = 2;
    }
    return radix;
}
/// <summary>
/// Converts the literal body to a BigInteger. The digits (leading zeros ignored) are split into
/// sections of GetSafeWordLength digits, each section is parsed as a UInt64, and the sections are
/// combined as value = value * GetSafeWordRadix + section.
/// </summary>
/// <param name="details">Scan state; details.Value receives the result, details.Error the failure text.</param>
/// <returns>true on success; false (with details.Error set) when a section cannot be parsed.</returns>
private bool ConvertToBigInteger(ScanDetails details) {
    // Ignore leading zeros. Work on a local copy so that details.Body keeps the scanned text.
    string digits = details.Body.TrimStart('0');
    if (digits.Length == 0) {
        // The body consisted of zeros only: there are no sections to parse and the value is zero.
        // Without this guard the first section would be read from an empty array.
        BigInteger zero = 0UL;
        details.Value = zero;
        return true;
    }

    int radix = GetRadix(details);
    int wordLength = GetSafeWordLength(details);
    int sectionCount = GetSectionCount(digits.Length, wordLength);
    ulong[] numberSections = new ulong[sectionCount]; //big endian

    try {
        // Walk from the least significant section (end of the text) to the most significant one.
        int startIndex = digits.Length - wordLength;
        for (int sectionIndex = sectionCount - 1; sectionIndex >= 0; sectionIndex--) {
            if (startIndex < 0) {
                // The most significant section may be shorter than a full word.
                wordLength += startIndex;
                startIndex = 0;
            }
            string section = digits.Substring(startIndex, wordLength);
            //workaround for .Net FX bug: http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=278448
            if (radix == 10) {
                // Invariant culture keeps parsing independent of the machine settings.
                numberSections[sectionIndex] = Convert.ToUInt64(section, CultureInfo.InvariantCulture);
            } else {
                numberSections[sectionIndex] = Convert.ToUInt64(section, radix);
            }
            startIndex -= wordLength;
        }
    } catch {
        details.Error = "Invalid number.";
        return false;
    }

    //produce big integer
    ulong safeWordRadix = GetSafeWordRadix(details);
    BigInteger bigIntegerValue = numberSections[0];
    for (int i = 1; i < sectionCount; i++) {
        bigIntegerValue = checked (bigIntegerValue * safeWordRadix + numberSections[i]);
    }
    details.Value = bigIntegerValue;
    return true;
}
/// <summary>
/// Reads an identifier body: collects characters until a terminator, the end of input, or a
/// character rejected by CharOk. Unicode escapes are decoded when escapes are enabled, and
/// characters whose Unicode category is in CharsToRemoveCategories are consumed but not stored.
/// </summary>
/// <param name="source">Source stream positioned at the first identifier character.</param>
/// <param name="details">Scan state; receives the collected body, or an error from escape decoding.</param>
/// <returns>true when a non-empty body was collected; false otherwise (including escape errors).</returns>
protected override bool ReadBody(ISourceStream source, ScanDetails details) {
    int firstPos = source.Position;
    bool escapesAllowed = !details.IsSet(ScanFlags.DisableEscapes);
    CharList collected = new CharList();

    while (!source.EOF()) {
        char ch = source.CurrentChar;
        bool isTerminator = _terminators.IndexOf(ch) >= 0;
        if (isTerminator) {
            break;
        }
        if (escapesAllowed && ch == this.EscapeChar) {
            ch = ReadUnicodeEscape(source, details);
            // ReadUnicodeEscape leaves the position right after the escape digits. Step back one
            // char so the decoded char appears to sit on the last digit; the increment at the
            // bottom of the loop then moves on to the next unread char.
            source.Position--;
            if (details.HasError()) {
                return false;
            }
        }
        // Stop at the first char that is not acceptable at this place of the identifier.
        bool isFirstChar = source.Position == firstPos;
        if (!CharOk(ch, isFirstChar)) {
            break;
        }
        // Chars in the "remove" categories are skipped rather than added to the output.
        UnicodeCategory category = char.GetUnicodeCategory(ch);
        bool dropChar = this.CharsToRemoveCategories.Contains(category);
        if (!dropChar) {
            collected.Add(ch);
        }
        source.Position++;
    }

    if (collected.Count == 0) {
        return false;
    }
    details.Body = new string(collected.ToArray());
    return !string.IsNullOrEmpty(details.Body);
}
}//method

/// <summary>
/// Tries to parse details.Body as a UInt64 in the literal's radix and stores it in details.Value.
/// </summary>
/// <param name="details">Scan state; details.Value receives the parsed value on success.</param>
/// <returns>
/// true on success; false when the text overflows UInt64 or is not a valid integer in that radix
/// (details.Value is left untouched in that case).
/// </returns>
private bool TryConvertToUlong(ScanDetails details) {
    try {
        int radix = GetRadix(details);
        //workaround for .Net FX bug: http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=278448
        if (radix == 10) {
            details.Value = Convert.ToUInt64(details.Body, CultureInfo.InvariantCulture);
        } else {
            details.Value = Convert.ToUInt64(details.Body, radix);
        }
        return true;
    } catch (OverflowException) {
        return false;
    } catch (FormatException) {
        // The body is not a plain integer in this radix (for example it contains a decimal
        // separator while an integer type was requested). Report "not converted" instead of
        // letting the exception escape from the scanner.
        return false;
    }
}
/// <summary>
/// Converts the value already stored in details.Value to the requested integer type.
/// TypeCode.UInt64 needs no conversion and succeeds as long as a value is present.
/// </summary>
/// <param name="typeCode">Target type code.</param>
/// <param name="details">Scan state; details.Value is replaced by the converted value on success.</param>
/// <returns>true on success; false when there is no value or the conversion throws.</returns>
private bool TryCastToIntegerType(TypeCode typeCode, ScanDetails details) {
    if (details.Value == null) {
        return false;
    }
    if (typeCode == TypeCode.UInt64) {
        return true; // nothing to convert
    }
    try {
        details.Value = Convert.ChangeType(details.Value, typeCode, CultureInfo.InvariantCulture);
        return true;
    } catch (Exception e) {
#if !SILVERLIGHT
        Trace.WriteLine("Error converting to integer: text=[" + details.Body + "], type=" + typeCode + ", error: " + e.Message);
#endif
        return false;
    }
}//method
/// <summary>
/// Tries to match one of the registered start/end symbols at the current position.
/// On a match, stores the symbol as details.ControlSymbol, ORs in its flags from
/// StartEndSymbolTable and advances the source past the symbol.
/// </summary>
/// <param name="source">Source stream positioned where the literal may start.</param>
/// <param name="details">Scan state that receives the control symbol and flags.</param>
/// <returns>true when a start symbol was consumed; false otherwise.</returns>
private bool ReadStartSymbol(ISourceStream source, ScanDetails details) {
    // Cheap pre-filter on the first character of all known start symbols.
    bool mayStartLiteral = _startEndFirsts.IndexOf(source.CurrentChar) >= 0;
    if (!mayStartLiteral) {
        return false;
    }
    bool caseInsensitive = IsSet(TermOptions.SpecialIgnoreCase);
    foreach (string symbol in _startEndSymbols) {
        if (source.MatchSymbol(symbol, caseInsensitive)) {
            // First matching symbol wins.
            details.ControlSymbol = symbol;
            details.Flags |= StartEndSymbolTable[symbol];
            source.Position += symbol.Length;
            return true;
        }
    }
    return false;
} //method
/// <summary>
/// Tries to scan a token at the current token start: optional quick parse, then
/// prefix, body, suffix and value conversion.
/// </summary>
/// <param name="context">Compiler context passed on to token creation.</param>
/// <param name="source">Source stream to read from.</param>
/// <returns>
/// The scanned token; null when the body cannot be read; a syntax error token when the body
/// reported an error or the value could not be converted.
/// </returns>
public override Token TryMatch(CompilerContext context, ISourceStream source) {
    // Optional fast path.
    if (IsSet(TermOptions.EnableQuickParse)) {
        Token quickToken = QuickParse(context, source);
        if (quickToken != null) {
            return quickToken;
        }
    }

    // Full path: restart from the token start with fresh scan details.
    source.Position = source.TokenStart.Position;
    ScanDetails details = new ScanDetails();
    details.Flags = DefaultFlags;
    details.TypeCodes = _defaultTypes;

    ReadPrefix(source, details);
    bool bodyRead = ReadBody(source, details);
    if (!bodyRead) {
        return null;
    }
    if (details.HasError()) {
        return Grammar.CreateSyntaxErrorToken(context, source.TokenStart, details.Error);
    }

    ReadSuffix(source, details);
    bool converted = ConvertValue(details);
    if (!converted) {
        return Grammar.CreateSyntaxErrorToken(context, source.TokenStart, "Failed to convert the value: " + details.Error);
    }
    return CreateToken(context, source, details);
}
}//method
#endregion

#region private utilities
/// <summary>
/// Fast path: tries to parse details.Body directly as an Int32 in the literal's radix.
/// </summary>
/// <param name="details">Scan state; details.Value receives the parsed Int32 on success.</param>
/// <returns>true when converted; false when the caller must fall back to the full conversion.</returns>
private bool QuickConvertToInt32(ScanDetails details) {
    int radix = GetRadix(details);
    if (radix == 10 && details.Body.Length > 10) {
        return false; //10 digits is maximum for int32; int32.MaxValue = 2 147 483 647
    }
    try {
        //workaround for .Net FX bug: http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=278448
        if (radix == 10) {
            details.Value = Convert.ToInt32(details.Body, CultureInfo.InvariantCulture);
        } else {
            details.Value = Convert.ToInt32(details.Body, radix);
        }
        return true;
    } catch {
        // Deliberate best effort: any failure just means the quick path does not apply.
        return false;
    }
}//method
/// <summary>
/// Creates event arguments that carry the scan details being converted.
/// </summary>
/// <param name="details">Scan details exposed to the event handlers through Details.</param>
public ScannerConvertingValueEventArgs(ScanDetails details) {
    Details = details;
}
//Should support: \Udddddddd, \udddd, \xdddd, \N{name}, \0, \ddd (octal),
/// <summary>
/// Decodes a \u / \U, \x or octal escape at the start of <paramref name="segment"/> (the text that
/// follows an escape character) and returns the segment with the escape replaced by the decoded char.
/// Each escape kind is honored only when its ScanFlags.Allow*Escapes flag is set; \N{name} is not
/// handled here. On failure details.Error is set and the segment is returned unchanged.
/// </summary>
/// <param name="segment">Text following the escape character.</param>
/// <param name="details">Scan state providing the flags and receiving the error text.</param>
/// <returns>The segment with the leading escape decoded, or the original segment on error.</returns>
protected virtual string HandleSpecialEscape(string segment, ScanDetails details) {
    if (string.IsNullOrEmpty(segment)) {
        return string.Empty;
    }
    int len, p;
    string digits;
    char ch;
    char first = segment[0];
    switch (first) {
        case 'u':
        case 'U':
            if (details.IsSet(ScanFlags.AllowUEscapes)) {
                len = (first == 'u' ? 4 : 8);
                // The escape needs the marker plus exactly len hex digits. Check both the length
                // and the digits before converting, so malformed input yields a scan error
                // rather than an exception.
                bool wellFormed = segment.Length >= len + 1;
                for (int i = 1; wellFormed && i <= len; i++) {
                    char d = segment[i];
                    wellFormed = (d >= '0' && d <= '9') || (d >= 'a' && d <= 'f') || (d >= 'A' && d <= 'F');
                }
                if (!wellFormed) {
                    // Report the whole segment: taking a substring past len would itself throw
                    // when the segment is too short.
                    details.Error = "Invalid unicode escape (" + segment + "), expected " + len + " hex digits.";
                    return segment;
                }
                digits = segment.Substring(1, len);
                ch = (char)Convert.ToUInt32(digits, 16);
                return ch + segment.Substring(len + 1);
            }
            break;

        case 'x':
            if (details.IsSet(ScanFlags.AllowXEscapes)) {
                //x-escape allows variable number of digits, from one to 4; let's count them
                p = 1; //current position
                while (p < 5 && p < segment.Length) {
                    if (TextUtils.HexDigits.IndexOf(segment[p]) < 0) {
                        break;
                    }
                    p++;
                }
                //p now points to the char right after the last digit
                if (p <= 1) {
                    details.Error = "Invalid \\x escape, at least one digit expected.";
                    return segment;
                }
                digits = segment.Substring(1, p - 1);
                ch = (char)Convert.ToUInt32(digits, 16);
                return ch + segment.Substring(p);
            }
            break;

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
            if (details.IsSet(ScanFlags.AllowOctalEscapes)) {
                //octal escape allows variable number of digits, from one to 3; let's count them
                p = 0; //current position
                while (p < 3 && p < segment.Length) {
                    if (TextUtils.OctalDigits.IndexOf(segment[p]) < 0) {
                        break;
                    }
                    p++;
                }
                //p now points to the char right after the last digit
                digits = segment.Substring(0, p);
                ch = (char)Convert.ToUInt32(digits, 8);
                return ch + segment.Substring(p);
            }
            break;
    } //switch

    // Unknown escape, or a known one whose flag is not enabled.
    details.Error = "Invalid escape sequence: \\" + segment;
    return segment;
}//method
/// <summary>
/// Creates the token for the text just scanned: the lexeme comes from the source,
/// the value from details.Value, and the scan details are attached to the token.
/// </summary>
/// <param name="context">Compiler context passed to Token.Create.</param>
/// <param name="source">Source stream providing the token start and the lexeme.</param>
/// <param name="details">Scan state providing the value; stored in the token's Details.</param>
/// <returns>The created token.</returns>
protected virtual Token CreateToken(CompilerContext context, ISourceStream source, ScanDetails details) {
    string tokenText = source.GetLexeme();
    Token created = Token.Create(this, context, source.TokenStart, tokenText, details.Value);
    created.Details = details;
    return created;
}
} //method

/// <summary>
/// Reads the token body. This base implementation reads nothing and reports failure;
/// derived terminals override it.
/// </summary>
/// <param name="source">Source stream (unused here).</param>
/// <param name="details">Scan state (unused here).</param>
/// <returns>Always false.</returns>
protected virtual bool ReadBody(ISourceStream source, ScanDetails details) {
    return false;
}
//Override to assign IsKeyword flag to keyword tokens
/// <summary>
/// Creates the token via the base implementation and marks it as a keyword when its text is
/// found in the keyword table. When IncludePrefix is set and a prefix exists, the value becomes
/// prefix + body first.
/// </summary>
/// <param name="context">Compiler context passed to the base implementation.</param>
/// <param name="source">Source stream passed to the base implementation.</param>
/// <param name="details">Scan state; its Value may be rewritten to include the prefix.</param>
/// <returns>The created token, with IsKeyword set when applicable.</returns>
protected override Token CreateToken(CompilerContext context, ISourceStream source, ScanDetails details) {
    bool prependPrefix = details.IsSet(ScanFlags.IncludePrefix) && !string.IsNullOrEmpty(details.Prefix);
    if (prependPrefix) {
        details.Value = details.Prefix + details.Body;
    }

    Token token = base.CreateToken(context, source, details);

    // Tokens flagged IsNotKeyword are never looked up in the keyword table.
    if (!details.IsSet(ScanFlags.IsNotKeyword)) {
        // For case-insensitive grammars the lookup uses the lower-cased token text.
        string lookupKey = Grammar.CaseSensitive ? token.Text : token.Text.ToLower();
        if (_keywordHash.ContainsKey(lookupKey)) {
            token.IsKeyword = true;
        }
    }
    return token;
}
} //method

//Extract the string content from lexeme, adjusts the escaped and double-end symbols
/// <summary>
/// Produces the literal's value from details.Body: resolves escape sequences (unless disabled),
/// collapses doubled end symbols (when allowed), and yields a char instead of a string for
/// char literals.
/// </summary>
/// <param name="details">Scan state; details.Value receives the result, details.Error the failure text.</param>
/// <returns>
/// true on success; false when an escape sequence is invalid or a char literal does not contain
/// exactly one character (details.Error is set in both cases).
/// </returns>
protected override bool ConvertValue(ScanDetails details) {
    string value = details.Body;
    bool escapeEnabled = !details.IsSet(ScanFlags.DisableEscapes);

    //Fix all escapes
    if (escapeEnabled && value.IndexOf(EscapeChar) >= 0) {
        details.Flags |= ScanFlags.HasEscapes;
        string[] arr = value.Split(EscapeChar);
        bool ignoreNext = false;
        //we skip the 0 element as it is not preceded by "\"
        for (int i = 1; i < arr.Length; i++) {
            if (ignoreNext) {
                ignoreNext = false;
                continue;
            }
            string s = arr[i];
            if (string.IsNullOrEmpty(s)) {
                //it is "\\" - escaped escape symbol; the next piece follows a literal
                // backslash and therefore is not an escape itself.
                arr[i] = @"\";
                ignoreNext = true;
                continue;
            }
            //The char being escaped is the first one; replace it with the char from the Escapes table
            char first = s[0];
            char newFirst;
            if (Escapes.TryGetValue(first, out newFirst)) {
                arr[i] = newFirst + s.Substring(1);
            } else {
                arr[i] = HandleSpecialEscape(arr[i], details);
            }
        } //for i

        // HandleSpecialEscape reports bad escapes through details.Error; surface that as a
        // failed conversion instead of silently producing a token with a partly decoded value.
        if (details.HasError()) {
            return false;
        }
        value = string.Join(string.Empty, arr);
    } // if escapeEnabled

    //Check for doubled end symbol
    string startS = details.ControlSymbol;
    if (details.IsSet(ScanFlags.AllowDoubledQuote) && value.IndexOf(startS) >= 0) {
        value = value.Replace(startS + startS, startS);
    }

    if (details.IsSet(ScanFlags.IsChar)) {
        details.TypeCodes = new TypeCode[] { TypeCode.Char };
    }

    //Check char length - must be exactly 1
    bool isChar = details.TypeCodes[0] == TypeCode.Char;
    if (isChar && value.Length != 1) {
        details.Error = "Invalid length of char literal - should be 1.";
        return false;
    }
    details.Value = isChar ? (object)value[0] : value;
    return true;

    //TODO: Investigate unescaped linebreak, with Flags == BnfFlags.StringAllowLineBreak | BnfFlags.StringLineBreakEscaped
    // also investigate what happens in this case in Windows where default linebreak is "\r\n", not "\n"
}
/// <summary>
/// Converts the literal body to the requested floating-point kind (double, imaginary, single or
/// decimal) using the invariant culture. A custom exponent symbol is replaced by "E" and a custom
/// decimal separator by '.' before parsing.
/// </summary>
/// <param name="typeCode">Target type code.</param>
/// <param name="details">Scan state; details.Value receives the result.</param>
/// <returns>
/// true on success; false when the literal is binary/octal/hex (details.Error is set),
/// the text cannot be parsed, or the type code is not one of the handled kinds.
/// </returns>
private bool ConvertToFloat(TypeCode typeCode, ScanDetails details) {
    // Only decimal numbers can be fractions.
    bool nonDecimalBase = details.IsSet(ScanFlags.Binary | ScanFlags.Octal | ScanFlags.Hex);
    if (nonDecimalBase) {
        details.Error = "Invalid number.";
        return false;
    }

    string text = details.Body;
    // Some languages allow exponent symbols other than E; normalize to E,
    // otherwise the .NET conversion methods may fail.
    if (details.IsSet(ScanFlags.HasExp) && details.ControlSymbol.ToUpper() != "E") {
        text = text.Replace(details.ControlSymbol, "E");
    }
    // The invariant culture requires '.' as the decimal separator.
    if (details.IsSet(ScanFlags.HasDot) && DecimalSeparator != '.') {
        text = text.Replace(DecimalSeparator, '.');
    }

    switch (typeCode) {
        case TypeCode.Double:
        case TypeCodeImaginary:
            double asDouble;
#if PocketPC || SILVERLIGHT
            try {
                asDouble = Convert.ToDouble(text, CultureInfo.InvariantCulture);
            } catch {
                return false;
            }
#else
            if (!Double.TryParse(text, NumberStyles.Float, CultureInfo.InvariantCulture, out asDouble)) {
                return false;
            }
#endif
            // An imaginary literal becomes a complex number with a zero real part.
            if (typeCode == TypeCodeImaginary) {
                details.Value = new Complex64(0, asDouble);
            } else {
                details.Value = asDouble;
            }
            return true;

        case TypeCode.Single:
            float asSingle;
#if PocketPC || SILVERLIGHT
            try {
                asSingle = Convert.ToSingle(text, CultureInfo.InvariantCulture);
            } catch {
                return false;
            }
#else
            if (!Single.TryParse(text, NumberStyles.Float, CultureInfo.InvariantCulture, out asSingle)) {
                return false;
            }
#endif
            details.Value = asSingle;
            return true;

        case TypeCode.Decimal:
            decimal asDecimal;
#if PocketPC || SILVERLIGHT
            try {
                asDecimal = Convert.ToDecimal(text, CultureInfo.InvariantCulture);
            } catch {
                return false;
            }
#else
            if (!Decimal.TryParse(text, NumberStyles.Float, CultureInfo.InvariantCulture, out asDecimal)) {
                return false;
            }
#endif
            details.Value = asDecimal;
            return true;
    }//switch
    return false;
}
/// <summary>
/// Converts the number body to a value. Order of attempts: custom conversion through the base
/// method (ConvertingValue event), a quick Int32/Double path chosen by the first type code, then
/// every type code in details.TypeCodes in turn.
/// </summary>
/// <param name="details">Scan state; details.Value receives the result, details.Error the failure text.</param>
/// <returns>true when some conversion succeeded; false otherwise.</returns>
protected override bool ConvertValue(ScanDetails details) {
    if (String.IsNullOrEmpty(details.Body)) {
        details.Error = "Invalid number.";
        return false;
    }
    // The base method fires the event and lets custom code convert the value.
    if (base.ConvertValue(details)) {
        return true;
    }

    // Quick paths for the most common target types.
    TypeCode preferred = details.TypeCodes[0];
    if (preferred == TypeCode.Int32) {
        if (QuickConvertToInt32(details)) {
            return true;
        }
    } else if (preferred == TypeCode.Double) {
        if (QuickConvertToDouble(details)) {
            return true;
        }
    }

    // Full cycle: try each candidate type in order.
    details.Value = null;
    foreach (TypeCode candidate in details.TypeCodes) {
        switch (candidate) {
            case TypeCode.Single:
            case TypeCode.Double:
            case TypeCode.Decimal:
            case TypeCodeImaginary:
                // A float candidate decides the outcome immediately, success or not.
                return ConvertToFloat(candidate, details);

            case TypeCodeBigInt:
                if (ConvertToBigInteger(details)) {
                    return true;
                }
                break;

            case TypeCode.SByte:
            case TypeCode.Byte:
            case TypeCode.Int16:
            case TypeCode.UInt16:
            case TypeCode.Int32:
            case TypeCode.UInt32:
            case TypeCode.Int64:
            case TypeCode.UInt64:
                // Parse into a UInt64 while no value is stored yet, then cast it to the candidate type.
                if (details.Value == null) {
                    TryConvertToUlong(details);
                }
                if (TryCastToIntegerType(candidate, details)) {
                    return true;
                }
                break;
        }//switch
    }
    return false;
}//method