/// <summary>
/// Pulls the next token from <paramref name="lex"/> (calling <c>Next</c> with its second
/// argument set to <c>true</c>) and asserts that a token was produced, that its
/// <c>mType</c> equals <paramref name="type"/>, and that <paramref name="condition"/>
/// returns <c>true</c> for it.
/// </summary>
/// <param name="lex">Scanner to read from.</param>
/// <param name="type">Token type the next token must have.</param>
/// <param name="condition">Additional predicate the token must satisfy.</param>
private static void AssertNextToken(CssScanner lex, CssTokenType type, Func<CssToken, bool> condition)
{
    var scanned = new CssToken();

    // A false result from Next means the scanner ran out of input.
    bool produced = lex.Next(scanned, true);
    Assert.IsTrue(produced, "Unexpected EOF");

    Assert.AreEqual(type, scanned.mType);

    // Evaluate the predicate before building the message, as the original call did.
    bool satisfied = condition(scanned);
    var failureText = "Condition for token {0} failed".Fmt(scanned.mType);
    Assert.IsTrue(satisfied, failureText);
}
/// <summary>
/// Tokenizes <paramref name="aInput"/> and yields the <c>mIdentStr</c> of every token
/// whose type is <c>CssTokenType.URL</c>, in source order.
/// </summary>
/// <remarks>
/// This is an iterator method: nothing is scanned until the result is enumerated.
/// The same <c>CssToken</c> instance is reused for every call to <c>Next</c>.
/// </remarks>
/// <param name="aInput">CSS text to scan.</param>
/// <returns>A lazily evaluated sequence of URL token strings.</returns>
public IEnumerable<string> GetUris(string aInput)
{
    // NOTE(review): the literal 1 is presumably the starting line number — confirm against CssScanner's constructor.
    var scanner = new CssScanner(aInput, 1);
    var reporter = new ErrorReporter(scanner, null, this, null);
    scanner.SetErrorReporter(reporter);

    var current = new CssToken();
    while (scanner.Next(current, true))
    {
        if (current.mType != CssTokenType.URL)
        {
            continue;
        }

        yield return current.mIdentStr;
    }
}
/**
 * Scan a unicode-range token.  These match the regular expression
 *
 *     u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
 *
 * However, some such tokens are "invalid".  There are three valid forms:
 *
 *     u+[0-9a-f]{x}              1 <= x <= 6
 *     u+[0-9a-f]{x}\?{y}         1 <= x+y <= 6
 *     u+[0-9a-f]{x}-[0-9a-f]{y}  1 <= x <= 6, 1 <= y <= 6
 *
 * The token text is recorded in mIdent.  The decoded bounds are stored in
 * mInteger (low) and mInteger2 (high), and mIntegerValid records whether the
 * token had one of the valid syntactic forms.  Note that this does not
 * validate the numeric range, only the syntactic form.
 *
 * The caller must have checked that the input at the current position is
 * 'u' or 'U', then '+', then a hex digit or '?'.  Always returns true.
 */
internal bool ScanURange(nsCSSToken aResult)
{
    int32_t intro1 = Peek();
    int32_t intro2 = Peek(1);
    int32_t ch = Peek(2);

    Debug.Assert((intro1 == 'u' || intro1 == 'U') &&
                 intro2 == '+' &&
                 (IsHexDigit(ch) || ch == '?'),
                 "should not have been called");

    // Append characters, not numbers.  mIdent is used as a StringBuilder
    // elsewhere in this scanner; if int32_t aliases System.Int32, an un-cast
    // Append would bind to Append(int) and write the decimal digits of the
    // code unit (e.g. "117") instead of the character itself.
    aResult.mIdent.Append((char)intro1);
    aResult.mIdent.Append((char)intro2);
    Advance(2);

    bool valid = true;
    bool haveQues = false;
    uint32_t low = 0;
    uint32_t high = 0;
    int i = 0;

    // First run: up to six hex digits and/or question marks.  A '?' stands
    // for "any digit", so it contributes 0 to the low bound and F to the
    // high bound.
    do
    {
        aResult.mIdent.Append((char)ch);
        if (IsHexDigit(ch))
        {
            if (haveQues)
            {
                valid = false; // All question marks should be at the end.
            }
            low = low*16 + HexDigitValue(ch);
            high = high*16 + HexDigitValue(ch);
        }
        else
        {
            haveQues = true;
            low = low*16 + 0x0;
            high = high*16 + 0xF;
        }

        i++;
        Advance();
        ch = Peek();
    } while (i < 6 && (IsHexDigit(ch) || ch == '?'));

    // Optional explicit upper bound: '-' followed by up to six hex digits.
    if (ch == '-' && IsHexDigit(Peek(1)))
    {
        if (haveQues)
        {
            // Wildcards cannot be combined with an explicit range.
            valid = false;
        }

        aResult.mIdent.Append((char)ch);
        Advance();
        ch = Peek();
        high = 0;
        i = 0;
        do
        {
            aResult.mIdent.Append((char)ch);
            high = high*16 + HexDigitValue(ch);

            i++;
            Advance();
            ch = Peek();
        } while (i < 6 && IsHexDigit(ch));
    }

    aResult.mInteger = low;
    aResult.mInteger2 = high;
    aResult.mIntegerValid = valid;
    aResult.mType = nsCSSTokenType.URange;
    return true;
}
/**
 * Scan a string constant ('foo' or "foo").  Will always produce
 * either a String or a Bad_String token; the latter occurs when the
 * close quote is missing.  Always returns true (for convenience in Next()).
 *
 * The opening quote character is stored in mSymbol and the string contents
 * are accumulated in mIdent.  Reaching EOF before the closing quote ends the
 * token as a String without reporting an error.
 */
internal bool ScanString(nsCSSToken aToken)
{
    int32_t aStop = Peek();
    Debug.Assert(aStop == '"' || aStop == '\'', "should not have been called");

    aToken.mType = nsCSSTokenType.String;
    aToken.mSymbol = ((PRUnichar)(aStop)); // Remember how it's quoted.
    Advance();

    for (;;)
    {
        GatherText(IS_STRING, aToken.mIdent);

        int32_t ch = Peek();
        if (ch == -1)
        {
            break; // EOF ends a string token with no error.
        }
        if (ch == aStop)
        {
            Advance();
            break;
        }

        // Both " and ' are excluded from IS_STRING, so the "other" quote
        // character has to be copied into the string by hand.
        if (ch == '"' || ch == '\'')
        {
            // Append the character, not its numeric value: if int32_t aliases
            // System.Int32, an un-cast Append would bind to
            // StringBuilder.Append(int) and write "34" or "39" into the string.
            aToken.mIdent.Append((char)ch);
            Advance();
            continue;
        }

        // GatherText stopped on something that is neither EOF nor a quote.
        aToken.mType = nsCSSTokenType.Bad_String;
        mReporter.ReportUnexpected("SEUnterminatedString", aToken);
        break;
    }
    return true;
}
/**
 * Scan a Number, Percentage, or Dimension token (all of which begin
 * like a Number).
 *
 * The caller must already have checked that the input starts with a digit,
 * with a '.', '+' or '-' followed by a digit, or with a sign followed by
 * '.' and a digit (see the DEBUG-only assertion below).
 *
 * On return aToken.mNumber holds the value as a float, aToken.mHasSign says
 * whether an explicit sign was present, and aToken.mInteger/mIntegerValid
 * are filled in for plain integers (no '.', no exponent, no '%').
 * Exponents ("1e3") are only recognised when IsSVGMode() is true.
 * Always returns true.
 *
 * NOTE(review): earlier documentation for this method said it could also
 * produce Symbol and HTMLComment ('-->') tokens; no path in this body
 * assigns either of those types.
 */
internal bool ScanNumber(nsCSSToken aToken)
{
  int32_t c = Peek();
#if DEBUG
  {
    int32_t c2 = Peek(1);
    int32_t c3 = Peek(2);
    Debug.Assert(IsDigit(c) || (IsDigit(c2) && (c == '.' || c == '+' || c == '-')) || (IsDigit(c3) && (c == '+' || c == '-') && c2 == '.'), "should not have been called");
  }
#endif

  // Sign of the mantissa (-1 or 1).
  int32_t sign = c == '-' ? -1 : 1;
  // Absolute value of the integer part of the mantissa.  This is a double so
  // we don't run into overflow issues for consumers that only care about our
  // floating-point value while still being able to express the full int32_t
  // range for consumers who want integers.
  double intPart = 0;
  // Fractional part of the mantissa.  This is a double so that when we convert
  // to float at the end we'll end up rounding to nearest float instead of
  // truncating down (as we would if fracPart were a float and we just
  // effectively lost the last several digits).
  double fracPart = 0;
  // Absolute value of the power of 10 that we should multiply by (only
  // relevant for numbers in scientific notation).  Kept as a signed integer
  // because it is multiplied by expSign below.
  int32_t exponent = 0;
  // Sign of the exponent.
  int32_t expSign = 1;

  aToken.mHasSign = (c == '+' || c == '-');
  if (aToken.mHasSign) {
    // Step over the sign; c now refers to the first character after it.
    Advance();
    c = Peek();
  }

  bool gotDot = (c == '.');

  if (!gotDot) {
    // Scan the integer part of the mantissa.
    Debug.Assert(IsDigit(c), "should have been excluded by logic above");
    do {
      intPart = 10*intPart + DecimalDigitValue(c);
      Advance();
      c = Peek();
    } while (IsDigit(c));

    // A '.' only starts a fractional part when a digit follows it;
    // otherwise it is left in the input for the next token.
    gotDot = (c == '.') && IsDigit(Peek(1));
  }

  if (gotDot) {
    // Scan the fractional part of the mantissa.
    Advance();
    c = Peek();
    Debug.Assert(IsDigit(c), "should have been excluded by logic above");
    // Power of ten by which we need to divide our next digit
    double divisor = 10;
    do {
      fracPart += DecimalDigitValue(c) / divisor;
      divisor *= 10;
      Advance();
      c = Peek();
    } while (IsDigit(c));
  }

  bool gotE = false;
  // Scientific notation is only accepted in SVG mode, and only when the 'e'
  // is followed by a digit or by a sign and then a digit.
  if (IsSVGMode() && (c == 'e' || c == 'E')) {
    int32_t expSignChar = Peek(1);
    int32_t nextChar = Peek(2);
    if (IsDigit(expSignChar) || ((expSignChar == '-' || expSignChar == '+') && IsDigit(nextChar))) {
      gotE = true;
      if (expSignChar == '-') {
        expSign = -1;
      }
      Advance(); // consumes the E
      if (expSignChar == '-' || expSignChar == '+') {
        // Consume the exponent's sign as well; the first digit was peeked above.
        Advance();
        c = nextChar;
      } else {
        c = expSignChar;
      }
      Debug.Assert(IsDigit(c), "should have been excluded by logic above");
      do {
        exponent = 10*exponent + DecimalDigitValue(c);
        Advance();
        c = Peek();
      } while (IsDigit(c));
    }
  }

  nsCSSTokenType type = nsCSSTokenType.Number;

  // Set mIntegerValid for all cases (except %, below) because we need
  // it for the "2n" in :nth-child(2n).
  aToken.mIntegerValid = false;

  // Time to reassemble our number.
  // Do all the math in double precision so it's truncated only once.
  double value = sign * (intPart + fracPart);
  if (gotE) {
    // Explicitly cast expSign*exponent to double so the (double, double)
    // form of Math.Pow is used.
    value *= Math.Pow(10.0, ((double)(expSign * exponent)));
  } else if (!gotDot) {
    // Plain integer: also expose it through mInteger.
    // Clamp values outside of integer range.
    if (sign > 0) {
      aToken.mInteger = ((int32_t)(Math.Min(intPart, ((double)(Int32.MaxValue)))));
    } else {
      aToken.mInteger = ((int32_t)(Math.Max(-intPart, ((double)(Int32.MinValue)))));
    }
    aToken.mIntegerValid = true;
  }

  StringBuilder ident = aToken.mIdent;

  // Check for Dimension and Percentage tokens.
  if (c >= 0) {
    if (StartsIdent(c, Peek(1))) {
      // The unit text is gathered into mIdent; the token only becomes a
      // Dimension when GatherText reports success.
      if (GatherText(IS_IDCHAR, ident)) {
        type = nsCSSTokenType.Dimension;
      }
    } else if (c == '%') {
      Advance();
      type = nsCSSTokenType.Percentage;
      // Percentages are stored as a fraction (50% -> 0.5) and never count
      // as integers.
      value = value / 100.0f;
      aToken.mIntegerValid = false;
    }
  }
  aToken.mNumber = (float)value;
  aToken.mType = type;
  return true;
}
/**
 * Scan an Ident token.  Function and URL tokens start out looking exactly
 * like an identifier, so they are handled here too:
 *
 *   - identifier characters not followed by '('  -> Ident
 *   - identifier characters followed by '('      -> Function ('(' consumed)
 *   - the identifier "url" (any case) plus '('   -> handed on to NextURL
 *
 * When no identifier text can be gathered (GatherText returns false), one
 * character is consumed and stored in mSymbol, and mType is left as the
 * caller set it.  Always returns true.
 */
internal bool ScanIdent(nsCSSToken aToken)
{
    bool gathered = GatherText(IS_IDCHAR, aToken.mIdent);

    if (gathered)
    {
        if (Peek() == '(')
        {
            // Swallow the parenthesis; it belongs to the Function token.
            Advance();
            aToken.mType = nsCSSTokenType.Function;

            bool isUrlFunction = aToken.mIdent.LowerCaseEqualsLiteral("url");
            if (isUrlFunction)
            {
                NextURL(aToken);
            }
        }
        else
        {
            aToken.mType = nsCSSTokenType.Ident;
        }
    }
    else
    {
        // Nothing identifier-like could be read: emit the single character.
        aToken.mSymbol = (char)Peek();
        Advance();
    }

    return true;
}
/**
 * Scan a token that starts with '#'.  The '#' is always consumed and stored
 * in mSymbol as the fallback result.  If the next character is an identifier
 * character or a backslash and the following text can be gathered, the token
 * becomes nsCSSTokenType.ID when that text starts like an identifier
 * (StartsIdent), or nsCSSTokenType.Hash otherwise.  In every other case
 * mType is left as the caller set it.  Always returns true.
 */
internal bool ScanHash(nsCSSToken aToken)
{
    Debug.Assert(Peek() == '#', "should not have been called");

    // Default outcome: a lone '#'.
    aToken.mSymbol = '#';
    Advance();

    int32_t first = Peek();
    bool nameFollows = IsIdentChar(first) || first == '\\';
    if (!nameFollows)
    {
        return true;
    }

    // Decide the token type before consuming anything; it depends on the
    // first two characters after the '#'.
    nsCSSTokenType resolved;
    if (StartsIdent(first, Peek(1)))
    {
        resolved = nsCSSTokenType.ID;
    }
    else
    {
        resolved = nsCSSTokenType.Hash;
    }

    aToken.mIdent.SetLength(0);
    bool gathered = GatherText(IS_IDCHAR, aToken.mIdent);
    if (gathered)
    {
        aToken.mType = resolved;
    }

    return true;
}
/**
 * Scan a token that starts with '@'.  The '@' is always consumed and stored
 * in mSymbol.  If an identifier follows and its text can be gathered into
 * mIdent, the token type becomes nsCSSTokenType.AtKeyword; otherwise mType is
 * left as the caller set it.  Always returns true.
 */
internal bool ScanAtKeyword(nsCSSToken aToken)
{
    Debug.Assert(Peek() == '@', "should not have been called");

    // Default outcome: a lone '@'.
    aToken.mSymbol = '@';
    Advance();

    int32_t first = Peek();
    int32_t second = Peek(1);

    // GatherText is only attempted when the next two characters start an identifier.
    bool isKeyword = StartsIdent(first, second) && GatherText(IS_IDCHAR, aToken.mIdent);
    if (isKeyword)
    {
        aToken.mType = nsCSSTokenType.AtKeyword;
    }

    return true;
}
/**
 * Consume the part of an URL token after the initial 'url('.  Caller
 * is assumed to have consumed 'url(' already.
 *
 * Returns false, without touching the token, when only whitespace remains
 * before EOF.  Otherwise returns true with mType set to URL or Bad_URL:
 *
 *   - a quoted body is read with ScanString; a Bad_String result becomes
 *     Bad_URL immediately;
 *   - an unquoted body (possibly empty) is gathered with IS_URL_CHAR and
 *     mSymbol is reset to 0;
 *   - after optional trailing whitespace, ')' or EOF completes a URL token,
 *     anything else yields Bad_URL.
 *
 * Exposed for use by nsCSSParser.ParseMozDocumentRule, which applies
 * the special lexical rules for URL tokens in a nonstandard context.
 */
internal bool NextURL(nsCSSToken aToken)
{
    SkipWhitespace();

    int32_t first = Peek();
    if (first < 0)
    {
        return false;
    }

    // aToken.mIdent may be "url" at this point; clear that out
    aToken.mIdent.Truncate();

    bool isQuoted = first == '"' || first == '\'';
    if (!isQuoted)
    {
        // Start of a non-quoted url (which may be empty).
        aToken.mSymbol = ((PRUnichar)(0));
        GatherText(IS_URL_CHAR, aToken.mIdent);
    }
    else
    {
        ScanString(aToken);
        if (aToken.mType == nsCSSTokenType.Bad_String)
        {
            aToken.mType = nsCSSTokenType.Bad_URL;
            return true;
        }
        Debug.Assert(aToken.mType == nsCSSTokenType.String, "unexpected token type");
    }

    // Consume trailing whitespace and then look for a close parenthesis.
    SkipWhitespace();
    int32_t closer = Peek();

    if (closer >= 0 && closer != ')')
    {
        // Something other than ')' or EOF follows the url body.
        aToken.mType = nsCSSTokenType.Bad_URL;
        return true;
    }

    Advance();
    aToken.mType = nsCSSTokenType.URL;
    return true;
}
/**
 * Primary scanner entry point.  Consume one token and fill in
 * |aToken| accordingly.  Will skip over any number of comments first,
 * and will also skip over rather than return whitespace tokens if
 * |aSkipWS| is true.
 *
 * On entry the token's mIdent is cleared and its mType is reset to Symbol,
 * so the individual Scan* helpers only need to change what differs.
 *
 * Returns true if it successfully consumed a token, false if EOF has
 * been reached.  Will always advance the current read position by at
 * least one character unless called when already at EOF.
 */
internal bool Next(nsCSSToken aToken, bool aSkipWS)
{
    int32_t ch;

    // do this here so we don't have to do it in dozens of other places
    aToken.mIdent.Truncate();
    aToken.mType = nsCSSTokenType.Symbol;

    for (;;)
    {
        // Consume any number of comments, and possibly also whitespace tokens,
        // in between other tokens.  The token start position is re-recorded
        // on every pass so it ends up pointing at the real token.
        mTokenOffset = mOffset;
        mTokenLineOffset = mLineOffset;
        mTokenLineNumber = mLineNumber;

        ch = Peek();
        if (IsWhitespace(ch))
        {
            SkipWhitespace();
            if (!aSkipWS)
            {
                aToken.mType = nsCSSTokenType.Whitespace;
                return true;
            }
            continue; // start again at the beginning
        }
        if (ch == '/' && !IsSVGMode() && Peek(1) == '*')
        {
            // FIXME: Editor wants comments to be preserved (bug 60290).
            SkipComment();
            continue; // start again at the beginning
        }
        break;
    }

    // EOF
    if (ch < 0)
    {
        return false;
    }

    // 'u' could be UNICODE-RANGE or an identifier-family token
    if (ch == 'u' || ch == 'U')
    {
        int32_t c2 = Peek(1);
        int32_t c3 = Peek(2);
        if (c2 == '+' && (IsHexDigit(c3) || c3 == '?'))
        {
            return ScanURange(aToken);
        }
        return ScanIdent(aToken);
    }

    // identifier family
    if (IsIdentStart(ch))
    {
        return ScanIdent(aToken);
    }

    // number family
    if (IsDigit(ch))
    {
        return ScanNumber(aToken);
    }

    if (ch == '.' && IsDigit(Peek(1)))
    {
        return ScanNumber(aToken);
    }

    if (ch == '+')
    {
        int32_t c2 = Peek(1);
        if (IsDigit(c2) || (c2 == '.' && IsDigit(Peek(2))))
        {
            return ScanNumber(aToken);
        }
    }

    // '-' can start an identifier-family token, a number-family token,
    // or an HTML-comment
    if (ch == '-')
    {
        int32_t c2 = Peek(1);
        int32_t c3 = Peek(2);
        if (IsIdentStart(c2))
        {
            return ScanIdent(aToken);
        }
        if (IsDigit(c2) || (c2 == '.' && IsDigit(c3)))
        {
            return ScanNumber(aToken);
        }
        if (c2 == '-' && c3 == '>')
        {
            Advance(3);
            aToken.mType = nsCSSTokenType.HTMLComment;
            // Record the three characters actually consumed.  The previous
            // text "-." did not match the "-->" sequence tested just above.
            aToken.mIdent.AssignLiteral("-->");
            return true;
        }
    }

    // the other HTML-comment token
    if (ch == '<' && Peek(1) == '!' && Peek(2) == '-' && Peek(3) == '-')
    {
        Advance(4);
        aToken.mType = nsCSSTokenType.HTMLComment;
        aToken.mIdent.AssignLiteral("<!--");
        return true;
    }

    // AT_KEYWORD
    if (ch == '@')
    {
        return ScanAtKeyword(aToken);
    }

    // HASH
    if (ch == '#')
    {
        return ScanHash(aToken);
    }

    // STRING
    if (ch == '"' || ch == '\'')
    {
        return ScanString(aToken);
    }

    // Match operators: ~= |= ^= $= *=
    nsCSSTokenType opType = MatchOperatorType(ch);
    if (opType != nsCSSTokenType.Symbol && Peek(1) == '=')
    {
        aToken.mType = opType;
        Advance(2);
        return true;
    }

    // Otherwise, a symbol (DELIM).
    aToken.mSymbol = (char)ch;
    Advance();
    return true;
}
/// <summary>
/// Asserts that <paramref name="lex"/> has no further tokens: <c>Next</c> (called with
/// its second argument set to <c>true</c>) must return <c>false</c>.
/// </summary>
/// <param name="lex">Scanner expected to be at end of input.</param>
private static void AssertNextTokenEnd(CssScanner lex)
{
    // The token is only a scratch target for Next; its contents are not inspected.
    var scratch = new CssToken();
    bool produced = lex.Next(scratch, true);

    Assert.IsFalse(produced, "Expected EOF");
}
/// <summary>
/// Reports an "unexpected token" style error whose message takes two arguments:
/// the offending token and a character.
/// </summary>
/// <remarks>
/// The token is rendered with <c>AppendToString</c> before formatting, exactly as the
/// two-argument overload does, so both overloads put the same token text into the
/// message rather than this one depending on whatever <c>CssToken.ToString()</c> returns.
/// </remarks>
/// <param name="aMessage">Message identifier or format passed to <c>CssResources.FormatString</c>.</param>
/// <param name="aToken">Token to describe in the message.</param>
/// <param name="aChar">Character supplied as the second format argument.</param>
internal void ReportUnexpected(string aMessage, CssToken aToken, char aChar)
{
    var sbToken = new StringBuilder();
    aToken.AppendToString(sbToken);

    AddToError(CssResources.FormatString(aMessage, sbToken.ToString(), aChar));
}
/// <summary>
/// Reports an "unexpected token" style error whose message takes the offending
/// token as its only argument.
/// </summary>
/// <param name="aMessage">Message identifier or format passed to <c>CssResources.FormatString</c>.</param>
/// <param name="aToken">Token whose text (as produced by <c>AppendToString</c>) is inserted into the message.</param>
internal void ReportUnexpected(string aMessage, CssToken aToken)
{
    // Render the token into text first; the formatter receives a plain string.
    var tokenText = new StringBuilder();
    aToken.AppendToString(tokenText);

    var formatted = CssResources.FormatString(aMessage, tokenText.ToString());
    AddToError(formatted);
}