/**
  <summary>Moves to the next meaningful token, skipping comments and converting date
  literals.</summary>
  <remarks>
    <para>Comment tokens returned by the base implementation are discarded.</para>
    <para>A literal token beginning with <c>Keyword.DatePrefix</c> is handed to
    <c>PdfDate.ToDate</c> and the result replaces the current token; if that conversion raises a
    <c>ParseException</c>, the token is left untouched as an ordinary literal string.</para>
  </remarks>
  <returns>Whether a new token was found.</returns>
*/
public override bool MoveNext(
  )
{
  while(true)
  {
    if(!base.MoveNext())
      return false;

    TokenTypeEnum tokenType = TokenType;
    if(tokenType == TokenTypeEnum.Comment)
      continue; // Comments are ignored.

    if(tokenType == TokenTypeEnum.Literal)
    {
      string literalToken = (string)Token;
      /*
        NOTE: The date prefix is a fixed marker of the file format, not natural-language text,
        so it is matched ordinally (a culture-sensitive match may ignore certain characters and
        depends on the current culture).
      */
      if(literalToken.StartsWith(Keyword.DatePrefix, System.StringComparison.Ordinal)) // Date.
      {
        /*
          NOTE: Dates are a weak extension to the PostScript language.
        */
        try
        {Token = PdfDate.ToDate(literalToken);}
        catch(ParseException)
        {/* NOOP: gently degrade to a common literal. */}
      }
    }
    return true;
  }
}
/**
  <summary>Parse the next token [PDF:1.6:3.1].</summary>
  <remarks>
    <para>Contract:</para>
    <list type="bullet">
      <item>Preconditions:
        <list type="number">
          <item>To properly parse the current token, the pointer MUST be just before its
          starting (leading whitespaces are ignored).</item>
        </list>
      </item>
      <item>Postconditions:
        <list type="number">
          <item id="moveNext_contract_post[0]">When this method terminates, the pointer IS at
          the last byte of the current token.</item>
        </list>
      </item>
      <item>Invariants:
        <list type="number">
          <item>The byte-level position of the pointer IS anytime (during token parsing) at the
          end of the current token (whereas the 'current token' represents the token-level
          position of the pointer).</item>
        </list>
      </item>
      <item>Side-effects:
        <list type="number">
          <item>See <see href="#moveNext_contract_post[0]">Postconditions</see>.</item>
        </list>
      </item>
    </list>
    <para>When the current stream is exhausted while skipping whitespace,
    <c>MoveNextStream()</c> is invoked and scanning goes on in the following stream, if any.
    </para>
    <para>Name, number and keyword tokens are terminated either by a delimiter/whitespace byte
    (which is pushed back so that the next call can read it) or by the end of the stream (in
    which case nothing is pushed back).</para>
  </remarks>
  <returns>Whether a new token was found; <c>false</c> when there is no stream left to read.
  </returns>
  <exception cref="FileFormatException">An isolated angle bracket, an unterminated hexadecimal
  or literal string, or a truncated hexadecimal escape inside a name was met.</exception>
*/
public bool MoveNext(
  )
{
  if(stream == null)
    return false;

  /*
    NOTE: It'd be interesting to evaluate an alternative regular-expression-based
    implementation...
  */
  int c = 0;

  // Skip white-space characters [PDF:1.6:3.1.1].
  while(true)
  {
    c = stream.ReadByte();
    if(c == -1)
    {
      /* NOTE: Current stream has finished. */
      // Move to the next stream!
      MoveNextStream();
      // No more streams?
      if(stream == null)
        return false;
    }
    else if(!IsWhitespace(c)) // Stop at the first non-white-space character.
      break;
  }

  StringBuilder buffer = null;
  token = null;
  // Which character is it?
  switch(c)
  {
    case '/': // Name.
    {
      tokenType = TokenTypeEnum.Name;

      buffer = new StringBuilder();
      while((c = stream.ReadByte()) != -1)
      {
        if(IsDelimiter(c) || IsWhitespace(c))
          break;

        // Is it an hexadecimal code [PDF:1.6:3.2.4]?
        if(c == '#')
        {
          try
          {c = (GetHex(stream.ReadByte()) << 4) + GetHex(stream.ReadByte());}
          catch
          {throw new FileFormatException("Unexpected EOF (malformed hexadecimal code in name object).", stream.Position);}
        }

        buffer.Append((char)c);
      }
      /*
        NOTE: The terminating byte is pushed back only if one was actually consumed: on EOF
        rewinding would expose the last byte of this very token to the next call.
      */
      if(c != -1)
      {stream.Skip(-1);} // Recover the first byte after the current token.
      break;
    }
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
    case '.':
    case '-':
    case '+': // Number [PDF:1.6:3.2.2] | Indirect reference.
    {
      // A leading decimal point implies a real; digits and signs default to integer (it may
      // still turn out to be real).
      tokenType = (c == '.' ? TokenTypeEnum.Real : TokenTypeEnum.Integer);

      // Building the number...
      buffer = new StringBuilder();
      while(true)
      {
        buffer.Append((char)c);
        c = stream.ReadByte();
        if(c == -1)
          break;

        if(c == '.')
        {tokenType = TokenTypeEnum.Real;}
        else if(c < '0' || c > '9')
          break;
      }
      // See the name case: nothing to push back on EOF.
      if(c != -1)
      {stream.Skip(-1);} // Recover the first byte after the current token.
      break;
    }
    case '[': // Array (begin).
      tokenType = TokenTypeEnum.ArrayBegin;
      break;
    case ']': // Array (end).
      tokenType = TokenTypeEnum.ArrayEnd;
      break;
    case '<': // Dictionary (begin) | Hexadecimal string.
    {
      c = stream.ReadByte();
      if(c == -1)
        throw new FileFormatException("Unexpected EOF (isolated opening angle-bracket character).", stream.Position);

      // Is it a dictionary (2nd angle bracket [PDF:1.6:3.2.6])?
      if(c == '<')
      {
        tokenType = TokenTypeEnum.DictionaryBegin;
        break;
      }

      // Hexadecimal string (single angle bracket [PDF:1.6:3.2.3]).
      tokenType = TokenTypeEnum.Hex;

      // [FIX:0.0.4:4] It skipped after the first hexadecimal character, missing it.
      buffer = new StringBuilder();
      while(c != '>') // NOT string end.
      {
        buffer.Append((char)c);
        c = stream.ReadByte();
        if(c == -1)
          throw new FileFormatException("Unexpected EOF (malformed hex string).", stream.Position);
      }
      break;
    }
    case '>': // Dictionary (end).
    {
      c = stream.ReadByte();
      if(c != '>')
        throw new FileFormatException("Malformed dictionary.", stream.Position);

      tokenType = TokenTypeEnum.DictionaryEnd;
      break;
    }
    case '(': // Literal string.
    {
      tokenType = TokenTypeEnum.Literal;

      buffer = new StringBuilder();
      int level = 0; // Nesting depth of unescaped parentheses; -1 means the string is closed.
      while(true)
      {
        c = stream.ReadByte();
        if(c == -1)
          break;

        if(c == '(')
        {level++;}
        else if(c == ')')
        {level--;}
        else if(c == '\\')
        {
          bool lineBreak = false;
          c = stream.ReadByte();
          switch(c)
          {
            case 'n':
              c = '\n';
              break;
            case 'r':
              c = '\r';
              break;
            case 't':
              c = '\t';
              break;
            case 'b':
              c = '\b';
              break;
            case 'f':
              c = '\f';
              break;
            case '(':
            case ')':
            case '\\':
              break;
            case '\r': // Line continuation (CR or CR+LF).
              lineBreak = true;
              c = stream.ReadByte();
              if(c != '\n')
              {stream.Skip(-1);}
              break;
            case '\n': // Line continuation (LF).
              lineBreak = true;
              break;
            default:
            {
              // Is it outside the octal encoding?
              if(c < '0' || c > '7')
                break;

              // Octal [PDF:1.6:3.2.3] (1 to 3 digits).
              int octal = c - '0';
              c = stream.ReadByte();
              // Octal end?
              if(c < '0' || c > '7')
              {c = octal; stream.Skip(-1); break;}
              octal = (octal << 3) + c - '0';
              c = stream.ReadByte();
              // Octal end?
              if(c < '0' || c > '7')
              {c = octal; stream.Skip(-1); break;}
              octal = (octal << 3) + c - '0';
              c = octal & 0xff; // High-order overflow is ignored.
              break;
            }
          }
          if(lineBreak)
            continue; // An escaped end-of-line contributes nothing to the string.
          if(c == -1)
            break;
        }
        else if(c == '\r')
        {
          // Unescaped end-of-line markers (CR, CR+LF) are normalized to LF.
          c = stream.ReadByte();
          if(c == -1)
            break;
          if(c != '\n')
          {c = '\n'; stream.Skip(-1);}
        }
        if(level == -1)
          break; // Closing parenthesis of the string itself.

        buffer.Append((char)c);
      }
      if(c == -1)
        throw new FileFormatException("Malformed literal string.", stream.Position);
      break;
    }
    case '%': // Comment.
    {
      tokenType = TokenTypeEnum.Comment;

      buffer = new StringBuilder();
      while(true)
      {
        c = stream.ReadByte();
        if(c == -1 || IsEOL(c))
          break;

        buffer.Append((char)c);
      }
      break;
    }
    default: // Keyword.
    {
      tokenType = TokenTypeEnum.Keyword;

      buffer = new StringBuilder();
      do
      {
        buffer.Append((char)c);
        c = stream.ReadByte();
        if(c == -1)
          break;
      } while(!IsDelimiter(c) && !IsWhitespace(c));
      // See the name case: nothing to push back on EOF.
      if(c != -1)
      {stream.Skip(-1);} // Recover the first byte after the current token.
      break;
    }
  }

  if(buffer != null)
  {
    /*
      Here we prepare the current token state.
    */
    // Which token type?
    switch(tokenType)
    {
      case TokenTypeEnum.Keyword:
        token = buffer.ToString();
        // Late recognition.
        switch((string)token)
        {
          case Keyword.False:
          case Keyword.True: // Boolean.
            tokenType = TokenTypeEnum.Boolean;
            token = bool.Parse((string)token);
            break;
          case Keyword.Null: // Null.
            tokenType = TokenTypeEnum.Null;
            token = null;
            break;
        }
        break;
      case TokenTypeEnum.Comment:
      case TokenTypeEnum.Hex:
      case TokenTypeEnum.Name:
        token = buffer.ToString();
        break;
      case TokenTypeEnum.Literal:
        token = buffer.ToString();
        // Late recognition (the prefix is a format marker, hence the ordinal match).
        if(((string)token).StartsWith("D:", StringComparison.Ordinal)) // Date.
        {
          tokenType = TokenTypeEnum.Date;
          token = PdfDate.ToDate((string)token);
        }
        break;
      case TokenTypeEnum.Integer:
        token = Int32.Parse(
          buffer.ToString(),
          NumberStyles.Integer,
          StandardNumberFormatInfo
          );
        break;
      case TokenTypeEnum.Real:
        // [FIX:1668410] Parsing of float numbers was buggy (localized default number format).
        token = Single.Parse(
          buffer.ToString(),
          NumberStyles.Float,
          StandardNumberFormatInfo
          );
        break;
    }
  }
  return true;
}
/**
  <summary>Parses the next token [PDF:1.6:3.1].</summary>
  <remarks>
    <para>To properly parse the current token, the pointer MUST be just before its starting
    (leading whitespaces are ignored). When this method terminates, the pointer IS at the last
    byte of the current token.</para>
    <para>When a token starting with a digit is met outside look-ahead mode, up to three tokens
    are tentatively read in order to recognize the pattern <c>int int 'R'</c> (indirect
    reference, postfix notation [PDF:1.6:3.2.9]): on match, the current token becomes a
    <c>Reference</c>; otherwise the pointer is restored just after the first integer, which
    becomes the current token.</para>
  </remarks>
  <returns>Whether a new token was found; <c>false</c> when the end of the stream is reached
  before any non-whitespace byte.</returns>
  <exception cref="FileFormatException">The stream ended inside a name, number, hexadecimal
  string or literal string, or an isolated angle bracket was met.</exception>
*/
public bool MoveNext(
  )
{
  /*
    NOTE: It'd be interesting to evaluate an alternative regular-expression-based
    implementation...
  */
  StringBuilder buffer = null;
  token = null;
  int c = 0;

  // Skip white-space characters [PDF:1.6:3.1.1].
  do
  {
    c = stream.ReadByte();
    if(c == -1)
      return false;
  } while(IsWhitespace(c)); // Keep going while white-space characters are met.

  // Which character is it?
  switch(c)
  {
    case Symbol.Slash: // Name.
    {
      tokenType = TokenTypeEnum.Name;

      buffer = new StringBuilder();
      while(true)
      {
        c = stream.ReadByte();
        if(c == -1)
          throw new FileFormatException("Unexpected EOF (malformed name object).", stream.Position);
        if(IsDelimiter(c) || IsWhitespace(c))
          break;

        buffer.Append((char)c);
      }
      stream.Skip(-1); // Recover the first byte after the current token.
      break;
    }
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
    case '.':
    case '-':
    case '+': // Number [PDF:1.6:3.2.2] | Indirect reference.
    {
      switch(c)
      {
        case '.': // Decimal point.
          tokenType = TokenTypeEnum.Real;
          break;
        case '-':
        case '+': // Signum.
          tokenType = TokenTypeEnum.Integer; // By default (it may be real).
          break;
        default: // Digit.
          if(multipleTokenParsing) // Plain number (multiple token parsing -- see indirect reference search).
          {
            tokenType = TokenTypeEnum.Integer; // By default (it may be real).
          }
          else // Maybe an indirect reference (postfix notation [PDF:1.6:3.2.9]).
          {
            /*
              NOTE: We need to identify this pattern:
              ref := { int int 'R' }
            */
            // Enable multiple token parsing!
            multipleTokenParsing = true;
            try
            {
              // 1. Object number.
              // Try the possible object number!
              // NOTE: The pointer is moved back onto the digit just read, so that the nested
              // call parses the whole number as a plain token.
              stream.Skip(-1);
              MoveNext();
              // Isn't it a valid object number?
              if(tokenType != TokenTypeEnum.Integer)
                return true;

              // Assign object number!
              int objectNumber = (int)token;
              // Backup the recovery position!
              long oldOffset = stream.Position;

              // 2. Generation number.
              // Try the possible generation number!
              // NOTE: On EOF the nested call leaves a null token along with a stale token type,
              // so its outcome must be checked before unboxing the token.
              if(!MoveNext()
                || tokenType != TokenTypeEnum.Integer)
              {
                // Rollback!
                stream.Seek(oldOffset);
                token = objectNumber;
                tokenType = TokenTypeEnum.Integer;
                return true;
              }

              // Assign generation number!
              int generationNumber = (int)token;

              // 3. Reference keyword.
              // Try the possible reference keyword!
              if(!MoveNext()
                || tokenType != TokenTypeEnum.Reference)
              {
                // Rollback!
                stream.Seek(oldOffset);
                token = objectNumber;
                tokenType = TokenTypeEnum.Integer;
                return true;
              }

              token = new Reference(objectNumber, generationNumber);
              return true;
            }
            finally
            {
              // Disable multiple token parsing!
              // NOTE: This state MUST be reset whatever the outcome (nested parsing may throw),
              // otherwise indirect references would no longer be recognized by later calls.
              multipleTokenParsing = false;
            }
          }
          break;
      }

      // Building the number...
      buffer = new StringBuilder();
      while(true)
      {
        buffer.Append((char)c);
        c = stream.ReadByte();
        if(c == -1)
          throw new FileFormatException("Unexpected EOF (malformed number object).", stream.Position);

        if(c == '.')
        {tokenType = TokenTypeEnum.Real;}
        else if(c < '0' || c > '9')
          break;
      }
      stream.Skip(-1); // Recover the first byte after the current token.
      break;
    }
    case Symbol.OpenSquareBracket: // Array (begin).
      tokenType = TokenTypeEnum.ArrayBegin;
      break;
    case Symbol.CloseSquareBracket: // Array (end).
      tokenType = TokenTypeEnum.ArrayEnd;
      break;
    case Symbol.OpenAngleBracket: // Dictionary (begin) | Hexadecimal string.
    {
      c = stream.ReadByte();
      if(c == -1)
        throw new FileFormatException("Unexpected EOF (isolated opening angle-bracket character).", stream.Position);

      // Is it a dictionary (2nd angle bracket [PDF:1.6:3.2.6])?
      if(c == Symbol.OpenAngleBracket)
      {
        tokenType = TokenTypeEnum.DictionaryBegin;
        break;
      }

      // Hexadecimal string (single angle bracket [PDF:1.6:3.2.3]).
      tokenType = TokenTypeEnum.Hex;

      // [FIX:0.0.4:4] It skipped after the first hexadecimal character, missing it.
      buffer = new StringBuilder();
      while(c != Symbol.CloseAngleBracket) // NOT string end.
      {
        buffer.Append((char)c);
        c = stream.ReadByte();
        if(c == -1)
          throw new FileFormatException("Unexpected EOF (malformed hex string).", stream.Position);
      }
      break;
    }
    case Symbol.CloseAngleBracket: // Dictionary (end).
    {
      c = stream.ReadByte();
      if(c != Symbol.CloseAngleBracket)
        throw new FileFormatException("Malformed dictionary.", stream.Position);

      tokenType = TokenTypeEnum.DictionaryEnd;
      break;
    }
    case Symbol.OpenRoundBracket: // Literal string.
    {
      tokenType = TokenTypeEnum.Literal;

      buffer = new StringBuilder();
      int level = 0; // Nesting depth of unescaped parentheses; -1 means the string is closed.
      while(true)
      {
        c = stream.ReadByte();
        if(c == -1)
          break;

        if(c == Symbol.OpenRoundBracket)
        {level++;}
        else if(c == Symbol.CloseRoundBracket)
        {level--;}
        else if(c == '\\')
        {
          bool lineBreak = false;
          c = stream.ReadByte();
          switch(c)
          {
            case 'n':
              c = Symbol.LineFeed;
              break;
            case 'r':
              c = Symbol.CarriageReturn;
              break;
            case 't':
              c = '\t';
              break;
            case 'b':
              c = '\b';
              break;
            case 'f':
              c = '\f';
              break;
            case Symbol.OpenRoundBracket:
            case Symbol.CloseRoundBracket:
            case '\\':
              break;
            case Symbol.CarriageReturn: // Line continuation (CR or CR+LF).
              lineBreak = true;
              c = stream.ReadByte();
              if(c != Symbol.LineFeed)
              {stream.Skip(-1);}
              break;
            case Symbol.LineFeed: // Line continuation (LF).
              lineBreak = true;
              break;
            default:
            {
              // Is it outside the octal encoding?
              if(c < '0' || c > '7')
                break;

              // Octal [PDF:1.6:3.2.3] (1 to 3 digits).
              int octal = c - '0';
              c = stream.ReadByte();
              // Octal end?
              if(c < '0' || c > '7')
              {c = octal; stream.Skip(-1); break;}
              octal = (octal << 3) + c - '0';
              c = stream.ReadByte();
              // Octal end?
              if(c < '0' || c > '7')
              {c = octal; stream.Skip(-1); break;}
              octal = (octal << 3) + c - '0';
              c = octal & 0xff; // High-order overflow is ignored.
              break;
            }
          }
          if(lineBreak)
            continue; // An escaped end-of-line contributes nothing to the string.
          if(c == -1)
            break;
        }
        else if(c == Symbol.CarriageReturn)
        {
          // Unescaped end-of-line markers (CR, CR+LF) are normalized to LF.
          c = stream.ReadByte();
          if(c == -1)
            break;
          if(c != Symbol.LineFeed)
          {c = Symbol.LineFeed; stream.Skip(-1);}
        }
        if(level == -1)
          break; // Closing parenthesis of the string itself.

        buffer.Append((char)c);
      }
      if(c == -1)
        throw new FileFormatException("Malformed literal string.", stream.Position);
      break;
    }
    case Symbol.CapitalR: // Indirect reference.
      tokenType = TokenTypeEnum.Reference;
      break;
    case Symbol.Percent: // Comment [PDF:1.6:3.1.2].
    {
      tokenType = TokenTypeEnum.Comment;

      buffer = new StringBuilder();
      while(true)
      {
        c = stream.ReadByte();
        if(c == -1 || IsEOL(c))
          break;

        buffer.Append((char)c);
      }
      break;
    }
    default: // Keyword object.
    {
      tokenType = TokenTypeEnum.Keyword;

      buffer = new StringBuilder();
      do
      {
        buffer.Append((char)c);
        c = stream.ReadByte();
        if(c == -1)
          break;
      } while(!IsDelimiter(c) && !IsWhitespace(c));
      /*
        NOTE: The terminating byte is pushed back only if one was actually consumed: on EOF
        rewinding would expose the last byte of this very token to the next call.
      */
      if(c != -1)
      {stream.Skip(-1);} // Recover the first byte after the current token.
      break;
    }
  }

  if(buffer != null)
  {
    /*
      Current token initialization.
    */
    // Which token type?
    switch(tokenType)
    {
      case TokenTypeEnum.Keyword:
        token = buffer.ToString();
        // Late recognition.
        switch((string)token)
        {
          case Keyword.False:
          case Keyword.True: // Boolean.
            tokenType = TokenTypeEnum.Boolean;
            token = bool.Parse((string)token);
            break;
          case Keyword.Null: // Null.
            tokenType = TokenTypeEnum.Null;
            token = null;
            break;
        }
        break;
      case TokenTypeEnum.Comment:
      case TokenTypeEnum.Hex:
      case TokenTypeEnum.Name:
        token = buffer.ToString();
        break;
      case TokenTypeEnum.Literal:
        token = buffer.ToString();
        // Late recognition (the prefix is a format marker, hence the ordinal match).
        if(((string)token).StartsWith(Keyword.DatePrefix, StringComparison.Ordinal)) // Date.
        {
          tokenType = TokenTypeEnum.Date;
          token = PdfDate.ToDate((string)token);
        }
        break;
      case TokenTypeEnum.Integer:
        token = Int32.Parse(
          buffer.ToString(),
          NumberStyles.Integer,
          StandardNumberFormatInfo
          );
        break;
      case TokenTypeEnum.Real:
        // [FIX:1668410] Parsing of float numbers was buggy (localized default number format).
        token = Single.Parse(
          buffer.ToString(),
          NumberStyles.Float,
          StandardNumberFormatInfo
          );
        break;
    }
  }
  return true;
}