/**
  <summary>Retrieves the PDF version of the file [PDF:1.6:3.4.1].</summary>
  <remarks>The stream pointer is moved to the beginning of the stream and the first 10
  characters are read; the pointer is NOT restored afterwards.</remarks>
  <returns>The three characters that immediately follow the header keyword
  (<c>Keyword.BOF</c>).</returns>
  <exception cref="PostScriptParseException">The stream does not begin with the header
  keyword, or the header is too short to carry a version.</exception>
*/
public string RetrieveVersion(
  )
{
  const int VersionLength = 3;

  IInputStream stream = Stream;
  stream.Seek(0);
  string header = stream.ReadString(10);

  // Ordinal comparison: the header is raw file content, not linguistic text, so it must
  // not be subject to culture-sensitive matching rules.
  if(!header.StartsWith(Keyword.BOF, System.StringComparison.Ordinal))
  {
    throw new PostScriptParseException("PDF header not found.", this);
  }

  // A truncated header would otherwise surface as an ArgumentOutOfRangeException
  // from Substring instead of a parse error.
  int versionOffset = Keyword.BOF.Length;
  if(header.Length < versionOffset + VersionLength)
  {
    throw new PostScriptParseException("PDF version not found.", this);
  }

  return header.Substring(versionOffset, VersionLength);
}
/**
  <summary>Moves to the next token, collapsing the three-token pattern
  <c>int int 'R'</c> into a single <see cref="Reference"/> token.</summary>
  <remarks>When the pattern does not match, the stream is moved back to the position it had
  right after the first integer and that integer is restored as the current token.</remarks>
  <returns>Whether the base parser found a new token.</returns>
*/
public override bool MoveNext(
  )
{
  bool moved = base.MoveNext();
  if(!moved)
  {
    return moved;
  }
  if(TokenType != TokenTypeEnum.Integer)
  {
    return moved;
  }

  /*
    NOTE: An integer may be the first element of an indirect reference:
      ref := { int int 'R' }
  */
  IInputStream input = Stream;
  // Position to recover in case the look-ahead fails.
  long recoveryOffset = input.Position;

  // 1. Object number.
  int objectNumber = (int)Token;

  // 2. Generation number.
  base.MoveNext();
  if(TokenType == TokenTypeEnum.Integer)
  {
    int generationNumber = (int)Token;

    // 3. Reference keyword.
    base.MoveNext();
    bool isReferenceKeyword = TokenType == TokenTypeEnum.Keyword
      && Token.Equals(Keyword.Reference);
    if(isReferenceKeyword)
    {
      Token = new Reference(objectNumber, generationNumber);
    }
  }

  if(Token is Reference)
  {
    return moved;
  }

  // Look-ahead failed: rewind and expose the plain integer again.
  input.Seek(recoveryOffset);
  Token = objectNumber;
  TokenType = TokenTypeEnum.Integer;
  return moved;
}
/**
  <summary>Moves the pointer to the given absolute byte position.</summary>
  <remarks>This is a direct delegation to the <c>Seek</c> method of the underlying
  stream; no token state of the parser is touched here.</remarks>
  <param name="offset">Absolute byte position to move the pointer to.</param>
*/
public void Seek( long offset ) { stream.Seek(offset); }
/**
  <summary>Parses the PDF object that starts at the current token.</summary>
  <remarks>
    <para>A <see cref="Reference"/> token is turned into a <c>PdfReference</c>; anything else
    is delegated to the base implementation.</para>
    <para>When the base implementation yields a dictionary, the following token is inspected:
    if it is the begin-stream keyword, the dictionary is treated as a stream header
    [PDF:1.6:3.2.7] and the stream body is read; otherwise the pointer is moved back so that the
    last consumed token is still the dictionary end.</para>
  </remarks>
  <returns>The parsed object: a reference, an object stream, a cross-reference stream, a
  generic stream, or whatever the base implementation returned.</returns>
*/
public override PdfDataObject ParsePdfObject(
  )
{
  if(TokenType == TokenTypeEnum.Keyword && Token is Reference)
  {
    Reference reference = (Reference)Token;
    return new PdfReference(reference.ObjectNumber, reference.GenerationNumber, file);
  }

  PdfDataObject pdfObject = base.ParsePdfObject();
  if(pdfObject is PdfDictionary)
  {
    IInputStream stream = Stream;
    // Kept as long (no narrowing cast): positions beyond Int32.MaxValue would otherwise be
    // truncated and the rollback below would seek to a wrong offset.
    long oldOffset = stream.Position;
    MoveNext();
    // Is this dictionary the header of a stream object [PDF:1.6:3.2.7]?
    if((TokenType == TokenTypeEnum.Keyword)
      && Token.Equals(Keyword.BeginStream))
    {
      PdfDictionary streamHeader = (PdfDictionary)pdfObject;

      // Keep track of current position!
      /*
        NOTE: Indirect reference resolution is an outbound call which affects the stream
        pointer position, so we need to recover our current position after it returns.
      */
      long position = stream.Position;

      // Get the stream length!
      int length = ((PdfInteger)streamHeader.Resolve(PdfName.Length)).IntValue;

      // Move to the stream data beginning!
      stream.Seek(position);
      SkipEOL();

      // Copy the stream data to the instance!
      byte[] data = new byte[length];
      stream.Read(data);

      MoveNext(); // Postcondition (last token should be 'endstream' keyword).

      Object streamType = streamHeader[PdfName.Type];
      if(PdfName.ObjStm.Equals(streamType)) // Object stream [PDF:1.6:3.4.6].
      {
        return new ObjectStream(
          streamHeader,
          new bytes.Buffer(data)
          );
      }
      else if(PdfName.XRef.Equals(streamType)) // Cross-reference stream [PDF:1.6:3.4.7].
      {
        return new XRefStream(
          streamHeader,
          new bytes.Buffer(data)
          );
      }
      else // Generic stream.
      {
        return new PdfStream(
          streamHeader,
          new bytes.Buffer(data)
          );
      }
    }
    else // Stand-alone dictionary.
    {
      // Restores postcondition (last token should be the dictionary end).
      stream.Seek(oldOffset);
    }
  }
  return pdfObject;
}
/**
  <summary>Parses the next token [PDF:1.6:3.1].</summary>
  <remarks>
    <para>To properly parse the current token, the pointer MUST be just before its starting
    (leading whitespaces are ignored). When this method terminates, the pointer IS
    at the last byte of the current token.</para>
    <para>A digit-initiated token is additionally probed for the indirect reference pattern
    <c>int int 'R'</c>; on a match the token becomes a <c>Reference</c>, otherwise the pointer
    is moved back right after the first integer, which is returned as a plain integer
    token.</para>
  </remarks>
  <returns>Whether a new token was found (false when the end of the stream is reached while
  skipping leading whitespace).</returns>
  <exception cref="FileFormatException">The stream ends in the middle of a name, number,
  hexadecimal string or literal string, or an isolated closing angle bracket is
  found.</exception>
*/
public bool MoveNext(
  )
{
  /*
    NOTE: It'd be interesting to evaluate an alternative regular-expression-based
    implementation...
  */
  StringBuilder buffer = null;
  token = null;
  int c = 0;

  // Skip white-space characters [PDF:1.6:3.1.1].
  do
  {
    c = stream.ReadByte();
    if(c == -1)
    {
      return false;
    }
  } while(IsWhitespace(c)); // Keep goin' till there's a white-space character...

  // Which character is it?
  switch(c)
  {
    case Symbol.Slash: // Name.
      tokenType = TokenTypeEnum.Name;

      buffer = new StringBuilder();
      while(true)
      {
        c = stream.ReadByte();
        if(c == -1)
        {
          throw new FileFormatException("Unexpected EOF (malformed name object).", stream.Position);
        }
        if(IsDelimiter(c) || IsWhitespace(c))
        {
          break;
        }

        buffer.Append((char)c);
      }
      stream.Skip(-1); // Recover the first byte after the current token.
      break;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
    case '.':
    case '-':
    case '+': // Number [PDF:1.6:3.2.2] | Indirect reference.
      switch(c)
      {
        case '.': // Decimal point.
          tokenType = TokenTypeEnum.Real;
          break;
        case '-':
        case '+': // Signum.
          tokenType = TokenTypeEnum.Integer; // By default (it may be real).
          break;
        default: // Digit.
          if(multipleTokenParsing) // Plain number (multiple token parsing -- see indirect reference search).
          {
            tokenType = TokenTypeEnum.Integer; // By default (it may be real).
          }
          else // Maybe an indirect reference (postfix notation [PDF:1.6:3.2.9]).
          {
            /*
              NOTE: We need to identify this pattern:
                ref := { int int 'R' }
            */
            // Enable multiple token parsing!
            // NOTE: This state MUST be disabled before leaving; the finally clause
            // guarantees it even when a nested call throws.
            multipleTokenParsing = true;
            try
            {
              // 1. Object number.
              // Try the possible object number!
              stream.Skip(-1);
              MoveNext();
              // Isn't it a valid object number?
              if(tokenType != TokenTypeEnum.Integer)
              {
                return true;
              }
              // Assign object number!
              int objectNumber = (int)token;
              // Backup the recovery position!
              long oldOffset = stream.Position;

              // 2. Generation number.
              /*
                NOTE: The return value MUST be checked: at the end of the stream the nested
                call returns false leaving the token null and the token type unchanged
                (still integer), so unboxing the token would fail.
              */
              if(MoveNext() && tokenType == TokenTypeEnum.Integer)
              {
                // Assign generation number!
                int generationNumber = (int)token;

                // 3. Reference keyword.
                if(MoveNext() && tokenType == TokenTypeEnum.Reference)
                {
                  token = new Reference(objectNumber, generationNumber);
                  return true;
                }
              }

              // Rollback: expose the first integer as a plain number.
              stream.Seek(oldOffset);
              token = objectNumber;
              tokenType = TokenTypeEnum.Integer;
              return true;
            }
            finally
            {
              // Disable multiple token parsing!
              multipleTokenParsing = false;
            }
          }
          break;
      }

      // Building the number...
      buffer = new StringBuilder();
      do
      {
        buffer.Append((char)c);
        c = stream.ReadByte();
        if(c == -1)
        {
          throw new FileFormatException("Unexpected EOF (malformed number object).", stream.Position);
        }
        if(c == '.')
        {
          tokenType = TokenTypeEnum.Real;
        }
        else if(c < '0' || c > '9')
        {
          break;
        }
      } while(true);

      stream.Skip(-1); // Recover the first byte after the current token.
      break;
    case Symbol.OpenSquareBracket: // Array (begin).
      tokenType = TokenTypeEnum.ArrayBegin;
      break;
    case Symbol.CloseSquareBracket: // Array (end).
      tokenType = TokenTypeEnum.ArrayEnd;
      break;
    case Symbol.OpenAngleBracket: // Dictionary (begin) | Hexadecimal string.
      c = stream.ReadByte();
      if(c == -1)
      {
        throw new FileFormatException("Unexpected EOF (isolated opening angle-bracket character).", stream.Position);
      }
      // Is it a dictionary (2nd angle bracket [PDF:1.6:3.2.6])?
      if(c == Symbol.OpenAngleBracket)
      {
        tokenType = TokenTypeEnum.DictionaryBegin;
        break;
      }

      // Hexadecimal string (single angle bracket [PDF:1.6:3.2.3]).
      tokenType = TokenTypeEnum.Hex;

      // [FIX:0.0.4:4] It skipped after the first hexadecimal character, missing it.
      buffer = new StringBuilder();
      while(c != Symbol.CloseAngleBracket) // NOT string end.
      {
        buffer.Append((char)c);

        c = stream.ReadByte();
        if(c == -1)
        {
          throw new FileFormatException("Unexpected EOF (malformed hex string).", stream.Position);
        }
      }
      break;
    case Symbol.CloseAngleBracket: // Dictionary (end).
      c = stream.ReadByte();
      if(c != Symbol.CloseAngleBracket)
      {
        throw new FileFormatException("Malformed dictionary.", stream.Position);
      }

      tokenType = TokenTypeEnum.DictionaryEnd;
      break;
    case Symbol.OpenRoundBracket: // Literal string.
      tokenType = TokenTypeEnum.Literal;

      buffer = new StringBuilder();
      int level = 0; // Nesting depth of unescaped round brackets.
      while(true)
      {
        c = stream.ReadByte();
        if(c == -1)
        {
          break;
        }
        if(c == Symbol.OpenRoundBracket)
        {
          level++;
        }
        else if(c == Symbol.CloseRoundBracket)
        {
          level--;
        }
        else if(c == '\\')
        {
          bool lineBreak = false;
          c = stream.ReadByte();
          switch(c)
          {
            case 'n':
              c = Symbol.LineFeed;
              break;
            case 'r':
              c = Symbol.CarriageReturn;
              break;
            case 't':
              c = '\t';
              break;
            case 'b':
              c = '\b';
              break;
            case 'f':
              c = '\f';
              break;
            case Symbol.OpenRoundBracket:
            case Symbol.CloseRoundBracket:
            case '\\':
              break;
            case Symbol.CarriageReturn:
              // Escaped end-of-line (line continuation): CR optionally followed by LF.
              lineBreak = true;
              c = stream.ReadByte();
              if(c != Symbol.LineFeed)
              {
                stream.Skip(-1);
              }
              break;
            case Symbol.LineFeed:
              // Escaped end-of-line (line continuation).
              lineBreak = true;
              break;
            default:
            {
              // Is it outside the octal encoding?
              if(c < '0' || c > '7')
              {
                break;
              }

              // Octal [PDF:1.6:3.2.3]: up to three digits.
              int octal = c - '0';
              c = stream.ReadByte();
              // Octal end?
              if(c < '0' || c > '7')
              {
                c = octal;
                stream.Skip(-1);
                break;
              }
              octal = (octal << 3) + c - '0';
              c = stream.ReadByte();
              // Octal end?
              if(c < '0' || c > '7')
              {
                c = octal;
                stream.Skip(-1);
                break;
              }
              octal = (octal << 3) + c - '0';
              c = octal & 0xff;
              break;
            }
          }
          if(lineBreak)
          {
            // The escaped end-of-line contributes nothing to the string.
            continue;
          }
          if(c == -1)
          {
            break;
          }
        }
        else if(c == Symbol.CarriageReturn)
        {
          // Unescaped end-of-line: CR and CR+LF are both stored as a single LF.
          c = stream.ReadByte();
          if(c == -1)
          {
            break;
          }
          if(c != Symbol.LineFeed)
          {
            c = Symbol.LineFeed;
            stream.Skip(-1);
          }
        }
        if(level == -1)
        {
          // The bracket closing the string itself has been reached.
          break;
        }

        buffer.Append((char)c);
      }
      if(c == -1)
      {
        throw new FileFormatException("Malformed literal string.", stream.Position);
      }
      break;
    case Symbol.CapitalR: // Indirect reference.
      tokenType = TokenTypeEnum.Reference;
      break;
    case Symbol.Percent: // Comment [PDF:1.6:3.1.2].
      tokenType = TokenTypeEnum.Comment;

      buffer = new StringBuilder();
      while(true)
      {
        c = stream.ReadByte();
        if(c == -1 || IsEOL(c))
        {
          break;
        }

        buffer.Append((char)c);
      }
      break;
    default: // Keyword object.
      tokenType = TokenTypeEnum.Keyword;

      buffer = new StringBuilder();
      do
      {
        buffer.Append((char)c);
        c = stream.ReadByte();
        if(c == -1)
        {
          break;
        }
      } while(!IsDelimiter(c) && !IsWhitespace(c));
      stream.Skip(-1); // Recover the first byte after the current token.
      break;
  }

  if(buffer != null)
  {
    /*
      Current token initialization.
    */
    // Which token type?
    switch(tokenType)
    {
      case TokenTypeEnum.Keyword:
        token = buffer.ToString();
        // Late recognition.
        switch((string)token)
        {
          case Keyword.False:
          case Keyword.True: // Boolean.
            tokenType = TokenTypeEnum.Boolean;
            token = bool.Parse((string)token);
            break;
          case Keyword.Null: // Null.
            tokenType = TokenTypeEnum.Null;
            token = null;
            break;
        }
        break;
      case TokenTypeEnum.Comment:
      case TokenTypeEnum.Hex:
      case TokenTypeEnum.Name:
        token = buffer.ToString();
        break;
      case TokenTypeEnum.Literal:
        token = buffer.ToString();
        // Late recognition.
        if(((string)token).StartsWith(Keyword.DatePrefix)) // Date.
        {
          tokenType = TokenTypeEnum.Date;
          token = PdfDate.ToDate((string)token);
        }
        break;
      case TokenTypeEnum.Integer:
        token = Int32.Parse(
          buffer.ToString(),
          NumberStyles.Integer,
          StandardNumberFormatInfo
          );
        break;
      case TokenTypeEnum.Real:
        // [FIX:1668410] Parsing of float numbers was buggy (localized default number format).
        token = Single.Parse(
          buffer.ToString(),
          NumberStyles.Float,
          StandardNumberFormatInfo
          );
        break;
    }
  }
  return true;
}
/**
  <summary>Parses an index starting at the given position of a stream.</summary>
  <remarks>The stream pointer is first moved to <paramref name="offset"/>; the actual parsing
  is delegated to the single-argument <c>Parse</c> overload.</remarks>
  <param name="stream">Stream to read from.</param>
  <param name="offset">Position passed to the stream's <c>Seek</c> method before
  parsing.</param>
  <returns>The result of the single-argument <c>Parse</c> overload.</returns>
*/
public static Index Parse(
  IInputStream stream,
  int offset
  )
{
  // Position the pointer, then hand over to the stream-only overload.
  stream.Seek(offset);
  Index parsedIndex = Parse(stream);
  return parsedIndex;
}