/// <summary>
/// Scans the next token and verifies that its text equals the expected one.
/// </summary>
/// <param name="token">The token text that must come next in the input.</param>
/// <returns>The symbol the lexer reported for the scanned token.</returns>
/// <exception cref="PdfReaderException">The scanned token text differs from
/// <paramref name="token"/>.</exception>
Symbol ReadToken(string token)
{
    Symbol scanned = this.lexer.ScanNextToken();

    // Happy path first: the lexer's current token text is the expected one.
    if (token == this.lexer.Token)
    {
        return scanned;
    }

    throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
}
/// <summary>
/// Reads the subsections of a cross-reference table and the trailer dictionary that
/// follows them. Each in-use entry whose object ID is not already contained in
/// <paramref name="xrefTable"/> is added to it.
/// </summary>
/// <param name="xrefTable">The reference table that receives the entries (asserted to be
/// non-null in debug builds).</param>
/// <returns>The trailer dictionary read after the last subsection.</returns>
/// <exception cref="PdfReaderException">The first token is an integer (treated as a
/// cross-reference stream, which is not handled here), or a token other than an integer
/// or the trailer keyword appears between subsections.</exception>
PdfTrailer ReadXRefTableAndTrailer(PdfReferenceTable xrefTable)
{
    Debug.Assert(xrefTable != null);

    Symbol symbol = ScanNextToken();

    // Is it an xref stream?
    if (symbol == Symbol.Integer)
    {
        throw new PdfReaderException(PSSR.CannotHandleXRefStreams);
    }

    // TODO: It is very high on the todo list, but still undone
    Debug.Assert(symbol == Symbol.XRef);

    for (;;)
    {
        symbol = ScanNextToken();
        switch (symbol)
        {
            case Symbol.Integer:
            {
                // Subsection header: first object number followed by the entry count.
                int firstId = this.lexer.TokenToInteger;
                int count = ReadInteger();
                int endId = firstId + count;

                for (int id = firstId; id < endId; id++)
                {
                    // Every entry is consumed completely before deciding whether to keep it.
                    int position = ReadInteger();
                    int generation = ReadInteger();
                    ReadSymbol(Symbol.Keyword);
                    string entryType = lexer.Token;

                    // Skip the start entry (id 0) and unused entries (anything but "n").
                    bool inUse = id != 0 && entryType == "n";
                    if (!inUse)
                    {
                        continue;
                    }

                    // Even it is restricted, an object can exists in more than one subsection.
                    // (PDF Reference Implementation Notes 15).
                    // The entry that was registered first wins; later ones are ignored.
                    PdfObjectID objectID = new PdfObjectID(id, generation);
                    if (!xrefTable.Contains(objectID))
                    {
                        xrefTable.Add(new PdfReference(objectID, position));
                    }
                }
                break;
            }

            case Symbol.Trailer:
            {
                ReadSymbol(Symbol.BeginDictionary);
                PdfTrailer trailer = new PdfTrailer(this.document);
                this.ReadDictionary(trailer, false);
                return trailer;
            }

            default:
                throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
        }
    }
}
/// <summary>
/// Scans the next token and verifies that it is of the expected symbol kind.
/// </summary>
/// <param name="symbol">The symbol that must come next in the input.</param>
/// <returns>The scanned symbol, which equals <paramref name="symbol"/> on success.</returns>
/// <exception cref="PdfReaderException">The scanned symbol differs from
/// <paramref name="symbol"/>.</exception>
Symbol ReadSymbol(Symbol symbol)
{
    Symbol scanned = this.lexer.ScanNextToken();

    if (symbol == scanned)
    {
        return scanned;
    }

    throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
}
/// <summary>
/// Reads a name from the PDF data stream. The preceding slash is part of the result string.
/// </summary>
/// <returns>The token text of the scanned name.</returns>
/// <exception cref="PdfReaderException">The next token is not a name.</exception>
string ReadName()
{
    string name;

    // Scan and classify in one step; only a name token is acceptable here.
    if (ScanNextToken(out name) == Symbol.Name)
    {
        return name;
    }

    throw new PdfReaderException(PSSR.UnexpectedToken(name));
}
/// <summary>
/// Scans the next content-stream token and verifies that it is of the expected symbol kind.
/// </summary>
/// <param name="symbol">The symbol that must come next in the input.</param>
/// <returns>The scanned symbol.</returns>
/// <remarks>
/// A mismatch is reported through
/// <c>ContentReaderDiagnostics.ThrowContentReaderException</c>; should that helper
/// return, the scanned symbol is still returned to the caller.
/// </remarks>
CSymbol ReadSymbol(CSymbol symbol)
{
    CSymbol scanned = _lexer.ScanNextToken();

    bool matches = symbol == scanned;
    if (!matches)
    {
        ContentReaderDiagnostics.ThrowContentReaderException(PSSR.UnexpectedToken(_lexer.Token));
    }

    return scanned;
}
/// <summary>
/// Scans the next token and verifies that its text equals the expected one.
/// </summary>
/// <param name="token">The token text that must come next in the input.</param>
/// <returns>The symbol the lexer reported for the scanned token.</returns>
/// <exception cref="PdfReaderException">The scanned token text differs from
/// <paramref name="token"/>.</exception>
Symbol ReadToken(string token)
{
    Symbol scanned = lexer.ScanNextToken();

    // The comparison is against the lexer's current token text, not the symbol kind.
    if (token == lexer.Token)
    {
        return scanned;
    }

    throw new PdfReaderException(PSSR.UnexpectedToken(lexer.Token));
}
/// <summary>
/// Scans the next token and verifies that it is of the expected symbol kind.
/// </summary>
/// <param name="symbol">The symbol that must come next in the input.</param>
/// <returns>The scanned symbol, which equals <paramref name="symbol"/> on success.</returns>
/// <exception cref="PdfReaderException">The scanned symbol differs from
/// <paramref name="symbol"/>.</exception>
Symbol ReadSymbol(Symbol symbol)
{
    Symbol scanned = lexer.ScanNextToken();

    if (symbol == scanned)
    {
        return scanned;
    }

    throw new PdfReaderException(PSSR.UnexpectedToken(lexer.Token));
}
/// <summary>
/// Scans the next content-stream token and verifies that it is of the expected symbol kind.
/// </summary>
/// <param name="symbol">The symbol that must come next in the input.</param>
/// <returns>The scanned symbol, which equals <paramref name="symbol"/> on success.</returns>
/// <exception cref="ContentReaderException">The scanned symbol differs from
/// <paramref name="symbol"/>.</exception>
CSymbol ReadSymbol(CSymbol symbol)
{
    CSymbol scanned = this.lexer.ScanNextToken();

    if (symbol == scanned)
    {
        return scanned;
    }

    throw new ContentReaderException(PSSR.UnexpectedToken(this.lexer.Token));
}
/*
/// <summary>
/// Reads a string immediately or (optionally) indirectly from the PDF data stream.
/// </summary>
protected string ReadString(bool canBeIndirect)
{
    Symbol symbol = Symbol.None; //this.lexer.ScanNextToken(canBeIndirect);
    if (symbol == Symbol.String || symbol == Symbol.HexString)
        return this.lexer.Token;
    else if (symbol == Symbol.R)
    {
        int position = this.lexer.Position;
        MoveToObject(this.lexer.Token);
        ReadObjectID(null);
        string s = ReadString();
        ReadSymbol(Symbol.EndObj);
        this.lexer.Position = position;
        return s;
    }
    throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
}

protected string ReadString()
{
    return ReadString(false);
}

/// <summary>
/// Reads a string immediately or (optionally) indirectly from the PDF data stream.
/// </summary>
protected bool ReadBoolean(bool canBeIndirect)
{
    Symbol symbol = this.lexer.ScanNextToken(canBeIndirect);
    if (symbol == Symbol.Boolean)
        return this.lexer.TokenToBoolean;
    else if (symbol == Symbol.R)
    {
        int position = this.lexer.Position;
        MoveToObject(this.lexer.Token);
        ReadObjectID(null);
        bool b = ReadBoolean();
        ReadSymbol(Symbol.EndObj);
        this.lexer.Position = position;
        return b;
    }
    throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
}

protected bool ReadBoolean()
{
    return ReadBoolean(false);
}
*/

/// <summary>
/// Reads an integer value from the PDF data stream.
/// </summary>
/// <param name="canBeIndirect">
/// NOTE(review): this flag is never read by the method body; an <c>R</c> symbol is
/// handled the same way whatever its value. Confirm whether that is intended.
/// </param>
/// <returns>The integer value of the scanned token, or the integer read after an object
/// ID when the scanned symbol is <c>Symbol.R</c>.</returns>
/// <exception cref="PdfReaderException">The scanned symbol is neither an integer nor
/// <c>Symbol.R</c>.</exception>
int ReadInteger(bool canBeIndirect)
{
    Symbol scanned = this.lexer.ScanNextToken();

    // Direct integer: the common case.
    if (scanned == Symbol.Integer)
    {
        return this.lexer.TokenToInteger;
    }

    if (scanned != Symbol.R)
    {
        throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
    }

    // Reference: remember where the lexer stands, read "<id> <integer> endobj",
    // then put the lexer back so the caller continues behind the reference.
    int savedPosition = this.lexer.Position;
    // MoveToObject(this.lexer.Token);
    ReadObjectID(null);
    int value = ReadInteger();
    ReadSymbol(Symbol.EndObj);
    this.lexer.Position = savedPosition;
    return value;
}
/// <summary>
/// Reads the indirect PDF object with the specified ID from the input stream.
/// </summary>
/// <param name="pdfObject">Either the instance of a derived type or null. If it is null
/// an appropriate object is created. A non-null instance is only used for arrays and
/// dictionaries; for all other object kinds a new object is created.</param>
/// <param name="objectID">The address of the object. Its numbers, not the ones found in
/// the file, are assigned to the resulting object.</param>
/// <param name="includeReferences">If true, specifies that all indirect objects
/// are included recursively. The value is handed on to ReadArray and ReadDictionary.</param>
/// <returns>The object that was read, with its object ID set.</returns>
/// <exception cref="PdfReaderException">The object is not terminated by the expected
/// end-of-object symbol.</exception>
/// <exception cref="NotImplementedException">The object starts with a keyword or with a
/// symbol that is not handled here.</exception>
public PdfObject ReadObject(PdfObject pdfObject, PdfObjectID objectID, bool includeReferences)
{
    MoveToObject(objectID);

    // The numbers stored in front of "obj" must be consumed to advance the lexer,
    // but they are only looked at in debug builds.
    int parsedObjectNumber = ReadInteger();
    int parsedGenerationNumber = ReadInteger();
#if DEBUG
    // The following assertion sometime failed (see below)
    //Debug.Assert(objectID == new PdfObjectID(parsedObjectNumber, parsedGenerationNumber));
    if (objectID != new PdfObjectID(parsedObjectNumber, parsedGenerationNumber))
    {
        // A special kind of bug? Or is this an undocumented PDF feature?
        // PDF4NET 2.6 provides a sample called 'Unicode', which produces a file 'unicode.pdf'
        // The iref table of this file contains the following entries:
        //    iref
        //    0 148
        //    0000000000 65535 f
        //    0000000015 00000 n
        //    0000000346 00000 n
        //    ....
        //    0000083236 00000 n
        //    0000083045 00000 n
        //    0000083045 00000 n
        //    0000083045 00000 n
        //    0000083045 00000 n
        //    0000080334 00000 n
        //    ....
        // Object 84, 85, 86, and 87 maps to the same dictionary, but all PDF readers I tested
        // ignores this mismatch! The following assertion failed about 50 times with this file.
#if true_
        string message = String.Format("xref entry {0} {1} maps to object {2} {3}.",
            objectID.ObjectNumber, objectID.GenerationNumber, parsedObjectNumber, parsedGenerationNumber);
        Debug.Assert(false, message);
#endif
    }
#endif
    // Always use object ID from iref table (see above)
    int objectNumber = objectID.ObjectNumber;
    int generationNumber = objectID.GenerationNumber;
#if true_
    Debug.WriteLine(String.Format("obj: {0} {1}", objectNumber, generationNumber));
#endif
    ReadSymbol(Symbol.Obj);

    bool checkForStream = false;
    Symbol symbol = ScanNextToken();
    bool isContainer = symbol == Symbol.BeginArray || symbol == Symbol.BeginDictionary;

    // Step 1: create (or fill) the object that matches the first symbol of the body.
    switch (symbol)
    {
        case Symbol.BeginArray:
            PdfArray array = pdfObject == null ? new PdfArray(this.document) : (PdfArray)pdfObject;
            //PdfObject.RegisterObject(array, objectID, generation);
            pdfObject = ReadArray(array, includeReferences);
            break;

        case Symbol.BeginDictionary:
            PdfDictionary dictionary = pdfObject == null ? new PdfDictionary(this.document) : (PdfDictionary)pdfObject;
            //PdfObject.RegisterObject(dict, objectID, generation);
            checkForStream = true;
            pdfObject = ReadDictionary(dictionary, includeReferences);
            break;

        // Acrobat 6 Professional proudly presents: The Null object!
        // Even with a one-digit object number an indirect reference "x 0 R" to this object is
        // one character larger than the direct use of "null". Probable this is the reason why
        // it is true that Acrobat Web Capture 6.0 creates this object, but obviously never
        // creates a reference to it!
        case Symbol.Null:
            pdfObject = new PdfNullObject(this.document);
            break;

        case Symbol.Boolean:
            pdfObject = new PdfBooleanObject(this.document, string.Compare(this.lexer.Token, Boolean.TrueString, true) == 0); //!!!mod THHO 19.11.09
            break;

        case Symbol.Integer:
            pdfObject = new PdfIntegerObject(this.document, this.lexer.TokenToInteger);
            break;

        case Symbol.UInteger:
            pdfObject = new PdfUIntegerObject(this.document, this.lexer.TokenToUInteger);
            break;

        case Symbol.Real:
            pdfObject = new PdfRealObject(this.document, this.lexer.TokenToReal);
            break;

        case Symbol.String:
            pdfObject = new PdfStringObject(this.document, this.lexer.Token);
            break;

        case Symbol.Name:
            pdfObject = new PdfNameObject(this.document, this.lexer.Token);
            break;

        case Symbol.Keyword:
            // Should not come here anymore
            throw new NotImplementedException("Keyword");

        default:
            // Should not come here anymore
            throw new NotImplementedException("unknown token \"" + symbol + "\"");
    }

    // Step 2: every kind of object gets the ID from the iref table.
    pdfObject.SetObjectID(objectNumber, generationNumber);

    // Step 3: simple objects must be followed by the end-of-object symbol right away.
    if (!isContainer)
    {
        ReadSymbol(Symbol.EndObj);
        return pdfObject;
    }

    // Step 4: arrays and dictionaries may be followed by a stream before the object ends.
    symbol = ScanNextToken();
    if (symbol == Symbol.BeginStream)
    {
        PdfDictionary streamOwner = (PdfDictionary)pdfObject;
        Debug.Assert(checkForStream, "Unexpected stream...");
        int length = GetStreamLength(streamOwner);
        byte[] bytes = this.lexer.ReadStream(length);
#if true_
        if (streamOwner.Elements.GetString("/Filter") == "/FlateDecode")
        {
            if (streamOwner.Elements["/Subtype"] == null)
            {
                try
                {
                    byte[] decoded = Filtering.FlateDecode.Decode(bytes);
                    if (decoded.Length == 0)
                    {
                        goto End;
                    }
                    string pageContent = Filtering.FlateDecode.DecodeToString(bytes);
                    if (pageContent.Length > 100)
                    {
                        pageContent = pageContent.Substring(pageContent.Length - 100);
                    }
                    pageContent.GetType();
                    bytes = decoded;
                    streamOwner.Elements.Remove("/Filter");
                    streamOwner.Elements.SetInteger("/Length", bytes.Length);
                }
                catch
                {
                }
            }
        End:;
        }
#endif
        streamOwner.Stream = new PdfDictionary.PdfStream(bytes, streamOwner);
        ReadSymbol(Symbol.EndStream);
        symbol = ScanNextToken();
    }

    if (symbol == Symbol.EndObj)
    {
        return pdfObject;
    }

    throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
}