/// <summary> /// Converts the specified value to string. /// If the value does not exist, the function returns the empty string. /// If the value is not convertible, the function throws an InvalidCastException. /// If the index is out of range, the function throws an ArgumentOutOfRangeException. /// </summary> public string GetString(int index) { if (index < 0 || index >= Count) { throw new ArgumentOutOfRangeException("index", index, PSSR.IndexOutOfRange); } object obj = this[index]; if (obj == null) { return(String.Empty); } PdfString str = obj as PdfString; if (str != null) { return(str.Value); } PdfStringObject strObject = obj as PdfStringObject; if (strObject != null) { return(strObject.Value); } throw new InvalidCastException("GetString: Object is not a string."); }
/// <summary> /// Reads PDF object from input stream. /// </summary> /// <param name="pdfObject">Either the instance of a derived type or null. If it is null /// an appropriate object is created.</param> /// <param name="objectID">The address of the object.</param> /// <param name="includeReferences">If true, specifies that all indirect objects /// are included recursively.</param> public PdfObject ReadObject(PdfObject pdfObject, PdfObjectID objectID, bool includeReferences) { MoveToObject(objectID); int objectNumber = ReadInteger(); int generationNumber = ReadInteger(); #if DEBUG // The following assertion sometime failed (see below) //Debug.Assert(objectID == new PdfObjectID(objectNumber, generationNumber)); if (objectID != new PdfObjectID(objectNumber, generationNumber)) { // A special kind of bug? Or is this an undocumented PDF feature? // PDF4NET 2.6 provides a sample called 'Unicode', which produces a file 'unicode.pdf' // The iref table of this file contains the following entries: // iref // 0 148 // 0000000000 65535 f // 0000000015 00000 n // 0000000346 00000 n // .... // 0000083236 00000 n // 0000083045 00000 n // 0000083045 00000 n // 0000083045 00000 n // 0000083045 00000 n // 0000080334 00000 n // .... // Object 84, 85, 86, and 87 maps to the same dictionary, but all PDF readers I tested // ignores this mismatch! The following assertion failed about 50 times with this file. #if true_ string message = String.Format("xref entry {0} {1} maps to object {2} {3}.", objectID.ObjectNumber, objectID.GenerationNumber, objectNumber, generationNumber); Debug.Assert(false, message); #endif } #endif // Always use object ID from iref table (see above) objectNumber = objectID.ObjectNumber; generationNumber = objectID.GenerationNumber; #if true_ Debug.WriteLine(String.Format("obj: {0} {1}", objectNumber, generationNumber)); #endif ReadSymbol(Symbol.Obj); bool checkForStream = false; Symbol symbol = ScanNextToken(); switch (symbol) { case Symbol.BeginArray: PdfArray array; if (pdfObject == null) array = new PdfArray(this.document); else array = (PdfArray)pdfObject; //PdfObject.RegisterObject(array, objectID, generation); pdfObject = ReadArray(array, includeReferences); pdfObject.SetObjectID(objectNumber, generationNumber); break; case Symbol.BeginDictionary: PdfDictionary dict; if (pdfObject == null) dict = new PdfDictionary(this.document); else dict = (PdfDictionary)pdfObject; //PdfObject.RegisterObject(dict, objectID, generation); checkForStream = true; pdfObject = ReadDictionary(dict, includeReferences); pdfObject.SetObjectID(objectNumber, generationNumber); break; // Acrobat 6 Professional proudly presents: The Null object! // Even with a one-digit object number an indirect reference «x 0 R» to this object is // one character larger than the direct use of «null». Probable this is the reason why // it is true that Acrobat Web Capture 6.0 creates this object, but obviously never // creates a reference to it! case Symbol.Null: pdfObject = new PdfNullObject(this.document); pdfObject.SetObjectID(objectNumber, generationNumber); ReadSymbol(Symbol.EndObj); return pdfObject; case Symbol.Boolean: pdfObject = new PdfBooleanObject(this.document, this.lexer.Token == Boolean.TrueString); pdfObject.SetObjectID(objectNumber, generationNumber); ReadSymbol(Symbol.EndObj); return pdfObject; case Symbol.Integer: pdfObject = new PdfIntegerObject(this.document, this.lexer.TokenToInteger); pdfObject.SetObjectID(objectNumber, generationNumber); ReadSymbol(Symbol.EndObj); return pdfObject; case Symbol.UInteger: pdfObject = new PdfUIntegerObject(this.document, this.lexer.TokenToUInteger); pdfObject.SetObjectID(objectNumber, generationNumber); ReadSymbol(Symbol.EndObj); return pdfObject; case Symbol.Real: pdfObject = new PdfRealObject(this.document, this.lexer.TokenToReal); pdfObject.SetObjectID(objectNumber, generationNumber); ReadSymbol(Symbol.EndObj); return pdfObject; case Symbol.String: pdfObject = new PdfStringObject(this.document, this.lexer.Token); pdfObject.SetObjectID(objectNumber, generationNumber); ReadSymbol(Symbol.EndObj); return pdfObject; case Symbol.Name: pdfObject = new PdfNameObject(this.document, this.lexer.Token); pdfObject.SetObjectID(objectNumber, generationNumber); ReadSymbol(Symbol.EndObj); return pdfObject; case Symbol.Keyword: // Should not come here anymore throw new NotImplementedException("Keyword"); default: // Should not come here anymore throw new NotImplementedException("unknown token"); } symbol = ScanNextToken(); if (symbol == Symbol.BeginStream) { PdfDictionary dict = (PdfDictionary)pdfObject; Debug.Assert(checkForStream, "Unexpected stream..."); int length = GetStreamLength(dict); byte[] bytes = this.lexer.ReadStream(length); #if true_ if (dict.Elements.GetString("/Filter") == "/FlateDecode") { if (dict.Elements["/Subtype"] == null) { try { byte[] decoded = Filtering.FlateDecode.Decode(bytes); if (decoded.Length == 0) goto End; string pageContent = Filtering.FlateDecode.DecodeToString(bytes); if (pageContent.Length > 100) pageContent = pageContent.Substring(pageContent.Length - 100); pageContent.GetType(); bytes = decoded; dict.Elements.Remove("/Filter"); dict.Elements.SetInteger("/Length", bytes.Length); } catch { } } End:; } #endif PdfDictionary.PdfStream stream = new PdfDictionary.PdfStream(bytes, dict); dict.Stream = stream; ReadSymbol(Symbol.EndStream); symbol = ScanNextToken(); } if (symbol != Symbol.EndObj) throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token)); return pdfObject; }