Пример #1
0
    /// <summary>
    /// Reads PDF object from input stream.
    /// </summary>
    /// <param name="pdfObject">Either the instance of a derived type or null. If it is null
    /// an appropriate object is created.</param>
    /// <param name="objectID">The address of the object.</param>
    /// <param name="includeReferences">If true, specifies that all indirect objects
    /// are included recursively.</param>
    public PdfObject ReadObject(PdfObject pdfObject, PdfObjectID objectID, bool includeReferences)
    {
      MoveToObject(objectID);
      int objectNumber = ReadInteger();
      int generationNumber = ReadInteger();
#if DEBUG
      // The following assertion sometime failed (see below)
      //Debug.Assert(objectID == new PdfObjectID(objectNumber, generationNumber));
      if (objectID != new PdfObjectID(objectNumber, generationNumber))
      {
        // A special kind of bug? Or is this an undocumented PDF feature?
        // PDF4NET 2.6 provides a sample called 'Unicode', which produces a file 'unicode.pdf'
        // The iref table of this file contains the following entries:
        //    iref
        //    0 148
        //    0000000000 65535 f 
        //    0000000015 00000 n 
        //    0000000346 00000 n 
        //    ....
        //    0000083236 00000 n 
        //    0000083045 00000 n 
        //    0000083045 00000 n 
        //    0000083045 00000 n 
        //    0000083045 00000 n 
        //    0000080334 00000 n 
        //    ....
        // Object 84, 85, 86, and 87 maps to the same dictionary, but all PDF readers I tested
        // ignores this mismatch! The following assertion failed about 50 times with this file.
#if true_
        string message = String.Format("xref entry {0} {1} maps to object {2} {3}.",
          objectID.ObjectNumber, objectID.GenerationNumber, objectNumber, generationNumber);
        Debug.Assert(false, message);
#endif
      }
#endif
      // Always use object ID from iref table (see above)
      objectNumber = objectID.ObjectNumber;
      generationNumber = objectID.GenerationNumber;
#if true_
      Debug.WriteLine(String.Format("obj: {0} {1}", objectNumber, generationNumber));
#endif
      ReadSymbol(Symbol.Obj);

      bool checkForStream = false;
      Symbol symbol = ScanNextToken();
      switch (symbol)
      {
        case Symbol.BeginArray:
          PdfArray array;
          if (pdfObject == null)
            array = new PdfArray(this.document);
          else
            array = (PdfArray)pdfObject;
          //PdfObject.RegisterObject(array, objectID, generation);
          pdfObject = ReadArray(array, includeReferences);
          pdfObject.SetObjectID(objectNumber, generationNumber);
          break;

        case Symbol.BeginDictionary:
          PdfDictionary dict;
          if (pdfObject == null)
            dict = new PdfDictionary(this.document);
          else
            dict = (PdfDictionary)pdfObject;
          //PdfObject.RegisterObject(dict, objectID, generation);
          checkForStream = true;
          pdfObject = ReadDictionary(dict, includeReferences);
          pdfObject.SetObjectID(objectNumber, generationNumber);
          break;

        // Acrobat 6 Professional proudly presents: The Null object!
        // Even with a one-digit object number an indirect reference «x 0 R» to this object is
        // one character larger than the direct use of «null». Probable this is the reason why
        // it is true that Acrobat Web Capture 6.0 creates this object, but obviously never 
        // creates a reference to it!
        case Symbol.Null:
          pdfObject = new PdfNullObject(this.document);
          pdfObject.SetObjectID(objectNumber, generationNumber);
          ReadSymbol(Symbol.EndObj);
          return pdfObject;

        case Symbol.Boolean:
          pdfObject = new PdfBooleanObject(this.document, this.lexer.Token == Boolean.TrueString);
          pdfObject.SetObjectID(objectNumber, generationNumber);
          ReadSymbol(Symbol.EndObj);
          return pdfObject;

        case Symbol.Integer:
          pdfObject = new PdfIntegerObject(this.document, this.lexer.TokenToInteger);
          pdfObject.SetObjectID(objectNumber, generationNumber);
          ReadSymbol(Symbol.EndObj);
          return pdfObject;

        case Symbol.UInteger:
          pdfObject = new PdfUIntegerObject(this.document, this.lexer.TokenToUInteger);
          pdfObject.SetObjectID(objectNumber, generationNumber);
          ReadSymbol(Symbol.EndObj);
          return pdfObject;

        case Symbol.Real:
          pdfObject = new PdfRealObject(this.document, this.lexer.TokenToReal);
          pdfObject.SetObjectID(objectNumber, generationNumber);
          ReadSymbol(Symbol.EndObj);
          return pdfObject;

        case Symbol.String:
          pdfObject = new PdfStringObject(this.document, this.lexer.Token);
          pdfObject.SetObjectID(objectNumber, generationNumber);
          ReadSymbol(Symbol.EndObj);
          return pdfObject;

        case Symbol.Name:
          pdfObject = new PdfNameObject(this.document, this.lexer.Token);
          pdfObject.SetObjectID(objectNumber, generationNumber);
          ReadSymbol(Symbol.EndObj);
          return pdfObject;

        case Symbol.Keyword:
          // Should not come here anymore
          throw new NotImplementedException("Keyword");

        default:
          // Should not come here anymore
          throw new NotImplementedException("unknown token");
      }
      symbol = ScanNextToken();
      if (symbol == Symbol.BeginStream)
      {
        PdfDictionary dict = (PdfDictionary)pdfObject;
        Debug.Assert(checkForStream, "Unexpected stream...");
        int length = GetStreamLength(dict);
        byte[] bytes = this.lexer.ReadStream(length);
#if true_
        if (dict.Elements.GetString("/Filter") == "/FlateDecode")
        {
          if (dict.Elements["/Subtype"] == null)
          {
            try
            {
              byte[] decoded = Filtering.FlateDecode.Decode(bytes);
              if (decoded.Length == 0)
                goto End;
              string pageContent = Filtering.FlateDecode.DecodeToString(bytes);
              if (pageContent.Length > 100)
                pageContent = pageContent.Substring(pageContent.Length - 100);
              pageContent.GetType();
              bytes = decoded;
              dict.Elements.Remove("/Filter");
              dict.Elements.SetInteger("/Length", bytes.Length);
            }
            catch
            {
            }
          }
        End:;
        }
#endif
        PdfDictionary.PdfStream stream = new PdfDictionary.PdfStream(bytes, dict);
        dict.Stream = stream;
        ReadSymbol(Symbol.EndStream);
        symbol = ScanNextToken();
      }
      if (symbol != Symbol.EndObj)
        throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
      return pdfObject;
    }