/// <summary>
 /// Scans the next token from the lexer and verifies that its text equals the expected text.
 /// </summary>
 /// <param name="token">The exact token text that must come next.</param>
 /// <returns>The symbol the lexer reported for the scanned token.</returns>
 /// <exception cref="PdfReaderException">The scanned token text differs from <paramref name="token"/>.</exception>
 Symbol ReadToken(string token)
 {
   Symbol scanned = this.lexer.ScanNextToken();
   bool matches = token == this.lexer.Token;
   if (!matches)
   {
     throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
   }
   return scanned;
 }
Exemple #2
0
        /// <summary>
        /// Reads a cross-reference table (the 'xref' keyword followed by its subsections) and the
        /// trailer dictionary that follows it, starting at the current lexer position.
        /// </summary>
        /// <param name="xrefTable">The table that receives a reference for every in-use entry
        /// that is not already contained in it.</param>
        /// <returns>The trailer dictionary read after the last subsection.</returns>
        /// <exception cref="PdfReaderException">The section starts with an integer (treated as an
        /// xref stream, which is not supported), does not start with 'xref', or contains an
        /// unexpected token.</exception>
        PdfTrailer ReadXRefTableAndTrailer(PdfReferenceTable xrefTable)
        {
            Debug.Assert(xrefTable != null);

            Symbol symbol = ScanNextToken();

            // Is it an xref stream?
            // TODO: It is very high on the todo list, but still undone
            if (symbol == Symbol.Integer)
            {
                throw new PdfReaderException(PSSR.CannotHandleXRefStreams);
            }

            // Debug.Assert is compiled out of release builds, so reject anything
            // other than the 'xref' keyword explicitly instead of parsing on.
            if (symbol != Symbol.XRef)
            {
                throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
            }

            while (true)
            {
                symbol = ScanNextToken();

                if (symbol == Symbol.Trailer)
                {
                    // The trailer dictionary terminates the cross-reference section.
                    ReadSymbol(Symbol.BeginDictionary);
                    PdfTrailer trailer = new PdfTrailer(this.document);
                    this.ReadDictionary(trailer, false);
                    return trailer;
                }

                if (symbol != Symbol.Integer)
                {
                    throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
                }

                // Subsection header: first object number and number of entries.
                int start  = this.lexer.TokenToInteger;
                int length = ReadInteger();
                int end    = start + length;
                for (int id = start; id < end; id++)
                {
                    // Every entry must be consumed completely, even when it is skipped below.
                    int position   = ReadInteger();
                    int generation = ReadInteger();
                    ReadSymbol(Symbol.Keyword);
                    string token = this.lexer.Token;

                    // Skip start entry and unused entries (anything not marked "n").
                    if (id == 0 || token != "n")
                    {
                        continue;
                    }

                    // Even it is restricted, an object can exists in more than one subsection.
                    // (PDF Reference Implementation Notes 15).
                    PdfObjectID objectID = new PdfObjectID(id, generation);

                    // Ignore the latter one
                    if (!xrefTable.Contains(objectID))
                    {
                        xrefTable.Add(new PdfReference(objectID, position));
                    }
                }
            }
        }
 /// <summary>
 /// Scans the next token and verifies that it is the expected symbol.
 /// </summary>
 /// <param name="symbol">The symbol that must come next.</param>
 /// <returns>The scanned symbol, which equals <paramref name="symbol"/>.</returns>
 /// <exception cref="PdfReaderException">The scanned symbol differs from <paramref name="symbol"/>.</exception>
 Symbol ReadSymbol(Symbol symbol)
 {
   Symbol scanned = this.lexer.ScanNextToken();
   if (scanned == symbol)
   {
     return scanned;
   }
   throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
 }
 /// <summary>
 /// Scans the next token, which must be a name, and returns its text.
 /// The preceding slash is part of the result string.
 /// </summary>
 /// <returns>The token text of the scanned name.</returns>
 /// <exception cref="PdfReaderException">The scanned token is not a name.</exception>
 string ReadName()
 {
   string token;
   if (ScanNextToken(out token) == Symbol.Name)
   {
     return token;
   }
   throw new PdfReaderException(PSSR.UnexpectedToken(token));
 }
Exemple #5
0
        /// <summary>
        /// Scans the next content-stream token and verifies that it is the expected symbol.
        /// A mismatch is reported through ContentReaderDiagnostics.ThrowContentReaderException.
        /// </summary>
        /// <param name="symbol">The symbol that must come next.</param>
        /// <returns>The symbol that was actually scanned.</returns>
        CSymbol ReadSymbol(CSymbol symbol)
        {
            CSymbol scanned = _lexer.ScanNextToken();
            bool isExpected = scanned == symbol;

            if (!isExpected)
            {
                // Hand the offending token text to the diagnostics helper.
                string message = PSSR.UnexpectedToken(_lexer.Token);
                ContentReaderDiagnostics.ThrowContentReaderException(message);
            }
            return scanned;
        }
Exemple #6
0
        /// <summary>
        /// Scans the next token and requires its text to equal the given one.
        /// </summary>
        /// <param name="token">The token text that is expected next.</param>
        /// <returns>The symbol reported by the lexer for the scanned token.</returns>
        /// <exception cref="PdfReaderException">The scanned token text is not
        /// <paramref name="token"/>.</exception>
        Symbol ReadToken(string token)
        {
            Symbol scanned = lexer.ScanNextToken();

            if (token == lexer.Token)
            {
                return scanned;
            }
            throw new PdfReaderException(PSSR.UnexpectedToken(lexer.Token));
        }
Exemple #7
0
        /// <summary>
        /// Scans the next token and requires it to be the given symbol.
        /// </summary>
        /// <param name="symbol">The symbol that is expected next.</param>
        /// <returns>The scanned symbol, which equals <paramref name="symbol"/>.</returns>
        /// <exception cref="PdfReaderException">A different symbol was scanned.</exception>
        Symbol ReadSymbol(Symbol symbol)
        {
            Symbol scanned = lexer.ScanNextToken();
            bool isExpected = scanned == symbol;

            if (!isExpected)
            {
                throw new PdfReaderException(PSSR.UnexpectedToken(lexer.Token));
            }
            return scanned;
        }
Exemple #8
0
        /// <summary>
        /// Scans the next content-stream token and requires it to be the given symbol.
        /// </summary>
        /// <param name="symbol">The symbol that is expected next.</param>
        /// <returns>The scanned symbol, which equals <paramref name="symbol"/>.</returns>
        /// <exception cref="ContentReaderException">A different symbol was scanned.</exception>
        CSymbol ReadSymbol(CSymbol symbol)
        {
            CSymbol scanned = this.lexer.ScanNextToken();

            if (scanned == symbol)
            {
                return scanned;
            }
            throw new ContentReaderException(PSSR.UnexpectedToken(this.lexer.Token));
        }
    /*
        /// <summary>
        /// Reads a string immediately or (optionally) indirectly from the PDF data stream.
        /// </summary>
        protected string ReadString(bool canBeIndirect)
        {
          Symbol symbol = Symbol.None; //this.lexer.ScanNextToken(canBeIndirect);
          if (symbol == Symbol.String || symbol == Symbol.HexString)
            return this.lexer.Token;
          else if (symbol == Symbol.R)
          {
            int position = this.lexer.Position;
            MoveToObject(this.lexer.Token);
            ReadObjectID(null);
            string s = ReadString();
            ReadSymbol(Symbol.EndObj);
            this.lexer.Position = position;
            return s;
          }
          throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
        }

        protected string ReadString()
        {
          return ReadString(false);
        }

        /// <summary>
        /// Reads a boolean immediately or (optionally) indirectly from the PDF data stream.
        /// </summary>
        protected bool ReadBoolean(bool canBeIndirect)
        {
          Symbol symbol = this.lexer.ScanNextToken(canBeIndirect);
          if (symbol == Symbol.Boolean)
            return this.lexer.TokenToBoolean;
          else if (symbol == Symbol.R)
          {
            int position = this.lexer.Position;
            MoveToObject(this.lexer.Token);
            ReadObjectID(null);
            bool b = ReadBoolean();
            ReadSymbol(Symbol.EndObj);
            this.lexer.Position = position;
            return b;
          }
          throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
        }

        protected bool ReadBoolean()
        {
          return ReadBoolean(false);
        }
    */
    /// <summary>
    /// Scans the next token and returns its integer value. If the token is the 'R' symbol
    /// instead, an object ID, an integer and 'endobj' are read from the current position,
    /// the lexer position is restored, and that integer is returned.
    /// </summary>
    /// <param name="canBeIndirect">Not evaluated by this method.
    /// NOTE(review): the 'R' branch runs regardless of this flag - confirm whether that is intended.</param>
    /// <returns>The integer value that was read.</returns>
    /// <exception cref="PdfReaderException">The scanned token is neither an integer nor 'R'.</exception>
    int ReadInteger(bool canBeIndirect)
    {
      Symbol scanned = this.lexer.ScanNextToken();
      if (scanned == Symbol.Integer)
      {
        return this.lexer.TokenToInteger;
      }
      if (scanned != Symbol.R)
      {
        throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
      }

      // Remember where we are so the lexer can be put back after the detour.
      int savedPosition = this.lexer.Position;
      // NOTE(review): a call 'MoveToObject(this.lexer.Token)' was commented out at this point,
      // so the object is read from the current position rather than from the referenced
      // object's position - verify.
      ReadObjectID(null);
      int value = ReadInteger();
      ReadSymbol(Symbol.EndObj);
      this.lexer.Position = savedPosition;
      return value;
    }
Exemple #10
0
        /// <summary>
        /// Reads PDF object from input stream.
        /// Moves to the object identified by <paramref name="objectID"/>, reads the object and
        /// generation numbers and the 'obj' keyword, then reads the object value, an optional
        /// stream, and the closing 'endobj'.
        /// </summary>
        /// <param name="pdfObject">Either the instance of a derived type or null. If it is null
        /// an appropriate object is created.</param>
        /// <param name="objectID">The address of the object.</param>
        /// <param name="includeReferences">If true, specifies that all indirect objects
        /// are included recursively. The value is passed on to ReadArray and ReadDictionary.</param>
        /// <returns>The object that was read; its object ID is set from <paramref name="objectID"/>.</returns>
        /// <exception cref="PdfReaderException">The token that should be 'endobj' (or an expected
        /// symbol read via ReadSymbol) is something else.</exception>
        /// <exception cref="NotImplementedException">The object value starts with a keyword or with
        /// a symbol that has no case in the switch below.</exception>
        public PdfObject ReadObject(PdfObject pdfObject, PdfObjectID objectID, bool includeReferences)
        {
            MoveToObject(objectID);
            // The numbers read here are only used for the DEBUG comparison below; they are
            // overwritten with the values from objectID afterwards.
            int objectNumber     = ReadInteger();
            int generationNumber = ReadInteger();

#if DEBUG
            // The following assertion sometime failed (see below)
            //Debug.Assert(objectID == new PdfObjectID(objectNumber, generationNumber));
            if (objectID != new PdfObjectID(objectNumber, generationNumber))
            {
                // A special kind of bug? Or is this an undocumented PDF feature?
                // PDF4NET 2.6 provides a sample called 'Unicode', which produces a file 'unicode.pdf'
                // The iref table of this file contains the following entries:
                //    iref
                //    0 148
                //    0000000000 65535 f
                //    0000000015 00000 n
                //    0000000346 00000 n
                //    ....
                //    0000083236 00000 n
                //    0000083045 00000 n
                //    0000083045 00000 n
                //    0000083045 00000 n
                //    0000083045 00000 n
                //    0000080334 00000 n
                //    ....
                // Object 84, 85, 86, and 87 maps to the same dictionary, but all PDF readers I tested
                // ignores this mismatch! The following assertion failed about 50 times with this file.
                // The assertion is wrapped in '#if true_' (presumably an undefined symbol used to
                // switch the code off), so a mismatch is tolerated.
#if true_
                string message = String.Format("xref entry {0} {1} maps to object {2} {3}.",
                                               objectID.ObjectNumber, objectID.GenerationNumber, objectNumber, generationNumber);
                Debug.Assert(false, message);
#endif
            }
#endif
            // Always use object ID from iref table (see above)
            objectNumber     = objectID.ObjectNumber;
            generationNumber = objectID.GenerationNumber;
#if true_
            Debug.WriteLine(String.Format("obj: {0} {1}", objectNumber, generationNumber));
#endif
            ReadSymbol(Symbol.Obj);

            // Only a dictionary may be followed by a stream; the flag is checked by the assert below.
            bool   checkForStream = false;
            Symbol symbol         = ScanNextToken();
            // Arrays and dictionaries leave the switch via 'break' and continue with the
            // stream/'endobj' handling below; every simple value reads 'endobj' itself and returns.
            // NOTE(review): there is no case for a hex string symbol; if the lexer can report one
            // here it ends up in the default branch - confirm.
            switch (symbol)
            {
            case Symbol.BeginArray:
                // Reuse the caller's instance if one was supplied (it must be a PdfArray).
                PdfArray array;
                if (pdfObject == null)
                {
                    array = new PdfArray(this.document);
                }
                else
                {
                    array = (PdfArray)pdfObject;
                }
                //PdfObject.RegisterObject(array, objectID, generation);
                pdfObject = ReadArray(array, includeReferences);
                pdfObject.SetObjectID(objectNumber, generationNumber);
                break;

            case Symbol.BeginDictionary:
                // Reuse the caller's instance if one was supplied (it must be a PdfDictionary).
                PdfDictionary dict;
                if (pdfObject == null)
                {
                    dict = new PdfDictionary(this.document);
                }
                else
                {
                    dict = (PdfDictionary)pdfObject;
                }
                //PdfObject.RegisterObject(dict, objectID, generation);
                checkForStream = true;
                pdfObject      = ReadDictionary(dict, includeReferences);
                pdfObject.SetObjectID(objectNumber, generationNumber);
                break;

            // Acrobat 6 Professional proudly presents: The Null object!
            // Even with a one-digit object number an indirect reference 'x 0 R' to this object is
            // one character larger than the direct use of 'null'. Probably this is the reason why
            // Acrobat Web Capture 6.0 creates this object, but obviously never creates a
            // reference to it!
            case Symbol.Null:
                pdfObject = new PdfNullObject(this.document);
                pdfObject.SetObjectID(objectNumber, generationNumber);
                ReadSymbol(Symbol.EndObj);
                return(pdfObject);

            case Symbol.Boolean:
                // The value is true if the token text equals Boolean.TrueString, ignoring case.
                pdfObject = new PdfBooleanObject(this.document, string.Compare(this.lexer.Token, Boolean.TrueString, true) == 0); //!!!mod THHO 19.11.09
                pdfObject.SetObjectID(objectNumber, generationNumber);
                ReadSymbol(Symbol.EndObj);
                return(pdfObject);

            case Symbol.Integer:
                pdfObject = new PdfIntegerObject(this.document, this.lexer.TokenToInteger);
                pdfObject.SetObjectID(objectNumber, generationNumber);
                ReadSymbol(Symbol.EndObj);
                return(pdfObject);

            case Symbol.UInteger:
                pdfObject = new PdfUIntegerObject(this.document, this.lexer.TokenToUInteger);
                pdfObject.SetObjectID(objectNumber, generationNumber);
                ReadSymbol(Symbol.EndObj);
                return(pdfObject);

            case Symbol.Real:
                pdfObject = new PdfRealObject(this.document, this.lexer.TokenToReal);
                pdfObject.SetObjectID(objectNumber, generationNumber);
                ReadSymbol(Symbol.EndObj);
                return(pdfObject);

            case Symbol.String:
                pdfObject = new PdfStringObject(this.document, this.lexer.Token);
                pdfObject.SetObjectID(objectNumber, generationNumber);
                ReadSymbol(Symbol.EndObj);
                return(pdfObject);

            case Symbol.Name:
                pdfObject = new PdfNameObject(this.document, this.lexer.Token);
                pdfObject.SetObjectID(objectNumber, generationNumber);
                ReadSymbol(Symbol.EndObj);
                return(pdfObject);

            case Symbol.Keyword:
                // Should not come here anymore
                throw new NotImplementedException("Keyword");

            default:
                // Should not come here anymore
                throw new NotImplementedException("unknown token \"" + symbol + "\"");
            }
            // Only arrays and dictionaries reach this point: expect either a stream or 'endobj'.
            symbol = ScanNextToken();
            if (symbol == Symbol.BeginStream)
            {
                // NOTE(review): the cast runs before the assert, so a stream following an array
                // would presumably fail here with an InvalidCastException rather than a
                // PdfReaderException - confirm the return type of ReadArray.
                PdfDictionary dict = (PdfDictionary)pdfObject;
                Debug.Assert(checkForStream, "Unexpected stream...");
                int    length = GetStreamLength(dict);
                byte[] bytes  = this.lexer.ReadStream(length);
                // Experimental eager decoding of FlateDecode streams; guarded by '#if true_'
                // (presumably an undefined symbol, i.e. disabled).
#if true_
                if (dict.Elements.GetString("/Filter") == "/FlateDecode")
                {
                    if (dict.Elements["/Subtype"] == null)
                    {
                        try
                        {
                            byte[] decoded = Filtering.FlateDecode.Decode(bytes);
                            if (decoded.Length == 0)
                            {
                                goto End;
                            }
                            string pageContent = Filtering.FlateDecode.DecodeToString(bytes);
                            if (pageContent.Length > 100)
                            {
                                pageContent = pageContent.Substring(pageContent.Length - 100);
                            }
                            pageContent.GetType();
                            bytes = decoded;
                            dict.Elements.Remove("/Filter");
                            dict.Elements.SetInteger("/Length", bytes.Length);
                        }
                        catch
                        {
                        }
                    }
                    End :;
                }
#endif
                // Attach the raw stream bytes to the dictionary.
                PdfDictionary.PdfStream stream = new PdfDictionary.PdfStream(bytes, dict);
                dict.Stream = stream;
                ReadSymbol(Symbol.EndStream);
                symbol = ScanNextToken();
            }
            // Whatever came before, the object must be closed by 'endobj'.
            if (symbol != Symbol.EndObj)
            {
                throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
            }
            return(pdfObject);
        }