Example #1
0
        /// <summary>
        /// Reads the "startxref" section: the keyword line, the numeric value that follows it
        /// (stored in m_xrefLocation), and then every line up to and including "%%EOF".
        /// </summary>
        /// <param name="st">Stream to read from.</param>
        /// <returns>
        /// true when a "startxref" section was read; false when the next line is not "startxref".
        /// </returns>
        /// <exception cref="ParseException">
        /// The value after "startxref" could not be parsed as an integer, or the stream ended
        /// before a "%%EOF" line was found.
        /// </exception>
        public bool ReadStartXref(PdfStream st)
        {
            if (st.PeekLine(true, true) != "startxref")
            {
                return(false);
            }

            Utility.TraceLine("Read startxref");
            st.ReadLine();

            try
            {
                string sline = st.RegexMatch("^([0-9]+)");
                m_xrefLocation = Int32.Parse(sline);
            }
            catch (Exception except)
            {
                // Covers both "no digits matched" (empty string) and out-of-range values
                throw new ParseException("Error reading xref inset: " + except.Message);
            }

            // Skip forward to the end-of-file marker. A truncated file never yields "%%EOF",
            // so stop at end of stream instead of looping forever.
            string line = st.ReadLine(true, true);
            while (line != "%%EOF")
            {
                if (st.Eof)
                {
                    throw new ParseException("Reached end of file before finding %%EOF after startxref");
                }
                line = st.ReadLine(true, true);
            }

            return(true);
        }
Example #2
0
        /// <summary>
        /// Reads an optional "stream" ... "endstream" section and appends its content to m_stream.
        /// Does nothing when the next line is not "stream".
        /// </summary>
        /// <param name="st">Stream to read from.</param>
        private void ReadStream(PdfStream st)
        {
            if (st.PeekLine() == "stream")
            {
                Utility.TraceLine("Read stream: ");
                st.ReadLine();

                // Test for end of input before peeking so we never inspect a line that is not
                // there, and compare ordinally: the line may hold arbitrary stream data, so
                // culture-sensitive matching rules must not apply to the terminator check.
                while (!st.Eof &&
                       !st.PeekLine(true, true).EndsWith("endstream", System.StringComparison.Ordinal))
                {
                    Utility.Trace("-");
                    m_stream.Append(st.ReadLineAsBytes());
                }
                Utility.TraceLine(" Done");

                // Consume the line that ended the loop (the one ending in "endstream")
                st.ReadLine();

                // Sometimes there is a blank line after the endstream
                st.ConsumeWhitespace();
            }
        }
Example #3
0
        /// <summary>
        /// Reads an "xref" section: the keyword line followed by zero or more blocks, each made
        /// of a "start count" header line and then <c>count</c> entry lines. For in-use entries
        /// (when m_linflag is not set) the recorded byte offset is compared with the offset at
        /// which the object was actually read; a mismatch clears m_pdfa.
        /// </summary>
        /// <param name="st">Stream to read from.</param>
        /// <returns>
        /// true when an "xref" section was read; false when the next line is not "xref".
        /// </returns>
        /// <exception cref="ParseException">Any failure while reading a block or its entries.</exception>
        public bool ReadXref(PdfStream st)
        {
            if (st.PeekLine(true, true) != "xref")
            {
                return(false);
            }

            Utility.TraceLine("Reading a XREF");
            st.ReadLine();

            while (st.RegexMatch("^([0-9]+ [0-9]+)", false) != "")
            {
                Utility.TraceLine("Found XREF value block");

                try
                {
                    // Each xref block starts with a line with the starting number of the objects in the
                    // block, and then a count of the number of objects in the block PDF 1.5 p70
                    int startat = Int32.Parse(st.RegexMatch("^([0-9]+)", true));
                    int count   = Int32.Parse(st.RegexMatch("^[ \t]*([0-9]+)", true));
                    st.ReadLine();

                    // Then you get one line per object in the block. They contain:
                    // <byte offset> <generation> <n = inuse, f = free>
                    Utility.TraceLine("XREF reading " + count + " lines");
                    for (int i = 0; i < count; i++)
                    {
                        // Fixed-width columns: 10 characters of offset, a separator, 5 characters
                        // of generation, a separator, then the one-character in-use flag
                        string line       = st.ReadLine();
                        long   offset     = Int64.Parse(line.Substring(0, 10));
                        int    generation = Int32.Parse(line.Substring(11, 5));
                        string inuse      = line.Substring(17, 1);

                        Utility.TraceLine("XREF line: " + offset + " " + generation + " " + inuse);

                        if ((inuse == "n") && !m_linflag)
                        {
                            // PDF/A requires us to verify these offsets
                            Object obj = m_objects.Get(startat + i, generation);
                            if (offset != obj.StartedAt)
                            {
                                m_pdfa = false;
                                Utility.TraceLine("PDF/A: Object offset is incorrect " + offset + " != " +
                                                  obj.StartedAt + " (required by section 5.4)");
                            }
                        }
                    }
                }
                catch (Exception except)
                {
                    throw new ParseException("Error reading xref block starter: " + except.Message);
                }
            }

            // The section was recognised and consumed, so report success, matching the
            // sibling ReadTrailer / ReadStartXref readers.
            return(true);
        }
Example #4
0
        /// <summary>
        /// Parses the body of this object from the stream: its dictionary, then an optional
        /// stream section, then an optional direct value, and finally the closing "endobj" line.
        /// </summary>
        /// <param name="st">Stream to read from.</param>
        /// <exception cref="RuntimeException">
        /// A ParseException was raised while reading the object; a crash dump covering
        /// m_parseStartedAt through the current stream position is written first.
        /// </exception>
        public void Parse(PdfStream st)
        {
            Utility.TraceLine("Read object parse pass for object " + Number + " " + Generation);

            try
            {
                // The three parts are attempted in this fixed order
                m_dictionary.ReadDictionary(st);
                ReadStream(st);
                ReadDirectValue(st);

                if (st.PeekLine(true, true) == "endobj")
                {
                    // Consume the terminator
                    st.ReadLine();
                }
                else
                {
                    throw new ParseException("Object " + Number + " " + Generation + " ended early: \"" +
                                             st.PeekLine(true, true) + "\"");
                }
            }
            catch (ParseException parseError)
            {
                // Dump the offending region before surfacing the failure to the caller
                Utility.CrashDump(m_parseStartedAt, st.Position, st);
                throw new RuntimeException("Error processing object: " + parseError.Message + " for object " + Number + " " + Generation);
            }
        }
Example #5
0
 /// <summary>
 /// When the object has no dictionary entries, appends every line up to (but not including)
 /// the "endobj" line to m_directValue. Otherwise only traces that no direct value applies.
 /// </summary>
 /// <param name="st">Stream to read from.</param>
 private void ReadDirectValue(PdfStream st)
 {
     if (m_dictionary.Count == 0)
     {
         // Stop at end of input as well: a truncated object never reaches "endobj" and
         // would otherwise keep this loop spinning forever.
         while (!st.Eof && st.PeekLine() != "endobj")
         {
             m_directValue += st.ReadLine();
         }
     }
     else
     {
         Utility.TraceLine("Not eligible for direct value");
     }
 }
Example #6
0
        /// <summary>
        /// Reads a "trailer" section: the keyword line and the dictionary that follows it
        /// (into m_trailer), then checks the trailer against the PDF/A rules. Each violation
        /// is traced and clears m_pdfa.
        /// </summary>
        /// <param name="st">Stream to read from.</param>
        /// <returns>
        /// true when a "trailer" section was read; false when the next line is not "trailer".
        /// </returns>
        public bool ReadTrailer(PdfStream st)
        {
            if (st.PeekLine() != "trailer")
            {
                return false;
            }

            st.ReadLine();
            m_trailer.ReadDictionary(st);

            // PDF/A trailer rules (section 5.3 of the draft standard). Every rule is
            // evaluated, with no early exit, so that all violations are reported in one pass.

            // Entries that must NOT be present
            if (m_trailer.Get("Encrypt") != null)
            {
                m_pdfa = false;
                Utility.TraceLine("PDF/A: Forbidden encrypt dictionary item in section 5.3 document trailer");
            }

            if (m_trailer.Get("Info") != null)
            {
                m_pdfa = false;
                Utility.TraceLine("PDF/A: Forbidden info dictionary item in section 5.3 document trailer");
            }

            // Entries that MUST be present
            if (m_trailer.Get("Size") == null)
            {
                m_pdfa = false;
                Utility.TraceLine("PDF/A: Required size dictionary document trailer item is missing");
            }

            if (m_trailer.Get("ID") == null)
            {
                m_pdfa = false;
                Utility.TraceLine("PDF/A: Required ID dictionary document trailer item is missing");
            }

            Utility.TraceLine("Read trailer");
            return true;
        }
Example #7
0
        /// <summary>
        /// Reads a dictionary delimited by "&lt;&lt;" and "&gt;&gt;" and adds one DictionaryItem
        /// per entry. Each entry is a name followed by either a nested dictionary (read
        /// recursively) or a single value kept as raw text. Does nothing when the opening
        /// delimiter is not found.
        /// </summary>
        /// <param name="st">Stream to read from.</param>
        /// <exception cref="ParseException">
        /// An entry has no name, or its value matches none of the recognised forms.
        /// </exception>
        public void ReadDictionary(PdfStream st)
        {
            if (!st.Expect("<<", false))
            {
                return;
            }

            // Keep reading entries until the closing delimiter is next
            while (st.PeekBlock(2) != ">>")
            {
                // Drop whitespace in front of the entry, one character at a time
                while (Utility.IsWhite(st.PeekBlock(1)))
                {
                    st.ReadBlock(1);
                }

                // Entry name
                Utility.TraceLine("Checking for a name");
                string name = st.RegexMatch("^(/[^ \t/\\[\\]\\(\\)\\<\\>]+)[ \t]*");
                if (name == "")
                {
                    throw new ParseException("Dictionary items must have a name");
                }

                st.ConsumeWhitespace();

                // Entry value
                if (st.PeekBlock(2) == "<<")
                {
                    Utility.TraceLine("Traversing subdictionary");
                    Dictionary child = new Dictionary();
                    child.ReadDictionary(st);
                    Add(new DictionaryItem(name, child));
                }
                else
                {
                    Utility.TraceLine("Finding the value");

                    // Probe every recognised value form up front (second argument false),
                    // always in this order
                    string nameval    = st.RegexMatch("^(/[^ \t/\\[\\]\\(\\)\\<\\>]+)", false);
                    string objrefval  = st.RegexMatch("^([0-9]+ [0-9]+ R)", false);
                    string numval     = st.RegexMatch("^(-{0,1}[0-9]+)", false);
                    string floatval   = st.RegexMatch("^(-{0,1}[0-9]+\\.[0-9]+)", false);
                    string sqbrackets = st.RegexMatch(@"^(\[.*?[^\\]+?\])", false);
                    string rdbrackets = st.RegexMatch(@"^(\(.*?[^\\]+?\))", false);
                    string anbrackets = st.RegexMatch(@"^(\<.*?[^\\]+?\>)", false);
                    string singleword = st.RegexMatch("^([^ \t]+)", false);

                    // Priority order: the first non-empty probe wins. Float is ranked ahead
                    // of integer because the integer pattern also matches the leading digits
                    // of a float.
                    string[] byPriority = new string[]
                    {
                        nameval, objrefval, floatval, numval,
                        sqbrackets, rdbrackets, anbrackets, singleword
                    };

                    string chosen = "";
                    foreach (string candidate in byPriority)
                    {
                        if (candidate != "")
                        {
                            chosen = candidate;
                            break;
                        }
                    }

                    if (chosen == "")
                    {
                        throw new ParseException("Unknown value format: " + st.PeekLine());
                    }

                    // Record the entry, then advance past exactly the text that matched
                    Add(new DictionaryItem(name, chosen));
                    st.ReadBlock(chosen.Length);
                }

                st.ConsumeWhitespace();
            }

            // Skip over the >> at the end of the dictionary
            st.ReadBlock(2);
            st.ConsumeWhitespace();
        }