/// <summary>
/// Reads a "startxref" section if that keyword is the next line in the stream.
/// The number following the keyword is stored in m_xrefLocation, and the stream
/// is then advanced until a "%%EOF" line (or the end of the stream) is reached.
/// </summary>
/// <param name="st">The PDF stream to read from.</param>
/// <returns>true if a startxref section was found and read, false otherwise.</returns>
/// <exception cref="ParseException">
/// Thrown when the value after the keyword cannot be matched or parsed as an integer.
/// </exception>
public bool ReadStartXref(PdfStream st)
{
    if (st.PeekLine(true, true) == "startxref")
    {
        Utility.TraceLine("Read startxref");
        st.ReadLine();

        try
        {
            string sline = st.RegexMatch("^([0-9]+)");
            m_xrefLocation = Int32.Parse(sline);
        }
        catch (Exception except)
        {
            throw new ParseException("Error reading xref inset: " + except.Message);
        }

        // Skip forward to the %%EOF marker. The end-of-stream check keeps a
        // truncated file (one with no %%EOF line) from spinning here forever.
        string line = st.ReadLine(true, true);
        while (line != "%%EOF" && !st.Eof)
        {
            line = st.ReadLine(true, true);
        }

        return(true);
    }

    return(false);
}
/// <summary>
/// Reads the body of a stream object if the next line is the "stream" keyword.
/// Every line up to (but not including) the one ending in "endstream" is
/// appended to m_stream as bytes; the "endstream" line and any whitespace
/// following it are then consumed.
/// </summary>
/// <param name="st">The PDF stream to read from.</param>
private void ReadStream(PdfStream st)
{
    if (st.PeekLine() == "stream")
    {
        Utility.TraceLine("Read stream: ");
        st.ReadLine();

        // Ordinal comparison: stream content is raw data, so the terminator
        // must be matched exactly rather than by culture-aware rules.
        while (!st.PeekLine(true, true).EndsWith("endstream", StringComparison.Ordinal) && !st.Eof)
        {
            Utility.Trace("-");
            m_stream.Append(st.ReadLineAsBytes());
        }

        Utility.TraceLine(" Done");

        // Consume the endstream line itself
        st.ReadLine();

        // Sometimes there is a blank line after the endstream
        st.ConsumeWhitespace();
    }
}
/// <summary>
/// Reads a cross reference ("xref") section if that keyword is the next line
/// in the stream. For each in-use entry, when m_linflag is not set, the byte
/// offset recorded in the table is compared with the StartedAt value of the
/// matching object from m_objects; a mismatch clears m_pdfa.
/// </summary>
/// <param name="st">The PDF stream to read from.</param>
/// <returns>true if an xref section was found and read, false otherwise.</returns>
/// <exception cref="ParseException">
/// Thrown when a block header or an entry line cannot be read or parsed.
/// </exception>
public bool ReadXref(PdfStream st)
{
    if (st.PeekLine(true, true) == "xref")
    {
        Utility.TraceLine("Reading a XREF");
        st.ReadLine();

        while (st.RegexMatch("^([0-9]+ [0-9]+)", false) != "")
        {
            Utility.TraceLine("Found XREF value block");

            // Each xref block starts with a line with the starting number of the objects in the
            // block, and then a count of the number of objects in the block PDF 1.5 p70
            int startat = 0;
            int count = 0;

            try
            {
                startat = Int32.Parse(st.RegexMatch("^([0-9]+)", true));
                count = Int32.Parse(st.RegexMatch("^[ \t]*([0-9]+)", true));
                st.ReadLine();

                // Then you get one line per object in the block. They contain:
                // <byte offset> <generation> <n = inuse, f = free>
                Utility.TraceLine("XREF reading " + count + " lines");
                for (int i = 0; i < count; i++)
                {
                    // Fixed-width entry: offset in columns 0-9, generation in
                    // columns 11-15, in-use flag in column 17.
                    string line = st.ReadLine();
                    long offset = Int64.Parse(line.Substring(0, 10));
                    int generation = Int32.Parse(line.Substring(11, 5));
                    string inuse = line.Substring(17, 1);
                    Utility.TraceLine("XREF line: " + offset + " " + generation + " " + inuse);

                    if ((inuse == "n") && !m_linflag)
                    {
                        // PDF/A requires us to verify these offsets
                        // NOTE(review): if Get returns null for an unknown object, the
                        // dereference below ends up in the catch as a ParseException - confirm
                        // that is the intended outcome.
                        Object obj = m_objects.Get(startat + i, generation);
                        if (offset != obj.StartedAt)
                        {
                            m_pdfa = false;
                            Utility.TraceLine("PDF/A: Object offset is incorrect " + offset + " != " + obj.StartedAt + " (required by section 5.4)");
                        }
                    }
                }
            }
            catch (Exception except)
            {
                throw new ParseException("Error reading xref block starter: " + except.Message);
            }
        }

        // The xref keyword was matched and the section consumed, so report
        // success in the same way as ReadTrailer and ReadStartXref.
        return(true);
    }

    return(false);
}
/// <summary>
/// Parses the body of this object from the stream: its dictionary, then an
/// optional stream section, then an optional direct value, and finally the
/// closing "endobj" line.
/// </summary>
/// <param name="st">The PDF stream to read from.</param>
/// <exception cref="RuntimeException">
/// Thrown, after a crash dump has been requested, when any parsing step raises
/// a ParseException or the object is not terminated by "endobj".
/// </exception>
public void Parse(PdfStream st)
{
    Utility.TraceLine("Read object parse pass for object " + Number + " " + Generation);

    try
    {
        m_dictionary.ReadDictionary(st);
        ReadStream(st);
        ReadDirectValue(st);

        // Whatever comes next has to be the object terminator
        if (st.PeekLine(true, true) == "endobj")
        {
            st.ReadLine();
        }
        else
        {
            throw new ParseException("Object " + Number + " " + Generation + " ended early: \"" + st.PeekLine(true, true) + "\"");
        }
    }
    catch (ParseException parseError)
    {
        // Dump the region of the stream covered by this object before reporting
        Utility.CrashDump(m_parseStartedAt, st.Position, st);

        string report = "Error processing object: " + parseError.Message + " for object " + Number + " " + Generation;
        throw new RuntimeException(report);
    }
}
/// <summary>
/// Reads a direct (non-dictionary) object value. This only applies when no
/// dictionary entries were read for the object; in that case every line up to
/// the "endobj" line is appended to m_directValue.
/// </summary>
/// <param name="st">The PDF stream to read from.</param>
private void ReadDirectValue(PdfStream st)
{
    if (m_dictionary.Count == 0)
    {
        // Stop at end of stream as well as at endobj, so that a truncated
        // object cannot keep this loop running forever.
        while (st.PeekLine() != "endobj" && !st.Eof)
        {
            m_directValue += st.ReadLine();
        }
    }
    else
    {
        Utility.TraceLine("Not eligible for direct value");
    }
}
/// <summary>
/// Reads a "trailer" section if that keyword is the next line in the stream.
/// The trailer dictionary is loaded into m_trailer and then checked against
/// the PDF/A trailer rules; every violation is traced and clears m_pdfa.
/// </summary>
/// <param name="st">The PDF stream to read from.</param>
/// <returns>true if a trailer was found and read, false otherwise.</returns>
public bool ReadTrailer(PdfStream st)
{
    if (st.PeekLine() != "trailer")
    {
        return(false);
    }

    st.ReadLine();
    m_trailer.ReadDictionary(st);

    // PDF/A trailer requirements (section 5.3 of the draft standard). Every
    // check is run, with no early exit, so that all problems are reported
    // in a single pass.
    if (m_trailer.Get("Encrypt") != null)
    {
        TrailerBreaksPdfA("PDF/A: Forbidden encrypt dictionary item in section 5.3 document trailer");
    }

    if (m_trailer.Get("Info") != null)
    {
        TrailerBreaksPdfA("PDF/A: Forbidden info dictionary item in section 5.3 document trailer");
    }

    if (m_trailer.Get("Size") == null)
    {
        TrailerBreaksPdfA("PDF/A: Required size dictionary document trailer item is missing");
    }

    if (m_trailer.Get("ID") == null)
    {
        TrailerBreaksPdfA("PDF/A: Required ID dictionary document trailer item is missing");
    }

    Utility.TraceLine("Read trailer");
    return(true);
}

// Records a PDF/A violation found in the trailer: clears the flag, then traces why.
private void TrailerBreaksPdfA(string reason)
{
    m_pdfa = false;
    Utility.TraceLine(reason);
}
/// <summary>
/// Reads a dictionary ("&lt;&lt; ... &gt;&gt;") from the stream into this instance.
/// Nothing is read when the stream does not start with "&lt;&lt;". Each entry is a
/// name followed by a value; a value that is itself a dictionary is read
/// recursively, and any other value is stored as the text that was matched.
/// </summary>
/// <param name="st">The PDF stream to read from.</param>
/// <exception cref="ParseException">
/// Thrown when an entry has no name or its value matches none of the known forms.
/// </exception>
public void ReadDictionary(PdfStream st)
{
    if (!st.Expect("<<", false))
    {
        return;
    }

    // Keep going until the closing >> is the next thing in the stream
    while (st.PeekBlock(2) != ">>")
    {
        // Step over whitespace in front of the entry
        while (Utility.IsWhite(st.PeekBlock(1)))
        {
            st.ReadBlock(1);
        }

        // The entry name
        Utility.TraceLine("Checking for a name");
        string key = st.RegexMatch("^(/[^ \t/\\[\\]\\(\\)\\<\\>]+)[ \t]*");
        if (key == "")
        {
            throw new ParseException("Dictionary items must have a name");
        }
        st.ConsumeWhitespace();

        // The entry value
        if (st.PeekBlock(2) == "<<")
        {
            Utility.TraceLine("Traversing subdictionary");
            Dictionary nested = new Dictionary();
            nested.ReadDictionary(st);
            Add(new DictionaryItem(key, nested));
        }
        else
        {
            Utility.TraceLine("Finding the value");

            // Probe the stream with each pattern; the matched text is only
            // taken off the stream by the ReadBlock call further down.
            string nameval = st.RegexMatch("^(/[^ \t/\\[\\]\\(\\)\\<\\>]+)", false);
            string objrefval = st.RegexMatch("^([0-9]+ [0-9]+ R)", false);
            string numval = st.RegexMatch("^(-{0,1}[0-9]+)", false);
            string floatval = st.RegexMatch("^(-{0,1}[0-9]+\\.[0-9]+)", false);
            string sqbrackets = st.RegexMatch(@"^(\[.*?[^\\]+?\])", false);
            string rdbrackets = st.RegexMatch(@"^(\(.*?[^\\]+?\))", false);
            string anbrackets = st.RegexMatch(@"^(\<.*?[^\\]+?\>)", false);
            string singleword = st.RegexMatch("^([^ \t]+)", false);

            // Priority order: the first probe that matched wins. The float
            // probe is ranked ahead of the integer probe.
            string[] byPriority =
            {
                nameval, objrefval, floatval, numval,
                sqbrackets, rdbrackets, anbrackets, singleword
            };

            string matched = "";
            foreach (string candidate in byPriority)
            {
                if (candidate != "")
                {
                    matched = candidate;
                    break;
                }
            }

            if (matched == "")
            {
                throw new ParseException("Unknown value format: " + st.PeekLine());
            }

            Add(new DictionaryItem(key, matched));
            st.ReadBlock(matched.Length);
        }

        st.ConsumeWhitespace();
    }

    // Step over the >> that closes the dictionary
    st.ReadBlock(2);
    st.ConsumeWhitespace();
}