/// <summary>
/// Reads a "startxref" section when one is next in the stream: records the
/// numeric value that follows the keyword in m_xrefLocation and then skips
/// forward to the "%%EOF" marker.
/// </summary>
/// <param name="st">Stream positioned where a startxref section may begin.</param>
/// <returns>
/// true when a startxref section was found and consumed; false when the next
/// line is not "startxref" (nothing is consumed in that case).
/// </returns>
/// <exception cref="ParseException">
/// The value after the keyword could not be read as an integer, or the stream
/// ended before a "%%EOF" line was found.
/// </exception>
public bool ReadStartXref(PdfStream st)
{
    if (st.PeekLine(true, true) != "startxref")
    {
        return(false);
    }

    Utility.TraceLine("Read startxref");
    st.ReadLine();

    try
    {
        string sline = st.RegexMatch("^([0-9]+)");
        m_xrefLocation = Int32.Parse(sline);
    }
    catch (Exception except)
    {
        throw new ParseException("Error reading xref inset: " + except.Message);
    }

    // Skip everything up to the end-of-file marker. The end-of-stream check
    // stops a truncated file (no "%%EOF" line) from looping forever.
    string line = st.ReadLine(true, true);
    while (line != "%%EOF")
    {
        if (st.Eof)
        {
            throw new ParseException("Error reading startxref: no %%EOF marker found before end of file");
        }
        line = st.ReadLine(true, true);
    }

    return(true);
}
/// <summary>
/// Reads a cross-reference ("xref") section when one is next in the stream.
/// For every in-use entry, unless m_linflag is set, the recorded byte offset
/// is compared with the StartedAt value of the matching object in m_objects;
/// a mismatch clears m_pdfa.
/// </summary>
/// <param name="st">Stream positioned where an xref section may begin.</param>
/// <returns>
/// true when an xref section was found and consumed; false when the next line
/// is not "xref" (nothing is consumed in that case).
/// </returns>
/// <exception cref="ParseException">
/// Any failure while reading a block header or one of its entry lines.
/// </exception>
public bool ReadXref(PdfStream st)
{
    if (st.PeekLine(true, true) != "xref")
    {
        return(false);
    }

    Utility.TraceLine("Reading a XREF");
    st.ReadLine();

    // NOTE(review): the `false` argument presumably leaves the match unconsumed
    // so the header can be re-read below -- confirm against PdfStream.RegexMatch.
    while (st.RegexMatch("^([0-9]+ [0-9]+)", false) != "")
    {
        Utility.TraceLine("Found XREF value block");

        // Each xref block starts with a line with the starting number of the objects in the
        // block, and then a count of the number of objects in the block PDF 1.5 p70
        try
        {
            int startat = Int32.Parse(st.RegexMatch("^([0-9]+)", true));
            int count = Int32.Parse(st.RegexMatch("^[ \t]*([0-9]+)", true));
            st.ReadLine();

            // Then you get one line per object in the block. They contain:
            // <byte offset> <generation> <n = inuse, f = free>
            Utility.TraceLine("XREF reading " + count + " lines");
            for (int i = 0; i < count; i++)
            {
                // Entries are read by fixed column: 10 characters of offset,
                // 5 of generation, then the one-character in-use flag.
                string line = st.ReadLine();
                long offset = Int64.Parse(line.Substring(0, 10));
                int generation = Int32.Parse(line.Substring(11, 5));
                string inuse = line.Substring(17, 1);
                Utility.TraceLine("XREF line: " + offset + " " + generation + " " + inuse);

                if ((inuse == "n") && !m_linflag)
                {
                    // PDF/A requires us to verify these offsets
                    Object obj = m_objects.Get(startat + i, generation);
                    if (offset != obj.StartedAt)
                    {
                        m_pdfa = false;
                        Utility.TraceLine("PDF/A: Object offset is incorrect " + offset + " != " + obj.StartedAt + " (required by section 5.4)");
                    }
                }
            }
        }
        catch (Exception except)
        {
            throw new ParseException("Error reading xref block starter: " + except.Message);
        }
    }

    // Report that the section was consumed, as ReadTrailer and ReadStartXref do
    // for theirs; previously this fell through to the "not found" result.
    return(true);
}
/// <summary>
/// Reads a stream body when the next line is "stream": every line up to (but
/// not including) the first one ending in "endstream" is appended to m_stream
/// as bytes. The terminating line and any whitespace after it are consumed.
/// Does nothing when the next line is not "stream".
/// </summary>
/// <param name="st">Stream positioned where a "stream" keyword may appear.</param>
private void ReadStream(PdfStream st)
{
    if (st.PeekLine() != "stream")
    {
        return;
    }

    Utility.TraceLine("Read stream: ");
    st.ReadLine();

    while (true)
    {
        // Stop at the terminator line first, and only then check for end of data.
        if (st.PeekLine(true, true).EndsWith("endstream"))
        {
            break;
        }
        if (st.Eof)
        {
            break;
        }

        Utility.Trace("-");
        m_stream.Append(st.ReadLineAsBytes());
    }

    Utility.TraceLine(" Done");
    st.ReadLine();

    // A blank line occasionally follows the endstream keyword
    st.ConsumeWhitespace();
}
/// <summary>
/// Reads a comment line when the stream is at a "%" character.
/// </summary>
/// <param name="st">Stream to read from.</param>
/// <returns>
/// The line returned by st.ReadLine() when st.Expect("%", false) succeeds;
/// otherwise the empty string, with no line read.
/// </returns>
public string ReadComment(PdfStream st)
{
    return st.Expect("%", false) ? st.ReadLine() : "";
}
/// <summary>
/// When the object has no dictionary entries, appends every line up to (but
/// not including) the "endobj" line to m_directValue. Otherwise only traces
/// that the object is not eligible.
/// </summary>
/// <param name="st">Stream positioned after any dictionary and stream body.</param>
private void ReadDirectValue(PdfStream st)
{
    if (m_dictionary.Count != 0)
    {
        Utility.TraceLine("Not eligible for direct value");
        return;
    }

    // Stop at end of data as well as at "endobj": without the end-of-stream
    // check a truncated object (no "endobj" line) would loop forever.
    while (!st.Eof && st.PeekLine() != "endobj")
    {
        m_directValue += st.ReadLine();
    }
}
/// <summary>
/// Reads the "%PDF-x.y" header into m_version and then checks the comment
/// line that follows it against the PDF/A binary-marker requirement. A
/// missing, wrongly sized, or non-binary comment clears m_pdfa and is traced;
/// it does not raise an error.
/// </summary>
/// <param name="st">Stream positioned at the start of the file.</param>
/// <exception cref="ParseException">
/// Any failure while reading the header or the comment line.
/// </exception>
public void ReadHeader(PdfStream st)
{
    try
    {
        st.Expect("%PDF-", true);

        // The version is always written with a '.' decimal separator, so parse
        // it culture-invariantly; the current culture may expect ','.
        m_version = float.Parse(st.ReadBlock(3), System.Globalization.CultureInfo.InvariantCulture);
        st.ReadLine();

        // PDF/A requires a four character comment using only characters with a value
        // greater than 127 (PDF/A ISO Specification 5.2)
        string comment = ReadComment(st);
        if (comment == "")
        {
            m_pdfa = false;
            Utility.TraceLine("PDF/A: No section 5.2 comment line");
        }
        else if (comment.Length != 5)
        {
            m_pdfa = false;
            Utility.TraceLine("PDF/A: Section 5.2 comment is wrong size");
        }
        else
        {
            // Check characters 1..4. The previous bound of 6 indexed one past
            // the end of the 5-character line, so every correctly sized marker
            // was reported as a missing header.
            // NOTE(review): index 0 is skipped, presumably because it holds
            // the leading '%' -- confirm against ReadComment / PdfStream.Expect.
            for (int count = 1; count < comment.Length; count++)
            {
                if (comment[count] < 128)
                {
                    m_pdfa = false;
                    Utility.TraceLine("PDF/A: Section 5.2 comment character " + count + " is not binary");
                }
            }
        }

        Utility.TraceLine("Read header");
    }
    catch (Exception except)
    {
        throw new ParseException("No PDF header found: " + except.Message);
    }
}
/// <summary>
/// Parses the body of one object: its dictionary, then an optional stream,
/// then an optional direct value, and finally the closing "endobj" line.
/// </summary>
/// <param name="st">Stream positioned at the start of the object body.</param>
/// <exception cref="RuntimeException">
/// A ParseException was raised while parsing, including when the line after
/// the body is not "endobj". Utility.CrashDump is called with the parse start
/// position, the current position, and the stream before this is thrown.
/// </exception>
public void Parse(PdfStream st)
{
    Utility.TraceLine("Read object parse pass for object " + Number + " " + Generation);

    try
    {
        m_dictionary.ReadDictionary(st);
        ReadStream(st);
        ReadDirectValue(st);

        if (st.PeekLine(true, true) == "endobj")
        {
            // Consume the terminator line
            st.ReadLine();
        }
        else
        {
            throw new ParseException("Object " + Number + " " + Generation + " ended early: \"" + st.PeekLine(true, true) + "\"");
        }
    }
    catch (ParseException ex)
    {
        Utility.CrashDump(m_parseStartedAt, st.Position, st);
        throw new RuntimeException("Error processing object: " + ex.Message + " for object " + Number + " " + Generation);
    }
}
/// <summary>
/// Reads a "trailer" section when one is next in the stream, loading its
/// dictionary into m_trailer and checking it against the PDF/A trailer rules.
/// Each violated rule clears m_pdfa and is traced.
/// </summary>
/// <param name="st">Stream positioned where a trailer may begin.</param>
/// <returns>
/// true when a trailer was found and read; false when the next line is not
/// "trailer" (nothing is consumed in that case).
/// </returns>
public bool ReadTrailer(PdfStream st)
{
    if (st.PeekLine() != "trailer")
    {
        return(false);
    }

    st.ReadLine();
    m_trailer.ReadDictionary(st);

    // Enforce PDF/A requirements for the trailer (section 5.3 of draft standard).
    // Every rule is evaluated, never stopping at the first failure, so that
    // _all_ errors are reported at once.
    // NOTE(review): presence is tested with a null comparison; confirm that
    // m_trailer.Get returns null for a missing key.
    string[] forbiddenKeys = { "Encrypt", "Info" };
    string[] forbiddenMessages =
    {
        "PDF/A: Forbidden encrypt dictionary item in section 5.3 document trailer",
        "PDF/A: Forbidden info dictionary item in section 5.3 document trailer"
    };
    for (int idx = 0; idx < forbiddenKeys.Length; idx++)
    {
        if (m_trailer.Get(forbiddenKeys[idx]) != null)
        {
            m_pdfa = false;
            Utility.TraceLine(forbiddenMessages[idx]);
        }
    }

    string[] requiredKeys = { "Size", "ID" };
    string[] requiredMessages =
    {
        "PDF/A: Required size dictionary document trailer item is missing",
        "PDF/A: Required ID dictionary document trailer item is missing"
    };
    for (int idx = 0; idx < requiredKeys.Length; idx++)
    {
        if (m_trailer.Get(requiredKeys[idx]) == null)
        {
            m_pdfa = false;
            Utility.TraceLine(requiredMessages[idx]);
        }
    }

    Utility.TraceLine("Read trailer");
    return(true);
}
/// <summary>
/// Loads the named file into a PdfStream and parses it: the header first,
/// then repeated passes that each try to read a comment, an object, an xref
/// section, a trailer and a startxref section. Valid objects are added to
/// m_objects. Parsing stops after a trailer that is not the linearization
/// trailer, or when a pass begins without the stream position having moved.
/// Content left over after parsing clears m_pdfa.
/// </summary>
/// <param name="filename">Path of the PDF file to read.</param>
/// <exception cref="ParseException">
/// The first object has a valid "Linearized" dictionary item that is not a number.
/// </exception>
public Pdf(string filename)
{
    PdfStream st = new PdfStream();
    st.FillFromFile(filename);

    long lastPosition = st.Position;
    ReadHeader(st);

    // Keep making passes for as long as the stream position keeps advancing
    bool finished = false;
    while (!finished && (st.Position != lastPosition))
    {
        Utility.TraceLine("Starting read pass");
        lastPosition = st.Position;

        ReadComment(st);
        Object candidate = ReadObject(st);
        if (candidate.Valid)
        {
            m_objects.Add(candidate);

            // Only the very first object is inspected for a linearization marker
            if (m_objects.Count == 1)
            {
                DictionaryItem linearized = candidate.Dictionary.Get("Linearized");
                if (linearized.Valid)
                {
                    if (linearized.Type != DictionaryItem.ValueType.Number)
                    {
                        throw new ParseException("Linearized dictionary item is not a number");
                    }

                    if (linearized.ValueAsInteger() == 1)
                    {
                        m_linflag = true;
                        m_linearized = true;
                        Utility.TraceLine("Linearized PDF document found");
                        Utility.TraceLine("PDF/A: Linearization of document is ignored (section 5.10)");
                    }
                }
            }
        }

        ReadXref(st);

        if (ReadTrailer(st))
        {
            if (m_linflag)
            {
                // First trailer of a linearized file: clear the flag and keep parsing
                m_linflag = false;
                Utility.TraceLine("Linearization trailer");
            }
            else
            {
                finished = true;
            }
        }

        ReadStartXref(st);

        // Nothing in this pass consumed any input: drop one line and try again
        if (!finished && (st.Position == lastPosition))
        {
            st.ReadLine();
        }
    }

    if (st.Position != st.Length)
    {
        m_pdfa = false;
        Utility.TraceLine("PDF/A: Extraneous content after EOF marker breaches section 5.3 requirements");
    }
}