/// <summary>
/// Loads a <see cref="PDFDocument"/> from <paramref name="reader"/> and post-processes it:
/// for every page that has a "Contents" entry, the content bytes are wrapped in a
/// <see cref="PDFContent"/>, stored on the page dictionary under "PageContent", and run
/// through <c>ProcessPageContentBlocks</c>; finally the structure tree is built when the
/// document exposes a non-null <c>StructTreeRoot</c>.
/// </summary>
/// <param name="reader">Byte source handed straight to <c>PDFDocument.Load</c>.</param>
/// <returns>The loaded and post-processed document.</returns>
protected static PDFDocument ParseDocument(ByteStreamReader reader)
{
    PDFDocument document = new PDFDocument();
    document.Load(reader);

    foreach (IPDFDictionary page in document.Pages)
    {
        IPDFElement contents;
        if (!page.Dict.TryGet<IPDFElement>("Contents", out contents))
        {
            // Page has no "Contents" entry: nothing to attach.
            continue;
        }

        IPDFList contentList = contents as IPDFList;
        IPDFStream contentStream = contents as IPDFStream;
        PDFContent pageContent;

        if (contentList != null && contentList.List != null)
        {
            // "Contents" is a list: concatenate the data of every stream element, in order.
            List<byte> buffer = new List<byte>();
            foreach (IPDFStream part in contentList.List.OfType<IPDFStream>())
            {
                if (part.Stream == null)
                {
                    continue;
                }

                buffer.AddRange(part.Stream.Data);
            }

            pageContent = new PDFContent(buffer.ToArray(), page);
        }
        else if (contentStream != null && contentStream.Stream != null)
        {
            // "Contents" is a single stream: use its data directly.
            pageContent = new PDFContent(contentStream.Stream.Data, page);
        }
        else
        {
            // Neither a usable list nor a usable stream: leave the page untouched.
            continue;
        }

        page.Dict["PageContent"] = pageContent;

        // Content blocks are registered per page, keyed by the page's object reference.
        Dictionary<long, PDFContentBlock> pageBlocks = new Dictionary<long, PDFContentBlock>();
        document.ContentBlocks[((IPDFObjRef)page).ObjRef] = pageBlocks;
        document.ProcessPageContentBlocks(pageContent, pageBlocks);
    }

    IPDFDictionary structRoot = document.StructTreeRoot;
    if (structRoot == null)
    {
        return document;
    }

    // The root node is processed using the name stored under its "Type" key.
    document.StructTree = document.ProcessTreeNode(structRoot, (PDFName)structRoot.Dict["Type"]);
    return document;
}
/// <summary>
/// Creates a tokenizer bound to the given byte reader.
/// </summary>
/// <param name="reader">Reader stored by the tokenizer for later use.</param>
/// <param name="useStreamKeyword">
/// Initial value of <c>UseStreamKeyword</c>; defaults to <c>false</c>.
/// NOTE(review): presumably controls whether the "stream" keyword is recognised while
/// tokenizing — confirm against the tokenizer's read logic.
/// </param>
public PDFTokenizer(ByteStreamReader reader, bool useStreamKeyword = false)
{
    // "this." is required here because the parameter shadows the field name.
    this.reader = reader;
    UseStreamKeyword = useStreamKeyword;
}