/// <summary>
/// Looks up the <c>Filter</c> entry of a stream dictionary and resolves it to filter instances.
/// </summary>
/// <param name="streamDictionary">The dictionary belonging to the stream whose filters are wanted.</param>
/// <returns>
/// An empty list when the dictionary has no filter entry, a single-element list when the entry is a name,
/// or one filter per element (in order) when the entry is an array.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="streamDictionary"/> is <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">The filter entry is neither a name nor an array.</exception>
public IReadOnlyList<IFilter> GetFilters(PdfDictionary streamDictionary)
{
    if (streamDictionary == null)
    {
        throw new ArgumentNullException(nameof(streamDictionary));
    }

    var filterEntry = streamDictionary.GetItemOrDefault(CosName.FILTER);

    // No entry at all: the stream is not filtered.
    if (filterEntry == null)
    {
        return new IFilter[0];
    }

    if (filterEntry is COSArray filterArray)
    {
        // TODO: every element is cast straight to CosName; an array holding anything else
        // fails with an InvalidCastException here.
        var resolvedFilters = filterArray
            .Select(element => GetFilterStrict((CosName)element))
            .ToList();

        return resolvedFilters;
    }

    if (filterEntry is CosName filterName)
    {
        return new[] { GetFilterStrict(filterName) };
    }

    throw new InvalidOperationException("The filter for a stream may be either a string or an array, instead this Pdf has: " + filterEntry.GetType());
}
/// <summary>
/// Reads a stream body from <paramref name="reader"/> and returns its raw bytes paired with
/// <paramref name="streamDictionary"/>.
/// </summary>
/// <param name="reader">The input, expected to be positioned at the <c>stream</c> keyword.</param>
/// <param name="streamDictionary">The dictionary that precedes the stream data.</param>
/// <param name="isLenientParsing">
/// When <c>true</c>, a trailing <c>endobj</c>, or an <c>endstream</c> followed by extra characters,
/// is logged as a warning and recovered from instead of causing an exception.
/// </param>
/// <param name="parser">Object parser handed on to <c>GetLength</c> when determining the stream length.</param>
/// <returns>The raw stream built from the bytes read and <paramref name="streamDictionary"/>.</returns>
/// <exception cref="InvalidOperationException">
/// The text following the stream data is not <c>endstream</c> and none of the lenient recoveries applied.
/// </exception>
public PdfRawStream Parse(IRandomAccessRead reader, PdfDictionary streamDictionary, bool isLenientParsing, IPdfObjectParser parser)
{
    // Consume the 'stream' keyword; per the original note its presence was already tested
    // by the caller (parseObjectsDynamically()).
    ReadHelper.ReadExpectedString(reader, "stream");

    skipWhiteSpaces(reader);

    // The Length entry is fetched with GetItemOrDefault because, while parsing is still in
    // progress, the underlying object might not be available yet.
    var lengthEntry = streamDictionary.GetItemOrDefault(CosName.LENGTH);
    var typeName = streamDictionary.GetName(CosName.TYPE);

    ICosNumber declaredLength = GetLength(reader, lengthEntry, typeName, isLenientParsing, parser);

    ValidateStreamLength(reader, isLenientParsing, declaredLength);

    PdfRawStream rawStream;

    // Copy the stream data into an in-memory buffer.
    using (var buffer = new MemoryStream())
    using (var writer = new BinaryWriter(buffer))
    {
        var lengthIsUsable = declaredLength != null
                             && validateStreamLength(reader, declaredLength.AsLong(), reader.Length());

        if (lengthIsUsable)
        {
            ReadValidStream(reader, writer, declaredLength);
        }
        else
        {
            // No trustworthy length: scan forward for the end of the stream instead.
            ReadUntilEndStream(reader, writer);
        }

        rawStream = new PdfRawStream(buffer.ToArray(), streamDictionary);
    }

    string trailingKeyword = ReadHelper.ReadString(reader);

    if (isLenientParsing)
    {
        if (trailingKeyword.Equals("endobj"))
        {
            log.Warn($"stream ends with \'endobj\' instead of \'endstream\' at offset {reader.GetPosition()}");

            // Put 'endobj' back so the caller does not also warn about a missing endobj.
            reader.Rewind("endobj".Length);

            return rawStream;
        }

        if (trailingKeyword.Length > 9 && trailingKeyword.Substring(0, 9).Equals("endstream"))
        {
            log.Warn("stream ends with '" + trailingKeyword + "' instead of 'endstream' at offset " + reader.GetPosition());

            // Unread whatever followed 'endstream' so it can be parsed again.
            var surplus = OtherEncodings.StringAsLatin1Bytes(trailingKeyword.Substring(9));
            reader.Rewind(surplus.Length);

            return rawStream;
        }
    }

    if (!trailingKeyword.Equals("endstream"))
    {
        throw new InvalidOperationException("Error reading stream, expected='endstream' actual='" + trailingKeyword + "' at offset " + reader.GetPosition());
    }

    return rawStream;
}
/// <summary>
/// Builds a <see cref="Page"/> from its page dictionary: resolves the media box, crop box and
/// user space unit, loads the page resources and, when present, processes the content stream.
/// </summary>
/// <param name="number">The page number, used for the resulting page and in error messages.</param>
/// <param name="dictionary">The page dictionary.</param>
/// <param name="pageTreeMembers">Page tree values passed on to the media box and crop box lookups.</param>
/// <param name="reader">The input used when resolving indirect objects.</param>
/// <param name="isLenientParsing">
/// When <c>true</c>, a wrong <c>Type</c> entry is tolerated and the flag is forwarded to the
/// resource loading and content stream parsing.
/// </param>
/// <returns>The page; its content is left at the default value when no content stream was found.</returns>
/// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">
/// The dictionary declares a type other than <c>Page</c> while parsing strictly, or the referenced
/// content object could not be parsed as a raw stream.
/// </exception>
public Page Create(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers, IRandomAccessRead reader, bool isLenientParsing)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    var type = dictionary.GetName(CosName.TYPE);

    // A missing type is always accepted; a wrong type is only fatal in strict mode.
    if (type != null && !type.Equals(CosName.PAGE) && !isLenientParsing)
    {
        throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
    }

    MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);
    CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);
    UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

    // Resources must be loaded before the content stream is processed, since the processor
    // is handed the same resource store.
    LoadResources(dictionary, reader, isLenientParsing);

    PageContent content = default(PageContent);

    // Only a Contents entry that is an indirect reference is handled here; any other form
    // leaves the content at its default value.
    if (dictionary.GetItemOrDefault(CosName.CONTENTS) is CosObject contentObject)
    {
        // Forward the caller's leniency setting instead of always parsing strictly, in line
        // with how LoadResources resolves its indirect reference.
        var contentStream = pdfObjectParser.Parse(contentObject.ToIndirectReference(), reader, isLenientParsing) as PdfRawStream;

        if (contentStream == null)
        {
            throw new InvalidOperationException("Failed to parse the content for the page: " + number);
        }

        var contents = contentStream.Decode(filterProvider);

        var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents));

        var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit);

        content = context.Process(operations);
    }

    return new Page(number, mediaBox, cropBox, content);
}
/// <summary>
/// Loads the <c>Resources</c> entry of <paramref name="dictionary"/> into the resource store.
/// The entry may be a dictionary directly or an indirect reference that resolves to one.
/// </summary>
/// <param name="dictionary">The dictionary whose resources should be loaded.</param>
/// <param name="reader">The input used when resolving indirect objects.</param>
/// <param name="isLenientParsing">Forwarded to the object parser and the resource store.</param>
/// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is <c>null</c>.</exception>
/// <remarks>
/// Nothing is loaded when the entry is missing, is of another type, or is a reference that does
/// not resolve to a dictionary.
/// </remarks>
public void LoadResources(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
    // Fail with a descriptive exception rather than a NullReferenceException on the lookup below.
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    var resources = dictionary.GetItemOrDefault(CosName.RESOURCES);

    // Direct dictionary: load it as-is.
    if (resources is PdfDictionary resource)
    {
        resourceStore.LoadResourceDictionary(resource, reader, isLenientParsing);

        return;
    }

    // Indirect reference: resolve it first and load the result only if it is a dictionary.
    if (resources is CosObject resourceObject)
    {
        var resourceDictionary = pdfObjectParser.Parse(resourceObject.ToIndirectReference(), reader, isLenientParsing);

        if (resourceDictionary is PdfDictionary resolvedDictionary)
        {
            resourceStore.LoadResourceDictionary(resolvedDictionary, reader, isLenientParsing);
        }
    }
}
/// <summary>
/// Fetches the entry stored under <paramref name="name"/> in the catalog dictionary.
/// </summary>
/// <param name="name">The key to look up.</param>
/// <returns>Whatever the catalog dictionary's <c>GetItemOrDefault</c> returns for <paramref name="name"/>.</returns>
public CosBase Get(CosName name) => catalogDictionary.GetItemOrDefault(name);