/// <summary>
/// Creates the font described by a font dictionary by looking up the handler
/// registered for the dictionary's Subtype entry and delegating to it.
/// </summary>
/// <param name="dictionary">The font dictionary to read.</param>
/// <param name="reader">Passed through unchanged to the selected handler.</param>
/// <param name="isLenientParsing">
/// When true, a missing or unexpected Type entry is logged as an error and parsing continues;
/// when false it throws.
/// </param>
/// <returns>The font produced by the handler registered for the subtype.</returns>
/// <exception cref="InvalidFontFormatException">
/// The Type entry is missing or is not Font and <paramref name="isLenientParsing"/> is false.
/// </exception>
/// <exception cref="NotImplementedException">
/// The Subtype entry is missing or no handler is registered for it.
/// </exception>
public IFont Get(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
    var type = dictionary.GetName(CosName.TYPE);

    // Compare constant-first: a dictionary without a Type entry must take the same
    // "not a font" path as one with the wrong type, not fail on a null dereference.
    if (!CosName.FONT.Equals(type))
    {
        var message = "The font dictionary did not have type 'Font'. " + dictionary;

        if (isLenientParsing)
        {
            log?.Error(message);
        }
        else
        {
            throw new InvalidFontFormatException(message);
        }
    }

    var subtype = dictionary.GetName(CosName.SUBTYPE);

    // Guard the lookup: a null key must not be passed to the handler map.
    if (subtype != null && handlers.TryGetValue(subtype, out var handler))
    {
        return handler.Generate(dictionary, reader, isLenientParsing);
    }

    throw new NotImplementedException($"Parsing not implemented for fonts of type: {subtype}, please submit a pull request or an issue.");
}
/// <summary>
/// Decides whether a UCS2 CMap is required for the font. Currently this either
/// returns null (no UCS2 CMap needed) or throws because UCS2 CMaps are unsupported.
/// </summary>
/// <param name="dictionary">The font dictionary whose Encoding entry is inspected.</param>
/// <param name="isCMapPredefined">Whether the font's CMap is one of the predefined CMaps.</param>
/// <param name="usesDescendantAdobeFont">
/// Whether the descendant CIDFont uses one of the Adobe character collections
/// (Adobe-GB1, Adobe-CNS1, Adobe-Japan1 or Adobe-Korea1).
/// </param>
/// <returns>Always null when it returns.</returns>
/// <exception cref="NotSupportedException">A UCS2 CMap would be required.</exception>
private static CMap GetUcs2CMap(PdfDictionary dictionary, bool isCMapPredefined, bool usesDescendantAdobeFont)
{
    if (isCMapPredefined)
    {
        var encoding = dictionary.GetName(CosName.ENCODING);

        if (encoding != null)
        {
            // A UCS2 CMap is needed when a composite font uses a predefined CMap other than
            // Identity-H / Identity-V, or when its descendant CIDFont uses an Adobe character collection.
            var usesIdentityCMap = encoding.Equals(CosName.IDENTITY_H) || encoding.Equals(CosName.IDENTITY_V);

            if (!usesIdentityCMap || usesDescendantAdobeFont)
            {
                throw new NotSupportedException("Support for UCS2 CMaps are not implemented yet. Please raise an issue.");
            }
        }
    }

    return null;
}
/// <summary>
/// Builds a CID font from its dictionary: reads the widths, vertical displacements,
/// font descriptor (when present), descriptor font file, base font name and system info,
/// then constructs the font matching the Subtype entry.
/// </summary>
/// <param name="dictionary">The CIDFont dictionary.</param>
/// <param name="reader">Used to resolve the font descriptor and the descriptor's font file.</param>
/// <param name="isLenientParsing">Passed through to the descriptor factory and font file reader.</param>
/// <returns>
/// A <see cref="Type2CidFont"/> when the subtype is CIDFontType2; otherwise null
/// (CIDFontType0 is not implemented yet).
/// </returns>
/// <exception cref="InvalidFontFormatException">The Type entry is missing or is not Font.</exception>
public ICidFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
    var type = dictionary.GetName(CosName.TYPE);

    if (!CosName.FONT.Equals(type))
    {
        // type may be null here (missing Type entry); use ?. so the intended exception
        // is raised instead of a NullReferenceException while building the message.
        throw new InvalidFontFormatException($"Expected 'Font' dictionary but found '{type?.Name}'");
    }

    var widths = ReadWidths(dictionary);
    var verticalWritingMetrics = ReadVerticalDisplacements(dictionary);

    FontDescriptor descriptor = null;
    if (TryGetFontDescriptor(dictionary, reader, out var descriptorDictionary))
    {
        descriptor = descriptorFactory.Generate(descriptorDictionary, isLenientParsing);
    }

    var fontProgram = ReadDescriptorFile(descriptor, reader, isLenientParsing);
    var baseFont = dictionary.GetName(CosName.BASE_FONT);
    var systemInfo = GetSystemInfo(dictionary);
    var subType = dictionary.GetName(CosName.SUBTYPE);

    if (CosName.CID_FONT_TYPE2.Equals(subType))
    {
        return new Type2CidFont(type, subType, baseFont, systemInfo, descriptor, fontProgram, verticalWritingMetrics, widths);
    }

    // CIDFontType0 (and any other subtype) is not supported yet; callers receive null.
    return null;
}
/// <summary>
/// Validates that a descendant font dictionary has Type Font and hands it to the CID font factory.
/// </summary>
/// <param name="dictionary">The descendant font dictionary.</param>
/// <param name="reader">Passed through to the CID font factory.</param>
/// <param name="isLenientParsing">Passed through to the CID font factory.</param>
/// <returns>The CID font produced by the factory.</returns>
/// <exception cref="InvalidFontFormatException">The Type entry is missing or is not Font.</exception>
private ICidFont ParseDescendant(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
    var type = dictionary.GetName(CosName.TYPE);

    if (!CosName.FONT.Equals(type))
    {
        // type may be null when the Type entry is absent; ?. keeps the intended exception
        // from being replaced by a NullReferenceException.
        throw new InvalidFontFormatException($"Expected 'Font' dictionary but found '{type?.Name}'");
    }

    return cidFontFactory.Generate(dictionary, reader, isLenientParsing);
}
/// <summary>
/// Creates a <see cref="Page"/> from its page dictionary: resolves the media box, crop box and
/// user space unit, loads the page resources and, when the Contents entry is an indirect
/// object, decodes and processes the content stream.
/// </summary>
/// <param name="number">The page number, used for the created page and in error messages.</param>
/// <param name="dictionary">The page dictionary.</param>
/// <param name="pageTreeMembers">Page tree values used when resolving the media and crop boxes.</param>
/// <param name="reader">Used to load resources and parse the content stream.</param>
/// <param name="isLenientParsing">When true an unexpected Type entry is tolerated.</param>
/// <returns>The page; its content is the default value when there is no indirect Contents entry.</returns>
/// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The Type entry is present but not Page in strict mode, or the Contents object is not a raw stream.
/// </exception>
public Page Create(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers, IRandomAccessRead reader, bool isLenientParsing)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    var type = dictionary.GetName(CosName.TYPE);

    if (type != null && !type.Equals(CosName.PAGE) && !isLenientParsing)
    {
        throw new InvalidOperationException($"Page {number} had its type was specified as {type} rather than 'Page'.");
    }

    MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);
    CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);
    UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

    LoadResources(dictionary, reader, isLenientParsing);

    PageContent content = default(PageContent);

    // Only an indirect Contents entry is handled here; any other form leaves the content at its default.
    if (dictionary.GetItemOrDefault(CosName.CONTENTS) is CosObject contentObject)
    {
        // NOTE(review): the content stream is parsed with lenient parsing hard-coded to false,
        // regardless of isLenientParsing - confirm this is intentional.
        var contentStream = pdfObjectParser.Parse(contentObject.ToIndirectReference(), reader, false) as PdfRawStream;

        if (contentStream == null)
        {
            throw new InvalidOperationException("Failed to parse the content for the page: " + number);
        }

        var contents = contentStream.Decode(filterProvider);
        var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents));
        var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit);

        content = context.Process(operations);
    }

    return new Page(number, mediaBox, cropBox, content);
}
/// <summary>
/// Builds a Type 0 (composite) font from its dictionary: reads the encoding CMap, parses the
/// first descendant CIDFont and, when present, the ToUnicode CMap.
/// </summary>
/// <param name="dictionary">The Type 0 font dictionary.</param>
/// <param name="reader">Used to resolve the descendant font and the ToUnicode stream.</param>
/// <param name="isLenientParsing">Passed through to the parsers used.</param>
/// <returns>The constructed <see cref="Type0Font"/>.</returns>
/// <exception cref="InvalidFontFormatException">
/// No descendant font is declared, or the descendant does not resolve to a dictionary.
/// </exception>
public IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
    var baseFont = dictionary.GetName(CosName.BASE_FONT);
    var cMap = ReadEncoding(dictionary, out var isCMapPredefined);

    if (!TryGetFirstDescendant(dictionary, out var descendantObject))
    {
        throw new InvalidFontFormatException("No descendant font dictionary was declared for this Type 0 font. This dictionary should contain the CIDFont for the Type 0 font. " + dictionary);
    }

    var parsed = DirectObjectFinder.Find<PdfDictionary>(descendantObject, pdfObjectParser, reader, isLenientParsing);

    if (!(parsed is PdfDictionary descendantFontDictionary))
    {
        throw new InvalidFontFormatException("Expected to find a Descendant Font dictionary, instead it was: " + parsed);
    }

    ICidFont cidFont = ParseDescendant(descendantFontDictionary, reader, isLenientParsing);

    // The result is not used, but the call is kept: it throws when a UCS2 CMap would be required.
    GetUcs2CMap(dictionary, isCMapPredefined, false);

    CMap toUnicodeCMap = null;

    if (dictionary.ContainsKey(CosName.TO_UNICODE))
    {
        var toUnicodeReference = ((CosObject)dictionary[CosName.TO_UNICODE]).ToIndirectReference();
        var toUnicodeStream = pdfObjectParser.Parse(toUnicodeReference, reader, isLenientParsing) as PdfRawStream;
        var decodedBytes = toUnicodeStream?.Decode(filterProvider);

        if (decodedBytes != null)
        {
            toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedBytes), isLenientParsing);
        }
    }

    return new Type0Font(baseFont, cidFont, cMap, toUnicodeCMap);
}
/// <summary>
/// Walks the page tree depth-first from the given node, numbering each Page node it meets and
/// recording it in <c>locatedPages</c>, until the sought page number is reached.
/// </summary>
/// <param name="currentPageDictionary">The page tree node (Page or Pages dictionary) to inspect.</param>
/// <param name="soughtPageNumber">The page number being searched for.</param>
/// <param name="pageNumbersObserved">Page numbers assigned so far; each Page node visited is appended.</param>
/// <returns>True when the sought page was found at or beneath this node; otherwise false.</returns>
public bool FindPage(PdfDictionary currentPageDictionary, int soughtPageNumber, List<int> pageNumbersObserved)
{
    var type = currentPageDictionary.GetName(CosName.TYPE);

    // Constant-first comparisons so a node without a Type entry falls through to the warning
    // below instead of failing on a null dereference.
    if (CosName.PAGE.Equals(type))
    {
        var pageNumber = GetNextPageNumber(pageNumbersObserved);

        locatedPages[pageNumber] = currentPageDictionary;
        pageNumbersObserved.Add(pageNumber);

        return pageNumber == soughtPageNumber;
    }

    if (!CosName.PAGES.Equals(type))
    {
        log.Warn("Did not find the expected type (Page or Pages) in dictionary: " + currentPageDictionary);
        return false;
    }

    var kids = currentPageDictionary.GetDictionaryObject(CosName.KIDS) as COSArray;

    pageFactory.LoadResources(currentPageDictionary, reader, isLenientParsing);

    if (kids == null)
    {
        // A Pages node with a missing or non-array Kids entry has nothing to search.
        log.Warn("Did not find a Kids array in the Pages dictionary: " + currentPageDictionary);
        return false;
    }

    foreach (var kid in kids.OfType<CosObject>())
    {
        var child = pdfObjectParser.Parse(kid.ToIndirectReference(), reader, isLenientParsing) as PdfDictionary;

        if (child == null)
        {
            // The kid did not resolve to a dictionary; skip it rather than dereferencing null.
            log.Warn("A child of the Pages dictionary could not be parsed as a dictionary: " + kid);
            continue;
        }

        if (FindPage(child, soughtPageNumber, pageNumbersObserved))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Reads a stream object body from the reader: consumes the 'stream' keyword, copies the stream
/// bytes (using the declared length when it validates, otherwise scanning for the end of the
/// stream) and then checks the terminating keyword.
/// </summary>
/// <param name="reader">The source positioned at the 'stream' keyword.</param>
/// <param name="streamDictionary">The dictionary preceding the stream; attached to the result.</param>
/// <param name="isLenientParsing">
/// When true, an 'endobj' terminator or trailing characters after 'endstream' are logged and
/// un-read instead of causing an exception.
/// </param>
/// <param name="parser">Passed through to the length lookup.</param>
/// <returns>The raw (still encoded) stream bytes together with the stream dictionary.</returns>
/// <exception cref="InvalidOperationException">
/// The stream is not terminated by 'endstream' and the deviation is not tolerated.
/// </exception>
public PdfRawStream Parse(IRandomAccessRead reader, PdfDictionary streamDictionary, bool isLenientParsing, IPdfObjectParser parser)
{
    // Consume the 'stream' keyword; its presence was already tested in parseObjectsDynamically().
    ReadHelper.ReadExpectedString(reader, "stream");
    skipWhiteSpaces(reader);

    // The raw Length item is fetched (rather than a resolved value) because the underlying
    // object might still be null while parsing is in progress.
    var lengthItem = streamDictionary.GetItemOrDefault(CosName.LENGTH);
    var streamType = streamDictionary.GetName(CosName.TYPE);
    ICosNumber declaredLength = GetLength(reader, lengthItem, streamType, isLenientParsing, parser);

    ValidateStreamLength(reader, isLenientParsing, declaredLength);

    PdfRawStream rawStream;

    // Copy the stream data into an in-memory buffer.
    using (var buffer = new MemoryStream())
    using (var output = new BinaryWriter(buffer))
    {
        var lengthIsUsable = declaredLength != null
                             && validateStreamLength(reader, declaredLength.AsLong(), reader.Length());

        if (!lengthIsUsable)
        {
            ReadUntilEndStream(reader, output);
        }
        else
        {
            ReadValidStream(reader, output, declaredLength);
        }

        rawStream = new PdfRawStream(buffer.ToArray(), streamDictionary);
    }

    String terminator = ReadHelper.ReadString(reader);

    if (terminator.Equals("endobj") && isLenientParsing)
    {
        log.Warn($"stream ends with \'endobj\' instead of \'endstream\' at offset {reader.GetPosition()}");

        // Un-read 'endobj' to avoid a follow-up warning about a missing endobj.
        reader.Rewind("endobj".Length);
        return rawStream;
    }

    if (terminator.Length > 9 && isLenientParsing && terminator.Substring(0, 9).Equals("endstream"))
    {
        log.Warn("stream ends with '" + terminator + "' instead of 'endstream' at offset " + reader.GetPosition());

        // Un-read the "extra" bytes that followed 'endstream'.
        reader.Rewind(OtherEncodings.StringAsLatin1Bytes(terminator.Substring(9)).Length);
        return rawStream;
    }

    if (!terminator.Equals("endstream"))
    {
        throw new InvalidOperationException("Error reading stream, expected='endstream' actual='" + terminator + "' at offset " + reader.GetPosition());
    }

    return rawStream;
}