/// <summary>
/// Builds a font from a font dictionary by dispatching to the handler registered for the dictionary's <c>Subtype</c>.
/// </summary>
/// <param name="dictionary">The font dictionary to interpret.</param>
/// <param name="isLenientParsing">When true, a wrong <c>Type</c> entry is logged instead of thrown; the flag is also forwarded to the handler.</param>
/// <returns>The font produced by the matching handler.</returns>
/// <exception cref="InvalidFontFormatException">
/// The <c>Type</c> entry is present but is not <c>Font</c> (strict parsing only), or the dictionary declares no <c>Subtype</c>.
/// </exception>
/// <exception cref="NotImplementedException">No handler is registered for the declared subtype.</exception>
public IFont Get(DictionaryToken dictionary, bool isLenientParsing)
{
    var type = dictionary.GetNameOrDefault(NameToken.Type);

    // An absent Type entry is tolerated; only an explicit, wrong value is reported.
    if (type != null && !type.Equals(NameToken.Font))
    {
        var message = "The font dictionary did not have type 'Font'. " + dictionary;

        if (!isLenientParsing)
        {
            throw new InvalidFontFormatException(message);
        }

        log?.Error(message);
    }

    var subtype = dictionary.GetNameOrDefault(NameToken.Subtype);

    // Standard dictionary lookups reject a null key with ArgumentNullException, so a font
    // dictionary without a Subtype is reported as a format problem instead of reaching the lookup.
    // NOTE(review): assumes handlers is a standard dictionary implementation - confirm.
    if (subtype == null)
    {
        throw new InvalidFontFormatException("The font dictionary did not declare a subtype. " + dictionary);
    }

    if (handlers.TryGetValue(subtype, out var handler))
    {
        return handler.Generate(dictionary, isLenientParsing);
    }

    throw new NotImplementedException($"Parsing not implemented for fonts of type: {subtype}, please submit a pull request or an issue.");
}
/// <summary>
/// Creates the CID font described by a CID font dictionary.
/// </summary>
/// <param name="dictionary">The CID font dictionary; its <c>Type</c> entry must be <c>Font</c>.</param>
/// <param name="isLenientParsing">Forwarded to the descriptor factory and to the CID-to-GID map reader.</param>
/// <returns>
/// A <see cref="Type0CidFont"/> for subtype <c>CIDFontType0</c>, a <see cref="Type2CidFont"/> for subtype
/// <c>CIDFontType2</c>, or <c>null</c> for any other subtype.
/// </returns>
/// <exception cref="InvalidFontFormatException">The dictionary's <c>Type</c> entry is not <c>Font</c>.</exception>
public ICidFont Generate(DictionaryToken dictionary, bool isLenientParsing)
{
    var type = dictionary.GetNameOrDefault(NameToken.Type);

    if (!NameToken.Font.Equals(type))
    {
        throw new InvalidFontFormatException($"Expected \'Font\' dictionary but found \'{type}\'");
    }

    var widths = ReadWidths(dictionary);

    // The default width stays unset unless the dictionary carries a numeric DW entry.
    double? defaultWidth = null;
    if (dictionary.TryGet(NameToken.Dw, pdfScanner, out NumericToken dwToken))
    {
        defaultWidth = dwToken.Double;
    }

    var verticalWritingMetrics = ReadVerticalDisplacements(dictionary);

    // The descriptor remains null when the dictionary does not provide one.
    FontDescriptor descriptor = TryGetFontDescriptor(dictionary, out var descriptorDictionary)
        ? descriptorFactory.Generate(descriptorDictionary, pdfScanner, isLenientParsing)
        : null;

    var fontProgram = ReadDescriptorFile(descriptor);
    var baseFont = dictionary.GetNameOrDefault(NameToken.BaseFont);
    var systemInfo = GetSystemInfo(dictionary);
    var subType = dictionary.GetNameOrDefault(NameToken.Subtype);

    if (NameToken.CidFontType0.Equals(subType))
    {
        return new Type0CidFont(fontProgram, type, subType, baseFont, systemInfo, descriptor, verticalWritingMetrics, widths, defaultWidth);
    }

    if (!NameToken.CidFontType2.Equals(subType))
    {
        // Unrecognised subtype: callers receive null.
        return null;
    }

    // The CID to glyph index map is only read for CIDFontType2 fonts.
    var cidToGid = GetCharacterIdentifierToGlyphIndexMap(dictionary, isLenientParsing);

    return new Type2CidFont(type, subType, baseFont, systemInfo, descriptor, fontProgram, verticalWritingMetrics, widths, defaultWidth, cidToGid);
}
/// <summary>
/// Decides whether a composite font needs a UCS2 CMap. UCS2 CMaps are not supported by this method,
/// so it either returns <c>null</c> (none needed) or throws.
/// </summary>
/// <param name="dictionary">The Type 0 font dictionary whose <c>Encoding</c> name is inspected.</param>
/// <param name="isCMapPredefined">Whether the font's encoding CMap is one of the predefined CMaps.</param>
/// <param name="usesDescendantAdobeFont">Whether the descendant CIDFont uses one of the Adobe character collections.</param>
/// <returns>Always <c>null</c> when it returns.</returns>
/// <exception cref="NotSupportedException">A UCS2 CMap would be required for this font.</exception>
private static CMap GetUcs2CMap(DictionaryToken dictionary, bool isCMapPredefined, bool usesDescendantAdobeFont)
{
    if (!isCMapPredefined)
    {
        return null;
    }

    /*
     * A UCS2 CMap applies when a composite font uses a predefined CMap other than Identity-H / Identity-V,
     * or when its descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1 or Adobe-Korea1
     * character collection.
     */
    var encodingName = dictionary.GetNameOrDefault(NameToken.Encoding);

    if (encodingName == null)
    {
        return null;
    }

    var usesIdentityEncoding = encodingName.Equals(NameToken.IdentityH) || encodingName.Equals(NameToken.IdentityV);

    var requiresUcs2CMap = !usesIdentityEncoding || usesDescendantAdobeFont;

    if (requiresUcs2CMap)
    {
        throw new NotSupportedException("Support for UCS2 CMaps are not implemented yet. Please raise an issue.");
    }

    return null;
}
/// <summary>
/// Builds a <see cref="Type0Font"/> from a Type 0 (composite) font dictionary: reads the encoding CMap,
/// parses the first descendant CIDFont, resolves the UCS2 CMap and reads the optional <c>ToUnicode</c> CMap.
/// </summary>
/// <param name="dictionary">The Type 0 font dictionary.</param>
/// <param name="isLenientParsing">Forwarded to descendant parsing and to <c>ToUnicode</c> CMap parsing.</param>
/// <returns>The composite font.</returns>
/// <exception cref="InvalidFontFormatException">
/// No descendant font is declared, or the declared descendant is neither a dictionary nor an indirect reference.
/// </exception>
/// <exception cref="PdfDocumentFormatException">The <c>ToUnicode</c> entry is neither a stream nor a name.</exception>
public IFont Generate(DictionaryToken dictionary, bool isLenientParsing)
{
    var baseFont = dictionary.GetNameOrDefault(NameToken.BaseFont);

    var cMap = ReadEncoding(dictionary, out var isCMapPredefined);

    if (!TryGetFirstDescendant(dictionary, out var descendantObject))
    {
        throw new InvalidFontFormatException("No descendant font dictionary was declared for this Type 0 font. This dictionary should contain the CIDFont for the Type 0 font. " + dictionary);
    }

    DictionaryToken descendantFontDictionary;
    if (descendantObject is IndirectReferenceToken reference)
    {
        descendantFontDictionary = DirectObjectFinder.Get<DictionaryToken>(reference, scanner);
    }
    else if (descendantObject is DictionaryToken direct)
    {
        descendantFontDictionary = direct;
    }
    else
    {
        // Previously an unchecked cast: a malformed descendant surfaced as InvalidCastException
        // (or a later NullReferenceException) instead of a font format error.
        throw new InvalidFontFormatException($"The descendant font of this Type 0 font was neither a dictionary nor an indirect reference: {descendantObject}. " + dictionary);
    }

    var cidFont = ParseDescendant(descendantFontDictionary, isLenientParsing);

    var (ucs2CMap, isChineseJapaneseOrKorean) = GetUcs2CMap(dictionary, isCMapPredefined, cidFont);

    CMap toUnicodeCMap = null;
    if (dictionary.ContainsKey(NameToken.ToUnicode))
    {
        var toUnicodeValue = dictionary.Data[NameToken.ToUnicode];

        if (DirectObjectFinder.TryGet<StreamToken>(toUnicodeValue, scanner, out var toUnicodeStream))
        {
            // An embedded CMap stream: decode and parse it. A null decode result leaves the CMap unset.
            var decodedUnicodeCMap = toUnicodeStream?.Decode(filterProvider);

            if (decodedUnicodeCMap != null)
            {
                toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
            }
        }
        else if (DirectObjectFinder.TryGet<NameToken>(toUnicodeValue, scanner, out var toUnicodeName))
        {
            // A name: look the CMap up in the cache by that name.
            toUnicodeCMap = CMapCache.Get(toUnicodeName.Data);
        }
        else
        {
            throw new PdfDocumentFormatException($"Invalid type of toUnicode CMap encountered. Got: {toUnicodeValue}.");
        }
    }

    return new Type0Font(baseFont, cidFont, cMap, toUnicodeCMap, ucs2CMap, isChineseJapaneseOrKorean);
}
/// <summary>
/// Recursively walks the page tree from <paramref name="currentPageDictionary"/>, numbering each page
/// it meets, until the sought page number has been reached.
/// </summary>
/// <param name="currentPageDictionary">The page tree node (<c>Page</c> or <c>Pages</c>) to inspect.</param>
/// <param name="soughtPageNumber">The page number being searched for.</param>
/// <param name="pageNumbersObserved">Page numbers assigned so far; every page visited is appended.</param>
/// <param name="pageTreeMembers">Receives the <c>MediaBox</c> declared on any <c>Pages</c> node that is visited.</param>
/// <returns><c>true</c> once the sought page was visited in this subtree; otherwise <c>false</c>.</returns>
public bool FindPage(DictionaryToken currentPageDictionary, int soughtPageNumber, List<int> pageNumbersObserved, PageTreeMembers pageTreeMembers)
{
    var nodeType = currentPageDictionary.GetNameOrDefault(NameToken.Type);

    if (!(nodeType is null) && nodeType.Equals(NameToken.Page))
    {
        // Leaf node: give it the next number and remember its dictionary, whether or not it is the sought page.
        var pageNumber = GetNextPageNumber(pageNumbersObserved);

        locatedPages[pageNumber] = currentPageDictionary;
        pageNumbersObserved.Add(pageNumber);

        return pageNumber == soughtPageNumber;
    }

    if (nodeType is null || !nodeType.Equals(NameToken.Pages))
    {
        log.Warn("Did not find the expected type (Page or Pages) in dictionary: " + currentPageDictionary);

        return false;
    }

    if (currentPageDictionary.TryGet(NameToken.MediaBox, out var mediaBoxToken))
    {
        var corners = DirectObjectFinder.Get<ArrayToken>(mediaBoxToken, pdfScanner);

        var bounds = new PdfRectangle(
            corners.GetNumeric(0).Data,
            corners.GetNumeric(1).Data,
            corners.GetNumeric(2).Data,
            corners.GetNumeric(3).Data);

        pageTreeMembers.MediaBox = new MediaBox(bounds);
    }

    if (!currentPageDictionary.TryGet(NameToken.Kids, out var kidsToken))
    {
        return false;
    }

    if (!(kidsToken is ArrayToken kidsArray))
    {
        return false;
    }

    // Resources of this node are loaded only once it is known to have a Kids array.
    pageFactory.LoadResources(currentPageDictionary, isLenientParsing);

    foreach (var kid in kidsArray.Data)
    {
        var childDictionary = DirectObjectFinder.Get<DictionaryToken>(kid, pdfScanner);

        // Stop at the first child subtree that contains the sought page.
        if (FindPage(childDictionary, soughtPageNumber, pageNumbersObserved, pageTreeMembers))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Resolves the UCS2 CMap for a composite font whose descendant CIDFont uses one of the Adobe
/// Chinese, Japanese or Korean character collections (GB1, CNS1, Japan1, Korea1).
/// </summary>
/// <param name="dictionary">The Type 0 font dictionary; it must declare an <c>Encoding</c> name for a CMap to be resolved.</param>
/// <param name="isCMapPredefined">Whether the font's encoding CMap is a predefined CMap; when false nothing is resolved.</param>
/// <param name="cidFont">The descendant CIDFont whose system info selects the character collection; may be null.</param>
/// <returns>
/// The UCS2 CMap (or <c>null</c> when none applies or none could be found) and a flag telling whether the
/// descendant font uses one of the Adobe CJK character collections.
/// </returns>
private static (CMap, bool isChineseJapaneseOrKorean) GetUcs2CMap(DictionaryToken dictionary, bool isCMapPredefined, ICidFont cidFont)
{
    if (!isCMapPredefined)
    {
        return (null, false);
    }

    /*
     * If the font is a composite font that uses one of the predefined CMaps except Identity-H and Identity-V or whose descendant
     * CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or Adobe-Korea1 character collection use a UCS2 CMap.
     */
    var encodingName = dictionary.GetNameOrDefault(NameToken.Encoding);

    if (encodingName == null)
    {
        return (null, false);
    }

    var isChineseJapaneseOrKorean = false;

    if (cidFont != null && string.Equals(cidFont.SystemInfo.Registry, "Adobe", StringComparison.OrdinalIgnoreCase))
    {
        var ordering = cidFont.SystemInfo.Ordering;

        isChineseJapaneseOrKorean = string.Equals(ordering, "GB1", StringComparison.OrdinalIgnoreCase)
                                    || string.Equals(ordering, "CNS1", StringComparison.OrdinalIgnoreCase)
                                    || string.Equals(ordering, "Japan1", StringComparison.OrdinalIgnoreCase)
                                    || string.Equals(ordering, "Korea1", StringComparison.OrdinalIgnoreCase);
    }

    // Only the CJK character collections lead to a UCS2 CMap here. The former separate
    // Identity-H / Identity-V check returned the same result as this guard and has been removed.
    if (!isChineseJapaneseOrKorean)
    {
        return (null, false);
    }

    // cidFont is non-null here: the flag can only be set inside the null-checked branch above.
    var fullCmapName = cidFont.SystemInfo.ToString();
    var nonUnicodeCMap = CMapCache.Get(fullCmapName);

    if (nonUnicodeCMap == null)
    {
        return (null, true);
    }

    var unicodeCMapName = $"{nonUnicodeCMap.Info.Registry}-{nonUnicodeCMap.Info.Ordering}-UCS2";

    return (CMapCache.Get(unicodeCMapName), true);
}
/// <summary>
/// Validates that a descendant dictionary is a font dictionary and hands it to the CID font factory.
/// </summary>
/// <param name="dictionary">The descendant font dictionary of a Type 0 font.</param>
/// <param name="isLenientParsing">Forwarded to the CID font factory.</param>
/// <returns>Whatever the CID font factory produces for the dictionary.</returns>
/// <exception cref="InvalidFontFormatException">The dictionary's <c>Type</c> entry is missing or is not <c>Font</c>.</exception>
private ICidFont ParseDescendant(DictionaryToken dictionary, bool isLenientParsing)
{
    var declaredType = dictionary.GetNameOrDefault(NameToken.Type);

    // A missing Type entry is rejected just like a wrong one.
    var isFontDictionary = !(declaredType is null) && declaredType.Equals(NameToken.Font);

    if (!isFontDictionary)
    {
        throw new InvalidFontFormatException($"Expected \'Font\' dictionary but found \'{declaredType}\'");
    }

    return cidFontFactory.Generate(dictionary, isLenientParsing);
}
/// <summary>
/// Creates the CID font described by a CID font dictionary. Only subtype <c>CIDFontType2</c> yields a font.
/// </summary>
/// <param name="dictionary">The CID font dictionary; its <c>Type</c> entry must be <c>Font</c>.</param>
/// <param name="isLenientParsing">Forwarded to the descriptor factory.</param>
/// <returns>
/// A <see cref="Type2CidFont"/> for subtype <c>CIDFontType2</c>; <c>null</c> for every other subtype,
/// including <c>CIDFontType0</c>.
/// </returns>
/// <exception cref="InvalidFontFormatException">The dictionary's <c>Type</c> entry is not <c>Font</c>.</exception>
public ICidFont Generate(DictionaryToken dictionary, bool isLenientParsing)
{
    var type = dictionary.GetNameOrDefault(NameToken.Type);

    if (!NameToken.Font.Equals(type))
    {
        throw new InvalidFontFormatException($"Expected \'Font\' dictionary but found \'{type}\'");
    }

    var widths = ReadWidths(dictionary);
    var verticalWritingMetrics = ReadVerticalDisplacements(dictionary);

    // The descriptor remains null when the dictionary does not provide one.
    FontDescriptor descriptor = TryGetFontDescriptor(dictionary, out var descriptorDictionary)
        ? descriptorFactory.Generate(descriptorDictionary, isLenientParsing)
        : null;

    var fontProgram = ReadDescriptorFile(descriptor);
    var baseFont = dictionary.GetNameOrDefault(NameToken.BaseFont);
    var systemInfo = GetSystemInfo(dictionary);
    var subType = dictionary.GetNameOrDefault(NameToken.Subtype);

    // CIDFontType0 has no implementation here, so it shares the null result with unrecognised subtypes.
    if (!NameToken.CidFontType2.Equals(subType))
    {
        return null;
    }

    return new Type2CidFont(type, subType, baseFont, systemInfo, descriptor, fontProgram, verticalWritingMetrics, widths);
}
/// <summary>
/// Creates a <see cref="Page"/> from its page dictionary: resolves rotation and boxes, loads the inherited
/// and page-level resources, parses the content stream(s) and finally unloads the resources again.
/// </summary>
/// <param name="number">The page number, used for box lookup, content parsing and error messages.</param>
/// <param name="dictionary">The page dictionary.</param>
/// <param name="pageTreeMembers">Values inherited from the page tree (rotation, parent resources).</param>
/// <param name="isLenientParsing">When true a wrong <c>Type</c> entry is tolerated; also forwarded to resource, content and annotation handling.</param>
/// <returns>The parsed page.</returns>
/// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The dictionary's type is not <c>Page</c> (strict parsing only), or a content stream could not be found.
/// </exception>
/// <exception cref="PdfDocumentFormatException">An entry of the <c>Contents</c> array is not an indirect reference.</exception>
public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool isLenientParsing)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    var type = dictionary.GetNameOrDefault(NameToken.Type);

    if (type != null && !type.Equals(NameToken.Page) && !isLenientParsing)
    {
        throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
    }

    // The page's own Rotate entry overrides the rotation inherited from the page tree.
    var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);
    if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
    {
        rotation = new PageRotationDegrees(rotateToken.Int);
    }

    MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
    CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);

    // Counts the resource dictionaries loaded for this page so exactly that many are unloaded again.
    var stackDepth = 0;

    try
    {
        while (pageTreeMembers.ParentResources.Count > 0)
        {
            var resource = pageTreeMembers.ParentResources.Dequeue();

            resourceStore.LoadResourceDictionary(resource, isLenientParsing);
            stackDepth++;
        }

        if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
        {
            resourceStore.LoadResourceDictionary(resources, isLenientParsing);
            stackDepth++;
        }

        UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

        // Stays at its default value when the page has no Contents entry.
        PageContent content = default(PageContent);

        if (!dictionary.TryGet(NameToken.Contents, out var contents))
        {
            // ignored for now, is it possible? check the spec...
        }
        else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
        {
            var bytes = new List<byte>();

            for (var i = 0; i < array.Data.Count; i++)
            {
                var item = array.Data[i];

                if (!(item is IndirectReferenceToken obj))
                {
                    throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                }

                var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

                if (contentStream == null)
                {
                    throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                }

                bytes.AddRange(contentStream.Decode(filterProvider));

                // Separate consecutive streams so tokens cannot merge across a stream boundary.
                if (i < array.Data.Count - 1)
                {
                    bytes.Add((byte)'\n');
                }
            }

            content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
        }
        else
        {
            var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

            if (contentStream == null)
            {
                throw new InvalidOperationException("Failed to parse the content for the page: " + number);
            }

            var bytes = contentStream.Decode(filterProvider);

            content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
        }

        return new Page(number, dictionary, mediaBox, cropBox, rotation, content,
            new AnnotationProvider(pdfScanner, dictionary, isLenientParsing),
            pdfScanner);
    }
    finally
    {
        // Runs on failure too: otherwise an exception while parsing this page would leave its
        // resource dictionaries loaded in the store.
        for (var i = 0; i < stackDepth; i++)
        {
            resourceStore.UnloadResourceDictionary();
        }
    }
}
/// <summary>
/// Creates a <see cref="Page"/> from its page dictionary: resolves boxes and rotation, loads the inherited
/// and page-level resources, parses the content stream(s) and finally unloads the resources again.
/// </summary>
/// <param name="number">The page number, used for box lookup, content parsing and error messages.</param>
/// <param name="dictionary">The page dictionary.</param>
/// <param name="pageTreeMembers">Values inherited from the page tree (rotation, parent resources).</param>
/// <param name="clipPaths">Forwarded to content parsing.</param>
/// <returns>The parsed page; a page without a <c>Contents</c> entry gets an empty <see cref="PageContent"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
/// <exception cref="InvalidOperationException">A content stream could not be found.</exception>
/// <exception cref="PdfDocumentFormatException">An entry of the <c>Contents</c> array is not an indirect reference.</exception>
public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool clipPaths)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    var type = dictionary.GetNameOrDefault(NameToken.Type);

    // A wrong Type entry is only logged; parsing continues.
    if (type != null && !type.Equals(NameToken.Page))
    {
        log?.Error($"Page {number} had its type specified as {type} rather than 'Page'.");
    }

    MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
    CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);

    // The page's own Rotate entry overrides the rotation inherited from the page tree.
    var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);
    if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
    {
        rotation = new PageRotationDegrees(rotateToken.Int);
    }

    // Counts the resource dictionaries loaded for this page so exactly that many are unloaded again.
    var stackDepth = 0;

    try
    {
        while (pageTreeMembers.ParentResources.Count > 0)
        {
            var resource = pageTreeMembers.ParentResources.Dequeue();

            resourceStore.LoadResourceDictionary(resource);
            stackDepth++;
        }

        if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
        {
            resourceStore.LoadResourceDictionary(resources);
            stackDepth++;
        }

        // Apply rotation: when the rotation swaps the axes, both boxes are rebuilt with swapped coordinates.
        if (rotation.SwapsAxis)
        {
            mediaBox = new MediaBox(new PdfRectangle(mediaBox.Bounds.Bottom, mediaBox.Bounds.Left, mediaBox.Bounds.Top, mediaBox.Bounds.Right));
            cropBox = new CropBox(new PdfRectangle(cropBox.Bounds.Bottom, cropBox.Bounds.Left, cropBox.Bounds.Top, cropBox.Bounds.Right));
        }

        UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

        PageContent content;

        if (!dictionary.TryGet(NameToken.Contents, out var contents))
        {
            // No Contents entry: produce an empty content object.
            // ignored for now, is it possible? check the spec...
            content = new PageContent(EmptyArray<IGraphicsStateOperation>.Instance,
                EmptyArray<Letter>.Instance,
                EmptyArray<PdfPath>.Instance,
                EmptyArray<Union<XObjectContentRecord, InlineImage>>.Instance,
                EmptyArray<MarkedContentElement>.Instance,
                pdfScanner,
                filterProvider,
                resourceStore);
        }
        else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
        {
            var bytes = new List<byte>();

            for (var i = 0; i < array.Data.Count; i++)
            {
                var item = array.Data[i];

                if (!(item is IndirectReferenceToken obj))
                {
                    throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                }

                var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

                if (contentStream == null)
                {
                    throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                }

                bytes.AddRange(contentStream.Decode(filterProvider, pdfScanner));

                // Separate consecutive streams so tokens cannot merge across a stream boundary.
                if (i < array.Data.Count - 1)
                {
                    bytes.Add((byte)'\n');
                }
            }

            content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, clipPaths, mediaBox);
        }
        else
        {
            var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

            if (contentStream == null)
            {
                throw new InvalidOperationException("Failed to parse the content for the page: " + number);
            }

            var bytes = contentStream.Decode(filterProvider, pdfScanner);

            content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, clipPaths, mediaBox);
        }

        return new Page(number, dictionary, mediaBox, cropBox, rotation, content,
            new AnnotationProvider(pdfScanner, dictionary),
            pdfScanner);
    }
    finally
    {
        // Runs on failure too: otherwise an exception while parsing this page would leave its
        // resource dictionaries loaded in the store.
        for (var i = 0; i < stackDepth; i++)
        {
            resourceStore.UnloadResourceDictionary();
        }
    }
}
/// <summary>
/// Creates a <see cref="Page"/> from its page dictionary: resolves the media and crop boxes, loads the
/// page resources and parses the content stream(s).
/// </summary>
/// <param name="number">The page number, used for box lookup and error messages.</param>
/// <param name="dictionary">The page dictionary.</param>
/// <param name="pageTreeMembers">Values inherited from the page tree, used when resolving the boxes.</param>
/// <param name="isLenientParsing">When true a wrong <c>Type</c> entry is tolerated; also forwarded to box, resource and content handling.</param>
/// <returns>The parsed page. Its content is left at the default value when the dictionary has no <c>Contents</c> entry.</returns>
/// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The dictionary's type is not <c>Page</c> (strict parsing only), or a content stream could not be found.
/// </exception>
/// <exception cref="PdfDocumentFormatException">An entry of the <c>Contents</c> array is not an indirect reference.</exception>
public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool isLenientParsing)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    var type = dictionary.GetNameOrDefault(NameToken.Type);

    if (type != null && !type.Equals(NameToken.Page) && !isLenientParsing)
    {
        throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
    }

    MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);
    CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);

    UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

    LoadResources(dictionary, isLenientParsing);

    // Stays at its default value when the page has no Contents entry.
    PageContent content = default(PageContent);

    if (!dictionary.TryGet(NameToken.Contents, out var contents))
    {
        // ignored for now, is it possible? check the spec...
    }
    else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
    {
        var bytes = new List<byte>();
        var needsSeparator = false;

        foreach (var item in array.Data)
        {
            if (!(item is IndirectReferenceToken obj))
            {
                throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
            }

            var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

            if (contentStream == null)
            {
                throw new InvalidOperationException($"Could not find the contents for object {obj}.");
            }

            // The streams of a Contents array are parsed as one stream; without whitespace between
            // them the last token of one stream could fuse with the first token of the next.
            if (needsSeparator)
            {
                bytes.Add((byte)'\n');
            }

            bytes.AddRange(contentStream.Decode(filterProvider));
            needsSeparator = true;
        }

        content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing);
    }
    else
    {
        var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

        if (contentStream == null)
        {
            throw new InvalidOperationException("Failed to parse the content for the page: " + number);
        }

        var bytes = contentStream.Decode(filterProvider);

        content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing);
    }

    return new Page(number, mediaBox, cropBox, content);
}