/// <summary>
/// Returns the ToUnicode map associated with a Uni CMap name, building it on first
/// request and remembering it in <c>uniMaps</c> for later calls.
/// </summary>
/// <param name="uniMap">name of the Uni CMap; may be <c>null</c></param>
/// <returns>
/// the cached or freshly built map, or <c>null</c> when <paramref name="uniMap"/> is
/// <c>null</c> or <c>FontCache.GetUni2CidCmap</c> yields nothing for it
/// </returns>
internal static CMapToUnicode GetToUnicodeFromUniMap(String uniMap)
{
    if (uniMap == null)
    {
        return null;
    }

    // The cache is consulted and filled under the same lock.
    lock (uniMaps)
    {
        if (uniMaps.Contains(uniMap))
        {
            return uniMaps.Get(uniMap);
        }

        CMapToUnicode built;
        if (!PdfEncodings.IDENTITY_H.Equals(uniMap))
        {
            CMapUniCid uniToCid = FontCache.GetUni2CidCmap(uniMap);
            if (uniToCid == null)
            {
                // Failed lookups are not cached.
                return null;
            }
            built = uniToCid.ExportToUnicode();
        }
        else
        {
            built = CMapToUnicode.GetIdentity();
        }

        uniMaps[uniMap] = built;
        return built;
    }
}
/// <summary>
/// Extracts the text shown by a <c>TJ</c> statement: strips the trailing two-character
/// operator, parses the remaining array operand and decodes every string element.
/// </summary>
/// <param name="rawContent">the raw statement, ending with the operator</param>
/// <param name="cMapToUnicode">ToUnicode CMap of the current font; may be <c>null</c></param>
/// <param name="encodingDifferenceToUnicode">encoding differences of the current font; may be <c>null</c></param>
/// <returns>
/// the decoded text, or <c>null</c> when the statement is <c>null</c>, shorter than the
/// operator, or has no operand
/// </returns>
public static string GetTJContent(string rawContent, CMapToUnicode cMapToUnicode, EncodingDifferenceToUnicode encodingDifferenceToUnicode)
{
    // A statement shorter than the two-character operator cannot carry an operand;
    // without this guard Remove() below would throw.
    if (rawContent == null || rawContent.Length < 2)
    {
        return null;
    }

    string rawArray = rawContent.Remove(rawContent.Length - 2).Trim();
    if (string.IsNullOrWhiteSpace(rawArray))
    {
        return null;
    }

    PdfArrayDataType pdfArrayDataType = PdfArrayDataType.Parse(rawArray);

    // Accumulate in a builder instead of re-allocating the string for every element.
    System.Text.StringBuilder content = new System.Text.StringBuilder();
    foreach (string item in pdfArrayDataType.Elements.Where(element => element is string))
    {
        string escapedContent = item.Trim();
        content.Append(PdfHexStringDataType.IsStartChar(escapedContent)
            ? PdfFontHelper.ToUnicode(PdfHexStringDataType.GetHexContent(escapedContent), cMapToUnicode, encodingDifferenceToUnicode).ToString()
            : PdfFontHelper.ToUnicode(PdfStringDataType.GetContentFromEscapedContent(escapedContent), cMapToUnicode, encodingDifferenceToUnicode));
    }

    // The former "Media" check only wrote a blank line to the console (debug leftover) and was removed.
    return content.ToString();
}
/// <summary>
/// Builds a document TrueType font program for a CID font dictionary, creating one glyph
/// per code known to the supplied ToUnicode map.
/// </summary>
/// <param name="fontDictionary">the CID font dictionary (source of FontDescriptor, W and DW)</param>
/// <param name="toUnicode">code-to-Unicode map; when <c>null</c> only the fallback glyph 0 is created</param>
/// <returns>the populated font program</returns>
internal static TrueTypeFont CreateFontProgram(PdfDictionary fontDictionary, CMapToUnicode toUnicode)
{
    iText.Kernel.Font.DocTrueTypeFont fontProgram = new iText.Kernel.Font.DocTrueTypeFont(fontDictionary);
    PdfDictionary fontDescriptor = fontDictionary.GetAsDictionary(PdfName.FontDescriptor);
    FillFontDescriptor(fontProgram, fontDescriptor);

    // Default glyph width. The descriptor lookup is kept for backward compatibility, but
    // the PDF specification defines /DW on the CIDFont dictionary itself, so fall back to
    // it before assuming the spec default of 1000.
    int dw = 1000;
    if (fontDescriptor != null && fontDescriptor.ContainsKey(PdfName.DW))
    {
        dw = (int)fontDescriptor.GetAsInt(PdfName.DW);
    }
    else if (fontDictionary.ContainsKey(PdfName.DW))
    {
        dw = (int)fontDictionary.GetAsInt(PdfName.DW);
    }

    if (toUnicode != null)
    {
        IntHashtable widths = FontUtil.ConvertCompositeWidthsArray(fontDictionary.GetAsArray(PdfName.W));
        fontProgram.avgWidth = 0;
        foreach (int cid in toUnicode.GetCodes())
        {
            int width = widths.ContainsKey(cid) ? widths.Get(cid) : dw;
            Glyph glyph = new Glyph(cid, width, toUnicode.Lookup(cid));
            if (glyph.HasValidUnicode())
            {
                fontProgram.unicodeToGlyph[glyph.GetUnicode()] = glyph;
            }
            fontProgram.codeToGlyph[cid] = glyph;
            fontProgram.avgWidth += width;
        }
        if (fontProgram.codeToGlyph.Count != 0)
        {
            fontProgram.avgWidth /= fontProgram.codeToGlyph.Count;
        }
    }

    // Make sure code 0 always resolves to a glyph.
    if (fontProgram.codeToGlyph.Get(0) == null)
    {
        fontProgram.codeToGlyph[0] = new Glyph(0, dw, -1);
    }
    return fontProgram;
}
/// <summary>
/// Creates the font encoding described by a font's <c>/Encoding</c> entry, falling back to
/// the ToUnicode map and finally to a font-specific encoding.
/// </summary>
/// <param name="encoding">the <c>/Encoding</c> value (name or dictionary); may be <c>null</c></param>
/// <param name="toUnicode">the font's ToUnicode map; may be <c>null</c></param>
/// <returns>the resulting encoding</returns>
public static FontEncoding CreateDocFontEncoding(PdfObject encoding, CMapToUnicode toUnicode)
{
    if (encoding != null)
    {
        if (encoding.IsName())
        {
            return FontEncoding.CreateFontEncoding(((PdfName)encoding).GetValue());
        }

        if (encoding.IsDictionary())
        {
            PdfDictionary encodingDictionary = (PdfDictionary)encoding;
            iText.Kernel.Font.DocFontEncoding fromDictionary = new iText.Kernel.Font.DocFontEncoding();
            fromDictionary.differences = new String[256];
            FillBaseEncoding(fromDictionary, encodingDictionary.GetAsName(PdfName.BaseEncoding));
            FillDifferences(fromDictionary, encodingDictionary.GetAsArray(PdfName.Differences), toUnicode);
            return fromDictionary;
        }
    }

    // No usable /Encoding entry: derive the encoding from ToUnicode when there is one.
    if (toUnicode == null)
    {
        return FontEncoding.CreateFontSpecificEncoding();
    }

    iText.Kernel.Font.DocFontEncoding fromToUnicode = new iText.Kernel.Font.DocFontEncoding();
    fromToUnicode.differences = new String[256];
    FillDifferences(fromToUnicode, toUnicode);
    return fromToUnicode;
}
/// <summary>
/// Converts a font's <c>/ToUnicode</c> entry into a <see cref="CMapToUnicode"/>.
/// </summary>
/// <param name="toUnicode">the <c>/ToUnicode</c> value; may be <c>null</c></param>
/// <returns>
/// the parsed CMap for a stream, <c>CMapToUnicode.EmptyCMapToUnicodeMap</c> when parsing
/// the stream throws, the identity map for the Identity-H name, otherwise <c>null</c>
/// </returns>
internal static CMapToUnicode ProcessToUnicode(PdfObject toUnicode)
{
    if (!(toUnicode is PdfStream))
    {
        return PdfName.IdentityH.Equals(toUnicode) ? CMapToUnicode.GetIdentity() : null;
    }

    try
    {
        byte[] cmapBytes = ((PdfStream)toUnicode).GetBytes();
        ICMapLocation location = new CMapLocationFromBytes(cmapBytes);
        CMapToUnicode parsed = new CMapToUnicode();
        CMapParser.ParseCid("", parsed, location);
        return parsed;
    }
    catch (Exception)
    {
        // A broken CMap is logged and replaced by the shared empty map rather than propagated.
        ILogger logger = LoggerFactory.GetLogger(typeof(CMapToUnicode));
        logger.Error(LogMessageConstant.UNKNOWN_ERROR_WHILE_PROCESSING_CMAP);
        return CMapToUnicode.EmptyCMapToUnicodeMap;
    }
}
// Parses the "cmap_with_def_dictionary_key.txt" resource into a fresh CMapToUnicode.
// The test contains no assertion: it passes as long as parsing does not throw.
virtual public void TestCMapWithDefDictionaryKey()
{
    byte[] cmapBytes = TestResourceUtils.GetResourceAsByteArray(TEST_RESOURCES_PATH, "cmap_with_def_dictionary_key.txt");
    CidLocationFromByte location = new CidLocationFromByte(cmapBytes);
    CMapToUnicode parsedCmap = new CMapToUnicode();

    CMapParserEx.ParseCid("", parsedCmap, location);
}
/**
 * Parses the ToUnicode entry, if present, and constructs a CMap for it.
 * For Type0 fonts without a ToUnicode stream, a map is derived from the
 * CIDSystemInfo ordering when the encoding is Identity-H.
 * On failure both toUnicodeCmap and uni2cid are reset to null.
 * @since 2.1.7
 */
private void ProcessToUnicode()
{
    PdfObject toUni = PdfReader.GetPdfObjectRelease(fontDic.Get(PdfName.TOUNICODE));

    if (toUni is PRStream)
    {
        try
        {
            byte[] cmapBytes = PdfReader.GetStreamBytes((PRStream)toUni);
            CidLocationFromByte location = new CidLocationFromByte(cmapBytes);
            toUnicodeCmap = new CMapToUnicode();
            CMapParserEx.ParseCid("", toUnicodeCmap, location);
            uni2cid = toUnicodeCmap.CreateReverseMapping();
        }
        catch
        {
            // Deliberately swallowed: a junk CMap must not prevent text extraction,
            // so the font simply ends up without a ToUnicode map.
            toUnicodeCmap = null;
            uni2cid = null;
        }
        return;
    }

    if (!isType0)
    {
        return;
    }

    // Fake a ToUnicode for CJK Identity-H fonts.
    try
    {
        PdfName encodingName = fontDic.GetAsName(PdfName.ENCODING);
        if (encodingName == null)
        {
            return;
        }
        String decodedEncoding = PdfName.DecodeName(encodingName.ToString());
        if (!decodedEncoding.Equals("Identity-H"))
        {
            return;
        }

        PdfArray descendants = (PdfArray)PdfReader.GetPdfObjectRelease(fontDic.Get(PdfName.DESCENDANTFONTS));
        PdfDictionary cidFont = (PdfDictionary)PdfReader.GetPdfObjectRelease(descendants[0]);
        PdfDictionary cidInfo = cidFont.GetAsDict(PdfName.CIDSYSTEMINFO);
        if (cidInfo == null)
        {
            return;
        }
        PdfString ordering = cidInfo.GetAsString(PdfName.ORDERING);
        if (ordering == null)
        {
            return;
        }

        CMapToUnicode orderingMap = IdentityToUnicode.GetMapFromOrdering(ordering.ToUnicodeString());
        if (orderingMap == null)
        {
            return;
        }
        toUnicodeCmap = orderingMap;
        uni2cid = toUnicodeCmap.CreateReverseMapping();
    }
    catch (IOException)
    {
        toUnicodeCmap = null;
        uni2cid = null;
    }
}
// Test helper: registers a single code -> text mapping in a fresh CMapToUnicode and
// asserts that looking the same code bytes up again returns the original text.
private void CheckInsertAndRetrieval(byte[] bytes, string uni)
{
    CMapToUnicode cmap = new CMapToUnicode();
    PdfString code = new PdfString(bytes);
    PdfString unicodeValue = new PdfString(uni, "UTF-16BE");
    cmap.AddChar(code, unicodeValue);

    string found = cmap.Lookup(bytes, 0, bytes.Length);

    Assert.AreEqual(uni, found);
}
/// <summary>
/// Creates a TrueType font wrapper from an existing font dictionary: derives the encoding
/// from <c>/Encoding</c> and <c>/ToUnicode</c>, builds the font program and marks the font
/// as embedded when the program exposes a font file.
/// </summary>
/// <param name="fontDictionary">the existing font dictionary</param>
internal PdfTrueTypeFont(PdfDictionary fontDictionary)
    : base(fontDictionary)
{
    newFont = false;

    PdfObject toUnicodeEntry = fontDictionary.Get(PdfName.ToUnicode);
    CMapToUnicode toUnicodeMap = FontUtil.ProcessToUnicode(toUnicodeEntry);

    PdfObject encodingEntry = fontDictionary.Get(PdfName.Encoding);
    fontEncoding = DocFontEncoding.CreateDocFontEncoding(encodingEntry, toUnicodeMap);
    fontProgram = DocTrueTypeFont.CreateFontProgram(fontDictionary, fontEncoding, toUnicodeMap);

    IDocFontProgram docProgram = (IDocFontProgram)fontProgram;
    embedded = docProgram.GetFontFile() != null;
    subset = false;
}
/// <summary>
/// Converts one character code to a Unicode character, preferring the ToUnicode CMap,
/// then the encoding differences, and finally a direct numeric conversion.
/// </summary>
/// <param name="character">the character code</param>
/// <param name="cMapToUnicode">ToUnicode CMap; may be <c>null</c></param>
/// <param name="encodingDifferenceToUnicode">encoding differences; may be <c>null</c></param>
/// <returns>the converted character</returns>
public static char ToUnicode(int character, CMapToUnicode cMapToUnicode, EncodingDifferenceToUnicode encodingDifferenceToUnicode)
{
    if (cMapToUnicode != null)
    {
        return cMapToUnicode.ConvertToUnicodeChar(character);
    }

    if (encodingDifferenceToUnicode != null)
    {
        return encodingDifferenceToUnicode.ConvertToUnicodeChar(character);
    }

    return Convert.ToChar(character);
}
/// <summary>
/// Converts decoded string content to Unicode text, preferring the ToUnicode CMap, then
/// the encoding differences; with neither available the content is returned unchanged.
/// </summary>
/// <param name="content">the content to convert</param>
/// <param name="cMapToUnicode">ToUnicode CMap; may be <c>null</c></param>
/// <param name="encodingDifferenceToUnicode">encoding differences; may be <c>null</c></param>
/// <returns>the converted text</returns>
public static string ToUnicode(string content, CMapToUnicode cMapToUnicode, EncodingDifferenceToUnicode encodingDifferenceToUnicode)
{
    if (cMapToUnicode != null)
    {
        return cMapToUnicode.ConvertToString(content);
    }

    if (encodingDifferenceToUnicode == null)
    {
        return content;
    }

    return encodingDifferenceToUnicode.ConvertToString(content);
}
/// <summary>Creates a Type 3 font based on an existing font dictionary, which must be an indirect object.</summary>
/// <remarks>
/// Reads the encoding, bounding box, first/last character range, widths, font matrix and
/// character procedures from the dictionary and registers one glyph per character
/// procedure whose name resolves to a Unicode value the encoding can encode.
/// </remarks>
/// <param name="fontDictionary">a dictionary of type <code>/Font</code>, must have an indirect reference.</param>
internal PdfType3Font(PdfDictionary fontDictionary)
    : base(fontDictionary)
{
    subset = true;
    embedded = true;
    fontProgram = new Type3Font(false);

    CMapToUnicode toUnicodeMap = FontUtil.ProcessToUnicode(fontDictionary.Get(PdfName.ToUnicode));
    fontEncoding = DocFontEncoding.CreateDocFontEncoding(fontDictionary.Get(PdfName.Encoding), toUnicodeMap);

    PdfDictionary charProcs = GetPdfObject().GetAsDictionary(PdfName.CharProcs);
    PdfArray matrixEntries = GetPdfObject().GetAsArray(PdfName.FontMatrix);

    // Bounding box: taken from /FontBBox when present, otherwise all zeros.
    if (!GetPdfObject().ContainsKey(PdfName.FontBBox))
    {
        fontProgram.GetFontMetrics().SetBbox(0, 0, 0, 0);
    }
    else
    {
        PdfArray bbox = GetPdfObject().GetAsArray(PdfName.FontBBox);
        fontProgram.GetFontMetrics().SetBbox(
            bbox.GetAsNumber(0).IntValue(),
            bbox.GetAsNumber(1).IntValue(),
            bbox.GetAsNumber(2).IntValue(),
            bbox.GetAsNumber(3).IntValue());
    }

    // Flag every code in the declared [FirstChar, LastChar] range.
    int first = NormalizeFirstLastChar(fontDictionary.GetAsNumber(PdfName.FirstChar), 0);
    int last = NormalizeFirstLastChar(fontDictionary.GetAsNumber(PdfName.LastChar), 255);
    for (int code = first; code <= last; code++)
    {
        shortTag[code] = 1;
    }

    int[] widths = FontUtil.ConvertSimpleWidthsArray(fontDictionary.GetAsArray(PdfName.Widths), first, 0);

    double[] matrix = new double[6];
    for (int index = 0; index < matrixEntries.Size(); index++)
    {
        matrix[index] = ((PdfNumber)matrixEntries.Get(index)).GetValue();
    }
    SetFontMatrix(matrix);

    foreach (PdfName glyphName in charProcs.KeySet())
    {
        int unicode = AdobeGlyphList.NameToUnicode(glyphName.GetValue());
        if (unicode == -1 || !fontEncoding.CanEncode(unicode))
        {
            // Character procedures that cannot be mapped to an encodable Unicode value are skipped.
            continue;
        }

        int glyphCode = fontEncoding.ConvertToByte(unicode);
        ((Type3Font)GetFontProgram()).AddGlyph(
            glyphCode,
            unicode,
            widths[glyphCode],
            null,
            new Type3Glyph(charProcs.GetAsStream(glyphName), GetDocument()));
    }

    FillFontDescriptor(fontDictionary.GetAsDictionary(PdfName.FontDescriptor));
}
/**
 * Applies the /Differences array of an encoding dictionary to the uni2byte, byte2uni
 * and diffmap tables. Glyph names unknown to GlyphList are resolved through the
 * ToUnicode CMap, which is loaded lazily (an empty CMap is used when there is none).
 */
private void FillDiffMap(PdfDictionary encDic, CMapToUnicode toUnicode)
{
    PdfArray differences = encDic.GetAsArray(PdfName.DIFFERENCES);
    if (differences == null)
    {
        return;
    }

    diffmap = new IntHashtable();
    int code = 0;
    for (int index = 0; index < differences.Size; ++index)
    {
        PdfObject entry = differences[index];
        if (entry.IsNumber())
        {
            // A number restarts the running code; it does not consume a code itself.
            code = ((PdfNumber)entry).IntValue;
            continue;
        }

        int[] codePoints = GlyphList.NameToUnicode(PdfName.DecodeName(((PdfName)entry).ToString()));
        if (codePoints != null && codePoints.Length > 0)
        {
            uni2byte[codePoints[0]] = code;
            byte2uni[code] = codePoints[0];
            diffmap[codePoints[0]] = code;
        }
        else
        {
            if (toUnicode == null)
            {
                toUnicode = ProcessToUnicode() ?? new CMapToUnicode();
            }

            string mapped = toUnicode.Lookup(new byte[] { (byte)code }, 0, 1);
            if (mapped != null && mapped.Length == 1)
            {
                this.uni2byte[mapped[0]] = code;
                this.byte2uni[code] = mapped[0];
                this.diffmap[mapped[0]] = code;
            }
        }

        ++code;
    }
}
/// <summary>
/// Creates a Type 1 font wrapper from an existing font dictionary: derives the encoding
/// from <c>/Encoding</c> and <c>/ToUnicode</c> and builds the font program.
/// </summary>
/// <remarks>
/// <c>embedded</c> is only assigned when the created program implements
/// <c>IDocFontProgram</c>; otherwise it keeps its previous value.
/// </remarks>
/// <param name="fontDictionary">the existing font dictionary</param>
internal PdfType1Font(PdfDictionary fontDictionary)
    : base(fontDictionary)
{
    newFont = false;

    PdfObject toUnicodeEntry = fontDictionary.Get(PdfName.ToUnicode);
    CMapToUnicode toUnicodeMap = FontUtil.ProcessToUnicode(toUnicodeEntry);

    PdfObject encodingEntry = fontDictionary.Get(PdfName.Encoding);
    fontEncoding = DocFontEncoding.CreateDocFontEncoding(encodingEntry, toUnicodeMap);
    fontProgram = DocType1Font.CreateFontProgram(fontDictionary, fontEncoding, toUnicodeMap);

    IDocFontProgram docProgram = fontProgram as IDocFontProgram;
    if (docProgram != null)
    {
        embedded = docProgram.GetFontFile() != null;
    }

    subset = false;
}
/**
 * Parses the font's ToUnicode stream into a CMap.
 * @return the parsed CMap, or null when the entry is not a stream or parsing fails
 */
private CMapToUnicode ProcessToUnicode()
{
    PdfObject toUni = PdfReader.GetPdfObjectRelease(this.font.Get(PdfName.TOUNICODE));
    if (!(toUni is PRStream))
    {
        return null;
    }

    try
    {
        byte[] cmapBytes = PdfReader.GetStreamBytes((PRStream)toUni);
        CidLocationFromByte location = new CidLocationFromByte(cmapBytes);
        CMapToUnicode parsed = new CMapToUnicode();
        CMapParserEx.ParseCid("", parsed, location);
        return parsed;
    }
    catch
    {
        // Any parsing failure is reported to the caller as "no CMap".
        return null;
    }
}
/// <summary>
/// Converts a sequence of character codes to Unicode text, preferring the ToUnicode CMap,
/// then the encoding differences, and finally treating every code as a Unicode value.
/// </summary>
/// <remarks>
/// The fallback used to block-copy the 32-bit codes into a byte buffer and decode it as
/// UTF-16, which produced a spurious U+0000 after every character. Codes are now converted
/// one by one, in line with the single-character overload.
/// </remarks>
/// <param name="content">the character codes</param>
/// <param name="cMapToUnicode">ToUnicode CMap; may be <c>null</c></param>
/// <param name="encodingDifferenceToUnicode">encoding differences; may be <c>null</c></param>
/// <returns>the converted text</returns>
public static string ToUnicode(int[] content, CMapToUnicode cMapToUnicode, EncodingDifferenceToUnicode encodingDifferenceToUnicode)
{
    if (cMapToUnicode != null)
    {
        return cMapToUnicode.ConvertToString(content);
    }

    if (encodingDifferenceToUnicode != null)
    {
        return encodingDifferenceToUnicode.ConvertToString(content);
    }

    System.Text.StringBuilder text = new System.Text.StringBuilder(content.Length);
    foreach (int code in content)
    {
        if (code >= 0x10000 && code <= 0x10FFFF)
        {
            // Supplementary-plane code point: emit it as a surrogate pair.
            text.Append(char.ConvertFromUtf32(code));
        }
        else
        {
            // BMP value; anything out of range is truncated to 16 bits instead of throwing.
            text.Append(unchecked((char)code));
        }
    }
    return text.ToString();
}
/// <summary>
/// Loads and parses the ToUnicode CMap of a font referenced from a page's resources.
/// </summary>
/// <param name="pdfReader">reader of the document</param>
/// <param name="pageNumber">number of the page whose resources are searched</param>
/// <param name="fontKey">resource name of the font</param>
/// <returns>
/// the parsed CMap, or <c>null</c> when the font is not found or has no ToUnicode stream
/// </returns>
public static CMapToUnicode GetFontCMapToUnicode(PdfReader pdfReader, int pageNumber, string fontKey)
{
    PdfDictionary resources = pdfReader.GetPageN(pageNumber).GetAsDict(PdfName.RESOURCES);
    var fontDict = FindFontDictionary(resources, fontKey);
    if (fontDict == null)
    {
        return null;
    }

    // Use a safe cast: a ToUnicode entry that is not a stream previously caused an
    // InvalidCastException; it is now treated like a missing entry.
    PRStream toUnicodeStream = PdfReader.GetPdfObject(fontDict.Get(PdfName.TOUNICODE)) as PRStream;
    if (toUnicodeStream == null)
    {
        return null;
    }

    string toUnicode = Encoding.UTF8.GetString(PdfReader.GetStreamBytes(toUnicodeStream));
    return CMapToUnicode.Parse(toUnicode);
}
/**
 * Parses the ToUnicode entry, if present, and constructs a CMap for it.
 * On failure both toUnicodeCmap and uni2cid are reset to null.
 * @since 2.1.7
 */
private void ProcessToUnicode()
{
    PdfObject toUni = PdfReader.GetPdfObjectRelease(fontDic.Get(PdfName.TOUNICODE));
    if (!(toUni is PRStream))
    {
        return;
    }

    try
    {
        byte[] cmapBytes = PdfReader.GetStreamBytes((PRStream)toUni);
        CidLocationFromByte location = new CidLocationFromByte(cmapBytes);
        toUnicodeCmap = new CMapToUnicode();
        CMapParserEx.ParseCid("", toUnicodeCmap, location);
        uni2cid = toUnicodeCmap.CreateReverseMapping();
    }
    catch
    {
        // Deliberately swallowed: a junk CMap must not prevent text extraction,
        // so the font simply ends up without a ToUnicode map.
        toUnicodeCmap = null;
        uni2cid = null;
    }
}
// Creates a Type 0 (composite) font wrapper from an existing font dictionary.
//
// The first descendant font and the /Encoding name are read from the dictionary.
// - Identity-H / Identity-V encoding: the /ToUnicode entry is processed; when that yields
//   nothing, a map is looked up from the CID font's ordering, and as a last resort the
//   Identity-H map is used and an UNKNOWN_CMAP error is logged. The font program is built
//   from the descendant font and cidFontType is set to CID_FONT_TYPE_2.
// - Any other encoding: when the Uni map name starts with "Uni" and
//   CidFontProperties.IsCidFont accepts the font, the font program is created by name
//   (an IOException resets fontProgram and cmapEncoding to null); otherwise it is built
//   from the Uni map's ToUnicode table when one is available. A PdfException is thrown if
//   no font program could be created; cidFontType is set to CID_FONT_TYPE_0.
//
// NOTE(review): /Encoding is read with GetAsName(...).GetValue(); presumably this fails
// when /Encoding is missing or is a stream rather than a name - confirm against callers.
// NOTE(review): the exception message literal is split across two physical lines in this
// copy of the source; verify the intended text before editing it.
internal PdfType0Font(PdfDictionary fontDictionary) : base(fontDictionary) { CheckFontDictionary(fontDictionary, PdfName.Type0); newFont = false; PdfDictionary cidFont = fontDictionary.GetAsArray(PdfName.DescendantFonts).GetAsDictionary(0); String cmap = fontDictionary.GetAsName(PdfName.Encoding).GetValue(); if (PdfEncodings.IDENTITY_H.Equals(cmap) || PdfEncodings.IDENTITY_V.Equals(cmap)) { PdfObject toUnicode = fontDictionary.Get(PdfName.ToUnicode); CMapToUnicode toUnicodeCMap = FontUtil.ProcessToUnicode(toUnicode); if (toUnicodeCMap == null) { String uniMap = GetUniMapFromOrdering(GetOrdering(cidFont)); toUnicodeCMap = FontUtil.GetToUnicodeFromUniMap(uniMap); if (toUnicodeCMap == null) { toUnicodeCMap = FontUtil.GetToUnicodeFromUniMap(PdfEncodings.IDENTITY_H); ILogger logger = LoggerFactory.GetLogger(typeof(iText.Kernel.Font.PdfType0Font)); logger.Error(String.Format(LogMessageConstant.UNKNOWN_CMAP, uniMap)); } } fontProgram = DocTrueTypeFont.CreateFontProgram(cidFont, toUnicodeCMap); cmapEncoding = new CMapEncoding(cmap); System.Diagnostics.Debug.Assert(fontProgram is IDocFontProgram); embedded = ((IDocFontProgram)fontProgram).GetFontFile() != null; cidFontType = CID_FONT_TYPE_2; } else { String cidFontName = cidFont.GetAsName(PdfName.BaseFont).GetValue(); String uniMap = GetUniMapFromOrdering(GetOrdering(cidFont)); if (uniMap != null && uniMap.StartsWith("Uni") && CidFontProperties.IsCidFont(cidFontName, uniMap)) { try { fontProgram = FontProgramFactory.CreateFont(cidFontName); cmapEncoding = new CMapEncoding(cmap, uniMap); embedded = false; } catch (System.IO.IOException) { fontProgram = null; cmapEncoding = null; } } else { CMapToUnicode toUnicodeCMap = FontUtil.GetToUnicodeFromUniMap(uniMap); if (toUnicodeCMap != null) { fontProgram = DocTrueTypeFont.CreateFontProgram(cidFont, toUnicodeCMap); cmapEncoding = new CMapEncoding(cmap, uniMap); } } if (fontProgram == null) { throw new PdfException(String.Format("Cannot recognise document font {0} with {1} 
encoding", cidFontName, cmap)); } cidFontType = CID_FONT_TYPE_0; } longTag = new LinkedDictionary <int, int[]>(); subset = false; }
// Initialises the encoding tables, widths and descriptor values of a simple document font.
//
// 1. Encoding: without an /Encoding entry the base encoding is filled (using the base font
//    name for the built-in Symbol / ZapfDingbats fonts, null otherwise) and the reverse
//    mapping of the ToUnicode CMap, if any, is merged into uni2byte / byte2uni. With a name
//    the named encoding is filled; with a dictionary the /BaseEncoding is filled and the
//    /Differences array is applied to uni2byte, byte2uni and diffmap. Glyph names unknown
//    to GlyphList are resolved through the lazily loaded ToUnicode CMap (an empty CMap is
//    used when the font has none).
// 2. Built-in fonts: widths are taken from the corresponding BaseFont for every mapped
//    code, then overridden for the differences (diffmap is cleared afterwards), and the
//    descriptor values (ascender, cap height, bounding box, ...) are copied from it.
// 3. When /FirstChar, /LastChar and /Widths are all present the widths array is grown if
//    necessary and filled starting at index FirstChar.
// 4. Finally the font descriptor dictionary is processed by FillFontDesc.
//
// NOTE(review): when the widths array is grown, only the first FirstChar entries are
// copied into the new array - confirm this is intended.
private void DoType1TT() { CMapToUnicode toUnicode = null; PdfObject enc = PdfReader.GetPdfObject(font.Get(PdfName.ENCODING)); if (enc == null) { PdfName baseFont = font.GetAsName(PdfName.BASEFONT); if (BuiltinFonts14.ContainsKey(fontName) && (PdfName.SYMBOL.Equals(baseFont) || PdfName.ZAPFDINGBATS.Equals(baseFont))) { FillEncoding(baseFont); } else { FillEncoding(null); } toUnicode = ProcessToUnicode(); if (toUnicode != null) { IDictionary <int, int> rm = toUnicode.CreateReverseMapping(); foreach (KeyValuePair <int, int> kv in rm) { uni2byte[kv.Key] = kv.Value; byte2uni[kv.Value] = kv.Key; } } } else { if (enc.IsName()) { FillEncoding((PdfName)enc); } else if (enc.IsDictionary()) { PdfDictionary encDic = (PdfDictionary)enc; enc = PdfReader.GetPdfObject(encDic.Get(PdfName.BASEENCODING)); if (enc == null) { FillEncoding(null); } else { FillEncoding((PdfName)enc); } PdfArray diffs = encDic.GetAsArray(PdfName.DIFFERENCES); if (diffs != null) { diffmap = new IntHashtable(); int currentNumber = 0; for (int k = 0; k < diffs.Size; ++k) { PdfObject obj = diffs[k]; if (obj.IsNumber()) { currentNumber = ((PdfNumber)obj).IntValue; } else { int[] c = GlyphList.NameToUnicode(PdfName.DecodeName(((PdfName)obj).ToString())); if (c != null && c.Length > 0) { uni2byte[c[0]] = currentNumber; byte2uni[currentNumber] = c[0]; diffmap[c[0]] = currentNumber; } else { if (toUnicode == null) { toUnicode = ProcessToUnicode(); if (toUnicode == null) { toUnicode = new CMapToUnicode(); } } string unicode = toUnicode.Lookup(new byte[] { (byte)currentNumber }, 0, 1); if ((unicode != null) && (unicode.Length == 1)) { this.uni2byte[unicode[0]] = currentNumber; this.byte2uni[currentNumber] = unicode[0]; this.diffmap[unicode[0]] = currentNumber; } } ++currentNumber; } } } } } PdfArray newWidths = font.GetAsArray(PdfName.WIDTHS); PdfNumber first = font.GetAsNumber(PdfName.FIRSTCHAR); PdfNumber last = font.GetAsNumber(PdfName.LASTCHAR); if (BuiltinFonts14.ContainsKey(fontName)) { BaseFont bf = 
BaseFont.CreateFont(fontName, WINANSI, false); int[] e = uni2byte.ToOrderedKeys(); for (int k = 0; k < e.Length; ++k) { int n = uni2byte[e[k]]; widths[n] = bf.GetRawWidth(n, GlyphList.UnicodeToName(e[k])); } if (diffmap != null) //widths for differences must override existing ones { e = diffmap.ToOrderedKeys(); for (int k = 0; k < e.Length; ++k) { int n = diffmap[e[k]]; widths[n] = bf.GetRawWidth(n, GlyphList.UnicodeToName(e[k])); } diffmap = null; } Ascender = bf.GetFontDescriptor(ASCENT, 1000); CapHeight = bf.GetFontDescriptor(CAPHEIGHT, 1000); Descender = bf.GetFontDescriptor(DESCENT, 1000); ItalicAngle = bf.GetFontDescriptor(ITALICANGLE, 1000); fontWeight = bf.GetFontDescriptor(FONT_WEIGHT, 1000); llx = bf.GetFontDescriptor(BBOXLLX, 1000); lly = bf.GetFontDescriptor(BBOXLLY, 1000); urx = bf.GetFontDescriptor(BBOXURX, 1000); ury = bf.GetFontDescriptor(BBOXURY, 1000); } if (first != null && last != null && newWidths != null) { int f = first.IntValue; int nSize = f + newWidths.Size; if (widths.Length < nSize) { int[] tmp = new int[nSize]; System.Array.Copy(widths, 0, tmp, 0, f); widths = tmp; } for (int k = 0; k < newWidths.Size; ++k) { widths[f + k] = newWidths.GetAsNumber(k).IntValue; } } FillFontDesc(font.GetAsDict(PdfName.FONTDESCRIPTOR)); }
/// <summary>
/// Creates a Type 0 (composite) font wrapper from an existing font dictionary.
/// </summary>
/// <remarks>
/// With an Identity-H / Identity-V encoding name the font program is built from the
/// descendant font, using the <c>/ToUnicode</c> CMap, the ordering-based Uni map or, as a
/// last resort, the Identity-H map (an error is logged in that case). With any other
/// encoding the font program is created by name for known CID fonts, or from a ToUnicode
/// map otherwise; a <c>PdfException</c> is thrown when no font program can be created.
/// The CID font type is then taken from the descendant font's <c>/Subtype</c>.
/// </remarks>
/// <param name="fontDictionary">the existing font dictionary</param>
internal PdfType0Font(PdfDictionary fontDictionary)
    : base(fontDictionary)
{
    newFont = false;
    PdfDictionary cidFont = fontDictionary.GetAsArray(PdfName.DescendantFonts).GetAsDictionary(0);
    PdfObject cmap = fontDictionary.Get(PdfName.Encoding);
    PdfObject toUnicode = fontDictionary.Get(PdfName.ToUnicode);
    CMapToUnicode toUnicodeCMap = FontUtil.ProcessToUnicode(toUnicode);

    bool identityEncoding = cmap.IsName()
        && (PdfEncodings.IDENTITY_H.Equals(((PdfName)cmap).GetValue())
            || PdfEncodings.IDENTITY_V.Equals(((PdfName)cmap).GetValue()));

    if (identityEncoding)
    {
        if (toUnicodeCMap == null)
        {
            String orderingUniMap = GetUniMapFromOrdering(GetOrdering(cidFont));
            toUnicodeCMap = FontUtil.GetToUnicodeFromUniMap(orderingUniMap);
            if (toUnicodeCMap == null)
            {
                // Nothing usable: fall back to the identity map and report the unknown CMap.
                toUnicodeCMap = FontUtil.GetToUnicodeFromUniMap(PdfEncodings.IDENTITY_H);
                ILog logger = LogManager.GetLogger(typeof(iText.Kernel.Font.PdfType0Font));
                logger.Error(MessageFormatUtil.Format(iText.IO.LogMessageConstant.UNKNOWN_CMAP, orderingUniMap));
            }
        }

        fontProgram = DocTrueTypeFont.CreateFontProgram(cidFont, toUnicodeCMap);
        cmapEncoding = CreateCMap(cmap, null);
        System.Diagnostics.Debug.Assert(fontProgram is IDocFontProgram);
        embedded = ((IDocFontProgram)fontProgram).GetFontFile() != null;
    }
    else
    {
        String cidFontName = cidFont.GetAsName(PdfName.BaseFont).GetValue();
        String uniMap = GetUniMapFromOrdering(GetOrdering(cidFont));
        bool knownCidFont = uniMap != null
            && uniMap.StartsWith("Uni")
            && CidFontProperties.IsCidFont(cidFontName, uniMap);

        if (knownCidFont)
        {
            try
            {
                fontProgram = FontProgramFactory.CreateFont(cidFontName);
                cmapEncoding = CreateCMap(cmap, uniMap);
                embedded = false;
            }
            catch (System.IO.IOException)
            {
                fontProgram = null;
                cmapEncoding = null;
            }
        }
        else
        {
            if (toUnicodeCMap == null)
            {
                toUnicodeCMap = FontUtil.GetToUnicodeFromUniMap(uniMap);
            }
            if (toUnicodeCMap != null)
            {
                fontProgram = DocTrueTypeFont.CreateFontProgram(cidFont, toUnicodeCMap);
                cmapEncoding = CreateCMap(cmap, uniMap);
            }
        }

        if (fontProgram == null)
        {
            throw new PdfException(MessageFormatUtil.Format(PdfException.CannotRecogniseDocumentFontWithEncoding, cidFontName, cmap));
        }
    }

    // DescendantFonts is a one-element array specifying the CIDFont dictionary
    // that is the descendant of this Type 0 font.
    PdfDictionary cidFontDictionary = fontDictionary.GetAsArray(PdfName.DescendantFonts).GetAsDictionary(0);

    // Subtype is required according to the spec.
    PdfName subtype = cidFontDictionary.GetAsName(PdfName.Subtype);
    if (PdfName.CIDFontType0.Equals(subtype))
    {
        cidFontType = CID_FONT_TYPE_0;
    }
    else if (PdfName.CIDFontType2.Equals(subtype))
    {
        cidFontType = CID_FONT_TYPE_2;
    }
    else
    {
        LogManager.GetLogger(GetType()).Error(iText.IO.LogMessageConstant.FAILED_TO_DETERMINE_CID_FONT_SUBTYPE);
    }

    longTag = new SortedSet<int>();
    subset = false;
}
/**
 * Initialises the encoding tables, widths and descriptor values of a simple document font.
 * The encoding comes from /Encoding (name or dictionary with differences) or, when that
 * entry is absent, from the base encoding merged with the ToUnicode reverse mapping.
 * For the built-in fonts, widths and descriptor values are taken from the matching BaseFont.
 */
private void DoType1TT()
{
    CMapToUnicode toUnicode = null;
    PdfObject enc = PdfReader.GetPdfObject(font.Get(PdfName.ENCODING));

    if (enc == null)
    {
        PdfName baseFont = font.GetAsName(PdfName.BASEFONT);
        bool builtinSymbolic = BuiltinFonts14.ContainsKey(fontName)
            && (PdfName.SYMBOL.Equals(baseFont) || PdfName.ZAPFDINGBATS.Equals(baseFont));
        FillEncoding(builtinSymbolic ? baseFont : null);

        toUnicode = ProcessToUnicode();
        if (toUnicode != null)
        {
            foreach (KeyValuePair<int, int> mapping in toUnicode.CreateReverseMapping())
            {
                uni2byte[mapping.Key] = mapping.Value;
                byte2uni[mapping.Value] = mapping.Key;
            }
        }
    }
    else if (enc.IsName())
    {
        FillEncoding((PdfName)enc);
    }
    else if (enc.IsDictionary())
    {
        PdfDictionary encDic = (PdfDictionary)enc;
        // A missing /BaseEncoding resolves to null, which is passed on as-is.
        PdfObject baseEncoding = PdfReader.GetPdfObject(encDic.Get(PdfName.BASEENCODING));
        FillEncoding((PdfName)baseEncoding);
        FillDiffMap(encDic, toUnicode);
    }

    if (BuiltinFonts14.ContainsKey(fontName))
    {
        BaseFont builtin = BaseFont.CreateFont(fontName, WINANSI, false);

        foreach (int unicode in uni2byte.ToOrderedKeys())
        {
            int code = uni2byte[unicode];
            widths[code] = builtin.GetRawWidth(code, GlyphList.UnicodeToName(unicode));
        }

        if (diffmap != null)
        {
            // Widths for differences must override existing ones.
            foreach (int unicode in diffmap.ToOrderedKeys())
            {
                int code = diffmap[unicode];
                widths[code] = builtin.GetRawWidth(code, GlyphList.UnicodeToName(unicode));
            }
            diffmap = null;
        }

        Ascender = builtin.GetFontDescriptor(ASCENT, 1000);
        CapHeight = builtin.GetFontDescriptor(CAPHEIGHT, 1000);
        Descender = builtin.GetFontDescriptor(DESCENT, 1000);
        ItalicAngle = builtin.GetFontDescriptor(ITALICANGLE, 1000);
        fontWeight = builtin.GetFontDescriptor(FONT_WEIGHT, 1000);
        llx = builtin.GetFontDescriptor(BBOXLLX, 1000);
        lly = builtin.GetFontDescriptor(BBOXLLY, 1000);
        urx = builtin.GetFontDescriptor(BBOXURX, 1000);
        ury = builtin.GetFontDescriptor(BBOXURY, 1000);
    }

    FillWidths();
    FillFontDesc(font.GetAsDict(PdfName.FONTDESCRIPTOR));
}
/// <summary>
/// Applies a <c>/Differences</c> array to the encoding. Glyph names are resolved through
/// the Adobe glyph list first and through the ToUnicode mapping of the current code second.
/// </summary>
/// <remarks>
/// Codes outside 0..255 are skipped: a simple-font encoding has single-byte codes, and
/// without the check a malformed array would index past the per-code tables.
/// </remarks>
/// <param name="fontEncoding">the encoding to fill</param>
/// <param name="diffs">the <c>/Differences</c> array; may be <c>null</c></param>
/// <param name="toUnicode">the font's ToUnicode map; may be <c>null</c></param>
private static void FillDifferences(iText.Kernel.Font.DocFontEncoding fontEncoding, PdfArray diffs, CMapToUnicode toUnicode)
{
    IntHashtable byte2uni = toUnicode != null ? toUnicode.CreateDirectMapping() : new IntHashtable();
    if (diffs == null)
    {
        return;
    }

    int currentNumber = 0;
    for (int k = 0; k < diffs.Size(); ++k)
    {
        PdfObject obj = diffs.Get(k);
        if (obj.IsNumber())
        {
            // A number restarts the running code; it does not consume a code itself.
            currentNumber = ((PdfNumber)obj).IntValue();
            continue;
        }

        if (currentNumber < 0 || currentNumber > 255)
        {
            // Don't return or break: later sub-arrays may restart at a valid code.
            currentNumber++;
            continue;
        }

        String glyphName = ((PdfName)obj).GetValue();
        int unicode = AdobeGlyphList.NameToUnicode(glyphName);
        bool resolved = unicode != -1;
        if (!resolved && byte2uni.Contains(currentNumber))
        {
            // Unknown glyph name: use the ToUnicode mapping for this code instead.
            unicode = byte2uni.Get(currentNumber);
            resolved = true;
        }

        if (resolved)
        {
            fontEncoding.codeToUnicode[currentNumber] = unicode;
            fontEncoding.unicodeToCode.Put(unicode, currentNumber);
            fontEncoding.differences[currentNumber] = glyphName;
            fontEncoding.unicodeDifferences.Put(unicode, unicode);
        }

        currentNumber++;
    }
}
/// <summary>
/// Builds a document TrueType font program for a simple (single-byte) font: one glyph per
/// code 0..255, with widths taken from <c>/Widths</c> and Unicode values from the encoding.
/// </summary>
/// <param name="fontDictionary">the font dictionary</param>
/// <param name="fontEncoding">the encoding used to resolve codes to Unicode</param>
/// <param name="toUnicode">ToUnicode map used for glyphs the encoding does not round-trip; may be <c>null</c></param>
/// <returns>the populated font program</returns>
internal static TrueTypeFont CreateFontProgram(PdfDictionary fontDictionary, FontEncoding fontEncoding, CMapToUnicode toUnicode)
{
    iText.Kernel.Font.DocTrueTypeFont program = new iText.Kernel.Font.DocTrueTypeFont(fontDictionary);
    FillFontDescriptor(program, fontDictionary.GetAsDictionary(PdfName.FontDescriptor));

    PdfNumber firstCharNumber = fontDictionary.GetAsNumber(PdfName.FirstChar);
    int firstChar = 0;
    if (firstCharNumber != null)
    {
        firstChar = Math.Max(firstCharNumber.IntValue(), 0);
    }
    int[] widths = FontUtil.ConvertSimpleWidthsArray(fontDictionary.GetAsArray(PdfName.Widths), firstChar, program.GetMissingWidth());

    program.avgWidth = 0;
    int glyphsWithWidths = 0;
    for (int code = 0; code < 256; code++)
    {
        Glyph glyph = new Glyph(code, widths[code], fontEncoding.GetUnicode(code));
        program.codeToGlyph.Put(code, glyph);

        // FontEncoding.codeToUnicode table has higher priority.
        bool encodingRoundTrips = glyph.HasValidUnicode()
            && fontEncoding.ConvertToByte(glyph.GetUnicode()) == code;
        if (encodingRoundTrips)
        {
            program.unicodeToGlyph.Put(glyph.GetUnicode(), glyph);
        }
        else if (toUnicode != null)
        {
            glyph.SetChars(toUnicode.Lookup(code));
        }

        // Only glyphs with a positive width contribute to the average.
        if (widths[code] > 0)
        {
            glyphsWithWidths++;
            program.avgWidth += widths[code];
        }
    }

    if (glyphsWithWidths != 0)
    {
        program.avgWidth /= glyphsWithWidths;
    }
    return program;
}
/// <summary>
/// Creates a simple font wrapper from an existing font dictionary and loads the map
/// described by its <c>/ToUnicode</c> entry.
/// </summary>
/// <param name="fontDictionary">the existing font dictionary</param>
protected internal PdfSimpleFont(PdfDictionary fontDictionary)
    : base(fontDictionary)
{
    PdfObject toUnicodeEntry = fontDictionary.Get(PdfName.ToUnicode);
    toUnicode = FontUtil.ProcessToUnicode(toUnicodeEntry);
}
/// <summary>
/// Convenience overload that forwards to the three-argument
/// <c>CreateDocFontEncoding</c>, passing <c>true</c> as the third argument.
/// </summary>
/// <param name="encoding">the <c>/Encoding</c> value of the font</param>
/// <param name="toUnicode">the font's ToUnicode map</param>
/// <returns>the encoding produced by the three-argument overload</returns>
public static FontEncoding CreateDocFontEncoding(PdfObject encoding, CMapToUnicode toUnicode)
{
    FontEncoding created = CreateDocFontEncoding(encoding, toUnicode, true);
    return created;
}
/// <summary>
/// Extracts the text of a page by walking its content stream statement by statement,
/// tracking the text matrix, leading and current font, and grouping shown text into
/// lines by baseline.
/// </summary>
/// <remarks>
/// Changes compared with the previous implementation:
/// the TJ and Tj operators now share one line-assembly routine and the same baseline
/// tolerance (TJ used exact floating-point equality); an inline image without a
/// terminating "\nEI" stops the scan instead of storing -1 in the read pointer; a Tf
/// statement with fewer than three tokens is ignored instead of indexing out of range.
/// </remarks>
/// <param name="pdfReader">reader of the document</param>
/// <param name="pageNumber">number of the page to extract</param>
/// <returns>the extracted text, one baseline per line</returns>
private static string GetTextFromPage(PdfReader pdfReader, int pageNumber)
{
    StringBuilder sb = new StringBuilder();
    Matrix transformMatrix = Matrix.Identity;
    float leadingParameter = 0;
    CMapToUnicode cMapToUnicode = null;
    EncodingDifferenceToUnicode encodingDifferenceToUnicode = null;
    double oldY = 0;
    string lineContent = null;
    string rawPdfContent = Encoding.UTF8.GetString(Encoding.Convert(Encoding.Default, Encoding.UTF8, pdfReader.GetPageContent(pageNumber)));
    int pointer = 0;

    string statement = Statement.GetNextStatement(rawPdfContent, ref pointer);
    while (statement != null)
    {
        if (statement.EndsWith("BI"))
        {
            // Embedded image: skip ahead to the end-of-image marker.
            int endOfImage = rawPdfContent.IndexOf("\nEI", pointer, StringComparison.Ordinal);
            if (endOfImage < 0)
            {
                // Unterminated image data: nothing after it can be parsed as statements.
                break;
            }
            pointer = endOfImage;
        }
        else if (statement.EndsWith("Tm"))
        {
            Matrix matrix;
            if (Matrix.TryParse(statement, out matrix))
            {
                transformMatrix = matrix;
            }
        }
        else if (statement.EndsWith("Tf"))
        {
            // Expected shape: "<font key> <size> Tf"; the font key is the third token from the end.
            string[] fontParameters = statement.Split(' ');
            if (fontParameters.Length >= 3)
            {
                string fontKey = fontParameters[fontParameters.Length - 3];
                cMapToUnicode = PdfFontHelper.GetFontCMapToUnicode(pdfReader, pageNumber, fontKey);
                encodingDifferenceToUnicode = EncodingDifferenceToUnicode.Parse(PdfFontHelper.GetFont(pdfReader, pageNumber, fontKey));
            }
        }
        else if (statement.EndsWith("Td"))
        {
            float tx;
            float ty;
            if (TryParseTextOffset(statement, out tx, out ty))
            {
                // NOTE(review): Td replaces the matrix while TD below composes with it;
                // the PDF spec defines both as relative moves - confirm before aligning them.
                transformMatrix = new Matrix(1, 0, 0, 1, tx, ty);
            }
        }
        else if (statement.EndsWith("TD"))
        {
            float tx;
            float ty;
            if (TryParseTextOffset(statement, out tx, out ty))
            {
                transformMatrix = new Matrix(1, 0, 0, 1, tx, ty) * transformMatrix;
                leadingParameter = -ty;
            }
        }
        else if (statement.EndsWith("TL"))
        {
            float tl;
            string[] parameters = statement.Split(' ');
            if (float.TryParse(parameters[0], NumberStyles.Any, NumberFormatInfo.InvariantInfo, out tl))
            {
                leadingParameter = tl;
            }
        }
        else if (statement.EndsWith("T*"))
        {
            transformMatrix = new Matrix(1, 0, 0, 1, 0, -leadingParameter) * transformMatrix;
        }
        else if (statement.EndsWith("TJ"))
        {
            string content = TextObjectStatement.GetTJContent(statement, cMapToUnicode, encodingDifferenceToUnicode);
            if (!string.IsNullOrWhiteSpace(content))
            {
                AppendTextAtBaseline(sb, transformMatrix, content.Trim(), ref lineContent, ref oldY);
            }
        }
        else if (statement.Trim().EndsWith("Tj"))
        {
            string escapedContent = statement.Trim();
            escapedContent = escapedContent.Remove(escapedContent.Length - 2);
            string content = PdfHexStringDataType.IsStartChar(escapedContent)
                ? PdfHexStringDataType.GetContent(escapedContent)
                : PdfStringDataType.GetContentFromEscapedContent(escapedContent);
            content = content.Trim();
            content = PdfFontHelper.ToUnicode(content, cMapToUnicode, encodingDifferenceToUnicode);
            AppendTextAtBaseline(sb, transformMatrix, content, ref lineContent, ref oldY);
        }

        statement = Statement.GetNextStatement(rawPdfContent, ref pointer);
    }

    if (!string.IsNullOrWhiteSpace(lineContent))
    {
        sb.Append(lineContent);
    }
    return sb.ToString();
}

// Parses the two leading numeric operands (tx, ty) of a Td / TD statement.
private static bool TryParseTextOffset(string statement, out float tx, out float ty)
{
    tx = 0;
    ty = 0;
    string[] parameters = statement.Split(' ');
    return parameters.Length >= 2
        && float.TryParse(parameters[0], NumberStyles.Any, NumberFormatInfo.InvariantInfo, out tx)
        && float.TryParse(parameters[1], NumberStyles.Any, NumberFormatInfo.InvariantInfo, out ty);
}

// Adds content to the pending line when its baseline is within one unit of the current
// one; otherwise flushes the pending line to the output and starts a new line.
private static void AppendTextAtBaseline(StringBuilder sb, Matrix transformMatrix, string content, ref string lineContent, ref double oldY)
{
    Point position = new Point(
        transformMatrix.TransformX(Point.Origin.X, Point.Origin.Y),
        transformMatrix.TransformY(Point.Origin.X, Point.Origin.Y));

    if (Math.Abs(oldY - position.Y) < 1)
    {
        lineContent = string.IsNullOrWhiteSpace(lineContent) ? content : lineContent + " " + content;
        return;
    }

    if (!string.IsNullOrWhiteSpace(lineContent))
    {
        sb.AppendLine(lineContent);
    }
    lineContent = content;
    oldY = position.Y;
}
/// <summary>
/// Builds a document Type 1 font program: one glyph per code 0..255, with widths taken
/// from <c>/Widths</c> and Unicode values from the encoding.
/// </summary>
/// <remarks>
/// When the dictionary has no <c>/FontDescriptor</c>, the font is first looked up by name
/// through <c>FontProgramFactory</c>; if that succeeds the result is returned directly.
/// If it fails, the program is built from the dictionary alone and its subtype is left
/// unset (the descriptor used to be dereferenced unconditionally, which threw a
/// <c>NullReferenceException</c> on this path).
/// </remarks>
/// <param name="fontDictionary">the font dictionary</param>
/// <param name="fontEncoding">the encoding used to resolve codes to Unicode</param>
/// <param name="toUnicode">ToUnicode map used for glyphs without a valid Unicode value; may be <c>null</c></param>
/// <returns>the font program</returns>
internal static Type1Font CreateFontProgram(PdfDictionary fontDictionary, FontEncoding fontEncoding, CMapToUnicode toUnicode)
{
    PdfName baseFontName = fontDictionary.GetAsName(PdfName.BaseFont);
    String baseFont = baseFontName != null ? baseFontName.GetValue() : FontUtil.CreateRandomFontName();

    if (!fontDictionary.ContainsKey(PdfName.FontDescriptor))
    {
        Type1Font type1StdFont;
        try
        {
            // If there are no font modifiers, a cached font could be used,
            // otherwise a new instance should be created.
            type1StdFont = (Type1Font)FontProgramFactory.CreateFont(baseFont, true);
        }
        catch (Exception)
        {
            type1StdFont = null;
        }
        if (type1StdFont != null)
        {
            return type1StdFont;
        }
    }

    iText.Kernel.Font.DocType1Font fontProgram = new iText.Kernel.Font.DocType1Font(baseFont);
    PdfDictionary fontDesc = fontDictionary.GetAsDictionary(PdfName.FontDescriptor);
    // The descriptor is absent when the standard-font lookup above failed.
    fontProgram.subtype = fontDesc != null ? fontDesc.GetAsName(PdfName.Subtype) : null;
    // NOTE(review): FillFontDescriptor is presumably null-tolerant - confirm.
    FillFontDescriptor(fontProgram, fontDesc);

    PdfNumber firstCharNumber = fontDictionary.GetAsNumber(PdfName.FirstChar);
    int firstChar = firstCharNumber != null ? Math.Max(firstCharNumber.IntValue(), 0) : 0;
    int[] widths = FontUtil.ConvertSimpleWidthsArray(fontDictionary.GetAsArray(PdfName.Widths), firstChar, fontProgram.GetMissingWidth());

    fontProgram.avgWidth = 0;
    int glyphsWithWidths = 0;
    for (int i = 0; i < 256; i++)
    {
        Glyph glyph = new Glyph(i, widths[i], fontEncoding.GetUnicode(i));
        fontProgram.codeToGlyph[i] = glyph;
        if (glyph.HasValidUnicode())
        {
            // FontEncoding.codeToUnicode table has higher priority.
            if (fontEncoding.ConvertToByte(glyph.GetUnicode()) == i)
            {
                fontProgram.unicodeToGlyph[glyph.GetUnicode()] = glyph;
            }
        }
        else if (toUnicode != null)
        {
            glyph.SetChars(toUnicode.Lookup(i));
        }

        // Only glyphs with a positive width contribute to the average.
        if (widths[i] > 0)
        {
            glyphsWithWidths++;
            fontProgram.avgWidth += widths[i];
        }
    }

    if (glyphsWithWidths != 0)
    {
        fontProgram.avgWidth /= glyphsWithWidths;
    }
    return fontProgram;
}
/// <summary>
/// Applies a <c>/Differences</c> array to the encoding. Glyph names are resolved through
/// the Adobe glyph list first and through the ToUnicode mapping of the current code second.
/// Names positioned at a code above 255 are reported with a warning and skipped.
/// </summary>
/// <param name="fontEncoding">the encoding to fill</param>
/// <param name="diffs">the <c>/Differences</c> array; may be <c>null</c></param>
/// <param name="toUnicode">the font's ToUnicode map; may be <c>null</c></param>
private static void FillDifferences(iText.Kernel.Font.DocFontEncoding fontEncoding, PdfArray diffs, CMapToUnicode toUnicode)
{
    IntHashtable byte2uni = toUnicode != null ? toUnicode.CreateDirectMapping() : new IntHashtable();
    if (diffs == null)
    {
        return;
    }

    int code = 0;
    for (int index = 0; index < diffs.Size(); ++index)
    {
        PdfObject entry = diffs.Get(index);
        if (entry.IsNumber())
        {
            code = ((PdfNumber)entry).IntValue();
            continue;
        }

        if (code > 255)
        {
            // Don't return or break, because differences subarrays may be in any order:
            // e.g. [255 /space /one 250 /two /three]
            // /one is invalid but all others should be parsed.
            // The code is intentionally left unchanged here.
            ILog LOGGER = LogManager.GetLogger(typeof(iText.Kernel.Font.DocFontEncoding));
            LOGGER.Warn(MessageFormatUtil.Format(iText.IO.LogMessageConstant.DOCFONT_HAS_ILLEGAL_DIFFERENCES, ((PdfName)entry).GetValue()));
            continue;
        }

        String glyphName = ((PdfName)entry).GetValue();
        int unicode = AdobeGlyphList.NameToUnicode(glyphName);
        bool resolved = unicode != -1;
        if (!resolved && byte2uni.ContainsKey(code))
        {
            // Unknown glyph name: use the ToUnicode mapping for this code instead.
            unicode = byte2uni.Get(code);
            resolved = true;
        }

        if (resolved)
        {
            fontEncoding.codeToUnicode[code] = unicode;
            fontEncoding.unicodeToCode.Put(unicode, code);
            fontEncoding.differences[code] = glyphName;
            fontEncoding.unicodeDifferences.Put(unicode, unicode);
        }

        code++;
    }
}
/// <summary>
/// Fills the encoding from a ToUnicode map alone: every code of the map's direct mapping
/// is registered with its Unicode value and the glyph name the Adobe glyph list gives it.
/// </summary>
/// <remarks>
/// Codes outside 0..255 are skipped. The caller allocates <c>differences</c> with 256
/// entries, so a ToUnicode CMap containing multi-byte codes would otherwise index past
/// the table.
/// </remarks>
/// <param name="fontEncoding">the encoding to fill</param>
/// <param name="toUnicode">the font's ToUnicode map; must not be <c>null</c></param>
private static void FillDifferences(iText.Kernel.Font.DocFontEncoding fontEncoding, CMapToUnicode toUnicode)
{
    IntHashtable byte2uni = toUnicode.CreateDirectMapping();
    foreach (int code in byte2uni.GetKeys())
    {
        if (code < 0 || code > 255)
        {
            // Not representable in a single-byte encoding.
            continue;
        }

        int unicode = byte2uni.Get(code);
        String glyphName = AdobeGlyphList.UnicodeToName(unicode);
        fontEncoding.codeToUnicode[code] = unicode;
        fontEncoding.unicodeToCode.Put(unicode, code);
        fontEncoding.differences[code] = glyphName;
        fontEncoding.unicodeDifferences.Put(unicode, unicode);
    }
}