Exemple #1
0
 /// <summary>
 /// Resolves the ToUnicode CMap for a named Unicode CMap, caching every resolved map in <c>uniMaps</c>.
 /// </summary>
 /// <param name="uniMap">name of the Unicode CMap; may be null</param>
 /// <returns>
 /// the cached or newly built map; null when <paramref name="uniMap"/> is null or when
 /// <c>FontCache.GetUni2CidCmap</c> returns null for it
 /// </returns>
 internal static CMapToUnicode GetToUnicodeFromUniMap(String uniMap)
 {
     if (uniMap == null)
     {
         return null;
     }

     // Cache lookup and cache fill happen under a single lock on the cache object.
     lock (uniMaps)
     {
         if (uniMaps.Contains(uniMap))
         {
             return uniMaps.Get(uniMap);
         }

         CMapToUnicode resolved;
         if (!PdfEncodings.IDENTITY_H.Equals(uniMap))
         {
             CMapUniCid uniCid = FontCache.GetUni2CidCmap(uniMap);
             if (uniCid == null)
             {
                 // Names that cannot be resolved are not cached.
                 return null;
             }
             resolved = uniCid.ExportToUnicode();
         }
         else
         {
             resolved = CMapToUnicode.GetIdentity();
         }

         uniMaps[uniMap] = resolved;
         return resolved;
     }
 }
        /// <summary>
        /// Decodes the string elements of a raw array operand into text.
        /// </summary>
        /// <param name="rawContent">
        /// raw operator text; its last two characters are stripped before the remainder is parsed as an array
        /// (presumably the trailing "TJ" operator - confirm against callers)
        /// </param>
        /// <param name="cMapToUnicode">ToUnicode CMap forwarded to <c>PdfFontHelper.ToUnicode</c>; may be null</param>
        /// <param name="encodingDifferenceToUnicode">encoding differences forwarded to <c>PdfFontHelper.ToUnicode</c>; may be null</param>
        /// <returns>
        /// the concatenated decoded text of all string elements, or null when the input is null,
        /// shorter than two characters, or blank once the two trailing characters are removed
        /// </returns>
        public static string GetTJContent(string rawContent, CMapToUnicode cMapToUnicode, EncodingDifferenceToUnicode encodingDifferenceToUnicode)
        {
            // Remove(Length - 2) throws on shorter input; such input carries no operand anyway.
            if (rawContent == null || rawContent.Length < 2)
            {
                return(null);
            }
            string rawArray = rawContent.Remove(rawContent.Length - 2).Trim();

            if (string.IsNullOrWhiteSpace(rawArray))
            {
                return(null);
            }
            PdfArrayDataType pdfArrayDataType = PdfArrayDataType.Parse(rawArray);

            // StringBuilder avoids re-allocating the accumulated text for every array element.
            System.Text.StringBuilder content = new System.Text.StringBuilder();
            foreach (string item in pdfArrayDataType.Elements.Where(element => element is string))
            {
                string escapedContent = item.Trim();
                // Hex strings and literal strings go through different extractors before decoding.
                content.Append(
                    PdfHexStringDataType.IsStartChar(escapedContent) ?
                    PdfFontHelper.ToUnicode(PdfHexStringDataType.GetHexContent(escapedContent), cMapToUnicode, encodingDifferenceToUnicode).ToString() :
                    PdfFontHelper.ToUnicode(PdfStringDataType.GetContentFromEscapedContent(escapedContent), cMapToUnicode, encodingDifferenceToUnicode));
            }
            return(content.ToString());
        }
        /// <summary>
        /// Builds a document font program from a font dictionary and a ToUnicode CMap.
        /// </summary>
        /// <param name="fontDictionary">font dictionary supplying the FontDescriptor, DW and W entries</param>
        /// <param name="toUnicode">CMap whose codes become glyphs; when null only the fallback glyph for code 0 is added</param>
        /// <returns>the populated font program</returns>
        internal static TrueTypeFont CreateFontProgram(PdfDictionary fontDictionary, CMapToUnicode toUnicode)
        {
            iText.Kernel.Font.DocTrueTypeFont fontProgram = new iText.Kernel.Font.DocTrueTypeFont(fontDictionary);
            PdfDictionary fontDescriptor = fontDictionary.GetAsDictionary(PdfName.FontDescriptor);

            FillFontDescriptor(fontProgram, fontDescriptor);

            // DW (default glyph width) is an entry of the CIDFont dictionary itself, so look there first.
            // The FontDescriptor lookup is kept only as a fallback for backward compatibility.
            int dw = 1000;
            if (fontDictionary.ContainsKey(PdfName.DW))
            {
                dw = (int)fontDictionary.GetAsInt(PdfName.DW);
            }
            else if (fontDescriptor != null && fontDescriptor.ContainsKey(PdfName.DW))
            {
                dw = (int)fontDescriptor.GetAsInt(PdfName.DW);
            }

            if (toUnicode != null)
            {
                IntHashtable widths = FontUtil.ConvertCompositeWidthsArray(fontDictionary.GetAsArray(PdfName.W));
                fontProgram.avgWidth = 0;
                foreach (int cid in toUnicode.GetCodes())
                {
                    // Codes without an explicit width fall back to the default width.
                    int   width = widths.ContainsKey(cid) ? widths.Get(cid) : dw;
                    Glyph glyph = new Glyph(cid, width, toUnicode.Lookup(cid));
                    if (glyph.HasValidUnicode())
                    {
                        fontProgram.unicodeToGlyph[glyph.GetUnicode()] = glyph;
                    }
                    fontProgram.codeToGlyph[cid] = glyph;
                    fontProgram.avgWidth        += width;
                }
                // avgWidth holds the running sum until it is divided by the glyph count here.
                if (fontProgram.codeToGlyph.Count != 0)
                {
                    fontProgram.avgWidth /= fontProgram.codeToGlyph.Count;
                }
            }
            // Make sure code 0 always resolves to a glyph.
            if (fontProgram.codeToGlyph.Get(0) == null)
            {
                fontProgram.codeToGlyph[0] = new Glyph(0, dw, -1);
            }
            return(fontProgram);
        }
Exemple #4
0
 /// <summary>
 /// Creates the font encoding described by a font's Encoding entry and ToUnicode CMap.
 /// </summary>
 /// <param name="encoding">the Encoding entry: a name, a dictionary, or null</param>
 /// <param name="toUnicode">ToUnicode CMap used to fill differences; may be null</param>
 /// <returns>
 /// a named encoding, a differences-based encoding, or the font-specific encoding when neither
 /// a usable <paramref name="encoding"/> nor a <paramref name="toUnicode"/> map is available
 /// </returns>
 public static FontEncoding CreateDocFontEncoding(PdfObject encoding, CMapToUnicode toUnicode)
 {
     if (encoding != null && encoding.IsName())
     {
         return FontEncoding.CreateFontEncoding(((PdfName)encoding).GetValue());
     }

     if (encoding != null && encoding.IsDictionary())
     {
         PdfDictionary encodingDictionary = (PdfDictionary)encoding;
         iText.Kernel.Font.DocFontEncoding fromDictionary = new iText.Kernel.Font.DocFontEncoding();
         fromDictionary.differences = new String[256];
         FillBaseEncoding(fromDictionary, encodingDictionary.GetAsName(PdfName.BaseEncoding));
         FillDifferences(fromDictionary, encodingDictionary.GetAsArray(PdfName.Differences), toUnicode);
         return fromDictionary;
     }

     // No usable Encoding entry: fall back to the ToUnicode map, then to the font-specific encoding.
     if (toUnicode == null)
     {
         return FontEncoding.CreateFontSpecificEncoding();
     }

     iText.Kernel.Font.DocFontEncoding fromToUnicode = new iText.Kernel.Font.DocFontEncoding();
     fromToUnicode.differences = new String[256];
     FillDifferences(fromToUnicode, toUnicode);
     return fromToUnicode;
 }
Exemple #5
0
        /// <summary>
        /// Converts a font's ToUnicode entry into a <c>CMapToUnicode</c>.
        /// </summary>
        /// <param name="toUnicode">the ToUnicode entry; a stream, a name, or null</param>
        /// <returns>
        /// the parsed CMap for a stream, <c>CMapToUnicode.EmptyCMapToUnicodeMap</c> when parsing the stream throws,
        /// the identity map when the entry equals <c>PdfName.IdentityH</c>, and null otherwise
        /// </returns>
        internal static CMapToUnicode ProcessToUnicode(PdfObject toUnicode)
        {
            if (toUnicode is PdfStream)
            {
                try
                {
                    byte[] cmapBytes = ((PdfStream)toUnicode).GetBytes();
                    ICMapLocation location = new CMapLocationFromBytes(cmapBytes);
                    CMapToUnicode parsed = new CMapToUnicode();
                    CMapParser.ParseCid("", parsed, location);
                    return parsed;
                }
                catch (Exception)
                {
                    // A broken CMap is logged and replaced by the shared empty map instead of failing the font.
                    ILogger logger = LoggerFactory.GetLogger(typeof(CMapToUnicode));
                    logger.Error(LogMessageConstant.UNKNOWN_ERROR_WHILE_PROCESSING_CMAP);
                    return CMapToUnicode.EmptyCMapToUnicodeMap;
                }
            }

            return PdfName.IdentityH.Equals(toUnicode) ? CMapToUnicode.GetIdentity() : null;
        }
Exemple #6
0
        /// <summary>
        /// Smoke test: parsing the "cmap_with_def_dictionary_key.txt" resource must complete without throwing.
        /// </summary>
        public virtual void TestCMapWithDefDictionaryKey()
        {
            byte[] cmapBytes = TestResourceUtils.GetResourceAsByteArray(TEST_RESOURCES_PATH, "cmap_with_def_dictionary_key.txt");
            CidLocationFromByte location = new CidLocationFromByte(cmapBytes);
            CMapToUnicode parsedCmap = new CMapToUnicode();

            // No assertion on the result: the test passes when ParseCid returns normally.
            CMapParserEx.ParseCid("", parsedCmap, location);
        }
Exemple #7
0
        /**
         * Parses the ToUnicode entry, if present, and constructs a CMap for it.
         * When the entry is not a stream and the font is flagged as Type 0, a map is
         * instead looked up from the descendant font's CIDSystemInfo ordering, but only
         * when the font's encoding is Identity-H.
         * @since 2.1.7
         */
        private void ProcessToUnicode()
        {
            PdfObject toUnicodeEntry = PdfReader.GetPdfObjectRelease(fontDic.Get(PdfName.TOUNICODE));

            if (toUnicodeEntry is PRStream)
            {
                try
                {
                    byte[] cmapBytes = PdfReader.GetStreamBytes((PRStream)toUnicodeEntry);
                    CidLocationFromByte location = new CidLocationFromByte(cmapBytes);
                    toUnicodeCmap = new CMapToUnicode();
                    CMapParserEx.ParseCid("", toUnicodeCmap, location);
                    uni2cid = toUnicodeCmap.CreateReverseMapping();
                }
                catch
                {
                    // Deliberately best-effort: the cmap is sometimes junk, yet text can still be
                    // extracted without it, so the failure is swallowed and both maps are cleared.
                    toUnicodeCmap = null;
                    uni2cid       = null;
                }
                return;
            }

            if (!isType0)
            {
                return;
            }

            // fake a ToUnicode for CJK Identity-H fonts
            try
            {
                PdfName encodingName = fontDic.GetAsName(PdfName.ENCODING);
                if (encodingName == null)
                {
                    return;
                }

                String decodedEncoding = PdfName.DecodeName(encodingName.ToString());
                if (!decodedEncoding.Equals("Identity-H"))
                {
                    return;
                }

                PdfArray      descendants = (PdfArray)PdfReader.GetPdfObjectRelease(fontDic.Get(PdfName.DESCENDANTFONTS));
                PdfDictionary cidFont     = (PdfDictionary)PdfReader.GetPdfObjectRelease(descendants[0]);
                PdfDictionary systemInfo  = cidFont.GetAsDict(PdfName.CIDSYSTEMINFO);
                if (systemInfo == null)
                {
                    return;
                }

                PdfString ordering = systemInfo.GetAsString(PdfName.ORDERING);
                if (ordering == null)
                {
                    return;
                }

                CMapToUnicode orderingMap = IdentityToUnicode.GetMapFromOrdering(ordering.ToUnicodeString());
                if (orderingMap == null)
                {
                    return;
                }

                toUnicodeCmap = orderingMap;
                uni2cid       = toUnicodeCmap.CreateReverseMapping();
            }
            catch (IOException)
            {
                // Only I/O failures are absorbed here; both maps are cleared.
                toUnicodeCmap = null;
                uni2cid       = null;
            }
        }
Exemple #8
0
        /// <summary>
        /// Adds one mapping to a fresh CMap and asserts that looking up the same bytes returns the mapped text.
        /// </summary>
        /// <param name="bytes">source code bytes to map</param>
        /// <param name="uni">expected text, passed to <c>PdfString</c> with the "UTF-16BE" encoding name</param>
        private void CheckInsertAndRetrieval(byte[] bytes, string uni)
        {
            CMapToUnicode cmap = new CMapToUnicode();
            PdfString sourceCode = new PdfString(bytes);
            PdfString mappedText = new PdfString(uni, "UTF-16BE");
            cmap.AddChar(sourceCode, mappedText);

            Assert.AreEqual(uni, cmap.Lookup(bytes, 0, bytes.Length));
        }
Exemple #9
0
        /// <summary>
        /// Creates a TrueType font from an existing font dictionary.
        /// </summary>
        /// <param name="fontDictionary">font dictionary supplying the ToUnicode and Encoding entries</param>
        internal PdfTrueTypeFont(PdfDictionary fontDictionary)
            : base(fontDictionary)
        {
            newFont = false;
            subset  = false;

            PdfObject     toUnicodeEntry = fontDictionary.Get(PdfName.ToUnicode);
            CMapToUnicode toUnicodeMap   = FontUtil.ProcessToUnicode(toUnicodeEntry);

            fontEncoding = DocFontEncoding.CreateDocFontEncoding(fontDictionary.Get(PdfName.Encoding), toUnicodeMap);
            fontProgram  = DocTrueTypeFont.CreateFontProgram(fontDictionary, fontEncoding, toUnicodeMap);

            // The font counts as embedded when the document font program exposes a font file.
            IDocFontProgram docProgram = (IDocFontProgram)fontProgram;
            embedded = docProgram.GetFontFile() != null;
        }
 /// <summary>
 /// Converts a single character code to a Unicode character.
 /// </summary>
 /// <param name="character">the character code to convert</param>
 /// <param name="cMapToUnicode">ToUnicode CMap; takes precedence when not null</param>
 /// <param name="encodingDifferenceToUnicode">encoding differences; used when no CMap is given</param>
 /// <returns>the converted character; without either mapping, <c>Convert.ToChar(character)</c></returns>
 public static char ToUnicode(int character, CMapToUnicode cMapToUnicode, EncodingDifferenceToUnicode encodingDifferenceToUnicode)
 {
     if (cMapToUnicode != null)
     {
         return cMapToUnicode.ConvertToUnicodeChar(character);
     }

     if (encodingDifferenceToUnicode != null)
     {
         return encodingDifferenceToUnicode.ConvertToUnicodeChar(character);
     }

     return Convert.ToChar(character);
 }
 /// <summary>
 /// Converts encoded string content to Unicode text.
 /// </summary>
 /// <param name="content">the content to convert</param>
 /// <param name="cMapToUnicode">ToUnicode CMap; takes precedence when not null</param>
 /// <param name="encodingDifferenceToUnicode">encoding differences; used when no CMap is given</param>
 /// <returns>the converted text; without either mapping, <paramref name="content"/> unchanged</returns>
 public static string ToUnicode(string content, CMapToUnicode cMapToUnicode, EncodingDifferenceToUnicode encodingDifferenceToUnicode)
 {
     if (cMapToUnicode != null)
     {
         return cMapToUnicode.ConvertToString(content);
     }

     return encodingDifferenceToUnicode != null
         ? encodingDifferenceToUnicode.ConvertToString(content)
         : content;
 }
Exemple #12
0
        /// <summary>Creates a Type 3 font based on an existing font dictionary, which must be an indirect object.</summary>
        /// <param name="fontDictionary">a dictionary of type <code>/Font</code>, must have an indirect reference.</param>
        internal PdfType3Font(PdfDictionary fontDictionary)
            : base(fontDictionary)
        {
            subset      = true;
            embedded    = true;
            fontProgram = new Type3Font(false);

            CMapToUnicode toUnicodeMap = FontUtil.ProcessToUnicode(fontDictionary.Get(PdfName.ToUnicode));
            fontEncoding = DocFontEncoding.CreateDocFontEncoding(fontDictionary.Get(PdfName.Encoding), toUnicodeMap);

            PdfDictionary glyphProcedures = GetPdfObject().GetAsDictionary(PdfName.CharProcs);
            PdfArray      matrixEntries   = GetPdfObject().GetAsArray(PdfName.FontMatrix);

            // Bounding box: taken from FontBBox when present, otherwise all zeros.
            if (!GetPdfObject().ContainsKey(PdfName.FontBBox))
            {
                fontProgram.GetFontMetrics().SetBbox(0, 0, 0, 0);
            }
            else
            {
                PdfArray boundingBox = GetPdfObject().GetAsArray(PdfName.FontBBox);
                fontProgram.GetFontMetrics().SetBbox(
                    boundingBox.GetAsNumber(0).IntValue(),
                    boundingBox.GetAsNumber(1).IntValue(),
                    boundingBox.GetAsNumber(2).IntValue(),
                    boundingBox.GetAsNumber(3).IntValue());
            }

            int firstChar = NormalizeFirstLastChar(fontDictionary.GetAsNumber(PdfName.FirstChar), 0);
            int lastChar  = NormalizeFirstLastChar(fontDictionary.GetAsNumber(PdfName.LastChar), 255);

            // Flag every code in [firstChar, lastChar].
            for (int charCode = firstChar; charCode <= lastChar; charCode++)
            {
                shortTag[charCode] = 1;
            }

            int[] widths = FontUtil.ConvertSimpleWidthsArray(fontDictionary.GetAsArray(PdfName.Widths), firstChar, 0);

            double[] fontMatrix = new double[6];
            for (int entry = 0; entry < matrixEntries.Size(); entry++)
            {
                fontMatrix[entry] = ((PdfNumber)matrixEntries.Get(entry)).GetValue();
            }
            SetFontMatrix(fontMatrix);

            // Register a glyph for every char procedure whose name maps to an encodable Unicode value.
            foreach (PdfName glyphName in glyphProcedures.KeySet())
            {
                int unicode = AdobeGlyphList.NameToUnicode(glyphName.GetValue());
                if (unicode == -1 || !fontEncoding.CanEncode(unicode))
                {
                    continue;
                }

                int glyphCode = fontEncoding.ConvertToByte(unicode);
                ((Type3Font)GetFontProgram()).AddGlyph(
                    glyphCode,
                    unicode,
                    widths[glyphCode],
                    null,
                    new Type3Glyph(glyphProcedures.GetAsStream(glyphName), GetDocument()));
            }

            FillFontDescriptor(fontDictionary.GetAsDictionary(PdfName.FontDescriptor));
        }
Exemple #13
0
        /// <summary>
        /// Applies the Differences array of an encoding dictionary to <c>uni2byte</c>, <c>byte2uni</c> and <c>diffmap</c>.
        /// </summary>
        /// <param name="encDic">encoding dictionary that may contain a Differences array</param>
        /// <param name="toUnicode">
        /// ToUnicode CMap used for glyph names the glyph list cannot resolve; when null it is obtained
        /// from <c>ProcessToUnicode()</c>, or an empty CMap is used
        /// </param>
        private void FillDiffMap(PdfDictionary encDic, CMapToUnicode toUnicode)
        {
            PdfArray differences = encDic.GetAsArray(PdfName.DIFFERENCES);
            if (differences == null)
            {
                return;
            }

            diffmap = new IntHashtable();
            int code = 0;
            for (int index = 0; index < differences.Size; ++index)
            {
                PdfObject entry = differences[index];

                // A number restarts the running code; every other entry is a glyph name for the current code.
                if (entry.IsNumber())
                {
                    code = ((PdfNumber)entry).IntValue;
                    continue;
                }

                int[] codePoints = GlyphList.NameToUnicode(PdfName.DecodeName(((PdfName)entry).ToString()));
                if (codePoints != null && codePoints.Length > 0)
                {
                    uni2byte[codePoints[0]] = code;
                    byte2uni[code]          = codePoints[0];
                    diffmap[codePoints[0]]  = code;
                }
                else
                {
                    // Unknown glyph name: fall back to the ToUnicode CMap, loading it on first use.
                    if (toUnicode == null)
                    {
                        toUnicode = ProcessToUnicode() ?? new CMapToUnicode();
                    }

                    string mapped = toUnicode.Lookup(new byte[] { (byte)code }, 0, 1);
                    if (mapped != null && mapped.Length == 1)
                    {
                        this.uni2byte[mapped[0]] = code;
                        this.byte2uni[code]      = mapped[0];
                        this.diffmap[mapped[0]]  = code;
                    }
                }

                ++code;
            }
        }
Exemple #14
0
        /// <summary>
        /// Creates a Type 1 font from an existing font dictionary.
        /// </summary>
        /// <param name="fontDictionary">font dictionary supplying the ToUnicode and Encoding entries</param>
        internal PdfType1Font(PdfDictionary fontDictionary)
            : base(fontDictionary)
        {
            newFont = false;
            subset  = false;

            PdfObject     toUnicodeEntry = fontDictionary.Get(PdfName.ToUnicode);
            CMapToUnicode toUnicodeMap   = FontUtil.ProcessToUnicode(toUnicodeEntry);

            // If there is no FontDescriptor, it is most likely one of the standard fonts
            // with StandardEncoding as base encoding.
            fontEncoding = DocFontEncoding.CreateDocFontEncoding(fontDictionary.Get(PdfName.Encoding), toUnicodeMap);
            fontProgram  = DocType1Font.CreateFontProgram(fontDictionary, fontEncoding, toUnicodeMap);

            // Only document font programs can report an embedded font file.
            IDocFontProgram docProgram = fontProgram as IDocFontProgram;
            if (docProgram != null)
            {
                embedded = docProgram.GetFontFile() != null;
            }
        }
Exemple #15
0
        /// <summary>
        /// Parses the font's ToUnicode stream into a CMap.
        /// </summary>
        /// <returns>the parsed CMap, or null when the entry is not a stream or parsing throws</returns>
        private CMapToUnicode ProcessToUnicode()
        {
            PdfObject toUnicodeEntry = PdfReader.GetPdfObjectRelease(this.font.Get(PdfName.TOUNICODE));
            if (!(toUnicodeEntry is PRStream))
            {
                return null;
            }

            try
            {
                byte[] cmapBytes = PdfReader.GetStreamBytes((PRStream)toUnicodeEntry);
                CidLocationFromByte location = new CidLocationFromByte(cmapBytes);
                CMapToUnicode parsed = new CMapToUnicode();
                CMapParserEx.ParseCid("", parsed, location);
                return parsed;
            }
            catch
            {
                // Any failure while reading or parsing the stream yields "no CMap".
                return null;
            }
        }
 /// <summary>
 /// Converts an array of character codes to Unicode text.
 /// </summary>
 /// <param name="content">the character codes to convert</param>
 /// <param name="cMapToUnicode">ToUnicode CMap; takes precedence when not null</param>
 /// <param name="encodingDifferenceToUnicode">encoding differences; used when no CMap is given</param>
 /// <returns>
 /// the converted text. Without either mapping each code is treated as a Unicode code point:
 /// values up to 0xFFFF become one char, values up to 0x10FFFF become a surrogate pair, and
 /// anything else becomes U+FFFD. Returns null when no mapping is given and <paramref name="content"/> is null.
 /// </returns>
 public static string ToUnicode(int[] content, CMapToUnicode cMapToUnicode, EncodingDifferenceToUnicode encodingDifferenceToUnicode)
 {
     if (cMapToUnicode != null)
     {
         return(cMapToUnicode.ConvertToString(content));
     }
     else if (encodingDifferenceToUnicode != null)
     {
         return(encodingDifferenceToUnicode.ConvertToString(content));
     }
     else
     {
         if (content == null)
         {
             return(null);
         }
         // Convert code by code. Block-copying the 4-byte ints and decoding them as UTF-16
         // would emit two chars per code, i.e. a stray U+0000 after every BMP character.
         System.Text.StringBuilder stringContent = new System.Text.StringBuilder(content.Length);
         foreach (int code in content)
         {
             if (code >= 0 && code <= 0xFFFF)
             {
                 stringContent.Append((char)code);
             }
             else if (code > 0xFFFF && code <= 0x10FFFF)
             {
                 stringContent.Append(char.ConvertFromUtf32(code));
             }
             else
             {
                 // Not a Unicode code point: use the replacement character.
                 stringContent.Append('\uFFFD');
             }
         }
         return(stringContent.ToString());
     }
 }
        /// <summary>
        /// Loads and parses the ToUnicode CMap of a font referenced from a page's resources.
        /// </summary>
        /// <param name="pdfReader">reader of the document</param>
        /// <param name="pageNumber">page number passed to <c>GetPageN</c></param>
        /// <param name="fontKey">font key passed to <c>FindFontDictionary</c></param>
        /// <returns>
        /// the parsed CMap, or null when the font is not found or its ToUnicode entry is missing
        /// or is not a stream
        /// </returns>
        public static CMapToUnicode GetFontCMapToUnicode(PdfReader pdfReader, int pageNumber, string fontKey)
        {
            PdfDictionary resources = pdfReader.GetPageN(pageNumber).GetAsDict(PdfName.RESOURCES);
            var           fontDict  = FindFontDictionary(resources, fontKey);

            if (fontDict == null)
            {
                return(null);
            }
            // ToUnicode is not guaranteed to be a stream (it can be a name, for example); a hard cast
            // would throw InvalidCastException, so anything that is not a stream counts as "no CMap".
            PRStream toUnicodeStream = PdfReader.GetPdfObject(fontDict.Get(PdfName.TOUNICODE)) as PRStream;

            if (toUnicodeStream == null)
            {
                return(null);
            }
            string toUnicode = Encoding.UTF8.GetString(PdfReader.GetStreamBytes(toUnicodeStream));

            return(CMapToUnicode.Parse(toUnicode));
        }
        /**
         * Parses the ToUnicode entry, if present, and constructs a CMap for it.
         * Nothing is changed when the entry is not a stream; when parsing fails,
         * both toUnicodeCmap and uni2cid are reset to null.
         * @since 2.1.7
         */
        private void ProcessToUnicode()
        {
            PdfObject toUnicodeEntry = PdfReader.GetPdfObjectRelease(fontDic.Get(PdfName.TOUNICODE));
            if (!(toUnicodeEntry is PRStream))
            {
                return;
            }

            try
            {
                byte[] cmapBytes = PdfReader.GetStreamBytes((PRStream)toUnicodeEntry);
                CidLocationFromByte location = new CidLocationFromByte(cmapBytes);
                toUnicodeCmap = new CMapToUnicode();
                CMapParserEx.ParseCid("", toUnicodeCmap, location);
                uni2cid = toUnicodeCmap.CreateReverseMapping();
            }
            catch
            {
                // Deliberately best-effort: the cmap is sometimes junk, yet text can still be
                // extracted without it, so the failure is swallowed and both maps are cleared.
                toUnicodeCmap = null;
                uni2cid       = null;
            }
        }
        /// <summary>
        /// Creates a Type 0 (composite) font from an existing font dictionary.
        /// </summary>
        /// <param name="fontDictionary">font dictionary supplying the DescendantFonts, Encoding and ToUnicode entries</param>
        /// <exception cref="PdfException">when no font program can be created for a non-identity encoding</exception>
        internal PdfType0Font(PdfDictionary fontDictionary)
            : base(fontDictionary)
        {
            CheckFontDictionary(fontDictionary, PdfName.Type0);
            newFont = false;
            PdfDictionary cidFont = fontDictionary.GetAsArray(PdfName.DescendantFonts).GetAsDictionary(0);
            String        cmap    = fontDictionary.GetAsName(PdfName.Encoding).GetValue();

            bool identityEncoding = PdfEncodings.IDENTITY_H.Equals(cmap) || PdfEncodings.IDENTITY_V.Equals(cmap);

            if (!identityEncoding)
            {
                String cidFontName = cidFont.GetAsName(PdfName.BaseFont).GetValue();
                String orderingMap = GetUniMapFromOrdering(GetOrdering(cidFont));
                bool   knownCidFont = orderingMap != null && orderingMap.StartsWith("Uni") &&
                                      CidFontProperties.IsCidFont(cidFontName, orderingMap);

                if (knownCidFont)
                {
                    try
                    {
                        fontProgram  = FontProgramFactory.CreateFont(cidFontName);
                        cmapEncoding = new CMapEncoding(cmap, orderingMap);
                        embedded     = false;
                    }
                    catch (System.IO.IOException)
                    {
                        // Leaves fontProgram null so the check below raises the PdfException.
                        fontProgram  = null;
                        cmapEncoding = null;
                    }
                }
                else
                {
                    CMapToUnicode fromOrdering = FontUtil.GetToUnicodeFromUniMap(orderingMap);
                    if (fromOrdering != null)
                    {
                        fontProgram  = DocTrueTypeFont.CreateFontProgram(cidFont, fromOrdering);
                        cmapEncoding = new CMapEncoding(cmap, orderingMap);
                    }
                }

                if (fontProgram == null)
                {
                    throw new PdfException(String.Format("Cannot recognise document font {0} with {1} encoding", cidFontName,
                                                         cmap));
                }
                cidFontType = CID_FONT_TYPE_0;
            }
            else
            {
                // Identity encoding: use the ToUnicode entry, then the ordering-based map, then Identity-H.
                PdfObject     toUnicodeEntry = fontDictionary.Get(PdfName.ToUnicode);
                CMapToUnicode unicodeMap     = FontUtil.ProcessToUnicode(toUnicodeEntry);
                if (unicodeMap == null)
                {
                    String orderingMap = GetUniMapFromOrdering(GetOrdering(cidFont));
                    unicodeMap = FontUtil.GetToUnicodeFromUniMap(orderingMap);
                    if (unicodeMap == null)
                    {
                        unicodeMap = FontUtil.GetToUnicodeFromUniMap(PdfEncodings.IDENTITY_H);
                        ILogger logger = LoggerFactory.GetLogger(typeof(iText.Kernel.Font.PdfType0Font));
                        logger.Error(String.Format(LogMessageConstant.UNKNOWN_CMAP, orderingMap));
                    }
                }

                fontProgram  = DocTrueTypeFont.CreateFontProgram(cidFont, unicodeMap);
                cmapEncoding = new CMapEncoding(cmap);
                System.Diagnostics.Debug.Assert(fontProgram is IDocFontProgram);
                embedded    = ((IDocFontProgram)fontProgram).GetFontFile() != null;
                cidFontType = CID_FONT_TYPE_2;
            }

            longTag = new LinkedDictionary <int, int[]>();
            subset  = false;
        }
Exemple #20
0
        /// <summary>
        /// Fills the encoding tables (<c>uni2byte</c>, <c>byte2uni</c>, <c>diffmap</c>), the <c>widths</c> array and the
        /// font metric fields from the <c>font</c> dictionary, then hands the FontDescriptor to <c>FillFontDesc</c>.
        /// NOTE(review): the name suggests this handles Type 1 and TrueType fonts - confirm against the caller.
        /// </summary>
        private void DoType1TT()
        {
            CMapToUnicode toUnicode = null;
            PdfObject     enc       = PdfReader.GetPdfObject(font.Get(PdfName.ENCODING));

            if (enc == null)
            {
                // No Encoding entry: use the base font name for built-in Symbol/ZapfDingbats, otherwise a null encoding.
                PdfName baseFont = font.GetAsName(PdfName.BASEFONT);
                if (BuiltinFonts14.ContainsKey(fontName) &&
                    (PdfName.SYMBOL.Equals(baseFont) || PdfName.ZAPFDINGBATS.Equals(baseFont)))
                {
                    FillEncoding(baseFont);
                }
                else
                {
                    FillEncoding(null);
                }
                // Entries from the ToUnicode reverse mapping overwrite what FillEncoding put in the tables.
                toUnicode = ProcessToUnicode();
                if (toUnicode != null)
                {
                    IDictionary <int, int> rm = toUnicode.CreateReverseMapping();
                    foreach (KeyValuePair <int, int> kv in rm)
                    {
                        uni2byte[kv.Key]   = kv.Value;
                        byte2uni[kv.Value] = kv.Key;
                    }
                }
            }
            else
            {
                if (enc.IsName())
                {
                    FillEncoding((PdfName)enc);
                }
                else if (enc.IsDictionary())
                {
                    // Encoding dictionary: apply BaseEncoding first, then the Differences array on top of it.
                    PdfDictionary encDic = (PdfDictionary)enc;
                    enc = PdfReader.GetPdfObject(encDic.Get(PdfName.BASEENCODING));
                    if (enc == null)
                    {
                        FillEncoding(null);
                    }
                    else
                    {
                        FillEncoding((PdfName)enc);
                    }
                    PdfArray diffs = encDic.GetAsArray(PdfName.DIFFERENCES);
                    if (diffs != null)
                    {
                        diffmap = new IntHashtable();
                        // Running character code; a number in the array resets it, every name advances it by one.
                        int currentNumber = 0;
                        for (int k = 0; k < diffs.Size; ++k)
                        {
                            PdfObject obj = diffs[k];
                            if (obj.IsNumber())
                            {
                                currentNumber = ((PdfNumber)obj).IntValue;
                            }
                            else
                            {
                                int[] c = GlyphList.NameToUnicode(PdfName.DecodeName(((PdfName)obj).ToString()));
                                if (c != null && c.Length > 0)
                                {
                                    uni2byte[c[0]]          = currentNumber;
                                    byte2uni[currentNumber] = c[0];
                                    diffmap[c[0]]           = currentNumber;
                                }
                                else
                                {
                                    // Glyph name not resolved by the glyph list: fall back to the ToUnicode CMap,
                                    // loaded on first use (an empty CMap stands in when there is none).
                                    if (toUnicode == null)
                                    {
                                        toUnicode = ProcessToUnicode();
                                        if (toUnicode == null)
                                        {
                                            toUnicode = new CMapToUnicode();
                                        }
                                    }
                                    string unicode = toUnicode.Lookup(new byte[] { (byte)currentNumber }, 0, 1);
                                    // Only single-character results are recorded.
                                    if ((unicode != null) && (unicode.Length == 1))
                                    {
                                        this.uni2byte[unicode[0]]    = currentNumber;
                                        this.byte2uni[currentNumber] = unicode[0];
                                        this.diffmap[unicode[0]]     = currentNumber;
                                    }
                                }
                                ++currentNumber;
                            }
                        }
                    }
                }
            }
            PdfArray  newWidths = font.GetAsArray(PdfName.WIDTHS);
            PdfNumber first     = font.GetAsNumber(PdfName.FIRSTCHAR);
            PdfNumber last      = font.GetAsNumber(PdfName.LASTCHAR);

            if (BuiltinFonts14.ContainsKey(fontName))
            {
                // Built-in font: take widths and metrics from a freshly created base font of the same name.
                BaseFont bf = BaseFont.CreateFont(fontName, WINANSI, false);
                int[]    e  = uni2byte.ToOrderedKeys();
                for (int k = 0; k < e.Length; ++k)
                {
                    int n = uni2byte[e[k]];
                    widths[n] = bf.GetRawWidth(n, GlyphList.UnicodeToName(e[k]));
                }
                if (diffmap != null)   //widths for differences must override existing ones
                {
                    e = diffmap.ToOrderedKeys();
                    for (int k = 0; k < e.Length; ++k)
                    {
                        int n = diffmap[e[k]];
                        widths[n] = bf.GetRawWidth(n, GlyphList.UnicodeToName(e[k]));
                    }
                    // diffmap is discarded once its widths have been applied.
                    diffmap = null;
                }
                Ascender    = bf.GetFontDescriptor(ASCENT, 1000);
                CapHeight   = bf.GetFontDescriptor(CAPHEIGHT, 1000);
                Descender   = bf.GetFontDescriptor(DESCENT, 1000);
                ItalicAngle = bf.GetFontDescriptor(ITALICANGLE, 1000);
                fontWeight  = bf.GetFontDescriptor(FONT_WEIGHT, 1000);
                llx         = bf.GetFontDescriptor(BBOXLLX, 1000);
                lly         = bf.GetFontDescriptor(BBOXLLY, 1000);
                urx         = bf.GetFontDescriptor(BBOXURX, 1000);
                ury         = bf.GetFontDescriptor(BBOXURY, 1000);
            }
            // An explicit Widths array (with FirstChar and LastChar present) overrides widths from index FirstChar on.
            if (first != null && last != null && newWidths != null)
            {
                int f     = first.IntValue;
                int nSize = f + newWidths.Size;
                if (widths.Length < nSize)
                {
                    // Grow the array; only the first f entries are carried over because the loop below rewrites the rest.
                    // NOTE(review): Array.Copy throws if f exceeds widths.Length - confirm FirstChar is bounded upstream.
                    int[] tmp = new int[nSize];
                    System.Array.Copy(widths, 0, tmp, 0, f);
                    widths = tmp;
                }
                for (int k = 0; k < newWidths.Size; ++k)
                {
                    widths[f + k] = newWidths.GetAsNumber(k).IntValue;
                }
            }
            FillFontDesc(font.GetAsDict(PdfName.FONTDESCRIPTOR));
        }
        /// <summary>
        /// Creates a Type 0 (composite) font from an existing font dictionary.
        /// </summary>
        /// <param name="fontDictionary">font dictionary supplying the DescendantFonts, Encoding and ToUnicode entries</param>
        /// <exception cref="PdfException">when no font program can be created for a non-identity encoding</exception>
        internal PdfType0Font(PdfDictionary fontDictionary)
            : base(fontDictionary)
        {
            newFont = false;
            PdfDictionary cidFont       = fontDictionary.GetAsArray(PdfName.DescendantFonts).GetAsDictionary(0);
            PdfObject     cmap          = fontDictionary.Get(PdfName.Encoding);
            PdfObject     toUnicode     = fontDictionary.Get(PdfName.ToUnicode);
            CMapToUnicode unicodeMap    = FontUtil.ProcessToUnicode(toUnicode);

            // Identity encoding means the Encoding entry is the name Identity-H or Identity-V.
            bool identityEncoding = cmap.IsName() &&
                                    (PdfEncodings.IDENTITY_H.Equals(((PdfName)cmap).GetValue()) ||
                                     PdfEncodings.IDENTITY_V.Equals(((PdfName)cmap).GetValue()));

            if (!identityEncoding)
            {
                String cidFontName  = cidFont.GetAsName(PdfName.BaseFont).GetValue();
                String orderingMap  = GetUniMapFromOrdering(GetOrdering(cidFont));
                bool   knownCidFont = orderingMap != null && orderingMap.StartsWith("Uni") &&
                                      CidFontProperties.IsCidFont(cidFontName, orderingMap);

                if (knownCidFont)
                {
                    try
                    {
                        fontProgram  = FontProgramFactory.CreateFont(cidFontName);
                        cmapEncoding = CreateCMap(cmap, orderingMap);
                        embedded     = false;
                    }
                    catch (System.IO.IOException)
                    {
                        // Leaves fontProgram null so the check below raises the PdfException.
                        fontProgram  = null;
                        cmapEncoding = null;
                    }
                }
                else
                {
                    // Prefer the font's own ToUnicode map; otherwise derive one from the ordering.
                    if (unicodeMap == null)
                    {
                        unicodeMap = FontUtil.GetToUnicodeFromUniMap(orderingMap);
                    }
                    if (unicodeMap != null)
                    {
                        fontProgram  = DocTrueTypeFont.CreateFontProgram(cidFont, unicodeMap);
                        cmapEncoding = CreateCMap(cmap, orderingMap);
                    }
                }

                if (fontProgram == null)
                {
                    throw new PdfException(MessageFormatUtil.Format(PdfException.CannotRecogniseDocumentFontWithEncoding, cidFontName
                                                                    , cmap));
                }
            }
            else
            {
                if (unicodeMap == null)
                {
                    String orderingMap = GetUniMapFromOrdering(GetOrdering(cidFont));
                    unicodeMap = FontUtil.GetToUnicodeFromUniMap(orderingMap);
                    if (unicodeMap == null)
                    {
                        // Last resort: the identity map, with the unknown CMap name logged.
                        unicodeMap = FontUtil.GetToUnicodeFromUniMap(PdfEncodings.IDENTITY_H);
                        ILog logger = LogManager.GetLogger(typeof(iText.Kernel.Font.PdfType0Font));
                        logger.Error(MessageFormatUtil.Format(iText.IO.LogMessageConstant.UNKNOWN_CMAP, orderingMap));
                    }
                }

                fontProgram  = DocTrueTypeFont.CreateFontProgram(cidFont, unicodeMap);
                cmapEncoding = CreateCMap(cmap, null);
                System.Diagnostics.Debug.Assert(fontProgram is IDocFontProgram);
                embedded = ((IDocFontProgram)fontProgram).GetFontFile() != null;
            }

            // DescendantFonts is a one-element array specifying the CIDFont dictionary that is the descendant of this Type 0 font.
            PdfDictionary cidFontDictionary = fontDictionary.GetAsArray(PdfName.DescendantFonts).GetAsDictionary(0);
            // Required according to the spec
            PdfName subtype = cidFontDictionary.GetAsName(PdfName.Subtype);

            if (PdfName.CIDFontType0.Equals(subtype))
            {
                cidFontType = CID_FONT_TYPE_0;
            }
            else if (PdfName.CIDFontType2.Equals(subtype))
            {
                cidFontType = CID_FONT_TYPE_2;
            }
            else
            {
                LogManager.GetLogger(GetType()).Error(iText.IO.LogMessageConstant.FAILED_TO_DETERMINE_CID_FONT_SUBTYPE);
            }

            longTag = new SortedSet <int>();
            subset  = false;
        }
Exemple #22
0
        /// <summary>
        /// Populates this font's encoding tables, glyph widths and descriptor metrics from the
        /// font dictionary held in <c>font</c>.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. Resolve <c>/Encoding</c>. When it is absent the encoding is filled from the base font
        ///    (only for built-in Symbol/ZapfDingbats) or with <c>null</c>, and any ToUnicode mapping
        ///    returned by <c>ProcessToUnicode</c> is merged into <c>uni2byte</c>/<c>byte2uni</c>.
        ///    When it is a name it is passed to <c>FillEncoding</c>; when it is a dictionary its
        ///    <c>/BaseEncoding</c> is used and <c>FillDiffMap</c> is called.
        /// 2. For fonts listed in <c>BuiltinFonts14</c>, widths and descriptor values are taken from
        ///    a <c>BaseFont</c> created for <c>fontName</c>.
        /// 3. <c>FillWidths</c> and <c>FillFontDesc</c> are invoked last.
        /// </remarks>
        private void DoType1TT()
        {
            // Stays null on the dictionary-encoding path; FillDiffMap receives it as-is
            // (presumably it resolves the mapping lazily -- confirm in FillDiffMap).
            CMapToUnicode unicodeMap  = null;
            PdfObject     encodingObj = PdfReader.GetPdfObject(font.Get(PdfName.ENCODING));

            if (encodingObj != null)
            {
                if (encodingObj.IsName())
                {
                    FillEncoding((PdfName)encodingObj);
                }
                else if (encodingObj.IsDictionary())
                {
                    PdfDictionary encodingDict = (PdfDictionary)encodingObj;
                    PdfObject     baseEncoding = PdfReader.GetPdfObject(encodingDict.Get(PdfName.BASEENCODING));
                    if (baseEncoding != null)
                    {
                        FillEncoding((PdfName)baseEncoding);
                    }
                    else
                    {
                        FillEncoding(null);
                    }
                    FillDiffMap(encodingDict, unicodeMap);
                }
            }
            else
            {
                PdfName baseFont         = font.GetAsName(PdfName.BASEFONT);
                bool    useBaseFontTable = BuiltinFonts14.ContainsKey(fontName) &&
                                           (PdfName.SYMBOL.Equals(baseFont) || PdfName.ZAPFDINGBATS.Equals(baseFont));
                if (useBaseFontTable)
                {
                    FillEncoding(baseFont);
                }
                else
                {
                    FillEncoding(null);
                }

                unicodeMap = ProcessToUnicode();
                if (unicodeMap != null)
                {
                    // The reverse mapping is keyed by Unicode value; mirror it into both lookup tables.
                    IDictionary <int, int> reverse = unicodeMap.CreateReverseMapping();
                    foreach (KeyValuePair <int, int> pair in reverse)
                    {
                        uni2byte[pair.Key]   = pair.Value;
                        byte2uni[pair.Value] = pair.Key;
                    }
                }
            }

            if (BuiltinFonts14.ContainsKey(fontName))
            {
                BaseFont builtin = BaseFont.CreateFont(fontName, WINANSI, false);

                foreach (int unicode in uni2byte.ToOrderedKeys())
                {
                    int code = uni2byte[unicode];
                    widths[code] = builtin.GetRawWidth(code, GlyphList.UnicodeToName(unicode));
                }

                if (diffmap != null)
                {
                    //widths for differences must override existing ones
                    foreach (int unicode in diffmap.ToOrderedKeys())
                    {
                        int code = diffmap[unicode];
                        widths[code] = builtin.GetRawWidth(code, GlyphList.UnicodeToName(unicode));
                    }
                    diffmap = null;
                }

                Ascender    = builtin.GetFontDescriptor(ASCENT, 1000);
                CapHeight   = builtin.GetFontDescriptor(CAPHEIGHT, 1000);
                Descender   = builtin.GetFontDescriptor(DESCENT, 1000);
                ItalicAngle = builtin.GetFontDescriptor(ITALICANGLE, 1000);
                fontWeight  = builtin.GetFontDescriptor(FONT_WEIGHT, 1000);
                llx         = builtin.GetFontDescriptor(BBOXLLX, 1000);
                lly         = builtin.GetFontDescriptor(BBOXLLY, 1000);
                urx         = builtin.GetFontDescriptor(BBOXURX, 1000);
                ury         = builtin.GetFontDescriptor(BBOXURY, 1000);
            }

            FillWidths();
            FillFontDesc(font.GetAsDict(PdfName.FONTDESCRIPTOR));
        }
Exemple #23
0
        /// <summary>
        /// Applies the entries of a differences array to <paramref name="fontEncoding"/>.
        /// </summary>
        /// <remarks>
        /// The array is a sequence of numbers and glyph names: a number sets the current character
        /// code, and each following name is assigned to the current code, which is then incremented.
        /// A glyph name is resolved through <c>AdobeGlyphList.NameToUnicode</c>; when that yields -1,
        /// the direct mapping built from <paramref name="toUnicode"/> is consulted for the current code.
        /// Names that cannot be resolved either way are skipped.
        /// </remarks>
        /// <param name="fontEncoding">Encoding whose code/unicode tables are updated in place.</param>
        /// <param name="diffs">Differences array; when <c>null</c> nothing is changed.</param>
        /// <param name="toUnicode">Optional ToUnicode CMap used as a fallback; may be <c>null</c>.</param>
        private static void FillDifferences(iText.Kernel.Font.DocFontEncoding fontEncoding, PdfArray diffs, CMapToUnicode
                                            toUnicode)
        {
            IntHashtable byte2uni = toUnicode != null?toUnicode.CreateDirectMapping() : new IntHashtable();

            if (diffs == null)
            {
                return;
            }

            int currentNumber = 0;
            for (int k = 0; k < diffs.Size(); ++k)
            {
                PdfObject obj = diffs.Get(k);
                if (obj.IsNumber())
                {
                    currentNumber = ((PdfNumber)obj).IntValue();
                    continue;
                }

                // Simple-font character codes are single bytes. A malformed array can push the
                // code outside 0..255, which previously indexed the per-code tables out of range.
                // Skip the entry but keep counting: later sub-arrays may still be valid,
                // e.g. [255 /space /one 250 /two /three].
                if (currentNumber < 0 || currentNumber > 255)
                {
                    currentNumber++;
                    continue;
                }

                String glyphName = ((PdfName)obj).GetValue();
                int    unicode   = (int)AdobeGlyphList.NameToUnicode(glyphName);
                bool   resolved  = unicode != -1;

                // The fallback table is keyed by character code, so the lookup must test KEYS.
                // Contains(int) tests whether the argument occurs as a VALUE, which made the
                // ToUnicode fallback fire (or not) for the wrong codes.
                if (!resolved && byte2uni.ContainsKey(currentNumber))
                {
                    unicode  = byte2uni.Get(currentNumber);
                    resolved = true;
                }

                if (resolved)
                {
                    fontEncoding.codeToUnicode[currentNumber] = unicode;
                    fontEncoding.unicodeToCode.Put(unicode, currentNumber);
                    fontEncoding.differences[currentNumber] = glyphName;
                    fontEncoding.unicodeDifferences.Put(unicode, unicode);
                }
                currentNumber++;
            }
        }
Exemple #24
0
        /// <summary>
        /// Builds a <c>DocTrueTypeFont</c> from a font dictionary, creating one glyph per
        /// character code 0..255.
        /// </summary>
        /// <param name="fontDictionary">Font dictionary supplying /FontDescriptor, /FirstChar and /Widths.</param>
        /// <param name="fontEncoding">Encoding used to obtain the Unicode value of each code.</param>
        /// <param name="toUnicode">Optional ToUnicode CMap; consulted for glyphs that are not registered by Unicode value.</param>
        /// <returns>The populated font program, with <c>avgWidth</c> set to the mean of all positive widths.</returns>
        internal static TrueTypeFont CreateFontProgram(PdfDictionary fontDictionary, FontEncoding fontEncoding, CMapToUnicode
                                                       toUnicode)
        {
            iText.Kernel.Font.DocTrueTypeFont docFont = new iText.Kernel.Font.DocTrueTypeFont(fontDictionary);
            FillFontDescriptor(docFont, fontDictionary.GetAsDictionary(PdfName.FontDescriptor));

            // A missing /FirstChar counts as 0; negative values are clamped to 0.
            PdfNumber firstCharEntry = fontDictionary.GetAsNumber(PdfName.FirstChar);
            int       firstChar      = 0;
            if (firstCharEntry != null)
            {
                firstChar = Math.Max(firstCharEntry.IntValue(), 0);
            }

            int[] codeWidths = FontUtil.ConvertSimpleWidthsArray(
                fontDictionary.GetAsArray(PdfName.Widths), firstChar, docFont.GetMissingWidth());

            docFont.avgWidth = 0;
            int positiveWidthCount = 0;

            for (int code = 0; code < 256; code++)
            {
                int   width = codeWidths[code];
                Glyph glyph = new Glyph(code, width, fontEncoding.GetUnicode(code));
                docFont.codeToGlyph.Put(code, glyph);

                //FontEncoding.codeToUnicode table has higher priority
                bool encodingMapsBack = glyph.HasValidUnicode() &&
                                        fontEncoding.ConvertToByte(glyph.GetUnicode()) == code;
                if (encodingMapsBack)
                {
                    docFont.unicodeToGlyph.Put(glyph.GetUnicode(), glyph);
                }
                else if (toUnicode != null)
                {
                    glyph.SetChars(toUnicode.Lookup(code));
                }

                if (width > 0)
                {
                    positiveWidthCount++;
                    docFont.avgWidth += width;
                }
            }

            // Leave avgWidth at 0 when no code has a positive width (avoids dividing by zero).
            if (positiveWidthCount != 0)
            {
                docFont.avgWidth /= positiveWidthCount;
            }
            return docFont;
        }
Exemple #25
0
 /// <summary>
 /// Creates the font from an existing font dictionary and initialises <c>toUnicode</c>
 /// from the dictionary's /ToUnicode entry.
 /// </summary>
 /// <param name="fontDictionary">Font dictionary passed to the base constructor and read for /ToUnicode.</param>
 protected internal PdfSimpleFont(PdfDictionary fontDictionary)
     : base(fontDictionary)
 {
     PdfObject toUnicodeEntry = fontDictionary.Get(PdfName.ToUnicode);
     toUnicode = FontUtil.ProcessToUnicode(toUnicodeEntry);
 }
Exemple #26
0
 /// <summary>
 /// Convenience overload that forwards to the three-argument
 /// <c>CreateDocFontEncoding</c>, passing <c>true</c> as its last argument.
 /// </summary>
 /// <param name="encoding">Encoding object forwarded unchanged.</param>
 /// <param name="toUnicode">ToUnicode CMap forwarded unchanged.</param>
 /// <returns>Whatever the three-argument overload returns.</returns>
 public static FontEncoding CreateDocFontEncoding(PdfObject encoding, CMapToUnicode toUnicode)
 {
     FontEncoding docEncoding = CreateDocFontEncoding(encoding, toUnicode, true);
     return docEncoding;
 }
Exemple #27
0
        /// <summary>
        /// Extracts the text of one page by scanning its content stream statement by statement.
        /// </summary>
        /// <remarks>
        /// Operators handled:
        /// <list type="bullet">
        /// <item><description><c>BI</c>: skips ahead to the next <c>"\nEI"</c>; stops parsing if none is found.</description></item>
        /// <item><description><c>Tm</c>: replaces the current matrix when the statement parses.</description></item>
        /// <item><description><c>Tf</c>: looks up the ToUnicode CMap and encoding differences of the selected font.</description></item>
        /// <item><description><c>Td</c>: sets the matrix to a pure translation.</description></item>
        /// <item><description><c>TD</c>: multiplies a translation onto the matrix and sets the leading to <c>-ty</c>.</description></item>
        /// <item><description><c>TL</c> / <c>T*</c>: set the leading / move down by the leading.</description></item>
        /// <item><description><c>TJ</c> / <c>Tj</c>: decode the shown text and append it to the output.</description></item>
        /// </list>
        /// Text runs whose Y positions differ by less than one unit are joined on one line with a
        /// single space; otherwise the pending line is flushed and a new one started.
        /// NOTE(review): <c>Td</c> replaces the matrix whereas <c>TD</c> multiplies it; the PDF
        /// specification defines both as relative moves. Left unchanged -- confirm intent.
        /// </remarks>
        /// <param name="pdfReader">Reader providing the page content and font resources.</param>
        /// <param name="pageNumber">Page number passed to <paramref name="pdfReader"/>.</param>
        /// <returns>The extracted text; lines are separated by <see cref="Environment.NewLine"/>.</returns>
        private static string GetTextFromPage(PdfReader pdfReader, int pageNumber)
        {
            StringBuilder sb = new StringBuilder();

            Matrix        transformMatrix  = Matrix.Identity;
            float         leadingParameter = 0;
            Point         position;
            CMapToUnicode cMapToUnicode = null;
            EncodingDifferenceToUnicode encodingDifferenceToUnicode = null;

            double oldY        = 0;
            string lineContent = null;

            string rawPdfContent = Encoding.UTF8.GetString(Encoding.Convert(Encoding.Default, Encoding.UTF8, pdfReader.GetPageContent(pageNumber)));
            int    pointer       = 0;

            string statement = Statement.GetNextStatement(rawPdfContent, ref pointer);

            while (statement != null)
            {
                // Operators are ASCII tokens: compare ordinally so culture rules (e.g. ignorable
                // characters in binary-ish content) cannot produce false matches.
                if (statement.EndsWith("BI", StringComparison.Ordinal))
                {
                    // Embedded image: jump over the binary payload.
                    int endOfImage = rawPdfContent.IndexOf("\nEI", pointer, StringComparison.Ordinal);
                    if (endOfImage < 0)
                    {
                        // Unterminated inline image: the rest of the stream is image data.
                        // Previously -1 was handed to the statement reader as the parse position.
                        break;
                    }
                    pointer = endOfImage;
                }
                else if (statement.EndsWith("Tm", StringComparison.Ordinal))
                {
                    Matrix matrix;
                    if (Matrix.TryParse(statement, out matrix))
                    {
                        transformMatrix = matrix;
                    }
                }
                else if (statement.EndsWith("Tf", StringComparison.Ordinal))
                {
                    // Expected shape: "<fontName> <size> Tf"; the font name is third from the end.
                    string[] fontParameters = statement.Split(' ');
                    if (fontParameters.Length >= 3)
                    {
                        string fontName = fontParameters[fontParameters.Length - 3];
                        cMapToUnicode = PdfFontHelper.GetFontCMapToUnicode(pdfReader, pageNumber, fontName);
                        encodingDifferenceToUnicode = EncodingDifferenceToUnicode.Parse(PdfFontHelper.GetFont(pdfReader, pageNumber, fontName));
                    }
                }
                else if (statement.EndsWith("Td", StringComparison.Ordinal))
                {
                    float    tx;
                    float    ty;
                    string[] parameters = statement.Split(' ');
                    if (
                        parameters.Length > 1 &&
                        float.TryParse(parameters[0], NumberStyles.Any, NumberFormatInfo.InvariantInfo, out tx) &&
                        float.TryParse(parameters[1], NumberStyles.Any, NumberFormatInfo.InvariantInfo, out ty))
                    {
                        transformMatrix = new Matrix(1, 0, 0, 1, tx, ty);
                    }
                }
                else if (statement.EndsWith("TD", StringComparison.Ordinal))
                {
                    float    tx;
                    float    ty;
                    string[] parameters = statement.Split(' ');
                    if (
                        parameters.Length > 1 &&
                        float.TryParse(parameters[0], NumberStyles.Any, NumberFormatInfo.InvariantInfo, out tx) &&
                        float.TryParse(parameters[1], NumberStyles.Any, NumberFormatInfo.InvariantInfo, out ty))
                    {
                        transformMatrix  = new Matrix(1, 0, 0, 1, tx, ty) * transformMatrix;
                        leadingParameter = -ty;
                    }
                }
                else if (statement.EndsWith("TL", StringComparison.Ordinal))
                {
                    float    tl;
                    string[] parameters = statement.Split(' ');
                    if (
                        float.TryParse(parameters[0], NumberStyles.Any, NumberFormatInfo.InvariantInfo, out tl))
                    {
                        leadingParameter = tl;
                    }
                }
                else if (statement.EndsWith("T*", StringComparison.Ordinal))
                {
                    transformMatrix = new Matrix(1, 0, 0, 1, 0, -leadingParameter) * transformMatrix;
                }
                else if (statement.EndsWith("TJ", StringComparison.Ordinal))
                {
                    string content = TextObjectStatement.GetTJContent(statement, cMapToUnicode, encodingDifferenceToUnicode);
                    if (!string.IsNullOrWhiteSpace(content))
                    {
                        content  = content.Trim();
                        position = new Point(transformMatrix.TransformX(Point.Origin.X, Point.Origin.Y), transformMatrix.TransformY(Point.Origin.X, Point.Origin.Y));
                        AppendRunToLine(sb, ref lineContent, ref oldY, content, position.Y);
                    }
                }
                else if (statement.Trim().EndsWith("Tj", StringComparison.Ordinal))
                {
                    string escapedContent = statement.Trim();
                    escapedContent = escapedContent.Remove(escapedContent.Length - 2);
                    string content = PdfHexStringDataType.IsStartChar(escapedContent) ? PdfHexStringDataType.GetContent(escapedContent) : PdfStringDataType.GetContentFromEscapedContent(escapedContent);
                    content  = content.Trim();
                    content  = PdfFontHelper.ToUnicode(content, cMapToUnicode, encodingDifferenceToUnicode);
                    position = new Point(transformMatrix.TransformX(Point.Origin.X, Point.Origin.Y), transformMatrix.TransformY(Point.Origin.X, Point.Origin.Y));
                    AppendRunToLine(sb, ref lineContent, ref oldY, content, position.Y);
                }

                statement = Statement.GetNextStatement(rawPdfContent, ref pointer);
            }

            // Flush the last pending line (without a trailing newline).
            if (!string.IsNullOrWhiteSpace(lineContent))
            {
                sb.Append(lineContent);
            }
            return sb.ToString();
        }

        /// <summary>
        /// Adds one decoded text run to the line being assembled, or flushes that line to
        /// <paramref name="sb"/> and starts a new one when the run sits at a different Y position.
        /// </summary>
        /// <param name="sb">Receives completed lines.</param>
        /// <param name="lineContent">Line currently being assembled; updated in place.</param>
        /// <param name="oldY">Y position of the current line; updated when a new line starts.</param>
        /// <param name="content">Decoded text of the run.</param>
        /// <param name="y">Y position of the run.</param>
        private static void AppendRunToLine(StringBuilder sb, ref string lineContent, ref double oldY, string content, double y)
        {
            // Positions come out of floating-point matrix arithmetic, so exact equality is
            // unreliable; runs closer than one unit vertically count as the same line.
            const double sameLineTolerance = 1;

            if (Math.Abs(oldY - y) < sameLineTolerance)
            {
                lineContent = string.IsNullOrWhiteSpace(lineContent) ? content : lineContent + " " + content;
                return;
            }

            if (!string.IsNullOrWhiteSpace(lineContent))
            {
                sb.AppendLine(lineContent);
            }
            lineContent = content;
            oldY        = y;
        }
        /// <summary>
        /// Builds a Type 1 font program from a font dictionary.
        /// </summary>
        /// <remarks>
        /// When the dictionary has no /FontDescriptor, a font created by
        /// <c>FontProgramFactory.CreateFont</c> for the base font name is returned if that succeeds.
        /// Otherwise a <c>DocType1Font</c> is populated from the dictionary with one glyph per
        /// character code 0..255.
        /// </remarks>
        /// <param name="fontDictionary">Font dictionary supplying /BaseFont, /FontDescriptor, /FirstChar and /Widths.</param>
        /// <param name="fontEncoding">Encoding used to obtain the Unicode value of each code.</param>
        /// <param name="toUnicode">Optional ToUnicode CMap; consulted for glyphs without a valid Unicode value.</param>
        /// <returns>The font program; <c>avgWidth</c> of a newly built program is the mean of all positive widths.</returns>
        internal static Type1Font CreateFontProgram(PdfDictionary fontDictionary, FontEncoding fontEncoding, CMapToUnicode
                                                    toUnicode)
        {
            PdfName baseFontName = fontDictionary.GetAsName(PdfName.BaseFont);
            String  baseFont;

            if (baseFontName != null)
            {
                baseFont = baseFontName.GetValue();
            }
            else
            {
                baseFont = FontUtil.CreateRandomFontName();
            }
            if (!fontDictionary.ContainsKey(PdfName.FontDescriptor))
            {
                Type1Font type1StdFont;
                try {
                    //if there are no font modifiers, cached font could be used,
                    //otherwise a new instance should be created.
                    type1StdFont = (Type1Font)FontProgramFactory.CreateFont(baseFont, true);
                }
                catch (Exception) {
                    // Best effort: any failure just means no ready-made font is available,
                    // so fall through and build the program from the dictionary.
                    type1StdFont = null;
                }
                if (type1StdFont != null)
                {
                    return(type1StdFont);
                }
            }
            iText.Kernel.Font.DocType1Font fontProgram = new iText.Kernel.Font.DocType1Font(baseFont);
            PdfDictionary fontDesc = fontDictionary.GetAsDictionary(PdfName.FontDescriptor);

            // fontDesc is null when the descriptor is missing and the fallback above failed
            // (or when the entry is not a dictionary). Dereferencing it unconditionally threw
            // NullReferenceException; without a descriptor the program keeps its default metrics.
            if (fontDesc != null)
            {
                fontProgram.subtype = fontDesc.GetAsName(PdfName.Subtype);
                FillFontDescriptor(fontProgram, fontDesc);
            }
            PdfNumber firstCharNumber = fontDictionary.GetAsNumber(PdfName.FirstChar);
            // A missing /FirstChar counts as 0; negative values are clamped to 0.
            int       firstChar       = firstCharNumber != null?Math.Max(firstCharNumber.IntValue(), 0) : 0;

            int[] widths = FontUtil.ConvertSimpleWidthsArray(fontDictionary.GetAsArray(PdfName.Widths), firstChar, fontProgram
                                                             .GetMissingWidth());
            fontProgram.avgWidth = 0;
            int glyphsWithWidths = 0;

            for (int i = 0; i < 256; i++)
            {
                Glyph glyph = new Glyph(i, widths[i], fontEncoding.GetUnicode(i));
                fontProgram.codeToGlyph[i] = glyph;
                if (glyph.HasValidUnicode())
                {
                    //FontEncoding.codeToUnicode table has higher priority
                    if (fontEncoding.ConvertToByte(glyph.GetUnicode()) == i)
                    {
                        fontProgram.unicodeToGlyph[glyph.GetUnicode()] = glyph;
                    }
                }
                else
                {
                    if (toUnicode != null)
                    {
                        glyph.SetChars(toUnicode.Lookup(i));
                    }
                }
                if (widths[i] > 0)
                {
                    glyphsWithWidths++;
                    fontProgram.avgWidth += widths[i];
                }
            }
            // Leave avgWidth at 0 when no code has a positive width (avoids dividing by zero).
            if (glyphsWithWidths != 0)
            {
                fontProgram.avgWidth /= glyphsWithWidths;
            }
            return(fontProgram);
        }
Exemple #29
0
        /// <summary>
        /// Applies the entries of a differences array to <paramref name="fontEncoding"/>.
        /// </summary>
        /// <remarks>
        /// A number in the array sets the current character code; each following glyph name is
        /// assigned to the current code, which is then incremented. Names are resolved through
        /// <c>AdobeGlyphList.NameToUnicode</c>, falling back to the direct mapping built from
        /// <paramref name="toUnicode"/> when that yields -1. Names encountered while the current
        /// code is above 255 are logged as a warning and ignored.
        /// </remarks>
        /// <param name="fontEncoding">Encoding whose code/unicode tables are updated in place.</param>
        /// <param name="diffs">Differences array; when <c>null</c> nothing is changed.</param>
        /// <param name="toUnicode">Optional ToUnicode CMap used as a fallback; may be <c>null</c>.</param>
        private static void FillDifferences(iText.Kernel.Font.DocFontEncoding fontEncoding, PdfArray diffs, CMapToUnicode
                                            toUnicode)
        {
            IntHashtable fallbackByCode;
            if (toUnicode == null)
            {
                fallbackByCode = new IntHashtable();
            }
            else
            {
                fallbackByCode = toUnicode.CreateDirectMapping();
            }

            if (diffs == null)
            {
                return;
            }

            int code = 0;
            for (int index = 0; index < diffs.Size(); ++index)
            {
                PdfObject entry = diffs.Get(index);
                if (entry.IsNumber())
                {
                    code = ((PdfNumber)entry).IntValue();
                    continue;
                }

                if (code > 255)
                {
                    /* Keep scanning instead of returning: sub-arrays may appear in any
                     * order, e.g. [255 /space /one 250 /two /three] -- /one is invalid
                     * but every other name must still be processed.
                     */
                    ILog LOGGER = LogManager.GetLogger(typeof(iText.Kernel.Font.DocFontEncoding));
                    LOGGER.Warn(MessageFormatUtil.Format(iText.IO.LogMessageConstant.DOCFONT_HAS_ILLEGAL_DIFFERENCES,
                                                         ((PdfName)entry).GetValue()));
                    continue;
                }

                String glyphName = ((PdfName)entry).GetValue();
                int    unicode   = AdobeGlyphList.NameToUnicode(glyphName);
                bool   resolved  = unicode != -1;

                // Unknown glyph name: try the ToUnicode mapping for this code instead.
                if (!resolved && fallbackByCode.ContainsKey(code))
                {
                    unicode  = fallbackByCode.Get(code);
                    resolved = true;
                }

                if (resolved)
                {
                    fontEncoding.codeToUnicode[code] = unicode;
                    fontEncoding.unicodeToCode.Put(unicode, code);
                    fontEncoding.differences[code] = glyphName;
                    fontEncoding.unicodeDifferences.Put(unicode, unicode);
                }
                code++;
            }
        }
Exemple #30
0
        /// <summary>
        /// Fills <paramref name="fontEncoding"/> from every entry of the direct mapping
        /// produced by <paramref name="toUnicode"/>.
        /// </summary>
        /// <remarks>
        /// For each code in the mapping, the code/unicode tables are updated and the glyph name
        /// returned by <c>AdobeGlyphList.UnicodeToName</c> is stored as that code's difference.
        /// </remarks>
        /// <param name="fontEncoding">Encoding whose tables are updated in place.</param>
        /// <param name="toUnicode">ToUnicode CMap providing the mapping; dereferenced without a null check.</param>
        private static void FillDifferences(iText.Kernel.Font.DocFontEncoding fontEncoding, CMapToUnicode toUnicode
                                            )
        {
            IntHashtable directMapping = toUnicode.CreateDirectMapping();

            foreach (int?key in directMapping.GetKeys())
            {
                // Unwrap once; every table below is indexed by the plain int code.
                int code    = (int)key;
                int unicode = directMapping.Get(code);

                String glyphName = AdobeGlyphList.UnicodeToName(unicode);

                fontEncoding.codeToUnicode[code] = unicode;
                fontEncoding.unicodeToCode.Put(unicode, code);
                fontEncoding.differences[code] = glyphName;
                fontEncoding.unicodeDifferences.Put(unicode, unicode);
            }
        }