Beispiel #1
0
 /**
  * <summary>Parses the entries of a CID range section, registering them in the given CMap.</summary>
  * <remarks>Each entry is read as three consecutive tokens: first input code, last input code and
  * the code mapped to the first input code.</remarks>
  * <param name="codes">CMap receiving the parsed mappings.</param>
  * <param name="operands">Operands of the section operator; the first one is the number of entries.</param>
  */
 private void ParseCIDRange(CMap codes, IList <object> operands)
 {
     int rangeCount = (int)operands[0];
     for (int rangeIndex = 0; rangeIndex < rangeCount; rangeIndex++)
     {
         // First input code of the range.
         MoveNext();
         byte[] firstCodeBytes = ParseInputCode();
         int    firstCode      = ConvertUtils.ByteArrayToInt(firstCodeBytes);

         // Last input code of the range.
         MoveNext();
         byte[] lastCodeBytes = ParseInputCode();
         int    lastCode      = ConvertUtils.ByteArrayToInt(lastCodeBytes);

         // Code mapped to the first input code.
         MoveNext();
         int targetCode = ParseUnicode();

         bool hasLongCode = firstCodeBytes.Length > 2 || lastCodeBytes.Length > 2;
         if (hasLongCode)
         {
             // TODO Is this even possible?
             // Input codes longer than two bytes are expanded into one mapping per code;
             // the first-code buffer is incremented in place to walk through the range.
             int lastTargetCode = targetCode + lastCode - firstCode;
             for (; targetCode <= lastTargetCode; targetCode++)
             {
                 codes.AddCIDMapping(targetCode, ConvertUtils.ByteArrayToInt(firstCodeBytes));
                 OperationUtils.Increment(firstCodeBytes);
             }
         }
         else if (firstCode != lastCode)
         {
             codes.AddCIDRange((char)firstCode, (char)lastCode, targetCode);
         }
         else
         {
             // Some CMaps use CID ranges to map single values.
             codes.AddCIDMapping(targetCode, firstCode);
         }
     }
 }
Beispiel #2
0
        /**
         * <summary>Parses the entries of a base-font range section, registering them in the given CMap.</summary>
         * <remarks>Each entry is made of the beginning and ending input codes, followed by either an
         * array of mapped values (assigned to consecutive input codes) or a single starting value.</remarks>
         * <param name="codes">CMap receiving the parsed mappings.</param>
         * <param name="operands">Operands of the section operator; the first one is the number of entries.</param>
         */
        private void ParseBFRange(CMap codes, IList <object> operands)
        {
            int rangeCount = (int)operands[0];
            for (int rangeIndex = 0; rangeIndex < rangeCount; rangeIndex++)
            {
                // Beginning input code.
                MoveNext();
                byte[] firstCodeBytes = ParseInputCode();
                int    firstCode      = ConvertUtils.ByteArrayToInt(firstCodeBytes);

                // Ending input code (only its numeric value is needed).
                MoveNext();
                byte[] lastCodeBytes = ParseInputCode();
                int    lastCode      = ConvertUtils.ByteArrayToInt(lastCodeBytes);

                // Mapped value(s).
                MoveNext();
                if (TokenType == TokenTypeEnum.ArrayBegin)
                {
                    // Explicit list: every array element maps the next consecutive input code.
                    // The beginning-code buffer itself is advanced in place.
                    byte[] currentCode = firstCodeBytes;
                    while (MoveNext())
                    {
                        if (TokenType == TokenTypeEnum.ArrayEnd)
                        {
                            break;
                        }

                        // FIXME: Unicode character sequences (such as ligatures) have not been supported yet [BUG:72].
                        try
                        {
                            codes.AddCharMapping(currentCode, ParseUnicode());
                        }
                        catch (OverflowException)
                        {
                            Debug.WriteLine($"WARN: Unable to process Unicode sequence from {codes.CMapName} CMap: {Token}");
                        }
                        OperationUtils.Increment(currentCode);
                    }
                    continue;
                }

                // Single starting value: an empty one yields no mapping at all.
                byte[] targetBytes = ParseInputCode();
                if (targetBytes.Length == 0)
                {
                    continue;
                }

                // Some PDFs use the malformed bfrange <0000> <FFFF> <0000>, which is treated as an
                // identity mapping instead of being cut after 255 entries.
                bool isMalformedIdentityRange = targetBytes.Length == 2 && firstCode == 0 && lastCode == 0xffff &&
                                                targetBytes[0] == 0 && targetBytes[1] == 0;
                if (!isMalformedIdentityRange)
                {
                    // PDFBOX-4661: avoid overflow of the last byte, all following values are undefined.
                    int codeSpan       = lastCode - firstCode;
                    int lastByteBudget = 255 - (targetBytes[targetBytes.Length - 1] & 0xFF);
                    int valueCount     = Math.Min(codeSpan, lastByteBudget) + 1;
                    AddMappingFrombfrange(codes, firstCodeBytes, valueCount, targetBytes);
                    continue;
                }

                // TODO find a more efficient method to represent all values for an identity mapping.
                // NOTE(review): each pass rewrites only the second byte of both buffers and requests
                // 0xff mappings; whether this really spans 0000-FFFF depends on how
                // AddMappingFrombfrange advances the buffers -- confirm against that helper.
                for (int secondByte = 0; secondByte < 256; secondByte++)
                {
                    firstCodeBytes[1] = (byte)secondByte;
                    targetBytes[1]    = (byte)secondByte;
                    AddMappingFrombfrange(codes, firstCodeBytes, 0xff, targetBytes);
                }
            }
        }
Beispiel #3
0
        /**
         * <summary>Parses the character-code-to-unicode mapping [PDF:1.6:5.9.1].</summary>
         * <remarks>The stream is rewound and scanned token by token: each integer token is remembered
         * as the entry count consumed by the next char/range section operator. Range entries whose
         * ending input code is lower than the beginning one are skipped.</remarks>
         * <returns>Mapped codes keyed by input code.</returns>
         */
        public IDictionary <ByteArray, int> Parse(
            )
        {
            Stream.Position = 0;
            IDictionary <ByteArray, int> codes = new Dictionary <ByteArray, int>();

            {
                // Latest integer token met so far; used as the entry count of the section that follows.
                int itemCount = 0;
                while (MoveNext())
                {
                    switch (TokenType)
                    {
                    case TokenTypeEnum.Keyword:
                    {
                        string operator_ = (string)Token;
                        if (operator_.Equals(BeginBaseFontCharOperator) ||
                            operator_.Equals(BeginCIDCharOperator))
                        {
                            /*
                             * NOTE: The first element on each line is the input code of the template font;
                             * the second element is the code or name of the character.
                             */
                            for (
                                int itemIndex = 0;
                                itemIndex < itemCount;
                                itemIndex++
                                )
                            {
                                MoveNext();
                                ByteArray inputCode = new ByteArray(ParseInputCode());
                                MoveNext();
                                codes[inputCode] = ParseUnicode();
                            }
                        }
                        else if (operator_.Equals(BeginBaseFontRangeOperator) ||
                                 operator_.Equals(BeginCIDRangeOperator))
                        {
                            /*
                             * NOTE: The first and second elements in each line are the beginning and
                             * ending valid input codes for the template font; the third element is
                             * the beginning character code for the range.
                             */
                            for (
                                int itemIndex = 0;
                                itemIndex < itemCount;
                                itemIndex++
                                )
                            {
                                // 1. Beginning input code.
                                MoveNext();
                                byte[] beginInputCode = ParseInputCode();
                                // 2. Ending input code.
                                MoveNext();
                                byte[] endInputCode = ParseInputCode();
                                // 3. Character codes.
                                MoveNext();
                                switch (TokenType)
                                {
                                case TokenTypeEnum.ArrayBegin:
                                {
                                    // Explicit list: each array element maps the next consecutive input code.
                                    // The beginning-code buffer is advanced in place, hence the ByteArray wrap per key.
                                    byte[] inputCode = beginInputCode;
                                    while (MoveNext() &&
                                           TokenType != TokenTypeEnum.ArrayEnd)
                                    {
                                        codes[new ByteArray(inputCode)] = ParseUnicode();
                                        OperationUtils.Increment(inputCode);
                                    }
                                    break;
                                }

                                default:
                                {
                                    // Single starting value: consecutive input codes map to consecutive character codes.
                                    byte[] inputCode   = beginInputCode;
                                    int    charCode    = ParseUnicode();
                                    int    rangeLength = ConvertUtils.ByteArrayToInt(endInputCode) - ConvertUtils.ByteArrayToInt(beginInputCode);
                                    if (rangeLength < 0)
                                    {
                                        // Malformed entry (ending code lower than beginning code): the equality-based
                                        // termination test below would not be met until the counter wrapped around,
                                        // flooding the dictionary; skip the entry instead.
                                        break;
                                    }

                                    int endCharCode = charCode + rangeLength;
                                    while (true)
                                    {
                                        codes[new ByteArray(inputCode)] = charCode;
                                        if (charCode == endCharCode)
                                        {
                                            break;
                                        }

                                        OperationUtils.Increment(inputCode);
                                        charCode++;
                                    }
                                    break;
                                }
                                }
                            }
                        }
                        break;
                    }

                    case TokenTypeEnum.Integer:
                    {
                        itemCount = (int)Token;
                        break;
                    }
                    }
                }
            }
            return(codes);
        }
Beispiel #4
0
        /**
         * <summary>Parses the character-code-to-unicode mapping [PDF:1.6:5.9.1].</summary>
         * <remarks>The stream is rewound and scanned token by token. Non-keyword tokens are collected
         * as operands of the next keyword; arrays, dictionaries and comments met at this level are
         * skipped. Char and range sections add entries to the result, the use-CMap operator replaces
         * the result with the referenced CMap, and the CMap name definition is kept for diagnostics.
         * Range entries whose ending input code is lower than the beginning one are skipped.</remarks>
         * <returns>Mapped codes keyed by input code.</returns>
         */
        public IDictionary <ByteArray, int> Parse(
            )
        {
            Stream.Seek(0);
            IDictionary <ByteArray, int> codes = new Dictionary <ByteArray, int>();

            {
                // Operands accumulated since the last keyword.
                IList <object> operands = new List <object>();
                // CMap name, used in warning messages only.
                string         cmapName = null;
                while (MoveNext())
                {
                    switch (TokenType)
                    {
                    case TokenTypeEnum.Keyword:
                    {
                        string @operator = (string)Token;
                        if (@operator.Equals(BeginBaseFontCharOperator) ||
                            @operator.Equals(BeginCIDCharOperator))
                        {
                            /*
                             * NOTE: The first element on each line is the input code of the template font;
                             * the second element is the code or name of the character.
                             */
                            for (int itemIndex = 0, itemCount = (int)operands[0]; itemIndex < itemCount; itemIndex++)
                            {
                                MoveNext();
                                ByteArray inputCode = new ByteArray(ParseInputCode());
                                MoveNext();
                                // FIXME: Unicode character sequences (such as ligatures) have not been supported yet [BUG:72].
                                try
                                {
                                    codes[inputCode] = ParseUnicode();
                                }
                                catch (OverflowException)
                                { Debug.WriteLine(String.Format("WARN: Unable to process Unicode sequence from {0} CMap: {1}", cmapName, Token)); }
                            }
                        }
                        else if (@operator.Equals(BeginBaseFontRangeOperator) ||
                                 @operator.Equals(BeginCIDRangeOperator))
                        {
                            /*
                             * NOTE: The first and second elements in each line are the beginning and
                             * ending valid input codes for the template font; the third element is
                             * the beginning character code for the range.
                             */
                            for (int itemIndex = 0, itemCount = (int)operands[0]; itemIndex < itemCount; itemIndex++)
                            {
                                // 1. Beginning input code.
                                MoveNext();
                                byte[] beginInputCode = ParseInputCode();
                                // 2. Ending input code.
                                MoveNext();
                                byte[] endInputCode = ParseInputCode();
                                // 3. Character codes.
                                MoveNext();
                                switch (TokenType)
                                {
                                case TokenTypeEnum.ArrayBegin:
                                {
                                    // Explicit list: each array element maps the next consecutive input code.
                                    // The beginning-code buffer is advanced in place, hence the ByteArray wrap per key.
                                    byte[] inputCode = beginInputCode;
                                    while (MoveNext() &&
                                           TokenType != TokenTypeEnum.ArrayEnd)
                                    {
                                        // FIXME: Unicode character sequences (such as ligatures) have not been supported yet [BUG:72].
                                        try
                                        {
                                            codes[new ByteArray(inputCode)] = ParseUnicode();
                                        }
                                        catch (OverflowException)
                                        { Debug.WriteLine(String.Format("WARN: Unable to process Unicode sequence from {0} CMap: {1}", cmapName, Token)); }
                                        OperationUtils.Increment(inputCode);
                                    }
                                    break;
                                }

                                default:
                                {
                                    // Single starting value: consecutive input codes map to consecutive character codes.
                                    byte[] inputCode   = beginInputCode;
                                    int    charCode    = ParseUnicode();
                                    int    rangeLength = ConvertUtils.ByteArrayToInt(endInputCode) - ConvertUtils.ByteArrayToInt(beginInputCode);
                                    if (rangeLength < 0)
                                    {
                                        // Malformed entry (ending code lower than beginning code): the equality-based
                                        // termination test below would not be met until the counter wrapped around,
                                        // flooding the dictionary; skip the entry instead.
                                        Debug.WriteLine(String.Format("WARN: Skipping inverted code range in {0} CMap", cmapName));
                                        break;
                                    }

                                    int endCharCode = charCode + rangeLength;
                                    while (true)
                                    {
                                        codes[new ByteArray(inputCode)] = charCode;
                                        if (charCode == endCharCode)
                                        {
                                            break;
                                        }

                                        OperationUtils.Increment(inputCode);
                                        charCode++;
                                    }
                                    break;
                                }
                                }
                            }
                        }
                        else if (@operator.Equals(UseCMapOperator))
                        {
                            // The referenced CMap replaces whatever has been collected so far.
                            codes = CMap.Get((string)operands[0]);
                        }
                        else if (@operator.Equals(DefOperator) && operands.Count != 0)
                        {
                            // Only the CMap name definition is of interest here.
                            if (CMapName.Equals(operands[0]))
                            {
                                cmapName = (string)operands[1];
                            }
                        }
                        // Operands belong to the keyword just processed.
                        operands.Clear();
                        break;
                    }

                    case TokenTypeEnum.ArrayBegin:
                    case TokenTypeEnum.DictionaryBegin:
                    {
                        // Skip up to the first array/dictionary end token (nesting is not tracked).
                        while (MoveNext())
                        {
                            if (TokenType == TokenTypeEnum.ArrayEnd ||
                                TokenType == TokenTypeEnum.DictionaryEnd)
                            {
                                break;
                            }
                        }
                        break;
                    }

                    case TokenTypeEnum.Comment:
                        // Skip.
                        break;

                    default:
                    {
                        operands.Add(Token);
                        break;
                    }
                    }
                }
            }
            return(codes);
        }
Beispiel #5
0
        /**
         * <summary>Parses the character-code-to-unicode mapping [PDF:1.6:5.9.1].</summary>
         * <remarks>The stream is rewound and scanned token by token: each integer token is remembered
         * as the entry count consumed by the next char/range section operator. Range entries whose
         * ending input code is lower than the beginning one are skipped.</remarks>
         * <returns>Mapped codes keyed by input code.</returns>
         * <exception cref="Exception">Thrown on section syntax errors, or wrapping a
         * FileFormatException raised while scanning.</exception>
         */
        public IDictionary <ByteArray, int> Parse(
            )
        {
            stream.Position = 0;
            IDictionary <ByteArray, int> codes = new Dictionary <ByteArray, int>();

            {
                // Latest integer token met so far; used as the entry count of the section that follows.
                int itemCount = 0;
                try
                {
                    while (MoveNext())
                    {
                        switch (tokenType)
                        {
                        case TokenTypeEnum.Keyword:
                        {
                            string operator_ = (String)token;
                            if (operator_.Equals(BeginBaseFontCharOperator) ||
                                operator_.Equals(BeginCIDCharOperator))
                            {
                                /*
                                 * NOTE: The first element on each line is the input code of the template font;
                                 * the second element is the code or name of the character.
                                 */
                                for (
                                    int itemIndex = 0;
                                    itemIndex < itemCount;
                                    itemIndex++
                                    )
                                {
                                    // 1. Input code.
                                    MoveNext();
                                    ByteArray inputCode = new ByteArray((byte[])token);
                                    // 2. Character...
                                    MoveNext();
                                    switch (tokenType)
                                    {
                                    case TokenTypeEnum.Hex: // ...code (hex).
                                        codes[inputCode] = ConvertUtils.ByteArrayToInt((byte[])token);
                                        break;

                                    case TokenTypeEnum.Integer: // ...code (plain).
                                        codes[inputCode] = (int)token;
                                        break;

                                    case TokenTypeEnum.Name: // ...name.
                                        codes[inputCode] = GlyphMapping.NameToCode((String)token);
                                        break;

                                    default:
                                        throw new Exception(
                                                  operator_ + " section syntax error: hex string, integer or name expected instead of " + tokenType
                                                  );
                                    }
                                }
                            }
                            else if (operator_.Equals(BeginBaseFontRangeOperator) ||
                                     operator_.Equals(BeginCIDRangeOperator))
                            {
                                /*
                                 * NOTE: The first and second elements in each line are the beginning and
                                 * ending valid input codes for the template font; the third element is
                                 * the beginning character code for the range.
                                 */
                                for (
                                    int itemIndex = 0;
                                    itemIndex < itemCount;
                                    itemIndex++
                                    )
                                {
                                    // 1. Beginning input code.
                                    MoveNext();
                                    byte[] beginInputCode = (byte[])token;
                                    // 2. Ending input code.
                                    MoveNext();
                                    byte[] endInputCode = (byte[])token;
                                    // 3. Character codes.
                                    MoveNext();
                                    switch (tokenType)
                                    {
                                    case TokenTypeEnum.Hex:
                                    case TokenTypeEnum.Integer:
                                    {
                                        // Single starting value: consecutive input codes map to consecutive character codes.
                                        byte[] inputCode = beginInputCode;
                                        int    charCode;
                                        switch (tokenType)
                                        {
                                        case TokenTypeEnum.Hex:
                                            charCode = ConvertUtils.ByteArrayToInt((byte[])token);
                                            break;

                                        case TokenTypeEnum.Integer:
                                            charCode = (int)token;
                                            break;

                                        default:
                                            // Not reachable from the enclosing cases; kept so that charCode is definitely assigned.
                                            throw new Exception(
                                                      operator_ + " section syntax error: hex string or integer expected instead of " + tokenType
                                                      );
                                        }
                                        int rangeLength = ConvertUtils.ByteArrayToInt(endInputCode) - ConvertUtils.ByteArrayToInt(beginInputCode);
                                        if (rangeLength < 0)
                                        {
                                            // Malformed entry (ending code lower than beginning code): the equality-based
                                            // termination test below would not be met until the counter wrapped around,
                                            // flooding the dictionary; skip the entry instead.
                                            break;
                                        }

                                        int endCharCode = charCode + rangeLength;
                                        while (true)
                                        {
                                            codes[new ByteArray(inputCode)] = charCode;
                                            if (charCode == endCharCode)
                                            {
                                                break;
                                            }

                                            OperationUtils.Increment(inputCode);
                                            charCode++;
                                        }
                                        break;
                                    }

                                    case TokenTypeEnum.ArrayBegin:
                                    {
                                        // Explicit list: each array element (converted through GlyphMapping.NameToCode)
                                        // maps the next consecutive input code; the buffer is advanced in place.
                                        byte[] inputCode = beginInputCode;
                                        while (MoveNext() &&
                                               tokenType != TokenTypeEnum.ArrayEnd)
                                        {
                                            codes[new ByteArray(inputCode)] = GlyphMapping.NameToCode((String)token);
                                            OperationUtils.Increment(inputCode);
                                        }
                                        break;
                                    }

                                    default:
                                        throw new Exception(
                                                  operator_ + " section syntax error: hex string, integer or name array expected instead of " + tokenType
                                                  );
                                    }
                                }
                            }
                            break;
                        }

                        case TokenTypeEnum.Integer:
                        {
                            itemCount = (int)token;
                            break;
                        }
                        }
                    }
                }
                catch (FileFormatException fileFormatException)
                { throw new Exception("Failed character map parsing.", fileFormatException); }
            }
            return(codes);
        }