Exemplo n.º 1
0
        /// <summary>
        /// Reads the cmap table: the two header fields, the directory of encoding
        /// records, and then every subtable the directory points at. The parsed
        /// subtables are stored in <c>charMaps</c>, one per directory record, in
        /// directory order.
        /// </summary>
        /// <param name="input">Reader positioned at the first byte of the cmap table.</param>
        protected override void ReadContentFrom(BinaryReader input)
        {
            // https://www.microsoft.com/typography/otspec/cmap.htm
            // Subtable offsets are applied relative to this position (see the Seek below).
            Stream stream = input.BaseStream;
            long tableOrigin = stream.Position;

            input.ReadUInt16(); // table version: consumed only to advance the stream
            int recordCount = input.ReadUInt16();

            // Pass 1: collect the whole directory before jumping around in the stream.
            var directory = new CMapEntry[recordCount];
            for (int n = 0; n < directory.Length; ++n)
            {
                ushort platform = input.ReadUInt16();
                ushort encoding = input.ReadUInt16();
                uint subtableOffset = input.ReadUInt32();
                directory[n] = new CMapEntry(platform, encoding, subtableOffset);
            }

            // Pass 2: visit each subtable and tag the result with the ids of its record.
            charMaps = new CharacterMap[recordCount];
            for (int n = 0; n < directory.Length; ++n)
            {
                CMapEntry record = directory[n];
                stream.Seek(tableOrigin + record.Offset, SeekOrigin.Begin);

                CharacterMap map = ReadCharacterMap(record, input);
                charMaps[n] = map;
                map.PlatformId = record.PlatformId;
                map.EncodingId = record.EncodingId;
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Reads a single cmap subtable starting at the current stream position.
        /// Only format 4 (segment mapping to delta values) is handled here.
        /// </summary>
        /// <param name="entry">Directory entry of the subtable; not read by this method.</param>
        /// <param name="input">Reader positioned at the subtable's format field.</param>
        /// <returns>A <see cref="CharacterMap"/> built from the format 4 segment arrays.</returns>
        /// <exception cref="ApplicationException">The subtable format is not 4.</exception>
        private static CharacterMap ReadCharacterMap(CMapEntry entry, BinaryReader input)
        {
            // The subtable's length field counts from its first byte (the format field),
            // so remember where the subtable starts before consuming anything.
            long tableStart = input.BaseStream.Position;

            ushort format = input.ReadUInt16();
            ushort length = input.ReadUInt16();
            if (format != 4)
            {
                throw new ApplicationException("Unknown cmap subtable: " + format); // TODO: Replace all applicationexceptions
            }

            input.ReadUInt16();                     // language (a.k.a. version), unused
            int segCount = input.ReadUInt16() / 2;  // the file stores segCount * 2
            input.ReadUInt16();                     // searchRange, unused
            input.ReadUInt16();                     // entrySelector, unused
            input.ReadUInt16();                     // rangeShift, unused

            ushort[] endCode = ReadUInt16Array(input, segCount);

            input.ReadUInt16();                     // reservedPad

            ushort[] startCode = ReadUInt16Array(input, segCount);
            ushort[] idDelta = ReadUInt16Array(input, segCount);
            ushort[] idRangeOffset = ReadUInt16Array(input, segCount);

            // The format carries no explicit count for glyphIdArray: it is whatever is
            // left between the current position and the end of the subtable. The old
            // code divided the number of bytes already consumed instead of the number
            // of bytes remaining, which yields an unrelated element count.
            long tableEnd = tableStart + length;
            int glyphIdArrayLength = (int)((tableEnd - input.BaseStream.Position) / sizeof(ushort));
            ushort[] glyphIdArray = ReadUInt16Array(input, glyphIdArrayLength);

            return new CharacterMap(segCount, startCode, endCode, idDelta, idRangeOffset, glyphIdArray);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Parses the cmap table. The header and the list of encoding records are read
        /// first; each record's subtable is then read by seeking to the record's
        /// offset, and the results are stored in <c>charMaps</c>.
        /// </summary>
        /// <param name="input">Reader positioned at the start of the cmap table.</param>
        protected override void ReadContentFrom(BinaryReader input)
        {
            // Spec: https://www.microsoft.com/typography/otspec/cmap.htm
            long cmapStart = input.BaseStream.Position;

            input.ReadUInt16(); // version field, value is not used
            ushort subtableCount = input.ReadUInt16();

            // Encoding records: platform id, encoding id, subtable offset.
            // C# evaluates constructor arguments left to right, so the three reads
            // happen in file order.
            CMapEntry[] records = new CMapEntry[subtableCount];
            for (int r = 0; r < records.Length; r++)
            {
                records[r] = new CMapEntry(input.ReadUInt16(), input.ReadUInt16(), input.ReadUInt32());
            }

            charMaps = new CharacterMap[subtableCount];
            for (int r = 0; r < records.Length; r++)
            {
                CMapEntry current = records[r];

                // Offsets are applied relative to the start of the cmap table.
                long subtableAt = cmapStart + current.Offset;
                input.BaseStream.Seek(subtableAt, SeekOrigin.Begin);

                CharacterMap parsed = ReadCharacterMap(current, input);
                charMaps[r] = parsed;
                parsed.PlatformId = current.PlatformId;
                parsed.EncodingId = current.EncodingId;
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Reads one cmap subtable from the current stream position. Format 4 is read
        /// as-is; format 0 is expanded into the same <see cref="CharacterMap"/> shape
        /// with a single segment covering codes 0..255.
        /// </summary>
        /// <param name="entry">Directory entry of the subtable; not read by this method.</param>
        /// <param name="input">Reader positioned at the subtable's format field.</param>
        /// <returns>The character map built from the subtable.</returns>
        /// <exception cref="NRasterizerException">The format is neither 0 nor 4.</exception>
        private static CharacterMap ReadCharacterMap(CMapEntry entry, BinaryReader input)
        {
            long subtableOrigin = input.BaseStream.Position;

            ushort format = input.ReadUInt16();
            ushort byteLength = input.ReadUInt16();

            // End of the subtable, measured from the position captured before the header.
            long subtableLimit = subtableOrigin + byteLength;

            switch (format)
            {
                case 4:
                {
                    input.ReadUInt16();                        // version, unused
                    int segments = input.ReadUInt16() / 2;     // stored as segCount * 2
                    input.ReadUInt16();                        // searchRange, unused
                    input.ReadUInt16();                        // entrySelector, unused
                    input.ReadUInt16();                        // rangeShift, unused

                    ushort[] lastCodes = ReadUInt16Array(input, segments);
                    input.ReadUInt16();                        // reserved
                    ushort[] firstCodes = ReadUInt16Array(input, segments);
                    ushort[] deltas = ReadUInt16Array(input, segments);
                    ushort[] rangeOffsets = ReadUInt16Array(input, segments);

                    // No explicit count is stored for the glyph id array; it fills the
                    // rest of the subtable.
                    int bytesLeft = (int)(subtableLimit - input.BaseStream.Position);
                    ushort[] glyphIds = ReadUInt16Array(input, bytesLeft / 2);

                    return new CharacterMap(segments, firstCodes, lastCodes, deltas, rangeOffsets, glyphIds);
                }

                case 0:
                {
                    input.ReadUInt16();                        // language, unused
                    byte[] narrow = input.ReadBytes(256);

                    // Widen each byte glyph index to 16 bits.
                    ushort[] wide = new ushort[256];
                    for (int code = 0; code < wide.Length; code++)
                    {
                        wide[code] = narrow[code];
                    }

                    // Expressed as a one-segment format 4 style map.
                    return new CharacterMap(1, new ushort[] { 0 }, new ushort[] { 255 }, null, null, wide);
                }

                default:
                    throw new NRasterizerException("Unknown cmap subtable: " + format);
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Reads the format field of a cmap subtable and hands the rest of the
        /// subtable to the reader for that format.
        /// </summary>
        /// <param name="entry">Directory entry of the subtable; not read by this method.</param>
        /// <param name="input">Reader positioned at the subtable's format field.</param>
        /// <returns>The character map produced by the format-specific reader.</returns>
        /// <exception cref="Exception">The format is not one of 0, 2, 4, 6 or 12.</exception>
        static CharacterMap ReadCharacterMap(CMapEntry entry, BinaryReader input)
        {
            ushort subtableFormat = input.ReadUInt16();

            switch (subtableFormat)
            {
                case 0:
                    return ReadFormat_0(input);

                case 2:
                    return ReadFormat_2(input);

                case 4:
                    return ReadFormat_4(input);

                case 6:
                    return ReadFormat_6(input);

                case 12:
                    return ReadFormat_12(input);

                default:
                    // TODO: Replace all application exceptions
                    throw new Exception("Unknown cmap subtable: " + subtableFormat);
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Reads the format field of a cmap subtable and dispatches to the matching
        /// format reader. Formats without a reader are reported through
        /// <c>Utils.WarnUnimplemented</c> and yield a <see cref="NullCharMap"/>.
        /// </summary>
        /// <param name="entry">Directory entry of the subtable; not read by this method.</param>
        /// <param name="input">Reader positioned at the subtable's format field.</param>
        /// <returns>The parsed character map, or a <see cref="NullCharMap"/> for other formats.</returns>
        static CharacterMap ReadCharacterMap(CMapEntry entry, BinaryReader input)
        {
            ushort subtableFormat = input.ReadUInt16();

            if (subtableFormat == 0)
            {
                return ReadFormat_0(input);
            }
            if (subtableFormat == 2)
            {
                return ReadFormat_2(input);
            }
            if (subtableFormat == 4)
            {
                return ReadFormat_4(input);
            }
            if (subtableFormat == 6)
            {
                return ReadFormat_6(input);
            }
            if (subtableFormat == 12)
            {
                return ReadFormat_12(input);
            }

            // No reader for this format: warn and fall back to the null map.
            Utils.WarnUnimplemented("cmap subtable format {0}", subtableFormat);
            return new NullCharMap();
        }
Exemplo n.º 7
0
        /// <summary>
        /// Reads one cmap subtable (format 0 or format 4) starting at the current
        /// stream position. Format 0 is expanded into the same
        /// <see cref="CharacterMap"/> shape as format 4, using a single segment that
        /// covers codes 0..255.
        /// </summary>
        /// <param name="entry">Directory entry of the subtable; not read by this method.</param>
        /// <param name="input">Reader positioned at the subtable's format field.</param>
        /// <returns>The character map built from the subtable.</returns>
        /// <exception cref="Exception">The format is neither 0 nor 4.</exception>
        static CharacterMap ReadCharacterMap(CMapEntry entry, BinaryReader input)
        {
            // The length field is the size of the whole subtable, counted from its
            // first byte (the format field). The start therefore has to be captured
            // BEFORE the header is consumed; measuring from after the header places
            // the end 4 bytes too far and pulls two stray words into glyphIdArray.
            long tableStartAt = input.BaseStream.Position;

            ushort format = input.ReadUInt16();
            ushort length = input.ReadUInt16();
            long tableEndAt = tableStartAt + length;

            switch (format)
            {
                default:
                    {
                        throw new Exception("Unknown cmap subtable: " + format); // TODO: Replace all application exceptions
                    }
                case 0:
                    {
                        //Format 0: Byte encoding table
                        //Type      Name                Description
                        //USHORT    format              Format number is set to 0.
                        //USHORT    length              This is the length in bytes of the subtable.
                        //USHORT    language            See the note on the language field below.
                        //BYTE      glyphIdArray[256]   An array that maps character codes to glyph index values.
                        //A simple 1 to 1 mapping of character codes to glyph indices; only 256 glyphs are reachable.

                        input.ReadUInt16(); // language, unused
                        byte[] only256Glyphs = input.ReadBytes(256);
                        ushort[] only256UInt16Glyphs = new ushort[256];
                        for (int i = 255; i >= 0; --i)
                        {
                            //expand each byte glyph index to 16 bits
                            only256UInt16Glyphs[i] = only256Glyphs[i];
                        }
                        //convert to format4 cmap table: one segment, codes 0..255
                        return new CharacterMap(1, new ushort[] { 0 }, new ushort[] { 255 }, null, null, only256UInt16Glyphs);
                    }
                case 4:
                    {
                        //Format 4: Segment mapping to delta values.
                        //The format-dependent data is divided into three parts, in this order:
                        //    a four-word header giving parameters for an optimized search of the segment list;
                        //    four parallel arrays describing the segments (one segment per contiguous range of codes);
                        //    a variable-length array of glyph IDs (unsigned words).

                        //Note on the language field in 'cmap' subtables:
                        //it must be zero for all subtables whose platform ID is not Macintosh (platform ID 1).
                        //For Macintosh subtables it is the Macintosh language ID plus one,
                        //or zero if the subtable is not language-specific.
                        input.ReadUInt16(); // language, unused

                        ushort segCountX2 = input.ReadUInt16(); //2 * segCount
                        input.ReadUInt16(); //searchRange   = 2 * (2**FLOOR(log2(segCount))), unused
                        input.ReadUInt16(); //entrySelector = log2(searchRange / 2), unused
                        input.ReadUInt16(); //rangeShift    = 2 * segCount - searchRange, unused
                        int segCount = segCountX2 / 2;

                        //Ending character code for each segment. The final endCode must be 0xFFFF
                        //so that a search over the segments always terminates.
                        ushort[] endCode = Utils.ReadUInt16Array(input, segCount);

                        input.ReadUInt16(); // reservedPad = 0
                        ushort[] startCode = Utils.ReadUInt16Array(input, segCount); //Starting character code for each segment
                        ushort[] idDelta = Utils.ReadUInt16Array(input, segCount); //Delta for all character codes in segment
                        ushort[] idRangeOffset = Utils.ReadUInt16Array(input, segCount); //Offset in bytes to glyphIdArray, or 0

                        //glyphIdArray has no explicit count: it occupies the rest of the subtable.
                        long remainingLen = tableEndAt - input.BaseStream.Position;
                        int glyphIdCount = (int)(remainingLen / 2);
                        ushort[] glyphIdArray = Utils.ReadUInt16Array(input, glyphIdCount);
                        return new CharacterMap(segCount, startCode, endCode, idDelta, idRangeOffset, glyphIdArray);
                    }
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Reads one cmap subtable (format 0, 4 or 6) starting at the current stream
        /// position. Formats 0 and 4 are built through
        /// <c>CharacterMap.BuildFromFormat4</c>; format 6 through
        /// <c>CharacterMap.BuildFromFormat6</c>.
        /// </summary>
        /// <param name="entry">Directory entry of the subtable; not read by this method.</param>
        /// <param name="input">Reader positioned at the subtable's format field.</param>
        /// <returns>The character map built from the subtable.</returns>
        /// <exception cref="Exception">The format is not 0, 4 or 6.</exception>
        static CharacterMap ReadCharacterMap(CMapEntry entry, BinaryReader input)
        {
            // The length field is the size of the whole subtable, counted from its
            // first byte (the format field). The start therefore has to be captured
            // BEFORE the header is consumed; measuring from after the header places
            // the end 4 bytes too far and pulls two stray words into glyphIdArray.
            long tableStartAt = input.BaseStream.Position;

            ushort format = input.ReadUInt16();
            ushort length = input.ReadUInt16();
            long tableEndAt = tableStartAt + length;

            switch (format)
            {
                default:
                {
                    throw new Exception("Unknown cmap subtable: " + format);         // TODO: Replace all application exceptions
                }

                case 0:
                {
                    //Format 0: Byte encoding table
                    //Type      Name                Description
                    //USHORT    format              Format number is set to 0.
                    //USHORT    length              This is the length in bytes of the subtable.
                    //USHORT    language            See the note on the language field below.
                    //BYTE      glyphIdArray[256]   An array that maps character codes to glyph index values.
                    //A simple 1 to 1 mapping of character codes to glyph indices; only 256 glyphs are reachable.

                    input.ReadUInt16(); // language, unused
                    byte[] only256Glyphs = input.ReadBytes(256);
                    ushort[] only256UInt16Glyphs = new ushort[256];
                    for (int i = 255; i >= 0; --i)
                    {
                        //expand each byte glyph index to 16 bits
                        only256UInt16Glyphs[i] = only256Glyphs[i];
                    }
                    //convert to format4 cmap table: one segment, codes 0..255
                    return CharacterMap.BuildFromFormat4(1, new ushort[] { 0 }, new ushort[] { 255 }, null, null, only256UInt16Glyphs);
                }

                case 4:
                {
                    //Format 4: Segment mapping to delta values.
                    //The format-dependent data is divided into three parts, in this order:
                    //    a four-word header giving parameters for an optimized search of the segment list;
                    //    four parallel arrays describing the segments (one segment per contiguous range of codes);
                    //    a variable-length array of glyph IDs (unsigned words).

                    //Note on the language field in 'cmap' subtables:
                    //it must be zero for all subtables whose platform ID is not Macintosh (platform ID 1).
                    //For Macintosh subtables it is the Macintosh language ID plus one,
                    //or zero if the subtable is not language-specific.
                    input.ReadUInt16(); // language, unused

                    ushort segCountX2 = input.ReadUInt16(); //2 * segCount
                    input.ReadUInt16(); //searchRange   = 2 * (2**FLOOR(log2(segCount))), unused
                    input.ReadUInt16(); //entrySelector = log2(searchRange / 2), unused
                    input.ReadUInt16(); //rangeShift    = 2 * segCount - searchRange, unused
                    int segCount = segCountX2 / 2;

                    //Ending character code for each segment. The final endCode must be 0xFFFF
                    //so that a search over the segments always terminates.
                    ushort[] endCode = Utils.ReadUInt16Array(input, segCount);

                    input.ReadUInt16(); // reservedPad = 0
                    ushort[] startCode = Utils.ReadUInt16Array(input, segCount);     //Starting character code for each segment
                    ushort[] idDelta = Utils.ReadUInt16Array(input, segCount);       //Delta for all character codes in segment
                    ushort[] idRangeOffset = Utils.ReadUInt16Array(input, segCount); //Offset in bytes to glyphIdArray, or 0

                    //glyphIdArray has no explicit count: it occupies the rest of the subtable.
                    long remainingLen = tableEndAt - input.BaseStream.Position;
                    int glyphIdCount = (int)(remainingLen / 2);
                    ushort[] glyphIdArray = Utils.ReadUInt16Array(input, glyphIdCount);
                    return CharacterMap.BuildFromFormat4(segCount, startCode, endCode, idDelta, idRangeOffset, glyphIdArray);
                }

                case 6:
                {
                    //Format 6: Trimmed table mapping
                    //Type      Name                        Description
                    //USHORT    format                      Format number is set to 6.
                    //USHORT    length                      This is the length in bytes of the subtable.
                    //USHORT    language                    See the note on the language field above.
                    //USHORT    firstCode                   First character code of subrange.
                    //USHORT    entryCount                  Number of character codes in subrange.
                    //USHORT    glyphIdArray[entryCount]    Array of glyph index values for character codes in the range.
                    //
                    //firstCode and entryCount specify a subrange of the possible character codes.
                    //Codes outside of this subrange are mapped to glyph index 0; inside it, the
                    //offset of the code from firstCode is the index into glyphIdArray.

                    input.ReadUInt16(); // language, unused
                    ushort firstCode = input.ReadUInt16();
                    ushort entryCount = input.ReadUInt16();
                    ushort[] glyphIdArray = Utils.ReadUInt16Array(input, entryCount);

                    return CharacterMap.BuildFromFormat6(firstCode, glyphIdArray);
                }
            }
        }