/**
  <summary>Loads format-4 cmap subtable (Segment mapping to delta values, i.e. Microsoft standard
  character to glyph index mapping table for fonts that support Unicode ranges other than the
  range [U+D800 - U+DFFF] (defined as Surrogates Area, in Unicode v 3.0)).</summary>
  <remarks>
    <para>Reads the subtable sequentially from <c>FontData</c>, starting at the table length field
    (NOTE(review): this presumes the caller has already consumed the leading format field --
    confirm against the call site). On completion, <c>Symbolic</c> is set to false and
    <c>GlyphIndexes</c> is replaced by a new dictionary mapping character codes to glyph indexes.</para>
    <para>Character code 0xFFFF is never added to the mapping: the per-segment loop uses an
    exclusive upper bound which is not extended when the segment's end code is 0xFFFF.</para>
    <para>Glyph-ID references that fall outside the glyph ID array (malformed subtable) are mapped
    to glyph index 0 (missing glyph) instead of raising an exception.</para>
  </remarks>
*/
private void LoadCMapFormat4(
  )
{
  /*
    NOTE: This format is used when the character codes for the characters represented by a font
    fall into several contiguous ranges, possibly with holes in some or all of the ranges (i.e.
    some of the codes in a range may not have a representation in the font).
    The format-dependent data is divided into three parts, which must occur in the following
    order:
      1. A header gives parameters for an optimized search of the segment list;
      2. Four parallel arrays (end characters, start characters, deltas and range offsets)
        describe the segments (one segment for each contiguous range of codes);
      3. A variable-length array of glyph IDs.
  */
  Symbolic = false;

  // 1. Header.
  // Get the table length (in bytes)!
  int tableLength = FontData.ReadUnsignedShort(); // USHORT.
  // Skip to the segment count (1 word ignored: language)!
  FontData.Skip(2);
  // Get the segment count (the stored field is twice the segment count)!
  int segmentCount = FontData.ReadUnsignedShort() / 2;

  // 2. Arrays describing the segments.
  // Skip to the array of end character codes (3 words ignored: searchRange, entrySelector, rangeShift)!
  FontData.Skip(6);
  // End character code for each segment.
  int[] endCodes = new int[segmentCount]; // USHORT.
  for(
    int index = 0;
    index < segmentCount;
    index++
    )
  {endCodes[index] = FontData.ReadUnsignedShort();}

  // Skip to the array of start character codes (1 word ignored: reservedPad)!
  FontData.Skip(2);
  // Start character code for each segment.
  int[] startCodes = new int[segmentCount]; // USHORT.
  for(
    int index = 0;
    index < segmentCount;
    index++
    )
  {startCodes[index] = FontData.ReadUnsignedShort();}

  // Delta for all character codes in segment.
  short[] deltas = new short[segmentCount]; // SHORT.
  for(
    int index = 0;
    index < segmentCount;
    index++
    )
  {deltas[index] = FontData.ReadShort();}

  // Offsets into glyph index array.
  int[] rangeOffsets = new int[segmentCount]; // USHORT.
  for(
    int index = 0;
    index < segmentCount;
    index++
    )
  {rangeOffsets[index] = FontData.ReadUnsignedShort();}

  // 3. Glyph ID array.
  /*
    NOTE: There's no explicit field defining the array length;
    it must be inferred from the space left by the known fields.
  */
  int glyphIndexCount = tableLength / 2 // Number of 16-bit words inside the table.
    - 8 // Number of single-word header fields (8 fields: format, length, language, segCountX2, searchRange, entrySelector, rangeShift, reservedPad).
    - segmentCount * 4; // Number of single-word items in the arrays describing the segments (4 arrays of segmentCount items).
  /*
    NOTE: A declared table length inconsistent with the segment count would yield a negative
    count, which is not a valid array length; in such case no glyph ID is available.
  */
  if(glyphIndexCount < 0)
  {glyphIndexCount = 0;}
  int[] glyphIds = new int[glyphIndexCount]; // USHORT.
  for(
    int index = 0;
    index < glyphIds.Length;
    index++
    )
  {glyphIds[index] = FontData.ReadUnsignedShort();}

  GlyphIndexes = new Dictionary<int,int>(glyphIndexCount);
  // Iterating through the segments...
  for(
    int segmentIndex = 0;
    segmentIndex < segmentCount;
    segmentIndex++
    )
  {
    int startCode = startCodes[segmentIndex];
    int delta = deltas[segmentIndex];
    int rangeOffset = rangeOffsets[segmentIndex];
    /*
      NOTE: The final segment's endCode MUST be 0xFFFF. This segment need not (but MAY)
      contain any valid mappings (it can just map the single character code 0xFFFF to
      missing glyph). However, the segment MUST be present.
      Here the end code is converted into an exclusive loop bound, except when it is 0xFFFF
      (so that character code 0xFFFF is left unmapped).
    */
    int endCode = endCodes[segmentIndex];
    if(endCode < 0xFFFF)
    {endCode++;}

    // Iterating inside the current segment...
    for(
      int code = startCode;
      code < endCode;
      code++
      )
    {
      int glyphIndex;
      // Doesn't the mapping of character codes rely on glyph ID?
      if(rangeOffset == 0) // No glyph-ID reliance.
      {
        /*
          NOTE: If the range offset is 0, the delta value is added directly to the character
          code to get the corresponding glyph index. The delta arithmetic is modulo 65536.
        */
        glyphIndex = (code + delta) & 0xFFFF;
      }
      else // Glyph-ID reliance.
      {
        /*
          NOTE: If the range offset is NOT 0, the mapping of character codes relies on glyph ID.
          The character code offset from start code is added to the range offset. This sum is
          used as an offset from the current location within range offset itself to index out
          the correct glyph ID. This indexing trick works because the glyph ID array
          immediately follows the range offset array in the font file. The C expression that
          yields the glyph ID is:
            *(idRangeOffset[i]/2 + (c - startCode[i]) + &idRangeOffset[i])
          As safe C# semantics don't deal directly with pointers, the same displacement is
          expressed in 16-bit words relative to the beginning of the glyph ID array, in order
          to yield an index instead of an address.
        */
        int glyphIdIndex = rangeOffset / 2 // 16-bit word range offset.
          + (code - startCode) // Character code offset from start code.
          - (segmentCount - segmentIndex); // Physical offset between the offsets into glyph index array and the glyph index array.
        if(glyphIdIndex < 0
          || glyphIdIndex >= glyphIds.Length) // Reference outside the glyph ID array (malformed subtable).
        {glyphIndex = 0;} // Missing glyph.
        else
        {
          int glyphId = glyphIds[glyphIdIndex];
          /*
            NOTE: If the glyph ID is 0, it indicates the missing glyph and MUST be left as is;
            otherwise, the delta value is added to the glyph ID to get the corresponding glyph
            index. The delta arithmetic is modulo 65536.
          */
          glyphIndex = (glyphId == 0
            ? 0
            : (glyphId + delta) & 0xFFFF);
        }
      }

      GlyphIndexes[
        code // Character code.
        ] = glyphIndex; // Glyph index.
    }
  }
}