/**
  <summary>Reads the entries of a CID range section and registers them in the given CMap.</summary>
  <param name="codes">CMap receiving the parsed mappings.</param>
  <param name="operands">Operands collected before the section operator; the first one is
  read as the number of entries in the section.</param>
*/
private void ParseCIDRange(CMap codes, IList<object> operands)
{
    for (int remaining = (int)operands[0]; remaining > 0; remaining--)
    {
        // Entry layout: <first input code> <last input code> <first mapped code>.
        MoveNext();
        byte[] firstCodeBytes = ParseInputCode();
        int firstCode = ConvertUtils.ByteArrayToInt(firstCodeBytes);

        MoveNext();
        byte[] lastCodeBytes = ParseInputCode();
        int lastCode = ConvertUtils.ByteArrayToInt(lastCodeBytes);

        MoveNext();
        int mapped = ParseUnicode();

        bool hasLongCode = firstCodeBytes.Length > 2 || lastCodeBytes.Length > 2;
        if (hasLongCode)
        {
            // TODO Is this even possible?
            // Codes wider than two bytes are registered one by one, advancing the input code
            // in place alongside the mapped code.
            int lastMapped = mapped + lastCode - firstCode;
            while (mapped <= lastMapped)
            {
                int cid = ConvertUtils.ByteArrayToInt(firstCodeBytes);
                codes.AddCIDMapping(mapped++, cid);
                OperationUtils.Increment(firstCodeBytes);
            }
        }
        else if (firstCode == lastCode)
        {
            // Some CMaps use a CID range to map a single value.
            codes.AddCIDMapping(mapped, firstCode);
        }
        else
        {
            codes.AddCIDRange((char)firstCode, (char)lastCode, mapped);
        }
    }
}
/**
  <summary>Reads the entries of a base-font range section and registers them in the given CMap.</summary>
  <remarks>The first and second elements of each entry are the beginning and ending valid input
  codes for the template font; the third element is either the beginning character code for the
  range or an array holding one destination per input code.</remarks>
  <param name="codes">CMap receiving the parsed mappings.</param>
  <param name="operands">Operands collected before the section operator; the first one is
  read as the number of entries in the section.</param>
*/
private void ParseBFRange(CMap codes, IList<object> operands)
{
    for (int remaining = (int)operands[0]; remaining > 0; remaining--)
    {
        // Beginning input code.
        MoveNext();
        byte[] firstCodeBytes = ParseInputCode();
        int firstCode = ConvertUtils.ByteArrayToInt(firstCodeBytes);

        // Ending input code.
        MoveNext();
        byte[] lastCodeBytes = ParseInputCode();
        int lastCode = ConvertUtils.ByteArrayToInt(lastCodeBytes);

        // Destination: either an array of values or a single starting value.
        MoveNext();
        if (TokenType == TokenTypeEnum.ArrayBegin)
        {
            // One destination per input code; the input code buffer is advanced in place.
            while (MoveNext() && TokenType != TokenTypeEnum.ArrayEnd)
            {
                // FIXME: Unicode character sequences (such as ligatures) have not been supported yet [BUG:72].
                try
                {
                    codes.AddCharMapping(firstCodeBytes, ParseUnicode());
                }
                catch (OverflowException)
                {
                    Debug.WriteLine($"WARN: Unable to process Unicode sequence from {codes.CMapName} CMap: {Token}");
                }
                OperationUtils.Increment(firstCodeBytes);
            }
            continue;
        }

        byte[] targetBytes = ParseInputCode();
        if (targetBytes.Length == 0)
        {
            // Nothing to map for this entry.
            continue;
        }

        // Some PDFs use the malformed bfrange <0000> <FFFF> <0000>; it is handled as a mapping
        // over the whole range instead of being cut after 255 entries.
        // TODO find a more efficient method to represent all values for an identity mapping
        bool isWholeRangeFromZero = targetBytes.Length == 2
            && firstCode == 0
            && lastCode == 0xffff
            && targetBytes[0] == 0
            && targetBytes[1] == 0;
        if (isWholeRangeFromZero)
        {
            // NOTE(review): each pass rewrites the byte at index 1 of both buffers and requests
            // 0xff values; whether this covers every code depends on AddMappingFrombfrange,
            // which is not visible here - confirm.
            for (int step = 0; step < 256; step++)
            {
                byte stepByte = (byte)step;
                firstCodeBytes[1] = stepByte;
                targetBytes[1] = stepByte;
                AddMappingFrombfrange(codes, firstCodeBytes, 0xff, targetBytes);
            }
        }
        else
        {
            // PDFBOX-4661: avoid overflow of the last byte, all following values are undefined.
            int rangeSpan = lastCode - firstCode;
            int lastByteHeadroom = 255 - (targetBytes[targetBytes.Length - 1] & 0xFF);
            int valueCount = Math.Min(rangeSpan, lastByteHeadroom) + 1;
            AddMappingFrombfrange(codes, firstCodeBytes, valueCount, targetBytes);
        }
    }
}
/**
  <summary>Parses the character-code-to-unicode mapping [PDF:1.6:5.9.1].</summary>
  <remarks>The stream is rewound and scanned token by token. An integer token is remembered as
  the entry count of the section that follows; char sections contribute one mapping per entry,
  range sections one mapping per code in the range. A range whose ending input code precedes
  its beginning input code contributes only the mapping of its beginning code.</remarks>
  <returns>Dictionary associating each input code with its character code.</returns>
*/
public IDictionary<ByteArray, int> Parse()
{
    Stream.Position = 0;
    IDictionary<ByteArray, int> codes = new Dictionary<ByteArray, int>();
    // Most recent integer token: the number of entries in the next section.
    int itemCount = 0;
    while (MoveNext())
    {
        switch (TokenType)
        {
            case TokenTypeEnum.Keyword:
            {
                string operator_ = (string)Token;
                if (operator_.Equals(BeginBaseFontCharOperator)
                    || operator_.Equals(BeginCIDCharOperator))
                {
                    /*
                      NOTE: The first element on each line is the input code of the template font;
                      the second element is the code or name of the character.
                    */
                    for (int itemIndex = 0; itemIndex < itemCount; itemIndex++)
                    {
                        MoveNext();
                        ByteArray inputCode = new ByteArray(ParseInputCode());
                        MoveNext();
                        codes[inputCode] = ParseUnicode();
                    }
                }
                else if (operator_.Equals(BeginBaseFontRangeOperator)
                    || operator_.Equals(BeginCIDRangeOperator))
                {
                    /*
                      NOTE: The first and second elements in each line are the beginning and
                      ending valid input codes for the template font; the third element is
                      the beginning character code for the range.
                    */
                    for (int itemIndex = 0; itemIndex < itemCount; itemIndex++)
                    {
                        // 1. Beginning input code.
                        MoveNext();
                        byte[] beginInputCode = ParseInputCode();
                        // 2. Ending input code.
                        MoveNext();
                        byte[] endInputCode = ParseInputCode();
                        // 3. Character codes.
                        MoveNext();
                        // The input code buffer is advanced in place while walking the range.
                        byte[] inputCode = beginInputCode;
                        if (TokenType == TokenTypeEnum.ArrayBegin)
                        {
                            // Explicit list: one character code per consecutive input code.
                            while (MoveNext() && TokenType != TokenTypeEnum.ArrayEnd)
                            {
                                codes[new ByteArray(inputCode)] = ParseUnicode();
                                OperationUtils.Increment(inputCode);
                            }
                        }
                        else
                        {
                            int charCode = ParseUnicode();
                            // Computed before the buffer is incremented, as inputCode aliases beginInputCode.
                            int endCharCode = charCode
                                + (ConvertUtils.ByteArrayToInt(endInputCode) - ConvertUtils.ByteArrayToInt(beginInputCode));
                            codes[new ByteArray(inputCode)] = charCode;
                            // Bounded with '<' rather than an equality test, so that a malformed range
                            // (ending code before beginning code) cannot keep the loop running.
                            while (charCode < endCharCode)
                            {
                                OperationUtils.Increment(inputCode);
                                charCode++;
                                codes[new ByteArray(inputCode)] = charCode;
                            }
                        }
                    }
                }
                break;
            }
            case TokenTypeEnum.Integer:
            {
                itemCount = (int)Token;
                break;
            }
        }
    }
    return codes;
}
/**
  <summary>Parses the character-code-to-unicode mapping [PDF:1.6:5.9.1].</summary>
  <remarks>The stream is rewound and scanned token by token. Non-keyword tokens are collected
  as operands of the next keyword; arrays, dictionaries and comments outside a section are
  skipped. Char sections contribute one mapping per entry, range sections one mapping per code
  in the range. A range whose ending input code precedes its beginning input code contributes
  only the mapping of its beginning code.</remarks>
  <returns>Dictionary associating each input code with its character code.</returns>
*/
public IDictionary<ByteArray, int> Parse()
{
    Stream.Seek(0);
    IDictionary<ByteArray, int> codes = new Dictionary<ByteArray, int>();
    // Operands accumulated since the last keyword.
    IList<object> operands = new List<object>();
    // Name of the CMap being parsed, once its definition has been met (used in warnings only).
    string cmapName = null;
    while (MoveNext())
    {
        switch (TokenType)
        {
            case TokenTypeEnum.Keyword:
            {
                string @operator = (string)Token;
                if (@operator.Equals(BeginBaseFontCharOperator)
                    || @operator.Equals(BeginCIDCharOperator))
                {
                    /*
                      NOTE: The first element on each line is the input code of the template font;
                      the second element is the code or name of the character.
                    */
                    for (int itemIndex = 0, itemCount = (int)operands[0]; itemIndex < itemCount; itemIndex++)
                    {
                        MoveNext();
                        ByteArray inputCode = new ByteArray(ParseInputCode());
                        MoveNext();
                        // FIXME: Unicode character sequences (such as ligatures) have not been supported yet [BUG:72].
                        try
                        {
                            codes[inputCode] = ParseUnicode();
                        }
                        catch (OverflowException)
                        {
                            Debug.WriteLine(String.Format("WARN: Unable to process Unicode sequence from {0} CMap: {1}", cmapName, Token));
                        }
                    }
                }
                else if (@operator.Equals(BeginBaseFontRangeOperator)
                    || @operator.Equals(BeginCIDRangeOperator))
                {
                    /*
                      NOTE: The first and second elements in each line are the beginning and
                      ending valid input codes for the template font; the third element is
                      the beginning character code for the range.
                    */
                    for (int itemIndex = 0, itemCount = (int)operands[0]; itemIndex < itemCount; itemIndex++)
                    {
                        // 1. Beginning input code.
                        MoveNext();
                        byte[] beginInputCode = ParseInputCode();
                        // 2. Ending input code.
                        MoveNext();
                        byte[] endInputCode = ParseInputCode();
                        // 3. Character codes.
                        MoveNext();
                        // The input code buffer is advanced in place while walking the range.
                        byte[] inputCode = beginInputCode;
                        if (TokenType == TokenTypeEnum.ArrayBegin)
                        {
                            // Explicit list: one character code per consecutive input code.
                            while (MoveNext() && TokenType != TokenTypeEnum.ArrayEnd)
                            {
                                // FIXME: Unicode character sequences (such as ligatures) have not been supported yet [BUG:72].
                                try
                                {
                                    codes[new ByteArray(inputCode)] = ParseUnicode();
                                }
                                catch (OverflowException)
                                {
                                    Debug.WriteLine(String.Format("WARN: Unable to process Unicode sequence from {0} CMap: {1}", cmapName, Token));
                                }
                                OperationUtils.Increment(inputCode);
                            }
                        }
                        else
                        {
                            int charCode = ParseUnicode();
                            // Computed before the buffer is incremented, as inputCode aliases beginInputCode.
                            int endCharCode = charCode
                                + (ConvertUtils.ByteArrayToInt(endInputCode) - ConvertUtils.ByteArrayToInt(beginInputCode));
                            codes[new ByteArray(inputCode)] = charCode;
                            // Bounded with '<' rather than an equality test, so that a malformed range
                            // (ending code before beginning code) cannot keep the loop running.
                            while (charCode < endCharCode)
                            {
                                OperationUtils.Increment(inputCode);
                                charCode++;
                                codes[new ByteArray(inputCode)] = charCode;
                            }
                        }
                    }
                }
                else if (@operator.Equals(UseCMapOperator))
                {
                    // Continue from the mappings of the referenced CMap.
                    codes = CMap.Get((string)operands[0]);
                }
                else if (@operator.Equals(DefOperator) && operands.Count != 0)
                {
                    if (CMapName.Equals(operands[0]))
                    {
                        cmapName = (string)operands[1];
                    }
                }
                // Operands belong to the keyword just processed.
                operands.Clear();
                break;
            }
            case TokenTypeEnum.ArrayBegin:
            case TokenTypeEnum.DictionaryBegin:
            {
                // Skip up to the first array or dictionary end token.
                while (MoveNext())
                {
                    if (TokenType == TokenTypeEnum.ArrayEnd
                        || TokenType == TokenTypeEnum.DictionaryEnd)
                    {
                        break;
                    }
                }
                break;
            }
            case TokenTypeEnum.Comment:
                // Skip.
                break;
            default:
            {
                operands.Add(Token);
                break;
            }
        }
    }
    return codes;
}
/**
  <summary>Parses the character-code-to-unicode mapping [PDF:1.6:5.9.1].</summary>
  <remarks>The stream is rewound and scanned token by token. An integer token is remembered as
  the entry count of the section that follows; char sections contribute one mapping per entry,
  range sections one mapping per code in the range. A range whose ending input code precedes
  its beginning input code contributes only the mapping of its beginning code.</remarks>
  <returns>Dictionary associating each input code with its character code.</returns>
  <exception cref="Exception">A section entry has an unexpected token type, or the underlying
  scan raised a FileFormatException.</exception>
*/
public IDictionary<ByteArray, int> Parse()
{
    stream.Position = 0;
    IDictionary<ByteArray, int> codes = new Dictionary<ByteArray, int>();
    // Most recent integer token: the number of entries in the next section.
    int itemCount = 0;
    try
    {
        while (MoveNext())
        {
            switch (tokenType)
            {
                case TokenTypeEnum.Keyword:
                {
                    string operator_ = (String)token;
                    if (operator_.Equals(BeginBaseFontCharOperator)
                        || operator_.Equals(BeginCIDCharOperator))
                    {
                        /*
                          NOTE: The first element on each line is the input code of the template font;
                          the second element is the code or name of the character.
                        */
                        for (int itemIndex = 0; itemIndex < itemCount; itemIndex++)
                        {
                            // 1. Input code.
                            MoveNext();
                            ByteArray inputCode = new ByteArray((byte[])token);
                            // 2. Character...
                            MoveNext();
                            switch (tokenType)
                            {
                                case TokenTypeEnum.Hex: // ...code (hex).
                                    codes[inputCode] = ConvertUtils.ByteArrayToInt((byte[])token);
                                    break;
                                case TokenTypeEnum.Integer: // ...code (plain).
                                    codes[inputCode] = (int)token;
                                    break;
                                case TokenTypeEnum.Name: // ...name.
                                    codes[inputCode] = GlyphMapping.NameToCode((String)token);
                                    break;
                                default:
                                    throw new Exception(
                                        operator_ + " section syntax error: hex string, integer or name expected instead of " + tokenType
                                    );
                            }
                        }
                    }
                    else if (operator_.Equals(BeginBaseFontRangeOperator)
                        || operator_.Equals(BeginCIDRangeOperator))
                    {
                        /*
                          NOTE: The first and second elements in each line are the beginning and
                          ending valid input codes for the template font; the third element is
                          the beginning character code for the range.
                        */
                        for (int itemIndex = 0; itemIndex < itemCount; itemIndex++)
                        {
                            // 1. Beginning input code.
                            MoveNext();
                            byte[] beginInputCode = (byte[])token;
                            // 2. Ending input code.
                            MoveNext();
                            byte[] endInputCode = (byte[])token;
                            // 3. Character codes.
                            MoveNext();
                            switch (tokenType)
                            {
                                case TokenTypeEnum.Hex:
                                case TokenTypeEnum.Integer:
                                {
                                    // The input code buffer is advanced in place while walking the range.
                                    byte[] inputCode = beginInputCode;
                                    // Only Hex and Integer reach this point, so two alternatives suffice.
                                    int charCode = tokenType == TokenTypeEnum.Hex
                                        ? ConvertUtils.ByteArrayToInt((byte[])token)
                                        : (int)token;
                                    // Computed before the buffer is incremented, as inputCode aliases beginInputCode.
                                    int endCharCode = charCode
                                        + (ConvertUtils.ByteArrayToInt(endInputCode) - ConvertUtils.ByteArrayToInt(beginInputCode));
                                    codes[new ByteArray(inputCode)] = charCode;
                                    // Bounded with '<' rather than an equality test, so that a malformed range
                                    // (ending code before beginning code) cannot keep the loop running.
                                    while (charCode < endCharCode)
                                    {
                                        OperationUtils.Increment(inputCode);
                                        charCode++;
                                        codes[new ByteArray(inputCode)] = charCode;
                                    }
                                    break;
                                }
                                case TokenTypeEnum.ArrayBegin:
                                {
                                    // Explicit list: one character name per consecutive input code.
                                    byte[] inputCode = beginInputCode;
                                    while (MoveNext() && tokenType != TokenTypeEnum.ArrayEnd)
                                    {
                                        codes[new ByteArray(inputCode)] = GlyphMapping.NameToCode((String)token);
                                        OperationUtils.Increment(inputCode);
                                    }
                                    break;
                                }
                                default:
                                    throw new Exception(
                                        operator_ + " section syntax error: hex string, integer or name array expected instead of " + tokenType
                                    );
                            }
                        }
                    }
                    break;
                }
                case TokenTypeEnum.Integer:
                {
                    itemCount = (int)token;
                    break;
                }
            }
        }
    }
    catch (FileFormatException fileFormatException)
    {
        throw new Exception("Failed character map parsing.", fileFormatException);
    }
    return codes;
}