/// <summary>
/// Reads the body of a PDF hexadecimal string using fail-fast semantics: scanning of hex digits
/// stops at the first character that is neither a hex digit, whitespace nor the closing bracket.
/// This allows malformed input to be detected so the caller can skip to the start of the next object.
/// The opening '&lt;' is expected to have been consumed already.
/// </summary>
/// <param name="reader">The source positioned directly after the opening bracket.</param>
/// <returns>The result of <c>CosString.ParseHex</c> applied to the collected hex digits.</returns>
/// <exception cref="IOException">The end of the input was reached before a closing '&gt;' was found.</exception>
private static CosString ParseHexString(IRandomAccessRead reader)
{
    var hexDigits = new StringBuilder();

    for (;;)
    {
        int current = reader.Read();

        // Collect hex digits as they arrive.
        if (ReadHelper.IsHexDigit((char)current))
        {
            hexDigits.Append((char)current);
            continue;
        }

        // Regular end of the hex string.
        if (current == '>')
        {
            break;
        }

        if (current < 0)
        {
            throw new IOException("Missing closing bracket for hex string. Reached EOS.");
        }

        // Whitespace between digits is silently skipped.
        bool isIgnorable = current == ' '
            || current == '\n'
            || current == '\t'
            || current == '\r'
            || current == '\b'
            || current == '\f';

        if (isIgnorable)
        {
            continue;
        }

        // An invalid character was found: drop a dangling half of a hex pair, if any.
        if ((hexDigits.Length & 1) == 1)
        {
            hexDigits.Length -= 1;
        }

        // Consume input up to and including the closing bracket. Malformed PDFs may never
        // supply one, so reaching the end of the input terminates the scan with an error
        // rather than looping forever.
        int skipped = reader.Read();
        while (skipped != '>' && skipped >= 0)
        {
            skipped = reader.Read();
        }

        if (skipped < 0)
        {
            throw new IOException("Missing closing bracket for hex string. Reached EOS.");
        }

        break;
    }

    return CosString.ParseHex(hexDigits.ToString());
}
/// <summary>
/// Parses a PDF name object from <paramref name="reader"/>. The leading '/' is consumed via
/// <c>ReadHelper.ReadExpectedChar</c>; the following bytes are collected until a character for which
/// <c>ReadHelper.IsEndOfName</c> returns true, or the end of the input, is reached.
/// </summary>
/// <remarks>
/// A '#' is treated as an escape only when it is directly followed by two hex digits, in which case
/// the pair is decoded to a single byte. Otherwise the '#' is kept literally. The collected bytes are
/// decoded as UTF-8 when <c>ReadHelper.IsValidUtf8</c> accepts them, and as windows-1252 otherwise.
/// The terminating character (if any) is pushed back onto the reader.
/// </remarks>
/// <param name="reader">The source positioned at the '/' that starts the name.</param>
/// <returns>The name produced by <c>CosName.Create</c> for the decoded text.</returns>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
/// <exception cref="IOException">A '#' escape could not be converted to a byte.</exception>
public CosName Parse([NotNull] IRandomAccessRead reader)
{
    if (reader == null)
    {
        throw new ArgumentNullException(nameof(reader));
    }

    ReadHelper.ReadExpectedChar(reader, '/');

    using (var memoryStream = new MemoryStream())
    using (var writer = new BinaryWriter(memoryStream))
    {
        int c = reader.Read();
        while (c != -1)
        {
            byte ch = (byte)c;
            if (ch == '#')
            {
                int ch1 = reader.Read();
                int ch2 = reader.Read();

                // Prior to PDF v1.2, the # was not a special character. Also,
                // it has been observed that various PDF tools do not follow the
                // spec with respect to the # escape, even though they report
                // PDF versions of 1.2 or later. The solution here is that we
                // interpret the # as an escape only when it is followed by two
                // valid hex digits.
                if (ReadHelper.IsHexDigit((char)ch1) && ReadHelper.IsHexDigit((char)ch2))
                {
                    string hex = "" + (char)ch1 + (char)ch2;
                    try
                    {
                        var byteToWrite = (byte)Convert.ToInt32(hex, 16);
                        writer.Write(byteToWrite);
                    }
                    catch (FormatException e)
                    {
                        throw new IOException("Error: expected hex digit, actual='" + hex + "'", e);
                    }

                    c = reader.Read();
                }
                else
                {
                    // Premature end of input inside a '#' sequence: stop parsing. The '#'
                    // and any single character read after it are not added to the name.
                    if (ch2 == -1 || ch1 == -1)
                    {
                        c = -1;
                        break;
                    }

                    // Not an escape: keep the '#' literally, push the second character back
                    // and re-process the first one on the next iteration.
                    reader.Unread(ch2);
                    c = ch1;
                    writer.Write(ch);
                }
            }
            else if (ReadHelper.IsEndOfName(ch))
            {
                break;
            }
            else
            {
                writer.Write(ch);
                c = reader.Read();
            }
        }

        // Return the terminating character to the reader so the caller can see it.
        if (c != -1)
        {
            reader.Unread(c);
        }

        // Make sure everything written through the writer has reached the stream, then take
        // a single snapshot of the buffer and reuse it for both validation and decoding
        // (each ToArray call copies the whole buffer).
        writer.Flush();
        byte[] bytes = memoryStream.ToArray();

        var str = ReadHelper.IsValidUtf8(bytes)
            ? Encoding.UTF8.GetString(bytes)
            : Encoding.GetEncoding("windows-1252").GetString(bytes);

        return CosName.Create(str);
    }
}