/// <summary>
/// Wires together the full parsing pipeline for a PDF file and produces the resulting document.
/// Reads the header version, locates and parses the cross reference table, builds the object,
/// font and page parsing services, then parses the trailer/catalog.
/// </summary>
/// <param name="reader">Random access view over the file bytes.</param>
/// <param name="inputBytes">Sequential view over the same file bytes.</param>
/// <param name="scanner">Token scanner positioned over the file.</param>
/// <param name="container">Service locator providing the parser components.</param>
/// <param name="isLenientParsing">Whether recoverable format errors should be tolerated.</param>
private static PdfDocument OpenDocument(IRandomAccessRead reader, IInputBytes inputBytes, ISeekableTokenScanner scanner, IContainer container, bool isLenientParsing)
{
    var log = container.Get<ILog>();

    // Read the "%PDF-x.y" header to get the declared file version.
    var version = container.Get<FileHeaderParser>().Parse(scanner, isLenientParsing);

    // Locate the byte offset of the first cross reference section from the trailing "startxref".
    var crossReferenceOffset = container.Get<FileTrailerParser>().GetFirstCrossReferenceOffset(inputBytes, scanner, isLenientParsing);

    var pool = new CosObjectPool();

    // TODO: make this use the scanner.
    var validator = new CrossReferenceOffsetValidator(new XrefOffsetValidator(log, reader,
        container.Get<CosDictionaryParser>(), container.Get<CosBaseParser>(), pool));

    // The offset stored in the file may be wrong; attempt to correct it before parsing.
    crossReferenceOffset = validator.Validate(crossReferenceOffset, isLenientParsing);

    var crossReferenceTable = container.Get<CrossReferenceParser>()
        .Parse(reader, isLenientParsing, crossReferenceOffset, pool);

    // NOTE(review): the result of ParseNew is discarded — presumably a replacement
    // implementation being developed in parallel; confirm this is intentional.
    container.Get<CrossReferenceParser>().ParseNew(crossReferenceOffset, scanner, isLenientParsing);

    var filterProvider = container.Get<IFilterProvider>();
    var bruteForceSearcher = new BruteForceSearcher(reader);
    var pdfObjectParser = new PdfObjectParser(container.Get<ILog>(), container.Get<CosBaseParser>(),
        container.Get<CosStreamParser>(), crossReferenceTable, bruteForceSearcher, pool, container.Get<ObjectStreamParser>());

    // Font handling services.
    var trueTypeFontParser = new TrueTypeFontParser();
    var fontDescriptorFactory = new FontDescriptorFactory();
    var cidFontFactory = new CidFontFactory(fontDescriptorFactory, trueTypeFontParser, pdfObjectParser, filterProvider);
    var cMapCache = new CMapCache(new CMapParser());

    var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory, cMapCache, filterProvider, pdfObjectParser),
        new TrueTypeFontHandler(pdfObjectParser, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser));

    var dynamicParser = container.Get<DynamicParser>();
    var resourceContainer = new ResourceContainer(pdfObjectParser, fontFactory);

    var pageFactory = new PageFactory(resourceContainer, pdfObjectParser, filterProvider,
        new PageContentParser(new ReflectionGraphicsStateOperationFactory()));
    var informationFactory = new DocumentInformationFactory();
    var catalogFactory = new CatalogFactory(pdfObjectParser);

    var root = ParseTrailer(reader, crossReferenceTable, dynamicParser, bruteForceSearcher, pool, isLenientParsing);

    if (!(root is PdfDictionary rootDictionary))
    {
        throw new InvalidOperationException("Expected root dictionary, but got this: " + root);
    }

    // in some pdfs the type value "Catalog" is missing in the root object
    if (isLenientParsing && !rootDictionary.ContainsKey(CosName.TYPE))
    {
        rootDictionary.Set(CosName.TYPE, CosName.CATALOG);
    }

    var information = informationFactory.Create(pdfObjectParser, crossReferenceTable.Dictionary, reader, isLenientParsing);

    var catalog = catalogFactory.Create(rootDictionary, reader, isLenientParsing);

    var caching = new ParsingCachingProviders(pool, bruteForceSearcher, resourceContainer);

    return (new PdfDocument(log, reader, version, crossReferenceTable, isLenientParsing, caching, pageFactory, pdfObjectParser, catalog, information));
}
private long CalculateXRefFixedOffset(long objectOffset, ISeekableTokenScanner scanner, IInputBytes inputBytes) { if (objectOffset < 0) { log.Error($"Invalid object offset {objectOffset} when searching for a xref table/stream"); return(0); } // start a brute force search for all xref tables and try to find the offset we are looking for var newOffset = BruteForceSearchForXref(objectOffset, scanner, inputBytes); if (newOffset > -1) { log.Debug($"Fixed reference for xref table/stream {objectOffset} -> {newOffset}"); return(newOffset); } log.Error($"Can\'t find the object xref table/stream at offset {objectOffset}"); return(0); }
/// <summary>
/// Tokenizes a PDF literal string "( ... )" from the input. Supports balanced nested
/// parentheses, backslash escape sequences, octal character codes (up to 3 digits) and
/// line-continuation after an escaped end-of-line. Detects UTF-16 byte order marks in the
/// accumulated bytes and decodes accordingly; otherwise the text is treated as ISO 8859-1.
/// Reuses the shared <c>stringBuilder</c> field, clearing it before returning.
/// </summary>
/// <param name="currentByte">The byte that triggered tokenization; must be '('.</param>
/// <param name="inputBytes">The input providing the rest of the string.</param>
/// <param name="token">The resulting string token when the method returns true.</param>
/// <returns>True when a string token was produced.</returns>
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
    token = null;

    if (inputBytes == null)
    {
        return (false);
    }

    if (currentByte != '(')
    {
        return (false);
    }

    var builder = stringBuilder;
    // Depth of nested parentheses; the string ends when this returns to zero.
    var numberOfBrackets = 1;
    var isEscapeActive = false;
    var isLineBreaking = false;

    // State for an in-progress "\ddd" octal escape (1 to 3 digits).
    var octalModeActive = false;
    short[] octal = { 0, 0, 0 };
    var octalsRead = 0;

    while (inputBytes.MoveNext())
    {
        var b = inputBytes.CurrentByte;
        var c = (char)b;

        if (octalModeActive)
        {
            var nextCharacterOctal = c >= '0' && c <= '7';

            if (nextCharacterOctal)
            {
                // left shift the octals.
                LeftShiftOctal(c, octalsRead, octal);
                octalsRead++;
            }

            // Finish the escape on the third digit or the first non-octal character.
            if (octalsRead == 3 || !nextCharacterOctal)
            {
                var characterCode = OctalHelpers.FromOctalDigits(octal);

                // For now :(
                // TODO: I have a sneaking suspicion this is wrong, not sure what behaviour is for large octal numbers
                builder.Append((char)characterCode);

                octal[0] = 0;
                octal[1] = 0;
                octal[2] = 0;
                octalsRead = 0;
                octalModeActive = false;
            }

            // The octal digit was consumed by the escape; don't process it again below.
            if (nextCharacterOctal)
            {
                continue;
            }
        }

        switch (c)
        {
            case ')':
                isLineBreaking = false;
                if (!isEscapeActive)
                {
                    numberOfBrackets--;
                }

                isEscapeActive = false;
                if (numberOfBrackets > 0)
                {
                    builder.Append(c);
                }

                // TODO: Check for other ends of string where the string is improperly formatted. See commented method
                numberOfBrackets = CheckForEndOfString(numberOfBrackets, inputBytes);

                break;
            case '(':
                isLineBreaking = false;

                if (!isEscapeActive)
                {
                    numberOfBrackets++;
                }

                isEscapeActive = false;

                builder.Append(c);
                break;
            // Escape
            case '\\':
                isLineBreaking = false;
                // Escaped backslash
                if (isEscapeActive)
                {
                    builder.Append(c);
                    isEscapeActive = false;
                }
                else
                {
                    isEscapeActive = true;
                }
                break;
            default:
                if (isLineBreaking)
                {
                    // An escaped end-of-line joins the lines: skip the EOL characters themselves.
                    if (ReadHelper.IsEndOfLine(c))
                    {
                        continue;
                    }

                    isLineBreaking = false;
                    builder.Append(c);
                }
                else if (isEscapeActive)
                {
                    // Handles \n, \r, \t, \b, \f and starts octal mode for \d.
                    ProcessEscapedCharacter(c, builder, octal, ref octalModeActive, ref octalsRead, ref isLineBreaking);
                    isEscapeActive = false;
                }
                else
                {
                    builder.Append(c);
                }

                break;
        }

        if (numberOfBrackets <= 0)
        {
            break;
        }
    }

    StringToken.Encoding encodedWith;
    string tokenStr;

    // Check for UTF-16 byte order marks at the start of the accumulated characters.
    if (builder.Length >= 2)
    {
        if (builder[0] == 0xFE && builder[1] == 0xFF)
        {
            var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString());

            // Substring(1) drops the decoded BOM character.
            tokenStr = Encoding.BigEndianUnicode.GetString(rawBytes).Substring(1);

            encodedWith = StringToken.Encoding.Utf16BE;
        }
        else if (builder[0] == 0xFF && builder[1] == 0xFE)
        {
            var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString());

            tokenStr = Encoding.Unicode.GetString(rawBytes).Substring(1);

            encodedWith = StringToken.Encoding.Utf16;
        }
        else
        {
            tokenStr = builder.ToString();

            encodedWith = StringToken.Encoding.Iso88591;
        }
    }
    else
    {
        tokenStr = builder.ToString();

        encodedWith = StringToken.Encoding.Iso88591;
    }

    // The builder is shared; clear it for the next call.
    builder.Clear();

    token = new StringToken(tokenStr, encodedWith);

    return (true);
}
public Type1Font Parse(IInputBytes inputBytes) { var scanner = new CoreTokenScanner(inputBytes); if (!scanner.TryReadToken(out CommentToken comment) || !comment.Data.StartsWith("!")) { throw new InvalidFontFormatException("The Type1 program did not start with '%!'."); } string name; var parts = comment.Data.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries); if (parts.Length == 3) { name = parts[1]; } else { name = "Unknown"; } var comments = new List <string>(); while (scanner.MoveNext() && scanner.CurrentToken is CommentToken commentToken) { comments.Add(commentToken.Data); } var dictionaries = new List <DictionaryToken>(); // Override arrays and names since type 1 handles these differently. var arrayTokenizer = new Type1ArrayTokenizer(); var nameTokenizer = new Type1NameTokenizer(); scanner.RegisterCustomTokenizer((byte)'{', arrayTokenizer); scanner.RegisterCustomTokenizer((byte)'/', nameTokenizer); try { var tokenSet = new PreviousTokenSet(); tokenSet.Add(scanner.CurrentToken); while (scanner.MoveNext()) { if (scanner.CurrentToken is OperatorToken operatorToken) { HandleOperator(operatorToken, inputBytes, scanner, tokenSet, dictionaries); } tokenSet.Add(scanner.CurrentToken); } } finally { scanner.DeregisterCustomTokenizer(arrayTokenizer); scanner.DeregisterCustomTokenizer(nameTokenizer); } var encoding = GetEncoding(dictionaries); var matrix = GetFontMatrix(dictionaries); var boundingBox = GetBoundingBox(dictionaries); return(new Type1Font(name, encoding, matrix, boundingBox)); }
private void BfSearchForXRefStreams(IInputBytes bytes) { if (bfSearchXRefStreamsOffsets != null) { return; } // a pdf may contain more than one /XRef entry bfSearchXRefStreamsOffsets = new List <long>(); var startOffset = bytes.CurrentOffset; bytes.Seek(MinimumSearchOffset); // search for XRef streams var objString = " obj"; while (bytes.MoveNext() && !bytes.IsAtEnd()) { if (!ReadHelper.IsString(bytes, "xref")) { continue; } // search backwards for the beginning of the stream long newOffset = -1; long xrefOffset = bytes.CurrentOffset; bool objFound = false; for (var i = 1; i < 40; i++) { if (objFound) { break; } long currentOffset = xrefOffset - (i * 10); if (currentOffset > 0) { bytes.Seek(currentOffset); for (int j = 0; j < 10; j++) { if (ReadHelper.IsString(bytes, objString)) { long tempOffset = currentOffset - 1; bytes.Seek(tempOffset); var generationNumber = bytes.Peek(); // is the next char a digit? if (generationNumber.HasValue && ReadHelper.IsDigit(generationNumber.Value)) { tempOffset--; bytes.Seek(tempOffset); // is the digit preceded by a space? if (ReadHelper.IsSpace(bytes.CurrentByte)) { int length = 0; bytes.Seek(--tempOffset); while (tempOffset > MinimumSearchOffset && ReadHelper.IsDigit(bytes.CurrentByte)) { bytes.Seek(--tempOffset); length++; } if (length > 0) { bytes.MoveNext(); newOffset = bytes.CurrentOffset; } } } objFound = true; break; } currentOffset++; bytes.MoveNext(); } } } if (newOffset > -1) { bfSearchXRefStreamsOffsets.Add(newOffset); } bytes.Seek(xrefOffset + 5); } bytes.Seek(startOffset); }
/// <summary>
/// Tokenizes a PDF literal string "( ... )" from the input. Supports balanced nested
/// parentheses, backslash escape sequences, octal character codes (up to 3 digits) and
/// line-continuation after an escaped end-of-line.
/// </summary>
/// <param name="currentByte">The byte that triggered tokenization; must be '('.</param>
/// <param name="inputBytes">The input providing the rest of the string.</param>
/// <param name="token">The resulting string token when the method returns true.</param>
/// <returns>True when a string token was produced.</returns>
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
    var builder = new StringBuilder();
    token = null;

    if (inputBytes == null)
    {
        return false;
    }

    if (currentByte != '(')
    {
        return false;
    }

    // Depth of nested parentheses; the string ends when this returns to zero.
    int numberOfBrackets = 1;
    bool isEscapeActive = false;
    bool isLineBreaking = false;

    // State for an in-progress "\ddd" octal escape (1 to 3 digits).
    bool octalModeActive = false;
    short[] octal = { 0, 0, 0 };
    int octalsRead = 0;

    while (inputBytes.MoveNext())
    {
        var b = inputBytes.CurrentByte;
        var c = (char)b;

        if (octalModeActive)
        {
            var nextCharacterOctal = c >= '0' && c <= '7';

            if (nextCharacterOctal)
            {
                // left shift the octals.
                LeftShiftOctal(c, octalsRead, octal);
                octalsRead++;
            }

            // Finish the escape on the third digit or the first non-octal character.
            if (octalsRead == 3 || !nextCharacterOctal)
            {
                var characterCode = OctalHelpers.FromOctalDigits(octal);

                // For now :(
                // TODO: I have a sneaking suspicion this is wrong, not sure what behaviour is for large octal numbers
                builder.Append((char)characterCode);

                octal[0] = 0;
                octal[1] = 0;
                octal[2] = 0;
                octalsRead = 0;
                octalModeActive = false;
            }

            // The octal digit was consumed by the escape; don't process it again below.
            if (nextCharacterOctal)
            {
                continue;
            }
        }

        switch (c)
        {
            case ')':
                isLineBreaking = false;
                if (!isEscapeActive)
                {
                    numberOfBrackets--;
                }

                isEscapeActive = false;
                if (numberOfBrackets > 0)
                {
                    builder.Append(c);
                }

                // TODO: Check for other ends of string where the string is improperly formatted. See commented method
                // numberOfBrackets = CheckForEndOfString(inputBytes, numberOfBrackets);

                break;
            case '(':
                isLineBreaking = false;

                if (!isEscapeActive)
                {
                    numberOfBrackets++;
                }

                isEscapeActive = false;

                builder.Append(c);
                break;
            // Escape
            case '\\':
                isLineBreaking = false;
                // Escaped backslash
                if (isEscapeActive)
                {
                    builder.Append(c);

                    // BUGFIX: the escape must be consumed once the literal backslash is emitted,
                    // otherwise the character following "\\" would also be treated as escaped
                    // (the sibling string tokenizer already resets the flag here).
                    isEscapeActive = false;
                }
                else
                {
                    isEscapeActive = true;
                }
                break;
            default:
                if (isLineBreaking)
                {
                    // An escaped end-of-line joins the lines: skip the EOL characters themselves.
                    if (ReadHelper.IsEndOfLine(c))
                    {
                        continue;
                    }

                    isLineBreaking = false;
                    builder.Append(c);
                }
                else if (isEscapeActive)
                {
                    // Handles \n, \r, \t, \b, \f and starts octal mode for \d.
                    ProcessEscapedCharacter(c, builder, octal, ref octalModeActive, ref octalsRead, ref isLineBreaking);
                    isEscapeActive = false;
                }
                else
                {
                    builder.Append(c);
                }

                break;
        }

        if (numberOfBrackets <= 0)
        {
            break;
        }
    }

    token = new StringToken(builder.ToString());

    return true;
}
/// <summary>
/// Reads character codes from the bytes of a text-showing operation, resolves each code to
/// a Unicode string and glyph bounds via the current font, records a <see cref="Letter"/>
/// for every glyph and advances the text matrix by the glyph displacement.
/// Supports both horizontal and vertical writing modes.
/// </summary>
/// <param name="bytes">The raw bytes of the string operand of the text-showing operator.</param>
public void ShowText(IInputBytes bytes)
{
    var currentState = GetCurrentState();

    var font = currentState.FontState.FromExtendedGraphicsState ? activeExtendedGraphicsStateFont : resourceStore.GetFont(currentState.FontState.FontName);

    if (font == null)
    {
        throw new InvalidOperationException($"Could not find the font with name {currentState.FontState.FontName} in the resource store. It has not been loaded yet.");
    }

    var fontSize = currentState.FontState.FontSize;
    // Horizontal scaling is stored as a percentage.
    var horizontalScaling = currentState.FontState.HorizontalScaling / 100.0;
    var characterSpacing = currentState.FontState.CharacterSpacing;
    var rise = currentState.FontState.Rise;

    var transformationMatrix = currentState.CurrentTransformationMatrix;

    var renderingMatrix = TransformationMatrix.FromValues(fontSize * horizontalScaling, 0, 0, fontSize, 0, rise);

    // TODO: this does not seem correct, produces the correct result for now but we need to revisit.
    // see: https://stackoverflow.com/questions/48010235/pdf-specification-get-font-size-in-points
    var pointSize = Math.Round(rotation.Rotate(transformationMatrix).Multiply(TextMatrices.TextMatrix).Multiply(fontSize).A, 2);

    // Normalize to a positive point size (mirrored/rotated transforms can make A negative).
    if (pointSize < 0)
    {
        pointSize *= -1;
    }

    while (bytes.MoveNext())
    {
        var code = font.ReadCharacterCode(bytes, out int codeLength);

        var foundUnicode = font.TryGetUnicode(code, out var unicode);

        if (!foundUnicode || unicode == null)
        {
            log.Warn($"We could not find the corresponding character with code {code} in font {font.Name}.");

            // Try casting directly to string as in PDFBox 1.8.
            unicode = new string((char)code, 1);
        }

        var wordSpacing = 0.0;
        // Word spacing only applies when the single-byte code for space was read.
        if (code == ' ' && codeLength == 1)
        {
            wordSpacing += GetCurrentState().FontState.WordSpacing;
        }

        var textMatrix = TextMatrices.TextMatrix;

        if (font.IsVertical)
        {
            if (!(font is IVerticalWritingSupported verticalFont))
            {
                throw new InvalidOperationException($"Font {font.Name} was in vertical writing mode but did not implement {nameof(IVerticalWritingSupported)}.");
            }

            // In vertical mode the glyph origin is shifted by the font's position vector.
            var positionVector = verticalFont.GetPositionVector(code);

            textMatrix = textMatrix.Translate(positionVector.X, positionVector.Y);
        }

        var boundingBox = font.GetBoundingBox(code);

        // Map glyph-space bounds through rendering -> text -> device space.
        var transformedGlyphBounds = rotation.Rotate(transformationMatrix)
            .Transform(textMatrix
                .Transform(renderingMatrix
                    .Transform(boundingBox.GlyphBounds)));

        var transformedPdfBounds = rotation.Rotate(transformationMatrix)
            .Transform(textMatrix
                .Transform(renderingMatrix
                    .Transform(new PdfRectangle(0, 0, boundingBox.Width, 0))));

        // If the text rendering mode calls for filling, the current nonstroking color in the graphics state is used;
        // if it calls for stroking, the current stroking color is used.
        // In modes that perform both filling and stroking, the effect is as if each glyph outline were filled and then stroked in separate operations.
        // TODO: expose color as something more advanced
        var color = currentState.FontState.TextRenderingMode != TextRenderingMode.Stroke ? currentState.CurrentNonStrokingColor : currentState.CurrentStrokingColor;

        var letter = new Letter(unicode, transformedGlyphBounds, transformedPdfBounds.BottomLeft, transformedPdfBounds.BottomRight, transformedPdfBounds.Width, fontSize, font.Name.Data, color, pointSize, textSequence);

        letters.Add(letter);

        markedContentStack.AddLetter(letter);

        // Advance the text matrix by the glyph displacement plus spacing adjustments.
        double tx, ty;
        if (font.IsVertical)
        {
            var verticalFont = (IVerticalWritingSupported)font;

            var displacement = verticalFont.GetDisplacementVector(code);

            tx = 0;
            ty = (displacement.Y * fontSize) + characterSpacing + wordSpacing;
        }
        else
        {
            tx = (boundingBox.Width * fontSize + characterSpacing + wordSpacing) * horizontalScaling;
            ty = 0;
        }

        TextMatrices.TextMatrix = TextMatrices.TextMatrix.Translate(tx, ty);
    }
}
/// <summary>
/// Parses a CMap program from the input, dispatching on the CMap operators
/// (usecmap, begincodespacerange, beginbfchar, beginbfrange, begincidchar, begincidrange)
/// to dedicated sub-parsers. Each operator consumes the token immediately preceding it
/// (a name or a count), which is why the previous token is tracked across iterations.
/// </summary>
/// <param name="inputBytes">The bytes of the CMap program.</param>
/// <param name="isLenientParsing">Whether recoverable format errors should be tolerated.</param>
/// <returns>The built character map.</returns>
public CMap Parse(IInputBytes inputBytes, bool isLenientParsing)
{
    var scanner = new CoreTokenScanner(inputBytes);

    var builder = new CharacterMapBuilder();

    IToken previousToken = null;

    while (scanner.MoveNext())
    {
        var token = scanner.CurrentToken;

        if (token is OperatorToken operatorToken)
        {
            switch (operatorToken.Data)
            {
                case "usecmap":
                {
                    // The preceding name identifies the external CMap to merge in.
                    if (previousToken is NameToken name)
                    {
                        var external = ParseExternal(name.Data);

                        builder.UseCMap(external);
                    }
                    else
                    {
                        throw new InvalidOperationException("Unexpected token preceding external cmap call: " + previousToken);
                    }

                    break;
                }
                case "begincodespacerange":
                {
                    // The preceding numeric token is the number of ranges to read.
                    if (previousToken is NumericToken numeric)
                    {
                        CodespaceRangeParser.Parse(numeric, scanner, builder, isLenientParsing);
                    }
                    else
                    {
                        throw new InvalidOperationException("Unexpected token preceding start of codespace range: " + previousToken);
                    }
                }
                    break;
                case "beginbfchar":
                {
                    if (previousToken is NumericToken numeric)
                    {
                        BaseFontCharacterParser.Parse(numeric, scanner, builder, isLenientParsing);
                    }
                    else
                    {
                        throw new InvalidOperationException("Unexpected token preceding start of base font characters: " + previousToken);
                    }
                }
                    break;
                case "beginbfrange":
                {
                    if (previousToken is NumericToken numeric)
                    {
                        BaseFontRangeParser.Parse(numeric, scanner, builder, isLenientParsing);
                    }
                    else
                    {
                        throw new InvalidOperationException("Unexpected token preceding start of base font character ranges: " + previousToken);
                    }
                }
                    break;
                case "begincidchar":
                {
                    if (previousToken is NumericToken numeric)
                    {
                        CidCharacterParser.Parse(numeric, scanner, builder, isLenientParsing);
                    }
                    else
                    {
                        throw new InvalidOperationException("Unexpected token preceding start of Cid character mapping: " + previousToken);
                    }

                    break;
                }
                case "begincidrange":
                {
                    if (previousToken is NumericToken numeric)
                    {
                        CidRangeParser.Parse(numeric, scanner, builder, isLenientParsing);
                    }
                    else
                    {
                        throw new InvalidOperationException("Unexpected token preceding start of Cid ranges: " + previousToken);
                    }
                }
                    break;
            }
        }
        else if (token is NameToken name)
        {
            // Name/value pairs like /CMapName, /WMode etc. populate the builder metadata.
            CidFontNameParser.Parse(name, scanner, builder, isLenientParsing);
        }

        previousToken = token;
    }

    return (builder.Build());
}
public TrueTypeDataBytes(IInputBytes inputBytes) { this.inputBytes = inputBytes; }
public Type1Tokenizer(IInputBytes bytes) { this.bytes = bytes; comments = new List <string>(); CurrentToken = ReadNextToken(); }
public int ReadCode(IInputBytes inputBytes) { return(cMap.ReadCode(inputBytes)); }
/// <summary>
/// Reads character codes from the bytes of a text-showing operation, resolves each code
/// to a Unicode string and glyph bounds via the current font, emits the glyph and advances
/// the text matrix. Vertical writing mode is not supported and throws.
/// </summary>
/// <param name="bytes">The raw bytes of the string operand of the text-showing operator.</param>
/// <exception cref="InvalidOperationException">Thrown when the current font has not been loaded.</exception>
/// <exception cref="NotImplementedException">Thrown for fonts in vertical writing mode.</exception>
public void ShowText(IInputBytes bytes)
{
    var currentState = GetCurrentState();

    var font = currentState.FontState.FromExtendedGraphicsState
        ? activeExtendedGraphicsStateFont
        : resourceStore.GetFont(currentState.FontState.FontName);

    if (font == null)
    {
        throw new InvalidOperationException($"Could not find the font with name {currentState.FontState.FontName} in the resource store. It has not been loaded yet.");
    }

    var fontSize = currentState.FontState.FontSize;
    // Horizontal scaling is stored as a percentage.
    var horizontalScaling = currentState.FontState.HorizontalScaling / 100m;
    var characterSpacing = currentState.FontState.CharacterSpacing;
    var rise = currentState.FontState.Rise;

    var transformationMatrix = currentState.CurrentTransformationMatrix;

    var renderingMatrix = TransformationMatrix.FromValues(fontSize * horizontalScaling, 0, 0, fontSize, 0, rise);

    // TODO: this does not seem correct, produces the correct result for now but we need to revisit.
    // see: https://stackoverflow.com/questions/48010235/pdf-specification-get-font-size-in-points
    var pointSize = decimal.Round(rotation.Rotate(transformationMatrix).Multiply(TextMatrices.TextMatrix).Multiply(fontSize).A, 2);

    while (bytes.MoveNext())
    {
        var code = font.ReadCharacterCode(bytes, out int codeLength);

        var foundUnicode = font.TryGetUnicode(code, out var unicode);

        if (!foundUnicode || unicode == null)
        {
            log.Warn($"We could not find the corresponding character with code {code} in font {font.Name}.");

            // Try casting directly to string as in PDFBox 1.8.
            unicode = new string((char)code, 1);
        }

        var wordSpacing = 0m;
        // Word spacing only applies when the single-byte code for space was read.
        if (code == ' ' && codeLength == 1)
        {
            wordSpacing += GetCurrentState().FontState.WordSpacing;
        }

        if (font.IsVertical)
        {
            throw new NotImplementedException("Vertical fonts are currently unsupported, please submit a pull request or issue with an example file.");
        }

        var boundingBox = font.GetBoundingBox(code);

        // Map glyph-space bounds through rendering -> text -> device space.
        var transformedGlyphBounds = rotation.Rotate(transformationMatrix)
            .Transform(TextMatrices.TextMatrix
                .Transform(renderingMatrix
                    .Transform(boundingBox.GlyphBounds)));

        var transformedPdfBounds = rotation.Rotate(transformationMatrix)
            .Transform(TextMatrices.TextMatrix
                .Transform(renderingMatrix.Transform(new PdfRectangle(0, 0, boundingBox.Width, 0))));

        ShowGlyph(font, transformedGlyphBounds, transformedPdfBounds.BottomLeft, transformedPdfBounds.BottomRight, transformedPdfBounds.Width, unicode, fontSize, pointSize);

        // Vertical fonts throw above, so only the horizontal advance is reachable here;
        // the previous 'if (font.IsVertical)' displacement branch was dead code and was removed.
        var tx = (boundingBox.Width * fontSize + characterSpacing + wordSpacing) * horizontalScaling;
        var ty = 0m;

        var translate = TransformationMatrix.GetTranslationMatrix(tx, ty);

        TextMatrices.TextMatrix = translate.Multiply(TextMatrices.TextMatrix);
    }
}
/// <summary> /// Check that the offsets in the cross reference are correct. /// </summary> public void CheckCrossReferenceOffsets(IInputBytes bytes, CrossReferenceTable xrefTrailerResolver, bool isLenientParsing) { // repair mode isn't available in non-lenient mode if (!isLenientParsing) { return; } Dictionary <IndirectReference, long> xrefOffset = xrefTrailerResolver.ObjectOffsets.ToDictionary(x => x.Key, x => x.Value); if (ValidateXrefOffsets(bytes, xrefOffset)) { return; } IReadOnlyDictionary <IndirectReference, long> bfCOSObjectKeyOffsets = getBFCosObjectOffsets(); if (bfCOSObjectKeyOffsets.Count > 0) { List <IndirectReference> objStreams = new List <IndirectReference>(); // find all object streams foreach (var entry in xrefOffset) { long offset = entry.Value; if (offset < 0) { IndirectReference objStream = new IndirectReference(-offset, 0); if (!objStreams.Contains(objStream)) { objStreams.Add(new IndirectReference(-offset, 0)); } } } // remove all found object streams if (objStreams.Count > 0) { foreach (IndirectReference key in objStreams) { if (bfCOSObjectKeyOffsets.ContainsKey(key)) { // remove all parsed objects which are part of an object stream //ISet<long> objects = xrefTrailerResolver // .getContainedObjectNumbers((int)(key.Number)); //foreach (long objNr in objects) //{ // CosObjectKey streamObjectKey = new CosObjectKey(objNr, 0); // if (bfCOSObjectKeyOffsets.TryGetValue(streamObjectKey, out long streamObjectOffset) && streamObjectOffset > 0) // { // bfCOSObjectKeyOffsets.Remove(streamObjectKey); // } //} } else { // remove all objects which are part of an object stream which wasn't found //ISet<long> objects = xrefTrailerResolver // .getContainedObjectNumbers((int)(key.Number)); //foreach (long objNr in objects) //{ // xrefOffset.Remove(new CosObjectKey(objNr, 0)); //} } } } foreach (var item in bfCOSObjectKeyOffsets) { xrefOffset[item.Key] = item.Value; } } }
public void ShowText(IInputBytes bytes) { }
private static double ReadDouble(IInputBytes input, StringBuilder stringBuilder) { var dec = ReadDecimal(input, stringBuilder); return((double)dec); }
/// <summary> /// Where an entire PFB file has been embedded in the PDF we read the header first. /// </summary> private static (byte[] ascii, byte[] binary) ReadPfbHeader(IInputBytes bytes) { /* * The header is a 6 byte sequence. The first byte is 0x80 followed by 0x01 for the ASCII record indicator. * The following 4 bytes determine the size/length of the ASCII part of the PFB file. * After the ASCII part another 6 byte sequence is present, this time 0x80 0x02 for the Binary part length. * A 3rd sequence is present at the end re-stating the ASCII length but this is surplus to requirements. */ // ReSharper disable once ParameterOnlyUsedForPreconditionCheck.Local int ReadSize(byte recordType) { bytes.MoveNext(); if (bytes.CurrentByte != PfbFileIndicator) { throw new InvalidOperationException($"File does not start with 0x80, which indicates a full PFB file. Instead got: {bytes.CurrentByte}"); } bytes.MoveNext(); if (bytes.CurrentByte != recordType) { throw new InvalidOperationException($"Encountered unexpected header type in the PFB file: {bytes.CurrentByte}"); } bytes.MoveNext(); int size = bytes.CurrentByte; bytes.MoveNext(); size += bytes.CurrentByte << 8; bytes.MoveNext(); size += bytes.CurrentByte << 16; bytes.MoveNext(); size += bytes.CurrentByte << 24; return(size); } var asciiSize = ReadSize(0x01); var asciiPart = new byte[asciiSize]; int i = 0; while (i < asciiSize) { bytes.MoveNext(); asciiPart[i] = bytes.CurrentByte; i++; } var binarySize = ReadSize(0x02); var binaryPart = new byte[binarySize]; i = 0; while (i < binarySize) { bytes.MoveNext(); binaryPart[i] = bytes.CurrentByte; i++; } return(asciiPart, binaryPart); }
/// <summary>
/// Reads a single character metric line from an Adobe Font Metrics (AFM) file and parses
/// its semicolon-separated key/value entries (code, widths, name, bounding box, ligatures)
/// into a builder.
/// </summary>
/// <param name="bytes">The input positioned at the start of the metric line.</param>
/// <param name="stringBuilder">Scratch builder used while reading the line.</param>
/// <returns>The completed character metric.</returns>
/// <exception cref="InvalidFontFormatException">Thrown for an unrecognized metric key.</exception>
private static AdobeFontMetricsIndividualCharacterMetric ReadCharacterMetric(IInputBytes bytes, StringBuilder stringBuilder)
{
    var line = ReadLine(bytes, stringBuilder);

    // Each line holds entries like "C 32 ; WX 600 ; N space ; B 0 0 0 0 ;".
    var split = line.Split(IndividualCharmetricsSplit, StringSplitOptions.RemoveEmptyEntries);

    var metric = new AdobeFontMetricsIndividualCharacterMetricBuilder();

    foreach (var s in split)
    {
        var parts = s.Split(CharmetricsKeySplit, StringSplitOptions.RemoveEmptyEntries);

        switch (parts[0])
        {
            case CharmetricsC:
            {
                // Character code in decimal.
                var code = int.Parse(parts[1], CultureInfo.InvariantCulture);
                metric.CharacterCode = code;
                break;
            }
            case CharmetricsCh:
            {
                // Character code in hexadecimal.
                var code = int.Parse(parts[1], NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                metric.CharacterCode = code;
                break;
            }
            case CharmetricsWx:
            {
                metric.WidthX = double.Parse(parts[1], CultureInfo.InvariantCulture);
                break;
            }
            case CharmetricsW0X:
            {
                metric.WidthXDirection0 = double.Parse(parts[1], CultureInfo.InvariantCulture);
                break;
            }
            case CharmetricsW1X:
            {
                metric.WidthXDirection1 = double.Parse(parts[1], CultureInfo.InvariantCulture);
                break;
            }
            case CharmetricsWy:
            {
                metric.WidthY = double.Parse(parts[1], CultureInfo.InvariantCulture);
                break;
            }
            case CharmetricsW0Y:
            {
                metric.WidthYDirection0 = double.Parse(parts[1], CultureInfo.InvariantCulture);
                break;
            }
            case CharmetricsW1Y:
            {
                metric.WidthYDirection1 = double.Parse(parts[1], CultureInfo.InvariantCulture);
                break;
            }
            case CharmetricsW:
            {
                // Combined X and Y widths.
                metric.WidthX = double.Parse(parts[1], CultureInfo.InvariantCulture);
                metric.WidthY = double.Parse(parts[2], CultureInfo.InvariantCulture);
                break;
            }
            case CharmetricsW0:
            {
                metric.WidthXDirection0 = double.Parse(parts[1], CultureInfo.InvariantCulture);
                metric.WidthYDirection0 = double.Parse(parts[2], CultureInfo.InvariantCulture);
                break;
            }
            case CharmetricsW1:
            {
                metric.WidthXDirection1 = double.Parse(parts[1], CultureInfo.InvariantCulture);
                metric.WidthYDirection1 = double.Parse(parts[2], CultureInfo.InvariantCulture);
                break;
            }
            case CharmetricsVv:
            {
                metric.VVector = new AdobeFontMetricsVector(double.Parse(parts[1], CultureInfo.InvariantCulture), double.Parse(parts[2], CultureInfo.InvariantCulture));
                break;
            }
            case CharmetricsN:
            {
                metric.Name = parts[1];
                break;
            }
            case CharmetricsB:
            {
                metric.BoundingBox = new PdfRectangle(double.Parse(parts[1], CultureInfo.InvariantCulture), double.Parse(parts[2], CultureInfo.InvariantCulture),
                    double.Parse(parts[3], CultureInfo.InvariantCulture), double.Parse(parts[4], CultureInfo.InvariantCulture));
                break;
            }
            case CharmetricsL:
            {
                metric.Ligature = new AdobeFontMetricsLigature(parts[1], parts[2]);
                break;
            }
            default:
                throw new InvalidFontFormatException($"Unknown CharMetrics command '{parts[0]}'.");
        }
    }

    return (metric.Build());
}
/// <summary>
/// Parses an embedded Adobe Type 1 font file.
/// </summary>
/// <param name="inputBytes">The bytes of the font program.</param>
/// <param name="length1">The length in bytes of the clear text portion of the font program.</param>
/// <param name="length2">The length in bytes of the encrypted portion of the font program.</param>
/// <returns>The parsed type 1 font.</returns>
public Type1Font Parse(IInputBytes inputBytes, int length1, int length2)
{
    // Sometimes the entire PFB file including the header bytes can be included which prevents parsing in the normal way.
    var isEntirePfbFile = inputBytes.Peek() == PfbFileIndicator;

    IReadOnlyList<byte> eexecPortion = new byte[0];

    if (isEntirePfbFile)
    {
        // The PFB header already separates the clear-text and encrypted (eexec) portions.
        var (ascii, binary) = ReadPfbHeader(inputBytes);

        eexecPortion = binary;
        inputBytes = new ByteArrayInputBytes(ascii);
    }

    var scanner = new CoreTokenScanner(inputBytes);

    // A Type 1 program must start with a "%!..." comment line.
    if (!scanner.TryReadToken(out CommentToken comment) || !comment.Data.StartsWith("!"))
    {
        throw new InvalidFontFormatException("The Type1 program did not start with '%!'.");
    }

    string name;
    // Header is expected to look like "PS-AdobeFont-1.0: FontName version"; the middle part is the name.
    var parts = comment.Data.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

    if (parts.Length == 3)
    {
        name = parts[1];
    }
    else
    {
        name = "Unknown";
    }

    var comments = new List<string>();
    // Consume the run of comment tokens following the header.
    while (scanner.MoveNext() && scanner.CurrentToken is CommentToken commentToken)
    {
        comments.Add(commentToken.Data);
    }

    var dictionaries = new List<DictionaryToken>();

    // Override arrays and names since type 1 handles these differently.
    var arrayTokenizer = new Type1ArrayTokenizer();
    var nameTokenizer = new Type1NameTokenizer();
    scanner.RegisterCustomTokenizer((byte)'{', arrayTokenizer);
    scanner.RegisterCustomTokenizer((byte)'/', nameTokenizer);

    try
    {
        var tempEexecPortion = new List<byte>();
        var tokenSet = new PreviousTokenSet();
        tokenSet.Add(scanner.CurrentToken);

        while (scanner.MoveNext())
        {
            if (scanner.CurrentToken is OperatorToken operatorToken)
            {
                if (Equals(scanner.CurrentToken, OperatorToken.Eexec))
                {
                    // Everything between "eexec" and "cleartomark" is the encrypted portion.
                    // Scan byte-by-byte, tracking how much of the ClearToMark terminator has
                    // been matched so far in 'offset'.
                    int offset = 0;

                    while (inputBytes.MoveNext())
                    {
                        if (inputBytes.CurrentByte == (byte)ClearToMark[offset])
                        {
                            offset++;
                        }
                        else
                        {
                            // A partial terminator match turned out to be data: flush the
                            // matched prefix into the output before resetting.
                            if (offset > 0)
                            {
                                for (int i = 0; i < offset; i++)
                                {
                                    tempEexecPortion.Add((byte)ClearToMark[i]);
                                }
                            }

                            offset = 0;
                        }

                        // Full terminator matched; the encrypted portion is complete.
                        if (offset == ClearToMark.Length)
                        {
                            break;
                        }

                        // Mid-way through a potential terminator match; don't emit yet.
                        if (offset > 0)
                        {
                            continue;
                        }

                        tempEexecPortion.Add(inputBytes.CurrentByte);
                    }
                }
                else
                {
                    HandleOperator(operatorToken, scanner, tokenSet, dictionaries);
                }
            }

            tokenSet.Add(scanner.CurrentToken);
        }

        // For a full PFB file the encrypted portion came from the header instead.
        if (!isEntirePfbFile)
        {
            eexecPortion = tempEexecPortion;
        }
    }
    finally
    {
        // Always restore the scanner's default tokenizers.
        scanner.DeregisterCustomTokenizer(arrayTokenizer);
        scanner.DeregisterCustomTokenizer(nameTokenizer);
    }

    var encoding = GetEncoding(dictionaries);
    var matrix = GetFontMatrix(dictionaries);
    var boundingBox = GetBoundingBox(dictionaries);

    // NOTE(review): the result of parsing the encrypted portion is discarded — presumably
    // the charstring data is not consumed yet; confirm intent.
    encryptedPortionParser.Parse(eexecPortion);

    return (new Type1Font(name, encoding, matrix, boundingBox ?? new PdfRectangle()));
}
public BruteForceSearcher([NotNull] IInputBytes bytes) { this.bytes = bytes ?? throw new ArgumentNullException(nameof(bytes)); }
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token) { token = null; StringBuilder characters; if ((currentByte >= Zero && currentByte <= Nine) || currentByte == '-' || currentByte == '+' || currentByte == '.') { characters = StringBuilderPool.Borrow(); characters.Append((char)currentByte); } else { return(false); } while (inputBytes.MoveNext()) { var b = inputBytes.CurrentByte; if ((b >= Zero && b <= Nine) || b == '-' || b == '+' || b == '.' || b == 'E' || b == 'e') { characters.Append((char)b); } else { break; } } try { var str = characters.ToString(); StringBuilderPool.Return(characters); switch (str) { case "-": case ".": case "0": token = NumericToken.Zero; return(true); case "1": token = NumericToken.One; return(true); case "2": token = NumericToken.Two; return(true); case "3": token = NumericToken.Three; return(true); case "4": token = NumericToken.Four; return(true); case "5": token = NumericToken.Five; return(true); case "6": token = NumericToken.Six; return(true); case "7": token = NumericToken.Seven; return(true); case "8": token = NumericToken.Eight; return(true); case "9": token = NumericToken.Nine; return(true); case "10": token = NumericToken.Ten; return(true); case "100": token = NumericToken.OneHundred; return(true); case "1000": token = NumericToken.OneThousand; return(true); default: if (!decimal.TryParse(str, NumberStyles.Any, CultureInfo.InvariantCulture, out var value)) { return(false); } token = new NumericToken(value); return(true); } } catch (FormatException) { return(false); } catch (OverflowException) { return(false); } }
/// <summary>
/// Tokenizes a page content stream into the list of graphics state operations it
/// contains, in document order, recovering inline images whose data happens to
/// contain a false "EI" (end image) marker.
/// </summary>
/// <param name="pageNumber">The 1-indexed page number, used only in error messages.</param>
/// <param name="inputBytes">The decoded bytes of the page's content stream.</param>
/// <returns>The graphics state operations parsed from the stream.</returns>
public IReadOnlyList<IGraphicsStateOperation> Parse(int pageNumber, IInputBytes inputBytes)
{
    var scanner = new CoreTokenScanner(inputBytes);

    // Operand tokens seen since the last operator; they belong to the next operator.
    var precedingTokens = new List<IToken>();
    var graphicsStateOperations = new List<IGraphicsStateOperation>();

    // Offset just past the data of the most recent inline image; used to detect and
    // repair the case where the image data itself contained the bytes "EI".
    var lastEndImageOffset = new long?();

    while (scanner.MoveNext())
    {
        var token = scanner.CurrentToken;

        if (token is InlineImageDataToken inlineImageData)
        {
            // The preceding tokens are the BI...ID dictionary entries as alternating
            // name/value pairs; fold them into a dictionary.
            var dictionary = new Dictionary<NameToken, IToken>();

            for (var i = 0; i < precedingTokens.Count - 1; i++)
            {
                var t = precedingTokens[i];

                if (!(t is NameToken n))
                {
                    continue;
                }

                i++;
                dictionary[n] = precedingTokens[i];
            }

            graphicsStateOperations.Add(new BeginInlineImageData(dictionary));
            graphicsStateOperations.Add(new EndInlineImage(inlineImageData.Data));

            // NOTE(review): -2 presumably steps back over the consumed "EI" operator
            // to the end of the image data - confirm against CoreTokenScanner semantics.
            lastEndImageOffset = scanner.CurrentPosition - 2;

            precedingTokens.Clear();
        }
        else if (token is OperatorToken op)
        {
            // Handle an end image where the stream of image data contained EI but was not actually a real end image operator.
            if (op.Data == "EI")
            {
                // Check an end image operation was the last thing that happened.
                IGraphicsStateOperation lastOperation = graphicsStateOperations.Count > 0
                    ? graphicsStateOperations[graphicsStateOperations.Count - 1]
                    : null;

                if (lastEndImageOffset == null || lastOperation == null || !(lastOperation is EndInlineImage lastEndImage))
                {
                    throw new PdfDocumentFormatException("Encountered End Image token outside an inline image on "
                        + $"page {pageNumber} at offset in content: {scanner.CurrentPosition}.");
                }

                // Work out how much data we missed between the false EI operator and the actual one.
                var actualEndImageOffset = scanner.CurrentPosition - 3;

                var gap = (int)(actualEndImageOffset - lastEndImageOffset);

                // Remember where we were so the scanner's input can be restored afterwards.
                var from = inputBytes.CurrentOffset;

                inputBytes.Seek(lastEndImageOffset.Value);

                // Recover the full image data.
                {
                    var missingData = new byte[gap];
                    var read = inputBytes.Read(missingData);

                    if (read != gap)
                    {
                        throw new InvalidOperationException($"Failed to read expected buffer length {gap} on page {pageNumber} "
                            + $"when reading inline image at offset in content: {lastEndImageOffset.Value}.");
                    }

                    // Replace the last end image operator with one containing the full set of data.
                    graphicsStateOperations.Remove(lastEndImage);
                    graphicsStateOperations.Add(new EndInlineImage(lastEndImage.ImageData.Concat(missingData).ToArray()));
                }

                lastEndImageOffset = actualEndImageOffset;

                inputBytes.Seek(from);
            }
            else
            {
                var operation = operationFactory.Create(op, precedingTokens);

                if (operation != null)
                {
                    graphicsStateOperations.Add(operation);
                }
            }

            precedingTokens.Clear();
        }
        else if (token is CommentToken)
        {
            // Comments carry no meaning in a content stream; skip without treating
            // them as operands.
        }
        else
        {
            precedingTokens.Add(token);
        }
    }

    return (graphicsStateOperations);
}
/// <summary>
/// Reads a character code from the input; codes are always a single byte wide here.
/// </summary>
/// <param name="bytes">The input; only the current byte is inspected.</param>
/// <param name="codeLength">Always set to 1, the width of the code in bytes.</param>
/// <returns>The current byte interpreted as a character code.</returns>
public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
{
    codeLength = 1;

    var code = bytes.CurrentByte;

    return code;
}
/// <summary>
/// Parses the chain of cross reference tables and/or streams starting at the given
/// location, following previous-section links backwards through the file.
/// NOTE(review): the remainder of this method (the cross-reference-stream branch,
/// loop close and return) falls outside this excerpt.
/// </summary>
/// <param name="bytes">The bytes of the PDF file.</param>
/// <param name="isLenientParsing">Whether to attempt recovery from corrupt offsets.</param>
/// <param name="crossReferenceLocation">Offset of the first (most recent) cross reference section.</param>
/// <param name="offsetCorrection">Correction added to offsets read from the file (e.g. when junk precedes the %PDF header).</param>
/// <param name="pdfScanner">Scanner used to read cross reference streams.</param>
/// <param name="tokenScanner">Scanner used to read cross reference tables.</param>
public CrossReferenceTable Parse(IInputBytes bytes, bool isLenientParsing, long crossReferenceLocation, long offsetCorrection, IPdfTokenScanner pdfScanner, ISeekableTokenScanner tokenScanner)
{
    // The declared offset may be wrong; in lenient mode try to locate the real one.
    long fixedOffset = offsetValidator.CheckXRefOffset(crossReferenceLocation, tokenScanner, bytes, isLenientParsing);
    if (fixedOffset > -1)
    {
        crossReferenceLocation = fixedOffset;
        log.Debug($"Found the first cross reference table or stream at {fixedOffset}.");
    }

    var table = new CrossReferenceTableBuilder();

    // NOTE(review): prevSet is not consulted anywhere in this excerpt - presumably
    // intended to guard against cycles in the previous-offset chain; confirm against
    // the rest of the method.
    var prevSet = new HashSet<long>();
    long previousCrossReferenceLocation = crossReferenceLocation;
    var missedAttempts = 0;

    // Parse all cross reference tables and streams.
    while (previousCrossReferenceLocation > 0 && missedAttempts < 100)
    {
        log.Debug($"Reading cross reference table or stream at {previousCrossReferenceLocation}.");

        if (previousCrossReferenceLocation >= bytes.Length)
        {
            break;
        }

        // seek to xref table
        tokenScanner.Seek(previousCrossReferenceLocation);
        tokenScanner.MoveNext();

        if (tokenScanner.CurrentToken is OperatorToken tableToken && tableToken.Data == "xref")
        {
            missedAttempts = 0;
            log.Debug("Element was cross reference table.");

            CrossReferenceTablePart tablePart = CrossReferenceTableParser.Parse(tokenScanner, previousCrossReferenceLocation, isLenientParsing);

            // Follow the previous-section pointer, corrected for any leading junk.
            var nextOffset = tablePart.GetPreviousOffset();

            if (nextOffset >= 0)
            {
                nextOffset += offsetCorrection;
            }

            previousCrossReferenceLocation = nextOffset;

            DictionaryToken tableDictionary = tablePart.Dictionary;

            CrossReferenceTablePart streamPart = null;

            // check for a XRef stream, it may contain some object ids of compressed objects
            if (tableDictionary.ContainsKey(NameToken.XrefStm))
            {
                log.Debug("Cross reference table contained referenced to stream. Reading the stream.");

                int streamOffset = ((NumericToken)tableDictionary.Data[NameToken.XrefStm]).Int;

                // check the xref stream reference
                fixedOffset = offsetValidator.CheckXRefOffset(streamOffset, tokenScanner, bytes, isLenientParsing);
                if (fixedOffset > -1 && fixedOffset != streamOffset)
                {
                    log.Warn($"/XRefStm offset {streamOffset} is incorrect, corrected to {fixedOffset}");

                    streamOffset = (int)fixedOffset;

                    // Update the cross reference table to be a stream instead.
                    tableDictionary = tableDictionary.With(NameToken.XrefStm, new NumericToken(streamOffset));
                    tablePart = new CrossReferenceTablePart(tablePart.ObjectOffsets, streamOffset, tablePart.Previous, tableDictionary, tablePart.Type);
                }

                // Read the stream from the table.
                if (streamOffset > 0)
                {
                    try
                    {
                        TryParseCrossReferenceStream(streamOffset, pdfScanner, out streamPart);
                    }
                    catch (InvalidOperationException ex)
                    {
                        // A corrupt hybrid-reference stream is tolerated in lenient mode only.
                        if (isLenientParsing)
                        {
                            log.Error("Failed to parse /XRefStm at offset " + streamOffset, ex);
                        }
                        else
                        {
                            throw;
                        }
                    }
                }
                else
                {
                    if (isLenientParsing)
                    {
                        log.Error("Skipped XRef stream due to a corrupt offset:" + streamOffset);
                    }
                    else
                    {
                        throw new PdfDocumentFormatException("Skipped XRef stream due to a corrupt offset:" + streamOffset);
                    }
                }
            }

            table.Add(tablePart);

            if (streamPart != null)
            {
                table.Add(streamPart);
            }
        }
/// <summary>
/// Attempts to read a numeric token (integer or real, optionally signed, optionally with
/// an e/E exponent marker) starting with the given byte.
/// </summary>
/// <param name="currentByte">The first byte of the candidate number.</param>
/// <param name="inputBytes">The input to continue reading from.</param>
/// <param name="token">The parsed numeric token when this returns true; otherwise null.</param>
/// <returns>True when a numeric token was read; false otherwise.</returns>
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
    token = null;

    StringBuilder characters;

    if ((currentByte >= '0' && currentByte <= '9') || currentByte == '-' || currentByte == '+' || currentByte == '.')
    {
        characters = new StringBuilder();
        characters.Append((char)currentByte);
    }
    else
    {
        return false;
    }

    // Consume any following characters which could form part of a number.
    while (inputBytes.MoveNext())
    {
        var b = inputBytes.CurrentByte;
        var c = (char)b;

        if (char.IsDigit(c) || c == '-' || c == '+' || c == '.' || c == 'E' || c == 'e')
        {
            characters.Append(c);
        }
        else
        {
            break;
        }
    }

    decimal value;
    if (characters.Length == 1 && (characters[0] == '-' || characters[0] == '.'))
    {
        // A lone sign or decimal point is treated as zero rather than a failure.
        value = 0;
    }
    else if (!decimal.TryParse(characters.ToString(), NumberStyles.Any, CultureInfo.InvariantCulture, out value))
    {
        // TryParse covers both malformed input (previously FormatException) and values
        // outside the range of decimal (previously OverflowException) without using
        // exceptions for control flow.
        return false;
    }

    token = new NumericToken(value);

    return true;
}
/// <summary>
/// Verifies that a startxref offset really points to a cross reference table or stream,
/// attempting to repair the offset when it does not. Repair only runs in lenient mode.
/// </summary>
/// <param name="startXRefOffset">The offset declared by the file.</param>
/// <param name="scanner">Scanner used to inspect the token at the offset.</param>
/// <param name="inputBytes">The bytes of the PDF file.</param>
/// <param name="isLenientParsing">Whether repair is permitted.</param>
/// <returns>The validated (possibly corrected) offset, or -1 when none could be found.</returns>
public long CheckXRefOffset(long startXRefOffset, ISeekableTokenScanner scanner, IInputBytes inputBytes, bool isLenientParsing)
{
    // Repair mode isn't available when parsing strictly; trust the declared offset.
    if (!isLenientParsing)
    {
        return startXRefOffset;
    }

    scanner.Seek(startXRefOffset);
    scanner.MoveNext();

    var foundTableKeyword = ReferenceEquals(scanner.CurrentToken, OperatorToken.Xref);
    if (foundTableKeyword)
    {
        return startXRefOffset;
    }

    if (startXRefOffset <= 0)
    {
        // No plausible location to repair from; signal failure.
        return -1;
    }

    // Either the offset already points at a cross reference stream, or we fall back
    // to calculating a corrected offset.
    return CheckXRefStreamOffset(startXRefOffset, scanner, true)
        ? startXRefOffset
        : CalculateXRefFixedOffset(startXRefOffset, scanner, inputBytes);
}
/// <summary>
/// Parse the font metrics from the input bytes of an Adobe Font Metrics (AFM) file.
/// The file is a sequence of keyword lines between StartFontMetrics and EndFontMetrics.
/// </summary>
/// <param name="bytes">The bytes of the AFM file.</param>
/// <param name="useReducedDataSet">NOTE(review): not consulted anywhere in this method - confirm whether it is intended for a future reduced parse.</param>
/// <returns>The parsed font metrics.</returns>
/// <exception cref="InvalidFontFormatException">When the file does not start with StartFontMetrics or a character metrics section is malformed.</exception>
public static AdobeFontMetrics Parse(IInputBytes bytes, bool useReducedDataSet)
{
    var stringBuilder = new StringBuilder();

    var token = ReadString(bytes, stringBuilder);

    if (!string.Equals(StartFontMetrics, token, StringComparison.OrdinalIgnoreCase))
    {
        throw new InvalidFontFormatException($"The AFM file was not valid, it did not start with {StartFontMetrics}.");
    }

    var version = ReadDecimal(bytes, stringBuilder);

    var builder = new AdobeFontMetricsBuilder(version);

    // Each keyword is followed by its value(s); read keyword-by-keyword until the
    // closing EndFontMetrics.
    while ((token = ReadString(bytes, stringBuilder)) != EndFontMetrics)
    {
        switch (token)
        {
            case Comment:
                builder.Comments.Add(ReadLine(bytes, stringBuilder));
                break;
            case FontName:
                builder.FontName = ReadLine(bytes, stringBuilder);
                break;
            case FullName:
                builder.FullName = ReadLine(bytes, stringBuilder);
                break;
            case FamilyName:
                builder.FamilyName = ReadLine(bytes, stringBuilder);
                break;
            case Weight:
                builder.Weight = ReadLine(bytes, stringBuilder);
                break;
            case ItalicAngle:
                builder.ItalicAngle = ReadDecimal(bytes, stringBuilder);
                break;
            case IsFixedPitch:
                builder.IsFixedPitch = ReadBool(bytes, stringBuilder);
                break;
            case FontBbox:
                // Four numbers: llx, lly, urx, ury.
                builder.SetBoundingBox(ReadDouble(bytes, stringBuilder), ReadDouble(bytes, stringBuilder),
                    ReadDouble(bytes, stringBuilder), ReadDouble(bytes, stringBuilder));
                break;
            case UnderlinePosition:
                builder.UnderlinePosition = ReadDecimal(bytes, stringBuilder);
                break;
            case UnderlineThickness:
                builder.UnderlineThickness = ReadDecimal(bytes, stringBuilder);
                break;
            case Version:
                builder.Version = ReadLine(bytes, stringBuilder);
                break;
            case Notice:
                builder.Notice = ReadLine(bytes, stringBuilder);
                break;
            case EncodingScheme:
                builder.EncodingScheme = ReadLine(bytes, stringBuilder);
                break;
            case MappingScheme:
                builder.MappingScheme = (int)ReadDecimal(bytes, stringBuilder);
                break;
            case CharacterSet:
                builder.CharacterSet = ReadLine(bytes, stringBuilder);
                break;
            case EscChar:
                builder.EscapeCharacter = (int)ReadDecimal(bytes, stringBuilder);
                break;
            case Characters:
                builder.Characters = (int)ReadDecimal(bytes, stringBuilder);
                break;
            case IsBaseFont:
                builder.IsBaseFont = ReadBool(bytes, stringBuilder);
                break;
            case CapHeight:
                builder.CapHeight = ReadDecimal(bytes, stringBuilder);
                break;
            case XHeight:
                builder.XHeight = ReadDecimal(bytes, stringBuilder);
                break;
            case Ascender:
                builder.Ascender = ReadDecimal(bytes, stringBuilder);
                break;
            case Descender:
                builder.Descender = ReadDecimal(bytes, stringBuilder);
                break;
            case StdHw:
                builder.StdHw = ReadDecimal(bytes, stringBuilder);
                break;
            case StdVw:
                builder.StdVw = ReadDecimal(bytes, stringBuilder);
                break;
            case CharWidth:
                builder.SetCharacterWidth(ReadDouble(bytes, stringBuilder), ReadDouble(bytes, stringBuilder));
                break;
            case VVector:
                builder.SetVVector(ReadDouble(bytes, stringBuilder), ReadDouble(bytes, stringBuilder));
                break;
            case IsFixedV:
                builder.IsFixedV = ReadBool(bytes, stringBuilder);
                break;
            case StartCharMetrics:
                // The keyword is followed by the number of per-character metric lines.
                var count = (int)ReadDecimal(bytes, stringBuilder);
                for (var i = 0; i < count; i++)
                {
                    var metric = ReadCharacterMetric(bytes, stringBuilder);
                    builder.CharacterMetrics.Add(metric);
                }

                var end = ReadString(bytes, stringBuilder);
                if (end != EndCharMetrics)
                {
                    throw new InvalidFontFormatException($"The character metrics section did not end with {EndCharMetrics} instead it was {end}.");
                }

                break;
            case StartKernData:
                // NOTE(review): kerning data is not consumed here - presumably the
                // section's contents are read as no-op keywords by this loop; confirm.
                break;
        }
    }

    return (builder.Build());
}
/// <summary>
/// Brute-force scans the file for cross reference tables and streams, then returns the
/// candidate offset nearest to the requested (broken) xref offset. The chosen candidate
/// is removed from its list so it will not be offered again.
/// </summary>
/// <param name="xrefOffset">The broken offset to find a replacement for.</param>
/// <param name="scanner">Unused here; kept to match the existing call sites.</param>
/// <param name="reader">The bytes of the PDF file to scan.</param>
/// <returns>The nearest candidate offset, or -1 when no candidate was found.</returns>
private long BruteForceSearchForXref(long xrefOffset, ISeekableTokenScanner scanner, IInputBytes reader)
{
    BruteForceSearchForTables(reader);
    BfSearchForXRefStreams(reader);

    // TODO to be optimized, this won't work in every case
    var tableCandidate = bfSearchXRefTablesOffsets != null
        ? SearchNearestValue(bfSearchXRefTablesOffsets, xrefOffset)
        : -1;

    // TODO to be optimized, this won't work in every case
    var streamCandidate = bfSearchXRefStreamsOffsets != null
        ? SearchNearestValue(bfSearchXRefStreamsOffsets, xrefOffset)
        : -1;

    var hasTable = tableCandidate > -1;
    var hasStream = streamCandidate > -1;

    if (hasTable && hasStream)
    {
        // Both kinds found: keep whichever lies closest to the original offset.
        // Ties favour the table, matching the previous behaviour.
        if (Math.Abs(xrefOffset - tableCandidate) > Math.Abs(xrefOffset - streamCandidate))
        {
            bfSearchXRefStreamsOffsets.Remove(streamCandidate);
            return streamCandidate;
        }

        bfSearchXRefTablesOffsets.Remove(tableCandidate);
        return tableCandidate;
    }

    if (hasTable)
    {
        bfSearchXRefTablesOffsets.Remove(tableCandidate);
        return tableCandidate;
    }

    if (hasStream)
    {
        bfSearchXRefStreamsOffsets.Remove(streamCandidate);
        return streamCandidate;
    }

    return -1;
}
/// <summary>
/// Reads the next token from the input and parses it as a decimal using the
/// invariant culture. Throws if the token is not a valid number.
/// </summary>
/// <param name="input">The input to read from.</param>
/// <param name="stringBuilder">Scratch builder reused between reads.</param>
/// <returns>The parsed decimal value.</returns>
private static decimal ReadDecimal(IInputBytes input, StringBuilder stringBuilder)
{
    var text = ReadString(input, stringBuilder);

    return decimal.Parse(text, CultureInfo.InvariantCulture);
}
/// <summary>
/// Wires up the parsing services for a document, parses the header, cross reference
/// data, trailer and catalog, and returns the opened document.
/// </summary>
/// <param name="inputBytes">The raw bytes of the PDF file.</param>
/// <param name="scanner">Token scanner positioned over <paramref name="inputBytes"/>.</param>
/// <param name="log">Destination for diagnostic messages.</param>
/// <param name="isLenientParsing">Whether recoverable format errors are tolerated.</param>
/// <param name="passwords">Candidate passwords to try when the file is encrypted.</param>
/// <param name="clipPaths">Passed through to the document for page content handling.</param>
private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, ILog log, bool isLenientParsing, IReadOnlyList<string> passwords, bool clipPaths)
{
    var filterProvider = DefaultFilterProvider.Instance;

    CrossReferenceTable crossReferenceTable = null;

    var xrefValidator = new XrefOffsetValidator(log);

    // We're ok with this since our intent is to lazily load the cross reference table.
    // ReSharper disable once AccessToModifiedClosure
    var locationProvider = new ObjectLocationProvider(() => crossReferenceTable, inputBytes);

    // Starts with no-op encryption; the real handler is swapped in below once the
    // trailer (and any encryption dictionary) has been read.
    var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, filterProvider, NoOpEncryptionHandler.Instance);

    var crossReferenceStreamParser = new CrossReferenceStreamParser(filterProvider);
    var crossReferenceParser = new CrossReferenceParser(log, xrefValidator, crossReferenceStreamParser);

    var version = FileHeaderParser.Parse(scanner, isLenientParsing, log);

    // Offsets in the file are relative to the %PDF header, which may not sit at byte 0.
    var crossReferenceOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, scanner, isLenientParsing) + version.OffsetInFile;

    // TODO: make this use the scanner.
    var validator = new CrossReferenceOffsetValidator(xrefValidator);

    crossReferenceOffset = validator.Validate(crossReferenceOffset, scanner, inputBytes, isLenientParsing);

    crossReferenceTable = crossReferenceParser.Parse(inputBytes, isLenientParsing, crossReferenceOffset, version.OffsetInFile, pdfScanner, scanner);

    var (rootReference, rootDictionary) = ParseTrailer(crossReferenceTable, isLenientParsing, pdfScanner, out var encryptionDictionary);

    var encryptionHandler = encryptionDictionary != null
        ? (IEncryptionHandler)new EncryptionHandler(encryptionDictionary, crossReferenceTable.Trailer, passwords)
        : NoOpEncryptionHandler.Instance;

    pdfScanner.UpdateEncryptionHandler(encryptionHandler);

    // Font handling: each handler deals with one font subtype; Type1 is shared with
    // the TrueType handler as a fallback.
    var cidFontFactory = new CidFontFactory(pdfScanner, filterProvider);
    var encodingReader = new EncodingReader(pdfScanner);

    var type1Handler = new Type1FontHandler(pdfScanner, filterProvider, encodingReader);

    var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory, filterProvider, pdfScanner),
        new TrueTypeFontHandler(log, pdfScanner, filterProvider, encodingReader, SystemFontFinder.Instance, type1Handler),
        type1Handler,
        new Type3FontHandler(pdfScanner, filterProvider, encodingReader));

    var resourceContainer = new ResourceStore(pdfScanner, fontFactory);

    var information = DocumentInformationFactory.Create(pdfScanner, crossReferenceTable.Trailer);

    var catalog = CatalogFactory.Create(rootReference, rootDictionary, pdfScanner, isLenientParsing);

    var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider,
        new PageContentParser(new ReflectionGraphicsStateOperationFactory()), log);

    var caching = new ParsingCachingProviders(resourceContainer);

    var acroFormFactory = new AcroFormFactory(pdfScanner, filterProvider, crossReferenceTable);
    var bookmarksProvider = new BookmarksProvider(log, pdfScanner);

    return (new PdfDocument(log, inputBytes, version, crossReferenceTable, caching, pageFactory, catalog, information,
        encryptionDictionary, pdfScanner, filterProvider, acroFormFactory, bookmarksProvider, clipPaths));
}
/// <summary>
/// Attempts to read a PDF name token (e.g. /Type) starting at a forward slash,
/// decoding any #xx two-digit hex escapes found in the name.
/// </summary>
/// <param name="currentByte">The byte at the current position; must be '/'.</param>
/// <param name="inputBytes">The input to continue reading from.</param>
/// <param name="token">The parsed name token when this returns true; otherwise null.</param>
/// <returns>True when a name token was read; false otherwise.</returns>
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
    token = null;

    if (currentByte != '/')
    {
        return false;
    }

    var bytes = new List<byte>();

    // State for decoding a two-digit hex escape introduced by '#'.
    bool escapeActive = false;
    int postEscapeRead = 0;
    var escapedChars = new char[2];

    while (inputBytes.MoveNext())
    {
        var b = inputBytes.CurrentByte;

        if (b == '#')
        {
            escapeActive = true;
        }
        else if (escapeActive)
        {
            if (ReadHelper.IsHex((char)b))
            {
                escapedChars[postEscapeRead] = (char)b;
                postEscapeRead++;

                if (postEscapeRead == 2)
                {
                    // Both hex digits read: emit the decoded byte and reset the escape state.
                    var hex = new string(escapedChars);

                    var characterToWrite = (byte)Convert.ToInt32(hex, 16);

                    bytes.Add(characterToWrite);

                    escapeActive = false;
                    postEscapeRead = 0;
                }
            }
            else
            {
                // The '#' did not start a valid escape; emit the literal '#' and any
                // hex digit consumed so far, then handle the current byte normally.
                bytes.Add((byte)'#');

                if (postEscapeRead == 1)
                {
                    bytes.Add((byte)escapedChars[0]);
                }

                if (ReadHelper.IsEndOfName(b))
                {
                    break;
                }

                if (b == '#')
                {
                    // Make it clear what's going on, we read something like #m#AE
                    // ReSharper disable once RedundantAssignment
                    escapeActive = true;
                    postEscapeRead = 0;
                    continue;
                }

                bytes.Add(b);

                escapeActive = false;
                postEscapeRead = 0;
            }
        }
        else if (ReadHelper.IsEndOfName(b))
        {
            // Whitespace or a delimiter terminates the name.
            break;
        }
        else
        {
            bytes.Add(b);
        }
    }

    byte[] byteArray = bytes.ToArray();

    // Prefer UTF-8 when the raw bytes form valid UTF-8, otherwise fall back to Windows-1252.
    var str = ReadHelper.IsValidUtf8(byteArray)
        ? Encoding.UTF8.GetString(byteArray)
        : Encoding.GetEncoding("windows-1252").GetString(byteArray);

    token = NameToken.Create(str);

    return true;
}