Exemple #1
0
        /// <summary>
        /// Builds a <see cref="PdfDocument"/>: reads the header version, locates and validates the first cross reference
        /// offset, parses the cross reference table and the trailer root, then wires together the parsers and factories
        /// that are handed to the document.
        /// </summary>
        /// <param name="reader">Reader passed to the offset validator, cross reference parser, brute force searcher, trailer parsing and the document.</param>
        /// <param name="inputBytes">Bytes used when locating the first cross reference offset.</param>
        /// <param name="scanner">Token scanner used for the header, the trailer lookup and the scanner-based cross reference parse.</param>
        /// <param name="container">Source of the shared parser and logging services.</param>
        /// <param name="isLenientParsing">Passed through to every parser; when set, a root dictionary without a type entry is given the catalog type.</param>
        /// <returns>The constructed document.</returns>
        /// <exception cref="InvalidOperationException">The parsed trailer root is not a <see cref="PdfDictionary"/>.</exception>
        private static PdfDocument OpenDocument(IRandomAccessRead reader, IInputBytes inputBytes, ISeekableTokenScanner scanner, IContainer container, bool isLenientParsing)
        {
            var logger = container.Get<ILog>();

            var headerVersion = container.Get<FileHeaderParser>().Parse(scanner, isLenientParsing);

            var xrefOffset = container.Get<FileTrailerParser>().GetFirstCrossReferenceOffset(inputBytes, scanner, isLenientParsing);

            var objectPool = new CosObjectPool();

            // TODO: make this use the scanner.
            var dictionaryParser   = container.Get<CosDictionaryParser>();
            var validatorCosParser = container.Get<CosBaseParser>();
            var innerValidator     = new XrefOffsetValidator(logger, reader, dictionaryParser, validatorCosParser, objectPool);
            var offsetValidator    = new CrossReferenceOffsetValidator(innerValidator);

            xrefOffset = offsetValidator.Validate(xrefOffset, isLenientParsing);

            var xrefTable = container.Get<CrossReferenceParser>().Parse(reader, isLenientParsing, xrefOffset, objectPool);

            // The scanner-based parse is run as well; its result is not used here.
            container.Get<CrossReferenceParser>().ParseNew(xrefOffset, scanner, isLenientParsing);

            var filters  = container.Get<IFilterProvider>();
            var searcher = new BruteForceSearcher(reader);

            var objectParserLog    = container.Get<ILog>();
            var objectParserCos    = container.Get<CosBaseParser>();
            var objectParserStream = container.Get<CosStreamParser>();
            var objectParser       = new PdfObjectParser(objectParserLog, objectParserCos, objectParserStream, xrefTable, searcher, objectPool,
                                                         container.Get<ObjectStreamParser>());

            // Font handling.
            var trueTypeParser = new TrueTypeFontParser();
            var descriptors    = new FontDescriptorFactory();
            var cidFonts       = new CidFontFactory(descriptors, trueTypeParser, objectParser, filters);
            var cMaps          = new CMapCache(new CMapParser());

            var type0Handler    = new Type0FontHandler(cidFonts, cMaps, filters, objectParser);
            var trueTypeHandler = new TrueTypeFontHandler(objectParser, filters, cMaps, descriptors, trueTypeParser);
            var fonts           = new FontFactory(logger, type0Handler, trueTypeHandler);

            var dynamicParser = container.Get<DynamicParser>();
            var resources     = new ResourceContainer(objectParser, fonts);

            // Page, information and catalog factories.
            var contentParser = new PageContentParser(new ReflectionGraphicsStateOperationFactory());
            var pages         = new PageFactory(resources, objectParser, filters, contentParser);
            var infoFactory   = new DocumentInformationFactory();
            var catalogs      = new CatalogFactory(objectParser);

            var root = ParseTrailer(reader, xrefTable, dynamicParser, searcher, objectPool, isLenientParsing);

            if (!(root is PdfDictionary rootDictionary))
            {
                throw new InvalidOperationException("Expected root dictionary, but got this: " + root);
            }

            // in some pdfs the type value "Catalog" is missing in the root object
            if (isLenientParsing && !rootDictionary.ContainsKey(CosName.TYPE))
            {
                rootDictionary.Set(CosName.TYPE, CosName.CATALOG);
            }

            var information = infoFactory.Create(objectParser, xrefTable.Dictionary, reader, isLenientParsing);
            var catalog     = catalogs.Create(rootDictionary, reader, isLenientParsing);
            var caches      = new ParsingCachingProviders(objectPool, searcher, resources);

            return new PdfDocument(logger, reader, headerVersion, xrefTable, isLenientParsing, caches, pages, objectParser, catalog, information);
        }
Exemple #2
0
        /// <summary>
        /// Tries to repair a cross reference offset by brute force searching for an xref table/stream.
        /// </summary>
        /// <param name="objectOffset">The offset that was expected to point at an xref table/stream.</param>
        /// <param name="scanner">Scanner handed to the brute force search.</param>
        /// <param name="inputBytes">Bytes handed to the brute force search.</param>
        /// <returns>
        /// The offset reported by the brute force search when it is greater than -1; otherwise 0
        /// (also returned when <paramref name="objectOffset"/> is negative).
        /// </returns>
        private long CalculateXRefFixedOffset(long objectOffset, ISeekableTokenScanner scanner, IInputBytes inputBytes)
        {
            if (objectOffset < 0)
            {
                log.Error($"Invalid object offset {objectOffset} when searching for a xref table/stream");
                return 0;
            }

            // Scan for every xref table and look for the one matching the requested offset.
            var correctedOffset = BruteForceSearchForXref(objectOffset, scanner, inputBytes);

            if (!(correctedOffset > -1))
            {
                log.Error($"Can\'t find the object xref table/stream at offset {objectOffset}");
                return 0;
            }

            log.Debug($"Fixed reference for xref table/stream {objectOffset} -> {correctedOffset}");
            return correctedOffset;
        }
Exemple #3
0
        /// <summary>
        /// Attempts to read a parenthesised string starting at an opening '(' and produce a <see cref="StringToken"/>.
        /// </summary>
        /// <param name="currentByte">The byte that triggered tokenization; must be '(' for a token to be produced.</param>
        /// <param name="inputBytes">The bytes to read the string body from; advanced with MoveNext as the string is consumed.</param>
        /// <param name="token">The resulting string token, or null when the method returns false.</param>
        /// <returns>False when <paramref name="inputBytes"/> is null or <paramref name="currentByte"/> is not '('; otherwise true.</returns>
        public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
        {
            token = null;

            if (inputBytes == null)
            {
                return(false);
            }

            if (currentByte != '(')
            {
                return(false);
            }

            // Uses the instance-level builder rather than allocating a new one; it is cleared again before returning.
            var builder          = stringBuilder;
            // The opening bracket has already been consumed by the caller, so the nesting depth starts at one.
            var numberOfBrackets = 1;
            var isEscapeActive   = false;
            var isLineBreaking   = false;

            var octalModeActive = false;

            // Holds up to three octal digits collected while octal mode is active.
            short[] octal      = { 0, 0, 0 };
            var     octalsRead = 0;

            while (inputBytes.MoveNext())
            {
                var b = inputBytes.CurrentByte;
                var c = (char)b;

                if (octalModeActive)
                {
                    var nextCharacterOctal = c >= '0' && c <= '7';

                    if (nextCharacterOctal)
                    {
                        // left shift the octals.
                        LeftShiftOctal(c, octalsRead, octal);
                        octalsRead++;
                    }

                    // The octal sequence ends after three digits or at the first non-octal character.
                    if (octalsRead == 3 || !nextCharacterOctal)
                    {
                        var characterCode = OctalHelpers.FromOctalDigits(octal);

                        // For now :(
                        // TODO: I have a sneaking suspicion this is wrong, not sure what behaviour is for large octal numbers
                        builder.Append((char)characterCode);

                        // Reset the octal state ready for any later octal escape.
                        octal[0]        = 0;
                        octal[1]        = 0;
                        octal[2]        = 0;
                        octalsRead      = 0;
                        octalModeActive = false;
                    }

                    // An octal digit was consumed as part of the escape, so it must not be processed as a normal character.
                    if (nextCharacterOctal)
                    {
                        continue;
                    }
                }

                switch (c)
                {
                case ')':
                    isLineBreaking = false;
                    // An escaped ')' does not close a bracket level.
                    if (!isEscapeActive)
                    {
                        numberOfBrackets--;
                    }

                    isEscapeActive = false;
                    // Only the bracket which closes the whole string is left out of the output.
                    if (numberOfBrackets > 0)
                    {
                        builder.Append(c);
                    }

                    // TODO: Check for other ends of string where the string is improperly formatted. See commented method
                    // NOTE(review): CheckForEndOfString is not visible here; it may adjust the bracket count - confirm.
                    numberOfBrackets = CheckForEndOfString(numberOfBrackets, inputBytes);

                    break;

                case '(':
                    isLineBreaking = false;

                    // An escaped '(' does not open a new bracket level.
                    if (!isEscapeActive)
                    {
                        numberOfBrackets++;
                    }

                    isEscapeActive = false;
                    builder.Append(c);
                    break;

                // Escape
                case '\\':
                    isLineBreaking = false;
                    // Escaped backslash
                    if (isEscapeActive)
                    {
                        builder.Append(c);
                        isEscapeActive = false;
                    }
                    else
                    {
                        isEscapeActive = true;
                    }
                    break;

                default:
                    if (isLineBreaking)
                    {
                        // While the line breaking flag is set, end of line characters are skipped.
                        if (ReadHelper.IsEndOfLine(c))
                        {
                            continue;
                        }

                        isLineBreaking = false;
                        builder.Append(c);
                    }
                    else if (isEscapeActive)
                    {
                        // The helper receives the octal and line breaking state by reference, so it may update them.
                        ProcessEscapedCharacter(c, builder, octal, ref octalModeActive, ref octalsRead, ref isLineBreaking);
                        isEscapeActive = false;
                    }
                    else
                    {
                        builder.Append(c);
                    }

                    break;
                }

                // All brackets are balanced: the string has ended.
                if (numberOfBrackets <= 0)
                {
                    break;
                }
            }

            StringToken.Encoding encodedWith;
            string tokenStr;

            // Look for a two-character byte order mark at the start of the collected characters.
            if (builder.Length >= 2)
            {
                if (builder[0] == 0xFE && builder[1] == 0xFF)
                {
                    var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString());

                    // Decode as big-endian UTF-16 and drop the first decoded character (the byte order mark).
                    tokenStr = Encoding.BigEndianUnicode.GetString(rawBytes).Substring(1);

                    encodedWith = StringToken.Encoding.Utf16BE;
                }
                else if (builder[0] == 0xFF && builder[1] == 0xFE)
                {
                    var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString());

                    // Decode as little-endian UTF-16 and drop the first decoded character (the byte order mark).
                    tokenStr = Encoding.Unicode.GetString(rawBytes).Substring(1);

                    encodedWith = StringToken.Encoding.Utf16;
                }
                else
                {
                    tokenStr = builder.ToString();

                    encodedWith = StringToken.Encoding.Iso88591;
                }
            }
            else
            {
                tokenStr = builder.ToString();

                encodedWith = StringToken.Encoding.Iso88591;
            }

            // Leave the reused builder empty for the next call.
            builder.Clear();

            token = new StringToken(tokenStr, encodedWith);

            return(true);
        }
Exemple #4
0
        /// <summary>
        /// Parses a Type 1 font program: validates the leading comment, collects the header comments, then scans the
        /// remaining tokens (with Type 1 specific array and name tokenizers registered) so operators can be handled.
        /// </summary>
        /// <param name="inputBytes">The bytes of the Type 1 font program.</param>
        /// <returns>A <see cref="Type1Font"/> built from the name, encoding, font matrix and bounding box that were found.</returns>
        /// <exception cref="InvalidFontFormatException">The first token is not a comment whose data starts with '!'.</exception>
        public Type1Font Parse(IInputBytes inputBytes)
        {
            var scanner = new CoreTokenScanner(inputBytes);

            // The marker is a fixed ASCII character, so compare ordinally rather than with the current culture.
            if (!scanner.TryReadToken(out CommentToken comment) || !comment.Data.StartsWith("!", StringComparison.Ordinal))
            {
                throw new InvalidFontFormatException("The Type1 program did not start with '%!'.");
            }

            string name;
            var    parts = comment.Data.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

            // The name is only taken from the header when it splits into exactly three space separated parts.
            if (parts.Length == 3)
            {
                name = parts[1];
            }
            else
            {
                name = "Unknown";
            }

            var comments = new List<string>();

            // Consume the run of comment tokens which follows the header.
            while (scanner.MoveNext() && scanner.CurrentToken is CommentToken commentToken)
            {
                comments.Add(commentToken.Data);
            }

            var dictionaries = new List<DictionaryToken>();

            // Override arrays and names since type 1 handles these differently.
            var arrayTokenizer = new Type1ArrayTokenizer();
            var nameTokenizer  = new Type1NameTokenizer();

            scanner.RegisterCustomTokenizer((byte)'{', arrayTokenizer);
            scanner.RegisterCustomTokenizer((byte)'/', nameTokenizer);

            try
            {
                var tokenSet = new PreviousTokenSet();

                // Seed the history with the token the comment loop stopped on.
                tokenSet.Add(scanner.CurrentToken);
                while (scanner.MoveNext())
                {
                    if (scanner.CurrentToken is OperatorToken operatorToken)
                    {
                        HandleOperator(operatorToken, inputBytes, scanner, tokenSet, dictionaries);
                    }

                    tokenSet.Add(scanner.CurrentToken);
                }
            }
            finally
            {
                // Always remove the custom tokenizers, even when operator handling throws.
                scanner.DeregisterCustomTokenizer(arrayTokenizer);
                scanner.DeregisterCustomTokenizer(nameTokenizer);
            }

            var encoding    = GetEncoding(dictionaries);
            var matrix      = GetFontMatrix(dictionaries);
            var boundingBox = GetBoundingBox(dictionaries);

            return(new Type1Font(name, encoding, matrix, boundingBox));
        }
Exemple #5
0
        /// <summary>
        /// Brute force scan which fills <c>bfSearchXRefStreamsOffsets</c> with candidate object offsets found by
        /// locating the marker string and walking backwards to the preceding " obj" header.
        /// Does nothing when the list has already been created. The position of <paramref name="bytes"/> is
        /// restored to its starting offset before returning.
        /// </summary>
        /// <param name="bytes">The input to search; it is repositioned repeatedly during the scan.</param>
        private void BfSearchForXRefStreams(IInputBytes bytes)
        {
            // The search only runs once; a non-null list means the results are already available.
            if (bfSearchXRefStreamsOffsets != null)
            {
                return;
            }

            // a pdf may contain more than one /XRef entry
            bfSearchXRefStreamsOffsets = new List <long>();

            // Remember where the caller was so the position can be restored at the end.
            var startOffset = bytes.CurrentOffset;

            bytes.Seek(MinimumSearchOffset);

            // search for XRef streams
            var objString = " obj";

            while (bytes.MoveNext() && !bytes.IsAtEnd())
            {
                // NOTE(review): presumably tests whether the marker text starts at the current position - confirm against ReadHelper.IsString.
                if (!ReadHelper.IsString(bytes, "xref"))
                {
                    continue;
                }

                // search backwards for the beginning of the stream
                long newOffset  = -1;
                long xrefOffset = bytes.CurrentOffset;

                bool objFound = false;
                // Step backwards in windows of 10 bytes, up to 39 windows before the marker.
                for (var i = 1; i < 40; i++)
                {
                    if (objFound)
                    {
                        break;
                    }

                    long currentOffset = xrefOffset - (i * 10);

                    // Windows which would start at or before offset zero are skipped.
                    if (currentOffset > 0)
                    {
                        bytes.Seek(currentOffset);

                        // Check each of the 10 positions in this window for the " obj" text.
                        for (int j = 0; j < 10; j++)
                        {
                            if (ReadHelper.IsString(bytes, objString))
                            {
                                // Step back one position from the " obj" text to inspect what precedes it.
                                long tempOffset = currentOffset - 1;

                                bytes.Seek(tempOffset);

                                var generationNumber = bytes.Peek();

                                // is the next char a digit?
                                if (generationNumber.HasValue && ReadHelper.IsDigit(generationNumber.Value))
                                {
                                    tempOffset--;
                                    bytes.Seek(tempOffset);

                                    // is the digit preceded by a space?
                                    if (ReadHelper.IsSpace(bytes.CurrentByte))
                                    {
                                        // Counts the digits found while walking further backwards.
                                        int length = 0;
                                        bytes.Seek(--tempOffset);

                                        // Walk backwards over consecutive digits without going below the minimum search offset.
                                        while (tempOffset > MinimumSearchOffset && ReadHelper.IsDigit(bytes.CurrentByte))
                                        {
                                            bytes.Seek(--tempOffset);
                                            length++;
                                        }

                                        // At least one digit was found: advance one position and record that offset.
                                        if (length > 0)
                                        {
                                            bytes.MoveNext();
                                            newOffset = bytes.CurrentOffset;
                                        }
                                    }
                                }

                                // The backwards search stops at the first " obj" found, whether or not an offset was recorded.
                                objFound = true;

                                break;
                            }

                            currentOffset++;
                            bytes.MoveNext();
                        }
                    }
                }

                if (newOffset > -1)
                {
                    bfSearchXRefStreamsOffsets.Add(newOffset);
                }

                // Resume the outer scan 5 positions past where the marker was found.
                bytes.Seek(xrefOffset + 5);
            }

            bytes.Seek(startOffset);
        }
Exemple #6
0
        /// <summary>
        /// Attempts to read a parenthesised string starting at an opening '(' and produce a <see cref="StringToken"/>.
        /// Handles nested brackets, backslash escapes, octal escapes and escaped line breaks.
        /// </summary>
        /// <param name="currentByte">The byte that triggered tokenization; must be '(' for a token to be produced.</param>
        /// <param name="inputBytes">The bytes to read the string body from; advanced with MoveNext as the string is consumed.</param>
        /// <param name="token">The resulting string token, or null when the method returns false.</param>
        /// <returns>False when <paramref name="inputBytes"/> is null or <paramref name="currentByte"/> is not '('; otherwise true.</returns>
        public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
        {
            token = null;

            if (inputBytes == null)
            {
                return(false);
            }

            if (currentByte != '(')
            {
                return(false);
            }

            // Only allocate once we know a string is actually going to be read.
            var builder = new StringBuilder();

            // The opening bracket has already been consumed by the caller, so the nesting depth starts at one.
            int  numberOfBrackets = 1;
            bool isEscapeActive   = false;
            bool isLineBreaking   = false;

            bool octalModeActive = false;

            // Holds up to three octal digits collected while octal mode is active.
            short[] octal      = { 0, 0, 0 };
            int     octalsRead = 0;

            while (inputBytes.MoveNext())
            {
                var b = inputBytes.CurrentByte;
                var c = (char)b;

                if (octalModeActive)
                {
                    var nextCharacterOctal = c >= '0' && c <= '7';

                    if (nextCharacterOctal)
                    {
                        // left shift the octals.
                        LeftShiftOctal(c, octalsRead, octal);
                        octalsRead++;
                    }

                    // The octal sequence ends after three digits or at the first non-octal character.
                    if (octalsRead == 3 || !nextCharacterOctal)
                    {
                        var characterCode = OctalHelpers.FromOctalDigits(octal);

                        // For now :(
                        // TODO: I have a sneaking suspicion this is wrong, not sure what behaviour is for large octal numbers
                        builder.Append((char)characterCode);

                        octal[0]        = 0;
                        octal[1]        = 0;
                        octal[2]        = 0;
                        octalsRead      = 0;
                        octalModeActive = false;
                    }

                    // An octal digit was consumed as part of the escape, so it must not be processed as a normal character.
                    if (nextCharacterOctal)
                    {
                        continue;
                    }
                }

                switch (c)
                {
                case ')':
                    isLineBreaking = false;
                    // An escaped ')' does not close a bracket level.
                    if (!isEscapeActive)
                    {
                        numberOfBrackets--;
                    }

                    isEscapeActive = false;
                    // Only the bracket which closes the whole string is left out of the output.
                    if (numberOfBrackets > 0)
                    {
                        builder.Append(c);
                    }

                    // TODO: Check for other ends of string where the string is improperly formatted. See commented method
                    // numberOfBrackets = CheckForEndOfString(inputBytes, numberOfBrackets);


                    break;

                case '(':
                    isLineBreaking = false;

                    // An escaped '(' does not open a new bracket level.
                    if (!isEscapeActive)
                    {
                        numberOfBrackets++;
                    }

                    isEscapeActive = false;
                    builder.Append(c);
                    break;

                // Escape
                case '\\':
                    isLineBreaking = false;
                    // Escaped backslash
                    if (isEscapeActive)
                    {
                        builder.Append(c);

                        // The escape sequence is complete. Without this reset the character following "\\" would be
                        // treated as escaped, e.g. the ')' in "(\\)" would never close the string.
                        isEscapeActive = false;
                    }
                    else
                    {
                        isEscapeActive = true;
                    }
                    break;

                default:
                    if (isLineBreaking)
                    {
                        // While the line breaking flag is set, end of line characters are skipped.
                        if (ReadHelper.IsEndOfLine(c))
                        {
                            continue;
                        }

                        isLineBreaking = false;
                        builder.Append(c);
                    }
                    else if (isEscapeActive)
                    {
                        // The helper receives the octal and line breaking state by reference, so it may update them.
                        ProcessEscapedCharacter(c, builder, octal, ref octalModeActive, ref octalsRead, ref isLineBreaking);
                        isEscapeActive = false;
                    }
                    else
                    {
                        builder.Append(c);
                    }

                    break;
                }

                // All brackets are balanced: the string has ended.
                if (numberOfBrackets <= 0)
                {
                    break;
                }
            }

            token = new StringToken(builder.ToString());

            return(true);
        }
Exemple #7
0
        /// <summary>
        /// Reads character codes from <paramref name="bytes"/> using the active font, creates a <see cref="Letter"/>
        /// for each one with its transformed bounds, and advances the text matrix after every glyph.
        /// </summary>
        /// <param name="bytes">The bytes of the string being shown.</param>
        /// <exception cref="InvalidOperationException">
        /// The active font could not be found, or the font reports vertical writing without implementing
        /// <see cref="IVerticalWritingSupported"/>.
        /// </exception>
        public void ShowText(IInputBytes bytes)
        {
            var state = GetCurrentState();

            var font = state.FontState.FromExtendedGraphicsState
                ? activeExtendedGraphicsStateFont
                : resourceStore.GetFont(state.FontState.FontName);

            if (font == null)
            {
                throw new InvalidOperationException($"Could not find the font with name {state.FontState.FontName} in the resource store. It has not been loaded yet.");
            }

            // Text state values which stay fixed for the whole string.
            var size        = state.FontState.FontSize;
            var scaling     = state.FontState.HorizontalScaling / 100.0;
            var charSpacing = state.FontState.CharacterSpacing;
            var textRise    = state.FontState.Rise;

            var ctm = state.CurrentTransformationMatrix;

            var glyphSpaceMatrix = TransformationMatrix.FromValues(size * scaling, 0, 0, size, 0, textRise);

            // TODO: this does not seem correct, produces the correct result for now but we need to revisit.
            // see: https://stackoverflow.com/questions/48010235/pdf-specification-get-font-size-in-points
            var scaledMatrix = rotation.Rotate(ctm).Multiply(TextMatrices.TextMatrix).Multiply(size);
            var pointSize    = Math.Round(scaledMatrix.A, 2);

            // The point size is always reported as a non-negative value.
            if (pointSize < 0)
            {
                pointSize *= -1;
            }

            while (bytes.MoveNext())
            {
                var code = font.ReadCharacterCode(bytes, out int codeLength);

                var hasUnicode = font.TryGetUnicode(code, out var unicode);

                if (!hasUnicode || unicode == null)
                {
                    log.Warn($"We could not find the corresponding character with code {code} in font {font.Name}.");
                    // Try casting directly to string as in PDFBox 1.8.
                    unicode = new string((char)code, 1);
                }

                // Word spacing only applies to the single-byte space character code.
                var wordSpacing = 0.0;
                if (code == ' ' && codeLength == 1)
                {
                    wordSpacing += GetCurrentState().FontState.WordSpacing;
                }

                var glyphTextMatrix = TextMatrices.TextMatrix;

                if (font.IsVertical)
                {
                    if (!(font is IVerticalWritingSupported verticalFont))
                    {
                        throw new InvalidOperationException($"Font {font.Name} was in vertical writing mode but did not implement {nameof(IVerticalWritingSupported)}.");
                    }

                    var position = verticalFont.GetPositionVector(code);

                    glyphTextMatrix = glyphTextMatrix.Translate(position.X, position.Y);
                }

                var bounds = font.GetBoundingBox(code);

                // Glyph outline bounds taken through the rendering, text and (rotated) transformation matrices.
                var glyphBounds = rotation.Rotate(ctm)
                                          .Transform(glyphTextMatrix.Transform(glyphSpaceMatrix.Transform(bounds.GlyphBounds)));

                // A zero-height rectangle spanning the glyph width, taken through the same matrices.
                var advanceBounds = rotation.Rotate(ctm)
                                            .Transform(glyphTextMatrix.Transform(glyphSpaceMatrix.Transform(new PdfRectangle(0, 0, bounds.Width, 0))));

                // If the text rendering mode calls for filling, the current nonstroking color in the graphics state is used;
                // if it calls for stroking, the current stroking color is used.
                // In modes that perform both filling and stroking, the effect is as if each glyph outline were filled and then stroked in separate operations.
                // TODO: expose color as something more advanced
                var color = state.FontState.TextRenderingMode != TextRenderingMode.Stroke
                    ? state.CurrentNonStrokingColor
                    : state.CurrentStrokingColor;

                var letter = new Letter(unicode,
                                        glyphBounds,
                                        advanceBounds.BottomLeft,
                                        advanceBounds.BottomRight,
                                        advanceBounds.Width,
                                        size,
                                        font.Name.Data,
                                        color,
                                        pointSize,
                                        textSequence);

                letters.Add(letter);
                markedContentStack.AddLetter(letter);

                // Work out how far to move the text matrix for the next glyph.
                double tx = 0;
                double ty = 0;
                if (font.IsVertical)
                {
                    var displacement = ((IVerticalWritingSupported)font).GetDisplacementVector(code);
                    ty = (displacement.Y * size) + charSpacing + wordSpacing;
                }
                else
                {
                    tx = (bounds.Width * size + charSpacing + wordSpacing) * scaling;
                }

                TextMatrices.TextMatrix = TextMatrices.TextMatrix.Translate(tx, ty);
            }
        }
Exemple #8
0
        /// <summary>
        /// Reads a character map by scanning tokens and dispatching on the operators which start each section.
        /// Each section operator must be preceded by the expected token (a name for "usecmap", a number for the
        /// "begin..." operators). Name tokens are passed to the CID font name parser.
        /// </summary>
        /// <param name="inputBytes">The bytes of the character map.</param>
        /// <param name="isLenientParsing">Passed through to the section parsers.</param>
        /// <returns>The character map produced by the builder.</returns>
        /// <exception cref="InvalidOperationException">A section operator was not preceded by the expected token type.</exception>
        public CMap Parse(IInputBytes inputBytes, bool isLenientParsing)
        {
            var scanner = new CoreTokenScanner(inputBytes);
            var builder = new CharacterMapBuilder();

            // The token seen on the previous iteration; section operators take their operand from it.
            IToken previousToken = null;

            while (scanner.MoveNext())
            {
                var current = scanner.CurrentToken;

                if (current is OperatorToken op)
                {
                    switch (op.Data)
                    {
                    case "usecmap":
                    {
                        if (!(previousToken is NameToken cmapName))
                        {
                            throw new InvalidOperationException("Unexpected token preceding external cmap call: " + previousToken);
                        }

                        var external = ParseExternal(cmapName.Data);
                        builder.UseCMap(external);
                        break;
                    }

                    case "begincodespacerange":
                    {
                        if (!(previousToken is NumericToken count))
                        {
                            throw new InvalidOperationException("Unexpected token preceding start of codespace range: " + previousToken);
                        }

                        CodespaceRangeParser.Parse(count, scanner, builder, isLenientParsing);
                        break;
                    }

                    case "beginbfchar":
                    {
                        if (!(previousToken is NumericToken count))
                        {
                            throw new InvalidOperationException("Unexpected token preceding start of base font characters: " + previousToken);
                        }

                        BaseFontCharacterParser.Parse(count, scanner, builder, isLenientParsing);
                        break;
                    }

                    case "beginbfrange":
                    {
                        if (!(previousToken is NumericToken count))
                        {
                            throw new InvalidOperationException("Unexpected token preceding start of base font character ranges: " + previousToken);
                        }

                        BaseFontRangeParser.Parse(count, scanner, builder, isLenientParsing);
                        break;
                    }

                    case "begincidchar":
                    {
                        if (!(previousToken is NumericToken count))
                        {
                            throw new InvalidOperationException("Unexpected token preceding start of Cid character mapping: " + previousToken);
                        }

                        CidCharacterParser.Parse(count, scanner, builder, isLenientParsing);
                        break;
                    }

                    case "begincidrange":
                    {
                        if (!(previousToken is NumericToken count))
                        {
                            throw new InvalidOperationException("Unexpected token preceding start of Cid ranges: " + previousToken);
                        }

                        CidRangeParser.Parse(count, scanner, builder, isLenientParsing);
                        break;
                    }
                    }
                }
                else if (current is NameToken nameToken)
                {
                    CidFontNameParser.Parse(nameToken, scanner, builder, isLenientParsing);
                }

                previousToken = current;
            }

            return builder.Build();
        }
 /// <summary>
 /// Creates an instance which keeps a reference to the supplied input bytes.
 /// </summary>
 /// <param name="inputBytes">The input bytes stored by this instance.</param>
 public TrueTypeDataBytes(IInputBytes inputBytes) => this.inputBytes = inputBytes;
Exemple #10
0
 /// <summary>
 /// Creates a tokenizer over the supplied bytes and immediately reads the first token into <c>CurrentToken</c>.
 /// </summary>
 /// <param name="bytes">The bytes to tokenize.</param>
 public Type1Tokenizer(IInputBytes bytes)
 {
     // Both fields are assigned before the first token is read.
     comments   = new List<string>();
     this.bytes = bytes;

     CurrentToken = ReadNextToken();
 }
Exemple #11
0
 /// <summary>
 /// Reads a code from the input by delegating to the underlying character map.
 /// </summary>
 /// <param name="inputBytes">The bytes to read the code from.</param>
 /// <returns>The value returned by the character map's ReadCode.</returns>
 public int ReadCode(IInputBytes inputBytes) => cMap.ReadCode(inputBytes);
        /// <summary>
        /// Shows each character code read from <paramref name="bytes"/> as a glyph using the font of the
        /// current graphics state, advancing the text matrix after every glyph.
        /// </summary>
        /// <param name="bytes">The bytes containing the character codes to show.</param>
        /// <exception cref="InvalidOperationException">The font for the current state could not be found.</exception>
        /// <exception cref="NotImplementedException">The font is vertical.</exception>
        public void ShowText(IInputBytes bytes)
        {
            var state     = GetCurrentState();
            var fontState = state.FontState;

            var font = fontState.FromExtendedGraphicsState
                ? activeExtendedGraphicsStateFont
                : resourceStore.GetFont(fontState.FontName);

            if (font == null)
            {
                throw new InvalidOperationException($"Could not find the font with name {fontState.FontName} in the resource store. It has not been loaded yet.");
            }

            var fontSize          = fontState.FontSize;
            var horizontalScaling = fontState.HorizontalScaling / 100m;
            var characterSpacing  = fontState.CharacterSpacing;
            var rise              = fontState.Rise;

            var ctm = state.CurrentTransformationMatrix;

            // Maps glyph space to text space: scale by font size (and horizontal scaling) and apply the rise.
            var renderingMatrix = TransformationMatrix.FromValues(fontSize * horizontalScaling, 0, 0, fontSize, 0, rise);

            // TODO: this does not seem correct, produces the correct result for now but we need to revisit.
            // see: https://stackoverflow.com/questions/48010235/pdf-specification-get-font-size-in-points
            var scaledMatrix = rotation.Rotate(ctm).Multiply(TextMatrices.TextMatrix).Multiply(fontSize);
            var pointSize    = decimal.Round(scaledMatrix.A, 2);

            while (bytes.MoveNext())
            {
                var code = font.ReadCharacterCode(bytes, out int codeLength);

                if (!font.TryGetUnicode(code, out var unicode) || unicode == null)
                {
                    log.Warn($"We could not find the corresponding character with code {code} in font {font.Name}.");
                    // Try casting directly to string as in PDFBox 1.8.
                    unicode = new string((char)code, 1);
                }

                // Word spacing only applies to the single-byte space character code.
                var wordSpacing = code == ' ' && codeLength == 1
                    ? GetCurrentState().FontState.WordSpacing
                    : 0m;

                if (font.IsVertical)
                {
                    throw new NotImplementedException("Vertical fonts are currently unsupported, please submit a pull request or issue with an example file.");
                }

                var boundingBox = font.GetBoundingBox(code);

                // Glyph outline bounds: glyph space -> text space -> rotated user space.
                var glyphRotation     = rotation.Rotate(ctm);
                var glyphInTextSpace  = TextMatrices.TextMatrix.Transform(renderingMatrix.Transform(boundingBox.GlyphBounds));
                var glyphBounds       = glyphRotation.Transform(glyphInTextSpace);

                // Advance line of the glyph (zero height, glyph width) through the same chain.
                var advanceRotation    = rotation.Rotate(ctm);
                var advanceInTextSpace = TextMatrices.TextMatrix.Transform(renderingMatrix.Transform(new PdfRectangle(0, 0, boundingBox.Width, 0)));
                var advanceBounds      = advanceRotation.Transform(advanceInTextSpace);

                ShowGlyph(font, glyphBounds, advanceBounds.BottomLeft, advanceBounds.BottomRight, advanceBounds.Width, unicode, fontSize, pointSize);

                decimal tx = 0;
                decimal ty = 0;

                if (font.IsVertical)
                {
                    ty = boundingBox.GlyphBounds.Height * fontSize + characterSpacing + wordSpacing;
                }
                else
                {
                    tx = (boundingBox.Width * fontSize + characterSpacing + wordSpacing) * horizontalScaling;
                }

                // Move the text matrix on by the displacement of the glyph just shown.
                var displacement = TransformationMatrix.GetTranslationMatrix(tx, ty);

                TextMatrices.TextMatrix = displacement.Multiply(TextMatrices.TextMatrix);
            }
        }
Exemple #13
0
        /// <summary>
        /// Check that the offsets in the cross reference are correct.
        /// </summary>
        /// <remarks>
        /// Does nothing unless <paramref name="isLenientParsing"/> is set.
        /// NOTE(review): the corrected offsets are only written to a local copy of the table's offsets and the
        /// object stream handling is still commented out, so this looks like an unfinished port - confirm.
        /// </remarks>
        /// <param name="bytes">The bytes used to validate the offsets.</param>
        /// <param name="xrefTrailerResolver">The cross reference table whose object offsets are checked.</param>
        /// <param name="isLenientParsing">Whether repair is allowed.</param>
        public void CheckCrossReferenceOffsets(IInputBytes bytes, CrossReferenceTable xrefTrailerResolver, bool isLenientParsing)
        {
            // repair mode isn't available in non-lenient mode
            if (!isLenientParsing)
            {
                return;
            }

            Dictionary<IndirectReference, long> xrefOffset = xrefTrailerResolver.ObjectOffsets
                                                             .ToDictionary(pair => pair.Key, pair => pair.Value);

            if (ValidateXrefOffsets(bytes, xrefOffset))
            {
                return;
            }

            IReadOnlyDictionary<IndirectReference, long> bruteForceOffsets = getBFCosObjectOffsets();

            if (bruteForceOffsets.Count <= 0)
            {
                return;
            }

            // find all object streams (a negative offset holds the negated object stream number)
            var objectStreams = new List<IndirectReference>();

            foreach (var entry in xrefOffset)
            {
                if (entry.Value >= 0)
                {
                    continue;
                }

                var streamReference = new IndirectReference(-entry.Value, 0);

                if (!objectStreams.Contains(streamReference))
                {
                    objectStreams.Add(new IndirectReference(-entry.Value, 0));
                }
            }

            // remove all found object streams
            foreach (var streamKey in objectStreams)
            {
                if (bruteForceOffsets.ContainsKey(streamKey))
                {
                    // remove all parsed objects which are part of an object stream
                    //ISet<long> objects = xrefTrailerResolver
                    //    .getContainedObjectNumbers((int)(key.Number));
                    //foreach (long objNr in objects)
                    //{
                    //    CosObjectKey streamObjectKey = new CosObjectKey(objNr, 0);

                    //    if (bfCOSObjectKeyOffsets.TryGetValue(streamObjectKey, out long streamObjectOffset) && streamObjectOffset > 0)
                    //    {
                    //        bfCOSObjectKeyOffsets.Remove(streamObjectKey);
                    //    }
                    //}
                }
                else
                {
                    // remove all objects which are part of an object stream which wasn't found
                    //ISet<long> objects = xrefTrailerResolver
                    //    .getContainedObjectNumbers((int)(key.Number));
                    //foreach (long objNr in objects)
                    //{
                    //    xrefOffset.Remove(new CosObjectKey(objNr, 0));
                    //}
                }
            }

            // Overlay the brute-force offsets on top of the (local) cross reference offsets.
            foreach (var found in bruteForceOffsets)
            {
                xrefOffset[found.Key] = found.Value;
            }
        }
Exemple #14
0
 /// <summary>
 /// No-op implementation: the supplied bytes are ignored.
 /// </summary>
 /// <param name="bytes">The bytes of the text to show (unused).</param>
 public void ShowText(IInputBytes bytes)
 {
     // Nothing to do here.
 }
        /// <summary>
        /// Reads a value with <c>ReadDecimal</c> and converts the result to a <see cref="double"/>.
        /// </summary>
        /// <param name="input">The input to read from.</param>
        /// <param name="stringBuilder">The builder passed through to <c>ReadDecimal</c>.</param>
        /// <returns>The value read, cast to a double.</returns>
        private static double ReadDouble(IInputBytes input, StringBuilder stringBuilder)
            => (double)ReadDecimal(input, stringBuilder);
Exemple #16
0
        /// <summary>
        /// Where an entire PFB file has been embedded in the PDF we read the header first.
        /// </summary>
        /// <param name="bytes">The bytes of the PFB file, positioned before the first header byte.</param>
        /// <returns>The contents of the ASCII record followed by the contents of the binary record.</returns>
        /// <exception cref="InvalidOperationException">A record header did not have the expected indicator or type byte.</exception>
        private static (byte[] ascii, byte[] binary) ReadPfbHeader(IInputBytes bytes)
        {
            /*
             * The header is a 6 byte sequence. The first byte is 0x80 followed by 0x01 for the ASCII record indicator.
             * The following 4 bytes determine the size/length of the ASCII part of the PFB file.
             * After the ASCII part another 6 byte sequence is present, this time 0x80 0x02 for the Binary part length.
             * A 3rd sequence is present at the end re-stating the ASCII length but this is surplus to requirements.
             */

            // Advances the input by one byte and returns the byte now under the cursor.
            byte NextByte()
            {
                bytes.MoveNext();
                return bytes.CurrentByte;
            }

            // Validates a 6 byte record header and returns the little-endian length it declares.
            // ReSharper disable once ParameterOnlyUsedForPreconditionCheck.Local
            int ReadSize(byte recordType)
            {
                var indicator = NextByte();

                if (indicator != PfbFileIndicator)
                {
                    throw new InvalidOperationException($"File does not start with 0x80, which indicates a full PFB file. Instead got: {indicator}");
                }

                var type = NextByte();

                if (type != recordType)
                {
                    throw new InvalidOperationException($"Encountered unexpected header type in the PFB file: {type}");
                }

                // Least significant byte first; operands are evaluated left to right.
                return NextByte() + (NextByte() << 8) + (NextByte() << 16) + (NextByte() << 24);
            }

            // Reads the next 'length' bytes of the input into a new array.
            byte[] ReadBlock(int length)
            {
                var block = new byte[length];

                for (var index = 0; index < length; index++)
                {
                    block[index] = NextByte();
                }

                return block;
            }

            var asciiPart  = ReadBlock(ReadSize(0x01));
            var binaryPart = ReadBlock(ReadSize(0x02));

            return (asciiPart, binaryPart);
        }
        /// <summary>
        /// Reads one line of character metrics and builds the metric for a single character from it.
        /// </summary>
        /// <remarks>
        /// The line is split into entries, each entry is split into a key followed by its values and the
        /// values are parsed using the invariant culture.
        /// </remarks>
        /// <param name="bytes">The input to read the line from.</param>
        /// <param name="stringBuilder">The builder passed to <c>ReadLine</c>.</param>
        /// <returns>The metric built from the entries on the line.</returns>
        /// <exception cref="InvalidFontFormatException">An entry starts with an unrecognised key.</exception>
        private static AdobeFontMetricsIndividualCharacterMetric ReadCharacterMetric(IInputBytes bytes, StringBuilder stringBuilder)
        {
            double Number(string text) => double.Parse(text, CultureInfo.InvariantCulture);

            var entries = ReadLine(bytes, stringBuilder)
                          .Split(IndividualCharmetricsSplit, StringSplitOptions.RemoveEmptyEntries);

            var builder = new AdobeFontMetricsIndividualCharacterMetricBuilder();

            foreach (var entry in entries)
            {
                var fields = entry.Split(CharmetricsKeySplit, StringSplitOptions.RemoveEmptyEntries);

                switch (fields[0])
                {
                    // Character code, decimal.
                    case CharmetricsC:
                        builder.CharacterCode = int.Parse(fields[1], CultureInfo.InvariantCulture);
                        break;

                    // Character code, hexadecimal.
                    case CharmetricsCh:
                        builder.CharacterCode = int.Parse(fields[1], NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                        break;

                    // Single-axis widths.
                    case CharmetricsWx:
                        builder.WidthX = Number(fields[1]);
                        break;

                    case CharmetricsW0X:
                        builder.WidthXDirection0 = Number(fields[1]);
                        break;

                    case CharmetricsW1X:
                        builder.WidthXDirection1 = Number(fields[1]);
                        break;

                    case CharmetricsWy:
                        builder.WidthY = Number(fields[1]);
                        break;

                    case CharmetricsW0Y:
                        builder.WidthYDirection0 = Number(fields[1]);
                        break;

                    case CharmetricsW1Y:
                        builder.WidthYDirection1 = Number(fields[1]);
                        break;

                    // Width pairs: x first, then y.
                    case CharmetricsW:
                        builder.WidthX = Number(fields[1]);
                        builder.WidthY = Number(fields[2]);
                        break;

                    case CharmetricsW0:
                        builder.WidthXDirection0 = Number(fields[1]);
                        builder.WidthYDirection0 = Number(fields[2]);
                        break;

                    case CharmetricsW1:
                        builder.WidthXDirection1 = Number(fields[1]);
                        builder.WidthYDirection1 = Number(fields[2]);
                        break;

                    case CharmetricsVv:
                        builder.VVector = new AdobeFontMetricsVector(Number(fields[1]), Number(fields[2]));
                        break;

                    case CharmetricsN:
                        builder.Name = fields[1];
                        break;

                    case CharmetricsB:
                        builder.BoundingBox = new PdfRectangle(Number(fields[1]),
                                                               Number(fields[2]),
                                                               Number(fields[3]),
                                                               Number(fields[4]));
                        break;

                    case CharmetricsL:
                        builder.Ligature = new AdobeFontMetricsLigature(fields[1], fields[2]);
                        break;

                    default:
                        throw new InvalidFontFormatException($"Unknown CharMetrics command '{fields[0]}'.");
                }
            }

            return builder.Build();
        }
Exemple #18
0
        /// <summary>
        /// Parses an embedded Adobe Type 1 font file.
        /// </summary>
        /// <remarks>
        /// When the data starts with the PFB indicator byte the whole PFB file was embedded: the record
        /// headers are read first, the ASCII record is tokenized and the binary record is used as the
        /// encrypted portion. Otherwise the encrypted portion is everything between the <c>eexec</c>
        /// operator and the <c>cleartomark</c> marker.
        /// </remarks>
        /// <param name="inputBytes">The bytes of the font program.</param>
        /// <param name="length1">The length in bytes of the clear text portion of the font program (not used by this implementation).</param>
        /// <param name="length2">The length in bytes of the encrypted portion of the font program (not used by this implementation).</param>
        /// <returns>The parsed type 1 font.</returns>
        /// <exception cref="InvalidFontFormatException">The program does not start with a '%!' comment.</exception>
        public Type1Font Parse(IInputBytes inputBytes, int length1, int length2)
        {
            // Sometimes the entire PFB file including the header bytes can be included which prevents parsing in the normal way.
            var isEntirePfbFile = inputBytes.Peek() == PfbFileIndicator;

            IReadOnlyList<byte> eexecPortion = new byte[0];

            if (isEntirePfbFile)
            {
                var (ascii, binary) = ReadPfbHeader(inputBytes);

                eexecPortion = binary;
                inputBytes   = new ByteArrayInputBytes(ascii);
            }

            var scanner = new CoreTokenScanner(inputBytes);

            // Ordinal comparison: this is a file format marker, not linguistic text.
            if (!scanner.TryReadToken(out CommentToken comment) || !comment.Data.StartsWith("!", StringComparison.Ordinal))
            {
                throw new InvalidFontFormatException("The Type1 program did not start with '%!'.");
            }

            string name;
            var    parts = comment.Data.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

            if (parts.Length == 3)
            {
                name = parts[1];
            }
            else
            {
                name = "Unknown";
            }

            var comments = new List<string>();

            while (scanner.MoveNext() && scanner.CurrentToken is CommentToken commentToken)
            {
                comments.Add(commentToken.Data);
            }

            var dictionaries = new List<DictionaryToken>();

            // Override arrays and names since type 1 handles these differently.
            var arrayTokenizer = new Type1ArrayTokenizer();
            var nameTokenizer  = new Type1NameTokenizer();

            scanner.RegisterCustomTokenizer((byte)'{', arrayTokenizer);
            scanner.RegisterCustomTokenizer((byte)'/', nameTokenizer);

            try
            {
                var tempEexecPortion = new List<byte>();
                var tokenSet         = new PreviousTokenSet();
                tokenSet.Add(scanner.CurrentToken);
                while (scanner.MoveNext())
                {
                    if (scanner.CurrentToken is OperatorToken operatorToken)
                    {
                        if (Equals(scanner.CurrentToken, OperatorToken.Eexec))
                        {
                            // Number of leading marker characters matched so far; matched bytes are
                            // withheld from the output until the match either completes or fails.
                            int offset = 0;

                            while (inputBytes.MoveNext())
                            {
                                var current = inputBytes.CurrentByte;

                                if (current == (byte)ClearToMark[offset])
                                {
                                    offset++;

                                    if (offset == ClearToMark.Length)
                                    {
                                        break;
                                    }

                                    continue;
                                }

                                // The partial match failed: the withheld bytes were ordinary data after all.
                                for (int i = 0; i < offset; i++)
                                {
                                    tempEexecPortion.Add((byte)ClearToMark[i]);
                                }

                                if (current == (byte)ClearToMark[0])
                                {
                                    // The byte which broke the match may itself begin the marker (for example
                                    // "ccleartomark"); restart the match from it rather than discarding it as
                                    // data, otherwise the terminating marker is never detected.
                                    offset = 1;
                                }
                                else
                                {
                                    offset = 0;
                                    tempEexecPortion.Add(current);
                                }
                            }
                        }
                        else
                        {
                            HandleOperator(operatorToken, scanner, tokenSet, dictionaries);
                        }
                    }

                    tokenSet.Add(scanner.CurrentToken);
                }

                if (!isEntirePfbFile)
                {
                    eexecPortion = tempEexecPortion;
                }
            }
            finally
            {
                // Always restore the scanner's default tokenizers, even when parsing fails.
                scanner.DeregisterCustomTokenizer(arrayTokenizer);
                scanner.DeregisterCustomTokenizer(nameTokenizer);
            }

            var encoding    = GetEncoding(dictionaries);
            var matrix      = GetFontMatrix(dictionaries);
            var boundingBox = GetBoundingBox(dictionaries);

            encryptedPortionParser.Parse(eexecPortion);

            return new Type1Font(name, encoding, matrix, boundingBox ?? new PdfRectangle());
        }
Exemple #19
0
 /// <summary>
 /// Creates a new <see cref="BruteForceSearcher"/> over the supplied bytes.
 /// </summary>
 /// <param name="bytes">The bytes stored for searching; must not be null.</param>
 /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
 public BruteForceSearcher([NotNull] IInputBytes bytes)
 {
     if (bytes == null)
     {
         throw new ArgumentNullException(nameof(bytes));
     }

     this.bytes = bytes;
 }
Exemple #20
0
        /// <summary>
        /// Tries to read a numeric token which starts at <paramref name="currentByte"/>.
        /// </summary>
        /// <remarks>
        /// Reading continues while the input yields digits, signs, '.', 'E' or 'e'. A lone "-" or "." is
        /// treated as zero and a set of common values maps to shared <see cref="NumericToken"/> instances.
        /// </remarks>
        /// <param name="currentByte">The byte the tokenizer was invoked for.</param>
        /// <param name="inputBytes">The input to read the remaining characters from.</param>
        /// <param name="token">The numeric token if one was read, otherwise null.</param>
        /// <returns>True if a numeric token was read, false otherwise.</returns>
        public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
        {
            token = null;

            var startsNumber = (currentByte >= Zero && currentByte <= Nine)
                               || currentByte == '-'
                               || currentByte == '+'
                               || currentByte == '.';

            if (!startsNumber)
            {
                return false;
            }

            var characters = StringBuilderPool.Borrow();
            characters.Append((char)currentByte);

            while (inputBytes.MoveNext())
            {
                var next = inputBytes.CurrentByte;

                var continuesNumber = (next >= Zero && next <= Nine)
                                      || next == '-'
                                      || next == '+'
                                      || next == '.'
                                      || next == 'E'
                                      || next == 'e';

                if (!continuesNumber)
                {
                    break;
                }

                characters.Append((char)next);
            }

            try
            {
                var text = characters.ToString();
                StringBuilderPool.Return(characters);

                switch (text)
                {
                    case "-":
                    case ".":
                    case "0":
                        token = NumericToken.Zero;
                        break;
                    case "1":
                        token = NumericToken.One;
                        break;
                    case "2":
                        token = NumericToken.Two;
                        break;
                    case "3":
                        token = NumericToken.Three;
                        break;
                    case "4":
                        token = NumericToken.Four;
                        break;
                    case "5":
                        token = NumericToken.Five;
                        break;
                    case "6":
                        token = NumericToken.Six;
                        break;
                    case "7":
                        token = NumericToken.Seven;
                        break;
                    case "8":
                        token = NumericToken.Eight;
                        break;
                    case "9":
                        token = NumericToken.Nine;
                        break;
                    case "10":
                        token = NumericToken.Ten;
                        break;
                    case "100":
                        token = NumericToken.OneHundred;
                        break;
                    case "1000":
                        token = NumericToken.OneThousand;
                        break;
                    default:
                        // Anything else is parsed; unparseable text means this was not a number.
                        if (!decimal.TryParse(text, NumberStyles.Any, CultureInfo.InvariantCulture, out var parsed))
                        {
                            return false;
                        }

                        token = new NumericToken(parsed);
                        break;
                }

                return true;
            }
            catch (FormatException)
            {
                return false;
            }
            catch (OverflowException)
            {
                return false;
            }
        }
Exemple #21
0
        /// <summary>
        /// Parses the content stream of a page into its graphics state operations.
        /// </summary>
        /// <remarks>
        /// Non-operator tokens are collected as operands until an operator is met. Inline image data is turned
        /// into a begin/end image pair. An "EI" operator following such a pair means the image data itself
        /// contained "EI", so the skipped bytes are appended to the previous end image operation.
        /// </remarks>
        /// <param name="pageNumber">The number of the page, used in error messages.</param>
        /// <param name="inputBytes">The bytes of the content stream.</param>
        /// <returns>The operations in the order they were read.</returns>
        /// <exception cref="PdfDocumentFormatException">An "EI" operator was found outside an inline image.</exception>
        /// <exception cref="InvalidOperationException">The skipped inline image bytes could not be read back.</exception>
        public IReadOnlyList <IGraphicsStateOperation> Parse(int pageNumber, IInputBytes inputBytes)
        {
            var scanner = new CoreTokenScanner(inputBytes);

            var operands   = new List<IToken>();
            var operations = new List<IGraphicsStateOperation>();

            long? previousEndImageOffset = null;

            while (scanner.MoveNext())
            {
                var token = scanner.CurrentToken;

                switch (token)
                {
                    case InlineImageDataToken imageData:
                    {
                        // The operands before the image data are name/value pairs describing the image.
                        var parameters = new Dictionary<NameToken, IToken>();

                        var index = 0;
                        while (index < operands.Count - 1)
                        {
                            if (operands[index] is NameToken key)
                            {
                                parameters[key] = operands[index + 1];
                                index += 2;
                            }
                            else
                            {
                                index++;
                            }
                        }

                        operations.Add(new BeginInlineImageData(parameters));
                        operations.Add(new EndInlineImage(imageData.Data));

                        previousEndImageOffset = scanner.CurrentPosition - 2;

                        operands.Clear();
                        break;
                    }

                    // Handle an end image where the stream of image data contained EI but was not actually a real end image operator.
                    case OperatorToken op when op.Data == "EI":
                    {
                        // Check an end image operation was the last thing that happened.
                        var lastOperation = operations.Count > 0 ? operations[operations.Count - 1] : null;

                        if (previousEndImageOffset == null || lastOperation == null || !(lastOperation is EndInlineImage lastEndImage))
                        {
                            throw new PdfDocumentFormatException("Encountered End Image token outside an inline image on " +
                                                                 $"page {pageNumber} at offset in content: {scanner.CurrentPosition}.");
                        }

                        // Work out how much data we missed between the false EI operator and the actual one.
                        var actualEndImageOffset = scanner.CurrentPosition - 3;
                        var missingLength        = (int)(actualEndImageOffset - previousEndImageOffset.Value);

                        // Remember where we are so the scanner can carry on after reading back the gap.
                        var resumeOffset = inputBytes.CurrentOffset;
                        inputBytes.Seek(previousEndImageOffset.Value);

                        // Recover the full image data.
                        var missingData = new byte[missingLength];
                        var read        = inputBytes.Read(missingData);

                        if (read != missingLength)
                        {
                            throw new InvalidOperationException($"Failed to read expected buffer length {missingLength} on page {pageNumber} " +
                                                                $"when reading inline image at offset in content: {previousEndImageOffset.Value}.");
                        }

                        // Replace the last end image operator with one containing the full set of data.
                        operations.Remove(lastEndImage);
                        operations.Add(new EndInlineImage(lastEndImage.ImageData.Concat(missingData).ToArray()));

                        previousEndImageOffset = actualEndImageOffset;

                        inputBytes.Seek(resumeOffset);

                        operands.Clear();
                        break;
                    }

                    case OperatorToken op:
                    {
                        var operation = operationFactory.Create(op, operands);

                        if (operation != null)
                        {
                            operations.Add(operation);
                        }

                        operands.Clear();
                        break;
                    }

                    case CommentToken _:
                        // Comments carry no graphics state information.
                        break;

                    default:
                        operands.Add(token);
                        break;
                }
            }

            return operations;
        }
 /// <summary>
 /// Reads a character code, which for this font is always the single byte under the cursor.
 /// </summary>
 /// <param name="bytes">The input whose current byte is the character code.</param>
 /// <param name="codeLength">Always set to 1.</param>
 /// <returns>The value of the current byte.</returns>
 public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
 {
     const int singleByteCodeLength = 1;

     codeLength = singleByteCodeLength;

     return bytes.CurrentByte;
 }
        public CrossReferenceTable Parse(IInputBytes bytes, bool isLenientParsing, long crossReferenceLocation,
                                         long offsetCorrection,
                                         IPdfTokenScanner pdfScanner,
                                         ISeekableTokenScanner tokenScanner)
        {
            long fixedOffset = offsetValidator.CheckXRefOffset(crossReferenceLocation, tokenScanner, bytes, isLenientParsing);

            if (fixedOffset > -1)
            {
                crossReferenceLocation = fixedOffset;

                log.Debug($"Found the first cross reference table or stream at {fixedOffset}.");
            }

            var table = new CrossReferenceTableBuilder();

            var  prevSet = new HashSet <long>();
            long previousCrossReferenceLocation = crossReferenceLocation;

            var missedAttempts = 0;

            // Parse all cross reference tables and streams.
            while (previousCrossReferenceLocation > 0 && missedAttempts < 100)
            {
                log.Debug($"Reading cross reference table or stream at {previousCrossReferenceLocation}.");

                if (previousCrossReferenceLocation >= bytes.Length)
                {
                    break;
                }

                // seek to xref table
                tokenScanner.Seek(previousCrossReferenceLocation);

                tokenScanner.MoveNext();

                if (tokenScanner.CurrentToken is OperatorToken tableToken && tableToken.Data == "xref")
                {
                    missedAttempts = 0;
                    log.Debug("Element was cross reference table.");

                    CrossReferenceTablePart tablePart = CrossReferenceTableParser.Parse(tokenScanner,
                                                                                        previousCrossReferenceLocation, isLenientParsing);

                    var nextOffset = tablePart.GetPreviousOffset();

                    if (nextOffset >= 0)
                    {
                        nextOffset += offsetCorrection;
                    }

                    previousCrossReferenceLocation = nextOffset;

                    DictionaryToken tableDictionary = tablePart.Dictionary;

                    CrossReferenceTablePart streamPart = null;

                    // check for a XRef stream, it may contain some object ids of compressed objects
                    if (tableDictionary.ContainsKey(NameToken.XrefStm))
                    {
                        log.Debug("Cross reference table contained referenced to stream. Reading the stream.");

                        int streamOffset = ((NumericToken)tableDictionary.Data[NameToken.XrefStm]).Int;

                        // check the xref stream reference
                        fixedOffset = offsetValidator.CheckXRefOffset(streamOffset, tokenScanner, bytes, isLenientParsing);
                        if (fixedOffset > -1 && fixedOffset != streamOffset)
                        {
                            log.Warn($"/XRefStm offset {streamOffset} is incorrect, corrected to {fixedOffset}");

                            streamOffset = (int)fixedOffset;

                            // Update the cross reference table to be a stream instead.
                            tableDictionary = tableDictionary.With(NameToken.XrefStm, new NumericToken(streamOffset));
                            tablePart       = new CrossReferenceTablePart(tablePart.ObjectOffsets, streamOffset,
                                                                          tablePart.Previous, tableDictionary, tablePart.Type);
                        }

                        // Read the stream from the table.
                        if (streamOffset > 0)
                        {
                            try
                            {
                                TryParseCrossReferenceStream(streamOffset, pdfScanner, out streamPart);
                            }
                            catch (InvalidOperationException ex)
                            {
                                if (isLenientParsing)
                                {
                                    log.Error("Failed to parse /XRefStm at offset " + streamOffset, ex);
                                }
                                else
                                {
                                    throw;
                                }
                            }
                        }
                        else
                        {
                            if (isLenientParsing)
                            {
                                log.Error("Skipped XRef stream due to a corrupt offset:" + streamOffset);
                            }
                            else
                            {
                                throw new PdfDocumentFormatException("Skipped XRef stream due to a corrupt offset:" + streamOffset);
                            }
                        }
                    }

                    table.Add(tablePart);

                    if (streamPart != null)
                    {
                        table.Add(streamPart);
                    }
                }
Exemple #24
0
        /// <summary>
        /// Tries to read a numeric token which starts at <paramref name="currentByte"/>.
        /// </summary>
        /// <remarks>
        /// Reading continues while the input yields digits, signs, '.', 'E' or 'e'. A lone "-" or "." is
        /// treated as zero. Text which cannot be parsed as a decimal is rejected without throwing.
        /// </remarks>
        /// <param name="currentByte">The byte the tokenizer was invoked for.</param>
        /// <param name="inputBytes">The input to read the remaining characters from.</param>
        /// <param name="token">The numeric token if one was read, otherwise null.</param>
        /// <returns>True if a numeric token was read, false otherwise.</returns>
        public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
        {
            token = null;

            var startsNumber = (currentByte >= '0' && currentByte <= '9')
                               || currentByte == '-'
                               || currentByte == '+'
                               || currentByte == '.';

            if (!startsNumber)
            {
                return false;
            }

            var characters = new StringBuilder();
            characters.Append((char)currentByte);

            while (inputBytes.MoveNext())
            {
                var c = (char)inputBytes.CurrentByte;

                if (char.IsDigit(c) ||
                    c == '-' ||
                    c == '+' ||
                    c == '.' ||
                    c == 'E' ||
                    c == 'e')
                {
                    characters.Append(c);
                }
                else
                {
                    break;
                }
            }

            decimal value;

            if (characters.Length == 1 && (characters[0] == '-' || characters[0] == '.'))
            {
                value = 0;
            }
            else if (!decimal.TryParse(characters.ToString(), NumberStyles.Any, CultureInfo.InvariantCulture, out value))
            {
                // Malformed or out-of-range numbers are an expected outcome here, so use TryParse
                // rather than catching FormatException/OverflowException from Parse.
                return false;
            }

            token = new NumericToken(value);

            return true;
        }
Exemple #25
0
        /// <summary>
        /// Checks whether <paramref name="startXRefOffset"/> points at cross reference data and, when it does not,
        /// asks <c>CalculateXRefFixedOffset</c> for a corrected offset.
        /// </summary>
        /// <param name="startXRefOffset">The offset to check.</param>
        /// <param name="scanner">The scanner which is moved to <paramref name="startXRefOffset"/> to inspect the token there.</param>
        /// <param name="inputBytes">The input handed on to <c>CalculateXRefFixedOffset</c>.</param>
        /// <param name="isLenientParsing">When <c>false</c> no check is made and the offset is returned unchanged.</param>
        /// <returns>
        /// The original offset when it is accepted (or when parsing is not lenient), the result of
        /// <c>CalculateXRefFixedOffset</c> when it is rejected, or <c>-1</c> when the offset is rejected and not positive.
        /// </returns>
        public long CheckXRefOffset(long startXRefOffset, ISeekableTokenScanner scanner, IInputBytes inputBytes, bool isLenientParsing)
        {
            // Repairing offsets is a lenient-only feature; strict parsing trusts the value as given.
            if (!isLenientParsing)
            {
                return startXRefOffset;
            }

            scanner.Seek(startXRefOffset);
            scanner.MoveNext();

            var startsWithXrefOperator = ReferenceEquals(scanner.CurrentToken, OperatorToken.Xref);

            if (startsWithXrefOperator)
            {
                return startXRefOffset;
            }

            if (startXRefOffset <= 0)
            {
                // Nothing sensible to repair from: signal that no valid offset could be found.
                return -1;
            }

            return CheckXRefStreamOffset(startXRefOffset, scanner, true)
                ? startXRefOffset
                : CalculateXRefFixedOffset(startXRefOffset, scanner, inputBytes);
        }
        /// <summary>
        /// Reads Adobe Font Metrics (AFM) content from <paramref name="bytes"/> and builds the resulting
        /// <see cref="AdobeFontMetrics"/>.
        /// </summary>
        /// <param name="bytes">The input to read the AFM content from.</param>
        /// <param name="useReducedDataSet">Not read by this method.</param>
        /// <returns>The metrics produced by the builder once the end-of-metrics keyword has been read.</returns>
        /// <exception cref="InvalidFontFormatException">
        /// The content does not begin with the start-of-metrics keyword, or the character metrics section is not
        /// followed by its end keyword.
        /// </exception>
        public static AdobeFontMetrics Parse(IInputBytes bytes, bool useReducedDataSet)
        {
            var buffer = new StringBuilder();

            // The header keyword is compared case-insensitively; every later keyword is matched exactly.
            var header = ReadString(bytes, buffer);
            var hasExpectedHeader = string.Equals(StartFontMetrics, header, StringComparison.OrdinalIgnoreCase);

            if (!hasExpectedHeader)
            {
                throw new InvalidFontFormatException($"The AFM file was not valid, it did not start with {StartFontMetrics}.");
            }

            var metrics = new AdobeFontMetricsBuilder(ReadDecimal(bytes, buffer));

            // Keywords without a matching case below are skipped and the next word is read.
            for (var keyword = ReadString(bytes, buffer); keyword != EndFontMetrics; keyword = ReadString(bytes, buffer))
            {
                switch (keyword)
                {
                    case Comment:
                        metrics.Comments.Add(ReadLine(bytes, buffer));
                        break;

                    case FontName:
                        metrics.FontName = ReadLine(bytes, buffer);
                        break;

                    case FullName:
                        metrics.FullName = ReadLine(bytes, buffer);
                        break;

                    case FamilyName:
                        metrics.FamilyName = ReadLine(bytes, buffer);
                        break;

                    case Weight:
                        metrics.Weight = ReadLine(bytes, buffer);
                        break;

                    case ItalicAngle:
                        metrics.ItalicAngle = ReadDecimal(bytes, buffer);
                        break;

                    case IsFixedPitch:
                        metrics.IsFixedPitch = ReadBool(bytes, buffer);
                        break;

                    case FontBbox:
                    {
                        // Read in input order before handing the four values to the builder.
                        var first = ReadDouble(bytes, buffer);
                        var second = ReadDouble(bytes, buffer);
                        var third = ReadDouble(bytes, buffer);
                        var fourth = ReadDouble(bytes, buffer);
                        metrics.SetBoundingBox(first, second, third, fourth);
                        break;
                    }

                    case UnderlinePosition:
                        metrics.UnderlinePosition = ReadDecimal(bytes, buffer);
                        break;

                    case UnderlineThickness:
                        metrics.UnderlineThickness = ReadDecimal(bytes, buffer);
                        break;

                    case Version:
                        metrics.Version = ReadLine(bytes, buffer);
                        break;

                    case Notice:
                        metrics.Notice = ReadLine(bytes, buffer);
                        break;

                    case EncodingScheme:
                        metrics.EncodingScheme = ReadLine(bytes, buffer);
                        break;

                    case MappingScheme:
                        metrics.MappingScheme = (int)ReadDecimal(bytes, buffer);
                        break;

                    case CharacterSet:
                        metrics.CharacterSet = ReadLine(bytes, buffer);
                        break;

                    case EscChar:
                        metrics.EscapeCharacter = (int)ReadDecimal(bytes, buffer);
                        break;

                    case Characters:
                        metrics.Characters = (int)ReadDecimal(bytes, buffer);
                        break;

                    case IsBaseFont:
                        metrics.IsBaseFont = ReadBool(bytes, buffer);
                        break;

                    case CapHeight:
                        metrics.CapHeight = ReadDecimal(bytes, buffer);
                        break;

                    case XHeight:
                        metrics.XHeight = ReadDecimal(bytes, buffer);
                        break;

                    case Ascender:
                        metrics.Ascender = ReadDecimal(bytes, buffer);
                        break;

                    case Descender:
                        metrics.Descender = ReadDecimal(bytes, buffer);
                        break;

                    case StdHw:
                        metrics.StdHw = ReadDecimal(bytes, buffer);
                        break;

                    case StdVw:
                        metrics.StdVw = ReadDecimal(bytes, buffer);
                        break;

                    case CharWidth:
                    {
                        var first = ReadDouble(bytes, buffer);
                        var second = ReadDouble(bytes, buffer);
                        metrics.SetCharacterWidth(first, second);
                        break;
                    }

                    case VVector:
                    {
                        var first = ReadDouble(bytes, buffer);
                        var second = ReadDouble(bytes, buffer);
                        metrics.SetVVector(first, second);
                        break;
                    }

                    case IsFixedV:
                        metrics.IsFixedV = ReadBool(bytes, buffer);
                        break;

                    case StartCharMetrics:
                    {
                        // The section announces how many character metric entries follow.
                        var metricCount = (int)ReadDecimal(bytes, buffer);

                        for (var index = 0; index < metricCount; index++)
                        {
                            metrics.CharacterMetrics.Add(ReadCharacterMetric(bytes, buffer));
                        }

                        var terminator = ReadString(bytes, buffer);

                        if (terminator != EndCharMetrics)
                        {
                            throw new InvalidFontFormatException($"The character metrics section did not end with {EndCharMetrics} instead it was {terminator}.");
                        }

                        break;
                    }

                    case StartKernData:
                        // Recognised but nothing is read for it here.
                        break;
                }
            }

            return metrics.Build();
        }
Exemple #27
0
        /// <summary>
        /// Runs the brute force searches for cross reference tables and streams, then picks the found offset
        /// which <c>SearchNearestValue</c> reports as nearest to <paramref name="xrefOffset"/>.
        /// The chosen offset is removed from the collection it came from.
        /// </summary>
        /// <param name="xrefOffset">The offset the result should be close to.</param>
        /// <param name="scanner">Not used by this method.</param>
        /// <param name="reader">The input passed to both brute force searches.</param>
        /// <returns>The chosen offset, or <c>-1</c> when neither search yields a candidate greater than <c>-1</c>.</returns>
        private long BruteForceSearchForXref(long xrefOffset, ISeekableTokenScanner scanner, IInputBytes reader)
        {
            BruteForceSearchForTables(reader);
            BfSearchForXRefStreams(reader);

            // TODO to be optimized, this won't work in every case
            long nearestTable = bfSearchXRefTablesOffsets != null
                ? SearchNearestValue(bfSearchXRefTablesOffsets, xrefOffset)
                : -1;

            // TODO to be optimized, this won't work in every case
            long nearestStream = bfSearchXRefStreamsOffsets != null
                ? SearchNearestValue(bfSearchXRefStreamsOffsets, xrefOffset)
                : -1;

            var tableFound = nearestTable > -1;
            var streamFound = nearestStream > -1;

            bool useStream;

            if (tableFound && streamFound)
            {
                // Both kinds were found: the stream only wins when it is strictly closer than the table.
                long distanceToTable = xrefOffset - nearestTable;
                long distanceToStream = xrefOffset - nearestStream;
                useStream = Math.Abs(distanceToTable) > Math.Abs(distanceToStream);
            }
            else if (tableFound)
            {
                useStream = false;
            }
            else if (streamFound)
            {
                useStream = true;
            }
            else
            {
                return -1;
            }

            if (useStream)
            {
                bfSearchXRefStreamsOffsets.Remove(nearestStream);
                return nearestStream;
            }

            bfSearchXRefTablesOffsets.Remove(nearestTable);
            return nearestTable;
        }
        /// <summary>
        /// Reads the next string from <paramref name="input"/> via <c>ReadString</c> and parses it as a
        /// <see cref="decimal"/> using the invariant culture.
        /// </summary>
        /// <param name="input">The input to read from.</param>
        /// <param name="stringBuilder">The builder passed through to <c>ReadString</c>.</param>
        /// <returns>The parsed value.</returns>
        private static decimal ReadDecimal(IInputBytes input, StringBuilder stringBuilder)
            => decimal.Parse(ReadString(input, stringBuilder), CultureInfo.InvariantCulture);
Exemple #29
0
        /// <summary>
        /// Builds a <see cref="PdfDocument"/> for the given input: parses the file header, locates and parses the
        /// cross reference data, reads the trailer, installs the encryption handler and then constructs the
        /// factories and providers the document is created with.
        /// </summary>
        /// <param name="inputBytes">The bytes of the document.</param>
        /// <param name="scanner">The token scanner used for the header, trailer and cross reference parsing.</param>
        /// <param name="log">The log passed to the parsers and factories.</param>
        /// <param name="isLenientParsing">Passed through to every parsing step.</param>
        /// <param name="passwords">Passed to the <see cref="EncryptionHandler"/> when an encryption dictionary is present.</param>
        /// <param name="clipPaths">Passed through to the <see cref="PdfDocument"/> constructor.</param>
        /// <returns>The constructed document.</returns>
        private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, ILog log, bool isLenientParsing,
                                                IReadOnlyList <string> passwords, bool clipPaths)
        {
            var filterProvider = DefaultFilterProvider.Instance;

            // Starts out null and is assigned once the cross reference data has been parsed below.
            CrossReferenceTable crossReferenceTable = null;

            var offsetValidator = new XrefOffsetValidator(log);

            // Capturing the local on purpose: the provider reads the table lazily through the lambda,
            // so it sees the value assigned later in this method.
            // ReSharper disable once AccessToModifiedClosure
            var objectLocations = new ObjectLocationProvider(() => crossReferenceTable, inputBytes);
            var pdfScanner = new PdfTokenScanner(inputBytes, objectLocations, filterProvider, NoOpEncryptionHandler.Instance);

            var xrefStreamParser = new CrossReferenceStreamParser(filterProvider);
            var xrefParser = new CrossReferenceParser(log, offsetValidator, xrefStreamParser);

            var version = FileHeaderParser.Parse(scanner, isLenientParsing, log);

            // The offset from the trailer is shifted by the position at which the header was found.
            var crossReferenceOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, scanner, isLenientParsing)
                                       + version.OffsetInFile;

            // TODO: make this use the scanner.
            var crossReferenceValidator = new CrossReferenceOffsetValidator(offsetValidator);

            crossReferenceOffset = crossReferenceValidator.Validate(crossReferenceOffset, scanner, inputBytes, isLenientParsing);

            crossReferenceTable = xrefParser.Parse(inputBytes, isLenientParsing, crossReferenceOffset, version.OffsetInFile,
                                                   pdfScanner, scanner);

            var (rootReference, rootDictionary) = ParseTrailer(crossReferenceTable, isLenientParsing, pdfScanner,
                                                               out var encryptionDictionary);

            // The scanner was created with the no-op handler; replace it now that the trailer has been read.
            IEncryptionHandler encryptionHandler;

            if (encryptionDictionary != null)
            {
                encryptionHandler = new EncryptionHandler(encryptionDictionary, crossReferenceTable.Trailer, passwords);
            }
            else
            {
                encryptionHandler = NoOpEncryptionHandler.Instance;
            }

            pdfScanner.UpdateEncryptionHandler(encryptionHandler);

            var cidFontFactory = new CidFontFactory(pdfScanner, filterProvider);
            var encodingReader = new EncodingReader(pdfScanner);

            var type1Handler = new Type1FontHandler(pdfScanner, filterProvider, encodingReader);
            var type0Handler = new Type0FontHandler(cidFontFactory, filterProvider, pdfScanner);
            var trueTypeHandler = new TrueTypeFontHandler(log, pdfScanner, filterProvider, encodingReader,
                                                          SystemFontFinder.Instance, type1Handler);
            var type3Handler = new Type3FontHandler(pdfScanner, filterProvider, encodingReader);

            var fontFactory = new FontFactory(log, type0Handler, trueTypeHandler, type1Handler, type3Handler);

            var resourceStore = new ResourceStore(pdfScanner, fontFactory);

            var information = DocumentInformationFactory.Create(pdfScanner, crossReferenceTable.Trailer);

            var catalog = CatalogFactory.Create(rootReference, rootDictionary, pdfScanner, isLenientParsing);

            var contentParser = new PageContentParser(new ReflectionGraphicsStateOperationFactory());
            var pageFactory = new PageFactory(pdfScanner, resourceStore, filterProvider, contentParser, log);

            var caching = new ParsingCachingProviders(resourceStore);

            var acroFormFactory = new AcroFormFactory(pdfScanner, filterProvider, crossReferenceTable);
            var bookmarksProvider = new BookmarksProvider(log, pdfScanner);

            return new PdfDocument(log, inputBytes, version, crossReferenceTable, caching, pageFactory, catalog, information,
                                   encryptionDictionary,
                                   pdfScanner,
                                   filterProvider,
                                   acroFormFactory,
                                   bookmarksProvider,
                                   clipPaths);
        }
Exemple #30
0
        /// <summary>
        /// Attempts to read a name token. A name starts with '/' and continues until a byte for which
        /// <c>ReadHelper.IsEndOfName</c> returns <c>true</c>, or until the input is exhausted.
        /// </summary>
        /// <remarks>
        /// A '#' followed by two hexadecimal digits is decoded to the single byte with that value.
        /// A '#' which is not followed by two hexadecimal digits is kept literally, together with any single
        /// hexadecimal digit read after it. This holds whether the escape is cut short by an ordinary byte,
        /// by another '#', by the end of the name or by the end of the input.
        /// </remarks>
        /// <param name="currentByte">The first byte of the candidate token; must be '/'.</param>
        /// <param name="inputBytes">The input to read the name's bytes from.</param>
        /// <param name="token">The name token when the method returns <c>true</c>, otherwise <c>null</c>.</param>
        /// <returns><c>false</c> when <paramref name="currentByte"/> is not '/', otherwise <c>true</c>.</returns>
        public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
        {
            token = null;

            if (currentByte != '/')
            {
                return false;
            }

            var bytes = new List<byte>();

            bool escapeActive = false;
            int postEscapeRead = 0;
            var escapedChars = new char[2];

            while (inputBytes.MoveNext())
            {
                var b = inputBytes.CurrentByte;

                if (escapeActive)
                {
                    if (ReadHelper.IsHex((char)b))
                    {
                        escapedChars[postEscapeRead] = (char)b;
                        postEscapeRead++;

                        if (postEscapeRead == 2)
                        {
                            var hex = new string(escapedChars);

                            bytes.Add((byte)Convert.ToInt32(hex, 16));

                            escapeActive = false;
                            postEscapeRead = 0;
                        }

                        continue;
                    }

                    // The escape was cut short (for example "#m" or "#1#AE"): keep what was read literally,
                    // then let the current byte be handled like any other byte below. That way a '#' here
                    // starts a fresh escape instead of inheriting the stale digit of the broken one.
                    AppendUnfinishedEscape(bytes, escapedChars, postEscapeRead);
                    escapeActive = false;
                    postEscapeRead = 0;
                }

                if (b == '#')
                {
                    escapeActive = true;
                }
                else if (ReadHelper.IsEndOfName(b))
                {
                    break;
                }
                else
                {
                    bytes.Add(b);
                }
            }

            if (escapeActive)
            {
                // The input ended in the middle of an escape; treat it like any other unfinished escape
                // rather than silently dropping the pending bytes.
                AppendUnfinishedEscape(bytes, escapedChars, postEscapeRead);
            }

            byte[] byteArray = bytes.ToArray();

            var str = ReadHelper.IsValidUtf8(byteArray)
                ? Encoding.UTF8.GetString(byteArray)
                : Encoding.GetEncoding("windows-1252").GetString(byteArray);

            token = NameToken.Create(str);

            return true;
        }

        /// <summary>
        /// Appends the literal text of an escape which did not receive its two hexadecimal digits:
        /// the '#' itself followed by the single digit read so far, if any.
        /// </summary>
        private static void AppendUnfinishedEscape(List<byte> bytes, char[] escapedChars, int postEscapeRead)
        {
            bytes.Add((byte)'#');

            if (postEscapeRead == 1)
            {
                bytes.Add((byte)escapedChars[0]);
            }
        }