コード例 #1
0
        /// <summary>
        /// Scans tokens from <c>coreTokenScanner</c> until an "obj" operator is found, then collects the
        /// tokens that follow it up to the matching "endobj" operator into <c>readTokens</c>.
        /// </summary>
        /// <returns>
        /// <c>false</c> when fewer than two non-comment tokens were read before the first scan stopped, when a
        /// second "obj" operator appears before "endobj", or when the scan stops without the current token being
        /// "endobj". NOTE(review): the success path is beyond the end of this excerpt - confirm what is returned there.
        /// </returns>
        /// <exception cref="PdfDocumentFormatException">
        /// The two tokens recorded before the first scan stopped are not both <c>NumericToken</c> instances.
        /// </exception>
        public bool MoveNext()
        {
            // Read until we find object-number generation obj, e.g. "69 420 obj".
            // Counts the non-comment tokens seen before the scan stops.
            int tokensRead = 0;

            // The loop ends when the "obj" operator becomes the current token, or when the underlying
            // scanner's MoveNext() returns false.
            while (coreTokenScanner.MoveNext() && !Equals(coreTokenScanner.CurrentToken, OperatorToken.StartObject))
            {
                // Comments are ignored entirely: they are neither counted nor recorded.
                if (coreTokenScanner.CurrentToken is CommentToken)
                {
                    continue;
                }

                tokensRead++;

                // Two-slot sliding window: slot 0 receives the older entry, slot 1 the token just read.
                previousTokens[0]         = previousTokens[1];
                previousTokenPositions[0] = previousTokenPositions[1];

                previousTokens[1]         = coreTokenScanner.CurrentToken;
                previousTokenPositions[1] = coreTokenScanner.CurrentTokenStart;
            }

            // Fewer than two non-comment tokens were read, so the window cannot hold both an object number
            // and a generation number.
            if (tokensRead < 2)
            {
                return(false);
            }

            // The older window entry is taken as the object number (and its position as the start position),
            // the newer one as the generation number.
            var startPosition = previousTokenPositions[0];
            var objectNumber  = previousTokens[0] as NumericToken;
            var generation    = previousTokens[1] as NumericToken;

            // The "as" casts yield null when either recorded token is not a NumericToken.
            if (objectNumber == null || generation == null)
            {
                // NOTE(review): the two string parts are joined without a separating space, so the message reads
                // "...2 numbers.Instead got: ..." - confirm whether that is intended.
                throw new PdfDocumentFormatException("The obj operator (start object) was not preceded by a 2 numbers." +
                                                     $"Instead got: {previousTokens[0]} {previousTokens[1]} obj");
            }

            // Read all tokens between obj and endobj.
            while (coreTokenScanner.MoveNext() && !Equals(coreTokenScanner.CurrentToken, OperatorToken.EndObject))
            {
                // Comments inside the object are skipped and never added to readTokens.
                if (coreTokenScanner.CurrentToken is CommentToken)
                {
                    continue;
                }

                if (ReferenceEquals(coreTokenScanner.CurrentToken, OperatorToken.StartObject))
                {
                    // This should never happen.
                    // Note: readTokens is not cleared on this exit path.
                    Debug.Assert(false, "Encountered a start object 'obj' operator before the end of the previous object.");
                    return(false);
                }

                if (ReferenceEquals(coreTokenScanner.CurrentToken, OperatorToken.StartStream))
                {
                    // Read stream: special case.
                    // On success everything collected so far is discarded and replaced by the single stream token.
                    // On failure nothing is added to readTokens for this "stream" operator.
                    if (TryReadStream(coreTokenScanner.CurrentTokenStart, out var stream))
                    {
                        readTokens.Clear();
                        readTokens.Add(stream);
                    }
                }
                else
                {
                    readTokens.Add(coreTokenScanner.CurrentToken);
                }

                // Keep the two-slot window up to date while inside the object as well.
                previousTokens[0]         = previousTokens[1];
                previousTokenPositions[0] = previousTokenPositions[1];

                // NOTE(review): this loop records CurrentPosition whereas the first loop records CurrentTokenStart -
                // confirm the difference is intentional.
                previousTokens[1]         = coreTokenScanner.CurrentToken;
                previousTokenPositions[1] = coreTokenScanner.CurrentPosition;
            }

            // If the current token is not "endobj" the loop above stopped because MoveNext() returned false;
            // the partially collected tokens are dropped.
            if (!ReferenceEquals(coreTokenScanner.CurrentToken, OperatorToken.EndObject))
            {
                readTokens.Clear();
                return(false);
            }

            // Identifier built from the object and generation numbers that preceded "obj".
            var reference = new IndirectReference(objectNumber.Long, generation.Int);

            IToken token;

            // Exactly three collected tokens of the form "<number> <number> R": the content is turned into an
            // IndirectReferenceToken built from those two numbers.
            if (readTokens.Count == 3 && readTokens[0] is NumericToken objNum &&
                readTokens[1] is NumericToken genNum &&
                ReferenceEquals(readTokens[2], OperatorToken.R))
            {
                // I have no idea if this can ever happen.
                token = new IndirectReferenceToken(new IndirectReference(objNum.Long, genNum.Int));
            }
コード例 #2
0
ファイル: PdfTokenScanner.cs プロジェクト: huzhiguan/PdfPig
        /// <summary>
        /// Scans tokens from <c>coreTokenScanner</c> until an "obj" operator is found, then collects the
        /// tokens that follow it up to the matching "endobj" operator into <c>readTokens</c>.
        /// </summary>
        /// <returns>
        /// <c>false</c> when fewer than two non-comment tokens were read before the first scan stopped, when the
        /// object number and generation cannot be determined, when a second "obj" operator appears before "endobj",
        /// or when the scan stops without the current token being "endobj".
        /// NOTE(review): the success path is beyond the end of this excerpt - confirm what is returned there.
        /// </returns>
        /// <exception cref="ObjectDisposedException">The <c>isDisposed</c> flag is set.</exception>
        public bool MoveNext()
        {
            // Refuse to operate once the scanner has been marked as disposed.
            if (isDisposed)
            {
                throw new ObjectDisposedException(nameof(PdfTokenScanner));
            }

            // Read until we find object-number generation obj, e.g. "69 420 obj".
            // Counts the non-comment tokens seen before the scan stops.
            int tokensRead = 0;

            // The loop ends when the "obj" operator becomes the current token, or when the underlying
            // scanner's MoveNext() returns false.
            while (coreTokenScanner.MoveNext() && !Equals(coreTokenScanner.CurrentToken, OperatorToken.StartObject))
            {
                // Comments are ignored entirely: they are neither counted nor recorded.
                if (coreTokenScanner.CurrentToken is CommentToken)
                {
                    continue;
                }

                tokensRead++;

                // Two-slot sliding window: slot 0 receives the older entry, slot 1 the token just read.
                previousTokens[0]         = previousTokens[1];
                previousTokenPositions[0] = previousTokenPositions[1];

                previousTokens[1]         = coreTokenScanner.CurrentToken;
                previousTokenPositions[1] = coreTokenScanner.CurrentTokenStart;
            }

            // Fewer than two non-comment tokens were read, so the window cannot hold both an object number
            // and a generation number.
            if (tokensRead < 2)
            {
                return(false);
            }

            // The older window entry is taken as the object number (and its position as the start position),
            // the newer one as the generation number.
            var startPosition = previousTokenPositions[0];
            var objectNumber  = previousTokens[0] as NumericToken;
            var generation    = previousTokens[1] as NumericToken;

            // The "as" casts yield null when either recorded token is not a NumericToken.
            if (objectNumber == null || generation == null)
            {
                // Handle case where the scanner correctly reads most of an object token but includes too much of the first token
                // specifically %%EOF1 0 obj where scanning starts from 'F'.
                // Recovery is only attempted when the generation is numeric and the older token is an operator.
                if (generation != null && previousTokens[0] is OperatorToken op)
                {
                    // EndsWithNumberRegex is defined outside this excerpt; presumably it matches a trailing run of
                    // digits in the operator text - confirm against its definition.
                    var match = EndsWithNumberRegex.Match(op.Data);

                    if (match.Success && int.TryParse(match.Value, NumberStyles.Any, CultureInfo.InvariantCulture, out var number))
                    {
                        // Shift the start position to where the matched number begins inside the operator text
                        // and synthesise the object number from the parsed value.
                        startPosition = previousTokenPositions[0] + match.Index;
                        objectNumber  = new NumericToken(number);
                    }
                    else
                    {
                        return(false);
                    }
                }
                else
                {
                    return(false);
                }
            }

            // Read all tokens between obj and endobj.
            while (coreTokenScanner.MoveNext() && !Equals(coreTokenScanner.CurrentToken, OperatorToken.EndObject))
            {
                // Comments inside the object are skipped and never added to readTokens.
                if (coreTokenScanner.CurrentToken is CommentToken)
                {
                    continue;
                }

                if (ReferenceEquals(coreTokenScanner.CurrentToken, OperatorToken.StartObject))
                {
                    // This should never happen.
                    // Note: readTokens is not cleared on this exit path.
                    Debug.Assert(false, "Encountered a start object 'obj' operator before the end of the previous object.");
                    return(false);
                }

                if (ReferenceEquals(coreTokenScanner.CurrentToken, OperatorToken.StartStream))
                {
                    // Identifier of the object whose stream is about to be read.
                    var streamIdentifier = new IndirectReference(objectNumber.Long, generation.Int);

                    // Prevent an infinite loop where a stream's length references the stream or the stream's offset.
                    // The flag is false only when callingObject already holds this same identifier.
                    var getLengthFromFile = !(callingObject.HasValue && callingObject.Value.Equals(streamIdentifier));

                    // Remember the current value so it can be restored after the stream has been read.
                    var outerCallingObject = callingObject;

                    try
                    {
                        callingObject = streamIdentifier;

                        // Read stream: special case.
                        // On success everything collected so far is discarded and replaced by the single stream token.
                        // On failure nothing is added to readTokens for this "stream" operator.
                        if (TryReadStream(coreTokenScanner.CurrentTokenStart, getLengthFromFile, out var stream))
                        {
                            readTokens.Clear();
                            readTokens.Add(stream);
                        }
                    }
                    finally
                    {
                        // Restore the previous value even if TryReadStream throws.
                        callingObject = outerCallingObject;
                    }
                }
                else
                {
                    readTokens.Add(coreTokenScanner.CurrentToken);
                }

                // Keep the two-slot window up to date while inside the object as well.
                previousTokens[0]         = previousTokens[1];
                previousTokenPositions[0] = previousTokenPositions[1];

                // NOTE(review): this loop records CurrentPosition whereas the first loop records CurrentTokenStart -
                // confirm the difference is intentional.
                previousTokens[1]         = coreTokenScanner.CurrentToken;
                previousTokenPositions[1] = coreTokenScanner.CurrentPosition;
            }

            // If the current token is not "endobj" the loop above stopped because MoveNext() returned false;
            // the partially collected tokens are dropped.
            if (!ReferenceEquals(coreTokenScanner.CurrentToken, OperatorToken.EndObject))
            {
                readTokens.Clear();
                return(false);
            }

            // Identifier built from the object and generation numbers that preceded "obj".
            var reference = new IndirectReference(objectNumber.Long, generation.Int);

            IToken token;

            // Exactly three collected tokens of the form "<number> <number> R": the content is turned into an
            // IndirectReferenceToken built from those two numbers.
            if (readTokens.Count == 3 && readTokens[0] is NumericToken objNum &&
                readTokens[1] is NumericToken genNum &&
                ReferenceEquals(readTokens[2], OperatorToken.R))
            {
                // I have no idea if this can ever happen.
                token = new IndirectReferenceToken(new IndirectReference(objNum.Long, genNum.Int));
            }