예제 #1
0
        /// <summary>
        /// Reads the decoded cross reference stream one fixed-width line at a time and records
        /// the entries it describes in a new <see cref="CrossReferenceTablePart"/>.
        /// </summary>
        /// <param name="streamOffset">Stored as the offset of the returned part.</param>
        /// <param name="stream">The cross reference stream; its dictionary must hold a /W array.</param>
        /// <returns>The part built from every line that could be paired with an object number.</returns>
        /// <exception cref="IOException">
        /// The /W entry is missing or is not an array, or the field widths it declares are negative
        /// or do not add up to at least one byte per line.
        /// </exception>
        public CrossReferenceTablePart Parse(long streamOffset, PdfRawStream stream)
        {
            var w = stream.Dictionary.GetDictionaryObject(CosName.W);

            if (!(w is COSArray format))
            {
                throw new IOException("/W array is missing in Xref stream");
            }

            var objNums = GetObjectNumbers(stream);

            // The three /W values are the byte widths of the three fields making up one line.
            int w0       = format.getInt(0);
            int w1       = format.getInt(1);
            int w2       = format.getInt(2);
            int lineSize = w0 + w1 + w2;

            // Without this check a zero line size divides by zero below and a negative width
            // indexes outside the decoded data; report both as malformed input instead.
            if (w0 < 0 || w1 < 0 || w2 < 0 || lineSize <= 0)
            {
                throw new IOException($"/W array in Xref stream declares invalid field widths: {w0}, {w1}, {w2}");
            }

            var decoded = stream.Decode(filterProvider);

            // Trailing bytes that do not fill a whole line are ignored by the integer division.
            var lineCount  = decoded.Length / lineSize;
            var lineNumber = 0;

            var builder = new CrossReferenceTablePartBuilder
            {
                Offset     = streamOffset,
                Previous   = stream.Dictionary.GetLongOrDefault(CosName.PREV),
                Dictionary = stream.Dictionary,
                XRefType   = CrossReferenceType.Stream
            };

            // Combines 'length' bytes of the decoded data, starting at 'start', most significant byte first.
            // NOTE(review): the value is accumulated in an int exactly as before, so fields wider than
            // 4 bytes (or with the top bit set) wrap; widening needs the builder's Add signature confirmed.
            int ReadField(int start, int length)
            {
                var value = 0;
                for (var i = 0; i < length; i++)
                {
                    value += (decoded[start + i] & 0x00ff) << ((length - i - 1) * 8);
                }

                return value;
            }

            using (IEnumerator<long> objIter = objNums.GetEnumerator())
            {
                // Stop at whichever runs out first: complete lines or object numbers.
                while (lineNumber < lineCount && objIter.MoveNext())
                {
                    var byteOffset = lineNumber * lineSize;

                    // "If the first element is zero,
                    // the type field shall not be present, and shall default to type 1"
                    var type = w0 == 0 ? 1 : ReadField(byteOffset, w0);

                    long objectId = objIter.Current;

                    switch (type)
                    {
                        case 1:
                            // Second field is passed as the offset, third as the generation number.
                            var offset = ReadField(byteOffset + w0, w1);
                            var genNum = ReadField(byteOffset + w0 + w1, w2);

                            builder.Add(objectId, genNum, offset);
                            break;

                        case 2:
                            // Object held inside an object stream: the second field is the number of
                            // that object stream. It is stored negated so it can be told apart from
                            // a file offset; the third field (index within the stream) is not read.
                            var objstmObjNr = ReadField(byteOffset + w0, w1);

                            builder.Add(objectId, 0, -objstmObjNr);
                            break;

                        // Type 0 (free objects) and any other type value are skipped.
                    }

                    lineNumber++;
                }
            }

            return builder.Build();
        }
예제 #2
0
        /// <summary>
        /// Parses a cross reference table starting at <paramref name="offset"/>, followed by its trailer dictionary.
        /// </summary>
        /// <param name="scanner">The token scanner to read from; it is moved to <paramref name="offset"/> if it is not already there.</param>
        /// <param name="offset">Where the table is expected to start; stored as the offset of the returned part.</param>
        /// <param name="isLenientParsing">Passed through to the entry processing and trailer parsing helpers.</param>
        /// <returns>The part built from the entries read and the trailer dictionary.</returns>
        /// <exception cref="PdfDocumentFormatException">
        /// An operator other than <c>xref</c> is found at the offset, or the first object number
        /// is not followed by a numeric object count.
        /// </exception>
        public CrossReferenceTablePart Parse(ISeekableTokenScanner scanner, long offset, bool isLenientParsing)
        {
            var builder = new CrossReferenceTablePartBuilder
            {
                Offset   = offset,
                XRefType = CrossReferenceType.Table
            };

            if (scanner.CurrentPosition != offset)
            {
                scanner.Seek(offset);
            }

            scanner.MoveNext();

            // The "xref" keyword is optional here: when present it is stepped over, any other operator is an error.
            if (scanner.CurrentToken is OperatorToken operatorToken)
            {
                if (operatorToken.Data == "xref")
                {
                    scanner.MoveNext();
                }
                else
                {
                    throw new PdfDocumentFormatException($"Unexpected operator in xref position: {operatorToken}.");
                }
            }

            if (scanner.CurrentToken is NumericToken firstObjectNumber)
            {
                if (!scanner.TryReadToken(out NumericToken objectCount))
                {
                    throw new PdfDocumentFormatException($"Unexpected token following xref and {firstObjectNumber}. We found: {scanner.CurrentToken}.");
                }

                var definition = new TableSubsectionDefinition(firstObjectNumber.Long, objectCount.Int);

                // Line breaks delimit table entries, so they must surface as tokens while the table is read.
                var tokenizer = new EndOfLineTokenizer();

                scanner.RegisterCustomTokenizer((byte)'\r', tokenizer);
                scanner.RegisterCustomTokenizer((byte)'\n', tokenizer);

                try
                {
                    var readingLine = false;
                    var tokens      = new List<IToken>();
                    var count       = 0;
                    while (scanner.MoveNext())
                    {
                        if (scanner.CurrentToken is EndOfLineToken)
                        {
                            // Blank lines (or the second half of a two-byte line ending) carry no entry.
                            if (!readingLine)
                            {
                                continue;
                            }

                            readingLine = false;

                            count = ProcessTokens(tokens, scanner, builder, isLenientParsing, count, ref definition);

                            tokens.Clear();

                            continue;
                        }

                        if (scanner.CurrentToken is CommentToken)
                        {
                            continue;
                        }

                        var isLineOperator = scanner.CurrentToken is OperatorToken op && (op.Data == FreeEntry || op.Data == InUseEntry);

                        // Anything that is neither a number nor an entry marker ends the table.
                        if (!(scanner.CurrentToken is NumericToken) && !isLineOperator)
                        {
                            break;
                        }

                        readingLine = true;
                        tokens.Add(scanner.CurrentToken);
                    }

                    // A final line that was not terminated by an end-of-line token still needs processing.
                    if (tokens.Count > 0)
                    {
                        ProcessTokens(tokens, scanner, builder, isLenientParsing, count, ref definition);
                    }
                }
                finally
                {
                    // The scanner belongs to the caller: remove the tokenizer even when reading fails,
                    // otherwise later reads on the same scanner keep producing end-of-line tokens.
                    scanner.DeregisterCustomTokenizer(tokenizer);
                }
            }

            builder.Dictionary = ParseTrailer(scanner, isLenientParsing);

            return builder.Build();
        }
예제 #3
0
        /// <summary>
        /// Walks the decoded cross reference stream line by line, pairing each line with the next
        /// object number, and collects the resulting entries into a <see cref="CrossReferenceTablePart"/>.
        /// </summary>
        /// <param name="streamOffset">Stored as the offset of the returned part.</param>
        /// <param name="stream">The cross reference stream token to decode and read.</param>
        /// <returns>The part produced by the builder once the lines or the object numbers run out.</returns>
        public CrossReferenceTablePart Parse(long streamOffset, StreamToken stream)
        {
            var bytes      = stream.Decode(filterProvider);
            var dictionary = stream.StreamDictionary;

            var fieldSizes = new CrossReferenceStreamFieldSize(dictionary);
            var lineLength = fieldSizes.LineLength;

            // Integer division: bytes past the last complete line are never read.
            var totalLines = bytes.Count / lineLength;

            // Falls back to -1 unless the dictionary holds a numeric /Prev entry.
            var previous = dictionary.TryGet(NameToken.Prev, out var prevToken) && prevToken is NumericToken prevNumeric
                ? prevNumeric.Long
                : -1;

            var builder = new CrossReferenceTablePartBuilder
            {
                Offset     = streamOffset,
                Previous   = previous,
                Dictionary = dictionary,
                XRefType   = CrossReferenceType.Stream
            };

            var objectNumbers = GetObjectNumbers(dictionary);

            var typeWidth = fieldSizes.Field1Size;
            var scratch   = new byte[lineLength];
            var linesRead = 0;

            foreach (var objectNumber in objectNumbers)
            {
                if (linesRead >= totalLines)
                {
                    break;
                }

                // Copy the current line out of the decoded data into the reusable buffer.
                var lineStart = linesRead * lineLength;
                for (var column = 0; column < lineLength; column++)
                {
                    scratch[column] = bytes[lineStart + column];
                }

                // With no type field the entry is treated as type 1; otherwise the leading
                // bytes of the line are combined most significant byte first.
                var entryType = 1;
                if (typeWidth != 0)
                {
                    entryType = 0;
                    for (var column = 0; column < typeWidth; column++)
                    {
                        entryType += (scratch[column] & 0x00ff) << ((typeWidth - column - 1) * 8);
                    }
                }

                ReadNextStreamObject(entryType, objectNumber, fieldSizes, builder, scratch);

                linesRead++;
            }

            return builder.Build();
        }