public bool TryParse(IRandomAccessRead source, long offset, bool isLenientParsing, CosObjectPool pool, out CrossReferenceTablePartBuilder builder)
            {
                builder = null;

                var tableStartOffset = source.GetPosition();

                if (source.Peek() != 'x')
                {
                    return(false);
                }

                var xref = ReadHelper.ReadString(source);

                if (!xref.Trim().Equals("xref"))
                {
                    return(false);
                }

                // check for trailer after xref
                var str = ReadHelper.ReadString(source);

                byte[] b = OtherEncodings.StringAsLatin1Bytes(str);

                source.Rewind(b.Length);

                if (str.StartsWith("trailer"))
                {
                    log.Warn("skipping empty xref table");
                    return(false);
                }

                builder = new CrossReferenceTablePartBuilder
                {
                    Offset   = offset,
                    XRefType = CrossReferenceType.Table
                };

                // Tables can have multiple sections. Each starts with a starting object id and a count.
                while (true)
                {
                    if (!TableSubsectionDefinition.TryRead(log, source, out var subsectionDefinition))
                    {
                        log.Warn($"Unexpected subsection definition in the cross-reference table at offset {offset}");

                        if (isLenientParsing)
                        {
                            break;
                        }

                        return(false);
                    }

                    var currentObjectId = subsectionDefinition.FirstNumber;

                    ReadHelper.SkipSpaces(source);
                    for (var i = 0; i < subsectionDefinition.Count; i++)
                    {
                        if (source.IsEof() || ReadHelper.IsEndOfName((char)source.Peek()))
                        {
                            break;
                        }

                        if (source.Peek() == 't')
                        {
                            break;
                        }

                        //Ignore table contents
                        var currentLine = ReadHelper.ReadLine(source);
                        var splitString = currentLine.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                        if (splitString.Length < 3)
                        {
                            log.Warn("invalid xref line: " + currentLine);
                            break;
                        }

                        // This supports the corrupt table as reported in PDFBOX-474 (XXXX XXX XX n)
                        if (splitString[splitString.Length - 1].Equals(InUseEntry))
                        {
                            try
                            {
                                var objectOffset = long.Parse(splitString[0]);

                                if (objectOffset >= tableStartOffset && objectOffset <= source.GetPosition())
                                {
                                    // PDFBOX-3923: offset points inside this table - that can't be good
                                    throw new InvalidOperationException(
                                              $"Object offset {objectOffset} is within its own cross-reference table for object {currentObjectId}");
                                }

                                var generation = int.Parse(splitString[1]);
                                builder.Add(currentObjectId, generation, objectOffset);
                            }
                            catch (FormatException e)
                            {
                                throw new InvalidOperationException("Bad", e);
                            }
                        }
                        else if (!splitString[2].Equals(FreeEntry))
                        {
                            throw new InvalidOperationException(
                                      $"Corrupt cross-reference table entry for object {currentObjectId}. The indicator was not 'n' or 'f' but {splitString[2]}.");
                        }

                        currentObjectId++;

                        ReadHelper.SkipSpaces(source);
                    }

                    ReadHelper.SkipSpaces(source);
                    if (!ReadHelper.IsDigit(source))
                    {
                        break;
                    }
                }

                if (!TryParseTrailer(source, isLenientParsing, pool, out var trailer))
                {
                    throw new InvalidOperationException($"Something went wrong trying to read the XREF table at {offset}.");
                }

                builder.Dictionary = trailer;
                builder.Previous   = trailer.GetLongOrDefault(CosName.PREV);

                return(true);
            }
Beispiel #2
0
        public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
        {
            token = null;

            if (currentByte != '/')
            {
                return(false);
            }

            var bytes = new List <byte>();

            bool escapeActive   = false;
            int  postEscapeRead = 0;
            var  escapedChars   = new char[2];

            while (inputBytes.MoveNext())
            {
                var b = inputBytes.CurrentByte;

                if (b == '#')
                {
                    escapeActive = true;
                }
                else if (escapeActive)
                {
                    if (ReadHelper.IsHex((char)b))
                    {
                        escapedChars[postEscapeRead] = (char)b;
                        postEscapeRead++;

                        if (postEscapeRead == 2)
                        {
                            var hex = new string(escapedChars);

                            var characterToWrite = (byte)Convert.ToInt32(hex, 16);
                            bytes.Add(characterToWrite);

                            escapeActive   = false;
                            postEscapeRead = 0;
                        }
                    }
                    else
                    {
                        bytes.Add((byte)'#');

                        if (postEscapeRead == 1)
                        {
                            bytes.Add((byte)escapedChars[0]);
                        }

                        if (ReadHelper.IsEndOfName(b))
                        {
                            break;
                        }

                        if (b == '#')
                        {
                            // Make it clear what's going on, we read something like #m#AE
                            // ReSharper disable once RedundantAssignment
                            escapeActive   = true;
                            postEscapeRead = 0;
                            continue;
                        }

                        bytes.Add(b);
                        escapeActive   = false;
                        postEscapeRead = 0;
                    }
                }
                else if (ReadHelper.IsEndOfName(b))
                {
                    break;
                }
                else
                {
                    bytes.Add(b);
                }
            }

            byte[] byteArray = bytes.ToArray();

            var str = ReadHelper.IsValidUtf8(byteArray)
                ? Encoding.UTF8.GetString(byteArray)
                : Encoding.GetEncoding("windows-1252").GetString(byteArray);

            token = NameToken.Create(str);

            return(true);
        }