Пример #1
0
        /// <summary>
        /// Reads one subsection of a classic (text) cross-reference table and adds every
        /// in-use entry to <paramref name="Xref"/>.
        /// </summary>
        /// <param name="pdf">Bytes of the PDF file; passed to <c>GetReferenceNumberForward</c> together with each entry offset.</param>
        /// <param name="parts">Tokens of the xref text. The array may contain more than this subsection.</param>
        /// <param name="xrefStartPointer">Index in <paramref name="parts"/> of the first token of the subsection header.</param>
        /// <param name="Xref">Dictionary that receives the in-use entries, keyed by <c>XrefItem.id</c>.</param>
        /// <returns>
        /// The index of the token that follows the subsection, or <c>parts.Length</c> when
        /// <paramref name="xrefStartPointer"/> is past the end of <paramref name="parts"/> or
        /// the token it points to is not a plain decimal number.
        /// </returns>
        /// <exception cref="FormatException">An entry count or offset token is not a number.</exception>
        /// <exception cref="ArgumentException">An entry resolves to an id that is already present in <paramref name="Xref"/>.</exception>
        private int ReadXrefTable(byte[] pdf, string[] parts, int xrefStartPointer, Dictionary <int, XrefItem> Xref)
        {
            // The parts array is larger than one subsection; every element is one token.
            // 0 6  <- xrefStartPointer points at the 0 (first object number, then entry count)
            // 0000000003 65535 f
            // 0000000017 00000 n
            // 0000000081 00000 n
            // 0000000000 00007 f
            // 0000000331 00000 n
            // 0000000409 00000 n
            int firstObjectNumber;

            // Anything that is not a plain number here means there is no subsection to read.
            if (xrefStartPointer >= parts.Length ||
                !Int32.TryParse(parts[xrefStartPointer], NumberStyles.None, CultureInfo.InvariantCulture, out firstObjectNumber))
            {
                return(parts.Length);
            }

            // The second header token is the NUMBER OF ENTRIES, not an upper object number,
            // so the loop must run exactly that many times whatever the first object number is.
            int entryCount = Int32.Parse(parts[xrefStartPointer + 1], NumberStyles.Integer, CultureInfo.InvariantCulture);

            int count = 0;

            for (; count < entryCount; count++)
            {
                // Each entry is three tokens: offset, generation, 'n' (in use) or 'f' (free).
                int entryStart = xrefStartPointer + 2 + count * 3;

                uint offset = UInt32.Parse(parts[entryStart], NumberStyles.Integer, CultureInfo.InvariantCulture);
                bool inUse  = parts[entryStart + 2] == "n";

                if (!inUse)
                {
                    continue;
                }

                XrefItem item = new XrefItem()
                {
                    id  = GetReferenceNumberForward(pdf, offset),
                    pos = offset,
                    // generation is ignored, the new pdf will have generation 0
                    IsUsed = true
                };

                Xref.Add(item.id, item);
            }

            return(xrefStartPointer + 2 + count * 3);
        }
Пример #2
0
        /// <summary>
        /// Reads the cross-reference stream object that starts at <paramref name="XrefPos"/>
        /// and merges its entries into <paramref name="Xref"/>.
        /// </summary>
        /// <remarks>
        /// The method scans forward for the <c>stream</c> keyword, tokenizes everything from
        /// <paramref name="XrefPos"/> up to the stream data as the dictionary, decodes the
        /// stream data through <c>Deflate</c> and then walks the decoded entries.
        /// A <c>Prev</c> key triggers a recursive call while the dictionary is being parsed,
        /// so the older section is loaded before the entries of this one; entries of this
        /// section therefore replace older entries with the same id.
        /// </remarks>
        /// <param name="pdf">Bytes of the PDF file.</param>
        /// <param name="Xref">Dictionary that receives the entries, keyed by <c>XrefItem.id</c>.</param>
        /// <param name="XrefPos">Offset in <paramref name="pdf"/> where the cross-reference stream object begins.</param>
        /// <exception cref="Exception">
        /// The data ends before a stream is found, <c>endobj</c> is reached before
        /// <c>stream</c>, the dictionary has no <c>Length</c>, or a filter other than
        /// <c>FlateDecode</c> is declared.
        /// </exception>
        private void ReadXRefStream(byte[] pdf, Dictionary <int, XrefItem> Xref, uint XrefPos)
        {
            // cross-reference stream

            // 12 0 obj        % Cross-reference stream
            // << /Type /XRef  % Cross-reference stream dictionary
            //     /Size ...
            //     /Root ...
            //     >>
            // stream
            // ...
            // Stream data containing cross-reference information ...
            // endstream
            // endobj
            uint currentIndex = XrefPos;
            int  predictor    = 1;

            while (true)
            {
                // Both keyword probes below read six bytes starting at currentIndex.
                if (currentIndex >= pdf.Length - 5)
                {
                    throw new Exception("xref stream table ends abruptly");
                }

                if (pdf[currentIndex] == 's' &&
                    pdf[currentIndex + 1] == 't' &&
                    pdf[currentIndex + 2] == 'r' &&
                    pdf[currentIndex + 3] == 'e' &&
                    pdf[currentIndex + 4] == 'a' &&
                    pdf[currentIndex + 5] == 'm')
                {
                    uint startStream = currentIndex + 6;

                    // Skip the end-of-line marker that follows the keyword (CRLF or LF).
                    if (startStream + 1 < pdf.Length && pdf[startStream] == '\r' && pdf[startStream + 1] == '\n')
                    {
                        startStream += 2;
                    }
                    else if (startStream < pdf.Length && pdf[startStream] == '\n')
                    {
                        startStream++;
                    }

                    string   dictionary   = GetString(pdf, XrefPos, startStream);
                    string[] tokens       = dictionary.Split(separator, StringSplitOptions.RemoveEmptyEntries);
                    uint?    streamLength = null;
                    int      w1           = 1;
                    int      w2           = 1;
                    int      w3           = 1;
                    int      size         = 1;
                    int      parsedIndex;

                    List <int> indexArray = new List <int>();

                    for (int i = 0; i < tokens.Length; i++)
                    {
                        switch (tokens[i])
                        {
                        case "Length":
                            streamLength = Convert.ToUInt32(tokens[i + 1]);
                            break;

                        case "W":
                            // byte widths of the three fields of every entry
                            w1 = Convert.ToInt32(tokens[i + 1]);
                            w2 = Convert.ToInt32(tokens[i + 2]);
                            w3 = Convert.ToInt32(tokens[i + 3]);
                            break;

                        case "Index":
                            // Collect every number that follows; stop at the first non-number
                            // or at the end of the token list.
                            for (int j = i + 1; j < tokens.Length && Int32.TryParse(tokens[j], out parsedIndex); j++)
                            {
                                indexArray.Add(parsedIndex);
                            }
                            break;

                        case "Size":
                            // Int32: large documents hold far more than Int16.MaxValue objects.
                            size = Convert.ToInt32(tokens[i + 1]);
                            break;

                        case "Predictor":
                            predictor = Convert.ToInt32(tokens[i + 1]);
                            break;

                        case "Prev":
                            // Load the previous (older) section first.
                            ReadXRefStream(pdf, Xref, Convert.ToUInt32(tokens[i + 1]));
                            break;

                        case "Filter":
                        {
                            string filter = tokens[i + 1];
                            if (filter != "FlateDecode")
                            {
                                throw new Exception("only flatedecode filter implemented, but " + filter + " found");
                            }
                            break;
                        }
                        }
                    }

                    // Without an explicit Index the stream holds a single run: 0 .. Size.
                    if (indexArray.Count == 0)
                    {
                        indexArray.Add(0);
                        indexArray.Add(size);
                    }

                    if (streamLength == null)
                    {
                        throw new Exception("stream without length definition");
                    }

                    uint endStream = startStream + streamLength.Value;

                    byte[] stream   = pdf.Slice(startStream, endStream);
                    byte[] deflated = Deflate(stream, predictor, w1 + w2 + w3);

                    int deflatedIndex = 0;
                    List <StmObjSubItem> lstObjStm = new List <StmObjSubItem>();

                    // indexArray holds (first object number, entry count) pairs; only the
                    // counts are needed because ids are read back from the file itself.
                    for (int pair = 0; pair < indexArray.Count; pair += 2)
                    {
                        int entriesInRun = indexArray[pair + 1];

                        for (int j = 0; j < entriesInRun; j++)
                        {
                            uint type   = GetUInt(deflated, w1, ref deflatedIndex, 1);
                            uint value2 = GetUInt(deflated, w2, ref deflatedIndex, 0);
                            uint value3 = GetUInt(deflated, w3, ref deflatedIndex, 0);

                            // type 0 => free item, they are ignored
                            // type 1 => object is at file position value2, with generation value3
                            if (type == 1)
                            {
                                XrefItem xrefItem = new XrefItem()
                                {
                                    id     = GetReferenceNumberForward(pdf, value2),
                                    pos    = value2,
                                    IsUsed = true
                                };

                                // Indexer instead of Add: an object already loaded from an
                                // older (Prev) section is replaced by this newer definition
                                // instead of throwing on the duplicate key.
                                Xref[xrefItem.id] = xrefItem;
                            }

                            // type 2 => object lives inside object stream number value2,
                            //           at index value3 within that stream
                            else if (type == 2)
                            {
                                StmObjSubItem subItem = new StmObjSubItem()
                                {
                                    StmObjId = (int)value2,
                                    Position = (int)value3
                                };

                                lstObjStm.Add(subItem);
                            }
                        }
                    }

                    // Mark, on every object stream, which of its sub-objects are used.
                    // Done after the loop so the object streams are already registered.
                    foreach (StmObjSubItem subItem in lstObjStm)
                    {
                        Xref[subItem.StmObjId].stmobjUsed.Add(subItem.Position);
                    }

                    return;
                }

                if (pdf[currentIndex] == 'e' &&
                    pdf[currentIndex + 1] == 'n' &&
                    pdf[currentIndex + 2] == 'd' &&
                    pdf[currentIndex + 3] == 'o' &&
                    pdf[currentIndex + 4] == 'b' &&
                    pdf[currentIndex + 5] == 'j')
                {
                    throw new Exception("xref stream without stream");
                }

                currentIndex++;
            }
        }
Пример #3
0
        /// <summary>
        /// Reads the object located at <c>item.pos</c> into <paramref name="item"/>: the text
        /// before the stream data, the stream data itself (if any) and the text after it.
        /// When the object is an object stream (<c>/ObjStm</c>), the sub-objects listed in
        /// <c>item.stmobjUsed</c> are extracted and appended to <paramref name="newItems"/>.
        /// </summary>
        /// <remarks>
        /// The method returns immediately when <c>item.text</c> already has content.
        /// A <c>/Length n 0 R</c> indirect length is resolved by reading object <c>n</c>
        /// through <paramref name="dctXref1"/>.
        /// NOTE(review): the exact range semantics of <c>Slice</c> and <c>GetString</c>
        /// (inclusive or exclusive end) are defined elsewhere and are not visible here.
        /// </remarks>
        /// <param name="pdf">Bytes to read from: the PDF file, or a decoded object stream slice on recursive calls.</param>
        /// <param name="item">Entry to fill; its <c>text</c>, <c>streamContent</c> and <c>IsUsed</c> members are written.</param>
        /// <param name="newItems">List that receives the entries created for object-stream sub-objects.</param>
        /// <param name="dctXref1">Known entries by object id, used to resolve indirect <c>Length</c> references.</param>
        /// <exception cref="Exception">
        /// A stream has no <c>Length</c>, an indirect <c>Length</c> points to an object that
        /// is not a plain number, or an object stream declares a filter other than
        /// <c>FlateDecode</c>.
        /// </exception>
        private void ReadContent(byte[] pdf, XrefItem item, List <XrefItem> newItems, Dictionary <int, XrefItem> dctXref1)
        {
            // already read
            if (item.text.Count > 0)
            {
                return;
            }

            // |--TEXT--||- optional binary-||--TEXT-..-|
            // OBJ.......STREAM......ENDSTREAM.TEXT.ENDOBJ
            uint startIndex      = item.pos;
            uint startTextIndex  = startIndex;
            uint currentIndex    = startIndex;
            bool continueWorking = true;

            while (continueWorking)
            {
                if (currentIndex + 5 < pdf.Length &&
                    pdf[currentIndex] == 's' &&
                    pdf[currentIndex + 1] == 't' &&
                    pdf[currentIndex + 2] == 'r' &&
                    pdf[currentIndex + 3] == 'e' &&
                    pdf[currentIndex + 4] == 'a' &&
                    pdf[currentIndex + 5] == 'm')
                {
                    currentIndex += 6;

                    // first text segment: everything up to and including the "stream" keyword
                    item.text.Add(GetString(pdf, startTextIndex, currentIndex));

                    uint startStream = currentIndex;

                    // Skip the end-of-line marker that follows the keyword (CRLF or LF).
                    if (startStream + 1 < pdf.Length && pdf[startStream] == '\r' && pdf[startStream + 1] == '\n')
                    {
                        startStream += 2;
                    }
                    else if (startStream < pdf.Length && pdf[startStream] == '\n')
                    {
                        startStream++;
                    }

                    string   dictionary   = GetString(pdf, startIndex, startStream);
                    string[] parts        = dictionary.Split(separator, StringSplitOptions.RemoveEmptyEntries);
                    uint?    streamLength = null;

                    for (int i = 0; i < parts.Length; i++)
                    {
                        // Ordinal comparison: PDF keys are not linguistic text.
                        if (!string.Equals(parts[i], "length", StringComparison.OrdinalIgnoreCase))
                        {
                            continue;
                        }

                        // easy: /Length 1234 => 1234 bytes
                        // hard: /Length 3 0 R => Look in object 3, to obtain the 1234
                        if (i + 3 < parts.Length && parts[i + 2] == "0" && parts[i + 3] == "R")
                        {
                            int      lengthPointer = Convert.ToInt32(parts[i + 1]);
                            XrefItem lengthItem    = dctXref1[lengthPointer];

                            ReadContent(pdf, lengthItem, newItems, dctXref1);

                            // a plain number object has no stream, hence exactly one text segment
                            if (lengthItem.text.Count != 1)
                            {
                                throw new Exception("Length points to an object that is not a number");
                            }

                            string[] partsLength = lengthItem.text[0].Split(separator, StringSplitOptions.RemoveEmptyEntries);

                            // 3 0 Obj 1234 endobj
                            streamLength = Convert.ToUInt32(partsLength[3]);
                        }
                        else
                        {
                            streamLength = Convert.ToUInt32(parts[i + 1]);
                        }

                        // only the first Length key is considered
                        break;
                    }

                    if (streamLength == null)
                    {
                        throw new Exception("stream without length definition");
                    }

                    uint endStream = startStream + streamLength.Value + 1;
                    startTextIndex     = endStream;
                    currentIndex       = endStream + 9; // 9 = endstream text length
                    item.streamContent = pdf.Slice(startStream, endStream);
                }

                if (currentIndex + 5 < pdf.Length &&
                    pdf[currentIndex] == 'e' &&
                    pdf[currentIndex + 1] == 'n' &&
                    pdf[currentIndex + 2] == 'd' &&
                    pdf[currentIndex + 3] == 'o' &&
                    pdf[currentIndex + 4] == 'b' &&
                    pdf[currentIndex + 5] == 'j')
                {
                    currentIndex   += 6;
                    continueWorking = false;
                }
                else
                {
                    currentIndex++;
                }

                // Ran off the end of the data without finding "endobj": stop with what we have.
                if (currentIndex >= pdf.Length)
                {
                    continueWorking = false;
                }
            }

            // trailing text segment (or the whole object when it has no stream)
            item.text.Add(GetString(pdf, startTextIndex, currentIndex));


            // ObjStm
            // 10 0 obj
            // <</Filter/FlateDecode/First 94/Length 773/N 13/Type/ObjStm>>stream
            // 11 0 12 547 13 665    <- num obj, offset, num obj, offset...
            // << obj1 >>
            // << obj2 >>
            // ...
            // endstream endobj
            if (item.text[0].Contains("/ObjStm"))
            {
                // The container itself is flagged as not used; its sub-objects are emitted instead.
                item.IsUsed = false;

                string[] objStmTokens       = item.text[0].Split(separator, StringSplitOptions.RemoveEmptyEntries);
                uint     FirstElementOffset = 0;
                uint     NumberOfElements   = 0;

                for (int i = 0; i < objStmTokens.Length; i++)
                {
                    switch (objStmTokens[i])
                    {
                    case "First":
                        FirstElementOffset = Convert.ToUInt32(objStmTokens[i + 1]);
                        break;

                    case "N":
                        NumberOfElements = Convert.ToUInt32(objStmTokens[i + 1]);
                        break;

                    case "Filter":
                    {
                        string filter = objStmTokens[i + 1];
                        if (filter != "FlateDecode")
                        {
                            throw new Exception("only flatedecode filter implemented, but " + filter + " found");
                        }
                        break;
                    }
                    }
                }


                byte[]   deflated      = Deflate(item.streamContent, 1, 0);
                string   preamble      = GetString(deflated, 0, FirstElementOffset);
                string[] preambleParts = preamble.Split(separator, StringSplitOptions.RemoveEmptyEntries);

                foreach (int subitemUsed in item.stmobjUsed)
                {
                    // preamble pairs: [2k] = object number, [2k + 1] = offset relative to First
                    uint offset = Convert.ToUInt32(preambleParts[subitemUsed * 2 + 1]) + FirstElementOffset;
                    uint offsetContinuous;
                    int  objectIndex = Convert.ToInt32(preambleParts[subitemUsed * 2]);

                    // "subitemUsed + 1 < N" instead of "subitemUsed < N - 1": the latter wraps
                    // around in unsigned arithmetic when N is 0 or missing.
                    if (subitemUsed + 1 < NumberOfElements)
                    {
                        // the sub-object ends where the next one starts
                        offsetContinuous = Convert.ToUInt32(preambleParts[(subitemUsed + 1) * 2 + 1]) + FirstElementOffset;
                    }
                    else
                    {
                        // last sub-object: runs to the end of the decoded data
                        offsetContinuous = (uint)deflated.Length;
                    }

                    XrefItem newItem = new XrefItem()
                    {
                        id     = objectIndex,
                        pos    = 0,
                        IsUsed = true
                    };

                    newItems.Add(newItem);

                    ReadContent(deflated.Slice(offset, offsetContinuous), newItem, newItems, dctXref1);

                    // Sub-objects are stored without the "n 0 obj ... endobj" wrapper; add it.
                    newItem.text[0] = objectIndex.ToString() + " 0 obj\n" + newItem.text[0] + "\nendobj";
                }
            }
        }