/// <summary>
/// Verifies that nested arrays inside a page content stream are parsed correctly
/// by comparing the parsed ColorantsDef array against a hand-built reference.
/// </summary>
public virtual void InnerArraysInContentStreamTest() {
    String inputFileName = sourceFolder + "innerArraysInContentStream.pdf";
    PdfDocument pdfDocument = new PdfDocument(new PdfReader(inputFileName));
    byte[] contentBytes = pdfDocument.GetFirstPage().GetContentBytes();
    RandomAccessSourceFactory sourceFactory = new RandomAccessSourceFactory();
    PdfTokenizer tokenizer = new PdfTokenizer(new RandomAccessFileOrArray(sourceFactory.CreateSource(contentBytes)));
    PdfResources pageResources = pdfDocument.GetPage(1).GetResources();
    PdfCanvasParser parser = new PdfCanvasParser(tokenizer, pageResources);
    IList<PdfObject> parsed = parser.Parse(null);
    // Reference content: three colorant names, each followed by its tint values.
    IList<PdfObject> expectedObjects = new List<PdfObject> {
        new PdfString("Cyan"), new PdfArray(new int[] { 1, 0, 0, 0 }),
        new PdfString("Magenta"), new PdfArray(new int[] { 0, 1, 0, 0 }),
        new PdfString("Yellow"), new PdfArray(new int[] { 0, 0, 1, 0 })
    };
    PdfArray expectedArray = new PdfArray(expectedObjects);
    NUnit.Framework.Assert.IsTrue(new CompareTool().CompareArrays(expectedArray,
        ((PdfDictionary)parsed[1]).GetAsArray(new PdfName("ColorantsDef"))));
}
/// <summary>Decodes the input bytes according to ASCII85.</summary>
/// <param name="in">the byte[] to be decoded</param>
/// <returns>the decoded byte[]</returns>
public static byte[] ASCII85Decode(byte[] @in) {
    MemoryStream decoded = new MemoryStream();
    // Number of base-85 digits collected for the current 4-byte group (0..4).
    int count = 0;
    int[] group = new int[5];
    for (int k = 0; k < @in.Length; ++k) {
        int ch = @in[k] & 0xff;
        // '~' begins the "~>" end-of-data marker: stop decoding.
        if (ch == '~') {
            break;
        }
        if (PdfTokenizer.IsWhitespace(ch)) {
            continue;
        }
        // 'z' is shorthand for four zero bytes; only legal between groups.
        if (ch == 'z' && count == 0) {
            decoded.Write(0);
            decoded.Write(0);
            decoded.Write(0);
            decoded.Write(0);
            continue;
        }
        if (ch < '!' || ch > 'u') {
            throw new PdfException(PdfException.IllegalCharacterInAscii85decode);
        }
        group[count] = ch - '!';
        ++count;
        if (count == 5) {
            // Five digits complete one group: fold into a 32-bit value, emit 4 bytes.
            count = 0;
            int value = 0;
            for (int j = 0; j < 5; ++j) {
                value = value * 85 + group[j];
            }
            decoded.Write((byte)(value >> 24));
            decoded.Write((byte)(value >> 16));
            decoded.Write((byte)(value >> 8));
            decoded.Write((byte)value);
        }
    }
    // Trailing partial group: n digits yield n-1 output bytes. The literal
    // 85-term sums pad the missing digits (kept exactly as the historical
    // iText implementation computed them; count == 1 is silently dropped).
    if (count == 2) {
        int value = group[0] * 85 * 85 * 85 * 85 + group[1] * 85 * 85 * 85 + 85 * 85 * 85 + 85 * 85 + 85;
        decoded.Write((byte)(value >> 24));
    } else if (count == 3) {
        int value = group[0] * 85 * 85 * 85 * 85 + group[1] * 85 * 85 * 85 + group[2] * 85 * 85 + 85 * 85 + 85;
        decoded.Write((byte)(value >> 24));
        decoded.Write((byte)(value >> 16));
    } else if (count == 4) {
        int value = group[0] * 85 * 85 * 85 * 85 + group[1] * 85 * 85 * 85 + group[2] * 85 * 85 + group[3] * 85 + 85;
        decoded.Write((byte)(value >> 24));
        decoded.Write((byte)(value >> 16));
        decoded.Write((byte)(value >> 8));
    }
    return decoded.ToArray();
}
/// <exception cref="System.IO.IOException"/> protected override PdfDictionary ReadDictionary(bool objStm) { // The method copies the logic of PdfReader's method. // Only Contents related checks have been introduced. currentLevel++; PdfDictionary dic = new PdfDictionary(); while (!rangeIsCorrect) { tokens.NextValidToken(); if (tokens.GetTokenType() == PdfTokenizer.TokenType.EndDic) { currentLevel--; break; } if (tokens.GetTokenType() != PdfTokenizer.TokenType.Name) { tokens.ThrowError(PdfException.DictionaryKey1IsNotAName, tokens.GetStringValue()); } PdfName name = ReadPdfName(true); PdfObject obj; if (PdfName.Contents.Equals(name) && searchInV && contentsLevel == currentLevel) { long startPosition = tokens.GetPosition(); int ch; int whiteSpacesCount = -1; do { ch = tokens.Read(); whiteSpacesCount++; }while (ch != -1 && PdfTokenizer.IsWhitespace(ch)); tokens.Seek(startPosition); obj = ReadObject(true, objStm); long endPosition = tokens.GetPosition(); if (endPosition == contentsEnd && startPosition + whiteSpacesCount == contentsStart) { rangeIsCorrect = true; } } else { if (PdfName.V.Equals(name) && !searchInV && 1 == currentLevel) { searchInV = true; obj = ReadObject(true, objStm); searchInV = false; } else { obj = ReadObject(true, objStm); } } if (obj == null) { if (tokens.GetTokenType() == PdfTokenizer.TokenType.EndDic) { tokens.ThrowError(PdfException.UnexpectedGtGt); } if (tokens.GetTokenType() == PdfTokenizer.TokenType.EndArray) { tokens.ThrowError(PdfException.UnexpectedCloseBracket); } } dic.Put(name, obj); } return(dic); }
/// <summary>
/// Parses the samples of the image from the underlying content parser, accounting for filters.
/// The parser must be positioned immediately after the ID operator that ends the inline image's dictionary.
/// </summary>
/// <remarks>
/// Parses the samples of the image from the underlying content parser, accounting for filters.
/// The parser must be positioned immediately after the ID operator that ends the inline image's dictionary.
/// The parser will be left positioned immediately following the EI operator.
/// <b>Note:</b>This implementation does not actually apply the filters at this time.
/// </remarks>
/// <param name="imageDictionary">the dictionary of the inline image</param>
/// <param name="colorSpaceDic">the color space dictionary used to interpret unfiltered samples</param>
/// <param name="ps">the content parser</param>
/// <returns>the samples of the image</returns>
/// <exception cref="System.IO.IOException">if anything bad happens during parsing</exception>
private static byte[] ParseSamples(PdfDictionary imageDictionary, PdfDictionary colorSpaceDic, PdfCanvasParser ps) {
    // By the time we get here, the ID operator has already been consumed.
    if (!imageDictionary.ContainsKey(PdfName.Filter) && ImageColorSpaceIsKnown(imageDictionary, colorSpaceDic)) {
        return ParseUnfilteredSamples(imageDictionary, colorSpaceDic, ps);
    }
    // Scan raw bytes until an EI operator surrounded by whitespace is found.
    // Caveats: the image data itself may contain <ws>EI<ws>, and some streams
    // omit the whitespace before EI; resolving this unambiguously would require
    // decoding the stream, which we avoid here.
    MemoryStream imageBytes = new MemoryStream();
    MemoryStream pending = new MemoryStream();
    PdfTokenizer tokenizer = ps.GetTokeniser();
    int matched = 0;  // progress through the "<ws>EI<ws>" end-marker state machine
    int ch;
    while ((ch = tokenizer.Read()) != -1) {
        if (matched == 0 && PdfTokenizer.IsWhitespace(ch)) {
            matched++;
            pending.Write(ch);
        } else if (matched == 1 && ch == 'E') {
            matched++;
            pending.Write(ch);
        } else if (matched == 1 && PdfTokenizer.IsWhitespace(ch)) {
            // A whitespace that belongs to the image data followed by a whitespace
            // that may precede EI: flush the first whitespace and treat the current
            // one as a fresh start of the end-marker check ('matched' intentionally
            // stays at 1 here).
            imageBytes.Write(pending.ToArray());
            pending.JReset();
            pending.Write(ch);
        } else if (matched == 2 && ch == 'I') {
            matched++;
            pending.Write(ch);
        } else if (matched == 3 && PdfTokenizer.IsWhitespace(ch)) {
            byte[] candidate = imageBytes.ToArray();
            if (InlineImageStreamBytesAreComplete(candidate, imageDictionary)) {
                return candidate;
            }
            // False positive: the "EI" was part of the image data — keep scanning.
            imageBytes.Write(pending.ToArray());
            pending.JReset();
            imageBytes.Write(ch);
            matched = 0;
        } else {
            imageBytes.Write(pending.ToArray());
            pending.JReset();
            imageBytes.Write(ch);
            matched = 0;
        }
    }
    throw new InlineImageParsingUtils.InlineImageParseException(PdfException.CannotFindImageDataOrEI);
}
/// <summary>Parses the named CMap resource and fills the given CMap with its data.</summary>
/// <param name="cmapName">the name of the CMap resource to load</param>
/// <param name="cmap">the CMap object to populate</param>
/// <param name="location">resolves CMap names to tokenizers</param>
/// <param name="level">current usecmap recursion depth; parsing stops at MAX_LEVEL</param>
private static void ParseCid(String cmapName, AbstractCMap cmap, ICMapLocation location, int level) {
    if (level >= MAX_LEVEL) {
        return;
    }
    PdfTokenizer inp = location.GetLocation(cmapName);
    try {
        IList<CMapObject> list = new List<CMapObject>();
        CMapContentParser cp = new CMapContentParser(inp);
        int maxExc = 50;  // tolerate up to 50 parse errors before giving up
        while (true) {
            try {
                cp.Parse(list);
            } catch (Exception) {
                if (--maxExc < 0) {
                    break;
                }
                continue;
            }
            if (list.Count == 0) {
                break;
            }
            // The last parsed object is the operator that closes the section.
            String last = list[list.Count - 1].ToString();
            if (level == 0 && list.Count == 3 && last.Equals(def)) {
                // CIDSystemInfo-style "/Key value def" entries.
                CMapObject cmapObject = list[0];
                if (Registry.Equals(cmapObject.ToString())) {
                    cmap.SetRegistry(list[1].ToString());
                } else if (Ordering.Equals(cmapObject.ToString())) {
                    cmap.SetOrdering(list[1].ToString());
                } else if (CMapName.Equals(cmapObject.ToString())) {
                    cmap.SetName(list[1].ToString());
                } else if (Supplement.Equals(cmapObject.ToString())) {
                    try {
                        cmap.SetSupplement((int)list[1].GetValue());
                    } catch (Exception) {
                        // best effort: a malformed supplement is simply ignored
                    }
                }
            } else if ((last.Equals(endcidchar) || last.Equals(endbfchar)) && list.Count >= 3) {
                // Pairs of <src> <dst> entries preceding the end operator.
                int lMax = list.Count - 2;
                for (int k = 0; k < lMax; k += 2) {
                    if (list[k].IsString()) {
                        cmap.AddChar(list[k].ToString(), list[k + 1]);
                    }
                }
            } else if ((last.Equals(endcidrange) || last.Equals(endbfrange)) && list.Count >= 4) {
                // Triples of <low> <high> dst entries preceding the end operator.
                int lMax = list.Count - 3;
                for (int k = 0; k < lMax; k += 3) {
                    if (list[k].IsString() && list[k + 1].IsString()) {
                        cmap.AddRange(list[k].ToString(), list[k + 1].ToString(), list[k + 2]);
                    }
                }
            } else if (last.Equals(usecmap) && list.Count == 2 && list[0].IsName()) {
                // Recurse into the referenced CMap, bounded by MAX_LEVEL.
                ParseCid(list[0].ToString(), cmap, location, level + 1);
            } else if (last.Equals(endcodespacerange)) {
                // FIX: the previous bound "i < list.Count + 1" read list[i] one
                // element past the end when the operand list had even length
                // (malformed input), throwing IndexOutOfRangeException and
                // aborting the whole parse via the outer catch. "list.Count - 1"
                // visits the same <low> <high> pairs and never indexes past the
                // trailing "endcodespacerange" token.
                for (int i = 0; i < list.Count - 1; i += 2) {
                    if (list[i].IsHexString() && list[i + 1].IsHexString()) {
                        byte[] low = list[i].ToHexByteArray();
                        byte[] high = list[i + 1].ToHexByteArray();
                        cmap.AddCodeSpaceRange(low, high);
                    }
                }
            }
        }
    } catch (Exception) {
        ILog logger = LogManager.GetLogger(typeof(CMapParser));
        logger.Error(iText.IO.LogMessageConstant.UNKNOWN_ERROR_WHILE_PROCESSING_CMAP);
    } finally {
        inp.Close();
    }
}
/// <summary>Creates a new instance of CMapContentParser.</summary>
/// <param name="tokeniser">the tokeniser with the content</param>
public CMapContentParser(PdfTokenizer tokeniser) {
    this.tokeniser = tokeniser;
}
/// <summary>Reads a pdf object.</summary>
/// <returns>the pdf object, or null when no valid token remains</returns>
/// <exception cref="System.IO.IOException">on error</exception>
public virtual CMapObject ReadObject() {
    if (!NextValidToken()) {
        return null;
    }
    switch (tokeniser.GetTokenType()) {
        case PdfTokenizer.TokenType.StartDic: {
            return ReadDictionary();
        }
        case PdfTokenizer.TokenType.StartArray: {
            return ReadArray();
        }
        case PdfTokenizer.TokenType.String: {
            // Hex strings (<...>) and literal strings ((...)) decode differently.
            bool hex = tokeniser.IsHexString();
            return new CMapObject(hex ? CMapObject.HEX_STRING : CMapObject.STRING,
                PdfTokenizer.DecodeStringContent(tokeniser.GetByteContent(), hex));
        }
        case PdfTokenizer.TokenType.Name: {
            return new CMapObject(CMapObject.NAME, DecodeName(tokeniser.GetByteContent()));
        }
        case PdfTokenizer.TokenType.Number: {
            // Parse as a double then truncate to int; unparseable input
            // falls back to int.MinValue.
            CMapObject number = new CMapObject(CMapObject.NUMBER, null);
            try {
                number.SetValue((int)Double.Parse(tokeniser.GetStringValue(),
                    System.Globalization.CultureInfo.InvariantCulture));
            } catch (FormatException) {
                number.SetValue(int.MinValue);
            }
            return number;
        }
        case PdfTokenizer.TokenType.Other: {
            return new CMapObject(CMapObject.LITERAL, tokeniser.GetStringValue());
        }
        case PdfTokenizer.TokenType.EndArray: {
            return new CMapObject(CMapObject.TOKEN, "]");
        }
        case PdfTokenizer.TokenType.EndDic: {
            return new CMapObject(CMapObject.TOKEN, ">>");
        }
        default: {
            return new CMapObject(0, "");
        }
    }
}
/// <summary>
/// Tokenizes a dictionary literal and checks each primitive token
/// (names, numbers, strings, references, array delimiters) in sequence.
/// </summary>
public virtual void PrimitivesTest() {
    String data = "<</Size 70." + "/Value#20 .1" + "/Root 46 0 R" + "/Info 44 0 R" + "/ID[<736f6d652068657820737472696e672>(some simple string )<8C2547D58D4BD2C6F3D32B830BE3259D2>-70.1--0.2]"
         + "/Name1 --15" + "/Prev ---116.23 >>";
    RandomAccessSourceFactory factory = new RandomAccessSourceFactory();
    PdfTokenizer tok = new PdfTokenizer(new RandomAccessFileOrArray(factory.CreateSource(data.GetBytes(iText.IO.Util.EncodingUtil.ISO_8859_1))));

    // Advances to the next valid token and asserts its type.
    void ExpectToken(PdfTokenizer.TokenType expectedType) {
        tok.NextValidToken();
        NUnit.Framework.Assert.AreEqual(tok.GetTokenType(), expectedType);
    }
    // Expects a Name token with the given decoded value.
    void ExpectName(String expectedValue) {
        ExpectToken(PdfTokenizer.TokenType.Name);
        NUnit.Framework.Assert.AreEqual(expectedValue, new PdfName(tok.GetByteContent()).GetValue());
    }
    // Expects a Number token whose string form matches the given value.
    void ExpectNumber(String expectedValue) {
        ExpectToken(PdfTokenizer.TokenType.Number);
        NUnit.Framework.Assert.AreEqual(expectedValue, new PdfNumber(tok.GetByteContent()).ToString());
    }
    // Expects a String token with the given hex flag and decoded value.
    void ExpectString(bool expectedHex, String expectedValue) {
        ExpectToken(PdfTokenizer.TokenType.String);
        NUnit.Framework.Assert.AreEqual(tok.IsHexString(), expectedHex);
        PdfString str = new PdfString(tok.GetByteContent(), tok.IsHexString());
        NUnit.Framework.Assert.AreEqual(expectedValue, str.GetValue());
    }
    // Expects a Ref token and checks its "obj gen R" rendering.
    void ExpectRef(String expectedValue) {
        ExpectToken(PdfTokenizer.TokenType.Ref);
        PdfIndirectReference reference = new PdfIndirectReference(null, tok.GetObjNr(), tok.GetGenNr());
        NUnit.Framework.Assert.AreEqual(expectedValue, reference.ToString());
    }

    ExpectToken(PdfTokenizer.TokenType.StartDic);
    ExpectName("Size");
    ExpectNumber("70.");
    ExpectName("Value ");
    // NOTE(review): AreNotSame is a reference comparison, so this assertion is
    // vacuously true for any two distinct string instances — presumably an
    // equality assertion was intended; kept as-is to preserve behaviour.
    ExpectToken(PdfTokenizer.TokenType.Number);
    NUnit.Framework.Assert.AreNotSame("0.1", new PdfNumber(tok.GetByteContent()).ToString());
    ExpectName("Root");
    ExpectRef("46 0 R");
    ExpectName("Info");
    ExpectRef("44 0 R");
    ExpectName("ID");
    ExpectToken(PdfTokenizer.TokenType.StartArray);
    ExpectString(true, "some hex string ");
    ExpectString(false, "some simple string ");
    ExpectString(true, "\u008C%G\u00D5\u008DK\u00D2\u00C6\u00F3\u00D3+\u0083\u000B\u00E3%\u009D ");
    ExpectNumber("-70.1");
    ExpectNumber("-0.2");
    ExpectToken(PdfTokenizer.TokenType.EndArray);
    ExpectName("Name1");
    // "--15" collapses to 0 per the tokenizer's handling of repeated signs.
    ExpectNumber("0");
    ExpectName("Prev");
    ExpectNumber("-116.23");
}
/// <summary>Creates a new instance of PdfCanvasParser.</summary>
/// <param name="tokeniser">the tokeniser with the content</param>
/// <param name="currentResources">
/// current resources of the content stream. It is optional parameter, which
/// is used for performance improvements of specific cases of inline images
/// parsing.
/// </param>
public PdfCanvasParser(PdfTokenizer tokeniser, PdfResources currentResources) {
    this.tokeniser = tokeniser;
    this.currentResources = currentResources;
}
/// <summary>Creates a new instance of PdfCanvasParser without content stream resources.</summary>
/// <param name="tokeniser">the tokeniser with the content</param>
public PdfCanvasParser(PdfTokenizer tokeniser)
    : this(tokeniser, null) {
    // Delegates to the two-argument constructor; no resources are supplied.
}
/// <summary>Sets the tokeniser.</summary>
/// <param name="tokeniser">the tokeniser to read subsequent content from</param>
public virtual void SetTokeniser(PdfTokenizer tokeniser) {
    this.tokeniser = tokeniser;
}
/// <summary>
/// Builds the string value from the raw byte content: decodes the PDF string
/// syntax (literal or hex, per hexWriting) and converts the bytes to text.
/// </summary>
protected internal virtual void GenerateValue() {
    System.Diagnostics.Debug.Assert(content != null, "No byte[] content to generate value");
    value = PdfEncodings.ConvertToString(PdfTokenizer.DecodeStringContent(content, hexWriting), null);
}