Exemplo n.º 1
0
        /// <summary>
        /// Checks that the tokenizer yields the expected token after seeking to several offsets of a small buffer.
        /// </summary>
        public virtual void SeekTest()
        {
            // Input layout: "/Name1" sits at offsets 0-5, a space at offset 6, "70" at offsets 7-8.
            byte[] rawBytes = "/Name1 70".GetBytes(iText.IO.Util.EncodingUtil.ISO_8859_1);
            PdfTokenizer tokenizer = new PdfTokenizer(
                new RandomAccessFileOrArray(new RandomAccessSourceFactory().CreateSource(rawBytes)));

            // Offset 0: the name token.
            tokenizer.Seek(0);
            tokenizer.NextValidToken();
            NUnit.Framework.Assert.AreEqual(PdfTokenizer.TokenType.Name, tokenizer.GetTokenType());
            NUnit.Framework.Assert.AreEqual("Name1", tokenizer.GetStringValue());

            // Offset 7: the whole number.
            tokenizer.Seek(7);
            tokenizer.NextValidToken();
            NUnit.Framework.Assert.AreEqual(PdfTokenizer.TokenType.Number, tokenizer.GetTokenType());
            NUnit.Framework.Assert.AreEqual("70", tokenizer.GetStringValue());

            // Offset 8: only the last digit of the number is left.
            tokenizer.Seek(8);
            tokenizer.NextValidToken();
            NUnit.Framework.Assert.AreEqual(PdfTokenizer.TokenType.Number, tokenizer.GetTokenType());
            NUnit.Framework.Assert.AreEqual("0", tokenizer.GetStringValue());

            // Offset 9: past the last byte, so the end of the input is reported.
            tokenizer.Seek(9);
            tokenizer.NextValidToken();
            NUnit.Framework.Assert.AreEqual(PdfTokenizer.TokenType.EndOfFile, tokenizer.GetTokenType());
        }
Exemplo n.º 2
0
        /// <summary>Reads the next pdf object, choosing its kind from the current token type.</summary>
        /// <returns>
        /// the pdf object that was read, or <c>null</c> when <c>NextValidToken()</c> reports that no token is available
        /// </returns>
        /// <exception cref="System.IO.IOException">on error</exception>
        public virtual PdfObject ReadObject()
        {
            if (!NextValidToken())
            {
                return null;
            }

            PdfTokenizer.TokenType tokenType = tokeniser.GetTokenType();

            if (tokenType == PdfTokenizer.TokenType.StartDic)
            {
                PdfDictionary dictionary = ReadDictionary();
                return dictionary;
            }
            if (tokenType == PdfTokenizer.TokenType.StartArray)
            {
                return ReadArray();
            }
            if (tokenType == PdfTokenizer.TokenType.String)
            {
                // Remember whether the token was written in hex notation.
                PdfString text = new PdfString(tokeniser.GetDecodedStringContent());
                return text.SetHexWriting(tokeniser.IsHexString());
            }
            if (tokenType == PdfTokenizer.TokenType.Name)
            {
                return new PdfName(tokeniser.GetByteContent());
            }
            if (tokenType == PdfTokenizer.TokenType.Number)
            {
                // The raw bytes are handed to PdfNumber(byte[]) so that number parsing is postponed until it is needed.
                return new PdfNumber(tokeniser.GetByteContent());
            }

            // Every other token type is kept as a literal built from its raw bytes.
            return new PdfLiteral(tokeniser.GetByteContent());
        }
Exemplo n.º 3
0
        // if we hit here, the file is either corrupt (stream ended unexpectedly),
        // or the last token ended exactly at the end of a stream.  This last
        // case can occur inside an Object Stream.
        // NOTE(review): the remark above appears detached from the statement it describes; it presumably
        // refers to one of the end-of-input (ch == -1) checks inside the method body - confirm against upstream.
        /// <summary>
        /// Reads the next raw token from <c>file</c>: skips leading whitespace, stores the token kind in
        /// <c>type</c> and, for names, strings, numbers and other tokens, collects the token bytes in <c>outBuf</c>.
        /// </summary>
        /// <remarks>
        /// Comments are reported as tokens of type <c>Comment</c>; they are not skipped here.
        /// String content is stored as read (hex digits or escaped characters), not decoded.
        /// </remarks>
        /// <returns>
        /// <c>false</c> when the end of the input is reached before a token starts (<c>type</c> is then
        /// <c>EndOfFile</c>); <c>true</c> otherwise
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        public virtual bool NextToken()
        {
            int ch;

            // Start with an empty buffer for the new token.
            outBuf.Reset();
            // Skip whitespace; ch ends up as the first non-whitespace byte, or -1 at the end of the input.
            do
            {
                ch = file.Read();
            }while (ch != -1 && IsWhitespace(ch));
            if (ch == -1)
            {
                type = PdfTokenizer.TokenType.EndOfFile;
                return(false);
            }
            switch (ch)
            {
            case '[': {
                type = PdfTokenizer.TokenType.StartArray;
                break;
            }

            case ']': {
                type = PdfTokenizer.TokenType.EndArray;
                break;
            }

            case '/': {
                // Name: collect every byte after the slash up to the next delimiter.
                type = PdfTokenizer.TokenType.Name;
                while (true)
                {
                    ch = file.Read();
                    // The table is indexed with ch + 1, so the end-of-input value -1 maps to index 0
                    // (presumably marked as a delimiter so that the loop also stops there - confirm).
                    if (delims[ch + 1])
                    {
                        break;
                    }
                    outBuf.Append(ch);
                }
                // The terminating byte is not part of the name; presumably this makes it available to the next read.
                BackOnePosition(ch);
                break;
            }

            case '>': {
                // Only ">>" is accepted here; a single '>' is reported through ThrowError.
                ch = file.Read();
                if (ch != '>')
                {
                    ThrowError(iText.IO.IOException.GtNotExpected);
                }
                type = PdfTokenizer.TokenType.EndDic;
                break;
            }

            case '<': {
                int v1 = file.Read();
                // "<<" opens a dictionary; anything else starts a hex string.
                if (v1 == '<')
                {
                    type = PdfTokenizer.TokenType.StartDic;
                    break;
                }
                type      = PdfTokenizer.TokenType.String;
                hexString = true;
                int v2 = 0;
                // Hex digits are consumed in pairs (v1, v2), with whitespace skipped before each digit.
                // The raw characters are appended to outBuf; the GetHex result is only used for validation.
                while (true)
                {
                    while (IsWhitespace(v1))
                    {
                        v1 = file.Read();
                    }
                    if (v1 == '>')
                    {
                        break;
                    }
                    outBuf.Append(v1);
                    v1 = ByteBuffer.GetHex(v1);
                    // A negative value means the character was not accepted as a hex digit.
                    if (v1 < 0)
                    {
                        break;
                    }
                    v2 = file.Read();
                    while (IsWhitespace(v2))
                    {
                        v2 = file.Read();
                    }
                    // The closing '>' may follow an odd number of digits.
                    if (v2 == '>')
                    {
                        break;
                    }
                    outBuf.Append(v2);
                    v2 = ByteBuffer.GetHex(v2);
                    if (v2 < 0)
                    {
                        break;
                    }
                    v1 = file.Read();
                }
                // A negative v1 or v2 means the loop stopped on a rejected character rather than on '>'.
                if (v1 < 0 || v2 < 0)
                {
                    ThrowError(iText.IO.IOException.ErrorReadingString);
                }
                break;
            }

            case '%': {
                // Comment: consume up to a line break or the end of the input; the text is not stored in outBuf.
                type = PdfTokenizer.TokenType.Comment;
                do
                {
                    ch = file.Read();
                }while (ch != -1 && ch != '\r' && ch != '\n');
                break;
            }

            case '(': {
                // Literal string: copy the bytes up to the matching ')' while tracking nested parentheses.
                type      = PdfTokenizer.TokenType.String;
                hexString = false;
                int nesting = 0;
                while (true)
                {
                    ch = file.Read();
                    if (ch == -1)
                    {
                        break;
                    }
                    if (ch == '(')
                    {
                        ++nesting;
                    }
                    else
                    {
                        if (ch == ')')
                        {
                            --nesting;
                            // Dropping below zero means this ')' closes the string itself; it is not stored.
                            if (nesting == -1)
                            {
                                break;
                            }
                        }
                        else
                        {
                            if (ch == '\\')
                            {
                                // Keep the backslash and read the escaped byte right away, so that an escaped
                                // parenthesis does not take part in the nesting count.
                                outBuf.Append('\\');
                                ch = file.Read();
                                if (ch < 0)
                                {
                                    break;
                                }
                            }
                        }
                    }
                    outBuf.Append(ch);
                }
                // The input ended before the closing parenthesis was found.
                if (ch == -1)
                {
                    ThrowError(iText.IO.IOException.ErrorReadingString);
                }
                break;
            }

            default: {
                // Anything starting with a sign, a dot or a digit is read as a number; the rest is an "Other" token.
                if (ch == '-' || ch == '+' || ch == '.' || (ch >= '0' && ch <= '9'))
                {
                    type = PdfTokenizer.TokenType.Number;
                    bool isReal          = false;
                    int  numberOfMinuses = 0;
                    if (ch == '-')
                    {
                        do
                        {
                            // Take care of number like "--234". If Acrobat can read them so must we.
                            ++numberOfMinuses;
                            ch = file.Read();
                        }while (ch == '-');
                        // However many minuses were read, a single one is stored.
                        outBuf.Append('-');
                    }
                    else
                    {
                        outBuf.Append(ch);
                        // We don't need to check if the number is real over here
                        // as we need to know that fact only in case if there are any minuses.
                        ch = file.Read();
                    }
                    // Copy the remaining digits and dots; any dot seen here marks the number as real.
                    while (ch != -1 && ((ch >= '0' && ch <= '9') || ch == '.'))
                    {
                        if (ch == '.')
                        {
                            isReal = true;
                        }
                        outBuf.Append(ch);
                        ch = file.Read();
                    }
                    if (numberOfMinuses > 1 && !isReal)
                    {
                        // Numbers of integer type and with more than one minus before them
                        // are interpreted by Acrobat as zero.
                        outBuf.Reset();
                        outBuf.Append('0');
                    }
                }
                else
                {
                    type = PdfTokenizer.TokenType.Other;
                    // Collect bytes until the delimiter table (indexed with ch + 1, so -1 maps to index 0) says stop.
                    do
                    {
                        outBuf.Append(ch);
                        ch = file.Read();
                    }while (!delims[ch + 1]);
                }
                // ch is the first byte after the token; nothing is handed back when the input ended.
                if (ch != -1)
                {
                    BackOnePosition(ch);
                }
                break;
            }
            }
            return(true);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Advances to the next token that is not a comment. A number followed by a second number and the
        /// <c>R</c> or <c>Obj</c> keyword is reported as one <c>Ref</c> or <c>Obj</c> token, with
        /// <c>reference</c> and <c>generation</c> filled in; otherwise the first number is reported on its own.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public virtual void NextValidToken()
        {
            // How many consecutive number tokens have been read so far (0, 1 or 2).
            int numbersSeen = 0;
            byte[] firstNumber = null;
            byte[] secondNumber = null;
            // Position right after the first number, used to undo the look-ahead.
            long rewindPosition = 0;

            while (NextToken())
            {
                if (type == PdfTokenizer.TokenType.Comment)
                {
                    continue;
                }

                if (numbersSeen == 0)
                {
                    // Anything that is not a number is already the final result.
                    if (type != PdfTokenizer.TokenType.Number)
                    {
                        return;
                    }
                    rewindPosition = file.GetPosition();
                    firstNumber = GetByteContent();
                    numbersSeen = 1;
                }
                else if (numbersSeen == 1)
                {
                    if (type != PdfTokenizer.TokenType.Number)
                    {
                        // Not a reference: go back and report the first number alone.
                        file.Seek(rewindPosition);
                        type = PdfTokenizer.TokenType.Number;
                        outBuf.Reset().Append(firstNumber);
                        return;
                    }
                    secondNumber = GetByteContent();
                    numbersSeen = 2;
                }
                else
                {
                    if (type == PdfTokenizer.TokenType.Other)
                    {
                        bool isReference = TokenValueEqualsTo(R);
                        if (isReference || TokenValueEqualsTo(Obj))
                        {
                            System.Diagnostics.Debug.Assert(secondNumber != null);
                            type = isReference ? PdfTokenizer.TokenType.Ref : PdfTokenizer.TokenType.Obj;
                            reference = Convert.ToInt32(iText.IO.Util.JavaUtil.GetStringForBytes(firstNumber));
                            generation = Convert.ToInt32(iText.IO.Util.JavaUtil.GetStringForBytes(secondNumber));
                            return;
                        }
                    }
                    // Two numbers not followed by a keyword: go back and report the first number alone.
                    file.Seek(rewindPosition);
                    type = PdfTokenizer.TokenType.Number;
                    outBuf.Reset().Append(firstNumber);
                    return;
                }
            }

            // The input ended while looking past the first number, so that number is still the current token.
            if (numbersSeen == 1)
            {
                type = PdfTokenizer.TokenType.Number;
                outBuf.Reset().Append(firstNumber);
            }
        }
Exemplo n.º 5
0
        /// <summary>Reads the next CMap object, choosing its kind from the current token type.</summary>
        /// <remarks>
        /// A number token is stored as an <c>int</c> (the parsed value cast to <c>int</c>). A number token that
        /// cannot be parsed is stored as <c>int.MinValue</c> instead of raising an exception.
        /// </remarks>
        /// <returns>
        /// the object that was read, or <c>null</c> when <c>NextValidToken()</c> reports that no token is available
        /// </returns>
        /// <exception cref="System.IO.IOException">on error</exception>
        public virtual CMapObject ReadObject()
        {
            if (!NextValidToken())
            {
                return(null);
            }
            PdfTokenizer.TokenType type = tokeniser.GetTokenType();
            switch (type)
            {
            case PdfTokenizer.TokenType.StartDic: {
                return(ReadDictionary());
            }

            case PdfTokenizer.TokenType.StartArray: {
                return(ReadArray());
            }

            case PdfTokenizer.TokenType.String: {
                // Hex and literal strings differ only in the object kind and the decoding flag.
                bool isHex = tokeniser.IsHexString();
                return(new CMapObject(isHex ? CMapObject.HEX_STRING : CMapObject.STRING,
                                      PdfTokenizer.DecodeStringContent(tokeniser.GetByteContent(), isHex)));
            }

            case PdfTokenizer.TokenType.Name: {
                return(new CMapObject(CMapObject.NAME, DecodeName(tokeniser.GetByteContent())));
            }

            case PdfTokenizer.TokenType.Number: {
                CMapObject numObject = new CMapObject(CMapObject.NUMBER, null);
                // TryParse with the styles that Double.Parse(String, IFormatProvider) uses by default, so the
                // same inputs are accepted, but every failure (bad format, missing text, and the overflow
                // that .NET Framework reports as an exception) ends up in the int.MinValue fallback.
                double parsedValue;
                if (Double.TryParse(tokeniser.GetStringValue(),
                                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                                    System.Globalization.CultureInfo.InvariantCulture, out parsedValue))
                {
                    numObject.SetValue((int)parsedValue);
                }
                else
                {
                    numObject.SetValue(int.MinValue);
                }
                return(numObject);
            }

            case PdfTokenizer.TokenType.Other: {
                return(new CMapObject(CMapObject.LITERAL, tokeniser.GetStringValue()));
            }

            case PdfTokenizer.TokenType.EndArray: {
                return(new CMapObject(CMapObject.TOKEN, "]"));
            }

            case PdfTokenizer.TokenType.EndDic: {
                return(new CMapObject(CMapObject.TOKEN, ">>"));
            }

            default: {
                // Any remaining token type is returned as an object of kind 0 with an empty value.
                return(new CMapObject(0, ""));
            }
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Advances to the next token that is not a comment. A number followed by a second number and the
        /// <c>R</c> or <c>Obj</c> keyword is reported as one <c>Ref</c> or <c>Obj</c> token; otherwise the
        /// first number is reported on its own.
        /// </summary>
        /// <remarks>
        /// If the numbers of a <c>Ref</c> token cannot be converted, an error is logged and
        /// <c>reference</c> / <c>generation</c> are set to -1 / 0. The <c>Obj</c> case has no such guard.
        /// </remarks>
        public virtual void NextValidToken()
        {
            // How many consecutive number tokens have been read so far (0, 1 or 2).
            int numbersSeen = 0;
            byte[] firstNumber = null;
            byte[] secondNumber = null;
            // Position right after the first number, used to undo the look-ahead.
            long rewindPosition = 0;

            while (NextToken())
            {
                if (type == PdfTokenizer.TokenType.Comment)
                {
                    continue;
                }

                if (numbersSeen == 0)
                {
                    // Anything that is not a number is already the final result.
                    if (type != PdfTokenizer.TokenType.Number)
                    {
                        return;
                    }
                    rewindPosition = file.GetPosition();
                    firstNumber = GetByteContent();
                    numbersSeen = 1;
                }
                else if (numbersSeen == 1)
                {
                    if (type != PdfTokenizer.TokenType.Number)
                    {
                        // Not a reference: go back and report the first number alone.
                        file.Seek(rewindPosition);
                        type = PdfTokenizer.TokenType.Number;
                        outBuf.Reset().Append(firstNumber);
                        return;
                    }
                    secondNumber = GetByteContent();
                    numbersSeen = 2;
                }
                else
                {
                    if (type == PdfTokenizer.TokenType.Other)
                    {
                        if (TokenValueEqualsTo(R))
                        {
                            System.Diagnostics.Debug.Assert(secondNumber != null);
                            type = PdfTokenizer.TokenType.Ref;
                            try {
                                reference = Convert.ToInt32(iText.IO.Util.JavaUtil.GetStringForBytes(firstNumber));
                                generation = Convert.ToInt32(iText.IO.Util.JavaUtil.GetStringForBytes(secondNumber));
                            }
                            catch (Exception) {
                                // Conversion failed (FormatException or OverflowException on .NET,
                                // NumberFormatException in the Java original): log it and fall back to -1 / 0.
                                ILog logger = LogManager.GetLogger(typeof(PdfTokenizer));
                                String firstText = iText.IO.Util.JavaUtil.GetStringForBytes(firstNumber);
                                String secondText = iText.IO.Util.JavaUtil.GetStringForBytes(secondNumber);
                                logger.Error(MessageFormatUtil.Format(iText.IO.LogMessageConstant.INVALID_INDIRECT_REFERENCE,
                                                                      firstText, secondText));
                                reference = -1;
                                generation = 0;
                            }
                            return;
                        }
                        if (TokenValueEqualsTo(Obj))
                        {
                            System.Diagnostics.Debug.Assert(secondNumber != null);
                            type = PdfTokenizer.TokenType.Obj;
                            reference = Convert.ToInt32(iText.IO.Util.JavaUtil.GetStringForBytes(firstNumber));
                            generation = Convert.ToInt32(iText.IO.Util.JavaUtil.GetStringForBytes(secondNumber));
                            return;
                        }
                    }
                    // Two numbers not followed by a keyword: go back and report the first number alone.
                    file.Seek(rewindPosition);
                    type = PdfTokenizer.TokenType.Number;
                    outBuf.Reset().Append(firstNumber);
                    return;
                }
            }

            // The input ended while looking past the first number, so that number is still the current token.
            if (numbersSeen == 1)
            {
                type = PdfTokenizer.TokenType.Number;
                outBuf.Reset().Append(firstNumber);
            }
        }