/// <summary>
/// Decodes the text carried by a <c>TJ</c> statement.
/// </summary>
/// <param name="rawContent">The raw statement; its last two characters (the operator) are stripped before the remainder is parsed as an array.</param>
/// <param name="cMapToUnicode">Passed through to <c>PdfFontHelper.ToUnicode</c> for every string element.</param>
/// <param name="encodingDifferenceToUnicode">Passed through to <c>PdfFontHelper.ToUnicode</c> for every string element.</param>
/// <returns>
/// The concatenation of the decoded string elements of the array, or <c>null</c> when
/// <paramref name="rawContent"/> is null, shorter than two characters, or has nothing but
/// white space in front of its last two characters.
/// </returns>
public static string GetTJContent(string rawContent, CMapToUnicode cMapToUnicode, EncodingDifferenceToUnicode encodingDifferenceToUnicode)
        {
            // Stripping the two trailing characters needs at least two characters to work on.
            if (rawContent == null || rawContent.Length < 2)
            {
                return null;
            }

            string rawArray = rawContent.Remove(rawContent.Length - 2).Trim();
            if (string.IsNullOrWhiteSpace(rawArray))
            {
                return null;
            }

            PdfArrayDataType pdfArrayDataType = PdfArrayDataType.Parse(rawArray);

            // Only the string elements contribute text; every other element of the array is skipped.
            return string.Concat(
                pdfArrayDataType.Elements
                    .OfType<string>()
                    .Select(item =>
                    {
                        string escapedContent = item.Trim();

                        // Hex strings and escaped literal strings are unpacked differently before the Unicode mapping.
                        return PdfHexStringDataType.IsStartChar(escapedContent) ?
                            PdfFontHelper.ToUnicode(PdfHexStringDataType.GetHexContent(escapedContent), cMapToUnicode, encodingDifferenceToUnicode).ToString() :
                            PdfFontHelper.ToUnicode(PdfStringDataType.GetContentFromEscapedContent(escapedContent), cMapToUnicode, encodingDifferenceToUnicode);
                    }));
        }
// Example #2
// 0
        /// <summary>
        /// Reads two consecutive hex-string tokens from <paramref name="s"/> and builds a
        /// <see cref="BFRange"/> that covers a single character code.
        /// </summary>
        /// <param name="s">Text to read from.</param>
        /// <param name="startPosition">Read position; advanced by the skip/read helpers as the tokens are consumed.</param>
        /// <returns>
        /// A range whose <c>BeginChar</c> and <c>EndChar</c> are both the first value and whose
        /// <c>UnicodeChar</c> is the second value.
        /// </returns>
        public static BFRange Parse(string s, ref int startPosition)
        {
            // Both tokens are read before either one is parsed.
            Statement.SkipSpace(s, ref startPosition);
            string rawSourceCode = PdfHexStringDataType.GetRawData(s, ref startPosition);

            Statement.SkipSpace(s, ref startPosition);
            string rawTargetCode = PdfHexStringDataType.GetRawData(s, ref startPosition);

            // The first and last character of each raw token are dropped (presumably the
            // hex-string delimiters - confirm against GetRawData); the rest is parsed as hexadecimal.
            int sourceCode = int.Parse(rawSourceCode.Substring(1, rawSourceCode.Length - 2), NumberStyles.HexNumber);
            int targetCode = int.Parse(rawTargetCode.Substring(1, rawTargetCode.Length - 2), NumberStyles.HexNumber);

            BFRange singleCodeRange = new BFRange()
            {
                BeginChar   = sourceCode,
                EndChar     = sourceCode,
                UnicodeChar = targetCode
            };

            return singleCodeRange;
        }
        /// <summary>
        /// Reads the next statement from <paramref name="content"/>, starting at index <paramref name="i"/>.
        /// </summary>
        /// <remarks>
        /// Numeric, string, array, hex-string, boolean and object tokens are appended to the statement
        /// as they are found. Any other character is appended and marks the statement as "being read";
        /// from then on a space (consumed) or a separator character (left in place) ends the statement.
        /// A line feed always ends the statement and is consumed.
        /// </remarks>
        /// <param name="content">Text to scan.</param>
        /// <param name="i">Current read position; advanced past everything that was consumed.</param>
        /// <returns>
        /// The statement text collected so far, or <c>null</c> when <paramref name="i"/> is already
        /// at or past the end of <paramref name="content"/>.
        /// </returns>
        public static string GetNextStatement(string content, ref int i)
        {
            const string TrueOperand  = "true ";
            const string FalseOperand = "false ";

            if (i >= content.Length)
            {
                return null;
            }

            string statement        = "";
            bool   readingStatement = false;

            while (i < content.Length)
            {
                if (content[i] == ' ')
                {
                    if (readingStatement)
                    {
                        // The space terminates the statement; consume it so the next call starts after it.
                        i++;
                        return statement;
                    }

                    statement += " ";
                    i++;
                }
                else if (content[i] == '\n')
                {
                    i++;
                    return statement;
                }
                else if (readingStatement && IsSeparator(content[i]))
                {
                    // The separator is left unconsumed for the next call.
                    return statement;
                }
                else if (PdfNumericDataType.IsStartChar(content, i))
                {
                    // numeric parameter
                    statement += PdfNumericDataType.GetRawData(content, ref i);
                }
                else if (PdfStringDataType.IsStartChar(content, i))
                {
                    // string parameter
                    statement += PdfStringDataType.GetRawData(content, ref i);
                }
                else if (PdfArrayDataType.IsStartChar(content, i))
                {
                    // array parameter
                    statement += PdfArrayDataType.GetRawData(content, ref i);
                }
                else if (PdfHexStringDataType.IsStartChar(content, i))
                {
                    // hex string parameter
                    statement += PdfHexStringDataType.GetRawData(content, ref i);
                }
                else if (string.CompareOrdinal(content, i, TrueOperand, 0, TrueOperand.Length) == 0)
                {
                    // boolean true parameter; the ordinal compare is bounds-safe, so a literal that
                    // ends exactly at the end of the content is recognised as well
                    statement += TrueOperand;
                    i         += TrueOperand.Length;
                }
                else if (string.CompareOrdinal(content, i, FalseOperand, 0, FalseOperand.Length) == 0)
                {
                    // boolean false parameter
                    statement += FalseOperand;
                    i         += FalseOperand.Length;
                }
                else if (PdfObjectDataType.IsStartChar(content, i))
                {
                    // object parameter
                    statement += PdfObjectDataType.GetRawData(content, ref i);
                }
                else
                {
                    // Anything else is taken as part of the statement itself.
                    statement       += content[i];
                    readingStatement = true;
                    i++;
                }
            }

            return statement;
        }
        /// <summary>
        /// Rebuilds <c>Lines</c> from the raw statements in <c>RawContent</c>.
        /// </summary>
        /// <remarks>
        /// Statements are dispatched on their trailing operator:
        /// <c>Tm</c> replaces the text matrix; <c>Tf</c> updates font height and the font's Unicode
        /// mappings; <c>Td</c> and <c>TD</c> translate the text matrix (<c>TD</c> also sets the leading
        /// to <c>-ty</c>); <c>TL</c> sets the leading; <c>T*</c> moves down by the leading;
        /// <c>TJ</c> and <c>Tj</c> emit a line. Statements with any other ending are ignored.
        /// </remarks>
        public override void CloseMultiLineStatement()
        {
            Lines = new List <TextObjectStatementLine>();

            TextObjectStatementLine actualLineSettings = new TextObjectStatementLine();
            Matrix textTransformMatrix = Matrix.Identity;
            Point  position            = new Point();
            float  leadingParameter    = 0;

            int pageRotation = PdfReader.GetPageRotation(PageNumber);

            for (int index = 0; index < RawContent.Count; index++)
            {
                string rawContent = RawContent[index];
                if (rawContent.EndsWith("Tm", StringComparison.Ordinal))
                {
                    Matrix matrix;
                    if (Matrix.TryParse(rawContent, out matrix))
                    {
                        textTransformMatrix = matrix;
                    }
                }
                else if (rawContent.EndsWith("Tf", StringComparison.Ordinal))
                {
                    string[] fontParameters = rawContent.Split(' ');
                    if (fontParameters.Length < 3)
                    {
                        // Try to retrieve from previous line. This is a global parsing issue
                        if (index < 1)
                        {
                            continue;
                        }

                        fontParameters = (RawContent[index - 1].Trim() + " " + rawContent.Trim()).Split(' ');
                        if (fontParameters.Length < 3)
                        {
                            continue;
                        }
                    }

                    float fontSize;
                    if (float.TryParse(fontParameters[fontParameters.Length - 2], NumberStyles.Any, NumberFormatInfo.InvariantInfo, out fontSize))
                    {
                        actualLineSettings.FontHeight = fontSize;
                    }

                    // Operands are taken from the end: ... <font name> <size> Tf
                    string fontName = fontParameters[fontParameters.Length - 3];
                    actualLineSettings.CMapToUnicode = PdfFontHelper.GetFontCMapToUnicode(PdfReader, PageNumber, fontName);
                    actualLineSettings.EncodingDifferenceToUnicode = EncodingDifferenceToUnicode.Parse(PdfFontHelper.GetFont(PdfReader, PageNumber, fontName));
                }
                else if (rawContent.EndsWith("Td", StringComparison.Ordinal))
                {
                    float[] offsets;
                    if (TryReadTrailingOperands(rawContent, 2, out offsets))
                    {
                        // Td is a move relative to the current matrix, exactly like TD below but
                        // without touching the leading, so it is composed rather than assigned.
                        textTransformMatrix = new Matrix(1, 0, 0, 1, offsets[0], offsets[1]) * textTransformMatrix;
                    }
                }
                else if (rawContent.EndsWith("TD", StringComparison.Ordinal))
                {
                    float[] offsets;
                    if (TryReadTrailingOperands(rawContent, 2, out offsets))
                    {
                        textTransformMatrix = new Matrix(1, 0, 0, 1, offsets[0], offsets[1]) * textTransformMatrix;
                        leadingParameter    = -offsets[1];
                    }
                }
                else if (rawContent.EndsWith("TL", StringComparison.Ordinal))
                {
                    float[] leading;
                    if (TryReadTrailingOperands(rawContent, 1, out leading))
                    {
                        leadingParameter = leading[0];
                    }
                }
                else if (rawContent.EndsWith("T*", StringComparison.Ordinal))
                {
                    textTransformMatrix = new Matrix(1, 0, 0, 1, 0, -leadingParameter) * textTransformMatrix;
                }
                else if (rawContent.EndsWith("TJ", StringComparison.Ordinal))
                {
                    string content = GetTJContent(rawContent, actualLineSettings.CMapToUnicode, actualLineSettings.EncodingDifferenceToUnicode);
                    if (string.IsNullOrEmpty(content))
                    {
                        continue;
                    }

                    var line = CreatePositionedLine(actualLineSettings, textTransformMatrix, position, pageRotation);
                    line.Content = content;
                    Lines.Add(line);
                }
                else if (rawContent.Trim().EndsWith("Tj", StringComparison.Ordinal))
                {
                    string escapedContent = rawContent.Trim();
                    escapedContent = escapedContent.Remove(escapedContent.Length - 2);
                    string content = PdfHexStringDataType.IsStartChar(escapedContent) ? PdfHexStringDataType.GetContent(escapedContent) : PdfStringDataType.GetContentFromEscapedContent(escapedContent);

                    var line = CreatePositionedLine(actualLineSettings, textTransformMatrix, position, pageRotation);
                    line.Content = PdfFontHelper.ToUnicode(content, line.CMapToUnicode, line.EncodingDifferenceToUnicode);
                    Lines.Add(line);
                }
            }
        }

        /// <summary>
        /// Clones <paramref name="lineSettings"/> and fills in the scaled font height and the
        /// transformed, rotated position shared by the <c>TJ</c> and <c>Tj</c> branches.
        /// </summary>
        private TextObjectStatementLine CreatePositionedLine(TextObjectStatementLine lineSettings, Matrix textTransformMatrix, Point position, int pageRotation)
        {
            var line = lineSettings.Clone();

            // For 90/270 degree pages the b component of the base matrix is used as the scale instead of a.
            line.FontHeight =
                line.FontHeight * textTransformMatrix.a * (pageRotation == 90 || pageRotation == 270 ? BaseTransformMatrix.b : BaseTransformMatrix.a);
            line.Position = BaseTransformMatrix.TransformPoint(new Point(textTransformMatrix.TransformX(position.X, position.Y + line.FontHeight), textTransformMatrix.TransformY(position.X, position.Y + line.FontHeight))).Rotate(pageRotation);

            return line;
        }

        /// <summary>
        /// Parses the <paramref name="count"/> space-separated tokens that immediately precede the
        /// last token (the operator) of <paramref name="rawContent"/> as invariant-culture floats.
        /// </summary>
        /// <returns><c>false</c> when there are too few tokens or any of them is not a number.</returns>
        private static bool TryReadTrailingOperands(string rawContent, int count, out float[] operands)
        {
            operands = new float[count];

            string[] tokens = rawContent.Split(' ');
            int      first  = tokens.Length - 1 - count;
            if (first < 0)
            {
                return false;
            }

            for (int k = 0; k < count; k++)
            {
                if (!float.TryParse(tokens[first + k], NumberStyles.Any, NumberFormatInfo.InvariantInfo, out operands[k]))
                {
                    return false;
                }
            }

            return true;
        }