コード例 #1
0
        /// <summary>
        /// Extracts the text found inside a rectangular region of one page of the PDF at <c>filepath</c>.
        /// </summary>
        /// <param name="page">Page number handed to <c>PdfTextExtractor.GetTextFromPage</c>.</param>
        /// <param name="area">
        /// Region to read. Every coordinate is multiplied by 72 before use
        /// (NOTE(review): presumably inches converted to PDF points - confirm).
        /// </param>
        /// <param name="landscape">
        /// When true the region is remapped first: X becomes 8.23 - Y - Height, Y becomes X,
        /// and Width/Height are swapped.
        /// </param>
        /// <returns>The extracted text followed by a line terminator (it is added with <c>AppendLine</c>).</returns>
        string GetTextByLocation(int page, RectangleJ area, bool landscape)
        {
            const float scale = 72.0f;

            // NOTE(review): 8.23 looks like the page extent used for the landscape remapping - confirm.
            float landscapeExtent = 8.23f;

            float left;
            float bottom;
            float width;
            float height;

            if (landscape)
            {
                left   = landscapeExtent - area.Y - area.Height;
                bottom = area.X;
                width  = area.Height;
                height = area.Width;
            }
            else
            {
                left   = area.X;
                bottom = area.Y;
                width  = area.Width;
                height = area.Height;
            }

            RectangleJ     region        = new RectangleJ(left * scale, bottom * scale, width * scale, height * scale);
            RenderFilter[] regionFilters = { new RegionTextRenderFilter(region) };
            StringBuilder  extracted     = new StringBuilder();

            using (PdfReader reader = new PdfReader(filepath))
            {
                ITextExtractionStrategy strategy = new FilteredTextRenderListener(new LocationTextExtractionStrategy(), regionFilters);
                string pageText = PdfTextExtractor.GetTextFromPage(reader, page, strategy);

                extracted.AppendLine(pageText);
            }

            return extracted.ToString();
        }
コード例 #2
0
        /// <summary>
        /// Decides whether a text chunk falls inside the rectangle held in <c>Rect</c>.
        /// </summary>
        /// <param name="textChunk">Chunk whose start and end locations are tested.</param>
        /// <returns>True only when both the start point and the end point are contained in the rectangle.</returns>
        public bool Accept(LocationTextExtractionStrategy.TextChunk textChunk)
        {
            var bounds = new RectangleJ(Rect);

            // Only components [0] and [1] of each location are tested.
            if (!bounds.Contains(textChunk.StartLocation[0], textChunk.StartLocation[1]))
            {
                return false;
            }

            return bounds.Contains(textChunk.EndLocation[0], textChunk.EndLocation[1]);
        }
コード例 #3
0
        /// <summary>
        /// Reads a table from one page as a list of rows, each row being an array of cell texts.
        /// </summary>
        /// <param name="page">Page number to read.</param>
        /// <param name="area">Area that contains the table.</param>
        /// <param name="rowHeight">Height of one table row.</param>
        /// <param name="rowHeightOffset">Extra height passed on to <c>GetRowArea</c> for every row.</param>
        /// <param name="columnWidth">Width of each column, in column order.</param>
        /// <param name="landscape">Passed on to <c>GetColumns</c> for every row.</param>
        /// <returns>One string array per row area, in the order <c>GetRowArea</c> produced the rows.</returns>
        public List <string[]> GetTableCells(int page, RectangleJ area, float rowHeight, float rowHeightOffset, float[] columnWidth, bool landscape)
        {
            List <RectangleJ> rows   = GetRowArea(page, area, rowHeight, rowHeightOffset);
            List <string[]>   result = new List <string[]>(rows.Count);

            foreach (RectangleJ row in rows)
            {
                result.Add(GetColumns(page, row, columnWidth, landscape));
            }

            return result;
        }
コード例 #4
0
        /// <summary>
        /// Checks whether the text found at <paramref name="stampLocation"/> on page 1 matches <paramref name="stamp"/>.
        /// </summary>
        /// <param name="stamp">Regular-expression pattern to look for; it is given to <c>Regex</c> unescaped.</param>
        /// <param name="stampLocation">Region of page 1 whose text is examined.</param>
        /// <param name="landscape">Passed on to <c>GetTextByLocation</c>.</param>
        /// <returns>True when the pattern matches the extracted text.</returns>
        bool CheckCompanyName(string stamp, RectangleJ stampLocation, bool landscape)
        {
            // The pattern is built before the text is extracted, so an invalid pattern fails first.
            Regex  pattern   = new Regex(stamp);
            string stampText = GetTextByLocation(1, stampLocation, landscape);

            return pattern.IsMatch(stampText);
        }
コード例 #5
0
        /**
         * Computes the overlap of two rectangles by converting both to RectangleJ.
         * @param rect1 first rectangle
         * @param rect2 second rectangle
         * @return null if the intersection is empty, otherwise a Rectangle built from the intersection
         */
        private Rectangle Intersection(Rectangle rect1, Rectangle rect2)
        {
            RectangleJ overlap = new RectangleJ(rect1).Intersection(new RectangleJ(rect2));

            if (overlap.IsEmpty())
            {
                return null;
            }

            return new Rectangle(overlap);
        }
コード例 #6
0
        /// <summary>
        /// Builds the rectangle spanning the overlap of this rectangle and <paramref name="r"/>.
        /// </summary>
        /// <param name="r">Rectangle to intersect with.</param>
        /// <returns>
        /// A new rectangle; when the two do not overlap its width and/or height come out
        /// as zero or negative because no clamping is applied.
        /// </returns>
        public RectangleJ Intersection(RectangleJ r)
        {
            float startX = Math.Max(x, r.x);
            float startY = Math.Max(y, r.y);
            float endX   = Math.Min(x + width, r.x + r.width);
            float endY   = Math.Min(y + height, r.y + r.height);

            float overlapWidth  = endX - startX;
            float overlapHeight = endY - startY;

            return new RectangleJ(startX, startY, overlapWidth, overlapHeight);
        }
コード例 #7
0
 /// <summary>
 /// Grows this rectangle so that it covers both its current extent and <paramref name="rect"/>.
 /// </summary>
 /// <param name="rect">Rectangle to include.</param>
 public void Add(RectangleJ rect) {
     // Both edges of each rectangle are considered, so a negative width or height is tolerated.
     float ownMinX   = Math.Min(x, x + width);
     float otherMinX = Math.Min(rect.x, rect.x + rect.width);
     float ownMaxX   = Math.Max(x, x + width);
     float otherMaxX = Math.Max(rect.x, rect.x + rect.width);
     float ownMinY   = Math.Min(y, y + height);
     float otherMinY = Math.Min(rect.y, rect.y + rect.height);
     float ownMaxY   = Math.Max(y, y + height);
     float otherMaxY = Math.Max(rect.y, rect.y + rect.height);

     float minX = Math.Min(ownMinX, otherMinX);
     float maxX = Math.Max(ownMaxX, otherMaxX);
     float minY = Math.Min(ownMinY, otherMinY);
     float maxY = Math.Max(ownMaxY, otherMaxY);

     x = minX;
     y = minY;
     width = maxX - minX;
     height = maxY - minY;
 }
コード例 #8
0
        /// <summary>
        /// Replaces this rectangle with the smallest rectangle that encloses both it and <paramref name="rect"/>.
        /// </summary>
        /// <param name="rect">Rectangle to merge into this one.</param>
        public void Add(RectangleJ rect)
        {
            // Edges are computed from x and x + width (likewise for y) on both rectangles,
            // so the result is the same whichever sign width and height have.
            float left   = Math.Min(Math.Min(x, x + width), Math.Min(rect.x, rect.x + rect.width));
            float low    = Math.Min(Math.Min(y, y + height), Math.Min(rect.y, rect.y + rect.height));
            float right  = Math.Max(Math.Max(x, x + width), Math.Max(rect.x, rect.x + rect.width));
            float high   = Math.Max(Math.Max(y, y + height), Math.Max(rect.y, rect.y + rect.height));

            // All four bounds are read before any field is overwritten.
            width  = right - left;
            height = high - low;
            x      = left;
            y      = low;
        }
コード例 #9
0
        /// <summary>
        /// Tests whether every given point lies inside a rectangle.
        /// </summary>
        /// <param name="rect">Rectangle to test against.</param>
        /// <param name="points">Points to check.</param>
        /// <returns>False as soon as one point is not contained; true otherwise, including when no points are given.</returns>
        private static bool ContainsAll(RectangleJ rect, params Point2D[] points)
        {
            for (int index = 0; index < points.Length; index++)
            {
                bool inside = rect.Contains(points[index]);

                if (!inside)
                {
                    return false;
                }
            }

            return true;
        }
コード例 #10
0
        /**
         * Method invoked by the PdfContentStreamProcessor.
         * Receives a TextRenderInfo for every text chunk that is encountered and
         * extends the tracked text rectangle with the bounds of its descent and ascent lines.
         * @see com.itextpdf.text.pdf.parser.RenderListener#renderText(com.itextpdf.text.pdf.parser.TextRenderInfo)
         */
        virtual public void RenderText(TextRenderInfo renderInfo)
        {
            var descentBounds = renderInfo.GetDescentLine().GetBoundingRectange();

            // The first chunk seeds the rectangle; later chunks only enlarge it.
            if (textRectangle != null)
            {
                textRectangle.Add(descentBounds);
            }
            else
            {
                textRectangle = descentBounds;
            }

            var ascentBounds = renderInfo.GetAscentLine().GetBoundingRectange();

            textRectangle.Add(ascentBounds);
        }
コード例 #11
0
        /// <summary>
        /// Splits an area into row rectangles, starting with the row at the highest Y
        /// and stepping down by <paramref name="rowHeight"/> while the row's Y is not below the area's Y.
        /// </summary>
        /// <param name="page">Not used by this method.</param>
        /// <param name="area">Area to split; every row takes its X and Width from it.</param>
        /// <param name="rowHeight">Distance between consecutive rows; must be positive.</param>
        /// <param name="rowHeightOffset">Twice this value is added to the height of every row rectangle.</param>
        /// <returns>The row rectangles, from highest Y to lowest Y.</returns>
        /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="rowHeight"/> is zero or negative.</exception>
        List <RectangleJ> GetRowArea(int page, RectangleJ area, float rowHeight, float rowHeightOffset)
        {
            // A non-positive step never moves y below area.Y, so the loop would add rows
            // until memory runs out. Fail fast instead.
            if (rowHeight <= 0f)
            {
                throw new System.ArgumentOutOfRangeException("rowHeight", rowHeight, "Row height must be greater than zero.");
            }

            List <RectangleJ> result = new List <RectangleJ>();

            float x = area.X;
            float y = area.Y + area.Height - rowHeight;

            for (; y >= area.Y; y -= rowHeight)
            {
                // NOTE(review): the height grows by 2 * rowHeightOffset but y is not shifted; confirm this is intended.
                result.Add(new RectangleJ(x, y, area.Width, rowHeight + 2 * rowHeightOffset));
            }

            return result;
        }
コード例 #12
0
        /// <summary>
        /// Reads the text of every column of one table row.
        /// </summary>
        /// <param name="page">Page number to read.</param>
        /// <param name="row">Rectangle of the row; the first column starts at its X.</param>
        /// <param name="columnWidth">Width of each column, in order. May be empty.</param>
        /// <param name="landscape">Passed on to <c>GetTextByLocation</c>.</param>
        /// <returns>The trimmed text of each column; an empty array when no widths are given.</returns>
        string[] GetColumns(int page, RectangleJ row, float[] columnWidth, bool landscape)
        {
            string[] result = new string[columnWidth.Length];

            // The width is assigned inside the loop, so the cursor starts at zero width.
            // Reading columnWidth[0] here would throw for an empty width array.
            RectangleJ column = new RectangleJ(row.X, row.Y, 0f, row.Height);

            for (int i = 0; i < columnWidth.Length; i++)
            {
                column.Width = columnWidth[i];
                result[i]    = GetTextByLocation(page, column, landscape).Trim();

                // Move the cursor to the start of the next column.
                column.X += columnWidth[i];
            }

            return result;
        }
コード例 #13
0
ファイル: CLS_PDFOCR.cs プロジェクト: jbalam5/ProjectTest
        /// <summary>
        /// Reads a table from a PDF, cell by cell, on every page of the document.
        /// </summary>
        /// <param name="pdf">Path of the PDF.</param>
        /// <param name="origemXPag1">Start of the reading on the X axis for the first page.</param>
        /// <param name="origemYPag1">Start of the reading on the Y axis for the first page.</param>
        /// <param name="linhasPag1">Number of rows on the first page.</param>
        /// <param name="origemXOutrasPag">Start of the reading on the X axis for the remaining pages.</param>
        /// <param name="origemYOutrasPag">Start of the reading on the Y axis for the remaining pages.</param>
        /// <param name="linhasOutrasPag">Number of rows on the remaining pages.</param>
        /// <param name="alturaLinha">Height of a row.</param>
        /// <param name="colunas">Name and width of the columns.</param>
        /// <returns>One dictionary per row, mapping each column name to the text extracted for it.</returns>
        private List <Dictionary <string, string> > ReadTabelaPDF(string pdf, float origemXPag1, float origemYPag1, int linhasPag1, float origemXOutrasPag, float origemYOutrasPag, int linhasOutrasPag, float alturaLinha, Dictionary <string, float> colunas)
        {
            var resultado = new List <Dictionary <string, string> >();

            using (iTextSharp.text.pdf.PdfReader leitor = new iTextSharp.text.pdf.PdfReader(pdf))
            {
                for (int i = 1; i <= leitor.NumberOfPages; i++)
                {
                    // The first page has its own origin and row count; every other page shares one layout.
                    bool  primeiraPagina   = i == 1;
                    float origemX          = primeiraPagina ? origemXPag1 : origemXOutrasPag;
                    float origemY          = primeiraPagina ? origemYPag1 : origemYOutrasPag;
                    int   quantidadeLinhas = primeiraPagina ? linhasPag1 : linhasOutrasPag;

                    for (int l = 0; l < quantidadeLinhas; l++)
                    {
                        var   dados         = new Dictionary <string, string>();
                        float deslocamentoX = 0;

                        // NOTE(review): columns are laid out left to right in the enumeration order of
                        // 'colunas'; Dictionary does not document that order, so confirm callers rely
                        // on insertion order only.
                        foreach (var coluna in colunas)
                        {
                            RectangleJ rect = new RectangleJ(origemX + deslocamentoX, origemY + (l * alturaLinha), coluna.Value, alturaLinha);
                            iTextSharp.text.pdf.parser.RenderFilter            filter   = new iTextSharp.text.pdf.parser.RegionTextRenderFilter(rect);
                            iTextSharp.text.pdf.parser.ITextExtractionStrategy strategy = new iTextSharp.text.pdf.parser.FilteredTextRenderListener(new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy(), filter);
                            string texto = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(leitor, i, strategy);

                            dados.Add(coluna.Key, texto);

                            // The next column starts where this one ends.
                            deslocamentoX += coluna.Value;
                        }

                        resultado.Add(dados);
                    }
                }
            }

            return resultado;
        }
コード例 #14
0
ファイル: Linear.cs プロジェクト: kael89/statement-analyzer
        /// <summary>
        /// Looks on page 2 for the row whose first cell contains "Detailed Cash Flow"
        /// and returns the number written in its second cell.
        /// </summary>
        /// <returns>
        /// The parsed number, or -1 when no matching row has a second cell that parses as an integer.
        /// </returns>
        int GetTransactionsPage()
        {
            RectangleJ area      = new RectangleJ(1.09f, 0.45f, 8f, 6.09f);
            float      rowHeight = 0.31f;

            float[] columnWidth = new float[] { 6f, 2f };

            List <string[]> contents = GetTableCells(2, area, rowHeight, columnWidth, true);

            foreach (string[] row in contents)
            {
                if (!Regex.IsMatch(row[0], "Detailed Cash Flow"))
                {
                    continue;
                }

                // The cell holds text extracted from the PDF, so it may not be a clean number.
                // Treat an unparsable cell as "not found" rather than throwing.
                int pageNumber;
                if (int.TryParse(row[1], out pageNumber))
                {
                    return pageNumber;
                }
            }

            return -1;
        }
コード例 #15
0
ファイル: Converter.cs プロジェクト: zulfiqar1982/zulfiqar
        /// <summary>
        /// Extracts, for every page of <c>Document</c>, the text located inside the fixed
        /// rectangle (0, 0, 2000, 1800).
        /// </summary>
        /// <returns>The text of all pages, each page followed by a line terminator.</returns>
        public string ReadFromPosistionIText()
        {
            RenderFilter[] regionFilters = { new RegionTextRenderFilter(new RectangleJ(0, 0, 2000, 1800)) };
            StringBuilder  pages         = new StringBuilder();

            int pageNumber = 1;

            while (pageNumber <= Document.NumberOfPages)
            {
                // A fresh strategy is created for every page.
                ITextExtractionStrategy strategy = new FilteredTextRenderListener(new LocationTextExtractionStrategy(), regionFilters);
                string pageText = PdfTextExtractor.GetTextFromPage(Document, pageNumber, strategy);

                pages.AppendLine(pageText);
                pageNumber++;
            }

            return pages.ToString();
        }
コード例 #16
0
ファイル: Linear.cs プロジェクト: kael89/statement-analyzer
        /// <summary>
        /// Finds the row of the transactions area whose first cell contains "MA_CASH".
        /// </summary>
        /// <param name="page">Page number to search.</param>
        /// <returns>The 1-based position of that row among the rows read, or 0 when no row matches.</returns>
        int GetRowsToHeader(int page)
        {
            const float headerColumnWidth = 0.73f;

            // Only the first column is read: the area is narrowed to a single column of that width.
            // NOTE(review): this writes to the object returned by GetTransactionsArea - confirm it is a fresh instance.
            RectangleJ area = GetTransactionsArea(page);
            area.Width = headerColumnWidth;

            List <string[]> rows = GetTableCells(page, area, rowHeight, new float[] { headerColumnWidth }, true);

            for (int index = 0; index < rows.Count; index++)
            {
                if (Regex.IsMatch(rows[index][0], "MA_CASH"))
                {
                    return index + 1;
                }
            }

            return 0;
        }
コード例 #17
0
ファイル: UtilFileBradesco.cs プロジェクト: lrenzi/dev
        /// <summary>
        /// Returns the text found inside a rectangle on one page of a PDF.
        /// </summary>
        /// <param name="CaminhoArquivo">Path of the file to read.</param>
        /// <param name="Posicoes">
        /// Array with four numeric strings (indexes 0 to 3), parsed with <c>float.Parse</c> and passed
        /// in that order to the <c>RectangleJ</c> constructor.
        /// </param>
        /// <param name="Pagina">Number of the page that holds the data.</param>
        /// <returns>The text found, or a single space when the extracted text is empty.</returns>
        private string RetornarValor(String CaminhoArquivo, string[] Posicoes, int Pagina)
        {
            // The using statement disposes the reader, so no explicit Close/Dispose is needed.
            using (PdfReader pdfReader = new PdfReader(CaminhoArquivo))
            {
                // NOTE(review): float.Parse uses the current culture; confirm the strings are formatted for it.
                RectangleJ     rect         = new RectangleJ(float.Parse(Posicoes[0]), float.Parse(Posicoes[1]), float.Parse(Posicoes[2]), float.Parse(Posicoes[3]));
                RenderFilter[] renderFilter = { new RegionTextRenderFilter(rect) };

                ITextExtractionStrategy textExtractionStrategy = new FilteredTextRenderListener(new LocationTextExtractionStrategy(), renderFilter);
                string retorno = PdfTextExtractor.GetTextFromPage(pdfReader, Pagina, textExtractionStrategy);

                return retorno == string.Empty ? " " : retorno;
            }
        }
コード例 #18
0
 /**
  * Constructs a filter from an iTextSharp rectangle.
  * @param filterRect the rectangle to filter text against; it is converted to a RectangleJ.
  */
 public RegionTextRenderFilter(iTextSharp.text.Rectangle filterRect)
 {
     RectangleJ region = new RectangleJ(filterRect);

     this.filterRect = region;
 }
コード例 #19
0
 /**
  * Constructs a filter that keeps a reference to the given rectangle (no copy is made).
  * @param filterRect the rectangle to filter text against. Note that this is a RectangleJ,
  *                   not an iTextSharp.text.Rectangle.
  */
 public RegionTextRenderFilter(RectangleJ filterRect)
 {
     this.filterRect = filterRect;
 }
コード例 #20
0
 /// <summary>
 /// Reads the cells of a table, forwarding to the six-argument overload with <c>landscape</c> set to false.
 /// </summary>
 /// <param name="page">Page number to read.</param>
 /// <param name="area">Area that contains the table.</param>
 /// <param name="rowHeight">Height of one table row.</param>
 /// <param name="rowHeightOffset">Row height offset, forwarded unchanged.</param>
 /// <param name="columnWidth">Width of each column, in order.</param>
 /// <returns>Whatever the six-argument overload returns.</returns>
 public List <string[]> GetTableCells(int page, RectangleJ area, float rowHeight, float rowHeightOffset, float[] columnWidth)
 {
     return GetTableCells(page, area, rowHeight, rowHeightOffset, columnWidth, landscape: false);
 }
コード例 #21
0
 /// <summary>
 /// Returns the rectangle shared by this rectangle and <paramref name="r"/>.
 /// </summary>
 /// <param name="r">Rectangle to intersect with.</param>
 /// <returns>
 /// A new rectangle; its width and/or height are zero or negative when the two do not overlap,
 /// since the differences are not clamped.
 /// </returns>
 public RectangleJ Intersection(RectangleJ r) {
     float nearX = Math.Max(x, r.x);
     float nearY = Math.Max(y, r.y);
     float farX  = Math.Min(x + width, r.x + r.width);
     float farY  = Math.Min(y + height, r.y + r.height);

     RectangleJ overlap = new RectangleJ(nearX, nearY, farX - nearX, farY - nearY);
     return overlap;
 }
コード例 #22
0
 /// <summary>
 /// Reads the cells of a table, forwarding to the six-argument overload with a row height offset of zero.
 /// </summary>
 /// <param name="page">Page number to read.</param>
 /// <param name="area">Area that contains the table.</param>
 /// <param name="rowHeight">Height of one table row.</param>
 /// <param name="columnWidth">Width of each column, in order.</param>
 /// <param name="landscape">Forwarded unchanged.</param>
 /// <returns>Whatever the six-argument overload returns.</returns>
 public List <string[]> GetTableCells(int page, RectangleJ area, float rowHeight, float[] columnWidth, bool landscape)
 {
     const float noRowHeightOffset = 0f;

     return GetTableCells(page, area, rowHeight, noRowHeightOffset, columnWidth, landscape);
 }
コード例 #23
0
ファイル: Linear.cs プロジェクト: kael89/statement-analyzer
        /// <summary>
        /// Collects the transaction rows of the statement, scanning pages from
        /// <c>transactionsPageNumber</c> up to <c>numberOfPages</c>.
        /// </summary>
        /// <returns>
        /// The rows whose first cell contains a dd/mm/yyyy date. In each of them the date is rewritten
        /// as yyyy-mm-dd and the third cell loses its first character (it becomes empty when it is "-"
        /// or already empty).
        /// </returns>
        public override List <string[]> GetTransactions()
        {
            List <string[]> result = new List <string[]>();

            bool headerFound     = false;
            bool transactionsEnd = false;

            for (int page = transactionsPageNumber; page <= numberOfPages && !transactionsEnd; page++)
            {
                int rowsToHeader = 0;

                // Find where the transactions start; pages before the header are skipped.
                if (!headerFound)
                {
                    rowsToHeader = GetRowsToHeader(page);
                    headerFound  = rowsToHeader > 0;
                }

                if (!headerFound)
                {
                    continue;
                }

                // On the page where the header was found the area height is reduced by
                // rowsToHeader rows; on later pages rowsToHeader is 0 and the area is unchanged.
                RectangleJ area = GetTransactionsArea(page);
                area.Height -= rowsToHeader * rowHeight;

                List <string[]> rows = GetTableCells(page, area, rowHeight, transactionsColumnWidth, true);

                foreach (string[] row in rows)
                {
                    if (Regex.Match(row[1], "Opening Balance").Success || Regex.Match(row[0], "MA_CASH").Success)
                    {
                        continue;
                    }

                    Match match = Regex.Match(row[0], @"(\d{2})\/(\d{2})\/(\d{4})");

                    if (match.Success)
                    {
                        // Format the date as yyyy-mm-dd.
                        row[0] = match.Groups[3].Value + "-" + match.Groups[2].Value + "-" + match.Groups[1].Value;

                        // Substring(1) throws on an empty string, so an empty cell is kept empty.
                        // NOTE(review): the dropped first character is presumably a sign or currency symbol - confirm.
                        row[2] = (row[2] == "-" || row[2].Length == 0) ? "" : row[2].Substring(1);
                        result.Add(row);
                    }
                    else if (row[0] != "" && !Regex.Match(row[0], "Code").Success)
                    {
                        // A non-empty first cell that is neither a date nor "Code" ends the transactions.
                        transactionsEnd = true;
                        break;
                    }
                }
            }

            return result;
        }
コード例 #24
0
        /// <summary>
        /// Collects the transaction rows found between a "STATEMENT OPENING BALANCE" row and the
        /// following "CLOSING BALANCE" row, over all pages.
        /// </summary>
        /// <returns>
        /// The rows whose first cell contains a day and month abbreviation, with that cell rewritten as
        /// day-month-year. The year is the first cell of the latest opening-balance row ("2015" until
        /// one is seen). Rows are only collected while an opening-balance row has been seen and not yet closed.
        /// </returns>
        public override List <string[]> GetTransactions()
        {
            Regex openingPattern = new Regex("STATEMENT OPENING BALANCE");
            Regex closingPattern = new Regex("CLOSING BALANCE");
            Regex datePattern    = new Regex(@"(\d{2})(\s|-)(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)");

            List <string[]> result = new List <string[]>();

            string     year             = "2015";
            bool       insideStatement  = false;
            RectangleJ transactionsArea = transactionsFirstPage;

            for (int page = 1; page <= numberOfPages; page++)
            {
                // The area used here was chosen while reading the previous page.
                List <string[]> rows         = GetTableCells(page, transactionsArea, rowHeight, transactionsColumnWidth);
                bool            closingFound = false;

                for (int i = 0; i < rows.Count; i++)
                {
                    string[] row = rows[i];

                    RemoveDots(row);

                    if (openingPattern.IsMatch(row[1]))
                    {
                        // The opening row carries the year in its first cell.
                        insideStatement  = true;
                        transactionsArea = transactionsGeneralPage;
                        year             = row[0];
                        continue;
                    }

                    if (closingPattern.IsMatch(row[1]))
                    {
                        // The rest of this page is ignored.
                        transactionsArea = transactionsFirstPage;
                        closingFound     = true;
                        break;
                    }

                    Match dateMatch = datePattern.Match(row[0]);

                    if (!dateMatch.Success)
                    {
                        // A row without a date that directly follows a dated row is folded into that row.
                        bool continuesPrevious = row[0] == "" && i > 0 && datePattern.IsMatch(rows[i - 1][0]);

                        if (continuesPrevious)
                        {
                            string[] previous = rows[i - 1];

                            previous[1] += " " + row[1];
                            previous[2]  = row[2];
                            previous[3]  = row[3];
                        }

                        continue;
                    }

                    row[0] = dateMatch.Groups[1].Value + "-" + dateMatch.Groups[3].Value + "-" + year;

                    if (insideStatement)
                    {
                        result.Add(row);
                    }
                }

                if (closingFound)
                {
                    // A closed statement stops collection until the next opening row.
                    insideStatement = false;
                }
            }

            return result;
        }
コード例 #25
0
ファイル: Rectangle.cs プロジェクト: rrossenbg/vprint
 /**
  * Constructs a <CODE>Rectangle</CODE>-object covering the same region as a <CODE>RectangleJ</CODE>.
  * The four values handed to the other constructor are X, Y, X + Width and Y + Height.
  * @param rect the RectangleJ to take the region from
  */
 public Rectangle(RectangleJ rect)
     : this(rect.X, rect.Y, rect.X + rect.Width, rect.Y + rect.Height)
 {
 }