Example #1
0
        /// <summary>
        /// Extracts the text of every page of the PDF located at <paramref name="path"/>
        /// inside a <c>Task.Run</c> work item and wraps the result in a <c>File</c>.
        /// </summary>
        /// <param name="path">Path of the PDF document to read.</param>
        /// <returns>
        /// A task producing a <c>File</c> whose <c>Path</c> is <paramref name="path"/>,
        /// whose <c>Mime</c> is <c>application/pdf</c> and whose <c>Content</c> is the
        /// text of all pages concatenated in page order.
        /// </returns>
        public Task <File> Convert(string path)
        {
            return Task.Run(() =>
            {
                var file = new File
                {
                    Path = path,
                    Mime = "application/pdf"
                };

                using (var document = new PdfDocument(new PdfReader(path)))
                {
                    int numOfPages = document.GetNumberOfPages();
                    var content = new StringBuilder();

                    // PDF page numbers are 1-based.
                    for (int i = 1; i <= numOfPages; i++)
                    {
                        // A LocationTextExtractionStrategy keeps every text chunk it has
                        // received, and PdfCanvasProcessor.Reset() does not clear it.
                        // Reusing one strategy for all pages would therefore return the
                        // text of pages 1..i on every iteration and duplicate earlier
                        // pages in the output, so a fresh strategy is used per page.
                        var extractionStrategy = new LocationTextExtractionStrategy();
                        var processor = new PdfCanvasProcessor(extractionStrategy);

                        processor.ProcessPageContent(document.GetPage(i));
                        content.Append(extractionStrategy.GetResultantText());
                    }

                    file.Content = content.ToString();
                }

                return file;
            });
        }
Example #2
0
        /// <summary>
        /// Reads the text found inside a fixed strip of the page anchored at the left
        /// margin and returns its non-blank lines. As a side effect, assigns
        /// <c>PositionOyAxis</c> from a second extraction pass over the same strip.
        /// </summary>
        /// <param name="page">Page to read from</param>
        /// <returns>Non-blank lines of the strip, one per appended line</returns>
        internal StringBuilder ParsingOyAxis(PdfPage page)
        {
            // Strip to read: x = left margin, y = bottom margin + 60, width = 20,
            // height = page height - bottom margin - 160.
            var region = new Rectangle(Margin.Left, Margin.Bottom + 60, 20,
                                       page.GetPageSize().GetHeight() - Margin.Bottom - 160);
            var regionFilter  = new TextRegionEventFilter(region);
            var eventListener = new FilteredEventListener();

            // Location-based text extraction restricted to the strip.
            LocationTextExtractionStrategy locationStrategy =
                eventListener.AttachEventListener(new LocationTextExtractionStrategy(),
                                                  regionFilter);

            // First pass: only the location strategy is attached at this point.
            lock (block)
            {
                var firstPass = new PdfCanvasProcessor(eventListener);
                firstPass.ProcessPageContent(page);
                firstPass.Reset();
            }

            // Keep every row that still has characters after trimming.
            var      output = new StringBuilder();
            string[] rows   = locationStrategy.GetResultantText().Split('\n');

            for (int index = 0; index < rows.Length; index++)
            {
                string row = rows[index];

                if (row.Trim().Length == 0)
                {
                    continue;
                }

                output.AppendLine(row);
            }

            // Second pass: the extra strategy is attached to the same listener and the
            // page is processed again so that it can collect its own result.
            TextExtractionStrategy positionStrategy =
                eventListener.AttachEventListener(new TextExtractionStrategy(), regionFilter);

            lock (block)
            {
                var secondPass = new PdfCanvasProcessor(eventListener);
                secondPass.ProcessPageContent(page);
                secondPass.Reset();
            }

            PositionOyAxis = positionStrategy.TextResult.ToArray();

            return output;
        }
Example #3
0
        /// <summary>
        /// Reads the text found inside a fixed rectangle near the top-left of the page
        /// and returns it line by line.
        /// </summary>
        /// <param name="page">Page to read from</param>
        /// <returns>Every line of the extracted text (blank ones included), one per appended line</returns>
        internal StringBuilder ParsingColumns(PdfPage page)
        {
            // Rectangle to read: x = left margin, y = page height - top margin - 70,
            // width = (page width - right margin) / 4, height = 10.
            var region = new Rectangle(Margin.Left,
                                       page.GetPageSize().GetHeight() - Margin.Top - 70,
                                       (page.GetPageSize().GetWidth() - Margin.Right) / 4, 10);
            var regionFilter  = new TextRegionEventFilter(region);
            var eventListener = new FilteredEventListener();

            // Location-based text extraction restricted to the rectangle.
            LocationTextExtractionStrategy locationStrategy =
                eventListener.AttachEventListener(new LocationTextExtractionStrategy(),
                                                  regionFilter);

            lock (block)
            {
                var processor = new PdfCanvasProcessor(eventListener);
                processor.ProcessPageContent(page);
                processor.Reset();
            }

            // Copy each row of the extracted text into the result unchanged.
            var      output = new StringBuilder();
            string[] rows   = locationStrategy.GetResultantText().Split('\n');

            for (int index = 0; index < rows.Length; index++)
            {
                output.AppendLine(rows[index]);
            }

            return output;
        }