Example #1
0
        /// <summary>
        /// Reads the page size of a PDF from the "Page size:" line of the text returned by <c>GetPdfInfo</c>.
        /// </summary>
        /// <param name="pdfFilePath">Path of the PDF file; passed through to <c>GetPdfInfo</c>.</param>
        /// <returns>The parsed width, height and unit, together with the unmodified "Page size:" line.</returns>
        /// <exception cref="FormatException">
        /// No info text or no "Page size:" line was available, the line has fewer tokens than expected,
        /// or one of the dimensions is not a valid number.
        /// </exception>
        private async Task <PdfPageSizeInfo> GetPdfPageSize(string pdfFilePath)
        {
            var info = await GetPdfInfo(pdfFilePath);

            if (info is null)
            {
                throw new FormatException("No PDF info text was returned for '" + pdfFilePath + "'.");
            }

            // Ordinal comparison: "Page size:" is a fixed marker, not linguistic text.
            var sizeString = info
                .Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries)
                .FirstOrDefault(x => x.StartsWith("Page size:", StringComparison.Ordinal));

            if (sizeString is null)
            {
                throw new FormatException("The PDF info text for '" + pdfFilePath + "' contains no 'Page size:' line.");
            }

            var splitSize = sizeString.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            // Tokens 0 and 1 are "Page" and "size:"; width, height and unit are read from
            // tokens 2, 4 and 5 (token 3 is presumably the "x" separator - verify against the tool output).
            if (splitSize.Length < 6)
            {
                throw new FormatException("Unexpected 'Page size:' line format: '" + sizeString + "'.");
            }

            // Normalize a decimal comma to a decimal point so that the value parses the same way
            // regardless of which separator the info text uses, independent of the machine's culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var retVal = new PdfPageSizeInfo();

            retVal.OriginalSizeString = sizeString;
            retVal.SizeX              = float.Parse(splitSize[2].Replace(',', '.'), invariant);
            retVal.SizeY              = float.Parse(splitSize[4].Replace(',', '.'), invariant);
            retVal.Unit               = splitSize[5];

            return retVal;
        }
Example #2
0
        /// <summary>
        /// Gets the text of a PDF that lies within a given area of a single page.
        /// </summary>
        /// <param name="pdfFilePath">PDF location on disk.</param>
        /// <param name="cropAreaInfo">Page number and relative area which is to be extracted.</param>
        /// <param name="pdfPageSizeInfo">Size information of the PDF file. Used to calculate the absolute area from the relative area.</param>
        /// <returns>
        /// The extracted text without its trailing line break, or an empty string if no text was returned.
        /// </returns>
        public async Task <string> GetTextFromPdf(string pdfFilePath, PercentalAreaInfo cropAreaInfo, PdfPageSizeInfo pdfPageSizeInfo)
        {
            // Scale the relative crop area by the page size to get absolute, whole-number coordinates.
            var left   = (int)Math.Round(cropAreaInfo.TopLeftX * pdfPageSizeInfo.SizeX, 0);
            var top    = (int)Math.Round(cropAreaInfo.TopLeftY * pdfPageSizeInfo.SizeY, 0);
            var width  = (int)Math.Round(cropAreaInfo.Width * pdfPageSizeInfo.SizeX, 0);
            var height = (int)Math.Round(cropAreaInfo.Height * pdfPageSizeInfo.SizeY, 0);

            // First and last page are both set to the requested page, so only that page is processed.
            var pdfToTextOptions = $" -f {cropAreaInfo.PageNumber} -l {cropAreaInfo.PageNumber} -x {left} -y {top} -W {width} -H {height} -layout -nopgbrk ";

            var retVal = await GetTextFromPdf(pdfFilePath, false, pdfToTextOptions);

            // Check for null before touching the string; the check used to come after a dereference.
            if (string.IsNullOrEmpty(retVal))
            {
                return string.Empty;
            }

            // Remove the last line break, as it is added by poppler and does not represent the selected area.
            // Only an actual trailing line break is removed ("\r\n" or "\n"), so text that ends
            // differently does not lose real characters.
            if (retVal.EndsWith("\r\n", StringComparison.Ordinal))
            {
                return retVal.Substring(0, retVal.Length - 2);
            }

            if (retVal.EndsWith("\n", StringComparison.Ordinal))
            {
                return retVal.Substring(0, retVal.Length - 1);
            }

            return retVal;
        }