/// <summary>
/// Reads the page size of a PDF file from the "Page size:" line of the text returned by <c>GetPdfInfo</c>.
/// </summary>
/// <param name="pdfFilePath">PDF location on disk.</param>
/// <returns>
/// The parsed width, height and unit, together with the unmodified "Page size:" line.
/// </returns>
/// <exception cref="System.FormatException">
/// The info text contains no "Page size:" line, that line has fewer than six
/// space-separated tokens, or its dimensions are not numeric.
/// </exception>
private async Task<PdfPageSizeInfo> GetPdfPageSize(string pdfFilePath)
{
    var info = await GetPdfInfo(pdfFilePath);

    // Split on both line-break characters so CRLF text is handled the same on every platform.
    var infoLines = (info ?? string.Empty).Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    var sizeString = infoLines.FirstOrDefault(line => line.StartsWith("Page size:", StringComparison.Ordinal));
    if (sizeString is null)
    {
        throw new FormatException("No \"Page size:\" line found in the PDF info of '" + pdfFilePath + "'.");
    }

    // Expected token layout (e.g. "Page size: 595.276 x 841.89 pts"):
    // [0]="Page" [1]="size:" [2]=width [3]="x" [4]=height [5]=unit
    var splitSize = sizeString.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    if (splitSize.Length < 6)
    {
        throw new FormatException("Unexpected page size format: '" + sizeString + "'.");
    }

    // Accept either '.' or ',' as decimal separator (as before), but parse with the
    // invariant culture instead of depending on a specific installed culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var numberStyle = System.Globalization.NumberStyles.Float;

    var retVal = new PdfPageSizeInfo();
    retVal.OriginalSizeString = sizeString;
    retVal.SizeX = float.Parse(splitSize[2].Replace(',', '.'), numberStyle, invariant);
    retVal.SizeY = float.Parse(splitSize[4].Replace(',', '.'), numberStyle, invariant);
    retVal.Unit = splitSize[5];
    return retVal;
}
/// <summary>
/// Gets the text of a PDF that lies within a given area of a single page.
/// </summary>
/// <param name="pdfFilePath">PDF location on disk.</param>
/// <param name="cropAreaInfo">Percentual area which is to be extracted.</param>
/// <param name="pdfPageSizeInfo">Size information of the PDF file. Used to calculate the absolute area from the percental area.</param>
/// <returns>
/// The extracted text with one trailing line break removed, or an empty string
/// when the extraction returned null or no text.
/// </returns>
public async Task<string> GetTextFromPdf(string pdfFilePath, PercentalAreaInfo cropAreaInfo, PdfPageSizeInfo pdfPageSizeInfo)
{
    // Convert the relative crop rectangle into absolute, whole-number page coordinates.
    var x = (int)Math.Round(cropAreaInfo.TopLeftX * pdfPageSizeInfo.SizeX, 0);
    var y = (int)Math.Round(cropAreaInfo.TopLeftY * pdfPageSizeInfo.SizeY, 0);
    var width = (int)Math.Round(cropAreaInfo.Width * pdfPageSizeInfo.SizeX, 0);
    var height = (int)Math.Round(cropAreaInfo.Height * pdfPageSizeInfo.SizeY, 0);

    // Format with the invariant culture so numbers on the command line never
    // pick up culture-specific symbols (e.g. a non-ASCII minus sign).
    var pdfToTextOptions = string.Format(
        System.Globalization.CultureInfo.InvariantCulture,
        " -f {0} -l {0} -x {1} -y {2} -W {3} -H {4} -layout -nopgbrk ",
        cropAreaInfo.PageNumber, x, y, width, height);

    var text = await GetTextFromPdf(pdfFilePath, false, pdfToTextOptions);
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    // Remove the last line break, as it is added by poppler and does not represent
    // the selected area. Only an actual line terminator is stripped, so text that
    // ends with "\n" alone (or with no line break) does not lose real characters.
    if (text.EndsWith("\r\n", StringComparison.Ordinal))
    {
        return text.Substring(0, text.Length - 2);
    }

    if (text[text.Length - 1] == '\n' || text[text.Length - 1] == '\r')
    {
        return text.Substring(0, text.Length - 1);
    }

    return text;
}