/// <summary>
/// Click handler that runs the extraction helpers against the PDF whose path
/// is typed into <c>tbPdfPath</c> and shows the results in the text boxes.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event arguments (not used).</param>
private void BtnExtractClick(object sender, RoutedEventArgs e)
{
    string path = tbPdfPath.Text;

    // Nothing to do without a file on disk; tell the user and bail out.
    if (!File.Exists(path))
    {
        MessageBox.Show("PDF file does not exist!");
        return;
    }

    // Extracted metadata: show the literal text "null" when nothing was extracted.
    var extracted = ContentRetreivalUtils.GetExtractedInfo(path);
    if (extracted == null)
    {
        tbExtractedInfo.Text = "null";
    }
    else
    {
        tbExtractedInfo.Text = extracted.ToString();
    }

    tbLayoutView.Text = PdfReadingUtils.ReadLayoutedView(path);
    tbLayoutContent.Text = PdfReadingUtils.ReadVisibleLayoutContent(path);

    // The raw-content (ReadRawContent), HTML meta-content (ReadHtmlMetaContent,
    // including the browser.NavigateToString preview) and PDF-info (ReadPdfInfo)
    // views are currently disabled.
}
/// <summary>
/// Builds a <see cref="PdfExtractedData"/> for the given PDF file.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Read page 1 and try to extract data from it.
/// 2. If that yields nothing, or a result whose title is invalid, read the whole
///    file and look for DOIs; when at least one is found the result is replaced
///    by a fresh object carrying only those DOIs.
/// 3. If there is a result, fill a negative year and a blank publication name
///    from information parsed out of the file name (without extension).
/// </remarks>
/// <param name="pdfFilePath">Path of the PDF file to inspect.</param>
/// <returns>
/// The extracted data, or <c>null</c> when nothing was extracted or when the
/// result has an invalid title and no DOI list.
/// </returns>
public static PdfExtractedData GetExtractedInfo(string pdfFilePath)
{
    // First attempt: first page only.
    PdfExtractedData result = ExtractFromUnknownPdf(
        BuildFirstPartPdfData(PdfReadingUtils.ReadPdfContent(pdfFilePath, 1)));

    // Fallback: scan the complete document for DOIs.
    if (result == null || result.IsTitleInValid())
    {
        var foundDois = FindDoi(PdfReadingUtils.ReadPdfContent(pdfFilePath));
        if (foundDois != null && foundDois.Count > 0)
        {
            result = new PdfExtractedData { Dois = foundDois };
        }
    }

    if (result == null)
    {
        return null;
    }

    // Use the file name to fill in whatever is still missing.
    int yearFromName;
    string pubFromName;
    string nameWithoutExtension = System.IO.Path.GetFileNameWithoutExtension(pdfFilePath);
    if (ExtractInfoFromFileName(nameWithoutExtension, out yearFromName, out pubFromName))
    {
        if (result.Year < 0)
        {
            result.Year = yearFromName;
        }

        if (string.IsNullOrWhiteSpace(result.PubName))
        {
            result.PubName = pubFromName;
        }
    }

    // A result is only worth returning with a valid title or a DOI list.
    bool isUsable = !result.IsTitleInValid() || result.Dois != null;
    return isUsable ? result : null;
}