/// <summary>
/// Click handler for the extract button. Reads the path typed into
/// <c>tbPdfPath</c> and, when that file exists, fills the extracted-info,
/// layout-view and layout-content text boxes from the PDF.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event arguments (not used).</param>
private void BtnExtractClick(object sender, RoutedEventArgs e)
{
    string selectedPath = tbPdfPath.Text;

    bool fileFound = File.Exists(selectedPath);
    if (!fileFound)
    {
        MessageBox.Show("PDF file does not exist!");
        return;
    }

    // Extracted metadata; the literal text "null" is shown when nothing came back.
    var extracted = ContentRetreivalUtils.GetExtractedInfo(selectedPath);
    string extractedText;
    if (extracted == null)
    {
        extractedText = "null";
    }
    else
    {
        extractedText = extracted.ToString();
    }
    tbExtractedInfo.Text = extractedText;

    // Two textual renderings of the same document.
    string layoutedView = PdfReadingUtils.ReadLayoutedView(selectedPath);
    tbLayoutView.Text = layoutedView;

    string visibleLayout = PdfReadingUtils.ReadVisibleLayoutContent(selectedPath);
    tbLayoutContent.Text = visibleLayout;

    // NOTE: the raw-content, HTML-content (with browser preview) and PDF-info
    // views are not populated here; the code that filled them was disabled.
}
/// <summary>
/// Rebuilds this collection from the <c>*.pdf</c> files found in <c>DirectoryPath</c>.
/// For every file a <c>PdfItemInfo</c> is added that carries the current file name and
/// a suggested name produced from <c>ReplacementPattern</c>, in which the placeholders
/// <c>$(title)</c>, <c>$(author)</c>, <c>$(publisher)</c> and <c>$(year)</c>
/// (case-insensitive, inner whitespace allowed) are replaced by the extracted values.
/// Files whose information could not be extracted, or whose title is reported invalid,
/// keep their original name and are marked as not valid.
/// </summary>
/// <exception cref="IOException">
/// <c>DirectoryPath</c> is null, empty, or does not refer to an existing directory.
/// </exception>
public void ReloadFolderContents()
{
    string folderName = DirectoryPath;
    if (String.IsNullOrEmpty(folderName) || !Directory.Exists(folderName))
    {
        throw new IOException(String.Format("No such directory exists: '{0}'", folderName));
    }

    Clear();

    // The pattern is turned into a composite format string below, so any literal
    // brace in it must be doubled first; otherwise String.Format would throw a
    // FormatException or treat the text as a format item. The $(...) placeholders
    // contain no braces, so they are unaffected by this step.
    string pattern = Regex.Replace(ReplacementPattern, @"[{}]", "$0$0");

    // Map each placeholder onto a positional format item.
    pattern = Regex.Replace(pattern, @"\$\(\s*title\s*\)", "{0}", RegexOptions.IgnoreCase);
    pattern = Regex.Replace(pattern, @"\$\(\s*author\s*\)", "{1}", RegexOptions.IgnoreCase);
    pattern = Regex.Replace(pattern, @"\$\(\s*publisher\s*\)", "{2}", RegexOptions.IgnoreCase);
    pattern = Regex.Replace(pattern, @"\$\(\s*year\s*\)", "{3}", RegexOptions.IgnoreCase);

    var pdfFiles = Directory.GetFiles(folderName, "*.pdf");
    foreach (var pdfFile in pdfFiles)
    {
        var pdfData = ContentRetreivalUtils.GetExtractedInfo(pdfFile);
        var pdfFileName = Path.GetFileName(pdfFile);
        var sugFileName = pdfFileName;

        bool isValid = pdfData != null && !pdfData.IsTitleInValid();
        if (isValid)
        {
            pdfData.CleanContent();

            // Missing values are rendered as the marker "[NULL]" so that the
            // clean-up expressions below can strip them together with the
            // punctuation that surrounded them in the pattern.
            string title = "[NULL]";
            if (!String.IsNullOrEmpty(pdfData.Title))
            {
                title = pdfData.Title;
            }

            string auths = "[NULL]";
            if (pdfData.AuthorNames != null && pdfData.AuthorNames.Count > 0)
            {
                auths = pdfData.AuthorsToString;
            }

            string pub = "[NULL]";
            if (!String.IsNullOrEmpty(pdfData.PubName))
            {
                pub = pdfData.PubName;
            }

            string year = "[NULL]";
            if (pdfData.Year > 0)
            {
                year = pdfData.Year.ToString(CultureInfo.InvariantCulture);
            }

            string newName = String.Format(pattern, title, auths, pub, year);

            // Marker(s) enclosed by punctuation on both sides: keep only the leading punctuation mark.
            newName = Regex.Replace(newName, @"(\p{P})(\p{P}*\s*\[NULL\]\s*)+\p{P}", "$1");
            // Marker at the very end: drop it together with the punctuation before it.
            newName = Regex.Replace(newName, @"\s*\p{P}+\s*\[NULL\]\s*$", "");
            // Marker at the very start: drop it together with the punctuation after it.
            newName = Regex.Replace(newName, @"^\s*\[NULL\]\s*\p{P}+\s*", "");
            // Whatever markers are still left are removed on their own.
            newName = newName.Replace("[NULL]", "");

            newName = newName.Trim() + ".pdf";
            sugFileName = PostProcessFileName(newName);
        }

        // True only when usable information was extracted and it leads to a different name.
        bool nameDiffers = isValid && (pdfFileName != sugFileName);

        var item = new PdfItemInfo(pdfFileName, sugFileName, nameDiffers);
        item.IsValid = isValid;
        if (!isValid)
        {
            item.Message = "Could not extract useful information from PDF content!";
        }
        else if (!nameDiffers)
        {
            item.Message = "The suggested name and the original name are already the same!";
        }

        Add(item);
    }
}