Exemple #1
0
        /// <summary>
        /// Click handler for the extract button. Reads the path typed into
        /// <c>tbPdfPath</c> and, if that file exists, fills the extracted-info,
        /// layout-view and layout-content text boxes from it.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void BtnExtractClick(object sender, RoutedEventArgs e)
        {
            string selectedPdf = tbPdfPath.Text;

            // Nothing can be read from a missing file: tell the user and stop.
            bool fileIsThere = File.Exists(selectedPdf);
            if (!fileIsThere)
            {
                MessageBox.Show("PDF file does not exist!");
                return;
            }

            // Show the extracted data, or the literal text "null" when none came back.
            var extracted = ContentRetreivalUtils.GetExtractedInfo(selectedPdf);
            if (extracted == null)
            {
                tbExtractedInfo.Text = "null";
            }
            else
            {
                tbExtractedInfo.Text = extracted.ToString();
            }

            tbLayoutView.Text    = PdfReadingUtils.ReadLayoutedView(selectedPdf);
            tbLayoutContent.Text = PdfReadingUtils.ReadVisibleLayoutContent(selectedPdf);

            // NOTE: the raw-content (tbRawContent), HTML meta-content
            // (tbHtmlContent / browser.NavigateToString) and PDF-info (tbPdfInfo)
            // views are currently disabled and are not populated here.
        }
        /// <summary>
        /// Clears the current items and adds one <c>PdfItemInfo</c> for every
        /// <c>*.pdf</c> file found directly inside <c>DirectoryPath</c>.
        /// For each file whose extracted data is usable, a suggested file name is
        /// built from <c>ReplacementPattern</c>, in which the case-insensitive
        /// placeholders <c>$(title)</c>, <c>$(author)</c>, <c>$(publisher)</c> and
        /// <c>$(year)</c> are substituted with the extracted values.
        /// </summary>
        /// <exception cref="IOException">
        /// <c>DirectoryPath</c> is null, empty, or does not name an existing directory.
        /// </exception>
        public void ReloadFolderContents()
        {
            // Marker substituted for any field that could not be extracted; it is
            // stripped again (together with adjacent punctuation) further down.
            const string Missing = "[NULL]";

            string folderName = DirectoryPath;

            if (String.IsNullOrEmpty(folderName) || !Directory.Exists(folderName))
            {
                throw new IOException(String.Format("No such directory exists: '{0}'", folderName));
            }

            Clear();

            // Double every literal brace BEFORE introducing the {n} format items.
            // Otherwise String.Format would treat user-typed '{' / '}' as format
            // syntax: "$(title) {$(year)}" would render as "Title {3}", and an
            // unbalanced brace would throw FormatException.
            // ("$0$0" emits the matched character twice.)
            string pattern = Regex.Replace(ReplacementPattern, @"[{}]", "$0$0");

            // Turn the $(name) placeholders into positional format items.
            pattern = Regex.Replace(pattern, @"\$\(\s*title\s*\)", "{0}", RegexOptions.IgnoreCase);
            pattern = Regex.Replace(pattern, @"\$\(\s*author\s*\)", "{1}", RegexOptions.IgnoreCase);
            pattern = Regex.Replace(pattern, @"\$\(\s*publisher\s*\)", "{2}", RegexOptions.IgnoreCase);
            pattern = Regex.Replace(pattern, @"\$\(\s*year\s*\)", "{3}", RegexOptions.IgnoreCase);

            foreach (var pdfFile in Directory.GetFiles(folderName, "*.pdf"))
            {
                var  pdfData     = ContentRetreivalUtils.GetExtractedInfo(pdfFile);
                var  pdfFileName = Path.GetFileName(pdfFile);
                var  sugFileName = pdfFileName; // falls back to the current name
                bool isValid     = pdfData != null && !pdfData.IsTitleInValid();

                if (isValid)
                {
                    pdfData.CleanContent();

                    // Every field that is absent is represented by the marker.
                    bool hasAuthors = pdfData.AuthorNames != null && pdfData.AuthorNames.Count > 0;

                    string title = String.IsNullOrEmpty(pdfData.Title) ? Missing : pdfData.Title;
                    string auths = hasAuthors ? pdfData.AuthorsToString : Missing;
                    string pub   = String.IsNullOrEmpty(pdfData.PubName) ? Missing : pdfData.PubName;
                    string year  = pdfData.Year > 0
                        ? pdfData.Year.ToString(CultureInfo.InvariantCulture)
                        : Missing;

                    string newName = String.Format(pattern, title, auths, pub, year);

                    // A run of markers enclosed by punctuation collapses to the
                    // first punctuation character.
                    newName = Regex.Replace(newName, @"(\p{P})(\p{P}*\s*\[NULL\]\s*)+\p{P}", "$1");
                    // A trailing "<punctuation> [NULL]" is dropped.
                    newName = Regex.Replace(newName, @"\s*\p{P}+\s*\[NULL\]\s*$", "");
                    // A leading "[NULL] <punctuation>" is dropped.
                    newName = Regex.Replace(newName, @"^\s*\[NULL\]\s*\p{P}+\s*", "");

                    // Remove any markers the rules above did not catch.
                    newName     = newName.Replace(Missing, "");
                    newName     = newName.Trim() + ".pdf";
                    sugFileName = PostProcessFileName(newName);
                }

                // True only when usable data produced a name different from the current one.
                bool namesDiffer = isValid && (pdfFileName != sugFileName);

                var item = new PdfItemInfo(pdfFileName, sugFileName, namesDiffer);
                item.IsValid = isValid;

                if (!isValid)
                {
                    item.Message = "Could not extract useful information from PDF content!";
                }
                else if (!namesDiffer)
                {
                    item.Message = "The suggested name and the original name are already the same!";
                }

                Add(item);
            }
        }