Ejemplo n.º 1
0
        /// <summary>
        /// Click handler for the extract button. Takes the path typed into
        /// <c>tbPdfPath</c>, and when that file exists fills the extracted-info,
        /// layout-view and layout-content text boxes from it.
        /// </summary>
        /// <param name="sender">Event source; not used by this handler.</param>
        /// <param name="e">Event arguments; not used by this handler.</param>
        private void BtnExtractClick(object sender, RoutedEventArgs e)
        {
            string selectedPath = tbPdfPath.Text;

            // Bail out early with a message when the path does not name an existing file.
            bool fileIsPresent = File.Exists(selectedPath);
            if (!fileIsPresent)
            {
                MessageBox.Show("PDF file does not exist!");
                return;
            }

            // Show the extraction result, or the literal text "null" when nothing came back.
            var extracted = ContentRetreivalUtils.GetExtractedInfo(selectedPath);
            string extractedText;
            if (extracted == null)
            {
                extractedText = "null";
            }
            else
            {
                extractedText = extracted.ToString();
            }
            tbExtractedInfo.Text = extractedText;

            // Each view is assigned before the next reader runs, so a failure in a
            // later reader leaves the earlier text boxes already updated.
            string layoutedView = PdfReadingUtils.ReadLayoutedView(selectedPath);
            tbLayoutView.Text = layoutedView;

            string visibleLayout = PdfReadingUtils.ReadVisibleLayoutContent(selectedPath);
            tbLayoutContent.Text = visibleLayout;

            // NOTE: the raw-content, HTML-content (with browser preview) and
            // PDF-info views are currently disabled and are not populated here.
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Extracts metadata from a PDF file. Extraction is first attempted on the
        /// content returned by <c>ReadPdfContent(pdfFilePath, 1)</c> (presumably the
        /// first page only - TODO confirm against PdfReadingUtils). When that yields
        /// nothing, or a result whose title is invalid, the whole file is searched
        /// for DOIs instead. Year and publication name are then completed from the
        /// file name where they are missing.
        /// </summary>
        /// <param name="pdfFilePath">Path of the PDF file to analyse.</param>
        /// <returns>
        /// The extracted data, or <c>null</c> when no data could be produced or when
        /// the result has an invalid title and no DOI list.
        /// </returns>
        public static PdfExtractedData GetExtractedInfo(string pdfFilePath)
        {
            // Step 1: try to extract from the leading part of the document.
            var leadingPartData = BuildFirstPartPdfData(PdfReadingUtils.ReadPdfContent(pdfFilePath, 1));
            PdfExtractedData result = ExtractFromUnknownPdf(leadingPartData);

            // Step 2: fall back to a DOI search over the full content. A successful
            // search replaces whatever step 1 produced with a DOI-only record.
            bool needsDoiFallback = result == null || result.IsTitleInValid();
            if (needsDoiFallback)
            {
                var foundDois = FindDoi(PdfReadingUtils.ReadPdfContent(pdfFilePath));
                bool anyDoiFound = foundDois != null && foundDois.Count > 0;
                if (anyDoiFound)
                {
                    result = new PdfExtractedData { Dois = foundDois };
                }
            }

            if (result == null)
            {
                return null;
            }

            // Step 3: fill gaps from the file name. A negative Year is treated as
            // "not set"; an empty or whitespace PubName likewise.
            int    yearFromName;
            string pubFromName;
            string nameWithoutExtension = System.IO.Path.GetFileNameWithoutExtension(pdfFilePath);
            if (ExtractInfoFromFileName(nameWithoutExtension, out yearFromName, out pubFromName))
            {
                if (result.Year < 0)
                {
                    result.Year = yearFromName;
                }
                if (String.IsNullOrWhiteSpace(result.PubName))
                {
                    result.PubName = pubFromName;
                }
            }

            // A record with neither a valid title nor a DOI list is not returned.
            bool unusable = result.IsTitleInValid() && result.Dois == null;
            return unusable ? null : result;
        }