Example #1
0
        /// <summary>
        /// Builds a <see cref="PdfExtractedData"/> for the PDF at <paramref name="pdfFilePath"/>.
        /// </summary>
        /// <remarks>
        /// Extraction is first attempted from the content returned by
        /// <c>PdfReadingUtils.ReadPdfContent(pdfFilePath, 1)</c>. If that yields nothing, or a
        /// result whose title is reported invalid, the full content is read and searched for
        /// DOIs; when any are found the result is replaced by a new object carrying only those
        /// DOIs. Afterwards, if the file name can be parsed, its year and publication name fill
        /// in a negative <c>Year</c> and a blank <c>PubName</c> respectively.
        /// </remarks>
        /// <param name="pdfFilePath">Path of the PDF file to inspect.</param>
        /// <returns>
        /// The extracted data, or <c>null</c> when nothing could be extracted or when the title
        /// is invalid and no DOI list is attached.
        /// </returns>
        public static PdfExtractedData GetExtractedInfo(string pdfFilePath)
        {
            var headerData = BuildFirstPartPdfData(PdfReadingUtils.ReadPdfContent(pdfFilePath, 1));
            PdfExtractedData result = ExtractFromUnknownPdf(headerData);

            // Fall back to a DOI search over the whole document when the first pass
            // produced nothing or produced an invalid title.
            if (result == null || result.IsTitleInValid())
            {
                var foundDois = FindDoi(PdfReadingUtils.ReadPdfContent(pdfFilePath));
                if (foundDois != null && foundDois.Count > 0)
                {
                    result = new PdfExtractedData { Dois = foundDois };
                }
            }

            if (result == null)
            {
                return null;
            }

            // Use the file name to fill in values that are still missing.
            int    yearFromName;
            string pubFromName;
            string nameWithoutExtension = System.IO.Path.GetFileNameWithoutExtension(pdfFilePath);
            if (ExtractInfoFromFileName(nameWithoutExtension, out yearFromName, out pubFromName))
            {
                if (result.Year < 0)
                {
                    result.Year = yearFromName;
                }
                if (String.IsNullOrWhiteSpace(result.PubName))
                {
                    result.PubName = pubFromName;
                }
            }

            // A result with neither a valid title nor a DOI list is not returned.
            if (result.IsTitleInValid() && result.Dois == null)
            {
                return null;
            }

            return result;
        }
        /// <summary>
        /// Extracts bibliographic data from the PDF file at <paramref name="pdfFilePath"/>.
        /// </summary>
        /// <remarks>
        /// The content obtained via <c>PdfReadingUtils.ReadPdfContent(pdfFilePath, 1)</c> is tried
        /// first. When that gives no result or an invalid title, the complete content is scanned
        /// for DOIs and, if at least one is found, a fresh result holding only the DOIs is used
        /// instead. A negative <c>Year</c> and a blank <c>PubName</c> are then taken from the
        /// file name when it can be parsed.
        /// </remarks>
        /// <param name="pdfFilePath">Path of the PDF file to analyse.</param>
        /// <returns>
        /// The populated <see cref="PdfExtractedData"/>, or <c>null</c> if no data was obtained
        /// or the result has an invalid title and a null DOI list.
        /// </returns>
        public static PdfExtractedData GetExtractedInfo(string pdfFilePath)
        {
            string pageOneText = PdfReadingUtils.ReadPdfContent(pdfFilePath, 1);
            PdfExtractedData info = ExtractFromUnknownPdf(BuildFirstPartPdfData(pageOneText));

            // First pass failed or gave an unusable title: look for DOIs in the whole file.
            bool needsDoiFallback = info == null || info.IsTitleInValid();
            if (needsDoiFallback)
            {
                string wholeText = PdfReadingUtils.ReadPdfContent(pdfFilePath);
                var doiList = FindDoi(wholeText);
                bool hasDois = doiList != null && doiList.Count > 0;
                if (hasDois)
                {
                    info = new PdfExtractedData { Dois = doiList };
                }
            }

            if (info != null)
            {
                // Complete missing year / publication name from the file name, if parseable.
                int parsedYear;
                string parsedPub;
                string stem = System.IO.Path.GetFileNameWithoutExtension(pdfFilePath);
                bool nameParsed = ExtractInfoFromFileName(stem, out parsedYear, out parsedPub);
                if (nameParsed)
                {
                    if (info.Year < 0)
                    {
                        info.Year = parsedYear;
                    }
                    if (String.IsNullOrWhiteSpace(info.PubName))
                    {
                        info.PubName = parsedPub;
                    }
                }

                // Without a valid title and without DOIs there is nothing worth returning.
                bool unusable = info.IsTitleInValid() && info.Dois == null;
                return unusable ? null : info;
            }

            return null;
        }