/// <summary>
/// Builds a <see cref="PdfExtractedData"/> record for the PDF at
/// <paramref name="pdfFilePath"/>.
/// </summary>
/// <remarks>
/// Extraction is first attempted from the content returned by
/// <c>PdfReadingUtils.ReadPdfContent(pdfFilePath, 1)</c> (presumably the first
/// page only - confirm against that helper). When that yields nothing, or a
/// record whose title is invalid, the whole file is read and searched for DOIs;
/// if any are found, a new record holding only those DOIs replaces the earlier
/// result. Finally, a negative year and a blank publication name are filled in
/// from the file name when the file name can be parsed.
/// </remarks>
/// <param name="pdfFilePath">Path of the PDF file to inspect.</param>
/// <returns>
/// The extracted data, or <c>null</c> when no record could be produced or the
/// record has an invalid title and a <c>null</c> DOI list.
/// </returns>
public static PdfExtractedData GetExtractedInfo(string pdfFilePath)
{
    // First attempt: work from the leading part of the document only.
    string leadingText = PdfReadingUtils.ReadPdfContent(pdfFilePath, 1);
    PdfExtractedData result = ExtractFromUnknownPdf(BuildFirstPartPdfData(leadingText));

    // Fallback: no usable title, so look for DOIs anywhere in the file.
    if (result == null || result.IsTitleInValid())
    {
        var foundDois = FindDoi(PdfReadingUtils.ReadPdfContent(pdfFilePath));
        bool hasDois = foundDois != null && foundDois.Count > 0;
        if (hasDois)
        {
            // Note: this discards whatever the first attempt produced.
            result = new PdfExtractedData { Dois = foundDois };
        }
    }

    if (result == null)
    {
        return null;
    }

    // Use the file name to supply a year / publication name that is still missing.
    string nameOnly = System.IO.Path.GetFileNameWithoutExtension(pdfFilePath);
    int yearFromName;
    string pubFromName;
    if (ExtractInfoFromFileName(nameOnly, out yearFromName, out pubFromName))
    {
        if (result.Year < 0)
        {
            result.Year = yearFromName;
        }

        if (String.IsNullOrWhiteSpace(result.PubName))
        {
            result.PubName = pubFromName;
        }
    }

    // A record with neither a valid title nor a DOI list is not returned.
    bool unusable = result.IsTitleInValid() && result.Dois == null;
    return unusable ? null : result;
}
/// <summary>
/// Extracts bibliographic data from the PDF file at
/// <paramref name="pdfFilePath"/>.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Read content via <c>PdfReadingUtils.ReadPdfContent(pdfFilePath, 1)</c>
///    (presumably page one - verify against that helper) and try to extract a
///    record from it.
/// 2. If there is no record, or its title is invalid, read the full file and
///    search it for DOIs; when at least one is found, the record is replaced by
///    a new one carrying only the DOI list.
/// 3. If a record exists, parse the file name and use it to fill a negative
///    year and a blank publication name.
/// </remarks>
/// <param name="pdfFilePath">Path of the PDF file to process.</param>
/// <returns>
/// The resulting record, or <c>null</c> if none was produced or if it has an
/// invalid title together with a <c>null</c> DOI list.
/// </returns>
public static PdfExtractedData GetExtractedInfo(string pdfFilePath)
{
    var headData = BuildFirstPartPdfData(PdfReadingUtils.ReadPdfContent(pdfFilePath, 1));
    PdfExtractedData extracted = ExtractFromUnknownPdf(headData);

    // The DOI search is only needed when the first pass gave no valid title.
    bool needDoiFallback = extracted == null || extracted.IsTitleInValid();
    if (needDoiFallback)
    {
        string wholeText = PdfReadingUtils.ReadPdfContent(pdfFilePath);
        var doiList = FindDoi(wholeText);
        if (doiList != null && doiList.Count > 0)
        {
            // Replaces (does not merge with) any first-pass record.
            extracted = new PdfExtractedData { Dois = doiList };
        }
    }

    if (extracted != null)
    {
        int parsedYear;
        string parsedPub;
        string stem = System.IO.Path.GetFileNameWithoutExtension(pdfFilePath);

        if (ExtractInfoFromFileName(stem, out parsedYear, out parsedPub))
        {
            // Only values that are still unset are taken from the file name.
            if (extracted.Year < 0)
            {
                extracted.Year = parsedYear;
            }

            if (String.IsNullOrWhiteSpace(extracted.PubName))
            {
                extracted.PubName = parsedPub;
            }
        }

        if (!extracted.IsTitleInValid())
        {
            return extracted;
        }

        // Title is invalid: the record is only worth returning if it has a DOI list.
        return extracted.Dois == null ? null : extracted;
    }

    return null;
}