// Maximum number of characters of an API error message written to the ingestion log
// (the original TODOs call for truncating to 256).
private const int MaxLoggedErrorMessageLength = 256;

/// <summary>
/// Returns <paramref name="message"/> cut to at most
/// <see cref="MaxLoggedErrorMessageLength"/> characters. Null and short messages are
/// returned unchanged, so this never throws the way a bare Substring(0, 256) does.
/// </summary>
private static string TruncateErrorMessage(string message)
{
    if (message == null || message.Length <= MaxLoggedErrorMessageLength)
    {
        return message;
    }

    return message.Substring(0, MaxLoggedErrorMessageLength);
}

/// <summary>
/// Marks an ingestion log row as rejected (not ingested, pre-check invalid) with the
/// given reason, persists it, and writes the matching info log line.
/// </summary>
private static void RejectEpub(ACS4Ingest.IngestionLog dbLog, Epub epub, string logEvent, string reason)
{
    log.Info(String.Format("{0} {1} : {2}", logEvent, reason, epub.FullName));
    dbLog.Ingested = false;
    dbLog.IsValidPreCheck = false;
    dbLog.PreCheckError = reason;
    dbLog.Update();
}

/// <summary>
/// Executes one ACS4 ingestion run:
/// 1. inserts a run log row and counts the .epub files waiting on every enabled FTP drop folder;
/// 2. fetches the Built In and eCTB distributor catalogs;
/// 3. downloads the dropped files, runs the pre-flight checks on each epub, packages it,
///    and assigns eCTB distribution rights when the resource is not already in the eCTB catalog;
/// 4. records the totals on the run log row and calls Cleanup on the downloaded files.
/// Returns early (before any download) when no epubs were found on any drop folder.
/// </summary>
public static void Run()
{
    log.Info("Begin ACS4 ingestion process...");

    int numFilesDropped = 0;
    int numFilesIngested = 0;
    ACS4ApiResponse builtInCatalogRaw = null;
    Dictionary<string, ACS4CatalogResourceMapItem> mapBuiltInCatalog = null;
    IEnumerable<ACS4Catalog> builtInCatalog = null;
    ACS4ApiResponse eCtbCatalogRaw = null;
    Dictionary<string, ACS4CatalogResourceMapItem> eCtbCatalogMap = null;
    IEnumerable<ACS4Catalog> eCtbCatalog = null;
    List<HCPFileInfo> fetchedFiles = null;

    var dbRunLog = new ACS4Ingest.RunLog();
    dbRunLog.StartTimeUtc = System.DateTime.UtcNow;
    dbRunLog.BatchSize = HCPACS4.AdobeConfig.BATCH_SIZE;
    dbRunLog.Insert();
    log.Info(String.Format("RunLogId {0}", dbRunLog.Id));

    // check for any files on drop folders
    var dropFolders = ACS4Ingest.ConfigFtpDropFolder.Query("SELECT * FROM ConfigFtpDropFolder WHERE Enabled = 1");
    foreach (var dropFolder in dropFolders)
    {
        try
        {
            log.Info(String.Format("List dropped epubs for {0} from {1}{2}", dropFolder.Alias, dropFolder.FtpHost, dropFolder.FtpRemotePath));
            var ftpDrop = new DropFolder(dropFolder.Id, dropFolder.Alias, dropFolder.FtpHost, dropFolder.FtpUser, dropFolder.FtpPassword, dropFolder.FtpRemotePath, dropFolder.Enabled);
            var droppedFiles = ftpDrop.ListFiles(".epub");

            var dbFileStats = new ACS4Ingest.DropFolderStat();
            dbFileStats.RunLogId = dbRunLog.Id;
            dbFileStats.DropFolderId = dropFolder.Id;
            dbFileStats.EpubCount = droppedFiles.Count;
            dbFileStats.Insert();

            numFilesDropped += droppedFiles.Count;
            log.Info(String.Format("{0} epubs found at {1}{2}", droppedFiles.Count, dropFolder.FtpHost, dropFolder.FtpRemotePath));
        }
        catch (Exception ex)
        {
            // A failing drop folder must not stop the remaining folders from being listed.
            log.Error(String.Format("Exception listing files for {0} from {1}{2}", dropFolder.Alias, dropFolder.FtpHost, dropFolder.FtpRemotePath), ex);
        }
    }

    dbRunLog.NumFilesDropped = numFilesDropped;
    dbRunLog.Update();

    if (numFilesDropped == 0)
    {
        log.Info("No epubs to process, exiting job.");
        return;
    }

    //TODO: Currently batch size is applied to each folder individually
    //      Should apply batch size to the total # of files from all drops

    /** Get ACS4 catalogs **/
    try
    {
        // Get ACS4 Built In Distributor catalog to know if an asset already exists in ACS4 system
        log.Info("Fetching ACS4 Built In Distributor catalog...");
        builtInCatalogRaw = eCTB.FetchCatalog(HCPACS4.AdobeConfig.ACS_ECTB_BUILTIN_DISTRIBUTORID, HCPACS4.AdobeConfig.ACS_ECTB_BUILTIN_SECRET_KEY);
        if (!builtInCatalogRaw.IsError())
        {
            builtInCatalog = HCPUtils.ParseACS4Catalog(builtInCatalogRaw.ResponseBody);
            mapBuiltInCatalog = HCPUtils.MapIsbnToACS4Resource(builtInCatalog);
            int builtInCount = builtInCatalog.Count();
            dbRunLog.InitialCountBuiltInCatalog = builtInCount;
            log.Info(String.Format("ACS4 Built In Distributor fetched successfully with {0} records", builtInCount));
        }
        else
        {
            log.Error(String.Format("Error Fetching ACS4 Built In Distributor : {0} - {1}", builtInCatalogRaw.ErrorCode, builtInCatalogRaw.ErrorMessage));
        }

        // Get eCTB catalog - to know if epub has already been assigned distribution rights within eCTB distributor
        log.Info("Fetching eCtb Distributor catalog...");
        eCtbCatalogRaw = eCTB.FetchCatalog(HCPACS4.AdobeConfig.ACS_ECTB_DISTRIBUTORID, HCPACS4.AdobeConfig.ACS_ECTB_SECRET_KEY);
        if (!eCtbCatalogRaw.IsError())
        {
            eCtbCatalog = HCPUtils.ParseACS4Catalog(eCtbCatalogRaw.ResponseBody);
            eCtbCatalogMap = HCPUtils.MapIsbnToACS4Resource(eCtbCatalog);
            int eCtbCount = eCtbCatalog.Count();
            dbRunLog.InitialCounteCtbCatalog = eCtbCount;
            log.Info(String.Format("eCtb Distributor fetched successfully with {0} records", eCtbCount));
        }
        else
        {
            log.Error(String.Format("Error Fetching eCtb Distributor : {0} - {1}", eCtbCatalogRaw.ErrorCode, eCtbCatalogRaw.ErrorMessage));
        }

        // Counts are only set for catalogs that were actually fetched; a catalog that
        // reported an error no longer triggers a null dereference here.
        dbRunLog.Update();
    }
    catch (Exception ex)
    {
        log.Error("Exception fetching ACS4 catalog(s)", ex);
    }

    // Both catalogs must have been fetched AND parsed before any file is downloaded.
    // The raw responses are still null if FetchCatalog threw, and the parsed catalogs are
    // still null if parsing/mapping threw; either case previously crashed the run (the
    // latter only after files had already been pulled from FTP).
    // numFilesDropped > 0 is guaranteed by the early return above.
    bool catalogsReady =
        builtInCatalogRaw != null && !builtInCatalogRaw.IsError() && mapBuiltInCatalog != null
        && eCtbCatalogRaw != null && !eCtbCatalogRaw.IsError() && eCtbCatalog != null;

    /** Get the files and begin processing **/
    if (catalogsReady)
    {
        // Download the files from FTP
        fetchedFiles = new List<HCPFileInfo>();
        foreach (var dropFolder in dropFolders)
        {
            log.Info(String.Format("Downloading files from {0}{1} ...", dropFolder.FtpHost, dropFolder.FtpRemotePath));
            string savePath = String.Format(@"{0}\{1}", HCPACS4.AdobeConfig.LOCAL_STAGING_PATH.TrimEnd('\\'), dropFolder.Alias);
            var ftpDrop = new DropFolder(dropFolder.Id, dropFolder.Alias, dropFolder.FtpHost, dropFolder.FtpUser, dropFolder.FtpPassword, dropFolder.FtpRemotePath, dropFolder.Enabled);
            var files = ftpDrop.DownloadFiles(".epub", savePath, HCPACS4.AdobeConfig.BATCH_SIZE, HCPACS4.AdobeConfig.DELETE_FROM_FTP);
            log.Info(String.Format("Downloaded {0} files from {1}{2} ...", files.Count, dropFolder.FtpHost, dropFolder.FtpRemotePath));

            // Remember which drop folder each file came from.
            foreach (var f in files)
            {
                f.DropFolderAlias = dropFolder.Alias;
                f.DropFolderId = dropFolder.Id;
            }

            fetchedFiles.AddRange(files);
        }

        // load / analyze / package epub files
        foreach (var f in fetchedFiles)
        {
            bool existsBuiltInCatalog = false;
            bool existsEctbDistributionRights = false;
            bool galleyAttemptingToOverwriteNonGalley = false;

            Epub epub = new Epub(f.FullName, f.DropFolderAlias);

            // One ingestion log row per file, inserted up front and updated with the outcome.
            var dbLog = new ACS4Ingest.IngestionLog();
            dbLog.RunLogId = dbRunLog.Id;
            dbLog.EpubFileName = f.FileName;
            dbLog.Isbn = epub.Isbn;
            dbLog.FileSize = epub.FileSizeBytes;
            dbLog.ArchivePath = f.FileArchivePath;
            dbLog.DropFolderId = f.DropFolderId;
            dbLog.IsbnFromFileName = epub.IsbnFromFileName;
            dbLog.IsbnFromOpf = epub.IsbnFromOpf;
            dbLog.OpfIdentifier = epub.OpfIdentifier;
            dbLog.IsCorrupt = epub.CorruptFile;
            dbLog.EnteredDateUtc = System.DateTime.UtcNow;
            dbLog.Insert();

            /** PreFlight Checks **/
            /** TODO: log to IngestionErrors table to accommodate multiple errors per file **/
            if (epub.CorruptFile)
            {
                RejectEpub(dbLog, epub, "FailedPreFlightCheck", "CorruptFile");
                continue;
            }

            if (epub.IsbnFromFileName != epub.IsbnFromOpf)
            {
                RejectEpub(dbLog, epub, "FailedPreFlightCheck", "IsbnMismatchFileNameOpf");
                continue;
            }

            if (epub.InvalidDocType)
            {
                RejectEpub(dbLog, epub, "FailedPreFlightCheck", "InvalidDocType");
                continue;
            }

            // TODO: Zero in on this size restriction - 50 MB for now
            if (epub.FileSizeBytes > HCPACS4.AdobeConfig.MAX_FILE_SIZE)
            {
                RejectEpub(dbLog, epub, "FailedPreFlightCheck", "FileTooLarge");
                continue;
            }

            // lookup to see if isbn exists in Built In Catalog
            // ACS4 does not work on isbn13, it will happily ingest the same isbn several times
            ACS4CatalogResourceMapItem mappedBuiltInCatalog;
            if (mapBuiltInCatalog.TryGetValue(epub.Isbn, out mappedBuiltInCatalog))
            {
                epub.ACS4Guid = mappedBuiltInCatalog.Guid;
                existsBuiltInCatalog = true;
                if (epub.IsGalley && !mappedBuiltInCatalog.IsGalley)
                {
                    galleyAttemptingToOverwriteNonGalley = true;
                }
            }

            // See if book is already assigned to eCTB Distributor
            if (eCtbCatalog.Any(i => i.resource == epub.ACS4Guid))
            {
                existsEctbDistributionRights = true;
            }

            dbLog.IsValidPreCheck = true;
            dbLog.IngestType = existsBuiltInCatalog ? "update" : "add";

            /* Abort ingest if this file is a galley and existing is a non Galley */
            if (galleyAttemptingToOverwriteNonGalley)
            {
                RejectEpub(dbLog, epub, "IngestAborted", "GalleyCannotOverwriteNonGalley");
                continue;
            }

            PackageRequest package = new PackageRequest(epub);
            PackageRequestResponse packageResponse = package.SendRequest();
            bool ingested = (bool)packageResponse.Ingested;

            log.Info(String.Format("{0} : {1} : {2}",
                ingested ? "INGESTED" : "NOT INGESTED",
                epub.FullName,
                ingested ? null : String.Format("{0} : {1}", packageResponse.ErrorCode, packageResponse.ErrorMessage)));

            // assign to eCTB distributor if does not already exists
            ACS4ApiResponse rightsReqResponse = null;
            if (ingested)
            {
                numFilesIngested++;
                if (!existsEctbDistributionRights)
                {
                    // assign the book to the eCTB distributor
                    ManageDistributionRights rightsReq = new ManageDistributionRights(HCPACS4.AdobeConfig.ACS_ECTB_DISTRIBUTORID, packageResponse.ACS4Guid);
                    rightsReqResponse = rightsReq.AssignRights();
                }
            }

            bool packageFailed = packageResponse.IsError();
            dbLog.PackageHttpResponseCode = (int)packageResponse.HttpStatusCode;
            dbLog.Ingested = ingested;
            dbLog.ACS4Guid = packageResponse.ACS4Guid;
            dbLog.FileSize = epub.FileSizeBytes;
            dbLog.PackageResponse = packageResponse.ResponseBody;
            dbLog.PackageErrorCode = packageFailed ? packageResponse.ErrorCode : null;
            // Length-safe truncation: the previous Substring(0, 256) threw for every
            // message shorter than 256 characters and was then overwritten untruncated.
            dbLog.PackageErrorMessage = packageFailed ? TruncateErrorMessage(packageResponse.ErrorMessage) : null;

            if (rightsReqResponse != null)
            {
                bool rightsFailed = rightsReqResponse.IsError();
                dbLog.DistributionRightsAssigned = !rightsFailed;
                dbLog.DistributionRightsErrorCode = rightsFailed ? rightsReqResponse.ErrorCode : null;
                // The previous null/empty test was inverted and called Substring only on
                // null or empty messages.
                dbLog.DistributionRightsErrorMessage = rightsFailed ? TruncateErrorMessage(rightsReqResponse.ErrorMessage) : null;
            }

            dbLog.EnteredDateUtc = System.DateTime.UtcNow;
            dbLog.Update();
        }
    }
    else
    {
        log.Info("Nothing to do, exiting job.");
    }

    dbRunLog.NumFilesIngested = numFilesIngested;
    dbRunLog.EndTimeUtc = System.DateTime.UtcNow;
    dbRunLog.Update();

    log.Info(String.Format("Total files ingested {0}", numFilesIngested));
    log.Info("End ACS4 ingestion process");

    // fetchedFiles is still null here when the catalogs could not be loaded.
    Cleanup(fetchedFiles);
}
/// <summary>
/// Inspects the epub archive at <c>FullName</c> and fills in the analysis properties:
/// <c>FileSizeBytes</c>, <c>IsbnFromFileName</c> and <c>Isbn</c> (both derived from the file name),
/// <c>InvalidDocType</c> (set when any .xhtml entry contains "about:legacy-compat"),
/// <c>HasOpf</c>/<c>OpfFileName</c>, and <c>OpfIdentifier</c>/<c>IsbnFromOpf</c> (taken from the
/// first dc:identifier under the OPF metadata element).
/// An <see cref="System.IO.InvalidDataException"/> while reading the archive sets
/// <c>CorruptFile</c>; any other exception is logged and analysis stops with whatever
/// was collected so far.
/// </summary>
private void AnalyzeEpub()
{
    this.FileSizeBytes = new FileInfo(this.FullName).Length;
    this.IsbnFromFileName = HCPUtils.GetIsbn13(this.FullName);
    this.Isbn = this.IsbnFromFileName;

    try
    {
        using (ZipArchive archive = ZipFile.OpenRead(this.FullName))
        {
            foreach (ZipArchiveEntry entry in archive.Entries)
            {
                // check for invalid DOCTYPE in any .xhtml file; once one is found the
                // remaining .xhtml entries do not need to be read
                if (!this.InvalidDocType && entry.FullName.EndsWith(".xhtml", StringComparison.Ordinal))
                {
                    using (var stream = entry.Open())
                    using (var reader = new StreamReader(stream))
                    {
                        string xhtml = reader.ReadToEnd();
                        if (xhtml.Contains("about:legacy-compat"))
                        {
                            this.InvalidDocType = true;
                        }
                    }
                }

                // check .opf - primarily for isbn
                if (entry.FullName.EndsWith(".opf", StringComparison.Ordinal))
                {
                    this.OpfFileName = entry.FullName;
                    this.HasOpf = true;

                    // get isbn from opf
                    using (var stream = entry.Open())
                    {
                        XDocument xmlOpf = XDocument.Load(stream);
                        XNamespace dc = "http://purl.org/dc/elements/1.1/";
                        XNamespace opf = "http://www.idpf.org/2007/opf";

                        // FirstOrDefault() yields null when the OPF has no dc:identifier.
                        // Dereferencing it unconditionally used to throw and abort the scan
                        // of every remaining archive entry (including later DOCTYPE checks).
                        var identifierElement = xmlOpf.Descendants(opf + "metadata").Elements(dc + "identifier").FirstOrDefault();
                        if (identifierElement == null)
                        {
                            log.Error(String.Format("No dc:identifier found in {0} of Epub {1}", entry.FullName, this.FullName));
                        }
                        else
                        {
                            this.OpfIdentifier = identifierElement.Value;
                            if (!String.IsNullOrEmpty(this.OpfIdentifier))
                            {
                                // sometimes isbns are formatted with dashes
                                string tmpIsbn = this.OpfIdentifier.Replace("-", "");
                                this.IsbnFromOpf = HCPUtils.GetIsbn13(tmpIsbn);
                            }
                        }
                    }
                }
            }
        }
    }
    catch (System.IO.InvalidDataException exIO)
    {
        // The archive could not be read as a zip file: flag the epub as corrupt.
        this.CorruptFile = true;
        log.Error(String.Format("IO Exception analyzing Epub {0} - {1}", this.FullName, exIO.StackTrace));
    }
    catch (Exception ex)
    {
        log.Error(String.Format("Exception analyzing Epub {0} - {1}", this.FullName, ex.StackTrace));
    }
}