/// <summary> Saves all of the digital resource information to the database </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Before saving, this records the total size of the files in the resource folder and sets
/// the text-searchable flag when a text file sits beside a page image (jpg or jp2) of the same name.
/// A failed database save is reported through OnError, but this method still returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;

    // Determine total size on the disk. Dividing by 1024.0 keeps the fractional part of
    // each file; an integer division would drop up to 1023 bytes per file.
    string[] all_files_final = Directory.GetFiles(resourceFolder);
    double size = all_files_final.Sum(ThisFile => new FileInfo(ThisFile).Length / 1024.0);

    // NOTE(review): bytes / 1024 is kilobytes, yet the property is named DiskSpaceMb - confirm the intended unit
    Resource.DiskSpaceMb = size;

    // Also, set the TextSearchable flag correctly
    bool page_image_text_found = false;
    foreach (string thisFile in Directory.GetFiles(resourceFolder, "*.txt"))
    {
        // Is this text from a PAGE IMAGE (jpeg or jp2) file?
        string filename_sans_extension = Path.GetFileNameWithoutExtension(thisFile);
        string possible_jpeg = Path.Combine(resourceFolder, filename_sans_extension + ".jpg");
        string possible_jp2 = Path.Combine(resourceFolder, filename_sans_extension + ".jp2");

        if (File.Exists(possible_jp2) || File.Exists(possible_jpeg))
        {
            page_image_text_found = true;
            break;
        }
    }

    Resource.Metadata.Behaviors.Text_Searchable = page_image_text_found;

    // Save this package to the database
    if (!Resource.Save_to_Database(Resource.NewPackage))
    {
        // The failure is reported, but processing is allowed to continue
        OnError("Error saving data to SobekCM database. The database may not reflect the most recent data in the METS.", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    }

    return true;
}
/// <summary> Renames a METS file found in the resource folder to a dated backup name
/// ( recd_YYYY_MM_DD.mets.bak ) and records that name on the resource </summary>
/// <param name="ResourcePackage"> Incoming digital resource whose folder is checked </param>
/// <remarks> The candidate names are tried in this order, and only the first one found is renamed:
/// BibID_VID.mets, BibID_VID.mets.xml, BibID.mets, BibID.mets.xml. </remarks>
private void Rename_Any_Received_METS_File(Incoming_Digital_Resource ResourcePackage)
{
    // Read the clock once so the year, month and day always describe the same date
    DateTime today = DateTime.Now;
    string recd_filename = "recd_" + today.Year + "_" + today.Month.ToString().PadLeft(2, '0') + "_" + today.Day.ToString().PadLeft(2, '0') + ".mets.bak";

    string folder_prefix = ResourcePackage.Resource_Folder + "\\";
    string recd_path = folder_prefix + recd_filename;

    // If a renamed file already exists for this date, delete it so the move below cannot collide
    if (File.Exists(recd_path))
    {
        File.Delete(recd_path);
    }

    // Candidate METS file names, most specific first
    string bib_vid = ResourcePackage.BibID + "_" + ResourcePackage.VID;
    string[] candidates =
    {
        bib_vid + ".mets",
        bib_vid + ".mets.xml",
        ResourcePackage.BibID + ".mets",
        ResourcePackage.BibID + ".mets.xml"
    };

    foreach (string candidate in candidates)
    {
        string candidate_path = folder_prefix + candidate;
        if (!File.Exists(candidate_path))
        {
            continue;
        }

        // Only the first match is renamed
        File.Move(candidate_path, recd_path);
        ResourcePackage.METS_File = recd_filename;
        return;
    }
}
/// <summary> Extracts a thumbnail image from a PDF file </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> A thumbnail named [pdf name]thm.jpg is created beside each PDF that does not already have one.
/// Nothing is done unless both the ghostscript and imagemagick executables are configured. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;

    // Get the executable path/file for ghostscript and imagemagick
    string ghostscript_executable = MultiInstance_Builder_Settings.Ghostscript_Executable;
    string imagemagick_executable = MultiInstance_Builder_Settings.ImageMagick_Executable;

    // Both tools are handed to the thumbnail creator, so without either there is nothing to do.
    // This does not change per file, so it is checked once rather than inside the loop.
    if (String.IsNullOrEmpty(ghostscript_executable) || String.IsNullOrEmpty(imagemagick_executable))
    {
        return true;
    }

    // Preprocess each PDF
    foreach (string thisPdf in Directory.GetFiles(resourceFolder, "*.pdf"))
    {
        // Strip only the trailing extension; String.Replace would also remove the
        // extension text wherever else it appears in the name
        string fileName = Path.GetFileNameWithoutExtension(thisPdf);
        string thumbnail_file = resourceFolder + "\\" + fileName + "thm.jpg";

        // Does the thumbnail exist for this item?
        if (!File.Exists(thumbnail_file))
        {
            PDF_Tools.Create_Thumbnail(resourceFolder, thisPdf, thumbnail_file, ghostscript_executable, imagemagick_executable);
        }
    }

    return true;
}
/// <summary> Attempts to clean dirty OCR files that may somehow
/// contain unprintable characters and other flaws </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Cleaning is best-effort: a failure is reported through OnError and this method still returns TRUE.
/// The first failure ends the cleaning pass, so later text files are left untouched. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;

    try
    {
        // Step through each text file in the resource folder and clean it
        foreach (string textFile in Directory.GetFiles(resourceFolder, "*.txt"))
        {
            Text_Cleaner.Clean_Text_File(textFile);
        }
    }
    catch (Exception ee)
    {
        // Report the problem instead of silently discarding it; processing still continues
        OnError("Error cleaning text files : " + ee.Message, Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    }

    return true;
}
/// <summary> Updates the basic dimensional information stored for all of the JPEG and JPEG2000 files
/// within the service METS file </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Only files that exist in the resource folder are examined. The file type is taken from the
/// extension, and JPEG files whose name ends in thm.jpg are skipped. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;

    // Nothing can be computed without the resource folder
    if (!Directory.Exists(resourceFolder))
    {
        return true;
    }

    // Now, step through each file
    foreach (SobekCM_File_Info thisFile in Resource.Metadata.Divisions.Files)
    {
        string systemName = thisFile.System_Name;

        // Does this exist?
        if (!File.Exists(Path.Combine(resourceFolder, systemName)))
        {
            continue;
        }

        // Decide by the real extension; a substring search would also match names that
        // merely contain "JP2" or ".JPG" somewhere before the extension
        string extension = Path.GetExtension(systemName);

        if (String.Equals(extension, ".jpg", StringComparison.OrdinalIgnoreCase))
        {
            // JPEG attributes are ALWAYS re-calculated, except for thumbnails
            if (!systemName.EndsWith("thm.jpg", StringComparison.OrdinalIgnoreCase))
            {
                Compute_Jpeg_Attributes(thisFile, resourceFolder);
            }
        }
        else if (String.Equals(extension, ".jp2", StringComparison.OrdinalIgnoreCase))
        {
            Compute_Jpeg2000_Attributes(thisFile, resourceFolder);
        }
    }

    return true;
}
/// <summary> Updates the basic dimensional information stored for all of the JPEG and JPEG2000 files
/// within the service METS file </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Files missing from the resource folder are skipped, as are JPEG files whose name ends in
/// thm.jpg. The file type is determined from the extension alone. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string folder = Resource.Resource_Folder;

    if (Directory.Exists(folder))
    {
        foreach (SobekCM_File_Info thisFile in Resource.Metadata.Divisions.Files)
        {
            string name = thisFile.System_Name;

            // Skip anything listed in the METS that is not actually on disk
            if (!File.Exists(Path.Combine(folder, name)))
            {
                continue;
            }

            // Compare the extension itself rather than searching the whole name, so that
            // a name which merely contains "JP2" or ".JPG" is not treated as an image
            string extension = Path.GetExtension(name);
            bool is_jpeg = String.Equals(extension, ".jpg", StringComparison.OrdinalIgnoreCase);
            bool is_jpeg2000 = String.Equals(extension, ".jp2", StringComparison.OrdinalIgnoreCase);

            if (is_jpeg && !name.EndsWith("thm.jpg", StringComparison.OrdinalIgnoreCase))
            {
                // JPEG attributes are ALWAYS re-calculated (thumbnails excluded)
                Compute_Jpeg_Attributes(thisFile, folder);
            }

            if (is_jpeg2000)
            {
                Compute_Jpeg2000_Attributes(thisFile, folder);
            }
        }
    }

    return true;
}
/// <summary> Performs OCR on the incoming TIFF files to create indexable text </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> OCR is launched only for TIFF files with no text file of the same name, and only when an
/// OCR command has been configured. A failure to launch is reported through OnError and the
/// remaining files are still attempted. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;

    // No OCR command configured (null or empty) means there is nothing to run
    string ocr_command_template = Settings.Builder.OCR_Command_Prompt;
    if (String.IsNullOrEmpty(ocr_command_template))
    {
        return true;
    }

    // Run OCR for any TIFF files that do not have any corresponding TXT files
    foreach (string thisTiffFile in Directory.GetFiles(resourceFolder, "*.tif"))
    {
        // Strip only the trailing extension to build the matching text file name
        string text_file = resourceFolder + "\\" + Path.GetFileNameWithoutExtension(thisTiffFile) + ".txt";
        if (File.Exists(text_file))
        {
            continue;
        }

        try
        {
            // The template receives the TIFF path as {0} and the text path as {1}
            // NOTE(review): the whole formatted string is used as the process FileName - confirm
            // the configured template does not rely on separate command-line arguments
            string command = String.Format(ocr_command_template, thisTiffFile, text_file);

            // Dispose the process object so its handle is released once OCR finishes
            using (Process ocrProcess = new Process())
            {
                ocrProcess.StartInfo.FileName = command;
                ocrProcess.Start();
                ocrProcess.WaitForExit();
            }
        }
        catch (Exception)
        {
            OnError("Error launching OCR on (" + Path.GetFileName(thisTiffFile) + ")", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
        }
    }

    return true;
}
/// <summary> Moves the incoming resource's files into its folder on the image server </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> The destination is built from the image server network location, the first ten characters
/// of the BibID split into five two-character folders, and the VID. If the resource already sits
/// in that folder, nothing is moved. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    Rename_Any_Received_METS_File(Resource);

    // Build the file root: five two-character pieces of the BibID, joined by backslashes
    string[] bibPieces = new string[5];
    for (int piece = 0; piece < bibPieces.Length; piece++)
    {
        bibPieces[piece] = Resource.BibID.Substring(piece * 2, 2);
    }
    Resource.File_Root = String.Join("\\", bibPieces);

    // Destination folder for this resource on the image server
    string serverPackageFolder = Settings.Servers.Image_Server_Network + Resource.File_Root + "\\" + Resource.VID;

    // Re-processing in place: source and destination are the same folder, so nothing to move
    bool alreadyInPlace = NormalizePath(Resource.Resource_Folder) == NormalizePath(serverPackageFolder);
    if (alreadyInPlace)
    {
        return true;
    }

    // The move repopulates the list of new image files, so start from empty
    Resource.NewImageFiles.Clear();

    bool moved = Move_All_Files_To_Image_Server(Resource, Resource.NewImageFiles, serverPackageFolder);
    if (moved)
    {
        return true;
    }

    OnError("Error moving some files to the image server for " + Resource.BibID + ":" + Resource.VID, Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    return false;
}
/// <summary> Attempts to clean dirty OCR files that may somehow
/// contain unprintable characters and other flaws </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> This step never stops processing: any exception raised while listing or cleaning the
/// text files is reported through OnError and TRUE is returned. Cleaning stops at the first failure. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;

    // Clean any incoming text files first
    try
    {
        string[] text_files = Directory.GetFiles(resourceFolder, "*.txt");
        foreach (string textFile in text_files)
        {
            Text_Cleaner.Clean_Text_File(textFile);
        }
    }
    catch (Exception ee)
    {
        // Previously swallowed without a trace; surface it in the log but keep going
        OnError("Error cleaning text files : " + ee.Message, Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    }

    return true;
}
/// <summary> Extracts indexable (i.e, without the tags) text from each XML file in the resource folder </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Files whose name contains ".mets", and the files doc.xml, citation_mets.xml and marc.xml,
/// are skipped. The text is written to [file name with dots replaced by underscores].txt, and
/// only when that text file does not already exist. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;

    // Preprocess each XML file for the text
    foreach (string thisXml in Directory.GetFiles(resourceFolder, "*.xml"))
    {
        string xml_name = Path.GetFileName(thisXml);

        // Ordinal, case-insensitive comparisons: upper-casing with the current culture
        // mis-compares names containing 'i' under Turkish casing rules
        bool is_mets = xml_name.IndexOf(".METS", StringComparison.OrdinalIgnoreCase) >= 0;
        bool is_reserved = String.Equals(xml_name, "DOC.XML", StringComparison.OrdinalIgnoreCase)
                           || String.Equals(xml_name, "CITATION_METS.XML", StringComparison.OrdinalIgnoreCase)
                           || String.Equals(xml_name, "MARC.XML", StringComparison.OrdinalIgnoreCase);
        if (is_mets || is_reserved)
        {
            continue;
        }

        string text_file = resourceFolder + "\\" + xml_name.Replace(".", "_") + ".txt";

        // Does the full text exist for this item?
        if (!File.Exists(text_file))
        {
            HTML_XML_Text_Extractor.Extract_Text(thisXml, text_file);
        }
    }

    return true;
}
/// <summary> Extracts indexable (i.e, without the tags) text from a HTML file </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> QC_Error.html and the static page ( [BibID]_[VID].html ) are skipped. The text is written to
/// [file name with dots replaced by underscores].txt, and only when that file does not already exist. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;

    // Name of the static page, which is never mined for text
    string static_page_name = Resource.BibID + "_" + Resource.VID + ".HTML";

    // Preprocess each HTML file for the text
    foreach (string thisHtml in Directory.GetFiles(resourceFolder, "*.htm*"))
    {
        string html_name = Path.GetFileName(thisHtml);

        // Ordinal, case-insensitive comparisons: upper-casing with the current culture
        // mis-compares names containing 'i' under Turkish casing rules
        if (String.Equals(html_name, "QC_ERROR.HTML", StringComparison.OrdinalIgnoreCase)
            || String.Equals(html_name, static_page_name, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        string text_file = resourceFolder + "\\" + html_name.Replace(".", "_") + ".txt";

        // Does the full text exist for this item?
        if (!File.Exists(text_file))
        {
            HTML_XML_Text_Extractor.Extract_Text(thisHtml, text_file);
        }
    }

    return true;
}
/// <summary> Performs OCR on the incoming TIFF files to create indexable text </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Each TIFF file lacking a same-named text file is passed to the configured OCR command and
/// this method waits for that process to exit. Launch failures are reported through OnError
/// and do not stop the remaining files from being processed. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;
    string ocr_template = Settings.Builder.OCR_Command_Prompt;

    // Guard against a missing setting as well as an empty one
    if (!String.IsNullOrEmpty(ocr_template))
    {
        string[] ocr_tiff_files = Directory.GetFiles(resourceFolder, "*.tif");
        foreach (string thisTiffFile in ocr_tiff_files)
        {
            // GetFileNameWithoutExtension removes only the final extension, unlike a
            // Replace on the extension text, which removes every occurrence
            string tiff_name = Path.GetFileName(thisTiffFile);
            string text_file = resourceFolder + "\\" + Path.GetFileNameWithoutExtension(thisTiffFile) + ".txt";

            if (!File.Exists(text_file))
            {
                try
                {
                    // {0} = TIFF file, {1} = text file to produce
                    // NOTE(review): the formatted string becomes the process FileName as a whole -
                    // verify against the configured template
                    string command = String.Format(ocr_template, thisTiffFile, text_file);

                    // using releases the process handle even if Start or WaitForExit throws
                    using (Process ocrProcess = new Process())
                    {
                        ocrProcess.StartInfo.FileName = command;
                        ocrProcess.Start();
                        ocrProcess.WaitForExit();
                    }
                }
                catch (Exception)
                {
                    OnError("Error launching OCR on (" + tiff_name + ")", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
                }
            }
        }
    }

    return true;
}
/// <summary> Saves all of the digital resource information to solr/lucene </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Does nothing when no document index URL is configured. Resources with a negative IP
/// restriction membership, or flagged dark, are deleted from the index rather than updated.
/// An exception during the update is reported through OnError; TRUE is returned regardless. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    // No configured document index means nothing to save
    if (String.IsNullOrEmpty(Settings.Servers.Document_Solr_Index_URL))
    {
        return true;
    }

    // Work out whether this resource must be kept out of the index
    bool ipRestricted = (!Resource.Metadata.Behaviors.IP_Restriction_Membership_Is_Null) && (Resource.Metadata.Behaviors.IP_Restriction_Membership < 0);
    bool keepOutOfIndex = ipRestricted || ((!Resource.Metadata.Behaviors.Dark_Flag_Is_Null) && (Resource.Metadata.Behaviors.Dark_Flag));

    if (keepOutOfIndex)
    {
        Solr_Controller.Delete_Resource_From_Index(Settings.Servers.Document_Solr_Index_URL, Settings.Servers.Page_Solr_Index_URL, Resource.BibID, Resource.VID);
        return true;
    }

    // Otherwise push the current metadata into the index
    try
    {
        Solr_Controller.Update_Index(Settings.Servers.Document_Solr_Index_URL, Settings.Servers.Page_Solr_Index_URL, Resource.Metadata, true);
    }
    catch (Exception ee)
    {
        // Two log entries: the general warning, then the specific exception message
        OnError("Error saving data to the Solr/Lucene index. The index may not reflect the most recent data in the METS.", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
        OnError("Solr Error: " + ee.Message, Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    }

    return true;
}
/// <summary> Extracts a thumbnail image from a PDF file </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> For every PDF in the resource folder without an existing [pdf name]thm.jpg, a thumbnail is
/// requested from PDF_Tools. The step is skipped entirely when either the ghostscript or the
/// imagemagick executable is not configured. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;

    // Get the executable path/file for ghostscript and imagemagick
    string ghostscript_executable = MultiInstance_Builder_Settings.Ghostscript_Executable;
    string imagemagick_executable = MultiInstance_Builder_Settings.ImageMagick_Executable;

    // The tool settings do not change from file to file, so test them once up front
    bool tools_configured = (!String.IsNullOrEmpty(ghostscript_executable)) && (!String.IsNullOrEmpty(imagemagick_executable));
    if (!tools_configured)
    {
        return true;
    }

    // Preprocess each PDF
    string[] pdfs = Directory.GetFiles(resourceFolder, "*.pdf");
    foreach (string thisPdf in pdfs)
    {
        // Only the final extension is dropped; replacing the extension text would also
        // strip it from the middle of the name
        string thumbnail = resourceFolder + "\\" + Path.GetFileNameWithoutExtension(thisPdf) + "thm.jpg";

        // Does the thumbnail exist for this item?
        if (File.Exists(thumbnail))
        {
            continue;
        }

        PDF_Tools.Create_Thumbnail(resourceFolder, thisPdf, thumbnail, ghostscript_executable, imagemagick_executable);
    }

    return true;
}
/// <summary> Extracts indexable (i.e, without the tags) text from a HTML file </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Every file matching *.htm* in the resource folder is processed, except QC_Error.html and the
/// static page named [BibID]_[VID].html. Existing text files are never overwritten. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;
    string bibID = Resource.BibID;
    string vid = Resource.VID;

    // Concatenation tolerates a null BibID or VID, where calling ToUpper() on one would throw
    string static_page = bibID + "_" + vid + ".HTML";

    // Preprocess each HTML file for the text
    string[] html_files = Directory.GetFiles(resourceFolder, "*.htm*");
    foreach (string thisHtml in html_files)
    {
        string name = Path.GetFileName(thisHtml);

        // Compare without culture rules so that 'i' and 'I' always match
        bool is_qc_error = String.Equals(name, "QC_ERROR.HTML", StringComparison.OrdinalIgnoreCase);
        bool is_static_page = String.Equals(name, static_page, StringComparison.OrdinalIgnoreCase);

        if (!is_qc_error && !is_static_page)
        {
            string text_fileName = name.Replace(".", "_") + ".txt";
            string text_path = resourceFolder + "\\" + text_fileName;

            // Does the full text exist for this item?
            if (!File.Exists(text_path))
            {
                HTML_XML_Text_Extractor.Extract_Text(thisHtml, text_path);
            }
        }
    }

    return true;
}
/// <summary> Saves all of the digital resource information to solr/lucene </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> When a document index URL is configured, the resource is either removed from the index
/// (negative IP restriction membership, or dark) or its metadata is pushed to the index.
/// Update failures are logged through OnError and do not stop processing. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    bool indexConfigured = !String.IsNullOrEmpty(Settings.Servers.Document_Solr_Index_URL);
    if (!indexConfigured)
    {
        return true;
    }

    // Restricted (membership below zero) or dark resources do not belong in the index
    bool restrictedByIp = (!Resource.Metadata.Behaviors.IP_Restriction_Membership_Is_Null) && (Resource.Metadata.Behaviors.IP_Restriction_Membership < 0);
    if (restrictedByIp || ((!Resource.Metadata.Behaviors.Dark_Flag_Is_Null) && (Resource.Metadata.Behaviors.Dark_Flag)))
    {
        Solr_Controller.Delete_Resource_From_Index(Settings.Servers.Document_Solr_Index_URL, Settings.Servers.Page_Solr_Index_URL, Resource.BibID, Resource.VID);
        return true;
    }

    try
    {
        // Save this to the Solr/Lucene database
        Solr_Controller.Update_Index(Settings.Servers.Document_Solr_Index_URL, Settings.Servers.Page_Solr_Index_URL, Resource.Metadata, true);
    }
    catch (Exception ee)
    {
        // Log the general warning first, followed by the exception detail
        OnError("Error saving data to the Solr/Lucene index. The index may not reflect the most recent data in the METS.", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
        OnError("Solr Error: " + ee.Message, Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    }

    return true;
}
/// <summary> Relocates the incoming resource's files to the matching folder on the image server </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Any received METS file is renamed first. The target folder is the image server network
/// location, followed by the first ten BibID characters as five two-character folders, followed
/// by the VID. A resource already located there is left where it is. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    Rename_Any_Received_METS_File(Resource);

    // File root = BibID characters 0-9, in pairs, separated by backslashes
    string[] pairs = new string[5];
    int offset = 0;
    for (int index = 0; index < 5; index++)
    {
        pairs[index] = Resource.BibID.Substring(offset, 2);
        offset += 2;
    }
    Resource.File_Root = String.Join("\\", pairs);

    // Where this resource belongs on the image server
    string serverPackageFolder = Settings.Servers.Image_Server_Network + Resource.File_Root + "\\" + Resource.VID;

    // Already in its final location (re-processing in situ) - nothing to move
    if (NormalizePath(Resource.Resource_Folder) == NormalizePath(serverPackageFolder))
    {
        return true;
    }

    // Moving the package rebuilds this list, so empty it beforehand
    Resource.NewImageFiles.Clear();

    if (Move_All_Files_To_Image_Server(Resource, Resource.NewImageFiles, serverPackageFolder))
    {
        return true;
    }

    // The move failed: report it and stop processing of this resource
    OnError("Error moving some files to the image server for " + Resource.BibID + ":" + Resource.VID, Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    return false;
}
/// <summary> Reloads the METS file and then the basic behavior information for the
/// digital resource, such as collections and thumbnails </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> For an existing package the minimum builder information is added from the database. For a
/// new package the RightsMD module is consulted instead: Campus access sets the IP restriction
/// membership to 1 and Private access sets the dark flag, unless an embargo end date has passed. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    // Load the METS file
    if (!Resource.Load_METS())
    {
        OnError("Error reading most recent METS file from " + Resource.BibID + ":" + Resource.VID, Resource.BibID + ":" + Resource.VID, String.Empty, Resource.BuilderLogId);
        return false;
    }

    // Existing packages: thumbnail, aggregation, and dark/access information come from the database
    if (!Resource.NewPackage)
    {
        Engine_Database.Add_Minimum_Builder_Information(Resource.Metadata);
        return true;
    }

    // New packages: check for any access/restriction/embargo date in the RightsMD section
    RightsMD_Info rightsInfo = Resource.Metadata.Get_Metadata_Module(GlobalVar.PALMM_RIGHTSMD_METADATA_MODULE_KEY) as RightsMD_Info;
    if ((rightsInfo == null) || (!rightsInfo.hasData))
    {
        return true;
    }

    switch (rightsInfo.Access_Code)
    {
        case RightsMD_Info.AccessCode_Enum.Campus:
            // Restrict unless there is an embargo end date that is not in the future
            if ((!rightsInfo.Has_Embargo_End) || (DateTime.Compare(DateTime.Now, rightsInfo.Embargo_End) < 0))
            {
                Resource.Metadata.Behaviors.IP_Restriction_Membership = 1;
            }
            break;

        case RightsMD_Info.AccessCode_Enum.Private:
            // Make dark unless there is an embargo end date that is not in the future
            if ((!rightsInfo.Has_Embargo_End) || (DateTime.Compare(DateTime.Now, rightsInfo.Embargo_End) < 0))
            {
                Resource.Metadata.Behaviors.Dark_Flag = true;
            }
            break;
    }

    return true;
}
/// <summary> Reloads the METS file and then the basic behavior information for the
/// digital resource, such as collections and thumbnails </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Existing packages receive their minimum builder information from the database. New packages
/// are restricted according to the RightsMD access code (Campus: IP restriction membership 1,
/// Private: dark), but only while no embargo end date exists or that date is still ahead. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    // A METS file that cannot be read is a critical error
    bool metsLoaded = Resource.Load_METS();
    if (!metsLoaded)
    {
        OnError("Error reading most recent METS file from " + Resource.BibID + ":" + Resource.VID, Resource.BibID + ":" + Resource.VID, String.Empty, Resource.BuilderLogId);
        return false;
    }

    if (Resource.NewPackage)
    {
        // Check for any access/restriction/embargo date in the RightsMD section
        RightsMD_Info rightsInfo = Resource.Metadata.Get_Metadata_Module(GlobalVar.PALMM_RIGHTSMD_METADATA_MODULE_KEY) as RightsMD_Info;
        if ((rightsInfo != null) && (rightsInfo.hasData))
        {
            switch (rightsInfo.Access_Code)
            {
                case RightsMD_Info.AccessCode_Enum.Campus:
                    // No embargo end, or an end date still in the future, keeps the restriction
                    if ((!rightsInfo.Has_Embargo_End) || (DateTime.Compare(DateTime.Now, rightsInfo.Embargo_End) < 0))
                    {
                        Resource.Metadata.Behaviors.IP_Restriction_Membership = 1;
                    }
                    break;

                case RightsMD_Info.AccessCode_Enum.Private:
                    // No embargo end, or an end date still in the future, keeps the item dark
                    if ((!rightsInfo.Has_Embargo_End) || (DateTime.Compare(DateTime.Now, rightsInfo.Embargo_End) < 0))
                    {
                        Resource.Metadata.Behaviors.Dark_Flag = true;
                    }
                    break;
            }
        }
    }
    else
    {
        // Add thumbnail, aggregation information, and dark/access information from the database
        SobekCM_Database.Add_Minimum_Builder_Information(Resource.Metadata);
    }

    return true;
}
/// <summary> Updates the item-level web.config file based on restriction information </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Any existing web.config in the resource folder is deleted. A new one is written only when the
/// resource is dark or has a positive IP restriction membership. It allows 127.0.0.1 and, when
/// configured, the web server IP, and leaves a non-http main thumbnail open to unlisted addresses. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    // Delete any existing web.config file and write it again as necessary
    try
    {
        string web_config = Resource.Resource_Folder + "\\web.config";
        if (File.Exists(web_config))
        {
            File.Delete(web_config);
        }

        if ((Resource.Metadata.Behaviors.Dark_Flag) || (Resource.Metadata.Behaviors.IP_Restriction_Membership > 0))
        {
            // using guarantees the file handle is flushed and released even if a write fails
            using (StreamWriter writer = new StreamWriter(web_config, false))
            {
                writer.WriteLine("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
                writer.WriteLine("<configuration>");
                writer.WriteLine(" <system.webServer>");
                writer.WriteLine(" <security>");
                writer.WriteLine(" <ipSecurity allowUnlisted=\"false\">");
                writer.WriteLine(" <clear />");
                writer.WriteLine(" <add ipAddress=\"127.0.0.1\" allowed=\"true\" />");
                if (!String.IsNullOrEmpty(Settings.Servers.SobekCM_Web_Server_IP))
                {
                    writer.WriteLine(" <add ipAddress=\"" + Settings.Servers.SobekCM_Web_Server_IP.Trim() + "\" allowed=\"true\" />");
                }
                writer.WriteLine(" </ipSecurity>");
                writer.WriteLine(" </security>");
                writer.WriteLine(" <modules runAllManagedModulesForAllRequests=\"true\" />");
                writer.WriteLine(" </system.webServer>");

                // Is there now a main thumbnail? A null thumbnail is treated like an empty one,
                // so it cannot abort the write and leave a truncated file behind.
                string main_thumbnail = Resource.Metadata.Behaviors.Main_Thumbnail;
                if ((!String.IsNullOrEmpty(main_thumbnail)) && (main_thumbnail.IndexOf("http:") < 0))
                {
                    writer.WriteLine(" <location path=\"" + main_thumbnail + "\">");
                    writer.WriteLine(" <system.webServer>");
                    writer.WriteLine(" <security>");
                    writer.WriteLine(" <ipSecurity allowUnlisted=\"true\" />");
                    writer.WriteLine(" </security>");
                    writer.WriteLine(" </system.webServer>");
                    writer.WriteLine(" </location>");
                }

                writer.WriteLine("</configuration>");
            }
        }
    }
    catch (Exception)
    {
        OnError("Unable to update the resource web.config file", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
        return false;
    }

    return true;
}
/// <summary> Moves every file in the resource's current folder into the given image server folder </summary>
/// <param name="ResourcePackage"> Incoming digital resource being moved </param>
/// <param name="NewImageFiles"> List which receives the name of each moved file whose extension is a page image extension </param>
/// <param name="ServerPackageFolder"> Destination folder on the image server </param>
/// <returns> TRUE if every step succeeded, FALSE if any exception was thrown </returns>
/// <remarks> The destination folder is created when missing; otherwise an existing BibID_VID.mets.xml there
/// is first copied to a dated .mets.bak file. Files already present at the destination are
/// replaced. Afterwards the package is deleted and repointed at the destination folder. </remarks>
private bool Move_All_Files_To_Image_Server(Incoming_Digital_Resource ResourcePackage, List<string> NewImageFiles, string ServerPackageFolder)
{
    try
    {
        if (Directory.Exists(ServerPackageFolder))
        {
            // Keep a dated copy of the METS file as it looked before this change
            string existingMets = ServerPackageFolder + "\\" + ResourcePackage.BibID + "_" + ResourcePackage.VID + ".mets.xml";
            if (File.Exists(existingMets))
            {
                string datedBackup = ServerPackageFolder + "\\" + ResourcePackage.BibID + "_" + ResourcePackage.VID + "_" + DateTime.Now.Year + "_" + DateTime.Now.Month + "_" + DateTime.Now.Day + ".mets.bak";
                File.Copy(existingMets, datedBackup, true);
            }
        }
        else
        {
            // Make sure a directory exists here
            Directory.CreateDirectory(ServerPackageFolder);
        }

        // Move all the files to the digital resource file server
        foreach (string sourcePath in Directory.GetFiles(ResourcePackage.Resource_Folder))
        {
            FileInfo sourceInfo = new FileInfo(sourcePath);
            string destinationPath = ServerPackageFolder + "/" + sourceInfo.Name;

            // Remember page image files being moved; the list may be used later
            string bareExtension = sourceInfo.Extension.ToUpper().Replace(".", "");
            if (Settings.System.Page_Image_Extensions.Contains(bareExtension))
            {
                NewImageFiles.Add(sourceInfo.Name);
            }

            // Replace any file of the same name already at the destination
            if (File.Exists(destinationPath))
            {
                File.Delete(destinationPath);
            }

            File.Move(sourcePath, destinationPath);
        }

        // Remove the directory and any files which somehow remain
        ResourcePackage.Delete();

        // Since the package has been moved, repoint the resource
        ResourcePackage.Resource_Folder = ServerPackageFolder;
        return true;
    }
    catch
    {
        // Any failure is signalled to the caller as FALSE
        return false;
    }
}
/// <summary> Converts JPEG files that exceed the configured maximum size into TIFF files </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> For each non-thumbnail JPEG wider or taller than the configured JPEG maximum, the original is
/// copied to the backup folder as [name]_ORIG.jpg, saved again as [name].tif in the resource
/// folder, and then deleted. The first error is reported through OnError and ends the pass. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;
    string[] all_jpegs = Directory.GetFiles(resourceFolder, "*.jpg");

    // Check each JPEG
    // NOTE(review): reuseStream is handed to SafeImageFromFile by reference and never closed here -
    // confirm the helper closes it
    FileStream reuseStream = null;
    foreach (string thisJpeg in all_jpegs)
    {
        // Exclude thumbnails
        if (thisJpeg.IndexOf("thm.jpg", StringComparison.InvariantCultureIgnoreCase) > 0)
        {
            continue;
        }

        // Strip only the trailing extension; a Replace on the extension text would also
        // remove it from the middle of the name
        string base_name = Path.GetFileNameWithoutExtension(thisJpeg);

        try
        {
            bool converted = false;

            // Dispose the loaded image as soon as it has been examined and re-saved
            using (Image jpegSourceImg = SafeImageFromFile(thisJpeg, ref reuseStream))
            {
                if ((jpegSourceImg.Width > Engine_ApplicationCache_Gateway.Settings.Resources.JPEG_Maximum_Width) || (jpegSourceImg.Height > Engine_ApplicationCache_Gateway.Settings.Resources.JPEG_Maximum_Height))
                {
                    // Copy the JPEG into the backup folder
                    string final_destination = Path.Combine(resourceFolder, Engine_ApplicationCache_Gateway.Settings.Resources.Backup_Files_Folder_Name);
                    if (!Directory.Exists(final_destination))
                    {
                        Directory.CreateDirectory(final_destination);
                    }
                    string copy_file = final_destination + "\\" + base_name + "_ORIG.jpg";
                    File.Copy(thisJpeg, copy_file, true);

                    // Create the TIFF
                    string tiff_file = resourceFolder + "\\" + base_name + ".tif";
                    jpegSourceImg.Save(tiff_file, ImageFormat.Tiff);
                    converted = true;
                }
            }

            // Delete the original JPEG only after the image object has been released
            if (converted)
            {
                File.Delete(thisJpeg);
            }
        }
        catch (Exception ee)
        {
            OnError("Error checking JPEG in ConvertLargeJpegItemModule : " + ee.Message, Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
            return true;
        }
    }

    return true;
}
/// <summary> Transfers all files from the resource's folder to the image server folder provided </summary>
/// <param name="ResourcePackage"> Incoming digital resource being moved </param>
/// <param name="NewImageFiles"> List to which the names of moved page image files are added </param>
/// <param name="ServerPackageFolder"> Destination folder on the image server </param>
/// <returns> TRUE when the move completed, FALSE when any exception occurred along the way </returns>
/// <remarks> If the destination already exists and holds BibID_VID.mets.xml, that file is copied to a dated
/// .mets.bak before anything is moved. Same-named destination files are deleted before each
/// move. On success the package is deleted and its Resource_Folder is set to the destination. </remarks>
private bool Move_All_Files_To_Image_Server(Incoming_Digital_Resource ResourcePackage, List<string> NewImageFiles, string ServerPackageFolder)
{
    try
    {
        bool destinationExists = Directory.Exists(ServerPackageFolder);
        if (!destinationExists)
        {
            Directory.CreateDirectory(ServerPackageFolder);
        }
        else
        {
            // Copy any existing METS file, to keep what the METS looked like before this change
            string metsBaseName = ServerPackageFolder + "\\" + ResourcePackage.BibID + "_" + ResourcePackage.VID;
            if (File.Exists(metsBaseName + ".mets.xml"))
            {
                File.Copy(metsBaseName + ".mets.xml", metsBaseName + "_" + DateTime.Now.Year + "_" + DateTime.Now.Month + "_" + DateTime.Now.Day + ".mets.bak", true);
            }
        }

        // Move all the files to the digital resource file server
        string[] filesToMove = Directory.GetFiles(ResourcePackage.Resource_Folder);
        for (int index = 0; index < filesToMove.Length; index++)
        {
            string currentFile = filesToMove[index];
            FileInfo currentInfo = new FileInfo(currentFile);
            string targetFile = ServerPackageFolder + "/" + currentInfo.Name;

            // Keep the list of new image files being moved, which may be used later
            if (Settings.System.Page_Image_Extensions.Contains(currentInfo.Extension.ToUpper().Replace(".", "")))
            {
                NewImageFiles.Add(currentInfo.Name);
            }

            // An existing file at the destination is removed first
            if (File.Exists(targetFile))
            {
                File.Delete(targetFile);
            }

            File.Move(currentFile, targetFile);
        }

        // Remove the directory and any files which somehow remain
        ResourcePackage.Delete();

        // Since the package has been moved, repoint the resource
        ResourcePackage.Resource_Folder = ServerPackageFolder;
        return true;
    }
    catch
    {
        // Failure of any step is reported to the caller as FALSE
        return false;
    }
}
/// <summary> Updates the item-level web.config file based on restriction information </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Any existing web.config is deleted first. A new one is written only when the item is
/// dark or has a positive IP restriction membership value. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    // Delete any existing web.config file and write it as necessary
    try
    {
        string web_config = Resource.Resource_Folder + "\\web.config";
        if (File.Exists(web_config))
        {
            File.Delete(web_config);
        }

        if ((Resource.Metadata.Behaviors.Dark_Flag) || (Resource.Metadata.Behaviors.IP_Restriction_Membership > 0))
        {
            // The using block flushes and closes the writer even if a write throws
            using (StreamWriter writer = new StreamWriter(web_config, false))
            {
                writer.WriteLine("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
                writer.WriteLine("<configuration>");
                writer.WriteLine(" <system.webServer>");
                writer.WriteLine(" <security>");
                writer.WriteLine(" <ipSecurity allowUnlisted=\"false\">");
                writer.WriteLine(" <clear />");
                writer.WriteLine(" <add ipAddress=\"127.0.0.1\" allowed=\"true\" />");
                if (!String.IsNullOrEmpty(Settings.Servers.SobekCM_Web_Server_IP))
                {
                    writer.WriteLine(" <add ipAddress=\"" + Settings.Servers.SobekCM_Web_Server_IP.Trim() + "\" allowed=\"true\" />");
                }
                writer.WriteLine(" </ipSecurity>");
                writer.WriteLine(" </security>");
                writer.WriteLine(" <modules runAllManagedModulesForAllRequests=\"true\" />");
                writer.WriteLine(" </system.webServer>");

                // If there is a local main thumbnail, leave it open to unlisted addresses
                if ((Resource.Metadata.Behaviors.Main_Thumbnail.Length > 0) && (Resource.Metadata.Behaviors.Main_Thumbnail.IndexOf("http:") < 0))
                {
                    writer.WriteLine(" <location path=\"" + Resource.Metadata.Behaviors.Main_Thumbnail + "\">");
                    writer.WriteLine(" <system.webServer>");
                    writer.WriteLine(" <security>");
                    writer.WriteLine(" <ipSecurity allowUnlisted=\"true\" />");
                    writer.WriteLine(" </security>");
                    writer.WriteLine(" </system.webServer>");
                    writer.WriteLine(" </location>");
                }

                writer.WriteLine("</configuration>");
            }
        }
    }
    catch (Exception)
    {
        OnError("Unable to update the resource web.config file", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
        return false;
    }

    return true;
}
/// <summary> Writes the SobekCM service METS for the digital resource </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if the METS was saved, FALSE if saving threw an exception (which is reported through OnError) </returns>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    bool saved;
    try
    {
        Resource.Metadata.Save_SobekCM_METS();
        saved = true;
    }
    catch (Exception saveError)
    {
        string message = "Exception caught while saving the SobekCM service METS : " + saveError.Message;
        string packageId = Resource.BibID + ":" + Resource.VID;
        OnError(message, packageId, Resource.METS_Type_String, Resource.BuilderLogId);
        saved = false;
    }

    return saved;
}
/// <summary> Writes the SobekCM service METS for the digital resource </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if the METS was saved, FALSE if saving threw an exception (which is reported through OnError) </returns>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    try
    {
        Resource.Metadata.Save_SobekCM_METS();
        return true;
    }
    catch (Exception metsFailure)
    {
        // Report the failure against this package and signal that processing should stop
        OnError(
            "Exception caught while saving the SobekCM service METS : " + metsFailure.Message,
            Resource.BibID + ":" + Resource.VID,
            Resource.METS_Type_String,
            Resource.BuilderLogId);
        return false;
    }
}
/// <summary> Ensures a main thumbnail has been selected for this digital resource </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Nothing is changed when the current main thumbnail contains "http:" or names a file which
/// exists in the resource folder. Otherwise the candidates are tried in order: mainthm.jpg, the first
/// file matching *thm.jpg, and (for items with no pages and an audio or video download) a copy of the
/// stock multimedia image. This method always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    // Ensure a thumbnail is attached
    if ((Resource.Metadata.Behaviors.Main_Thumbnail.Length == 0) || ((Resource.Metadata.Behaviors.Main_Thumbnail.IndexOf("http:") < 0) && (!File.Exists(Path.Combine(Resource.Resource_Folder, Resource.Metadata.Behaviors.Main_Thumbnail)))))
    {
        // Look for a valid thumbnail
        if (File.Exists(Path.Combine(Resource.Resource_Folder, "mainthm.jpg")))
        {
            Resource.Metadata.Behaviors.Main_Thumbnail = "mainthm.jpg";
        }
        else
        {
            string[] jpeg_files = Directory.GetFiles(Resource.Resource_Folder, "*thm.jpg");
            if (jpeg_files.Length > 0)
            {
                Resource.Metadata.Behaviors.Main_Thumbnail = (new FileInfo(jpeg_files[0])).Name;
            }
            else if (Resource.Metadata.Divisions.Page_Count == 0)
            {
                List<SobekCM_File_Info> downloads = Resource.Metadata.Divisions.Download_Other_Files;
                foreach (SobekCM_File_Info thisDownloadFile in downloads)
                {
                    string mimetype = thisDownloadFile.MIME_Type(thisDownloadFile.File_Extension).ToUpper();
                    if ((mimetype.IndexOf("AUDIO") >= 0) || (mimetype.IndexOf("VIDEO") >= 0))
                    {
                        // CodeBase is a URI to the assembly FILE (file:///...), so convert it to a
                        // local path and take its directory before looking for the images subfolder
                        string assemblyPath = new Uri(Assembly.GetExecutingAssembly().CodeBase).LocalPath;
                        string startupPath = Path.GetDirectoryName(assemblyPath);
                        string stock_image = Path.Combine(startupPath, "images\\multimedia.jpg");

                        if (File.Exists(stock_image))
                        {
                            File.Copy(stock_image, Path.Combine(Resource.Resource_Folder, "multimediathm.jpg"), true);
                            Resource.Metadata.Behaviors.Main_Thumbnail = "multimediathm.jpg";
                        }

                        // Only the first audio/video download is considered
                        break;
                    }
                }
            }
        }

        // Save to the database only for an existing item (ItemID > 0) which now has a thumbnail
        if ((Resource.Metadata.Web.ItemID > 0) && (Resource.Metadata.Behaviors.Main_Thumbnail.Length > 0))
        {
            SobekCM_Database.Set_Item_Main_Thumbnail(Resource.BibID, Resource.VID, Resource.Metadata.Behaviors.Main_Thumbnail);
        }
    }

    return true;
}
/// <summary> Makes sure the digital resource has a usable main thumbnail </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Nothing is changed when the current main thumbnail contains "http:" or names a file which
/// exists in the resource folder. Otherwise the candidates are tried in order: mainthm.jpg, the first
/// file matching *thm.jpg, and (for items with no pages and an audio or video download) a copy of the
/// stock multimedia image from the builder executable directory. This method always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string currentThumbnail = Resource.Metadata.Behaviors.Main_Thumbnail;

    // Decide whether a replacement is needed at all
    bool needsThumbnail = currentThumbnail.Length == 0;
    if (!needsThumbnail)
    {
        bool isRemote = currentThumbnail.IndexOf("http:") >= 0;
        needsThumbnail = !isRemote && !File.Exists(Path.Combine(Resource.Resource_Folder, currentThumbnail));
    }
    if (!needsThumbnail)
    {
        return true;
    }

    // Try each candidate in order of preference
    if (File.Exists(Path.Combine(Resource.Resource_Folder, "mainthm.jpg")))
    {
        Resource.Metadata.Behaviors.Main_Thumbnail = "mainthm.jpg";
    }
    else
    {
        string[] thumbnailCandidates = Directory.GetFiles(Resource.Resource_Folder, "*thm.jpg");
        if (thumbnailCandidates.Length > 0)
        {
            Resource.Metadata.Behaviors.Main_Thumbnail = (new FileInfo(thumbnailCandidates[0])).Name;
        }
        else if (Resource.Metadata.Divisions.Page_Count == 0)
        {
            List<SobekCM_File_Info> otherFiles = Resource.Metadata.Divisions.Download_Other_Files;
            foreach (SobekCM_File_Info download in otherFiles)
            {
                string upperMime = download.MIME_Type(download.File_Extension).ToUpper();
                bool isMultimedia = (upperMime.IndexOf("AUDIO") >= 0) || (upperMime.IndexOf("VIDEO") >= 0);
                if (!isMultimedia)
                {
                    continue;
                }

                if (File.Exists(Path.Combine(MultiInstance_Builder_Settings.Builder_Executable_Directory, "images\\multimedia.jpg")))
                {
                    File.Copy(Path.Combine(MultiInstance_Builder_Settings.Builder_Executable_Directory, "images\\multimedia.jpg"), Path.Combine(Resource.Resource_Folder, "multimediathm.jpg"), true);
                    Resource.Metadata.Behaviors.Main_Thumbnail = "multimediathm.jpg";
                }

                // Only the first audio/video download is considered
                break;
            }
        }
    }

    // Save to the database only for an existing item (ItemID > 0) which now has a thumbnail
    bool existingItem = Resource.Metadata.Web.ItemID > 0;
    if (existingItem && (Resource.Metadata.Behaviors.Main_Thumbnail.Length > 0))
    {
        SobekCM_Item_Database.Set_Item_Main_Thumbnail(Resource.BibID, Resource.VID, Resource.Metadata.Behaviors.Main_Thumbnail);
    }

    return true;
}
/// <summary> Takes the page count from a PDF file, for statistical reporting, when the item has no pages of its own </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Only the first PDF returned for the resource folder is inspected. This method always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    // Items which already have pages keep their own count
    if (Resource.Metadata.Divisions.Physical_Tree.Pages_PreOrder.Count > 0)
    {
        return true;
    }

    string[] candidatePdfs = Directory.GetFiles(Resource.Resource_Folder, "*.pdf");
    if (candidatePdfs.Length == 0)
    {
        return true;
    }

    // A non-positive count from the PDF is ignored
    int pagesInPdf = PDF_Tools.Page_Count(candidatePdfs[0]);
    if (pagesInPdf > 0)
    {
        Resource.Metadata.Divisions.Page_Count = pagesInPdf;
    }

    return true;
}
/// <summary> Records tracking workflow and milestone information for the incoming digital resource </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> This method always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    int itemId = Resource.Metadata.Web.ItemID;

    // Clear the flag for additional work
    SobekCM_Item_Database.Update_Additional_Work_Needed_Flag(itemId, false);

    // Mark a log in the database that this was handled as well
    SobekCM_Item_Database.Add_Workflow(itemId, "Bulk Loaded", String.Empty, "SobekCM Bulk Loader", String.Empty);

    // Born-digital items with download files and a non-negative IP restriction value
    // have their digitization milestones closed out completely
    bool closeMilestones =
        !Resource.Metadata.Tracking.Born_Digital_Is_Null
        && Resource.Metadata.Tracking.Born_Digital
        && Resource.Metadata.Behaviors.IP_Restriction_Membership >= 0
        && Resource.Metadata.Divisions.Download_Tree.Has_Files;

    if (closeMilestones)
    {
        SobekCM_Item_Database.Update_Digitization_Milestone(Resource.Metadata.Web.ItemID, 4, DateTime.Now);
    }

    return true;
}
/// <summary> Records tracking workflow and milestone information for the incoming digital resource </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> This method always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    // Clear the flag for additional work
    SobekCM_Database.Update_Additional_Work_Needed_Flag(Resource.Metadata.Web.ItemID, false, null);

    // Mark a log in the database that this was handled as well
    SobekCM_Item_Database.Add_Workflow(Resource.Metadata.Web.ItemID, "Bulk Loaded", String.Empty, "SobekCM Bulk Loader", String.Empty);

    // Only born-digital items are candidates for closing out the digitization milestones
    bool bornDigital = !Resource.Metadata.Tracking.Born_Digital_Is_Null && Resource.Metadata.Tracking.Born_Digital;
    if (bornDigital)
    {
        // They must also have a non-negative IP restriction value and at least one download file
        if ((Resource.Metadata.Behaviors.IP_Restriction_Membership >= 0) && (Resource.Metadata.Divisions.Download_Tree.Has_Files))
        {
            SobekCM_Item_Database.Update_Digitization_Milestone(Resource.Metadata.Web.ItemID, 4, DateTime.Now);
        }
    }

    return true;
}
/// <summary> Scans the text files in the resource folder for something which looks like a social
/// security number and, on a possible match, emails a warning to the privacy email address </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Scanning stops at the first file with a possible match. Exceptions thrown while scanning
/// are swallowed. This method always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;
    string bibID = Resource.BibID;
    string vid = Resource.VID;

    // Name of the file holding the possible SSN, and the matching text itself
    string flaggedFileName = String.Empty;
    string ssn_match = String.Empty;

    try
    {
        // Step through each text file until one yields a possible match
        foreach (string textFile in Directory.GetFiles(resourceFolder, "*.txt"))
        {
            ssn_match = Text_Cleaner.Has_SSN(textFile);
            if (ssn_match.Length > 0)
            {
                flaggedFileName = (new FileInfo(textFile)).Name;
                break;
            }
        }
    }
    catch
    {
        // Scanning is best-effort; failures here are deliberately ignored
    }

    // Nothing suspicious found
    if (ssn_match.Length == 0)
    {
        return true;
    }

    // Send a database email if a privacy address is configured
    if (!String.IsNullOrEmpty(Settings.Email.Privacy_Email))
    {
        string emailBody = "A string which appeared to be a possible social security number was found while bulk loading or post-processing an item.\n\nThe SSN was found in package " + bibID + ":" + vid + " in file '" + flaggedFileName + "'.\n\nThe text which may be a SSN is '" + ssn_match + "'.\n\nPlease review this item and remove any private information which should not be on the web server.";
        Email_Helper.SendEmail(Settings.Email.Privacy_Email, "Possible Social Security Number Located", emailBody, false, Settings.System.System_Name);
    }

    OnProcess("Possible SSN Located (" + flaggedFileName + ")", "Privacy Checking", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    return true;
}
/// <summary> Tidies a digital resource folder left by previous versions: backup METS files and the
/// original METS are moved into the backup subfolder and citation_mets.xml is deleted </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Any exception is logged as a warning through OnProcess. This method always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    try
    {
        string packageFolder = Resource.Resource_Folder;

        // Make sure the backup subfolder exists
        string backupFolder = packageFolder + "\\" + Settings.Resources.Backup_Files_Folder_Name;
        if (!Directory.Exists(backupFolder))
        {
            Directory.CreateDirectory(backupFolder);
        }

        // Move each backup METS into the subfolder, replacing any file already there
        foreach (string strayBackup in Directory.GetFiles(packageFolder, "*.mets.bak"))
        {
            string backupTarget = backupFolder + "\\" + Path.GetFileName(strayBackup);
            if (File.Exists(backupTarget))
            {
                File.Delete(backupTarget);
            }
            File.Move(strayBackup, backupTarget);
        }

        // Move the original METS the same way
        string originalMets = packageFolder + "\\original.mets.xml";
        if (File.Exists(originalMets))
        {
            string archivedOriginal = backupFolder + "\\original.mets.xml";
            if (File.Exists(archivedOriginal))
            {
                File.Delete(archivedOriginal);
            }
            File.Move(originalMets, archivedOriginal);
        }

        // The citation_mets.xml file is simply removed
        string citationMets = packageFolder + "\\citation_mets.xml";
        if (File.Exists(citationMets))
        {
            File.Delete(citationMets);
        }
    }
    catch
    {
        // Log as a warning
        OnProcess("WARNING: Unable to perform final cleanup on web folder", "Warning", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    }

    return true;
}
/// <summary> Links each newly added image file to the physical tree of the resource </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Files whose upper-cased name contains ".QC.JPG" or "THM.JPG" are left out.
/// This method always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    // These flags are only read by the disabled view logic at the end of this method
    bool jpeg_added = false;
    bool jpeg2000_added = false;

    foreach (string newImage in Resource.NewImageFiles)
    {
        // Leave out the legacy QC images and the thumbnails
        string upperName = newImage.ToUpper();
        if ((upperName.IndexOf(".QC.JPG") >= 0) || (upperName.IndexOf("THM.JPG") >= 0))
        {
            continue;
        }

        // Add this file
        FileInfo imageInfo = new FileInfo(newImage);
        Resource.Metadata.Divisions.Physical_Tree.Add_File(imageInfo.Name);

        // Also, note whether this is a jpeg or jpeg2000
        switch (imageInfo.Extension.ToUpper())
        {
            case ".JP2":
                jpeg2000_added = true;
                break;

            case ".JPG":
                jpeg_added = true;
                break;
        }
    }

    //// Ensure proper views are attached to this item
    //if ((jpeg2000_added) || (jpeg_added))
    //{
    //    Resource.Metadata.Behaviors.Add_View(View_Enum.JPEG);
    //    if (jpeg_added)
    //    {
    //        Resource.Metadata.Behaviors.Add_View(View_Enum.JPEG);
    //        Resource.Metadata.Behaviors.Add_View(View_Enum.RELATED_IMAGES);
    //        if (jpeg2000_added)
    //            Resource.Metadata.Behaviors.Add_View(View_Enum.JPEG2000);
    //    }
    //    else
    //    {
    //        Resource.Metadata.Behaviors.Add_View(View_Enum.JPEG2000);
    //    }
    //}

    return true;
}
/// <summary> Scans the text files in the resource folder for something which looks like a social
/// security number and, on a possible match, emails a warning to the privacy email address </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Scanning stops at the first file with a possible match. Exceptions thrown while scanning
/// are swallowed. This method always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string scanFolder = Resource.Resource_Folder;
    string bibID = Resource.BibID;
    string vid = Resource.VID;

    // File in which a possible SSN was found, and the text which matched
    string ssn_text_file_name = String.Empty;
    string ssn_match = String.Empty;

    try
    {
        string[] textFiles = Directory.GetFiles(scanFolder, "*.txt");
        for (int i = 0; i < textFiles.Length; i++)
        {
            ssn_match = Text_Cleaner.Has_SSN(textFiles[i]);
            if (ssn_match.Length > 0)
            {
                // First possible match wins; no need to look at the remaining files
                ssn_text_file_name = (new FileInfo(textFiles[i])).Name;
                break;
            }
        }
    }
    catch
    {
        // Scanning is best-effort; failures here are deliberately ignored
    }

    if (ssn_match.Length > 0)
    {
        // Send a database email, but only when a privacy address is configured
        bool canNotify = !String.IsNullOrEmpty(Settings.Email.Privacy_Email);
        if (canNotify)
        {
            Email_Helper.SendEmail(
                Settings.Email.Privacy_Email,
                "Possible Social Security Number Located",
                "A string which appeared to be a possible social security number was found while bulk loading or post-processing an item.\n\nThe SSN was found in package " + bibID + ":" + vid + " in file '" + ssn_text_file_name + "'.\n\nThe text which may be a SSN is '" + ssn_match + "'.\n\nPlease review this item and remove any private information which should not be on the web server.",
                false,
                Settings.System.System_Name);
        }

        // The possible match is always logged, whether or not an email went out
        OnProcess("Possible SSN Located (" + ssn_text_file_name + ")", "Privacy Checking", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    }

    return true;
}
/// <summary> Moves the incoming digital resource into its package folder on the image server, or,
/// when it is already being processed in that folder, just takes a dated backup copy of its METS </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> The file root is built from the first ten characters of the BibID, split into five
/// two-character folder names. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    // Determine the file root for this: five two-character pieces of the BibID
    string[] rootPieces = new string[5];
    for (int piece = 0; piece < rootPieces.Length; piece++)
    {
        rootPieces[piece] = Resource.BibID.Substring(piece * 2, 2);
    }
    Resource.File_Root = String.Join("\\", rootPieces);

    // Determine the destination folder for this resource
    string serverPackageFolder = Settings.Servers.Image_Server_Network + Resource.File_Root + "\\" + Resource.VID;

    // If this is re-processing the resource in situ, there is nothing to move
    bool alreadyInPlace = NormalizePath(Resource.Resource_Folder) == NormalizePath(serverPackageFolder);
    if (alreadyInPlace)
    {
        // Still worth making a backup though, in case the METS is changed down-stream
        string currentMets = Resource.Resource_Folder + "\\" + Resource.BibID + "_" + Resource.VID + ".mets.xml";
        if ((Directory.Exists(Resource.Resource_Folder)) && (File.Exists(currentMets)))
        {
            string backupFolder = Resource.Resource_Folder + "\\sobek_files";
            if (!Directory.Exists(backupFolder))
            {
                Directory.CreateDirectory(backupFolder);
            }

            // One backup per calendar day; an existing backup for today is kept
            string destination_mets = Resource.Resource_Folder + "\\sobek_files\\" + Resource.BibID + "_" + Resource.VID + "_" + DateTime.Now.Year + "_" + DateTime.Now.Month + "_" + DateTime.Now.Day + ".mets.bak";
            if (!File.Exists(destination_mets))
            {
                File.Copy(currentMets, destination_mets, true);
            }
        }

        return true;
    }

    // Rename the received METS file
    Rename_Any_Received_METS_File(Resource);

    // Clear the list of new image files here, since moving the package will recalculate this
    Resource.NewImageFiles.Clear();

    // Move all files to the image server
    bool moved = Move_All_Files_To_Image_Server(Resource, Resource.NewImageFiles, serverPackageFolder);
    if (!moved)
    {
        OnError("Error moving some files to the image server for " + Resource.BibID + ":" + Resource.VID, Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
        return false;
    }

    return true;
}
/// <summary> Takes the page count from a PDF file, for statistical reporting, when the item has no pages of its own </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Only the first PDF returned for the resource folder is inspected. This method always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    bool hasPages = Resource.Metadata.Divisions.Physical_Tree.Pages_PreOrder.Count > 0;
    if (!hasPages)
    {
        // With no pages, look for a PDF which can supply a page count
        string[] pdfsInFolder = Directory.GetFiles(Resource.Resource_Folder, "*.pdf");
        int countFromPdf = (pdfsInFolder.Length > 0) ? PDF_Tools.Page_Count(pdfsInFolder[0]) : 0;

        // A non-positive count leaves the existing value untouched
        if (countFromPdf > 0)
        {
            Resource.Metadata.Divisions.Page_Count = countFromPdf;
        }
    }

    return true;
}
/// <summary> Pushes the digital resource information to the solr/lucene index </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Nothing is done when no document index URL is configured. An indexing failure is
/// reported through OnError and this method still returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    // Without a document index URL there is nothing to update
    if (String.IsNullOrEmpty(Settings.Servers.Document_Solr_Index_URL))
    {
        return true;
    }

    try
    {
        Solr_Controller.Update_Index(Settings.Servers.Document_Solr_Index_URL, Settings.Servers.Page_Solr_Index_URL, Resource.Metadata, true);
    }
    catch (Exception indexError)
    {
        // Two entries are logged: a general message, then the exception detail
        OnError("Error saving data to the Solr/Lucene index.  The index may not reflect the most recent data in the METS.", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
        OnError("Solr Error: " + indexError.Message, Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    }

    return true;
}
/// <summary> Tidies a digital resource folder left by previous versions: backup METS files and the
/// original METS are moved into the backup subfolder and citation_mets.xml is deleted </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Any exception is logged as a warning through OnProcess. This method always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    try
    {
        // Ensure the backup subfolder exists
        string backup_dir = Resource.Resource_Folder + "\\" + Settings.Resources.Backup_Files_Folder_Name;
        bool backupDirMissing = !Directory.Exists(backup_dir);
        if (backupDirMissing)
        {
            Directory.CreateDirectory(backup_dir);
        }

        // Relocate every backup METS, overwriting any earlier copy of the same name
        string[] looseBackups = Directory.GetFiles(Resource.Resource_Folder, "*.mets.bak");
        for (int i = 0; i < looseBackups.Length; i++)
        {
            string relocated = backup_dir + "\\" + Path.GetFileName(looseBackups[i]);
            if (File.Exists(relocated))
            {
                File.Delete(relocated);
            }
            File.Move(looseBackups[i], relocated);
        }

        // Relocate the original METS, overwriting any earlier copy
        string looseOriginal = Resource.Resource_Folder + "\\original.mets.xml";
        string relocatedOriginal = backup_dir + "\\original.mets.xml";
        if (File.Exists(looseOriginal))
        {
            if (File.Exists(relocatedOriginal))
            {
                File.Delete(relocatedOriginal);
            }
            File.Move(looseOriginal, relocatedOriginal);
        }

        // If the citation_mets.xml file exists, delete that
        string citationFile = Resource.Resource_Folder + "\\citation_mets.xml";
        if (File.Exists(citationFile))
        {
            File.Delete(citationFile);
        }
    }
    catch
    {
        // Log as a warning
        OnProcess("WARNING: Unable to perform final cleanup on web folder", "Warning", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    }

    return true;
}
/// <summary> Archives the incoming files, deleting files which match the configured pre-archive
/// pattern beforehand and files which match the post-archive pattern afterwards </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if Archive_Any_Files reported a failure </returns>
/// <remarks> Each pattern is a regular expression applied, ignoring case, to the file name only.
/// An empty or null pattern skips that deletion pass. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string packageFolder = Resource.Resource_Folder;

    // Delete any pre-archive deletes
    bool hasPreArchivePattern = !String.IsNullOrEmpty(Settings.Archive.PreArchive_Files_To_Delete);
    if (hasPreArchivePattern)
    {
        foreach (string candidate in Directory.GetFiles(packageFolder))
        {
            string candidateName = (new FileInfo(candidate)).Name;
            if (Regex.IsMatch(candidateName, Settings.Archive.PreArchive_Files_To_Delete, RegexOptions.IgnoreCase))
            {
                File.Delete(candidate);
            }
        }
    }

    // Archive any files, per the folder instruction
    bool archived = Archive_Any_Files(Resource);
    if (!archived)
    {
        return false;
    }

    // Delete any remaining post-archive deletes
    bool hasPostArchivePattern = !String.IsNullOrEmpty(Settings.Archive.PostArchive_Files_To_Delete);
    if (hasPostArchivePattern)
    {
        // The folder is listed again, since archiving or the first pass may have changed it
        foreach (string candidate in Directory.GetFiles(packageFolder))
        {
            string candidateName = (new FileInfo(candidate)).Name;
            if (Regex.IsMatch(candidateName, Settings.Archive.PostArchive_Files_To_Delete, RegexOptions.IgnoreCase))
            {
                File.Delete(candidate);
            }
        }
    }

    return true;
}
/// <summary> Links each newly added image file to the physical tree of the resource </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Files whose upper-cased name contains ".QC.JPG" or "THM.JPG" are left out.
/// This method always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    // These flags are only read by the disabled view logic at the end of this method
    bool jpeg_added = false;
    bool jpeg2000_added = false;

    foreach (string imagePath in Resource.NewImageFiles)
    {
        // Legacy QC images and thumbnails are not page images
        bool isLegacyQc = imagePath.ToUpper().IndexOf(".QC.JPG") >= 0;
        bool isThumbnail = !isLegacyQc && (imagePath.ToUpper().IndexOf("THM.JPG") >= 0);
        if (isLegacyQc || isThumbnail)
        {
            continue;
        }

        // Add this file
        FileInfo addedFile = new FileInfo(imagePath);
        Resource.Metadata.Divisions.Physical_Tree.Add_File(addedFile.Name);

        // Also, note whether this is a jpeg or jpeg2000
        string upperExtension = addedFile.Extension.ToUpper();
        if (upperExtension == ".JP2")
        {
            jpeg2000_added = true;
        }
        else if (upperExtension == ".JPG")
        {
            jpeg_added = true;
        }
    }

    //// Ensure proper views are attached to this item
    //if ((jpeg2000_added) || (jpeg_added))
    //{
    //    Resource.Metadata.Behaviors.Add_View(View_Enum.JPEG);
    //    if (jpeg_added)
    //    {
    //        Resource.Metadata.Behaviors.Add_View(View_Enum.JPEG);
    //        Resource.Metadata.Behaviors.Add_View(View_Enum.RELATED_IMAGES);
    //        if (jpeg2000_added)
    //            Resource.Metadata.Behaviors.Add_View(View_Enum.JPEG2000);
    //    }
    //    else
    //    {
    //        Resource.Metadata.Behaviors.Add_View(View_Enum.JPEG2000);
    //    }
    //}

    return true;
}
/// <summary> Extracts indexable text from each PDF file in the resource folder </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> The text for "name.pdf" is written to "name_pdf.txt" in the same folder. A PDF whose
/// text file already exists is skipped. This method always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;

    // Preprocess each PDF
    string[] pdfs = Directory.GetFiles(resourceFolder, "*.pdf");
    foreach (string thisPdf in pdfs)
    {
        // Strip only the trailing extension; String.Replace would also remove the same
        // text if it appeared earlier in the file name
        string fileName = Path.GetFileNameWithoutExtension(thisPdf);
        string text_file = resourceFolder + "\\" + fileName + "_pdf.txt";

        // Does the full text exist for this item?
        if (!File.Exists(text_file))
        {
            PDF_Tools.Extract_Text(thisPdf, text_file);
        }
    }

    return true;
}
/// <summary> Extracts indexable text from each PDF file in the resource folder </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> The text for "name.pdf" is written to "name_pdf.txt" in the same folder. A PDF whose
/// text file already exists is skipped. This method always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;

    // Preprocess each PDF
    foreach (string thisPdf in Directory.GetFiles(resourceFolder, "*.pdf"))
    {
        // Strip only the trailing extension; String.Replace would also remove the same
        // text if it appeared earlier in the file name
        string fileName = Path.GetFileNameWithoutExtension(thisPdf);
        string extracted_text_file = resourceFolder + "\\" + fileName + "_pdf.txt";

        // Skip any PDF whose full text was already extracted
        if (File.Exists(extracted_text_file))
        {
            continue;
        }

        PDF_Tools.Extract_Text(thisPdf, extracted_text_file);
    }

    return true;
}
/// <summary> Archives the incoming files, deleting files which match the configured pre-archive
/// pattern beforehand and files which match the post-archive pattern afterwards </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if Archive_Any_Files reported a failure </returns>
/// <remarks> Each pattern is a regular expression applied, ignoring case, to the file name only.
/// An empty or null pattern skips that deletion pass. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;

    // Delete any pre-archive deletes
    if (!String.IsNullOrEmpty(Settings.Archive.PreArchive_Files_To_Delete))
    {
        string[] beforeArchive = Directory.GetFiles(resourceFolder);
        for (int i = 0; i < beforeArchive.Length; i++)
        {
            FileInfo info = new FileInfo(beforeArchive[i]);
            bool matchesPattern = Regex.Match(info.Name, Settings.Archive.PreArchive_Files_To_Delete, RegexOptions.IgnoreCase).Success;
            if (matchesPattern)
            {
                File.Delete(beforeArchive[i]);
            }
        }
    }

    // Archive any files, per the folder instruction
    if (!Archive_Any_Files(Resource))
    {
        return false;
    }

    // Delete any remaining post-archive deletes
    if (!String.IsNullOrEmpty(Settings.Archive.PostArchive_Files_To_Delete))
    {
        // Get the list of files again
        string[] afterArchive = Directory.GetFiles(resourceFolder);
        for (int i = 0; i < afterArchive.Length; i++)
        {
            FileInfo info = new FileInfo(afterArchive[i]);
            bool matchesPattern = Regex.Match(info.Name, Settings.Archive.PostArchive_Files_To_Delete, RegexOptions.IgnoreCase).Success;
            if (matchesPattern)
            {
                File.Delete(afterArchive[i]);
            }
        }
    }

    return true;
}
/// <summary> Adds every file in the resource folder which passes the exclusion checks to the
/// download tree, regardless of whether it was newly added </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> All name checks ignore case. This method always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    foreach (string candidatePath in Directory.GetFiles(Resource.Resource_Folder))
    {
        string fileName = (new FileInfo(candidatePath)).Name;

        // Skip anything the configured exclusion pattern matches
        if (Regex.IsMatch(fileName, Settings.Resources.Files_To_Exclude_From_Downloads, RegexOptions.IgnoreCase))
        {
            continue;
        }

        // Skip the BibID_VID.html file
        if (String.Equals(fileName, Resource.BibID + "_" + Resource.VID + ".html", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        // Skip names which START with marc.xml or doc.xml
        if ((fileName.IndexOf("marc.xml", StringComparison.OrdinalIgnoreCase) == 0) || (fileName.IndexOf("doc.xml", StringComparison.OrdinalIgnoreCase) == 0))
        {
            continue;
        }

        // Skip names which CONTAIN any of the METS or agreement markers
        if ((fileName.IndexOf(".mets", StringComparison.OrdinalIgnoreCase) >= 0)
            || (fileName.IndexOf("citation_mets.xml", StringComparison.OrdinalIgnoreCase) >= 0)
            || (fileName.IndexOf("ufdc_mets.xml", StringComparison.OrdinalIgnoreCase) >= 0)
            || (fileName.IndexOf("agreement.txt", StringComparison.OrdinalIgnoreCase) >= 0))
        {
            continue;
        }

        // Skip XML files whose name contains the BibID
        if ((fileName.IndexOf(".xml", StringComparison.OrdinalIgnoreCase) >= 0) && (fileName.IndexOf(Resource.BibID, StringComparison.OrdinalIgnoreCase) >= 0))
        {
            continue;
        }

        Resource.Metadata.Divisions.Download_Tree.Add_File(fileName);
    }

    return true;
}
/// <summary> Adds every file in the resource folder which passes the exclusion checks to the
/// download tree, regardless of whether it was newly added </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> All name checks ignore case. This method always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string[] folderFiles = Directory.GetFiles(Resource.Resource_Folder);
    for (int i = 0; i < folderFiles.Length; i++)
    {
        FileInfo info = new FileInfo(folderFiles[i]);
        string name = info.Name;

        // First gate: the configured exclusion pattern and the BibID_VID.html file
        bool excludedBySettings = Regex.Match(name, Settings.Resources.Files_To_Exclude_From_Downloads, RegexOptions.IgnoreCase).Success;
        if (excludedBySettings)
        {
            continue;
        }
        bool isItemHtml = String.Compare(name, Resource.BibID + "_" + Resource.VID + ".html", StringComparison.OrdinalIgnoreCase) == 0;
        if (isItemHtml)
        {
            continue;
        }

        // Second gate: names starting with marc.xml or doc.xml
        bool startsWithMarc = name.IndexOf("marc.xml", StringComparison.OrdinalIgnoreCase) == 0;
        if (startsWithMarc)
        {
            continue;
        }
        bool startsWithDoc = name.IndexOf("doc.xml", StringComparison.OrdinalIgnoreCase) == 0;
        if (startsWithDoc)
        {
            continue;
        }

        // Third gate: names containing a METS or agreement marker
        bool hasReservedMarker =
            name.IndexOf(".mets", StringComparison.OrdinalIgnoreCase) >= 0
            || name.IndexOf("citation_mets.xml", StringComparison.OrdinalIgnoreCase) >= 0
            || name.IndexOf("ufdc_mets.xml", StringComparison.OrdinalIgnoreCase) >= 0
            || name.IndexOf("agreement.txt", StringComparison.OrdinalIgnoreCase) >= 0;
        if (hasReservedMarker)
        {
            continue;
        }

        // Last gate: XML files whose name contains the BibID
        bool isBibXml = name.IndexOf(".xml", StringComparison.OrdinalIgnoreCase) >= 0
            && name.IndexOf(Resource.BibID, StringComparison.OrdinalIgnoreCase) >= 0;
        if (!isBibXml)
        {
            Resource.Metadata.Divisions.Download_Tree.Add_File(name);
        }
    }

    return true;
}
/// <summary> Saves all of the digital resource information to the database </summary>
/// <remarks> Before saving, this records the total size of the files in the resource folder, sets the
/// TextSearchable flag, and clears the item views. A failed save is reported through OnError but does
/// not stop processing. </remarks>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> Always TRUE, even when the database save fails </returns>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    // Determine total size on the disk
    // NOTE(review): each file length is integer-divided by 1024 (whole kilobytes) and the sum is
    // stored in DiskSpaceMb - confirm which unit the consumer of this property expects.
    string[] all_files_final = Directory.GetFiles(Resource.Resource_Folder);
    double size = all_files_final.Sum(ThisFile => (double)(((new FileInfo(ThisFile)).Length) / 1024));
    Resource.DiskSpaceMb = size;

    // Also, set the TextSearchable flag correctly: the item is text searchable as soon as one
    // text file sits next to a page image ( jpeg or jp2 ) with the same base name
    string[] text_files = Directory.GetFiles(Resource.Resource_Folder, "*.txt");
    bool page_image_text_found = false;
    foreach (string thisFile in text_files)
    {
        // Is this text from a PAGE IMAGE (jpeg or jp2) file?
        string filename_sans_extension = Path.GetFileNameWithoutExtension(thisFile);
        string possible_jpeg = Path.Combine(Resource.Resource_Folder, filename_sans_extension + ".jpg");

        // Previously this path was also built with ".jpg", so JPEG2000 page images were never detected
        string possible_jp2 = Path.Combine(Resource.Resource_Folder, filename_sans_extension + ".jp2");

        if ((File.Exists(possible_jp2)) || (File.Exists(possible_jpeg)))
        {
            page_image_text_found = true;
            break;
        }
    }
    Resource.Metadata.Behaviors.Text_Searchable = page_image_text_found;

    // Do not save the viewers here, since the default will be used for NEW items and
    // no change for existing items
    Resource.Metadata.Behaviors.Views = null;

    // Save this package to the database
    if (!Resource.Save_to_Database(Resource.NewPackage, Settings))
    {
        OnError("Error saving data to SobekCM database.  The database may not reflect the most recent data in the METS.", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);

        // The failure has been logged; the remaining modules are still allowed to run
        return true;
    }

    return true;
}
/// <summary> Creates a static version for serving to search engine robots to provide as much indexable data as possible </summary>
/// <remarks> The whole implementation is currently commented out, so this module performs no work
/// and simply returns TRUE. The disabled code is kept below for reference. </remarks>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    //// Only build the static builder when needed
    //if (staticBuilder == null)
    //{
    //    // Create the new statics page builder
    //    staticBuilder = new Static_Pages_Builder(Settings.Application_Server_URL, Settings.Static_Pages_Location, Settings.Application_Server_Network);
    //}

    //// Save the static page and then copy to all the image servers
    //try
    //{
    //    if (!Directory.Exists(Resource.Resource_Folder + "\\" + Settings.Backup_Files_Folder_Name))
    //        Directory.CreateDirectory(Resource.Resource_Folder + "\\" + Settings.Backup_Files_Folder_Name);

    //    string static_file = Resource.Resource_Folder + "\\" + Settings.Backup_Files_Folder_Name + "\\" + Resource.Metadata.BibID + "_" + Resource.Metadata.VID + ".html";
    //    staticBuilder.Create_Item_Citation_HTML(Resource.Metadata, static_file, Resource.Resource_Folder);

    //    if (!File.Exists(static_file))
    //    {
    //        OnError("Error creating static page for this resource", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    //    }
    //    else
    //    {
    //        // Also copy to the static page location server
    //        string web_server_file_version = Settings.Static_Pages_Location + Resource.File_Root + "\\" + Resource.BibID + "_" + Resource.VID + ".html";
    //        if (!Directory.Exists(Settings.Static_Pages_Location + Resource.File_Root))
    //            Directory.CreateDirectory(Settings.Static_Pages_Location + Resource.File_Root);
    //        File.Copy(static_file, web_server_file_version, true);
    //    }
    //}
    //catch
    //{
    //    OnError("Error creating static page for this resource", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    //}

    // Nothing is executed above, so there is nothing that could fail
    return true;
}
/// <summary> Writes the metadata of the digital resource as a marc.xml file into the source directory of the resource </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> Always TRUE; a failed write or a caught exception is reported through OnError and processing continues </returns>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    try
    {
        // Point the web information at the image server ( URL style slashes ) and set the identifiers
        Resource.Metadata.Web.Image_Root = Settings.Servers.Image_URL + Resource.Metadata.Web.File_Root.Replace("\\", "/");
        Resource.Metadata.Web.Set_BibID_VID(Resource.Metadata.BibID, Resource.Metadata.VID);

        // Collect the options handed to the MarcXML writer
        Dictionary<string, object> writerOptions = new Dictionary<string, object>();

        // The MARC generation values are only supplied when that settings section exists
        var marcGeneration = Engine_ApplicationCache_Gateway.Settings.MarcGeneration;
        if (marcGeneration != null)
        {
            writerOptions["MarcXML_File_ReaderWriter:MARC Cataloging Source Code"] = marcGeneration.Cataloging_Source_Code;
            writerOptions["MarcXML_File_ReaderWriter:MARC Location Code"] = marcGeneration.Location_Code;
            writerOptions["MarcXML_File_ReaderWriter:MARC Reproduction Agency"] = marcGeneration.Reproduction_Agency;
            writerOptions["MarcXML_File_ReaderWriter:MARC Reproduction Place"] = marcGeneration.Reproduction_Place;
            writerOptions["MarcXML_File_ReaderWriter:MARC XSLT File"] = marcGeneration.XSLT_File;
        }

        // The system name and abbreviation are always supplied
        writerOptions["MarcXML_File_ReaderWriter:System Name"] = Engine_ApplicationCache_Gateway.Settings.System.System_Name;
        writerOptions["MarcXML_File_ReaderWriter:System Abbreviation"] = Engine_ApplicationCache_Gateway.Settings.System.System_Abbreviation;

        // Write the file, reporting the writer's own message if it fails
        MarcXML_File_ReaderWriter marcXmlWriter = new MarcXML_File_ReaderWriter();
        string writeError;
        bool written = marcXmlWriter.Write_Metadata(Resource.Metadata.Source_Directory + "\\marc.xml", Resource.Metadata, writerOptions, out writeError);
        if (!written)
        {
            OnError("Error while saving the MarcXML : " + writeError, Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
        }
    }
    catch (Exception caught)
    {
        OnError("Exception caught while saving the MarcXML : " + caught.Message, Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    }

    return true;
}
/// <summary> Appends a line naming the handled item and today's date to external_log.txt, inside a "logs" folder next to the executing assembly </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> Always TRUE; any exception is reported through OnError and processing continues </returns>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    try
    {
        OnProcess("Externally logging work", "Standard", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);

        // Locate ( and create if necessary ) the logs folder beside the executing assembly
        string assemblyFolder = Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().CodeBase).Replace("file:\\", "");
        string logFolder = Path.Combine(assemblyFolder, "logs");
        if (!Directory.Exists(logFolder))
        {
            Directory.CreateDirectory(logFolder);
        }

        // Compose the entry, then append it; disposing the writer flushes and closes the file
        // NOTE(review): no lock guards this file ( a lock is commented out in the history of this
        // method ) - confirm whether several builder threads can reach this code at once.
        string logEntry = Resource.BibID + ":" + Resource.VID + " handled at " + DateTime.Now.ToShortDateString();
        string logFile = Path.Combine(logFolder, "external_log.txt");
        using (StreamWriter logWriter = new StreamWriter(logFile, true))
        {
            logWriter.WriteLine(logEntry);
        }
    }
    catch (Exception caught)
    {
        OnError("Exception caught while externally logging work : " + caught.Message, Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    }

    return true;
}
/// <summary> Computes the image attributes for each JPEG ( other than thumbnails ) and each JPEG2000 file
/// listed in the divisions of the resource metadata </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> Always TRUE, so processing continues with the next module </returns>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    // Without the resource folder there is nothing to inspect
    if (!Directory.Exists(Resource.Resource_Folder))
        return true;

    foreach (SobekCM_File_Info listedFile in Resource.Metadata.Divisions.Files)
    {
        string upperName = listedFile.System_Name.ToUpper();

        // JPEGs are analyzed, except for the thumbnail ( ...THM.JPG ) versions
        bool isJpeg = upperName.IndexOf(".JPG") > 0;
        bool isThumbnail = upperName.IndexOf("THM.JPG") >= 0;
        if (isJpeg && !isThumbnail)
        {
            Compute_Jpeg_Attributes(listedFile, Resource.Resource_Folder);
        }

        // JPEG2000s are recognized by JP2 appearing after the first character of the name
        if (upperName.IndexOf("JP2") > 0)
        {
            Compute_Jpeg2000_Attributes(listedFile, Resource.Resource_Folder);
        }
    }

    return true;
}
/// <summary> Saves the metadata of the digital resource as marc.xml within the source directory of the resource </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> Always TRUE; failures are reported through OnError without stopping the remaining modules </returns>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    // Identifier used on every message raised from this module
    string itemKey = Resource.BibID + ":" + Resource.VID;

    try
    {
        // Set the image location and the identifiers on the web information
        Resource.Metadata.Web.Image_Root = Settings.Servers.Image_URL + Resource.Metadata.Web.File_Root.Replace("\\", "/");
        Resource.Metadata.Web.Set_BibID_VID(Resource.Metadata.BibID, Resource.Metadata.VID);

        // Build the options for the MarcXML writer from the application-wide settings
        var appSettings = Engine_ApplicationCache_Gateway.Settings;
        Dictionary<string, object> options = new Dictionary<string, object>();
        if (appSettings.MarcGeneration != null)
        {
            options["MarcXML_File_ReaderWriter:MARC Cataloging Source Code"] = appSettings.MarcGeneration.Cataloging_Source_Code;
            options["MarcXML_File_ReaderWriter:MARC Location Code"] = appSettings.MarcGeneration.Location_Code;
            options["MarcXML_File_ReaderWriter:MARC Reproduction Agency"] = appSettings.MarcGeneration.Reproduction_Agency;
            options["MarcXML_File_ReaderWriter:MARC Reproduction Place"] = appSettings.MarcGeneration.Reproduction_Place;
            options["MarcXML_File_ReaderWriter:MARC XSLT File"] = appSettings.MarcGeneration.XSLT_File;
        }
        options["MarcXML_File_ReaderWriter:System Name"] = Engine_ApplicationCache_Gateway.Settings.System.System_Name;
        options["MarcXML_File_ReaderWriter:System Abbreviation"] = Engine_ApplicationCache_Gateway.Settings.System.System_Abbreviation;

        // Save the marc xml file
        string marcFile = Resource.Metadata.Source_Directory + "\\marc.xml";
        string errorMessage;
        MarcXML_File_ReaderWriter marcWriter = new MarcXML_File_ReaderWriter();
        if (marcWriter.Write_Metadata(marcFile, Resource.Metadata, options, out errorMessage))
            return true;

        // The writer reported a failure through its out parameter
        OnError("Error while saving the MarcXML : " + errorMessage, itemKey, Resource.METS_Type_String, Resource.BuilderLogId);
    }
    catch (Exception ee)
    {
        OnError("Exception caught while saving the MarcXML : " + ee.Message, itemKey, Resource.METS_Type_String, Resource.BuilderLogId);
    }

    return true;
}
/// <summary> Creates the missing or out-of-date image derivatives ( JPEG, thumbnail JPEG, and JPEG2000 ) for the images in the resource folder </summary>
/// <remarks> The image files are grouped by their name without extension ( the image "root" ); a file
/// whose name ends in "thm" joins the group of the matching root when one exists. Within a group
/// the .jpg and .jp2 files are treated as derivatives and every other file as a possible source.
/// Derivatives are rebuilt from the newest source file, or from the JPEG2000 when no source
/// file remains. Nothing is done when no ImageMagick executable is configured. </remarks>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> The value of the returnValue field, which this method sets to TRUE before starting any work </returns>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    returnValue = true;

    string resourceFolder = Resource.Resource_Folder;
    string imagemagick_executable = MultiInstance_Builder_Settings.ImageMagick_Executable;
    string executing_directory = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().GetName().CodeBase).Replace("file:\\", "");
    string kakadu_directory = Path.Combine(executing_directory, "kakadu");

    // Without ImageMagick there is nothing this module can do
    if (String.IsNullOrEmpty(imagemagick_executable))
    {
        return returnValue;
    }

    // Get the list of image files first
    List<string> imageFiles = new List<string>();
    foreach (string imageExtension in image_extensions)
    {
        imageFiles.AddRange(Directory.GetFiles(resourceFolder, "*" + imageExtension));
    }

    // Only continue if some exist
    if (imageFiles.Count == 0)
    {
        return returnValue;
    }

    // Step through all the image files and collect them by image root
    Dictionary<string, List<string>> imageRootFiles = new Dictionary<string, List<string>>(StringComparer.OrdinalIgnoreCase);
    List<string> possibleThumbnails = new List<string>();
    foreach (string thisImageFile in imageFiles)
    {
        // Skip .QC.JPG files
        if (thisImageFile.IndexOf(".qc.jpg", StringComparison.OrdinalIgnoreCase) > 0)
        {
            continue;
        }

        string filename_sans_extension = Path.GetFileNameWithoutExtension(thisImageFile);

        // If this might be a thumbnail image, save it for the very end for analysis
        if ((thisImageFile.IndexOf("thm.jpg", StringComparison.OrdinalIgnoreCase) > 0) && (filename_sans_extension.Length > 3))
        {
            possibleThumbnails.Add(thisImageFile);
            continue;
        }

        // Add to the existing grouping for this root, or start a new one
        List<string> rootGroup;
        if (!imageRootFiles.TryGetValue(filename_sans_extension, out rootGroup))
        {
            rootGroup = new List<string>();
            imageRootFiles.Add(filename_sans_extension, rootGroup);
        }
        rootGroup.Add(thisImageFile);
    }

    // Now, re-analyze those files that could have potentially been a thumbnail jpeg
    foreach (string thisPossibleThumbnail in possibleThumbnails)
    {
        string filename_sans_extension = Path.GetFileNameWithoutExtension(thisPossibleThumbnail);

        // Remove the final 'thm' from the name first and look for a match
        string filename_sans_thumb_extension = filename_sans_extension.Substring(0, filename_sans_extension.Length - 3);

        // Prefer the grouping of the root image. Otherwise the file is grouped under its own
        // full name, REUSING a grouping that may already exist under that name ( for example
        // 'pagethm.tif' next to 'pagethm.jpg', or the same path enumerated twice ) - blindly
        // adding the key in that situation threw an ArgumentException.
        List<string> thumbGroup;
        if ((!imageRootFiles.TryGetValue(filename_sans_thumb_extension, out thumbGroup)) && (!imageRootFiles.TryGetValue(filename_sans_extension, out thumbGroup)))
        {
            thumbGroup = new List<string>();
            imageRootFiles.Add(filename_sans_extension, thumbGroup);
        }
        thumbGroup.Add(thisPossibleThumbnail);
    }

    // Create the image process object for creating
    Image_Derivative_Creation_Processor imageProcessor = new Image_Derivative_Creation_Processor(imagemagick_executable, kakadu_directory, true, true, Settings.Resources.JPEG_Width, Settings.Resources.JPEG_Height, false, Settings.Resources.Thumbnail_Width, Settings.Resources.Thumbnail_Height, null);
    imageProcessor.New_Task_String += imageProcessor_New_Task_String;
    imageProcessor.Error_Encountered += imageProcessor_Error_Encountered;

    string itemKey = Resource.BibID + ":" + Resource.VID;

    // Step through each file grouping and look for the newest file and jpeg and thumbnail dates
    foreach (KeyValuePair<string, List<string>> imageGroup in imageRootFiles)
    {
        string thisImageRoot = imageGroup.Key;
        string jpeg_file = String.Empty;
        string jpeg_thumb_file = String.Empty;
        string jpeg2000_file = String.Empty;

        // Separate the standard derivatives ( jpeg, thumbnail, jpeg2000 ) from the possible source files
        List<string> sourceFiles = new List<string>();
        foreach (string groupedFile in imageGroup.Value)
        {
            string extension = Path.GetExtension(groupedFile);
            if (String.Equals(extension, ".jpg", StringComparison.OrdinalIgnoreCase))
            {
                // A JPEG named <root>thm is the thumbnail, any other JPEG is the regular derivative
                if (String.Compare(Path.GetFileNameWithoutExtension(groupedFile), thisImageRoot + "thm", StringComparison.OrdinalIgnoreCase) == 0)
                {
                    jpeg_thumb_file = groupedFile;
                }
                else
                {
                    jpeg_file = groupedFile;
                }
            }
            else if (String.Equals(extension, ".jp2", StringComparison.OrdinalIgnoreCase))
            {
                jpeg2000_file = groupedFile;
            }
            else
            {
                // Looks like a source image ( and not a standard derivative )
                sourceFiles.Add(groupedFile);
            }
        }

        // Last write times of the derivatives; null means the derivative does not exist
        DateTime? jpeg_file_lastModTime = null;
        if (!String.IsNullOrEmpty(jpeg_file))
        {
            jpeg_file_lastModTime = File.GetLastWriteTime(jpeg_file);
        }

        DateTime? jpeg_thumb_file_lastModTime = null;
        if (!String.IsNullOrEmpty(jpeg_thumb_file))
        {
            jpeg_thumb_file_lastModTime = File.GetLastWriteTime(jpeg_thumb_file);
        }

        DateTime? jpeg2000_file_lastModTime = null;
        if (!String.IsNullOrEmpty(jpeg2000_file))
        {
            jpeg2000_file_lastModTime = File.GetLastWriteTime(jpeg2000_file);
        }

        // Were there some ordinary source files left, that may need to be analyzed?
        if (sourceFiles.Count > 0)
        {
            // Find the newest source file
            string newest_source_file = String.Empty;
            DateTime newest_source_file_date = new DateTime(1900, 1, 1);
            foreach (string thisSourceFile in sourceFiles)
            {
                DateTime lastModTime = File.GetLastWriteTime(thisSourceFile);
                if (lastModTime.CompareTo(newest_source_file_date) > 0)
                {
                    newest_source_file_date = lastModTime;
                    newest_source_file = thisSourceFile;
                }
            }

            // Now, see if some of the basic derivatives are missing or too old
            bool jpegStale = (!jpeg_file_lastModTime.HasValue) || (jpeg_file_lastModTime.Value.CompareTo(newest_source_file_date) < 0);
            bool thumbStale = (!jpeg_thumb_file_lastModTime.HasValue) || (jpeg_thumb_file_lastModTime.Value.CompareTo(newest_source_file_date) < 0);
            bool jpeg2000Stale = (!jpeg2000_file_lastModTime.HasValue) || (jpeg2000_file_lastModTime.Value.CompareTo(newest_source_file_date) < 0);
            if (jpegStale || thumbStale || jpeg2000Stale)
            {
                // Create ALL the derivatives from the newest source file
                // NOTE(review): any width/height already recorded in the METS for a regenerated
                // JPEG is not reset here - confirm a later module recomputes it.
                string name_sans_extension = Path.GetFileNameWithoutExtension(newest_source_file);
                imageProcessor.ImageMagick_Create_JPEG(newest_source_file, resourceFolder + "\\" + name_sans_extension + "thm.jpg", Settings.Resources.Thumbnail_Width, Settings.Resources.Thumbnail_Height, Resource.BuilderLogId, itemKey);
                imageProcessor.ImageMagick_Create_JPEG(newest_source_file, resourceFolder + "\\" + name_sans_extension + ".jpg", Settings.Resources.JPEG_Width, Settings.Resources.JPEG_Height, Resource.BuilderLogId, itemKey);
                imageProcessor.Create_JPEG2000(newest_source_file, name_sans_extension + ".jp2", resourceFolder, Resource.BuilderLogId, itemKey);
            }
        }
        else if (!String.IsNullOrEmpty(jpeg2000_file))
        {
            // No derivative source files found, but the JPEG derivatives may be built from the JPEG2000 file
            DateTime jpeg2000Date = jpeg2000_file_lastModTime.Value;
            bool jpegStale = (!jpeg_file_lastModTime.HasValue) || (jpeg_file_lastModTime.Value.CompareTo(jpeg2000Date) < 0);
            bool thumbStale = (!jpeg_thumb_file_lastModTime.HasValue) || (jpeg_thumb_file_lastModTime.Value.CompareTo(jpeg2000Date) < 0);
            if (jpegStale || thumbStale)
            {
                // Create the JPEG derivatives from the JPEG2000
                string name_sans_extension = Path.GetFileNameWithoutExtension(jpeg2000_file);
                imageProcessor.ImageMagick_Create_JPEG(jpeg2000_file, resourceFolder + "\\" + name_sans_extension + "thm.jpg", Settings.Resources.Thumbnail_Width, Settings.Resources.Thumbnail_Height, Resource.BuilderLogId, itemKey);
                imageProcessor.ImageMagick_Create_JPEG(jpeg2000_file, resourceFolder + "\\" + name_sans_extension + ".jpg", Settings.Resources.JPEG_Width, Settings.Resources.JPEG_Height, Resource.BuilderLogId, itemKey);
            }
        }
    }

    return returnValue;
}
/// <summary> Method performs the work of the item-level submission package builder module </summary>
/// <remarks> Each concrete module overrides this method with its own processing step for a
/// single incoming digital resource. </remarks>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
public abstract bool DoWork(Incoming_Digital_Resource Resource);
/// <summary> Creates all the image derivative files from original jpeg and tiff files </summary>
/// <remarks> Each JPEG without a thumbnail gets one created. The TIFF files are handed to the image
/// processor for complete derivative creation, after which every file sharing a TIFF's base
/// name and having a page image extension is added to the new image files of the resource.
/// Nothing is done when no ImageMagick executable is configured. </remarks>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> FALSE if the startup path can not be determined; otherwise the value of the returnValue field,
/// which this method sets to TRUE before starting any work </returns>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    returnValue = true;

    string resourceFolder = Resource.Resource_Folder;
    string bibID = Resource.BibID;
    string vid = Resource.VID;
    string imagemagick_executable = MultiInstance_Builder_Settings.ImageMagick_Executable;

    // Are there images that need to be processed here?
    if (String.IsNullOrEmpty(imagemagick_executable))
    {
        return returnValue;
    }

    // Get the list of jpeg and tiff files, and only continue if some exist
    string[] jpeg_files = Directory.GetFiles(resourceFolder, "*.jpg");
    string[] tiff_files = Directory.GetFiles(resourceFolder, "*.tif");
    if ((jpeg_files.Length == 0) && (tiff_files.Length == 0))
    {
        return returnValue;
    }

    string startupPath = Path.GetDirectoryName(Assembly.GetCallingAssembly().Location);
    if (startupPath == null)
    {
        OnError("Unable to find the startup path in CreateImageDerivativesModule!", String.Empty, String.Empty, -1);
        return false;
    }
    string kakadu_path = Path.Combine(startupPath, "Kakadu");

    // Create the image process object for creating
    Image_Derivative_Creation_Processor imageProcessor = new Image_Derivative_Creation_Processor(imagemagick_executable, kakadu_path, true, true, Settings.Resources.JPEG_Width, Settings.Resources.JPEG_Height, false, Settings.Resources.Thumbnail_Width, Settings.Resources.Thumbnail_Height, null);
    imageProcessor.New_Task_String += imageProcessor_New_Task_String;
    imageProcessor.Error_Encountered += imageProcessor_Error_Encountered;

    // Step through the JPEGS and ensure they have thumbnails (TIFF generation below makes them as well)
    foreach (string jpegFile in jpeg_files)
    {
        // Thumbnails themselves and the .QC.JPG files never get a thumbnail
        string jpegName = Path.GetFileName(jpegFile);
        if ((jpegName.IndexOf("thm.jpg", StringComparison.OrdinalIgnoreCase) >= 0) || (jpegName.IndexOf(".qc.jpg", StringComparison.OrdinalIgnoreCase) >= 0))
        {
            continue;
        }

        // Strip only the trailing extension; removing the extension text with String.Replace
        // also removed it from the middle of names such as 'scan.jpg.jpg'
        string name_sans_extension = Path.GetFileNameWithoutExtension(jpegFile);
        string thumbnailFile = resourceFolder + "\\" + name_sans_extension + "thm.jpg";
        if (!File.Exists(thumbnailFile))
        {
            imageProcessor.ImageMagick_Create_JPEG(jpegFile, thumbnailFile, Settings.Resources.Thumbnail_Width, Settings.Resources.Thumbnail_Height, Resource.BuilderLogId, Resource.BibID + ":" + Resource.VID);
        }
    }

    // Step through any TIFFs as well
    if (tiff_files.Length > 0)
    {
        // Do a complete image derivative creation process on these TIFF files
        imageProcessor.Process(resourceFolder, bibID, vid, tiff_files, Resource.BuilderLogId);

        // Since we are actually creating page images here (most likely) try to add
        // them to the package as well
        foreach (string thisTiffFile in tiff_files)
        {
            // Get matching files ( same base name, any extension )
            string tiffFileName = Path.GetFileNameWithoutExtension(thisTiffFile);
            string[] matching_files = Directory.GetFiles(resourceFolder, tiffFileName + ".*");

            foreach (string derivativeFile in matching_files)
            {
                // If this is a page image type file, add it
                FileInfo derivativeFileInfo = new FileInfo(derivativeFile);
                if (Settings.Page_Image_Extensions.Contains(derivativeFileInfo.Extension.ToUpper().Replace(".", "")))
                {
                    Resource.NewImageFiles.Add(derivativeFileInfo.Name);
                }
            }
        }
    }

    return returnValue;
}
/// <summary> Looks for TIFF images without matching ( or with outdated ) text files and
/// uses Tesseract (if installed) to perform the OCR </summary>
/// <remarks> The text files are looked for, and written to, the "tesseract-output" subfolder of the
/// resource folder, which is created when missing. Every outcome is reported through OnProcess. </remarks>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> Always TRUE; a missing Tesseract installation or a failed OCR run never stops processing </returns>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string itemKey = Resource.BibID + ":" + Resource.VID;

    // Is Tesseract configured?
    if (String.IsNullOrEmpty(MultiInstance_Builder_Settings.Tesseract_Executable))
    {
        OnProcess("Tesseract OCR software not found", "Tesseract OCR Module", itemKey, Resource.METS_Type_String, Resource.BuilderLogId);
        return true;
    }

    // Ensure the executable exists
    string tesseract_executable = MultiInstance_Builder_Settings.Tesseract_Executable;
    try
    {
        if (!File.Exists(tesseract_executable))
        {
            OnProcess("Tesseract OCR executable configured, but not present", "Tesseract OCR Module", itemKey, Resource.METS_Type_String, Resource.BuilderLogId);
            return true;
        }
    }
    catch (Exception)
    {
        OnProcess("Exception thrown file checking for Tesseract OCR executable existance", "Tesseract OCR Module", itemKey, Resource.METS_Type_String, Resource.BuilderLogId);
        return true;
    }

    // Tesseract executable is configured and exists
    Tesseract_Processor.Tesseract_Executable = tesseract_executable;

    // Only the directory information from the package is used. The language and type of the
    // item were previously read here but never used, so those reads ( which dereferenced the
    // first language entry for no purpose ) have been removed.
    string resourceFolder = Resource.Resource_Folder;

    // Add the tesseract subfolder
    string tesseractSubFolder = Path.Combine(resourceFolder, "tesseract-output");
    if (!Directory.Exists(tesseractSubFolder))
    {
        Directory.CreateDirectory(tesseractSubFolder);
    }

    // Look through all the TIFFs
    string[] tiff_files = Directory.GetFiles(resourceFolder, "*.tif*");
    foreach (string thisTiffFile in tiff_files)
    {
        string textFileName = Path.GetFileNameWithoutExtension(thisTiffFile) + ".txt";
        string textFilePath = Path.Combine(tesseractSubFolder, textFileName);

        // Should this TIFF be processed by Tesseract OCR?  Yes, when the text file is
        // missing or is older than the TIFF
        bool processTiff;
        if (!File.Exists(textFilePath))
        {
            processTiff = true;
        }
        else
        {
            DateTime textLastModifiedDate = (new FileInfo(textFilePath)).LastWriteTime;
            DateTime tiffLastModifiedDate = (new FileInfo(thisTiffFile)).LastWriteTime;
            processTiff = textLastModifiedDate.CompareTo(tiffLastModifiedDate) < 0;
        }

        if (!processTiff)
        {
            continue;
        }

        // Was this successful?
        if (!Tesseract_Processor.Process_TIFF(thisTiffFile, textFilePath))
        {
            string exception_type = "Unknown Exception";
            if (!String.IsNullOrEmpty(Tesseract_Processor.Last_Exception))
            {
                exception_type = Tesseract_Processor.Last_Exception;
            }

            OnProcess("Tesseract OCR exception on " + Path.GetFileName(thisTiffFile) + ": " + exception_type, "Tesseract OCR Module", itemKey, Resource.METS_Type_String, Resource.BuilderLogId);
        }
        else
        {
            OnProcess("Tesseract OCR successfuly on " + Path.GetFileName(thisTiffFile) + " to " + textFilePath, "Tesseract OCR Module", itemKey, Resource.METS_Type_String, Resource.BuilderLogId);
        }
    }

    return true;
}
/// <summary> Reprocesses a single, recently loaded item: reloads its METS file, runs every configured
/// item processing module over it, and registers it on the post-process lists </summary>
/// <param name="Resource_Folder"> Folder of the resource ( not used within this method ) </param>
/// <param name="AdditionalWorkResource"> Incoming digital resource object which requires the additional work </param>
private void Complete_Single_Recent_Load_Requiring_Additional_Work(string Resource_Folder, Incoming_Digital_Resource AdditionalWorkResource)
{
    // Flag this as a reprocess and open the log entry under which everything else is recorded
    AdditionalWorkResource.METS_Type_String = "Reprocess";
    string startingKey = AdditionalWorkResource.BibID + ":" + AdditionalWorkResource.VID;
    AdditionalWorkResource.BuilderLogId = Add_NonError_To_Log("........Reprocessing '" + startingKey + "'", "Standard", startingKey, AdditionalWorkResource.METS_Type_String, -1);

    try
    {
        // The METS must load AND provide a BibID, otherwise give up on this item
        bool metsLoaded = AdditionalWorkResource.Load_METS();
        if ((!metsLoaded) || (AdditionalWorkResource.BibID.Length == 0))
        {
            string folderKey = AdditionalWorkResource.Folder_Name.Replace("_", ":");
            Add_Error_To_Log("Error reading METS file from " + folderKey, folderKey, "Reprocess", AdditionalWorkResource.BuilderLogId);
            return;
        }

        // The type is assigned again now that the METS has been loaded
        AdditionalWorkResource.METS_Type_String = "Reprocess";

        // Add thumbnail and aggregation information from the database
        Library.Database.SobekCM_Database.Add_Minimum_Builder_Information(AdditionalWorkResource.Metadata);

        // Run each configured item module in turn; the first module to fail ends the work
        foreach (iSubmissionPackageModule itemModule in BuilderSettings.ItemProcessModules)
        {
            if (verbose)
            {
                Add_NonError_To_Log("Running module " + itemModule.GetType().ToString(), true, AdditionalWorkResource.BibID + ":" + AdditionalWorkResource.VID, String.Empty, AdditionalWorkResource.BuilderLogId);
            }

            if (itemModule.DoWork(AdditionalWorkResource))
                continue;

            Add_Error_To_Log("Unable to complete additional work for " + AdditionalWorkResource.BibID + ":" + AdditionalWorkResource.VID, AdditionalWorkResource.BibID + ":" + AdditionalWorkResource.VID, String.Empty, AdditionalWorkResource.BuilderLogId);
            return;
        }

        // Save these collections to mark them for refreshing the RSS feeds, etc..
        Add_Process_Info_To_PostProcess_Lists(AdditionalWorkResource.BibID, AdditionalWorkResource.VID, AdditionalWorkResource.Metadata.Behaviors.Aggregation_Code_List);

        // Finally, clear the memory a little bit
        AdditionalWorkResource.Clear_METS();
    }
    catch (Exception caught)
    {
        Add_Error_To_Log("Unable to complete additional work for " + AdditionalWorkResource.BibID + ":" + AdditionalWorkResource.VID, AdditionalWorkResource.BibID + ":" + AdditionalWorkResource.VID, AdditionalWorkResource.METS_Type_String, AdditionalWorkResource.BuilderLogId, caught);
    }
}
/// <summary> Converts office files ( powerpoints and Word files ) into a PDF, while still retaining the original file </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Conversion problems are reported through OnError only; this implementation always returns TRUE. </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    string resourceFolder = Resource.Resource_Folder;

    // Should we try to convert office files?
    if (!Settings.Builder.Convert_Office_Files_To_PDF)
    {
        return true;
    }

    try
    {
        // Preprocess each Powerpoint document to PDF
        Convert_Office_Files(Resource, resourceFolder, "*.ppt*", "PPT",
            (inputFile, outputFile) => Word_Powerpoint_to_PDF_Converter.Powerpoint_To_PDF(inputFile, outputFile));

        // Preprocess each Word document to PDF
        Convert_Office_Files(Resource, resourceFolder, "*.doc*", "Word DOC",
            (inputFile, outputFile) => Word_Powerpoint_to_PDF_Converter.Word_To_PDF(inputFile, outputFile));
    }
    catch (Exception ee)
    {
        // Append the details to the shared error log; the using block guarantees the
        // file handle is released even if one of the writes fails
        string errorLogFile = Path.Combine(MultiInstance_Builder_Settings.Builder_Executable_Directory, "Logs\\error.log");
        using (StreamWriter errorWriter = new StreamWriter(errorLogFile, true))
        {
            errorWriter.WriteLine("Message: " + ee.Message);
            errorWriter.WriteLine("Stack Trace: " + ee.StackTrace);
        }

        OnError("Unknown error converting office files to PDF", Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
        OnError(ee.Message, Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
    }

    return true;
}

/// <summary> Converts every file in the folder which matches the search pattern into a PDF of the
/// same base name, skipping any file for which that PDF already exists </summary>
/// <param name="Resource"> Incoming digital resource object, used to label any reported error </param>
/// <param name="ResourceFolder"> Folder to search, and in which the PDFs are written </param>
/// <param name="SearchPattern"> File search pattern ( i.e., "*.doc*" ) </param>
/// <param name="TypeLabel"> Label for the source file type, included in any error message </param>
/// <param name="Converter"> Conversion routine taking the input file and output file and returning
/// a result code; codes 1 through 4 are reported as errors and any other value is ignored </param>
private void Convert_Office_Files(Incoming_Digital_Resource Resource, string ResourceFolder, string SearchPattern, string TypeLabel, Func<string, string, int> Converter)
{
    foreach (string officeFile in Directory.GetFiles(ResourceFolder, SearchPattern))
    {
        // Strip only the trailing extension.  Removing every occurrence of the extension text
        // would misname the PDF for a file such as 'a.doc.v2.doc'.
        string filename = Path.GetFileNameWithoutExtension(officeFile);

        // Does a PDF version exist for this item?
        string pdf_version = Path.Combine(ResourceFolder, filename + ".pdf");
        if (File.Exists(pdf_version))
        {
            continue;
        }

        int conversion_error = Converter(officeFile, pdf_version);

        string reason;
        switch (conversion_error)
        {
            case 1:
                reason = "Can't open input file";
                break;

            case 2:
                reason = "Can't create output file";
                break;

            case 3:
                reason = "Converting failed";
                break;

            case 4:
                reason = "MS Office not installed";
                break;

            default:
                reason = null;
                break;
        }

        if (reason != null)
        {
            OnError("Error converting " + TypeLabel + " to PDF: " + reason, Resource.BibID + ":" + Resource.VID, Resource.METS_Type_String, Resource.BuilderLogId);
        }
    }
}
/// <summary> Creates all the image derivative files from original jpeg and tiff files </summary>
/// <param name="Resource"> Incoming digital resource object </param>
/// <returns> TRUE if processing can continue, FALSE if a critical error occurred which should stop all processing </returns>
/// <remarks> Image files in the resource folder are grouped by file name ( without extension ), with
/// a '[root]thm.jpg' file joined to the group for '[root]' when that group exists.  Within each group
/// the JPEG, thumbnail JPEG and JPEG2000 are treated as derivatives and everything else as a source
/// image.  Derivatives are (re)built from the newest source image when any of them is missing or
/// older than that source; with no source image, the two JPEGs are rebuilt from the JPEG2000 when
/// missing or more than two minutes older than it.  The returned value is the returnValue field,
/// which is set to TRUE on entry ( presumably cleared by the error event handler - confirm ). </remarks>
public override bool DoWork(Incoming_Digital_Resource Resource)
{
    returnValue = true;

    string resourceFolder = Resource.Resource_Folder;
    string imagemagick_executable = MultiInstance_Builder_Settings.ImageMagick_Executable;
    string executing_directory = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().GetName().CodeBase).Replace("file:\\", "");
    string kakadu_directory = Path.Combine(executing_directory, "kakadu");

    // Nothing can be done without an ImageMagick executable
    if (String.IsNullOrEmpty(imagemagick_executable))
    {
        return returnValue;
    }

    // Get the list of image files first
    List<string> imageFiles = new List<string>();
    foreach (string imageExtension in image_extensions)
    {
        imageFiles.AddRange(Directory.GetFiles(resourceFolder, "*" + imageExtension));
    }

    // Only continue if some exist
    if (imageFiles.Count == 0)
    {
        return returnValue;
    }

    // Step through all the image files and find the collection of page images
    Dictionary<string, List<string>> imageRootFiles = new Dictionary<string, List<string>>(StringComparer.OrdinalIgnoreCase);
    List<string> possibleThumbnails = new List<string>();
    foreach (string thisImageFile in imageFiles)
    {
        // Skip .QC.JPG files
        if (thisImageFile.IndexOf(".qc.jpg", StringComparison.OrdinalIgnoreCase) > 0)
        {
            continue;
        }

        // If this might be a thumbnail image, save it for the very end for analysis
        if ((thisImageFile.IndexOf("thm.jpg", StringComparison.OrdinalIgnoreCase) > 0) && (Path.GetFileNameWithoutExtension(thisImageFile).Length > 3))
        {
            possibleThumbnails.Add(thisImageFile);
            continue;
        }

        // Group this file under its name without the extension
        Add_Image_To_Root(imageRootFiles, Path.GetFileNameWithoutExtension(thisImageFile), thisImageFile);
    }

    // Now, re-analyze those files that could have potentially been a thumbnail jpeg
    foreach (string thisPossibleThumbnail in possibleThumbnails)
    {
        // Get this filename without the extension
        string filename_sans_extension = Path.GetFileNameWithoutExtension(thisPossibleThumbnail);

        // Remove the final three characters ( the 'thm' ) from the name and look for a match
        string filename_sans_thumb_extension = filename_sans_extension.Substring(0, filename_sans_extension.Length - 3);

        // Join the matching page grouping if there is one; otherwise group the file under its
        // own name.  That grouping may already exist ( i.e., '00001thm.tif' alongside
        // '00001thm.jpg' ), so it is added to rather than blindly created, which would throw
        // on the duplicate key.
        string root = imageRootFiles.ContainsKey(filename_sans_thumb_extension) ? filename_sans_thumb_extension : filename_sans_extension;
        Add_Image_To_Root(imageRootFiles, root, thisPossibleThumbnail);
    }

    // Create the image process object for creating the derivatives
    Image_Derivative_Creation_Processor imageProcessor = new Image_Derivative_Creation_Processor(imagemagick_executable, kakadu_directory, true, true, Settings.Resources.JPEG_Width, Settings.Resources.JPEG_Height, false, Settings.Resources.Thumbnail_Width, Settings.Resources.Thumbnail_Height, null);
    imageProcessor.New_Task_String += imageProcessor_New_Task_String;
    imageProcessor.Error_Encountered += imageProcessor_Error_Encountered;

    // Step through each file grouping and look for the newest file and jpeg and thumbnail dates
    foreach (KeyValuePair<string, List<string>> imageGroup in imageRootFiles)
    {
        string thisImageRoot = imageGroup.Key;

        // Ready for the next set of images
        string jpeg_file = String.Empty;
        string jpeg_thumb_file = String.Empty;
        string jpeg2000_file = String.Empty;
        List<string> sourceFiles = new List<string>();

        // Separate the standard derivatives ( jpeg, thumbnail, jpeg2000 ) from the possible source files
        foreach (string thisFile in imageGroup.Value)
        {
            string extension = Path.GetExtension(thisFile).ToUpper();
            if (extension == ".JPG")
            {
                // A JPEG named [root]thm is the thumbnail; any other JPEG is the full-size derivative
                if (String.Compare(Path.GetFileNameWithoutExtension(thisFile), thisImageRoot + "thm", StringComparison.OrdinalIgnoreCase) == 0)
                {
                    jpeg_thumb_file = thisFile;
                }
                else
                {
                    jpeg_file = thisFile;
                }
            }
            else if (extension == ".JP2")
            {
                jpeg2000_file = thisFile;
            }
            else
            {
                // This looks like a source image ( and not a standard derivative )
                sourceFiles.Add(thisFile);
            }
        }

        // Collect the last-write time of each derivative which exists
        DateTime? jpeg_file_lastModTime = null;
        if (!String.IsNullOrEmpty(jpeg_file))
        {
            jpeg_file_lastModTime = File.GetLastWriteTime(jpeg_file);
        }

        DateTime? jpeg_thumb_file_lastModTime = null;
        if (!String.IsNullOrEmpty(jpeg_thumb_file))
        {
            jpeg_thumb_file_lastModTime = File.GetLastWriteTime(jpeg_thumb_file);
        }

        DateTime? jpeg2000_file_lastModTime = null;
        if (!String.IsNullOrEmpty(jpeg2000_file))
        {
            jpeg2000_file_lastModTime = File.GetLastWriteTime(jpeg2000_file);
        }

        // Were there some ordinary source files left, that may need to be analyzed?
        if (sourceFiles.Count > 0)
        {
            // Find the newest source file
            string newest_source_file = String.Empty;
            DateTime newest_source_file_date = new DateTime(1900, 1, 1);
            foreach (string thisSourceFile in sourceFiles)
            {
                DateTime lastModTime = File.GetLastWriteTime(thisSourceFile);
                if (lastModTime.CompareTo(newest_source_file_date) > 0)
                {
                    newest_source_file_date = lastModTime;
                    newest_source_file = thisSourceFile;
                }
            }

            // Now, see if some of the basic derivatives are missing or too old
            if (Is_Missing_Or_Older(jpeg_file_lastModTime, newest_source_file_date)
                || Is_Missing_Or_Older(jpeg_thumb_file_lastModTime, newest_source_file_date)
                || Is_Missing_Or_Older(jpeg2000_file_lastModTime, newest_source_file_date))
            {
                string name_sans_extension = Path.GetFileNameWithoutExtension(newest_source_file);

                // Create all the derivatives from the newest source file
                imageProcessor.ImageMagick_Create_JPEG(newest_source_file, resourceFolder + "\\" + name_sans_extension + "thm.jpg", Settings.Resources.Thumbnail_Width, Settings.Resources.Thumbnail_Height, Resource.BuilderLogId, Resource.BibID + ":" + Resource.VID);
                imageProcessor.ImageMagick_Create_JPEG(newest_source_file, resourceFolder + "\\" + name_sans_extension + ".jpg", Settings.Resources.JPEG_Width, Settings.Resources.JPEG_Height, Resource.BuilderLogId, Resource.BibID + ":" + Resource.VID);
                imageProcessor.Create_JPEG2000(newest_source_file, name_sans_extension + ".jp2", resourceFolder, Resource.BuilderLogId, Resource.BibID + ":" + Resource.VID);
            }
        }
        else if (!String.IsNullOrEmpty(jpeg2000_file))
        {
            // No derivative source files found, but we may build the derivatives from the JPEG2000 file.
            // The two minutes of slack is applied to the JPEG2000 time before comparing, so a JPEG
            // written up to two minutes before the JPEG2000 is not treated as stale.
            DateTime jpeg2000_cutoff = jpeg2000_file_lastModTime.Value.AddMinutes(-2);

            // Now, see if the other derivatives are missing or too old
            if (Is_Missing_Or_Older(jpeg_file_lastModTime, jpeg2000_cutoff)
                || Is_Missing_Or_Older(jpeg_thumb_file_lastModTime, jpeg2000_cutoff))
            {
                string name_sans_extension = Path.GetFileNameWithoutExtension(jpeg2000_file);

                // Create the JPEG derivatives from the JPEG2000
                imageProcessor.ImageMagick_Create_JPEG(jpeg2000_file, resourceFolder + "\\" + name_sans_extension + "thm.jpg", Settings.Resources.Thumbnail_Width, Settings.Resources.Thumbnail_Height, Resource.BuilderLogId, Resource.BibID + ":" + Resource.VID);
                imageProcessor.ImageMagick_Create_JPEG(jpeg2000_file, resourceFolder + "\\" + name_sans_extension + ".jpg", Settings.Resources.JPEG_Width, Settings.Resources.JPEG_Height, Resource.BuilderLogId, Resource.BibID + ":" + Resource.VID);
            }
        }
    }

    return returnValue;
}

/// <summary> Adds a file to the grouping for the given root name, creating the grouping if it does not exist yet </summary>
/// <param name="ImageRootFiles"> Dictionary of root name to the list of files grouped under it </param>
/// <param name="Root"> Root name ( file name without extension ) to group the file under </param>
/// <param name="ImageFile"> File to add to the grouping </param>
private static void Add_Image_To_Root(Dictionary<string, List<string>> ImageRootFiles, string Root, string ImageFile)
{
    List<string> grouping;
    if (!ImageRootFiles.TryGetValue(Root, out grouping))
    {
        grouping = new List<string>();
        ImageRootFiles.Add(Root, grouping);
    }

    grouping.Add(ImageFile);
}

/// <summary> Tests whether a derivative is missing, or was last written before the reference time </summary>
/// <param name="DerivativeTime"> Last write time of the derivative, or NULL if the derivative does not exist </param>
/// <param name="ReferenceTime"> Time the derivative must not be older than </param>
/// <returns> TRUE if the derivative has no time or its time is earlier than the reference time </returns>
private static bool Is_Missing_Or_Older(DateTime? DerivativeTime, DateTime ReferenceTime)
{
    return (!DerivativeTime.HasValue) || (DerivativeTime.Value.CompareTo(ReferenceTime) < 0);
}
/// <summary> Processes a single incoming package by running every configured item-processing
/// module against it </summary>
/// <param name="ResourcePackage"> Incoming digital resource package to process </param>
/// <remarks> If any module reports failure, the error is logged, an attempt is made to move the
/// package folder into the source folder's failures folder, and processing of this package stops.
/// Exceptions thrown inside the module loop are written to error.log and to the builder log. </remarks>
private void Process_Single_Incoming_Package(Incoming_Digital_Resource ResourcePackage)
{
    ResourcePackage.BuilderLogId = Add_NonError_To_Log("........Processing '" + ResourcePackage.Folder_Name + "'", "Standard", ResourcePackage.BibID + ":" + ResourcePackage.VID, ResourcePackage.METS_Type_String, -1);

    // Clear any existing error linked to this item
    Library.Database.SobekCM_Database.Builder_Clear_Item_Error_Log(ResourcePackage.BibID, ResourcePackage.VID, "SobekCM Builder");

    // Before we save this or anything, let's see if this is truly a new resource.  Single quotes
    // are doubled so that an unexpected apostrophe in an identifier cannot break the filter
    // expression ( assumes itemTable is a DataTable - confirm ).
    string bibid_filter = (ResourcePackage.BibID ?? String.Empty).Replace("'", "''");
    string vid_filter = (ResourcePackage.VID ?? String.Empty).Replace("'", "''");
    ResourcePackage.NewPackage = itemTable.Select("BibID='" + bibid_filter + "' and VID='" + vid_filter + "'").Length == 0;
    ResourcePackage.Package_Time = DateTime.Now;

    try
    {
        // Do all the item processing per instance config
        foreach (iSubmissionPackageModule thisModule in BuilderSettings.ItemProcessModules)
        {
            if (thisModule.DoWork(ResourcePackage))
            {
                continue;
            }

            Add_Error_To_Log("Unable to complete new/replacement for " + ResourcePackage.BibID + ":" + ResourcePackage.VID, ResourcePackage.BibID + ":" + ResourcePackage.VID, String.Empty, ResourcePackage.BuilderLogId);

            // Try to move the whole package to the failures folder
            string final_failures_folder = Path.Combine(ResourcePackage.Source_Folder.Failures_Folder, ResourcePackage.BibID + "_" + ResourcePackage.VID);
            if (Directory.Exists(final_failures_folder))
            {
                // A folder for this item is already there, so add a timestamp suffix.  The clock
                // is read once so that all parts of the stamp describe the same instant.
                final_failures_folder = final_failures_folder + "_" + DateTime.Now.ToString("yyyy_MM_dd_HH_mm_ss", System.Globalization.CultureInfo.InvariantCulture);
            }

            try
            {
                Directory.Move(ResourcePackage.Resource_Folder, final_failures_folder);
            }
            catch (Exception moveError)
            {
                // The move stays best-effort, but a failure is now recorded instead of silently dropped
                Add_Error_To_Log("Unable to move failed package " + ResourcePackage.BibID + ":" + ResourcePackage.VID + " to the failures folder", ResourcePackage.BibID + ":" + ResourcePackage.VID, String.Empty, ResourcePackage.BuilderLogId, moveError);
            }

            return;
        }

        // Save these collections to mark them for refreshing the RSS feeds, etc..
        Add_Process_Info_To_PostProcess_Lists(ResourcePackage.BibID, ResourcePackage.VID, ResourcePackage.Metadata.Behaviors.Aggregation_Code_List);

        // Finally, clear the memory a little bit
        ResourcePackage.Clear_METS();
    }
    catch (Exception ee)
    {
        // Append the details to the error log; the using block guarantees the file handle is
        // released even if one of the writes fails
        using (StreamWriter errorWriter = new StreamWriter(logFileDirectory + "\\error.log", true))
        {
            errorWriter.WriteLine("Message: " + ee.Message);
            errorWriter.WriteLine("Stack Trace: " + ee.StackTrace);
        }

        Add_Error_To_Log("Unable to complete new/replacement for " + ResourcePackage.BibID + ":" + ResourcePackage.VID, ResourcePackage.BibID + ":" + ResourcePackage.VID, String.Empty, ResourcePackage.BuilderLogId, ee);
    }
}