/// <summary> Rebuilds the instance-wide item list XML file ("all.xml") and the
/// instance-wide RSS feeds ("all_rss.xml" / "all_short_rss.xml") </summary>
/// <param name="Builderid"> Primary key for this builder execution, for logging </param>
/// <param name="Settings"> Instance-wide settings (used for the static pages location) </param>
/// <param name="WorkSpaceDirectory"> Local work folder where the RSS feeds are first written </param>
/// <param name="PrimaryUrl"> Primary URL for this SobekCM instance, used within the feed links </param>
private void Recreate_Library_XML_and_RSS(long Builderid, InstanceWide_Settings Settings, string WorkSpaceDirectory, string PrimaryUrl)
{
    // Update the RSS Feeds and Item Lists for ALL digital resources.
    // Pull the complete item list ONCE and reuse it for both the XML list and
    // the RSS feeds (previously the database was queried twice, and the second
    // result was never null-checked before use).
    OnProcess("........Building XML list for all digital resources", Builderid);
    DataSet simple_list = null;
    try
    {
        simple_list = Engine_Database.Simple_Item_List(String.Empty, null);
        if (simple_list != null)
        {
            try
            {
                // Replace any existing instance-wide XML list
                string aggregation_list_file = Path.Combine(Settings.Servers.Static_Pages_Location, "all.xml");
                if (File.Exists(aggregation_list_file))
                {
                    File.Delete(aggregation_list_file);
                }
                simple_list.WriteXml(aggregation_list_file, XmlWriteMode.WriteSchema);
            }
            catch (Exception ee)
            {
                OnError("........Error in building XML list for all digital resources on " + Settings.Servers.Static_Pages_Location + "\n" + ee.Message, Builderid);
            }
        }
    }
    catch (Exception ee)
    {
        OnError("........Error in building XML list for all digital resources\n" + ee.Message, Builderid);
    }

    // Create the RSS feed for all items (full and short form) and copy both to the web server
    try
    {
        OnProcess("........Building RSS feed for all digital resources", Builderid);
        if (simple_list != null)
        {
            Create_RSS_Feed("all", WorkSpaceDirectory, "All Items", simple_list, PrimaryUrl);

            try
            {
                File.Copy(Path.Combine(WorkSpaceDirectory, "all_rss.xml"), Path.Combine(Settings.Servers.Static_Pages_Location, "rss", "all_rss.xml"), true);
                File.Copy(Path.Combine(WorkSpaceDirectory, "all_short_rss.xml"), Path.Combine(Settings.Servers.Static_Pages_Location, "rss", "all_short_rss.xml"), true);
            }
            catch (Exception ee)
            {
                OnError("........Error in copying RSS feed for all digital resources to " + Settings.Servers.Static_Pages_Location + "\n" + ee.Message, Builderid);
            }
        }
    }
    catch (Exception ee)
    {
        OnError("........Error in building RSS feed for all digital resources\n" + ee.Message, Builderid);
    }
}
/// <summary> Indexes all the items within a SobekCM library or a single item aggregation within a SobekCM library </summary>
/// <param name="SolrDocumentUrl"> URL for the solr/lucene core used for searching for a single document within the library </param>
/// <param name="SolrPageUrl"> URL for the solr/lucene core used for searching within a single document for matching pages </param>
/// <param name="File_Location"> Location where all resource files are located </param>
/// <param name="Collection"> Code the item aggreagtion to index, or empty string to index the entire library </param>
public static void Index_Collection(string SolrDocumentUrl, string SolrPageUrl, string File_Location, string Collection)
{
    // Initialize the document-level Solr/Lucene worker and add the solr url
    Startup.Init<SolrDocument>(SolrDocumentUrl);
    var solrDocumentWorker = ServiceLocator.Current.GetInstance<ISolrOperations<SolrDocument>>();

    // Initialize the page-level Solr/Lucene worker and add the solr url
    Startup.Init<SolrPage>(SolrPageUrl);
    var solrPageWorker = ServiceLocator.Current.GetInstance<ISolrOperations<SolrPage>>();

    // Get the start time (reported at the very end of the run)
    DateTime startTime = DateTime.Now;

    // Get the list of all items in this collection
    int itemcount = 1;
    int sincelastcommit = 0;
    DataSet items = Engine_Database.Simple_Item_List(Collection, null);
    List<SolrDocument> index_files = new List<SolrDocument>();
    List<SolrPage> index_pages = new List<SolrPage>();

    // Temporarily write each bib:vid (the 'using' guarantees the file is released even on error)
    using (StreamWriter bibVidWriter = new StreamWriter("bib_vid_list.txt"))
    {
        foreach (DataRow thisRow in items.Tables[0].Rows)
        {
            string bibid = thisRow[0].ToString();
            string vid = thisRow[1].ToString();
            bibVidWriter.WriteLine(bibid + ":" + vid);
        }
        bibVidWriter.Flush();
    }

    // Temporarily log this run to a date-stamped file; 'using' ensures disposal on any exit path
    using (StreamWriter logWriter = new StreamWriter("log" + DateTime.Now.Year + DateTime.Now.Month.ToString().PadLeft(2, '0') + DateTime.Now.Day.ToString().PadLeft(2, '0') + ".txt", false))
    {
        // Step through each row (one row per item in the collection)
        foreach (DataRow thisRow in items.Tables[0].Rows)
        {
            string bibid = thisRow[0].ToString();
            string vid = thisRow[1].ToString();

            // Compute the pair-tree resource folder from the BibID and the METS file path within it
            string directory = File_Location + bibid.Substring(0, 2) + "\\" + bibid.Substring(2, 2) + "\\" + bibid.Substring(4, 2) + "\\" + bibid.Substring(6, 2) + "\\" + bibid.Substring(8) + "\\" + vid.PadLeft(5, '0');
            string metsFile = directory + "\\" + bibid + "_" + vid + ".mets.xml";

            if ((Directory.Exists(directory)) && (File.Exists(metsFile)))
            {
                Console.WriteLine(itemcount.ToString() + @":" + bibid + @":" + vid);

                // Read a METS file
                SobekCM_Item item = SobekCM_Item.Read_METS(metsFile);

                // Only continue if this is not NULL
                if (item != null)
                {
                    logWriter.WriteLine(itemcount.ToString() + ":" + bibid + ":" + vid);

                    // Pull some data from the database
                    DataSet itemInfoSet = Engine_Database.Get_Item_Information(bibid, vid, true, null);
                    if ((itemInfoSet != null) && (itemInfoSet.Tables[0].Rows.Count > 0))
                    {
                        DataRow itemRow = itemInfoSet.Tables[0].Rows[0];

                        // Copy over the serial hierarchy
                        item.Behaviors.Serial_Info.Clear();
                        string level1_text = itemRow["Level1_Text"].ToString();
                        if (level1_text.Length > 0)
                        {
                            item.Behaviors.Serial_Info.Add_Hierarchy(0, Convert.ToInt32(itemRow["Level1_Index"]), level1_text);
                            string level2_text = itemRow["Level2_Text"].ToString();
                            if (level2_text.Length > 0)
                            {
                                item.Behaviors.Serial_Info.Add_Hierarchy(0, Convert.ToInt32(itemRow["Level2_Index"]), level2_text);
                                string level3_text = itemRow["Level3_Text"].ToString();
                                // BUG FIX: this previously re-tested level1_text, so a (possibly empty)
                                // level-3 entry was added whenever level 1 existed
                                if (level3_text.Length > 0)
                                {
                                    item.Behaviors.Serial_Info.Add_Hierarchy(0, Convert.ToInt32(itemRow["Level3_Index"]), level3_text);
                                }
                            }
                        }

                        // Copy the main thumbnail
                        item.Behaviors.Main_Thumbnail = itemRow["MainThumbnailFile"].ToString();

                        // Copy over the external record numbers (values <= 1 are sentinels for 'not set')
                        long aleph = Convert.ToInt64(itemRow["ALEPH_Number"]);
                        long oclc = Convert.ToInt64(itemRow["OCLC_Number"]);
                        if (aleph > 1)
                        {
                            item.Bib_Info.ALEPH_Record = aleph.ToString();
                        }
                        if (oclc > 1)
                        {
                            item.Bib_Info.OCLC_Record = oclc.ToString();
                        }

                        // Set the aggregationPermissions
                        item.Behaviors.Clear_Aggregations();
                        foreach (DataRow thisAggrRow in itemInfoSet.Tables[1].Rows)
                        {
                            string code = thisAggrRow["Code"].ToString();
                            string name = thisAggrRow["Name"].ToString();
                            item.Behaviors.Add_Aggregation(code, name);
                        }
                    }

                    // Add this document to the list of documents to index
                    index_files.Add(new SolrDocument(item, directory));

                    // Index five documents at a time, since this could be alot of pages at a time
                    if (index_files.Count > 4)
                    {
                        logWriter.Flush();

                        // Add to document index, retrying up to six times with growing back-off
                        logWriter.WriteLine("ADDING TO DOCUMENT INDEX");
                        Console.WriteLine(@"Adding to Lucene/Solr Document Index");
                        bool document_success = false;
                        int document_attempts = 0;
                        while (!document_success)
                        {
                            try
                            {
                                solrDocumentWorker.Add(index_files);
                                document_success = true;
                            }
                            catch (Exception)
                            {
                                if (document_attempts > 5)
                                {
                                    throw;
                                }
                                document_attempts++;
                                logWriter.WriteLine("ERROR " + document_attempts);
                                Console.WriteLine(@"ERROR " + document_attempts);
                                Thread.Sleep(document_attempts * 1000);
                            }
                        }

                        // Add each page to be indexed
                        foreach (SolrDocument document in index_files)
                        {
                            index_pages.AddRange(document.Solr_Pages);
                        }

                        // Add to page index, with the same retry/back-off policy
                        logWriter.WriteLine("ADDING TO PAGE INDEX");
                        Console.WriteLine(@"Adding to Lucene/Solr Page Index");
                        bool page_success = false;
                        int page_attempts = 0;
                        while (!page_success)
                        {
                            try
                            {
                                solrPageWorker.Add(index_pages);
                                page_success = true;
                            }
                            catch (Exception)
                            {
                                if (page_attempts > 5)
                                {
                                    throw;
                                }
                                page_attempts++;
                                logWriter.WriteLine("ERROR " + page_attempts);
                                Console.WriteLine(@"ERROR " + page_attempts);
                                Thread.Sleep(page_attempts * 1000);
                            }
                        }

                        // Clear the documents and pages
                        index_files.Clear();
                        index_pages.Clear();

                        // Commit both cores roughly every 500 items; on failure wait ten minutes and move on
                        if (sincelastcommit > 500)
                        {
                            logWriter.WriteLine("DOCUMENT COMMIT ( " + DateTime.Now.ToString() + " )");
                            Console.WriteLine(@"Comitting Changes to Lucene/Solr Document Index ( {0} )", DateTime.Now.ToString());
                            try
                            {
                                solrDocumentWorker.Commit();
                            }
                            catch
                            {
                                logWriter.WriteLine("ERROR CAUGHT DURING COMMIT ( " + DateTime.Now.ToString() + " )");
                                Console.WriteLine(@"Error caught during document commit ( {0} )", DateTime.Now.ToString());
                                Thread.Sleep(10 * 60 * 1000);
                            }

                            logWriter.WriteLine("PAGE COMMIT ( " + DateTime.Now.ToString() + " )");
                            Console.WriteLine(@"Comitting Changes to Lucene/Solr Page Index ( {0} )", DateTime.Now.ToString());
                            try
                            {
                                solrPageWorker.Commit();
                            }
                            catch
                            {
                                logWriter.WriteLine("ERROR CAUGHT DURING COMMIT ( " + DateTime.Now.ToString() + " )");
                                Console.WriteLine(@"Error caught during document commit ( {0} )", DateTime.Now.ToString());
                                Thread.Sleep(10 * 60 * 1000);
                            }
                            sincelastcommit = 0;
                        }
                    }
                }
                sincelastcommit++;
            }
            itemcount++;
        }

        // Flush any remaining (fewer than five) queued documents and their pages
        if (index_files.Count > 0)
        {
            logWriter.Flush();

            // Add to document index
            Console.WriteLine(@"Adding to Lucene/Solr Document Index");
            solrDocumentWorker.Add(index_files);

            // Add each page to be indexed
            foreach (SolrDocument document in index_files)
            {
                index_pages.AddRange(document.Solr_Pages);
            }

            // Add to page index
            Console.WriteLine(@"Adding to Lucene/Solr Page Index");
            solrPageWorker.Add(index_pages);

            // Clear the documents and pages
            index_files.Clear();
            index_pages.Clear();
        }

        // Comit the changes to the solr/lucene index
        logWriter.WriteLine("DOCUMENT COMMIT ( " + DateTime.Now.ToString() + " )");
        Console.WriteLine(@"Comitting Changes to Lucene/Solr Document Index ( {0} )", DateTime.Now.ToString());
        try
        {
            solrDocumentWorker.Commit();
        }
        catch
        {
            logWriter.WriteLine("ERROR CAUGHT DURING COMMIT ( " + DateTime.Now.ToString() + " )");
            Console.WriteLine(@"Error caught during document commit ( {0} )", DateTime.Now.ToString());
            Thread.Sleep(10 * 60 * 1000);
        }

        logWriter.WriteLine("PAGE COMMIT ( " + DateTime.Now.ToString() + " )");
        Console.WriteLine(@"Comitting Changes to Lucene/Solr Page Index ( {0} )", DateTime.Now.ToString());
        try
        {
            solrPageWorker.Commit();
        }
        catch
        {
            logWriter.WriteLine("ERROR CAUGHT DURING COMMIT ( " + DateTime.Now.ToString() + " )");
            Console.WriteLine(@"Error caught during document commit ( {0} )", DateTime.Now.ToString());
            Thread.Sleep(10 * 60 * 1000);
        }

        // Final optimize of each core; each is followed by a 30-minute pause to let the
        // (potentially very long-running) optimize settle before continuing
        logWriter.WriteLine("Final document optimize");
        Console.WriteLine(@"Final document optimize");
        try
        {
            solrDocumentWorker.Optimize();
        }
        catch (Exception)
        {
            // Do not do anything here. It may throw an exception when it runs very longs
        }
        Thread.Sleep(30 * 60 * 1000);

        logWriter.WriteLine("Final page optimize");
        Console.WriteLine(@"Final page optimize");
        try
        {
            solrPageWorker.Optimize();
        }
        catch (Exception)
        {
            // Do not do anything here. It may throw an exception when it runs very longs
        }
        Thread.Sleep(30 * 60 * 1000);

        // Add final meessage
        Console.WriteLine(@"Process Complete at {0}", DateTime.Now.ToString());
        Console.WriteLine(@"Process Started at {0}", startTime.ToString());
        Console.WriteLine();
        Console.WriteLine(@"Enter any key to exit:");
        Console.ReadKey();

        logWriter.Flush();
    }
}
/// <summary> Builds all of the site map files which point to the static HTML pages </summary>
/// <param name="DestinationPath"> Destination folder for all the generated site maps </param>
/// <param name="PrimaryUrl"> Primary URL for this instance of SobekCM </param>
/// <returns> Number of site maps created ( Only 30,000 links are included in each site map ), or -1 on any error </returns>
public int Build_Site_Maps(string DestinationPath, string PrimaryUrl)
{
    try
    {
        int site_map_index = 1;
        string site_map_file = "sitemap" + site_map_index + ".xml";
        int record_count = 0;
        StreamWriter writer = null;
        try
        {
            writer = new StreamWriter(Path.Combine(DestinationPath, site_map_file), false);
            writer.WriteLine("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
            writer.WriteLine("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");

            // One <url> entry per item in the instance-wide item list
            DataSet item_list_table = Engine_Database.Simple_Item_List(String.Empty, null);
            foreach (DataRow thisRow in item_list_table.Tables[0].Rows)
            {
                // Ready to start the next site map?
                // BUG FIX: was 'record_count > 30000', which allowed 30,001 links per file,
                // one more than the documented (and protocol-friendly) 30,000 limit
                if (record_count >= 30000)
                {
                    writer.WriteLine("</urlset>");
                    writer.Flush();
                    writer.Close();
                    site_map_index++;
                    site_map_file = "sitemap" + site_map_index + ".xml";
                    writer = new StreamWriter(Path.Combine(DestinationPath, site_map_file), false);
                    record_count = 0;
                    writer.WriteLine("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
                    writer.WriteLine("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");
                }

                // Determine the item identifiers and (optional) last-modified date
                string bibid = thisRow["BibID"].ToString();
                string vid = thisRow["VID"].ToString();
                DateTime? lastModifiedDate = null;
                if (thisRow["LastSaved"] != DBNull.Value)
                {
                    DateTime tryParseDate;
                    if (DateTime.TryParse(thisRow["LastSaved"].ToString(), out tryParseDate))
                    {
                        lastModifiedDate = tryParseDate;
                    }
                }

                writer.WriteLine("\t<url>");
                writer.WriteLine("\t\t<loc>" + PrimaryUrl + bibid + "/" + vid + "</loc>");
                if (lastModifiedDate.HasValue)
                {
                    // Sitemap protocol requires W3C date format (YYYY-MM-DD)
                    writer.WriteLine("\t\t<lastmod>" + lastModifiedDate.Value.Year + "-" + lastModifiedDate.Value.Month.ToString().PadLeft(2, '0') + "-" + lastModifiedDate.Value.Day.ToString().PadLeft(2, '0') + "</lastmod>");
                }
                writer.WriteLine("\t</url>");
                record_count++;
            }

            writer.WriteLine("</urlset>");
            writer.Flush();
            writer.Close();
            writer = null;
            return site_map_index;
        }
        finally
        {
            // BUG FIX: the current writer previously leaked if an exception escaped
            // (e.g. from the database call) before the final Close
            if (writer != null)
            {
                writer.Dispose();
            }
        }
    }
    catch
    {
        return -1;
    }
}
/// <summary> Recreates the aggregation-level static files (XML item list, RSS feeds,
/// and static HTML browse page) for each affected aggregation, then the instance-wide versions </summary>
/// <param name="AggregationsAffected"> Codes for the aggregations which need their static files rebuilt </param>
/// <param name="Settings"> Instance-wide settings (URLs and static pages location) </param>
/// <param name="UpdateID"> Primary key for this builder execution, for logging </param>
public void ReCreate_Aggregation_Level_Pages(List<string> AggregationsAffected, InstanceWide_Settings Settings, long UpdateID)
{
    // Determine, and create the local work space
    string localWorkArea = Path.Combine(MultiInstance_Builder_Settings.Builder_Executable_Directory, "temp");
    try
    {
        if (!Directory.Exists(localWorkArea))
        {
            Directory.CreateDirectory(localWorkArea);
        }
    }
    catch
    {
        OnError("Error creating the temporary work area in BuildAggregationBrowsesModule: " + localWorkArea, UpdateID);
        return;
    }

    // Build the primary URL, guaranteeing a trailing slash
    string primaryUrl = Settings.Servers.Application_Server_URL;
    if (String.IsNullOrEmpty(primaryUrl))
    {
        OnError("Primary system URL is not set", UpdateID);
        return;
    }
    if (primaryUrl[primaryUrl.Length - 1] != '/')
    {
        primaryUrl = primaryUrl + "/";
    }

    // IN THIS CASE, WE DO NEED TO SET THE SINGLETON, SINCE THIS CALLS THE LIBRARIES
    Engine_ApplicationCache_Gateway.Settings = Settings;

    try
    {
        // Step through each aggregation with new items
        foreach (string thisAggrCode in AggregationsAffected)
        {
            // Some aggregations can be excluded
            if ((thisAggrCode != "ALL") && (thisAggrCode.Length > 1))
            {
                // Get the display aggregation code (lower leading 'i')
                string display_code = thisAggrCode;
                if (display_code[0] == 'I')
                {
                    display_code = 'i' + display_code.Substring(1);
                }

                // Get this item aggregations
                Complete_Item_Aggregation aggregationCompleteObj = Engine_Database.Get_Item_Aggregation(thisAggrCode, false, null);
                Item_Aggregation aggregationObj = Item_Aggregation_Utilities.Get_Item_Aggregation(aggregationCompleteObj, Settings.System.Default_UI_Language, null);

                // ROBUSTNESS FIX: skip (rather than throw and abort the whole loop) when the
                // aggregation cannot be retrieved - aggregationObj.Name is dereferenced below
                if (aggregationObj == null)
                {
                    OnError("........Unable to retrieve the item aggregation for " + display_code, UpdateID);
                    continue;
                }

                // Get the list of items for this aggregation
                DataSet aggregation_items = Engine_Database.Simple_Item_List(thisAggrCode, null);

                // Create the XML list for this aggregation
                OnProcess("........Building XML item list for " + display_code, UpdateID);
                try
                {
                    string aggregation_list_file = Settings.Servers.Static_Pages_Location + "\\" + thisAggrCode.ToLower() + ".xml";
                    if (File.Exists(aggregation_list_file))
                    {
                        File.Delete(aggregation_list_file);
                    }
                    aggregation_items.WriteXml(aggregation_list_file, XmlWriteMode.WriteSchema);
                }
                catch (Exception ee)
                {
                    OnError("........Error in building XML list for " + display_code + " on " + Settings.Servers.Static_Pages_Location + "\n" + ee.Message, UpdateID);
                }

                // Build the two RSS feeds locally, then publish them to the static pages server
                OnProcess("........Building RSS feed for " + display_code, UpdateID);
                try
                {
                    if (Create_RSS_Feed(thisAggrCode.ToLower(), localWorkArea, aggregationObj.Name, aggregation_items, primaryUrl))
                    {
                        try
                        {
                            // Copy the two generated RSS files over to the server
                            File.Copy(Path.Combine(localWorkArea, thisAggrCode.ToLower() + "_rss.xml"), Path.Combine(Settings.Servers.Static_Pages_Location, "rss", thisAggrCode.ToLower() + "_rss.xml"), true);
                            File.Copy(Path.Combine(localWorkArea, thisAggrCode.ToLower() + "_short_rss.xml"), Path.Combine(Settings.Servers.Static_Pages_Location, "rss", thisAggrCode.ToLower() + "_short_rss.xml"), true);

                            // Delete the temporary files as well
                            File.Delete(Path.Combine(localWorkArea, thisAggrCode.ToLower() + "_rss.xml"));
                            File.Delete(Path.Combine(localWorkArea, thisAggrCode.ToLower() + "_short_rss.xml"));
                        }
                        catch (Exception ee)
                        {
                            OnError("........Error in copying RSS feed for " + display_code + " to " + Settings.Servers.Static_Pages_Location + "\n" + ee.Message, UpdateID);
                        }
                    }
                }
                catch (Exception ee)
                {
                    OnError("........Error in building RSS feed for " + display_code + "\n" + ee.Message, UpdateID);
                }

                // Build the static HTML browse page locally, then publish it
                OnProcess("........Building static HTML browse page of links for " + display_code, UpdateID);
                try
                {
                    string destinationFile = Path.Combine(localWorkArea, thisAggrCode.ToLower() + "_all.html");
                    if (Build_All_Browse(aggregationObj, aggregation_items, destinationFile, primaryUrl, UpdateID))
                    {
                        try
                        {
                            File.Copy(destinationFile, Path.Combine(Settings.Servers.Static_Pages_Location, thisAggrCode.ToLower() + "_all.html"), true);
                        }
                        catch (Exception ee)
                        {
                            OnError("........Error in copying HTML browse for " + display_code + " to " + Settings.Servers.Static_Pages_Location + "\n" + ee.Message, UpdateID);
                        }
                    }
                }
                catch (Exception ee)
                {
                    OnError("........Error in building HTML browse for " + display_code + "\n" + ee.Message, UpdateID);
                }
            }
        }

        // Build the full instance-wide XML and RSS here as well
        Recreate_Library_XML_and_RSS(UpdateID, Settings, localWorkArea, primaryUrl);
    }
    catch (Exception ee)
    {
        OnError("Exception caught in BuildAggregationBrowsesModule", UpdateID);
        OnError(ee.Message, UpdateID);
        OnError(ee.StackTrace, UpdateID);
    }
}