/// <summary> Constructor for a new instance of the SolrDocument class </summary>
/// <param name="Digital_Object"> Digital object to create an easily indexable view object for </param>
/// <param name="File_Location"> Location for all of the text files associated with this item </param>
/// <remarks> Some work is done in the constructor; in particular, work that reduces the number of
/// iterations that must be made through objects which may be indexed in a number of places.
/// This includes subject keywords, spatial information, genres, and information from the table of contents.
/// A page text file that cannot be read is skipped (no page is added for it), but it is still treated as a
/// page file and so is not reported as an additional text file. Exceptions thrown while listing the files
/// in <paramref name="File_Location"/> are not caught here. </remarks>
public SolrDocument(SobekCM_Item Digital_Object, string File_Location)
{
    digitalObject = Digital_Object;
    fileLocation = File_Location;

    // Add the subjects
    if (digitalObject.Bib_Info.Subjects_Count > 0)
    {
        foreach (Subject_Info thisSubject in digitalObject.Bib_Info.Subjects)
        {
            // Add every subject to the complete list of subjects
            allsubjects.Add(thisSubject.ToString(false));

            // Add name subjects
            if (thisSubject.Class_Type == Subject_Info_Type.Name)
            {
                namesubject.Add(thisSubject.ToString(false));
            }

            // Add title subjects
            if (thisSubject.Class_Type == Subject_Info_Type.TitleInfo)
            {
                titlesubject.Add(thisSubject.ToString(false));
            }

            // Add the subject keywords, skipping any subject whose ID contains "690" or "691"
            if ((thisSubject.Class_Type == Subject_Info_Type.Standard)
                && (thisSubject.ID.IndexOf("690", StringComparison.Ordinal) < 0)
                && (thisSubject.ID.IndexOf("691", StringComparison.Ordinal) < 0))
            {
                // Cast to the standard subject type
                Subject_Info_Standard standSubj = (Subject_Info_Standard)thisSubject;

                if (standSubj.Genres_Count > 0)
                {
                    foreach (string genreTerm in standSubj.Genres)
                    {
                        genre.Add(genreTerm.ToLower());
                    }
                }

                if (standSubj.Geographics_Count > 0)
                {
                    foreach (string geoTerm in standSubj.Geographics)
                    {
                        spatialcoverage.Add(geoTerm);
                    }
                }

                if (standSubj.Topics_Count > 0)
                {
                    foreach (string topicTerm in standSubj.Topics)
                    {
                        subjectkeyword.Add(topicTerm);
                    }
                }
            }

            // Add hierarchical spatial info
            if (thisSubject.Class_Type == Subject_Info_Type.Hierarchical_Spatial)
            {
                // Cast to the hierarchical subject type
                Subject_Info_HierarchicalGeographic hiero = (Subject_Info_HierarchicalGeographic)thisSubject;

                // Check for existing subfacets and add if not there.
                // Continent and island have no list of their own here; both go into spatialcoverage.
                if ((hiero.Continent.Length > 0) && (!spatialcoverage.Contains(hiero.Continent)))
                {
                    spatialcoverage.Add(hiero.Continent);
                }
                if ((hiero.Country.Length > 0) && (!country.Contains(hiero.Country)))
                {
                    country.Add(hiero.Country);
                }
                if ((hiero.State.Length > 0) && (!state.Contains(hiero.State)))
                {
                    state.Add(hiero.State);
                }
                if ((hiero.County.Length > 0) && (!county.Contains(hiero.County)))
                {
                    county.Add(hiero.County);
                }
                if ((hiero.City.Length > 0) && (!city.Contains(hiero.City)))
                {
                    city.Add(hiero.City);
                }
                if ((hiero.Island.Length > 0) && (!spatialcoverage.Contains(hiero.Island)))
                {
                    spatialcoverage.Add(hiero.Island);
                }
            }
        }
    }

    // Add the individual genre information (just to be done with genre)
    if (digitalObject.Bib_Info.Genres_Count > 0)
    {
        foreach (Genre_Info thisGenre in digitalObject.Bib_Info.Genres)
        {
            genre.Add(thisGenre.Genre_Term.ToLower());
        }
    }

    // Add all the temporal subjects
    if (digitalObject.Bib_Info.TemporalSubjects_Count > 0)
    {
        foreach (Temporal_Info thisTemporal in digitalObject.Bib_Info.TemporalSubjects)
        {
            if (thisTemporal.TimePeriod.Length > 0)
            {
                allsubjects.Add(thisTemporal.TimePeriod);
                temporalsubject.Add(thisTemporal.TimePeriod);
            }
        }
    }

    // Prepare to step through all the divisions/pages in this item
    int pageorder = 1;
    tocterms = new List<string>();
    solrpages = new List<SolrPage>();
    List<abstract_TreeNode> divsAndPages = digitalObject.Divisions.Physical_Tree.Divisions_PreOrder;

    // Get the list of all TXT files in this location, keyed by upper-cased file name
    string[] text_files = Directory.GetFiles(File_Location, "*.txt");
    Dictionary<string, string> text_files_existing = new Dictionary<string, string>();
    foreach (string thisTextFile in text_files)
    {
        string filename = Path.GetFileName(thisTextFile).ToUpper();
        text_files_existing[filename] = filename;
    }

    // Get the list of all THM.JPG files in this location, keyed by the upper-cased
    // file name with the "THM.JPG" suffix removed; the value keeps the original casing
    string[] thumbnail_files = Directory.GetFiles(File_Location, "*thm.jpg");
    Dictionary<string, string> thumbnail_files_existing = new Dictionary<string, string>();
    foreach (string thisThumbnailFile in thumbnail_files)
    {
        string filename = Path.GetFileName(thisThumbnailFile);
        thumbnail_files_existing[filename.ToUpper().Replace("THM.JPG", "")] = filename;
    }

    // Step through all division nodes from the physical tree here.
    // A set is used since membership is tested once per text file further below.
    HashSet<string> text_files_included = new HashSet<string>();
    foreach (abstract_TreeNode thisNode in divsAndPages)
    {
        if (thisNode.Page)
        {
            // Cast to a page to continue
            Page_TreeNode pageNode = (Page_TreeNode)thisNode;

            // If this is a unique page label (i.e., it does not contain "PAGE "), add it
            if ((pageNode.Label.Length > 0)
                && (pageNode.Label.IndexOf("PAGE ", StringComparison.OrdinalIgnoreCase) < 0))
            {
                tocterms.Add(pageNode.Label);
            }

            // Look for the root filename and then look for a matching TEXT file
            if (pageNode.Files.Count > 0)
            {
                string root = pageNode.Files[0].File_Name_Sans_Extension;
                string rootUpper = root.ToUpper();
                string textFileKey = rootUpper + ".TXT";

                if (text_files_existing.ContainsKey(textFileKey))
                {
                    try
                    {
                        // Since this is marked to be included, save this name (even if the read below fails)
                        text_files_included.Add(textFileKey);

                        // Read the page text; the using block releases the file handle even if the read throws
                        string pageText;
                        using (StreamReader reader = new StreamReader(Path.Combine(File_Location, root + ".txt")))
                        {
                            pageText = reader.ReadToEnd().Trim();
                        }

                        // Look for a matching thumbnail
                        string thumbnail = String.Empty;
                        string matchingThumbnail;
                        if (thumbnail_files_existing.TryGetValue(rootUpper, out matchingThumbnail))
                        {
                            thumbnail = matchingThumbnail;
                        }

                        SolrPage newPage = new SolrPage(digitalObject.BibID, digitalObject.VID, pageorder, pageNode.Label, pageText, thumbnail);
                        solrpages.Add(newPage);
                    }
                    catch (Exception)
                    {
                        // Deliberately best-effort: a page whose text cannot be read or converted
                        // is left out of solrpages rather than failing the entire document.
                    }
                }
            }

            // Increment the page order for the next page regardless of whether text was found
            pageorder++;
        }
        else
        {
            // Add the label or type for this division
            if (thisNode.Label.Length > 0)
            {
                tocterms.Add(thisNode.Label);
            }
            else if (thisNode.Type.Length > 0)
            {
                tocterms.Add(thisNode.Type);
            }
        }
    }

    // Now, check for any other valid text files. The keys are already upper-cased, so they
    // can be compared directly against the included set and the excluded names.
    additional_text_files = new List<string>();
    foreach (string thisTextFile in text_files_existing.Keys)
    {
        if ((!text_files_included.Contains(thisTextFile))
            && (thisTextFile != "AGREEMENT.TXT")
            && (!thisTextFile.StartsWith("REQUEST", StringComparison.Ordinal)))
        {
            additional_text_files.Add(thisTextFile);
        }
    }
}
/// <summary> Constructor for a new instance of the SolrDocument class </summary>
/// <param name="Digital_Object"> Digital object to create an easily indexable view object for </param>
/// <param name="File_Location"> Location for all of the text files associated with this item </param>
/// <remarks> Some work is done in the constructor; in particular, work that reduces the number of
/// iterations that must be made through objects which may be indexed in a number of places.
/// This includes subject keywords, spatial information, genres, and information from the table of contents.
/// A page text file that cannot be read is skipped (no page is added for it), but it is still treated as a
/// page file and so is not reported as an additional text file. Exceptions thrown while listing the files
/// in <paramref name="File_Location"/> are not caught here. </remarks>
public SolrDocument(SobekCM_Item Digital_Object, string File_Location)
{
    digitalObject = Digital_Object;
    fileLocation = File_Location;

    // Add the subjects
    if (digitalObject.Bib_Info.Subjects_Count > 0)
    {
        foreach (Subject_Info thisSubject in digitalObject.Bib_Info.Subjects)
        {
            // Add every subject to the complete list of subjects
            allsubjects.Add(thisSubject.ToString(false));

            // Add name subjects
            if (thisSubject.Class_Type == Subject_Info_Type.Name)
            {
                namesubject.Add(thisSubject.ToString(false));
            }

            // Add title subjects
            if (thisSubject.Class_Type == Subject_Info_Type.TitleInfo)
            {
                titlesubject.Add(thisSubject.ToString(false));
            }

            // Add the subject keywords, skipping any subject whose ID contains "690" or "691"
            if ((thisSubject.Class_Type == Subject_Info_Type.Standard)
                && (thisSubject.ID.IndexOf("690", StringComparison.Ordinal) < 0)
                && (thisSubject.ID.IndexOf("691", StringComparison.Ordinal) < 0))
            {
                // Cast to the standard subject type
                Subject_Info_Standard standSubj = (Subject_Info_Standard)thisSubject;

                if (standSubj.Genres_Count > 0)
                {
                    foreach (string genreTerm in standSubj.Genres)
                    {
                        genre.Add(genreTerm.ToLower());
                    }
                }

                if (standSubj.Geographics_Count > 0)
                {
                    foreach (string geoTerm in standSubj.Geographics)
                    {
                        spatialcoverage.Add(geoTerm);
                    }
                }

                if (standSubj.Topics_Count > 0)
                {
                    foreach (string topicTerm in standSubj.Topics)
                    {
                        subjectkeyword.Add(topicTerm);
                    }
                }
            }

            // Add hierarchical spatial info
            if (thisSubject.Class_Type == Subject_Info_Type.Hierarchical_Spatial)
            {
                // Cast to the hierarchical subject type
                Subject_Info_HierarchicalGeographic hiero = (Subject_Info_HierarchicalGeographic)thisSubject;

                // Check for existing subfacets and add if not there.
                // Continent and island have no list of their own here; both go into spatialcoverage.
                if ((hiero.Continent.Length > 0) && (!spatialcoverage.Contains(hiero.Continent)))
                {
                    spatialcoverage.Add(hiero.Continent);
                }
                if ((hiero.Country.Length > 0) && (!country.Contains(hiero.Country)))
                {
                    country.Add(hiero.Country);
                }
                if ((hiero.State.Length > 0) && (!state.Contains(hiero.State)))
                {
                    state.Add(hiero.State);
                }
                if ((hiero.County.Length > 0) && (!county.Contains(hiero.County)))
                {
                    county.Add(hiero.County);
                }
                if ((hiero.City.Length > 0) && (!city.Contains(hiero.City)))
                {
                    city.Add(hiero.City);
                }
                if ((hiero.Island.Length > 0) && (!spatialcoverage.Contains(hiero.Island)))
                {
                    spatialcoverage.Add(hiero.Island);
                }
            }
        }
    }

    // Add the individual genre information (just to be done with genre)
    if (digitalObject.Bib_Info.Genres_Count > 0)
    {
        foreach (Genre_Info thisGenre in digitalObject.Bib_Info.Genres)
        {
            genre.Add(thisGenre.Genre_Term.ToLower());
        }
    }

    // Add all the temporal subjects
    if (digitalObject.Bib_Info.TemporalSubjects_Count > 0)
    {
        foreach (Temporal_Info thisTemporal in digitalObject.Bib_Info.TemporalSubjects)
        {
            if (thisTemporal.TimePeriod.Length > 0)
            {
                allsubjects.Add(thisTemporal.TimePeriod);
                temporalsubject.Add(thisTemporal.TimePeriod);
            }
        }
    }

    // Prepare to step through all the divisions/pages in this item
    int pageorder = 1;
    tocterms = new List<string>();
    solrpages = new List<SolrPage>();
    List<abstract_TreeNode> divsAndPages = digitalObject.Divisions.Physical_Tree.Divisions_PreOrder;

    // Get the list of all TXT files in this location, keyed by upper-cased file name
    string[] text_files = Directory.GetFiles(File_Location, "*.txt");
    Dictionary<string, string> text_files_existing = new Dictionary<string, string>();
    foreach (string thisTextFile in text_files)
    {
        string filename = Path.GetFileName(thisTextFile).ToUpper();
        text_files_existing[filename] = filename;
    }

    // Get the list of all THM.JPG files in this location, keyed by the upper-cased
    // file name with the "THM.JPG" suffix removed; the value keeps the original casing
    string[] thumbnail_files = Directory.GetFiles(File_Location, "*thm.jpg");
    Dictionary<string, string> thumbnail_files_existing = new Dictionary<string, string>();
    foreach (string thisThumbnailFile in thumbnail_files)
    {
        string filename = Path.GetFileName(thisThumbnailFile);
        thumbnail_files_existing[filename.ToUpper().Replace("THM.JPG", "")] = filename;
    }

    // Step through all division nodes from the physical tree here.
    // A set is used since membership is tested once per text file further below.
    HashSet<string> text_files_included = new HashSet<string>();
    foreach (abstract_TreeNode thisNode in divsAndPages)
    {
        if (thisNode.Page)
        {
            // Cast to a page to continue
            Page_TreeNode pageNode = (Page_TreeNode)thisNode;

            // If this is a unique page label (i.e., it does not contain "PAGE "), add it
            if ((pageNode.Label.Length > 0)
                && (pageNode.Label.IndexOf("PAGE ", StringComparison.OrdinalIgnoreCase) < 0))
            {
                tocterms.Add(pageNode.Label);
            }

            // Look for the root filename and then look for a matching TEXT file
            if (pageNode.Files.Count > 0)
            {
                string root = pageNode.Files[0].File_Name_Sans_Extension;
                string rootUpper = root.ToUpper();
                string textFileKey = rootUpper + ".TXT";

                if (text_files_existing.ContainsKey(textFileKey))
                {
                    try
                    {
                        // Since this is marked to be included, save this name (even if the read below fails)
                        text_files_included.Add(textFileKey);

                        // Read the page text; the using block releases the file handle even if the read throws
                        string pageText;
                        using (StreamReader reader = new StreamReader(Path.Combine(File_Location, root + ".txt")))
                        {
                            pageText = reader.ReadToEnd().Trim();
                        }

                        // Look for a matching thumbnail
                        string thumbnail = String.Empty;
                        string matchingThumbnail;
                        if (thumbnail_files_existing.TryGetValue(rootUpper, out matchingThumbnail))
                        {
                            thumbnail = matchingThumbnail;
                        }

                        SolrPage newPage = new SolrPage(digitalObject.BibID, digitalObject.VID, pageorder, pageNode.Label, pageText, thumbnail);
                        solrpages.Add(newPage);
                    }
                    catch (Exception)
                    {
                        // Deliberately best-effort: a page whose text cannot be read or converted
                        // is left out of solrpages rather than failing the entire document.
                    }
                }
            }

            // Increment the page order for the next page regardless of whether text was found
            pageorder++;
        }
        else
        {
            // Add the label or type for this division
            if (thisNode.Label.Length > 0)
            {
                tocterms.Add(thisNode.Label);
            }
            else if (thisNode.Type.Length > 0)
            {
                tocterms.Add(thisNode.Type);
            }
        }
    }

    // Now, check for any other valid text files. The keys are already upper-cased, so they
    // can be compared directly against the included set and the excluded names.
    additional_text_files = new List<string>();
    foreach (string thisTextFile in text_files_existing.Keys)
    {
        if ((!text_files_included.Contains(thisTextFile))
            && (thisTextFile != "AGREEMENT.TXT")
            && (!thisTextFile.StartsWith("REQUEST", StringComparison.Ordinal)))
        {
            additional_text_files.Add(thisTextFile);
        }
    }
}