/// <summary> Reads metadata from an open stream and saves to the provided item/package </summary>
/// <param name="Input_Stream"> Open stream to read metadata from </param>
/// <param name="Return_Package"> Package into which to read the metadata </param>
/// <param name="Options"> Dictionary of any options which this metadata reader/writer may utilize </param>
/// <param name="Error_Message">[OUTPUT] Explanation of the error, if an error occurs during reading </param>
/// <returns>TRUE if successful, otherwise FALSE </returns>
/// <remarks> Accepts two options: (1) 'METS_File_ReaderWriter:Minimize_File_Info' which tells whether the reader
/// should just skip the file reading portion completely, and just read the bibliographic data ( Default is FALSE).
/// (2) 'METS_File_ReaderWriter:Support_Divisional_dmdSec_amdSec' </remarks>
public bool Read_Metadata(Stream Input_Stream, SobekCM_Item Return_Package, Dictionary<string, object> Options, out string Error_Message)
{
    Error_Message = String.Empty;

    // Read the options from the dictionary of options
    bool minimizeFileInfo = false;
    if (Options != null)
    {
        if (Options.ContainsKey("METS_File_ReaderWriter:Minimize_File_Info"))
            bool.TryParse(Options["METS_File_ReaderWriter:Minimize_File_Info"].ToString(), out minimizeFileInfo);

        if (Options.ContainsKey("METS_File_ReaderWriter:Support_Divisional_dmdSec_amdSec"))
        {
            // NOTE(review): this option is parsed but the parsed value is never consulted
            // anywhere below -- confirm whether divisional dmdSec/amdSec support is
            // intended to be unconditional, or whether this flag should gate it
            bool supportDivisionalDmdSecAmdSec;
            bool.TryParse(Options["METS_File_ReaderWriter:Support_Divisional_dmdSec_amdSec"].ToString(), out supportDivisionalDmdSecAmdSec);
        }
    }

    // Keep a list of all the files created, by file id, as additional data is gathered
    // from the different locations ( amdSec, fileSec, structmap )
    Dictionary<string, SobekCM_File_Info> files_by_fileid = new Dictionary<string, SobekCM_File_Info>();

    // For now, to do support for old way of doing downloads, build a list to hold
    // the deprecated download files.
    // NOTE(review): nothing in this method ever adds to this list, so the
    // download-migration region below is unreachable as written -- verify whether a
    // deprecated-download reader was supposed to populate it
    List<Download_Info_DEPRECATED> deprecatedDownloads = new List<Download_Info_DEPRECATED>();

    // Need to store the unanalyzed sections of dmdSec and amdSec until we determine if
    // the scope is the whole package, or the top-level div.  We use lists as the value since
    // several sections may have NO id and the METS may even (incorrectly) have multiple sections
    // with the same ID
    Dictionary<string, List<Unanalyzed_METS_Section>> dmdSec = new Dictionary<string, List<Unanalyzed_METS_Section>>();
    Dictionary<string, List<Unanalyzed_METS_Section>> amdSec = new Dictionary<string, List<Unanalyzed_METS_Section>>();

    // Dictionaries store the link between dmdSec and amdSec id's to single divisions
    Dictionary<string, abstract_TreeNode> division_dmdids = new Dictionary<string, abstract_TreeNode>();
    Dictionary<string, abstract_TreeNode> division_amdids = new Dictionary<string, abstract_TreeNode>();

    try
    {
        // Try to read the XML ( 'using' ensures the reader is released even on error )
        using (XmlReader r = new XmlTextReader(Input_Stream))
        {
            // Begin stepping through each of the XML nodes
            while (r.Read())
            {
                #region Handle some processing instructions requested by Florida SUS's / FLVC (hope to deprecate)

                // Handle some processing instructions requested by Florida SUS's / FLVC
                if (r.NodeType == XmlNodeType.ProcessingInstruction)
                {
                    if (r.Name.ToLower() == "fcla")
                    {
                        string value = r.Value.ToLower();

                        // 'fda="yes"' flags this package for DAITSS archiving
                        if (value.IndexOf("fda=\"yes\"") >= 0)
                        {
                            DAITSS_Info daitssInfo = Return_Package.Get_Metadata_Module(GlobalVar.DAITSS_METADATA_MODULE_KEY) as DAITSS_Info;
                            if (daitssInfo == null)
                            {
                                daitssInfo = new DAITSS_Info();
                                Return_Package.Add_Metadata_Module(GlobalVar.DAITSS_METADATA_MODULE_KEY, daitssInfo);
                            }
                            daitssInfo.toArchive = true;
                        }

                        // 'fda="no"' explicitly excludes this package from DAITSS archiving
                        if (value.IndexOf("fda=\"no\"") >= 0)
                        {
                            DAITSS_Info daitssInfo2 = Return_Package.Get_Metadata_Module(GlobalVar.DAITSS_METADATA_MODULE_KEY) as DAITSS_Info;
                            if (daitssInfo2 == null)
                            {
                                daitssInfo2 = new DAITSS_Info();
                                Return_Package.Add_Metadata_Module(GlobalVar.DAITSS_METADATA_MODULE_KEY, daitssInfo2);
                            }
                            daitssInfo2.toArchive = false;
                        }
                    }
                }

                #endregion

                if (r.NodeType == XmlNodeType.Element)
                {
                    switch (r.Name.Replace("METS:", ""))
                    {
                        case "mets":
                            if (r.MoveToAttribute("OBJID"))
                                Return_Package.METS_Header.ObjectID = r.Value;
                            break;

                        case "metsHdr":
                            read_mets_header(r.ReadSubtree(), Return_Package);
                            break;

                        case "dmdSec":
                        case "dmdSecFedora":
                            // Store the dmdSec for later analysis; its scope (package vs.
                            // division) is unknown until the structure map has been read
                            Unanalyzed_METS_Section thisDmdSec = store_dmd_sec(r.ReadSubtree());
                            if (dmdSec.ContainsKey(thisDmdSec.ID))
                                dmdSec[thisDmdSec.ID].Add(thisDmdSec);
                            else
                            {
                                List<Unanalyzed_METS_Section> newDmdSecList = new List<Unanalyzed_METS_Section> { thisDmdSec };
                                dmdSec[thisDmdSec.ID] = newDmdSecList;
                            }
                            break;

                        case "amdSec":
                            // Store the amdSec for later analysis, same as the dmdSec above
                            Unanalyzed_METS_Section thisAmdSec = store_amd_sec(r.ReadSubtree());
                            if (amdSec.ContainsKey(thisAmdSec.ID))
                                amdSec[thisAmdSec.ID].Add(thisAmdSec);
                            else
                            {
                                List<Unanalyzed_METS_Section> newAmdSecList = new List<Unanalyzed_METS_Section> { thisAmdSec };
                                amdSec[thisAmdSec.ID] = newAmdSecList;
                            }
                            break;

                        case "fileSec":
                            read_file_sec(r.ReadSubtree(), minimizeFileInfo, files_by_fileid);
                            break;

                        case "structMap":
                            if (!r.IsEmptyElement)
                            {
                                read_struct_map(r.ReadSubtree(), Return_Package, files_by_fileid, division_dmdids, division_amdids);
                            }
                            break;

                        case "behaviorSec":
                            read_behavior_sec(r.ReadSubtree(), Return_Package);
                            break;
                    }
                }
            }
        }
    }
    catch (Exception ee)
    {
        // Do not abort: the post-processing below still runs against whatever was read
        // successfully, but record the problem so the caller is informed (previously the
        // exception was silently swallowed and the method always reported success)
        Error_Message = "Error caught while reading the METS file: " + ee.Message;
    }

    Input_Stream.Close();

    // Load some options for interoperability
    Dictionary<string, object> options = new Dictionary<string, object>();
    options.Add("SobekCM_FileInfo_METS_amdSec_ReaderWriter:Files_By_FileID", files_by_fileid);

    #region Process the previously stored dmd sections

    // Now, process the previously stored dmd sections
    foreach (string thisDmdSecId in dmdSec.Keys)
    {
        // Could be multiple stored sections with the same (or no) ID
        foreach (Unanalyzed_METS_Section metsSection in dmdSec[thisDmdSecId])
        {
            XmlReader reader = XmlReader.Create(new StringReader(metsSection.Inner_XML));
            string mdtype = String.Empty;
            string othermdtype = String.Empty;

            while (reader.Read())
            {
                if (reader.NodeType == XmlNodeType.Element)
                {
                    if (reader.Name.ToLower().Replace("mets:", "") == "mdwrap")
                    {
                        if (reader.MoveToAttribute("MDTYPE"))
                            mdtype = reader.Value;
                        if (reader.MoveToAttribute("OTHERMDTYPE"))
                            othermdtype = reader.Value;

                        // Not crazy about this part, but sometimes people do not use the OTHERMDTYPE
                        // tag correctly, and just use the LABEL to differentiate the types
                        if ((mdtype == "OTHER") && (othermdtype.Length == 0) && (reader.MoveToAttribute("LABEL")))
                            othermdtype = reader.Value;

                        // Now, determine if this was a division-level read, or a package-wide
                        if (division_dmdids.ContainsKey(thisDmdSecId))
                        {
                            // Division level dmdSec -- get the division
                            abstract_TreeNode node = division_dmdids[thisDmdSecId];

                            // Get an appropriate reader from the metadata configuration
                            iDivision_dmdSec_ReaderWriter rw = ResourceObjectSettings.MetadataConfig.Get_Division_DmdSec_ReaderWriter(mdtype, othermdtype);

                            // Is this dmdSec analyzable? (i.e., did we find an appropriate reader/writer?)
                            if (rw == null)
                            {
                                node.Add_Unanalyzed_DMDSEC(metsSection);
                            }
                            else
                            {
                                rw.Read_dmdSec(reader, node, options);
                            }
                        }
                        else
                        {
                            // Package-level dmdSec -- get an appropriate reader from the metadata configuration
                            iPackage_dmdSec_ReaderWriter rw = ResourceObjectSettings.MetadataConfig.Get_Package_DmdSec_ReaderWriter(mdtype, othermdtype);

                            // Is this dmdSec analyzable? (i.e., did we find an appropriate reader/writer?)
                            if (rw == null)
                            {
                                Return_Package.Add_Unanalyzed_DMDSEC(metsSection);
                            }
                            else
                            {
                                rw.Read_dmdSec(reader, Return_Package, options);
                            }
                        }
                    }
                }
            }
        }
    }

    #endregion

    #region Process the previously stored amd sections

    // Now, process the previously stored amd sections
    foreach (string thisAmdSecId in amdSec.Keys)
    {
        // Could be multiple stored sections with the same (or no) ID
        foreach (Unanalyzed_METS_Section metsSection in amdSec[thisAmdSecId])
        {
            XmlReader reader = XmlReader.Create(new StringReader(metsSection.Inner_XML));
            string mdtype = String.Empty;
            string othermdtype = String.Empty;

            while (reader.Read())
            {
                if (reader.NodeType == XmlNodeType.Element)
                {
                    if (reader.Name.ToLower().Replace("mets:", "") == "mdwrap")
                    {
                        if (reader.MoveToAttribute("MDTYPE"))
                            mdtype = reader.Value;
                        if (reader.MoveToAttribute("OTHERMDTYPE"))
                            othermdtype = reader.Value;

                        // Package-level amdSec -- get an appropriate reader from the metadata configuration
                        iPackage_amdSec_ReaderWriter rw = ResourceObjectSettings.MetadataConfig.Get_Package_AmdSec_ReaderWriter(mdtype, othermdtype);

                        // Is this amdSec analyzable? (i.e., did we find an appropriate reader/writer?)
                        if (rw == null)
                        {
                            Return_Package.Add_Unanalyzed_AMDSEC(metsSection);
                        }
                        else
                        {
                            rw.Read_amdSec(reader, Return_Package, options);
                        }
                    }
                }
            }
        }
    }

    #endregion

    #region Special code used for moving downloads into the structure map system, and out of the old SobekCM METS section

    // For backward compatability, move from the old download system to the
    // new structure.  This has to happen here at the end so that we have access
    // to the complete file dictionary built from the fileSec.
    // Were there some downloads added here?
    if (deprecatedDownloads.Count > 0)
    {
        // Get the list of downloads from the download tree
        List<SobekCM_File_Info> newStructureDownloads = Return_Package.Divisions.Download_Tree.All_Files;

        // Step through each download in the old system
        foreach (Download_Info_DEPRECATED thisDownload in deprecatedDownloads)
        {
            // Get the label (if there is one)
            string label = thisDownload.Label;
            string filename = thisDownload.FileName;
            bool found = false;

            if ((filename.Length == 0) && (thisDownload.File_ID.Length > 0))
            {
                // No filename, but a file id -- resolve through the fileSec dictionary
                if (files_by_fileid.ContainsKey(thisDownload.File_ID))
                {
                    SobekCM_File_Info thisDownloadFile = files_by_fileid[thisDownload.File_ID];
                    filename = thisDownloadFile.System_Name;

                    // Ensure a file of this name doesn't already exist
                    foreach (SobekCM_File_Info existingFile in newStructureDownloads)
                    {
                        if (existingFile.System_Name.ToUpper().Trim() == filename.ToUpper().Trim())
                        {
                            found = true;
                            break;
                        }
                    }

                    // Not found, so add it
                    if (!found)
                    {
                        // Determine the label if it was missing or identical to file name
                        if ((label.Length == 0) || (label == filename))
                        {
                            label = filename;
                            int first_period_index = label.IndexOf('.');
                            if (first_period_index > 0)
                            {
                                label = label.Substring(0, first_period_index);
                            }
                        }

                        // Add the root to the download tree, if not existing
                        Division_TreeNode newRoot;
                        if (Return_Package.Divisions.Download_Tree.Roots.Count == 0)
                        {
                            newRoot = new Division_TreeNode("Main", String.Empty);
                            Return_Package.Divisions.Download_Tree.Roots.Add(newRoot);
                        }
                        else
                        {
                            newRoot = (Division_TreeNode) Return_Package.Divisions.Download_Tree.Roots[0];
                        }

                        // Add a page for this, with the provided label if there was one
                        Page_TreeNode newPage = new Page_TreeNode(label);
                        newRoot.Nodes.Add(newPage);

                        // Now, add this existing file
                        newPage.Files.Add(thisDownloadFile);

                        // Add to the list of files added (in case it appears twice)
                        newStructureDownloads.Add(thisDownloadFile);
                    }
                }
            }
            else
            {
                // Ensure a file of this name doesn't already exist
                foreach (SobekCM_File_Info existingFile in newStructureDownloads)
                {
                    if (existingFile.System_Name.ToUpper().Trim() == filename.ToUpper().Trim())
                    {
                        found = true;
                        break;
                    }
                }

                // Not found, so add it
                if (!found)
                {
                    // Determine the label if it was missing or identical to file name
                    if ((label.Length == 0) || (label == filename))
                    {
                        label = filename;
                        int first_period_index = label.IndexOf('.');
                        if (first_period_index > 0)
                        {
                            label = label.Substring(0, first_period_index);
                        }
                    }

                    // Add the root to the download tree, if not existing
                    Division_TreeNode newRoot;
                    if (Return_Package.Divisions.Download_Tree.Roots.Count == 0)
                    {
                        newRoot = new Division_TreeNode("Main", String.Empty);
                        Return_Package.Divisions.Download_Tree.Roots.Add(newRoot);
                    }
                    else
                    {
                        newRoot = (Division_TreeNode) Return_Package.Divisions.Download_Tree.Roots[0];
                    }

                    // Add a page for this, with the provided label if there was one
                    Page_TreeNode newPage = new Page_TreeNode(label);
                    newRoot.Nodes.Add(newPage);

                    // Now, add this new file
                    SobekCM_File_Info thisDownloadFile = new SobekCM_File_Info(filename);
                    newPage.Files.Add(thisDownloadFile);

                    // Add to the list of files added (in case it appears twice)
                    newStructureDownloads.Add(thisDownloadFile);
                }
            }
        }
    }

    #endregion

    #region Special code for distributing any page-level coordinate information read from the old SobekCM coordinate metadata

    // Get the geospatial data
    GeoSpatial_Information geoSpatial = Return_Package.Get_Metadata_Module(GlobalVar.GEOSPATIAL_METADATA_MODULE_KEY) as GeoSpatial_Information;
    if ((geoSpatial != null) && (geoSpatial.Polygon_Count > 0))
    {
        // See if any has the page sequence filled out, which means it came from the old metadata system
        bool redistribute = false;
        foreach (Coordinate_Polygon thisPolygon in geoSpatial.Polygons)
        {
            if (thisPolygon.Page_Sequence > 0)
            {
                redistribute = true;
                break;
            }
        }

        // If we need to redistribute, get started!
        if (redistribute)
        {
            // Get the pages, by sequence
            List<abstract_TreeNode> pagesBySequence = Return_Package.Divisions.Physical_Tree.Pages_PreOrder;
            List<Coordinate_Polygon> polygonsToRemove = new List<Coordinate_Polygon>();

            // Step through each polygon
            foreach (Coordinate_Polygon thisPolygon in geoSpatial.Polygons)
            {
                if ((thisPolygon.Page_Sequence > 0) && (thisPolygon.Page_Sequence <= pagesBySequence.Count))
                {
                    // Get the page
                    abstract_TreeNode thisPageFromSequence = pagesBySequence[thisPolygon.Page_Sequence - 1];

                    // We can assume this page does not already have the coordiantes
                    GeoSpatial_Information thisPageCoord = new GeoSpatial_Information();
                    thisPageFromSequence.Add_Metadata_Module(GlobalVar.GEOSPATIAL_METADATA_MODULE_KEY, thisPageCoord);
                    thisPageCoord.Add_Polygon(thisPolygon);

                    // Remove this from the package-level coordinates (deferred -- cannot
                    // remove while enumerating the collection)
                    polygonsToRemove.Add(thisPolygon);
                }
            }

            // Now, remove all polygons flagged to be removed
            foreach (Coordinate_Polygon thisPolygon in polygonsToRemove)
            {
                geoSpatial.Remove_Polygon(thisPolygon);
            }
        }
    }

    #endregion

    #region Copy any serial hierarchy in the Behaviors.Serial_Info part into the bib portion, if not there

    // Do some final cleanup on the SERIAL HIERARCHY, but only if the bib portion
    // does not already carry serial information.
    // NOTE(review): the flattened original had a displaced brace here which attached the
    // 'else' (Enum1/2/3) branch to the emptiness check rather than the newspaper check;
    // reconstructed as newspaper -> Year/Month/Day, otherwise -> Enum1/2/3 -- confirm
    // against the canonical source
    if ((Return_Package.Behaviors.hasSerialInformation) && (Return_Package.Behaviors.Serial_Info.Count > 0))
    {
        if ((Return_Package.Bib_Info.Series_Part_Info.Enum1.Length == 0) && (Return_Package.Bib_Info.Series_Part_Info.Year.Length == 0))
        {
            if (Return_Package.Bib_Info.SobekCM_Type == TypeOfResource_SobekCM_Enum.Newspaper)
            {
                // Newspapers use Year / Month / Day from up to three serial levels
                Return_Package.Bib_Info.Series_Part_Info.Year = Return_Package.Behaviors.Serial_Info[0].Display;
                Return_Package.Bib_Info.Series_Part_Info.Year_Index = Return_Package.Behaviors.Serial_Info[0].Order;

                if (Return_Package.Behaviors.Serial_Info.Count > 1)
                {
                    Return_Package.Bib_Info.Series_Part_Info.Month = Return_Package.Behaviors.Serial_Info[1].Display;
                    Return_Package.Bib_Info.Series_Part_Info.Month_Index = Return_Package.Behaviors.Serial_Info[1].Order;
                }

                if (Return_Package.Behaviors.Serial_Info.Count > 2)
                {
                    Return_Package.Bib_Info.Series_Part_Info.Day = Return_Package.Behaviors.Serial_Info[2].Display;
                    Return_Package.Bib_Info.Series_Part_Info.Day_Index = Return_Package.Behaviors.Serial_Info[2].Order;
                }
            }
            else
            {
                // All other resource types use Enum1 / Enum2 / Enum3
                Return_Package.Bib_Info.Series_Part_Info.Enum1 = Return_Package.Behaviors.Serial_Info[0].Display;
                Return_Package.Bib_Info.Series_Part_Info.Enum1_Index = Return_Package.Behaviors.Serial_Info[0].Order;

                if (Return_Package.Behaviors.Serial_Info.Count > 1)
                {
                    Return_Package.Bib_Info.Series_Part_Info.Enum2 = Return_Package.Behaviors.Serial_Info[1].Display;
                    Return_Package.Bib_Info.Series_Part_Info.Enum2_Index = Return_Package.Behaviors.Serial_Info[1].Order;
                }

                if (Return_Package.Behaviors.Serial_Info.Count > 2)
                {
                    Return_Package.Bib_Info.Series_Part_Info.Enum3 = Return_Package.Behaviors.Serial_Info[2].Display;
                    Return_Package.Bib_Info.Series_Part_Info.Enum3_Index = Return_Package.Behaviors.Serial_Info[2].Order;
                }
            }
        }
    }

    #endregion

    // Honor the documented contract: FALSE if any error was recorded during the read
    return Error_Message.Length == 0;
}