/// <summary>
/// Builds one author record for every usable author name found in the XML files
/// of the folder named by <c>Query</c>.
/// </summary>
/// <remarks>
/// Each file is decoded as UTF-8 and parsed as XML; files that fail to parse are logged
/// and skipped. The text of the first STORYAUTHORNAME node is passed through
/// <c>AuthorHelper.Authors</c> and split on commas. The text of the first STORYAUTHOREMAIL
/// node is split on commas as well, and the e-mail at position <c>i</c> is paired with the
/// name at position <c>i</c>. Names with fewer than two space-separated parts are logged
/// and skipped.
/// </remarks>
/// <returns>
/// A list of dictionaries with the keys STORYAUTHORNAME, FIRSTNAME, LASTNAME and EMAIL,
/// or an empty sequence when the folder does not exist.
/// </returns>
public override IEnumerable<object> GetImportData()
{
    if (!Directory.Exists(this.Query))
    {
        Logger.Log("N/A", string.Format("the folder: '{0}' could not be found. Try moving the folder under the webroot.", this.Query), ProcessStatus.ImportDefinitionError);
        return Enumerable.Empty<object>();
    }

    List<Dictionary<string, string>> authors = new List<Dictionary<string, string>>();

    // Loop-invariant helpers, created once instead of once per file.
    Encoding utf8 = Encoding.GetEncoding("utf-8");
    string[] commaSeparator = { "," };
    string[] spaceSeparator = { " " };

    foreach (string file in Directory.GetFiles(this.Query))
    {
        // Encoding.GetString keeps a leading byte-order mark as U+FEFF, which
        // XmlDocument.LoadXml rejects as invalid data at the root level, so drop it.
        string data = utf8.GetString(GetFileBytes(file)).TrimStart('\uFEFF');

        XmlDocument doc = new XmlDocument();
        try
        {
            doc.LoadXml(data);
        }
        catch (Exception ex)
        {
            Logger.Log("N/A", string.Format("Xml file data was malformed: {0}", ex.Message), ProcessStatus.Error, "File", file);
            continue;
        }

        XmlNode nameNode = doc.SelectSingleNode("//STORYAUTHORNAME");
        string name = (nameNode != null) ? nameNode.InnerText : string.Empty;
        string[] names = AuthorHelper.Authors(name).Split(commaSeparator, StringSplitOptions.RemoveEmptyEntries);

        XmlNode emailNode = doc.SelectSingleNode("//STORYAUTHOREMAIL");
        string email = (emailNode != null) ? emailNode.InnerText : string.Empty;
        string[] emails = email.Split(commaSeparator, StringSplitOptions.RemoveEmptyEntries);

        for (int i = 0; i < names.Length; i++)
        {
            string rawName = names[i];
            string fullName = rawName.Trim();
            string[] nameParts = fullName.Split(spaceSeparator, StringSplitOptions.RemoveEmptyEntries);

            // A first and a last name are both required.
            if (nameParts.Length < 2)
            {
                Logger.Log("N/A", string.Format("Author name was too short so it was ignored: {0}", rawName), ProcessStatus.FieldError, "STORYAUTHORNAME", name);
                continue;
            }

            // E-mails are matched to names by position. The index is not compacted when a
            // name is skipped, so later names keep their own e-mail. Entries are trimmed
            // so that "a@x.com, b@x.com" does not produce " b@x.com".
            string curEmail = (i < emails.Length) ? emails[i].Trim() : string.Empty;

            Dictionary<string, string> author = new Dictionary<string, string>();
            author.Add("STORYAUTHORNAME", fullName);
            author.Add("FIRSTNAME", nameParts[0].Trim());
            author.Add("LASTNAME", string.Join(" ", nameParts.Skip(1)).Trim());
            author.Add("EMAIL", curEmail);

            if (!string.IsNullOrEmpty(curEmail))
            {
                Logger.Log("N/A", string.Format("Matching {0} with {1}", fullName, curEmail));
            }

            authors.Add(author);
        }
    }

    return authors;
}
/// <summary>
/// Builds one article record for every XML file in the folder named by <c>Query</c>,
/// enriched with fields from the same-named file in the sibling <c>Autonomy</c> folder.
/// </summary>
/// <remarks>
/// Files for which <c>GetXmlDocument</c> returns null are skipped and do not consume an
/// article number. Article numbers start at <c>GetNextArticleNumber()</c> and increase by
/// one per emitted record. Every emitted record carries all autonomy keys: when the autonomy
/// document is missing or could not be loaded the keys are present with empty values, and
/// STORYUPDATE falls back to the DATEPUBLISHED value of the article when that value parses.
/// </remarks>
/// <returns>
/// A list of field-name/value dictionaries, or an empty sequence when the folder does not exist.
/// </returns>
public override IEnumerable<object> GetImportData()
{
    if (!Directory.Exists(this.Query))
    {
        Logger.Log("N/A", string.Format("the folder '{0}' could not be found. Try moving the folder under the webroot.", this.Query), ProcessStatus.ImportDefinitionError);
        return Enumerable.Empty<object>();
    }

    List<Dictionary<string, string>> articles = new List<Dictionary<string, string>>();
    long artNumber = GetNextArticleNumber();

    // Fields read from the autonomy document; identical for every file, so built once.
    string[] autNodes =
    {
        "CATEGORY", "COMPANY", "STORYUPDATE", "SECTION",
        "COUNTRY", "KEYWORD", "THERAPY_SECTOR", "TREATABLE_CONDITION"
    };

    string[] files = Directory.GetFiles(this.Query);
    foreach (string f in files)
    {
        XmlDocument d = GetXmlDocument(f);
        if (d == null)
        {
            continue;
        }

        Dictionary<string, string> ao = new Dictionary<string, string>();

        // generated field
        string curFileName = new FileInfo(f).Name;
        ao["ARTICLE NUMBER"] = $"{PublicationPrefix}{artNumber:D6}";
        artNumber++;

        // escenic field values
        string authorNode = "STORYAUTHORNAME";
        ao.Add(authorNode, AuthorHelper.Authors(GetXMLData(d, authorNode)));

        string bodyNode = "STORYBODY";
        ao.Add(bodyNode, GetXMLData(d, bodyNode));

        string titleNode = "STORYTITLE";
        string cleanTitleHtml = CleanTitleHtml(GetXMLData(d, titleNode));
        ao.Add(titleNode, cleanTitleHtml);
        ao.Add("FILENAME", cleanTitleHtml);
        ao.Add("META TITLE OVERRIDE", cleanTitleHtml);
        ao.Add("ARTICLEID", curFileName.Replace(".xml", ""));

        // autonomy fields
        string autFile = $@"{this.Query}\..\Autonomy\{curFileName}";
        XmlDocument d2 = null;
        if (File.Exists(autFile))
        {
            // May still be null when the file exists but cannot be loaded.
            d2 = GetXmlDocument(autFile);
        }
        else
        {
            Logger.Log("N/A", "File not found", ProcessStatus.NotFoundError, "File", autFile);
        }

        if (d2 != null)
        {
            foreach (string n in autNodes)
            {
                ao.Add(n, GetXMLData(d2, n));
            }
        }
        else
        {
            // No usable autonomy document: still emit every key so all records have the
            // same shape. Previously an unloadable autonomy file left these keys out.
            foreach (string n in autNodes)
            {
                ao.Add(n, string.Empty);
            }

            // default back to the date from escenic
            string dateVal = GetXMLData(d, "DATEPUBLISHED");
            DateTime date;
            if (!DateTimeUtil.ParseInformaDate(dateVal, out date))
            {
                Logger.Log("N/A", "No Date to parse error", ProcessStatus.DateParseError, "Missing Autonomy File Name", autFile);
            }
            else
            {
                ao["STORYUPDATE"] = dateVal;
            }
        }

        articles.Add(ao);
    }

    return articles;
}
/// <summary>
/// Builds one article record for every XML file in the folder named by <c>Query</c>,
/// excluding articles whose autonomy document marks them as PDF newsletter / PDF library content.
/// </summary>
/// <remarks>
/// Runs in two passes. The first pass loads the same-named file from the sibling
/// <c>Autonomy</c> folder (when present), drops files whose CATEGORY is "pdfnewsletter" or
/// whose SECTION is "pdf library" (case-insensitive), and obtains an article number for each
/// remaining file via <c>SetArticleNumber</c>; <c>SaveChanges</c> is called once on the
/// context after all files were visited. The second pass loads each remaining article and
/// assembles its fields. When no autonomy document is available the autonomy keys are added
/// with empty values and STORYUPDATE falls back to the article's DATEPUBLISHED value when
/// that value parses.
/// </remarks>
/// <returns>
/// A list of field-name/value dictionaries, or an empty sequence when the folder does not exist.
/// </returns>
public override IEnumerable<object> GetImportData()
{
    if (!Directory.Exists(this.Query))
    {
        Logger.Log("N/A", string.Format("the folder '{0}' could not be found. Try moving the folder under the webroot.", this.Query), ProcessStatus.ImportDefinitionError);
        return Enumerable.Empty<object>();
    }

    List<Dictionary<string, string>> articles = new List<Dictionary<string, string>>();
    string[] files = Directory.GetFiles(this.Query);

    // Item1 = file name, Item2 = article number, Item3 = full path, Item4 = autonomy document (may be null).
    var filteredFiles = new List<Tuple<string, string, string, XmlDocument>>();

    using (var context = new EscenicIdMappingContext())
    {
        foreach (var f in files)
        {
            string curFileName = new FileInfo(f).Name;
            string articleId = curFileName.Replace(".xml", "");

            // autonomy fields
            XmlDocument dAut = null;
            string autFile = $@"{this.Query}\..\Autonomy\{curFileName}";
            if (File.Exists(autFile))
            {
                dAut = GetXmlDocument(autFile);
                if (dAut != null)
                {
                    // ABORT IF OF THIS TYPE
                    // Ordinal, case-insensitive comparison: ToLower() depends on the current
                    // culture (e.g. tr-TR lowercases 'I' to a dotless 'ı'), which would let
                    // "PDF LIBRARY" slip through the filter.
                    if (string.Equals(GetXMLData(dAut, "CATEGORY"), "pdfnewsletter", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    if (string.Equals(GetXMLData(dAut, "SECTION"), "pdf library", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }
                }
            }

            string artNumber = SetArticleNumber(context, articleId);
            filteredFiles.Add(new Tuple<string, string, string, XmlDocument>(curFileName, artNumber, f, dAut));
        }

        context.SaveChanges();
    }

    // Fields read from the autonomy document; identical for every file, so built once.
    string[] autNodes =
    {
        "CATEGORY", "COMPANY", "STORYUPDATE", "SECTION", "COUNTRY", "KEYWORD",
        "THERAPY_SECTOR", "TREATABLE_CONDITION", "TECHNOLOGYSECTION", "COUNTRYSECTION"
    };

    foreach (Tuple<string, string, string, XmlDocument> entry in filteredFiles)
    {
        XmlDocument d = GetXmlDocument(entry.Item3);
        if (d == null)
        {
            continue;
        }

        Dictionary<string, string> ao = new Dictionary<string, string>();

        // generated field
        string curFileName = entry.Item1;
        ao["ARTICLE NUMBER"] = entry.Item2;

        // escenic field values
        string authorNode = "STORYAUTHORNAME";
        ao.Add(authorNode, AuthorHelper.Authors(GetXMLData(d, authorNode)));

        string bodyNode = "BODY";
        ao.Add(bodyNode, GetXMLData(d, bodyNode));

        string titleNode = "TITLE";
        string cleanTitleHtml = CleanTitleHtml(GetXMLData(d, titleNode));
        ao.Add(titleNode, cleanTitleHtml);
        ao.Add("FILENAME", cleanTitleHtml);
        ao.Add("META TITLE OVERRIDE", cleanTitleHtml);
        ao.Add("ARTICLEID", curFileName.Replace(".xml", ""));

        // autonomy fields
        XmlDocument d2 = entry.Item4;
        if (d2 == null)
        {
            // if no autonomy document then fill fields with empty
            // NOTE(review): d2 is also null when the file exists but could not be loaded,
            // in which case "File not found" is not strictly accurate.
            string autFile = $@"{this.Query}\..\Autonomy\{curFileName}";
            Logger.Log("N/A", "File not found", ProcessStatus.NotFoundError, "File", autFile);
            foreach (string n in autNodes)
            {
                ao.Add(n, string.Empty);
            }

            // default back to the date from escenic
            string dateVal = GetXMLData(d, "DATEPUBLISHED");
            DateTime date;
            if (!DateTimeUtil.ParseInformaDate(dateVal, out date))
            {
                Logger.Log("N/A", "No Date to parse error", ProcessStatus.DateParseError, "Missing Autonomy File Name", autFile);
            }
            else
            {
                ao["STORYUPDATE"] = dateVal;
            }
        }
        else
        {
            foreach (string n in autNodes)
            {
                ao.Add(n, GetXMLData(d2, n));
            }
        }

        articles.Add(ao);
    }

    return articles;
}