Пример #1
0
        /// <summary>
        /// Builds one import row per author found in the XML files of the folder named by <c>Query</c>.
        /// </summary>
        /// <remarks>
        /// For each file, the text of the first <c>STORYAUTHORNAME</c> element is passed through
        /// <c>AuthorHelper.Authors</c> and split on commas; the text of the first
        /// <c>STORYAUTHOREMAIL</c> element is split on commas as well, and the i-th e-mail is paired
        /// with the i-th name. Files whose content cannot be parsed as XML, and names with fewer
        /// than two space-separated parts, are logged and skipped.
        /// </remarks>
        /// <returns>
        /// A list of dictionaries with the keys <c>STORYAUTHORNAME</c>, <c>FIRSTNAME</c>,
        /// <c>LASTNAME</c> and <c>EMAIL</c>, or an empty sequence when the folder does not exist.
        /// </returns>
        public override IEnumerable <object> GetImportData()
        {
            if (!Directory.Exists(this.Query))
            {
                Logger.Log("N/A", string.Format("the folder: '{0}' could not be found. Try moving the folder under the webroot.", this.Query), ProcessStatus.ImportDefinitionError);
                return(Enumerable.Empty <object>());
            }

            List <Dictionary <string, string> > l = new List <Dictionary <string, string> >();

            // Loop-invariant helpers: resolved once instead of once per file / per author.
            Encoding et             = Encoding.GetEncoding("utf-8");
            string[] commaSeparator = { "," };
            string[] spaceSeparator = { " " };

            string[] files = Directory.GetFiles(this.Query);
            foreach (string f in files)
            {
                string data = et.GetString(GetFileBytes(f));

                XmlDocument d = new XmlDocument();
                try
                {
                    d.LoadXml(data);
                }
                catch (Exception ex)
                {
                    // Best effort: a single malformed file must not abort the whole import.
                    Logger.Log("N/A", string.Format("Xml file data was malformed: {0}", ex.Message), ProcessStatus.Error, "File", f);
                    continue;
                }

                XmlNode  nameNode = d.SelectSingleNode("//STORYAUTHORNAME");
                string   name     = (nameNode != null) ? nameNode.InnerText : string.Empty;
                string[] nameArr  = AuthorHelper.Authors(name).Split(commaSeparator, StringSplitOptions.RemoveEmptyEntries);

                XmlNode  emailNode = d.SelectSingleNode("//STORYAUTHOREMAIL");
                string   email     = (emailNode != null) ? emailNode.InnerText : string.Empty;
                string[] emailArr  = email.Split(commaSeparator, StringSplitOptions.RemoveEmptyEntries);

                for (int i = 0; i < nameArr.Length; i++)
                {
                    string n           = nameArr[i];
                    string trimmedName = n.Trim();

                    string[] nameParts = trimmedName.Split(spaceSeparator, StringSplitOptions.RemoveEmptyEntries);
                    if (nameParts.Length < 2)
                    {
                        Logger.Log("N/A", string.Format("Author name was too short so it was ignored: {0}", n), ProcessStatus.FieldError, "STORYAUTHORNAME", name);
                        continue;
                    }

                    // E-mails are matched to names purely by position in the two comma-separated lists.
                    // Trim the entry so that "a@x.com, b@x.com" does not yield " b@x.com".
                    string curEmail = (i < emailArr.Length) ? emailArr[i].Trim() : string.Empty;

                    Dictionary <string, string> ao = new Dictionary <string, string>();
                    ao.Add("STORYAUTHORNAME", trimmedName);
                    ao.Add("FIRSTNAME", nameParts[0].Trim());
                    // Everything after the first part is treated as the last name.
                    ao.Add("LASTNAME", string.Join(" ", nameParts.Skip(1).ToArray()).Trim());
                    ao.Add("EMAIL", curEmail);
                    if (!string.IsNullOrEmpty(curEmail))
                    {
                        Logger.Log("N/A", string.Format("Matching {0} with {1}", trimmedName, curEmail));
                    }

                    l.Add(ao);
                }
            }

            return(l);
        }
Пример #2
0
        /// <summary>
        /// Builds one import row per XML file in the folder named by <c>Query</c>, combining fields
        /// read from that file with fields read from the same-named file in the sibling
        /// <c>Autonomy</c> folder.
        /// </summary>
        /// <remarks>
        /// Each row receives a generated <c>ARTICLE NUMBER</c> made of <c>PublicationPrefix</c> and
        /// a six-digit running number that starts at <c>GetNextArticleNumber()</c>. When the
        /// Autonomy file is missing or cannot be loaded, the autonomy fields are written as empty
        /// strings and <c>STORYUPDATE</c> falls back to the <c>DATEPUBLISHED</c> value of the main
        /// file, provided that value parses as a date.
        /// </remarks>
        /// <returns>
        /// A list of string dictionaries (one per file that could be loaded), or an empty sequence
        /// when the folder does not exist.
        /// </returns>
        public override IEnumerable <object> GetImportData()
        {
            if (!Directory.Exists(this.Query))
            {
                Logger.Log("N/A", string.Format("the folder '{0}' could not be found. Try moving the folder under the webroot.", this.Query), ProcessStatus.ImportDefinitionError);
                return(Enumerable.Empty <object>());
            }

            List <Dictionary <string, string> > l = new List <Dictionary <string, string> >();

            // Element names copied from the Autonomy file; identical for every row, so built once.
            List <string> autNodes = new List <string>()
            {
                "CATEGORY", "COMPANY", "STORYUPDATE", "SECTION", "COUNTRY", "KEYWORD", "THERAPY_SECTOR", "TREATABLE_CONDITION"
            };

            long artNumber = GetNextArticleNumber();

            string[] files = Directory.GetFiles(this.Query);
            foreach (string f in files)
            {
                XmlDocument d = GetXmlDocument(f);
                if (d == null)
                {
                    // Skipped files do not consume an article number.
                    continue;
                }

                Dictionary <string, string> ao = new Dictionary <string, string>();

                //generated field
                string curFileName = new FileInfo(f).Name;
                ao["ARTICLE NUMBER"] = $"{PublicationPrefix}{artNumber:D6}";

                //escenic field values
                string authorNode = "STORYAUTHORNAME";
                ao.Add(authorNode, AuthorHelper.Authors(GetXMLData(d, authorNode)));
                string bodyNode = "STORYBODY";
                ao.Add(bodyNode, GetXMLData(d, bodyNode));
                string titleNode      = "STORYTITLE";
                string cleanTitleHtml = CleanTitleHtml(GetXMLData(d, titleNode));
                ao.Add(titleNode, cleanTitleHtml);
                ao.Add("FILENAME", cleanTitleHtml);
                ao.Add("META TITLE OVERRIDE", cleanTitleHtml);
                ao.Add("ARTICLEID", curFileName.Replace(".xml", ""));

                // The row is registered here; the autonomy fields below are added to the same instance.
                l.Add(ao);
                artNumber++;

                //autonomy fields
                string autFile = $@"{this.Query}\..\Autonomy\{curFileName}";

                XmlDocument d2 = null;
                if (File.Exists(autFile))
                {
                    d2 = GetXmlDocument(autFile);
                }
                else
                {
                    Logger.Log("N/A", "File not found", ProcessStatus.NotFoundError, "File", autFile);
                }

                if (d2 != null)
                {
                    foreach (string n in autNodes)
                    {
                        ao.Add(n, GetXMLData(d2, n));
                    }

                    continue;
                }

                // No usable autonomy document (file missing, or it exists but could not be loaded):
                // fill the fields with empty values so every row exposes the same set of keys.
                foreach (string n in autNodes)
                {
                    ao.Add(n, string.Empty);
                }

                //default back to the date from escenic
                string   dateVal = GetXMLData(d, "DATEPUBLISHED");
                DateTime date;
                if (!DateTimeUtil.ParseInformaDate(dateVal, out date))
                {
                    Logger.Log("N/A", "No Date to parse error", ProcessStatus.DateParseError, "Missing Autonomy File Name", autFile);
                }
                else
                {
                    // The raw text is stored; the parse above only validates it.
                    ao["STORYUPDATE"] = dateVal;
                }
            }

            return(l);
        }
        /// <summary>
        /// Builds one import row per XML file in the folder named by <c>Query</c>, combining fields
        /// read from that file with fields read from the same-named file in the sibling
        /// <c>Autonomy</c> folder.
        /// </summary>
        /// <remarks>
        /// Works in two passes. The first pass loads the Autonomy document for every file, drops
        /// files whose <c>CATEGORY</c> is "pdfnewsletter" or whose <c>SECTION</c> is "pdf library"
        /// (compared case-insensitively), and obtains an article number for each remaining file via
        /// <c>SetArticleNumber</c>; the mapping context is saved once after the pass. The second
        /// pass loads each remaining file and builds its row. When no Autonomy document is
        /// available, the autonomy fields are written as empty strings and <c>STORYUPDATE</c> falls
        /// back to the <c>DATEPUBLISHED</c> value of the main file, provided that value parses as a
        /// date.
        /// </remarks>
        /// <returns>
        /// A list of string dictionaries (one per file that could be loaded), or an empty sequence
        /// when the folder does not exist.
        /// </returns>
        public override IEnumerable <object> GetImportData()
        {
            if (!Directory.Exists(this.Query))
            {
                Logger.Log("N/A", string.Format("the folder '{0}' could not be found. Try moving the folder under the webroot.", this.Query), ProcessStatus.ImportDefinitionError);
                return(Enumerable.Empty <object>());
            }

            List <Dictionary <string, string> > l = new List <Dictionary <string, string> >();

            // Element names copied from the Autonomy file; identical for every row, so built once.
            List <string> autNodes = new List <string>()
            {
                "CATEGORY", "COMPANY", "STORYUPDATE", "SECTION", "COUNTRY", "KEYWORD", "THERAPY_SECTOR", "TREATABLE_CONDITION", "TECHNOLOGYSECTION", "COUNTRYSECTION"
            };

            string[] files = Directory.GetFiles(this.Query);

            // Item1 = file name, Item2 = article number, Item3 = full path, Item4 = Autonomy document (may be null).
            var filteredFiles = new List <Tuple <string, string, string, XmlDocument> >();

            using (var context = new EscenicIdMappingContext())
            {
                foreach (var f in files)
                {
                    string curFileName = new FileInfo(f).Name;
                    string articleId   = curFileName.Replace(".xml", "");

                    //autonomy fields
                    XmlDocument dAut    = null;
                    string      autFile = $@"{this.Query}\..\Autonomy\{curFileName}";
                    if (File.Exists(autFile))
                    {
                        dAut = GetXmlDocument(autFile);

                        if (dAut != null)
                        {
                            // ABORT IF OF THIS TYPE
                            // Ordinal, case-insensitive comparison: a culture-sensitive ToLower() would
                            // stop matching e.g. "PDF LIBRARY" under a Turkish culture (dotless i).
                            string categoryName = GetXMLData(dAut, "CATEGORY");
                            if (string.Equals(categoryName, "pdfnewsletter", StringComparison.OrdinalIgnoreCase))
                            {
                                continue;
                            }

                            string sectionName = GetXMLData(dAut, "SECTION");
                            if (string.Equals(sectionName, "pdf library", StringComparison.OrdinalIgnoreCase))
                            {
                                continue;
                            }
                        }
                    }

                    // Only files that survive the filter get an article number.
                    string artNumber = SetArticleNumber(context, articleId);

                    filteredFiles.Add(new Tuple <string, string, string, XmlDocument>(curFileName, artNumber, f, dAut));
                }

                context.SaveChanges();
            }

            foreach (Tuple <string, string, string, XmlDocument> pair in filteredFiles)
            {
                XmlDocument d = GetXmlDocument(pair.Item3);
                if (d == null)
                {
                    continue;
                }

                Dictionary <string, string> ao = new Dictionary <string, string>();

                //generated field
                string curFileName = pair.Item1;
                string articleId   = curFileName.Replace(".xml", "");

                ao["ARTICLE NUMBER"] = pair.Item2;

                //escenic field values
                string authorNode = "STORYAUTHORNAME";
                ao.Add(authorNode, AuthorHelper.Authors(GetXMLData(d, authorNode)));
                string bodyNode = "BODY";
                ao.Add(bodyNode, GetXMLData(d, bodyNode));
                string titleNode      = "TITLE";
                string cleanTitleHtml = CleanTitleHtml(GetXMLData(d, titleNode));
                ao.Add(titleNode, cleanTitleHtml);
                ao.Add("FILENAME", cleanTitleHtml);
                ao.Add("META TITLE OVERRIDE", cleanTitleHtml);
                ao.Add("ARTICLEID", articleId);

                //autonomy fields
                XmlDocument d2 = pair.Item4;

                //if no autonomy document then fill fields with empty
                if (d2 == null)
                {
                    // NOTE(review): this branch is also reached when the file exists but could not
                    // be loaded, in which case the "File not found" wording is inexact.
                    string autFile = $@"{this.Query}\..\Autonomy\{curFileName}";

                    Logger.Log("N/A", "File not found", ProcessStatus.NotFoundError, "File", autFile);
                    foreach (string n in autNodes)
                    {
                        ao.Add(n, string.Empty);
                    }

                    //default back to the date from escenic
                    string   dateVal = GetXMLData(d, "DATEPUBLISHED");
                    DateTime date;
                    if (!DateTimeUtil.ParseInformaDate(dateVal, out date))
                    {
                        Logger.Log("N/A", "No Date to parse error", ProcessStatus.DateParseError, "Missing Autonomy File Name", autFile);
                    }
                    else
                    {
                        // The raw text is stored; the parse above only validates it.
                        ao["STORYUPDATE"] = dateVal;
                    }
                }
                else
                {
                    foreach (string n in autNodes)
                    {
                        ao.Add(n, GetXMLData(d2, n));
                    }
                }

                l.Add(ao);
            }

            return(l);
        }