Beispiel #1
0
 /// <summary>
 /// Validates a profile entity: it must have a non-blank key and, unless it is a
 /// category, at least one of a non-blank value, a non-blank note, or a child entity.
 /// </summary>
 /// <remarks>
 /// Null <c>Key</c>, <c>Value</c>, <c>Note</c> and <c>Children</c> are treated the same
 /// as empty ones, so a partially initialised entity is reported through the validation
 /// errors below instead of a <see cref="NullReferenceException"/>.
 /// </remarks>
 /// <param name="entity">The entity to validate.</param>
 /// <exception cref="Exception">
 /// Thrown with the message "Empty Key!" when the key is blank, or with the message
 /// "No value not child entities!" when a non-category entity carries no content.
 /// </exception>
 public static void CheckEntity(this ProfileEntity entity)
 {
     if (string.IsNullOrWhiteSpace(entity.Key))
     {
         throw new Exception("Empty Key!");
     }

     // Categories are exempt from the content check.
     if (entity.EntityType == ProfileEntityType.Category)
     {
         return;
     }

     bool hasText     = !string.IsNullOrWhiteSpace(entity.Value) || !string.IsNullOrWhiteSpace(entity.Note);
     bool hasChildren = entity.Children != null && entity.Children.Count != 0;
     if (!hasText && !hasChildren)
     {
         throw new Exception("No value not child entities!");
     }
 }
Beispiel #2
0
        /// <summary>
        /// Validates the entity and writes it to the console, recursing into its children.
        /// </summary>
        /// <remarks>
        /// Categories are printed in green, fields in yellow and sub-fields in red. The
        /// console foreground colour is not restored when the method returns.
        /// The validation matches <c>CheckEntity</c>: a category is allowed to have no
        /// value, note or children (it is printed as a bare header), and null
        /// <c>Key</c>/<c>Value</c>/<c>Note</c>/<c>Date</c>/<c>Children</c> are treated as empty.
        /// Entities whose type is none of Category, Field or SubField print nothing.
        /// </remarks>
        /// <param name="entity">The entity to print.</param>
        /// <exception cref="Exception">
        /// Thrown with the message "Empty Key!" when the key is blank, or with the message
        /// "No value not child entities!" when a non-category entity carries no content.
        /// The same checks apply to every child that gets printed.
        /// </exception>
        public static void PrintEntity(this ProfileEntity entity)
        {
            if (string.IsNullOrWhiteSpace(entity.Key))
            {
                throw new Exception("Empty Key!");
            }

            bool hasValue    = !string.IsNullOrWhiteSpace(entity.Value);
            bool hasNote     = !string.IsNullOrWhiteSpace(entity.Note);
            bool hasChildren = entity.Children != null && entity.Children.Count != 0;

            // Categories are exempt from the content check so that an empty category
            // still prints its header instead of aborting the whole dump.
            if (entity.EntityType != ProfileEntityType.Category && !hasValue && !hasNote && !hasChildren)
            {
                throw new Exception("No value not child entities!");
            }

            if (entity.EntityType == ProfileEntityType.Category)
            {
                Console.ForegroundColor = ConsoleColor.Green;
                Console.WriteLine("\n================================================================");
                Console.WriteLine(entity.Key);
                Console.WriteLine("==================================================================\n");
                if (hasChildren)
                {
                    foreach (var child in entity.Children)
                    {
                        child.PrintEntity();
                    }
                }
            }
            else if (entity.EntityType == ProfileEntityType.Field)
            {
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("\n----------------------------------------------------------------");
                Console.WriteLine($"  +{entity.Key}");
                Console.WriteLine("------------------------------------------------------------------\n");

                if (hasValue)
                {
                    Console.WriteLine($"  {entity.Value}\n");
                }
                if (hasChildren)
                {
                    foreach (var child in entity.Children)
                    {
                        child.PrintEntity();
                    }
                }

                Console.WriteLine();
                // Children switch the colour; go back to the field colour for the trailer lines.
                Console.ForegroundColor = ConsoleColor.Yellow;

                if (hasNote)
                {
                    Console.WriteLine($"  note: {entity.Note}");
                }
                if (!string.IsNullOrWhiteSpace(entity.Date))
                {
                    Console.WriteLine($"  date: {entity.Date}");
                }
                if (entity.ComparisonRank.HasValue)
                {
                    Console.WriteLine($"  country comparison to the world:: {entity.ComparisonRank}");
                }
            }
            else if (entity.EntityType == ProfileEntityType.SubField)
            {
                Console.ForegroundColor = ConsoleColor.Red;
                // "*" is the placeholder key for a sub-field without a name: print it as a bullet.
                if (entity.Key == "*")
                {
                    Console.WriteLine($"      * {entity.Value} {entity.Note} {entity.Date}");
                }
                else
                {
                    Console.WriteLine($"      +{entity.Key}: {entity.Value} {entity.Note} {entity.Date}");
                }
            }
        }
        /// <summary>
        /// Parses the HTML content of the country profile data and converts it to a list of profile entities.
        /// </summary>
        /// <remarks>
        /// Every <c>li</c> whose id ends with <c>-category-section-anchor</c> becomes a Category,
        /// keyed by the <c>sectiontitle</c> attribute of its child <c>div</c>. Every <c>div</c> whose id
        /// starts with <c>field-anchor-&lt;category&gt;-</c> becomes a Field of that category; its content
        /// is read from the <c>div</c> whose id is the anchor id with <c>-anchor-&lt;category&gt;</c> removed.
        /// Children of the content <c>div</c> whose class contains <c>subfield</c> either contribute to the
        /// Field itself or become SubField children, depending on their class
        /// (<c>historic</c>, <c>numeric</c>, <c>grouped_subfield</c>, <c>text</c>) and on whether they carry a
        /// <c>subfield-name</c> element.
        /// Category slugs and element names are compared culture-invariantly.
        /// </remarks>
        /// <param name="content">HTML content of the country profile data</param>
        /// <returns>The parsed categories, each holding its fields, which in turn hold their sub-fields.</returns>
        /// <exception cref="InvalidOperationException">
        /// A category list item has no child <c>div</c> with a <c>sectiontitle</c> attribute, or a field
        /// anchor has no matching content <c>div</c>.
        /// </exception>
        public static List<ProfileEntity> ParseProfileData(string content)
        {
            List<ProfileEntity> entities = new List<ProfileEntity>();
            var parser = new HtmlParser();
            var doc    = parser.ParseDocument(content);

            //Category lists
            var cat_lis = doc.QuerySelectorAll("li").Where(x =>
                x.HasAttribute("id") && x.GetAttribute("id").EndsWith("-category-section-anchor", StringComparison.Ordinal));

            foreach (var cat_li in cat_lis)
            {
                var catElem = cat_li.Children.First(x =>
                    string.Equals(x.LocalName, "div", StringComparison.OrdinalIgnoreCase) && x.HasAttribute("sectiontitle"));
                ProfileEntity Category = new ProfileEntity()
                {
                    EntityType = ProfileEntityType.Category,
                    Key        = catElem.GetAttribute("sectiontitle"),
                    Children   = new List<ProfileEntity>()
                };

                // Computed once per category; invariant casing so the slug matches the ids
                // regardless of the current culture (e.g. "I" -> "i" under tr-TR).
                string categorySlug      = Category.Key.Trim().Replace(" ", "-").ToLowerInvariant();
                string fieldAnchorPrefix = $"field-anchor-{categorySlug}-";

                var field_divs = doc.QuerySelectorAll("div").Where(x =>
                    x.HasAttribute("id") && x.GetAttribute("id").StartsWith(fieldAnchorPrefix, StringComparison.Ordinal));

                foreach (var field_div in field_divs)
                {
                    // Anchor id with the "-anchor-<category>" part removed is the id of the content div.
                    string fieldname = field_div.GetAttribute("id").Trim().Replace($"-anchor-{categorySlug}", "");
                    ProfileEntity Field = new ProfileEntity()
                    {
                        EntityType = ProfileEntityType.Field,
                        Key        = field_div.TextContent.Replace(":", "").Trim(),
                        Children   = new List<ProfileEntity>()
                    };

                    //Get field content div
                    var field_content_div = doc.QuerySelectorAll("div").First(x =>
                        x.HasAttribute("id") && x.GetAttribute("id").Trim() == fieldname);

                    //1.0 Notes attached directly to the field
                    foreach (var note in field_content_div.Children.Where(x => (x.GetAttribute("class") ?? "").Trim() == "category_data note"))
                    {
                        Field.Note += note.TextContent.Replace("note:", "").Trim() + "\n";
                    }

                    //2.0 Go through sub-fields (anything classed "subfield" that is not a note)
                    foreach (var subfield in field_content_div.Children.Where(x => ClassContains(x, "subfield") && !ClassContains(x, "note")))
                    {
                        //2.1 No structured parts inside: the text is a direct value of the field
                        if (!subfield.Children.Any(x => ClassContains(x, "subfield")))
                        {
                            Field.Value += subfield.TextContent + "\n";
                            continue;
                        }

                        //2.2 Structured sub-field
                        string subfieldClass = subfield.GetAttribute("class");
                        var    nameNode      = FindSubfieldNameNode(subfield);
                        ProfileEntity SubField = new ProfileEntity()
                        {
                            EntityType = ProfileEntityType.SubField
                        };

                        //2.2.1 Historic sub-fields always become children; "*" marks a missing name
                        if (subfieldClass.Contains("historic"))
                        {
                            SubField.Key              = nameNode != null ? nameNode.TextContent.Replace(":", "").Trim() : "*";
                            SubField.IsHistoricEntity = true;
                            SubField.Value            = ChildTextByClass(subfield, "subfield-number");
                            SubField.Note             = ChildTextByClass(subfield, "subfield-note");
                            SubField.Date             = ChildTextByClass(subfield, "subfield-date");
                        }
                        //2.2.2 Numeric sub-fields
                        else if (subfieldClass.Contains("numeric"))
                        {
                            if (nameNode != null) //Named: the sub-field itself is numeric
                            {
                                SubField.IsNumericEntity = true;
                                SubField.Key             = nameNode.TextContent.Replace(":", "").Trim();
                                SubField.Value           = ChildTextByClass(subfield, "subfield-number");
                                SubField.Note            = ChildTextByClass(subfield, "subfield-note");
                                SubField.Date            = ChildTextByClass(subfield, "subfield-date");
                            }
                            else //Unnamed: the parent field is numeric
                            {
                                // NOTE(review): these are assignments, so they replace any value/note
                                // text accumulated for the field so far - confirm this is intended.
                                Field.IsNumericEntity = true;
                                Field.Value           = ChildTextByClass(subfield, "subfield-number");
                                Field.Note            = ChildTextByClass(subfield, "subfield-note");
                                Field.Date            = ChildTextByClass(subfield, "subfield-date");
                                continue;
                            }
                        }
                        //2.2.3 Grouped sub-fields (a missing number yields an empty value, as for numeric ones)
                        else if (subfieldClass.Contains("grouped_subfield"))
                        {
                            if (nameNode != null) //Named: the sub-field itself is grouped
                            {
                                SubField.IsGroupedEntity = true;
                                SubField.Key             = nameNode.TextContent.Replace(":", "").Trim();
                                SubField.Value           = ChildTextByClass(subfield, "subfield-number");
                                SubField.Note            = ChildTextByClass(subfield, "subfield-note");
                                SubField.Date            = ChildTextByClass(subfield, "subfield-date");
                            }
                            else //Unnamed: the parent field is grouped
                            {
                                // NOTE(review): assignments replace previously accumulated text - see above.
                                Field.IsGroupedEntity = true;
                                Field.Value           = ChildTextByClass(subfield, "subfield-number");
                                Field.Note            = ChildTextByClass(subfield, "subfield-note");
                                Field.Date            = ChildTextByClass(subfield, "subfield-date");
                                continue;
                            }
                        }
                        //2.2.4 Text sub-fields
                        else if (subfieldClass.Contains("text"))
                        {
                            if (nameNode != null) //Named text sub-field
                            {
                                SubField.Key = nameNode.TextContent.Trim(new char[] { ':', ' ' });
                                // Drop the name element so the remaining text content is only the value.
                                subfield.RemoveChild(nameNode);
                                SubField.Value = subfield.TextContent.Trim();
                            }
                            else //Unnamed text belongs to the parent field
                            {
                                string note = ChildTextByClass(subfield, "subfield-note").Trim();
                                string date = ChildTextByClass(subfield, "subfield-date").Trim();
                                string val  = subfield.TextContent.Trim();
                                // The text content includes the note/date elements; strip them from the
                                // value. string.Replace returns a new string, so the result must be kept.
                                if (note != string.Empty)
                                {
                                    val = val.Replace(note, "");
                                }
                                if (date != string.Empty)
                                {
                                    val = val.Replace(date, "");
                                }
                                Field.Value += val.Trim() + "\n";
                                Field.Note  += note + "\n";
                                Field.Date  += date + "\n";
                                continue;
                            }
                        }

                        // Not every branch sets all three properties; treat unset ones as empty.
                        SubField.Value = (SubField.Value ?? string.Empty).Trim();
                        SubField.Note  = (SubField.Note ?? string.Empty).Trim();
                        SubField.Date  = (SubField.Date ?? string.Empty).Trim();
                        Field.Children.Add(SubField);
                    }

                    //Check for country comparison for the field
                    var compareField = field_content_div.Children.FirstOrDefault(x =>
                        string.Equals(x.LocalName, "div", StringComparison.OrdinalIgnoreCase) &&
                        !x.HasAttribute("class") &&
                        x.TextContent.IndexOf("country comparison to the world", StringComparison.OrdinalIgnoreCase) >= 0);
                    if (compareField != null)
                    {
                        int rank;
                        if (int.TryParse(ChildTextByClass(compareField, "category_data").Trim(), out rank))
                        {
                            Field.ComparisonRank = rank;
                        }
                    }

                    Field.Value = (Field.Value ?? string.Empty).Trim();
                    Field.Note  = (Field.Note ?? string.Empty).Trim();
                    Field.Date  = (Field.Date ?? string.Empty).Trim();
                    Category.Children.Add(Field);
                }
                entities.Add(Category);
            }
            return entities;
        }

        /// <summary>True when the element has a class attribute whose text contains <paramref name="fragment"/>.</summary>
        private static bool ClassContains(AngleSharp.Dom.IElement element, string fragment)
        {
            string cls = element.GetAttribute("class");
            return cls != null && cls.Contains(fragment);
        }

        /// <summary>
        /// Text content of the first direct child whose class attribute equals <paramref name="className"/>,
        /// or an empty string when there is no such child.
        /// </summary>
        private static string ChildTextByClass(AngleSharp.Dom.IElement parent, string className)
        {
            var match = parent.Children.FirstOrDefault(x => x.GetAttribute("class") == className);
            return match?.TextContent ?? "";
        }

        /// <summary>
        /// Finds the direct child that names a sub-field: the first child whose class is exactly
        /// <c>subfield-name</c>, otherwise the first child whose class merely contains it; null when neither exists.
        /// </summary>
        private static AngleSharp.Dom.IElement FindSubfieldNameNode(AngleSharp.Dom.IElement subfield)
        {
            return subfield.Children.FirstOrDefault(x => x.GetAttribute("class") == "subfield-name")
                   ?? subfield.Children.FirstOrDefault(x => ClassContains(x, "subfield-name"));
        }