Beispiel #1
0
        /// <summary>
        /// Parses the HTML in <paramref name="content"/> and turns every top-level node that carries an
        /// <c>id</c> attribute into a <see cref="BorgerDkElement"/>.
        /// </summary>
        /// <remarks>
        /// Three kinds of nodes are recognised by their <c>id</c>:
        /// <list type="bullet">
        /// <item><description><c>kernetekst</c> becomes a <see cref="BorgerDkBlockElement"/> holding one
        /// <see cref="BorgerDkMicroArticle"/> per child node that has an <c>id</c> attribute.</description></item>
        /// <item><description><c>byline</c> becomes a <see cref="BorgerDkTextElement"/> containing the trimmed
        /// inner text of the node.</description></item>
        /// <item><description>Any other id becomes a <see cref="BorgerDkTextElement"/>, provided the first child
        /// returned by <c>GetNonTextChildren</c> is an <c>h3</c>; otherwise the node is skipped.</description></item>
        /// </list>
        /// Text nodes and nodes without an <c>id</c> attribute are ignored.
        /// </remarks>
        /// <param name="content">The HTML markup to parse.</param>
        /// <returns>The parsed elements, in the order their nodes were visited.</returns>
        /// <exception cref="Exception">A micro article has no children according to <c>GetNonTextChildren</c>,
        /// or its first such child is neither an <c>h2</c> nor an <c>h3</c>.</exception>
        private BorgerDkElement[] ParseElements(string content)
        {
            var document = new HtmlDocument();
            document.LoadHtml(content);
            document.OptionOutputAsXml = false;

            var result = new List<BorgerDkElement>();

            foreach (HtmlNode topNode in document.DocumentNode.ChildNodes)
            {
                // Whitespace/text between the blocks carries no information
                if (topNode is HtmlTextNode)
                {
                    continue;
                }

                // Only nodes identified by an "id" attribute are of interest
                var idAttribute = topNode.Attributes["id"];
                if (idAttribute == null)
                {
                    continue;
                }

                string id = idAttribute.Value;

                switch (id)
                {
                    case "kernetekst":
                    {
                        BorgerDkBlockElement block = new BorgerDkBlockElement { Id = id };
                        var articles = new List<BorgerDkMicroArticle>();

                        foreach (HtmlNode articleNode in topNode.ChildNodes)
                        {
                            if (articleNode is HtmlTextNode)
                            {
                                continue;
                            }

                            var articleIdAttribute = articleNode.Attributes["id"];
                            if (articleIdAttribute == null)
                            {
                                continue;
                            }

                            // Strip the prefix so only the bare identifier of the micro article remains
                            string microId = articleIdAttribute.Value.Replace("microArticle_", "");

                            HtmlNode[] parts = GetNonTextChildren(articleNode);

                            // A micro article without any children is unexpected
                            if (parts.Length == 0)
                            {
                                throw new Exception("What's happening? #1 (" + microId + ")");
                            }

                            // The first child must be the heading (either <h2> or <h3>)
                            HtmlNode heading = parts[0];
                            if (!(heading.Name == "h2" || heading.Name == "h3"))
                            {
                                throw new Exception("What's happening? #2 (" + microId + ")");
                            }

                            // The heading text is used as the title of the micro article
                            string headingText = heading.InnerText;

                            articles.Add(new BorgerDkMicroArticle {
                                Parent    = block,
                                Id        = microId,
                                Title     = headingText.Trim(),
                                TitleType = heading.Name,
                                Content   = FixSimpleErrors(articleNode.InnerHtml.Trim())
                            });
                        }

                        block.MicroArticles = articles.ToArray();
                        result.Add(block);
                        break;
                    }

                    case "byline":
                    {
                        result.Add(new BorgerDkTextElement {
                            Id      = id,
                            Title   = "Skrevet af",
                            Content = topNode.InnerText.Trim()
                        });
                        break;
                    }

                    default:
                    {
                        HtmlNode[] parts = GetNonTextChildren(topNode);

                        // Blocks that are empty or do not start with an <h3> are skipped silently
                        if (parts.Length == 0 || parts[0].Name != "h3")
                        {
                            continue;
                        }

                        // The <h3> text is used as the title of the element
                        string headingText = parts[0].InnerText;

                        result.Add(new BorgerDkTextElement {
                            Id      = id,
                            Title   = headingText,
                            Content = FixSimpleErrors(topNode.InnerHtml)
                        });
                        break;
                    }
                }
            }

            return result.ToArray();
        }
Beispiel #2
0
        /// <summary>
        /// Creates a <see cref="BorgerDkArticle"/> from the specified <paramref name="article"/> and parses
        /// the HTML of its content into elements.
        /// </summary>
        /// <remarks>
        /// Top-level nodes of the content are recognised by their <c>id</c> attribute: <c>kernetekst</c>
        /// becomes a <see cref="BorgerDkBlockElement"/> with one <see cref="BorgerDkMicroArticle"/> per child
        /// that has an <c>id</c>; <c>byline</c> becomes a <see cref="BorgerDkTextElement"/> and also sets the
        /// by-line of the article; any other id becomes a <see cref="BorgerDkTextElement"/> when its first
        /// child (as returned by <c>BorgerDkHelpers.GetNonTextChildren</c>) is an <c>h3</c>, and is skipped
        /// otherwise. Text nodes and nodes without an <c>id</c> attribute are ignored.
        /// </remarks>
        /// <param name="service">The service whose endpoint domain is assigned to the article.</param>
        /// <param name="article">The source article to convert.</param>
        /// <param name="municipality">The municipality of the article. When <c>null</c>,
        /// <see cref="BorgerDkMunicipality.NoMunicipality"/> is used.</param>
        /// <returns>The converted article, including its parsed elements.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="service"/> or <paramref name="article"/>
        /// is <c>null</c>.</exception>
        /// <exception cref="Exception">A micro article has no children according to
        /// <c>BorgerDkHelpers.GetNonTextChildren</c>, or its first such child is neither an <c>h2</c> nor an
        /// <c>h3</c>.</exception>
        public static BorgerDkArticle GetFromArticle(BorgerDkService service, Article article, BorgerDkMunicipality municipality)
        {
            // Validate the required arguments before doing any work
            if (service == null)
            {
                throw new ArgumentNullException("service");
            }
            if (article == null)
            {
                throw new ArgumentNullException("article");
            }

            municipality = municipality ?? BorgerDkMunicipality.NoMunicipality;

            BorgerDkArticle temp = new BorgerDkArticle {
                Id           = article.ArticleID,
                Domain       = service.Endpoint.Domain,
                // Only the part of the URL before the query string is kept
                Url          = article.ArticleUrl.Split('?')[0],
                Municipality = municipality,
                Title        = HttpUtility.HtmlDecode(article.ArticleTitle),
                Header       = HttpUtility.HtmlDecode(article.ArticleHeader),
                Published    = article.PublishingDate,
                Modified     = article.LastUpdated,
                Content      = article.Content
            };

            HtmlDocument htmlDocument = new HtmlDocument();

            htmlDocument.LoadHtml(article.Content);
            htmlDocument.OptionOutputAsXml = false;

            List<BorgerDkElement> elements = new List<BorgerDkElement>();

            foreach (HtmlNode node in htmlDocument.DocumentNode.ChildNodes)
            {
                // Skip text between the blocks and nodes that cannot be identified
                if (node is HtmlTextNode || node.Attributes["id"] == null)
                {
                    continue;
                }

                string id = node.Attributes["id"].Value;

                if (id == "kernetekst")
                {
                    BorgerDkBlockElement block = new BorgerDkBlockElement {
                        Type = "kernetekst"
                    };

                    List<BorgerDkMicroArticle> microArticles = new List<BorgerDkMicroArticle>();

                    foreach (HtmlNode child in node.ChildNodes)
                    {
                        if (child is HtmlTextNode || child.Attributes["id"] == null)
                        {
                            continue;
                        }

                        // Get the ID of the micro article (without the "microArticle_" prefix)
                        string microId = child.Attributes["id"].Value.Replace("microArticle_", "");

                        HtmlNode[] children = BorgerDkHelpers.GetNonTextChildren(child);

                        // Trigger exception if empty
                        if (children.Length == 0)
                        {
                            throw new Exception("What's happening? #1 (" + microId + ")");
                        }

                        // Trigger exception if the first child is neither an <h2> nor an <h3>
                        if (children[0].Name != "h2" && children[0].Name != "h3")
                        {
                            throw new Exception("What's happening? #2 (" + microId + ")");
                        }

                        // Get the title from the heading (<h2> or <h3>)
                        string title = children[0].InnerText;

                        BorgerDkMicroArticle micro = new BorgerDkMicroArticle {
                            Parent    = block,
                            Id        = microId,
                            Title     = title.Trim(),
                            TitleType = children[0].Name,
                            Content   = child.InnerHtml.Trim(),
                            // Children with an empty value are left out. The query is materialized right
                            // away: a deferred query would redo the conversion and cleaning on every
                            // enumeration and postpone any conversion error until first use.
                            Children  = (
                                from n in children
                                let e = BorgerDkHelpers.ToXElement(n)
                                where e.Value != ""
                                select BorgerDkHelpers.CleanMicroArticle(e)
                            ).ToArray()
                        };

                        microArticles.Add(micro);
                    }

                    block.MicroArticles = microArticles.ToArray();

                    elements.Add(block);
                }
                else if (id == "byline")
                {
                    XElement xChild = BorgerDkHelpers.ToXElement(node);

                    // When the by-line consists of a single <div> without child elements, replace the
                    // <div> with its text so the text sits directly in the by-line element
                    if (xChild.Elements().Count() == 1)
                    {
                        XElement xDiv = xChild.Element("div");
                        if (xDiv != null && !xDiv.Elements().Any())
                        {
                            xDiv.Remove();
                            xChild.Add(xDiv.Value);
                        }
                    }

                    temp.ByLine = node.InnerHtml.Trim();

                    BorgerDkTextElement element = new BorgerDkTextElement {
                        Type     = id,
                        Title    = "Skrevet af",
                        Content  = temp.ByLine,
                        Children = new [] { xChild }
                    };

                    // Add the element
                    elements.Add(element);
                }
                else
                {
                    HtmlNode[] children = BorgerDkHelpers.GetNonTextChildren(node);

                    // Unlike micro articles, other blocks that are empty are skipped rather than
                    // treated as an error
                    if (children.Length == 0)
                    {
                        continue;
                    }

                    // Blocks that do not start with an <h3> are skipped as well
                    if (children[0].Name != "h3")
                    {
                        continue;
                    }

                    // Get the title from the <h3>
                    string title = children[0].InnerText;

                    BorgerDkTextElement element = new BorgerDkTextElement {
                        Type     = id,
                        Title    = title,
                        Content  = node.InnerHtml,
                        // The <h3> itself is kept among the children; children with an empty value are
                        // left out. Materialized for the same reason as for the micro articles.
                        Children = (
                            from child in children
                            let e = BorgerDkHelpers.ToXElement(child)
                            where e.Value != ""
                            select BorgerDkHelpers.CleanLists(e)
                        ).ToArray()
                    };

                    // Add the element
                    elements.Add(element);
                }
            }

            temp.Elements = elements.ToArray();

            return temp;
        }