/// <summary>
/// Parses the specified HTML <paramref name="content"/> and converts the top-level nodes that carry
/// an <c>id</c> attribute into elements.
/// </summary>
/// <param name="content">The HTML to be parsed.</param>
/// <returns>An array with the elements found, in document order.</returns>
/// <exception cref="Exception">
/// A child (with an <c>id</c> attribute) of the <c>kernetekst</c> node has no children according to
/// <c>GetNonTextChildren</c>, or the first of those children is neither a <c>h2</c> nor a <c>h3</c>.
/// </exception>
private BorgerDkElement[] ParseElements(string content) {

    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(content);
    document.OptionOutputAsXml = false;

    List<BorgerDkElement> result = new List<BorgerDkElement>();

    foreach (HtmlNode node in document.DocumentNode.ChildNodes) {

        // Text nodes are ignored
        if (node is HtmlTextNode) {
            continue;
        }

        // ... and so are nodes without an "id" attribute
        if (node.Attributes["id"] == null) {
            continue;
        }

        string id = node.Attributes["id"].Value;

        switch (id) {

            case "kernetekst": {

                // The block holds one micro article for each child node that has an "id" attribute
                BorgerDkBlockElement block = new BorgerDkBlockElement { Id = id };
                List<BorgerDkMicroArticle> articles = new List<BorgerDkMicroArticle>();

                foreach (HtmlNode child in node.ChildNodes) {

                    if (child is HtmlTextNode) {
                        continue;
                    }

                    if (child.Attributes["id"] == null) {
                        continue;
                    }

                    // The micro article ID is the "id" attribute without the "microArticle_" part
                    string microId = child.Attributes["id"].Value.Replace("microArticle_", "");

                    HtmlNode[] nonText = GetNonTextChildren(child);

                    // A micro article without any children is unexpected
                    if (nonText.Length == 0) {
                        throw new Exception("What's happening? #1 (" + microId + ")");
                    }

                    // The first child is expected to be the heading (either <h2> or <h3>)
                    HtmlNode heading = nonText[0];
                    if (!(heading.Name == "h2" || heading.Name == "h3")) {
                        throw new Exception("What's happening? #2 (" + microId + ")");
                    }

                    articles.Add(new BorgerDkMicroArticle {
                        Parent = block,
                        Id = microId,
                        Title = heading.InnerText.Trim(),
                        TitleType = heading.Name,
                        Content = FixSimpleErrors(child.InnerHtml.Trim())
                    });

                }

                block.MicroArticles = articles.ToArray();
                result.Add(block);
                break;

            }

            case "byline": {

                // The byline gets a fixed title ("Skrevet af") and the trimmed inner text as content
                result.Add(new BorgerDkTextElement {
                    Id = id,
                    Title = "Skrevet af",
                    Content = node.InnerText.Trim()
                });
                break;

            }

            default: {

                HtmlNode[] nonText = GetNonTextChildren(node);

                // Nodes that are empty or don't start with a <h3> are skipped (no exception is thrown)
                if (nonText.Length == 0 || nonText[0].Name != "h3") {
                    continue;
                }

                // The text of the <h3> is used as the title (as-is, not trimmed)
                result.Add(new BorgerDkTextElement {
                    Id = id,
                    Title = nonText[0].InnerText,
                    Content = FixSimpleErrors(node.InnerHtml)
                });
                break;

            }

        }

    }

    return result.ToArray();

}
/// <summary>
/// Creates a new <see cref="BorgerDkArticle"/> from the specified <paramref name="article"/>, and parses
/// the HTML of <c>article.Content</c> into the elements of the returned article.
/// </summary>
/// <param name="service">The service. The domain of its endpoint is copied to the returned article.</param>
/// <param name="article">The article to convert.</param>
/// <param name="municipality">The municipality of the article. If <c>null</c>,
/// <c>BorgerDkMunicipality.NoMunicipality</c> is used instead.</param>
/// <returns>The converted article.</returns>
/// <exception cref="ArgumentNullException"><paramref name="service"/> or <paramref name="article"/> is <c>null</c>.</exception>
/// <exception cref="Exception">
/// A child (with an <c>id</c> attribute) of the <c>kernetekst</c> node has no children according to
/// <c>BorgerDkHelpers.GetNonTextChildren</c>, or the first of those children is neither a <c>h2</c> nor a <c>h3</c>.
/// </exception>
public static BorgerDkArticle GetFromArticle(BorgerDkService service, Article article, BorgerDkMunicipality municipality) {

    // Validate the required arguments before doing anything else
    if (service == null) {
        throw new ArgumentNullException("service");
    }
    if (article == null) {
        throw new ArgumentNullException("article");
    }

    // Fall back to "no municipality" if none was specified
    municipality = municipality ?? BorgerDkMunicipality.NoMunicipality;

    BorgerDkArticle temp = new BorgerDkArticle {
        Id = article.ArticleID,
        Domain = service.Endpoint.Domain,
        // Anything from the first "?" and onwards is stripped from the URL
        Url = article.ArticleUrl.Split('?')[0],
        Municipality = municipality,
        Title = HttpUtility.HtmlDecode(article.ArticleTitle),
        Header = HttpUtility.HtmlDecode(article.ArticleHeader),
        Published = article.PublishingDate,
        Modified = article.LastUpdated,
        Content = article.Content
    };

    HtmlDocument htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(article.Content);
    htmlDocument.OptionOutputAsXml = false;

    List<BorgerDkElement> elements = new List<BorgerDkElement>();

    foreach (HtmlNode node in htmlDocument.DocumentNode.ChildNodes) {

        // Only non-text nodes with an "id" attribute are converted
        if (node is HtmlTextNode || node.Attributes["id"] == null) {
            continue;
        }

        string id = node.Attributes["id"].Value;

        if (id == "kernetekst") {

            BorgerDkBlockElement block = new BorgerDkBlockElement { Type = "kernetekst" };

            List<BorgerDkMicroArticle> microArticles = new List<BorgerDkMicroArticle>();

            foreach (HtmlNode child in node.ChildNodes) {

                if (child is HtmlTextNode || child.Attributes["id"] == null) {
                    continue;
                }

                // Get the ID of the micro article (the "id" attribute without the "microArticle_" part)
                string microId = child.Attributes["id"].Value.Replace("microArticle_", "");

                HtmlNode[] children = BorgerDkHelpers.GetNonTextChildren(child);

                // Trigger exception if empty
                if (children.Length == 0) {
                    throw new Exception("What's happening? #1 (" + microId + ")");
                }

                // Trigger exception if the first child is neither a <h2> nor a <h3>
                HtmlNode heading = children[0];
                if (heading.Name != "h2" && heading.Name != "h3") {
                    throw new Exception("What's happening? #2 (" + microId + ")");
                }

                BorgerDkMicroArticle micro = new BorgerDkMicroArticle {
                    Parent = block,
                    Id = microId,
                    Title = heading.InnerText.Trim(),
                    TitleType = heading.Name,
                    Content = child.InnerHtml.Trim(),
                    // Children with an empty text value are left out. No attribute-based filtering is
                    // applied. NOTE: the query is deferred, so it is evaluated each time it is enumerated.
                    Children = (
                        from n in children
                        let e = BorgerDkHelpers.ToXElement(n)
                        where e.Value != ""
                        select BorgerDkHelpers.CleanMicroArticle(e)
                    )
                };

                microArticles.Add(micro);

            }

            block.MicroArticles = microArticles.ToArray();
            elements.Add(block);

        } else if (id == "byline") {

            XElement xChild = BorgerDkHelpers.ToXElement(node);

            // If the only child element is a <div> that has no child elements of its own, the <div> is
            // replaced by its text value
            if (xChild.Elements().Count() == 1) {
                XElement xDiv = xChild.Element("div");
                if (xDiv != null && !xDiv.Elements().Any()) {
                    xDiv.Remove();
                    xChild.Add(xDiv.Value);
                }
            }

            // The byline itself is taken from the HTML node (not from the modified XML element)
            temp.ByLine = node.InnerHtml.Trim();

            BorgerDkTextElement element = new BorgerDkTextElement {
                Type = id,
                Title = "Skrevet af",
                Content = temp.ByLine,
                Children = new [] { xChild }
            };

            // Add the element
            elements.Add(element);

        } else {

            HtmlNode[] children = BorgerDkHelpers.GetNonTextChildren(node);

            // Skip the node if it is empty (no exception is thrown)
            if (children.Length == 0) {
                continue;
            }

            // Skip the node if the first child isn't a <h3> (no exception is thrown)
            if (children[0].Name != "h3") {
                continue;
            }

            // Get the title from the <h3> (as-is, not trimmed)
            string title = children[0].InnerText;

            BorgerDkTextElement element = new BorgerDkTextElement {
                Type = id,
                Title = title,
                Content = node.InnerHtml,
                // All children (including the <h3>) are considered, but those with an empty text value
                // are left out. NOTE: the query is deferred, so it is evaluated each time it is enumerated.
                Children = (
                    from child in children
                    let e = BorgerDkHelpers.ToXElement(child)
                    where e.Value != ""
                    select BorgerDkHelpers.CleanLists(e)
                )
            };

            // Add the element
            elements.Add(element);

        }

    }

    temp.Elements = elements.ToArray();

    return temp;

}