/// <summary>
/// Fills <paramref name="am"/> with the body, author, teaser and themes of the NZZ article <paramref name="na"/>.
/// </summary>
/// <param name="na">
/// Source article. The <c>style</c> of its body paragraphs is rewritten in place (h4 becomes h2, h3 becomes h1).
/// </param>
/// <param name="am">Target model; its <c>Content</c> and <c>Themes</c> collections are cleared and refilled.</param>
/// <returns>
/// The task returned by <c>ExecuteSafe</c>; the wrapped delegate yields <c>true</c> once it has run to completion.
/// </returns>
private Task<bool> ArticleToArticleModel(NzzArticle na, ArticleModel am)
{
    return ExecuteSafe(async () =>
    {
        am.Content.Clear();

        for (int i = 0; i < na.body.Length; i++)
        {
            // Promote headings. The order matters: h4 is checked first so it ends up as h2, not h1.
            if (na.body[i].style == "h4")
            {
                na.body[i].style = "h2";
            }
            if (na.body[i].style == "h3")
            {
                na.body[i].style = "h1";
            }

            var paragraph = na.body[i];

            if (string.IsNullOrWhiteSpace(paragraph.text))
            {
                // Paragraphs without text carry their payload in boxes; a missing box list is simply skipped.
                if (paragraph.boxes == null)
                {
                    continue;
                }

                foreach (var nzzBox in paragraph.boxes)
                {
                    if (nzzBox.type == "image")
                    {
                        var uri = ParseImageUri(nzzBox.path);
                        if (uri != null)
                        {
                            am.Content.Add(new ImageContentModel()
                            {
                                Url = uri,
                                Text = TextHelper.TextToTextModel(nzzBox.caption)
                            });
                        }
                    }
                    else if (nzzBox.type == "video" || nzzBox.type == "html")
                    {
                        // Deliberately ignored: these box types are not converted.
                    }
                    else if (nzzBox.type == "infobox")
                    {
                        var newContent = HtmlConverter.CreateOnce(am.Feed.Source.PublicBaseUrl)
                            .HtmlToParagraph("<p>" + nzzBox.body + "</p>");
                        if (newContent == null)
                        {
                            // Same tolerance as for regular paragraphs below: nothing converted, nothing added.
                            continue;
                        }

                        // Wrap the children of every paragraph in one cursive node so the whole infobox is cursive.
                        foreach (var paragraphModel in newContent)
                        {
                            var ntm = new TextModel()
                            {
                                Children = paragraphModel.Children,
                                TextType = TextType.Cursive
                            };
                            paragraphModel.Children = new List<TextModel> { ntm };
                        }

                        if (!string.IsNullOrWhiteSpace(nzzBox.title))
                        {
                            newContent.Insert(0, new ParagraphModel()
                            {
                                ParagraphType = ParagraphType.Title,
                                Children = new List<TextModel>()
                                {
                                    new TextModel() { Text = nzzBox.title, TextType = TextType.Cursive }
                                }
                            });
                        }

                        if (newContent.Any())
                        {
                            am.Content.Add(new TextContentModel() { Content = newContent });
                        }
                    }
                    else
                    {
                        LogHelper.Instance.LogInfo("nzz content type not found: " + nzzBox.mimeType, this);
                    }
                }

                continue;
            }

            // Paragraphs starting with "Mehr zum Thema" are dropped (presumably related-article teasers - confirm).
            // Ordinal comparison: this is a fixed marker, not culture-dependent text.
            if (paragraph.text.StartsWith("Mehr zum Thema", StringComparison.Ordinal))
            {
                continue;
            }

            string starttag = "<" + paragraph.style + ">";
            string endtag = "</" + paragraph.style + ">";
            var content = HtmlConverter.CreateOnce(am.Feed.Source.PublicBaseUrl)
                .HtmlToParagraph(starttag + paragraph.text + endtag);
            if (content != null && content.Count > 0)
            {
                am.Content.Add(new TextContentModel() { Content = content });
            }
        }

        // Nothing could be converted: show a (German) placeholder pointing the reader to the browser.
        if (!am.Content.Any())
        {
            am.Content.Add(TextHelper.TextToTextModel("Der Inhalt dieses Artikels wird nicht unterstützt. Öffne den Artikel im Browser um mehr zu sehen."));
        }

        // NOTE(review): every iteration overwrites am.Author, so only the last entry of na.authors survives.
        // Kept as is; confirm whether multiple authors should be joined instead.
        if (na.authors != null)
        {
            foreach (var nzzAuthor in na.authors)
            {
                if (!string.IsNullOrEmpty(nzzAuthor.name))
                {
                    am.Author = nzzAuthor.name;
                    if (!string.IsNullOrEmpty(nzzAuthor.abbreviation))
                    {
                        am.Author += ", " + nzzAuthor.abbreviation;
                    }
                }
                else
                {
                    am.Author = nzzAuthor.abbreviation;
                }
            }
        }

        // Append the agency; when there is no author yet, use the agency alone instead of " agency".
        if (!string.IsNullOrEmpty(na.agency))
        {
            am.Author = string.IsNullOrWhiteSpace(am.Author)
                ? na.agency
                : am.Author + " " + na.agency;
        }

        if (string.IsNullOrWhiteSpace(am.Author))
        {
            am.Author = "NZZ";
        }

        if (!string.IsNullOrEmpty(na.leadText))
        {
            am.Teaser = na.leadText;
        }

        am.Themes.Clear();
        await AddThemesAsync(am, na.departments);

        return true;
    });
}
/// <summary>
/// Renders each child of <paramref name="model"/> and appends the resulting inline to <paramref name="span"/>.
/// </summary>
/// <param name="span">Inline container that receives the rendered children.</param>
/// <param name="model">Text model whose <c>Children</c> are rendered; a null child list is a no-op.</param>
private static void AddInlineChildren(Span span, TextModel model)
{
    if (model.Children == null)
    {
        return;
    }

    foreach (var child in model.Children)
    {
        var rendered = RenderTextContent(child);
        if (rendered == null)
        {
            // Nothing was produced for this child; skip it.
            continue;
        }

        span.Inlines.Add(rendered);
    }
}
/// <summary>
/// Converts a <see cref="TextModel"/> (and, recursively, its children) into a XAML inline span.
/// </summary>
/// <param name="text">Model to render; its <c>TextType</c> selects the kind of span that is created.</param>
/// <returns>
/// A <c>Bold</c>, <c>Hyperlink</c>, <c>Underline</c> or plain <c>Span</c> containing the model's text and children.
/// A hyperlink whose target is not a valid absolute URI is rendered as a plain span instead of throwing.
/// </returns>
private static Span RenderTextContent(TextModel text)
{
    if (text.TextType == TextType.Bold)
    {
        var bold = new Bold();
        if (!string.IsNullOrWhiteSpace(text.Text))
        {
            bold.Inlines.Add(new Run() { Text = text.Text });
        }
        AddInlineChildren(bold, text);

        // The weight is additionally set on every direct inline, as the original implementation did.
        foreach (var inline in bold.Inlines)
        {
            inline.FontWeight = FontWeights.Bold;
        }
        return bold;
    }

    if (text.TextType == TextType.Cursive)
    {
        var cursive = new Span();
        if (!string.IsNullOrWhiteSpace(text.Text))
        {
            cursive.Inlines.Add(new Run() { Text = text.Text });
        }
        AddInlineChildren(cursive, text);

        foreach (var inline in cursive.Inlines)
        {
            inline.FontStyle = FontStyle.Italic;
        }
        return cursive;
    }

    if (text.TextType == TextType.Hyperlink)
    {
        // For hyperlinks Text holds the target, not visible text; the visible part comes from the children.
        // TryCreate avoids the ArgumentNullException / UriFormatException that new Uri(...) raises on bad input.
        Uri target;
        Span link;
        if (Uri.TryCreate(text.Text, UriKind.Absolute, out target))
        {
            link = new Hyperlink() { NavigateUri = target };
        }
        else
        {
            link = new Span();
        }
        AddInlineChildren(link, text);
        return link;
    }

    if (text.TextType == TextType.Underline)
    {
        var underline = new Underline();
        if (!string.IsNullOrWhiteSpace(text.Text))
        {
            underline.Inlines.Add(new Run() { Text = text.Text });
        }
        AddInlineChildren(underline, text);
        return underline;
    }

    // Everything else (TextType.Normal): the run is always added, even for blank text,
    // so whitespace between formatted neighbours is preserved.
    var plain = new Span();
    plain.Inlines.Add(new Run() { Text = text.Text });
    AddInlineChildren(plain, text);
    return plain;
}
/// <summary>
/// Splits the text of <paramref name="model"/> into words prepared for one-word-at-a-time ("spritz") display.
/// </summary>
/// <param name="model">
/// Text model to convert. For hyperlinks only the children are converted (the text holds the link target);
/// for every other type only <c>Text</c> is read.
/// </param>
/// <returns>
/// One <see cref="SpritzWord"/> per (possibly split) word, each with its focus character in <c>Middle</c>.
/// A word whose tail contains a full stop is followed by an extra empty entry. Never null.
/// </returns>
private static List<SpritzWord> ToSpritzWords(TextModel model)
{
    var words = new List<SpritzWord>();

    if (model.TextType == TextType.Hyperlink)
    {
        words.AddRange(ToSpritzWords(model.Children));
        return words;
    }

    string[] splitresult = model.Text?.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);
    if (splitresult == null)
    {
        return words;
    }

    // Working list: split-off remainders are inserted right after the current index
    // and are therefore processed by the following iterations.
    var wordlist = new List<string>(splitresult);
    for (int i = 0; i < wordlist.Count; i++)
    {
        // Break compound words after their first inner hyphen, but keep numeric ranges such as "10-20" together.
        // (The previous pattern "{0-9}-{0-9}" used braces instead of character classes and never matched a range.)
        int hyphenIndex = wordlist[i].IndexOf("-", StringComparison.Ordinal);
        bool hasInnerHyphen = hyphenIndex >= 0 && hyphenIndex != wordlist[i].Length - 1;
        if (hasInnerHyphen && !Regex.IsMatch(wordlist[i], "[0-9]-[0-9]"))
        {
            int afterHyphen = hyphenIndex + 1;
            wordlist.Insert(i + 1, wordlist[i].Substring(afterHyphen));
            wordlist[i] = wordlist[i].Substring(0, afterHyphen);
        }

        // From 14 characters on a word has to be split.
        if (wordlist[i].Length > 13)
        {
            // Longer than 13 + 13: just take the first 13 characters; otherwise cut the word in half.
            int cut = wordlist[i].Length > 26 ? 13 : wordlist[i].Length / 2;
            wordlist.Insert(i + 1, wordlist[i].Substring(cut));
            wordlist[i] = wordlist[i].Substring(0, cut);
        }

        string word = wordlist[i];
        var sw = new SpritzWord();

        // Position of the focus character depends on the word length: index 0, 1, 2 or 3.
        if (word.Length == 1)
        {
            sw.Middle = word[0];
        }
        else if (word.Length >= 2 && word.Length <= 5)
        {
            sw.Before = word[0].ToString();
            sw.Middle = word[1];
            sw.After = word.Substring(2);
        }
        else if (word.Length >= 6 && word.Length <= 9)
        {
            sw.Before = word.Substring(0, 2);
            sw.Middle = word[2];
            sw.After = word.Substring(3);
        }
        else // word.Length >= 10
        {
            sw.Before = word.Substring(0, 3);
            sw.Middle = word[3];
            sw.After = word.Substring(4);
        }

        // Lenght (sic, property name of SpritzWord) is 4 around punctuation and 1 otherwise;
        // presumably a relative display duration - confirm against the consumer.
        bool endsSentence = sw.After != null && sw.After.Contains(".");
        bool hasPause = sw.After != null
                        && (sw.After.Contains(";") || sw.After.Contains(",") || sw.After.Contains(":") || sw.Middle == '-');

        sw.Lenght = endsSentence || hasPause ? 4 : 1;
        words.Add(sw);

        if (endsSentence)
        {
            // Extra empty entry after a full stop.
            words.Add(new SpritzWord() { Lenght = 5 });
        }
    }

    return words;
}
/// <summary>
/// Recursively converts an HTML node into a <see cref="TextModel"/> tree.
/// </summary>
/// <param name="parentNode">Node to convert: a text node or one of the supported inline/block elements.</param>
/// <returns>
/// The model for the node, or <c>null</c> when the element is not supported, the node carries no text,
/// or a hyperlink has no renderable children.
/// </returns>
private TextModel ParseText(HtmlNode parentNode)
{
    var model = new TextModel();

    // A childless text node becomes a plain model (or nothing, when it is blank).
    if (!parentNode.ChildNodes.Any() && parentNode.NodeType == HtmlNodeType.Text)
    {
        model.Text = TextHelper.NormalizeString(TextHelper.StripHtml(parentNode.InnerText));
        return string.IsNullOrWhiteSpace(model.Text) ? null : model;
    }

    switch (parentNode.Name)
    {
        case "h1":
        case "h2":
        case "h3":
        case "h4":
        case "p":
            model.TextType = TextType.Normal;
            break;

        case "b":
        case "strong":
        case "em":
            model.TextType = TextType.Bold;
            break;

        case "i":
            model.TextType = TextType.Cursive;
            break;

        case "u":
            model.TextType = TextType.Underline;
            break;

        case "a":
            // For hyperlinks Text carries the link target.
            model.TextType = TextType.Hyperlink;
            model.Text = TextHelper.NormalizeString(parentNode.Attributes["href"]?.Value);
            if (string.IsNullOrWhiteSpace(model.Text))
            {
                // No usable target: treat the anchor like ordinary text.
                model.TextType = TextType.Normal;
            }
            else
            {
                string href = model.Text;
                if (href.StartsWith("www", StringComparison.Ordinal))
                {
                    href = "http://" + href;
                }

                // Anything without an http(s) scheme is resolved against the base URL.
                // https:// must count as absolute too, otherwise secure links get the base URL prepended.
                bool hasHttpScheme = href.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                                     || href.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
                if (!hasHttpScheme)
                {
                    // Drop a leading slash before appending to _baseUrl.
                    href = href.StartsWith("/", StringComparison.Ordinal)
                        ? _baseUrl + href.Substring(1)
                        : _baseUrl + href;
                }

                if (!Uri.IsWellFormedUriString(href, UriKind.Absolute))
                {
                    // TODO: do additional repair work, e.g. handle UTF-8 characters as in
                    // http://www.ragnar%C3%B6k-spektakel.ch - until then fall back to the base URL.
                    href = _baseUrl;
                }

                model.Text = href;
            }
            break;

        default:
            // Unsupported element.
            return null;
    }

    // Shortcut: an element wrapping a single text node takes that text directly.
    // Hyperlinks are excluded because their Text already holds the target.
    var firstChild = parentNode.ChildNodes.FirstOrDefault();
    if (parentNode.ChildNodes.Count() == 1
        && firstChild.NodeType == HtmlNodeType.Text
        && model.TextType != TextType.Hyperlink)
    {
        model.Text = TextHelper.NormalizeString(firstChild.InnerText.Trim());
        return string.IsNullOrWhiteSpace(model.Text) ? null : model;
    }

    foreach (var node in parentNode.ChildNodes)
    {
        var tm = ParseText(node);
        if (tm != null)
        {
            model.Children.Add(tm);
        }
    }

    // A link without any visible content is dropped.
    if (model.TextType == TextType.Hyperlink && model.Children.Count == 0)
    {
        return null;
    }

    return !string.IsNullOrEmpty(model.Text) || model.Children.Any() ? model : null;
}
/// <summary>
/// Removes redundant single-child wrappers from a <see cref="TextModel"/> tree: a node with exactly one child
/// whose text type is already listed in <paramref name="knownTextTypes"/> is replaced by that child.
/// </summary>
/// <param name="model">Root of the (sub)tree to simplify; modified in place.</param>
/// <param name="knownTextTypes">
/// Text types already seen on the way down to <paramref name="model"/>. The (possibly collapsed) type of
/// <paramref name="model"/> is appended to this list; types found deeper in the tree are not.
/// </param>
private void CollapseModelsIfNecessary(TextModel model, List<TextType> knownTextTypes)
{
    // Pull the only child up while the wrapper's type is already known.
    while (model.Children != null && model.Children.Count == 1 && knownTextTypes.Contains(model.TextType))
    {
        var onlyChild = model.Children[0];
        model.Text = onlyChild.Text;
        model.TextType = onlyChild.TextType;
        model.Children = onlyChild.Children;
    }

    knownTextTypes.Add(model.TextType);

    if (model.Children == null)
    {
        return;
    }

    foreach (var child in model.Children)
    {
        // Every branch gets its own copy. With a shared list, a type met in one sibling's subtree would count
        // as "known" for the next sibling and its formatting wrapper would be collapsed away.
        CollapseModelsIfNecessary(child, new List<TextType>(knownTextTypes));
    }
}