Example #1
0
        /// <summary>
        /// Copies body content, author, teaser and themes from a downloaded NZZ article
        /// into the given article model.
        /// </summary>
        /// <param name="na">
        /// The NZZ article. The <c>style</c> of its body entries is rewritten in place
        /// ("h4" becomes "h2", "h3" becomes "h1").
        /// </param>
        /// <param name="am">The article model to fill; its content and themes are cleared first.</param>
        /// <returns>
        /// The task produced by <c>ExecuteSafe</c> for the conversion delegate; the delegate
        /// itself returns <c>true</c> once all fields have been copied.
        /// </returns>
        private Task<bool> ArticleToArticleModel(NzzArticle na, ArticleModel am)
        {
            return ExecuteSafe(async () =>
            {
                am.Content.Clear();
                for (int i = 0; i < na.body.Length; i++)
                {
                    // map the small headings onto the larger ones
                    if (na.body[i].style == "h4")
                        na.body[i].style = "h2";
                    if (na.body[i].style == "h3")
                        na.body[i].style = "h1";

                    string style = na.body[i].style;
                    string text = na.body[i].text;

                    if (string.IsNullOrWhiteSpace(text))
                    {
                        // a paragraph with neither text nor boxes carries nothing to render;
                        // skip it instead of failing the whole article on a null collection
                        var boxes = na.body[i].boxes;
                        if (boxes == null)
                            continue;

                        foreach (var nzzBox in boxes)
                        {
                            if (nzzBox.type == "image")
                            {
                                var uri = ParseImageUri(nzzBox.path);
                                if (uri != null)
                                    am.Content.Add(new ImageContentModel()
                                    {
                                        Url = uri,
                                        Text = TextHelper.TextToTextModel(nzzBox.caption)
                                    });
                            }
                            else if (nzzBox.type == "video" || nzzBox.type == "html")
                            {
                                // intentionally ignored
                            }
                            else if (nzzBox.type == "infobox")
                            {
                                var newContent = HtmlConverter.CreateOnce(am.Feed.Source.PublicBaseUrl).HtmlToParagraph("<p>" + nzzBox.body + "</p>");

                                // wrap the children of every paragraph in a single cursive node
                                foreach (var paragraphModel in newContent)
                                {
                                    var ntm = new TextModel()
                                    {
                                        Children = paragraphModel.Children,
                                        TextType = TextType.Cursive
                                    };
                                    paragraphModel.Children = new List<TextModel> { ntm };
                                }

                                // the box title, if any, becomes a leading title paragraph
                                if (!string.IsNullOrWhiteSpace(nzzBox.title))
                                    newContent.Insert(0, new ParagraphModel()
                                    {
                                        ParagraphType = ParagraphType.Title,
                                        Children = new List<TextModel>()
                                        {
                                            new TextModel()
                                            {
                                                Text = nzzBox.title,
                                                TextType = TextType.Cursive
                                            }
                                        }
                                    });

                                if (newContent.Any())
                                    am.Content.Add(new TextContentModel()
                                    {
                                        Content = newContent
                                    });
                            }
                            else
                                LogHelper.Instance.LogInfo("nzz content type not found: " + nzzBox.mimeType, this);
                        }
                    }
                    else if (!text.StartsWith("Mehr zum Thema", System.StringComparison.Ordinal))
                    {
                        // paragraphs starting with "Mehr zum Thema" are dropped; everything else
                        // is wrapped in its style tag and converted
                        var content = HtmlConverter.CreateOnce(am.Feed.Source.PublicBaseUrl).HtmlToParagraph("<" + style + ">" + text + "</" + style + ">");
                        if (content != null && content.Count > 0)
                            am.Content.Add(new TextContentModel()
                            {
                                Content = content
                            });
                    }
                }

                // fallback message when nothing could be converted
                if (!am.Content.Any())
                    am.Content.Add(TextHelper.TextToTextModel("Der Inhalt dieses Artikels wird nicht unterstützt. Öffne den Artikel im Browser um mehr zu sehen."));

                // NOTE(review): every author overwrites the previous one, so only the last
                // entry of na.authors ends up in am.Author - confirm this is intended
                if (na.authors != null)
                    foreach (var nzzAuthor in na.authors)
                    {
                        if (!string.IsNullOrEmpty(nzzAuthor.name))
                        {
                            am.Author = nzzAuthor.name;
                            if (!string.IsNullOrEmpty(nzzAuthor.abbreviation))
                                am.Author += ", " + nzzAuthor.abbreviation;
                        }
                        else
                            am.Author = nzzAuthor.abbreviation;
                    }

                if (!string.IsNullOrEmpty(na.agency))
                {
                    // avoid a leading blank when there is no author text to append to
                    am.Author = string.IsNullOrWhiteSpace(am.Author)
                        ? na.agency
                        : am.Author + " " + na.agency;
                }

                if (string.IsNullOrWhiteSpace(am.Author))
                    am.Author = "NZZ";

                if (!string.IsNullOrEmpty(na.leadText))
                    am.Teaser = na.leadText;

                am.Themes.Clear();
                await AddThemesAsync(am, na.departments);

                return true;
            });
        }
Example #2
0
 /// <summary>
 /// Renders every child of <paramref name="model"/> and appends each non-null result
 /// to the inlines of <paramref name="span"/>.
 /// </summary>
 /// <param name="span">The span that receives the rendered children.</param>
 /// <param name="model">The text model whose children are rendered; a null child list is ignored.</param>
 private static void AddInlineChildren(Span span, TextModel model)
 {
     var children = model.Children;
     if (children == null)
         return;

     foreach (var child in children)
     {
         var rendered = RenderTextContent(child);
         if (rendered == null)
             continue;

         span.Inlines.Add(rendered);
     }
 }
Example #3
0
 /// <summary>
 /// Converts a text model and, through <c>AddInlineChildren</c>, its children into an inline span.
 /// </summary>
 /// <param name="text">The text model to render.</param>
 /// <returns>
 /// A <c>Bold</c>, <c>Underline</c>, <c>Hyperlink</c> or plain <c>Span</c> depending on
 /// <c>text.TextType</c>. A hyperlink whose <c>Text</c> is not an absolute URI is rendered
 /// as a plain span so that its children stay visible instead of throwing.
 /// </returns>
 private static Span RenderTextContent(TextModel text)
 {
     if (text.TextType == TextType.Hyperlink)
     {
         // for hyperlinks Text holds the target, not display text; the visible part
         // comes from the children only
         Uri target;
         Span link;
         if (Uri.TryCreate(text.Text, UriKind.Absolute, out target))
             link = new Hyperlink()
             {
                 NavigateUri = target
             };
         else
             link = new Span();

         AddInlineChildren(link, text);
         return link;
     }

     bool isBold = text.TextType == TextType.Bold;
     bool isCursive = text.TextType == TextType.Cursive;
     bool isUnderline = text.TextType == TextType.Underline;

     Span span;
     if (isBold)
         span = new Bold();
     else if (isUnderline)
         span = new Underline();
     else
         span = new Span();

     // formatted spans skip blank text; every other type always gets a run
     bool isFormatted = isBold || isCursive || isUnderline;
     if (!isFormatted || !string.IsNullOrWhiteSpace(text.Text))
         span.Inlines.Add(new Run()
         {
             Text = text.Text
         });

     AddInlineChildren(span, text);

     // push the formatting down onto every direct inline
     if (isBold)
     {
         foreach (var inline in span.Inlines)
         {
             inline.FontWeight = FontWeights.Bold;
         }
     }
     else if (isCursive)
     {
         foreach (var inline in span.Inlines)
         {
             inline.FontStyle = FontStyle.Italic;
         }
     }

     return span;
 }
Example #4
0
        /// <summary>
        /// Splits the text of a model into <c>SpritzWord</c> entries.
        /// </summary>
        /// <remarks>
        /// Hyperlink models contribute only the words of their children. For every other model
        /// the text is split at blanks; words are additionally split after the first hyphen
        /// (unless the word contains a digit-hyphen-digit sequence such as "10-20") and when
        /// longer than 13 characters. Each word is divided into <c>Before</c>, <c>Middle</c>
        /// and <c>After</c> around a position that depends on its length. <c>Lenght</c> is set
        /// to 4 after punctuation or a hyphen in the middle and to 1 otherwise; after a period an
        /// extra empty entry with <c>Lenght</c> 5 is appended (presumably a pause - confirm
        /// against the consumer).
        /// </remarks>
        /// <param name="model">The text model to convert.</param>
        /// <returns>The generated words; empty when the model has no text.</returns>
        private static List<SpritzWord> ToSpritzWords(TextModel model)
        {
            var words = new List<SpritzWord>();

            if (model.TextType == TextType.Hyperlink)
            {
                // the Text of a hyperlink is its target, so only the children are read
                words.AddRange(ToSpritzWords(model.Children));
                return words;
            }

            string[] splitresult = model.Text?.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);
            if (splitresult == null)
                return words;

            // parts split off a word are inserted right behind it and handled by later iterations
            var wordlist = new List<string>(splitresult);
            for (int i = 0; i < wordlist.Count; i++)
            {
                string word = wordlist[i];

                // split after the first hyphen, except for a trailing hyphen and number ranges
                int hyphen = word.IndexOf("-", StringComparison.Ordinal);
                if (hyphen >= 0 && hyphen != word.Length - 1 && !Regex.IsMatch(word, "[0-9]-[0-9]"))
                {
                    wordlist.Insert(i + 1, word.Substring(hyphen + 1));
                    word = word.Substring(0, hyphen + 1);
                }

                // from 14 characters on the word has to be split
                if (word.Length > 13)
                {
                    // longer than 13 + 13: simply take the first 13 characters,
                    // otherwise cut the word into two halves
                    int cut = word.Length > 26 ? 13 : word.Length / 2;
                    wordlist.Insert(i + 1, word.Substring(cut));
                    word = word.Substring(0, cut);
                }

                wordlist[i] = word;

                var sw = new SpritzWord();
                if (word.Length == 1)
                    sw.Middle = word[0];
                else if (word.Length >= 2 && word.Length <= 5)
                {
                    sw.Before = word[0].ToString();
                    sw.Middle = word[1];
                    sw.After = word.Substring(2);
                }
                else if (word.Length >= 6 && word.Length <= 9)
                {
                    sw.Before = word.Substring(0, 2);
                    sw.Middle = word[2];
                    sw.After = word.Substring(3);
                }
                else //(word.Length >= 10)
                {
                    sw.Before = word.Substring(0, 3);
                    sw.Middle = word[3];
                    sw.After = word.Substring(4);
                }

                bool hasTail = sw.After != null;
                if (hasTail && sw.After.Contains("."))
                {
                    sw.Lenght = 4;
                    words.Add(sw);
                    words.Add(new SpritzWord() { Lenght = 5 });
                }
                else if (hasTail && (sw.After.Contains(";") || sw.After.Contains(",") || sw.After.Contains(":") || sw.Middle == '-'))
                {
                    sw.Lenght = 4;
                    words.Add(sw);
                }
                else
                {
                    sw.Lenght = 1;
                    words.Add(sw);
                }
            }

            return words;
        }
Example #5
0
        /// <summary>
        /// Recursively converts an HTML node into a <c>TextModel</c> tree.
        /// </summary>
        /// <remarks>
        /// Supported elements: h1-h4 and p (normal), b/strong/em (bold), i (cursive),
        /// u (underline) and a (hyperlink, with the resolved target stored in <c>Text</c>).
        /// Relative link targets are prefixed with <c>_baseUrl</c>; targets that still are not
        /// well-formed absolute URIs are replaced by <c>_baseUrl</c>.
        /// NOTE(review): "em" is mapped to bold here although HTML usually renders it in
        /// italics - confirm this is intended.
        /// </remarks>
        /// <param name="parentNode">The node to convert.</param>
        /// <returns>
        /// The model for the node, or <c>null</c> when the element is not supported or
        /// yields neither text nor children.
        /// </returns>
        private TextModel ParseText(HtmlNode parentNode)
        {
            var model = new TextModel();

            // plain text leaf
            if (!parentNode.ChildNodes.Any() && parentNode.NodeType == HtmlNodeType.Text)
            {
                model.Text = TextHelper.NormalizeString(TextHelper.StripHtml(parentNode.InnerText));
                if (string.IsNullOrWhiteSpace(model.Text))
                    return null;
                return model;
            }

            switch (parentNode.Name)
            {
                case "h1":
                case "h2":
                case "h3":
                case "h4":
                case "p":
                    model.TextType = TextType.Normal;
                    break;
                case "b":
                case "strong":
                case "em":
                    model.TextType = TextType.Bold;
                    break;
                case "i":
                    model.TextType = TextType.Cursive;
                    break;
                case "u":
                    model.TextType = TextType.Underline;
                    break;
                case "a":
                    model.TextType = TextType.Hyperlink;
                    model.Text = TextHelper.NormalizeString(parentNode.Attributes["href"]?.Value);

                    if (string.IsNullOrWhiteSpace(model.Text))
                    {
                        // a link without a target is treated like normal text
                        model.TextType = TextType.Normal;
                    }
                    else
                    {
                        if (model.Text.StartsWith("www", StringComparison.Ordinal))
                            model.Text = "http://" + model.Text;

                        // both http and https targets are already absolute; everything else
                        // is resolved against the base url
                        bool hasHttpScheme =
                            model.Text.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
                            model.Text.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
                        if (!hasHttpScheme)
                        {
                            if (model.Text.StartsWith("/", StringComparison.Ordinal))
                                model.Text = _baseUrl + model.Text.Substring(1);
                            else
                                model.Text = _baseUrl + model.Text;
                        }

                        if (!Uri.IsWellFormedUriString(model.Text, UriKind.Absolute))
                        {
                            //todo: do additional repair stuff
                            //parse utf8 characters like: http://www.ragnar%C3%B6k-spektakel.ch
                            model.Text = _baseUrl;
                        }
                    }
                    break;
                default:
                    // unsupported element
                    return null;
            }

            // shortcut: an element that only wraps a single text node stores the text directly
            // (not for hyperlinks, whose Text already holds the target)
            var childNodes = parentNode.ChildNodes;
            if (childNodes.Count == 1 && childNodes[0].NodeType == HtmlNodeType.Text && model.TextType != TextType.Hyperlink)
            {
                model.Text = TextHelper.NormalizeString(childNodes[0].InnerText.Trim());
                if (string.IsNullOrWhiteSpace(model.Text))
                    return null;
                return model;
            }

            foreach (var node in childNodes)
            {
                var tm = ParseText(node);
                if (tm != null)
                    model.Children.Add(tm);
            }

            // a hyperlink without any child has nothing to display
            if (model.TextType == TextType.Hyperlink && model.Children.Count == 0)
                return null;

            return !string.IsNullOrEmpty(model.Text) || model.Children.Any() ? model : null;
        }
Example #6
0
        /// <summary>
        /// Removes redundant nesting from a text model tree: a node that has exactly one child
        /// and whose text type is already contained in <paramref name="knownTextTypes"/> is
        /// replaced by that child (text, type and children are taken over).
        /// </summary>
        /// <param name="model">The node to simplify in place; its children are processed recursively.</param>
        /// <param name="knownTextTypes">
        /// The text types of the nodes above <paramref name="model"/>. The list is not modified;
        /// each level passes its own copy down, so types seen in one subtree cannot cause a
        /// sibling subtree to be collapsed.
        /// </param>
        private void CollapseModelsIfNecessary(TextModel model, List<TextType> knownTextTypes)
        {
            while (model.Children != null && model.Children.Count == 1 && knownTextTypes.Contains(model.TextType))
            {
                var onlyChild = model.Children[0];
                model.Text = onlyChild.Text;
                model.TextType = onlyChild.TextType;
                model.Children = onlyChild.Children;
            }

            if (model.Children == null)
                return;

            // the types known to the children are those of this node and its ancestors only
            var typesForChildren = new List<TextType>(knownTextTypes) { model.TextType };
            foreach (var textModel in model.Children)
            {
                CollapseModelsIfNecessary(textModel, typesForChildren);
            }
        }