/// <summary>
/// Reports a "[Save]" progress message (when a reporter is supplied) and then writes
/// <paramref name="content"/> to "&lt;article title&gt;.htm" inside <c>BaseFolder</c>.
/// </summary>
/// <param name="article">Article whose title names the file and whose title/URL appear in the progress text.</param>
/// <param name="content">Text handed to <c>SaveToFileAsync</c>.</param>
/// <param name="progress">Optional progress sink; nothing is reported when it is null.</param>
/// <returns>A task that completes once <c>SaveToFileAsync</c> has completed.</returns>
private async Task SaveArticleToFile(Article article, string content, IProgress<DownloadStringTaskAsyncExProgress> progress)
{
    if (progress != null)
    {
        var saveNotice = new DownloadStringTaskAsyncExProgress();
        saveNotice.Text = string.Format(
            "{0} [Save] {1} ({2}) {0}",
            Environment.NewLine,
            article.Title,
            article.URL);
        progress.Report(saveNotice);
    }

    // The title is passed through ToValidFileName() before being used as the file name.
    string fileName = article.Title.ToValidFileName() + ".htm";
    string targetPath = Path.Combine(BaseFolder, fileName);

    await SaveToFileAsync(content, targetPath);
}
/// <summary>
/// Extension point that derived classes implement to obtain the content of <paramref name="article"/>.
/// </summary>
/// <param name="article">The article to process.</param>
/// <param name="progress">Progress sink passed through to the implementation.</param>
/// <returns>
/// A task producing a boolean. NOTE(review): presumably <c>true</c> means the content was
/// obtained and <c>false</c> that it could not be extracted; confirm against the implementations.
/// </returns>
public abstract Task<bool> ExtractArticleContent(Article article , IProgress<DownloadStringTaskAsyncExProgress> progress);
/// <summary>
/// Downloads the page at <paramref name="url"/> and scrapes it into catalogs and their articles.
/// </summary>
/// <remarks>
/// Script, style and comment nodes are removed before scraping. Every <c>div.day</c> element
/// becomes one catalog; when the page has none, the element matched by
/// <c>div#container &gt;div#wrapper &gt;div#content</c> is used instead.
/// A container that has a <c>div.dayTitle</c> takes its title from it and its articles from
/// <c>div.postTitle</c> / <c>a.postTitle2</c>; otherwise a time-stamped placeholder title is
/// used and the articles come from <c>div.post</c> / <c>a.PostTitle</c>.
/// Posts without a matching link, or whose link has no <c>href</c>, are skipped instead of
/// aborting the whole extraction.
/// </remarks>
/// <param name="url">Absolute address of the page; it is passed to the <see cref="Uri"/> constructor.</param>
/// <returns>The catalogs found on the page, in document order; empty when no container matches.</returns>
private async Task<List<Catalog>> ExtractCatalogsFromWebPage(string url)
{
    var pageUri = new Uri(url);
    var browser = new ScrapingBrowser();
    browser.Encoding = Encoding.UTF8;

    // DownloadString blocks, so it is run on the thread pool; without an await this
    // async method would execute entirely synchronously on the caller's thread.
    // ConfigureAwait(false) keeps a caller that blocks on the returned task from deadlocking.
    string pageSource = await Task.Run(() => browser.DownloadString(pageUri)).ConfigureAwait(false);

    var document = new HtmlDocument();
    document.LoadHtml(pageSource);
    var root = document.DocumentNode;

    // Snapshot with ToArray() first: removing nodes while enumerating Descendants is unsafe.
    foreach (var scriptNode in root.Descendants("script").ToArray())
    {
        scriptNode.Remove();
    }

    foreach (var styleNode in root.Descendants("style").ToArray())
    {
        styleNode.Remove();
    }

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var commentNodes = root.SelectNodes("//comment()");
    if (commentNodes != null)
    {
        foreach (var commentNode in commentNodes.ToArray())
        {
            commentNode.Remove();
        }
    }

    var containers = root.CssSelect("div.day");
    if (!containers.Any())
    {
        // Fallback layout: treat the single content area as one catalog.
        containers = root.CssSelect("div#container >div#wrapper >div#content");
    }

    var catalogs = new List<Catalog>();
    foreach (var container in containers)
    {
        var catalog = new Catalog();
        catalog.IsChecked = true;

        var titleNode = container.CssSelect("div.dayTitle").FirstOrDefault();
        if (titleNode != null)
        {
            catalog.Title = titleNode.InnerText.ClearNotWords();

            foreach (var post in container.CssSelect("div.postTitle"))
            {
                var link = post.CssSelect("a.postTitle2").FirstOrDefault();
                var href = link == null ? null : link.Attributes["href"];
                if (href == null)
                {
                    // No usable link: skip this post rather than fail the whole page.
                    continue;
                }

                var article = new Article();
                article.Title = post.InnerText.ClearNotWords();
                article.URL = href.Value;
                catalog.Articles.Add(article);
            }
        }
        else
        {
            catalog.Title = "CataLog" + DateTime.Now.ToShortTimeString();

            foreach (var post in container.CssSelect("div.post"))
            {
                var link = post.CssSelect("a.PostTitle").FirstOrDefault();
                var href = link == null ? null : link.Attributes["href"];
                if (href == null)
                {
                    // No usable link: skip this post rather than fail the whole page.
                    continue;
                }

                var article = new Article();
                article.Title = link.InnerText.ClearNotWords();
                article.URL = href.Value;
                catalog.Articles.Add(article);
            }
        }

        catalogs.Add(catalog);
    }

    return catalogs;
}
/// <summary>
/// Initializes a new instance of the ArticleViewModel class.
/// </summary>
/// <param name="article">The article stored in <c>CurrentEntity</c>.</param>
public ArticleViewModel(Article article)
{
    CurrentEntity = article;
}
/// <summary>
/// Loads the content of <paramref name="article"/> unless it is already loaded.
/// </summary>
/// <remarks>
/// Sets <c>web.URL</c> to the article URL, reports a "[Load]" progress message when a reporter
/// is supplied, runs <c>web.Get()</c> on the thread pool and matches the result against
/// <c>reg_con</c>. On a match, the trimmed first capture group is merged with the article
/// title into the <c>htmlString</c> template and the article is marked as loaded.
/// </remarks>
/// <param name="article">Article to fill; its <c>Content</c> and <c>IsLoaded</c> are updated on success.</param>
/// <param name="progress">Optional progress sink; nothing is reported when it is null.</param>
/// <returns>
/// <c>true</c> when the article was already loaded or the pattern matched; otherwise <c>false</c>.
/// </returns>
public override async Task<bool> ExtractArticleContent(Article article, IProgress<DownloadStringTaskAsyncExProgress> progress)
{
    // Nothing to do for an article that has already been filled in.
    if (article.IsLoaded)
    {
        return true;
    }

    web.URL = article.URL;

    if (progress != null)
    {
        var loadNotice = new DownloadStringTaskAsyncExProgress();
        loadNotice.Text = string.Format(
            "{0} [Load] {1} ({2}) {0}",
            Environment.NewLine,
            article.Title,
            article.URL);
        progress.Report(loadNotice);
    }

    // web.Get() is synchronous, so it is pushed to the thread pool; the continuation
    // does not return to the captured context.
    string rawPage = await Task.Run(() => web.Get()).ConfigureAwait(false);

    Match contentMatch = reg_con.Match(rawPage);
    if (!contentMatch.Success)
    {
        return false;
    }

    string body = contentMatch.Groups[1].Value.Trim();

    // The template carries two placeholders: "{0}" for the title and "\n{1}" for the body.
    string withTitle = htmlString.Replace("{0}", article.Title);
    article.Content = withTitle.Replace("\n{1}", body);
    article.IsLoaded = true;

    return true;
}