/// <summary>
/// Walks every URL / configuration-file pair in <paramref name="options"/>, downloads the
/// page behind the URL and, when the response status is OK, extracts the article links
/// from it and passes them on to <c>ProcessAllPages</c>.
/// </summary>
/// <param name="options">
/// Supplies <c>UrlsAndPatterns</c> (key: URL to download, value: configuration file holding
/// the link patterns) and <c>TargetFolder</c>, which is forwarded to <c>ProcessAllPages</c>.
/// </param>
/// <remarks>
/// A response whose status is not OK is reported on the console via its status description
/// and the loop moves on to the next pair.
/// </remarks>
public void Download(DownloadServiceOption options)
{
    var requestWorker = new WebRequestWorker();

    foreach (var entry in options.UrlsAndPatterns)
    {
        var address = entry.Key;
        var patternFile = entry.Value;

        Console.WriteLine("Start processing " + address);

        var response = requestWorker.DownloadResponse(new CrawlingOption(address));

        // Anything other than 200 OK: report the status text and skip this entry.
        if (response.StatusCode != HttpStatusCode.OK)
        {
            Console.WriteLine(response.StatusDescription);
            continue;
        }

        // The configuration file is read only after a successful download.
        var settings = SerializationHelper.DeserializeFrom<List<DataItem>>(patternFile);
        var pagePattern = settings.Get(Constants.PagePattern).Value;
        var filePattern = settings.Get(Constants.FilePattern).Value;

        var responseText = response.ReadAsText();
        var articleLinks = GetArticleLinks(responseText, pagePattern);

        ProcessAllPages(articleLinks, filePattern, options.TargetFolder);
    }
}
/// <summary>
/// Downloads the resource at <paramref name="fileUri"/> and returns its text as raw
/// (unencoded) markup for rendering in a view.
/// </summary>
/// <param name="html">The HTML helper this extension method is invoked on.</param>
/// <param name="fileUri">Address of the remote resource to download.</param>
/// <returns>
/// The downloaded text wrapped by <c>html.Raw</c>, or <see cref="MvcHtmlString.Empty"/> when
/// <paramref name="fileUri"/> is null, empty or whitespace, or when the response status is
/// not OK.
/// </returns>
/// <remarks>
/// SECURITY NOTE(review): the remote text is emitted without HTML encoding, so any markup or
/// script it contains reaches the page as-is. Only use this with sources that are trusted.
/// </remarks>
public static IHtmlString GetRemoteContent(this HtmlHelper html, string fileUri)
{
    if (string.IsNullOrWhiteSpace(fileUri))
    {
        return MvcHtmlString.Empty;
    }

    var response = new WebRequestWorker().DownloadResponse(new CrawlingOption(fileUri));

    // Do not inject the body of a failed request (e.g. a remote error page) into the view.
    if (response.StatusCode != System.Net.HttpStatusCode.OK)
    {
        return MvcHtmlString.Empty;
    }

    return html.Raw(response.ReadAsText());
}