/// <summary>
        /// Walks every entry of <c>options.UrlsAndPatterns</c>, requests the entry's key through a
        /// <c>WebRequestWorker</c>, and hands the links found in an OK response to <c>ProcessAllPages</c>.
        /// </summary>
        /// <param name="options">
        /// Supplies <c>UrlsAndPatterns</c> (enumerated as key/value entries) and the <c>TargetFolder</c>
        /// that is forwarded to <c>ProcessAllPages</c>.
        /// </param>
        public void Download(DownloadServiceOption options)
        {
            var requestWorker = new WebRequestWorker();

            foreach (var entry in options.UrlsAndPatterns)
            {
                var sourceUri = entry.Key;
                // NOTE(review): presumably the location of a serialized pattern configuration - confirm against caller.
                var patternFile = entry.Value;

                Console.WriteLine("Start processing " + sourceUri);

                var response = requestWorker.DownloadResponse(new CrawlingOption(sourceUri));

                // Anything other than 200 OK is only reported; move on to the next entry.
                if (response.StatusCode != HttpStatusCode.OK)
                {
                    Console.WriteLine(response.StatusDescription);
                    continue;
                }

                // The configuration is loaded only once the request is known to have succeeded.
                var settings = SerializationHelper.DeserializeFrom<List<DataItem>>(patternFile);
                var pagePattern = settings.Get(Constants.PagePattern).Value;
                var filePattern = settings.Get(Constants.FilePattern).Value;

                var articleLinks = GetArticleLinks(response.ReadAsText(), pagePattern);
                ProcessAllPages(articleLinks, filePattern, options.TargetFolder);
            }
        }
示例#2
0
        /// <summary>
        /// Downloads the resource at <paramref name="fileUri"/> and returns its text as raw, unencoded markup.
        /// </summary>
        /// <param name="html">The HTML helper this extension method is invoked on.</param>
        /// <param name="fileUri">
        /// Address of the remote resource. A null, empty, or whitespace-only value yields an empty result.
        /// </param>
        /// <returns>
        /// The downloaded text wrapped by <c>html.Raw</c>, or <c>MvcHtmlString.Empty</c> when
        /// <paramref name="fileUri"/> is blank or the response status is not OK.
        /// </returns>
        public static IHtmlString GetRemoteContent(this HtmlHelper html, string fileUri)
        {
            if (string.IsNullOrWhiteSpace(fileUri))
            {
                return MvcHtmlString.Empty;
            }

            var response = new WebRequestWorker().DownloadResponse(new CrawlingOption(fileUri));

            // Render nothing for failed requests so that an error response body is never
            // injected into the page as if it were the requested content.
            if (response.StatusCode != System.Net.HttpStatusCode.OK)
            {
                return MvcHtmlString.Empty;
            }

            // NOTE(review): the text is emitted without HTML encoding, so fileUri must point to a
            // trusted source - confirm that callers never pass user-controlled addresses.
            return html.Raw(response.ReadAsText());
        }