Exemple #1
0
        /// <summary>
        /// Iterates over <c>options.UrlsAndPatterns</c>, requests each key as a URL and, when the
        /// response status is OK, extracts article links from the response text and passes them to
        /// <c>ProcessAllPages</c>.
        /// </summary>
        /// <param name="options">Supplies the URL/config-file pairs and the target folder.</param>
        public void Download(DownloadServiceOption options)
        {
            var requestWorker = new WebRequestWorker();

            foreach (var entry in options.UrlsAndPatterns)
            {
                var categoryUrl = entry.Key;
                var patternFile = entry.Value;

                Console.WriteLine("Start processing " + categoryUrl);

                var response = requestWorker.DownloadResponse(new CrawlingOption(categoryUrl));

                // A non-OK response is only reported; continue with the next entry.
                if (response.StatusCode != HttpStatusCode.OK)
                {
                    Console.WriteLine(response.StatusDescription);
                    continue;
                }

                // The entry's value is deserialized into a list of DataItem that holds both patterns.
                var settings    = SerializationHelper.DeserializeFrom<List<DataItem>>(patternFile);
                var pagePattern = settings.Get(Constants.PagePattern).Value;
                var filePattern = settings.Get(Constants.FilePattern).Value;

                var pageLinks = GetArticleLinks(response.ReadAsText(), pagePattern);
                ProcessAllPages(pageLinks, filePattern, options.TargetFolder);
            }
        }
        /// <summary>
        /// For every pair in <c>options.UrlsAndPatterns</c>: downloads the key as a URL, and if the
        /// status is OK reads the page/file patterns from the pair's value, collects the article
        /// links and forwards them to <c>ProcessAllPages</c>. Otherwise the status description is printed.
        /// </summary>
        /// <param name="options">Carries the URL/config-file pairs and the target folder.</param>
        public void Download(DownloadServiceOption options)
        {
            var crawler = new WebRequestWorker();

            foreach (var pair in options.UrlsAndPatterns)
            {
                var address = pair.Key;
                var settingsFile = pair.Value;

                Console.WriteLine("Start processing " + address);

                var listing = crawler.DownloadResponse(new CrawlingOption(address));

                if (listing.StatusCode != HttpStatusCode.OK)
                {
                    // Failed request: print the reason and go on with the remaining pairs.
                    Console.WriteLine(listing.StatusDescription);
                }
                else
                {
                    var items = SerializationHelper.DeserializeFrom<List<DataItem>>(settingsFile);
                    var patternForPages = items.Get(Constants.PagePattern).Value;
                    var patternForFiles = items.Get(Constants.FilePattern).Value;

                    var articleLinks = GetArticleLinks(listing.ReadAsText(), patternForPages);
                    ProcessAllPages(articleLinks, patternForFiles, options.TargetFolder);
                }
            }
        }
        /// <summary>
        /// Runs <c>DhtnDownloadService.Download</c> against a single article URL with a hard-coded
        /// local target folder. The method performs no assertions.
        /// </summary>
        public void CanDownloadFiles()
        {
            var service = new DhtnDownloadService();
            var options = new DownloadServiceOption()
            {
                // Verbatim string: backslashes are already literal, so they must not be doubled
                // (doubling them puts "\\" separators into the actual path).
                TargetFolder    = @"D:\Wip\Practices\OpenSource\ts2015\DHTN",
                UrlsAndPatterns = new Dictionary<string, string>()
                {
                    { "http://tuyensinh.tnu.edu.vn/article/details/363", @"/article/Download/\d+" }
                }
            };

            service.Download(options);
        }
        /// <summary>
        /// Builds a <c>DhxdDownloadService</c> and a <c>DownloadServiceOption</c> from hard-coded
        /// values and calls <c>Download</c> once. The method performs no assertions.
        /// </summary>
        public void CanDownloadFiles()
        {
            var downloader = new DhxdDownloadService(
                "*****@*****.**",
                @"D:\Wip\Practices\ts2015\Docs\GoogleServiceAccount\tuyensinhquocgia-f36c1d0e7788.p12",
                "TSQG");

            var request = new DownloadServiceOption();
            request.TargetFolder = @"E:\Projects\github\ts2015\DHXD-HN";

            // Single entry: announcement page URL mapped to its link pattern.
            var urlToPattern = new Dictionary<string, string>();
            urlToPattern.Add(
                "http://tuyensinh.nuce.edu.vn/tin-tuc/thong-bao-diem-chuan-va-danh-sach-trung-tuyen-dai-hoc-he-chinh-quy",
                @".*\/(.*?.pdf)$|google.com");
            request.UrlsAndPatterns = urlToPattern;

            downloader.Download(request);
        }
        /// <summary>
        /// Calls <c>DhxdDownloadService.Download</c> for one hard-coded announcement URL and
        /// target folder. The method performs no assertions.
        /// </summary>
        public void CanDownloadFiles()
        {
            const string accountId   = "*****@*****.**";
            const string keyFilePath = @"D:\Wip\Practices\ts2015\Docs\GoogleServiceAccount\tuyensinhquocgia-f36c1d0e7788.p12";
            const string appName     = "TSQG";
            const string outputDir   = @"E:\Projects\github\ts2015\DHXD-HN";
            const string articleUrl  = "http://tuyensinh.nuce.edu.vn/tin-tuc/thong-bao-diem-chuan-va-danh-sach-trung-tuyen-dai-hoc-he-chinh-quy";
            const string linkPattern = @".*\/(.*?.pdf)$|google.com";

            var dhxdService = new DhxdDownloadService(accountId, keyFilePath, appName);

            var downloadOptions = new DownloadServiceOption()
            {
                TargetFolder    = outputDir,
                UrlsAndPatterns = new Dictionary<string, string>()
                {
                    { articleUrl, linkPattern }
                }
            };

            dhxdService.Download(downloadOptions);
        }
Exemple #6
0
        /// <summary>
        /// Calls <c>DhdlPageDownloadService.Download</c> once per page link, each time with an
        /// option object containing only that link paired with <paramref name="filePattern"/>.
        /// </summary>
        /// <param name="pages">Page links to process, in enumeration order.</param>
        /// <param name="filePattern">Pattern stored as the value for every page link.</param>
        /// <param name="targetFolder">Folder assigned to each option object's <c>TargetFolder</c>.</param>
        protected override void ProcessAllPages(IEnumerable<string> pages, string filePattern, string targetFolder)
        {
            var pageDownloader = new DhdlPageDownloadService();

            foreach (var link in pages)
            {
                var pageOptions = new DownloadServiceOption();
                pageOptions.TargetFolder = targetFolder;

                // One-entry map: this page only.
                var singlePage = new Dictionary<string, string>();
                singlePage.Add(link, filePattern);
                pageOptions.UrlsAndPatterns = singlePage;

                pageDownloader.Download(pageOptions);
            }
        }
        /// <summary>
        /// Hands every page link to a <c>HvtcPageDownloadService</c>, one <c>Download</c> call per
        /// link, using <paramref name="filePattern"/> as the pattern and
        /// <paramref name="targetFolder"/> as the destination.
        /// </summary>
        /// <param name="pages">Links of the pages to download from.</param>
        /// <param name="filePattern">Pattern associated with each page link.</param>
        /// <param name="targetFolder">Destination folder passed through to the page service.</param>
        protected override void ProcessAllPages(IEnumerable<string> pages, string filePattern, string targetFolder)
        {
            var hvtcPages = new HvtcPageDownloadService();

            foreach (var currentPage in pages)
            {
                var perPageOptions = new DownloadServiceOption();
                perPageOptions.TargetFolder = targetFolder;

                var linkToPattern = new Dictionary<string, string>();
                linkToPattern.Add(currentPage, filePattern);
                perPageOptions.UrlsAndPatterns = linkToPattern;

                hvtcPages.Download(perPageOptions);
            }
        }
        /// <summary>
        /// Downloads one file link into <c>options.TargetFolder</c>, skipping it when a file with the
        /// resolved name is already present there.
        /// </summary>
        /// <param name="options">Provides the target folder and the URL/pattern pairs.</param>
        /// <param name="file">Link of the file to download.</param>
        /// <param name="kvp">Pair whose key selects the matching entry in <c>options.UrlsAndPatterns</c>.</param>
        /// <remarks>
        /// Every exception is caught and written to the console, so a failing link does not
        /// propagate to the caller.
        /// </remarks>
        private void ProcessDownload(DownloadServiceOption options, string file, KeyValuePair<string, IEnumerable<string>> kvp)
        {
            try
            {
                var pattern = options.UrlsAndPatterns.First(x => x.Key == kvp.Key).Value;
                var fileName = GetFileName(file, pattern);

                // Cheap pre-check: when the name is known up front, avoid the request entirely.
                if (!string.IsNullOrWhiteSpace(fileName) &&
                    File.Exists(Path.Combine(options.TargetFolder, fileName)))
                {
                    Console.WriteLine(" -> Exists");
                    return;
                }

                var fileResult = _worker.DownloadResponse(new CrawlingOption(file));

                // Mirror the status check done for article pages: never persist the body of a
                // failed response, otherwise later runs would report the bogus file as existing.
                if (fileResult.StatusCode != HttpStatusCode.OK)
                {
                    Console.WriteLine(fileResult.StatusDescription);
                    return;
                }

                // Fall back to the last segment of the response URI when the pattern gave no name.
                if (string.IsNullOrWhiteSpace(fileName) && !string.IsNullOrWhiteSpace(fileResult.ResponseUri))
                {
                    fileName = Path.GetFileName(fileResult.ResponseUri);
                }

                // Without a name there is nothing sensible to write; report it instead of letting
                // Path.Combine / File.WriteAllBytes fail with a misleading exception.
                if (string.IsNullOrWhiteSpace(fileName))
                {
                    Console.WriteLine(" -> Skipped, no file name could be determined for " + file);
                    return;
                }

                var filePath = Path.Combine(options.TargetFolder, fileName);
                if (File.Exists(filePath))
                {
                    Console.WriteLine(" -> Exists");
                    return;
                }

                // CreateDirectory does nothing when the folder already exists.
                Directory.CreateDirectory(options.TargetFolder);

                File.WriteAllBytes(filePath, fileResult.Content);
                Console.WriteLine(fileName + " -> Done");
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
            }
        }
        /// <summary>
        /// Fetches every URL in <c>options.UrlsAndPatterns</c>, collects the file links found in each
        /// OK response, then downloads the links one by one via <c>ProcessDownload</c> while printing
        /// a running "Process i/total" counter.
        /// </summary>
        /// <param name="options">Provides the URL/pattern pairs and is passed on to <c>ProcessDownload</c>.</param>
        public void Download(DownloadServiceOption options)
        {
            var fileLinks = new Dictionary<string, IEnumerable<string>>();

            foreach (var urlAndPattern in options.UrlsAndPatterns)
            {
                var uri = urlAndPattern.Key;
                var filePattern = urlAndPattern.Value;

                Console.WriteLine("Start getting " + uri);

                var articleResult = _worker.DownloadResponse(new CrawlingOption(uri));

                if (articleResult.StatusCode == HttpStatusCode.OK)
                {
                    // Materialize once: the links are enumerated twice below (for the total and
                    // for the download loop), so a lazy sequence would otherwise be evaluated
                    // twice and the printed total could disagree with what is processed.
                    IEnumerable<string> links = GetFileLinks(articleResult.ReadAsText(), filePattern);
                    fileLinks.Add(uri, links.ToList());
                }
                else
                {
                    Console.WriteLine(articleResult.StatusDescription);
                }
            }

            var count = 0;
            var total = fileLinks.Values.SelectMany(x => x).Count();

            foreach (var kvp in fileLinks)
            {
                foreach (var file in kvp.Value)
                {
                    count += 1;
                    Console.WriteLine("Process " + count + "/" + total);

                    ProcessDownload(options, file, kvp);
                }
            }
        }