コード例 #1
0
ファイル: Program.cs プロジェクト: shenlei149/MyPrecious
 /// <summary>
 /// Loads the page at <paramref name="url"/> and returns every <c>li</c> element found
 /// beneath the element whose id is <c>list</c>.
 /// </summary>
 /// <param name="url">Address of the page to load.</param>
 /// <returns>A lazily evaluated sequence of the <c>li</c> descendants of the list element.</returns>
 /// <exception cref="InvalidOperationException">
 /// The loaded page contains no element with id <c>list</c>.
 /// </exception>
 private static IEnumerable<HtmlNode> GetArticleNodes(string url)
 {
     var documentNode = new HtmlWeb().Load(url).DocumentNode;

     // SelectSingleNode yields null when nothing matches; fail with a clear message
     // instead of letting the next line raise a bare NullReferenceException.
     var articleListNode = documentNode.SelectSingleNode("//*[@id='list']");
     if (articleListNode == null)
     {
         throw new InvalidOperationException(
             "Could not find the article list (id='list') on page: " + url);
     }

     return articleListNode.Descendants().Where(node => node.Name == "li");
 }
コード例 #2
0
ファイル: Program.cs プロジェクト: shenlei149/MyPrecious
        /// <summary>
        /// Entry point. Reads the settings from <c>Downloader.config</c>, collects the articles
        /// listed on the standard and special English pages, downloads the script and MP3 of
        /// every article dated after the recorded download date, and finally writes the newest
        /// processed article date back to the config file.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Start to parse the config file...");
            var configFile = XDocument.Load("Downloader.config");
            var configs = configFile.Elements("configuration");

            BaseUrl = configs.Elements("VOA_Base_Url").First().Value;
            StandardEnglishUrl = configs.Elements("VOA_Standard_English").First().Value;
            SpecialEnglishUrl = configs.Elements("VOA_Special_English").First().Value;
            DownloadPath = configs.Elements("Download_Path").First().Value;

            // The date is persisted below as "yyyy-MM-dd"; parse and format it with the
            // invariant culture so the round trip does not depend on the machine's locale.
            var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;
            DownloadedDate = DateTime.Parse(
                configs.Elements("Downloaded_Date").First().Value,
                invariantCulture);

            Console.WriteLine("Start to parse the standard & special english page...");
            IEnumerable<HtmlNode> standardArticleNodes = GetArticleNodes(BaseUrl + StandardEnglishUrl);
            IEnumerable<HtmlNode> specialArticleNodes = GetArticleNodes(BaseUrl + SpecialEnglishUrl);

            var articles = new List<Article>();

            foreach (var node in standardArticleNodes)
            {
                articles.Add(Node2Article(node, ArticleType.Standard));
            }

            foreach (var node in specialArticleNodes)
            {
                articles.Add(Node2Article(node, ArticleType.Special));
            }

            Console.WriteLine("Start to parse the details page...");
            var needToDownload = articles.Where(a => a.Date.CompareTo(DownloadedDate) > 0).ToArray();
            var downloadItems = new List<DownloadItem>();
            var last = DownloadedDate;
            foreach (var article in needToDownload)
            {
                // File name layout: Title==yyyyMMdd==Type==Category.
                var fileName = article.Title + "=="
                    + article.Date.ToString("yyyyMMdd") + "=="
                    + article.Type + "=="
                    + article.Category;
                RemoveInvalidChars(ref fileName);

                var articleDoc = new HtmlWeb().Load(BaseUrl + article.Url).DocumentNode;
                var mp3Node = articleDoc.SelectSingleNode("//*[@id='mp3']");
                var mp3Link = mp3Node == null ? null : mp3Node.Attributes["href"];
                var contentNode = articleDoc.SelectSingleNode("//*[@id='content']");

                // A details page without an mp3 link or content block cannot be downloaded;
                // skip it instead of aborting the whole run with a NullReferenceException.
                if (mp3Link == null || contentNode == null)
                {
                    Console.WriteLine("Skip {0}: mp3 link or content not found.", article.Title);
                    continue;
                }

                downloadItems.Add(new DownloadItem(fileName, mp3Link.Value, contentNode.InnerHtml));

                // Track the newest article date that was queued for download.
                if (last.CompareTo(article.Date) < 0)
                {
                    last = article.Date;
                }
            }

            Console.WriteLine("Start to download files...");
            var currentFolder = Environment.CurrentDirectory;
            var path = Path.Combine(currentFolder, DownloadPath);

            // Make sure the target folder exists; this is a no-op when it already does.
            Directory.CreateDirectory(path);

            // The using block disposes the client even when a download throws.
            using (var webClient = new WebClient())
            {
                for (int i = 0; i < downloadItems.Count; i++)
                {
                    var item = downloadItems[i];

                    // Display only the part in front of the first '=' separator.
                    var separatorIndex = item.FileName.IndexOf('=');
                    var name = separatorIndex < 0
                        ? item.FileName
                        : item.FileName.Substring(0, separatorIndex);
                    Console.WriteLine("Start to download {0} [{1}/{2}]", name, i + 1, downloadItems.Count);

                    using (var contentWriter = new StreamWriter(Path.Combine(path, item.FileName)))
                    {
                        contentWriter.Write(item.Script);
                    }

                    webClient.DownloadFile(item.DownloadLink, Path.Combine(path, item.FileName + ".mp3"));
                }
            }

            // Persist the newest processed date only after every download has succeeded.
            configs.Elements("Downloaded_Date").First().Value = last.ToString("yyyy-MM-dd", invariantCulture);
            configFile.Save("Downloader.config");

            Console.WriteLine("Please press any key to close this program...");
            Console.ReadKey();
        }