/// <summary>
/// Loads the page at <paramref name="url"/> and returns the <c>li</c> elements found
/// anywhere beneath the element whose id is <c>list</c>.
/// </summary>
/// <param name="url">Address of the article list page to load.</param>
/// <returns>
/// A lazily evaluated sequence of the <c>li</c> descendants of the list element.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The loaded page contains no element with id <c>list</c>.
/// </exception>
private static IEnumerable<HtmlNode> GetArticleNodes(string url)
{
    var documentNode = new HtmlWeb().Load(url).DocumentNode;
    var articleListNode = documentNode.SelectSingleNode("//*[@id='list']");

    if (articleListNode == null)
    {
        // SelectSingleNode yields null when the XPath matches nothing. Fail with a
        // message that names the page instead of a bare NullReferenceException.
        throw new InvalidOperationException(
            "No element with id 'list' was found at " + url);
    }

    return articleListNode.Descendants().Where(node => node.Name == "li");
}
/// <summary>
/// Program entry point. Reads <c>Downloader.config</c>, collects the articles listed on
/// the standard and special English pages, and for every article dated after the stored
/// <c>Downloaded_Date</c> saves its script text and mp3 into the download folder.
/// Finally writes the newest processed article date back to the config file.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Dates that end up in file names and in the config file are machine-readable data,
    // so they are formatted independently of the user's culture and calendar.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    Console.WriteLine("Start to parse the config file...");
    var configFile = XDocument.Load("Downloader.config");
    var configs = configFile.Elements("configuration");
    BaseUrl = configs.Elements("VOA_Base_Url").First().Value;
    StandardEnglishUrl = configs.Elements("VOA_Standard_English").First().Value;
    SpecialEnglishUrl = configs.Elements("VOA_Special_English").First().Value;
    DownloadPath = configs.Elements("Download_Path").First().Value;

    var downloadedDateElement = configs.Elements("Downloaded_Date").First();
    DateTime downloadedDate;
    if (!DateTime.TryParse(downloadedDateElement.Value, invariant,
            System.Globalization.DateTimeStyles.None, out downloadedDate))
    {
        // Fall back to the current culture so a value written in a local format
        // (for example by hand) is still accepted.
        downloadedDate = DateTime.Parse(downloadedDateElement.Value);
    }
    DownloadedDate = downloadedDate;

    Console.WriteLine("Start to parse the standard & special english page...");
    IEnumerable<HtmlNode> standardArticleNodes = GetArticleNodes(BaseUrl + StandardEnglishUrl);
    IEnumerable<HtmlNode> specialArticleNodes = GetArticleNodes(BaseUrl + SpecialEnglishUrl);
    var articles = new List<Article>();
    foreach (var node in standardArticleNodes)
    {
        articles.Add(Node2Article(node, ArticleType.Standard));
    }
    foreach (var node in specialArticleNodes)
    {
        articles.Add(Node2Article(node, ArticleType.Special));
    }

    Console.WriteLine("Start to parse the details page...");
    var needToDownload = articles.Where(a => a.Date.CompareTo(DownloadedDate) > 0).ToArray();
    var downloadItems = new List<DownloadItem>();
    var last = DownloadedDate;
    var web = new HtmlWeb();
    foreach (var article in needToDownload)
    {
        var fileName = article.Title + "==" + article.Date.ToString("yyyyMMdd", invariant)
            + "==" + article.Type + "==" + article.Category;
        RemoveInvalidChars(ref fileName);

        var articleDoc = web.Load(BaseUrl + article.Url).DocumentNode;
        var mp3Node = articleDoc.SelectSingleNode("//*[@id='mp3']");
        var hrefAttribute = mp3Node == null ? null : mp3Node.Attributes["href"];
        var contentNode = articleDoc.SelectSingleNode("//*[@id='content']");
        if (hrefAttribute == null || contentNode == null)
        {
            // A details page without the expected mp3 link or content block cannot be
            // downloaded. Skip it rather than aborting the whole run; its date does not
            // move the progress marker.
            Console.WriteLine("Skip {0}: mp3 link or content not found.", article.Title);
            continue;
        }

        downloadItems.Add(new DownloadItem(fileName, hrefAttribute.Value, contentNode.InnerHtml));
        if (last.CompareTo(article.Date) < 0)
        {
            last = article.Date;
        }
    }

    Console.WriteLine("Start to download files...");
    var path = Path.Combine(Environment.CurrentDirectory, DownloadPath);
    // Make sure the target folder exists; this is a no-op when it is already there.
    Directory.CreateDirectory(path);

    // The using block releases the client even when a download throws.
    using (var webClient = new WebClient())
    {
        for (int i = 0; i < downloadItems.Count; i++)
        {
            var item = downloadItems[i];

            // The display name is the part of the file name before the first '=';
            // use the whole file name if no separator is present.
            var separatorIndex = item.FileName.IndexOf('=');
            var name = separatorIndex < 0
                ? item.FileName
                : item.FileName.Substring(0, separatorIndex);
            Console.WriteLine("Start to download {0} [{1}/{2}]", name, i + 1, downloadItems.Count);

            // The script text is stored under the bare file name, the audio next to it
            // with an ".mp3" suffix.
            using (var contentWriter = new StreamWriter(Path.Combine(path, item.FileName)))
            {
                contentWriter.Write(item.Script);
            }
            webClient.DownloadFile(item.DownloadLink, Path.Combine(path, item.FileName + ".mp3"));
        }
    }

    // Persist the newest processed date only after every download has finished.
    downloadedDateElement.Value = last.ToString("yyyy-MM-dd", invariant);
    configFile.Save("Downloader.config");

    Console.WriteLine("Please press any key to close this program...");
    Console.ReadKey();
}