/// <summary>
/// Downloads every entry of <paramref name="downloadItems"/> in parallel and reports
/// progress on the console. A failed download is logged and does not stop the others.
/// </summary>
/// <param name="albumTitle">Album title; used only in console messages.</param>
/// <param name="downloadItems">Map of download URL (key) to local save path (value).</param>
/// <param name="paralleCount">Maximum number of concurrent downloads.</param>
private void DownadFiles(string albumTitle, Dictionary<string, string> downloadItems, int paralleCount = 4)
{
    if (downloadItems == null || downloadItems.Count == 0)
    {
        return;
    }

    // Keep the total immutable so the final summary reports the real number of files;
    // progress is tracked in a separate counter.
    int totalCount = downloadItems.Count;
    int remaining = totalCount;

    System.Console.WriteLine($"开始下载专辑{albumTitle}, 共{totalCount}个音频");
    Stopwatch timer = Stopwatch.StartNew();

    var options = new ParallelOptions { MaxDegreeOfParallelism = paralleCount };
    Parallel.ForEach(downloadItems, options, downloadItem =>
    {
        var url = downloadItem.Key;
        var savePath = downloadItem.Value;
        try
        {
            DownloadSingleFile(url, savePath);
        }
        catch (Exception ex)
        {
            HtmlParseLogger.Error($"下载{Path.GetFileName(savePath)}出错({url}), Exception: {ex.Message}");
        }

        // The lambda runs on several threads at once: a plain "--" on a captured local
        // loses updates, so decrement atomically and print the value this thread observed.
        int left = System.Threading.Interlocked.Decrement(ref remaining);
        Console.WriteLine($"下载完成, 剩余: {left}个");
    });

    timer.Stop();
    System.Console.WriteLine($"专辑{albumTitle}下载完成, 用时{timer.Elapsed.TotalSeconds}s, 共{totalCount}个音频");
}
/// <summary>
/// Some pages have no MP3, only an MP4 video, so the real MP4 address has to be derived
/// (example page: http://www.bavc.com.cn/w10279097.htm?page=1).
/// The method locates the player element, e.g.
/// <c>&lt;div id="plv_d69fff2eae3766ef7aff664b0ab2b61d_d"&gt;&lt;/div&gt;</c>,
/// takes the text between the first and the last underscore of its id
/// (<c>d69fff2eae3766ef7aff664b0ab2b61d</c>) and fills the template
/// <c>https://mpv.videocc.net/d69fff2eae/{0}/{1}_1.mp4</c>, where {0} is the last character
/// of that key and {1} is the key itself, giving
/// <c>https://mpv.videocc.net/d69fff2eae/d/d69fff2eae3766ef7aff664b0ab2b61d_1.mp4</c>.
/// </summary>
/// <param name="title">Item title; used only in the error log message.</param>
/// <param name="mp3InfoUrl">URL of the play page to inspect.</param>
/// <returns>
/// The guessed MP4 URL, or <see cref="string.Empty"/> when no player element is found
/// or its id does not contain a key between two underscores.
/// </returns>
public static string GuessMp4DownloadUrl(string title, string mp3InfoUrl)
{
    // Page loading mirrors LoadMp3Info.
    var homePageNode = HtmlCacheParser.LoadHtmlNode(mp3InfoUrl);

    // Looking for: <div id="plv_d69fff2eae176f68eb79b5e0575cc75b_d"></div>
    // Both the row lookup and SelectNodes may yield null when nothing matches,
    // so every step is null-propagated instead of dereferenced blindly.
    var main2 = homePageNode?.SelectSingleNode("/html/body/table[4]/tbody/tr");
    var targetNode = main2?.SelectNodes(".//div")
        ?.FirstOrDefault(x => x.Attributes["id"]?.Value?.StartsWith("plv_", StringComparison.Ordinal) == true);

    if (targetNode == null)
    {
        HtmlParseLogger.Error($"mp4 解析错误 {title}({mp3InfoUrl})页面信息不存在");
        return string.Empty;
    }

    var mp4UrlTemplate = "https://mpv.videocc.net/d69fff2eae/{0}/{1}_1.mp4";
    var idVal = targetNode.Attributes["id"].Value;
    var firstUnderscore = idVal.IndexOf('_');
    var lastUnderscore = idVal.LastIndexOf('_');

    // Require at least one character between the two underscores; otherwise the key
    // would be empty and taking its last character would throw.
    if (firstUnderscore >= 0 && lastUnderscore - firstUnderscore > 1)
    {
        // "plv_d69fff2eae176f68eb79b5e0575cc75b_d" -> "d69fff2eae176f68eb79b5e0575cc75b"
        var mp4ItemKey = idVal.Substring(firstUnderscore + 1, lastUnderscore - firstUnderscore - 1);
        return string.Format(mp4UrlTemplate, mp4ItemKey[mp4ItemKey.Length - 1], mp4ItemKey);
    }

    return string.Empty;
}
/// <summary>
/// Downloads the given MP4 items, grouped by category and then by album. Files of one
/// album are saved as {download dir}/{MP4 folder}/{category}/{album}/{Mp3FileName}.mp4.
/// Items without an MP4 URL, or whose URL was already queued for the same album,
/// are logged and skipped.
/// </summary>
/// <param name="mp4Items">Items to download.</param>
/// <param name="deleteIfExist">Currently not used by this method.</param>
/// <exception cref="ArgumentNullException"><paramref name="mp4Items"/> is null.</exception>
public void DownloadMp4Items(List<Mp4Item> mp4Items, bool deleteIfExist = false)
{
    if (mp4Items == null)
    {
        throw new ArgumentNullException(nameof(mp4Items));
    }

    var mp4DownloadDir = System.IO.Path.Combine(
        StaticVariables.GetDownloadDir(),
        StaticVariables.MP4_FOLDER_NAME);

    // CreateDirectory does nothing when the directory already exists, so no Exists check
    // is needed (the previous check was inverted and never created a missing folder).
    Directory.CreateDirectory(mp4DownloadDir);

    foreach (var categoryInfo in mp4Items.GroupBy(x => x.CategoryName))
    {
        var categoryName = categoryInfo.Key;
        foreach (var albumInfo in categoryInfo.GroupBy(x => x.AblumName))
        {
            var albumName = albumInfo.Key;

            // key: url, value: save path
            var downloadItems = new Dictionary<string, string>();

            // Resolved (and created) lazily on the first downloadable item of the album.
            string albumDir = null;

            foreach (var item in albumInfo)
            {
                if (string.IsNullOrEmpty(item.Mp4DownloadUrl))
                {
                    HtmlParseLogger.Error($"{item.Mp3FileName} 没有下载地址,跳过");
                    continue;
                }

                // Dictionary.Add throws on a duplicate key; one repeated URL must not
                // abort the whole download run.
                if (downloadItems.ContainsKey(item.Mp4DownloadUrl))
                {
                    HtmlParseLogger.Error($"{item.Mp3FileName} 下载地址重复({item.Mp4DownloadUrl}),跳过");
                    continue;
                }

                if (albumDir == null)
                {
                    albumDir = Path.Combine(mp4DownloadDir, categoryName, albumName);
                    Directory.CreateDirectory(albumDir);
                }

                downloadItems.Add(item.Mp4DownloadUrl, Path.Combine(albumDir, $"{item.Mp3FileName}.mp4"));
            }

            DownadFiles(albumName, downloadItems);
        }
    }
}
/// <summary>
/// Downloads one album: the cover image (saved into the MP3 folder) plus, for every
/// track, its MP3 file or, when the track has no MP3 URL, its MP4 file.
/// Tracks without any download URL are logged and skipped.
/// </summary>
/// <param name="mp3DownloadDir">Folder for the cover image and MP3 files; created if missing.</param>
/// <param name="mp4DownloadDir">Folder for MP4 files; created if missing.</param>
/// <param name="mediaItem">Album to download.</param>
/// <exception cref="ArgumentNullException"><paramref name="mediaItem"/> is null.</exception>
private void DownloadMediaItem(string mp3DownloadDir, string mp4DownloadDir, MediaItem mediaItem)
{
    if (mediaItem == null)
    {
        throw new ArgumentNullException(nameof(mediaItem));
    }

    // CreateDirectory does nothing when the directory already exists.
    Directory.CreateDirectory(mp3DownloadDir);
    Directory.CreateDirectory(mp4DownloadDir);

    System.Console.WriteLine($"下载专辑: {mediaItem.Title}...");

    // key: url, value: save path
    var downloadItems = new Dictionary<string, string>();

    // The cover image goes into the MP3 folder. A missing cover URL must not abort
    // the album (Dictionary.Add would throw on a null key).
    if (string.IsNullOrEmpty(mediaItem.ImageUrl))
    {
        HtmlParseLogger.Error($"{mediaItem.Title} 没有封面地址,跳过封面下载");
    }
    else
    {
        downloadItems.Add(mediaItem.ImageUrl, Path.Combine(mp3DownloadDir, StaticVariables.ALBUM_COVER_NAME));
    }

    // Collect the mp3/mp4 download addresses; a null track list is treated as empty.
    if (mediaItem.Mp3Items != null)
    {
        foreach (var mp3Info in mediaItem.Mp3Items)
        {
            string downloadUrl;
            string saveFilePath;

            if (!string.IsNullOrEmpty(mp3Info.Mp3DownloadUrl))
            {
                downloadUrl = mp3Info.Mp3DownloadUrl;
                saveFilePath = Path.Combine(mp3DownloadDir, $"{mp3Info.Title}.mp3");
            }
            else if (!string.IsNullOrEmpty(mp3Info.Mp4DownloadUrl))
            {
                downloadUrl = mp3Info.Mp4DownloadUrl;
                saveFilePath = Path.Combine(mp4DownloadDir, $"{mp3Info.Title}.mp4");
            }
            else
            {
                // Name the track as well as the album so the log entry is actionable.
                HtmlParseLogger.Error($"{mediaItem.Title} - {mp3Info.Title} 没有下载地址,跳过");
                continue;
            }

            // Two tracks sharing one URL would make Dictionary.Add throw.
            if (downloadItems.ContainsKey(downloadUrl))
            {
                HtmlParseLogger.Error($"{mediaItem.Title} - {mp3Info.Title} 下载地址重复({downloadUrl}),跳过");
                continue;
            }

            downloadItems.Add(downloadUrl, saveFilePath);
        }
    }

    // Start downloading.
    var totalCount = mediaItem.Mp3Items?.Count ?? 0;
    System.Console.WriteLine($"开始下载专辑{mediaItem.Title}, 共{totalCount}个音频");
    DownadFiles(mediaItem.Title, downloadItems);
}
/// <summary>
/// Reads a single MP3 play page (<c>mp3Info.Url</c>) and fills in
/// <c>Mp3DownloadUrl</c> (or <c>Mp4DownloadUrl</c> when the page has no MP3 link),
/// <c>Lyric</c> and <c>LyricTitle</c>. When the expected page structure is missing,
/// the problem is logged and <paramref name="mp3Info"/> is left unchanged.
/// </summary>
/// <param name="mp3Info">Item to populate; its <c>Url</c> must be set.</param>
/// <exception cref="ArgumentException">
/// <paramref name="mp3Info"/> is null or its <c>Url</c> is null or empty.
/// </exception>
static void LoadMp3Info(Mp3Info mp3Info)
{
    if (mp3Info == null || string.IsNullOrEmpty(mp3Info.Url))
    {
        throw new ArgumentException("mp3Info and mp3Info.Url must not be null or empty.", nameof(mp3Info));
    }

    // A minority of play pages have a navigation bar on the left:
    //   without navigation: http://www.bavc.com.cn/w10276740.htm?page=1
    //   with navigation:    http://www.bavc.com.cn/w10276738.htm?page=1
    var homePageNode = HtmlCacheParser.LoadHtmlNode(mp3Info.Url);
    var tdNodes = homePageNode.SelectNodes("/html/body/table[4]/tbody/tr/td");
    if (tdNodes == null || tdNodes.Count == 0)
    {
        HtmlParseLogger.Error($"{mp3Info.Title}({mp3Info.Url})页面信息不存在");
        return; // nothing to parse; continuing would dereference null
    }

    // Pages with a navigation bar need a different xpath for the main table.
    bool hasSidebar = tdNodes.Count > 1;
    string mainXPath = hasSidebar
        ? "/html/body/table[4]/tbody/tr/td[3]/center/table[last()]"
        : "/html/body/table[4]/tbody/tr/td/table[4]";
    HtmlNode mainNode = homePageNode.SelectSingleNode(mainXPath);
    if (mainNode == null)
    {
        HtmlParseLogger.Error($"{mp3Info.Title}({mp3Info.Url})页面信息不存在");
        return;
    }

    // Title (already obtained from the list page):
    //   /html/body/table[4]/tbody/tr/td/table[4]/tbody/tr[2]/td/table/tbody/tr[1]/td/p[1]/span
    // Loaded this way the page shows 'Your browser does not support', and the <a>
    // element carries the address.
    mp3Info.Mp3DownloadUrl = mainNode.SelectSingleNode(".//table[1]//table[1]//a")?.Attributes["href"]?.Value;

    // No MP3 link: try to derive an MP4 address instead.
    if (string.IsNullOrEmpty(mp3Info.Mp3DownloadUrl))
    {
        mp3Info.Mp4DownloadUrl = GuessMp4DownloadUrl(mp3Info.Title, mp3Info.Url);
    }

    // The lyric title and the lyric share one td; the title is wrapped in <strong>:
    //   /html/body/table[4]/tbody/tr/td/table[4]/tbody/tr[2]/td/table/tbody/tr[5]/td/strong
    //   /html/body/table[4]/tbody/tr/td/table[4]/tbody/tr[2]/td/table/tbody/tr[4]/td
    // The lyric row can be in one of two positions.
    HtmlNode lyricNode = mainNode.SelectSingleNode("./tbody/tr[last()-1]/td/table/tbody/tr[last()]")
        ?? mainNode.SelectSingleNode("./tbody/tr[last()-2]/td/table/tbody/tr[last()]");
    if (lyricNode == null)
    {
        // Not fatal: the null-conditional reads below leave Lyric and LyricTitle null.
        HtmlParseLogger.Error($"{mp3Info.Title}({mp3Info.Url})歌词信息不存在, 跳过");
    }

    // Most titles are in <p><strong>{Title}</strong></p>. So far only album 13
    // (《梨园名票唱腔选》, volume 3), track "06.武家坡-2 夏山楼主 高亭", has no <strong>;
    // its title is in <p>{Title}</p>.
    var lyricTitle = (lyricNode?.SelectSingleNode(".//strong") ?? lyricNode?.SelectSingleNode(".//p"))?.InnerText;
    var lyric = lyricNode?.InnerText?.Replace("\r\n", "");

    // Strip the title from the lyric when the lyric starts with it. The comparison is
    // ordinal so the match does not depend on the current culture.
    if (!string.IsNullOrEmpty(lyricTitle)
        && !string.IsNullOrEmpty(lyric)
        && lyric.Length > lyricTitle.Length
        && lyric.StartsWith(lyricTitle, StringComparison.Ordinal))
    {
        lyric = lyric.Substring(lyricTitle.Length);
    }

    mp3Info.Lyric = lyric;
    mp3Info.LyricTitle = lyricTitle;
}