コード例 #1
0
        /// <summary>
        /// Downloads every entry of <paramref name="downloadItems"/> in parallel and reports
        /// progress on the console. A failure of a single file is logged and does not stop the others.
        /// </summary>
        /// <param name="albumTitle">Album title; used only in the console messages.</param>
        /// <param name="downloadItems">Map of source URL (key) to local save path (value). Null or empty means there is nothing to do.</param>
        /// <param name="paralleCount">Maximum number of concurrent downloads, handed to <c>ParallelOptions.MaxDegreeOfParallelism</c>.</param>
        private void DownadFiles(string albumTitle, Dictionary <string, string> downloadItems, int paralleCount = 4)
        {
            if (downloadItems == null || downloadItems.Count == 0)
            {
                return;
            }

            // The total stays immutable so the final summary reports the real number of files;
            // progress is tracked in a separate counter.
            int totalCount = downloadItems.Count;
            int remaining  = totalCount;

            System.Console.WriteLine($"开始下载专辑{albumTitle}, 共{totalCount}个音频");
            Stopwatch timer = Stopwatch.StartNew();

            var options = new ParallelOptions {
                MaxDegreeOfParallelism = paralleCount
            };

            Parallel.ForEach(downloadItems, options, downloadItem =>
            {
                var url      = downloadItem.Key;
                var savePath = downloadItem.Value;

                try
                {
                    DownloadSingleFile(url, savePath);
                }
                catch (Exception ex)
                {
                    // Best effort: log the failed file and carry on with the rest of the album.
                    HtmlParseLogger.Error($"下载{Path.GetFileName(savePath)}出错({url}), Exception: {ex.Message}");
                }

                // This lambda runs on several threads at once, so the decrement must be atomic.
                var left = System.Threading.Interlocked.Decrement(ref remaining);
                Console.WriteLine($"下载完成, 剩余: {left}个");
            });
            timer.Stop();
            System.Console.WriteLine($"专辑{albumTitle}下载完成, 用时{timer.Elapsed.TotalSeconds}s, 共{totalCount}个音频");
        }
コード例 #2
0
        /// <summary>
        /// Some pages carry no MP3, only an MP4 video (e.g. http://www.bavc.com.cn/w10279097.htm?page=1).
        /// This method finds the player element id on the page and derives the MP4 download URL from it.
        /// </summary>
        /// <remarks>
        /// The player element is a div whose id looks like plv_d69fff2eae3766ef7aff664b0ab2b61d_d.
        /// Only the text between the first and the last underscore is used as the key
        /// (d69fff2eae3766ef7aff664b0ab2b61d).
        /// URL template: https://mpv.videocc.net/d69fff2eae/{0}/{1}_1.mp4 where {0} is the last
        /// character of the key (d) and {1} is the whole key, which gives
        /// https://mpv.videocc.net/d69fff2eae/d/d69fff2eae3766ef7aff664b0ab2b61d_1.mp4
        /// </remarks>
        /// <param name="title">Track title; used only in the error log.</param>
        /// <param name="mp3InfoUrl">URL of the play page to inspect.</param>
        /// <returns>The guessed MP4 URL, or an empty string when no usable player id is found.</returns>
        public static string GuessMp4DownloadUrl(string title, string mp3InfoUrl)
        {
            // Page loading mirrors Program.LoadMp3Info.
            var homePageNode = HtmlCacheParser.LoadHtmlNode(mp3InfoUrl);

            // Any of these lookups may come back null when the page layout differs,
            // so the chain is null-tolerant and a single check below covers all of them.
            var main2      = homePageNode?.SelectSingleNode("/html/body/table[4]/tbody/tr");
            var targetNode = main2?.SelectNodes(".//div")
                             ?.FirstOrDefault(x => x.Attributes["id"]?.Value?.StartsWith("plv_", System.StringComparison.Ordinal) == true);

            if (targetNode == null)
            {
                HtmlParseLogger.Error($"mp4 解析错误 {title}({mp3InfoUrl})页面信息不存在");
                return(string.Empty);
            }

            var idVal  = targetNode.Attributes["id"].Value;
            var first_ = idVal.IndexOf('_');
            var last_  = idVal.LastIndexOf('_');

            // Require at least one character between the two underscores; an id such as
            // "plv__d" would otherwise yield an empty key and fail on the last-character lookup.
            if (first_ > -1 && last_ - first_ > 1)
            {
                // "plv_d69fff2eae176f68eb79b5e0575cc75b_d" -> "d69fff2eae176f68eb79b5e0575cc75b"
                var mp4ItemKey = idVal.Substring(first_ + 1, last_ - first_ - 1);
                var lastChar   = mp4ItemKey[mp4ItemKey.Length - 1];

                return($"https://mpv.videocc.net/d69fff2eae/{lastChar}/{mp4ItemKey}_1.mp4");
            }

            return(string.Empty);
        }
コード例 #3
0
        /// <summary>
        /// Downloads the given MP4 items, grouped by category and then by album, into
        /// {download dir}/{MP4 folder}/{category}/{album}/{file name}.mp4.
        /// Items without a download URL, or whose URL was already queued for the same album,
        /// are logged and skipped.
        /// </summary>
        /// <param name="mp4Items">Items to download.</param>
        /// <param name="deleteIfExist">Not read by this method at present.</param>
        public void DownloadMp4Items(List <Mp4Item> mp4Items, bool deleteIfExist = false)
        {
            var mp4DownloadDIr = System.IO.Path.Combine(
                StaticVariables.GetDownloadDir(),
                StaticVariables.MP4_FOLDER_NAME);

            // CreateDirectory does nothing when the folder already exists, so no Exists check
            // is needed (the previous check was inverted and never created a missing folder).
            Directory.CreateDirectory(mp4DownloadDIr);

            foreach (var categoryInfo in mp4Items.GroupBy(x => x.CategoryName))
            {
                var categoryName = categoryInfo.Key;
                foreach (var albumInfo in categoryInfo.GroupBy(x => x.AblumName))
                {
                    var albumName = albumInfo.Key;

                    // Resolved lazily so the album folder is only created once there is
                    // at least one downloadable item in it.
                    string albumDir = null;

                    // key: url, value: save path
                    var downloadItems = new Dictionary <string, string>();

                    foreach (var item in albumInfo)
                    {
                        if (string.IsNullOrEmpty(item.Mp4DownloadUrl))
                        {
                            HtmlParseLogger.Error($"{item.Mp3FileName} 没有下载地址,跳过");
                            continue;
                        }

                        // Dictionary.Add throws on a repeated key; a duplicated URL must not abort the whole run.
                        if (downloadItems.ContainsKey(item.Mp4DownloadUrl))
                        {
                            HtmlParseLogger.Error($"{item.Mp3FileName} 下载地址重复({item.Mp4DownloadUrl}), 跳过");
                            continue;
                        }

                        if (albumDir == null)
                        {
                            albumDir = Path.Combine(mp4DownloadDIr, categoryName, albumName);
                            Directory.CreateDirectory(albumDir);
                        }

                        downloadItems.Add(item.Mp4DownloadUrl, Path.Combine(albumDir, $"{item.Mp3FileName}.mp4"));
                    }

                    DownadFiles(albumName, downloadItems);
                }
            }
        }
コード例 #4
0
        /// <summary>
        /// Downloads one album: the cover image plus, for every track, its MP3 or (when there is
        /// no MP3 URL) its MP4. Tracks with neither URL are logged and skipped.
        /// </summary>
        /// <param name="mp3DownloadDir">Folder for MP3 files and the cover image; created if missing.</param>
        /// <param name="mp4DownloadDir">Folder for MP4 files; created if missing.</param>
        /// <param name="mediaItem">Album to download.</param>
        private void DownloadMediaItem(string mp3DownloadDir, string mp4DownloadDir, MediaItem mediaItem)
        {
            // CreateDirectory does nothing when the folder already exists.
            Directory.CreateDirectory(mp3DownloadDir);
            Directory.CreateDirectory(mp4DownloadDir);

            System.Console.WriteLine($"下载专辑: {mediaItem.Title}...");

            // key: url, value: save path
            var downloadItems = new Dictionary <string, string>();

            // The cover image goes into the MP3 folder. A missing cover URL is skipped because
            // a null dictionary key would throw.
            if (!string.IsNullOrEmpty(mediaItem.ImageUrl))
            {
                downloadItems.Add(mediaItem.ImageUrl, Path.Combine(mp3DownloadDir, StaticVariables.ALBUM_COVER_NAME));
            }

            // Collect the mp3/mp4 download addresses.
            var mp3Items = mediaItem.Mp3Items;
            if (mp3Items != null)
            {
                foreach (var mp3Info in mp3Items)
                {
                    string downloadUrl;
                    string saveFilePath;

                    if (!string.IsNullOrEmpty(mp3Info.Mp3DownloadUrl))
                    {
                        downloadUrl  = mp3Info.Mp3DownloadUrl;
                        saveFilePath = Path.Combine(mp3DownloadDir, $"{mp3Info.Title}.mp3");
                    }
                    else if (!string.IsNullOrEmpty(mp3Info.Mp4DownloadUrl))
                    {
                        downloadUrl  = mp3Info.Mp4DownloadUrl;
                        saveFilePath = Path.Combine(mp4DownloadDir, $"{mp3Info.Title}.mp4");
                    }
                    else
                    {
                        // Name the track as well as the album so the log identifies what was skipped.
                        HtmlParseLogger.Error($"{mediaItem.Title} - {mp3Info.Title} 没有下载地址,跳过");
                        continue;
                    }

                    // Dictionary.Add throws on a repeated key; skip a URL that is already queued.
                    if (downloadItems.ContainsKey(downloadUrl))
                    {
                        HtmlParseLogger.Error($"{mediaItem.Title} - {mp3Info.Title} 下载地址重复({downloadUrl}), 跳过");
                        continue;
                    }

                    downloadItems.Add(downloadUrl, saveFilePath);
                }
            }

            // Start downloading.
            var totalCount = mp3Items?.Count ?? 0;

            System.Console.WriteLine($"开始下载专辑{mediaItem.Title}, 共{totalCount}个音频");

            DownadFiles(mediaItem.Title, downloadItems);
        }
コード例 #5
0
        /// <summary>
        /// Reads the details of one track from its play page (<c>mp3Info.Url</c>) and stores them
        /// on <paramref name="mp3Info"/>: the MP3 download URL (or a guessed MP4 URL when the
        /// page has no MP3), the lyric title and the lyric text.
        /// When the expected page structure is missing, an error is logged and
        /// <paramref name="mp3Info"/> is left unchanged.
        /// </summary>
        /// <param name="mp3Info">Track whose <c>Url</c> points at the play page.</param>
        /// <exception cref="ArgumentException"><paramref name="mp3Info"/> is null or has no URL.</exception>
        static void LoadMp3Info(Mp3Info mp3Info)
        {
            if (mp3Info == null || string.IsNullOrEmpty(mp3Info.Url))
            {
                throw new ArgumentException("mp3Info and mp3Info.Url must be provided.", nameof(mp3Info));
            }

            // A minority of play pages have a navigation sidebar on the left.
            // Without sidebar: http://www.bavc.com.cn/w10276740.htm?page=1
            // With sidebar:    http://www.bavc.com.cn/w10276738.htm?page=1
            var homePageNode = HtmlCacheParser.LoadHtmlNode(mp3Info.Url);
            var tdNodes      = homePageNode?.SelectNodes("/html/body/table[4]/tbody/tr/td");

            if (tdNodes == null || tdNodes.Count == 0)
            {
                HtmlParseLogger.Error($"{mp3Info.Title}({mp3Info.Url})页面信息不存在");
                // Nothing to parse; continuing would dereference the missing node list.
                return;
            }

            // Pages with a sidebar keep the main content under a different xpath.
            bool hasSidebar = tdNodes.Count > 1;

            HtmlNode mainNode = hasSidebar
                ? homePageNode.SelectSingleNode("/html/body/table[4]/tbody/tr/td[3]/center/table[last()]")
                : homePageNode.SelectSingleNode("/html/body/table[4]/tbody/tr/td/table[4]");

            if (mainNode == null)
            {
                HtmlParseLogger.Error($"{mp3Info.Title}({mp3Info.Url})页面信息不存在");
                return;
            }

            // The track title was already read from the list page:
            // /html/body/table[4]/tbody/tr/td/table[4]/tbody/tr[2]/td/table/tbody/tr[1]/td/p[1]/span

            // Loaded this way the player shows 'Your browser does not support', but the
            // fallback <a> element spells out the file address.
            mp3Info.Mp3DownloadUrl = mainNode.SelectSingleNode(".//table[1]//table[1]//a")?.Attributes["href"]?.Value;

            // No MP3 on the page: try to derive the MP4 address instead.
            if (string.IsNullOrEmpty(mp3Info.Mp3DownloadUrl))
            {
                mp3Info.Mp4DownloadUrl = GuessMp4DownloadUrl(mp3Info.Title, mp3Info.Url);
            }

            // Lyric title and lyric text share one td; the title is normally wrapped in <strong>.
            // /html/body/table[4]/tbody/tr/td/table[4]/tbody/tr[2]/td/table/tbody/tr[5]/td/strong
            // /html/body/table[4]/tbody/tr/td/table[4]/tbody/tr[2]/td/table/tbody/tr[4]/td
            // The lyric row sits in one of two possible places.
            HtmlNode lyricNode = mainNode.SelectSingleNode("./tbody/tr[last()-1]/td/table/tbody/tr[last()]")
                                 ?? mainNode.SelectSingleNode("./tbody/tr[last()-2]/td/table/tbody/tr[last()]");

            if (lyricNode == null)
            {
                // Lyrics are optional: log it and fall through with null lyric fields.
                HtmlParseLogger.Error($"{mp3Info.Title}({mp3Info.Url})歌词信息不存在, 跳过");
            }

            // Most titles are in <p><strong>{Title}</strong></p>; at least one known page has
            // no <strong> and puts the title directly in <p>{Title}</p>.
            var lyricTitle = (lyricNode?.SelectSingleNode(".//strong") ??
                              lyricNode?.SelectSingleNode(".//p"))?.InnerText;
            var lyric = lyricNode?.InnerText?.Replace("\r\n", "");

            // Strip the title from the lyric text when it is a leading prefix. The comparison
            // is ordinal so the result does not depend on the current culture.
            if (!string.IsNullOrEmpty(lyricTitle) &&
                !string.IsNullOrEmpty(lyric) &&
                lyric.Length > lyricTitle.Length &&
                lyric.StartsWith(lyricTitle, System.StringComparison.Ordinal))
            {
                lyric = lyric.Substring(lyricTitle.Length);
            }

            mp3Info.Lyric      = lyric;
            mp3Info.LyricTitle = lyricTitle;
        }