Beispiel #1
0
        /// <summary>
        /// Scrapes video details for a page of <c>C9Articles</c> rows and saves the results as <c>C9Videos</c>.
        /// </summary>
        /// <remarks>
        /// Articles are taken newest-first by <c>UpdatedTime</c>. Articles without a duration are treated as
        /// non-video content, and articles whose <c>SourceUrl</c> matches one of the 60 most recently updated
        /// stored videos are skipped.
        /// </remarks>
        /// <param name="skip">Number of articles to skip in the ordered article list.</param>
        /// <param name="number">Maximum number of articles to process.</param>
        /// <returns>The videos that were saved, or <c>null</c> when saving threw an exception.</returns>
        public async Task <List <C9Videos> > SaveVideosAsync(int skip = 0, int number = 100)
        {
            Console.WriteLine($"start:{skip}");
            var articles = Context.C9Articles
                           .OrderByDescending(m => m.UpdatedTime)
                           .Skip(skip).Take(number).ToList();

            var videoList = new List <C9Videos>();
            // List<T> does not support concurrent writers; every Add from the parallel loop goes through this lock.
            var videoListLock = new object();

            // Source URLs of the most recently stored videos, used to skip articles that were already scraped.
            // Null URLs are dropped so that an article without a SourceUrl never counts as a duplicate.
            var knownSourceUrls = new HashSet <string>(
                Context.C9Videos
                .OrderByDescending(m => m.UpdatedTime)
                .Take(60)
                .Select(m => m.SourceUrl)
                .ToList()
                .Where(u => u != null),
                StringComparer.Ordinal);

            Parallel.ForEach(articles, a =>
            {
                // Skip entries that are not videos.
                if (a.Duration == null)
                {
                    Console.WriteLine("Not Video" + a.Title);
                    return;
                }
                // Skip entries that are already stored. The set is only read here, which is safe concurrently.
                if (a.SourceUrl != null && knownSourceUrls.Contains(a.SourceUrl))
                {
                    return;
                }

                // Parallel.ForEach bodies cannot await, so the scrape is waited on synchronously.
                C9Videos re = _helper.GetPageVideo(a).Result;
                if (re != null)
                {
                    re.Id = Guid.NewGuid();
                    lock (videoListLock)
                    {
                        videoList.Add(re);
                    }
                }
            });
            try
            {
                Context.AddRange(videoList);
                await Context.SaveChangesAsync();

                return(videoList);
            }
            catch (Exception e)
            {
                Log.Write("C9VideoSaveError.txt", e.Message);
                Console.WriteLine(e);
                return(null);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Reads page URLs from <c>c9videoGetErrors.txt</c> (one per line), scrapes each page and saves the
        /// resulting video unless a video with the same <c>SourceUrl</c> is already stored.
        /// </summary>
        /// <remarks>
        /// URLs that yield no usable record, or whose record cannot be saved, are written to
        /// <c>C9Video.error.txt</c> and processing continues with the next line.
        /// </remarks>
        public void SaveVideoByUrl()
        {
            var file = new FileInfo("c9videoGetErrors.txt");
            int i    = 1;

            // The reader is disposed when the loop finishes or throws.
            using (StreamReader stream = file.OpenText())
            {
                string line;
                // ReadLine returns null at end of file, so every line (including the last one) is processed.
                while ((line = stream.ReadLine()) != null)
                {
                    string url = line.Trim();
                    if (IsNullOrEmpty(url))
                    {
                        continue;
                    }

                    C9Videos re = _helper.GetPageVideoByUrl(url);

                    // A record without a SourceUrl carries no scraped data (non-video page or failed scrape);
                    // record the URL instead of inserting an empty row.
                    if (re == null || IsNullOrEmpty(re.SourceUrl))
                    {
                        Log.Write("C9Video.error.txt", url);
                        continue;
                    }

                    if (Context.C9Videos.Any(m => m.SourceUrl == re.SourceUrl))
                    {
                        continue;
                    }

                    re.Id = Guid.NewGuid();
                    Context.C9Videos.Add(re);
                    try
                    {
                        Context.SaveChanges();
                        Console.WriteLine($"strat:{i}");
                        i++;
                    }
                    catch (Exception)
                    {
                        // Stop tracking the entity that failed to save; otherwise it stays pending and
                        // every later SaveChanges call would retry it and fail again.
                        Context.C9Videos.Remove(re);
                        Log.Write("C9Video.error.txt", url);
                    }
                }
            }
            Console.WriteLine("Done");
        }
Beispiel #3
0
        /// <summary>
        /// Scrapes the detail page of a single article and builds a <c>C9Videos</c> record from it.
        /// </summary>
        /// <remarks>
        /// Basic fields are copied from <paramref name="article"/>; the remaining fields are read from the page
        /// loaded from <c>C9Daemon + article.SourceUrl</c>.
        /// </remarks>
        /// <param name="article">Article whose fields seed the record and whose <c>SourceUrl</c> identifies the page.</param>
        /// <returns>
        /// The populated video, or <c>default</c> when an exception is thrown while loading or parsing the page.
        /// </returns>
        public async Task <C9Videos> GetPageVideo(C9Articles article)
        {
            var video = new C9Videos
            {
                Duration       = article.Duration,
                SeriesTitle    = article.SeriesTitle,
                SeriesTitleUrl = article.SeriesTitleUrl,
                SourceUrl      = article.SourceUrl,
                Title          = article.Title,
                ThumbnailUrl   = article.ThumbnailUrl
            };

            // SeriesType is the text between the first character of SeriesTitleUrl and the next "/".
            // NOTE(review): this runs outside the try block; if the remainder contains no "/", IndexOf
            // returns -1 and Substring throws — confirm SeriesTitleUrl always has the form "/type/...".
            if (article.SeriesTitleUrl != null)
            {
                video.SeriesType = article.SeriesTitleUrl.Substring(1);
                video.SeriesType = video.SeriesType.Substring(0, video.SeriesType.IndexOf(@"/"));
            }
            string url = C9Daemon + article.SourceUrl;

            try
            {
                var hw = new HtmlWeb();
                // Required so that InnerText can be read from <option> elements.
                HtmlAgilityPack.HtmlNode.ElementsFlags.Remove("option");
                HtmlDocument htmlDoc = await hw.LoadFromWebAsync(url);

                // All lookups below are relative to the page's <main role="main"> element.
                // NOTE(review): if this element is missing, mainNode is null and the next line throws,
                // which the catch below turns into a default return.
                HtmlNode mainNode = htmlDoc.DocumentNode.SelectSingleNode(".//main[@role='main']");

                video.Author = mainNode.SelectSingleNode(".//div[@class='authors']")?.Descendants("a")?.Select(s => s.InnerText)
                               .ToArray().Join();

                video.Language = mainNode.SelectSingleNode(".//div[@class='itemHead holder' and @dir='ltr']")?
                                 .GetAttributeValue("lang", Empty);
                video.Description = mainNode.SelectSingleNode(".//section[@class='ch9tab description']/div[@class='ch9tabContent']")?
                                    .InnerText;
                video.VideoEmbed = mainNode.SelectSingleNode(".//section[@class='ch9tab embed']/div[@class='ch9tabContent']")?
                                   .InnerHtml;
                // Each download list item contributes the anchor's "download" attribute (used to detect the
                // media type) and its "href" (the URL that gets stored).
                var downloadUrls = mainNode.SelectNodes(".//section[@class='ch9tab download']//div[@class='download']//ul//li")?
                                   .Select(s => new
                {
                    text  = s.Element("a").Attributes["download"]?.Value,
                    value = s.Element("a").Attributes["href"]?.Value
                }).ToList();

                if (downloadUrls != null)
                {
                    foreach (var downloadUrl in downloadUrls)
                    {
                        var downloadType = downloadUrl.text?.ToLower().Trim();
                        // Entries without a "download" attribute cannot be classified; skip them.
                        if (string.IsNullOrEmpty(downloadType))
                        {
                            continue;
                        }
                        if (downloadType.Contains(".mp3"))
                        {
                            video.Mp3Url = downloadUrl.value;
                        }
                        else if (downloadType.Contains("low.mp4"))
                        {
                            video.Mp4LowUrl = downloadUrl.value;
                        }
                        else if (downloadType.Contains("mid.mp4"))
                        {
                            video.Mp4MidUrl = downloadUrl.value;
                        }
                        else if (downloadType.Contains("high.mp4"))
                        {
                            video.Mp4HigUrl = downloadUrl.value;
                        }
                    }
                }
                video.Tags = mainNode
                             .SelectNodes(".//section[@class='ch9tab description']//div[@class='ch9tabContent']//div[@class='tags']//a")?
                             .Select(s => s.InnerText).ToArray()?.Join();
                video.Views = 0;
                // Read the publication date; pages with a different structure are handled by the fallback below.
                var date = mainNode.SelectSingleNode(".//time[@class='timeHelper']")?
                           .GetAttributeValue("datetime", Empty);
                if (date == null)
                {
                    // Fallback for pages without a timeHelper element (labelled "event" by the original author).
                    // NOTE(review): if the releaseDate node is missing as well, date stays null and
                    // date.Replace throws; the catch below then returns default.
                    date = mainNode.SelectSingleNode(".//div[@class='releaseDate']")?.InnerText;
                    Console.WriteLine(date);
                    // Removing "at" leaves two consecutive spaces, which the format string below expects.
                    // Presumably the text looks like "Mar 05, 2018 at 9:00AM" — TODO confirm.
                    date = date.Replace("at", "");
                    video.CreatedTime = DateTime.ParseExact(date.Trim(), "MMM dd, yyyy  h:mmtt", CultureInfo.CreateSpecificCulture("en-US"));
                }
                else
                {
                    video.CreatedTime = DateTime.Parse(date.Trim());
                }
                // NOTE(review): Parse/ParseExact either return a value or throw, so this fallback looks
                // unreachable — confirm before relying on it.
                if (video.CreatedTime == null)
                {
                    video.CreatedTime = DateTime.Now;
                }
                video.Caption = mainNode.SelectSingleNode(".//section[@class='ch9tab download']//div[@class='download']/div[2]")?
                                .InnerHtml;
                video.UpdatedTime = video.CreatedTime;
                return(video);
            }
            catch (Exception e)
            {
                // Any failure while loading or parsing is reported on the console and yields a default result.
                Console.WriteLine(e.Message + e.StackTrace + url);
                return(default);
Beispiel #4
0
        /// <summary>
        /// Temporary helper for back-filling missed entries: scrapes a video page directly from its full URL.
        /// </summary>
        /// <param name="fullUrl">Absolute URL of the page to load; also stored as the record's <c>SourceUrl</c>.</param>
        /// <returns>
        /// A <c>C9Videos</c> instance, never <c>null</c>. When the page has no main element or no player
        /// duration, the instance is returned without <c>SourceUrl</c> or any other scraped field. When an
        /// exception occurs, the fields assigned up to that point are kept.
        /// </returns>
        public C9Videos GetPageVideoByUrl(string fullUrl)
        {
            C9Videos video = new C9Videos();

            try
            {
                var hw = new HtmlWeb();
                // Required so that InnerText can be read from <option> elements.
                HtmlNode.ElementsFlags.Remove("option");
                HtmlDocument htmlDoc  = hw.Load(fullUrl);
                HtmlNode     mainNode = htmlDoc.DocumentNode.SelectSingleNode(".//main[@role='main']");
                if (mainNode == null)
                {
                    // Nothing to scrape without the main element.
                    Console.WriteLine("main node not found: " + fullUrl);
                    return(video);
                }

                video.Duration = mainNode.SelectSingleNode(".//div[@class='playerContainer']//time[@class='caption']")?
                                 .Attributes["datetime"]?.Value;
                // Not a video page: return the empty record.
                if (video.Duration == null)
                {
                    return(video);
                }

                HtmlNode seriesLink = mainNode.SelectSingleNode(".//div[@class='seriesTitle']//a");
                video.SeriesTitle    = seriesLink?.InnerText;
                video.SeriesTitleUrl = seriesLink?.Attributes["href"]?.Value;
                video.SourceUrl      = fullUrl;
                video.Title          = mainNode.SelectSingleNode(".//div[@class='itemHead holder']//div[@class='container']//h1")?.InnerText;
                video.ThumbnailUrl   = mainNode.SelectSingleNode(".//div[@class='playerContainer']//a[@class='video']")
                                       ?.Attributes["style"]?.Value;
                if (!IsNullOrEmpty(video.ThumbnailUrl))
                {
                    // The style attribute wraps the image URL in parentheses; keep only the part inside them.
                    Regex regex = new Regex(@".+\((.+)\);");
                    video.ThumbnailUrl = regex.Match(video.ThumbnailUrl).Groups[1].Value;
                }
                video.Author = mainNode.SelectSingleNode(".//div[@class='authors']")?.Descendants("a")?.Select(s => s.InnerText)
                               .ToArray().Join();

                video.Language = mainNode.SelectSingleNode(".//div[@class='itemHead holder' and @dir='ltr']")?
                                 .GetAttributeValue("lang", Empty);
                // A missing description must not abort the remaining fields.
                video.Description = mainNode.SelectSingleNode(".//section[@class='ch9tab description']/div[@class='ch9tabContent']")?
                                    .InnerHtml;

                // Each download list item contributes the anchor's "download" attribute (used to detect the
                // media type) and its "href" (the URL that gets stored). Items without an anchor yield nulls.
                var downloadUrls = mainNode.SelectNodes(".//section[@class='ch9tab download']//div[@class='download']//ul//li")?
                                   .Select(s => new
                {
                    text  = s.Element("a")?.Attributes["download"]?.Value,
                    value = s.Element("a")?.Attributes["href"]?.Value
                }).ToList();

                Console.WriteLine(JsonConvert.SerializeObject(downloadUrls));
                if (downloadUrls != null)
                {
                    foreach (var downloadUrl in downloadUrls)
                    {
                        // Invariant lower-casing keeps the ".mp4" matching independent of the current culture.
                        var downloadType = downloadUrl.text?.ToLowerInvariant().Trim();
                        if (IsNullOrEmpty(downloadType))
                        {
                            continue;
                        }
                        if (downloadType.Contains(".mp3"))
                        {
                            video.Mp3Url = downloadUrl.value;
                        }
                        else if (downloadType.Contains("low.mp4"))
                        {
                            video.Mp4LowUrl = downloadUrl.value;
                        }
                        else if (downloadType.Contains("mid.mp4"))
                        {
                            video.Mp4MidUrl = downloadUrl.value;
                        }
                        else if (downloadType.Contains("high.mp4"))
                        {
                            video.Mp4HigUrl = downloadUrl.value;
                        }
                    }
                }
                video.Tags = mainNode
                             .SelectNodes(".//section[@class='ch9tab description']//div[@class='ch9tabContent']//div[@class='tags']//a")?
                             .Select(s => s.InnerText).ToArray()?.Join();

                video.Views       = 0;
                // A missing or malformed datetime attribute makes Parse throw; the catch below reports it and
                // the record is returned without created/updated times.
                video.CreatedTime = DateTime.Parse(mainNode.SelectSingleNode(".//time[@class='timeHelper']")?
                                                   .GetAttributeValue("datetime", Empty));

                video.UpdatedTime = video.CreatedTime;
            }
            catch (Exception e)
            {
                System.Console.WriteLine(e.Message + e.StackTrace);
            }
            return(video);
        }