예제 #1
0
            /// <summary>
            /// Loads a list page and collects every anchor whose href contains "/moviepages/".
            /// </summary>
            /// <param name="urx">Location of the list page to analyze.</param>
            /// <returns>
            /// One <see cref="DxxTargetInfo"/> per usable anchor, or <c>null</c> when
            /// <c>IsContainerList</c> rejects the URL, the page could not be loaded, no anchor
            /// matched, the operation was cancelled, or any other exception occurred.
            /// </returns>
            public async Task <IList <DxxTargetInfo> > ExtractContainerList(DxxUriEx urx)
            {
                if (!IsContainerList(urx))
                {
                    return(null);
                }

                return(await DxxActivityWatcher.Instance.Execute(async (cancellationToken) => {
                    try {
                        var web = new HtmlWeb();
                        DxxLogger.Instance.Comment(LOG_CAT, $"Analyzing: {DxxUrl.GetFileName(urx.Uri)}");
                        var html = await web.LoadFromWebAsync(urx.Url, cancellationToken);
                        // Validate the document BEFORE dereferencing DocumentNode; the previous
                        // ordering made this check unreachable for a null document.
                        if (null == html)
                        {
                            DxxLogger.Instance.Error(LOG_CAT, $"Load Error (list):{urx.Url}");
                            return null;
                        }
                        cancellationToken.ThrowIfCancellationRequested();

                        var nodes = html.DocumentNode.SelectNodes("//a[contains(@href, '/moviepages/')]");
                        if (Utils.IsNullOrEmpty(nodes))
                        {
                            DxxLogger.Instance.Error(LOG_CAT, $"No List:{urx.Url}");
                            return null;
                        }

                        var list = nodes.Select((v) => {
                            var href = ensureUrl(urx.Uri.Scheme, urx.Uri.Host, v.Attributes["href"]?.Value);
                            if (null == href)
                            {
                                return null;
                            }

                            // Prefer the anchor text; fall back to the alt text of a child image.
                            string desc = v.InnerText.Trim();
                            if (string.IsNullOrEmpty(desc))
                            {
                                // An <img> without an alt attribute keeps the (empty) description
                                // instead of throwing and discarding the whole list.
                                desc = v.SelectSingleNode("img")?.Attributes["alt"]?.Value ?? desc;
                            }

                            // The name is the second-to-last URI segment when one exists.
                            var segments = new Uri(href).Segments;
                            var idx = segments.Length - 2;
                            var name = idx >= 0 ? segments[idx] : "untitled";

                            return new DxxTargetInfo(href, name, desc);
                        }).Where((v) => v != null).ToList();
                        cancellationToken.ThrowIfCancellationRequested();
                        return list;
                    } catch (OperationCanceledException) {
                        DxxLogger.Instance.Cancel(LOG_CAT, $"Cancelled (list):{urx.Url}");
                        return null;
                    } catch (Exception) {
                        DxxLogger.Instance.Error(LOG_CAT, $"Error (list):{urx.Url}");
                        return null;
                    }
                }));
            }
예제 #2
0
            /// <summary>
            /// Extracts the targets from a container page.
            /// </summary>
            /// <remarks>
            /// Anchors whose href contains ".mp", ".wmv", ".mov" or ".qt" are tried first. When
            /// they yield nothing, script blocks that assign a <c>.mp4</c> path to a variable
            /// named <c>emvideo</c> are used instead. Exceptions are not caught here; they
            /// propagate to <c>DxxActivityWatcher.Instance.Execute</c>.
            /// </remarks>
            /// <param name="urx">Location of the container page.</param>
            /// <returns>
            /// The targets found; <c>null</c> when <c>IsContainer</c> rejects the URL or when
            /// the page has neither matching anchors nor matching scripts.
            /// </returns>
            public async Task <IList <DxxTargetInfo> > ExtractTargets(DxxUriEx urx)
            {
                if (!IsContainer(urx))
                {
                    return(null);
                }
                return(await DxxActivityWatcher.Instance.Execute(async (cancellationToken) => {
                    DxxLogger.Instance.Comment(LOG_CAT, $"Analyzing: {DxxUrl.GetFileName(urx.Uri)}");
                    var web = new HtmlWeb();
                    var html = await web.LoadFromWebAsync(urx.Url, cancellationToken);

                    // Materialize once: the query used to be enumerated by both the emptiness
                    // check and ToList(), which ran CreateTargetInfo twice for every anchor.
                    var direct = html.DocumentNode.SelectNodes("//a[contains(@href, '.mp') or contains(@href, '.wmv') or contains(@href,'.mov') or contains(@href,'.qt')]")
                                 ?.Distinct(mAnchorNodeComparator)
                                 .Select((v) => CreateTargetInfo(urx.Uri, v.Attributes["href"].Value, v))
                                 .Where((v) => v != null)
                                 .ToList();
                    if (!Utils.IsNullOrEmpty(direct))
                    {
                        return direct;
                    }

                    // Enumerate the videos that are loaded from JavaScript (sample_low.mp4).
                    // NOTE(review): the original author's note says sample_low is a low-resolution
                    // copy of content that also seems to exist in high resolution, and that these
                    // should therefore NOT be listed - yet the code below does list them. Confirm
                    // which behavior is intended.
                    var result = direct;
                    var scripts = html.DocumentNode.SelectNodes(".//script[contains(text(),'emvideo')]");
                    if (null != scripts)
                    {
                        var regex = new Regex("(?<=var\\s+emvideo\\s*=\\s*[\"\'])(?<path>.*\\.mp4)");
                        // A page without a <title> element yields an empty description instead
                        // of a NullReferenceException.
                        var title = html.DocumentNode.SelectSingleNode(".//title")?.InnerText ?? "";

                        // 'direct' is null or empty at this point, so the script hits alone form
                        // the result (same outcome as concatenating them onto an empty sequence).
                        result = scripts.Select((v) => {
                            var m = regex.Match(v.InnerText);
                            if (m.Success && Uri.TryCreate(urx.Uri, m.Groups["path"].Value, out var uri))
                            {
                                var desc = SafeDescription(title);
                                var name = DxxUrl.TrimName(DxxUrl.GetFileName(uri));
                                return new DxxTargetInfo(uri, name, desc);
                            }
                            return null;
                        }).Where((v) => v != null).ToList();
                    }
                    if (Utils.IsNullOrEmpty(result))
                    {
                        DxxLogger.Instance.Comment(LOG_CAT, $"No Data: {DxxUrl.GetFileName(urx.Uri)}");
                    }
                    return result;
                }));
            }
예제 #3
0
 /// <summary>
 /// Tells whether <paramref name="urx"/> is a container page: the host must contain
 /// "heyzo.com" and the URL must contain "/moviepages/".
 /// </summary>
 /// <param name="urx">The location to classify.</param>
 /// <returns><c>true</c> when both conditions hold; otherwise <c>false</c>.</returns>
 public bool IsContainer(DxxUriEx urx)
 {
     var hostMatches = urx.Uri.Host.Contains("heyzo.com");
     return hostMatches && urx.Url.Contains("/moviepages/");
 }
예제 #4
0
 /// <summary>
 /// Tells whether <paramref name="urx"/> is a container page: the host must contain
 /// "dmm.co.jp" and the URL must contain "/cid=".
 /// </summary>
 /// <param name="urx">The location to classify.</param>
 /// <returns><c>true</c> when both conditions hold; otherwise <c>false</c>.</returns>
 public bool IsContainer(DxxUriEx urx)
 {
     return urx.Uri.Host.Contains("dmm.co.jp")
         ? urx.Url.Contains("/cid=")
         : false;
 }
예제 #5
0
 /// <summary>
 /// Tells whether <paramref name="urx"/> is a list of containers: the host must contain
 /// "heyzo.com" and the URL must contain "/listpages/".
 /// </summary>
 /// <param name="urx">The location to classify.</param>
 /// <returns><c>true</c> when both conditions hold; otherwise <c>false</c>.</returns>
 public bool IsContainerList(DxxUriEx urx)
 {
     var onSite = urx.Uri.Host.Contains("heyzo.com");
     if (onSite)
     {
         return urx.Url.Contains("/listpages/");
     }
     return false;
 }
예제 #6
0
 /// <summary>
 /// Loads a list page and turns every <c>&lt;p class="tmb"&gt;</c> thumbnail paragraph
 /// into a <see cref="DxxTargetInfo"/>.
 /// </summary>
 /// <param name="urx">Location of the list page.</param>
 /// <returns>
 /// The collected entries, or <c>null</c> when <c>IsContainerList</c> rejects the URL, the
 /// page could not be loaded, no thumbnail paragraph exists, the operation was cancelled,
 /// or any other exception occurred.
 /// </returns>
 public async Task <IList <DxxTargetInfo> > ExtractContainerList(DxxUriEx urx)
 {
     if (!IsContainerList(urx))
     {
         return(null);
     }
     // Markup this method expects (abridged sample):
     //   <p class="tmb"><a href="https://www.dmm.co.jp/litevideo/-/detail/=/cid=bf00392/">
     //     <span class="img"><img src="https://pics.dmm.co.jp/digital/video/bf00392/bf00392pt.jpg" alt="(full title)"></span>
     //     <span class="txt">(shortened title)</span>
     //   </a></p>
     return(await DxxActivityWatcher.Instance.Execute(async (cancellationToken) => {
         try {
             DxxLogger.Instance.Comment(LOG_CAT, $"Analyzing: {DxxUrl.GetFileName(urx.Uri)}");
             var loader = new HtmlWeb();
             var document = await loader.LoadFromWebAsync(urx.Url, cancellationToken);
             if (document == null)
             {
                 DxxLogger.Instance.Error(LOG_CAT, $"Load Error (list):{urx.Url}");
                 return null;
             }
             cancellationToken.ThrowIfCancellationRequested();

             var thumbnails = document.DocumentNode.SelectNodes("//p[@class='tmb']");
             if (thumbnails == null || thumbnails.Count == 0)
             {
                 DxxLogger.Instance.Error(LOG_CAT, $"No Targets:{urx.Url}");
                 return null;
             }

             // Cancellation is checked before any entry is built, as in the lazy original.
             cancellationToken.ThrowIfCancellationRequested();

             var entries = new List <DxxTargetInfo>();
             foreach (var paragraph in thumbnails)
             {
                 var link = paragraph.SelectSingleNode("a")?.GetAttributeValue("href", null);
                 if (string.IsNullOrEmpty(link))
                 {
                     // Paragraphs without a usable link are skipped.
                     continue;
                 }
                 // The image alt text is preferred; the "txt" span is the fallback.
                 var caption = paragraph.SelectSingleNode("a/span/img")?.GetAttributeValue("alt", null)
                               ?? paragraph.SelectSingleNode("a/span[@class='txt']")?.InnerText;
                 entries.Add(new DxxTargetInfo(link, DxxUrl.GetFileName(link), caption));
             }
             return entries;
         } catch (OperationCanceledException) {
             DxxLogger.Instance.Cancel(LOG_CAT, $"Cancelled (list):{urx.Url}");
             return null;
         } catch (Exception) {
             DxxLogger.Instance.Error(LOG_CAT, $"Error (list):{urx.Url}");
             return null;
         }
     }));
 }
예제 #7
0
 /// <summary>
 /// Loads the page and gathers targets from two sources: anchors whose href contains
 /// ".mp", ".wmv", ".mov" or ".qt", and the <c>src</c> attribute of <c>&lt;video&gt;</c> elements.
 /// </summary>
 /// <param name="urx">Location of the page to analyze.</param>
 /// <returns>
 /// Anchor targets followed by video targets; <c>null</c> when the page has neither kind
 /// of element, the operation was cancelled, or any other exception occurred.
 /// </returns>
 public async Task <IList <DxxTargetInfo> > ExtractContainerList(DxxUriEx urx)
 {
     return(await DxxActivityWatcher.Instance.Execute(async (cancellationToken) => {
         try {
             var loader = new HtmlWeb();
             DxxLogger.Instance.Comment(LOG_CAT, $"Analyzing: {DxxUrl.GetFileName(urx.Uri)}");
             var page = await loader.LoadFromWebAsync(urx.Url, cancellationToken);
             cancellationToken.ThrowIfCancellationRequested();

             // Each query stays null when SelectNodes found no element of that kind.
             var fromAnchors = page.DocumentNode
                               .SelectNodes("//a[contains(@href, '.mp') or contains(@href, '.wmv') or contains(@href,'.mov') or contains(@href,'.qt')]")
                               ?.Select((a) => CreateTargetInfo(urx.Uri, a.Attributes["href"]?.Value, a))
                               ?.Where((t) => t != null);
             var fromVideos = page.DocumentNode
                              .SelectNodes("//video")
                              ?.Select((video) => CreateTargetInfo(urx.Uri, video.Attributes["src"]?.Value, video))
                              ?.Where((t) => t != null);

             if (fromAnchors == null && fromVideos == null)
             {
                 DxxLogger.Instance.Comment(LOG_CAT, "No targets.");
                 return null;
             }

             IList <DxxTargetInfo> result;
             if (fromAnchors != null && fromVideos != null)
             {
                 result = fromAnchors.Concat(fromVideos).ToList();
             }
             else
             {
                 // Exactly one source is present here.
                 result = (fromAnchors ?? fromVideos).ToList();
             }
             DxxLogger.Instance.Comment(LOG_CAT, $"{result.Count} target(s) detected.");
             return result;
         } catch (OperationCanceledException) {
             DxxLogger.Instance.Cancel(LOG_CAT, $"Cancelled (Target):{urx.Url}");
             return null;
         } catch (Exception) {
             DxxLogger.Instance.Error(LOG_CAT, $"Error (Target):{urx.Url}");
             return null;
         }
     }));
 }
예제 #8
0
            /// <summary>
            /// Tells whether the file name of <paramref name="urx"/> carries one of the
            /// recognized extensions (.mp4, .mpeg, .mpg, .mov, .wmv, .qt). The comparison is
            /// case-sensitive.
            /// </summary>
            /// <param name="urx">The location to classify.</param>
            /// <returns><c>true</c> for a recognized extension; otherwise <c>false</c>.</returns>
            public bool IsTarget(DxxUriEx urx)
            {
                var fileName = DxxUrl.GetFileName(urx.Uri);
                var extension = System.IO.Path.GetExtension(fileName);

                return extension == ".mp4"
                    || extension == ".mpeg"
                    || extension == ".mpg"
                    || extension == ".mov"
                    || extension == ".wmv"
                    || extension == ".qt";
            }
예제 #9
0
 /// <summary>
 /// Extracts the list of containers: every distinct anchor on the page whose href
 /// contains "/moviepages/" is passed through <c>CreateContainerInfo</c>.
 /// </summary>
 /// <param name="urx">Location of the list page.</param>
 /// <returns>
 /// The non-null container infos, or <c>null</c> when <c>IsContainerList</c> rejects the
 /// URL or the page has no matching anchor. Exceptions are not caught here.
 /// </returns>
 public async Task <IList <DxxTargetInfo> > ExtractContainerList(DxxUriEx urx)
 {
     if (!IsContainerList(urx))
     {
         return(null);
     }
     return(await DxxActivityWatcher.Instance.Execute(async (cancellationToken) => {
         DxxLogger.Instance.Comment(LOG_CAT, $"Analyzing: {DxxUrl.GetFileName(urx.Uri)}");
         var loader = new HtmlWeb();
         var page = await loader.LoadFromWebAsync(urx.Url, cancellationToken);

         // Only movie pages are followed; an earlier variant also accepted '/listpages'.
         var anchors = page.DocumentNode.SelectNodes("//a[contains(@href, '/moviepages/')]");
         if (anchors == null)
         {
             return null;
         }

         var unique = anchors.Distinct(mAnchorNodeComparator);
         var infos = unique.Select((anchor) => CreateContainerInfo(urx.Uri, anchor.Attributes["href"].Value, anchor));
         return infos.Where((info) => info != null).ToList();
     }));
 }
예제 #10
0
 /// <summary>
 /// Extracts the sample movie of a container page from the <c>Movie = {...}</c> object
 /// literal embedded in its script blocks.
 /// </summary>
 /// <param name="urx">Location of the container page.</param>
 /// <returns>
 /// The targets found (possibly empty), or <c>null</c> when <c>IsContainer</c> rejects the
 /// URL, the operation was cancelled, or any other exception occurred.
 /// </returns>
 public async Task <IList <DxxTargetInfo> > ExtractTargets(DxxUriEx urx)
 {
     if (!IsContainer(urx))
     {
         return(null);
     }
     return(await DxxActivityWatcher.Instance.Execute(async (cancellationToken) => {
         try {
             DxxLogger.Instance.Comment(LOG_CAT, $"Analyzing: {DxxUrl.GetFileName(urx.Uri)}");
             var web = new HtmlWeb();
             // Pass the token so a cancel request can abort the download itself, as the
             // sibling extractors do.
             var html = await web.LoadFromWebAsync(urx.Url, cancellationToken);
             var nodes = html.DocumentNode.SelectNodes("//script[contains(text(),'Movie =')]");
             var list = new List <DxxTargetInfo>();
             if (!Utils.IsNullOrEmpty(nodes))
             {
                 var regex = new Regex("Movie = (?<json>{.*})");
                 foreach (var node in nodes)
                 {
                     var v = regex.Match(node.InnerText);
                     if (!v.Success)
                     {
                         continue;
                     }
                     // Shape of the object (abridged sample):
                     //   var Movie = {
                     //     movie_id: "011015-780",
                     //     sample_flash_url:
                     //       "https://smovie.caribbeancom.com/sample/movies/011015-780/480p.mp4",
                     //     sample_m_flash_url:
                     //       "https://smovie.caribbeancom.com/sample/movies/011015-780/sample_m.mp4",
                     //     ...
                     //   };
                     var js = JObject.Parse(v.Groups["json"].Value);

                     // Value<T>(key) yields null for a missing key, whereas js[key].Value<T>()
                     // throws; that made the fallback and the "untitled" default unreachable
                     // whenever a key was absent.
                     var url = js.Value <string>("sample_flash_url")
                               ?? js.Value <string>("sample_m_flash_url");
                     if (null == url)
                     {
                         continue;
                     }
                     var name = js.Value <string>("movie_id") ?? "untitled";
                     var desc = html.DocumentNode.SelectNodes("//h1[@itemprop='name']")?[0]?.InnerText ?? "";
                     list.Add(new DxxTargetInfo(url, name, desc));
                 }
             }
             cancellationToken.ThrowIfCancellationRequested();
             return list;
         } catch (OperationCanceledException) {
             DxxLogger.Instance.Cancel(LOG_CAT, $"Cancelled (Target):{urx.Url}");
             return null;
         } catch (Exception) {
             DxxLogger.Instance.Error(LOG_CAT, $"Error (Target):{urx.Url}");
             return null;
         }
     }));
 }
예제 #11
0
 /// <summary>
 /// Placeholder implementation: performs no extraction.
 /// </summary>
 /// <param name="urx">Ignored.</param>
 /// <returns>An already-completed task whose result is <c>null</c>.</returns>
 public Task <IList <DxxTargetInfo> > ExtractTargets(DxxUriEx urx)
 {
     IList <DxxTargetInfo> nothing = null;
     return Task.FromResult(nothing);
 }
예제 #12
0
 /// <summary>
 /// Always answers <c>false</c>; no URL is classified as a target here.
 /// </summary>
 /// <param name="urx">Ignored.</param>
 /// <returns>Always <c>false</c>.</returns>
 public bool IsTarget(DxxUriEx urx) => false;
예제 #13
0
            /// <summary>
            /// Resolves a container page to its player frame(s) and extracts the movie URL from
            /// the JSON arguments embedded in each frame's "dmmplayer" script.
            /// </summary>
            /// <param name="urx">Location of the container page.</param>
            /// <returns>
            /// The targets found (possibly empty), or <c>null</c> when <c>IsContainer</c> rejects
            /// the URL, a required page could not be loaded, no supported frame URL was found,
            /// the operation was cancelled, or any other exception occurred.
            /// </returns>
            public async Task <IList <DxxTargetInfo> > ExtractTargets(DxxUriEx urx)
            {
                if (!IsContainer(urx))
                {
                    return(null);
                }
                return(await DxxActivityWatcher.Instance.Execute(async(cancellationToken) => {
                    try {
                        DxxLogger.Instance.Comment(LOG_CAT, $"Analyzing: {DxxUrl.GetFileName(urx.Uri)}");
                        var web = new HtmlWeb();
                        var outer = await web.LoadFromWebAsync(urx.Url, cancellationToken);
                        if (null == outer)
                        {
                            DxxLogger.Instance.Error(LOG_CAT, $"Load Error (Target):{urx.Url}");
                            return null;
                        }
                        IEnumerable <string> anchors = null;
                        // Step 1: if the page carries a 'declared=yes' link, follow it and
                        // continue with the page behind it.
                        // NOTE: SingleOrDefault() throws when more than one such link exists;
                        // that exception ends up in the outer catch below.
                        var a_auth = outer.DocumentNode.SelectNodes("//a[contains(@href,'declared=yes')]");
                        if (!Utils.IsNullOrEmpty(a_auth))
                        {
                            var nextUrl = a_auth.SingleOrDefault()?.GetAttributeValue("href", null);
                            if (!string.IsNullOrEmpty(nextUrl))
                            {
                                // Age verification.
                                // JavaScript is required.
                                //outer = web.LoadFromBrowser(nextUrl);
                                outer = await web.LoadFromWebAsync(nextUrl, cancellationToken);
                                if (null == outer)
                                {
                                    DxxLogger.Instance.Error(LOG_CAT, $"Load Error (Age):{urx.Url}");
                                    return null;
                                }
                            }
                        }

                        // Step 2: collect the src of every iframe that the driver supports.
                        var iframes = outer.DocumentNode.SelectNodes("//iframe");
                        if (null != iframes)
                        {
                            // Category pages and the like.
                            anchors = iframes.Select((f) => {
                                var url = f.GetAttributeValue("src", null);
                                return ensureUrl(urx.Uri, url);
                            }).Where((u) => Driver.IsSupported(u));
                        }

                        // Step 3: no usable iframe - follow the first 'sampleplay' onclick link
                        // and use the first iframe of the page it leads to.
                        if (Utils.IsNullOrEmpty(anchors))
                        {
                            // Top page (new arrivals and the like).
                            // do { ... } while (false) runs once; each 'break' abandons this
                            // fallback and leaves 'anchors' unchanged.
                            do
                            {
                                var a = outer.DocumentNode.SelectSingleNode("//a[contains(@onclick,'sampleplay')]");
                                if (a == null)
                                {
                                    break;
                                }
                                var onclick = a.GetAttributeValue("onclick", null);
                                if (null == onclick)
                                {
                                    break;
                                }
                                var regex = new Regex(@"sampleplay\(\'(?<url>.*)\'\)");
                                var v = regex.Match(onclick);
                                if (!v.Success)
                                {
                                    break;
                                }
                                var onclickUrl = ensureUrl(urx.Uri, v.Groups["url"].Value);
                                var next = await web.LoadFromWebAsync(onclickUrl, cancellationToken);
                                if (null == next)
                                {
                                    break;
                                }
                                var last = next.DocumentNode.SelectSingleNode("//iframe");
                                if (null == last)
                                {
                                    break;
                                }
                                var anchor = last.GetAttributeValue("src", null);
                                if (anchor == null || !Driver.IsSupported(anchor))
                                {
                                    break;
                                }
                                anchors = new List <string>()
                                {
                                    anchor
                                };
                            } while (false);
                        }
                        if (Utils.IsNullOrEmpty(anchors))
                        {
                            DxxLogger.Instance.Error(LOG_CAT, $"No Target: {urx.Url}");
                            return null;
                        }

                        // Step 4: load every frame and read the player arguments from its script.
                        var list = new List <DxxTargetInfo>();
                        foreach (var frame in anchors)
                        {
                            cancellationToken.ThrowIfCancellationRequested();
                            var innerUrl = ensureUrl(urx.Uri, frame);
                            if (!Driver.IsSupported(innerUrl))
                            {
                                continue;
                            }
                            var inner = await web.LoadFromWebAsync(innerUrl, cancellationToken);
                            var txt = inner.DocumentNode.SelectSingleNode("//script[contains(text(), 'dmmplayer')]");
                            if (null != txt)
                            {
                                var regex = new Regex(@"const\s+args\s?=\s?(?<json>\{.*\})");
                                var v = regex.Match(txt.InnerText);
                                if (v.Success)
                                {
                                    // Sample of the arguments object (abridged). Only "src",
                                    // "bitrates" and "title" are read below.
                                    // NOTE(review): the sample was recorded as 'var params = {...}'
                                    // while the regex above matches 'const args = {...}' -
                                    // presumably the page script was renamed; confirm.
                                    //    {
                                    //        "id":"dmmplayer","type":"litevideo","service":"litevideo","mode":"detail","cid":"ssni00529", ...
                                    //        "src":"\/\/cc3001.dmm.co.jp\/litevideo\/freepv\/s\/ssn\/ssni00529\/ssni00529_mhb_w.mp4",
                                    //        "title":"(\u-escaped title text)", ...
                                    //        "bitrates":[
                                    //            {"bitrate":300,"src":"\/\/cc3001.dmm.co.jp\/litevideo\/freepv\/s\/ssn\/ssni00529\/ssni00529_sm_w.mp4"},
                                    //            {"bitrate":1000,"src":"\/\/cc3001.dmm.co.jp\/litevideo\/freepv\/s\/ssn\/ssni00529\/ssni00529_dm_w.mp4"},
                                    //            {"bitrate":1500,"src":"\/\/cc3001.dmm.co.jp\/litevideo\/freepv\/s\/ssn\/ssni00529\/ssni00529_dmb_w.mp4"},
                                    //            {"bitrate":3000,"src":"\/\/cc3001.dmm.co.jp\/litevideo\/freepv\/s\/ssn\/ssni00529\/ssni00529_mhb_w.mp4"}],
                                    //        ...
                                    //    }
                                    JObject js = null;
                                    try {
                                        js = JObject.Parse(v.Groups["json"].Value);
                                    } catch (Exception e) {
                                        // Dump the text that failed to parse, then let the outer
                                        // catch turn the failure into a null result.
                                        Debug.WriteLine(e);
                                        Debug.WriteLine("---");
                                        Debug.WriteLine(v.Value);
                                        Debug.WriteLine("---");
                                        throw;
                                    }
                                    // Prefer the top-level "src".
                                    string src = null;
                                    if (js.ContainsKey("src"))
                                    {
                                        src = js["src"].Value <string>();
                                    }

                                    // Otherwise take the "src" of the entry with the highest bitrate.
                                    if (src == null && js.ContainsKey("bitrates"))
                                    {
                                        var ary = js["bitrates"];
                                        if (ary != null && ary.Type == JTokenType.Array)
                                        {
                                            int br = 0;
                                            foreach (var e in ary)
                                            {
                                                var i = e["bitrate"].Value <int>();
                                                if (i > br)
                                                {
                                                    br = i;
                                                    src = e["src"].Value <string>();
                                                }
                                            }
                                        }
                                    }
                                    if (src != null)
                                    {
                                        var targetUrl = ensureUrl(urx.Uri, src);
                                        list.Add(new DxxTargetInfo(targetUrl, DxxUrl.GetFileName(targetUrl), js["title"].Value <string>()));
                                    }
                                }
                            }
                        }
                        cancellationToken.ThrowIfCancellationRequested();
                        return list;
                    } catch (Exception e) {
                        if (e is OperationCanceledException)
                        {
                            DxxLogger.Instance.Cancel(LOG_CAT, $"Cancelled (Target):{urx.Url}");
                        }
                        else
                        {
                            DxxLogger.Instance.Error(LOG_CAT, $"Error (Target):{urx.Url}");
                            DxxLogger.Instance.Error(LOG_CAT, $"... {e}");
                        }
                        return null;
                    }
                }));
            }
예제 #14
0
 /// <summary>
 /// Tells whether <paramref name="urx"/> is a list of containers: the driver must support
 /// the URL and the URL must not contain "/moviepages/".
 /// </summary>
 /// <param name="urx">The location to classify.</param>
 /// <returns><c>true</c> when both conditions hold; otherwise <c>false</c>.</returns>
 public bool IsContainerList(DxxUriEx urx)
 {
     if (!Driver.IsSupported(urx.Url))
     {
         return false;
     }
     var isMoviePage = urx.Url.Contains("/moviepages/");
     return !isMoviePage;
 }
예제 #15
0
 /// <summary>
 /// Always answers <c>false</c>; no URL is classified as a container here.
 /// </summary>
 /// <param name="urx">Ignored.</param>
 /// <returns>Always <c>false</c>.</returns>
 public bool IsContainer(DxxUriEx urx) => false;
예제 #16
0
 /// <summary>
 /// Tells whether the file name of <paramref name="urx"/> ends with ".mp4".
 /// </summary>
 /// <param name="urx">The location to classify.</param>
 /// <returns><c>true</c> when the file name ends with ".mp4"; otherwise <c>false</c>.</returns>
 public bool IsTarget(DxxUriEx urx)
 {
     var fileName = DxxUrl.GetFileName(urx.Uri);
     return fileName.EndsWith(".mp4");
 }
예제 #17
0
 /// <summary>
 /// Always answers <c>true</c>; every URL is treated as a list of containers here.
 /// </summary>
 /// <param name="url">Ignored.</param>
 /// <returns>Always <c>true</c>.</returns>
 public bool IsContainerList(DxxUriEx url) => true;