Esempio n. 1
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/>, reads the JSON object that follows
        /// <c>item: </c> in the HTML and creates one download task for every entry of its
        /// <c>album_images.images</c> array.
        /// </summary>
        /// <param name="url">Address of the page to extract.</param>
        /// <param name="option">Extractor options; when <c>null</c>, <c>RecommendOption(url)</c> is used.</param>
        /// <returns>The download tasks together with an <see cref="ExtractedInfo"/> of type <c>Search</c>.</returns>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            if (option == null)
            {
                option = RecommendOption(url);
            }

            var html = NetTools.DownloadString(url);

            // The image list is embedded in the page as a JSON object literal after "item: ".
            var item_json = Regex.Match(html, "item: ({.*})").Groups[1].Value;
            var images    = JObject.Parse(item_json)["album_images"]["images"];

            var result = new List <NetTask>();

            foreach (var img in images)
            {
                var hash = img["hash"].ToString();
                var ext  = img["ext"].ToString();

                // "ext" is appended directly after the hash, for both the URL and the file name.
                var task = NetTask.MakeDefault($"https://i.imgur.com/{hash}{ext}");
                task.SaveFile = true;
                task.Filename = $"{hash}{ext}";
                task.Format   = new ExtractorFileNameFormat {
                    Id = hash, Extension = ext, FilenameWithoutExtension = hash, Url = url
                };
                result.Add(task);
            }

            // The extractor label is the class name with "Extractor" removed.
            var extractor_name = GetType().Name.Replace("Extractor", "");
            result.ForEach(task => task.Format.Extractor = extractor_name);

            return(result, new ExtractedInfo {
                Type = ExtractedInfo.ExtractedType.Search
            });
        }
Esempio n. 2
0
        /// <summary>
        /// Replaces the <c>Filename</c> of every task with the result of applying the format
        /// string that <paramref name="extractor"/> recommends for <paramref name="option"/>.
        /// </summary>
        /// <param name="extractor">Extractor that supplies the format string.</param>
        /// <param name="tasks">Tasks whose file names are rewritten in place.</param>
        /// <param name="option">Option passed to <c>RecommendFormat</c>.</param>
        public void Formatting(ExtractorModel extractor, ref List <NetTask> tasks, IExtractorOption option)
        {
            var pattern = extractor.RecommendFormat(option);

            // Each task formats its own name from the metadata stored in its Format object.
            tasks.ForEach(task => task.Filename = task.Format.Formatting(pattern));
        }
        /// <summary>
        /// Builds the download tasks for a URL whose <c>type</c> group is <c>reader</c>: downloads
        /// the info page and the image-list JSON for the matched id and creates one task per image.
        /// </summary>
        /// <param name="url">Address to extract; matched against <c>ValidUrl</c>.</param>
        /// <param name="option">Extractor options; when <c>null</c>, <c>RecommendOption(url)</c> is used.</param>
        /// <returns>The download tasks together with an <see cref="ExtractedInfo"/> of type <c>WorksComic</c>.</returns>
        /// <exception cref="ExtractorException">Thrown when the URL's <c>type</c> group is not <c>reader</c>.</exception>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            var match = ValidUrl.Match(url).Groups;

            if (option == null)
            {
                option = RecommendOption(url);
            }

            // Only reader pages are handled.
            if (match["type"].Value != "reader")
            {
                throw new ExtractorException("'search' page not supports yet!");
            }

            var id = match["id"].Value;
            var article_info_url = $"https://hiyobi.me/info/{id}";
            option.PageReadCallback?.Invoke(article_info_url);
            var info_html = NetTools.DownloadString(article_info_url);
            var data      = parse_info(info_html);

            var img_file_json_url = $"https://xn--9w3b15m8vo.asia/data/json/{id}_list.json";
            option.PageReadCallback?.Invoke(img_file_json_url);

            // The same cookie is sent with the JSON request and with every image request.
            var cookie             = "__cfduid=d53c18b351d4a54007ac583a96f4436381568466715";
            var img_file_json_task = NetTask.MakeDefault(img_file_json_url, cookie);
            var img_file_json      = NetTools.DownloadString(img_file_json_task);
            var img_urls           = JArray.Parse(img_file_json).Select(x => $"https://xn--9w3b15m8vo.asia/data/{id}/{x["name"].ToString()}").ToList();

            option.SimpleInfoCallback?.Invoke($"{data.Title}");

            var result = new List <NetTask>();
            var count  = 1;
            foreach (var img in img_urls)
            {
                var task = NetTask.MakeDefault(img);
                task.SaveFile = true;
                task.Filename = img.Split('/').Last();
                task.Cookie   = cookie;
                task.Format   = new ExtractorFileNameFormat
                {
                    Id     = id,
                    Title  = data.Title,
                    Artist = data.artist != null ? data.artist[0] : "N/A",
                    // NOTE(review): Group is filled from data.artist as well - confirm this is intended.
                    Group  = data.artist != null ? data.artist[0] : "N/A",
                    // Pages are numbered sequentially starting at 001.
                    FilenameWithoutExtension = count++.ToString("000"),
                    Extension = Path.GetExtension(task.Filename).Replace(".", "")
                };
                result.Add(task);
            }

            // Indexing an empty list would throw, so a thumbnail is only offered when an image exists.
            if (result.Count > 0)
            {
                option.ThumbnailCallback?.Invoke(result[0]);
            }

            result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
            return(result, new ExtractedInfo {
                Type = ExtractedInfo.ExtractedType.WorksComic
            });
        }
Esempio n. 4
0
 /// <summary>
 /// Returns the file-name format string for <paramref name="option"/>: the format for
 /// <c>ExtractorType.EpisodeImages</c> omits the title segment that every other type includes.
 /// </summary>
 /// <param name="option">Option whose <c>Type</c> selects the format.</param>
 /// <returns>The format string.</returns>
 public override string RecommendFormat(IExtractorOption option)
 {
     return option.Type == ExtractorType.EpisodeImages
         ? "%(extractor)s/%(episode)s/%(file)s.%(ext)s"
         : "%(extractor)s/%(title)s/%(episode)s/%(file)s.%(ext)s";
 }
Esempio n. 5
0
        /// <summary>
        /// Extracts download tasks from a webtoon page. For <c>ExtractorType.EpisodeImages</c> the
        /// page itself is parsed; for <c>ExtractorType.ComicIndex</c> every episode from 1 up to the
        /// number found in the page is downloaded and parsed.
        /// </summary>
        /// <param name="url">Address of the page to extract.</param>
        /// <param name="option">Extractor options; when <c>null</c>, <c>RecommendOption(url)</c> is used.</param>
        /// <returns>
        /// The tasks with a <c>null</c> info for an episode page, the tasks with a <c>WorksComic</c>
        /// info for a comic index, or <c>(null, null)</c> for any other option type.
        /// </returns>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            if (option == null)
            {
                option = RecommendOption(url);
            }

            var page_html = NetTools.DownloadString(url);

            // A single episode: the downloaded page already contains everything needed.
            if (option.Type == ExtractorType.EpisodeImages)
            {
                return (extract_episode_page(page_html), null);
            }

            // Every type other than the comic index is not handled here.
            if (option.Type != ExtractorType.ComicIndex)
            {
                return (null, null);
            }

            var groups  = ValidUrl.Match(url).Groups;
            var last_no = Regex.Match(page_html, @"/webtoon/detail\.nhn\?titleId=\d+&no=(\d+)").Groups[1].Value.ToInt();

            // Build the address of every episode from 1 up to the number read from the page.
            var episode_urls = new List <string>();
            var no           = 1;
            while (no <= last_no)
            {
                episode_urls.Add($"https://comic.naver.com/webtoon/detail.nhn?titleId={groups["id"]}&no={no}");
                no++;
            }

            var episode_pages = NetTools.DownloadStrings(episode_urls);
            var tasks         = new List <NetTask>();

            foreach (var episode_page in episode_pages)
            {
                tasks.AddRange(extract_episode_page(episode_page));
            }

            var info = new ExtractedInfo {
                Type = ExtractedInfo.ExtractedType.WorksComic
            };
            return (tasks, info);
        }
Esempio n. 6
0
 /// <summary>
 /// Parses <paramref name="args"/> with <c>CommandLineParser</c> into the option object and
 /// stores the parser's result back into <paramref name="model"/>.
 /// </summary>
 /// <param name="model">Option object; treated as a <c>DCInsideExtractorOption</c> and replaced by the parse result.</param>
 /// <param name="args">Command-line arguments to parse.</param>
 public override void CLParse(ref IExtractorOption model, string[] args)
 {
     var typed_model = model as DCInsideExtractorOption;
     model = CommandLineParser.Parse(typed_model, args);
 }
Esempio n. 7
0
 /// <summary>
 /// Returns the file-name format string of this extractor; the result does not depend on
 /// <paramref name="option"/>.
 /// </summary>
 /// <param name="option">Not read by this implementation.</param>
 /// <returns>The format string.</returns>
 public override string RecommendFormat(IExtractorOption option)
 {
     const string format = "%(extractor)s/%(artist)s (%(account)s)/%(file)s.%(ext)s";
     return format;
 }
Esempio n. 8
0
        /// <summary>
        /// Authenticates with the configured Pixiv credentials and, when the URL's second group
        /// starts with <c>member</c> and information extraction is off, creates one download task
        /// per work of type illustration (or unspecified type) or ugoira of the matched user.
        /// </summary>
        /// <param name="url">Address to extract; matched against <c>ValidUrl</c>.</param>
        /// <param name="option">Extractor options; when <c>null</c>, a <c>PixivExtractorOption</c> of type <c>Works</c> is used.</param>
        /// <returns>
        /// The tasks with a <c>UserArtist</c> info, or <c>(null, null)</c> when
        /// <c>option.ExtractInformation</c> is set or the URL is not a member URL.
        /// </returns>
        /// <exception cref="ExtractorException">Thrown when <c>PixivAPI.Auth</c> reports failure.</exception>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            if (!PixivAPI.Auth(Settings.Instance.Model.PixivSettings.Id, Settings.Instance.Model.PixivSettings.Password))
            {
                throw new ExtractorException("Authentication error! Check setting.json/PixivSetting.");
            }

            var match = ValidUrl.Match(url).Groups;

            if (option == null)
            {
                option = new PixivExtractorOption {
                    Type = ExtractorType.Works
                };
            }

            if (match[2].Value.StartsWith("member") && option.ExtractInformation == false)
            {
                var user_id = match["id"].Value.ToInt();
                var user    = PixivAPI.GetUsersAsync(user_id).Result;
                var works   = PixivAPI.GetUsersWorksAsync(user_id, 1, 10000000).Result;

                option.SimpleInfoCallback?.Invoke($"{user[0].Name} ({user[0].Account})");

                option.ThumbnailCallback?.Invoke(NetTask.MakeDefault(user[0].ProfileImageUrls.Px170x170));

                var result = new List <NetTask>();

                foreach (var work in works)
                {
                    if (work.Type == null || work.Type == "illustration")
                    {
                        // The file name is the last path segment of the large image URL.
                        var file = work.ImageUrls.Large.Split('/').Last();
                        var task = NetTask.MakeDefault(work.ImageUrls.Large);
                        task.Filename = file;
                        task.SaveFile = true;
                        task.Referer  = url;
                        task.Format   = new ExtractorFileNameFormat
                        {
                            Artist  = user[0].Name,
                            Account = user[0].Account,
                            Id      = user[0].Id.Value.ToString(),
                            FilenameWithoutExtension = Path.GetFileNameWithoutExtension(file),
                            Extension = Path.GetExtension(file).Replace(".", "")
                        };
                        result.Add(task);
                    }
                    else if (work.Type == "ugoira")
                    {
                        // Ugoira works are fetched as a zip; the frame data is handed to the postprocessor.
                        var ugoira_data = PixivAPI.GetUgoiraAsync(work.Id.ToString()).Result;
                        var file        = ugoira_data.ZipUrls.Medium.Split('/').Last();
                        var task        = NetTask.MakeDefault(ugoira_data.ZipUrls.Medium);
                        task.Filename = file;
                        task.SaveFile = true;
                        task.Referer  = url;
                        var pptask = new PostprocessorTask();
                        pptask.Postprocessor = new UgoiraPostprocessor {
                            Frames = ugoira_data.Frames
                        };
                        task.PostProcess = pptask;
                        task.Format      = new ExtractorFileNameFormat
                        {
                            Artist  = user[0].Name,
                            Account = user[0].Account,
                            Id      = user[0].Id.Value.ToString(),
                            FilenameWithoutExtension = Path.GetFileNameWithoutExtension(file),
                            Extension = Path.GetExtension(file).Replace(".", "")
                        };
                        result.Add(task);
                    }
                }

                result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
                return(result, new ExtractedInfo {
                    Type = ExtractedInfo.ExtractedType.UserArtist
                });
            }
            else if (option.ExtractInformation == true)
            {
                // The user is fetched, but the information is not returned to the caller yet.
                var user = PixivAPI.GetUsersAsync(match["id"].Value.ToInt()).Result;
                return(null, null /*user*/);
            }

            return(null, null);
        }
Esempio n. 9
0
        /// <summary>
        /// Collects the media of a Twitter account: downloads the account's media page, then keeps
        /// requesting further pages through <c>profile_query</c> until the post limit is reached,
        /// a page contains no tweets, or the response reports that no more items exist.
        /// </summary>
        /// <param name="url">Address to extract; matched against <c>ValidUrl</c>.</param>
        /// <param name="option">
        /// Extractor options, used as a <c>TwitterExtractorOption</c>; when <c>null</c>,
        /// <c>RecommendOption(url)</c> is used. The first <c>LimitPosts</c> entry, when present,
        /// caps the number of posts that are read.
        /// </param>
        /// <returns>The download tasks together with an <see cref="ExtractedInfo"/> of type <c>UserArtist</c>.</returns>
        /// <exception cref="ExtractorException">Thrown when the URL's <c>id</c> group is <c>hashtag</c>.</exception>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            if (option == null)
            {
                option = RecommendOption(url);
            }

            var match          = ValidUrl.Match(url).Groups;
            var twitter_option = option as TwitterExtractorOption;

            var limit = int.MaxValue;

            if (twitter_option.LimitPosts != null)
            {
                limit = twitter_option.LimitPosts[0].ToInt();
            }

            if (match["id"].Value == "hashtag")
            {
#if DEBUG && false
                var html     = NetTools.DownloadString(url);
                var search   = HttpUtility.UrlDecode(match["search"].Value);
                var position = Regex.Match(html, @"data-max-position""(.*?)""").Groups[1].Value;

                var document = new HtmlDocument();
                document.LoadHtml(html);
                var node   = document.DocumentNode.SelectSingleNode("/html[1]/body[1]/div[1]/div[2]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/div[2]/ol[1]");
                var tweets = node.SelectNodes("./li[@data-item-type='tweet']");
                var urls   = new List <string>();

                foreach (var tweet in tweets)
                {
                    urls.AddRange(parse_tweet_hashtag(option as TwitterExtractorOption, tweet));
                }

                while (true)
                {
                    try
                    {
                        var next = seach_query(option as TwitterExtractorOption, search, position);
                        position = JToken.Parse(next)["min_position"].ToString();
                        var html2     = JToken.Parse(next)["items_html"].ToString();
                        var document2 = new HtmlDocument();
                        document2.LoadHtml(html2);
                        var tweets2 = node.SelectNodes("./li[@data-item-type='tweet']");
                        foreach (var tweet in tweets2)
                        {
                            urls.AddRange(parse_tweet_hashtag(option as TwitterExtractorOption, tweet));
                        }
                    }
                    catch
                    {
                        break;
                    }
                }

                var result = new List <NetTask>();
                foreach (var surl in urls)
                {
                    var task = NetTask.MakeDefault(surl);
                    task.SaveFile = true;

                    var fn = surl.Split('/').Last();
                    task.Filename = fn;
                    task.Format   = new ExtractorFileNameFormat
                    {
                        FilenameWithoutExtension = Path.GetFileNameWithoutExtension(fn),
                        Extension = Path.GetExtension(fn).Replace(".", ""),
                        User      = search
                    };

                    result.Add(task);
                }
                return(new Tuple <List <NetTask>, object>(result, null));
#endif
                throw new ExtractorException("'hashtag' is not support yet!");
            }
            else
            {
                var name           = match["id"].Value;
                var html           = NetTools.DownloadString($"https://twitter.com/{name}/media");
                var min_position   = Regex.Match(html, @"data-min-position=""(.*?)""").Groups[1].Value;
                var node           = html.ToHtmlNode();
                var tweets         = node.SelectNodes("./html[1]/body[1]/div[1]/div[2]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[2]/div[2]/div[1]/div[2]/ol[1]/li[@data-item-type='tweet']");
                var urls           = new List <string>();
                var user           = node.SelectSingleNode("/html[1]/body[1]/div[1]/div[2]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/h1[1]/a[1]").InnerText;
                var videos         = new List <(string, List <string>)>();
                var post_count     = 0;
                var last_url_count = 0;

                option.SimpleInfoCallback?.Invoke($"{user} ({name})");

                // SelectNodes yields null when nothing matches (the paging loop below checks the same),
                // so a first page without tweets is treated as zero posts.
                if (tweets != null)
                {
                    post_count = tweets.Count;
                    foreach (var tweet in tweets)
                    {
                        urls.AddRange(parse_tweet_hashtag(twitter_option, tweet, videos));
                    }
                }

                while (post_count < limit)
                {
                    var next = profile_query(twitter_option, name, min_position);

                    // Parse the response once; it is read for the items, the next position and the more-items flag.
                    var page    = JToken.Parse(next);
                    var html2   = page["items_html"].ToString();
                    var tweets2 = html2.ToHtmlNode().SelectNodes("./li[@data-item-type='tweet']");
                    if (tweets2 == null)
                    {
                        break;
                    }
                    foreach (var tweet in tweets2)
                    {
                        urls.AddRange(parse_tweet_hashtag(twitter_option, tweet, videos));
                    }

                    // Report only the URLs that this page added.
                    option.PostStatus?.Invoke(urls.Count - last_url_count);
                    last_url_count = urls.Count;
                    post_count    += tweets2.Count;
                    min_position   = page["min_position"].ToString();
                    if (!(bool)page["has_more_items"])
                    {
                        break;
                    }

                    // Pause between page requests.
                    Thread.Sleep(3000);
                }

                var result = new List <NetTask>();
                foreach (var surl in urls)
                {
                    var task = NetTask.MakeDefault(surl);
                    task.SaveFile = true;

                    var fn = surl.Split('/').Last();
                    task.Filename = fn;
                    task.Format   = new ExtractorFileNameFormat
                    {
                        FilenameWithoutExtension = Path.GetFileNameWithoutExtension(fn),
                        Extension = Path.GetExtension(fn).Replace(".", ""),
                        Account   = name,
                        User      = user,
                    };

                    result.Add(task);
                }

                foreach (var video in videos)
                {
                    // Segments of one video are numbered from 000 below a folder named by the video's first tuple item.
                    var count = 0;
                    foreach (var ts in video.Item2)
                    {
                        var task = NetTask.MakeDefault(ts);
                        task.SaveFile = true;

                        var fn = ts.Split('/').Last();
                        task.Filename = fn;
                        task.Format   = new ExtractorFileNameFormat
                        {
                            FilenameWithoutExtension = video.Item1 + "/" + count++.ToString("000"),
                            Extension = Path.GetExtension(fn).Replace(".", ""),
                            Account   = name,
                            User      = user,
                        };

                        result.Add(task);
                    }
                }

                result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
                return(result, new ExtractedInfo {
                    Type = ExtractedInfo.ExtractedType.UserArtist
                });
            }
        }
Esempio n. 10
0
 /// <summary>
 /// Hook for applying command-line arguments to an option object. This base
 /// implementation does nothing; it is virtual so that derived classes can override it.
 /// </summary>
 /// <param name="model">Option object, passed by reference; left untouched here.</param>
 /// <param name="args">Command-line arguments; not read here.</param>
 public virtual void CLParse(ref IExtractorOption model, string[] args)
 {
 }
Esempio n. 11
0
        /// <summary>
        /// Walks the Danbooru result pages for the tags found in <paramref name="url"/>, opens
        /// every post of each page and creates one download task per post whose media URL can be
        /// located. Paging stops when a page contains no posts or the end page has been passed.
        /// </summary>
        /// <param name="url">Address to extract; its <c>search</c> group supplies the tags.</param>
        /// <param name="option">
        /// Extractor options, used as a <c>DanbooruExtractorOption</c>; when <c>null</c>, an option
        /// of type <c>Images</c> is created. <c>StartPage</c>, <c>EndPage</c> and
        /// <c>ExcludeVideo</c> are read from it.
        /// </param>
        /// <returns>The download tasks together with an <see cref="ExtractedInfo"/> of type <c>UserArtist</c>.</returns>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            // Locations at which a post page may hold its media link, depending on the page layout.
            const string one_banner_path = "/html[1]/body[1]/div[1]/div[2]/div[1]/section[1]/div[1]/span[1]/a[1]";
            const string two_banner_path = "/html[1]/body[1]/div[1]/div[2]/div[1]/section[1]/div[2]/span[1]/a[1]";
            const string image_path      = "/html[1]/body[1]/div[1]/div[2]/div[1]/section[1]/section[1]/img[1]";
            const string video_path      = "/html[1]/body[1]/div[1]/div[2]/div[1]/section[1]/section[1]/p[1]/a[1]";

            var match = ValidUrl.Match(url).Groups;

            if (option == null)
            {
                option = new DanbooruExtractorOption {
                    Type = ExtractorType.Images
                };
            }

            var danbooru_option = option as DanbooruExtractorOption;

            var tags   = match["search"].Value;
            var result = new List <NetTask>();
            var page   = 1;

            option.SimpleInfoCallback?.Invoke($"{tags}");

            if (danbooru_option.StartPage != null)
            {
                page = danbooru_option.StartPage[0].ToInt();
            }

            var end_page = int.MaxValue;

            if (danbooru_option.EndPage != null)
            {
                end_page = danbooru_option.EndPage[0].ToInt();
            }

            var already_thumbnail = false;

            while (true)
            {
                var durl = $"https://danbooru.donmai.us/posts?tags={tags}&page=" + page.ToString();

                option.PageReadCallback?.Invoke(durl);

                var html = NetTools.DownloadString(durl);
                var node = html.ToHtmlNode().SelectNodes("/html[1]/body[1]/div[1]/div[3]/div[1]/section[1]/div[3]/div[1]/article");

                // No post on this page: the end of the results has been reached.
                if (node == null)
                {
                    break;
                }

                var ds = new List <string>();
                foreach (var sub in node)
                {
                    ds.Add("https://danbooru.donmai.us" + sub.SelectSingleNode("./a").GetAttributeValue("href", ""));
                }

                var htmls = NetTools.DownloadStrings(ds);

                // Indexed loop so that the post address ds[i] is available for the error message.
                for (int i = 0; i < htmls.Count; i++)
                {
                    var snode = htmls[i].ToHtmlNode();

                    var img_url = "";
                    // Just one banner
                    if (snode.SelectSingleNode(one_banner_path)?.GetAttributeValue("id", "") == "image-resize-link")
                    {
                        img_url = snode.SelectSingleNode(one_banner_path).GetAttributeValue("href", "");
                    }
                    // Two banner
                    else if (snode.SelectSingleNode(two_banner_path)?.GetAttributeValue("id", "") == "image-resize-link")
                    {
                        img_url = snode.SelectSingleNode(two_banner_path).GetAttributeValue("href", "");
                    }
                    // Three or none banner
                    else if (snode.SelectSingleNode(image_path) != null)
                    {
                        img_url = snode.SelectSingleNode(image_path).GetAttributeValue("src", "");
                    }
                    // Video URL
                    else if (snode.SelectSingleNode(video_path) != null)
                    {
                        if (danbooru_option.ExcludeVideo)
                        {
                            continue;
                        }
                        img_url = snode.SelectSingleNode(video_path).GetAttributeValue("href", "");
                    }
                    else
                    {
                        // Unknown layout: report it and skip the post rather than queueing a task with an empty URL.
                        Log.Logs.Instance.PushError("[DanbooruExtractor] Cannot find html format! " + ds[i]);
                        continue;
                    }

                    var task = NetTask.MakeDefault(img_url);
                    task.SaveFile = true;
                    task.Filename = img_url.Split('/').Last();
                    task.Format   = new ExtractorFileNameFormat
                    {
                        Search = tags,
                        FilenameWithoutExtension = Path.GetFileNameWithoutExtension(task.Filename),
                        Extension = Path.GetExtension(task.Filename).Replace(".", "")
                    };
                    result.Add(task);
                }

                // The first available task serves as thumbnail; wait until one exists, since result[0] throws on an empty list.
                if (!already_thumbnail && result.Count > 0)
                {
                    option.ThumbnailCallback?.Invoke(result[0]);
                    already_thumbnail = true;
                }

                page += 1;

                if (page > end_page)
                {
                    break;
                }
            }

            result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));

            return(result, new ExtractedInfo {
                Type = ExtractedInfo.ExtractedType.UserArtist
            });
        }
    }
Esempio n. 12
0
 /// <summary>
 /// Extraction entry point that every concrete extractor must implement.
 /// </summary>
 /// <param name="url">Address to extract from.</param>
 /// <param name="option">Options that control the extraction.</param>
 /// <returns>A tuple of the download tasks and an <see cref="ExtractedInfo"/>.</returns>
 public abstract (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option);
Esempio n. 13
0
        /// <summary>
        /// Extracts a comic: reads the title and the episode list from the page at
        /// <paramref name="url"/>, downloads every episode page and creates one download task per
        /// image found in it.
        /// </summary>
        /// <param name="url">Address of the comic page; its <c>host</c> group prefixes the thumbnail address.</param>
        /// <param name="option">Extractor options; when <c>null</c>, <c>RecommendOption(url)</c> is used.</param>
        /// <returns>The download tasks together with an <see cref="ExtractedInfo"/> of type <c>WorksComic</c>.</returns>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            if (option == null)
            {
                option = RecommendOption(url);
            }

            var html  = NetTools.DownloadString(url);
            var match = ValidUrl.Match(url).Groups;

            var node = html.ToHtmlNode();

            var title = node.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[1]/h1[1]").InnerText.Trim();

            var sub_urls   = new List <string>();
            var sub_titles = new List <string>();

            // SelectNodes yields null when nothing matches; a comic without episodes gives an empty result.
            var episodes = node.SelectNodes("/html[1]/body[1]/div[1]/div[3]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div");
            if (episodes != null)
            {
                foreach (var episode in episodes)
                {
                    var tag_a = episode.SelectSingleNode("./div[2]/h2[1]/a[1]");
                    sub_urls.Add(tag_a.GetAttributeValue("href", ""));
                    sub_titles.Add(tag_a.InnerText.Trim());
                }
            }

            option.SimpleInfoCallback?.Invoke(title);
            option.ThumbnailCallback?.Invoke(NetTask.MakeDefault(
                                                 match["host"].Value + node.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/ul[1]/li[1]/div[1]/div[1]/a[1]/img[1]").GetAttributeValue("src", "")));

            option.ProgressMax?.Invoke(sub_urls.Count);

            // The callback passed to DownloadStrings reports one unit of progress each time it is invoked.
            var sub_htmls = NetTools.DownloadStrings(sub_urls, "", () =>
            {
                option.PostStatus?.Invoke(1);
            });

            var result = new List <NetTask>();

            for (int i = 0; i < sub_urls.Count; i++)
            {
                var snode = sub_htmls[i].ToHtmlNode();
                var imgs  = snode.SelectNodes("/html[1]/body[1]/div[1]/div[3]/div[2]/div[1]/div[2]/ul[1]//li/div[1]/img[1]");

                // An episode page without any matching image is skipped instead of aborting the extraction.
                if (imgs == null)
                {
                    continue;
                }

                int count = 1;
                foreach (var img in imgs)
                {
                    // Prefer "data-src" and fall back to "src" when it is missing or blank.
                    var img_src = img.GetAttributeValue("data-src", "");
                    if (string.IsNullOrWhiteSpace(img_src))
                    {
                        img_src = img.GetAttributeValue("src", "");
                    }
                    var task = NetTask.MakeDefault(HttpUtility.HtmlDecode(img_src));
                    task.SaveFile = true;
                    task.Filename = count.ToString("000") + ".jpg";
                    task.Format   = new ExtractorFileNameFormat
                    {
                        Title   = title,
                        Episode = sub_titles[i],
                        FilenameWithoutExtension = count.ToString("000"),
                        Extension = Path.GetExtension(task.Filename).Replace(".", ""),
                    };
                    result.Add(task);
                    count++;
                }
            }

            result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
            return(result, new ExtractedInfo {
                Type = ExtractedInfo.ExtractedType.WorksComic
            });
        }
Esempio n. 14
0
        /// <summary>
        /// Collects the display URLs of an Instagram user's posts: takes the posts contained in
        /// the page at <paramref name="url"/>, then requests further batches through
        /// <c>InstaApi.query_next</c> while more exist and the limit has not been reached, and
        /// turns every URL into a download task.
        /// </summary>
        /// <param name="url">Address of the user page.</param>
        /// <param name="option">
        /// Extractor options, used as an <c>InstagramExtractorOption</c>; when <c>null</c>,
        /// <c>RecommendOption(url)</c> is used. The first <c>LimitPosts</c> entry, when present,
        /// is the limit.
        /// </param>
        /// <returns>The download tasks together with an <see cref="ExtractedInfo"/> of type <c>UserArtist</c>.</returns>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            option = option ?? RecommendOption(url);

            var insta_option = option as InstagramExtractorOption;

            int limit = insta_option.LimitPosts != null
                ? insta_option.LimitPosts[0].ToInt()
                : int.MaxValue;

            var page_html  = NetTools.DownloadString(url);
            var user       = InstaApi.get_user(insta_option, page_html);
            var media_urls = new List <string>(user.FirstPost.DisplayUrls);

            option.PostStatus?.Invoke(user.FirstPost.PostCount);
            option.SimpleInfoCallback?.Invoke($"{user.FullName} ({user.UserName})");

            // The counter advances by 50 for every request, matching the "50" passed to the query.
            var requested = 0;
            var current   = user.FirstPost;

            while (current.HasNext && requested < limit)
            {
                var batch = InstaApi.query_next(insta_option, InstaApi.posts_query_hash(), user.UserId, "50", current.EndCursor);
                media_urls.AddRange(batch.DisplayUrls);
                option.PostStatus?.Invoke(batch.PostCount);
                requested += 50;
                current    = batch;
            }

            var extractor_name = GetType().Name.Replace("Extractor", "");
            var tasks          = new List <NetTask>();

            foreach (var media_url in media_urls)
            {
                // The file name is the last path segment of the URL with the query string removed.
                var file_name = media_url.Split('?')[0].Split('/').Last();

                var task = NetTask.MakeDefault(media_url);
                task.SaveFile = true;
                task.Filename = file_name;
                task.Format   = new ExtractorFileNameFormat
                {
                    FilenameWithoutExtension = Path.GetFileNameWithoutExtension(file_name),
                    Extension = Path.GetExtension(file_name).Replace(".", ""),
                    User      = user.FullName,
                    Account   = user.UserName,
                    Extractor = extractor_name
                };

                tasks.Add(task);
            }

            var info = new ExtractedInfo {
                Type = ExtractedInfo.ExtractedType.UserArtist
            };
            return (tasks, info);
        }
Esempio n. 15
0
        /// <summary>
        /// Extracts a comic: reads the title and the episode rows from the page at
        /// <paramref name="url"/>, downloads every episode page, decodes the base64 text assigned
        /// to <c>toon_img</c> in it and creates one download task per <c>img</c> element of the
        /// decoded HTML.
        /// </summary>
        /// <param name="url">Address of the comic page; its <c>host</c> group prefixes every sub-address.</param>
        /// <param name="option">Extractor options; when <c>null</c>, <c>RecommendOption(url)</c> is used.</param>
        /// <returns>The download tasks together with an <see cref="ExtractedInfo"/> of type <c>WorksComic</c>.</returns>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            if (option == null)
            {
                option = RecommendOption(url);
            }

            var match = ValidUrl.Match(url).Groups;
            var host  = "https://" + match["host"];
            var html  = NetTools.DownloadString(url);
            var node  = html.ToHtmlNode();

            var title     = node.SelectSingleNode("/html[1]/body[1]/div[2]/div[1]/div[1]/div[3]/div[1]/table[2]/tbody[1]/tr[2]/td[1]/table[1]/tr[1]/td[1]").InnerText;
            var sub_datas = node.SelectNodes("/html[1]/body[1]/div[2]/div[1]/div[1]/div[3]/div[1]/div[1]/form[1]/table[1]//tr/td[2]");

            option.SimpleInfoCallback?.Invoke($"{title}");

            var sub_urls   = new List <string>();
            var sub_titles = new List <string>();

            // SelectNodes yields null when nothing matches; a page without episode rows gives an empty result.
            if (sub_datas != null)
            {
                foreach (var sub_data in sub_datas)
                {
                    // The episode address is taken from the cell's "data-role" attribute.
                    sub_urls.Add(host + sub_data.GetAttributeValue("data-role", ""));
                    sub_titles.Add(sub_data.InnerText.Trim());
                }
            }

            var htmls = NetTools.DownloadStrings(sub_urls);

            var result = new List <NetTask>();

            for (int i = 0; i < htmls.Count; i++)
            {
                var    base64encoded = Regex.Match(htmls[i], "var toon_img = '(.*)'").Groups[1].Value;
                string rhtml;
                Strings.TryParseBase64(base64encoded, out rhtml);

                // Nothing decoded for this episode: skip it instead of failing on a null or empty document.
                if (string.IsNullOrEmpty(rhtml))
                {
                    continue;
                }

                var imgs = rhtml.ToHtmlNode().SelectNodes("/img");
                if (imgs == null)
                {
                    continue;
                }

                int count = 1;
                foreach (var img in imgs)
                {
                    var task = NetTask.MakeDefault(host + img.GetAttributeValue("src", ""));
                    task.SaveFile = true;
                    task.Filename = task.Url.Split('/').Last();
                    task.Format   = new ExtractorFileNameFormat
                    {
                        Title   = title,
                        Episode = sub_titles[i],
                        // Images are numbered per episode starting at 001.
                        FilenameWithoutExtension = count.ToString("000"),
                        Extension = Path.GetExtension(task.Filename).Replace(".", "")
                    };
                    result.Add(task);
                    count++;
                }
            }

            result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
            return(result, new ExtractedInfo {
                Type = ExtractedInfo.ExtractedType.WorksComic
            });
        }
Esempio n. 16
0
        /// <summary>
        /// Builds the download tasks and the gallery metadata for a hitomi.la gallery.
        /// </summary>
        /// <param name="url">Gallery URL; the <c>id</c> group matched by <c>ValidUrl</c> selects the gallery.</param>
        /// <param name="option">Extraction options and callbacks; <c>RecommendOption(url)</c> is used when null.</param>
        /// <returns>
        /// One task per image plus a <c>WorksComic</c> info object, or <c>(null, null)</c> when the
        /// option type is not <c>ExtractorType.Images</c>, one of the three pages could not be
        /// downloaded, or the image-list script contains no JSON array.
        /// </returns>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            var match = ValidUrl.Match(url).Groups;

            if (option == null)
            {
                option = RecommendOption(url);
            }

            if (option.Type != ExtractorType.Images)
            {
                return(null, null);
            }

            var id        = match["id"].Value;
            var sinfo     = new ExtractedInfo.WorksComic();
            var block_url = $"https://ltn.hitomi.la/galleryblock/{id}.html";
            var imgs_url  = $"https://ltn.hitomi.la/galleries/{id}.js";

            option.PageReadCallback?.Invoke(block_url);
            option.PageReadCallback?.Invoke(url);
            option.PageReadCallback?.Invoke(imgs_url);

            var strings = NetTools.DownloadStrings(new List <string> { block_url, url, imgs_url });

            if (string.IsNullOrEmpty(strings[0]) || string.IsNullOrEmpty(strings[1]) || string.IsNullOrEmpty(strings[2]))
            {
                return(null, null);
            }

            var data1 = ParseGalleryBlock(strings[0]);
            var data2 = ParseGallery(strings[1]);
            var imgs  = strings[2];

            option.SimpleInfoCallback?.Invoke($"[{data1.Magic}] {data1.Title}");

            // download.js: the image host letter is derived from the character code
            // of the last character of the gallery id.
            var number_of_frontends = 3;
            var subdomain           = Convert.ToChar(97 + (Convert.ToInt32(id.Last()) % number_of_frontends));
            if (id.Last() == '0')
            {
                subdomain = 'a';
            }

            // The script is expected to contain a JSON array; without one there is nothing to parse.
            var array_start = imgs.IndexOf('[');
            if (array_start < 0)
            {
                return(null, null);
            }

            var img_urls = new List <string>();
            foreach (var obj in JArray.Parse(imgs.Substring(array_start)))
            {
                var name = obj.Value<string>("name");
                if (obj.Value<int>("haswebp") == 0)
                {
                    img_urls.Add($"https://{subdomain}a.hitomi.la/galleries/{id}/{name}");
                }
                else
                {
                    img_urls.Add($"https://{subdomain}a.hitomi.la/webp/{id}/{name}.webp");
                }
            }

            // Gallery-wide metadata is identical for every image, so resolve it once.
            // FirstOrDefault also covers collections that are present but empty.
            var artist    = data1.artist?.FirstOrDefault() ?? "N/A";
            var series    = data1.parody?.FirstOrDefault() ?? "N/A";
            var group     = data2.group?.FirstOrDefault() ?? "N/A";
            var character = data2.character?.FirstOrDefault() ?? "N/A";

            // Fall back to the group name when no artist is known.
            if (artist == "N/A" && group != "N/A")
            {
                artist = group;
            }

            var result = new List <NetTask>();
            foreach (var img in img_urls)
            {
                var file_name = img.Split('/').Last();
                var task      = NetTask.MakeDefault(img);
                task.SaveFile = true;
                task.Filename = file_name;
                task.Format   = new ExtractorFileNameFormat
                {
                    Title      = data1.Title,
                    Id         = data1.Magic,
                    Language   = data1.Language,
                    UploadDate = data1.Posted,
                    FilenameWithoutExtension = Path.GetFileNameWithoutExtension(file_name),
                    Extension  = Path.GetExtension(file_name).Replace(".", ""),
                    Artist     = artist,
                    Series     = series,
                    Group      = group,
                    Character  = character
                };
                result.Add(task);
            }

            // A gallery without images has no thumbnail; do not index into an empty list.
            if (result.Count > 0)
            {
                option.ThumbnailCallback?.Invoke(result[0]);
            }

            sinfo.Thumbnail   = result.FirstOrDefault();
            sinfo.URL         = url;
            sinfo.Title       = data1.Title;
            sinfo.Author      = data1.artist?.ToArray();
            sinfo.AuthorGroup = data2.group?.ToArray();
            sinfo.ShortInfo   = $"[{data1.Magic}] {data1.Title}";
            sinfo.Tags        = data1.Tags?.ToArray();
            sinfo.Characters  = data2.character?.ToArray();
            sinfo.Language    = data1.Language;
            sinfo.Parodies    = data1.parody?.ToArray();

            result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
            return(result, new ExtractedInfo {
                Info = sinfo, Type = ExtractedInfo.ExtractedType.WorksComic
            });
        }
Esempio n. 17
0
        /// <summary>
        /// Builds the download tasks and the gallery metadata for a hitomi.la gallery,
        /// following "Redirect" pages to the gallery's current id first.
        /// </summary>
        /// <param name="url">Gallery URL; the <c>id</c> group matched by <c>ValidUrl</c> is the initial gallery id.</param>
        /// <param name="option">Extraction options and callbacks; <c>RecommendOption(url)</c> is used when null.</param>
        /// <returns>
        /// One task per image plus a <c>WorksComic</c> info object, or <c>(null, null)</c> when the
        /// option type is not <c>ExtractorType.Images</c> or a required page could not be downloaded.
        /// </returns>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            var match = ValidUrl.Match(url).Groups;

            if (option == null)
            {
                option = RecommendOption(url);
            }

            if (option.Type != ExtractorType.Images)
            {
                return(null, null);
            }

            var id = match["id"].Value;

            // Handle Redirect: keep following the first link of a page titled "Redirect",
            // taking the new gallery id from the link target each time.
            var page = NetTools.DownloadString(url);
            while (true)
            {
                if (string.IsNullOrEmpty(page))
                {
                    return(null, null);
                }

                var page_node = page.ToHtmlNode();

                // A page without a <title> is treated as a regular (non-redirect) page.
                if (page_node.SelectSingleNode("//title")?.InnerText != "Redirect")
                {
                    break;
                }

                var target = page_node.SelectSingleNode("//a").GetAttributeValue("href", "");
                id   = target.Split('/', '-').Last().Split('.')[0];
                page = NetTools.DownloadString(target);
            }

            var sinfo     = new ExtractedInfo.WorksComic();
            var block_url = $"https://ltn.hitomi.la/galleryblock/{id}.html";
            var imgs_url  = $"https://ltn.hitomi.la/galleries/{id}.js";
            option.PageReadCallback?.Invoke(block_url);
            option.PageReadCallback?.Invoke(url);
            option.PageReadCallback?.Invoke(imgs_url);

            var strings = NetTools.DownloadStrings(new List <string> { block_url, imgs_url });

            if (string.IsNullOrEmpty(strings[0]) || string.IsNullOrEmpty(strings[1]))
            {
                return(null, null);
            }

            var data1 = ParseGalleryBlock(strings[0]);
            var imgs  = strings[1];

            // data1.Magic is appended to the site root to reach the gallery page itself.
            var string2 = NetTools.DownloadString($"https://hitomi.la{data1.Magic}");
            if (string.IsNullOrEmpty(string2))
            {
                return(null, null);
            }
            var data2 = ParseGallery(string2);

            option.SimpleInfoCallback?.Invoke($"[{id}] {data1.Title}");

            // download.js: the image host letter is derived from the character code
            // of the last character of the gallery id.
            var number_of_frontends = 3;
            var subdomain           = Convert.ToChar(97 + (Convert.ToInt32(id.Last()) % number_of_frontends));
            if (id.Last() == '0')
            {
                subdomain = 'a';
            }

            var arr      = JToken.Parse(imgs.Substring(imgs.IndexOf('=') + 1))["files"];
            var img_urls = new List <string>();
            foreach (var obj in (JArray)arr)
            {
                var hash    = obj.Value<string>("hash");
                var name    = obj.Value<string>("name");
                var haswebp = obj.Value<int>("haswebp") != 0;

                if (hash == null || (!haswebp && hash.Length < 3))
                {
                    // No usable hash to build a sharded path from: fall back to the older
                    // name-based gallery URL instead of dereferencing/slicing the hash.
                    img_urls.Add($"https://{subdomain}a.hitomi.la/galleries/{id}/{name}");
                }
                else if (!haswebp)
                {
                    // Sharded path: last char of the hash, then the two chars before it.
                    var postfix = hash.Substring(hash.Length - 3);
                    img_urls.Add($"https://{subdomain}a.hitomi.la/images/{postfix[2]}/{postfix[0]}{postfix[1]}/{hash}.{name.Split('.').Last()}");
                }
                else if (hash == "")
                {
                    img_urls.Add($"https://{subdomain}a.hitomi.la/webp/{name}.webp");
                }
                else if (hash.Length < 3)
                {
                    img_urls.Add($"https://{subdomain}a.hitomi.la/webp/{hash}.webp");
                }
                else
                {
                    var postfix = hash.Substring(hash.Length - 3);
                    img_urls.Add($"https://{subdomain}a.hitomi.la/webp/{postfix[2]}/{postfix[0]}{postfix[1]}/{hash}.webp");
                }
            }

            // Gallery-wide metadata is identical for every image, so resolve it once.
            // FirstOrDefault also covers collections that are present but empty.
            var artist    = data1.artist?.FirstOrDefault() ?? "NA";
            var series    = data1.parody?.FirstOrDefault() ?? "NA";
            var group     = data2.group?.FirstOrDefault() ?? "NA";
            var character = data2.character?.FirstOrDefault() ?? "NA";

            // Fall back to the group name when no artist is known.
            if (artist == "NA" && group != "NA")
            {
                artist = group;
            }

            // Options of another type fall back to sequential file names.
            var real_filename = (option as HitomiExtractorOption)?.RealFilename == true;

            var result   = new List <NetTask>();
            var ordering = 1;
            foreach (var img in img_urls)
            {
                var file_name = img.Split('/').Last();
                var filename  = real_filename
                    ? Path.GetFileNameWithoutExtension(file_name)
                    : ordering++.ToString("000");

                var task = NetTask.MakeDefault(img);
                task.SaveFile = true;
                task.Filename = file_name;
                task.Format   = new ExtractorFileNameFormat
                {
                    Title      = data1.Title,
                    Id         = id,
                    Language   = data1.Language,
                    UploadDate = data1.Posted,
                    FilenameWithoutExtension = filename,
                    Extension  = Path.GetExtension(file_name).Replace(".", ""),
                    Artist     = artist,
                    Series     = series,
                    Group      = group,
                    Character  = character
                };
                result.Add(task);
            }

            // A gallery without images has no thumbnail; do not index into an empty list.
            if (result.Count > 0)
            {
                option.ThumbnailCallback?.Invoke(result[0]);
            }

            sinfo.Thumbnail   = result.FirstOrDefault();
            sinfo.URL         = url;
            sinfo.Title       = data1.Title;
            sinfo.Author      = data1.artist?.ToArray();
            sinfo.AuthorGroup = data2.group?.ToArray();
            sinfo.ShortInfo   = $"[{id}] {data1.Title}";
            sinfo.Tags        = data1.Tags?.ToArray();
            sinfo.Characters  = data2.character?.ToArray();
            sinfo.Language    = data1.Language;
            sinfo.Parodies    = data1.parody?.ToArray();

            result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
            return(result, new ExtractedInfo {
                Info = sinfo, Type = ExtractedInfo.ExtractedType.WorksComic
            });
        }
Esempio n. 18
0
 /// <summary>
 /// Returns the file-name format string recommended for this extractor:
 /// extractor / artist / "[id] title" / file.ext.
 /// </summary>
 /// <param name="option">Extraction option; not consulted by this implementation.</param>
 public override string RecommendFormat(IExtractorOption option)
     => "%(extractor)s/%(artist)s/[%(id)s] %(title)s/%(file)s.%(ext)s";
Esempio n. 19
0
        /// <summary>
        /// Collects download tasks for every post returned by the gelbooru "dapi" post index
        /// for the tag query contained in <paramref name="url"/>, page by page.
        /// </summary>
        /// <param name="url">Search URL; the first capture group of <c>ValidUrl</c> is used as the tag query.</param>
        /// <param name="option">
        /// Extraction options and callbacks. When null a default <c>GelbooruExtractorOption</c> is used.
        /// <c>StartPage</c>/<c>EndPage</c> are honoured only when the option is a <c>GelbooruExtractorOption</c>.
        /// </param>
        /// <returns>The collected tasks and an info object of type <c>Search</c>.</returns>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            var match = ValidUrl.Match(url).Groups;

            if (option == null)
            {
                option = new GelbooruExtractorOption {
                    Type = ExtractorType.Images
                };
            }

            var tags   = match[1].Value;
            var result = new List <NetTask>();

            // Cast once; options of another type simply carry no page range.
            var gelbooru_option = option as GelbooruExtractorOption;

            var page = 0;
            if (gelbooru_option?.StartPage != null)
            {
                page = gelbooru_option.StartPage[0].ToInt();
            }

            var end_page = int.MaxValue;
            if (gelbooru_option?.EndPage != null)
            {
                end_page = gelbooru_option.EndPage[0].ToInt();
            }

            var search = HttpUtility.UrlDecode(tags);
            option.SimpleInfoCallback?.Invoke($"{search}");

            var post_thumbnail = false;

            while (true)
            {
                var durl = "https://gelbooru.com/index.php?page=dapi&s=post&q=index&limit=100&tags=" + tags + "&pid=" + page.ToString();

                option.PageReadCallback?.Invoke(durl);

                var data = NetTools.DownloadString(durl);

                // A failed download ends paging instead of feeding null to the parser.
                if (string.IsNullOrEmpty(data))
                {
                    break;
                }

                var document = new HtmlDocument();
                document.LoadHtml(data);
                var nodes = document.DocumentNode.SelectNodes("/posts[1]/post");

                // No <post> elements: the listing is exhausted.
                if (nodes == null || nodes.Count == 0)
                {
                    break;
                }

                foreach (var node in nodes)
                {
                    var imgurl = node.GetAttributeValue("file_url", "");

                    // A post without a file URL cannot be downloaded; skip it.
                    if (string.IsNullOrEmpty(imgurl))
                    {
                        continue;
                    }

                    var file_name = imgurl.Split('/').Last();
                    var task      = NetTask.MakeDefault(imgurl);
                    task.SaveFile = true;
                    task.Filename = file_name;
                    task.Format   = new ExtractorFileNameFormat
                    {
                        Search = search,
                        FilenameWithoutExtension = Path.GetFileNameWithoutExtension(file_name),
                        Extension = Path.GetExtension(file_name).Replace(".", "")
                    };
                    result.Add(task);
                }

                // Report the first collected task as the thumbnail, once.
                if (!post_thumbnail && result.Count > 0)
                {
                    option.ThumbnailCallback?.Invoke(result[0]);
                    post_thumbnail = true;
                }

                option.PostStatus?.Invoke(nodes.Count);

                page += 1;

                if (page > end_page)
                {
                    break;
                }
            }

            result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
            return(result, new ExtractedInfo {
                Type = ExtractedInfo.ExtractedType.Search
            });
        }
    }
Esempio n. 20
0
 /// <summary>
 /// Returns the file-name format string recommended for this extractor:
 /// extractor / gallery / title / file.ext.
 /// </summary>
 /// <param name="option">Extraction option; not consulted by this implementation.</param>
 public override string RecommendFormat(IExtractorOption option)
     => "%(extractor)s/%(gallery)s/%(title)s/%(file)s.%(ext)s";
Esempio n. 21
0
 /// <summary>
 /// Returns the file-name format string an extractor recommends for the given option.
 /// </summary>
 /// <param name="option">Extraction option the recommendation may depend on.</param>
 /// <returns>A format string; presumably made of <c>%(name)s</c> placeholders — confirm against the implementations.</returns>
 public abstract string RecommendFormat(IExtractorOption option);
Esempio n. 22
0
        /// <summary>
        /// Extracts content from a DCInside URL: the images of an article ("view" pages),
        /// its comments, or gallery information ("lists" pages), depending on <c>option.Type</c>.
        /// </summary>
        /// <param name="url">Article or list URL matched by <c>ValidUrl</c>.</param>
        /// <param name="option">
        /// Extraction options and callbacks. When null a default <c>DCInsideExtractorOption</c>
        /// of type <c>Images</c> is used. <c>OnlyRecommend</c> is honoured only when the option
        /// is a <c>DCInsideExtractorOption</c>.
        /// </param>
        /// <returns>
        /// Image tasks for an article image extraction; <c>(null, null)</c> for the information and
        /// comment modes (their data is currently not returned); otherwise the (possibly empty) task
        /// list with an info object of type <c>Community</c>. Errors raised while parsing are logged
        /// and produce that last result.
        /// </returns>
        /// <exception cref="ExtractorException">The URL is not a "gall" (desktop) page.</exception>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            if (option == null)
            {
                option = new DCInsideExtractorOption {
                    Type = ExtractorType.Images
                };
            }

            // Options of another type simply do not request the recommend-only filter.
            if ((option as DCInsideExtractorOption)?.OnlyRecommend == true)
            {
                url += "&exception_mode=recommend";
            }

            var match  = ValidUrl.Match(url).Groups;
            var result = new List <NetTask>();
            var html   = NetTools.DownloadString(url);

            if (html == null)
            {
                return(result, null);
            }

            if (match[1].Value == "gall")
            {
                try
                {
                    //
                    //  Parse article
                    //

                    if (match[3].Value == "view")
                    {
                        var article = ParseBoardView(html, match[2].Value != "");

                        if (option.Type == ExtractorType.Images && option.ExtractInformation == false)
                        {
                            if (article.ImagesLink == null || article.ImagesLink.Count == 0)
                            {
                                throw new Exception("Nothing to download!");
                            }

                            option.SimpleInfoCallback?.Invoke($"{article.Title}");

                            for (int i = 0; i < article.ImagesLink.Count; i++)
                            {
                                var task = NetTask.MakeDefault(article.ImagesLink[i]);
                                task.Filename = article.FilesName[i];
                                task.SaveFile = true;
                                task.Referer  = url;
                                task.Format   = new ExtractorFileNameFormat
                                {
                                    Id      = article.Id,
                                    Gallery = article.GalleryName,
                                    Title   = article.Title,
                                    FilenameWithoutExtension = (i + 1).ToString("000"),
                                    Extension = Path.GetExtension(article.FilesName[i]).Replace(".", ""),
                                };
                                result.Add(task);
                            }

                            result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
                            return(result, null /*article*/);
                        }
                        else if (option.Type == ExtractorType.ArticleInformation || option.ExtractInformation == true)
                        {
                            return(null, null /*article*/);
                        }
                        else if (option.Type == ExtractorType.Comments)
                        {
                            var cc       = new List <DCComment>();
                            var comments = GetComments(article, "1");
                            cc.Add(comments);

                            //
                            //  To avoid server blocks
                            //

                            Thread.Sleep(2000);

                            int tcount = comments.total_cnt;
                            int count  = 100;

                            // Walk the following comment pages: advance the page index on every
                            // pass and count each page's comments exactly once.
                            for (int i = 2; count < tcount; i++)
                            {
                                comments = GetComments(article, i.ToString());
                                if (comments.comment_cnt == 0)
                                {
                                    break;
                                }
                                count += comments.comment_cnt;
                                cc.Add(comments);
                                Thread.Sleep(2000);
                            }

                            // NOTE(review): the collected comments are not returned yet.
                            return(null, null /*GetComments(article, "0")*/);
                        }
                        else
                        {
                            throw new Exception("You cannot do that with this URL. " + url);
                        }
                    }

                    //
                    //  Parse Articles List
                    //

                    else if (match[3].Value == "lists")
                    {
                        DCGallery gallery;

                        if (match[2].Value == "")
                        {
                            gallery = ParseGallery(html);
                        }
                        else
                        {
                            gallery = ParseMinorGallery(html);
                        }

                        if (option.Type == ExtractorType.GalleryInformation || option.ExtractInformation == true)
                        {
                            return(null, null /*gallery*/);
                        }
                        else
                        {
                            throw new Exception("You cannot do that with this URL." + url);
                        }
                    }
                }
                catch (Exception e)
                {
                    // Parsing problems are logged; the caller then receives the result built so far.
                    Log.Logs.Instance.PushError("[DCInsideExtractor] Extract error - " + option.Type.ToString() + " - " + e.Message + "\r\n" + e.StackTrace);
                }
            }
            else
            {
                // Not support mobile page.
                throw new ExtractorException("[DCInside Extractor] Not support mobile page yet.");
            }

            result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
            return(result, new ExtractedInfo {
                Type = ExtractedInfo.ExtractedType.Community
            });
        }
Esempio n. 23
0
        /// <summary>
        /// Extracts the image download tasks of an exhentai gallery, using the first
        /// configured cookie for every request.
        /// </summary>
        /// <param name="url">Gallery URL.</param>
        /// <param name="option">Extraction options and callbacks; <c>RecommendOption(url)</c> is used when null.</param>
        /// <returns>
        /// One task per image with an info object of type <c>WorksComic</c>, or <c>(null, null)</c>
        /// when <c>option.ExtractInformation</c> is set.
        /// </returns>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            // Resolve the default option first: the callbacks below dereference it.
            if (option == null)
            {
                option = RecommendOption(url);
            }

            var html       = NetTools.DownloadString(NetTask.MakeDefault(url, cookie: cookies[0]));
            var data       = EHentaiExtractor.ParseArticleData(html, @"https://exhentai.org/.*?(?=\))");
            var pages      = EHentaiExtractor.GetPagesUri(html);
            var image_urls = new List <string>();

            option.SimpleInfoCallback?.Invoke($"{data.Title}");

            if (option.ExtractInformation)
            {
                return(null, null /*data*/);
            }

            //
            //  Extract Image Url-Url
            //

            image_urls.AddRange(EHentaiExtractor.GetImagesUri(html));

            // pages[0] is skipped: the images of the first page come from the html fetched above.
            for (int i = 1; i < pages.Length; i++)
            {
                option.PageReadCallback?.Invoke(pages[i]);

                var page = NetTools.DownloadString(NetTask.MakeDefault(pages[i], cookie: cookies[0]));
                image_urls.AddRange(EHentaiExtractor.GetImagesUri(page));
            }

            //
            //  Extract Image Url
            //

            var result = new NetTask[image_urls.Count];

            var artist = "N/A";
            var group  = "N/A";
            var series = "N/A";

            if (data.artist != null && data.artist.Length > 0)
            {
                artist = data.artist[0];
            }
            if (data.group != null && data.group.Length > 0)
            {
                group = data.group[0];
            }
            if (data.parody != null && data.parody.Length > 0)
            {
                series = data.parody[0];
            }

            // Fall back to the group name when no artist is known.
            if (artist == "N/A" && group != "N/A")
            {
                artist = group;
            }

            for (int i = 0; i < image_urls.Count; i++)
            {
                // Each entry is a viewer page; the actual image address is read out of it.
                var html2 = NetTools.DownloadString(NetTask.MakeDefault(image_urls[i], cookies[0]));
                var durl  = EHentaiExtractor.GetImagesAddress(html2);
                var task  = NetTask.MakeDefault(durl, cookies[0]);
                task.SaveFile = true;
                task.Filename = durl.Split('/').Last();
                task.Format   = new ExtractorFileNameFormat
                {
                    Title = data.Title,
                    FilenameWithoutExtension = Path.GetFileNameWithoutExtension(task.Filename),
                    Extension     = Path.GetExtension(task.Filename).Replace(".", ""),
                    OriginalTitle = data.SubTitle,
                    Artist        = artist,
                    Group         = group,
                    Series        = series
                };
                result[i] = task;
                if (i == 0)
                {
                    option.ThumbnailCallback?.Invoke(task);
                }
            }

            var result_list = result.ToList();

            result_list.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
            return(result_list, new ExtractedInfo {
                Type = ExtractedInfo.ExtractedType.WorksComic
            });
        }
Esempio n. 24
0
        /// <summary>
        /// Extracts image tasks either for a single episode page
        /// (<c>ExtractorType.EpisodeImages</c>) or for every episode linked from a
        /// work page (<c>ExtractorType.Works</c>).
        /// </summary>
        /// <param name="url">Episode or work URL matched by <c>ValidUrl</c>.</param>
        /// <param name="option">Extraction options and callbacks; <c>RecommendOption(url)</c> is used when null.</param>
        /// <returns>
        /// Episode mode: the tasks and a null info object. Works mode: the tasks and an info
        /// object of type <c>WorksComic</c>. Any other option type: <c>(null, null)</c>.
        /// </returns>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            option = option ?? RecommendOption(url);

            var html   = NetTools.DownloadString(url);
            var groups = ValidUrl.Match(url).Groups;

            var doc = new HtmlDocument();
            doc.LoadHtml(html);
            var root = doc.DocumentNode;

            // Every task is stamped with the extractor's short name before it is returned.
            var extractor_name = GetType().Name.Replace("Extractor", "");

            if (option.Type == ExtractorType.EpisodeImages)
            {
                var pictures      = get_board_images(html);
                var episode_title = root.SelectSingleNode("/html[1]/head[1]/title[1]").InnerText;

                var tasks = new List <NetTask>();
                var index = 0;
                foreach (var picture in pictures)
                {
                    index++;
                    var number = index.ToString("000");

                    var task = NetTask.MakeDefault(picture);
                    task.SaveFile = true;
                    task.Filename = number + Path.GetExtension(picture.Split('/').Last());
                    task.Format   = new ExtractorFileNameFormat
                    {
                        Episode = episode_title,
                        FilenameWithoutExtension = number,
                        Extension = Path.GetExtension(task.Filename).Replace(".", "")
                    };
                    tasks.Add(task);
                }

                foreach (var task in tasks)
                {
                    task.Format.Extractor = extractor_name;
                }
                return(tasks, null);
            }

            if (option.Type == ExtractorType.Works)
            {
                var work_title     = root.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]").InnerText;
                var episode_urls   = new List <string>();
                var episode_titles = new List <string>();

                option.SimpleInfoCallback?.Invoke($"{work_title}");

                // The thumbnail address is taken from the inline style of the cover element.
                option.ThumbnailCallback?.Invoke(NetTask.MakeDefault(
                                                     Regex.Match(root.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]").GetAttributeValue("style", ""), @"(https?://.*?)\)").Groups[1].Value));

                foreach (var entry in root.SelectNodes("/html[1]/body[1]/div[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]/div[2]/div[1]/div[1]/div"))
                {
                    var link = entry.SelectSingleNode("./a[1]");
                    episode_urls.Add(groups["host"] + link.GetAttributeValue("href", ""));
                    episode_titles.Add(entry.SelectSingleNode("./a[1]/div[1]").MyText());
                }

                option.ProgressMax?.Invoke(episode_urls.Count);

                var episode_pages = NetTools.DownloadStrings(episode_urls, "PHPSESSID=" + Externals.ManamoaPHPSESSID, () =>
                {
                    option.PostStatus?.Invoke(1);
                });

                var tasks = new List <NetTask>();
                for (int episode = 0; episode < episode_urls.Count; episode++)
                {
                    try
                    {
                        var index = 0;
                        foreach (var picture in get_board_images(episode_pages[episode]))
                        {
                            index++;
                            var number = index.ToString("000");

                            var task = NetTask.MakeDefault(picture);
                            task.SaveFile = true;
                            task.Filename = number + Path.GetExtension(picture.Split('/').Last());
                            task.Format   = new ExtractorFileNameFormat
                            {
                                Title   = work_title,
                                Episode = episode_titles[episode],
                                FilenameWithoutExtension = number,
                                Extension = Path.GetExtension(task.Filename).Replace(".", "")
                            };
                            tasks.Add(task);
                        }
                    }
                    catch (Exception)
                    {
                        // Best effort: an episode that fails keeps the tasks gathered so far
                        // and extraction moves on to the next episode.
                    }
                }

                foreach (var task in tasks)
                {
                    task.Format.Extractor = extractor_name;
                }
                return(tasks, new ExtractedInfo {
                    Type = ExtractedInfo.ExtractedType.WorksComic
                });
            }

            return(null, null);
        }
Esempio n. 25
0
        /// <summary>
        /// Extracts the image download tasks of an e-hentai gallery. The viewer pages are
        /// resolved to image addresses through the application scheduler, and this method
        /// blocks until every scheduled page has reported back.
        /// </summary>
        /// <param name="url">Gallery URL.</param>
        /// <param name="option">Extraction options and callbacks; <c>RecommendOption(url)</c> is used when null.</param>
        /// <returns>
        /// The resolved image tasks (pages whose resolution failed are omitted) with an info object
        /// of type <c>WorksComic</c>, or <c>(null, null)</c> when <c>option.ExtractInformation</c> is set.
        /// </returns>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            var html       = NetTools.DownloadString(url);
            var data       = ParseArticleData(html);
            var pages      = GetPagesUri(html);
            var image_urls = new List <string>();

            if (option == null)
            {
                option = RecommendOption(url);
            }

            option.SimpleInfoCallback?.Invoke($"{data.Title}");

            if (option.ExtractInformation)
            {
                return(null, null /*data*/);
            }

            //
            //  Extract Image Url-Url
            //

            image_urls.AddRange(GetImagesUri(html));

            // pages[0] is skipped: the images of the first page come from the html fetched above.
            for (int i = 1; i < pages.Length; i++)
            {
                option.PageReadCallback?.Invoke(pages[i]);

                var page = NetTools.DownloadString(pages[i]);
                image_urls.AddRange(GetImagesUri(page));
            }

            //
            //  Extract Image Url
            //

            var result = new NetTask[image_urls.Count];
            var count  = image_urls.Count;

            var artist = "N/A";
            var group  = "N/A";
            var series = "N/A";

            if (data.artist != null && data.artist.Length > 0)
            {
                artist = data.artist[0];
            }
            if (data.group != null && data.group.Length > 0)
            {
                group = data.group[0];
            }
            if (data.parody != null && data.parody.Length > 0)
            {
                series = data.parody[0];
            }

            // Fall back to the group name when no artist is known.
            if (artist == "N/A" && group != "N/A")
            {
                artist = group;
            }

            // With nothing scheduled no callback would ever signal the event, so only wait
            // when there is at least one page to resolve.
            if (count > 0)
            {
                using (var wait = new ManualResetEvent(false))
                {
                    for (int i = 0; i < image_urls.Count; i++)
                    {
                        var task = NetTask.MakeDefault(image_urls[i]);
                        var j    = i; // per-iteration copy captured by the callback

                        task.Priority = new NetPriority {
                            Type = NetPriorityType.Trivial, TaskPriority = i
                        };
                        task.DownloadString         = true;
                        task.CompleteCallbackString = (string str) =>
                        {
                            try
                            {
                                var durl = GetImagesAddress(str);
                                var tt   = NetTask.MakeDefault(durl);
                                tt.SaveFile = true;
                                tt.Filename = durl.Split('/').Last();
                                tt.Format   = new ExtractorFileNameFormat
                                {
                                    Title = data.Title,
                                    FilenameWithoutExtension = Path.GetFileNameWithoutExtension(tt.Filename),
                                    Extension     = Path.GetExtension(tt.Filename).Replace(".", ""),
                                    OriginalTitle = data.SubTitle,
                                    Artist        = artist,
                                    Group         = group,
                                    Series        = series
                                };
                                result[j] = tt;
                            }
                            finally
                            {
                                // Count the page as done even when resolving it threw,
                                // otherwise the waiting thread would never be released.
                                if (Interlocked.Decrement(ref count) == 0)
                                {
                                    wait.Set();
                                }
                            }
                        };

                        AppProvider.Scheduler.Add(task);
                    }

                    wait.WaitOne();
                }
            }

            // Slots of pages that failed to resolve stay null; leave them out.
            var result_list = result.Where(task => task != null).ToList();

            if (result_list.Count > 0)
            {
                option.ThumbnailCallback?.Invoke(result_list[0]);
            }

            result_list.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
            return(result_list, new ExtractedInfo {
                Type = ExtractedInfo.ExtractedType.WorksComic
            });
        }