/// <summary>
/// Downloads the page at <paramref name="url"/>, parses the JSON object that follows "item: " in the
/// page source and creates one i.imgur.com download task per entry of its album_images.images array.
/// </summary>
/// <param name="url">Address of the page to scan; it is also stored as Url on every task's format.</param>
/// <param name="option">Extractor options; replaced by <c>RecommendOption(url)</c> when null.</param>
/// <returns>The created tasks together with an ExtractedInfo typed as Search.</returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    if (option == null)
    {
        option = RecommendOption(url);
    }

    var html = NetTools.DownloadString(url);

    // Group 1 is the JSON object literal that follows "item: " in the page source.
    // When the pattern does not match, the empty string is handed to JObject.Parse, which throws.
    var item_json = Regex.Match(html, "item: ({.*})").Groups[1].Value;
    var images = JObject.Parse(item_json)["album_images"]["images"];

    var result = new List<NetTask>();

    foreach (var img in images)
    {
        var hash = img["hash"].ToString();
        var ext = img["ext"].ToString();

        var task = NetTask.MakeDefault($"https://i.imgur.com/{hash}{ext}");
        task.SaveFile = true;
        task.Filename = $"{hash}{ext}";
        task.Format = new ExtractorFileNameFormat
        {
            Id = hash,
            Extension = ext,
            FilenameWithoutExtension = hash,
            Url = url
        };
        result.Add(task);
    }

    // The extractor label is the type name without its "Extractor" part; compute it once.
    var extractor_name = GetType().Name.Replace("Extractor", "");
    result.ForEach(task => task.Format.Extractor = extractor_name);

    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.Search });
}
/// <summary>
/// Builds the download tasks for a "reader" URL: fetches the article info page, fetches the
/// JSON image list for the article id and creates one task per listed image.
/// </summary>
/// <param name="url">URL matched against <c>ValidUrl</c>; its "type" and "id" groups are used.</param>
/// <param name="option">Extractor options; replaced by <c>RecommendOption(url)</c> when null.</param>
/// <returns>The image tasks and an ExtractedInfo typed as WorksComic.</returns>
/// <exception cref="ExtractorException">Thrown when the URL's "type" group is not "reader".</exception>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    var match = ValidUrl.Match(url).Groups;

    if (option == null)
    {
        option = RecommendOption(url);
    }

    if (match["type"].Value != "reader")
    {
        throw new ExtractorException("'search' page not supports yet!");
    }

    var id = match["id"].Value;

    var article_info_url = $"https://hiyobi.me/info/{id}";
    option.PageReadCallback?.Invoke(article_info_url);
    var info_html = NetTools.DownloadString(article_info_url);
    var data = parse_info(info_html);

    var img_file_json_url = $"https://xn--9w3b15m8vo.asia/data/json/{id}_list.json";
    option.PageReadCallback?.Invoke(img_file_json_url);

    // The same hard-coded cookie is sent with the list request and with every image request.
    var cookie = "__cfduid=d53c18b351d4a54007ac583a96f4436381568466715";
    var img_file_json_task = NetTask.MakeDefault(img_file_json_url, cookie);
    var img_file_json = NetTools.DownloadString(img_file_json_task);
    var img_urls = JArray.Parse(img_file_json)
        .Select(x => $"https://xn--9w3b15m8vo.asia/data/{id}/{x["name"].ToString()}")
        .ToList();

    option.SimpleInfoCallback?.Invoke($"{data.Title}");

    // Fall back to "N/A" both when the artist list is missing and when it is empty;
    // indexing [0] on an empty list would throw.
    var first_artist = data.artist?.FirstOrDefault() ?? "N/A";

    var result = new List<NetTask>();
    var count = 1;

    foreach (var img in img_urls)
    {
        var task = NetTask.MakeDefault(img);
        task.SaveFile = true;
        task.Filename = img.Split('/').Last();
        task.Cookie = cookie;
        task.Format = new ExtractorFileNameFormat
        {
            Id = id,
            Title = data.Title,
            Artist = first_artist,
            // NOTE(review): Group is filled from the artist list, as in the original code.
            // Presumably a dedicated group field was intended - confirm against parse_info.
            Group = first_artist,
            FilenameWithoutExtension = count++.ToString("000"),
            Extension = Path.GetExtension(task.Filename).Replace(".", "")
        };
        result.Add(task);
    }

    // An empty image list has no first task to offer as thumbnail.
    if (result.Count > 0)
    {
        option.ThumbnailCallback?.Invoke(result[0]);
    }

    var extractor_name = GetType().Name.Replace("Extractor", "");
    result.ForEach(task => task.Format.Extractor = extractor_name);

    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.WorksComic });
}
/// <summary>
/// Picks the output path template for the given option type.
/// </summary>
/// <param name="option">Options whose <c>Type</c> selects the template.</param>
/// <returns>
/// The template without a title segment for <c>ExtractorType.EpisodeImages</c>;
/// otherwise the template that also contains the title segment.
/// </returns>
public override string RecommendFormat(IExtractorOption option)
{
    return option.Type == ExtractorType.EpisodeImages
        ? "%(extractor)s/%(episode)s/%(file)s.%(ext)s"
        : "%(extractor)s/%(title)s/%(episode)s/%(file)s.%(ext)s";
}
/// <summary>
/// Extracts webtoon image tasks either from a single episode page or from every episode of a title.
/// </summary>
/// <param name="url">Page to download; for the index mode its "id" group supplies the titleId.</param>
/// <param name="option">Extractor options; replaced by <c>RecommendOption(url)</c> when null.</param>
/// <returns>
/// For <c>EpisodeImages</c>: the tasks of the downloaded page and a null info.
/// For <c>ComicIndex</c>: the tasks of episodes 1..N and an info typed as WorksComic.
/// For any other type: <c>(null, null)</c>.
/// </returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    if (option == null)
    {
        option = RecommendOption(url);
    }

    var html = NetTools.DownloadString(url);

    //
    //  Extract Webtoon
    //
    if (option.Type == ExtractorType.EpisodeImages)
    {
        return (extract_episode_page(html), null);
    }

    if (option.Type != ExtractorType.ComicIndex)
    {
        return (null, null);
    }

    var groups = ValidUrl.Match(url).Groups;
    var title_id = groups["id"].Value;

    // The first detail link found on the page supplies the episode number used as upper bound.
    var last_episode = Regex.Match(html, @"/webtoon/detail\.nhn\?titleId=\d+&no=(\d+)").Groups[1].Value.ToInt();

    var episode_urls = new List<string>();
    for (var no = 1; no <= last_episode; no++)
    {
        episode_urls.Add($"https://comic.naver.com/webtoon/detail.nhn?titleId={title_id}&no={no}");
    }

    var episode_pages = NetTools.DownloadStrings(episode_urls);

    var tasks = new List<NetTask>();
    foreach (var episode_html in episode_pages)
    {
        tasks.AddRange(extract_episode_page(episode_html));
    }

    var info = new ExtractedInfo { Type = ExtractedInfo.ExtractedType.WorksComic };
    return (tasks, info);
}
/// <summary>
/// Re-populates <paramref name="model"/> from command line arguments.
/// </summary>
/// <param name="model">Option object, viewed as <c>InstagramExtractorOption</c>; replaced by the parser's result.</param>
/// <param name="args">Command line arguments handed to <c>CommandLineParser.Parse</c>.</param>
public override void CLParse(ref IExtractorOption model, string[] args)
{
    var instagram_model = model as InstagramExtractorOption;
    model = CommandLineParser.Parse(instagram_model, args);
}
/// <summary>
/// Handles a gallery URL: for an article ("view") it creates image download tasks or walks the
/// comment pages, for an article list ("lists") it parses the gallery.
/// </summary>
/// <remarks>
/// Errors raised while handling a "gall" URL are caught and pushed to the log; the method then
/// returns whatever tasks were collected so far. The information and comment modes currently
/// return <c>(null, null)</c> because the parsed objects are not handed back.
/// </remarks>
/// <param name="url">URL matched against <c>ValidUrl</c>; groups 1-3 select the handling.</param>
/// <param name="option">Extractor options; defaults to a DCInsideExtractorOption of type Images.</param>
/// <returns>The image tasks and, on the fall-through path, an ExtractedInfo typed as Community.</returns>
/// <exception cref="ExtractorException">Thrown when group 1 of the URL is not "gall".</exception>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    if (option == null)
    {
        option = new DCInsideExtractorOption { Type = ExtractorType.Images };
    }

    if ((option as DCInsideExtractorOption).OnlyRecommend)
    {
        url += "&exception_mode=recommend";
    }

    var match = ValidUrl.Match(url).Groups;
    var result = new List<NetTask>();
    var html = NetTools.DownloadString(url);

    if (html == null)
    {
        return (result, null);
    }

    if (match[1].Value != "gall")
    {
        // Not support mobile page.
        throw new ExtractorException("[DCInside Extractor] Not support mobile page yet.");
    }

    var extractor_name = GetType().Name.Replace("Extractor", "");

    try
    {
        //
        //  Parse article
        //
        if (match[3].Value == "view")
        {
            var article = ParseBoardView(html, match[2].Value != "");

            if (option.Type == ExtractorType.Images && option.ExtractInformation == false)
            {
                if (article.ImagesLink == null || article.ImagesLink.Count == 0)
                {
                    throw new Exception("Nothing to download!");
                }

                option.SimpleInfoCallback?.Invoke($"{article.Title}");

                for (int i = 0; i < article.ImagesLink.Count; i++)
                {
                    var task = NetTask.MakeDefault(article.ImagesLink[i]);
                    task.Filename = article.FilesName[i];
                    task.SaveFile = true;
                    task.Referer = url;
                    task.Format = new ExtractorFileNameFormat
                    {
                        Id = article.Id,
                        Gallery = article.GalleryName,
                        Title = article.Title,
                        FilenameWithoutExtension = (i + 1).ToString("000"),
                        Extension = Path.GetExtension(article.FilesName[i]).Replace(".", ""),
                    };
                    result.Add(task);
                }

                result.ForEach(task => task.Format.Extractor = extractor_name);
                return (result, null /*article*/);
            }
            else if (option.Type == ExtractorType.ArticleInformation || option.ExtractInformation == true)
            {
                return (null, null /*article*/);
            }
            else if (option.Type == ExtractorType.Comments)
            {
                var cc = new List<DCComment>();
                var comments = GetComments(article, "1");
                cc.Add(comments);

                //
                //  To avoid server blocks
                //
                Thread.Sleep(2000);

                int tcount = comments.total_cnt;

                // count starts at 100 - presumably the size of the first comment page; confirm.
                int count = 100;

                // Advance the page index on every round. The original loop never changed it and
                // kept requesting page "2" while bumping count by an extra 100 per iteration.
                for (int i = 2; count < tcount; i++)
                {
                    comments = GetComments(article, i.ToString());
                    if (comments.comment_cnt == 0)
                    {
                        break;
                    }
                    count += comments.comment_cnt;
                    cc.Add(comments);
                    Thread.Sleep(2000);
                }

                return (null, null /*GetComments(article, "0")*/);
            }
            else
            {
                throw new Exception("You cannot do that with this URL. " + url);
            }
        }
        //
        //  Parse Articles List
        //
        else if (match[3].Value == "lists")
        {
            DCGallery gallery;

            if (match[2].Value == "")
            {
                gallery = ParseGallery(html);
            }
            else
            {
                gallery = ParseMinorGallery(html);
            }

            if (option.Type == ExtractorType.GalleryInformation || option.ExtractInformation == true)
            {
                return (null, null /*gallery*/);
            }
            else
            {
                // Same wording as the article branch, including the space before the URL.
                throw new Exception("You cannot do that with this URL. " + url);
            }
        }
    }
    catch (Exception e)
    {
        Log.Logs.Instance.PushError("[DCInsideExtractor] Extract error - " + option.Type.ToString() + " - " + e.Message + "\r\n" + e.StackTrace);
    }

    result.ForEach(task => task.Format.Extractor = extractor_name);
    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.Community });
}
/// <summary>
/// Collects media URLs from an account's media timeline and turns them into download tasks.
/// The first page comes from https://twitter.com/{name}/media; further pages are requested through
/// <c>profile_query</c> until the post limit is reached or the response reports no more items.
/// </summary>
/// <param name="url">URL matched against <c>ValidUrl</c>; its "id" group is the account name.</param>
/// <param name="option">
/// Extractor options, used as <c>TwitterExtractorOption</c>; replaced by <c>RecommendOption(url)</c>
/// when null. <c>LimitPosts[0]</c>, when present, caps the number of tweets that are walked.
/// </param>
/// <returns>Image tasks followed by video segment tasks, and an ExtractedInfo typed as UserArtist.</returns>
/// <exception cref="ExtractorException">Thrown for "hashtag" URLs, which are not supported.</exception>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    if (option == null)
    {
        option = RecommendOption(url);
    }

    var match = ValidUrl.Match(url).Groups;
    var twitter_option = option as TwitterExtractorOption;

    var limit = int.MaxValue;
    if (twitter_option.LimitPosts != null)
    {
        limit = twitter_option.LimitPosts[0].ToInt();
    }

    if (match["id"].Value == "hashtag")
    {
        throw new ExtractorException("'hashtag' is not support yet!");
    }

    var name = match["id"].Value;
    var html = NetTools.DownloadString($"https://twitter.com/{name}/media");
    var min_position = Regex.Match(html, @"data-min-position=""(.*?)""").Groups[1].Value;
    var node = html.ToHtmlNode();
    var tweets = node.SelectNodes("./html[1]/body[1]/div[1]/div[2]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[2]/div[2]/div[1]/div[2]/ol[1]/li[@data-item-type='tweet']");
    var urls = new List<string>();
    var user = node.SelectSingleNode("/html[1]/body[1]/div[1]/div[2]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/h1[1]/a[1]").InnerText;
    var videos = new List<(string, List<string>)>();

    // SelectNodes yields null when nothing matches; treat that as an empty first page.
    var post_count = tweets?.Count ?? 0;
    var last_url_count = 0;

    option.SimpleInfoCallback?.Invoke($"{user} ({name})");

    if (tweets != null)
    {
        foreach (var tweet in tweets)
        {
            urls.AddRange(parse_tweet_hashtag(twitter_option, tweet, videos));
        }
    }

    while (post_count < limit)
    {
        // Parse the response once and read all three fields from the same token.
        var next = JToken.Parse(profile_query(twitter_option, name, min_position));
        var html2 = next["items_html"].ToString();
        var tweets2 = html2.ToHtmlNode().SelectNodes("./li[@data-item-type='tweet']");

        if (tweets2 == null)
        {
            break;
        }

        foreach (var tweet in tweets2)
        {
            urls.AddRange(parse_tweet_hashtag(twitter_option, tweet, videos));
        }

        // Report only the URLs gained since the previous report.
        option.PostStatus?.Invoke(urls.Count - last_url_count);
        last_url_count = urls.Count;

        post_count += tweets2.Count;
        min_position = next["min_position"].ToString();

        if (!(bool)next["has_more_items"])
        {
            break;
        }

        // Pause between timeline requests.
        Thread.Sleep(3000);
    }

    var result = new List<NetTask>();

    foreach (var surl in urls)
    {
        var task = NetTask.MakeDefault(surl);
        task.SaveFile = true;
        var fn = surl.Split('/').Last();
        task.Filename = fn;
        task.Format = new ExtractorFileNameFormat
        {
            FilenameWithoutExtension = Path.GetFileNameWithoutExtension(fn),
            Extension = Path.GetExtension(fn).Replace(".", ""),
            Account = name,
            User = user,
        };
        result.Add(task);
    }

    // Each video is a list of segment URLs; segments are numbered inside a folder named by Item1.
    foreach (var video in videos)
    {
        var count = 0;
        foreach (var ts in video.Item2)
        {
            var task = NetTask.MakeDefault(ts);
            task.SaveFile = true;
            var fn = ts.Split('/').Last();
            task.Filename = fn;
            task.Format = new ExtractorFileNameFormat
            {
                FilenameWithoutExtension = video.Item1 + "/" + count++.ToString("000"),
                Extension = Path.GetExtension(fn).Replace(".", ""),
                Account = name,
                User = user,
            };
            result.Add(task);
        }
    }

    var extractor_name = GetType().Name.Replace("Extractor", "");
    result.ForEach(task => task.Format.Extractor = extractor_name);

    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.UserArtist });
}
/// <summary>
/// Returns the output path template: extractor / gallery / title / file.ext.
/// </summary>
/// <param name="option">Not consulted; the template is the same for every option.</param>
/// <returns>The fixed format string.</returns>
public override string RecommendFormat(IExtractorOption option)
    => "%(extractor)s/%(gallery)s/%(title)s/%(file)s.%(ext)s";
/// <summary>
/// For a "list" URL, gathers every product link of the listing (first page plus the ajax pages),
/// downloads each product page and creates one task per image found on it.
/// </summary>
/// <remarks>For any other URL type the returned task list is empty.</remarks>
/// <param name="url">URL matched against <c>ValidUrl</c>; its "type" and "id" groups are used.</param>
/// <param name="option">Extractor options; replaced by <c>RecommendOption(url)</c> when null.</param>
/// <returns>The image tasks and an ExtractedInfo typed as Search.</returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    var match = ValidUrl.Match(url).Groups;

    if (option == null)
    {
        option = RecommendOption(url);
    }

    var html = NetTools.DownloadString(url);
    var node = html.ToHtmlNode();
    var result = new List<NetTask>();
    var id = match["id"].Value;

    if (match["type"].Value == "list")
    {
        var title = node.SelectSingleNode("/html[1]/head[1]/title[1]").InnerText.Replace("아뜨랑스 - ", "").Trim();
        var sub_urls = get_first_urls(html);
        var page = 1;
        var prev_count = 0;

        option.SimpleInfoCallback?.Invoke(title);

        // Keep requesting listing pages until a page adds no new product link.
        do
        {
            prev_count = sub_urls.Count;
            var task = NetTask.MakeDefault("https://attrangs.co.kr/shop/list_ajax.php");
            task.Query = make_next_list_dict(id, page++);
            sub_urls.UnionWith(get_first_urls(NetTools.DownloadString(task)));
        } while (prev_count != sub_urls.Count);

        option.ProgressMax?.Invoke(sub_urls.Count);

        var sub_htmls = new List<string>();
        var rand = new Random();

        foreach (var surl in sub_urls)
        {
            sub_htmls.Add(NetTools.DownloadString(surl));
            option.PostStatus?.Invoke(1);

            // Kuipernet Handling: wait 300-600 ms between product page downloads.
            Thread.Sleep(rand.Next(3, 7) * 100);
        }

        foreach (var shtml in sub_htmls)
        {
            var view = new AttrangsViewParser(shtml);

            foreach (var img in view.Images)
            {
                var task = NetTask.MakeDefault(img);
                task.SaveFile = true;
                task.Filename = img.Split('/').Last();
                task.Format = new ExtractorFileNameFormat
                {
                    Gallery = title,
                    Title = view.Title,
                    FilenameWithoutExtension = Path.GetFileNameWithoutExtension(task.Filename),
                    Extension = Path.GetExtension(task.Filename).Replace(".", "")
                };
                result.Add(task);
            }
        }

        // A listing without any image has no first task to use as thumbnail.
        if (result.Count > 0)
        {
            option.ThumbnailCallback?.Invoke(result[0]);
        }
    }

    var extractor_name = GetType().Name.Replace("Extractor", "");
    result.ForEach(task => task.Format.Extractor = extractor_name);

    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.Search });
}
/// <summary>
/// Walks the gelbooru dapi post index for the tags in the URL, page by page, and creates one
/// download task per post's file_url attribute.
/// </summary>
/// <param name="url">URL matched against <c>ValidUrl</c>; group 1 holds the (encoded) tags.</param>
/// <param name="option">
/// Extractor options, used as <c>GelbooruExtractorOption</c>; defaults to one of type Images.
/// <c>StartPage[0]</c> and <c>EndPage[0]</c>, when present, bound the pid range.
/// </param>
/// <returns>The created tasks and an ExtractedInfo typed as Search.</returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    var groups = ValidUrl.Match(url).Groups;

    if (option == null)
    {
        option = new GelbooruExtractorOption { Type = ExtractorType.Images };
    }

    var tags = groups[1].Value;
    var result = new List<NetTask>();
    var gelbooru_option = option as GelbooruExtractorOption;

    var page = 0;
    if (gelbooru_option.StartPage != null)
    {
        page = gelbooru_option.StartPage[0].ToInt();
    }

    var end_page = int.MaxValue;
    if (gelbooru_option.EndPage != null)
    {
        end_page = gelbooru_option.EndPage[0].ToInt();
    }

    option.SimpleInfoCallback?.Invoke($"{HttpUtility.UrlDecode(tags)}");

    var thumbnail_sent = false;

    do
    {
        var durl = "https://gelbooru.com/index.php?page=dapi&s=post&q=index&limit=100&tags=" + tags + "&pid=" + page;

        option.PageReadCallback?.Invoke(durl);

        var document = new HtmlDocument();
        document.LoadHtml(NetTools.DownloadString(durl));
        var posts = document.DocumentNode.SelectNodes("/posts[1]/post");

        // A page without posts ends the walk.
        if (posts == null || posts.Count == 0)
        {
            break;
        }

        foreach (var post in posts)
        {
            var file_url = post.GetAttributeValue("file_url", "");
            var file_name = file_url.Split('/').Last();

            var task = NetTask.MakeDefault(file_url);
            task.SaveFile = true;
            task.Filename = file_name;
            task.Format = new ExtractorFileNameFormat
            {
                Search = HttpUtility.UrlDecode(tags),
                FilenameWithoutExtension = Path.GetFileNameWithoutExtension(file_name),
                Extension = Path.GetExtension(file_name).Replace(".", "")
            };
            result.Add(task);
        }

        // The very first task doubles as thumbnail; announce it once.
        if (!thumbnail_sent)
        {
            option.ThumbnailCallback?.Invoke(result[0]);
            thumbnail_sent = true;
        }

        option.PostStatus?.Invoke(posts.Count);

        page += 1;
    } while (page <= end_page);

    result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));

    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.Search });
}
// Closing brace carried over from the original line; it ends a scope opened outside this block.
}
/// <summary>
/// Reads a series page, downloads every listed episode page and creates one task per image of the
/// base64-encoded <c>toon_img</c> markup embedded in each episode page.
/// </summary>
/// <remarks>
/// Episodes whose markup cannot be decoded or that contain no image are skipped.
/// </remarks>
/// <param name="url">URL matched against <c>ValidUrl</c>; its "host" group prefixes relative links.</param>
/// <param name="option">Extractor options; replaced by <c>RecommendOption(url)</c> when null.</param>
/// <returns>The image tasks and an ExtractedInfo typed as WorksComic.</returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    if (option == null)
    {
        option = RecommendOption(url);
    }

    var match = ValidUrl.Match(url).Groups;
    var host = "https://" + match["host"];
    var html = NetTools.DownloadString(url);
    var node = html.ToHtmlNode();

    var title = node.SelectSingleNode("/html[1]/body[1]/div[2]/div[1]/div[1]/div[3]/div[1]/table[2]/tbody[1]/tr[2]/td[1]/table[1]/tr[1]/td[1]").InnerText;
    var sub_datas = node.SelectNodes("/html[1]/body[1]/div[2]/div[1]/div[1]/div[3]/div[1]/div[1]/form[1]/table[1]//tr/td[2]");

    option.SimpleInfoCallback?.Invoke($"{title}");

    var sub_urls = new List<string>();
    var sub_titles = new List<string>();

    // SelectNodes yields null when the episode table has no matching cell.
    if (sub_datas != null)
    {
        foreach (var sub_data in sub_datas)
        {
            // The episode link is carried by the cell's data-role attribute.
            sub_urls.Add(host + sub_data.GetAttributeValue("data-role", ""));
            sub_titles.Add(sub_data.InnerText.Trim());
        }
    }

    var htmls = NetTools.DownloadStrings(sub_urls);
    var result = new List<NetTask>();

    for (int i = 0; i < htmls.Count; i++)
    {
        var base64encoded = Regex.Match(htmls[i], "var toon_img = '(.*)'").Groups[1].Value;

        string rhtml;
        Strings.TryParseBase64(base64encoded, out rhtml);

        // Nothing decodable on this episode page: skip it instead of failing the whole series.
        if (string.IsNullOrEmpty(rhtml))
        {
            continue;
        }

        var images = rhtml.ToHtmlNode().SelectNodes("/img");
        if (images == null)
        {
            continue;
        }

        int count = 1;
        foreach (var img in images)
        {
            var task = NetTask.MakeDefault(host + img.GetAttributeValue("src", ""));
            task.SaveFile = true;
            task.Filename = task.Url.Split('/').Last();
            task.Format = new ExtractorFileNameFormat
            {
                Title = title,
                Episode = sub_titles[i],
                FilenameWithoutExtension = count.ToString("000"),
                Extension = Path.GetExtension(task.Filename).Replace(".", "")
            };
            result.Add(task);
            count++;
        }
    }

    var extractor_name = GetType().Name.Replace("Extractor", "");
    result.ForEach(task => task.Format.Extractor = extractor_name);

    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.WorksComic });
}
/// <summary>
/// Authenticates against Pixiv and, for a "member" URL, creates download tasks for the user's works:
/// the large image of each illustration and the medium zip (with an ugoira post-processor) of each ugoira.
/// </summary>
/// <remarks>
/// Only the single large image URL of a work is queued; additional pages of a multi-page work
/// are not expanded. In information mode the user is fetched but nothing is returned yet.
/// </remarks>
/// <param name="url">URL matched against <c>ValidUrl</c>; group 2 and the "id" group are used.</param>
/// <param name="option">Extractor options; defaults to a PixivExtractorOption of type Works.</param>
/// <returns>
/// The tasks and an ExtractedInfo typed as UserArtist for member URLs; otherwise <c>(null, null)</c>.
/// </returns>
/// <exception cref="ExtractorException">Thrown when <c>PixivAPI.Auth</c> fails.</exception>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    if (!PixivAPI.Auth(Settings.Instance.Model.PixivSettings.Id, Settings.Instance.Model.PixivSettings.Password))
    {
        throw new ExtractorException("Authentication error! Check setting.json/PixivSetting.");
    }

    var match = ValidUrl.Match(url).Groups;

    if (option == null)
    {
        option = new PixivExtractorOption { Type = ExtractorType.Works };
    }

    if (match[2].Value.StartsWith("member") && option.ExtractInformation == false)
    {
        var user = PixivAPI.GetUsersAsync(match["id"].Value.ToInt()).Result;
        var works = PixivAPI.GetUsersWorksAsync(match["id"].Value.ToInt(), 1, 10000000).Result;

        option.SimpleInfoCallback?.Invoke($"{user[0].Name} ({user[0].Account})");
        option.ThumbnailCallback?.Invoke(NetTask.MakeDefault(user[0].ProfileImageUrls.Px170x170));

        var result = new List<NetTask>();

        foreach (var work in works)
        {
            NetTask task;
            string source_url;

            if (work.Type == null || work.Type == "illustration")
            {
                source_url = work.ImageUrls.Large;
                task = NetTask.MakeDefault(source_url);
            }
            else if (work.Type == "ugoira")
            {
                var ugoira_data = PixivAPI.GetUgoiraAsync(work.Id.ToString()).Result;
                source_url = ugoira_data.ZipUrls.Medium;
                task = NetTask.MakeDefault(source_url);

                // The zip is handed to the ugoira post-processor together with its frame list.
                var pptask = new PostprocessorTask();
                pptask.Postprocessor = new UgoiraPostprocessor { Frames = ugoira_data.Frames };
                task.PostProcess = pptask;
            }
            else
            {
                // Other work types are not downloaded.
                continue;
            }

            // The file name is the last path segment of the source URL.
            var file_name = source_url.Split('/').Last();

            task.Filename = file_name;
            task.SaveFile = true;
            task.Referer = url;
            task.Format = new ExtractorFileNameFormat
            {
                Artist = user[0].Name,
                Account = user[0].Account,
                Id = user[0].Id.Value.ToString(),
                FilenameWithoutExtension = Path.GetFileNameWithoutExtension(file_name),
                Extension = Path.GetExtension(file_name).Replace(".", "")
            };
            result.Add(task);
        }

        var extractor_name = GetType().Name.Replace("Extractor", "");
        result.ForEach(task => task.Format.Extractor = extractor_name);

        return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.UserArtist });
    }
    else if (option.ExtractInformation == true)
    {
        // The lookup is kept so a failing request still surfaces; its result is not returned yet.
        var user = PixivAPI.GetUsersAsync(match["id"].Value.ToInt()).Result;
        return (null, null /*user*/);
    }

    return (null, null);
}
/// <summary>
/// Extracts a gallery: collects the per-image page URLs from every gallery page, resolves each of
/// them to its real image address through the scheduler, and returns one download task per image.
/// </summary>
/// <remarks>
/// The per-image pages are queued on <c>AppProvider.Scheduler</c> and this method blocks until all
/// of their completion callbacks have run. A gallery without any image returns an empty list.
/// </remarks>
/// <param name="url">Address of the first gallery page.</param>
/// <param name="option">Extractor options; replaced by <c>RecommendOption(url)</c> when null.</param>
/// <returns>
/// <c>(null, null)</c> in information mode; otherwise the tasks in page order and an
/// ExtractedInfo typed as WorksComic.
/// </returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    var html = NetTools.DownloadString(url);
    var data = ParseArticleData(html);
    var pages = GetPagesUri(html);
    var image_urls = new List<string>();

    if (option == null)
    {
        option = RecommendOption(url);
    }

    option.SimpleInfoCallback?.Invoke($"{data.Title}");

    if (option.ExtractInformation)
    {
        return (null, null /*data*/);
    }

    //
    //  Extract Image Url-Url
    //
    image_urls.AddRange(GetImagesUri(html));

    // pages[0] is skipped: the first gallery page was already downloaded as html above.
    for (int i = 1; i < pages.Length; i++)
    {
        option.PageReadCallback?.Invoke(pages[i]);
        var page = NetTools.DownloadString(pages[i]);
        image_urls.AddRange(GetImagesUri(page));
    }

    //
    //  Extract Image Url
    //
    var result = new NetTask[image_urls.Count];

    var artist = "N/A";
    var group = "N/A";
    var series = "N/A";

    if (data.artist != null && data.artist.Length > 0)
    {
        artist = data.artist[0];
    }
    if (data.group != null && data.group.Length > 0)
    {
        group = data.group[0];
    }
    if (data.parody != null && data.parody.Length > 0)
    {
        series = data.parody[0];
    }

    // Without an artist, the group name stands in for it.
    if (artist == "N/A" && group != "N/A")
    {
        artist = group;
    }

    // With nothing queued no callback would ever signal the event, so only wait
    // (and only publish a thumbnail) when at least one image page exists.
    if (image_urls.Count > 0)
    {
        var count = image_urls.Count;
        var wait = new ManualResetEvent(false);

        for (int i = 0; i < image_urls.Count; i++)
        {
            var task = NetTask.MakeDefault(image_urls[i]);

            // Copy of the loop index so each callback writes its own result slot.
            var j = i;

            task.Priority = new NetPriority { Type = NetPriorityType.Trivial, TaskPriority = i };
            task.DownloadString = true;
            task.CompleteCallbackString = (string str) =>
            {
                var durl = GetImagesAddress(str);
                var tt = NetTask.MakeDefault(durl);
                tt.SaveFile = true;
                tt.Filename = durl.Split('/').Last();
                tt.Format = new ExtractorFileNameFormat
                {
                    Title = data.Title,
                    FilenameWithoutExtension = Path.GetFileNameWithoutExtension(tt.Filename),
                    Extension = Path.GetExtension(tt.Filename).Replace(".", ""),
                    OriginalTitle = data.SubTitle,
                    Artist = artist,
                    Group = group,
                    Series = series
                };
                result[j] = tt;

                // The last callback to finish releases the waiting caller.
                if (Interlocked.Decrement(ref count) == 0)
                {
                    wait.Set();
                }
            };

            AppProvider.Scheduler.Add(task);
        }

        wait.WaitOne();

        option.ThumbnailCallback?.Invoke(result[0]);
    }

    var result_list = result.ToList();
    var extractor_name = GetType().Name.Replace("Extractor", "");
    result_list.ForEach(task => task.Format.Extractor = extractor_name);

    return (result_list, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.WorksComic });
}
/// <summary>
/// Extracts image tasks either from a single episode page (<c>EpisodeImages</c>) or from every
/// episode linked on a works page (<c>Works</c>).
/// </summary>
/// <remarks>
/// Every image entry is a list of candidate URLs: the first one becomes the task URL, the rest are
/// stored as <c>FailUrls</c>. In works mode an episode that raises an exception is abandoned silently.
/// </remarks>
/// <param name="url">Page to download; its "host" group prefixes the episode links.</param>
/// <param name="option">Extractor options; replaced by <c>RecommendOption(url)</c> when null.</param>
/// <returns>
/// Episode mode: the tasks and a null info. Works mode: the tasks and an info typed as WorksComic.
/// Any other type: <c>(null, null)</c>.
/// </returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    if (option == null)
    {
        option = RecommendOption(url);
    }

    var html = NetTools.DownloadString(url);
    var groups = ValidUrl.Match(url).Groups;

    var document = new HtmlDocument();
    document.LoadHtml(html);
    var root = document.DocumentNode;

    if (option.Type == ExtractorType.EpisodeImages)
    {
        var episode_images = get_board_images(html);
        var episode_title = root.SelectSingleNode("/html[1]/head[1]/title[1]").InnerText;

        var episode_tasks = new List<NetTask>();
        var number = 1;

        foreach (var candidates in episode_images)
        {
            var primary = candidates[0];
            var numbered = number.ToString("000");

            var task = NetTask.MakeDefault(primary);
            task.SaveFile = true;
            task.Filename = numbered + Path.GetExtension(primary.Split('/').Last());
            task.Format = new ExtractorFileNameFormat
            {
                Episode = episode_title,
                FilenameWithoutExtension = numbered,
                Extension = Path.GetExtension(task.Filename).Replace(".", "")
            };
            task.FailUrls = candidates.Skip(1).ToList();

            episode_tasks.Add(task);
            number++;
        }

        episode_tasks.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
        return (episode_tasks, null);
    }

    if (option.Type != ExtractorType.Works)
    {
        return (null, null);
    }

    var title = root.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]").InnerText;
    var sub_urls = new List<string>();
    var sub_titles = new List<string>();

    option.SimpleInfoCallback?.Invoke($"{title}");

    // The cover address is taken from the first http(s) URL inside the node's style attribute.
    var cover_style = root.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]").GetAttributeValue("style", "");
    var cover_url = Regex.Match(cover_style, @"(https?://.*?)\)").Groups[1].Value;
    option.ThumbnailCallback?.Invoke(NetTask.MakeDefault(cover_url));

    foreach (var item in root.SelectNodes("/html[1]/body[1]/div[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]/div[2]/div[1]/div[1]/div"))
    {
        sub_urls.Add(groups["host"] + item.SelectSingleNode("./a[1]").GetAttributeValue("href", ""));
        sub_titles.Add(item.SelectSingleNode("./a[1]/div[1]").MyText());
    }

    option.ProgressMax?.Invoke(sub_urls.Count);

    var htmls = NetTools.DownloadStrings(sub_urls, "PHPSESSID=" + Externals.ManamoaPHPSESSID, () =>
    {
        option.PostStatus?.Invoke(1);
    });

    var result = new List<NetTask>();

    for (var index = 0; index < sub_urls.Count; index++)
    {
        try
        {
            var images = get_board_images(htmls[index]);
            var number = 1;

            foreach (var candidates in images)
            {
                var primary = candidates[0];
                var numbered = number.ToString("000");

                var task = NetTask.MakeDefault(primary);
                task.SaveFile = true;
                task.Filename = numbered + Path.GetExtension(primary.Split('/').Last());
                task.Format = new ExtractorFileNameFormat
                {
                    Title = title,
                    Episode = sub_titles[index],
                    FilenameWithoutExtension = numbered,
                    Extension = Path.GetExtension(task.Filename).Replace(".", ""),
                };
                task.FailUrls = candidates.Skip(1).ToList();

                result.Add(task);
                number++;
            }
        }
        catch (Exception)
        {
            // Best effort: a failing episode is dropped and the remaining episodes are still processed.
        }
    }

    result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.WorksComic });
}
/// <summary>
/// Returns the output path template: extractor / "user (account)" / file.ext.
/// </summary>
/// <param name="option">Not consulted; the template is the same for every option.</param>
/// <returns>The fixed format string.</returns>
public override string RecommendFormat(IExtractorOption option)
    => "%(extractor)s/%(user)s (%(account)s)/%(file)s.%(ext)s";
/// <summary>
/// Extracts a gallery using the first configured cookie for every request: collects the per-image
/// page URLs from all gallery pages, resolves each to its image address and returns one task per image.
/// </summary>
/// <param name="url">Address of the first gallery page.</param>
/// <param name="option">Extractor options; replaced by <c>RecommendOption(url)</c> when null.</param>
/// <returns>
/// <c>(null, null)</c> in information mode; otherwise the tasks in page order and an
/// ExtractedInfo typed as WorksComic.
/// </returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    // Resolve the default options before the first callback is touched; the parameter
    // defaults to null and was previously dereferenced ahead of this fallback.
    if (option == null)
    {
        option = RecommendOption(url);
    }

    var html = NetTools.DownloadString(NetTask.MakeDefault(url, cookie: cookies[0]));
    var data = EHentaiExtractor.ParseArticleData(html, @"https://exhentai.org/.*?(?=\))");
    var pages = EHentaiExtractor.GetPagesUri(html);
    var image_urls = new List<string>();

    option.SimpleInfoCallback?.Invoke($"{data.Title}");

    if (option.ExtractInformation)
    {
        return (null, null /*data*/);
    }

    //
    //  Extract Image Url-Url
    //
    image_urls.AddRange(EHentaiExtractor.GetImagesUri(html));

    // pages[0] is skipped: the first gallery page was already downloaded as html above.
    for (int i = 1; i < pages.Length; i++)
    {
        // Invoked through the interface so any option implementation works here.
        option.PageReadCallback?.Invoke(pages[i]);
        var page = NetTools.DownloadString(NetTask.MakeDefault(pages[i], cookie: cookies[0]));
        image_urls.AddRange(EHentaiExtractor.GetImagesUri(page));
    }

    //
    //  Extract Image Url
    //
    var artist = "N/A";
    var group = "N/A";
    var series = "N/A";

    if (data.artist != null && data.artist.Length > 0)
    {
        artist = data.artist[0];
    }
    if (data.group != null && data.group.Length > 0)
    {
        group = data.group[0];
    }
    if (data.parody != null && data.parody.Length > 0)
    {
        series = data.parody[0];
    }

    // Without an artist, the group name stands in for it.
    if (artist == "N/A" && group != "N/A")
    {
        artist = group;
    }

    var result_list = new List<NetTask>(image_urls.Count);

    for (int i = 0; i < image_urls.Count; i++)
    {
        var html2 = NetTools.DownloadString(NetTask.MakeDefault(image_urls[i], cookies[0]));
        var durl = EHentaiExtractor.GetImagesAddress(html2);

        var task = NetTask.MakeDefault(durl, cookies[0]);
        task.SaveFile = true;
        task.Filename = durl.Split('/').Last();
        task.Format = new ExtractorFileNameFormat
        {
            Title = data.Title,
            FilenameWithoutExtension = Path.GetFileNameWithoutExtension(task.Filename),
            Extension = Path.GetExtension(task.Filename).Replace(".", ""),
            OriginalTitle = data.SubTitle,
            Artist = artist,
            Group = group,
            Series = series
        };
        result_list.Add(task);

        // The first resolved image doubles as thumbnail.
        if (i == 0)
        {
            option.ThumbnailCallback?.Invoke(task);
        }
    }

    var extractor_name = GetType().Name.Replace("Extractor", "");
    result_list.ForEach(task => task.Format.Extractor = extractor_name);

    return (result_list, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.WorksComic });
}
/// <summary>
/// Collects the display URLs of a user's posts - the first batch from the profile page, further
/// batches of "50" through <c>InstaApi.query_next</c> - and turns them into download tasks.
/// </summary>
/// <param name="url">Profile page to download.</param>
/// <param name="option">
/// Extractor options, used as <c>InstagramExtractorOption</c>; replaced by <c>RecommendOption(url)</c>
/// when null. <c>LimitPosts[0]</c>, when present, stops paging once that many follow-up posts were requested.
/// </param>
/// <returns>The created tasks and an ExtractedInfo typed as UserArtist.</returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    if (option == null)
    {
        option = RecommendOption(url);
    }

    var insta_option = option as InstagramExtractorOption;

    var limit = int.MaxValue;
    if (insta_option.LimitPosts != null)
    {
        limit = insta_option.LimitPosts[0].ToInt();
    }

    var html = NetTools.DownloadString(url);
    var user = InstaApi.get_user(insta_option, html);

    var urls = new List<string>();
    urls.AddRange(user.FirstPost.DisplayUrls);

    option.PostStatus?.Invoke(user.FirstPost.PostCount);
    option.SimpleInfoCallback?.Invoke($"{user.FullName} ({user.UserName})");

    // requested counts 50 per follow-up query, independent of how many posts actually came back.
    var requested = 0;
    for (var current = user.FirstPost; current.HasNext && requested < limit; requested += 50)
    {
        var batch = InstaApi.query_next(insta_option, InstaApi.posts_query_hash(), user.UserId, "50", current.EndCursor);
        urls.AddRange(batch.DisplayUrls);
        option.PostStatus?.Invoke(batch.PostCount);
        current = batch;
    }

    var result = new List<NetTask>();

    foreach (var display_url in urls)
    {
        // File name: last path segment of the URL with its query string cut off.
        var file_name = display_url.Split('?')[0].Split('/').Last();

        var task = NetTask.MakeDefault(display_url);
        task.SaveFile = true;
        task.Filename = file_name;
        task.Format = new ExtractorFileNameFormat
        {
            FilenameWithoutExtension = Path.GetFileNameWithoutExtension(file_name),
            Extension = Path.GetExtension(file_name).Replace(".", ""),
            User = user.FullName,
            Account = user.UserName
        };

        result.Add(task);
    }

    result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));

    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.UserArtist });
}
/// <summary>
/// Walks the danbooru post listing for the searched tags page by page, opens every listed post and
/// creates one download task for the image (or video) address found on the post page.
/// </summary>
/// <remarks>
/// Post pages whose layout is not recognised are logged and skipped. Videos are skipped when the
/// option's <c>ExcludeVideo</c> is set.
/// </remarks>
/// <param name="url">URL matched against <c>ValidUrl</c>; its "search" group holds the tags.</param>
/// <param name="option">
/// Extractor options, used as <c>DanbooruExtractorOption</c>; defaults to one of type Images.
/// <c>StartPage[0]</c> and <c>EndPage[0]</c>, when present, bound the page range.
/// </param>
/// <returns>The created tasks and an ExtractedInfo typed as UserArtist.</returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    // Candidate locations of the download link on a post page, tried in this order.
    const string one_banner_link = "/html[1]/body[1]/div[1]/div[2]/div[1]/section[1]/div[1]/span[1]/a[1]";
    const string two_banner_link = "/html[1]/body[1]/div[1]/div[2]/div[1]/section[1]/div[2]/span[1]/a[1]";
    const string plain_image = "/html[1]/body[1]/div[1]/div[2]/div[1]/section[1]/section[1]/img[1]";
    const string video_link = "/html[1]/body[1]/div[1]/div[2]/div[1]/section[1]/section[1]/p[1]/a[1]";

    var match = ValidUrl.Match(url).Groups;

    if (option == null)
    {
        option = new DanbooruExtractorOption { Type = ExtractorType.Images };
    }

    var tags = match["search"].Value;
    var result = new List<NetTask>();
    var page = 1;
    var danbooru_option = option as DanbooruExtractorOption;

    option.SimpleInfoCallback?.Invoke($"{tags}");

    if (danbooru_option.StartPage != null)
    {
        page = danbooru_option.StartPage[0].ToInt();
    }

    var end_page = int.MaxValue;
    if (danbooru_option.EndPage != null)
    {
        end_page = danbooru_option.EndPage[0].ToInt();
    }

    var already_thumbnail = false;

    while (true)
    {
        var durl = $"https://danbooru.donmai.us/posts?tags={tags}&page=" + page.ToString();

        option.PageReadCallback?.Invoke(durl);

        var html = NetTools.DownloadString(durl);
        var node = html.ToHtmlNode().SelectNodes("/html[1]/body[1]/div[1]/div[3]/div[1]/section[1]/div[3]/div[1]/article");

        // A listing page without articles ends the walk.
        if (node == null)
        {
            break;
        }

        var ds = new List<string>();
        foreach (var sub in node)
        {
            ds.Add("https://danbooru.donmai.us" + sub.SelectSingleNode("./a").GetAttributeValue("href", ""));
        }

        var htmls = NetTools.DownloadStrings(ds);

        for (int i = 0; i < htmls.Count; i++)
        {
            var snode = htmls[i].ToHtmlNode();
            var img_url = "";

            // Just one banner
            if (snode.SelectSingleNode(one_banner_link)?.GetAttributeValue("id", "") == "image-resize-link")
            {
                img_url = snode.SelectSingleNode(one_banner_link).GetAttributeValue("href", "");
            }
            // Two banner
            else if (snode.SelectSingleNode(two_banner_link)?.GetAttributeValue("id", "") == "image-resize-link")
            {
                img_url = snode.SelectSingleNode(two_banner_link).GetAttributeValue("href", "");
            }
            // Three or none banner
            else if (snode.SelectSingleNode(plain_image) != null)
            {
                img_url = snode.SelectSingleNode(plain_image).GetAttributeValue("src", "");
            }
            // Video URL
            else if (snode.SelectSingleNode(video_link) != null)
            {
                if (danbooru_option.ExcludeVideo)
                {
                    continue;
                }

                img_url = snode.SelectSingleNode(video_link).GetAttributeValue("href", "");
            }
            else
            {
                // Unknown layout: report it and do not queue a task with an empty address.
                Log.Logs.Instance.PushError("[DanbooruExtractor] Cannot find html format! " + ds[i]);
                continue;
            }

            var task = NetTask.MakeDefault(img_url);
            task.SaveFile = true;
            task.Filename = img_url.Split('/').Last();
            task.Format = new ExtractorFileNameFormat
            {
                Search = tags,
                FilenameWithoutExtension = Path.GetFileNameWithoutExtension(task.Filename),
                Extension = Path.GetExtension(task.Filename).Replace(".", "")
            };
            result.Add(task);
        }

        // Publish the thumbnail as soon as a first task exists; a page may yield none
        // (for example when every post on it was skipped).
        if (!already_thumbnail && result.Count > 0)
        {
            option.ThumbnailCallback?.Invoke(result[0]);
            already_thumbnail = true;
        }

        page += 1;

        if (page > end_page)
        {
            break;
        }
    }

    var extractor_name = GetType().Name.Replace("Extractor", "");
    result.ForEach(task => task.Format.Extractor = extractor_name);

    // NOTE(review): reported as UserArtist although the input is a tag search - confirm intended.
    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.UserArtist });
}
// Closing brace carried over from the original line; it ends a scope opened outside this block.
}
/// <summary>
/// Collects one download task per image for every episode listed on the page at
/// <paramref name="url"/>.
/// </summary>
/// <remarks>
/// The title and the episode links are read from the given page; every episode page is then
/// downloaded and its images are numbered from 001 within that episode. Files are always
/// named <c>NNN.jpg</c>, whatever extension the image URL carries.
/// </remarks>
/// <param name="url">Page listing the episodes; the <c>host</c> group of <c>ValidUrl</c> is prepended to the thumbnail path.</param>
/// <param name="option">Extractor options; <c>RecommendOption(url)</c> is used when <c>null</c>.</param>
/// <returns>The collected tasks and an <c>ExtractedInfo</c> of type <c>WorksComic</c>.</returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    const string title_xpath = "/html[1]/body[1]/div[1]/div[3]/div[1]/h1[1]";
    const string episodes_xpath = "/html[1]/body[1]/div[1]/div[3]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div";
    const string thumbnail_xpath = "/html[1]/body[1]/div[1]/div[3]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/ul[1]/li[1]/div[1]/div[1]/a[1]/img[1]";
    const string images_xpath = "/html[1]/body[1]/div[1]/div[3]/div[2]/div[1]/div[2]/ul[1]//li/div[1]/img[1]";

    if (option == null)
    {
        option = RecommendOption(url);
    }

    var html = NetTools.DownloadString(url);
    var match = ValidUrl.Match(url).Groups;
    var node = html.ToHtmlNode();

    var title = node.SelectSingleNode(title_xpath).InnerText.Trim();

    var sub_urls = new List<string>();
    var sub_titles = new List<string>();

    foreach (var episode in node.SelectNodes(episodes_xpath))
    {
        var tag_a = episode.SelectSingleNode("./div[2]/h2[1]/a[1]");
        sub_urls.Add(tag_a.GetAttributeValue("href", ""));
        sub_titles.Add(tag_a.InnerText.Trim());
    }

    option.SimpleInfoCallback?.Invoke(title);

    // The thumbnail is optional: a missing cover image must not abort the extraction.
    var thumbnail_node = node.SelectSingleNode(thumbnail_xpath);
    if (thumbnail_node != null)
    {
        option.ThumbnailCallback?.Invoke(NetTask.MakeDefault(
            match["host"].Value + thumbnail_node.GetAttributeValue("src", "")));
    }

    option.ProgressMax?.Invoke(sub_urls.Count);

    var sub_htmls = NetTools.DownloadStrings(sub_urls, "", () => { option.PostStatus?.Invoke(1); });

    var result = new List<NetTask>();

    for (int i = 0; i < sub_urls.Count; i++)
    {
        var snode = sub_htmls[i].ToHtmlNode();

        // SelectNodes yields null (not an empty collection) when nothing matches;
        // such an episode simply contributes no tasks.
        var images = snode.SelectNodes(images_xpath);
        if (images == null)
        {
            continue;
        }

        int count = 1;
        foreach (var img in images)
        {
            // Prefer "data-src" and fall back to "src" when it is absent or blank.
            var img_src = img.GetAttributeValue("data-src", "");
            if (string.IsNullOrWhiteSpace(img_src))
            {
                img_src = img.GetAttributeValue("src", "");
            }

            var task = NetTask.MakeDefault(HttpUtility.HtmlDecode(img_src));
            task.SaveFile = true;
            task.Filename = count.ToString("000") + ".jpg";
            task.Format = new ExtractorFileNameFormat
            {
                Title = title,
                Episode = sub_titles[i],
                FilenameWithoutExtension = count.ToString("000"),
                Extension = Path.GetExtension(task.Filename).Replace(".", ""),
            };

            result.Add(task);
            count++;
        }
    }

    result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));

    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.WorksComic });
}
/// <summary>
/// Returns the output path template recommended for this extractor.
/// </summary>
/// <param name="option">Not consulted; the same template is returned for every option.</param>
/// <returns>
/// A template built from the <c>%(extractor)s</c>, <c>%(artist)s</c>, <c>%(id)s</c>,
/// <c>%(title)s</c>, <c>%(file)s</c> and <c>%(ext)s</c> placeholders.
/// </returns>
public override string RecommendFormat(IExtractorOption option) =>
    "%(extractor)s/%(artist)s/[%(id)s] %(title)s/%(file)s.%(ext)s";
/// <summary>
/// Collects download tasks for the product images of a <c>shopbrand</c> or <c>bestseller</c>
/// listing on <c>www.hn-hn.co.kr</c>.
/// </summary>
/// <remarks>
/// All listing pages (1 up to the page number read from the "last" pager link) are downloaded,
/// the product links on them are followed, and every image inside the product detail area
/// becomes a task unless its file name is on the fixed exclusion list. Pages are decoded with
/// code page 51949 (EUC-KR). For any other <c>menu</c> value the result is empty.
/// </remarks>
/// <param name="url">Listing URL; the <c>menu</c> group of <c>ValidUrl</c> selects the behaviour.</param>
/// <param name="option">Extractor options; <c>RecommendOption(url)</c> is used when <c>null</c>.</param>
/// <returns>The collected tasks and an <c>ExtractedInfo</c> of type <c>Search</c>.</returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    const string host = "http://www.hn-hn.co.kr";
    const string listing_root = "/html[1]/body[1]/div[3]/div[3]/div[1]/div[2]/div[3]/div[1]/div[5]";
    const string detail_root = "/html[1]/body[1]/div[3]/div[3]/div[1]/div[2]/div[1]/div[2]";

    var match = ValidUrl.Match(url).Groups;

    if (option == null)
    {
        option = RecommendOption(url);
    }

    var mtask = NetTask.MakeDefault(url);
    mtask.Encoding = Encoding.GetEncoding(51949);
    var html = NetTools.DownloadString(mtask);
    var node = html.ToHtmlNode();

    var result = new List<NetTask>();

    if (match["menu"].Value == "shopbrand" || match["menu"].Value == "bestseller")
    {
        // Site decoration that shows up inside the product area but is not product content.
        var filtering_filename = new string[]
        {
            "HN_Copyright2.jpg",
            "next_product.gif",
            "prev_product.gif",
            "btn_h8_spin_dw.gif",
            "btn_h8_spin_up.gif",
            "Review.jpg",
            "shoppingguide2.jpg",
            "sizetip-2.jpg"
        };

        var gallery = node.SelectSingleNode("/html[1]/head[1]/title[1]").InnerText.Trim();

        option.SimpleInfoCallback?.Invoke(gallery);

        // Without a "last" pager link the listing is treated as a single page.
        var last_page_node = node.SelectSingleNode(listing_root + "/ol[1]/li[@class='last']/a");
        var last_page = 1;
        if (last_page_node != null)
        {
            last_page = last_page_node.GetAttributeValue("href", "").Split('=').Last().ToInt();
        }

        var page_urls = Enumerable.Range(1, last_page).Select(page => $"{url}&page={page}").ToList();
        var htmls = NetTools.DownloadStrings(page_urls);

        var sub_urls = new List<string>();
        foreach (var shtml in htmls)
        {
            // SelectNodes yields null when a listing page holds no product links.
            var links = shtml.ToHtmlNode().SelectNodes(listing_root + "/table[1]/tbody[1]//a");
            if (links == null)
            {
                continue;
            }

            sub_urls.AddRange(links.Select(x => host + x.GetAttributeValue("href", "")));
        }

        option.ProgressMax?.Invoke(sub_urls.Count);

        // Product pages are fetched one by one so each can be decoded with the site's code page.
        var sub_htmls = new List<string>();
        foreach (var surl in sub_urls)
        {
            var page_task = NetTask.MakeDefault(surl);
            page_task.Encoding = Encoding.GetEncoding(51949);
            sub_htmls.Add(NetTools.DownloadString(page_task));
            option.PostStatus?.Invoke(1);
        }

        foreach (var shtml in sub_htmls)
        {
            var snode = shtml.ToHtmlNode();
            var title = snode.SelectSingleNode(detail_root + "/div[1]/form[1]/div[1]/div[1]/h3[1]").InnerText.Trim();

            // A product page without any image contributes no tasks.
            var img_nodes = snode.SelectNodes(detail_root + "//img");
            if (img_nodes == null)
            {
                continue;
            }

            foreach (var img_node in img_nodes)
            {
                // Absolute sources are used as they are; site-relative ones get the host
                // prepended and their query string removed.
                var src = img_node.GetAttributeValue("src", "");
                var img = src.StartsWith("http") ? src : host + src.Split('?')[0];

                var filename = img.Split('/').Last();
                if (filtering_filename.Contains(filename))
                {
                    continue;
                }

                var task = NetTask.MakeDefault(img);
                task.SaveFile = true;
                task.Filename = filename;
                task.Format = new ExtractorFileNameFormat
                {
                    Gallery = gallery,
                    Title = title,
                    FilenameWithoutExtension = Path.GetFileNameWithoutExtension(task.Filename),
                    Extension = Path.GetExtension(task.Filename).Replace(".", "")
                };

                result.Add(task);
            }
        }

        // An empty listing (or one whose images were all filtered out) has no thumbnail.
        if (result.Count > 0)
        {
            option.ThumbnailCallback?.Invoke(result[0]);
        }
    }

    result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));

    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.Search });
}
/// <summary>
/// Collects download tasks for every image of a hitomi.la gallery and fills in the
/// gallery's metadata.
/// </summary>
/// <remarks>
/// Three documents are fetched: the gallery block page and the gallery image list from
/// <c>ltn.hitomi.la</c>, and the gallery page on <c>hitomi.la</c> addressed by the block's
/// <c>Magic</c> value. Image URLs are derived from each image-list entry's
/// <c>hash</c>, <c>haswebp</c> and <c>name</c> fields.
/// </remarks>
/// <param name="url">Gallery URL; the <c>id</c> group of <c>ValidUrl</c> supplies the gallery id.</param>
/// <param name="option">
/// Extractor options; <c>RecommendOption(url)</c> is used when <c>null</c>. Files are numbered
/// 001, 002, … unless the option is a <c>HitomiExtractorOption</c> with <c>RealFilename</c> set.
/// </param>
/// <returns>
/// The tasks plus an <c>ExtractedInfo</c> of type <c>WorksComic</c>; <c>(null, null)</c> when
/// the option type is not <c>Images</c>, when a download comes back empty, or when the image
/// list contains no JSON array.
/// </returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    var match = ValidUrl.Match(url).Groups;

    if (option == null)
    {
        option = RecommendOption(url);
    }

    if (option.Type != ExtractorType.Images)
    {
        return (null, null);
    }

    var id = match["id"].Value;
    var sinfo = new ExtractedInfo.WorksComic();
    var block_url = $"https://ltn.hitomi.la/galleryblock/{id}.html";
    var imgs_url = $"https://ltn.hitomi.la/galleries/{id}.js";

    option.PageReadCallback?.Invoke(block_url);
    option.PageReadCallback?.Invoke(url);
    option.PageReadCallback?.Invoke(imgs_url);

    var strings = NetTools.DownloadStrings(new List<string> { block_url, imgs_url });
    if (string.IsNullOrEmpty(strings[0]) || string.IsNullOrEmpty(strings[1]))
    {
        return (null, null);
    }

    var data1 = ParseGalleryBlock(strings[0]);
    var imgs = strings[1];

    var string2 = NetTools.DownloadString($"https://hitomi.la{data1.Magic}");
    if (string.IsNullOrEmpty(string2))
    {
        return (null, null);
    }

    var data2 = ParseGallery(string2);

    option.SimpleInfoCallback?.Invoke($"[{id}] {data1.Title}");

    // download.js
    // NOTE(review): Convert.ToInt32(char) yields the character code of the last id character,
    // not its digit value - confirm this is the intended input to the modulo.
    var number_of_frontends = 3;
    var subdomain = Convert.ToChar(97 + (Convert.ToInt32(id.Last()) % number_of_frontends));
    if (id.Last() == '0')
    {
        subdomain = 'a';
    }

    // The image list is a script whose payload starts at the first '['. Without one there is
    // nothing to parse, which is reported like any other failed download.
    var array_start = imgs.IndexOf('[');
    if (array_start < 0)
    {
        return (null, null);
    }

    var arr = JArray.Parse(imgs.Substring(array_start));
    var img_urls = new List<string>();

    foreach (var obj in arr)
    {
        var hash = obj.Value<string>("hash");

        if (obj.Value<int>("haswebp") == 0 || hash == null)
        {
            // No webp variant (or no hash): address the original file by name.
            img_urls.Add($"https://{subdomain}a.hitomi.la/galleries/{id}/{obj.Value<string>("name")}");
        }
        else if (hash == "")
        {
            img_urls.Add($"https://{subdomain}a.hitomi.la/webp/{obj.Value<string>("name")}.webp");
        }
        else if (hash.Length < 3)
        {
            img_urls.Add($"https://{subdomain}a.hitomi.la/webp/{hash}.webp");
        }
        else
        {
            // The last three hash characters form the two directory levels: <c>/<ab>/ for "...abc".
            var postfix = hash.Substring(hash.Length - 3);
            img_urls.Add($"https://{subdomain}a.hitomi.la/webp/{postfix[2]}/{postfix[0]}{postfix[1]}/{hash}.webp");
        }
    }

    // Cast once; an option of another type falls back to sequential file names.
    var hitomi_option = option as HitomiExtractorOption;
    var use_real_filename = hitomi_option != null && hitomi_option.RealFilename;

    var result = new List<NetTask>();
    var ordering = 1;

    foreach (var img in img_urls)
    {
        var file = img.Split('/').Last();
        var filename = Path.GetFileNameWithoutExtension(file);

        if (!use_real_filename)
        {
            filename = ordering++.ToString("000");
        }

        var task = NetTask.MakeDefault(img);
        task.SaveFile = true;
        task.Filename = file;
        task.Format = new ExtractorFileNameFormat
        {
            Title = data1.Title,
            Id = id,
            Language = data1.Language,
            UploadDate = data1.Posted,
            FilenameWithoutExtension = filename,
            Extension = Path.GetExtension(file).Replace(".", "")
        };

        // Only the first entry of each metadata list is used; "NA" marks a missing list.
        task.Format.Artist = data1.artist != null ? data1.artist[0] : "NA";
        task.Format.Series = data1.parody != null ? data1.parody[0] : "NA";
        task.Format.Group = data2.group != null ? data2.group[0] : "NA";
        task.Format.Character = data2.character != null ? data2.character[0] : "NA";

        // A gallery without an artist is filed under its group instead.
        if (task.Format.Artist == "NA" && task.Format.Group != "NA")
        {
            task.Format.Artist = task.Format.Group;
        }

        result.Add(task);
    }

    // An empty image list has no first task to use as thumbnail.
    if (result.Count > 0)
    {
        option.ThumbnailCallback?.Invoke(result[0]);
        sinfo.Thumbnail = result[0];
    }

    sinfo.URL = url;
    sinfo.Title = data1.Title;
    sinfo.Author = data1.artist?.ToArray();
    sinfo.AuthorGroup = data2.group?.ToArray();
    sinfo.ShortInfo = $"[{id}] {data1.Title}";
    sinfo.Tags = data1.Tags?.ToArray();
    sinfo.Characters = data2.character?.ToArray();
    sinfo.Language = data1.Language;
    sinfo.Parodies = data1.parody?.ToArray();

    result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));

    return (result, new ExtractedInfo { Info = sinfo, Type = ExtractedInfo.ExtractedType.WorksComic });
}