/// <summary>
/// Saves a URL record, deletes it again, and checks that looking it up by
/// site id and friendly URL afterwards yields null.
/// </summary>
public void DeleteTest()
{
    // arrange
    ValidUrl stored = new ValidUrl();
    stored.Id = Guid.NewGuid();
    stored.SiteId = 0;
    stored.Action = "/Template/Compose";
    stored.Active = true;
    stored.FriendlyUrl = "/flights/v5";
    stored.Index = true;
    stored.StatusCode = 200;
    stored.View = "/home/flights.cshtml";
    stored.LastModified = DateTime.Now;
    _mongodbRepo.Save(stored);

    // act
    _mongodbRepo.Delete(stored);

    // assert
    var lookedUp = _mongodbRepo.GetByFriendlyUrl(stored.SiteId, stored.FriendlyUrl);
    Assert.IsNull(lookedUp);
}
/// <summary>
/// Downloads the page at <paramref name="url"/>, parses the JSON object that follows
/// "item: " in the HTML, and creates one download task per entry of its
/// <c>album_images.images</c> array.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="option">Extractor options; when null, <c>RecommendOption(url)</c> supplies them.</param>
/// <returns>The download tasks and an <c>ExtractedInfo</c> typed as Search.</returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    if (option == null)
    {
        option = RecommendOption(url);
    }

    var html = NetTools.DownloadString(url);

    // The static Regex.Match goes through the framework's pattern cache instead of
    // building a fresh Regex on every call.
    var itemJson = Regex.Match(html, "item: ({.*})").Groups[1].Value;
    var images = JObject.Parse(itemJson)["album_images"]["images"];

    var result = new List<NetTask>();
    foreach (var img in images)
    {
        var hash = img["hash"].ToString();
        var ext = img["ext"].ToString();

        var task = NetTask.MakeDefault($"https://i.imgur.com/{hash}{ext}");
        task.SaveFile = true;
        task.Filename = $"{hash}{ext}";
        task.Format = new ExtractorFileNameFormat
        {
            Id = hash,
            Extension = ext,
            FilenameWithoutExtension = hash,
            Url = url
        };
        result.Add(task);
    }

    // Tag every task with this extractor's class name minus the "Extractor" suffix.
    result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));

    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.Search });
}
/// <summary>
/// Stamps the posted URL record with audit and site data, then saves it as a new record
/// (when its Id is empty) or updates the existing one. On any exception the response is
/// reset and turned into a 500 whose status description carries the exception message.
/// </summary>
/// <param name="url_">The URL record bound from the request.</param>
/// <returns>A JSON result holding the success message, or an empty string after a failure.</returns>
public ActionResult UpdateUrl(ValidUrl url_)
{
    string message = string.Empty;

    try
    {
        url_.LastModified = DateTime.Now;
        url_.LastModifiedBy = this.CMSUser.UserName;
        // Round-trips the priority through the "N1" format, i.e. keeps one decimal place.
        url_.SitemapPriority = float.Parse(url_.SitemapPriority.ToString("N1"));
        url_.Action = ECMSSettings.Current.DefaultURLRewriteAction;
        url_.SiteId = (short)this.GetSiteIdFromContext();

        if (url_.Id != Guid.Empty)
        {
            DependencyManager.URLRepository.Update(url_);
        }
        else
        {
            // No id yet: this is a new record.
            url_.Id = Guid.NewGuid();
            DependencyManager.URLRepository.Save(url_);
        }

        message = "Url Updated Successfully.";
    }
    catch (Exception ex)
    {
        Response.Clear();
        Response.ClearHeaders();
        Response.ClearContent();
        Response.StatusCode = 500;
        Response.StatusDescription = "Failed : " + ex.Message;
    }

    return Json(message);
}
/// <summary>
/// Saves a URL record, reads it back, changes its friendly URL, status code and active
/// flag, updates it, and checks that the re-read record carries the changed values.
/// </summary>
public void UpdateTest()
{
    // arrange
    ValidUrl temp1 = new ValidUrl
    {
        Id = Guid.NewGuid(),
        SiteId = 0,
        Action = "/Template/Compose",
        Active = true,
        FriendlyUrl = "/flights/",
        Index = true,
        StatusCode = 200,
        View = "/home/flights.cshtml",
        LastModified = DateTime.Now
    };
    _mongodbRepo.Save(temp1);

    ValidUrl expected = _mongodbRepo.GetByFriendlyUrl(temp1.SiteId, temp1.FriendlyUrl);
    expected.FriendlyUrl = "/flights/v1";
    expected.StatusCode = 200;
    expected.Active = false;

    // act
    _mongodbRepo.Update(expected);
    ValidUrl actual = _mongodbRepo.GetByFriendlyUrl(expected.SiteId, expected.FriendlyUrl);

    // assert
    // Expected value goes first so that failure messages report the two values the right way round.
    Assert.AreEqual(expected.FriendlyUrl, actual.FriendlyUrl);
    Assert.AreEqual(expected.StatusCode, actual.StatusCode);
    Assert.AreEqual(expected.Active, actual.Active);
}
/// <summary>
/// Picks the extraction mode for a URL from the capture groups of <c>ValidUrl</c>:
/// "comic" + "detail" gives EpisodeImages, "comic" + "list" gives ComicIndex,
/// and everything else (including "blog") gives Images.
/// </summary>
/// <param name="url">The URL to classify.</param>
/// <returns>A new <c>NaverExtractorOption</c> with the chosen type.</returns>
public override IExtractorOption RecommendOption(string url)
{
    var groups = ValidUrl.Match(url).Groups;
    var recommended = NaverExtractorOption.ExtractorType.Images;

    if (groups[1].Value == "comic")
    {
        switch (groups[4].Value)
        {
            case "detail":
                recommended = NaverExtractorOption.ExtractorType.EpisodeImages;
                break;
            case "list":
                recommended = NaverExtractorOption.ExtractorType.ComicIndex;
                break;
        }
    }

    return new NaverExtractorOption { Type = recommended };
}
/// <summary>
/// Loads the content item stored for a URL and view type. When no document matches the URL id,
/// the default content for the URL's site, view name and view type is looked up instead.
/// If an item is found, its body is replaced by the first record parsed from the CSV text
/// held in <c>Body[0]</c>.
/// </summary>
/// <param name="url_">The URL whose content is requested.</param>
/// <param name="viewType_">The view type, stored in the database as its integer value.</param>
/// <returns>The content item, or null when neither query finds a document.</returns>
public override ContentItem GetById(ValidUrl url_, ContentViewType viewType_)
{
    int viewType = Convert.ToInt32(viewType_);

    // The view type is filtered on "ContentView.ViewType", the same path the fallback query
    // below uses. The previous filter used a bare "ViewType" path.
    // NOTE(review): confirm against the stored document shape that no top-level "ViewType" field exists.
    ContentItem item = _db.GetCollection<ContentItem>(COLLNAME)
        .Find(Query.And(
            Query.EQ("Url.Id", url_.Id),
            Query.EQ("ContentView.ViewType", viewType)))
        .FirstOrDefault<ContentItem>();

    if (item == null)
    {
        DependencyManager.Logger.Log(new LogEventInfo(LogLevel.Debug, ECMSSettings.DEFAULT_LOGGER, "Specific content not found now going to search for default content."));

        item = _db.GetCollection<ContentItem>(COLLNAME)
            .Find(Query.And(
                Query.EQ("ContentView.SiteId", url_.SiteId),
                Query.EQ("ContentView.ViewName", url_.View),
                Query.EQ("ContentView.ViewType", viewType)))
            .FirstOrDefault<ContentItem>();
    }

    //TODO : Optimize this
    if (item != null)
    {
        using (StringReader streamReader = new StringReader(item.Body[0].ToString()))
        {
            using (var csv = new CsvHelper.CsvReader(streamReader))
            {
                // Advance to the first data record and expose it as a JObject body.
                csv.Read();
                item.Body = JObject.FromObject(csv.GetRecord(typeof(object)));
            }
        }
    }

    return item;
}
/// <summary>
/// Renders <paramref name="hrefTemplate_"/> for a friendly URL. When the URL is known for site 1
/// and has status code 200, the template is formatted with the friendly URL. Otherwise the
/// text captured by the "name" group of <c>hrefRegex</c> in the template is returned, or
/// <c>emptyMVCHtmlString</c> when that group did not match.
/// </summary>
/// <param name="url_">Friendly URL to look up.</param>
/// <param name="hrefTemplate_">Format string with a <c>{0}</c> placeholder for the URL.</param>
/// <returns>The rendered markup; <c>emptyMVCHtmlString</c> when any exception is thrown (it is logged).</returns>
public static MvcHtmlString GetHref(string url_, string hrefTemplate_)
{
    try
    {
        ValidUrl vu = DependencyManager.URLRepository.GetByFriendlyUrl(1, url_);
        if (vu != null && vu.StatusCode == 200)
        {
            return new MvcHtmlString(string.Format(hrefTemplate_, vu.FriendlyUrl));
        }

        // Regex.Match never returns null and Groups[...] never yields null, so the old
        // null/Count checks were always true. Group.Success is what actually tells whether
        // the "name" group captured anything.
        Group nameGroup = hrefRegex.Match(hrefTemplate_).Groups["name"];
        if (nameGroup.Success)
        {
            return new MvcHtmlString(nameGroup.Value);
        }

        return emptyMVCHtmlString;
    }
    catch (Exception ex)
    {
        LogEventInfo info = new LogEventInfo(LogLevel.Error, ECMSSettings.DEFAULT_LOGGER, ex.ToString());
        DependencyManager.Logger.Log(info);
        return emptyMVCHtmlString;
    }
}
/// <summary>
/// Builds a content item for a URL from the page contents loaded via <c>LoadPageContents</c>
/// and the head produced by <c>GetHeadContentByViewName</c>. For every body property whose
/// text contains "@" and has no cached <c>ITemplate</c> under its hash code, a background
/// task is started to create the template, and the task is cached under "Task." + hash code.
/// </summary>
/// <param name="url_">The URL whose content is requested.</param>
/// <param name="viewType_">The view type to load content for.</param>
/// <returns>The populated content item.</returns>
public override ContentItem GetById(ValidUrl url_, ContentViewType viewType_)
{
    ContentItem item = new ContentItem();
    item.Url = url_;

    JObject body = LoadPageContents(url_, viewType_, true);
    item.Body = body;
    item.Head = GetHeadContentByViewName(url_, body, viewType_);

    foreach (JToken child in body.Children())
    {
        JProperty property = child as JProperty;
        if (property == null)
        {
            continue;
        }

        string text = property.Value.ToString();
        if (!text.Contains("@"))
        {
            continue;
        }

        string hashCode = text.GetHashCode().ToString();
        if (DependencyManager.CachingService.Get<ITemplate>(hashCode) != null)
        {
            // A template for this text is already cached.
            continue;
        }

        // The property value is read again inside the task, when the task actually runs.
        Task compileTask = Task.Factory.StartNew(() => CreateTemplateAndSetInCache(hashCode, property.Value.ToString()));
        DependencyManager.CachingService.Set<Task>("Task." + hashCode, compileTask);
    }

    return item;
}
/// <summary>
/// Attaches the URL record identified by <paramref name="id"/> (looked up for the current site)
/// to the posted content item, saves the item for the given view type, and redirects to
/// the "Index" action of the "Urls" controller.
/// </summary>
/// <param name="id">Id of the URL the content belongs to.</param>
/// <param name="item_">The posted content item.</param>
/// <param name="vm">The view type the content is saved for.</param>
/// <returns>A redirect to Urls/Index.</returns>
public ActionResult UrlDataEdit(Guid id, ContentItem item_, ContentViewType vm)
{
    item_.Url = DependencyManager.URLRepository.GetById(this.GetSiteIdFromContext(), id, false);

    DependencyManager.ContentRepository.Save(item_, vm);

    return RedirectToAction("Index", "Urls");
}
/// <summary>
/// Builds download tasks for a "reader" URL: downloads the info page for the id captured from
/// the URL, downloads the JSON image list for that id, and creates one task per listed image.
/// </summary>
/// <param name="url">The URL to extract from; its "type" group must be "reader".</param>
/// <param name="option">Extractor options; when null, <c>RecommendOption(url)</c> supplies them.</param>
/// <returns>The download tasks and an <c>ExtractedInfo</c> typed as WorksComic.</returns>
/// <exception cref="ExtractorException">The URL's "type" group is not "reader".</exception>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    var match = ValidUrl.Match(url).Groups;

    if (option == null)
    {
        option = RecommendOption(url);
    }

    if (match["type"].Value != "reader")
    {
        throw new ExtractorException("'search' page not supports yet!");
    }

    var id = match["id"].Value;

    var article_info_url = $"https://hiyobi.me/info/{id}";
    option.PageReadCallback?.Invoke(article_info_url);
    var info_html = NetTools.DownloadString(article_info_url);
    var data = parse_info(info_html);

    var img_file_json_url = $"https://xn--9w3b15m8vo.asia/data/json/{id}_list.json";
    option.PageReadCallback?.Invoke(img_file_json_url);

    // NOTE(review): this cookie value is hard-coded; confirm it is still accepted by the server.
    var cookie = "__cfduid=d53c18b351d4a54007ac583a96f4436381568466715";
    var img_file_json_task = NetTask.MakeDefault(img_file_json_url, cookie);
    var img_file_json = NetTools.DownloadString(img_file_json_task);
    var img_urls = JArray.Parse(img_file_json)
        .Select(x => $"https://xn--9w3b15m8vo.asia/data/{id}/{x["name"].ToString()}")
        .ToList();

    option.SimpleInfoCallback?.Invoke($"{data.Title}");

    var result = new List<NetTask>();
    var count = 1;
    foreach (var img in img_urls)
    {
        var task = NetTask.MakeDefault(img);
        task.SaveFile = true;
        task.Filename = img.Split('/').Last();
        task.Cookie = cookie;
        task.Format = new ExtractorFileNameFormat
        {
            Id = id,
            Title = data.Title,
            Artist = data.artist != null ? data.artist[0] : "N/A",
            // NOTE(review): Group is filled from data.artist, same as Artist — confirm this is intended.
            Group = data.artist != null ? data.artist[0] : "N/A",
            FilenameWithoutExtension = count++.ToString("000"),
            Extension = Path.GetExtension(task.Filename).Replace(".", "")
        };
        result.Add(task);
    }

    // An empty image list used to make result[0] throw ArgumentOutOfRangeException here;
    // with nothing to show, the thumbnail callback is simply skipped.
    if (result.Count > 0)
    {
        option.ThumbnailCallback?.Invoke(result[0]);
    }

    result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));

    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.WorksComic });
}
/// <summary>
/// Looks up the head JSON registered in <c>ContentHeadList</c> under the key
/// "{SiteId}-{view type as int}-{View without leading/trailing slashes}", calls
/// <c>MergeInto</c> on it with the body JSON, and loads the result into a new head object.
/// </summary>
/// <param name="url_">Supplies the site id and view name for the lookup key.</param>
/// <param name="jsonBody">Body JSON passed to <c>MergeInto</c>.</param>
/// <param name="viewType_">View type; its integer value is part of the lookup key.</param>
/// <returns>A <c>ContentItemHead</c> loaded from the head JSON.</returns>
public ContentItemHead GetHeadContentByViewName(ValidUrl url_, JObject jsonBody, ContentViewType viewType_)
{
    string headKey = string.Join("-", new string[]
    {
        url_.SiteId.ToString(),
        Convert.ToInt32(viewType_).ToString(),
        url_.View.Trim(new char[] { '/' })
    });

    JObject headJson = ContentHeadList[headKey];
    headJson.MergeInto(jsonBody);

    ContentItemHead head = new ContentItemHead();
    head.LoadFromJObject(headJson);
    return head;
}
/// <summary>
/// Builds the path of the content file for a URL:
/// {BaseDirectory}\app_data\{SiteId}\{view type as int}\{bodycontent|headcontent}\{Id}{ECMS_FILE_EXTENSION}.
/// </summary>
/// <param name="url_">Supplies the site id and the URL id used as file name.</param>
/// <param name="viewType_">View type; its integer value names a folder.</param>
/// <param name="forBodyContent_">True for the "bodycontent" folder, false for "headcontent".</param>
/// <returns>The composed path; the file is not checked for existence.</returns>
private string ConstructPath(ValidUrl url_, ContentViewType viewType_, bool forBodyContent_)
{
    string contentFolder;
    if (forBodyContent_)
    {
        contentFolder = "\\bodycontent\\";
    }
    else
    {
        contentFolder = "\\headcontent\\";
    }

    return string.Concat(new object[]
    {
        AppDomain.CurrentDomain.BaseDirectory,
        "\\app_data\\",
        url_.SiteId,
        "\\",
        Convert.ToInt32(viewType_).ToString(),
        contentFolder,
        url_.Id,
        ECMS_FILE_EXTENSION
    });
}
/// <summary>
/// Reads the page content for a URL from disk. The URL-specific file is used when it exists;
/// otherwise the path is rebuilt from the view looked up by the URL's view name.
/// </summary>
/// <param name="url_">The URL whose content is loaded.</param>
/// <param name="viewType_">View type used to build the URL-specific path.</param>
/// <param name="forBodyContent_">True to load body content, false to load head content.</param>
/// <returns>The JSON returned by <c>ReadPageContentFromDisk</c> for the chosen path.</returns>
private JObject LoadPageContents(ValidUrl url_, ContentViewType viewType_, bool forBodyContent_)
{
    string urlSpecificPath = ConstructPath(url_, viewType_, forBodyContent_);
    if (File.Exists(urlSpecificPath))
    {
        return ReadPageContentFromDisk(urlSpecificPath);
    }

    // No file for this URL: fall back to the path built for its view.
    ECMSView view = DependencyManager.ViewRepository.GetByViewName(url_.View);
    return ReadPageContentFromDisk(ConstructPath(view, forBodyContent_));
}
/// <summary>
/// With the in-process caching service installed, looks up a flights URL for site 1 in the
/// file repository and checks its view, active flag, index flag and status code.
/// </summary>
public void GetById_InProcCachingService_Test()
{
    const string friendlyUrl = "/flights/cheap-flights-to-new-york-city";

    DependencyManager.CachingService = new InProcCachingService();
    ValidUrlFileRepository repository = new ValidUrlFileRepository();

    ValidUrl found = repository.GetByFriendlyUrl(1, friendlyUrl);

    Assert.AreEqual("/flights/destination-city", found.View);
    Assert.AreEqual(true, found.Active);
    Assert.AreEqual(true, found.Index);
    Assert.AreEqual(200, found.StatusCode);
}
/// <summary>
/// Finds the content item matching the URL id, view name and view type, and replaces its
/// body with the first element of that body.
/// </summary>
/// <param name="url_">Supplies the URL id and view name to match.</param>
/// <param name="viewType_">View type, matched by its integer value.</param>
/// <returns>The content item, or null when no document matches.</returns>
public override ContentItem GetContentForEditing(ValidUrl url_, ContentViewType viewType_)
{
    var filter = Query.And(
        Query.EQ("Url.Id", url_.Id),
        Query.EQ("ContentView.ViewName", url_.View),
        Query.EQ("ContentView.ViewType", Convert.ToInt32(viewType_)));

    ContentItem item = _db.GetCollection<ContentItem>(COLLNAME).Find(filter).FirstOrDefault<ContentItem>();
    if (item == null)
    {
        return null;
    }

    item.Body = item.Body[0];
    return item;
}
/// <summary>
/// With the in-process caching service installed, looks up the root URL "/" for site 1 in
/// the file repository and checks its view, active flag, index flag and status code.
/// </summary>
public void GetById_InProcCachingService_SlashURL_Test()
{
    const string rootUrl = "/";

    DependencyManager.CachingService = new InProcCachingService();
    ValidUrlFileRepository repository = new ValidUrlFileRepository();

    ValidUrl found = repository.GetByFriendlyUrl(1, rootUrl);

    Assert.AreEqual("/index", found.View);
    Assert.AreEqual(true, found.Active);
    Assert.AreEqual(true, found.Index);
    Assert.AreEqual(200, found.StatusCode);
}
/// <summary>
/// Saves a URL record and checks that a record can afterwards be found under the same
/// site id and friendly URL.
/// </summary>
public void SaveTest()
{
    // arrange
    ValidUrl expected = new ValidUrl
    {
        Id = Guid.NewGuid(),
        SiteId = 0,
        Action = "/Template/Compose",
        Active = true,
        FriendlyUrl = "/flights/",
        Index = true,
        StatusCode = 200,
        View = "/home/flights.cshtml",
        LastModified = DateTime.Now
    };

    // act
    _mongodbRepo.Save(expected);

    // assert
    // Previously nothing was asserted, so the test could only fail if Save threw.
    Assert.IsNotNull(_mongodbRepo.GetByFriendlyUrl(expected.SiteId, expected.FriendlyUrl));
}
/// <summary>
/// Picks the extraction mode from the "type" group of <c>ValidUrl</c>:
/// "board" gives EpisodeImages, anything else gives Works.
/// </summary>
/// <param name="url">The URL to classify.</param>
/// <returns>A new <c>NaverExtractorOption</c> with the chosen type.</returns>
public override IExtractorOption RecommendOption(string url)
{
    var match = ValidUrl.Match(url).Groups;

    // Each return is a complete statement inside its own braces, so the if/else parses.
    if (match["type"].Value == "board")
    {
        return new NaverExtractorOption { Type = ExtractorType.EpisodeImages };
    }
    else
    {
        return new NaverExtractorOption { Type = ExtractorType.Works };
    }
}
/// <summary>
/// Extracts webtoon image tasks. For EpisodeImages the downloaded page itself is parsed.
/// For ComicIndex the highest episode number linked on the page is read, every episode page
/// from 1 up to that number is downloaded, and the tasks of all of them are concatenated.
/// </summary>
/// <param name="url">The page to extract from.</param>
/// <param name="option">Extractor options; when null, <c>RecommendOption(url)</c> supplies them.</param>
/// <returns>
/// (tasks, null) for EpisodeImages; (tasks, info typed WorksComic) for ComicIndex;
/// (null, null) for any other option type.
/// </returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    option = option ?? RecommendOption(url);

    var html = NetTools.DownloadString(url);

    //
    //  Extract Webtoon
    //
    if (option.Type == ExtractorType.EpisodeImages)
    {
        return (extract_episode_page(html), null);
    }

    if (option.Type != ExtractorType.ComicIndex)
    {
        return (null, null);
    }

    var titleId = ValidUrl.Match(url).Groups["id"].Value;
    var lastEpisode = Regex.Match(html, @"/webtoon/detail\.nhn\?titleId=\d+&no=(\d+)").Groups[1].Value.ToInt();

    var episodeUrls = new List<string>();
    for (var episode = 1; episode <= lastEpisode; episode++)
    {
        episodeUrls.Add($"https://comic.naver.com/webtoon/detail.nhn?titleId={titleId}&no={episode}");
    }

    var tasks = new List<NetTask>();
    foreach (var episodeHtml in NetTools.DownloadStrings(episodeUrls))
    {
        tasks.AddRange(extract_episode_page(episodeHtml));
    }

    return (tasks, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.WorksComic });
}
/// <summary>
/// Saves a URL record and checks that looking it up by site id and friendly URL returns a
/// record with the same id.
/// </summary>
public void GetByFriendlyUrlTest()
{
    // arrange
    ValidUrl expected = new ValidUrl
    {
        Id = Guid.NewGuid(),
        SiteId = 0,
        Action = "/Template/Compose",
        Active = true,
        FriendlyUrl = "/flights/",
        Index = true,
        StatusCode = 200,
        View = "/home/flights.cshtml",
        LastModified = DateTime.Now
    };
    _mongodbRepo.Save(expected);

    // act
    ValidUrl actual = _mongodbRepo.GetByFriendlyUrl(expected.SiteId, expected.FriendlyUrl);

    // assert
    // Expected value goes first so that a failure message reports the two ids the right way round.
    Assert.AreEqual(expected.Id, actual.Id);
}
/// <summary>
/// Loads the editable content of a URL from its head and body files. The head file is read
/// as CSV and the last record becomes the item's head; the body file, when it exists, is
/// read as raw text into the item's body.
/// </summary>
/// <param name="url_">The URL whose content files are read.</param>
/// <param name="viewType_">The view type used to build the file paths.</param>
/// <returns>A new content item; the head file is opened without an existence check.</returns>
public override ContentItem GetContentForEditing(ValidUrl url_, ContentViewType viewType_)
{
    string bodyPath = ConstructPath(url_, viewType_, true);
    string headPath = ConstructPath(url_, viewType_, false);

    ContentItem content = new ContentItem();

    using (StreamReader headReader = new StreamReader(headPath))
    using (var headCsv = new CsvReader(headReader))
    {
        // Every record overwrites the previous one, so the last record wins.
        while (headCsv.Read())
        {
            content.Head = headCsv.GetRecord<ContentItemHead>();
        }
    }

    if (File.Exists(bodyPath))
    {
        content.Body = (dynamic)File.ReadAllText(bodyPath);
    }

    return content;
}
/// <summary>
/// Picks the extraction mode from the "type" group of <c>ValidUrl</c>:
/// "reader" gives Images, "search" gives ComicIndex, anything else gives Images.
/// </summary>
/// <param name="url">The URL to classify.</param>
/// <returns>A new <c>HiyobiExtractorOption</c> with the chosen type.</returns>
public override IExtractorOption RecommendOption(string url)
{
    var match = ValidUrl.Match(url).Groups;

    // Each return is a complete statement inside its own braces, so the if/else chain parses.
    if (match["type"].Value == "reader")
    {
        return new HiyobiExtractorOption { Type = ExtractorType.Images };
    }
    else if (match["type"].Value == "search")
    {
        return new HiyobiExtractorOption { Type = ExtractorType.ComicIndex };
    }

    return new HiyobiExtractorOption { Type = ExtractorType.Images };
}
/// <summary>
/// Shows the "DefaultDataEdit" view for the URL identified by <paramref name="id"/>. The
/// URL's friendly URL and the view type are exposed through the ViewBag; the view receives
/// the editable content as its model when such content could be loaded.
/// </summary>
/// <param name="id">Id of the URL to edit content for.</param>
/// <param name="vm">The view type whose content is edited.</param>
/// <returns>The edit view, with or without a model.</returns>
public ActionResult UrlDataEdit(Guid id, ContentViewType vm)
{
    ValidUrl url = DependencyManager.URLRepository.GetById(this.GetSiteIdFromContext(), id, false);
    ViewBag.ViewName = url.FriendlyUrl;
    ViewBag.ViewType = vm;

    ContentItem content = null;
    try
    {
        content = DependencyManager.ContentRepository.GetContentForEditing(url, vm);
    }
    catch (FileNotFoundException)
    {
        // A missing file is ignored; the view is then rendered without a model.
    }

    string editView = GetControllerView("DefaultDataEdit");
    if (content == null)
    {
        return View(editView);
    }

    return View(editView, content);
}
/// <summary>
/// Runs the cached template registered under the hash code of <paramref name="expression"/>.
/// If a task is cached under "Task." + hash code and has not completed, it is waited for first.
/// </summary>
/// <param name="expression">The expression text whose hash code is the cache key.</param>
/// <returns>
/// The template output, or an empty string when any exception is thrown. The exception is
/// logged, prefixed with the current request's friendly URL and id when available.
/// </returns>
public static string Eval(string expression)
{
    try
    {
        string cacheKey = expression.GetHashCode().ToString();

        Task pending = DependencyManager.CachingService.Get<Task>("Task." + cacheKey);
        bool stillRunning = pending != null && !pending.IsCompleted;
        if (stillRunning)
        {
            pending.Wait();
        }

        TemplateService service = new TemplateService();
        ITemplate template = DependencyManager.CachingService.Get<ITemplate>(cacheKey);
        return service.Run(template, null);
    }
    catch (Exception ex)
    {
        ValidUrl current = Utility.GetValidUrlFromContext(new HttpContextWrapper(HttpContext.Current));

        string prefix = string.Empty;
        if (current != null)
        {
            prefix = current.FriendlyUrl + "::" + current.Id.ToString() + "::";
        }

        DependencyManager.Logger.Log(new LogEventInfo(LogLevel.Error, ECMSSettings.DEFAULT_LOGGER, prefix + ex.ToString()));
        return string.Empty;
    }
}
/// <summary>
/// Picks the extraction mode from the capture groups of <c>ValidUrl</c>: a "gall" URL whose
/// third group is "lists" gives ArticleInformation with ExtractInformation set; every other
/// URL gives Images.
/// </summary>
/// <param name="url">The URL to classify.</param>
/// <returns>A new <c>DCInsideExtractorOption</c> with the chosen settings.</returns>
public override IExtractorOption RecommendOption(string url)
{
    var groups = ValidUrl.Match(url).Groups;

    bool isArticleList = groups[1].Value == "gall" && groups[3].Value == "lists";
    if (isArticleList)
    {
        return new DCInsideExtractorOption { Type = ExtractorType.ArticleInformation, ExtractInformation = true };
    }

    return new DCInsideExtractorOption { Type = ExtractorType.Images };
}
/// <summary>
/// Extracts image tasks from a single episode page (EpisodeImages) or from every episode
/// linked on a works page (Works). Images are numbered "001", "002", ... per episode.
/// </summary>
/// <param name="url">The page to extract from.</param>
/// <param name="option">Extractor options; when null, <c>RecommendOption(url)</c> supplies them.</param>
/// <returns>
/// (tasks, null) for EpisodeImages; (tasks, info typed WorksComic) for Works;
/// (null, null) for any other option type.
/// </returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    option = option ?? RecommendOption(url);

    var html = NetTools.DownloadString(url);
    var match = ValidUrl.Match(url).Groups;

    var document = new HtmlDocument();
    document.LoadHtml(html);
    var root = document.DocumentNode;

    if (option.Type == ExtractorType.EpisodeImages)
    {
        var pageImages = get_board_images(html);
        var episodeTitle = root.SelectSingleNode("/html[1]/head[1]/title[1]").InnerText;

        var episodeTasks = new List<NetTask>();
        var page = 0;
        foreach (var img in pageImages)
        {
            page++;
            var number = page.ToString("000");

            var task = NetTask.MakeDefault(img);
            task.SaveFile = true;
            task.Filename = number + Path.GetExtension(img.Split('/').Last());
            task.Format = new ExtractorFileNameFormat
            {
                Episode = episodeTitle,
                FilenameWithoutExtension = number,
                Extension = Path.GetExtension(task.Filename).Replace(".", "")
            };
            episodeTasks.Add(task);
        }

        episodeTasks.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
        return (episodeTasks, null);
    }

    if (option.Type != ExtractorType.Works)
    {
        return (null, null);
    }

    var title = root.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]").InnerText;
    var sub_urls = new List<string>();
    var sub_titles = new List<string>();

    option.SimpleInfoCallback?.Invoke($"{title}");

    // The thumbnail address is taken from the url(...) part of the node's style attribute.
    option.ThumbnailCallback?.Invoke(NetTask.MakeDefault(
        Regex.Match(
            root.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]").GetAttributeValue("style", ""),
            @"(https?://.*?)\)").Groups[1].Value));

    foreach (var entry in root.SelectNodes("/html[1]/body[1]/div[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]/div[2]/div[1]/div[1]/div"))
    {
        sub_urls.Add(match["host"].Value + entry.SelectSingleNode("./a[1]").GetAttributeValue("href", ""));
        sub_titles.Add(entry.SelectSingleNode("./a[1]/div[1]").MyText());
    }

    option.ProgressMax?.Invoke(sub_urls.Count);

    var htmls = NetTools.DownloadStrings(sub_urls, "PHPSESSID=" + Externals.ManamoaPHPSESSID, () =>
    {
        option.PostStatus?.Invoke(1);
    });

    var result = new List<NetTask>();
    for (var episode = 0; episode < sub_urls.Count; episode++)
    {
        try
        {
            var page = 0;
            foreach (var img in get_board_images(htmls[episode]))
            {
                page++;
                var number = page.ToString("000");

                var task = NetTask.MakeDefault(img);
                task.SaveFile = true;
                task.Filename = number + Path.GetExtension(img.Split('/').Last());
                task.Format = new ExtractorFileNameFormat
                {
                    Title = title,
                    Episode = sub_titles[episode],
                    FilenameWithoutExtension = number,
                    Extension = Path.GetExtension(task.Filename).Replace(".", "")
                };
                result.Add(task);
            }
        }
        catch (Exception)
        {
            // Any failure on one episode is ignored; tasks already added for it stay in the result.
        }
    }

    result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.WorksComic });
}
/// <summary>
/// Extracts the image tasks of a gallery when the option type is Images. Downloads the
/// gallery block page and the gallery's image-list script, follows the link found in the
/// block to the gallery page, builds one image URL per listed file, and fills in file-name
/// metadata plus a summary of the work.
/// </summary>
/// <param name="url">The gallery URL; its "id" group identifies the gallery.</param>
/// <param name="option">Extractor options; when null, <c>RecommendOption(url)</c> supplies them.</param>
/// <returns>
/// (tasks, info typed WorksComic carrying the summary), or (null, null) when the option type
/// is not Images or one of the downloads came back null or empty.
/// </returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    var match = ValidUrl.Match(url).Groups;
    option = option ?? RecommendOption(url);

    if (option.Type != ExtractorType.Images)
    {
        return (null, null);
    }

    var id = match["id"].Value;
    var blockUrl = $"https://ltn.hitomi.la/galleryblock/{id}.html";
    var imgs_url = $"https://ltn.hitomi.la/galleries/{id}.js";

    option.PageReadCallback?.Invoke(blockUrl);
    option.PageReadCallback?.Invoke(url);
    option.PageReadCallback?.Invoke(imgs_url);

    var strings = NetTools.DownloadStrings(new List<string> { blockUrl, imgs_url });
    if (string.IsNullOrEmpty(strings[0]) || string.IsNullOrEmpty(strings[1]))
    {
        return (null, null);
    }

    var data1 = ParseGalleryBlock(strings[0]);
    var imgs = strings[1];

    var string2 = NetTools.DownloadString($"https://hitomi.la{data1.Magic}");
    if (string.IsNullOrEmpty(string2))
    {
        return (null, null);
    }

    var data2 = ParseGallery(string2);

    option.SimpleInfoCallback?.Invoke($"[{id}] {data1.Title}");

    // download.js
    var number_of_frontends = 3;
    // Convert.ToInt32(char) yields the character's code, not the digit it depicts.
    var subdomain = Convert.ToChar(97 + (Convert.ToInt32(id.Last()) % number_of_frontends));
    if (id.Last() == '0')
    {
        subdomain = 'a';
    }
    var host = $"https://{subdomain}a.hitomi.la";

    // The script has the form "<name> = <json>"; the file list is the "files" member of the JSON.
    var files = (JArray)JToken.Parse(imgs.Substring(imgs.IndexOf('=') + 1))["files"];

    var img_urls = new List<string>();
    foreach (var file in files)
    {
        var hash = file.Value<string>("hash");
        string imageUrl;

        if (file.Value<int>("haswebp") == 0 || hash == null)
        {
            imageUrl = $"{host}/galleries/{id}/{file.Value<string>("name")}";
        }
        else if (hash == "")
        {
            imageUrl = $"{host}/webp/{file.Value<string>("name")}.webp";
        }
        else if (hash.Length < 3)
        {
            imageUrl = $"{host}/webp/{hash}.webp";
        }
        else
        {
            // Path segments come from the last three characters of the hash: <last>/<third-last><second-last>.
            var postfix = hash.Substring(hash.Length - 3);
            imageUrl = $"{host}/webp/{postfix[2]}/{postfix[0]}{postfix[1]}/{hash}.webp";
        }

        img_urls.Add(imageUrl);
    }

    var result = new List<NetTask>();
    var ordering = 1;
    foreach (var img in img_urls)
    {
        var lastSegment = img.Split('/').Last();

        var filename = Path.GetFileNameWithoutExtension(lastSegment);
        if (!(option as HitomiExtractorOption).RealFilename)
        {
            // Without RealFilename the files are numbered 001, 002, ...
            filename = ordering++.ToString("000");
        }

        var task = NetTask.MakeDefault(img);
        task.SaveFile = true;
        task.Filename = lastSegment;
        task.Format = new ExtractorFileNameFormat
        {
            Title = data1.Title,
            Id = id,
            Language = data1.Language,
            UploadDate = data1.Posted,
            FilenameWithoutExtension = filename,
            Extension = Path.GetExtension(lastSegment).Replace(".", "")
        };

        task.Format.Artist = data1.artist != null ? data1.artist[0] : "NA";
        task.Format.Series = data1.parody != null ? data1.parody[0] : "NA";
        task.Format.Group = data2.group != null ? data2.group[0] : "NA";
        task.Format.Character = data2.character != null ? data2.character[0] : "NA";

        // With no artist but a known group, the group stands in for the artist.
        if (task.Format.Artist == "NA" && task.Format.Group != "NA")
        {
            task.Format.Artist = task.Format.Group;
        }

        result.Add(task);
    }

    option.ThumbnailCallback?.Invoke(result[0]);

    var sinfo = new ExtractedInfo.WorksComic();
    sinfo.Thumbnail = result[0];
    sinfo.URL = url;
    sinfo.Title = data1.Title;
    sinfo.Author = data1.artist?.ToArray();
    sinfo.AuthorGroup = data2.group?.ToArray();
    sinfo.ShortInfo = $"[{id}] {data1.Title}";
    sinfo.Tags = data1.Tags?.ToArray();
    sinfo.Characters = data2.character?.ToArray();
    sinfo.Language = data1.Language;
    sinfo.Parodies = data1.parody?.ToArray();

    result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));

    return (result, new ExtractedInfo { Info = sinfo, Type = ExtractedInfo.ExtractedType.WorksComic });
}
/// <summary>
/// Extracts product image tasks from a shop listing. Only URLs whose "menu" group is
/// "shopbrand" or "bestseller" are processed: every listing page is downloaded, every
/// product linked from the listing tables is downloaded, and each product image that is
/// not on the filter list becomes a task.
/// </summary>
/// <param name="url">The listing URL; pages are requested as <c>{url}&amp;page={n}</c>.</param>
/// <param name="option">Extractor options; when null, <c>RecommendOption(url)</c> supplies them.</param>
/// <returns>The download tasks (empty for other menus) and an <c>ExtractedInfo</c> typed as Search.</returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    var match = ValidUrl.Match(url).Groups;

    if (option == null)
    {
        option = RecommendOption(url);
    }

    // Pages are decoded with code page 51949.
    var mtask = NetTask.MakeDefault(url);
    mtask.Encoding = Encoding.GetEncoding(51949);
    var html = NetTools.DownloadString(mtask);
    var node = html.ToHtmlNode();

    var result = new List<NetTask>();

    if (match["menu"].Value == "shopbrand" || match["menu"].Value == "bestseller")
    {
        // File names of images that are never downloaded.
        var filtering_filename = new string[] { "HN_Copyright2.jpg", "next_product.gif", "prev_product.gif", "btn_h8_spin_dw.gif", "btn_h8_spin_up.gif", "Review.jpg", "shoppingguide2.jpg", "sizetip-2.jpg" };

        var gallery = node.SelectSingleNode("/html[1]/head[1]/title[1]").InnerText.Trim();
        option.SimpleInfoCallback?.Invoke(gallery);

        // The "last" pager link carries the final page number after its last '='; without it there is one page.
        var last_page_node = node.SelectSingleNode("/html[1]/body[1]/div[3]/div[3]/div[1]/div[2]/div[3]/div[1]/div[5]/ol[1]/li[@class='last']/a");
        var last_page = 1;
        if (last_page_node != null)
        {
            // Reuse the node found above instead of running the same XPath query again.
            last_page = last_page_node.GetAttributeValue("href", "").Split('=').Last().ToInt();
        }

        var page_urls = Enumerable.Range(1, last_page).Select(page => $"{url}&page={page}").ToList();
        var htmls = NetTools.DownloadStrings(page_urls);

        var sub_urls = new List<string>();
        foreach (var shtml in htmls)
        {
            var snode = shtml.ToHtmlNode();
            sub_urls.AddRange(snode.SelectNodes("/html[1]/body[1]/div[3]/div[3]/div[1]/div[2]/div[3]/div[1]/div[5]/table[1]/tbody[1]//a").Select(x => "http://www.hn-hn.co.kr" + x.GetAttributeValue("href", "")));
        }

        option.ProgressMax?.Invoke(sub_urls.Count);

        var sub_htmls = new List<string>();
        foreach (var surl in sub_urls)
        {
            var task = NetTask.MakeDefault(surl);
            task.Encoding = Encoding.GetEncoding(51949);
            sub_htmls.Add(NetTools.DownloadString(task));
            option.PostStatus?.Invoke(1);
        }

        foreach (var shtml in sub_htmls)
        {
            var snode = shtml.ToHtmlNode();
            var title = snode.SelectSingleNode("/html[1]/body[1]/div[3]/div[3]/div[1]/div[2]/div[1]/div[2]/div[1]/form[1]/div[1]/div[1]/h3[1]").InnerText.Trim();

            // Absolute image addresses are kept as they are; relative ones get the site
            // prefix and lose their query string.
            var imgs = snode.SelectNodes("/html[1]/body[1]/div[3]/div[3]/div[1]/div[2]/div[1]/div[2]//img").Select(img =>
            {
                var src = img.GetAttributeValue("src", "");
                if (src.StartsWith("http"))
                {
                    return src;
                }

                return "http://www.hn-hn.co.kr" + src.Split('?')[0];
            }).ToList();

            foreach (var img in imgs)
            {
                var task = NetTask.MakeDefault(img);
                task.SaveFile = true;
                task.Filename = img.Split('/').Last();

                if (filtering_filename.Contains(task.Filename))
                {
                    continue;
                }

                task.Format = new ExtractorFileNameFormat
                {
                    Gallery = gallery,
                    Title = title,
                    FilenameWithoutExtension = Path.GetFileNameWithoutExtension(task.Filename),
                    Extension = Path.GetExtension(task.Filename).Replace(".", "")
                };
                result.Add(task);
            }
        }

        // A listing that yields no images used to make result[0] throw here; with nothing
        // to show, the thumbnail callback is skipped.
        if (result.Count > 0)
        {
            option.ThumbnailCallback?.Invoke(result[0]);
        }
    }

    result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));

    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.Search });
}
/// <summary>
/// Extracts from a gallery URL. For an article ("view") URL with the Images option it builds
/// one download task per attached image; the ArticleInformation, Comments and
/// GalleryInformation paths parse their data but currently return (null, null).
/// </summary>
/// <param name="url">The URL to extract from; "&amp;exception_mode=recommend" is appended when the option asks for recommended posts only.</param>
/// <param name="option">Extractor options; when null, an Images option is used.</param>
/// <returns>
/// (tasks, null) for article images; (null, null) for the information and comment paths;
/// (empty list, null) when the download returns null; otherwise (tasks, info typed Community),
/// which is also what is returned after an exception inside the parsing section has been logged.
/// </returns>
/// <exception cref="ExtractorException">The first capture group of the URL is not "gall".</exception>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    if (option == null)
    {
        option = new DCInsideExtractorOption { Type = ExtractorType.Images };
    }

    if ((option as DCInsideExtractorOption).OnlyRecommend)
    {
        url += "&exception_mode=recommend";
    }

    var match = ValidUrl.Match(url).Groups;
    var result = new List<NetTask>();

    var html = NetTools.DownloadString(url);
    if (html == null)
    {
        return (result, null);
    }

    if (match[1].Value == "gall")
    {
        try
        {
            //
            //  Parse article
            //
            if (match[3].Value == "view")
            {
                var article = ParseBoardView(html, match[2].Value != "");

                if (option.Type == ExtractorType.Images && option.ExtractInformation == false)
                {
                    if (article.ImagesLink == null || article.ImagesLink.Count == 0)
                    {
                        throw new Exception("Nothing to download!");
                    }

                    option.SimpleInfoCallback?.Invoke($"{article.Title}");

                    for (int i = 0; i < article.ImagesLink.Count; i++)
                    {
                        var task = NetTask.MakeDefault(article.ImagesLink[i]);
                        task.Filename = article.FilesName[i];
                        task.SaveFile = true;
                        task.Referer = url;
                        task.Format = new ExtractorFileNameFormat
                        {
                            Id = article.Id,
                            Gallery = article.GalleryName,
                            Title = article.Title,
                            FilenameWithoutExtension = (i + 1).ToString("000"),
                            Extension = Path.GetExtension(article.FilesName[i]).Replace(".", ""),
                        };
                        result.Add(task);
                    }

                    result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
                    return (result, null /*article*/);
                }
                else if (option.Type == ExtractorType.ArticleInformation || option.ExtractInformation == true)
                {
                    return (null, null /*article*/);
                }
                else if (option.Type == ExtractorType.Comments)
                {
                    var cc = new List<DCComment>();
                    var comments = GetComments(article, "1");
                    cc.Add(comments);

                    //
                    //  To avoid server blocks
                    //
                    Thread.Sleep(2000);

                    int tcount = comments.total_cnt;
                    int count = 100;

                    // The page index now advances on every iteration; before, `i` was never
                    // incremented, so page "2" was requested over and over.
                    // NOTE(review): `count` grows both by 100 and by comment_cnt per page —
                    // confirm which of the two increments is the intended one.
                    for (int i = 2; count < tcount; i++, count += 100)
                    {
                        comments = GetComments(article, i.ToString());
                        if (comments.comment_cnt == 0)
                        {
                            break;
                        }

                        count += comments.comment_cnt;
                        cc.Add(comments);
                        Thread.Sleep(2000);
                    }

                    return (null, null /*GetComments(article, "0")*/);
                }
                else
                {
                    throw new Exception("You cannot do that with this URL. " + url);
                }
            }
            //
            //  Parse Articles List
            //
            else if (match[3].Value == "lists")
            {
                DCGallery gallery;

                if (match[2].Value == "")
                {
                    gallery = ParseGallery(html);
                }
                else
                {
                    gallery = ParseMinorGallery(html);
                }

                if (option.Type == ExtractorType.GalleryInformation || option.ExtractInformation == true)
                {
                    return (null, null /*gallery*/);
                }
                else
                {
                    throw new Exception("You cannot do that with this URL." + url);
                }
            }
        }
        catch (Exception e)
        {
            // Failures in the parsing section are logged, not rethrown; execution continues
            // to the common return below.
            Log.Logs.Instance.PushError("[DCInsideExtractor] Extract error - " + option.Type.ToString() + " - " + e.Message + "\r\n" + e.StackTrace);
        }
    }
    else
    {
        // Not support mobile page.
        throw new ExtractorException("[DCInside Extractor] Not support mobile page yet.");
    }

    result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.Community });
}
/// <summary>
/// Extracts image tasks for every episode linked on a works page. The works title and the
/// episode list are read from the page, every episode page is downloaded, and each image on
/// it becomes a task named "001.jpg", "002.jpg", ... within its episode.
/// </summary>
/// <param name="url">The works page to extract from.</param>
/// <param name="option">Extractor options; when null, <c>RecommendOption(url)</c> supplies them.</param>
/// <returns>The download tasks and an <c>ExtractedInfo</c> typed as WorksComic.</returns>
public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
{
    option = option ?? RecommendOption(url);

    var html = NetTools.DownloadString(url);
    var match = ValidUrl.Match(url).Groups;
    var root = html.ToHtmlNode();

    var title = root.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[1]/h1[1]").InnerText.Trim();
    // genre and artist are read but not used further in this method.
    var genre = root.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/ul[1]/li[1]/div[1]/div[2]/div[2]/h3[1]/a[1]").InnerText.Trim();
    var artist = root.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/ul[1]/li[1]/div[1]/div[2]/div[3]/h3[1]/a[1]").InnerText.Trim();

    var sub_urls = new List<string>();
    var sub_titles = new List<string>();
    foreach (var entry in root.SelectNodes("/html[1]/body[1]/div[1]/div[3]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div"))
    {
        var link = entry.SelectSingleNode("./div[2]/h2[1]/a[1]");
        sub_urls.Add(link.GetAttributeValue("href", ""));
        sub_titles.Add(link.InnerText.Trim());
    }

    option.SimpleInfoCallback?.Invoke(title);
    option.ThumbnailCallback?.Invoke(NetTask.MakeDefault(
        match["host"].Value + root.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/ul[1]/li[1]/div[1]/div[1]/a[1]/img[1]").GetAttributeValue("src", "")));
    option.ProgressMax?.Invoke(sub_urls.Count);

    var sub_htmls = NetTools.DownloadStrings(sub_urls, "", () =>
    {
        option.PostStatus?.Invoke(1);
    });

    var result = new List<NetTask>();
    for (var episode = 0; episode < sub_urls.Count; episode++)
    {
        var episodeRoot = sub_htmls[episode].ToHtmlNode();
        var page = 0;

        foreach (var img in episodeRoot.SelectNodes("/html[1]/body[1]/div[1]/div[3]/div[2]/div[1]/div[2]/ul[1]//li/div[1]/img[1]"))
        {
            page++;
            var number = page.ToString("000");

            // Prefer data-src; fall back to src when it is missing or blank.
            var img_src = img.GetAttributeValue("data-src", "");
            if (string.IsNullOrWhiteSpace(img_src))
            {
                img_src = img.GetAttributeValue("src", "");
            }

            var task = NetTask.MakeDefault(HttpUtility.HtmlDecode(img_src));
            task.SaveFile = true;
            task.Filename = number + ".jpg";
            task.Format = new ExtractorFileNameFormat
            {
                Title = title,
                Episode = sub_titles[episode],
                FilenameWithoutExtension = number,
                Extension = Path.GetExtension(task.Filename).Replace(".", ""),
            };
            result.Add(task);
        }
    }

    result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
    return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.WorksComic });
}