/*
 * ============================================
 * Protected
 * ============================================
 */

/// <summary>
/// Read the tags, the optional rating, the resolution, the full image link
/// and the "deleted" state from a post page.
/// </summary>
/// <param name="doc">Parsed post page.</param>
/// <returns>False when the page has no "#tag-sidebar" element, true otherwise.</returns>
override protected bool Parse(Supremes.Nodes.Document doc)
{
    Supremes.Nodes.Element sidebar = doc.Select("#tag-sidebar").First;

    if (sidebar == null)
    {
        return false;
    }

    // Tags, each group stored under its own namespace
    this.AddTags(sidebar, "copyright", "series");
    this.AddTags(sidebar, "character", "character");
    this.AddTags(sidebar, "artist", "creator");
    this.AddTags(sidebar, "general");

    // Rating is only collected when the user setting asks for it
    if (Properties.Settings.Default.AddRating)
    {
        this.GetRating(doc, "#stats li");
    }

    // Resolution: everything after the first space of the "Size: " entry
    foreach (Supremes.Nodes.Element entry in doc.Select("#stats li"))
    {
        string label = entry.Text;

        if (label.StartsWith("Size: "))
        {
            this.parseResolution(label.Substring(label.IndexOf(' ') + 1));
        }
    }

    // Full image: the "#png" link wins over the "#highres" link
    Supremes.Nodes.Element original = doc.Select("#png").First;

    if (original == null)
    {
        original = doc.Select("#highres").First;
    }

    if (original != null)
    {
        this.GetFullImageUrlAndSizeFromLink(original);
    }

    // A status notice announcing the deletion marks the post as unavailable
    foreach (Supremes.Nodes.Element notice in doc.Select(".status-notice"))
    {
        if (!notice.Text.Contains("This post was deleted."))
        {
            continue;
        }

        this.unavailable = true;
        break;
    }

    return true;
}
/*
 * ============================================
 * Private
 * ============================================
 */

/// <summary>
/// Get the URL to an anime's page from its title.
/// </summary>
/// <param name="title">Title to search for; surrounding whitespace is ignored.</param>
/// <returns>
/// The "href" of the first link found in the search results,
/// or null when the title is null or the search page could not be loaded.
/// </returns>
private string GetAnimeUrl(string title)
{
    if (title == null)
    {
        return null;
    }

    Supremes.Nodes.Document doc = null;

    // Search for the anime
    try
    {
        // Escape the whole title: replacing only spaces left characters such as
        // '&', '#', '+' or '?' unescaped, which corrupted the query string.
        string query = Uri.EscapeDataString(title.Trim());

        doc = Supremes.Dcsoup.Parse(new Uri("http://myanimelist.net/search/all?q=" + query), 5000);
    }
    catch
    {
        // Best effort: any failure to build the URL or load the page means "no result"
        return null;
    }

    if (doc == null)
    {
        return null;
    }

    Supremes.Nodes.Elements item = doc.Select("article > div.list.di-t.w100 > div.picSurround.di-tc.thumb");

    // Get first link in the list
    return item.Select("a").Attr("href");
}
/*
 * ============================================
 * Private
 * ============================================
 */

/// <summary>
/// Collect the tags listed in the tag sidebar of the document.
/// </summary>
/// <param name="doc">Parsed page containing "#tag-sidebar li.tag-link" items.</param>
private void GetTags(Supremes.Nodes.Document doc)
{
    foreach (Supremes.Nodes.Element item in doc.Select("#tag-sidebar li.tag-link"))
    {
        string rawName = item.Attr("data-name");

        // Items without a name are skipped
        if (!string.IsNullOrEmpty(rawName))
        {
            Tag tag = new Tag(rawName.Replace("_", " "));
            tag.Source = Enum.TagSource.Booru;

            // Map the site's tag type onto a namespace; unknown types keep the default
            string kind = item.Attr("data-type");

            if (kind == "copyright")
            {
                tag.Namespace = "series";
            }
            else if (kind == "character")
            {
                tag.Namespace = "character";
            }
            else if (kind == "artist")
            {
                tag.Namespace = "creator";
            }
            else if (kind == "circle")
            {
                // Circles share the creator namespace and are marked in the value
                tag.Namespace = "creator";
                tag.Value += " (circle)";
            }

            this.tags.Add(tag);
        }
    }
}
/// <summary>
/// Get covers thumb and full URLs from the "/pics" page of the anime.
/// </summary>
/// <returns>
/// True when the covers were stored in <c>this.covers</c> as { thumbUrl, fullUrl } pairs;
/// false when the page could not be loaded or when the number of thumbnails
/// does not match the number of full-size links.
/// </returns>
private bool GetCovers()
{
    Supremes.Nodes.Document picsDoc = null;

    // Get pics
    try
    {
        picsDoc = Supremes.Dcsoup.Parse(new Uri(this.animeUrl + "/pics"), 5000);
    }
    catch
    {
        // Best effort: any failure to load the page means "no covers"
        return false;
    }

    if (picsDoc == null)
    {
        return false;
    }

    Supremes.Nodes.Elements fulls = picsDoc.Select("div.picSurround > a.js-picture-gallery");
    Supremes.Nodes.Elements thumbs = picsDoc.Select("div.picSurround > a.js-picture-gallery > img");

    // Both lists are read in parallel, so they must have the same length
    if (fulls.Count != thumbs.Count)
    {
        return false;
    }

    int count = thumbs.Count;
    string[][] covers = new string[count][];

    // The index is an int: a byte index wraps around at 256 and would never
    // reach "count" on a page with more than 255 pictures.
    for (int i = 0; i < count; i++)
    {
        string thumbUrl = thumbs[i].Attr("src");
        string fullUrl = fulls[i].Attr("href");

        // Lazyloaded
        if (String.IsNullOrEmpty(thumbUrl))
        {
            thumbUrl = thumbs[i].Attr("data-src");
        }

        covers[i] = new string[] { thumbUrl, fullUrl };
    }

    this.covers = covers;

    return true;
}
/*
 * ============================================
 * Protected
 * ============================================
 */

/// <summary>
/// Read the tags, the resolution, the file size and the full image URL from a post page.
/// </summary>
/// <param name="doc">Parsed post page.</param>
/// <returns>False when the page has no "#post_tags > ul.tags" element, true otherwise.</returns>
override protected bool Parse(Supremes.Nodes.Document doc)
{
    Supremes.Nodes.Element postTags = doc.Select("#post_tags > ul.tags").First;

    if (postTags == null)
    {
        return false;
    }

    // Get tags
    this.AddTags(postTags, "");
    this.AddTags(postTags, "green", "series");
    this.AddTags(postTags, "blue", "character");
    this.AddTags(postTags, "orange", "creator");

    // Get informations
    Supremes.Nodes.Element postContent = doc.Select("#content .post_content").First;
    Supremes.Nodes.Element fullImageLink = doc.Select("#big_preview_cont > a").First;

    if (postContent != null)
    {
        string text = postContent.Text;
        string resolution = ExtractValueAfterLabel(text, RESOLUTION);
        string size = ExtractValueAfterLabel(text, SIZE);

        // Each value is only used when its label was actually found in the text
        if (resolution != null)
        {
            this.parseResolution(resolution);
        }

        if (size != null)
        {
            this.size = this.KbOrMbToBytes(size);
        }
    }

    if (fullImageLink != null)
    {
        this.full = URL + fullImageLink.Attr("href");
    }

    return true;
}

/// <summary>
/// Return the text found between the end of <paramref name="label"/> and the next space.
/// </summary>
/// <param name="text">Text to search in.</param>
/// <param name="label">Label preceding the wanted value.</param>
/// <returns>
/// The value (up to the end of the text when no space follows it),
/// or null when the text is null or does not contain the label.
/// </returns>
private static string ExtractValueAfterLabel(string text, string label)
{
    if (text == null)
    {
        return null;
    }

    int labelIndex = text.IndexOf(label);

    // Without this check a missing label yields a start index pointing inside unrelated text
    if (labelIndex < 0)
    {
        return null;
    }

    int start = labelIndex + label.Length;
    int end = text.IndexOf(" ", start);

    // Value is the last token of the text: a negative length would make Substring throw
    if (end < 0)
    {
        end = text.Length;
    }

    return text.Substring(start, end - start);
}
/*
 * ============================================
 * Private
 * ============================================
 */

/// <summary>
/// Access and parse the Github latest release page to extract informations.
/// </summary>
/// <remarks>
/// The page download is not wrapped in a try/catch here, so exceptions raised
/// while loading the page propagate to the caller.
/// </remarks>
/// <returns>
/// True when a release newer than <c>Constants.RELEASE</c> was found, in which case
/// <c>Release</c> and <c>Changelog</c> are set; false otherwise.
/// </returns>
private bool Retrieve()
{
    Supremes.Nodes.Document doc = Supremes.Dcsoup.Parse(new Uri(this.GithubLatestUrl), 5000);

    if (doc == null)
    {
        return false;
    }

    Supremes.Nodes.Element tag = doc.Select("div.release-meta > ul.tag-references a.css-truncate > span.css-truncate-target").First;
    Supremes.Nodes.Element changelog = doc.Select("div.release-body div.markdown-body").First;

    if (tag == null || changelog == null)
    {
        this.ParseErrorMessage();

        return false;
    }

    // Release number is prefixed with 'r'.
    // Strip that prefix: string.Remove(1) did the opposite (it deleted everything
    // AFTER the first character), so the number could never be parsed.
    string version = tag.Text.Trim();

    if (version.StartsWith("r", StringComparison.OrdinalIgnoreCase))
    {
        version = version.Substring(1);
    }

    short release;

    if (!short.TryParse(version, out release))
    {
        this.ParseErrorMessage();

        return false;
    }

    // Not a newer release
    if (release <= Constants.RELEASE)
    {
        return false;
    }

    this.Release = release;
    this.Changelog = changelog.Text;

    return true;
}
/*
 * ============================================
 * Protected
 * ============================================
 */

/// <summary>
/// Add one tag per "a.tag" link found in the tag box of the illustration area.
/// </summary>
/// <param name="doc">Parsed post page.</param>
/// <returns>False when the page has no "#illust_area div.lg_box_tag" element, true otherwise.</returns>
override protected bool Parse(Supremes.Nodes.Document doc)
{
    Supremes.Nodes.Element tagBox = doc.Select("#illust_area div.lg_box_tag").First;
    bool found = (tagBox != null);

    if (found)
    {
        foreach (Supremes.Nodes.Element anchor in tagBox.Select("a.tag"))
        {
            if (anchor == null)
            {
                continue;
            }

            this.AddTag(anchor.Text);
        }
    }

    return found;
}
/*
 * ============================================
 * Protected
 * ============================================
 */

/// <summary>
/// Add one tag per "a.Q-jc6 > span._2ohCe" element found inside the tag container.
/// </summary>
/// <param name="doc">Parsed post page.</param>
/// <returns>False when the page has no "div._2nerN > div._3UK_f" element, true otherwise.</returns>
override protected bool Parse(Supremes.Nodes.Document doc)
{
    Supremes.Nodes.Element container = doc.Select("div._2nerN > div._3UK_f").First;

    if (container == null)
    {
        return false;
    }

    Supremes.Nodes.Elements labels = container.Select("a.Q-jc6 > span._2ohCe");

    for (int index = 0; index < labels.Count; index++)
    {
        Supremes.Nodes.Element label = labels[index];

        if (label != null)
        {
            this.AddTag(label.Text);
        }
    }

    return true;
}
/*
 * ============================================
 * Protected
 * ============================================
 */

/// <summary>
/// Add one tag per link of the tag list, using the link's class as namespace
/// when that class is "series".
/// </summary>
/// <param name="doc">Parsed post page.</param>
/// <returns>False when the selection itself is null, true otherwise.</returns>
override protected bool Parse(Supremes.Nodes.Document doc)
{
    Supremes.Nodes.Elements anchors = doc.Select("#tagbox > ul.taglist > li > a");

    if (anchors != null)
    {
        foreach (Supremes.Nodes.Element anchor in anchors)
        {
            string cssClass = anchor.Attr("class");

            // Series is the only known namespace for this booru right now:
            // any other class means "no namespace"
            this.AddTag(anchor.Text, cssClass == "series" ? cssClass : null);
        }

        return true;
    }

    return false;
}
/*
 * ============================================
 * Protected
 * ============================================
 */

/// <summary>
/// Read the tags, the optional rating tags, the full image URL, file size, resolution,
/// rating text, source and the deleted/banned state from a post page.
/// </summary>
/// <param name="doc">Parsed post page.</param>
/// <returns>False when the page has no "#tag-list" element, true otherwise.</returns>
override protected bool Parse(Supremes.Nodes.Document doc)
{
    Supremes.Nodes.Element tagList = doc.Select("#tag-list").First;

    if (tagList == null)
    {
        return false;
    }

    // Get tags
    this.AddTags(tagList, "copyright", "series");
    this.AddTags(tagList, "character", "character");
    this.AddTags(tagList, "artist", "creator");
    this.AddTags(tagList, "general");
    this.AddTags(tagList, "meta", "meta");

    // Get rating
    if (Properties.Settings.Default.AddRating)
    {
        this.GetRating(doc, "#post-information li");
    }

    // Get informations
    Supremes.Nodes.Element informations = doc.Select("#post-information").First;

    if (informations != null)
    {
        Supremes.Nodes.Elements listItems = informations.Select("ul li");

        foreach (Supremes.Nodes.Element li in listItems)
        {
            string content = li.Html;

            if (content == null)
            {
                continue;
            }

            content = content.Trim();

            if (content.StartsWith("Size:"))
            {
                Supremes.Nodes.Element full = li.Select("a").First;

                // A link whose text is "»" is not treated as the full image link
                if (full != null && full.Text != "»")
                {
                    int unitEnd = full.Text.LastIndexOf(' ');

                    this.full = full.Attr("href");

                    // The size is the link text up to its last space
                    if (unitEnd > 0)
                    {
                        this.size = this.KbOrMbToBytes(full.Text.Substring(0, unitEnd));
                    }
                }

                // The resolution is the content of the last pair of parentheses
                int start = content.LastIndexOf('(');
                int end = content.LastIndexOf(')');

                if (start > 0 && end > start)
                {
                    this.parseResolution(content.Substring(start + 1, end - start - 1));
                }
            }
            else if (content.StartsWith("Rating:"))
            {
                // Trim so the whitespace following "Rating:" is not stored with the value
                this.rating = content.Substring("Rating:".Length).Trim();
            }
            else if (content.StartsWith("Source:"))
            {
                Supremes.Nodes.Element link = li.Select("a").First;

                if (link != null)
                {
                    this.source = link.Attr("href");
                }
            }
        }
    }

    // Checks if the image is deleted
    Supremes.Nodes.Element postNoticeDeleted = doc.Select(".post-notice-deleted").First;
    Supremes.Nodes.Element postNoticeBanned = doc.Select(".post-notice-banned").First;

    if (postNoticeDeleted != null || postNoticeBanned != null)
    {
        this.unavailable = true;
    }

    return true;
}
/// <summary>
/// Get general informations from the page about this anime
/// (type, episodes, premiered, studios, source, genres).
/// </summary>
/// <returns>
/// False when the page could not be loaded or when the sidebar has no h2 heading;
/// true once the sidebar entries have been read.
/// </returns>
private bool GetInformations()
{
    Supremes.Nodes.Document animeDoc = null;

    // Load the anime page
    try
    {
        animeDoc = Supremes.Dcsoup.Parse(new Uri(this.animeUrl), 5000);
    }
    catch
    {
        // Best effort: any failure to load the page means "no informations"
        return false;
    }

    if (animeDoc == null)
    {
        return false;
    }

    Supremes.Nodes.Elements sidebar = animeDoc.Select("div#content td.borderClass > div.js-scrollfix-bottom");
    Supremes.Nodes.Element h2 = sidebar.Select("h2").First;

    // Without a heading there is no starting point to walk the sidebar from
    if (h2 == null)
    {
        return false;
    }

    // There's 3 h2 elements in the sidebar: "Alternative Titles", "Information" and "Statistics".
    // Walk every sibling following the first one and dispatch on its "dark_text" label.
    for (Supremes.Nodes.Element nextDiv = h2.NextElementSibling; nextDiv != null; nextDiv = nextDiv.NextElementSibling)
    {
        Supremes.Nodes.Element span = nextDiv.Select("span.dark_text").First;

        if (span == null)
        {
            continue;
        }

        string text = span.Text.Trim();

        if (text.Length == 0)
        {
            continue;
        }

        switch (text)
        {
            case "Type:":
                this.type = this.GetValue(nextDiv, text);
                break;

            case "Episodes:":
                this.episodes = this.GetValue(nextDiv, text);

                if (this.episodes == "Unknown")
                {
                    this.episodes = "0";
                }
                break;

            case "Premiered:":
                string premiered = this.GetValue(nextDiv, text);

                if (premiered != null && premiered != "?")
                {
                    string[] parts = premiered.Split(' ');

                    // Expect "<season> <year>"; anything shorter is ignored instead of
                    // reading past the end of the array
                    if (parts.Length >= 2)
                    {
                        this.seasonal = parts[0];
                        this.year = parts[1];
                    }
                }
                break;

            case "Studios:":
                this.studios = this.GetValue(nextDiv, text);

                if (this.studios == "None found, add some")
                {
                    this.studios = null;
                }
                break;

            case "Source:":
                this.source = this.GetValue(nextDiv, text);
                break;

            case "Genres:":
                Supremes.Nodes.Elements aElements = nextDiv.Select("a");

                this.genres = new string[aElements.Count];

                for (int i = 0; i < this.genres.Length; i++)
                {
                    this.genres[i] = aElements[i].Attr("title");
                }
                break;
        }
    }

    return true;
}
// --- mangas ---

/// <summary>
/// Crawl the RSS feed of the site every 30 minutes and announce the new chapters
/// of the known mangas on the mangas channel. Never returns normally.
/// </summary>
/// <remarks>
/// For each feed item of a known manga (first occurrence per pass only), the chapter
/// line "title|link|description" is compared with the lines saved by the previous pass.
/// It is treated as new when it is absent from that file or when it now appears earlier
/// in the feed than its saved position. The saved file is rewritten at the end of a
/// successful pass. A failure is reported on the debug channel and the pass is retried
/// after the usual pause.
/// </remarks>
public static async void MangasCrawlerOnLireScanV2()
{
    const string site = "https://www.lirescan.me/";
    const string url = site + "rss/";
    const string splitChar = "|";
    const int pauseMilliseconds = 1800000; //30min

    // Loop instead of calling itself again: the recursion grew the call stack on
    // every pass that completed without reaching an await.
    while (true)
    {
        var time = DateTime.Now;
        ("mangasCrawlerOnLireScanV2 (" + time + ")").Println();

        try
        {
            SyndicationFeed feed;

            // "using" releases the reader even when loading the feed throws
            using (XmlReader reader = XmlReader.Create(url))
            {
                feed = SyndicationFeed.Load(reader);
            }

            int crawler_counter = 0;
            var data = new System.Text.StringBuilder();
            var processedMangas = new HashSet<string>();

            // Split the saved chapters once instead of once per feed item
            string[] knownChapters = File.ReadAllText(DataManager.Text.MANGASDATA_RSS_FILE).Split('\n');

            foreach (SyndicationItem item in feed.Items)
            {
                string title = item.Title.Text;
                string link = item.Links[0].Uri.ToString();
                // An item without a summary must not abort the whole pass
                string description = (item.Summary != null) ? item.Summary.Text : String.Empty;
                string mangaName = MangaNameToLowerCase(title);
                bool mangaExists = DataManager.mangasData.ContainsKey(mangaName);

                link = link.Replace("http://www.lirescan.com/", "");
                link = site + mangaName + link;

                string chapter = title + splitChar + link + splitChar + description;

                if (mangaExists && !processedMangas.Contains(mangaName))
                {
                    // Position of the last saved line equal to this chapter, or -1
                    int data_counter = Array.LastIndexOf(knownChapters, chapter);
                    bool newChapter = false;

                    if (data_counter < 0)
                    {
                        "rentre (notInList)".Debug();
                        newChapter = true;
                    }
                    else if (crawler_counter < data_counter)
                    {
                        ("rentre (" + crawler_counter.ToString() + " < " + data_counter.ToString() + " )").Debug();
                        newChapter = true;
                    }

                    if (newChapter)
                    {
                        Supremes.Nodes.Document document = null;

                        try
                        {
                            document = Dcsoup.Parse(new Uri(link), 15000);
                        }
                        catch (Exception parseError)
                        {
                            ("Timeout on : <" + link + ">").Debug();

                            // Keep the original failure as inner exception for diagnosis
                            throw new TimeoutException("Timeout on : <" + link + ">", parseError);
                        }

                        // The chapter is only announced when the "notif" paragraph has no text
                        var pNotif = document.Select("p[id=notif]");
                        bool isVF = (pNotif.Text == String.Empty);

                        if (isVF)
                        {
                            string scanValue = title + " => <" + link + ">";
                            string subs = string.Empty;
                            var users = DataManager.database.getSubs(mangaName);
                            string msg = "Nouveau scan trouvé pour " + mangaName + " : \n\t" + scanValue;

                            foreach (var user in users)
                            {
                                subs += "<@" + user + "> ";
                            }

                            await Channels.Mangas.SendMessageAsync(msg + " " + subs);
                        }
                    }

                    processedMangas.Add(mangaName);
                    data.Append(chapter).Append("\n");
                }

                crawler_counter++;
            }

            File.WriteAllText(DataManager.Text.MANGASDATA_RSS_FILE, data.ToString());
        }
        catch (Exception e)
        {
            await Channels.Debug.SendMessagesAsync("Le crawl des mangas a échoué, car la connexion au site a échouée.\n" + e);
            e.DisplayException(System.Reflection.MethodBase.GetCurrentMethod().ToString());
        }

        var now = DateTime.Now - time;
        ("search done. (" + DateTime.Now + ") [" + now + "]").Println();

        // Asynchronous pause: does not keep a thread blocked for half an hour
        await System.Threading.Tasks.Task.Delay(pauseMilliseconds);
    }
}
/*
 * ============================================
 * Protected
 * ============================================
 */

/// <summary>
/// Read the tags, the optional rating, the full image URL, the file size
/// and the dimensions from a post page.
/// </summary>
/// <remarks>
/// The element ids on the page are suffixed with the image id, which is taken
/// from the last path segment of <c>this.Url</c>.
/// </remarks>
/// <param name="doc">Parsed post page.</param>
/// <returns>False when <c>this.Url</c> is null or empty, true otherwise.</returns>
override protected bool Parse(Supremes.Nodes.Document doc)
{
    if (string.IsNullOrEmpty(this.Url))
    {
        return false;
    }

    string url = this.Url;

    // Ignore a trailing slash so that the last segment is the image id
    if (url.EndsWith("/"))
    {
        url = url.Substring(0, url.Length - 1);
    }

    int lastSlash = url.LastIndexOf('/');
    string imageId = url.Substring(lastSlash + 1);
    Supremes.Nodes.Element tagList = doc.Select("#quicktag1_" + imageId).First;
    Supremes.Nodes.Element seriesList = doc.Select("#quicktag2_" + imageId).First;
    Supremes.Nodes.Element characterList = doc.Select("#quicktag4_" + imageId).First;
    Supremes.Nodes.Element artistList = doc.Select("#quicktag3_" + imageId).First;

    // Get tags
    this.AddTags(tagList);
    this.AddTags(seriesList, "series");
    this.AddTags(characterList, "character");
    this.AddTags(artistList, "creator");

    // Get rating
    if (Properties.Settings.Default.AddRating)
    {
        Supremes.Nodes.Element rating = doc.Select("#rating" + imageId).First;

        if (rating != null && rating.Text != "N/A")
        {
            this.AddTag(rating.Text, "rating");
        }
    }

    // Get informations
    Supremes.Nodes.Element imageBlock = doc.Select("#content .image_block").First;

    if (imageBlock == null)
    {
        return true;
    }

    Supremes.Nodes.Element imageLink = imageBlock.Select(".thumb > a.thumb_image[href]").First;
    Supremes.Nodes.Elements metas = imageBlock.Select(".meta > dl").Select("> *");

    if (imageLink != null)
    {
        this.full = URL + imageLink.Attr("href");
    }

    // The children of the list are read as (dt, dd) pairs; a trailing lone element is ignored.
    // The index is an int: a byte index stepping by 2 could wrap around before reaching Count.
    for (int i = 0; i + 1 < metas.Count; i += 2)
    {
        Supremes.Nodes.Element dt = metas[i];
        Supremes.Nodes.Element dd = metas[i + 1];

        if (dt.Text == "File size:")
        {
            this.size = this.KbOrMbToBytes(dd.Text);
        }
        else if (dt.Text == "Dimensions:")
        {
            string dimensions = dd.Text;

            if (dimensions.Contains("("))
            {
                // Keep what precedes the first space; when there is none, cut at the
                // parenthesis instead of passing -1 to Substring
                int cut = dimensions.IndexOf(" ");

                if (cut < 0)
                {
                    cut = dimensions.IndexOf("(");
                }

                dimensions = dimensions.Substring(0, cut);
            }

            this.parseResolution(dimensions);
        }
    }

    return true;
}
/*
 * ============================================
 * Protected
 * ============================================
 */

/// <summary>
/// Read the tags, the full image URL, the resolution and the file size from a post page.
/// </summary>
/// <param name="doc">Parsed post page.</param>
/// <returns>False when the tag selection itself is null, true otherwise.</returns>
override protected bool Parse(Supremes.Nodes.Document doc)
{
    Supremes.Nodes.Elements rows = doc.Select("ul#tags li");

    if (rows == null)
    {
        return false;
    }

    // Tags
    foreach (Supremes.Nodes.Element row in rows)
    {
        Supremes.Nodes.Elements anchors = row.Select("a");

        if (anchors == null)
        {
            continue;
        }

        string rowText = row.Text;

        // Replace() rejects an empty search string, so empty rows are skipped first
        if (rowText.Length < 1)
        {
            continue;
        }

        string value = anchors.Text.Replace(rowText, "").Trim();

        if (value.Length < 1)
        {
            continue;
        }

        // What remains of the row once the value is removed is the site's label
        string label = rowText.Replace(value, "").Trim();
        string nameSpace;

        switch (label)
        {
            case "Artiste":
                nameSpace = "creator";
                break;

            case "Studio":
            case "Game":
            case "Source":
                nameSpace = "series";
                break;

            case "Character":
                nameSpace = "character";
                break;

            default:
                nameSpace = null;
                break;
        }

        this.AddTag(value.ToLower(), nameSpace);
    }

    // Full image: the preview link wins over the image element
    Supremes.Nodes.Element previewLink = doc.Select("#large > a.preview").First;

    if (previewLink != null)
    {
        this.full = previewLink.Attr("href");
    }
    else
    {
        Supremes.Nodes.Element image = doc.Select("#large > img").First;

        if (image != null)
        {
            this.full = image.Attr("src");
        }
    }

    // Resolution and size are looked up in every paragraph of the "#large" block
    Regex resolutionPattern = new Regex(@"\d+x\d+", RegexOptions.Compiled | RegexOptions.IgnoreCase);

    foreach (Supremes.Nodes.Element paragraph in doc.Select("#large > p"))
    {
        MatchCollection found = resolutionPattern.Matches(paragraph.OwnText);

        // Only an unambiguous (single) match is used
        if (found.Count == 1)
        {
            this.parseResolution(found[0].Groups[0].Value);
        }

        Supremes.Nodes.Element sizeSpan = paragraph.Select("> span").First;

        if (sizeSpan == null || string.IsNullOrWhiteSpace(sizeSpan.OwnText))
        {
            continue;
        }

        this.size = this.KbOrMbToBytes(sizeSpan.OwnText);
    }

    return true;
}
/*
 * ============================================
 * Protected
 * ============================================
 */

/// <summary>
/// Read the tags, the optional rating, the dimensions and the original image URL
/// from the "#tag-list" element of a post page.
/// </summary>
/// <param name="doc">Parsed post page.</param>
/// <returns>False when the page has no "#tag-list" element, true otherwise.</returns>
override protected bool Parse(Supremes.Nodes.Document doc)
{
    Supremes.Nodes.Element list = doc.Select("#tag-list").First;

    if (list == null)
    {
        return false;
    }

    // Tags, each group stored under its own namespace
    this.AddTags(list, "copyright", "series");
    this.AddTags(list, "character", "character");
    this.AddTags(list, "artist", "creator");
    this.AddTags(list, "general");
    this.AddTags(list, "metadata", "meta");

    // Rating is only collected when the user setting asks for it
    if (Properties.Settings.Default.AddRating)
    {
        this.GetRating(doc, "#tag-list li");
    }

    // The same list also carries the size entry and the link to the original image
    foreach (Supremes.Nodes.Element entry in list.Select("li"))
    {
        string html = entry.Html;

        if (html == null)
        {
            continue;
        }

        string trimmed = html.Trim();

        if (trimmed.Length < 1)
        {
            continue;
        }

        if (trimmed.StartsWith("Size:"))
        {
            // The dimensions sit between SIZE_SUBSTR_START and the first markup character
            int markupStart = trimmed.IndexOf('<');

            if (markupStart > SIZE_SUBSTR_START)
            {
                string dimensions = trimmed.Substring(SIZE_SUBSTR_START, markupStart - SIZE_SUBSTR_START);
                string[] sides = dimensions.Trim().Split('x');

                if (sides.Length == 2)
                {
                    int.TryParse(sides[0], out this.width);
                    int.TryParse(sides[1], out this.height);
                }
            }
        }

        Supremes.Nodes.Element anchor = entry.Select("> a").First;

        if (anchor == null || anchor.Text != "Original image")
        {
            continue;
        }

        this.full = anchor.Attr("href");
    }

    return true;
}
/*
 * ============================================
 * Protected
 * ============================================
 */

/// <summary>
/// Read the tags, the full image URL, the file size, the dimensions and the rating
/// from a post page.
/// </summary>
/// <param name="doc">Parsed post page.</param>
/// <returns>Always true; missing page sections are simply skipped.</returns>
override protected bool Parse(Supremes.Nodes.Document doc)
{
    Supremes.Nodes.Element tagSidebar = doc.Select("#tag-sidebar").First;

    // Get tags
    if (tagSidebar != null)
    {
        this.AddTags(tagSidebar, "copyright", "series");
        this.AddTags(tagSidebar, "character", "character");
        this.AddTags(tagSidebar, "artist", "creator");
        this.AddTags(tagSidebar, "medium", "meta");
        this.AddTags(tagSidebar, "general");
    }

    // Get details
    Supremes.Nodes.Element stats = doc.Select("#stats > ul").First;

    if (stats == null)
    {
        return true;
    }

    foreach (Supremes.Nodes.Element li in stats.Select("li"))
    {
        string content = li.Html;

        if (content == null)
        {
            continue;
        }

        content = content.Trim();

        if (content.StartsWith("Original:"))
        {
            Supremes.Nodes.Element full = li.Select("a").First;

            // Check the link BEFORE reading its text (the text used to be read first)
            if (full == null)
            {
                continue;
            }

            this.full = full.Attr("href");

            // The link text is split at its first space: dimensions, then the size
            // wrapped in one character on each side (presumably parentheses — TODO confirm)
            string label = full.Text;
            int space = label.IndexOf(' ');

            if (space < 0)
            {
                continue;
            }

            string dimensions = label.Substring(0, space);
            string size = label.Substring(space + 1);

            // Strip the wrapping characters only when both can be present
            if (size.Length >= 2)
            {
                this.size = this.KbOrMbToBytes(size.Substring(1, size.Length - 2));
            }

            string[] parts = dimensions.Split('x');

            if (parts.Length == 2)
            {
                int.TryParse(parts[0], out this.width);
                int.TryParse(parts[1], out this.height);
            }
        }
        else if (content.StartsWith("Rating:"))
        {
            // Skip the label then trim: same result as skipping "Rating: ", without
            // throwing when nothing follows the colon
            this.rating = content.Substring("Rating:".Length).Trim();
        }
    }

    return true;
}