/// <summary>
/// Tests the post's tag names against a blacklist pattern.
/// </summary>
/// <param name="post">Post whose <c>TagsEntity</c> names are tested.</param>
/// <param name="regex">Blacklist pattern; may be null.</param>
/// <returns>
/// True when at least one tag name matches <paramref name="regex"/>;
/// false when nothing matches or when <paramref name="regex"/> is null.
/// </returns>
private static bool IsBlacklisted(DanbooruPost post, Regex regex)
{
    // Without a pattern nothing can be blacklisted.
    if (regex == null)
    {
        return false;
    }

    bool anyTagMatches = post.TagsEntity.Any(tag => regex.IsMatch(tag.Name));
    return anyTagMatches;
}
/// <summary>
/// Sets the post's media type from its full image URL.
/// </summary>
/// <param name="post">Post to update; <c>MediaType</c> is assigned from <c>FullImageUrl</c>.</param>
private void FinalizeDanbooru(DanbooruPost post)
{
    try
    {
        post.MediaType = GetBooruMediaType(post.FullImageUrl);
    }
    catch (Exception)
    {
        // Best effort: when the media type cannot be determined the post keeps
        // whatever MediaType it already had. The exception is intentionally not
        // propagated (the unused exception variable was removed).
    }
}
/// <summary>
/// Tests the post's tag names against a list of blacklisted tags.
/// </summary>
/// <param name="post">Post whose <c>TagsEntity</c> names are tested.</param>
/// <param name="TagBlackList">Blacklisted tags; may be null or empty.</param>
/// <returns>
/// True when any tag of the post has the same name (culture-invariant,
/// case-insensitive) as any blacklisted tag; otherwise false.
/// </returns>
private static bool IsBlacklisted(DanbooruPost post, List<DanbooruTag> TagBlackList)
{
    if (TagBlackList == null)
    {
        return false;
    }

    // Every blacklist entry must be considered. The previous implementation
    // returned from inside the loop, so only the first entry was ever checked.
    foreach (var blacklisted in TagBlackList)
    {
        if (post.TagsEntity.Any(x => x.Name.Equals(blacklisted.Name, StringComparison.InvariantCultureIgnoreCase)))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Builds the text for one tag group (artist, copyright, ...) of a post.
/// </summary>
/// <param name="post">Post whose <c>TagsEntity</c> supplies the tags.</param>
/// <param name="groupedTags">
/// Working tag list. In replace mode the tags of <paramref name="tagType"/> are removed from it
/// and a single tag named <paramref name="tagReplacement"/> is appended.
/// </param>
/// <param name="tagType">Tag type to select.</param>
/// <param name="tagLimit">Number of tags at which the group is replaced or truncated.</param>
/// <param name="tagReplacement">Text used instead of the tags in replace mode.</param>
/// <param name="missingTagReplacement">Text returned when the result would be blank.</param>
/// <param name="isReplaceMode">True to replace an over-limit group, false to truncate it.</param>
/// <param name="isReplaceUnderScore">True to turn underscores into spaces (and trim) in the output.</param>
/// <returns>The space-joined tag names, the replacement text, or <paramref name="missingTagReplacement"/>.</returns>
private static string FilterTags(DanbooruPost post, List<DanbooruTag> groupedTags, DanbooruTagType tagType, int tagLimit, string tagReplacement, string missingTagReplacement, bool isReplaceMode, bool isReplaceUnderScore)
{
    // Materialize the names first: groupedTags may be modified below.
    List<string> names = post.TagsEntity
                             .Where(tag => tag.Type == tagType)
                             .Select(tag => isReplaceUnderScore ? tag.Name.Replace("_", " ").Trim() : tag.Name)
                             .ToList();

    string result;
    if (names.Count < tagLimit)
    {
        // Under the limit: use every tag.
        result = string.Join(" ", names);
    }
    else if (isReplaceMode && !string.IsNullOrWhiteSpace(tagReplacement))
    {
        // At or over the limit in replace mode: collapse the group into the replacement.
        result = isReplaceUnderScore ? tagReplacement.Replace("_", " ").Trim() : tagReplacement;
        groupedTags.RemoveAll(tag => tag.Type == tagType);
        groupedTags.Add(new DanbooruTag() { Name = tagReplacement });
    }
    else
    {
        // At or over the limit otherwise: keep only the first tagLimit tags.
        result = String.Join(" ", names.Take(tagLimit));
    }

    return string.IsNullOrWhiteSpace(result) ? missingTagReplacement : result;
}
/// <summary>
/// Parses an RSS document whose items carry the file in an <c>enclosure</c> element and the
/// preview image inside the HTML of <c>description</c>, appending one post per <c>item</c>.
/// </summary>
/// <param name="option">Supplies the provider used for URL fix-up and the post-processing settings.</param>
/// <param name="posts">List that receives the parsed posts.</param>
/// <param name="xmldoc">Raw RSS text.</param>
private static void ReadRssMethod2(DanbooruPostDaoOption option, BindingList<DanbooruPost> posts, string xmldoc)
{
    var settings = new XmlReaderSettings();
    settings.DtdProcessing = DtdProcessing.Parse;
    settings.XmlResolver = new LocalXhtmlXmlResolver();

    using (var text = new StringReader(xmldoc))
    using (var xml = XmlReader.Create(text, settings))
    {
        XDocument feed = XDocument.Load(xml);
        var entries = feed.Descendants("item");
        XNamespace dublinCore = "http://purl.org/dc/elements/1.1/";

        foreach (var entry in entries)
        {
            var post = new DanbooruPost();

            // The title is "<id> - <tags>"; split on the first dash only.
            string[] idAndTags = entry.Element("title").Value.Split(new[] { '-' }, 2);
            post.Id = idAndTags[0].Trim();
            post.Tags = idAndTags[1].Trim();
            post.Referer = AppendHttp(entry.Element("link").Value, option.Provider);
            post.CreatedAt = entry.Element("pubDate").Value;
            post.CreatorId = entry.Element(dublinCore + "creator").Value;
            post.FileUrl = entry.Element("enclosure").Attribute("url").Value;

            try
            {
                // Preview details come from the first <img> in the description HTML.
                // The full-size Width/Height are not populated by this method.
                var descriptionHtml = new HtmlAgilityPack.HtmlDocument();
                descriptionHtml.LoadHtml(HttpUtility.HtmlDecode(entry.Element("description").Value));

                var image = descriptionHtml.DocumentNode.SelectSingleNode("//img");
                post.PreviewWidth = Convert.ToInt32(image.Attributes["width"].Value);
                post.PreviewHeight = Convert.ToInt32(image.Attributes["height"].Value);
                post.PreviewUrl = image.Attributes["src"].Value;
            }
            catch (Exception)
            {
                // Preview information is optional; the post is still added without it.
            }

            PostProcess(option, post);
            posts.Add(post);
        }
    }
}
/// <summary>
/// Rebuilds <c>post.TagsEntity</c> from the tag sidebar (<c>ul#tag-sidebar</c>) of a post page.
/// </summary>
/// <param name="post">Post whose tag list is cleared and refilled.</param>
/// <param name="doc">Parsed HTML of the post page.</param>
/// <remarks>
/// When the page has no tag sidebar the tag list is left empty instead of throwing.
/// </remarks>
private static void ReparseTags(DanbooruPost post, HtmlDocument doc)
{
    post.TagsEntity.Clear();

    // HtmlAgilityPack returns null (not an empty collection) when nothing matches.
    var tagsElement = doc.DocumentNode.SelectNodes("//ul[@id='tag-sidebar']/li");
    if (tagsElement == null)
    {
        return;
    }

    foreach (var tag in tagsElement)
    {
        // Re-parse the <li> on its own so the absolute XPath queries below only see this tag.
        HtmlDocument tagDoc = new HtmlDocument();
        tagDoc.LoadHtml(tag.OuterHtml);

        var tagEntity = new DanbooruTag();
        var el = tagDoc.DocumentNode.SelectSingleNode("//li");

        // A missing class attribute is treated like an unrecognized class.
        var cls = el.GetAttributeValue("class", "");
        switch (cls)
        {
            case "tag-type-general":
                tagEntity.Type = DanbooruTagType.General;
                break;

            case "tag-type-artist":
                tagEntity.Type = DanbooruTagType.Artist;
                break;

            case "tag-type-copyright":
                tagEntity.Type = DanbooruTagType.Copyright;
                break;

            case "tag-type-character":
                tagEntity.Type = DanbooruTagType.Character;
                break;

            case "tag-type-medium":
                tagEntity.Type = DanbooruTagType.Faults;
                break;

            default:
                tagEntity.Type = DanbooruTagType.Unknown;
                break;
        }

        tagEntity.Name = tagDoc.DocumentNode.SelectSingleNode("//li/a").InnerText;
        var countStr = tagDoc.DocumentNode.SelectSingleNode("//li//span[@class='post-count']").InnerText.Trim();
        tagEntity.Count = Int32.Parse(countStr);
        post.TagsEntity.Add(tagEntity);
    }
}
/// <summary>
/// Parses an RSS document whose items carry <c>thumbnail</c> and <c>content</c> elements in the
/// namespace declared as <c>xmlns:media</c> on the root, appending one post per <c>item</c>.
/// </summary>
/// <param name="option">Supplies the provider used for URL fix-up and the post-processing settings.</param>
/// <param name="posts">List that receives the parsed posts.</param>
/// <param name="xmldoc">Raw RSS text.</param>
private static void ReadRssMethod1(DanbooruPostDaoOption option, BindingList<DanbooruPost> posts, string xmldoc)
{
    var settings = new XmlReaderSettings();
    settings.DtdProcessing = DtdProcessing.Parse;
    settings.XmlResolver = new LocalXhtmlXmlResolver();

    using (var text = new StringReader(xmldoc))
    using (var xml = XmlReader.Create(text, settings))
    {
        XDocument feed = XDocument.Load(xml);

        // Value of the root's xmlns:media declaration, used to build expanded element names.
        string media = feed.Root.Attribute("{http://www.w3.org/2000/xmlns/}media").Value;
        string thumbnailName = "{" + media + "}thumbnail";
        string contentName = "{" + media + "}content";

        foreach (var entry in feed.Descendants("item"))
        {
            var post = new DanbooruPost();

            // The title is "<id> - <tags>"; split on the first dash only.
            string[] idAndTags = entry.Element("title").Value.Split(new[] { '-' }, 2);
            post.Id = idAndTags[0].Trim();
            post.Tags = idAndTags[1].Trim();
            post.Referer = AppendHttp(entry.Element("link").Value, option.Provider);
            post.CreatedAt = entry.Element("pubDate").Value;

            var thumbnail = entry.Element(thumbnailName);
            post.PreviewUrl = AppendHttp(thumbnail.Attribute("url").Value, option.Provider);

            var content = entry.Element(contentName);
            post.FileUrl = AppendHttp(content.Attribute("url").Value, option.Provider);

            try
            {
                // Sizes and creator are scraped from the decoded description text.
                string descriptionText = HttpUtility.HtmlDecode(entry.Element("description").Value);
                Match details = imageResolutionRegex.Match(descriptionText);
                post.Width = Convert.ToInt32(details.Groups[1].Value);
                post.Height = Convert.ToInt32(details.Groups[2].Value);
                post.PreviewWidth = Convert.ToInt32(details.Groups[3].Value);
                post.PreviewHeight = Convert.ToInt32(details.Groups[4].Value);
                post.CreatorId = details.Groups[5].Value;
            }
            catch (Exception)
            {
                // These details are optional; the post is still added without them.
            }

            PostProcess(option, post);
            posts.Add(post);
        }
    }
}
/// <summary>
/// Writes the names of the post's tags to a text file, one name per line.
/// </summary>
/// <param name="post">Post whose <c>TagsEntity</c> names are written.</param>
/// <param name="filename">Target path; an existing file at this path is deleted first.</param>
internal static void WriteTagFile(DanbooruPost post, string filename)
{
    bool alreadyThere = File.Exists(filename);
    if (alreadyThere)
    {
        File.Delete(filename);
    }

    using (TextWriter output = File.CreateText(filename))
    {
        foreach (var tag in post.TagsEntity)
        {
            output.WriteLine(tag.Name);
        }
    }
}
/// <summary>
/// Checks that <c>Helper.DumpRawData</c> reports success for a small payload and a
/// file name assembled from a post's id, provider name and query.
/// </summary>
public void TestDumpRawData()
{
    var post = new DanbooruPost
    {
        Id = "123",
        Query = "",
        Provider = new DanbooruProvider() { Name = "TestProvider" }
    };
    string dumpFile = string.Concat("Dump for Post ", post.Id, post.Provider.Name, " Query ", post.Query, ".txt");
    string payload = "test124";

    bool dumped = Helper.DumpRawData(payload, dumpFile);

    Assert.IsTrue(dumped);
}
/// <summary>
/// Completes a freshly parsed post: parses its tag string, sets <c>Hidden</c> from the
/// blacklist check, and copies the query, search tags and provider from the options.
/// </summary>
/// <param name="option">Source of the provider, blacklist settings, query and search tags.</param>
/// <param name="post">Post to complete; its <c>Tags</c> must already be set.</param>
private static void PostProcess(DanbooruPostDaoOption option, DanbooruPost post)
{
    post.TagsEntity = Helper.ParseTags(post.Tags, option.Provider);

    // Pick the blacklist flavour configured in the options.
    post.Hidden = option.BlacklistedTagsUseRegex
        ? IsBlacklisted(post, option.BlacklistedTagsRegex)
        : IsBlacklisted(post, option.BlacklistedTags);

    post.Query = option.Query;
    post.SearchTags = option.SearchTags;
    post.Provider = option.Provider;
}
/// <summary>
/// Determines whether a post carries a blacklisted tag according to the options.
/// </summary>
/// <param name="post">Post whose <c>TagsEntity</c> names are tested.</param>
/// <param name="option">
/// Blacklist settings: when <c>BlacklistedTagsUseRegex</c> is set the names are matched against
/// <c>BlacklistedTagsRegex</c>, otherwise compared (culture-invariant, case-insensitive) with the
/// names in <c>BlacklistedTags</c>.
/// </param>
/// <returns>
/// True when any tag is blacklisted. False when none is, or when the selected blacklist
/// (regex or list) is null — an unset blacklist blocks nothing.
/// </returns>
public static bool CheckBlacklistedTag(DanbooruPost post, DanbooruPostDaoOption option)
{
    if (option.BlacklistedTagsUseRegex)
    {
        var regex = option.BlacklistedTagsRegex;

        // Guard against an unset pattern instead of failing with a NullReferenceException.
        return regex != null && post.TagsEntity.Any(x => regex.IsMatch(x.Name));
    }

    var blacklist = option.BlacklistedTags;
    if (blacklist == null)
    {
        return false;
    }

    foreach (var tag in blacklist)
    {
        if (post.TagsEntity.Any(x => x.Name.Equals(tag.Name, StringComparison.InvariantCultureIgnoreCase)))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Feeds a saved Gelbooru post page to <c>GelbooruHtmlParser.ParsePost</c> and checks the
/// extracted file URL.
/// </summary>
public void TestGelbooruParser()
{
    DanbooruProviderDao providers = DanbooruProviderDao.GetInstance();
    string fixturePath = @"../../../DanbooruDownloader3.test/TestXml/gelbooru_post.htm";
    string html = File.ReadAllText(fixturePath);

    var query = new DanbooruSearchParam();
    query.Provider = providers.Read(sourceProvider).First(p => p.Name == "gelbooru.com");
    query.Tag = "";
    query.OrderBy = "score";

    var post = new DanbooruPost { Provider = query.Provider };
    GelbooruHtmlParser.ParsePost(post, html);

    Assert.IsNotNull(post.FileUrl);
    bool urlMatches = post.FileUrl == @"http://cdn2.gelbooru.com//images/1559/303b7ed1fcba0c1d9520f76ee34ec37e.jpg";
    Assert.IsTrue(urlMatches, "Actual: " + post.FileUrl);
}
/// <summary>
/// Loads a post by id: fetches its HTML page and its JSON description, builds a
/// <c>DanbooruPost</c> from both, and loads the ugoira metadata when the post is an ugoira.
/// </summary>
/// <param name="postId">Id of the post to load.</param>
/// <returns>The loaded post.</returns>
public async Task<Post> LoadPostAsync(int postId)
{
    var postHtml = await _booruLoader.LoadPageAsync(POST_URL + postId).ConfigureAwait(false);
    var postJsonString = await _booruLoader.LoadPageAsync(string.Format(POST_JSON, postId)).ConfigureAwait(false);

    var postJson = JsonConvert.DeserializeObject<Model.Danbooru.Json.Post>(postJsonString);
    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(postHtml);

    var post = new DanbooruPost(postId, htmlDoc.DocumentNode, postJson);
    if (post.IsUgoira)
    {
        // Same context handling as the awaits above; this one previously omitted ConfigureAwait(false).
        await LoadUgoiraMeta(post).ConfigureAwait(false);
    }

    return post;
}
/// <summary>
/// Copies the attributes of the current XML element into the matching properties of a post.
/// Attribute names are compared in lower case; unrecognized attributes are skipped.
/// </summary>
/// <param name="reader">Reader positioned on the element whose attributes are consumed.</param>
/// <param name="post">Post that receives the values.</param>
private void ParsePostAttributes(XmlTextReader reader, DanbooruPost post)
{
    while (reader.MoveToNextAttribute())
    {
        string attributeName = reader.Name.ToLowerInvariant();
        switch (attributeName)
        {
            // Plain string attributes.
            case "id": post.Id = reader.Value; break;
            case "source": post.Source = reader.Value; break;
            case "creator_id": post.CreatorId = reader.Value; break;
            case "change": post.Change = reader.Value; break;
            case "score": post.Score = reader.Value; break;
            case "rating": post.Rating = reader.Value; break;
            case "status": post.Status = reader.Value; break;
            case "created_at": post.CreatedAt = reader.Value; break;
            case "md5": post.MD5 = reader.Value; break;
            case "parent_id": post.ParentId = reader.Value; break;

            // The tag string is also parsed into tag entities.
            case "tags":
                post.Tags = reader.Value;
                post.TagsEntity = Helper.ParseTags(post.Tags, Option.Provider);
                break;

            case "has_children": post.HasChildren = Boolean.Parse(reader.Value); break;

            // URLs go through AppendHttp.
            case "file_url": post.FileUrl = AppendHttp(reader.Value); break;
            case "preview_url": post.PreviewUrl = AppendHttp(reader.Value); break;
            case "sample_url": post.SampleUrl = AppendHttp(reader.Value); break;
            case "jpeg_url": post.JpegUrl = AppendHttp(reader.Value); break;

            // Dimensions fall back to -1 when the text is not a valid integer.
            case "width": SetIntOrMinusOne(reader.Value, v => post.Width = v); break;
            case "height": SetIntOrMinusOne(reader.Value, v => post.Height = v); break;
            case "preview_width": SetIntOrMinusOne(reader.Value, v => post.PreviewWidth = v); break;
            case "preview_height": SetIntOrMinusOne(reader.Value, v => post.PreviewHeight = v); break;
            case "sample_width": SetIntOrMinusOne(reader.Value, v => post.SampleWidth = v); break;
            case "sample_height": SetIntOrMinusOne(reader.Value, v => post.SampleHeight = v); break;
            case "jpeg_width": SetIntOrMinusOne(reader.Value, v => post.JpegWidth = v); break;
            case "jpeg_height": SetIntOrMinusOne(reader.Value, v => post.JpegHeight = v); break;
        }
    }
}

/// <summary>
/// Stores -1 through <paramref name="assign"/>, then overwrites it with the parsed integer.
/// A failure while parsing or assigning is swallowed unless <c>FormMain.Debug</c> is set,
/// in which case it is rethrown.
/// </summary>
private static void SetIntOrMinusOne(string text, Action<int> assign)
{
    assign(-1);
    try
    {
        assign(Int32.Parse(text));
    }
    catch (Exception)
    {
        if (FormMain.Debug)
        {
            throw;
        }
    }
}
/// <summary>
/// Parses an XML response into posts and response metadata.
/// Shimmie2 boards are delegated to <c>Engine.ShimmieEngine.ParseRSS</c>; for every other board
/// the <c>posts</c>, <c>response</c> and <c>post</c> elements are read with an XML reader.
/// </summary>
/// <param name="rawData">Raw XML text; also stored in <c>RawData</c>.</param>
private void ProcessXML(string rawData)
{
    RawData = rawData;

    using (var text = new StringReader(rawData))
    using (var reader = new XmlTextReader(text))
    {
        if (Option.Provider.BoardType == BoardType.Shimmie2)
        {
            posts = Engine.ShimmieEngine.ParseRSS(rawData, Option);
            return;
        }

        while (reader.Read())
        {
            // Only element nodes carry data of interest.
            if (reader.NodeType != XmlNodeType.Element)
            {
                continue;
            }

            switch (reader.Name.ToLowerInvariant())
            {
                case "posts":
                    // <posts count=".." offset="..">
                    while (reader.MoveToNextAttribute())
                    {
                        string attribute = reader.Name.ToLowerInvariant();
                        if (attribute.Equals("count"))
                        {
                            postCount = int.Parse(reader.Value);
                        }
                        else if (attribute.Equals("offset"))
                        {
                            offset = int.Parse(reader.Value);
                        }
                    }
                    break;

                case "response":
                    // <response success=".." reason="..">; success defaults to true.
                    Success = true;
                    while (reader.MoveToNextAttribute())
                    {
                        string attribute = reader.Name.ToLowerInvariant();
                        if (attribute.Equals("reason"))
                        {
                            ResponseMessage = reader.Value;
                        }
                        else if (attribute.Equals("success"))
                        {
                            Success = bool.Parse(reader.Value);
                        }
                    }
                    break;

                case "post":
                    var post = new DanbooruPost();
                    ParsePostAttributes(reader, post);
                    post.Hidden = Helper.CheckBlacklistedTag(post, Option);
                    post.Provider = Option.Provider;
                    post.Query = Option.Query;
                    post.SearchTags = Option.SearchTags;

                    // The referer is the post's page URL, whose shape depends on the board type.
                    var boardType = Option.Provider.BoardType;
                    if (boardType == BoardType.Danbooru || boardType == BoardType.Shimmie2)
                    {
                        post.Referer = Option.Provider.Url + @"/post/show/" + post.Id;
                    }
                    else if (boardType == BoardType.Gelbooru)
                    {
                        post.Referer = Option.Provider.Url + @"/index.php?page=post&s=view&id=" + post.Id;
                    }

                    posts.Add(post);
                    actualCount++;
                    break;
            }
        }
    }
}
/// <summary>
/// Fills a post's file/sample URL, creation time and score from the HTML of its post page.
/// </summary>
/// <param name="post">Post to update; its <c>Provider</c> is used to fix up URLs.</param>
/// <param name="postHtml">HTML of the post page.</param>
/// <param name="overideTagParsing">Not used by this method.</param>
/// <returns>The same <paramref name="post"/> instance.</returns>
/// <remarks>
/// On any failure the raw HTML is dumped to a file through <c>Helper.DumpRawData</c>
/// and the original exception is rethrown.
/// </remarks>
public static DanbooruPost ParsePost(DanbooruPost post, string postHtml, bool overideTagParsing)
{
    try
    {
        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(postHtml);

        string file_url = "";
        string sample_url = "";

        // image post
        var img = doc.DocumentNode.SelectSingleNode("//img[@id='main_image']");
        if (img != null)
        {
            sample_url = file_url = Helper.FixUrl(img.GetAttributeValue("src", ""), isHttps(post.Provider), hostname: post.Provider.Url);
        }

        // video post (takes precedence over the image when both are present)
        var video = doc.DocumentNode.SelectSingleNode("//video[@id='video-id']/source");
        if (video != null)
        {
            var dl = video.GetAttributeValue("src", "");
            sample_url = file_url = Helper.FixUrl(dl, isHttps(post.Provider), hostname: post.Provider.Url);
        }

        post.SampleUrl = sample_url;
        post.FileUrl = file_url;

        // Defaults used when the page carries no timestamp.
        post.CreatedAt = "N/A";
        post.CreatedAtDateTime = DateTime.MinValue;

        // SelectNodes returns null when nothing matches; the defaults above then stay in place.
        var imgInfo = doc.DocumentNode.SelectNodes("//table[@class='image_info form']");
        if (imgInfo != null)
        {
            foreach (var table in imgInfo)
            {
                if (!table.HasChildNodes)
                {
                    continue;
                }

                // NOTE(review): "//time" is an absolute XPath, so this looks at the whole
                // document rather than only this table — kept as-is; confirm that is intended.
                var time = table.SelectSingleNode("//time");
                if (time != null)
                {
                    post.CreatedAt = time.Attributes["datetime"].Value;
                    post.CreatedAtDateTime = DateTime.Parse(post.CreatedAt);
                }
            }
        }

        // score: take the captured digits (group 1). Group 0 is the whole match,
        // i.e. it would include the "Current Score: " prefix.
        var score = Regex.Match(postHtml, @"Current Score: (\d+)");
        if (score.Success)
        {
            post.Score = score.Groups[1].Value;
        }

        return post;
    }
    catch (Exception ex)
    {
        string filename = $"Dump for Post {post.Id} - {post.Provider.Name} Query {post.Query}.txt";
        bool result = Helper.DumpRawData(postHtml, filename);
        if (!result)
        {
            Program.Logger.Error($"Failed to dump rawdata to: {filename}", ex);
        }
        throw;
    }
}
/// <summary>
/// Builds the target file path for a post by expanding the placeholders of the filename format
/// (<c>%provider%</c>, <c>%id%</c>, <c>%rating%</c>, <c>%md5%</c>, <c>%searchParam%</c>, <c>%searchtag%</c>,
/// <c>%originalFilename%</c>, <c>%artist%</c>, <c>%copyright%</c>, <c>%character%</c>, <c>%circle%</c>,
/// <c>%faults%</c>, <c>%tags%</c>), prefixing the base folder and truncating to the format's limit.
/// </summary>
/// <param name="format">Filename format, group limits/replacements, base folder and length limit.</param>
/// <param name="post">Post that supplies the placeholder values.</param>
/// <returns>The resulting path (no file extension is appended here).</returns>
/// <remarks>
/// Side effects: the post's tag list is sorted in place, and the directory part of the
/// result is created when it does not exist yet.
/// </remarks>
public static string MakeFilename(DanbooruFilenameFormat format, DanbooruPost post)
{
    string filename = format.FilenameFormat;
    string provider = post.Provider.Name;
    string query = post.Query;
    string searchTags = post.SearchTags;
    string originalFileName = post.FileUrl.Split('/').Last();

    // sanitizing the format
    filename = Helper.SanitizeFilename(filename, true);

    // Remove the extension, if there is one. A name without a dot is kept whole
    // (previously this threw ArgumentOutOfRangeException).
    int extensionStart = originalFileName.LastIndexOf('.');
    if (extensionStart >= 0)
    {
        originalFileName = originalFileName.Substring(0, extensionStart);
    }
    originalFileName = Uri.UnescapeDataString(originalFileName);

    filename = filename.Replace("%provider%", Helper.SanitizeFilename(provider));
    filename = filename.Replace("%id%", post.Id);
    filename = filename.Replace("%rating%", post.Rating);
    filename = filename.Replace("%md5%", post.MD5);
    filename = filename.Replace("%searchParam%", Helper.SanitizeFilename(query));
    filename = filename.Replace("%searchtag%", Helper.SanitizeFilename(searchTags));
    filename = filename.Replace("%originalFilename%", Helper.SanitizeFilename(originalFileName));

    // NOTE(review): this is a reference to the post's own list, not a copy, so the sort
    // (and any in-place removal done by the helpers) also affects post.TagsEntity.
    var groupedTags = post.TagsEntity;
    groupedTags.Sort();

    // remove ignored tags
    groupedTags = RemoveIgnoredTags(format, groupedTags);

    // artist
    var artist = FilterTags(post, groupedTags, DanbooruTagType.Artist, format.ArtistGroupLimit, format.ArtistGroupReplacement, format.MissingTagReplacement, format.IsReplaceMode, format.TagReplaceUnderscoreToSpace);
    var artistStr = Helper.SanitizeFilename(artist).Trim();
    if (String.IsNullOrEmpty(artistStr))
    {
        artistStr = DanbooruDownloader3.Properties.Settings.Default.tagNoArtistValue;
    }
    filename = filename.Replace("%artist%", artistStr);

    // copyright
    var copyright = FilterTags(post, groupedTags, DanbooruTagType.Copyright, format.CopyrightGroupLimit, format.CopyrightGroupReplacement, format.MissingTagReplacement, format.IsReplaceMode, format.TagReplaceUnderscoreToSpace);
    var copyStr = Helper.SanitizeFilename(copyright.Trim());
    if (String.IsNullOrEmpty(copyStr))
    {
        copyStr = DanbooruDownloader3.Properties.Settings.Default.tagNoCopyrightValue;
    }
    filename = filename.Replace("%copyright%", copyStr);

    // character
    var character = FilterTags(post, groupedTags, DanbooruTagType.Character, format.CharacterGroupLimit, format.CharacterGroupReplacement, format.MissingTagReplacement, format.IsReplaceMode, format.TagReplaceUnderscoreToSpace);
    var charaStr = Helper.SanitizeFilename(character.Trim());
    if (String.IsNullOrEmpty(charaStr))
    {
        charaStr = DanbooruDownloader3.Properties.Settings.Default.tagNoCharaValue;
    }
    filename = filename.Replace("%character%", charaStr);

    // circle
    var circle = FilterTags(post, groupedTags, DanbooruTagType.Circle, format.CircleGroupLimit, format.CircleGroupReplacement, format.MissingTagReplacement, format.IsReplaceMode, format.TagReplaceUnderscoreToSpace);
    var circleStr = Helper.SanitizeFilename(circle.Trim());
    if (String.IsNullOrEmpty(circleStr))
    {
        circleStr = DanbooruDownloader3.Properties.Settings.Default.tagNoCircleValue;
    }
    filename = filename.Replace("%circle%", circleStr);

    // faults
    var faults = FilterTags(post, groupedTags, DanbooruTagType.Faults, format.FaultsGroupLimit, format.FaultsGroupReplacement, format.MissingTagReplacement, format.IsReplaceMode, format.TagReplaceUnderscoreToSpace);
    var faultStr = Helper.SanitizeFilename(faults.Trim());
    if (String.IsNullOrEmpty(faultStr))
    {
        faultStr = DanbooruDownloader3.Properties.Settings.Default.tagNoFaultValue;
    }
    filename = filename.Replace("%faults%", faultStr);

    // all tags
    var allTempTags = groupedTags.Select(x => x.Name).ToList();
    if (format.TagReplaceUnderscoreToSpace)
    {
        for (int i = 0; i < allTempTags.Count; i++)
        {
            allTempTags[i] = allTempTags[i].Replace("_", " ").Trim();
        }
    }
    filename = filename.Replace("%tags%", Helper.SanitizeFilename(string.Join(" ", allTempTags)));

    // append base folder from Save Folder text box
    if (format.BaseFolder.EndsWith(@"\"))
    {
        filename = format.BaseFolder + filename;
    }
    else if (!String.IsNullOrWhiteSpace(format.BaseFolder))
    {
        filename = format.BaseFolder + @"\" + filename;
    }

    // The length limit applies to the whole path, base folder included.
    filename = filename.Substring(0, Math.Min(filename.Length, format.Limit)).Trim();

    // check if contains subdirectory
    if (filename.Contains(@"\"))
    {
        string dir = filename.Substring(0, filename.LastIndexOf(@"\"));
        if (!Directory.Exists(dir))
        {
            Directory.CreateDirectory(dir);
        }
    }

    return filename;
}
/// <summary>
/// Builds the target file path for a post by expanding the placeholders of the filename format
/// (<c>%provider%</c>, <c>%id%</c>, <c>%rating%</c>, <c>%md5%</c>, <c>%searchParam%</c>, <c>%searchtag%</c>,
/// <c>%originalFilename%</c>, <c>%artist%</c>, <c>%copyright%</c>, <c>%character%</c>, <c>%circle%</c>,
/// <c>%faults%</c>, <c>%tags%</c>), prefixing the base folder and truncating to the format's limit.
/// </summary>
/// <param name="format">Filename format, ignored tags, group limits/replacements, base folder and length limit.</param>
/// <param name="post">Post that supplies the placeholder values.</param>
/// <returns>The resulting path (no file extension is appended here).</returns>
/// <remarks>
/// Side effects: the post's tag list is modified in place (ignored tags removed, over-limit
/// groups replaced, list sorted), and the directory part of the result is created when missing.
/// </remarks>
public static string MakeFilename(DanbooruFilenameFormat format, DanbooruPost post)
{
    string filename = format.FilenameFormat;
    string provider = post.Provider.Name;
    string query = post.Query;
    string searchTags = post.SearchTags;
    string originalFileName = post.FileUrl.Split('/').Last();

    // sanitizing the format
    filename = Helper.SanitizeFilename(filename, true);

    // Remove the extension, if there is one. A name without a dot is kept whole
    // (previously this threw ArgumentOutOfRangeException).
    int extensionStart = originalFileName.LastIndexOf('.');
    if (extensionStart >= 0)
    {
        originalFileName = originalFileName.Substring(0, extensionStart);
    }
    originalFileName = Uri.UnescapeDataString(originalFileName);

    filename = filename.Replace("%provider%", Helper.SanitizeFilename(provider));
    filename = filename.Replace("%id%", post.Id);
    filename = filename.Replace("%rating%", post.Rating);
    filename = filename.Replace("%md5%", post.MD5);
    filename = filename.Replace("%searchParam%", Helper.SanitizeFilename(query));
    filename = filename.Replace("%searchtag%", Helper.SanitizeFilename(searchTags));
    filename = filename.Replace("%originalFilename%", Helper.SanitizeFilename(originalFileName));

    // NOTE(review): this is a reference to the post's own list, not a copy.
    var groupedTags = post.TagsEntity;

    // remove ignored tags
    foreach (DanbooruTag ignoredTag in format.IgnoredTags)
    {
        if (format.IgnoreTagsUseRegex)
        {
            // The pattern does not depend on the individual ignored tag, so one pass is
            // enough; the regex is no longer rebuilt for every entry.
            if (!String.IsNullOrWhiteSpace(format.IgnoredTagsRegex))
            {
                Regex re = new Regex(format.IgnoredTagsRegex, RegexOptions.IgnoreCase);
                groupedTags.RemoveAll(x => re.IsMatch(x.Name));
            }
            break;
        }

        groupedTags.RemoveAll(x => x.Name == ignoredTag.Name);
    }

    // Artist is the only group replaced even when its limit is 1 or less.
    var artist = JoinOrReplaceTagGroup(post, groupedTags, DanbooruTagType.Artist, format.ArtistGroupLimit, false, format.ArtistGroupReplacement, format.MissingTagReplacement);
    filename = filename.Replace("%artist%", Helper.SanitizeFilename(artist).Trim());

    // Fixed: an over-limit copyright group now removes the Copyright tags and adds the
    // copyright replacement (it used to remove Artist tags and add the artist replacement).
    var copyright = JoinOrReplaceTagGroup(post, groupedTags, DanbooruTagType.Copyright, format.CopyrightGroupLimit, true, format.CopyrightGroupReplacement, format.MissingTagReplacement);
    filename = filename.Replace("%copyright%", Helper.SanitizeFilename(copyright.Trim()));

    var character = JoinOrReplaceTagGroup(post, groupedTags, DanbooruTagType.Character, format.CharacterGroupLimit, true, format.CharacterGroupReplacement, format.MissingTagReplacement);
    filename = filename.Replace("%character%", Helper.SanitizeFilename(character.Trim()));

    var circle = JoinOrReplaceTagGroup(post, groupedTags, DanbooruTagType.Circle, format.CircleGroupLimit, true, format.CircleGroupReplacement, format.MissingTagReplacement);
    filename = filename.Replace("%circle%", Helper.SanitizeFilename(circle.Trim()));

    var faults = JoinOrReplaceTagGroup(post, groupedTags, DanbooruTagType.Faults, format.FaultsGroupLimit, true, format.FaultsGroupReplacement, format.MissingTagReplacement);
    filename = filename.Replace("%faults%", Helper.SanitizeFilename(faults.Trim()));

    groupedTags.Sort();
    filename = filename.Replace("%tags%", Helper.SanitizeFilename(string.Join(" ", groupedTags.Select(x => x.Name))));

    // append base folder from Save Folder text box
    if (format.BaseFolder.EndsWith(@"\"))
    {
        filename = format.BaseFolder + filename;
    }
    else if (!String.IsNullOrWhiteSpace(format.BaseFolder))
    {
        filename = format.BaseFolder + @"\" + filename;
    }

    // The length limit applies to the whole path, base folder included.
    filename = filename.Substring(0, Math.Min(filename.Length, format.Limit)).Trim();

    // check if contains subdirectory
    if (filename.Contains(@"\"))
    {
        string dir = filename.Substring(0, filename.LastIndexOf(@"\"));
        if (!Directory.Exists(dir))
        {
            Directory.CreateDirectory(dir);
        }
    }

    return filename;
}

/// <summary>
/// Returns the text for one tag group: the replacement when the group has reached its limit
/// (removing the group's tags from <paramref name="groupedTags"/> and appending the replacement
/// as a tag), otherwise the space-joined tag names; a blank result becomes <paramref name="missing"/>.
/// </summary>
private static string JoinOrReplaceTagGroup(DanbooruPost post, List<DanbooruTag> groupedTags, DanbooruTagType type, int limit, bool requireLimitAboveOne, string replacement, string missing)
{
    // Materialize before groupedTags (which may be the same list) is modified.
    List<string> names = post.TagsEntity.Where(x => x.Type == type).Select(x => x.Name).ToList();

    string text;
    bool limitReached = names.Count >= limit && (!requireLimitAboveOne || limit > 1);
    if (limitReached && !string.IsNullOrWhiteSpace(replacement))
    {
        text = replacement;
        groupedTags.RemoveAll(x => x.Type == type);
        groupedTags.Add(new DanbooruTag() { Name = replacement });
    }
    else
    {
        text = string.Join(" ", names);
    }

    return string.IsNullOrWhiteSpace(text) ? missing : text;
}
/// <summary>
/// Extracts the file URL and sample URL of a post from the HTML of its post page.
/// </summary>
/// <param name="post">Post to update; its <c>PreviewUrl</c> selects the parsing mode.</param>
/// <param name="postHtml">HTML of the post page; must not be null or empty.</param>
/// <returns>The same <paramref name="post"/> instance.</returns>
/// <exception cref="Exception">Thrown when <paramref name="postHtml"/> is null or empty.</exception>
/// <remarks>
/// When no matching link is found the file URL is set to an empty string. A page without
/// any anchor element is handled the same way instead of throwing.
/// </remarks>
public static DanbooruPost ParsePost(DanbooruPost post, string postHtml)
{
    if (String.IsNullOrEmpty(postHtml))
    {
        throw new Exception("No post html!");
    }

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(postHtml);

    string file_url = "";
    string sample_url = "";

    // SelectNodes returns null (not an empty collection) when the page has no anchors.
    var links = doc.DocumentNode.SelectNodes("//a");

    // Flash Game or bmp
    // TODO: need to change the preview url
    if (post.PreviewUrl == "http://chan.sankakucomplex.com/download-preview.png")
    {
        if (links != null)
        {
            foreach (var link in links)
            {
                // flash or bmp download link, whichever comes first
                if (link.InnerText == "Save this flash (right click and save)" || link.InnerText == "Download")
                {
                    file_url = link.GetAttributeValue("href", "");
                    break;
                }
            }
        }
    }
    else
    {
        var image = doc.DocumentNode.SelectSingleNode("//img[@id='image']");
        if (image != null)
        {
            sample_url = image.GetAttributeValue("src", "");
        }

        if (links != null)
        {
            foreach (var link in links)
            {
                if (link.InnerText == "Original image")
                {
                    file_url = link.GetAttributeValue("href", "");
                    break;
                }
            }
        }
    }

    post.FileUrl = file_url;

    // Fall back to the file itself when the page offers no separate sample image.
    if (!string.IsNullOrWhiteSpace(file_url) && string.IsNullOrWhiteSpace(sample_url))
    {
        sample_url = file_url;
    }
    post.SampleUrl = sample_url;

    return post;
}
// Parse(data, searchParam): parses a search-result HTML page. Every <span> whose class contains
// "thumb" becomes a DanbooruPost (id, referer, tags, status, preview, score, size, rating, ...);
// TotalPost is read from the element with id 'site-title' when present, otherwise it is the
// number of parsed posts. On any exception the raw page is dumped through Helper.DumpRawData
// and the exception is rethrown.
//
// NOTE(review): this method has been collapsed onto three physical lines, so the line breaks
// that originally ended its inline "//" comments are gone; restore the formatting from version
// control before editing.
// NOTE(review): the statement assigning post.CreatorId contains the sequence "User:" followed
// by asterisks and "/" -- that is not valid C#, and text appears to be missing between the
// CreatorId assignment and the MD5 extraction that follows. Recover the original statements
// before compiling.
// NOTE(review): "!SearchParam.Page.HasValue && SearchParam.Page > 0" can never be true
// (a null page is never greater than 0), so the default page assignment is unreachable --
// confirm the intended condition.
// NOTE(review): when a thumb <span> has no <a> child, the index left by the search loop equals
// ChildNodes.Count and is then used to index the collection; the same index is reused to pick
// the image inside the anchor -- presumably relies on the site's markup; verify.
/// <summary> /// Parse search page result and return the images with initial tags. /// </summary> /// <param name="data"></param> /// <param name="searchParam"></param> /// <returns></returns> public BindingList <DanbooruPost> Parse(string data, DanbooruSearchParam searchParam) { try { this.SearchParam = searchParam; this.RawData = data; BindingList <DanbooruPost> posts = new BindingList <DanbooruPost>(); HtmlDocument doc = new HtmlDocument(); doc.LoadHtml(data); // remove popular preview var popular = doc.DocumentNode.SelectSingleNode("//div[@id='popular-preview']"); if (popular != null) { popular.Remove(); } // get all thumbs var thumbs = doc.DocumentNode.SelectNodes("//span"); if (thumbs != null && thumbs.Count > 0) { foreach (var thumb in thumbs) { if (thumb.GetAttributeValue("class", "").Contains("thumb")) { DanbooruPost post = new DanbooruPost(); post.Id = thumb.GetAttributeValue("id", "-1").Substring(1); post.Provider = searchParam.Provider; post.SearchTags = searchParam.Tag; post.Query = GenerateQueryString(searchParam); int i = 0; // get the image link for (; i < thumb.ChildNodes.Count; ++i) { if (thumb.ChildNodes[i].Name == "a") { break; } } var a = thumb.ChildNodes[i]; post.Referer = Helper.FixUrl(searchParam.Provider.Url + a.GetAttributeValue("href", ""), isHttps(post.Provider)); var img = a.ChildNodes[i]; if (img.GetAttributeValue("src", "").Contains("images/no-visibility.svg")) { Program.Logger.Warn(String.Format("No access for post {0}.", post.Id)); continue; } var title = img.GetAttributeValue("title", ""); post.Tags = title.Substring(0, title.LastIndexOf("Rating:")).Trim(); post.Tags = Helper.DecodeEncodedNonAsciiCharacters(post.Tags); post.TagsEntity = Helper.ParseTags(post.Tags, SearchParam.Provider); post.Hidden = Helper.CheckBlacklistedTag(post, searchParam.Option); var status = img.GetAttributeValue("class", "").Replace("preview", "").Trim(); if (status.Contains("deleted")) { post.Status = "deleted"; } else if (status.Contains("pending")) { 
post.Status = "pending"; } else { post.Status = status; } post.PreviewUrl = Helper.FixUrl(img.GetAttributeValue("src", ""), isHttps(post.Provider)); post.PreviewHeight = img.GetAttributeValue("height", 0); post.PreviewWidth = img.GetAttributeValue("width", 0); // Rating:Explicit Score:4.5 Size:1080x1800 User:System post.Source = ""; post.Score = title.Substring(title.LastIndexOf("Score:") + 6); post.Score = post.Score.Substring(0, post.Score.IndexOf(" ")).Trim(); string resolution = title.Substring(title.LastIndexOf("Size:") + 5); resolution = resolution.Substring(0, resolution.IndexOf(" ")).Trim(); string[] resArr = resolution.Split('x'); post.Width = Int32.Parse(resArr[0]); post.Height = Int32.Parse(resArr[1]); string rating = title.Substring(title.LastIndexOf("Rating:") + 7, 1); //rating = rating.Substring(0, rating.IndexOf(" ")).Trim(); post.Rating = rating.ToLower(); post.CreatorId = title.Substring(title.LastIndexOf("User:"******"/") + 1); post.MD5 = post.MD5.Substring(0, post.MD5.LastIndexOf(".")); posts.Add(post); } } } // idol complex var siteTitle = doc.DocumentNode.SelectSingleNode("//*[@id='site-title']"); if (siteTitle != null) { var strTitle = siteTitle.InnerText.Split('\n').First(); var strCount = "-1"; if (!Regex.IsMatch(strTitle, @".* = \d+") && strTitle.LastIndexOf("(") > 0) { // single tag // Sankaku Channel/ginhaha (1,198) strCount = strTitle.Substring(strTitle.LastIndexOf("(")); } else { // compound tag // Sankaku Channel/= = (13,957) + rating:e = 585 strCount = strTitle.Split('=').Last().Trim(); } strCount = strCount.Replace("(", ""); strCount = strCount.Replace(",", ""); strCount = strCount.Replace(".", ""); strCount = strCount.Replace(")", ""); Int32.TryParse(strCount, out int count); TotalPost = count; } else { TotalPost = posts.Count; } if (!SearchParam.Page.HasValue && SearchParam.Page > 0) { SearchParam.Page = 1; } Offset = TotalPost * SearchParam.Page; // get next id for 26th page and current page return full list (20 posts) if 
(searchParam.Page >= 25 && posts.Count == 20) { searchParam.NextKey = posts[posts.Count - 1].Id; } return(posts); } catch (Exception ex) { var filename = Helper.SanitizeFilename("Dump for Sankaku Image List - " + searchParam.Tag + " - page " + searchParam.Page + ".txt"); var result = Helper.DumpRawData(data, filename); if (!result) { Program.Logger.Error("Failed to dump rawdata to: " + filename, ex); } throw; } }
/// <summary>
/// Parse an HTML listing page into posts.
/// Every <c>span</c> whose class contains "thumb" is expected to hold an
/// anchor wrapping a preview image; the image's <c>title</c> attribute carries
/// the tags followed by "score:" and "rating:" fields.
/// </summary>
/// <param name="data">Raw HTML of the listing page.</param>
/// <param name="searchParam">Search parameters; stored in <c>SearchParam</c> and used to fill provider, search tags and query on each post.</param>
/// <returns>The posts found on the page. Thumbs without an anchor or preview image are skipped.</returns>
public BindingList <DanbooruPost> Parse(string data, DanbooruSearchParam searchParam)
{
    this.SearchParam = searchParam;
    this.RawData = data;

    BindingList<DanbooruPost> posts = new BindingList<DanbooruPost>();
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(data);

    // remove popular preview before the thumbs are collected
    var popular = doc.DocumentNode.SelectSingleNode("//div[@id='popular-preview']");
    if (popular != null)
    {
        popular.Remove();
    }

    // get all thumbs
    var thumbs = doc.DocumentNode.SelectNodes("//span");
    if (thumbs != null && thumbs.Count > 0)
    {
        foreach (var thumb in thumbs)
        {
            if (!thumb.GetAttributeValue("class", "").Contains("thumb"))
            {
                continue;
            }

            // Look the anchor and the image up by element name. The previous code
            // reused the anchor's index inside the thumb to index the anchor's own
            // children, which only worked when both indices happened to match, and
            // threw when the thumb had no anchor at all.
            var a = thumb.SelectSingleNode("a");
            var img = a == null ? null : a.SelectSingleNode(".//img");
            if (img == null)
            {
                continue;
            }

            DanbooruPost post = new DanbooruPost();

            // the first character of the element id is dropped to obtain the post id
            post.Id = thumb.GetAttributeValue("id", "_N/A").Substring(1);
            post.Provider = searchParam.Provider;
            post.SearchTags = searchParam.Tag;
            post.Query = GenerateQueryString(searchParam);

            post.Referer = searchParam.Provider.Url + "/" + System.Web.HttpUtility.HtmlDecode(a.GetAttributeValue("href", ""));

            // fall back to the anchor id when the span carries none
            if (post.Id == "N/A")
            {
                post.Id = a.GetAttributeValue("id", "N/A").Substring(1);
            }

            var title = img.GetAttributeValue("title", "");
            int ratingPos = title.LastIndexOf("rating:");

            // everything before "rating:" is treated as the tag list
            post.Tags = title.Substring(0, ratingPos).Trim();
            post.Tags = Helper.DecodeEncodedNonAsciiCharacters(post.Tags);
            post.TagsEntity = Helper.ParseTags(post.Tags, SearchParam.Provider);
            post.Hidden = Helper.CheckBlacklistedTag(post, searchParam.Option);

            post.PreviewUrl = img.GetAttributeValue("src", "");
            post.PreviewHeight = img.GetAttributeValue("height", 0);
            post.PreviewWidth = img.GetAttributeValue("width", 0);

            post.Source = "";

            // score: text after the last "score:" up to the last space
            post.Score = title.Substring(title.LastIndexOf("score:") + 6);
            post.Score = post.Score.Substring(0, post.Score.LastIndexOf(" ")).Trim();

            // rating: the single character following "rating:"
            post.Rating = title.Substring(ratingPos + 7, 1).ToLower();
            post.Status = "";

            // MD5 is derived from the preview file name: strip the directory,
            // the extension and the "thumbnail_" prefix
            post.MD5 = post.PreviewUrl.Substring(post.PreviewUrl.LastIndexOf("/") + 1);
            post.MD5 = post.MD5.Substring(0, post.MD5.LastIndexOf("."));
            post.MD5 = post.MD5.Replace("thumbnail_", "");

            posts.Add(post);
        }
    }

    TotalPost = posts.Count;
    if (!SearchParam.Page.HasValue)
    {
        SearchParam.Page = 0;
    }
    Offset = posts.Count * SearchParam.Page;
    return posts;
}
/// <summary>
/// Hand-rolled parser for a JSON response.
/// A payload starting with "{" is treated as a single status object from which
/// "reason" and "success" are read; anything else is treated as an array of
/// post objects, each of which is appended to <c>posts</c>.
/// This is a plain string splitter, not a general JSON parser: values are split
/// on "," and ":" without regard to quoting.
/// </summary>
/// <param name="json">Raw response text. Inputs shorter than 4 characters are ignored.</param>
private void ProcessJson(String json)
{
    if (json.Length < 4)
    {
        return;
    }

    // Numeric/boolean fields fall back to a default when they cannot be
    // converted; with FormMain.Debug set the conversion error is rethrown.
    int ToInt(string raw)
    {
        try
        {
            return Convert.ToInt32(raw);
        }
        catch (Exception)
        {
            if (FormMain.Debug)
            {
                throw;
            }
            return -1;
        }
    }

    bool ToBool(string raw)
    {
        try
        {
            return Convert.ToBoolean(raw);
        }
        catch (Exception)
        {
            if (FormMain.Debug)
            {
                throw;
            }
            return false;
        }
    }

    string Unquote(string raw) => raw.Replace("\"", "");

    char[] keyValueSeparator = { ':' };

    if (json.StartsWith("{"))
    {
        // status object: strip the surrounding braces, then split into "key":"value chunks
        json = json.Substring(1, json.Length - 2);
        string[] fields = json.Split(new[] { "\"," }, StringSplitOptions.None);
        foreach (string field in fields)
        {
            // Split on the first ':' only, so a reason text that itself contains
            // a colon is no longer truncated.
            string[] val = field.Split(keyValueSeparator, 2);
            if (val.Length < 2)
            {
                continue;
            }

            switch (val[0].ToLowerInvariant())
            {
                case "\"reason\"":
                    ResponseMessage = Unquote(val[1]);
                    break;

                case "\"success\"":
                    Success = bool.Parse(Unquote(val[1]));
                    break;
            }
        }
        return;
    }

    // post array: strip the leading "[{" and trailing "}]", then split per object
    json = json.Substring(2, json.Length - 4);
    string[] objects = json.Split(new[] { "},{" }, StringSplitOptions.None);
    foreach (string obj in objects)
    {
        DanbooruPost post = new DanbooruPost();

        foreach (string field in obj.Split(','))
        {
            string[] val = field.Split(keyValueSeparator, 2);
            if (val.Length < 2)
            {
                // fragment without a key/value separator, nothing to read
                continue;
            }

            switch (val[0].ToLowerInvariant())
            {
                case "\"id\"":
                    post.Id = Unquote(val[1]);
                    break;

                case "\"tags\"":
                    post.Tags = Unquote(val[1]);
                    post.Tags = Helper.DecodeEncodedNonAsciiCharacters(post.Tags);
                    post.TagsEntity = Helper.ParseTags(post.Tags, Option.Provider);
                    break;

                case "\"source\"":
                    post.Source = Unquote(val[1]);
                    break;

                case "\"creator_id\"":
                    post.CreatorId = Unquote(val[1]);
                    break;

                case "\"file_url\"":
                    post.FileUrl = AppendHttp(Unquote(val[1]));
                    break;

                case "\"width\"":
                    post.Width = ToInt(val[1]);
                    break;

                case "\"height\"":
                    post.Height = ToInt(val[1]);
                    break;

                case "\"score\"":
                    post.Score = val[1];
                    break;

                case "\"rating\"":
                    post.Rating = Unquote(val[1]);
                    break;

                case "\"md5\"":
                    post.MD5 = Unquote(val[1]);
                    break;

                case "\"preview_url\"":
                    post.PreviewUrl = AppendHttp(Unquote(val[1]));
                    break;

                case "\"preview_width\"":
                    post.PreviewWidth = ToInt(val[1]);
                    break;

                case "\"preview_height\"":
                    post.PreviewHeight = ToInt(val[1]);
                    break;

                case "\"file_size\"":
                    post.Filesize = ToInt(val[1]);
                    break;

                case "\"parent_id\"":
                    post.ParentId = val[1];
                    break;

                case "\"status\"":
                    post.Status = val[1];
                    break;

                case "\"created_at\"":
                    post.CreatedAt = val[1];
                    break;

                case "\"has_children\"":
                    post.HasChildren = ToBool(val[1]);
                    break;

                case "\"sample_width\"":
                    post.SampleWidth = ToInt(val[1]);
                    break;

                case "\"sample_height\"":
                    post.SampleHeight = ToInt(val[1]);
                    break;

                case "\"sample_url\"":
                    post.SampleUrl = AppendHttp(Unquote(val[1]));
                    break;

                case "\"jpeg_width\"":
                    post.JpegWidth = ToInt(val[1]);
                    break;

                case "\"jpeg_height\"":
                    post.JpegHeight = ToInt(val[1]);
                    break;

                case "\"jpeg_url\"":
                    post.JpegUrl = AppendHttp(Unquote(val[1]));
                    break;

                default:
                    break;
            }
        }

        post.Hidden = Helper.CheckBlacklistedTag(post, Option);
        post.Provider = Option.Provider;
        post.Query = Option.Query;
        post.SearchTags = Option.SearchTags;
        post.Referer = Option.Provider.Url + @"/post/show/" + post.Id;
        this.posts.Add(post);
        actualCount++;
    }
}
/// <summary>
/// Build the space-separated list of the post's tag names of one tag type.
/// When <paramref name="tagLimit"/> is positive and the number of matching tags
/// reaches or exceeds it, the result is either <paramref name="tagReplacement"/>
/// (replace mode with a non-blank replacement) or only the first
/// <paramref name="tagLimit"/> tags. A blank result is replaced by the
/// per-type "no tag" value from the application settings, or by
/// <paramref name="missingTagReplacement"/> for types without such a setting.
/// </summary>
/// <param name="post">Post whose <c>TagsEntity</c> is filtered.</param>
/// <param name="groupedTags">Not read or modified by this method.</param>
/// <param name="tagType">Tag type to select.</param>
/// <param name="tagLimit">Limit that triggers replacement/truncation; values of 0 or less disable it.</param>
/// <param name="tagReplacement">Text used instead of the tags in replace mode.</param>
/// <param name="missingTagReplacement">Fallback for a blank result when the tag type has no dedicated setting.</param>
/// <param name="isReplaceMode">Use <paramref name="tagReplacement"/> rather than truncating when the limit is reached.</param>
/// <param name="isReplaceUnderScore">Replace "_" with a space (and trim) in tag names and in the replacement text.</param>
/// <returns>The formatted tag string.</returns>
private static string FilterTags(DanbooruPost post, List <DanbooruTag> groupedTags, DanbooruTagType tagType, int tagLimit, string tagReplacement, string missingTagReplacement, bool isReplaceMode, bool isReplaceUnderScore)
{
    List<string> names = (from tag in post.TagsEntity
                          where tag.Type == tagType
                          select tag.Name).ToList();

    if (isReplaceUnderScore)
    {
        names = names.Select(name => name.Replace("_", " ").Trim()).ToList();
    }

    string result;
    bool limitReached = tagLimit > 0 && names.Count >= tagLimit;
    if (!limitReached)
    {
        result = string.Join(" ", names);
    }
    else if (isReplaceMode && !string.IsNullOrWhiteSpace(tagReplacement))
    {
        result = isReplaceUnderScore
            ? tagReplacement.Replace("_", " ").Trim()
            : tagReplacement;
    }
    else
    {
        result = string.Join(" ", names.Take(tagLimit));
    }

    if (!string.IsNullOrWhiteSpace(result))
    {
        return result;
    }

    // nothing usable: fall back to the configured placeholder for this tag type
    switch (tagType)
    {
        case DanbooruTagType.Artist:
            return Properties.Settings.Default.tagNoArtistValue;

        case DanbooruTagType.Character:
            return Properties.Settings.Default.tagNoCharacterValue;

        case DanbooruTagType.Circle:
            return Properties.Settings.Default.tagNoCircleValue;

        case DanbooruTagType.Copyright:
            return Properties.Settings.Default.tagNoCopyrightValue;

        case DanbooruTagType.Faults:
            return Properties.Settings.Default.tagNoFaultValue;

        default:
            return missingTagReplacement;
    }
}
/// <summary>
/// Build the target file name for a post by expanding the placeholders
/// (%provider%, %id%, %rating%, %md5%, %searchParam%, %searchtag%,
/// %originalFilename%, %uploadDateTime%, %artist%, %copyright%, %character%,
/// %circle%, %faults%, %general%, %tags%) in the configured filename format,
/// prefixing the base folder and truncating to the configured length limit.
/// </summary>
/// <param name="format">Filename format settings (pattern, group limits/replacements, base folder, length limit).</param>
/// <param name="post">Post providing the values. Its <c>TagsEntity</c> list is sorted in place.</param>
/// <returns>The expanded, truncated and trimmed file name including the base folder.</returns>
public static string MakeFilename(DanbooruFilenameFormat format, DanbooruPost post)
{
    string filename = format.FilenameFormat;
    string provider = post.Provider.Name;
    string query = post.Query;
    string searchTags = post.SearchTags;
    string originalFileName = post.FileUrl.Split('/').Last();

    // sanitizing the format
    filename = Helper.SanitizeFilename(filename, true);

    // remove extension; a name without any '.' is kept as is instead of throwing
    int extensionStart = originalFileName.LastIndexOf('.');
    if (extensionStart >= 0)
    {
        originalFileName = originalFileName.Substring(0, extensionStart);
    }
    originalFileName = Uri.UnescapeDataString(originalFileName);

    filename = filename.Replace("%provider%", Helper.SanitizeFilename(provider));
    filename = filename.Replace("%id%", post.Id);
    filename = filename.Replace("%rating%", post.Rating);
    filename = filename.Replace("%md5%", post.MD5);
    filename = filename.Replace("%searchParam%", Helper.SanitizeFilename(query));
    filename = filename.Replace("%searchtag%", Helper.SanitizeFilename(searchTags));
    filename = filename.Replace("%originalFilename%", Helper.SanitizeFilename(originalFileName));

    string dtFormat = Properties.Settings.Default.DateTimeFormat;
    if (String.IsNullOrWhiteSpace(dtFormat))
    {
        // "MM" is the month specifier; lower-case "mm" means minutes and made
        // the default render as year-minute-day.
        dtFormat = "yyyy-MM-dd HH:mm:ss";
    }
    filename = filename.Replace("%uploadDateTime%", Helper.SanitizeFilename(post.CreatedAtDateTime.ToString(dtFormat)));

    // This is the post's own list, not a copy: sorting it also orders the tags
    // that FilterTags reads from post.TagsEntity below.
    var groupedTags = post.TagsEntity;

    // custom sort to prioritize some tags based on file definition
    // Issue #46 and #81
    // regex support
    if (File.Exists(PRIORITY_TAGS))
    {
        var priorityTags = ReadTagsFromTextFile(PRIORITY_TAGS);
        groupedTags.Sort((a, b) =>
        {
            // each priority entry is used as a regex pattern against the tag name
            var containA = priorityTags.Exists(x => Regex.IsMatch(a.Name, x.Name));
            var containB = priorityTags.Exists(x => Regex.IsMatch(b.Name, x.Name));
            if (containA && !containB)
            {
                return -1;
            }
            if (!containA && containB)
            {
                return 1;
            }
            return a.CompareTo(b);
        });
    }
    else
    {
        groupedTags.Sort();
    }

    // remove ignored tags
    groupedTags = RemoveIgnoredTags(format, groupedTags);

    // artist
    var artist = FilterTags(post, groupedTags, DanbooruTagType.Artist, format.ArtistGroupLimit, format.ArtistGroupReplacement, format.MissingTagReplacement, format.IsReplaceMode, format.TagReplaceUnderscoreToSpace);
    var artistStr = Helper.SanitizeFilename(artist).Trim();
    filename = filename.Replace("%artist%", artistStr);

    // copyright
    var copyright = FilterTags(post, groupedTags, DanbooruTagType.Copyright, format.CopyrightGroupLimit, format.CopyrightGroupReplacement, format.MissingTagReplacement, format.IsReplaceMode, format.TagReplaceUnderscoreToSpace);
    var copyStr = Helper.SanitizeFilename(copyright.Trim());
    filename = filename.Replace("%copyright%", copyStr);

    // character
    var character = FilterTags(post, groupedTags, DanbooruTagType.Character, format.CharacterGroupLimit, format.CharacterGroupReplacement, format.MissingTagReplacement, format.IsReplaceMode, format.TagReplaceUnderscoreToSpace);
    var charaStr = Helper.SanitizeFilename(character.Trim());
    filename = filename.Replace("%character%", charaStr);

    // circle
    var circle = FilterTags(post, groupedTags, DanbooruTagType.Circle, format.CircleGroupLimit, format.CircleGroupReplacement, format.MissingTagReplacement, format.IsReplaceMode, format.TagReplaceUnderscoreToSpace);
    var circleStr = Helper.SanitizeFilename(circle.Trim());
    filename = filename.Replace("%circle%", circleStr);

    // faults
    var faults = FilterTags(post, groupedTags, DanbooruTagType.Faults, format.FaultsGroupLimit, format.FaultsGroupReplacement, format.MissingTagReplacement, format.IsReplaceMode, format.TagReplaceUnderscoreToSpace);
    var faultStr = Helper.SanitizeFilename(faults.Trim());
    filename = filename.Replace("%faults%", faultStr);

    // general: no group limit and no replacement text
    var general = FilterTags(post, groupedTags, DanbooruTagType.General, 0, "", "", format.IsReplaceMode, format.TagReplaceUnderscoreToSpace);
    var generalStr = Helper.SanitizeFilename(general.Trim());
    filename = filename.Replace("%general%", generalStr);

    // all tags (after ignored tags were removed)
    var allTempTags = groupedTags.Select(x => x.Name).ToList();
    if (format.TagReplaceUnderscoreToSpace)
    {
        for (int i = 0; i < allTempTags.Count; i++)
        {
            allTempTags[i] = allTempTags[i].Replace("_", " ").Trim();
        }
    }
    filename = filename.Replace("%tags%", Helper.SanitizeFilename(string.Join(" ", allTempTags)));

    // append base folder from Save Folder text box; a null/blank base folder
    // adds nothing (null used to throw on EndsWith)
    if (!String.IsNullOrWhiteSpace(format.BaseFolder))
    {
        filename = format.BaseFolder.EndsWith(@"\")
            ? format.BaseFolder + filename
            : format.BaseFolder + @"\" + filename;
    }

    // cut to the configured maximum length
    filename = filename.Substring(0, filename.Length < format.Limit ? filename.Length : format.Limit).Trim();

    return filename;
}
/// <summary>
/// Fill file url, sample url and creation date of a post from its detail page.
/// The file url is taken from the first link whose text matches a known
/// download caption; the sample url from the <c>img</c> with id "image",
/// falling back to the file url when none was found.
/// </summary>
/// <param name="post">Post to update; its <c>PreviewUrl</c> selects the flash/bmp branch.</param>
/// <param name="postHtml">HTML of the post detail page.</param>
/// <returns>The same <paramref name="post"/> instance.</returns>
/// <exception cref="Exception">Thrown when <paramref name="postHtml"/> is null or empty.</exception>
public static DanbooruPost ParsePost(DanbooruPost post, string postHtml)
{
    if (String.IsNullOrEmpty(postHtml))
    {
        throw new Exception("No post html!");
    }

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(postHtml);

    // SelectNodes returns null (not an empty collection) when nothing matches
    var links = doc.DocumentNode.SelectNodes("//a");

    // href of the first link whose text equals one of the captions, "" if none
    string FindHref(params string[] captions)
    {
        if (links == null)
        {
            return "";
        }
        foreach (var link in links)
        {
            if (Array.IndexOf(captions, link.InnerText) >= 0)
            {
                return link.GetAttributeValue("href", "");
            }
        }
        return "";
    }

    string file_url = "";
    string sample_url = "";

    // Flash Game or bmp
    // TODO: need to change the preview url
    if (post.PreviewUrl == "http://chan.sankakucomplex.com/download-preview.png")
    {
        // flash: "Save this flash ..." link, bmp: "Download" link
        file_url = FindHref("Save this flash (right click and save)", "Download");
    }
    else
    {
        var image = doc.DocumentNode.SelectSingleNode("//img[@id='image']");
        if (image != null)
        {
            sample_url = image.GetAttributeValue("src", "");
        }
        file_url = FindHref("Original image");
    }

    post.FileUrl = file_url;
    if (!string.IsNullOrWhiteSpace(file_url) && string.IsNullOrWhiteSpace(sample_url))
    {
        sample_url = file_url;
    }
    post.SampleUrl = sample_url;

    // parse datetime
    // TODO: untested
    var statsLIs = doc.DocumentNode.SelectNodes("//div[@id='stats']//li");
    if (statsLIs != null)
    {
        foreach (var item in statsLIs)
        {
            if (item.InnerHtml.StartsWith("Posted: "))
            {
                // NOTE(review): .Value is the whole match, i.e. it still includes
                // "Posted: " and the trailing "<"; Groups[1] may have been intended.
                // Left unchanged - confirm against what ParseDateTime expects.
                post.CreatedAt = System.Text.RegularExpressions.Regex.Match(item.InnerHtml, "Posted: (.*)<").Value;
                post.CreatedAtDateTime = DanbooruPostDao.ParseDateTime(post.CreatedAt, post.Provider);
                break;
            }
        }
    }

    return post;
}
/// <summary>
/// Parse an XML response into <c>posts</c>.
/// Shimmie2 providers are delegated to <c>Engine.ShimmieEngine.ParseRSS</c>.
/// For everything else the attributes of the <c>posts</c>, <c>response</c> and
/// <c>post</c> elements are read: <c>posts</c> yields count/offset,
/// <c>response</c> yields success/reason, and each <c>post</c> becomes a
/// <c>DanbooruPost</c> appended to <c>posts</c>.
/// </summary>
/// <param name="rawData">Raw XML text; also stored in <c>RawData</c>.</param>
private void ProcessXML(string rawData)
{
    RawData = rawData;

    // Shimmie2 has its own parser; no reader is needed for it
    if (Option.Provider.BoardType == BoardType.Shimmie2)
    {
        posts = Engine.ShimmieEngine.ParseRSS(rawData, Option);
        return;
    }

    // Typed attributes fall back to a default when they cannot be parsed;
    // with FormMain.Debug set the parse error is rethrown.
    int ToInt(string raw)
    {
        try
        {
            return Int32.Parse(raw);
        }
        catch (Exception)
        {
            if (FormMain.Debug)
            {
                throw;
            }
            return -1;
        }
    }

    // has_children used to be parsed unguarded, unlike every other typed field
    // here and unlike the JSON path; it now follows the same rule.
    bool ToBool(string raw)
    {
        try
        {
            return Boolean.Parse(raw);
        }
        catch (Exception)
        {
            if (FormMain.Debug)
            {
                throw;
            }
            return false;
        }
    }

    using (StringReader strReader = new StringReader(rawData))
    using (XmlTextReader reader = new XmlTextReader(strReader))
    {
        while (reader.Read())
        {
            // only start tags carry data; end elements and text are skipped
            if (reader.NodeType != XmlNodeType.Element)
            {
                continue;
            }

            var nodeName = reader.Name.ToLowerInvariant();
            if (nodeName.Equals("posts"))
            {
                while (reader.MoveToNextAttribute())
                {
                    var attributeName = reader.Name.ToLowerInvariant();
                    if (attributeName.Equals("count"))
                    {
                        // Posts Count
                        postCount = int.Parse(reader.Value);
                    }
                    else if (attributeName.Equals("offset"))
                    {
                        // Post Offset
                        offset = int.Parse(reader.Value);
                    }
                }
            }
            else if (nodeName.Equals("response"))
            {
                // a response element counts as success unless it says otherwise
                Success = true;
                while (reader.MoveToNextAttribute())
                {
                    var attributeName = reader.Name.ToLowerInvariant();
                    if (attributeName.Equals("reason"))
                    {
                        ResponseMessage = reader.Value;
                    }
                    else if (attributeName.Equals("success"))
                    {
                        Success = bool.Parse(reader.Value);
                    }
                }
            }
            else if (nodeName.Equals("post"))
            {
                DanbooruPost post = new DanbooruPost();
                while (reader.MoveToNextAttribute())
                {
                    switch (reader.Name.ToLowerInvariant())
                    {
                        case "id":
                            post.Id = reader.Value;
                            break;

                        case "tags":
                            post.Tags = reader.Value;
                            post.TagsEntity = Helper.ParseTags(post.Tags, Option.Provider);
                            break;

                        case "source":
                            post.Source = reader.Value;
                            break;

                        case "creator_id":
                            post.CreatorId = reader.Value;
                            break;

                        case "file_url":
                            post.FileUrl = AppendHttp(reader.Value);
                            break;

                        case "width":
                            post.Width = ToInt(reader.Value);
                            break;

                        case "height":
                            post.Height = ToInt(reader.Value);
                            break;

                        case "change":
                            post.Change = reader.Value;
                            break;

                        case "score":
                            post.Score = reader.Value;
                            break;

                        case "rating":
                            post.Rating = reader.Value;
                            break;

                        case "status":
                            post.Status = reader.Value;
                            break;

                        case "has_children":
                            post.HasChildren = ToBool(reader.Value);
                            break;

                        case "created_at":
                            post.CreatedAt = reader.Value;
                            break;

                        case "md5":
                            post.MD5 = reader.Value;
                            break;

                        case "preview_url":
                            post.PreviewUrl = AppendHttp(reader.Value);
                            break;

                        case "preview_width":
                            post.PreviewWidth = ToInt(reader.Value);
                            break;

                        case "preview_height":
                            post.PreviewHeight = ToInt(reader.Value);
                            break;

                        case "parent_id":
                            post.ParentId = reader.Value;
                            break;

                        case "sample_url":
                            post.SampleUrl = AppendHttp(reader.Value);
                            break;

                        case "sample_width":
                            post.SampleWidth = ToInt(reader.Value);
                            break;

                        case "sample_height":
                            post.SampleHeight = ToInt(reader.Value);
                            break;

                        case "jpeg_url":
                            post.JpegUrl = AppendHttp(reader.Value);
                            break;

                        case "jpeg_width":
                            post.JpegWidth = ToInt(reader.Value);
                            break;

                        case "jpeg_height":
                            post.JpegHeight = ToInt(reader.Value);
                            break;
                    }
                }

                post.Hidden = Helper.CheckBlacklistedTag(post, Option);
                post.Provider = Option.Provider;
                post.Query = Option.Query;
                post.SearchTags = Option.SearchTags;

                // the post page url differs per board type
                if (Option.Provider.BoardType == BoardType.Danbooru || Option.Provider.BoardType == BoardType.Shimmie2)
                {
                    post.Referer = Option.Provider.Url + @"/post/show/" + post.Id;
                }
                else if (Option.Provider.BoardType == BoardType.Gelbooru)
                {
                    post.Referer = Option.Provider.Url + @"/index.php?page=post&s=view&id=" + post.Id;
                }

                posts.Add(post);
                actualCount++;
            }
        }
    }
}
/// <summary>
/// Reparse tags from post details.
/// Rebuilds <c>post.TagsEntity</c> from the <c>li</c> items of the
/// <c>ul</c> with id "tag-sidebar": the item's class decides the tag type, its
/// link text the name and its "post-count" span the count. The processed items
/// are removed from <paramref name="doc"/>. The result is ordered by type
/// (descending) and then by name.
/// </summary>
/// <param name="post">Post whose tag list is replaced. Left untouched when the page has no tag sidebar items.</param>
/// <param name="doc">Parsed post detail page.</param>
private static void ReparseTags(DanbooruPost post, HtmlDocument doc)
{
    // SelectNodes returns null when nothing matches; keep the existing tags then
    // instead of clearing them and failing.
    var tags = doc.DocumentNode.SelectNodes("//ul[@id='tag-sidebar']/li");
    if (tags == null)
    {
        return;
    }

    post.TagsEntity.Clear();

    foreach (var tag in tags)
    {
        // Look inside the current item with relative XPath. The previous absolute
        // paths always matched the first item left in the document and only
        // worked because every processed item is removed below.
        var nameNode = tag.SelectSingleNode("./a");
        var countNode = tag.SelectSingleNode(".//span[@class='post-count']");

        if (nameNode != null)
        {
            var tagEntity = new DanbooruTag();

            // Fix Issue #146
            switch (tag.GetAttributeValue("class", ""))
            {
                case "tag-type-idol":       // idol complex
                case "tag-type-artist":     // sankaku
                    tagEntity.Type = DanbooruTagType.Artist;
                    break;

                case "tag-type-photo_set":  // idol complex: usually album name
                case "tag-type-studio":     // sankaku: circlename
                    tagEntity.Type = DanbooruTagType.Circle;
                    break;

                case "tag-type-meta":       // both
                case "tag-type-medium":     // both
                    tagEntity.Type = DanbooruTagType.Faults;
                    break;

                case "tag-type-general":    // both
                    tagEntity.Type = DanbooruTagType.General;
                    break;

                case "tag-type-copyright":  // both
                    tagEntity.Type = DanbooruTagType.Copyright;
                    break;

                case "tag-type-character":  // both
                    tagEntity.Type = DanbooruTagType.Character;
                    break;

                default:
                    tagEntity.Type = DanbooruTagType.Unknown;
                    break;
            }

            tagEntity.Name = Helper.DecodeEncodedNonAsciiCharacters(nameNode.InnerText);

            // an item without a post-count span keeps the default count
            if (countNode != null)
            {
                tagEntity.Count = Int32.Parse(countNode.InnerText.Trim());
            }

            post.TagsEntity.Add(tagEntity);
        }

        // Removal is kept so the document ends up in the same state as before
        // this change; it is no longer needed to find the right item.
        tag.Remove();
    }

    post.TagsEntity = post.TagsEntity.OrderByDescending(x => x.Type).ThenBy(x => x.Name).ToList();
}
/// <summary>
/// Fill file url and sample url (and, for flash files, the MD5) of a post from
/// its detail page. When anything fails the raw HTML is dumped to a file and
/// the exception is rethrown.
/// </summary>
/// <param name="post">Post to update; a <c>PreviewUrl</c> ending in "download-preview.png" selects the flash/bmp branch.</param>
/// <param name="postHtml">HTML of the post detail page.</param>
/// <returns>The same <paramref name="post"/> instance.</returns>
public static DanbooruPost ParsePost(DanbooruPost post, string postHtml)
{
    try
    {
        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(postHtml);

        // every url read from the page goes through the same fix-up
        string FixHref(string href) => Helper.FixUrl(href, isHttps(post.Provider));

        string file_url = "";
        string sample_url = "";

        // Flash Game or bmp
        if (post.PreviewUrl.EndsWith("download-preview.png"))
        {
            // SelectNodes returns null (not an empty collection) when nothing matches
            var links = doc.DocumentNode.SelectNodes("//a");
            if (links != null)
            {
                foreach (var link in links)
                {
                    // flash
                    if (link.InnerText == "Save this file (right click and save as)")
                    {
                        file_url = FixHref(link.GetAttributeValue("href", ""));

                        // http://cs.sankakucomplex.com/data/f6/23/f623ea7559ef39d96ebb0ca7530586b8.swf
                        // the first 32 characters of the file name are the md5
                        post.MD5 = file_url.Substring(file_url.LastIndexOf("/") + 1);
                        post.MD5 = post.MD5.Substring(0, 32);
                        break;
                    }

                    // bmp
                    if (link.InnerText == "Download")
                    {
                        file_url = FixHref(link.GetAttributeValue("href", ""));
                        break;
                    }
                }
            }
        }
        else
        {
            var lowresElement = doc.DocumentNode.SelectSingleNode("//a[@id='lowres']");
            if (lowresElement != null)
            {
                sample_url = FixHref(lowresElement.GetAttributeValue("href", ""));
            }

            var highresElement = doc.DocumentNode.SelectSingleNode("//a[@id='highres']");
            if (highresElement != null)
            {
                file_url = FixHref(highresElement.GetAttributeValue("href", ""));
            }
        }

        post.FileUrl = file_url;

        // no separate sample: use the full file as sample
        if (!string.IsNullOrWhiteSpace(file_url) && string.IsNullOrWhiteSpace(sample_url))
        {
            sample_url = file_url;
        }
        post.SampleUrl = sample_url;

        return post;
    }
    catch (Exception ex)
    {
        // The name embeds the raw query, so it is sanitized the same way the
        // listing parser sanitizes its dump file name.
        string filename = Helper.SanitizeFilename("Dump for Post " + post.Id + post.Provider.Name + " Query " + post.Query + ".txt");
        bool result = Helper.DumpRawData(postHtml, filename);
        if (!result)
        {
            Program.Logger.Error("Failed to dump rawdata to: " + filename, ex);
        }
        throw;
    }
}
/// <summary>
/// Parse the post details after added to the download list or from batch job.
/// Fills file url, sample url, creation date (and, for flash files, the MD5);
/// optionally rebuilds the tag list from the page. When anything outside the
/// date parsing fails, the raw HTML is dumped to a file and the exception is
/// rethrown. A failure while parsing the date is only logged.
/// </summary>
/// <param name="post">Post to update; a <c>PreviewUrl</c> ending in "download-preview.png" selects the flash/bmp branch.</param>
/// <param name="postHtml">HTML of the post detail page.</param>
/// <param name="overideTagParsing">When true the tags are re-read from the page via <c>ReparseTags</c>.</param>
/// <returns>The same <paramref name="post"/> instance.</returns>
public static DanbooruPost ParsePost(DanbooruPost post, string postHtml, bool overideTagParsing)
{
    try
    {
        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(postHtml);

        // every url read from the page goes through the same fix-up
        string FixHref(string href) => Helper.FixUrl(href, isHttps(post.Provider));

        string file_url = "";
        string sample_url = "";

        // reparse tags with type
        if (overideTagParsing)
        {
            ReparseTags(post, doc);
        }

        // Flash Game or bmp
        if (post.PreviewUrl.EndsWith("download-preview.png"))
        {
            // SelectNodes returns null (not an empty collection) when nothing matches
            var links = doc.DocumentNode.SelectNodes("//a");
            if (links != null)
            {
                foreach (var link in links)
                {
                    // flash
                    if (link.InnerText == "Save this file (right click and save as)")
                    {
                        file_url = FixHref(link.GetAttributeValue("href", ""));

                        // http://cs.sankakucomplex.com/data/f6/23/f623ea7559ef39d96ebb0ca7530586b8.swf
                        // the first 32 characters of the file name are the md5
                        post.MD5 = file_url.Substring(file_url.LastIndexOf("/") + 1);
                        post.MD5 = post.MD5.Substring(0, 32);
                        break;
                    }

                    // bmp
                    if (link.InnerText == "Download")
                    {
                        file_url = FixHref(link.GetAttributeValue("href", ""));
                        break;
                    }
                }
            }
        }
        else
        {
            var lowresElement = doc.DocumentNode.SelectSingleNode("//a[@id='lowres']");
            if (lowresElement != null)
            {
                sample_url = FixHref(lowresElement.GetAttributeValue("href", ""));
            }

            var highresElement = doc.DocumentNode.SelectSingleNode("//a[@id='highres']");
            if (highresElement != null)
            {
                file_url = FixHref(highresElement.GetAttributeValue("href", ""));
            }
        }

        post.FileUrl = file_url;

        // no separate sample: use the full file as sample
        if (!string.IsNullOrWhiteSpace(file_url) && string.IsNullOrWhiteSpace(sample_url))
        {
            sample_url = file_url;
        }
        post.SampleUrl = sample_url;

        // Created datetime; these defaults stay when the date cannot be found
        post.CreatedAt = "N/A";
        post.CreatedAtDateTime = DateTime.MinValue;
        try
        {
            // A missing stats list makes the loop throw, which is reported by the
            // catch below as "Unable to parse date".
            var lis = doc.DocumentNode.SelectNodes("//div[@id='post-view']//div[@id='stats']//li");
            foreach (var item in lis)
            {
                if (item.InnerText.Contains("Posted:"))
                {
                    // NOTE(review): this XPath is absolute, so it scans the links of
                    // all stats items, not only those of the "Posted:" item.
                    var links = item.SelectNodes("//div[@id='post-view']//div[@id='stats']//li//a");
                    foreach (var link in links)
                    {
                        if (link.Attributes.Contains("href") && link.Attributes["href"].Value.Contains("?tags=date"))
                        {
                            // the link title holds the date text
                            post.CreatedAt = link.Attributes["title"].Value;
                            post.CreatedAtDateTime = DanbooruPostDao.ParseDateTime(post.CreatedAt, post.Provider);
                            break;
                        }
                    }
                    break;
                }
            }
        }
        catch (Exception ex)
        {
            Program.Logger.Error("Unable to parse date", ex);
        }

        return post;
    }
    catch (Exception ex)
    {
        // The name embeds the raw query, so it is sanitized the same way the
        // listing parser sanitizes its dump file name.
        string filename = Helper.SanitizeFilename("Dump for Post " + post.Id + post.Provider.Name + " Query " + post.Query + ".txt");
        bool result = Helper.DumpRawData(postHtml, filename);
        if (!result)
        {
            Program.Logger.Error("Failed to dump rawdata to: " + filename, ex);
        }
        throw;
    }
}
/// <summary>
/// Parse an HTML listing page into posts.
/// Every <c>span</c> whose class contains "thumb" is read as one post: the
/// anchor gives the referer, the image gives preview data, and the image's
/// <c>title</c> attribute gives tags, rating, score, size and user.
/// On any exception the raw page is dumped to a file and the exception is rethrown.
/// </summary>
/// <param name="data">Raw HTML of the listing page.</param>
/// <param name="searchParam">Search parameters; stored in <c>SearchParam</c> and used to fill provider, search tags and query on each post.</param>
/// <returns>The posts found on the page.</returns>
public BindingList <DanbooruPost> Parse(string data, DanbooruSearchParam searchParam) { try { this.SearchParam = searchParam; this.RawData = data; BindingList <DanbooruPost> posts = new BindingList <DanbooruPost>(); HtmlDocument doc = new HtmlDocument(); doc.LoadHtml(data);
// remove popular preview
var popular = doc.DocumentNode.SelectSingleNode("//div[@id='popular-preview']"); if (popular != null) { popular.Remove(); }
// get all thumbs
var thumbs = doc.DocumentNode.SelectNodes("//span"); if (thumbs != null && thumbs.Count > 0) { foreach (var thumb in thumbs) { if (thumb.GetAttributeValue("class", "").Contains("thumb")) { DanbooruPost post = new DanbooruPost();
// the first character of the element id is dropped to obtain the post id
post.Id = thumb.GetAttributeValue("id", "-1").Substring(1); post.Provider = searchParam.Provider; post.SearchTags = searchParam.Tag; post.Query = GenerateQueryString(searchParam); int i = 0;
// get the image link: i ends up as the index of the first child named "a"
for (; i < thumb.ChildNodes.Count; ++i) { if (thumb.ChildNodes[i].Name == "a") { break; } } var a = thumb.ChildNodes[i]; post.Referer = Helper.FixUrl(searchParam.Provider.Url + a.GetAttributeValue("href", ""), isHttps(post.Provider));
// NOTE(review): the anchor's index inside the thumb is reused to index the
// anchor's own children - presumably both are expected to be equal; confirm.
var img = a.ChildNodes[i]; var title = img.GetAttributeValue("title", "");
// everything before the last "Rating:" is treated as the tag list
post.Tags = title.Substring(0, title.LastIndexOf("Rating:")).Trim(); post.Tags = Helper.DecodeEncodedNonAsciiCharacters(post.Tags); post.TagsEntity = Helper.ParseTags(post.Tags, SearchParam.Provider); post.Hidden = Helper.CheckBlacklistedTag(post, searchParam.Option);
// status comes from the image class with "preview" stripped
var status = img.GetAttributeValue("class", "").Replace("preview", "").Trim(); if (status.Contains("deleted")) { post.Status = "deleted"; } else if (status.Contains("pending")) { post.Status = "pending"; } else { post.Status = status; } post.PreviewUrl = Helper.FixUrl(img.GetAttributeValue("src", ""), isHttps(post.Provider)); post.PreviewHeight = img.GetAttributeValue("height", 0); post.PreviewWidth = img.GetAttributeValue("width", 0);
// Rating:Explicit Score:4.5 Size:1080x1800 User:System
post.Source = "";
// score: text after the last "Score:" up to the next space
post.Score = title.Substring(title.LastIndexOf("Score:") + 6); post.Score = post.Score.Substring(0, post.Score.IndexOf(" ")).Trim();
// size: "<width>x<height>" after the last "Size:" up to the next space
string resolution = title.Substring(title.LastIndexOf("Size:") + 5); resolution = resolution.Substring(0, resolution.IndexOf(" ")).Trim(); string[] resArr = resolution.Split('x'); post.Width = Int32.Parse(resArr[0]); post.Height = Int32.Parse(resArr[1]);
// rating: the single character following "Rating:", lower-cased
string rating = title.Substring(title.LastIndexOf("Rating:") + 7, 1);
//rating = rating.Substring(0, rating.IndexOf(" ")).Trim();
post.Rating = rating.ToLower();
// NOTE(review): in the reviewed copy the text between "User:" and "/" on the
// next statement is masked ("******"); it is reproduced verbatim. The original
// CreatorId/MD5 assignments must be confirmed against version control.
post.CreatorId = title.Substring(title.LastIndexOf("User:"******"/") + 1);
// drop the file extension from the MD5 value
post.MD5 = post.MD5.Substring(0, post.MD5.LastIndexOf(".")); posts.Add(post); } } } TotalPost = posts.Count;
// a missing page number is treated as page 1
if (!SearchParam.Page.HasValue) { SearchParam.Page = 1; } Offset = TotalPost * SearchParam.Page; return(posts); } catch (Exception ex) {
// dump the raw page for later inspection, then let the caller see the error
var filename = Helper.SanitizeFilename("Dump for Sankaku Image List - " + searchParam.Tag + " - page " + searchParam.Page + ".txt"); var result = Helper.DumpRawData(data, filename); if (!result) { Program.Logger.Error("Failed to dump rawdata to: " + filename, ex); } throw; } }