public bool beginQuery() { Regex work_regex = new Regex(@"([\d]+)\sWorks(\sfound|)\sin"); Regex error_regex = new Regex("div[^\\>]*class=\"[\\w\\s]*errors"); int works = 0; // number of works: default 0 until a number can be found String url = ""; if (this.minimum < 0 && this.maximum > -1) { url = UrlGenerator.searchUrlMax(this.maximum, this.tag, this.custom); } else if (this.maximum < 0 && this.minimum > -1) { url = UrlGenerator.searchUrlMin(this.minimum, this.tag, this.custom); } else if (this.maximum > -1 && this.minimum > -1) { url = UrlGenerator.searchUrlMinMax(this.minimum, this.maximum, this.tag, this.custom); } else { url = UrlGenerator.searchUrl(this.tag, this.custom); } String raw = Scraper.scrape(url); // scrape search results page if (String.IsNullOrEmpty(raw) != true) { if (Int32.TryParse(work_regex.Match(raw).Groups[1].ToString().Trim(), out works) == false) { return(false); } // if the response is not empty, regex for number and attempt to parse as int // if successful: parsed number will be stored in var works, replacing default 0 // if unsuccessful, default 0 will remain } else { return(false); } // either way, return number of works // successful scraping will return number // unsuccessful scraping will return 0 this.results = works; return(true); }
public static Work beginQuery(int id) { String url = UrlGenerator.workUrl(id); String raw = Scraper.scrape(url); if (String.IsNullOrEmpty(raw) == true) { throw new System.ArgumentException("Work could not be found!", id.ToString()); } Work result = new Work(); Regex title_regex = new Regex("<h2 class=\"[^\"]*title[^\"]*\">(?'title'[^\\<]*)"); Regex author_regex = new Regex("<a[^>]*rel=\"author\"[^>]*>(?'author'[^<]*)"); Regex publish_regex = new Regex("<dd class=\"published\">(?'publish'\\d\\d\\d\\d-\\d\\d-\\d\\d)"); Regex update_regex = new Regex("<dd class=\"status\">(?'update'\\d\\d\\d\\d-\\d\\d-\\d\\d)"); Regex bookmarks_regex = new Regex("<dd class=\"bookmarks\"><a[^>]*>(?'bookmarks'\\d*)"); String title = ""; String author = ""; int chapters = 1; int words = 0; int comments = 0; int kudos = 0; int bookmarks = 0; int hits = 0; Match title_match = title_regex.Match(raw); Match author_match = author_regex.Match(raw); if (title_match.Groups["title"].Success == true) { title = title_match.Groups["title"].ToString().Trim(); } if (author_match.Groups["author"].Success == true) { author = author_match.Groups["author"].ToString().Trim(); } Match chapters_match = regexNumericField("chapters").Match(raw); Int32.TryParse(matchProperty(chapters_match, "chapters"), out chapters); Match words_match = regexNumericField("words").Match(raw); Int32.TryParse(matchProperty(words_match, "words"), out words); Match comments_match = regexNumericField("comments").Match(raw); Int32.TryParse(matchProperty(comments_match, "comments"), out comments); Match kudos_match = regexNumericField("kudos").Match(raw); Int32.TryParse(matchProperty(kudos_match, "kudos"), out kudos); Match bookmarks_match = bookmarks_regex.Match(raw); Int32.TryParse(matchProperty(bookmarks_match, "bookmarks"), out bookmarks); Match hits_match = regexNumericField("hits").Match(raw); Int32.TryParse(matchProperty(hits_match, "hits"), out hits); Match publish_match = publish_regex.Match(raw); DateTime published = new DateTime(); if (publish_match.Groups["publish"].Success == true && DateTime.TryParse(publish_match.Groups["publish"].ToString(), out published) == false) { // checking first if the capture group exists. if not, condition will fail anyway. // then running int32 try parse, if successful, value will be stored regardless of if condition (out published) // if group exists but int try parse fails, published will be assigned january 1st, 1970 instead: a common failsafe value published = new DateTime(1970, 1, 1); } result.published = published; Match update_match = update_regex.Match(raw); DateTime updated = new DateTime(); if (update_match.Groups["update"].Success == false || chapters == 1) { updated = published; } else if (update_match.Groups["update"].Success == true && DateTime.TryParse(update_match.Groups["update"].ToString(), out updated) == false) { // same procedure as above, except if no update date can be detected or there is only 1 chapter // the publish date will be used, since oneshots and fics with only 1 chapter yet don't have an update date published = new DateTime(1970, 1, 1); } result.updated = updated; result.title = title; result.author = author; result.chapters = chapters; result.words = words; result.comments = comments; result.kudos = kudos; result.bookmarks = bookmarks; result.hits = hits; return(result); }