/// <summary>
/// Resets every scraper selection on this instance to <c>ScraperList.None</c>.
/// </summary>
public void Clear()
{
    var unassigned = ScraperList.None;

    this.Title = unassigned;
    this.Year = unassigned;
    this.OriginalTitle = unassigned;
    this.Rating = unassigned;
    this.Director = unassigned;
    this.Tagline = unassigned;
    this.Plot = unassigned;
    this.Outline = unassigned;
    this.Certification = unassigned;
    this.Top250 = unassigned;
    this.Mpaa = unassigned;
    this.Country = unassigned;
    this.Language = unassigned;
    this.Genre = unassigned;
    this.Runtime = unassigned;
    this.Budget = unassigned;
    this.Revenue = unassigned;
    this.Studio = unassigned;
    this.ReleaseDate = unassigned;
    this.Homepage = unassigned;
    this.Votes = unassigned;
    this.Trailer = unassigned;
    this.Cast = unassigned;
    this.Writers = unassigned;

    this.Poster = unassigned;
    this.Fanart = unassigned;
}
/// <summary>
/// Loads three pages of a fixed Indeed job search and collects one item per result row.
/// </summary>
/// <returns>
/// A <c>ScraperList</c> whose <c>Items</c> contains a <c>ScraperItem</c> for every
/// <c>div</c> whose class attribute contains both "result" and "row", in page order.
/// </returns>
public ScraperList GetAll()
{
    // Requests below go out over HTTPS; this sets the process-wide protocol to TLS 1.2.
    System.Net.ServicePointManager.SecurityProtocol = System.Net.SecurityProtocolType.Tls12;

    ScraperList model = new ScraperList();
    model.Items = new List<ScraperItem>();

    string url = "https://www.indeed.com/jobs?q=full+stack+developer&l=austin%2C+tx&sort=date";

    // The first page takes no suffix; the following pages are addressed through "&start=".
    string[] pageSuffixes = { string.Empty, "&start=10", "&start=20" };

    foreach (string pageSuffix in pageSuffixes)
    {
        var htmlWeb = new HtmlWeb();
        HtmlDocument document = htmlWeb.Load(url + pageSuffix);

        var resultRows = document.DocumentNode.Descendants("div")
            .Where(d => d.Attributes.Contains("class")
                        && d.Attributes["class"].Value.Contains("result")
                        && d.Attributes["class"].Value.Contains("row"));

        foreach (var node in resultRows)
        {
            ScraperItem item = new ScraperItem();
            item.PostTitle = node.InnerText;

            // NOTE(review): PostUrl receives the row's inner HTML, not a link. A previous
            // assignment from the "href" attribute was always overwritten by this one and
            // has been dropped as dead code - confirm whether a real URL was intended.
            item.PostUrl = node.InnerHtml;

            model.Items.Add(item);
        }
    }

    return model;
}
/// <summary>
/// Sets <c>CustomGenres</c> to the default genres of the registered scraper
/// whose <c>ScraperName</c> equals <paramref name="scraper"/>.
/// </summary>
/// <param name="scraper">The scraper identifier to look up.</param>
private void PopulateGenresFromScraper(ScraperList scraper)
{
    // Single() throws unless exactly one registered scraper carries this name.
    var selectedScraper = Scrapers.Movie.MovieScraperHandler.ReturnAllScrapers()
        .Where(candidate => candidate.ScraperName == scraper)
        .Single();

    this.CustomGenres = selectedScraper.DefaultGenres;
}
/// <summary>
/// Determines whether this instance contains a specific scraper.
/// </summary>
/// <param name="scraper">The scraper.</param>
/// <returns>
/// <c>true</c> if both <c>ContainsScraperText</c> and <c>ContainsScraperImage</c>
/// report <c>true</c> for the scraper; otherwise, <c>false</c>.
/// </returns>
public bool ContainsScraper(ScraperList scraper)
{
    // The image check only runs when the text check succeeded.
    return this.ContainsScraperText(scraper) && this.ContainsScraperImage(scraper);
}
/// <summary>
/// Gets the genre list stored in <c>GenresCollection</c> for a scraper.
/// </summary>
/// <param name="scraperList">
/// The scraper whose genres are requested. Defaults to <c>ScraperList.Imdb</c>.
/// </param>
/// <returns>
/// The stored genre list, or a new empty list when the stored entry is <c>null</c>.
/// </returns>
public static List<string> GetGenreList(ScraperList scraperList = ScraperList.Imdb)
{
    List<string> genres = GenresCollection[scraperList];

    if (genres != null)
    {
        return genres;
    }

    return new List<string>();
}
/// <summary>
/// Determines whether the specified scraper is selected for the poster or the fanart.
/// </summary>
/// <param name="scraper">The scraper.</param>
/// <returns>
/// <c>true</c> if <c>Poster</c> or <c>Fanart</c> equals the scraper; otherwise, <c>false</c>.
/// </returns>
public bool ContainsScraperImage(ScraperList scraper)
{
    return this.Poster == scraper || this.Fanart == scraper;
}
/// <summary>
/// Reads the genres for a scraper from <c>Xml/Genre/{scraperListType}.xml</c>.
/// </summary>
/// <param name="scraperListType">
/// The scraper whose name forms the XML file name.
/// </param>
/// <returns>
/// The text of every <c>genre</c> element in document order, or an empty list
/// when loading or reading the file throws.
/// </returns>
public static List<string> ReadGenreFromXml(ScraperList scraperListType)
{
    try
    {
        // XDocument.Load already returns a new document, so it is used directly
        // instead of being deep-copied into a second XDocument.
        XDocument doc = XDocument.Load("Xml/Genre/" + scraperListType + ".xml");

        return doc.Descendants("genre")
                  .Select(genre => genre.Value)
                  .ToList();
    }
    catch
    {
        // Best effort: any failure results in no genres rather than an error.
        return new List<string>();
    }
}
/// <summary>
/// Runs a web search through the Bing API and converts the hits into <c>QueryResult</c> entries.
/// </summary>
/// <param name="query">The search text; every "%20" is replaced by a space before the request is sent.</param>
/// <param name="urlmatch">A hit with a non-empty URL is kept only when its URL contains this text.</param>
/// <param name="threadID">The thread MovieUniqueId. Not read by this method.</param>
/// <param name="regexTitle">Pattern applied to the hit title; its <c>title</c> group supplies the result title.</param>
/// <param name="regexYear">Pattern applied to the hit title; its <c>year</c> group supplies the result year.</param>
/// <param name="regexID">Pattern applied to the hit URL; its <c>id</c> group supplies the scraper id.</param>
/// <param name="scraperList">Selects which id property of the result receives the extracted id.</param>
/// <returns>The collected results, or <c>null</c> when an exception is thrown during the search.</returns>
public static BindingList<QueryResult> SearchBing(string query, string urlmatch, int threadID, string regexTitle, string regexYear, string regexID, ScraperList scraperList)
{
    var logCatagory = "Scrape > Bing Search > " + query;

    try
    {
        var queryResults = new BindingList<QueryResult>();
        query = query.Replace("%20", " ");

        using (var service = new BingService())
        {
            var searchRequest = new SearchRequest
            {
                Query = query,
                Sources = new[] { SourceType.Web },
                AppId = "9A2F2F47CF77629DA4E35E912F4B696217DCFC3C",
                Web = new WebRequest { Count = 10, Offset = 0, OffsetSpecified = true }
            };

            var response = service.Search(searchRequest);
            var hits = response.Web.Results;

            if (hits != null)
            {
                foreach (var result in hits)
                {
                    // Skip hits that have a URL which does not contain the required text.
                    if (!string.IsNullOrEmpty(result.Url) && !result.Url.Contains(urlmatch))
                    {
                        continue;
                    }

                    var queryResult = new QueryResult();
                    var titleMatch = Regex.Match(result.Title, regexTitle);

                    if (titleMatch.Success)
                    {
                        var idMatch = Regex.Match(result.Url, regexID);

                        if (idMatch.Success)
                        {
                            string id = idMatch.Groups["id"].Value;

                            // Store the id on the property belonging to the requested scraper.
                            switch (scraperList)
                            {
                                case ScraperList.Imdb:
                                    queryResult.ImdbID = id;
                                    break;
                                case ScraperList.TheMovieDB:
                                    queryResult.TmdbID = id;
                                    break;
                                case ScraperList.Allocine:
                                    queryResult.AllocineId = id;
                                    break;
                                case ScraperList.FilmAffinity:
                                    queryResult.FilmAffinityId = id;
                                    break;
                                case ScraperList.FilmDelta:
                                    queryResult.FilmDeltaId = id;
                                    break;
                                case ScraperList.FilmUp:
                                    queryResult.FilmUpId = id;
                                    break;
                                case ScraperList.FilmWeb:
                                    queryResult.FilmWebId = id;
                                    break;
                                case ScraperList.Impawards:
                                    queryResult.ImpawardsId = id;
                                    break;
                                case ScraperList.Kinopoisk:
                                    queryResult.KinopoiskId = id;
                                    break;
                                case ScraperList.OFDB:
                                    queryResult.OfdbId = id;
                                    break;
                                case ScraperList.MovieMeter:
                                    queryResult.MovieMeterId = id;
                                    break;
                                case ScraperList.Sratim:
                                    queryResult.SratimId = id;
                                    break;
                            }
                        }

                        queryResult.Title = titleMatch.Groups["title"].Value;
                        queryResult.Year = Regex.Match(result.Title, regexYear).Groups["year"].Value.ToInt();
                    }
                    else
                    {
                        // Title pattern did not match: keep the raw title.
                        queryResult.Title = result.Title;
                    }

                    queryResult.AdditionalInfo = result.Description;
                    queryResult.URL = result.Url;
                    queryResults.Add(queryResult);
                }
            }

            Log.WriteToLog(LogSeverity.Info, 0, string.Format("Bing search complete ({0} results)", queryResults.Count), query);
            return queryResults;
        }
    }
    catch (Exception ex)
    {
        Log.WriteToLog(LogSeverity.Error, LoggerName.GeneralLog, logCatagory, ex.Message);
        return null;
    }
}
/// <summary>
/// Looks up the genres registered in <c>GenresCollection</c> for the given scraper.
/// </summary>
/// <param name="scraperList">
/// The scraper to look up. Defaults to <c>ScraperList.Imdb</c>.
/// </param>
/// <returns>
/// The registered genre list; a new empty list when the registered entry is <c>null</c>.
/// </returns>
public static List<string> GetGenreList(ScraperList scraperList = ScraperList.Imdb)
{
    return GenresCollection[scraperList] ?? new List<string>();
}
/// <summary>
/// Queries the Bing search API and builds a <c>QueryResult</c> for each accepted web hit.
/// </summary>
/// <param name="query">The text to search for; "%20" sequences are converted to spaces first.</param>
/// <param name="urlmatch">Hits whose URL is non-empty are accepted only if the URL contains this text.</param>
/// <param name="threadID">The thread MovieUniqueId. This method does not read it.</param>
/// <param name="regexTitle">Pattern run against the hit title; the <c>title</c> group becomes the result title.</param>
/// <param name="regexYear">Pattern run against the hit title; the <c>year</c> group becomes the result year.</param>
/// <param name="regexID">Pattern run against the hit URL; the <c>id</c> group becomes the scraper id.</param>
/// <param name="scraperList">Determines which id property of the result is filled.</param>
/// <returns>The list of results, or <c>null</c> if an exception occurs while searching.</returns>
public static BindingList<QueryResult> SearchBing(string query, string urlmatch, int threadID, string regexTitle, string regexYear, string regexID, ScraperList scraperList)
{
    var logCatagory = "Scrape > Bing Search > " + query;

    try
    {
        var found = new BindingList<QueryResult>();
        query = query.Replace("%20", " ");

        using (var bing = new BingService())
        {
            var request = new SearchRequest
            {
                Query = query,
                Sources = new[] { SourceType.Web },
                AppId = "9A2F2F47CF77629DA4E35E912F4B696217DCFC3C"
            };

            request.Web = new WebRequest { Count = 10, Offset = 0, OffsetSpecified = true };

            var response = bing.Search(request);
            var webResults = response.Web.Results;

            if (webResults != null)
            {
                foreach (var hit in webResults)
                {
                    bool accepted = string.IsNullOrEmpty(hit.Url) || hit.Url.Contains(urlmatch);

                    if (accepted)
                    {
                        var entry = new QueryResult();
                        var titleMatch = Regex.Match(hit.Title, regexTitle);

                        if (!titleMatch.Success)
                        {
                            // No structured title available: fall back to the raw one.
                            entry.Title = hit.Title;
                        }
                        else
                        {
                            var idMatch = Regex.Match(hit.Url, regexID);

                            if (idMatch.Success)
                            {
                                string scraperId = idMatch.Groups["id"].Value;

                                // Each scraper has its own id property on the result.
                                switch (scraperList)
                                {
                                    case ScraperList.Imdb:
                                        entry.ImdbID = scraperId;
                                        break;
                                    case ScraperList.TheMovieDB:
                                        entry.TmdbID = scraperId;
                                        break;
                                    case ScraperList.Allocine:
                                        entry.AllocineId = scraperId;
                                        break;
                                    case ScraperList.FilmAffinity:
                                        entry.FilmAffinityId = scraperId;
                                        break;
                                    case ScraperList.FilmDelta:
                                        entry.FilmDeltaId = scraperId;
                                        break;
                                    case ScraperList.FilmUp:
                                        entry.FilmUpId = scraperId;
                                        break;
                                    case ScraperList.FilmWeb:
                                        entry.FilmWebId = scraperId;
                                        break;
                                    case ScraperList.Impawards:
                                        entry.ImpawardsId = scraperId;
                                        break;
                                    case ScraperList.Kinopoisk:
                                        entry.KinopoiskId = scraperId;
                                        break;
                                    case ScraperList.OFDB:
                                        entry.OfdbId = scraperId;
                                        break;
                                    case ScraperList.MovieMeter:
                                        entry.MovieMeterId = scraperId;
                                        break;
                                    case ScraperList.Sratim:
                                        entry.SratimId = scraperId;
                                        break;
                                }
                            }

                            entry.Title = titleMatch.Groups["title"].Value;
                            entry.Year = Regex.Match(hit.Title, regexYear).Groups["year"].Value.ToInt();
                        }

                        entry.AdditionalInfo = hit.Description;
                        entry.URL = hit.Url;
                        found.Add(entry);
                    }
                }
            }

            Log.WriteToLog(LogSeverity.Info, 0, string.Format("Bing search complete ({0} results)", found.Count), query);
            return found;
        }
    }
    catch (Exception ex)
    {
        Log.WriteToLog(LogSeverity.Error, LoggerName.GeneralLog, logCatagory, ex.Message);
        return null;
    }
}
/// <summary>
/// Sets every scraper selection held by this instance back to <c>ScraperList.None</c>.
/// </summary>
public void Clear()
{
    var none = ScraperList.None;

    this.Title = none;
    this.Year = none;
    this.OrigionalTitle = none;
    this.Rating = none;
    this.Director = none;
    this.Tagline = none;
    this.Plot = none;
    this.Outline = none;
    this.Certification = none;
    this.Top250 = none;
    this.Mpaa = none;
    this.Country = none;
    this.Language = none;
    this.Genre = none;
    this.Runtime = none;
    this.Budget = none;
    this.Revenue = none;
    this.Studio = none;
    this.ReleaseDate = none;
    this.Homepage = none;
    this.Votes = none;
    this.Trailer = none;
    this.Cast = none;
    this.Writers = none;

    this.Poster = none;
    this.Fanart = none;
}
/// <summary>
/// Checks whether the given scraper is the one chosen for the poster or for the fanart.
/// </summary>
/// <param name="scraper">The scraper.</param>
/// <returns>
/// <c>true</c> when <c>Poster</c> or <c>Fanart</c> equals the scraper; otherwise, <c>false</c>.
/// </returns>
public bool ContainsScraperImage(ScraperList scraper)
{
    bool usedForPoster = this.Poster == scraper;

    // Fanart is only consulted when the poster does not already match.
    return usedForPoster || this.Fanart == scraper;
}
/// <summary>
/// Determines whether the specified scraper is selected for any of the text fields.
/// </summary>
/// <param name="scraper">The scraper.</param>
/// <returns>
/// <c>true</c> if at least one of the text field selections compared below equals
/// the scraper; otherwise, <c>false</c>.
/// </returns>
public bool ContainsScraperText(ScraperList scraper)
{
    // Evaluated left to right; the first matching field ends the check.
    return this.Title == scraper
        || this.Year == scraper
        || this.OrigionalTitle == scraper
        || this.Rating == scraper
        || this.Director == scraper
        || this.Tagline == scraper
        || this.Plot == scraper
        || this.Outline == scraper
        || this.Certification == scraper
        || this.Top250 == scraper
        || this.Mpaa == scraper
        || this.Country == scraper
        || this.Language == scraper
        || this.Genre == scraper
        || this.Runtime == scraper
        || this.Budget == scraper
        || this.ReleaseDate == scraper
        || this.Revenue == scraper
        || this.Studio == scraper
        || this.Homepage == scraper
        || this.Votes == scraper
        || this.Trailer == scraper
        || this.Cast == scraper
        || this.Writers == scraper;
}
/// <summary>
/// Determines whether this instance contains a specific scraper.
/// </summary>
/// <param name="scraper">The scraper.</param>
/// <returns>
/// <c>true</c> only when <c>ContainsScraperText</c> and <c>ContainsScraperImage</c>
/// both return <c>true</c> for the scraper; otherwise, <c>false</c>.
/// </returns>
public bool ContainsScraper(ScraperList scraper)
{
    if (!this.ContainsScraperText(scraper))
    {
        return false;
    }

    return this.ContainsScraperImage(scraper);
}
/// <summary>
/// Loads <c>Xml/Genre/{scraperListType}.xml</c> and returns the genres it lists.
/// </summary>
/// <param name="scraperListType">
/// The scraper whose name is used as the XML file name.
/// </param>
/// <returns>
/// The value of each <c>genre</c> element in document order; an empty list
/// if loading or reading the file throws.
/// </returns>
public static List<string> ReadGenreFromXml(ScraperList scraperListType)
{
    try
    {
        string path = "Xml/Genre/" + scraperListType + ".xml";

        // The loaded document is used as is; wrapping it in a second XDocument
        // only produced a needless deep copy.
        XDocument genreDocument = XDocument.Load(path);

        IEnumerable<string> genres = genreDocument.Descendants("genre").Select(element => element.Value);
        return genres.ToList();
    }
    catch
    {
        // Best effort: every failure is reported as "no genres".
        return new List<string>();
    }
}
/// <summary>
/// Checks whether the given scraper is the one chosen for any text field of this instance.
/// </summary>
/// <param name="scraper">The scraper.</param>
/// <returns>
/// <c>true</c> when any of the text field selections compared below equals the
/// scraper; otherwise, <c>false</c>.
/// </returns>
public bool ContainsScraperText(ScraperList scraper)
{
    // Short-circuits on the first field that matches, in the order listed.
    return this.Title == scraper
        || this.Year == scraper
        || this.OriginalTitle == scraper
        || this.Rating == scraper
        || this.Director == scraper
        || this.Tagline == scraper
        || this.Plot == scraper
        || this.Outline == scraper
        || this.Certification == scraper
        || this.Top250 == scraper
        || this.Mpaa == scraper
        || this.Country == scraper
        || this.Language == scraper
        || this.Genre == scraper
        || this.Runtime == scraper
        || this.Budget == scraper
        || this.ReleaseDate == scraper
        || this.Revenue == scraper
        || this.Studio == scraper
        || this.Homepage == scraper
        || this.Votes == scraper
        || this.Trailer == scraper
        || this.Cast == scraper
        || this.Writers == scraper;
}