/// <summary>
/// Collects the writers listed on a title page.
/// </summary>
/// <param name="doc">Parsed HTML document of the title page.</param>
/// <returns>
/// One <see cref="Person"/> per link inside the first "txt-block" div whose text
/// contains "Writers:" (or, failing that, "Writer:") and whose markup carries an
/// "/nmNNNNNNN/" identifier. Empty when no such div or link exists.
/// </returns>
private static List<Person> GetWriters(HtmlDocument doc)
{
    var writers = new List<Person>();

    // SelectNodes yields null (not an empty collection) when nothing matches.
    var divs = doc.DocumentNode.SelectNodes(".//div[@class='txt-block']");
    if (divs == null)
    {
        return writers;
    }

    // Prefer the plural heading; fall back to the singular one.
    var writersBlock = divs.FirstOrDefault(x => x.InnerText.Contains("Writers:"))
                       ?? divs.FirstOrDefault(x => x.InnerText.Contains("Writer:"));
    if (writersBlock == null)
    {
        return writers;
    }

    var links = writersBlock.SelectNodes(".//a");
    if (links == null)
    {
        return writers;
    }

    foreach (var link in links)
    {
        // Only links that carry a person identifier are writers; other anchors are skipped.
        var match = Regex.Match(link.OuterHtml, @"/nm\d\d\d\d\d\d\d/");
        if (!match.Success)
        {
            continue;
        }

        writers.Add(new Person
        {
            // Strip the surrounding slashes and the "nm" prefix, leaving the digits only.
            IMDBID = match.Value.Replace("/", "").Replace("nm", ""),
            Name = HtmlEscapeCharConverter.Decode(link.InnerText.Replace("\n", ""))
        });
    }

    return writers;
}
/// <summary>
/// Background-worker handler that downloads the "/goofs" page for the current
/// URL and reports one randomly chosen goof through the worker's progress event.
/// </summary>
/// <param name="sender">The <see cref="BackgroundWorker"/> running this handler.</param>
/// <param name="e">Work arguments (not read by this handler).</param>
/// <remarks>
/// Nothing is reported when the page has no "trivia" list or the lists contain no items.
/// </remarks>
private void LoadGoof(object sender, DoWorkEventArgs e)
{
    var worker = sender as BackgroundWorker;
    var hw = new HtmlWeb();
    var doc = hw.Load(_url + "/goofs");

    // Cheap substring pre-check before running the XPath query.
    if (!doc.DocumentNode.InnerHtml.Contains("class=\"trivia\""))
    {
        return;
    }

    var goofLists = doc.DocumentNode.SelectNodes(".//ul [@class='trivia']");
    if (goofLists == null)
    {
        return;
    }

    var goofs = new List<String>();
    foreach (var goofList in goofLists)
    {
        // SelectNodes yields null when a list has no <li> children; skip such lists.
        var items = goofList.SelectNodes(".//li");
        if (items == null)
        {
            continue;
        }

        foreach (var item in items)
        {
            goofs.Add(item.InnerText);
        }
    }

    // Without this guard an empty list would be indexed below.
    if (goofs.Count == 0)
    {
        return;
    }

    // NOTE(review): randomNum is defined elsewhere; presumably it returns an index
    // within the given bounds - confirm whether the upper bound is inclusive.
    var goof = HtmlEscapeCharConverter.Decode(goofs[randomNum(0, goofs.Count - 1)].Trim());
    worker.ReportProgress(100, goof);
}
/// <summary>
/// Extracts the title text from the page's "header" h1 element.
/// </summary>
/// <param name="doc">Parsed HTML document of the title page.</param>
/// <returns>
/// The header text up to (not including) the first "(", HTML-decoded and with
/// newlines removed; an empty string when the header element is missing.
/// </returns>
private static String GetTitle(HtmlDocument doc)
{
    // SelectSingleNode returns null when nothing matches, so a missing header is
    // handled here instead of failing on a null node collection.
    var titleNode = doc.DocumentNode.SelectSingleNode(".//h1[@class='header']");
    if (titleNode == null)
    {
        return string.Empty;
    }

    var title = titleNode.InnerText.Trim();

    // Cut at the first parenthesis; leave the text untouched when there is none
    // (the previous unconditional Remove threw for headers without "(").
    int parenAt = title.IndexOf('(');
    if (parenAt >= 0)
    {
        title = title.Remove(parenAt);
    }

    return HtmlEscapeCharConverter.Decode(title).Replace("\n", "");
}
/// <summary>
/// Background-worker handler that downloads the "/trivia" page for the current
/// URL and reports one randomly chosen trivia entry through the worker's progress event.
/// </summary>
/// <param name="sender">The <see cref="BackgroundWorker"/> running this handler.</param>
/// <param name="e">Work arguments (not read by this handler).</param>
/// <remarks>Nothing is reported when the page has no "sodatext" div.</remarks>
private void LoadTrivia(object sender, DoWorkEventArgs e)
{
    var worker = sender as BackgroundWorker;
    var hw = new HtmlWeb();
    var doc = hw.Load(_url + "/trivia");

    // Cheap substring pre-check before running the XPath query.
    if (!doc.DocumentNode.InnerHtml.Contains("class=\"sodatext\""))
    {
        return;
    }

    // The substring check does not guarantee the XPath matches (the class could be
    // on a non-div element), and SelectNodes yields null when nothing matches.
    var entries = doc.DocumentNode.SelectNodes(".//div[@class='sodatext']");
    if (entries == null || entries.Count == 0)
    {
        return;
    }

    // NOTE(review): randomNum is defined elsewhere; presumably it returns an index
    // within the given bounds - confirm whether the upper bound is inclusive.
    var chosen = entries[randomNum(0, entries.Count - 1)];
    var trivia = HtmlEscapeCharConverter.Decode(chosen.InnerText)
        .Trim()
        .Replace("Link this trivia", "");

    worker.ReportProgress(100, trivia);
}
/// <summary>
/// Background-worker handler that downloads the page at the current URL and
/// reports three pieces of text through the worker's progress event: the first
/// "txt-block" div (30%), the "header" h1 (60%) and the first paragraph (90%).
/// </summary>
/// <param name="sender">The <see cref="BackgroundWorker"/> running this handler.</param>
/// <param name="e">Work arguments (not read by this handler).</param>
/// <remarks>
/// A piece whose element is missing from the page is skipped rather than
/// aborting the whole handler with a null dereference.
/// </remarks>
private void LoadBio(object sender, DoWorkEventArgs e)
{
    var worker = sender as BackgroundWorker;
    var hw = new HtmlWeb();
    var doc = hw.Load(_url);

    // NOTE(review): the original called Replace(" ", " "), which is a no-op as
    // written; collapsing runs of spaces is assumed to be the intent - confirm.
    var bornNode = doc.DocumentNode.SelectSingleNode(".//div[@class='txt-block']");
    if (bornNode != null)
    {
        var born = Regex.Replace(bornNode.InnerText.Trim(), " {2,}", " ").Replace("\n", "");
        worker.ReportProgress(30, HtmlEscapeCharConverter.Decode(born));
    }

    var nameNode = doc.DocumentNode.SelectSingleNode(".//h1[@class='header']");
    if (nameNode != null)
    {
        var name = Regex.Replace(nameNode.InnerText.Trim(), " {2,}", " ").Replace("\n", "");
        worker.ReportProgress(60, HtmlEscapeCharConverter.Decode(name));
    }

    // First paragraph in document order; null when the page has no <p> at all.
    var bioNode = doc.DocumentNode.SelectSingleNode(".//p");
    if (bioNode != null)
    {
        worker.ReportProgress(90, HtmlEscapeCharConverter.Decode(bioNode.InnerText.Trim()));
    }
}
/// <summary>
/// Reads the cast table of a title page into a list of roles.
/// </summary>
/// <param name="doc">Parsed HTML document of the title page.</param>
/// <returns>
/// One <see cref="Role"/> per table row that has both a "name" and a "character"
/// cell and a person identifier in the name cell. Reading stops at the first row
/// whose text contains "Rest of cast". Empty when the page has no cast table.
/// </returns>
private static List<Role> GetCast(HtmlDocument doc)
{
    const int idLength = 7; // number of characters taken after the "nm" prefix

    var cast = new List<Role>();

    // SelectNodes yields null when nothing matches; the previous DefaultIfEmpty()
    // call threw on that null and made the later null check ineffective.
    var rows = doc.DocumentNode.SelectNodes(".//table[@class='cast_list']//tr");
    if (rows == null)
    {
        return cast;
    }

    // The first row is skipped, as in the original (presumably a header row).
    foreach (var row in rows.Skip(1))
    {
        if (row.InnerText.Contains("Rest of cast"))
        {
            break;
        }

        // Rows without both cells (e.g. separators) carry no role information.
        var nameCell = row.SelectSingleNode(".//td[@class='name']");
        var characterCell = row.SelectSingleNode(".//td[@class='character']");
        if (nameCell == null || characterCell == null)
        {
            continue;
        }

        // The identifier is the run of characters right after the first "nm" in the cell markup.
        var nameHtml = nameCell.InnerHtml;
        int prefixAt = nameHtml.IndexOf("nm", StringComparison.Ordinal);
        if (prefixAt < 0 || prefixAt + 2 + idLength > nameHtml.Length)
        {
            continue;
        }
        var actorId = nameHtml.Substring(prefixAt + 2, idLength);

        // Collapse runs of spaces to a single space. The original loop replaced a
        // space with a space, which never terminates once the text has a space.
        var character = Regex.Replace(characterCell.InnerText.Trim(), " {2,}", " ");

        // Drop bracketed annotations such as "(uncredited)", then tidy up.
        character = Regex.Replace(character, @"(\{.*\}|\(.*\)|\[.*\])", "")
            .Replace("\n", "")
            .Trim();

        cast.Add(new Role
        {
            Character = HtmlEscapeCharConverter.Decode(character),
            Person = new Person
            {
                IMDBID = actorId,
                Name = HtmlEscapeCharConverter.Decode(nameCell.InnerText.Trim().Replace("\n", ""))
            }
        });
    }

    return cast;
}
/// <summary>
/// Reads the genre links of a title page.
/// </summary>
/// <param name="doc">Parsed HTML document of the title page.</param>
/// <returns>
/// One <see cref="FilmType"/> per link found in the "see-more inline canwrap"
/// div whose text contains "Genres"; an empty list when there is none.
/// </returns>
private static List <FilmType> GetGenres(HtmlDocument doc)
{
    HtmlNodeCollection genreLinks = null;

    var blocks = doc.DocumentNode.SelectNodes(".//div[@class='see-more inline canwrap']");
    if (blocks != null)
    {
        foreach (var block in blocks)
        {
            // When several blocks mention "Genres", the last one wins.
            if (block.InnerText.Contains("Genres"))
            {
                genreLinks = block.SelectNodes(".//a");
            }
        }
    }

    var result = new List <FilmType>();
    if (genreLinks == null)
    {
        return(result);
    }

    foreach (var link in genreLinks)
    {
        var label = link.InnerText.Trim().Replace("\n", "");
        result.Add(new FilmType { Type = HtmlEscapeCharConverter.Decode(label) });
    }

    return(result);
}
/// <summary>
/// Extracts the storyline text of a title page.
/// </summary>
/// <param name="doc">Parsed HTML document of the title page.</param>
/// <returns>
/// The HTML-decoded text of the first paragraph in the "article" div whose first
/// h2 reads "Storyline", with any trailing "Written by ..." credit and all newlines
/// removed. An empty string when the article or its paragraph cannot be found.
/// </returns>
private static String GetStoryLine(HtmlDocument doc)
{
    // SelectNodes yields null when the page has no article divs at all.
    var articles = doc.DocumentNode.SelectNodes(".//div[@class='article']");
    if (articles == null)
    {
        return("");
    }

    HtmlNode storyArticle = null;
    foreach (var article in articles)
    {
        // Only the first h2 of each article is compared against the heading.
        var heading = article.SelectSingleNode(".//h2");
        if (heading != null && heading.InnerText == "Storyline")
        {
            storyArticle = article;
            break;
        }
    }

    // As before, the paragraph is only read when the markup has a bare "<p>" tag.
    if (storyArticle == null || !storyArticle.InnerHtml.Contains("<p>"))
    {
        return("");
    }

    var storyNode = storyArticle.SelectSingleNode(".//p");
    if (storyNode == null)
    {
        return("");
    }

    var story = HtmlEscapeCharConverter.Decode(storyNode.InnerText);
    if (story == null)
    {
        return("");
    }

    // Drop the author credit that follows the synopsis, if present.
    int creditAt = story.LastIndexOf("Written by ", StringComparison.Ordinal);
    if (creditAt >= 0)
    {
        story = story.Remove(creditAt);
    }

    return(story.Replace("\n", "").Trim());
}
/// <summary>
/// Parses the raw HTML of a search results page into a list of title matches.
/// </summary>
/// <param name="source">Raw HTML of the search results page.</param>
/// <returns>
/// The matches found in the "Popular Titles", "Titles (Exact Matches)",
/// "Titles (Partial Matches)" and "Titles (Approx Matches)" sections, in that
/// order. Empty when <paramref name="source"/> is null/empty or has none of them.
/// </returns>
public static IList<ImdbResult> SearchIMDBBySource(String source)
{
    IList<ImdbResult> results = new List<ImdbResult>();
    if (string.IsNullOrEmpty(source))
    {
        return results;
    }

    // Each section starts with a bold heading; all four share the same row layout.
    string[] sectionHeaders =
    {
        "<b>Popular Titles",
        "<b>Titles (Exact Matches)",
        "<b>Titles (Partial Matches)",
        "<b>Titles (Approx Matches)"
    };

    foreach (var sectionHeader in sectionHeaders)
    {
        AppendTitleSectionResults(source, sectionHeader, results);
    }

    return results;
}

// Parses the rows of one results section (from its heading up to the next "<p><b>") into results.
private static void AppendTitleSectionResults(string source, string sectionHeader, IList<ImdbResult> results)
{
    int sectionStart = source.IndexOf(sectionHeader, StringComparison.Ordinal);
    if (sectionStart < 0)
    {
        return;
    }

    string rest = source.Substring(sectionStart);

    // The section ends where the next one begins; if none follows, use the remainder.
    int sectionEnd = rest.IndexOf("<p><b>", StringComparison.Ordinal);
    if (sectionEnd >= 0)
    {
        rest = rest.Substring(0, sectionEnd);
    }

    while (rest.Contains("</a> "))
    {
        string poster;
        string id;
        string cell;

        // A row is: poster image URL, title link (id after "/title/tt"), then the
        // cell text "Title</a> (year) ...". Stop on the first incomplete row.
        if (!TryReadAfterMarker(ref rest, "<img src=\"", "\"", out poster)
            || !TryReadAfterMarker(ref rest, "href=\"/title/tt", "\"", out id)
            || !TryReadAfterMarker(ref rest, ";\">", "</td>", out cell))
        {
            break;
        }

        // Consume the cell and its closing tag so the next iteration starts on the next row.
        rest = rest.Substring(cell.Length + "</td>".Length);

        int anchorEnd = cell.IndexOf("</a>", StringComparison.Ordinal);
        if (anchorEnd < 0)
        {
            // No title text can be isolated from this cell; skip the row.
            continue;
        }
        string title = cell.Substring(0, anchorEnd);

        // Year is the span from the first "(" to the first ")", parentheses included.
        int closeParen = cell.IndexOf(')');
        int openParen = cell.IndexOf('(');
        string year = (openParen >= 0 && openParen < closeParen)
            ? cell.Substring(openParen, closeParen - openParen + 1)
            : string.Empty;

        results.Add(new ImdbResult(
            HtmlEscapeCharConverter.Decode(title.Trim()),
            id.Trim(),
            HtmlEscapeCharConverter.Decode(year.Trim()),
            poster.Trim()));
    }
}

// Drops text up to and including marker, then returns what precedes terminator; text is unchanged on failure.
private static bool TryReadAfterMarker(ref string text, string marker, string terminator, out string value)
{
    value = null;

    int markerAt = text.IndexOf(marker, StringComparison.Ordinal);
    if (markerAt < 0)
    {
        return false;
    }

    string tail = text.Substring(markerAt + marker.Length);
    int terminatorAt = tail.IndexOf(terminator, StringComparison.Ordinal);
    if (terminatorAt < 0)
    {
        return false;
    }

    text = tail;
    value = tail.Substring(0, terminatorAt);
    return true;
}