/// <summary>
/// The main function, in which we call the API with the options that we got from the form.
/// Stores the options on this instance, downloads either the search-result page or a title
/// page, and forwards what was parsed to <c>processInfo</c>.
/// </summary>
/// <param name="searchMode">0 to search by title text, 1 to look a title up by its ID. Any other value is reported through <c>errorHandler</c>.</param>
/// <param name="q">Word(s) to search for (mode 0) or the title ID (mode 1).</param>
/// <param name="m">Whether to search movies or TV series.</param>
/// <param name="nAct">Number of actors to parse in each title.</param>
/// <param name="sS">Number of the first season we want to parse.</param>
/// <param name="eS">Number of the last season we want to parse.</param>
/// <param name="f">Fields that we want to parse in each title.</param>
public void IMDbSearch(int searchMode, string q, int m, int nAct, int sS, int eS, bool[] f)
{
    this.query = q;
    this.media = m;
    this.nActors = nAct;
    this.sSeas = sS;
    this.eSeas = eS;
    this.fields = f;

    string url;
    if (searchMode == 0)
    {
        // Free text can contain spaces, '&', '#', '+', ... which would otherwise end or
        // corrupt the "q" parameter. ("" + query) turns a null query into an empty string.
        // NOTE(review): assumes callers pass the raw, not yet URL-encoded, text - confirm.
        url = "http://www.imdb.com/find?s=all&q=" + Uri.EscapeDataString("" + query);
    }
    else if (searchMode == 1)
    {
        url = "http://www.imdb.com/title/" + query;
    }
    else
    {
        // Previously an unknown mode fell through and requested an empty URL.
        errorHandler(new ArgumentOutOfRangeException(
            "searchMode", searchMode, "searchMode must be 0 (search by title) or 1 (search by ID)."));
        return;
    }

    IMDB imdb = new IMDB();
    imdb.parentErrorCaller = errorCaller;
    imdb.parentProgressCaller = progressCaller;
    bool success = imdb.getPage(url);

    // Only a text search can land on a result list; -1 means "treat as a title page".
    int type = -1;
    if (success && searchMode == 0 && !error)
    {
        type = imdb.getPageType(media, query);
    }

    // Checked after getPageType on purpose: both branches below require a clean error flag.
    if (error)
    {
        return;
    }

    if (type == 0)
    {
        // Gets the relevant links from the result page.
        List<IMDbLink> links = imdb.parseTitleLinks(media);
        if (links.Count > 0)
        {
            processInfo(1, links);
            progressHandler(20);
        }
        else
        {
            errorHandler(new Exception("No results found!"));
        }
    }
    else
    {
        // NOTE(review): this branch is also taken when getPage returned false without
        // the error flag being set - unchanged from the original; confirm that is intended.
        progressHandler(20);
        IMDbTitle title = imdb.parseTitlePage(fields, media, nActors, sSeas, eSeas);
        processInfo(0, title);
    }
}
/// <summary>
/// Downloads one title page and forwards the parsed title to <c>processInfo</c>.
/// Does nothing when the error flag is already set, when the download fails,
/// or when the error flag is set after the download.
/// </summary>
/// <param name="url">Relative URL of the page to be parsed; it is appended to "http://www.imdb.com".</param>
public void IMDbParse(string url)
{
    if (error)
    {
        return;
    }

    IMDB scraper = new IMDB();
    scraper.parentErrorCaller = errorCaller;
    scraper.parentProgressCaller = progressCaller;

    string absoluteUrl = "http://www.imdb.com" + url;
    if (!scraper.getPage(absoluteUrl) || error)
    {
        return;
    }

    IMDbTitle parsed = scraper.parseTitlePage(fields, media, nActors, sSeas, eSeas);
    processInfo(0, parsed);
}
/// <summary>
/// Downloads the episode-list page of a series through <c>getPage</c> and stores the
/// parsed seasons and episodes in <paramref name="title"/>. Nothing is changed when the
/// download fails.
/// </summary>
/// <param name="url">URL of the page. When <paramref name="eSeas"/> is not -1 it must end in the number of the first season to parse.</param>
/// <param name="eSeas">Parse up to and including this season; -1 parses from season 1 up to the number of season anchors found on the page.</param>
/// <param name="title">The title object that receives the parsed seasons.</param>
private void parseSeason(String url, int eSeas, IMDbTitle title)
{
    if (!getPage(url))
    {
        return;
    }

    // Read the page once; the original copied the whole document for every regex match.
    string html = page ?? "";

    int season;
    if (eSeas != -1)
    {
        // Take ALL trailing digits of the URL. The original looked at the last character
        // only, so "...season-10" started at season 0 instead of season 10.
        int digitsStart = url.Length;
        while (digitsStart > 0 && url[digitsStart - 1] >= '0' && url[digitsStart - 1] <= '9')
        {
            digitsStart--;
        }
        season = Int32.Parse(url.Substring(digitsStart));
    }
    else
    {
        // Detect the last available season by counting the season anchors.
        // NOTE(review): this branch always starts at season 1, whatever the URL says.
        season = 1;
        eSeas = Regex.Matches(html, "name=\"season-").Count;
    }

    // Loop-invariant pieces hoisted out of the season/episode loops.
    Regex episodeRegex = new Regex(@"<a href="".{17}"">(.*?)</a>.*?<strong>(.*?)</strong>.*?<br>\s*(.*?)<");
    char[] lineBreaks = Environment.NewLine.ToCharArray();

    List<IMDbSerieSeason> seasons = new List<IMDbSerieSeason>();
    for (; season <= eSeas; season++)
    {
        Regex seasonRegex = new Regex(
            @"Season " + season + "\n</a></h3>(.*?)\n\n</div>\n", RegexOptions.Singleline);
        Match seasonMatch = seasonRegex.Match(html);
        if (!seasonMatch.Success)
        {
            // Seasons missing from the page are skipped, not reported.
            continue;
        }

        IMDbSerieSeason seas = new IMDbSerieSeason();
        seas.Number = season;

        List<IMDbSerieEpisode> episodes = new List<IMDbSerieEpisode>();
        int epis = 1;
        foreach (string line in seasonMatch.Groups[1].Value.Split(lineBreaks))
        {
            // Short fragments (blank lines, leftovers of the split) are not episodes.
            if (line.Length <= 10)
            {
                continue;
            }

            // As in the original, a line that does not match still yields an episode
            // with empty title and air date, and still consumes an episode number.
            Match match = episodeRegex.Match(line);
            IMDbSerieEpisode episode = new IMDbSerieEpisode();
            episode.Number = epis;
            episode.Title = cleanText(match.Groups[1].Value);
            episode.AirDate = match.Groups[2].Value;
            string plot = match.Groups[3].Value;
            if (!String.IsNullOrEmpty(plot))
            {
                episode.Plot = cleanText(plot);
            }
            episodes.Add(episode);
            epis++;
        }

        seas.Episodes = episodes;
        seasons.Add(seas);
    }

    title.Seasons = seasons;
}
/// <summary>
/// Parses the HTML currently held in <c>page</c> as the page of one title.
/// Progress is reported through <c>parentProgressCaller</c> after each group of fields.
/// </summary>
/// <param name="fields">
/// The fields allowed to be parsed, by index: 0 title, 1 year, 2 cover URL, 3 user rating,
/// 4 directors/creators, 5 seasons (TV series only), 6 genres, 7 tagline, 8 plot,
/// 9 actors, 10 runtime.
/// </param>
/// <param name="media">0 to parse a movie, 1 to parse a TV series.</param>
/// <param name="actorN">Maximum number of actors to parse; -1 for no limit.</param>
/// <param name="sSeas">Number of first season to parse; -1 means season 1.</param>
/// <param name="eSeas">Number of last season to parse; passed on to <c>parseSeason</c>.</param>
/// <returns>
/// The parsed title. When the page kind does not match <paramref name="media"/>, only
/// <c>Media</c> is set on the returned title. Returns null when an exception occurred;
/// the exception is handed to <c>parentErrorCaller</c>.
/// </returns>
public IMDbTitle parseTitlePage(bool[] fields, int media, int actorN, int sSeas, int eSeas)
{
    try
    {
        // The sB field is still refreshed (once), but the text is materialised a single
        // time; the original called sB.ToString() - a full copy - for every field.
        // NOTE(review): assumes no helper called below mutates sB - confirm.
        sB = new StringBuilder(page);
        string html = sB.ToString();

        IMDbTitle titl = new IMDbTitle();
        titl.Media = media;

        Match match = Regex.Match(html, @"<title>(.*?)\((\d{4}).*?\)</title>");
        string title = match.Groups[1].Value;
        string year = match.Groups[2].Value;

        // A series page is recognised by "TV series" inside the <h1> heading.
        string type = Regex.Match(html, @"<h1>(.*?)</h1>").Groups[1].Value;
        bool isSeriesPage = type.Contains("TV series");
        if ((media == 0 && isSeriesPage) || (media == 1 && !isSeriesPage))
        {
            // Wrong kind of page for the requested media: return the bare title.
            return titl;
        }

        string id = Regex.Match(html, @";id=(tt\d{7});").Groups[1].Value;
        string link = "http://www.imdb.com/title/" + id + "/";
        titl.URL = link;
        titl.ID = id;

        if (fields[0]) // Title
        {
            titl.Title = cleanText(title);
        }
        if (fields[1]) // Year
        {
            titl.Year = cleanText(year);
        }
        parentProgressCaller.DynamicInvoke(new object[] { 10 });

        if (fields[2]) // Cover link
        {
            // Skipped when the page contains this particular image URL.
            if (!html.Contains("http://ia.media-imdb.com/media/imdb/01/I/37/89/15/10.gif"))
            {
                titl.CoverURL = Regex.Match(html, @"<a name=""poster"".*?src=""(.*?)""").Groups[1].Value;
            }
        }
        parentProgressCaller.DynamicInvoke(new object[] { 5 });

        if (fields[3]) // User rating
        {
            // NOTE(review): the nested quantifier "(...+)*" can backtrack heavily on long
            // digit runs and also matches "<b>x</b>" with an empty group; pattern kept as is.
            string rating = Regex.Match(html, @"<b>([0-9/\.]+)*.?</b>").Groups[1].Value;
            titl.Rating = cleanText(rating);
        }
        parentProgressCaller.DynamicInvoke(new object[] { 5 });

        if (fields[4]) // Directors (movies) or creators (TV series)
        {
            if (media == 0 || media == 1)
            {
                bool creatorsWanted = media == 1;
                string blockPattern = creatorsWanted
                    ? @"<h5>Creator.*?\n(<a href=.*?</a>)<br/>\n{1,2}<a class"
                    : @"<h5>Director.*?\n(<a href=.*?</a>)<br/>\n{1,2}</div>";
                // The original used Singleline for creator links only; preserved.
                RegexOptions linkOptions = creatorsWanted ? RegexOptions.Singleline : RegexOptions.None;

                match = Regex.Match(html, blockPattern, RegexOptions.Singleline);
                if (match.Success)
                {
                    List<IMDbDirCrea> people = new List<IMDbDirCrea>();
                    foreach (Match m in Regex.Matches(match.Groups[1].Value, @"<a href=""(.{16})"">(.*?)</a>", linkOptions))
                    {
                        IMDbDirCrea person = new IMDbDirCrea();
                        // NOTE(review): Type is 0 for creators as well, and creators are
                        // stored in Directors - both unchanged from the original.
                        person.Type = 0;
                        person.Name = cleanText(m.Groups[2].Value);
                        person.URL = "http://www.imdb.com" + m.Groups[1].Value;
                        people.Add(person);
                    }
                    titl.Directors = people;
                }
            }
        }
        parentProgressCaller.DynamicInvoke(new object[] { 15 });

        if (media == 1 && fields[5]) // Seasons of a series
        {
            match = Regex.Match(html, @"<h5>Seasons.*?(<a href=.*?)</a>\n{1,2}<a class", RegexOptions.Singleline);
            if (match.Success)
            {
                string startSeas = "episodes#season-" + (sSeas == -1 ? 1 : sSeas);
                // Only fetch the episode list when the page links to the start season.
                if (Regex.IsMatch(match.Groups[1].Value, startSeas, RegexOptions.Singleline))
                {
                    parseSeason(link + startSeas, eSeas, titl);
                }
            }
        }
        parentProgressCaller.DynamicInvoke(new object[] { 25 });

        if (fields[6]) // Genres
        {
            match = Regex.Match(html, @"<h5>Genre.*?\n(<a href=.*?)<a class=");
            if (match.Success)
            {
                List<string> genres = new List<string>();
                foreach (Match m in Regex.Matches(match.Groups[1].Value, @""">(.*?)</a>"))
                {
                    genres.Add(cleanText(m.Groups[1].Value));
                }
                titl.Genres = genres;
            }
        }
        parentProgressCaller.DynamicInvoke(new object[] { 5 });

        if (fields[7]) // Tagline
        {
            titl.Tagline = cleanText(Regex.Match(html, @"<h5>Tagline.*?\n(.*?)\n?<").Groups[1].Value.Trim());
        }
        if (fields[8]) // Plot
        {
            titl.Plot = cleanText(Regex.Match(html, @"<h5>Plot.*?\n(.*?)\n?<").Groups[1].Value.Trim());
        }

        if (fields[9]) // Actors
        {
            match = Regex.Match(html, @"<h3>Cast.*?(<a href=.*?)<a class=");
            if (match.Success)
            {
                List<IMDbActor> actors = new List<IMDbActor>();
                Regex castRow = new Regex(@"<a href="".*?<img src=""(.*?)"".*?<a href=""(.*?)"">(.*?)</a>.*?(<td class=""char"">(.*?))?</td></tr>");
                Regex characterLink = new Regex(@"href=""/character/.*?"">(.*?)</a>");
                int count = 0;
                foreach (Match m in castRow.Matches(match.Groups[1].Value))
                {
                    // Stop at the limit; the original kept matching rows it then ignored.
                    if (actorN != -1 && count >= actorN)
                    {
                        break;
                    }

                    IMDbActor actor = new IMDbActor();
                    actor.Name = cleanText(m.Groups[3].Value);

                    // The character cell is either plain text or a link to a character page.
                    string caract = m.Groups[5].Value;
                    if (caract.Contains("<a href="))
                    {
                        caract = characterLink.Match(caract).Groups[1].Value;
                    }
                    actor.Character = cleanText(caract);

                    // This particular image URL is never stored as the actor's photo.
                    if (m.Groups[1].Value != "http://i.media-imdb.com/images/tn15/addtiny.gif")
                    {
                        actor.PhotoURL = m.Groups[1].Value;
                    }
                    actor.URL = "http://www.imdb.com" + m.Groups[2].Value;
                    actors.Add(actor);
                    count++;
                }
                titl.Actors = actors;
            }
        }
        parentProgressCaller.DynamicInvoke(new object[] { 10 });

        if (fields[10]) // Runtime
        {
            match = Regex.Match(html, @"<h5>Runtime.*?\n(\d+ min)");
            if (match.Success)
            {
                titl.Runtime = match.Groups[1].Value;
            }
        }
        parentProgressCaller.DynamicInvoke(new object[] { 5 });

        return titl;
    }
    catch (Exception ex)
    {
        // Any failure is handed to the parent's error callback instead of being thrown.
        parentErrorCaller.DynamicInvoke(new object[] { ex });
        return null;
    }
}
/// <summary>
/// Connects to the season information of a series (via <c>getPage</c>) and parses the
/// episodes of each requested season into <paramref name="title"/>. When the page cannot
/// be fetched the title is left untouched.
/// </summary>
/// <param name="url">URL of the page. Unless <paramref name="eSeas"/> is -1, its trailing digits give the first season to parse.</param>
/// <param name="eSeas">Parse till this season (inclusive); -1 means "from season 1 to the last season anchor found on the page".</param>
/// <param name="title">The title object that holds the infos.</param>
private void parseSeason(String url, int eSeas, IMDbTitle title)
{
    if (getPage(url))
    {
        // One snapshot of the page instead of a fresh StringBuilder copy per regex call.
        string content = page ?? "";

        int season;
        if (eSeas != -1)
        {
            // Fix: use every trailing digit of the URL. Reading just the last character
            // turned "...season-12" into season 2 and "...season-10" into season 0.
            // No trailing digits -> empty string -> FormatException, as for the original.
            season = Int32.Parse(Regex.Match(url, "[0-9]+$").Value);
        }
        else
        {
            // Detect the last available season: one anchor per season on the page.
            // NOTE(review): the first season is fixed to 1 here regardless of the URL.
            season = 1;
            eSeas = 0;
            Match anchor = Regex.Match(content, "name=\"season-");
            while (anchor.Success)
            {
                eSeas++;
                anchor = anchor.NextMatch();
            }
        }

        // Built once; the pattern does not depend on the season or the line.
        Regex episodePattern = new Regex(@"<a href="".{17}"">(.*?)</a>.*?<strong>(.*?)</strong>.*?<br>\s*(.*?)<");
        char[] separators = Environment.NewLine.ToCharArray();

        List<IMDbSerieSeason> seasons = new List<IMDbSerieSeason>();
        while (season <= eSeas)
        {
            string seasonPattern = @"Season " + season + "\n</a></h3>(.*?)\n\n</div>\n";
            // Matched once; the original ran the same match twice.
            Match block = Regex.Match(content, seasonPattern, RegexOptions.Singleline);
            if (block.Success)
            {
                IMDbSerieSeason seas = new IMDbSerieSeason();
                seas.Number = season;

                List<IMDbSerieEpisode> episodes = new List<IMDbSerieEpisode>();
                int epis = 1;
                string[] lines = block.Groups[1].Value.Split(separators);
                foreach (string line in lines)
                {
                    // Lines of 10 characters or fewer are treated as noise, not episodes.
                    if (line.Length > 10)
                    {
                        // Unmatched lines still produce a (mostly empty) numbered episode,
                        // exactly as before.
                        Match hit = episodePattern.Match(line);

                        IMDbSerieEpisode episode = new IMDbSerieEpisode();
                        episode.Number = epis;
                        episode.Title = cleanText(hit.Groups[1].Value);
                        episode.AirDate = hit.Groups[2].Value;

                        string plot = hit.Groups[3].Value;
                        if (!String.IsNullOrEmpty(plot))
                        {
                            episode.Plot = cleanText(plot);
                        }

                        episodes.Add(episode);
                        epis++;
                    }
                }

                seas.Episodes = episodes;
                seasons.Add(seas);
            }
            season++;
        }

        title.Seasons = seasons;
    }
}
/// <summary>
/// Parses an HTML page (the content of <c>page</c>) with one title's information.
/// After each group of fields a progress increment is sent to <c>parentProgressCaller</c>.
/// </summary>
/// <param name="fields">
/// The fields allowed to be parsed, by index: 0 title, 1 year, 2 cover URL, 3 user rating,
/// 4 directors/creators, 5 seasons (TV series only), 6 genres, 7 tagline, 8 plot,
/// 9 actors, 10 runtime.
/// </param>
/// <param name="media">0 to parse a movie, 1 to parse a TV series.</param>
/// <param name="actorN">Number of actors to parse; -1 parses all of them.</param>
/// <param name="sSeas">Number of first season to parse; -1 is treated as season 1.</param>
/// <param name="eSeas">Number of last season to parse; handed to <c>parseSeason</c> unchanged.</param>
/// <returns>
/// The title filled with the requested fields. If the page kind does not match
/// <paramref name="media"/>, a title with only <c>Media</c> set. Null if an exception
/// occurred, in which case the exception was passed to <c>parentErrorCaller</c>.
/// </returns>
public IMDbTitle parseTitlePage(bool[] fields, int media, int actorN, int sSeas, int eSeas)
{
    try
    {
        // Refresh the sB field once and take a single string snapshot of it. Previously
        // sB was built twice and converted to a string again for every field.
        // NOTE(review): assumes no helper called below mutates sB - confirm.
        sB = new StringBuilder(page);
        string html = sB.ToString();

        IMDbTitle titl = new IMDbTitle();
        titl.Media = media;

        Match match = Regex.Match(html, @"<title>(.*?)\((\d{4}).*?\)</title>");
        string title = match.Groups[1].Value;
        string year = match.Groups[2].Value;

        // "TV series" in the <h1> heading marks a series page.
        string type = Regex.Match(html, @"<h1>(.*?)</h1>").Groups[1].Value;
        bool isSeriesPage = type.Contains("TV series");
        bool parse = !((media == 0 && isSeriesPage) || (media == 1 && !isSeriesPage));
        if (!parse)
        {
            // Page kind and requested media disagree: nothing else is parsed.
            return titl;
        }

        // The ID regex is evaluated once and reused for both URL and ID.
        string id = Regex.Match(html, @";id=(tt\d{7});").Groups[1].Value;
        string link = "http://www.imdb.com/title/" + id + "/";
        titl.URL = link;
        titl.ID = id;

        if (fields[0]) // Parse the title's title
        {
            titl.Title = cleanText(title);
        }
        if (fields[1]) // Parse the title's year
        {
            titl.Year = cleanText(year);
        }
        parentProgressCaller.DynamicInvoke(new object[] { 10 });

        if (fields[2]) // Parse the title's cover link
        {
            // No cover is stored when the page references this particular image URL.
            if (!html.Contains("http://ia.media-imdb.com/media/imdb/01/I/37/89/15/10.gif"))
            {
                titl.CoverURL = Regex.Match(html, @"<a name=""poster"".*?src=""(.*?)""").Groups[1].Value;
            }
        }
        parentProgressCaller.DynamicInvoke(new object[] { 5 });

        if (fields[3]) // Parse the title's user rating
        {
            // NOTE(review): "(...+)*" is a nested quantifier (heavy backtracking on long
            // digit runs) and lets "<b>x</b>" match with an empty group; pattern kept.
            titl.Rating = cleanText(Regex.Match(html, @"<b>([0-9/\.]+)*.?</b>").Groups[1].Value);
        }
        parentProgressCaller.DynamicInvoke(new object[] { 5 });

        if (fields[4]) // Parse the creators/directors
        {
            if (media == 0 || media == 1)
            {
                // Movies list directors, series list creators; only the block pattern and
                // the link regex options differ between the two.
                string blockPattern;
                RegexOptions linkOptions;
                if (media == 0)
                {
                    blockPattern = @"<h5>Director.*?\n(<a href=.*?</a>)<br/>\n{1,2}</div>";
                    linkOptions = RegexOptions.None;
                }
                else
                {
                    blockPattern = @"<h5>Creator.*?\n(<a href=.*?</a>)<br/>\n{1,2}<a class";
                    linkOptions = RegexOptions.Singleline;
                }

                match = Regex.Match(html, blockPattern, RegexOptions.Singleline);
                if (match.Success)
                {
                    List<IMDbDirCrea> people = new List<IMDbDirCrea>();
                    MatchCollection matches = Regex.Matches(match.Groups[1].Value, @"<a href=""(.{16})"">(.*?)</a>", linkOptions);
                    foreach (Match m in matches)
                    {
                        IMDbDirCrea person = new IMDbDirCrea();
                        // NOTE(review): creators also get Type 0 and end up in Directors,
                        // as in the original - confirm that is intended.
                        person.Type = 0;
                        person.Name = cleanText(m.Groups[2].Value);
                        person.URL = "http://www.imdb.com" + m.Groups[1].Value;
                        people.Add(person);
                    }
                    titl.Directors = people;
                }
            }
        }
        parentProgressCaller.DynamicInvoke(new object[] { 15 });

        if (media == 1 && fields[5]) // Parse the series' seasons
        {
            match = Regex.Match(html, @"<h5>Seasons.*?(<a href=.*?)</a>\n{1,2}<a class", RegexOptions.Singleline);
            if (match.Success)
            {
                string startSeas = "episodes#season-" + (sSeas == -1 ? 1 : sSeas);
                // The episode list is fetched only if the page links to the start season.
                if (Regex.IsMatch(match.Groups[1].Value, startSeas, RegexOptions.Singleline))
                {
                    parseSeason(link + startSeas, eSeas, titl);
                }
            }
        }
        parentProgressCaller.DynamicInvoke(new object[] { 25 });

        if (fields[6]) // Parse genres
        {
            match = Regex.Match(html, @"<h5>Genre.*?\n(<a href=.*?)<a class=");
            if (match.Success)
            {
                List<string> genres = new List<string>();
                foreach (Match m in Regex.Matches(match.Groups[1].Value, @""">(.*?)</a>"))
                {
                    genres.Add(cleanText(m.Groups[1].Value));
                }
                titl.Genres = genres;
            }
        }
        parentProgressCaller.DynamicInvoke(new object[] { 5 });

        if (fields[7]) // Parse the tagline
        {
            titl.Tagline = cleanText(Regex.Match(html, @"<h5>Tagline.*?\n(.*?)\n?<").Groups[1].Value.Trim());
        }
        if (fields[8]) // Parse the plot
        {
            titl.Plot = cleanText(Regex.Match(html, @"<h5>Plot.*?\n(.*?)\n?<").Groups[1].Value.Trim());
        }

        if (fields[9]) // Parse the actors
        {
            match = Regex.Match(html, @"<h3>Cast.*?(<a href=.*?)<a class=");
            if (match.Success)
            {
                List<IMDbActor> actors = new List<IMDbActor>();
                Regex castRow = new Regex(@"<a href="".*?<img src=""(.*?)"".*?<a href=""(.*?)"">(.*?)</a>.*?(<td class=""char"">(.*?))?</td></tr>");
                Regex characterLink = new Regex(@"href=""/character/.*?"">(.*?)</a>");
                int count = 0;
                foreach (Match m in castRow.Matches(match.Groups[1].Value))
                {
                    // Leave the loop once enough actors were collected instead of
                    // matching the remaining rows only to discard them.
                    if (actorN != -1 && count >= actorN)
                    {
                        break;
                    }

                    IMDbActor actor = new IMDbActor();
                    actor.Name = cleanText(m.Groups[3].Value);

                    // The character cell holds either plain text or a character-page link.
                    string caract = m.Groups[5].Value;
                    if (caract.Contains("<a href="))
                    {
                        caract = characterLink.Match(caract).Groups[1].Value;
                    }
                    actor.Character = cleanText(caract);

                    // This particular image URL is never stored as the actor's photo.
                    if (m.Groups[1].Value != "http://i.media-imdb.com/images/tn15/addtiny.gif")
                    {
                        actor.PhotoURL = m.Groups[1].Value;
                    }
                    actor.URL = "http://www.imdb.com" + m.Groups[2].Value;
                    actors.Add(actor);
                    count++;
                }
                titl.Actors = actors;
            }
        }
        parentProgressCaller.DynamicInvoke(new object[] { 10 });

        if (fields[10]) // Parse the runtime
        {
            match = Regex.Match(html, @"<h5>Runtime.*?\n(\d+ min)");
            if (match.Success)
            {
                titl.Runtime = match.Groups[1].Value;
            }
        }
        parentProgressCaller.DynamicInvoke(new object[] { 5 });

        return titl;
    }
    catch (Exception ex)
    {
        // Errors are reported through the parent's error callback rather than thrown.
        parentErrorCaller.DynamicInvoke(new object[] { ex });
        return null;
    }
}