/// <summary>
/// Builds one ImdbSearchResult for every "tr" element carrying the "findResult" class
/// in the supplied document.
/// </summary>
/// <param name="doc">Parsed search page whose DocumentNode is scanned for result rows.</param>
/// <returns>
/// One entry per matching row, in the order the rows were returned by the class lookup.
/// A row is reported even when some of its fields could not be extracted; those fields
/// are simply left unassigned.
/// </returns>
private ImdbSearchResult[] ParseResults(HtmlDocument doc)
{
    // RegexOptions.Compiled is deliberately not used: these instances live only for the
    // duration of one call, so compiling them would cost more than it saves.
    Regex idMatcher = new Regex(@"tt\d+");
    Regex typeMatcher = new Regex(@"\(\D+\)");
    Regex yearMatcher = new Regex(@"\(\d\d\d\d\)");

    HtmlNode[] findResults = doc.DocumentNode.GetElementsByClassName("findResult");
    List<ImdbSearchResult> results = new List<ImdbSearchResult>();

    foreach (HtmlNode node in findResults)
    {
        if (node.Name != "tr")
        {
            continue;
        }

        ImdbSearchResult r = new ImdbSearchResult();

        // The first TD is the result image, the second TD is the data.
        HtmlNode[] imgs = node.GetElementsByTagName("img");
        HtmlNode[] resultText = node.GetElementsByClassName("result_text");

        if (imgs.Length > 0)
        {
            r.ThumbImageUrl = imgs[0].GetAttributeValue("src", "");
        }

        if (resultText.Length > 0)
        {
            HtmlNode textNode = resultText[0];

            // Drop the first <small> element so its text does not end up in the description.
            // It is detached through its own parent because RemoveChild is expected to reject
            // a node that is not a direct child (NOTE(review): confirm against the
            // GetElementsByTagName helper, which may return nested descendants).
            HtmlNode[] smalls = textNode.GetElementsByTagName("small");
            if (smalls.Length > 0)
            {
                HtmlNode smallParent = smalls[0].ParentNode;
                if (smallParent != null)
                {
                    smallParent.RemoveChild(smalls[0]);
                }
            }

            // A cell without any anchor no longer aborts the whole parse; title and ID
            // are just left unset for that row.
            HtmlNode[] links = textNode.GetElementsByTagName("a");
            if (links.Length > 0)
            {
                HtmlNode resultLink = links[0];
                r.Title = HttpUtility.HtmlDecode(resultLink.InnerText);

                string href = resultLink.GetAttributeValue("href", "");
                Match idMatch = idMatcher.Match(href);
                if (idMatch.Success)
                {
                    // The match is "tt" followed by digits; skip the two-letter prefix.
                    // TryParse also covers digit runs too long for an Int32.
                    int id;
                    if (int.TryParse(
                            idMatch.Value.Substring(2),
                            System.Globalization.NumberStyles.None,
                            System.Globalization.CultureInfo.InvariantCulture,
                            out id))
                    {
                        r.ID = id;
                    }
                }
            }

            string description = HttpUtility.HtmlDecode(textNode.InnerText);

            MatchCollection yearMatches = yearMatcher.Matches(description);
            if (yearMatches.Count > 0)
            {
                // The match looks like "(1999)"; take the four characters between the parentheses.
                // \d also accepts non-ASCII digits, which TryParse rejects instead of throwing.
                int year;
                if (int.TryParse(
                        yearMatches[0].Value.Substring(1, 4),
                        System.Globalization.NumberStyles.None,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out year))
                {
                    r.Year = year;
                }
            }

            // The description includes the link text, so the last parenthesised non-digit
            // group is the one handed to SetMetiaType (method name as declared on the result type).
            MatchCollection typeMatches = typeMatcher.Matches(description);
            if (typeMatches.Count > 0)
            {
                r.SetMetiaType(typeMatches[typeMatches.Count - 1].Value);
            }
            else
            {
                r.Type = MediaType.Unspecified;
            }
        }

        results.Add(r);
    }

    return results.ToArray();
}
/// <summary>
/// Converts the "findResult" table rows of a search page into ImdbSearchResult objects.
/// </summary>
/// <param name="doc">Parsed HTML document; its DocumentNode is searched for elements with class "findResult".</param>
/// <returns>
/// An array with one result per "tr" element found. Thumbnail, title, ID, year and type are
/// filled in only when the corresponding markup is present and parseable.
/// </returns>
private ImdbSearchResult[] ParseResults(HtmlDocument doc)
{
    // Patterns are passed to the static Regex methods, which cache the parsed expression,
    // instead of building three RegexOptions.Compiled instances on every call.
    const string idPattern = @"tt\d+";
    const string typePattern = @"\(\D+\)";
    const string yearPattern = @"\(\d\d\d\d\)";

    List<ImdbSearchResult> parsed = new List<ImdbSearchResult>();

    foreach (HtmlNode row in doc.DocumentNode.GetElementsByClassName("findResult"))
    {
        if (row.Name == "tr")
        {
            ImdbSearchResult entry = new ImdbSearchResult();

            // First TD: the result image. Second TD: the data.
            HtmlNode[] images = row.GetElementsByTagName("img");
            HtmlNode[] textCells = row.GetElementsByClassName("result_text");

            if (images.Length > 0)
            {
                entry.ThumbImageUrl = images[0].GetAttributeValue("src", "");
            }

            if (textCells.Length > 0)
            {
                HtmlNode cell = textCells[0];

                // Strip the first <small> element before reading the cell text. Removal goes
                // through the element's own parent: calling RemoveChild on the cell presumably
                // fails when the element is nested deeper (NOTE(review): verify what
                // GetElementsByTagName returns).
                HtmlNode[] smallNodes = cell.GetElementsByTagName("small");
                if (smallNodes.Length > 0 && smallNodes[0].ParentNode != null)
                {
                    smallNodes[0].ParentNode.RemoveChild(smallNodes[0]);
                }

                // Guard against a cell with no anchor; previously this indexed links[0] blindly.
                HtmlNode[] anchors = cell.GetElementsByTagName("a");
                if (anchors.Length > 0)
                {
                    HtmlNode anchor = anchors[0];
                    entry.Title = HttpUtility.HtmlDecode(anchor.InnerText);

                    // Only assign the ID when the href really contains "tt<digits>" and the
                    // digits fit an Int32; an unmatched href used to raise FormatException.
                    Match idMatch = Regex.Match(anchor.GetAttributeValue("href", ""), idPattern);
                    int parsedId;
                    if (idMatch.Success
                        && int.TryParse(
                            idMatch.Value.Substring(2),
                            System.Globalization.NumberStyles.None,
                            System.Globalization.CultureInfo.InvariantCulture,
                            out parsedId))
                    {
                        entry.ID = parsedId;
                    }
                }

                string cellText = HttpUtility.HtmlDecode(cell.InnerText);

                // First "(dddd)" group is taken as the year; the digits sit at offsets 1..4.
                MatchCollection years = Regex.Matches(cellText, yearPattern);
                int parsedYear;
                if (years.Count > 0
                    && int.TryParse(
                        years[0].Value.Substring(1, 4),
                        System.Globalization.NumberStyles.None,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out parsedYear))
                {
                    entry.Year = parsedYear;
                }

                // Last parenthesised group without digits is passed on as the media type
                // (SetMetiaType is the name declared on the result type); with no such
                // group the type is explicitly marked Unspecified.
                MatchCollection types = Regex.Matches(cellText, typePattern);
                if (types.Count > 0)
                {
                    entry.SetMetiaType(types[types.Count - 1].Value);
                }
                else
                {
                    entry.Type = MediaType.Unspecified;
                }
            }

            parsed.Add(entry);
        }
    }

    return parsed.ToArray();
}