Пример #1
0
        // Built once and reused: constructing a RegexOptions.Compiled regex on every call
        // repeats the (expensive) compilation each time the method runs.
        private static readonly Regex ResultIdMatcher   = new Regex(@"tt\d+", RegexOptions.Compiled);
        private static readonly Regex ResultTypeMatcher = new Regex(@"\(\D+\)", RegexOptions.Compiled);
        private static readonly Regex ResultYearMatcher = new Regex(@"\(\d\d\d\d\)", RegexOptions.Compiled);

        /// <summary>
        /// Builds one <see cref="ImdbSearchResult"/> for every <c>tr</c> element carrying the
        /// <c>findResult</c> class in the given document.
        /// </summary>
        /// <param name="doc">Parsed HTML document to scan for result rows.</param>
        /// <returns>
        /// The results in document order. A row that lacks an image, a <c>result_text</c> cell,
        /// a link, a <c>tt</c> identifier or a four-digit year still yields a result; the
        /// corresponding members are simply left at whatever a new <see cref="ImdbSearchResult"/> holds.
        /// </returns>
        private ImdbSearchResult[] ParseResults(HtmlDocument doc)
        {
            HtmlNode[] findResults = doc.DocumentNode.GetElementsByClassName("findResult");

            List<ImdbSearchResult> results = new List<ImdbSearchResult>();

            foreach (HtmlNode node in findResults)
            {
                if (node.Name != "tr")
                {
                    continue;
                }

                ImdbSearchResult r = new ImdbSearchResult();

                // The row's image supplies the thumbnail; the "result_text" cell supplies the data.
                HtmlNode[] imgs        = node.GetElementsByTagName("img");
                HtmlNode[] result_text = node.GetElementsByClassName("result_text");

                if (imgs.Length > 0)
                {
                    r.ThumbImageUrl = imgs[0].GetAttributeValue("src", "");
                }

                if (result_text.Length > 0)
                {
                    HtmlNode result_text_node = result_text[0];

                    // Drop the first <small> element before the link lookup and the
                    // description text are read from this cell.
                    HtmlNode[] smalls = result_text_node.GetElementsByTagName("small");
                    if (smalls.Length > 0)
                    {
                        result_text_node.RemoveChild(smalls[0]);
                    }

                    HtmlNode[] links = result_text_node.GetElementsByTagName("a");

                    // A cell without any link used to throw IndexOutOfRangeException.
                    if (links.Length > 0)
                    {
                        HtmlNode result_link = links[0];

                        r.Title = HttpUtility.HtmlDecode(result_link.InnerText);

                        string link = result_link.GetAttributeValue("href", "");

                        // Only assign the ID when the href really contains "tt<digits>" and the
                        // digits fit in an int; previously a miss threw FormatException.
                        Match id_match = ResultIdMatcher.Match(link);
                        int id;
                        if (id_match.Success &&
                            int.TryParse(id_match.Value.Substring(2),
                                         System.Globalization.NumberStyles.None,
                                         System.Globalization.CultureInfo.InvariantCulture,
                                         out id))
                        {
                            r.ID = id;
                        }
                    }

                    string result_description = HttpUtility.HtmlDecode(result_text_node.InnerText);

                    // The first "(dddd)" group is taken as the year.
                    Match year_match = ResultYearMatcher.Match(result_description);
                    int year;
                    if (year_match.Success &&
                        int.TryParse(year_match.Value.Substring(1, 4),
                                     System.Globalization.NumberStyles.None,
                                     System.Globalization.CultureInfo.InvariantCulture,
                                     out year))
                    {
                        r.Year = year;
                    }

                    // The last parenthesised digit-free group is handed to SetMetiaType.
                    MatchCollection types_matches = ResultTypeMatcher.Matches(result_description);
                    if (types_matches.Count > 0)
                    {
                        r.SetMetiaType(types_matches[types_matches.Count - 1].Value);
                    }
                    else
                    {
                        r.Type = MediaType.Unspecified;
                    }
                }

                results.Add(r);
            }

            return results.ToArray();
        }
Пример #2
0
        // Shared, precompiled patterns. Creating them inside the method recompiled
        // all three on every call because of RegexOptions.Compiled.
        private static readonly Regex ResultIdMatcher = new Regex(@"tt\d+", RegexOptions.Compiled);
        private static readonly Regex ResultTypeMatcher = new Regex(@"\(\D+\)", RegexOptions.Compiled);
        private static readonly Regex ResultYearMatcher = new Regex(@"\(\d\d\d\d\)", RegexOptions.Compiled);

        /// <summary>
        /// Parses a digits-only string into an <see cref="int"/> without throwing.
        /// </summary>
        /// <param name="digits">Text expected to contain only decimal digits.</param>
        /// <param name="value">The parsed number, or 0 when parsing fails.</param>
        /// <returns><c>true</c> when <paramref name="digits"/> was parsed successfully.</returns>
        private static bool TryParseDigits(string digits, out int value)
        {
            return int.TryParse(
                digits,
                System.Globalization.NumberStyles.None,
                System.Globalization.CultureInfo.InvariantCulture,
                out value);
        }

        /// <summary>
        /// Converts the <c>tr</c> elements marked with the <c>findResult</c> class into
        /// <see cref="ImdbSearchResult"/> objects.
        /// </summary>
        /// <param name="doc">Parsed HTML document to read the result rows from.</param>
        /// <returns>
        /// One result per matching row, in document order. Pieces that cannot be found in a
        /// row (image, <c>result_text</c> cell, link, <c>tt</c> identifier, year) are skipped
        /// instead of aborting the whole parse.
        /// </returns>
        private ImdbSearchResult[] ParseResults(HtmlDocument doc)
        {
            List<ImdbSearchResult> results = new List<ImdbSearchResult>();

            foreach (HtmlNode node in doc.DocumentNode.GetElementsByClassName("findResult"))
            {
                if (node.Name == "tr")
                {
                    ImdbSearchResult r = new ImdbSearchResult();

                    // Thumbnail comes from the first <img> found in the row, if any.
                    HtmlNode[] imgs = node.GetElementsByTagName("img");
                    if (imgs.Length > 0)
                    {
                        r.ThumbImageUrl = imgs[0].GetAttributeValue("src", "");
                    }

                    // Everything else comes from the first "result_text" element, if any.
                    HtmlNode[] result_text = node.GetElementsByClassName("result_text");
                    if (result_text.Length > 0)
                    {
                        HtmlNode result_text_node = result_text[0];

                        // The first <small> element is removed before links and text are read.
                        HtmlNode[] smalls = result_text_node.GetElementsByTagName("small");
                        if (smalls.Length > 0)
                        {
                            result_text_node.RemoveChild(smalls[0]);
                        }

                        // Guarded: indexing links[0] unconditionally threw
                        // IndexOutOfRangeException for cells without an <a>.
                        HtmlNode[] links = result_text_node.GetElementsByTagName("a");
                        if (links.Length > 0)
                        {
                            HtmlNode result_link = links[0];

                            r.Title = HttpUtility.HtmlDecode(result_link.InnerText);

                            // Guarded: an href without "tt<digits>" used to reach
                            // Convert.ToInt32("") and throw FormatException.
                            string link = result_link.GetAttributeValue("href", "");
                            Match id_match = ResultIdMatcher.Match(link);
                            int id;
                            if (id_match.Success && TryParseDigits(id_match.Value.Substring(2), out id))
                            {
                                r.ID = id;
                            }
                        }

                        string result_description = HttpUtility.HtmlDecode(result_text_node.InnerText);

                        // Year: the four digits inside the first "(dddd)" group.
                        Match year_match = ResultYearMatcher.Match(result_description);
                        int year;
                        if (year_match.Success && TryParseDigits(year_match.Value.Substring(1, 4), out year))
                        {
                            r.Year = year;
                        }

                        // Type: the last parenthesised group that contains no digits.
                        MatchCollection types_matches = ResultTypeMatcher.Matches(result_description);
                        if (types_matches.Count > 0)
                        {
                            r.SetMetiaType(types_matches[types_matches.Count - 1].Value);
                        }
                        else
                        {
                            r.Type = MediaType.Unspecified;
                        }
                    }

                    results.Add(r);
                }
            }

            return results.ToArray();
        }