示例#1
0
        /// <summary>
        /// Loads the page at <paramref name="url"/> and extracts hotel details from it using CSS
        /// selectors: name, star classification, address, review score, description and room categories.
        /// </summary>
        /// <param name="url">Address of the hotel page to load and scrape.</param>
        /// <returns>
        /// A <see cref="Task1ResponseViewModel"/> holding whatever fields were found. <c>Message</c> is
        /// "Success!" when scraping ran to completion; otherwise it carries the message of the exception
        /// that interrupted it, and any fields populated before the failure are kept.
        /// </returns>
        private Task1ResponseViewModel ScrapUrl(string url)
        {
            Task1ResponseViewModel response = new Task1ResponseViewModel();

            try
            {
                HtmlWeb      htmlWeb = new HtmlWeb();
                HtmlDocument htmlDoc = htmlWeb.Load(url);

                // Obtaining name
                foreach (var node in htmlDoc.DocumentNode.CssSelect("h2[id='hp_hotel_name']"))
                {
                    // Remove the first direct <span> child (when present) so its text is not part of
                    // the name. RemoveChild rejects a null argument, hence the guard.
                    HtmlNode nestedSpan = node.SelectSingleNode("span");
                    if (nestedSpan != null)
                    {
                        node.RemoveChild(nestedSpan);
                    }

                    response.Name = RemoveBreakLine(node.InnerText);
                }

                // Obtaining Clasification/Stars: one increment per rating icon inside the title block
                foreach (var node in htmlDoc.DocumentNode.CssSelect("div[class='hp__hotel-title']"))
                {
                    foreach (var child in node.CssSelect("span[class='bui-icon bui-rating__item bui-icon--medium']"))
                    {
                        response.Clasification += 1;
                    }
                }

                // Obtaining Address
                foreach (var node in htmlDoc.DocumentNode.CssSelect("span[data-node_tt_id='location_score_tooltip']"))
                {
                    response.Address = RemoveBreakLine(node.InnerText);
                }

                // Obtaining Review
                foreach (var node in htmlDoc.DocumentNode.CssSelect("div[id='js--hp-gallery-scorecard']"))
                {
                    foreach (var child in node.CssSelect("div[class='bui-review-score__badge']"))
                    {
                        // An empty or non-numeric badge is skipped, like any other missing field,
                        // instead of aborting the rest of the scrape with an exception.
                        HtmlNode scoreNode = child.FirstChild;
                        if (scoreNode == null)
                        {
                            continue;
                        }

                        if (decimal.TryParse(RemoveBreakLine(scoreNode.InnerText), NumberStyles.Number, CultureInfo.InvariantCulture, out decimal score))
                        {
                            response.Review = score;
                        }
                    }
                }

                // Obtaining Description: concatenation of every paragraph inside the summary block
                foreach (var node in htmlDoc.DocumentNode.CssSelect("div[id='summary']"))
                {
                    foreach (var child in node.CssSelect("p"))
                    {
                        response.Description += child.InnerText;
                    }
                }
                // Appending "" turns a still-null description into an empty string before cleaning.
                response.Description = RemoveBreakLine(response.Description + "");

                // Obtaining Rooms Category
                foreach (var node in htmlDoc.DocumentNode.CssSelect("table[id='maxotel_rooms']"))
                {
                    foreach (var child in node.CssSelect("a[class='jqrt togglelink']"))
                    {
                        response.RoomCategory.Add(RemoveBreakLine(child.InnerText));
                    }
                }

                // Alternative hotels are not extracted by this method.

                // Keep only the part of the URL before the query string. As before, a '?' at
                // index 0 does not trigger truncation.
                int queryStart = url.IndexOf('?');
                response.Url     = queryStart > 0 ? url.Substring(0, queryStart) : url;
                response.Message = "Success!";
            }
            catch (Exception e)
            {
                // Failures are reported through the response rather than propagated to the caller.
                response.Message = e.Message;
            }

            return response;
        }
示例#2
0
        /// <summary>
        /// Reads <paramref name="file"/> as text, parses it as HTML and extracts hotel details from it
        /// using CSS selectors: name, star classification, address, review score, description, room
        /// categories, alternative hotels and the page URL.
        /// </summary>
        /// <param name="file">Uploaded file whose content is the HTML of a hotel page.</param>
        /// <returns>
        /// A <see cref="Task1ResponseViewModel"/> holding whatever fields were found. <c>Message</c> is
        /// "Success!" when scraping ran to completion; otherwise it carries the message of the exception
        /// that interrupted it, and any fields populated before the failure are kept.
        /// </returns>
        private Task1ResponseViewModel ScrapFile(IFormFile file)
        {
            Task1ResponseViewModel response = new Task1ResponseViewModel();

            try
            {
                HtmlDocument htmlDoc = new HtmlDocument();

                using var reader = new StreamReader(file.OpenReadStream());

                htmlDoc.LoadHtml(reader.ReadToEnd());

                // Obtaining name
                foreach (var node in htmlDoc.DocumentNode.CssSelect("span[id='hp_hotel_name']"))
                {
                    response.Name = RemoveBreakLine(node.InnerText);
                }

                // Obtaining Clasification/Stars: one increment per rating icon in the document
                foreach (var node in htmlDoc.DocumentNode.CssSelect("span[class='bui-icon bui-rating__item bui-icon--medium']"))
                {
                    response.Clasification += 1;
                }

                // Obtaining Address
                foreach (var node in htmlDoc.DocumentNode.CssSelect("span[id='hp_address_subtitle']"))
                {
                    response.Address = RemoveBreakLine(node.InnerText);
                }

                // Obtaining Review
                foreach (var node in htmlDoc.DocumentNode.CssSelect("div[id='js--hp-gallery-scorecard']"))
                {
                    foreach (var child in node.CssSelect("span[class='average js--hp-scorecard-scoreval']"))
                    {
                        // An empty or non-numeric score is skipped, like any other missing field,
                        // instead of aborting the rest of the scrape with an exception.
                        HtmlNode scoreNode = child.FirstChild;
                        if (scoreNode == null)
                        {
                            continue;
                        }

                        if (decimal.TryParse(RemoveBreakLine(scoreNode.InnerText), NumberStyles.Number, CultureInfo.InvariantCulture, out decimal score))
                        {
                            response.Review = score;
                        }
                    }
                }

                // Obtaining Description: concatenation of every paragraph inside the description wrapper.
                // The trailing space in the class value is part of the selector as originally written.
                foreach (var node in htmlDoc.DocumentNode.CssSelect("div[class='hotel_description_wrapper_exp ']"))
                {
                    foreach (var child in node.CssSelect("p"))
                    {
                        response.Description += child.InnerText;
                    }
                }
                // Appending "" turns a still-null description into an empty string before cleaning.
                response.Description = RemoveBreakLine(response.Description + "");

                // Obtaining Rooms Category
                foreach (var node in htmlDoc.DocumentNode.CssSelect("table[id='maxotel_rooms']"))
                {
                    foreach (var child in node.CssSelect("td[class='ftd']"))
                    {
                        response.RoomCategory.Add(RemoveBreakLine(child.InnerText));
                    }
                }

                // Obtaining Alternative Hotels: one entry per tracked cell of the alternatives table
                foreach (var node in htmlDoc.DocumentNode.CssSelect("table[id='althotelsTable']"))
                {
                    foreach (var child in node.CssSelect("td[class='althotelsCell tracked']"))
                    {
                        var alternative = new AlternativeHotel();
                        foreach (var link in child.CssSelect("a[class='althotel_link']"))
                        {
                            // Keep only the part of the link before the query string. As before,
                            // a '?' at index 0 does not trigger truncation.
                            string href       = link.GetAttributeValue("href") ?? string.Empty;
                            int    queryStart = href.IndexOf('?');

                            alternative.Url  = queryStart > 0 ? href.Substring(0, queryStart) : href;
                            alternative.Name = RemoveBreakLine(link.InnerText);
                        }
                        foreach (var description in child.CssSelect("span[class='hp_compset_description']"))
                        {
                            alternative.Description = RemoveBreakLine(description.InnerText);
                        }
                        foreach (var review in child.CssSelect("span[class='average js--hp-scorecard-scoreval']"))
                        {
                            // A malformed score on one alternative must not discard the whole result.
                            if (decimal.TryParse(RemoveBreakLine(review.InnerText), NumberStyles.Number, CultureInfo.InvariantCulture, out decimal alternativeScore))
                            {
                                alternative.Review = alternativeScore;
                            }
                        }
                        response.Alternative.Add(alternative);
                    }
                }

                // Obtaining Url
                // NOTE(review): an <input> element normally carries its data in the "value" attribute;
                // confirm that "content" is really the attribute intended here.
                foreach (var node in htmlDoc.DocumentNode.CssSelect("input[class='share_center_url']"))
                {
                    response.Url = node.GetAttributeValue("content");
                }

                response.Message = "Success!";
            }
            catch (Exception e)
            {
                // Failures are reported through the response rather than propagated to the caller.
                response.Message = e.Message;
            }

            return response;
        }