// Posters
/// <summary>
/// Collects the <c>src</c> values of the poster images found under the
/// "wallpaper-big-1" container of the supplied node.
/// </summary>
/// <param name="body">Node under which the poster markup is searched.</param>
/// <returns>
/// The list of non-empty image sources, or <c>null</c> when any exception is
/// raised while walking the markup.
/// </returns>
public List<string> GetMoviePoster(HtmlNode body)
{
    List<string> links = new List<string>();

    try
    {
        var contentDiv = helper.GetElementWithAttribute(body, "div", "class", "content-div-new");
        var cinemaBox = helper.GetElementWithAttribute(contentDiv, "div", "class", "home-cinema-box-new");
        var wallpaperRoot = helper.GetElementWithAttribute(cinemaBox, "div", "class", "wallpaper-big-1");

        foreach (var box in wallpaperRoot.Elements("div"))
        {
            // Only the poster boxes are of interest; skip every other child div.
            var boxClass = box.Attributes["class"];
            if (boxClass == null || !boxClass.Value.Contains("wallpapers-box-300x180-2"))
            {
                continue;
            }

            var imageHolder = helper.GetElementWithAttribute(box, "div", "class", "wallpapers-box-300x180-2-img");
            var image = imageHolder.Element("a").Element("img");
            var source = image.Attributes["src"];
            if (source != null && !string.IsNullOrEmpty(source.Value))
            {
                links.Add(source.Value);
            }
        }

        return links;
    }
    catch (Exception)
    {
        // TODO - Log error message
    }

    return null;
}
/// <summary>
/// Parses a review page and extracts the reviewer name, the first review
/// paragraph and the rating into a <see cref="ReviewEntity"/>.
/// </summary>
/// <param name="html">Raw HTML of the review page.</param>
/// <param name="affiliation">Affiliation stored, trimmed, on the returned entity.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the document has no <c>body</c>
/// element. Fields whose markup is missing are left as empty strings.
/// </returns>
public ReviewEntity PopulateReviewDetail(string html, string affiliation)
{
    ReviewEntity re = new ReviewEntity();
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.LoadHtml(html);

    if (htmlDoc.DocumentNode == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        Console.WriteLine("Body is empty");
        return null;
    }

    // Reviewer name: breadcrumb div -> first child div -> anchor with class "fn".
    // Every step is null-checked so a missing element yields an empty name
    // instead of a NullReferenceException.
    var headerNode = helper.GetElementWithAttribute(bodyNode, "div", "class", "ndmv-celeb-detail-bread ndmv-review-breadcrumb");
    HtmlNode node = headerNode == null ? null : headerNode.SelectSingleNode("div");
    HtmlNode reviewerName = node == null
        ? null
        : helper.GetElementWithAttribute(node, "a", "class", "fn");
    string reviewName = reviewerName == null ? string.Empty : reviewerName.InnerText;

    // Review text: first <p> directly under the "col-md-16" column.
    var reviewContentNode = helper.GetElementWithAttribute(bodyNode, "div", "class", "row ndmv-celeb-detail-info ndmv-review-detail");
    HtmlNode contentNode = reviewContentNode == null
        ? null
        : helper.GetElementWithAttribute(reviewContentNode, "div", "class", "col-md-16");
    HtmlNode firstParagraph = contentNode == null ? null : contentNode.Element("p");
    string reviews = firstParagraph == null
        ? string.Empty
        : firstParagraph.InnerText.Replace("SPOILERS ALERT", string.Empty);

    // Reviewer rating: conversion is delegated to PrepareRatingValue.
    string reviewerRating = string.Empty;
    var reviewRating = helper.GetElementWithAttribute(bodyNode, "div", "class", "ndmv-movie-rating");
    if (reviewRating != null)
    {
        reviewerRating = PrepareRatingValue(reviewRating);
    }

    re.RowKey = re.ReviewId = Guid.NewGuid().ToString();
    re.Affiliation = affiliation.Trim();
    re.Review = reviews.Trim();
    re.ReviewerName = reviewName.Trim();
    re.ReviewerRating = reviewerRating;
    re.MyScore = string.Empty;
    re.JsonString = string.Empty;
    return re;
}
/// <summary>
/// Parses a review page and extracts the review text and the rating into a
/// <see cref="ReviewEntity"/>. The reviewer name is hard-coded.
/// </summary>
/// <param name="html">Raw HTML of the review page.</param>
/// <param name="affiliation">Affiliation stored, trimmed, on the returned entity.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the document has no <c>body</c>
/// element. The rating is an empty string when it is missing or unparsable.
/// </returns>
public ReviewEntity PopulateReviewDetail(string html, string affiliation)
{
    ReviewEntity re = new ReviewEntity();
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.LoadHtml(html);

    if (htmlDoc.DocumentNode == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        Console.WriteLine("Body is empty");
        return null;
    }

    var reviewContentNode = helper.GetElementWithAttribute(bodyNode, "div", "class", "Normal");
    string review = reviewContentNode == null ? string.Empty : reviewContentNode.InnerText;

    // Rating text lives in div#sshow -> td.flmcast -> span#stp.
    var reviewRatingNode = helper.GetElementWithAttribute(bodyNode, "div", "id", "sshow");
    HtmlNode ratingNode = reviewRatingNode == null
        ? null
        : helper.GetElementWithAttribute(reviewRatingNode, "td", "class", "flmcast");
    HtmlNode rates = ratingNode == null
        ? null
        : helper.GetElementWithAttribute(ratingNode, "span", "id", "stp");
    string rateText = rates == null ? string.Empty : rates.InnerText;

    // The value is taken from the (up to) three characters before the first
    // comma: either "d.d", or a single digit when there is no decimal point.
    // It is doubled before being stored.
    string rate = string.Empty;
    int found = rateText.IndexOf(",");
    if (found > 0)
    {
        string candidate = rateText.Substring(0, found);
        if (candidate.Length > 3)
        {
            candidate = candidate.Substring(candidate.Length - 3);
        }

        if (!candidate.Contains("."))
        {
            candidate = candidate.Substring(candidate.Length - 1);
        }

        double parsed;
        if (double.TryParse(
                candidate,
                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                System.Globalization.CultureInfo.InvariantCulture,
                out parsed))
        {
            rate = (parsed * 2).ToString(System.Globalization.CultureInfo.InvariantCulture);
        }
    }

    re.RowKey = re.ReviewId = Guid.NewGuid().ToString();
    re.Affiliation = affiliation.Trim();
    re.Review = review.Trim();
    re.ReviewerName = "Gaurav Malani";
    re.ReviewerRating = rate;
    re.MyScore = string.Empty;
    re.JsonString = string.Empty;
    return re;
}
/// <summary>
/// Parses a review page and extracts the review text and the rating into a
/// <see cref="ReviewEntity"/>. The reviewer name is hard-coded.
/// </summary>
/// <param name="html">Raw HTML of the review page.</param>
/// <param name="affiliation">Affiliation stored, trimmed, on the returned entity.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the document has no <c>body</c>
/// element. Fields whose markup is missing are left as empty strings.
/// </returns>
public ReviewEntity PopulateReviewDetail(string html, string affiliation)
{
    ReviewEntity re = new ReviewEntity();
    string rating = string.Empty;
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.LoadHtml(html);

    if (htmlDoc.DocumentNode == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        Console.WriteLine("Body is empty");
        return null;
    }

    // Review text: concatenation of every <p> directly under div.book_para.
    // SelectNodes yields null (not an empty collection) when nothing matches.
    var reviewContentNode = helper.GetElementWithAttribute(bodyNode, "div", "class", "book_para");
    HtmlNodeCollection nodes = reviewContentNode == null ? null : reviewContentNode.SelectNodes("p");
    System.Text.StringBuilder review = new System.Text.StringBuilder();
    if (nodes != null)
    {
        foreach (var paragraph in nodes)
        {
            review.Append(paragraph.InnerText);
        }
    }

    var reviewerRating = helper.GetElementWithAttribute(bodyNode, "article", "class", "floatl w591px");
    if (reviewerRating != null)
    {
        try
        {
            rating = PrepareRatingValue(reviewerRating);
        }
        catch (Exception)
        {
            // Best effort: a rating that cannot be converted is stored as empty.
        }
    }

    re.RowKey = re.ReviewId = Guid.NewGuid().ToString();
    re.Affiliation = affiliation.Trim();
    re.Review = review.ToString().Trim();
    re.ReviewerName = "Anupama Chopra";
    re.ReviewerRating = rating;
    re.MyScore = string.Empty;
    re.JsonString = string.Empty;
    return re;
}
/// <summary>
/// Parses a review page and extracts the reviewer name and the review text
/// into a <see cref="ReviewEntity"/>. No rating is read from this page layout.
/// </summary>
/// <param name="html">Raw HTML of the review page.</param>
/// <param name="affiliation">Affiliation stored, trimmed, on the returned entity.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the document has no <c>body</c>
/// element. Fields whose markup is missing are left as empty strings.
/// </returns>
public ReviewEntity PopulateReviewDetail(string html, string affiliation)
{
    ReviewEntity re = new ReviewEntity();
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.LoadHtml(html);

    if (htmlDoc.DocumentNode == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        Console.WriteLine("Body is empty");
        return null;
    }

    // Reviewer name: div.artTps -> span.by -> first <a>.
    var topNode = helper.GetElementWithAttribute(bodyNode, "div", "class", "artTps");
    HtmlNode reviewerNode = topNode == null
        ? null
        : helper.GetElementWithAttribute(topNode, "span", "class", "by");
    HtmlNode reviewer = reviewerNode == null ? null : reviewerNode.Element("a");
    string reviewerName = reviewer == null ? string.Empty : reviewer.InnerText;

    // Review text: concatenation of every <p> directly under div.fullCont.
    // SelectNodes yields null (not an empty collection) when nothing matches.
    var reviewContentNode = helper.GetElementWithAttribute(bodyNode, "div", "class", "fullCont");
    HtmlNodeCollection nodes = reviewContentNode == null ? null : reviewContentNode.SelectNodes("p");
    System.Text.StringBuilder review = new System.Text.StringBuilder();
    if (nodes != null)
    {
        foreach (var paragraph in nodes)
        {
            review.Append(paragraph.InnerText);
        }
    }

    re.RowKey = re.ReviewId = Guid.NewGuid().ToString();
    re.Affiliation = affiliation.Trim();
    re.Review = review.ToString().Trim();
    re.ReviewerName = reviewerName.Trim();
    re.ReviewerRating = string.Empty;
    re.MyScore = string.Empty;
    re.JsonString = string.Empty;
    return re;
}
/// <summary>
/// Parses a review page and extracts the reviewer name and the review text
/// into a <see cref="ReviewEntity"/>. No rating is read from this page layout.
/// </summary>
/// <param name="html">Raw HTML of the review page.</param>
/// <param name="affiliation">Affiliation stored, trimmed, on the returned entity.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the document has no <c>body</c>
/// element. Fields whose markup is missing are left as empty strings.
/// </returns>
public ReviewEntity PopulateReviewDetail(string html, string affiliation)
{
    ReviewEntity re = new ReviewEntity();
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.LoadHtml(html);

    if (htmlDoc.DocumentNode == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        Console.WriteLine("Body is empty");
        return null;
    }

    // The author is either in <p class="articleauthor"> or, failing that,
    // in <span class="articleauthor">. Either may be absent.
    HtmlNode authorNode = helper.GetElementWithAttribute(bodyNode, "p", "class", "articleauthor");
    if (authorNode == null)
    {
        authorNode = helper.GetElementWithAttribute(bodyNode, "span", "class", "articleauthor");
    }

    string reviewName = authorNode == null ? string.Empty : authorNode.InnerText;

    // Review text: concatenation of every <p> directly under td.story.
    // SelectNodes yields null (not an empty collection) when nothing matches.
    var reviewContentNode = helper.GetElementWithAttribute(bodyNode, "td", "class", "story");
    HtmlNodeCollection nodes = reviewContentNode == null ? null : reviewContentNode.SelectNodes("p");
    System.Text.StringBuilder review = new System.Text.StringBuilder();
    if (nodes != null)
    {
        foreach (var paragraph in nodes)
        {
            review.Append(paragraph.InnerText);
        }
    }

    re.RowKey = re.ReviewId = Guid.NewGuid().ToString();
    re.Affiliation = affiliation.Trim();
    re.Review = review.ToString().Trim();
    re.ReviewerName = reviewName.Trim();
    re.ReviewerRating = string.Empty;
    re.MyScore = string.Empty;
    re.JsonString = string.Empty;
    return re;
}
/// <summary>
/// Parses a review page and extracts the reviewer name, the review text and
/// the rating into a <see cref="ReviewEntity"/>.
/// </summary>
/// <param name="html">Raw HTML of the review page.</param>
/// <param name="affiliation">Affiliation stored, trimmed, on the returned entity.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the document has no <c>body</c>
/// element. Fields whose markup is missing are left as empty strings.
/// </returns>
public ReviewEntity PopulateReviewDetail(string html, string affiliation)
{
    ReviewEntity re = new ReviewEntity();
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.LoadHtml(html);

    if (htmlDoc.DocumentNode == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        Console.WriteLine("Body is empty");
        return null;
    }

    var reviewerName = helper.GetElementWithAttribute(bodyNode, "span", "class", "grey1");
    string reviewName = reviewerName == null ? string.Empty : reviewerName.InnerText;

    // Review text: concatenation of every <p> directly under the article body.
    // SelectNodes yields null (not an empty collection) when nothing matches.
    var reviewContentNode = helper.GetElementWithAttribute(bodyNode, "div", "itemprop", "articleBody");
    HtmlNodeCollection nodes = reviewContentNode == null ? null : reviewContentNode.SelectNodes("p");
    System.Text.StringBuilder review = new System.Text.StringBuilder();
    string reviewerRating = string.Empty;
    if (nodes != null)
    {
        foreach (var paragraph in nodes)
        {
            string text = paragraph.InnerText;
            review.Append(text);

            // A paragraph mentioning "rating" (any casing) carries the score;
            // the last such paragraph wins.
            if (text.IndexOf("rating", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                reviewerRating = PrepareRatingValue(paragraph);
            }
        }
    }

    re.RowKey = re.ReviewId = Guid.NewGuid().ToString();
    re.Affiliation = affiliation.Trim();
    re.Review = review.ToString().Replace("'", "'").Trim();
    re.ReviewerName = reviewName.Trim();
    re.ReviewerRating = reviewerRating;
    re.MyScore = string.Empty;
    re.JsonString = string.Empty;
    return re;
}
/// <summary>
/// Parses a review page and extracts the author and the review text into a
/// <see cref="ReviewEntity"/>. No rating is read from this page layout.
/// </summary>
/// <param name="html">Raw HTML of the review page.</param>
/// <param name="affiliation">Affiliation stored, trimmed, on the returned entity.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the document has no <c>body</c>
/// element. Fields whose markup is missing are left as empty strings.
/// </returns>
public ReviewEntity PopulateReviewDetail(string html, string affiliation)
{
    ReviewEntity re = new ReviewEntity();
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.LoadHtml(html);

    if (htmlDoc.DocumentNode == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        Console.WriteLine("Body is empty");
        return null;
    }

    var reviewerName = helper.GetElementWithAttribute(bodyNode, "span", "class", "author vcard");
    string reviewName = reviewerName == null ? string.Empty : reviewerName.InnerText;

    // Review text: concatenation of every <p> directly under div.entry-content.
    // SelectNodes yields null (not an empty collection) when nothing matches.
    var reviewContentNode = helper.GetElementWithAttribute(bodyNode, "div", "class", "entry-content");
    HtmlNodeCollection nodes = reviewContentNode == null ? null : reviewContentNode.SelectNodes("p");
    System.Text.StringBuilder review = new System.Text.StringBuilder();
    if (nodes != null)
    {
        foreach (var paragraph in nodes)
        {
            review.Append(paragraph.InnerText);
        }
    }

    re.RowKey = re.ReviewId = Guid.NewGuid().ToString();
    re.Affiliation = affiliation.Trim();
    re.Review = review.ToString().Trim();
    // The page author is appended in parentheses to a fixed reviewer name.
    re.ReviewerName = "Vajir Singh (" + reviewName.Trim() + ")";
    re.ReviewerRating = string.Empty;
    re.MyScore = string.Empty;
    re.JsonString = string.Empty;
    return re;
}
/// <summary>
/// Parses a review page and extracts the reviewer name and the review text
/// into a <see cref="ReviewEntity"/>. No rating is read from this page layout.
/// </summary>
/// <param name="html">Raw HTML of the review page.</param>
/// <param name="affiliation">Affiliation stored, trimmed, on the returned entity.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the document has no <c>body</c>
/// element. Fields whose markup is missing are left as empty strings.
/// </returns>
private ReviewEntity PopulateReviewDetails(string html, string affiliation)
{
    ReviewEntity re = new ReviewEntity();
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.LoadHtml(html);

    if (htmlDoc.DocumentNode == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        Console.WriteLine("body node is null");
        return null;
    }

    var headerNode = helper.GetElementWithAttribute(bodyNode, "div", "class", "story_wid");

    // Reviewer name: text of the <a> inside span.sty_agn, or the span's own
    // inner HTML when it holds no link.
    HtmlNode reviewerName = headerNode == null
        ? null
        : helper.GetElementWithAttribute(headerNode, "span", "class", "sty_agn");
    string reviewName = string.Empty;
    if (reviewerName != null)
    {
        HtmlNode node = reviewerName.Element("a");
        reviewName = node == null ? reviewerName.InnerHtml : node.InnerText;
    }

    HtmlNode reviewContent = headerNode == null
        ? null
        : helper.GetElementWithAttribute(headerNode, "div", "class", "sty_txt");
    string review = reviewContent == null ? string.Empty : reviewContent.InnerText;

    re.RowKey = re.ReviewId = Guid.NewGuid().ToString();
    re.Affiliation = affiliation.Trim();
    re.Review = review.Trim();
    re.ReviewerName = reviewName.Trim();
    re.ReviewerRating = string.Empty;
    re.MyScore = string.Empty;
    re.JsonString = string.Empty;
    return re;
}
/// <summary>
/// Parses a review page whose content sits in <c>div.Normal</c> and extracts
/// the reviewer name, the rating and the review text into a
/// <see cref="ReviewEntity"/>.
/// </summary>
/// <param name="html">Raw HTML of the review page.</param>
/// <param name="affiliation">Affiliation stored as-is on the returned entity.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the document has no <c>body</c>
/// element. The reviewer name falls back to "mumbaimirror" when no byline is
/// found; the rating is empty when no rating marker is found.
/// </returns>
private ReviewEntity PopulateReviewDetails(string html, string affiliation)
{
    ReviewEntity re = new ReviewEntity();
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.LoadHtml(html);

    if (htmlDoc.DocumentNode == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        Console.WriteLine("body node is null");
        return null;
    }

    var headerNode = helper.GetElementWithAttribute(bodyNode, "div", "class", "Normal");
    string fullText = headerNode == null ? string.Empty : headerNode.InnerText;

    HtmlNode reviewerNode = headerNode == null
        ? null
        : helper.GetElementWithAttribute(headerNode, "span", "id", "advenueINTEXT");
    string reviewerName = reviewerNode == null ? string.Empty : reviewerNode.InnerText;

    // Clean up the reviewer name - it has lots of scraped values along with the name.
    // NOTE(review): the first Replace literal may originally have been a
    // non-breaking space or HTML entity - confirm against the repository copy.
    if (!string.IsNullOrEmpty(reviewerName))
    {
        reviewerName = reviewerName.Replace(" ", " ").Replace("By", "").Trim();
    }

    // Scan the <strong> children for the rating marker and, if still needed,
    // the byline. SelectNodes yields null when nothing matches.
    string rating = string.Empty;
    string ratingMarker = null;
    HtmlNodeCollection nodes = headerNode == null ? null : headerNode.SelectNodes("strong");
    if (nodes != null)
    {
        foreach (var node in nodes)
        {
            string text = node.InnerText;

            if (text.IndexOf("rating:", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                // The score is the number of characters left after the label
                // (presumably one character per star - confirm).
                rating = text.Replace(" ", "").Replace("Rating:", "").Length.ToString();
                ratingMarker = text;
            }

            if (string.IsNullOrEmpty(reviewerName)
                && text.IndexOf("by:", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                // Only the name is assigned here; the rating must not be overwritten.
                reviewerName = text.Replace(" ", "").Replace("By:", "");
            }
        }
    }

    // The default is applied only after every node has been inspected, so a
    // byline appearing after the first <strong> is still picked up.
    bool hasByline = !string.IsNullOrEmpty(reviewerName);
    if (!hasByline)
    {
        reviewerName = "mumbaimirror";
    }

    float multipliedRating;
    if (float.TryParse(rating, out multipliedRating) && multipliedRating > 0)
    {
        // Per the original author's note, this source rates out of 5 while
        // all other ratings are out of 10.
        rating = (multipliedRating * 2).ToString();
    }

    // The review is whatever follows the rating marker, or failing that the
    // byline; when neither can be located the whole text is used.
    string review = fullText;
    string marker = ratingMarker != null ? ratingMarker : (hasByline ? reviewerName : null);
    if (!string.IsNullOrEmpty(marker))
    {
        int markerAt = fullText.LastIndexOf(marker, StringComparison.Ordinal);
        if (markerAt >= 0)
        {
            review = fullText.Substring(markerAt + marker.Length);
        }
    }

    re.Affiliation = affiliation;
    re.RowKey = re.ReviewId = Guid.NewGuid().ToString();
    re.Review = review.Trim();
    re.ReviewerName = reviewerName.Trim();
    re.ReviewerRating = rating;
    re.MyScore = string.Empty;
    re.JsonString = string.Empty;
    return re;
}
/// <summary>
/// Parses a review page and extracts the reviewer name, the rating and the
/// review text into a <see cref="ReviewEntity"/>.
/// </summary>
/// <param name="html">Raw HTML of the review page.</param>
/// <param name="affiliation">Affiliation stored, trimmed, on the returned entity.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the document has no <c>body</c>
/// element. Fields whose markup is missing are left as empty strings.
/// </returns>
private ReviewEntity PopulateReviewDetails(string html, string affiliation)
{
    ReviewEntity re = new ReviewEntity();
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.LoadHtml(html);

    if (htmlDoc.DocumentNode == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        Console.WriteLine("body node is null");
        return null;
    }

    var headerNode = helper.GetElementWithAttribute(bodyNode, "div", "class", "pageContent");

    // Reviewer: text of the first <a> inside span.written.
    HtmlAgilityPack.HtmlNode reviewerNameContainer = headerNode == null
        ? null
        : helper.GetElementWithAttribute(headerNode, "span", "class", "written");
    HtmlAgilityPack.HtmlNode firstReviewerLink = reviewerNameContainer == null
        ? null
        : reviewerNameContainer.Element("a");
    string reviewerName = firstReviewerLink == null ? string.Empty : firstReviewerLink.InnerText;

    // Rating: encoded in the class attribute of span#rate_val_change as
    // "rate" followed by the value.
    var ratingNode = helper.GetElementWithAttribute(bodyNode, "span", "id", "rate_val_change");
    HtmlAgilityPack.HtmlAttribute ratingClass = ratingNode == null ? null : ratingNode.Attributes["class"];
    string rating = ratingClass == null ? string.Empty : ratingClass.Value;
    rating = rating.Replace("rate", "");

    float multipliedRating;
    if (float.TryParse(
            rating,
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
            System.Globalization.CultureInfo.InvariantCulture,
            out multipliedRating)
        && multipliedRating > 0)
    {
        // Per the original author's note, this source rates out of 5 while
        // all other ratings are out of 10.
        rating = (multipliedRating * 2).ToString(System.Globalization.CultureInfo.InvariantCulture);
    }

    // Review content: the first <p> under div.upperBlk > figure whose text is
    // longer than 300 characters.
    var reviewContent = helper.GetElementWithAttribute(bodyNode, "div", "class", "upperBlk");
    HtmlAgilityPack.HtmlNode reviews = reviewContent == null ? null : reviewContent.Element("figure");
    string review = string.Empty;
    if (reviews != null)
    {
        foreach (var paragraph in reviews.Elements("p"))
        {
            string text = paragraph.InnerText;
            if (!string.IsNullOrEmpty(text) && text.Length > 300)
            {
                review = text;
                break;
            }
        }
    }

    re.RowKey = re.ReviewId = Guid.NewGuid().ToString();
    re.Affiliation = affiliation.Trim();
    re.Review = review.Trim();
    re.ReviewerName = reviewerName.Trim();
    re.ReviewerRating = rating;
    re.JsonString = string.Empty;
    re.MyScore = string.Empty;
    return re;
}
/// <summary>
/// Parses a review page and extracts the reviewer name, the rating and the
/// review text into a <see cref="ReviewEntity"/>.
/// </summary>
/// <param name="html">Raw HTML of the review page.</param>
/// <param name="affiliation">Affiliation stored as-is on the returned entity.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the document has no <c>body</c>
/// element. Fields whose markup is missing are left as empty strings.
/// </returns>
private ReviewEntity PopulateReviewDetails(string html, string affiliation)
{
    ReviewEntity re = new ReviewEntity();
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.LoadHtml(html);

    if (htmlDoc.DocumentNode == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        Console.WriteLine("body node is null");
        return null;
    }

    var headerNode = helper.GetElementWithAttribute(bodyNode, "div", "id", "celeb_article_postview_tab");
    HtmlAgilityPack.HtmlNode reviewrName = headerNode == null
        ? null
        : helper.GetElementWithAttribute(headerNode, "div", "class", "m9090");
    string reviewName = reviewrName == null ? string.Empty : reviewrName.InnerText;

    // Clean up the reviewer name - it has lots of scraped values along with
    // the name; everything from the first comma onwards is dropped.
    // NOTE(review): the first Replace literal may originally have been a
    // non-breaking space or HTML entity - confirm against the repository copy.
    if (!string.IsNullOrEmpty(reviewName))
    {
        reviewName = reviewName.Replace(" ", " ").Replace("By", "").Trim();
        int nameLength = reviewName.IndexOf(",");
        if (nameLength > 1)
        {
            reviewName = reviewName.Substring(0, nameLength);
        }
    }

    // Rating: title attribute of the 93px-wide image inside the byline block.
    HtmlAgilityPack.HtmlNode ratingNode = reviewrName == null
        ? null
        : helper.GetElementWithAttribute(reviewrName, "img", "width", "93");
    HtmlAgilityPack.HtmlAttribute ratingTitle = ratingNode == null ? null : ratingNode.Attributes["title"];
    string rating = ratingTitle == null ? string.Empty : ratingTitle.Value;

    float multipliedRating;
    if (float.TryParse(
            rating,
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
            System.Globalization.CultureInfo.InvariantCulture,
            out multipliedRating)
        && multipliedRating > 0)
    {
        // Per the original author's note, this source rates out of 5 while
        // all other ratings are out of 10.
        rating = (multipliedRating * 2).ToString(System.Globalization.CultureInfo.InvariantCulture);
    }

    HtmlAgilityPack.HtmlNode reviewContent = headerNode == null
        ? null
        : helper.GetElementWithAttribute(headerNode, "div", "class", " mfl mmb31 mfnt12 minline malignjus mmr18");
    string review = reviewContent == null ? string.Empty : reviewContent.InnerText;

    re.Affiliation = affiliation;
    re.RowKey = re.ReviewId = Guid.NewGuid().ToString();
    re.Review = review.Trim();
    re.ReviewerName = reviewName.Trim();
    re.ReviewerRating = rating;
    re.MyScore = string.Empty;
    re.JsonString = string.Empty;
    return re;
}
/// <summary>
/// Parses a review page and extracts the star rating, the reviewer name and
/// the review text into a <see cref="ReviewEntity"/>.
/// </summary>
/// <param name="html">Raw HTML of the review page.</param>
/// <param name="affiliation">Affiliation stored, trimmed, on the returned entity.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the document has no <c>body</c>
/// element or when any exception is raised while reading the markup.
/// </returns>
public ReviewEntity PopulateReviewDetail(string html, string affiliation)
{
    ReviewEntity re = new ReviewEntity();
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.LoadHtml(html);

    if (htmlDoc.DocumentNode == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        Console.WriteLine("Body is empty");
        return null;
    }

    try
    {
        // Rating: one point per image whose source contains "star-one-1".
        // Images with any other source contribute nothing.
        var starHolder = helper.GetElementWithAttribute(bodyNode, "div", "class", "story-rating").Element("span");
        int filledStars = 0;
        foreach (var star in starHolder.Elements("img"))
        {
            HtmlAttribute source = star.Attributes["src"];
            if (source != null && source.Value.Contains("star-one-1"))
            {
                filledStars++;
            }
        }

        int score = filledStars * 2;

        // Reviewer
        var editorLink = helper.GetElementWithAttribute(bodyNode, "div", "class", "editor").Element("a");
        string editorName = string.Empty;
        if (editorLink != null)
        {
            editorName = editorLink.InnerText;
        }

        // Review Text
        var contentNode = helper.GetElementWithAttribute(bodyNode, "div", "class", "main-body-content");
        string contentText = string.Empty;
        if (contentNode != null)
        {
            contentText = contentNode.InnerText;
        }

        re.RowKey = re.ReviewId = Guid.NewGuid().ToString();
        re.Affiliation = affiliation.Trim();
        re.Review = contentText.Replace("'", "'").Trim();
        re.ReviewerName = editorName.Trim();
        re.ReviewerRating = score.ToString();
        re.MyScore = string.Empty;
        re.JsonString = string.Empty;
        return re;
    }
    catch (Exception)
    {
        // Log an exception
    }

    return null;
}
/// <summary>
/// Parses a review page and extracts the reviewer name, the review text and
/// the rating into a <see cref="ReviewEntity"/>.
/// </summary>
/// <param name="html">Raw HTML of the review page.</param>
/// <param name="affiliation">Affiliation stored, trimmed, on the returned entity.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the document has no <c>body</c>
/// element. Fields whose markup is missing are left as empty strings.
/// </returns>
public ReviewEntity PopulateReviewDetail(string html, string affiliation)
{
    ReviewEntity re = new ReviewEntity();
    string rating = string.Empty;
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.LoadHtml(html);

    if (htmlDoc.DocumentNode == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        Console.WriteLine("Body is empty");
        return null;
    }

    // Reviewer name: the part of div.metalink before the first "|".
    // When there is no "|" the whole trimmed text is kept.
    var headerNode = helper.GetElementWithAttribute(bodyNode, "div", "class", "article_detail");
    HtmlNode reviewerName = headerNode == null
        ? null
        : helper.GetElementWithAttribute(headerNode, "div", "class", "metalink");
    string reviewName = reviewerName == null ? string.Empty : reviewerName.InnerText.Trim();
    int rn = reviewName.IndexOf("|");
    if (rn >= 0)
    {
        reviewName = reviewName.Substring(0, rn);
    }

    // Review text: concatenation of every <p> directly under the article body.
    // SelectNodes yields null (not an empty collection) when nothing matches.
    var reviewContentNode = helper.GetElementWithAttribute(bodyNode, "span", "itemprop", "articleBody");
    HtmlNodeCollection nodes = reviewContentNode == null ? null : reviewContentNode.SelectNodes("p");
    System.Text.StringBuilder review = new System.Text.StringBuilder();
    if (nodes != null)
    {
        foreach (var paragraph in nodes)
        {
            review.Append(paragraph.InnerText);
        }
    }

    var reviewerRating = helper.GetElementWithAttribute(bodyNode, "h2", "class", "footer_style");
    if (reviewerRating != null)
    {
        try
        {
            rating = PrepareRatingValue(reviewerRating);
        }
        catch (Exception)
        {
            // Best effort: a rating that cannot be converted is stored as empty.
        }
    }

    re.RowKey = re.ReviewId = Guid.NewGuid().ToString();
    re.Affiliation = affiliation.Trim();
    re.Review = review.ToString().Trim();
    re.ReviewerName = reviewName.Trim();
    re.ReviewerRating = rating;
    re.MyScore = string.Empty;
    re.JsonString = string.Empty;
    return re;
}
/// <summary>
/// Parses a review page and extracts the reviewer name, the review text and
/// the rating into a <see cref="ReviewEntity"/>.
/// </summary>
/// <param name="html">Raw HTML of the review page.</param>
/// <param name="affiliation">Affiliation stored, trimmed, on the returned entity.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the document has no <c>body</c>
/// element. Fields whose markup is missing or unparsable are left as empty strings.
/// </returns>
public ReviewEntity PopulateReviewDetail(string html, string affiliation)
{
    ReviewEntity re = new ReviewEntity();
    string rating = string.Empty;
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.LoadHtml(html);

    if (htmlDoc.DocumentNode == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        Console.WriteLine("Body is empty");
        return null;
    }

    // Reviewer name: text of the <a> inside div.blbox > span.fwb, or the
    // span's own inner HTML when it holds no link.
    var reviewerNode = helper.GetElementWithAttribute(bodyNode, "div", "class", "blbox");
    HtmlNode reviewerName = reviewerNode == null
        ? null
        : helper.GetElementWithAttribute(reviewerNode, "span", "class", "fwb");
    string reviewName = string.Empty;
    if (reviewerName != null)
    {
        HtmlNode node = reviewerName.Element("a");
        reviewName = node == null ? reviewerName.InnerHtml : node.InnerText;
    }

    // Review text: concatenation of every <p> directly under div#atxt_box.
    // SelectNodes yields null (not an empty collection) when nothing matches.
    var reviewContentNode = helper.GetElementWithAttribute(bodyNode, "div", "id", "atxt_box");
    HtmlNodeCollection nodes = reviewContentNode == null ? null : reviewContentNode.SelectNodes("p");
    System.Text.StringBuilder review = new System.Text.StringBuilder();
    if (nodes != null)
    {
        foreach (var paragraph in nodes)
        {
            string text = paragraph.InnerText;
            review.Append(text);

            if (text.IndexOf("rating", StringComparison.OrdinalIgnoreCase) < 0)
            {
                continue;
            }

            // Strip the label and the slash, drop the final character
            // (presumably the scale digit, as in "3.5/5" - confirm), then
            // double the value. The rating is only updated on a successful
            // parse so unparsable text never ends up in the entity.
            string candidate = text.Replace("Rating:", "").Replace("/", "").Trim();
            if (candidate.Length > 1)
            {
                candidate = candidate.Remove(candidate.Length - 1);
                decimal parsed;
                if (decimal.TryParse(
                        candidate,
                        System.Globalization.NumberStyles.Number,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out parsed))
                {
                    rating = (parsed * 2).ToString(System.Globalization.CultureInfo.InvariantCulture);
                }
            }
        }
    }

    re.RowKey = re.ReviewId = Guid.NewGuid().ToString();
    re.Affiliation = affiliation.Trim();
    re.Review = review.ToString().Trim();
    re.ReviewerName = reviewName.Trim();
    re.ReviewerRating = rating;
    re.MyScore = string.Empty;
    re.JsonString = string.Empty;
    return re;
}
/// <summary>
/// Parses a review page and extracts the review text and the rating into a
/// <see cref="ReviewEntity"/>. The reviewer name is hard-coded.
/// </summary>
/// <param name="html">Raw HTML of the review page.</param>
/// <param name="affiliation">Affiliation stored, trimmed, on the returned entity.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the document has no <c>body</c>
/// element. Fields whose markup is missing or unparsable are left as empty strings.
/// </returns>
public ReviewEntity PopulateReviewDetail(string html, string affiliation)
{
    ReviewEntity re = new ReviewEntity();
    string rating = string.Empty;
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.LoadHtml(html);

    if (htmlDoc.DocumentNode == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        Console.WriteLine("Body is empty");
        return null;
    }

    // Review text: concatenation of every <p> directly under the entry content.
    // SelectNodes yields null (not an empty collection) when nothing matches.
    var reviewContentNode = helper.GetElementWithAttribute(bodyNode, "div", "class", "entry entry-content");
    HtmlNodeCollection nodes = reviewContentNode == null ? null : reviewContentNode.SelectNodes("p");
    System.Text.StringBuilder review = new System.Text.StringBuilder();
    if (nodes != null)
    {
        foreach (var paragraph in nodes)
        {
            string text = paragraph.InnerText;
            review.Append(text);

            if (text.IndexOf("rating", StringComparison.OrdinalIgnoreCase) < 0)
            {
                continue;
            }

            // Strip the label and the "stars" suffix, then double the value.
            // The rating is only updated on a successful parse so unparsable
            // text never ends up in the entity.
            string candidate = text.Replace("Rating: ", "").Replace("stars", "").Trim();
            decimal parsed;
            if (decimal.TryParse(
                    candidate,
                    System.Globalization.NumberStyles.Number,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out parsed))
            {
                rating = (parsed * 2).ToString(System.Globalization.CultureInfo.InvariantCulture);
            }
        }
    }

    re.RowKey = re.ReviewId = Guid.NewGuid().ToString();
    re.Affiliation = affiliation.Trim();
    re.Review = review.ToString().Trim();
    re.ReviewerName = "Raja Sen";
    re.ReviewerRating = rating;
    re.MyScore = string.Empty;
    re.JsonString = string.Empty;
    return re;
}
/// <summary>
/// Parses a review page and extracts the reviewer name, the review text and
/// the star rating into a <see cref="ReviewEntity"/>.
/// </summary>
/// <param name="html">Raw HTML of the review page.</param>
/// <param name="affiliation">Affiliation stored, trimmed, on the returned entity.</param>
/// <returns>
/// The populated entity, or <c>null</c> when the document has no <c>body</c>
/// element. Fields whose markup is missing are left as empty strings.
/// </returns>
public ReviewEntity PopulateReviewDetail(string html, string affiliation)
{
    ReviewEntity re = new ReviewEntity();
    string rating = string.Empty;
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.LoadHtml(html);

    if (htmlDoc.DocumentNode == null)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        Console.WriteLine("Body is empty");
        return null;
    }

    // Reviewer name: text of the <a> inside p.authorname, or the paragraph's
    // own inner HTML when it holds no link.
    var reviewerName = helper.GetElementWithAttribute(bodyNode, "p", "class", "authorname");
    string reviewName = string.Empty;
    if (reviewerName != null)
    {
        HtmlNode node = reviewerName.Element("a");
        reviewName = node == null ? reviewerName.InnerHtml : node.InnerText;
    }

    // Review text: concatenation of every <p> directly under div.content-story.
    // SelectNodes yields null (not an empty collection) when nothing matches.
    var reviewContentNode = helper.GetElementWithAttribute(bodyNode, "div", "class", "content-story");
    HtmlNodeCollection nodes = reviewContentNode == null ? null : reviewContentNode.SelectNodes("p");
    System.Text.StringBuilder review = new System.Text.StringBuilder();
    if (nodes != null)
    {
        foreach (var paragraph in nodes)
        {
            string text = paragraph.InnerText;
            review.Append(text);

            if (text.IndexOf("rating", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                // One point per '*' in the rating paragraph itself (not in the
                // review accumulated so far), doubled before being stored.
                int stars = text.Count(c => c == '*');
                rating = (stars * 2).ToString();
            }
        }
    }

    re.RowKey = re.ReviewId = Guid.NewGuid().ToString();
    re.Affiliation = affiliation.Trim();
    re.Review = review.ToString().Replace("'", "'").Trim();
    re.ReviewerName = reviewName.Trim();
    re.ReviewerRating = rating;
    re.MyScore = string.Empty;
    re.JsonString = string.Empty;
    return re;
}
// Movie Name
/// <summary>
/// Reads the movie name from the <c>span.itemprop</c> element nested inside
/// the <c>h1.header</c> element of the supplied node.
/// </summary>
/// <param name="body">Node under which the header markup is searched.</param>
/// <returns>The inner text of the matched span.</returns>
private string GetMovieName(HtmlNode body)
{
    return helper.GetElementWithAttribute(
        helper.GetElementWithAttribute(body, "h1", "class", "header"),
        "span",
        "class",
        "itemprop").InnerText;
}