/// <summary>
/// Parses the comments out of an already-loaded HTML document for the given job and,
/// when at least one comment was found, publishes them as a JSON string and logs
/// how many were extracted.
/// </summary>
/// <param name="job">Job whose Id and Url are reported in the log line and which is passed on to the parser.</param>
/// <param name="document">HTML document to extract comments from.</param>
private void AnalysicHtmlForCompany(JobWaitAS job, HtmlDocument document)
{
    List<Comment> comments = _pareseComment.ParseComment(_configuration, document, job);

    // Nothing extracted: publish nothing and log nothing.
    if (comments.Count <= 0)
    {
        return;
    }

    string payload = Newtonsoft.Json.JsonConvert.SerializeObject(comments);
    _producerComment.PublishString(payload);
    //_noSqlHtml.SaveComment(job.Id, job.CompanyId, lstComment);

    // A missing Url is logged as an empty string.
    string url = job.Url ?? "";
    _log.Info(string.Format("Extract {0} comment from id {1} {2}", comments.Count, job.Id, url));
}
/// <summary>
/// Handles one delivered queue message: decodes the job from the message body, loads the
/// stored HTML for that job, runs comment extraction on it, and acknowledges the message.
/// </summary>
/// <param name="message">The delivery whose Body carries the serialized job and whose DeliveryTag is acknowledged.</param>
/// <remarks>
/// A message whose body cannot be decoded into a job is logged and acknowledged as well.
/// Previously that case dereferenced the null job in the final log call and threw before
/// the acknowledgement was sent.
/// </remarks>
public override void ProcessMessage(BasicDeliverEventArgs message)
{
    var jobAs = JobWaitAS.FromObjMQ(message.Body);

    if (jobAs != null)
    {
        var html = _noSqlHtml.GetHtml(jobAs.Id, jobAs.CompanyId);
        if (!string.IsNullOrEmpty(html))
        {
            var document = new HtmlDocument();
            document.LoadHtml(html);
            AnalysicHtmlForCompany(jobAs, document);
        }
        else
        {
            _log.Info("Html can't select from db");
        }

        _log.Info(string.Format("Processed for job {0}", jobAs.ToJson()));
    }
    else
    {
        // The body did not decode to a job; there is nothing to process or to serialize
        // into the log line, so just record it and fall through to the acknowledgement.
        _log.Info("Can't parse job from message body");
    }

    // NOTE(review): the second argument 'true' is presumably the broker client's
    // "multiple" flag (acknowledge every outstanding delivery up to this tag) — confirm
    // this is intended rather than acknowledging only this message.
    GetChannel().BasicAck(message.DeliveryTag, true);
}
/// <summary>
/// Extracts comments from an HTML document. Every node matched by
/// <c>configuration.CommentListXpath</c> is turned into a <c>Comment</c>; comments whose
/// Content is null or empty are dropped.
/// </summary>
/// <param name="configuration">Supplies the XPath expressions for the comment list, author, title and publish date.</param>
/// <param name="doc">HTML document to search.</param>
/// <param name="job">Job whose Id, CompanyId and Url are copied onto each comment.</param>
/// <returns>The extracted comments; an empty list (never null) when no node matches.</returns>
public List<Comment> ParseComment(DsConfigurationComment.Configuration_CommentRow configuration, HtmlDocument doc, JobWaitAS job)
{
    var comments = new List<Comment>();

    var reviewNodes = doc.DocumentNode.SelectNodes(configuration.CommentListXpath);
    if (reviewNodes == null)
    {
        // No comment nodes matched the list XPath.
        return comments;
    }

    foreach (var reviewNode in reviewNodes)
    {
        // NOTE(review): Title is read through ContentXPath while Content uses a
        // hard-coded XPath — confirm this mapping is intentional.
        var comment = new Comment
        {
            Author = GetTextReview(reviewNode, configuration.AuthorXPath),
            Title = GetTextReview(reviewNode, configuration.ContentXPath),
            Content = GetTextReview(reviewNode, ".//div[@itemprop='description']"),
            DatePublish = GetTextReview(reviewNode, configuration.DatePostXPath),
            ProductId = job.Id,
            CompanyId = job.CompanyId,
            Url = job.Url
        };

        // Only comments that actually carry content are kept.
        if (string.IsNullOrEmpty(comment.Content))
        {
            continue;
        }

        comments.Add(comment);
    }

    return comments;
}