예제 #1
0
      /// <summary>
      /// Parses comments out of <paramref name="document"/> for the given job and,
      /// when at least one is found, publishes them as a JSON string and logs the count.
      /// </summary>
      /// <param name="job">Job whose Id and Url are reported in the log entry.</param>
      /// <param name="document">Loaded HTML document handed to the comment parser.</param>
      private void AnalysicHtmlForCompany(JobWaitAS job, HtmlDocument document)
      {
          List <Comment> comments = _pareseComment.ParseComment(_configuration, document, job);

          // Nothing extracted: nothing to publish or log.
          if (comments.Count <= 0)
          {
              return;
          }

          // Comments are only published here; the direct _noSqlHtml.SaveComment call is disabled.
          string payload = Newtonsoft.Json.JsonConvert.SerializeObject(comments);
          _producerComment.PublishString(payload);

          // Log an empty string rather than a null URL.
          string url = job.Url ?? "";
          _log.Info(string.Format("Extract {0} comment from id {1} {2}", comments.Count, job.Id, url));
      }
예제 #2
0
      /// <summary>
      /// Handles one queue delivery: decodes the job from the message body, loads the
      /// stored HTML for that job, runs comment extraction on it, and acknowledges the
      /// delivery.
      /// </summary>
      /// <param name="message">Delivery whose Body carries the serialized job.</param>
      public override void ProcessMessage(BasicDeliverEventArgs message)
      {
          var jobAs = JobWaitAS.FromObjMQ(message.Body);

          if (jobAs == null)
          {
              // The body could not be turned into a job. Log it and fall through to the
              // ack; previously jobAs.ToJson() was called on the null job here, which
              // threw before the delivery could be acknowledged.
              _log.Info("Can't parse job from message");
          }
          else
          {
              var html = _noSqlHtml.GetHtml(jobAs.Id, jobAs.CompanyId);
              if (!string.IsNullOrEmpty(html))
              {
                  var document = new HtmlDocument();
                  document.LoadHtml(html);
                  AnalysicHtmlForCompany(jobAs, document);
              }
              else
              {
                  _log.Info("Html can't select from db");
              }

              _log.Info(string.Format("Processed for job {0}", jobAs.ToJson()));
          }

          // NOTE(review): the second argument is the "multiple" flag; with true the broker
          // treats every outstanding delivery up to this tag as acknowledged — confirm
          // that is intended for this consumer.
          GetChannel().BasicAck(message.DeliveryTag, true);
      }
예제 #3
0
        /// <summary>
        /// Builds one <c>Comment</c> per node matched by
        /// <c>configuration.CommentListXpath</c> and returns those whose Content is
        /// not null or empty.
        /// </summary>
        /// <param name="configuration">Row supplying the XPath expressions for the list, author, title and date.</param>
        /// <param name="doc">HTML document to search.</param>
        /// <param name="job">Job whose Id, CompanyId and Url are copied onto every comment.</param>
        /// <returns>The extracted comments; an empty list when no node matches.</returns>
        public List <Comment> ParseComment(DsConfigurationComment.Configuration_CommentRow configuration, HtmlDocument doc, JobWaitAS job)
        {
            var comments    = new List <Comment>();
            var reviewNodes = doc.DocumentNode.SelectNodes(configuration.CommentListXpath);

            // A null node collection is treated as "no comments".
            if (reviewNodes == null)
            {
                return comments;
            }

            foreach (var reviewNode in reviewNodes)
            {
                // NOTE(review): Title is read with ContentXPath while Content uses a
                // hard-coded XPath — confirm this mapping is intentional.
                var comment = new Comment
                {
                    Author      = GetTextReview(reviewNode, configuration.AuthorXPath),
                    Title       = GetTextReview(reviewNode, configuration.ContentXPath),
                    Content     = GetTextReview(reviewNode, ".//div[@itemprop='description']"),
                    DatePublish = GetTextReview(reviewNode, configuration.DatePostXPath),
                    ProductId   = job.Id,
                    CompanyId   = job.CompanyId,
                    Url         = job.Url
                };

                // Skip entries without body text.
                if (string.IsNullOrEmpty(comment.Content))
                {
                    continue;
                }

                comments.Add(comment);
            }

            return comments;
        }