예제 #1
0
        /// <summary>
        /// Builds an <see cref="Offer"/> from a listing node. The title and link are read from the
        /// node's first anchor; the linked page is then downloaded to extract the date, the
        /// description text and the province/city pair. The source is set to "Gumtree".
        /// </summary>
        /// <param name="node">Listing node whose first <c>a</c> child carries the offer link and title.</param>
        /// <returns>
        /// The populated offer; <c>Sha1</c> is the result of <c>OfferHelper.GenerateSha1</c> on the offer text.
        /// </returns>
        /// <remarks>
        /// The anchor, page body, date cell, description span and locality meta tag are dereferenced
        /// without null checks, so a page lacking any of them makes this method throw.
        /// A date that cannot be parsed leaves <c>Date</c> at <c>default(DateTime)</c>, and a locality
        /// without a '/' separator leaves <c>City</c> empty.
        /// </remarks>
        public Offer ParseOffer(HtmlNode node)
        {
            // Evaluate the XPath once; both the link and the title come from the same anchor.
            var anchor = node.SelectSingleNode("a[1]");

            var offer = new Offer
            {
                Title = anchor.InnerText,
                Link  = anchor.Attributes["href"].Value
            };

            var request = WebRequest.Create(offer.Link);

            // Dispose the WebResponse itself as well as its stream, so the response is released
            // even when obtaining or reading the stream fails.
            using (var response = request.GetResponse())
            using (var stream = response.GetResponseStream())
            {
                var document = new HtmlDocument();
                document.Load(stream, true);

                var body     = document.DocumentNode.SelectSingleNode("//body");
                var dateText = body.SelectSingleNode("//td[@class='first_row']").InnerText.Trim();
                var text     = body.SelectSingleNode("//span[@id='preview-local-desc']").InnerText;
                var address  = document.DocumentNode.SelectSingleNode("//meta[@property='og:locality']").Attributes["content"].Value;

                // Only the first ten characters carry the date. Shorter text is passed through
                // unchanged and simply fails to parse instead of throwing from Substring.
                if (dateText.Length > 10)
                {
                    dateText = dateText.Substring(0, 10);
                }

                dateText = dateText.Replace("/", "-");

                // The locality is read as "province/city"; tolerate a value without the separator.
                var provinceAndCity = address.Trim().Split('/');
                var province        = provinceAndCity[0];
                var city            = provinceAndCity.Length > 1 ? provinceAndCity[1] : "";

                // The format is fixed and numeric, so parse with the invariant culture: a UI culture
                // with a non-Gregorian default calendar would otherwise misinterpret the year.
                DateTime dateParsed;
                DateTime.TryParseExact(dateText, "dd-MM-yyyy", CultureInfo.InvariantCulture,
                                       DateTimeStyles.AllowWhiteSpaces, out dateParsed);

                offer.Text     = text;
                offer.City     = city;
                offer.Province = province;
                offer.Date     = dateParsed;
                offer.Source   = "Gumtree";
            }

            offer.Sha1 = OfferHelper.GenerateSha1(offer.Text);
            return offer;
        }
예제 #2
0
        /// <summary>
        /// Builds an <see cref="Offer"/> from a listing row. The link is taken from the first anchor
        /// in the <c>position</c> cell, and the date and province from the <c>date</c> cell, whose
        /// inner HTML is split on <c>&lt;br&gt;</c>. The linked page is then downloaded to fill in
        /// the title (first <c>strong</c> element) and the text (inner HTML of the body).
        /// The source is set to "GoldenLine" and the city is left empty.
        /// </summary>
        /// <param name="node">Listing row containing the <c>position</c> and <c>date</c> cells.</param>
        /// <returns>
        /// The populated offer; <c>Sha1</c> is the result of <c>OfferHelper.GenerateSha1</c> on the offer text.
        /// </returns>
        /// <remarks>
        /// The cells, the anchor, the page body and the title element are dereferenced without null
        /// checks, so a row or page lacking any of them makes this method throw.
        /// A date that cannot be parsed leaves <c>Date</c> at <c>default(DateTime)</c>.
        /// </remarks>
        public Offer ParseOffer(HtmlNode node)
        {
            var positionCell = node.SelectSingleNode("td[@class='position']");
            string link      = positionCell.SelectSingleNode("a[1]").Attributes["href"].Value;

            var dateCell        = node.SelectSingleNode("td[@class='date']");
            var dateAndProvince = dateCell.InnerHtml.Split(new string[] { "<br>" }, StringSplitOptions.None);

            var date = dateAndProvince[0];

            // A cell without a <br> yields a single part; use an empty province rather than
            // indexing past the end of the array.
            var province = dateAndProvince.Length > 1 ? dateAndProvince[1] : "";

            DateTime dateParsed;
            DateTime.TryParse(date, out dateParsed);

            var offer = new Offer
            {
                Link     = link,
                Date     = dateParsed,
                Province = province,
            };

            var request = WebRequest.Create(offer.Link);

            // Dispose the WebResponse itself as well as its stream, so the response is released
            // even when obtaining or reading the stream fails.
            using (var response = request.GetResponse())
            using (var stream = response.GetResponseStream())
            {
                var document = new HtmlDocument();
                document.Load(stream, true);

                var body        = document.DocumentNode.SelectSingleNode("//body");
                var title       = document.DocumentNode.SelectSingleNode("//strong[1]").InnerText;
                var textRawHtml = body.InnerHtml;

                offer.City   = "";
                offer.Title  = title;
                offer.Text   = textRawHtml;
                offer.Source = "GoldenLine";
            }

            offer.Sha1 = OfferHelper.GenerateSha1(offer.Text);
            return offer;
        }
예제 #3
0
        /// <summary>
        /// Builds an <see cref="Offer"/> from a listing node. The link comes from the node's first
        /// anchor, the title from the heading anchor inside the <c>info</c> div, and the date from
        /// one of two alternative locations inside the <c>description</c> div. The linked page is
        /// then downloaded and the inner HTML of the first div under <c>divPostContent</c> becomes
        /// the offer text. The source is set to "CodeGuru"; city and province are left empty.
        /// </summary>
        /// <param name="node">Listing node describing a single offer.</param>
        /// <returns>
        /// The populated offer; <c>Sha1</c> is the result of <c>OfferHelper.GenerateSha1</c> on the offer text.
        /// </returns>
        /// <remarks>
        /// The link, title and post-content elements are dereferenced without null checks, so a
        /// node or page lacking any of them makes this method throw. A missing or unparsable date
        /// leaves <c>Date</c> at <c>default(DateTime)</c>.
        /// </remarks>
        public Offer ParseOffer(HtmlNode node)
        {
            string link  = node.SelectSingleNode("a[1]").Attributes["href"].Value;
            string title = node.SelectSingleNode("div[@class='info']/div[1]/h3[1]/a[1]").InnerText;

            var offer = new Offer
            {
                Link  = link,
                Title = title,
            };

            // The date lives in one of two places depending on the listing layout.
            var date = node.SelectSingleNode("div[@class='description']/dl[1]/dd[2]/div/div") ??
                       node.SelectSingleNode("div[@class='description']/dl[1]/dd[1]/span[1]");

            // The date is best-effort (TryParse already tolerates bad text), so a listing where
            // neither location exists keeps the default date instead of throwing on a null node.
            DateTime dateParsed = default(DateTime);
            if (date != null)
            {
                DateTime.TryParse(date.InnerText, out dateParsed);
            }

            offer.Date = dateParsed;

            var request = WebRequest.Create(offer.Link);

            // Dispose the WebResponse itself as well as its stream, so the response is released
            // even when obtaining or reading the stream fails.
            using (var response = request.GetResponse())
            using (var stream = response.GetResponseStream())
            {
                var document = new HtmlDocument();
                document.Load(stream, true);

                var body        = document.DocumentNode.SelectSingleNode("//body");
                var textRawHtml = body.SelectSingleNode("//div[@class='divPostContent']/div[1]").InnerHtml;

                offer.City     = "";
                offer.Province = "";
                offer.Text     = textRawHtml;
                offer.Source   = "CodeGuru";
            }

            offer.Sha1 = OfferHelper.GenerateSha1(offer.Text);
            return offer;
        }
예제 #4
0
        /// <summary>
        /// Downloads the page at <c>_requestUriString</c>, selects the offer nodes matching
        /// <c>_nodesXPath</c> and processes each one: the node is parsed with <c>_parser</c>, the
        /// resulting offer is passed to <c>_repository.InsertOffer</c>, and when that returns
        /// <c>true</c> the offer text is scanned for the repository's keywords, which are then
        /// stored for the offer's SHA-1.
        /// </summary>
        /// <remarks>
        /// An exception raised while handling one node is logged at Fatal level and does not stop
        /// the remaining nodes from being processed. Failures while requesting or loading the
        /// listing page itself are not caught here. The method returns without doing anything
        /// when there is no response stream or no node matches the XPath.
        /// </remarks>
        public virtual void Crawl()
        {
            var request = WebRequest.Create(_requestUriString);

            // Dispose the WebResponse itself as well as its stream, so the response is released
            // even when obtaining or reading the stream fails.
            using (var response = request.GetResponse())
            using (var stream = response.GetResponseStream())
            {
                if (stream == null)
                {
                    return;
                }

                var document = new HtmlDocument();
                document.Load(stream, true);

                // HtmlAgilityPack's SelectNodes returns null, not an empty collection, when the
                // XPath matches nothing; iterating it directly would throw.
                var nodes = document.DocumentNode.SelectNodes(_nodesXPath);
                if (nodes == null)
                {
                    return;
                }

                var allKeywords = _repository.GetAllKeywords();

                foreach (var node in nodes)
                {
                    try
                    {
                        var offer = _parser.ParseOffer(node);
                        if (_repository.InsertOffer(offer))
                        {
                            var offerKeywords = OfferHelper.ScanTextForKeywords(offer.Text, allKeywords);
                            _repository.InsertKeywordsForOffer(offer.Sha1, offerKeywords);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Deliberately broad: one malformed offer must not abort the whole crawl.
                        var log = NLog.LogManager.GetCurrentClassLogger();
                        log.LogException(NLog.LogLevel.Fatal, "Exception occured", ex);
                    }
                }
            }
        }
예제 #5
0
        /// <summary>
        /// Builds an <see cref="Offer"/> from a listing node. Nodes containing a <c>ul</c> child are
        /// skipped by returning <c>Offer.Empty</c>. Otherwise the relative link of the
        /// <c>offerLink</c> anchor is prefixed with "http://pracuj.pl", the linked page is downloaded,
        /// and the title, text, date, city and province are read from it. The source is set to "PracujPL".
        /// </summary>
        /// <param name="node">Listing node describing a single offer.</param>
        /// <returns>
        /// <c>Offer.Empty</c> for nodes with a <c>ul</c> child; otherwise the populated offer, whose
        /// <c>Sha1</c> is the result of <c>OfferHelper.GenerateSha1</c> on the offer text.
        /// </returns>
        /// <remarks>
        /// The link anchor, page body, content div, title heading, a <c>dd</c> containing a
        /// yyyy-MM-dd date and a <c>dt</c> reading "Lokalizacja:" are all required; the method throws
        /// when any of them is missing. A date that cannot be parsed leaves <c>Date</c> at
        /// <c>default(DateTime)</c>.
        /// </remarks>
        public Offer ParseOffer(HtmlNode node)
        {
            if (node.SelectSingleNode("ul") != null)
            {
                return Offer.Empty;
            }

            string link = node.SelectSingleNode("div//a[@class='offerLink']").Attributes["href"].Value;

            var offer = new Offer
            {
                Link = string.Format("http://pracuj.pl{0}", link),
            };

            var request = WebRequest.Create(offer.Link);

            // Dispose the WebResponse itself as well as its stream, so the response is released
            // even when obtaining or reading the stream fails.
            using (var response = request.GetResponse())
            using (var stream = response.GetResponseStream())
            {
                var document = new HtmlDocument();
                document.Load(stream, true);

                var body        = document.DocumentNode.SelectSingleNode("//body");
                var rawHtmlText = body.SelectSingleNode("//div[@id='offCont']").InnerHtml;

                var dateRegex = new Regex("\\d{4}-\\d{2}-\\d{2}");

                // First <dd> whose text contains something shaped like a date.
                var date = body.SelectNodes("//dd").First(x => dateRegex.IsMatch(x.InnerText)).InnerText;

                // The element two siblings after the "Lokalizacja:" label holds the address parts.
                var addressContainer = body.SelectNodes("//dt").First(x => x.InnerText == "Lokalizacja:").NextSibling.NextSibling;

                string city     = "";
                string province = "";

                string title = body.SelectSingleNode("//h1[@class='offerTitle']").InnerText;

                if (addressContainer.ChildNodes.Count > 3)
                {
                    city     = addressContainer.ChildNodes.ElementAt(1).InnerText;
                    province = addressContainer.ChildNodes.ElementAt(3).InnerText;
                }
                else if (addressContainer.ChildNodes.Count > 0)
                {
                    province = addressContainer.ChildNodes.ElementAt(0).InnerText;
                }
                // An empty container leaves both values empty instead of throwing from ElementAt(0).

                // Try the whole text first, as before. If surrounding text makes that fail, fall
                // back to just the substring the regex matched, so the date is not lost.
                DateTime dateParsed;
                if (!DateTime.TryParse(date, out dateParsed))
                {
                    DateTime.TryParseExact(dateRegex.Match(date).Value, "yyyy-MM-dd",
                                           System.Globalization.CultureInfo.InvariantCulture,
                                           System.Globalization.DateTimeStyles.None, out dateParsed);
                }

                offer.Title    = title;
                offer.Text     = rawHtmlText;
                offer.City     = city;
                offer.Province = province;
                offer.Date     = dateParsed;
                offer.Source   = "PracujPL";
            }

            offer.Sha1 = OfferHelper.GenerateSha1(offer.Text);
            return offer;
        }