Ejemplo n.º 1
0
        /// <summary>
        /// Downloads the full offer page referenced by a teaser and copies the details
        /// found there (private-offer flag, full description, contact block, pictures)
        /// onto the teaser.
        /// </summary>
        /// <param name="teaser">Offer whose <c>Url</c> points at the full offer page.</param>
        /// <returns>
        /// The same <paramref name="teaser"/> instance, updated in place and with
        /// <c>Teaser</c> set to <c>false</c>.
        /// </returns>
        private Offer GetFullOffer(Offer teaser)
        {
            const string baseUrl = "http://ogloszenia.przemysl.pl/";

            var client = new HtmlClient();
            var html = client.GetHtml(teaser.Url);
            var dom = new CQ(html);

            // The header element may be missing; only index into the selection when it matched.
            var headerEls = dom[".wspolny_naglowek_tytul"];
            if (headerEls.Length > 0 && headerEls[0].InnerHTML.Contains("PRYWATNA"))
            {
                teaser.PrivateOffer = true;
            }

            // RenderSelection() already renders every matched element, so one call is enough.
            // When nothing matched, the description taken from the teaser is kept.
            var fullDescription = dom[".pokaz_ogloszenie_tresc"];
            if (fullDescription.Length > 0)
            {
                teaser.Description = TextHelper.CleanText(fullDescription.RenderSelection());
            }

            // Append the contact list markup with its <script> content stripped out.
            var contactEls = dom["ul.pokaz_ogloszenie"];
            if (contactEls.Length > 0)
            {
                var scriptRx = new Regex("<script.+script>", RegexOptions.Singleline);
                teaser.Description += scriptRx.Replace(contactEls[0].OuterHTML, "");
            }

            // Each picture's URL is taken from the href of the element wrapping the <img>.
            var pictures = new List<string>();
            foreach (var picture in dom["img.pokaz_ogloszenie_obrazek"])
            {
                var parent = picture.ParentNode;
                if (parent == null)
                {
                    continue;
                }

                string href = parent.Attributes["href"];
                if (string.IsNullOrEmpty(href))
                {
                    // No link target: skip rather than store the bare base URL.
                    continue;
                }

                pictures.Add(baseUrl + href);
            }

            // Keep whatever pictures the teaser already had when the full page has none.
            if (pictures.Count > 0)
            {
                teaser.Pictures = pictures;
            }

            teaser.Teaser = false;

            return teaser;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Replaces the index entry for an offer: deletes every document whose "Id" term
        /// equals the offer's id, then adds a new document built from the offer.
        /// </summary>
        /// <param name="offer">Offer to store; its <c>Id</c> is used as the lookup key.</param>
        /// <param name="writer">Open index writer; it is not committed or disposed here.</param>
        private void Save(Offer offer, IndexWriter writer)
        {
            // remove older index entry
            var searchQuery = new TermQuery(new Term("Id", offer.Id));
            writer.DeleteDocuments(searchQuery);

            // add new index entry
            var doc = new Document();

            // add lucene fields mapped to db fields
            // Field rejects null values, and scraped offers may lack a title, price or
            // description, so those are stored as empty strings instead.
            doc.Add(new Field("Id", offer.Id, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("Title", offer.Title ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Url", offer.Url, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("Price", offer.Price ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Description", offer.Description ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));

            var strDate = DateTools.DateToString(offer.Date, DateTools.Resolution.DAY);
            doc.Add(new Field("Date", strDate, Field.Store.YES, Field.Index.ANALYZED));

            // Boolean flags are stored as "1" / "0".
            doc.Add(new Field("Teaser", offer.Teaser ? "1" : "0", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("PrivateOffer", offer.PrivateOffer ? "1" : "0", Field.Store.YES, Field.Index.ANALYZED));

            // One "Pictures" field per picture URL.
            if (offer.Pictures != null)
            {
                foreach (var picture in offer.Pictures)
                {
                    if (picture == null)
                    {
                        continue;
                    }

                    doc.Add(new Field("Pictures", picture, Field.Store.YES, Field.Index.NOT_ANALYZED));
                }
            }

            doc.Add(new Field("Attractivenes", offer.Attractivenes.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("HaveSeen", offer.HaveSeen ? "1" : "0", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Hide", offer.Hide ? "1" : "0", Field.Store.YES, Field.Index.ANALYZED));

            // Notes are optional; the field is omitted entirely when there are none.
            if (!string.IsNullOrEmpty(offer.Notes))
            {
                doc.Add(new Field("Notes", offer.Notes, Field.Store.YES, Field.Index.ANALYZED));
            }

            // add entry to index
            writer.AddDocument(doc);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Extracts teaser offers from a listing page.
        /// </summary>
        /// <param name="dom">Parsed listing page.</param>
        /// <returns>
        /// One <c>Offer</c> (with <c>Teaser</c> set to <c>true</c>) per
        /// <c>h4.lista_ogloszen_w_kategorii</c> element that contains a usable offer link;
        /// an empty list when there are none.
        /// </returns>
        private List<Offer> GetTeasers(CQ dom)
        {
            const string baseUrl = "http://ogloszenia.przemysl.pl/";

            // Built once instead of once per offer.
            var dateRx = new Regex("([0-9]{4})-([0-9]{2})-([0-9]{2})");

            var result = new List<Offer>();

            foreach (var el in dom["h4.lista_ogloszen_w_kategorii"])
            {
                // Re-parse the element on its own so the selectors below are scoped to this offer.
                var offerDom = new CQ(el.OuterHTML);

                var links = offerDom["a.lista_ogloszen_link"];
                if (links.Length == 0)
                {
                    continue;
                }

                // The href doubles as the offer id; without it the offer cannot be keyed.
                string href = links[0].Attributes["href"];
                if (string.IsNullOrEmpty(href))
                {
                    continue;
                }

                var offer = new Offer();
                offer.Teaser = true;
                offer.Title = HttpUtility.HtmlDecode(links[0].InnerText);
                offer.Id = href;
                offer.Url = baseUrl + href;

                // Description: first non-empty span outside the footer.
                foreach (var span in offerDom["span:not(.stopka_ogloszenia) span"])
                {
                    var txt = TextHelper.CleanText(span.InnerHTML);
                    if (!string.IsNullOrEmpty(txt))
                    {
                        offer.Description = txt;
                        break;
                    }
                }

                // Pictures: the list is created lazily so it stays null when there are no images.
                foreach (var img in offerDom["span:not(.stopka_ogloszenia) span img"])
                {
                    if (offer.Pictures == null)
                    {
                        offer.Pictures = new List<string>();
                    }

                    offer.Pictures.Add(baseUrl + img.Attributes["src"]);
                }

                var price = offerDom["span.stopka_ogloszenia span"];
                if (price.Length > 0)
                {
                    offer.Price = HttpUtility.HtmlDecode(price[0].InnerText);
                }

                // Date: first yyyy-MM-dd occurrence in the footer text.
                var footer = offerDom["span.stopka_ogloszenia"];
                if (footer.Length > 0)
                {
                    var m = dateRx.Match(footer[0].InnerText);
                    DateTime parsed;

                    // The regex also matches impossible dates (e.g. month 13); TryParseExact
                    // leaves the date unset in that case instead of aborting the whole listing.
                    if (m.Success && DateTime.TryParseExact(m.Value,
                                        "yyyy-MM-dd",
                                        CultureInfo.InvariantCulture,
                                        DateTimeStyles.None,
                                        out parsed))
                    {
                        offer.Date = parsed;
                    }
                }

                result.Add(offer);
            }

            return result;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Stores a single offer in the index, opening an index writer for the duration
        /// of the call.
        /// </summary>
        /// <param name="offer">Offer to store.</param>
        public void Save(Offer offer)
        {
            var analyzer = new StandardAnalyzer(Version.LUCENE_30);
            try
            {
                // The using block disposes the writer; no explicit Dispose() call is needed.
                using (var writer = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    Save(offer, writer);
                }
            }
            finally
            {
                // Close the analyzer only after the writer is done with it, and also when saving fails.
                analyzer.Close();
            }
        }