/// <summary>
/// Downloads the detail page behind <paramref name="teaser"/>'s Url and fills the same
/// instance with the data found there: the private-offer flag, the full description
/// (followed by the contact list markup) and the picture links.
/// </summary>
/// <param name="teaser">Offer created from the listing page; it is modified in place.</param>
/// <returns>The same <paramref name="teaser"/> instance, with Teaser set to false.</returns>
private Offer GetFullOffer(Offer teaser)
{
    var client = new HtmlClient();
    var html = client.GetHtml(teaser.Url);
    var dom = new CQ(html);

    // The offer is flagged as private when the page header contains "PRYWATNA".
    // Guard the lookup so a page without that header does not abort the whole fetch.
    var headerEls = dom[".wspolny_naglowek_tytul"];
    if (headerEls.Length > 0 && headerEls[0].InnerHTML.Contains("PRYWATNA"))
    {
        teaser.PrivateOffer = true;
    }

    // RenderSelection() renders the whole selection at once, so a single assignment
    // is enough; the teaser description is kept when the page has no description node.
    var fullDescription = dom[".pokaz_ogloszenie_tresc"];
    if (fullDescription.Length > 0)
    {
        teaser.Description = TextHelper.CleanText(fullDescription.RenderSelection());
    }

    // Append the contact list markup with script blocks stripped. The match is lazy
    // and anchored on the closing tag so that markup sitting between two separate
    // script blocks is preserved instead of being removed together with them.
    var contactEls = dom["ul.pokaz_ogloszenie"];
    if (contactEls.Length > 0)
    {
        var scriptRx = new Regex("<script.*?</script>", RegexOptions.Singleline);
        teaser.Description += scriptRx.Replace(contactEls[0].OuterHTML, "");
    }

    // Each picture link is built from the href of the node that wraps the image.
    var pictures = new List<string>();
    foreach (var picture in dom["img.pokaz_ogloszenie_obrazek"])
    {
        pictures.Add("http://ogloszenia.przemysl.pl/" + picture.ParentNode.Attributes["href"]);
    }

    // Only replace the teaser's pictures when the detail page actually has some.
    if (pictures.Count > 0)
    {
        teaser.Pictures = pictures;
    }

    teaser.Teaser = false;
    return teaser;
}
/// <summary>
/// Writes <paramref name="offer"/> to the index through <paramref name="writer"/>,
/// replacing any existing entry that has the same Id.
/// </summary>
/// <param name="offer">Offer to store; Id, Title and Url must be set.</param>
/// <param name="writer">Open index writer; it is neither committed nor disposed here.</param>
private void Save(Offer offer, IndexWriter writer)
{
    // Build the complete document first, so that a failure while mapping the fields
    // cannot leave the index with the previous entry already removed.
    var doc = new Document();

    // add lucene fields mapped to db fields
    doc.Add(new Field("Id", offer.Id, Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("Title", offer.Title, Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("Url", offer.Url, Field.Store.YES, Field.Index.NOT_ANALYZED));

    // Price and Description are optional (GetTeasers only sets them when the listing
    // provides them) and a Lucene Field cannot be created from a null value, so the
    // field is simply left out in that case.
    if (offer.Price != null)
    {
        doc.Add(new Field("Price", offer.Price, Field.Store.YES, Field.Index.ANALYZED));
    }
    if (offer.Description != null)
    {
        doc.Add(new Field("Description", offer.Description, Field.Store.YES, Field.Index.ANALYZED));
    }

    var strDate = DateTools.DateToString(offer.Date, DateTools.Resolution.DAY);
    doc.Add(new Field("Date", strDate, Field.Store.YES, Field.Index.ANALYZED));

    // Boolean flags are stored as "1" / "0".
    doc.Add(new Field("Teaser", offer.Teaser ? "1" : "0", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("PrivateOffer", offer.PrivateOffer ? "1" : "0", Field.Store.YES, Field.Index.ANALYZED));

    // One "Pictures" field per picture URL.
    if (offer.Pictures != null)
    {
        foreach (var picture in offer.Pictures)
        {
            doc.Add(new Field("Pictures", picture, Field.Store.YES, Field.Index.NOT_ANALYZED));
        }
    }

    doc.Add(new Field("Attractivenes", offer.Attractivenes.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("HaveSeen", offer.HaveSeen ? "1" : "0", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("Hide", offer.Hide ? "1" : "0", Field.Store.YES, Field.Index.ANALYZED));

    if (!string.IsNullOrEmpty(offer.Notes))
    {
        doc.Add(new Field("Notes", offer.Notes, Field.Store.YES, Field.Index.ANALYZED));
    }

    // Remove the older entry with this Id and add the new one in a single operation.
    writer.UpdateDocument(new Term("Id", offer.Id), doc);
}
/// <summary>
/// Extracts the offer teasers from a listing page: one <see cref="Offer"/> per
/// "h4.lista_ogloszen_w_kategorii" element that contains an offer link.
/// </summary>
/// <param name="dom">Parsed listing page.</param>
/// <returns>The teasers in document order; empty when the page has no offers.</returns>
private List<Offer> GetTeasers(CQ dom)
{
    const string baseUrl = "http://ogloszenia.przemysl.pl/";

    // Created once and reused for every offer element.
    var dateRx = new Regex("([0-9]{4})-([0-9]{2})-([0-9]{2})");

    var result = new List<Offer>();
    foreach (var el in dom["h4.lista_ogloszen_w_kategorii"])
    {
        // Re-parse the element on its own so the selectors below only see this offer.
        var offerDom = new CQ(el.OuterHTML);

        var linkEls = offerDom["a.lista_ogloszen_link"];
        if (linkEls.Length == 0)
        {
            // Not an offer entry.
            continue;
        }

        // The link's href doubles as the offer Id and as the relative page address.
        var offer = new Offer();
        offer.Teaser = true;
        offer.Title = HttpUtility.HtmlDecode(linkEls[0].InnerText);
        offer.Id = linkEls[0].Attributes["href"];
        offer.Url = baseUrl + offer.Id;

        // Description: the first nested span (outside the footer) with non-empty text.
        foreach (var span in offerDom["span:not(.stopka_ogloszenia) span"])
        {
            var txt = TextHelper.CleanText(span.InnerHTML);
            if (!string.IsNullOrEmpty(txt))
            {
                offer.Description = txt;
                break;
            }
        }

        // Pictures: the list is only created when at least one image is present.
        foreach (var img in offerDom["span:not(.stopka_ogloszenia) span img"])
        {
            if (offer.Pictures == null)
            {
                offer.Pictures = new List<string>();
            }
            offer.Pictures.Add(baseUrl + img.Attributes["src"]);
        }

        // Price: the first span nested in the footer, when there is one.
        var priceEls = offerDom["span.stopka_ogloszenia span"];
        if (priceEls.Length > 0)
        {
            offer.Price = HttpUtility.HtmlDecode(priceEls[0].InnerText);
        }

        // Date: the first yyyy-MM-dd occurrence in the footer text. A digit sequence
        // that is not a real calendar date leaves Date at its default value instead
        // of aborting the whole listing.
        var footerEls = offerDom["span.stopka_ogloszenia"];
        if (footerEls.Length > 0)
        {
            var m = dateRx.Match(footerEls[0].InnerText);
            DateTime parsed;
            if (m.Success &&
                DateTime.TryParseExact(m.Groups[0].Value, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out parsed))
            {
                offer.Date = parsed;
            }
        }

        result.Add(offer);
    }

    return result;
}
/// <summary>
/// Stores a single offer in the index held in <c>_directory</c>, opening an index
/// writer for the duration of the call.
/// </summary>
/// <param name="offer">Offer to add, replacing any existing entry with the same Id.</param>
public void Save(Offer offer)
{
    var analyzer = new StandardAnalyzer(Version.LUCENE_30);
    try
    {
        // The using block disposes the writer exactly once, including on failure.
        using (var writer = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            Save(offer, writer);
        }
    }
    finally
    {
        // Close the analyzer only after the writer that uses it has been disposed,
        // and also when saving throws.
        analyzer.Close();
    }
}