/// <summary>
/// Entry point: loads the lookup tables from the import database, crawls every
/// configured tipologia for both contract kinds ("vendita" and "affitto"), and
/// reports the outcome through <c>sendStatus</c>.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
/// <remarks>
/// The connection is held in a <c>using</c> block so it is closed on failure as
/// well as on success. Any exception raised inside the try block is logged and
/// reported as a failed run; it is not rethrown.
/// NOTE(review): the “xxxx” literals look like redacted placeholders in this copy
/// of the file - confirm the real values before building.
/// </remarks>
static void Main(string[] args)
{
    idPortale = (byte)Convert.ToInt32(ConfigurationManager.AppSettings["idPortale"]);
    try
    {
        logger = new Log(typeof(Program), Assembly.GetExecutingAssembly().GetName().Name);
        logger.Info("Crawler: Start crawling. " + DateTime.Now.ToLongDateString());

        // One entry per property type to crawl; every entry uses category 1.
        dbparams[] tipologie = new dbparams[]
        {
            new dbparams { tipologia = “xxxx”, idtipologia = 1, idcategoria = 1 },
            new dbparams { tipologia = “xxxx”, idtipologia = 3, idcategoria = 1 },
            new dbparams { tipologia = “xxxx”, idtipologia = 4, idcategoria = 1 },
            new dbparams { tipologia = “xxxx”, idtipologia = 14, idcategoria = 1 },
            new dbparams { tipologia = “xxxx”, idtipologia = 11, idcategoria = 1 },
            new dbparams { tipologia = “xxxx”, idtipologia = 2, idcategoria = 1 },
            new dbparams { tipologia = “xxxx”, idtipologia = 10, idcategoria = 1 },
            new dbparams { tipologia = “xxxx”, idtipologia = 141, idcategoria = 1 }
        };

        using (SqlConnection connImport01 = new SqlConnection(ConfigurationManager.ConnectionStrings["DBIMPORT01ConnectionString"].ConnectionString))
        {
            connImport01.Open();

            loadComuniConQuartieri(connImport01);
            loadProvince(connImport01);

            foreach (dbparams t in tipologie)
            {
                logger.Info("Crawling: " + t.tipologia + " vendita");
                crawl(connImport01, "vendita", t);
                logger.Info("Crawling: " + t.tipologia + " affitto");
                crawl(connImport01, "affitto", t);
            }
        }

        esitoSuccess = true;
        sendStatusWithElapsed();
    }
    catch (Exception ex)
    {
        // logger is still null when the Log constructor itself is what failed.
        if (logger != null)
        {
            logger.Error("Errore", ex);
        }
        esitoSuccess = false;
        sendStatusWithElapsed();
    }
}

/// <summary>
/// Runs the stored procedure for the current <c>idPortale</c> and adds the
/// lower-cased "Comune" column of every returned row to <c>comuniConQuartieri</c>.
/// A <see cref="SqlException"/> is logged and swallowed; other exceptions propagate.
/// </summary>
private static void loadComuniConQuartieri(SqlConnection connImport01)
{
    using (SqlCommand cmd = new SqlCommand())
    {
        cmd.CommandText = “xxxx”;
        cmd.CommandType = CommandType.StoredProcedure;
        cmd.Connection = connImport01;
        cmd.CommandTimeout = 600;
        cmd.Parameters.Add("@IdPortale", SqlDbType.TinyInt).Value = idPortale;
        try
        {
            using (SqlDataAdapter adapt = new SqlDataAdapter())
            using (DataTable dTable = new DataTable())
            {
                adapt.SelectCommand = cmd;
                adapt.Fill(dTable);
                foreach (DataRow dRow in dTable.Rows)
                {
                    comuniConQuartieri.Add(((string)dRow["Comune"]).ToLower());
                }
            }
        }
        catch (SqlException ex)
        {
            logger.Error("Errore nella select comuni con quartieri", ex);
        }
    }
}

/// <summary>
/// Runs the stored procedure that lists the provinces and adds the
/// "SiglaProvincia" column of every returned row to <c>province</c>.
/// A <see cref="SqlException"/> is logged and swallowed; other exceptions propagate.
/// </summary>
private static void loadProvince(SqlConnection connImport01)
{
    using (SqlCommand cmd = new SqlCommand())
    {
        cmd.CommandText = “xxxx”;
        cmd.CommandType = CommandType.StoredProcedure;
        cmd.Connection = connImport01;
        cmd.CommandTimeout = 600;
        try
        {
            using (SqlDataAdapter adapt = new SqlDataAdapter())
            using (DataTable dTable = new DataTable())
            {
                adapt.SelectCommand = cmd;
                adapt.Fill(dTable);
                foreach (DataRow dRow in dTable.Rows)
                {
                    province.Add((string)dRow["SiglaProvincia"]);
                }
            }
        }
        catch (SqlException ex)
        {
            logger.Error("Errore nella select province", ex);
        }
    }
}

/// <summary>
/// Computes the seconds elapsed since <c>inizioProcedura</c> and forwards the
/// current outcome counters to <c>sendStatus</c>.
/// </summary>
private static void sendStatusWithElapsed()
{
    double seconds = DateTime.Now.Subtract(inizioProcedura).TotalSeconds;
    sendStatus(esitoSuccess, esitoAnnunciLavorati, esitoErrors, seconds, esitoErrorsNotes);
}
/// <summary>
/// Picks the tipologia id for an ad by looking for keywords from
/// <c>tip.disambiguation</c> inside the lower-cased ad title.
/// </summary>
/// <param name="ad">The ad being classified; its <c>idtipologia</c> is the fallback and its <c>title</c> is searched.</param>
/// <param name="tip">Crawl parameters; <c>disambiguation</c> is read as a ';'-separated list of <c>keyword=id</c> pairs.</param>
/// <returns>
/// The id of the last pair whose keyword occurs in the title, or
/// <c>ad.idtipologia</c> when nothing matches or there is nothing to match against.
/// </returns>
/// <remarks>
/// Segments that are empty, lack an '=', or have an empty keyword are skipped:
/// an empty keyword is contained in every string, so a trailing ';' used to end
/// in an out-of-range access on the missing id. A non-numeric id still makes
/// <see cref="Convert.ToInt32(string)"/> throw, as before.
/// The keyword is compared as written against the lower-cased title.
/// </remarks>
static int disambiguateTipologia(adResult ad, dbparams tip)
{
    int t = ad.idtipologia;

    if (string.IsNullOrEmpty(tip.disambiguation) || string.IsNullOrEmpty(ad.title))
    {
        return t;
    }

    // Lower-case once instead of once per pair.
    string titleLower = ad.title.ToLower();

    foreach (string pair in tip.disambiguation.Split(';'))
    {
        string[] values = pair.Split('=');
        if (values.Length < 2 || values[0].Length == 0)
        {
            continue;
        }

        if (titleLower.Contains(values[0]))
        {
            // No break: a later matching pair overrides an earlier one.
            t = Convert.ToInt32(values[1]);
        }
    }

    return t;
}
/// <summary>
/// Crawls the listing pages of one tipologia and one contract kind for every
/// entry in <c>province</c>, and passes each ad flagged as new to <c>saveAd</c>.
/// </summary>
/// <param name="connImport01">Connection handed to getComuneByName, getQuartierePortaleByName and saveAd.</param>
/// <param name="contratto">Contract kind; used as a URL segment, and its first character, upper-cased, is stored on the ad.</param>
/// <param name="tip">Tipologia parameters: tipologia (URL segment), idtipologia, idcategoria, and the disambiguation rules used by disambiguateTipologia.</param>
/// <remarks>
/// Steps, in the order the code performs them:
/// 1. Build one url object per province. The category segment is chosen from
///    tip.idcategoria (1 or 2; any other value leaves it empty) and the province
///    code is lower-cased for the URL.
/// 2. For each URL: log it, download it with Functions.GetHtmlDocumentByUrl and
///    look up the element with id results. A URL without that element is skipped.
/// 3. Read the total number of results with Functions.extractFirstOccur. The URL
///    is skipped unless the value is numeric and different from 0. Dots are then
///    removed and the page count is the total divided by resultsPerPage, rounded up.
/// 4. For each page from 1 to the page count: page 1 reuses the document already
///    downloaded, later pages are fetched by replacing lista-1 with lista-N in
///    the URL. Result nodes are taken from the element with id searchResultsTbl
///    by the two resultBody class names.
/// 5. The first result node that does not contain the newIcon marker ends the
///    work on the current URL: u is incremented by hand and control jumps to the
///    ForUrls label, or the method returns when that was the last URL. The for
///    statement increments u again only after a body that completes normally, so
///    no URL is skipped.
///    NOTE(review): presumably the site lists new ads first, so the first
///    non-new ad means nothing new follows - confirm.
/// 6. For a new ad: the id is the node id with every t removed; the city is cut
///    out of the ad URL after the tipologia-province- prefix up to the next dash,
///    with + turned into a space; the image source is cleared when it contains
///    placeholder.jpg; the date is DateTime.Now. The comune is resolved with
///    getComuneByName. When no comune id results, nothing further happens for
///    that ad: no detail page is fetched and saveAd is not called.
/// 7. With a comune: the quartiere is resolved only when the city is in
///    comuniConQuartieri, otherwise both quartiere ids are set to 0. The detail
///    page is then downloaded and title, price, description, mq and locali are
///    extracted. A non-empty description replaces the title. A title longer than
///    2000 characters is cut to 1997 plus three dots. Non-numeric price, mq and
///    locali become 0. idtipologia is re-evaluated by disambiguateTipologia and
///    the ad is passed to saveAd.
///
/// Error handling: there are three nested catch blocks (per ad, per page, whole
/// method). Each writes the message to the console, logs it, increments
/// esitoErrors and appends the message to esitoErrorsNotes. No exception leaves
/// this method.
///
/// NOTE(review): the IsNumeric check on the total runs before the dots are
/// removed; whether a dotted total passes it depends on the IsNumeric extension,
/// which is not visible here - confirm.
/// NOTE(review): baseHtml is assigned but never read.
/// NOTE(review): the category, URL-format and pagination literals use
/// typographic quote characters (the pagination one opens with a straight quote
/// and closes with a typographic one). They look like redacted placeholders and
/// are not valid C# as written. The code below is therefore left exactly as
/// found, on its original lines.
/// </remarks>
static void crawl(SqlConnection connImport01, string contratto, dbparams tip) { try { ArrayList urls = new ArrayList(); foreach (string siglaProvincia in province) { string categoria = ""; switch (tip.idcategoria) { case 1: categoria = “xxxx”; break; case 2: categoria = “xxxx”; break; } url uIT = new url(); uIT.uri = string.Format(“xxxxxxxxxxxxxxxxxxx”, contratto, categoria, tip.tipologia, siglaProvincia.ToLower() ); uIT.siglaprovincia = siglaProvincia; urls.Add(uIT); } for (int u = 0; u < urls.Count; u++) { ForUrls: url uu = (url)urls[u]; logger.Info(uu.uri); string baseUrl = uu.uri; HtmlDocument basedom = new HtmlDocument(); basedom = Functions.GetHtmlDocumentByUrl(baseUrl, logger); string baseHtml = basedom.DocumentNode.InnerHtml; HtmlNode content = basedom.GetElementbyId("results"); if (content != null) { string pagination_results = Functions.extractFirstOccur(content.InnerHtml, " di * xxxx”); if (pagination_results.IsNumeric() && pagination_results.Trim() != "0") { string limit = pagination_results; limit = limit.Replace(".", ""); if (Functions.IsNumeric(limit)) { int numPages = Convert.ToInt32(limit) / resultsPerPage; if (numPages * resultsPerPage < Convert.ToInt32(limit)) { numPages += 1; } for (int x = 1; x <= numPages; x++) { try { Console.WriteLine("Pagina " + x.ToString()); HtmlDocument pagedom = new HtmlDocument(); if (x > 1) { pagedom = Functions.GetHtmlDocumentByUrl(baseUrl.Replace("lista-1", "lista-" + x.ToString()), logger); } else { pagedom = basedom; } HtmlNode searchResults = pagedom.GetElementbyId("searchResultsTbl"); if (searchResults != null) { ArrayList results = Functions.getElementsByClass(searchResults, "resultBody first tier1", "resultBody tier1"); foreach (HtmlNode hn in results) { try { bool nuovoAnnuncio = hn.InnerHtml.Contains("<div class=\"newIcon\"></div>"); if (!nuovoAnnuncio) { u++; if (u == urls.Count) { return; } else { goto ForUrls; } } if (nuovoAnnuncio) { adResult ad = new adResult(); ad.idtipologia = tip.idtipologia; 
ad.idcategoria = tip.idcategoria; ad.url = Functions.extractFirstOccur(hn.InnerHtml, "href=\"*\""); ad.id = hn.Id.Replace("t", ""); string city = Functions.retrieveStringAfterChars(ad.url, tip.tipologia + "-" + uu.siglaprovincia.ToLower() + "-"); city = Functions.retrieveStringBeforeChars(city, "-"); city = city.Replace("+", " "); if (ad.url != "") { ad.url = ConfigurationManager.AppSettings["urlPortale"] + ad.url; } string imgSrc = Functions.extractFirstOccur(hn.InnerHtml, "data-src='*'"); if (imgSrc.Contains("placeholder.jpg")) { imgSrc = ""; } ad.imgSrc = imgSrc; ad.date = DateTime.Now; comune comuneByName = getComuneByName(connImport01, city); if (comuneByName != null) { ad.idcomune = comuneByName.IDComune.ToString(); ad.city = comuneByName.Comune; } if (ad.idcomune != "" && ad.idcomune != null) { string zone = Functions.extractFirstOccur(hn.InnerHtml, "<p class=\"zone\">*</p>"); if (comuniConQuartieri.Contains(city)) { quartiereportale qp = getQuartierePortaleByName(connImport01, comuneByName.IDComune, zone); if (qp != null) { ad.idquartiere = qp.idquartiere; ad.idquartiereportale = qp.nrtavola; } } else { ad.idquartiereportale = 0; ad.idquartiere = 0; } ad.zona = zone; HtmlDocument basedomScheda = new HtmlDocument(); basedomScheda = Functions.GetHtmlDocumentByUrl(ad.url, logger); string htmlScheda = basedomScheda.DocumentNode.InnerHtml; ad.title = Functions.extractFirstOccur(htmlScheda, "<title>*-", "", true); Console.WriteLine(ad.id + " - " + ad.city + " - " + ad.title); string pr = Functions.extractFirstOccur(htmlScheda, "class=\"price\"><span class=\"hidden\">€ *</span>", "", true).Replace(".", ""); if (Functions.IsNumeric(pr)) { ad.price = Convert.ToDecimal(pr); } else { ad.price = 0; } string description = Functions.extractFirstOccur(htmlScheda, "<p class=\"body\">*</p>"); if (description != "") { ad.title = description; } if (ad.title.Length > 2000) { ad.title = ad.title.Substring(0, 1997) + "..."; } ad.idtipologia = disambiguateTipologia(ad, tip); 
string mq = Functions.extractFirstOccur(htmlScheda, "<li>Metri quadri:<span>* mq</span></li>"); string locali = Functions.extractFirstOccur(htmlScheda, "<li>Locali:<span>*</span></li>"); if (Functions.IsNumeric(locali)) { ad.locali = Convert.ToInt32(locali); } else { ad.locali = 0; } if (Functions.IsNumeric(mq)) { ad.mq = Convert.ToInt32(mq); } else { ad.mq = 0; } ad.contratto = contratto.Substring(0, 1).ToUpper(); saveAd(connImport01, ad); } } } catch (Exception ex) { Console.WriteLine(ex.Message); logger.Error("Errore", ex); esitoErrors += 1; esitoErrorsNotes += "\r\n" + ex.Message; } } } } catch (Exception ex) { Console.WriteLine(ex.Message); logger.Error("Errore", ex); esitoErrors += 1; esitoErrorsNotes += "\r\n" + ex.Message; } } } } } } } catch (Exception ex) { Console.WriteLine(ex.Message); logger.Error("Errore", ex); esitoErrors += 1; esitoErrorsNotes += "\r\n" + ex.Message; } }