Exemple #1
0
 /// <summary>
 /// Picks the tipologia id for an ad by looking for configured keywords in the ad title.
 /// </summary>
 /// <param name="ad">Ad whose <c>title</c> is searched and whose <c>idtipologia</c> is the fallback.</param>
 /// <param name="tip">Settings whose <c>disambiguation</c> string is a ';'-separated list of <c>keyword=id</c> pairs.</param>
 /// <returns>
 /// The id of the last pair whose keyword occurs in the lower-cased title, or
 /// <c>ad.idtipologia</c> when no rule is configured or none matches.
 /// </returns>
 /// <exception cref="FormatException">A matching pair carries an id that is not a valid integer.</exception>
 static int disambiguateTipologia(adResult ad, dbparams tip)
 {
     int t = ad.idtipologia;

     // Nothing to do without rules or without a title to search.
     if (string.IsNullOrEmpty(tip.disambiguation) || ad.title == null)
     {
         return t;
     }

     // Lower-case once; keywords are compared as written in the configuration.
     string loweredTitle = ad.title.ToLowerInvariant();

     foreach (string pair in tip.disambiguation.Split(';'))
     {
         string[] values = pair.Split('=');

         // A trailing ';' or a pair without '=' has no id part; skip it instead of
         // indexing past the end of the array.
         if (values.Length < 2)
         {
             continue;
         }

         if (loweredTitle.Contains(values[0]))
         {
             // Later pairs override earlier ones.
             t = Convert.ToInt32(values[1]);
         }
     }

     return t;
 }
Exemple #2
0
        /// <summary>
        /// Stores one crawled ad by executing the <c>Crawler_Inserimento</c> stored procedure.
        /// </summary>
        /// <param name="conn">Connection the command is executed on.</param>
        /// <param name="ad">Ad to store.</param>
        /// <returns>
        /// The value returned by <c>ExecuteNonQuery</c>, or 0 when the ad is incomplete
        /// or the stored procedure raised a <see cref="SqlException"/>.
        /// </returns>
        /// <remarks>
        /// An ad is stored only when id, comune, url and contratto are non-empty and both
        /// tipologia and categoria ids are positive. A <see cref="SqlException"/> is logged and
        /// counted in <c>esitoErrors</c>; other exception types propagate to the caller.
        /// </remarks>
        static int saveAd(SqlConnection conn, adResult ad)
        {
            int r = 0;

            // Every required string gets the same null-or-empty test, so a null never
            // reaches the stored procedure as a missing parameter value.
            bool isComplete =
                !string.IsNullOrEmpty(ad.id) &&
                !string.IsNullOrEmpty(ad.idcomune) &&
                !string.IsNullOrEmpty(ad.url) &&
                !string.IsNullOrEmpty(ad.contratto) &&
                ad.idtipologia > 0 &&
                ad.idcategoria > 0;
            if (!isComplete)
            {
                return r;
            }

            try
            {
                using (SqlCommand cmd = new SqlCommand())
                {
                    cmd.CommandText = "Crawler_Inserimento";
                    cmd.CommandType = CommandType.StoredProcedure;
                    cmd.Connection = conn;
                    cmd.CommandTimeout = 600;

                    cmd.Parameters.Add("@IDPortale", SqlDbType.TinyInt).Value = Convert.ToInt32(ConfigurationManager.AppSettings["idPortale"]);
                    cmd.Parameters.Add("@IDImmobileImportato", SqlDbType.Int).Value = ad.id;
                    cmd.Parameters.Add("@Lingua", SqlDbType.Char).Value = "IT";
                    cmd.Parameters.Add("@CodiceNazione", SqlDbType.Char).Value = "IT";
                    cmd.Parameters.Add("@CodiceComune", SqlDbType.VarChar).Value = ad.idcomune;

                    // A non-positive quartiere id is sent as SQL NULL.
                    cmd.Parameters.Add("@CodiceQuartiere", SqlDbType.Int).Value =
                        ad.idquartiere > 0 ? (object)ad.idquartiere : DBNull.Value;

                    // NOTE(review): zona, imgSrc and title are passed through unchanged; if any
                    // of them can be null, confirm how the procedure treats an unset value.
                    cmd.Parameters.Add("@IDQuartierePortale", SqlDbType.Int).Value = ad.idquartiereportale;
                    cmd.Parameters.Add("@Zona", SqlDbType.NVarChar).Value = ad.zona;
                    cmd.Parameters.Add("@Categoria", SqlDbType.TinyInt).Value = ad.idcategoria;
                    cmd.Parameters.Add("@Contratto", SqlDbType.Char).Value = ad.contratto;
                    cmd.Parameters.Add("@IDTipologia", SqlDbType.Int).Value = ad.idtipologia;
                    cmd.Parameters.Add("@NrLocali", SqlDbType.TinyInt).Value = ad.locali;
                    cmd.Parameters.Add("@Prezzo", SqlDbType.Money).Value = ad.price;
                    cmd.Parameters.Add("@MQSuperficie", SqlDbType.Int).Value = ad.mq;
                    cmd.Parameters.Add("@URLFotoPrincipale", SqlDbType.VarChar).Value = ad.imgSrc;
                    cmd.Parameters.Add("@URLImmobile", SqlDbType.VarChar).Value = ad.url;
                    cmd.Parameters.Add("@Testo", SqlDbType.NVarChar).Value = ad.title;

                    // The same timestamp is used for insertion and modification.
                    cmd.Parameters.Add("@DataInserimento", SqlDbType.DateTime).Value = ad.date;
                    cmd.Parameters.Add("@DataModifica", SqlDbType.DateTime).Value = ad.date;

                    r = cmd.ExecuteNonQuery();
                    esitoAnnunciLavorati += 1;
                }
            }
            catch (SqlException ex)
            {
                logger.Error("Errore esecuzione Stored Procedure ", ex);
                esitoErrors += 1;
                esitoErrorsNotes += "\r\nErrore esecuzione Stored Procedure. " + ex.Message;
            }

            return r;
        }
Exemple #3
0
        /// <summary>
        /// Crawls the portal result lists for one contract/tipologia combination, once per entry
        /// in <c>province</c>, and stores every ad flagged as new through <c>saveAd</c>.
        /// </summary>
        /// <param name="connImport01">Connection passed to the lookup helpers and to <c>saveAd</c>.</param>
        /// <param name="contratto">Contract segment of the list URL; its first character, upper-cased, is stored on each ad.</param>
        /// <param name="tip">Tipologia settings: category id, tipologia id and URL slug, disambiguation rules.</param>
        /// <remarks>
        /// All list URLs are built first and then crawled in order. An exception that escapes the
        /// per-page and per-ad handlers is logged and counted here, and ends the whole crawl.
        /// </remarks>
        static void crawl(SqlConnection connImport01, string contratto, dbparams tip)
        {
            try
            {
                // The category segment does not depend on the province, so resolve it once.
                // NOTE(review): the literals below use typographic quotes and look redacted;
                // restore the real values before compiling.
                string categoria = "";
                switch (tip.idcategoria)
                {
                    case 1:
                        categoria = “xxxx”;
                        break;
                    case 2:
                        categoria = “xxxx”;
                        break;
                }

                ArrayList urls = new ArrayList();
                foreach (string siglaProvincia in province)
                {
                    url uIT = new url();
                    uIT.uri = string.Format(“xxxxxxxxxxxxxxxxxxx”,
                        contratto,
                        categoria,
                        tip.tipologia,
                        siglaProvincia.ToLower()
                        );
                    uIT.siglaprovincia = siglaProvincia;
                    urls.Add(uIT);
                }

                foreach (url uu in urls)
                {
                    crawlSingleUrl(connImport01, contratto, tip, uu);
                }
            }
            catch (Exception ex)
            {
                registerCrawlError(ex);
            }
        }

        /// <summary>
        /// Walks every result page of one list URL and stores its new ads; returns as soon as an
        /// ad without the "newIcon" marker is met, which ends the work for this URL.
        /// </summary>
        /// <remarks>
        /// Failures while loading the first page or reading the pagination are not caught here
        /// and reach the caller. NOTE(review): stopping at the first non-new ad presumably relies
        /// on the list being sorted newest first; confirm against the portal.
        /// </remarks>
        static void crawlSingleUrl(SqlConnection connImport01, string contratto, dbparams tip, url uu)
        {
            logger.Info(uu.uri);
            string baseUrl = uu.uri;
            HtmlDocument basedom = Functions.GetHtmlDocumentByUrl(baseUrl, logger);

            HtmlNode content = basedom.GetElementbyId("results");
            if (content == null)
            {
                return;
            }

            // NOTE(review): the pattern literal ends with a typographic quote and looks redacted.
            string pagination_results = Functions.extractFirstOccur(content.InnerHtml,
                " di * xxxx”);
            if (!pagination_results.IsNumeric() || pagination_results.Trim() == "0")
            {
                return;
            }

            // Drop '.' characters from the extracted total before converting it.
            string limit = pagination_results.Replace(".", "");
            if (!Functions.IsNumeric(limit))
            {
                return;
            }

            // Round the page count up so a partial last page is visited too.
            int totalResults = Convert.ToInt32(limit);
            int numPages = totalResults / resultsPerPage;
            if (numPages * resultsPerPage < totalResults)
            {
                numPages += 1;
            }

            for (int x = 1; x <= numPages; x++)
            {
                try
                {
                    Console.WriteLine("Pagina " + x.ToString());

                    // Page 1 is the document already loaded; later pages are fetched on demand.
                    HtmlDocument pagedom = basedom;
                    if (x > 1)
                    {
                        pagedom = Functions.GetHtmlDocumentByUrl(baseUrl.Replace("lista-1", "lista-" + x.ToString()), logger);
                    }

                    HtmlNode searchResults = pagedom.GetElementbyId("searchResultsTbl");
                    if (searchResults == null)
                    {
                        continue;
                    }

                    ArrayList results = Functions.getElementsByClass(searchResults, "resultBody first tier1", "resultBody tier1");
                    foreach (HtmlNode hn in results)
                    {
                        try
                        {
                            bool nuovoAnnuncio = hn.InnerHtml.Contains("<div class=\"newIcon\"></div>");
                            if (!nuovoAnnuncio)
                            {
                                // First non-new ad: this URL is done, the caller moves to the next one.
                                return;
                            }

                            storeNewAd(connImport01, contratto, tip, uu, hn);
                        }
                        catch (Exception ex)
                        {
                            // A broken ad must not stop the rest of the page.
                            registerCrawlError(ex);
                        }
                    }
                }
                catch (Exception ex)
                {
                    // A broken page must not stop the remaining pages.
                    registerCrawlError(ex);
                }
            }
        }

        /// <summary>
        /// Builds an <c>adResult</c> from one result-list node plus its detail page and passes it
        /// to <c>saveAd</c>. Ads whose comune cannot be resolved are dropped before the detail
        /// page is fetched.
        /// </summary>
        static void storeNewAd(SqlConnection connImport01, string contratto, dbparams tip, url uu, HtmlNode hn)
        {
            string listHtml = hn.InnerHtml;

            adResult ad = new adResult();
            ad.idtipologia = tip.idtipologia;
            ad.idcategoria = tip.idcategoria;
            ad.url = Functions.extractFirstOccur(listHtml, "href=\"*\"");
            ad.id = hn.Id.Replace("t", "");

            // The city is read from the relative ad URL: the text after
            // "<tipologia>-<provincia>-" up to the next '-', with '+' turned into spaces.
            string city = Functions.retrieveStringAfterChars(ad.url, tip.tipologia + "-" + uu.siglaprovincia.ToLower() + "-");
            city = Functions.retrieveStringBeforeChars(city, "-");
            city = city.Replace("+", " ");

            if (ad.url != "")
            {
                ad.url = ConfigurationManager.AppSettings["urlPortale"] + ad.url;
            }

            string imgSrc = Functions.extractFirstOccur(listHtml, "data-src='*'");
            if (imgSrc.Contains("placeholder.jpg"))
            {
                imgSrc = "";
            }
            ad.imgSrc = imgSrc;
            ad.date = DateTime.Now;

            comune comuneByName = getComuneByName(connImport01, city);
            if (comuneByName != null)
            {
                ad.idcomune = comuneByName.IDComune.ToString();
                ad.city = comuneByName.Comune;
            }

            if (string.IsNullOrEmpty(ad.idcomune))
            {
                return;
            }

            string zone = Functions.extractFirstOccur(listHtml, "<p class=\"zone\">*</p>");
            if (comuniConQuartieri.Contains(city))
            {
                // When the lookup finds nothing, the quartiere ids keep their initial values.
                quartiereportale qp = getQuartierePortaleByName(connImport01, comuneByName.IDComune, zone);
                if (qp != null)
                {
                    ad.idquartiere = qp.idquartiere;
                    ad.idquartiereportale = qp.nrtavola;
                }
            }
            else
            {
                ad.idquartiereportale = 0;
                ad.idquartiere = 0;
            }
            ad.zona = zone;

            HtmlDocument basedomScheda = Functions.GetHtmlDocumentByUrl(ad.url, logger);
            string htmlScheda = basedomScheda.DocumentNode.InnerHtml;

            ad.title = Functions.extractFirstOccur(htmlScheda, "<title>*-", "", true);
            Console.WriteLine(ad.id + " - " + ad.city + " - " + ad.title);

            string pr = Functions.extractFirstOccur(htmlScheda, "class=\"price\"><span class=\"hidden\">€ *</span>", "", true).Replace(".", "");
            if (Functions.IsNumeric(pr))
            {
                ad.price = Convert.ToDecimal(pr);
            }
            else
            {
                ad.price = 0;
            }

            // A non-empty description replaces the page title as the stored text.
            string description = Functions.extractFirstOccur(htmlScheda, "<p class=\"body\">*</p>");
            if (description != "")
            {
                ad.title = description;
            }
            // Cap the stored text at 2000 characters, ending with an ellipsis when cut.
            if (ad.title.Length > 2000)
            {
                ad.title = ad.title.Substring(0, 1997) + "...";
            }

            ad.idtipologia = disambiguateTipologia(ad, tip);

            string mq = Functions.extractFirstOccur(htmlScheda,
                "<li>Metri quadri:<span>* mq</span></li>");
            string locali = Functions.extractFirstOccur(htmlScheda,
                "<li>Locali:<span>*</span></li>");
            ad.locali = Functions.IsNumeric(locali) ? Convert.ToInt32(locali) : 0;
            ad.mq = Functions.IsNumeric(mq) ? Convert.ToInt32(mq) : 0;

            ad.contratto = contratto.Substring(0, 1).ToUpper();
            saveAd(connImport01, ad);
        }

        /// <summary>
        /// Writes a crawl failure to the console and the log, and adds it to the run's error
        /// counter and notes.
        /// </summary>
        static void registerCrawlError(Exception ex)
        {
            Console.WriteLine(ex.Message);
            logger.Error("Errore", ex);
            esitoErrors += 1;
            esitoErrorsNotes += "\r\n" + ex.Message;
        }