Exemplo n.º 1
0
        /// <summary>
        /// Downloads a page and evaluates an XPath expression against it, e.g. to check that
        /// the expression selects something.
        /// </summary>
        /// <param name="DetailUrl">Address of the page to download.</param>
        /// <param name="configXpath">XPath expression to evaluate.</param>
        /// <returns>
        /// The inner text of the last node matched by <paramref name="configXpath"/>; an empty
        /// string when nothing matches or no HTML came back; the fixed message
        /// "Không phân tích được!" when <paramref name="configXpath"/> is null or empty.
        /// </returns>
        public string TestXpath(string DetailUrl, string configXpath)
        {
            // Check the expression first so no download is attempted when there is nothing to evaluate.
            if (string.IsNullOrEmpty(configXpath))
            {
                return "Không phân tích được!";
            }

            string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(DetailUrl, 45, 2);
            if (string.IsNullOrEmpty(html))
            {
                // Nothing to parse; this also avoids calling Replace on a null reference.
                return "";
            }

            // <form> tags are rewritten to <div> before parsing.
            // NOTE(review): presumably a workaround for how the parser treats form elements - confirm.
            html = html.Replace("<form", "<div").Replace("</form", "</div");

            GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(html);

            string Text = "";
            var Nodes = doc.DocumentNode.SelectNodes(configXpath);
            if (Nodes != null)
            {
                // Each match overwrites the previous one, so the last match wins.
                foreach (var node in Nodes)
                {
                    Text = node.InnerText;
                }
            }
            return Text;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Parses the given document as a product when <paramref name="urlCurrent"/> is a detail
        /// URL and, for companies whose status is not TIN, stores the product if it is valid,
        /// not yet known and not a duplicate.
        /// </summary>
        /// <param name="urlCurrent">URL the document was downloaded from.</param>
        /// <param name="doc">Parsed HTML of that URL.</param>
        private void AnalysicProduct(string urlCurrent, GABIZ.Base.HtmlAgilityPack.HtmlDocument doc)
        {
            if (!IsDetailUrl(urlCurrent, _detailLinkRegexs))
            {
                return;
            }

            bool isTinCompany = _company.Status == Common.CompanyStatus.TIN;

            Product parsed = new Product();
            parsed.Analytics(doc, urlCurrent, _config, false, _company.Domain);

            // For TIN companies the page is only parsed; nothing is stored.
            if (isTinCompany)
            {
                return;
            }

            if (!parsed.IsSuccessData(this._config.CheckPrice))
            {
                return;
            }

            parsed.Valid = false;
            if (IsExistsProduct(parsed.ID))
            {
                return;
            }

            if (this._hsHashDuplicate.ContainsKey(parsed.GetHashCheckDuplicate()))
            {
                _log.Info("Duplicate data");
                return;
            }

            // New product: insert it, then update the caches and the price-change log.
            _totalProductBefore++;
            _productAdapter.InsertProduct(parsed);

            _redisLastUpdateProduct.UpdateBathLastUpdateProduct(this._companyId, new List <long> { parsed.ID }, DateTime.Now);

            var hashEntry = new ProductHash()
            {
                HashChange    = parsed.GetHashChange(),
                HashDuplicate = parsed.GetHashCheckDuplicate(),
                Id            = parsed.ID,
                Price         = parsed.Price,
                url           = parsed.DetailUrl,
                HashImage     = parsed.GetHashImage(),
            };
            _cacheProductHash.SetCacheProductHash(_companyId, new List <QT.Entities.CrawlerProduct.Cache.ProductHash> { hashEntry });

            // A first sighting is logged as a price change starting from 0.
            var priceJob = new JobRabbitChangePrice()
            {
                Name      = parsed.Name,
                OldPrice  = 0,
                NewPrice  = parsed.Price,
                ProductID = parsed.ID,
                CompanyID = parsed.IDCongTy
            };
            _mqLogChangePrice.PushQueueChangePriceLog(priceJob);

            AddToDuplicate(parsed.GetHashCheckDuplicate(), parsed.ID);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Click handler that walks the model menu of bantoyota.com.vn, opens each model page,
        /// and saves every keyword link found there through <c>SaveKeyWord</c>.
        /// </summary>
        /// <remarks>
        /// Failures are reported through message boxes; the completion message is always shown.
        /// Anchors that lack the attributes needed here are skipped instead of aborting the run.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnDuyetTang_Click(object sender, EventArgs e)
        {
            ProductSaleNewDataAdapter productAdapter = new ProductSaleNewDataAdapter(new QT.Entities.Data.SqlDb(QT.Entities.Server.ConnectionStringCrawler));

            try
            {
                string root     = "http://bantoyota.com.vn";
                Uri    uriroot  = new Uri(root);
                string siteBase = uriroot.Scheme + @"://" + uriroot.Host;

                string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(root, 45, 2);
                GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(html);

                // List of car model lines.
                var nodeMakers = doc.DocumentNode.SelectNodes(@"//ul[@class='sub-menu']//li//a");
                if (nodeMakers != null)
                {
                    // Visit each model node.
                    foreach (var nodeModelCar in nodeMakers)
                    {
                        string strModelCar = nodeModelCar.InnerText.Trim();
                        if (productAdapter.CheckExitFullLink("toyota->" + strModelCar) < 0)
                        {
                            continue;
                        }

                        var modelHref = nodeModelCar.Attributes["href"];
                        if (modelHref == null)
                        {
                            // Menu anchor without a target: nothing to follow.
                            continue;
                        }
                        string urlToModel = siteBase + modelHref.Value;

                        GABIZ.Base.HtmlAgilityPack.HtmlDocument docModel = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                        docModel.LoadHtml(System.Web.HttpUtility.HtmlDecode(GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(urlToModel, 45, 2)));
                        string xPathKeyWord  = "//div[@class='rightsearch classhot']//div[@class='item']//a";
                        var    nodesKeyWords = docModel.DocumentNode.SelectNodes(xPathKeyWord);
                        if (nodesKeyWords == null)
                        {
                            continue;
                        }

                        foreach (var nodeKeyWord in nodesKeyWords)
                        {
                            var titleAttribute = nodeKeyWord.Attributes["title"];
                            var hrefAttribute  = nodeKeyWord.Attributes["href"];
                            string title = (titleAttribute == null) ? null : titleAttribute.Value;
                            if (title == null || hrefAttribute == null)
                            {
                                // Both attributes are required below; skip incomplete anchors.
                                continue;
                            }

                            string keyWord          = title.Trim().ToLower().Replace("  ", "");
                            string urlDetailKeyWord = siteBase + hrefAttribute.Value;

                            GABIZ.Base.HtmlAgilityPack.HtmlDocument docKeyWordDetail = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                            docKeyWordDetail.LoadHtml(GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(urlDetailKeyWord, 45, 2));

                            // NOTE(review): the description lookup is unfinished - the node is selected
                            // but an empty description is always saved. Confirm the intended behaviour.
                            string descXPath   = @"//meta[@name='description']/@content";
                            var    node        = docKeyWordDetail.DocumentNode.SelectSingleNode(descXPath);
                            string description = "";

                            // Save the data.
                            try
                            {
                                productAdapter.SaveKeyWord("toyota", strModelCar, keyWord.Replace("bán xe", ""), keyWord, description);
                            }
                            catch (Exception ex)
                            {
                                MessageBox.Show(ex.Message);
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
            MessageBox.Show("Hoàn tất!");
        }
Exemplo n.º 4
0
        /// <summary>
        /// For every row of <c>Category_Lazada</c>, downloads the row's URL and writes the
        /// breadcrumb text found on the page into the row's <c>CategoryName</c> column.
        /// Progress is written to the console and the method waits for a line of input at the end.
        /// </summary>
        private void AddCategoryName()
        {
            const string breadcrumbXPath = "//li[@class='last-child']//span[@class='header-breadcrumb__element']";
            const string updateSql       = "Update Category_Lazada set CategoryName = @CategoryName where ID = @ID";

            // One document instance is reused for every page.
            var page = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
            DataTable categories = sqlDb.GetTblData("select * from Category_Lazada");

            foreach (DataRow categoryRow in categories.Rows)
            {
                long   categoryId = QT.Entities.Common.Obj2Int64(categoryRow["ID"]);
                string pageUrl    = QT.Entities.Common.Obj2String(categoryRow["Url"]);

                page.LoadHtml(this.GetHtml(pageUrl));
                var matches = page.DocumentNode.SelectNodes(breadcrumbXPath);

                if (matches != null)
                {
                    // One update per match, so the last match is the value that remains stored.
                    foreach (var match in matches)
                    {
                        string categoryName = match.InnerText.ToString();
                        sqlDb.RunQuery(updateSql, CommandType.Text, new System.Data.SqlClient.SqlParameter[] {
                            sqlDb.CreateParamteter("@CategoryName", categoryName, SqlDbType.NVarChar),
                            sqlDb.CreateParamteter("@ID", categoryId, SqlDbType.BigInt)
                        });
                    }
                }

                Console.WriteLine("Success: " + categoryId);
            }

            Console.WriteLine("Done!");
            Console.ReadLine();
        }
Exemplo n.º 5
0
        /// <summary>
        /// Collects the links of a page and queues every one that is short enough, not yet
        /// visited, and accepted by the visit / no-visit rules.
        /// </summary>
        /// <param name="doc">Parsed page whose anchors are examined.</param>
        private void ExtractionLink(GABIZ.Base.HtmlAgilityPack.HtmlDocument doc)
        {
            var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
            if (anchors == null)
            {
                return;
            }

            foreach (var anchor in anchors)
            {
                string rawHref  = anchor.Attributes["href"].Value.ToString();
                string absolute = Common.GetAbsoluteUrl(rawHref, _rootUri);
                string fullLink = System.Web.HttpUtility.HtmlDecode(absolute).Trim();

                if (fullLink.Length >= MaxLengthUrl)
                {
                    continue;
                }

                long linkCrc = Common.GetIDProduct(fullLink);
                if (_visitedCrc.ContainsKey(linkCrc) || IsNoVisitUrl(fullLink) || !IsVisitLink(fullLink))
                {
                    continue;
                }

                // Mark as seen before queueing so the same link is never queued twice.
                _visitedCrc.Add(linkCrc, true);
                _linkQueue.Enqueue(fullLink);
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Click handler that downloads the test URL, parses it with the <c>IParser</c>
        /// resolved for the entered domain, and shows the parsed properties as JSON.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnTest_Click(object sender, EventArgs e)
        {
            string             urlTest     = linkTestTextBox.Text;
            IDownloadHtml      dowloadHtml = new DownloadHtmlCrawler();
            WebExceptionStatus webExceptionStatus;
            string             html = System.Web.HttpUtility.HtmlDecode(dowloadHtml.GetHtml(urlTest, 45, 2, out webExceptionStatus));

            if (string.IsNullOrEmpty(html))
            {
                MessageBox.Show("Can't download html");
                return;
            }

            HtmlDocument htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            // The kernel is disposable; release it (and whatever it created) once the
            // parse result has been rendered instead of leaking one kernel per click.
            using (var kerner = new StandardKernel(new DomainModule()))
            {
                IParser parseNormal = kerner.Get <IParser>();
                parseNormal.Init(domainTextBox.Text);
                var propertiesData = parseNormal.ParseData(htmlDocument);
                if (propertiesData != null)
                {
                    txtProductTest.Text = propertiesData.GetJSonDisplay();
                }
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Downloads <paramref name="url"/> and returns the texts that
        /// <c>Common.GetTextInNode</c> extracts for <paramref name="xpath"/>.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="xpath">XPath expression handed to <c>GetTextInNode</c>.</param>
        /// <returns>The list produced by <c>Common.GetTextInNode</c>.</returns>
        public List <string> GetListTag(string url, string xpath)
        {
            // NOTE(review): this handler instance is never used afterwards; the construction is
            // kept in case it has side effects - confirm and remove if it does not.
            QT.Entities.RaoVat.HandlerContentOfHtml unusedHandler = new Entities.RaoVat.HandlerContentOfHtml();

            string pageHtml = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(url, 45, 2, true);

            var page = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
            page.LoadHtml(pageHtml);

            return QT.Entities.Common.GetTextInNode(page, xpath);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Breadth-first crawl starting at http://www.lazada.vn: every page's links that pass the
        /// OK / ignore regex lists are queued once, and pages whose own URL matches the product
        /// regex list are inserted into <c>Category_Lazada</c>.
        /// </summary>
        private void StartCrawler()
        {
            const string seedUrl = "http://www.lazada.vn";

            var pending = new Queue <QT.Moduls.Crawler.Job>();
            pending.Enqueue(new QT.Moduls.Crawler.Job()
            {
                url       = seedUrl,
                ProductId = QT.Entities.Common.CrcProductID(seedUrl)
            });

            while (pending.Count > 0)
            {
                QT.Moduls.Crawler.Job current = pending.Dequeue();

                var page = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                page.LoadHtml(this.GetHtml(current.url));

                // Link extraction. Pages without any anchor are skipped entirely,
                // including the product check below.
                var anchors = page.DocumentNode.SelectNodes("//a[@href]");
                if (anchors == null)
                {
                    continue;
                }

                foreach (var anchor in anchors)
                {
                    string href = anchor.Attributes["href"].Value.ToString();
                    if (!QT.Entities.Common.CheckRegex(href, this.lstRegexOK, this.lstRegexIgone, false))
                    {
                        continue;
                    }

                    long linkId = QT.Entities.Common.CrcProductID(href);
                    bool alreadyQueued;
                    addedQueue.TryGetValue(linkId, out alreadyQueued);
                    if (alreadyQueued)
                    {
                        continue;
                    }

                    this.addedQueue.Add(linkId, true);
                    pending.Enqueue(new QT.Moduls.Crawler.Job()
                    {
                        ConfigID  = 0,
                        deep      = current.deep + 1,
                        ProductId = linkId,
                        url       = href
                    });
                }

                // Product check on the page that was just processed.
                if (QT.Entities.Common.CheckRegex(current.url, this.lstRegexProduct, null, true))
                {
                    var insertParameters = new System.Data.SqlClient.SqlParameter[] {
                        SqlDb.CreateParamteterSQL("@ID", current.ProductId, System.Data.SqlDbType.BigInt),
                        SqlDb.CreateParamteterSQL("@Url", current.url, System.Data.SqlDbType.NVarChar)
                    };
                    this.sqlDb.RunQuery("Insert into Category_Lazada (ID, Url) Values (@ID, @Url)", System.Data.CommandType.Text,
                                        insertParameters, true, 10);
                }
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Click handler that downloads the test URL, analyses it with the configuration
        /// currently edited in the form, and shows the result in a dialog from which the
        /// product and its HTML can be saved.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnTestProduct_Click(object sender, EventArgs e)
        {
            string urlTest = urlTestTextBox.Text;

            if (string.IsNullOrWhiteSpace(urlTest))
            {
                MessageBox.Show("Not url test");
                return;
            }

            // Download once; previously the page was fetched twice and the first copy discarded.
            WebExceptionStatus status;
            string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(urlTest, 45, 2, out status);

            // NOTE(review): the parsed document is not used afterwards; the call is kept so a
            // failed download still fails here as before - confirm whether it can be removed.
            GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(html);

            ConfigXPaths config = this.raovatSqlAdapter.GetConfigByID((int)this.configXPathIDSpinEdit.Value);
            if (config == null)
            {
                // No stored configuration: start from a placeholder with ID -1.
                config = new ConfigXPaths()
                {
                    ID = -1
                };
            }

            if (!this.LoadFormToConfig(ref config))
            {
                return;
            }

            var product = new ProductSaleNew();
            // NOTE(review): the returned error code is not inspected.
            this.hanlerContentOfHtml.AnalyticsProductSaleNew(config.domain, urlTest, config, product,
                                                             this.raovatSqlAdapter.GetDicMapClassificationAndCategories(config.website_id),
                                                             this.raovatSqlAdapter.GetDicCityAndRegex());

            // A form shown with ShowDialog is not disposed when it closes, so dispose it explicitly.
            using (FrmDataShow frmDataShow = new FrmDataShow(product.ToString()))
            {
                frmDataShow.btnSave.Click += (obj, eventArg) =>
                {
                    if (MessageBox.Show("Save to Cassandra?", "Warning", MessageBoxButtons.YesNo, MessageBoxIcon.Warning) != System.Windows.Forms.DialogResult.Yes)
                    {
                        return;
                    }

                    // Update an existing product, insert a new one; the HTML is saved either way.
                    bool bExits = this.mongoDbAdapter.CheckExistsProductSalenew(product.id);
                    if (bExits)
                    {
                        mongoDbAdapter.UpdateProduct(product);
                    }
                    else
                    {
                        mongoDbAdapter.InsertProduct(product);
                    }
                    mongoDbAdapter.SaveHtml(product.id, html, bExits);
                };
                frmDataShow.ShowDialog();
            }
        }
Exemplo n.º 10
0
        /// <summary>
        /// Queues the not-yet-seen links of a page as new <c>JobFindNew</c> jobs, one level
        /// deeper than <paramref name="job"/>, unless the depth or link limits are exceeded.
        /// </summary>
        /// <param name="doc">Parsed page whose anchors are examined.</param>
        /// <param name="job">Job the page belongs to; supplies depth and parent id.</param>
        private void Extraction(HtmlDocument doc, JobFindNew job)
        {
            if (job.Deep > _config.MaxDeep)
            {
                _log.Info("Over dee. Not extraction");
                return;
            }

            if (_visitedCrc.Count > _config.MaxLinksFindNew)
            {
                _log.Info("Over max link crc. Not extraction");
                return;
            }

            int queued = 0;
            int seen   = 0;

            var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
            if (anchors != null)
            {
                foreach (var anchor in anchors)
                {
                    seen++;

                    string absolute = Common.GetAbsoluteUrl(anchor.Attributes["href"].Value, _rootUri);
                    string link     = System.Web.HttpUtility.HtmlDecode(absolute).Trim();

                    // For this one company everything from the character before "sid" onwards is cut off.
                    if (_companyId == 480254425312154563 && link.Contains("sid"))
                    {
                        int sidPosition = link.IndexOf("sid", StringComparison.Ordinal);
                        link = link.Substring(0, sidPosition - 1);
                    }

                    if (link.Length >= MaxLengthUrl)
                    {
                        continue;
                    }

                    var linkCrc = Common.GetIDProduct(link);
                    bool rejected = _visitedCrc.Contains(linkCrc) ||
                                    _crcProductOldGroup.Contains(linkCrc) ||
                                    _hsDuplicateProduct.Contains(linkCrc) ||
                                    !Common.CheckRegex(link, _config.VisitUrlsRegex, _config.NoVisitUrlRegex, false);
                    if (rejected)
                    {
                        continue;
                    }

                    queued++;
                    _visitedCrc.Add(linkCrc);
                    _linkQueue.Enqueue(new JobFindNew()
                    {
                        Url      = link,
                        Deep     = job.Deep + 1,
                        ParentId = job.Id,
                        Id       = Common.CrcProductID(link)
                    });
                    _log.Debug("Add link to queue:" + link);
                }
            }

            _log.Info(GetPrefixLog() + string.Format("NumberLinkAdded {0}/{1}", queued, seen));
        }
Exemplo n.º 11
0
        /// <summary>
        /// Parses the downloaded HTML of a job, analyses it when the job's URL is a detail URL,
        /// and then extracts its links in every case.
        /// </summary>
        /// <param name="jobCrawl">Job describing the page.</param>
        /// <param name="html">Downloaded HTML of the page.</param>
        private void ProcessLink(JobFindNew jobCrawl, string html)
        {
            HtmlDocument parsedPage = new HtmlDocument();
            parsedPage.LoadHtml(html);

            bool isDetailPage = IsDetailUrl(jobCrawl.Url);
            if (isDetailPage)
            {
                Analysic(jobCrawl, parsedPage);
            }

            // Link extraction runs for detail and non-detail pages alike.
            Extraction(parsedPage, jobCrawl);
        }
Exemplo n.º 12
0
        /// <summary>
        /// Reads the table rows of an HTML fragment into property entries, all placed in the
        /// group "Thông số chung".
        /// </summary>
        /// <remarks>
        /// Extraction is best effort: rows with too few child nodes are skipped, and an
        /// unexpected error ends extraction and returns what was collected so far.
        /// </remarks>
        /// <param name="html">HTML containing the <c>tr</c> rows.</param>
        /// <returns>One entry per usable row, numbered from 1; never null.</returns>
        private static List <PropertyEntyties> GetListPropertiesFyi(string html)
        {
            const string tenNhom = "Thông số chung";
            List <PropertyEntyties> rlist = new List <PropertyEntyties>();

            if (string.IsNullOrEmpty(html))
            {
                return rlist;
            }

            try
            {
                GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(html);
                var nodes = doc.DocumentNode.SelectNodes("//tr");
                if (nodes == null)
                {
                    return rlist;
                }

                int stt = 1;
                foreach (var row in nodes)
                {
                    // Name and value are read from child nodes 1 and 3.
                    // NOTE(review): presumably nodes 0 and 2 are whitespace between cells - confirm.
                    // Rows that are too short are skipped so they no longer cut the list short.
                    if (row.ChildNodes.Count < 4)
                    {
                        continue;
                    }

                    string tenthuoctinh = row.ChildNodes[1].InnerText.Trim();
                    string giatri       = row.ChildNodes[3].InnerText.Trim();

                    PropertyEntyties item = new PropertyEntyties();
                    item.ID       = Common.GetID_Properties(tenthuoctinh + tenNhom);
                    item.IDType   = Common.GetID_Properties(tenNhom);
                    item.IDValue  = Common.GetID_Properties(giatri);
                    item.Name     = tenthuoctinh;
                    item.NameType = tenNhom;
                    item.Value    = giatri;
                    item.STT      = stt;
                    stt++;
                    rlist.Add(item);
                }
            }
            catch (Exception ex)
            {
                // Still best effort, but the failure is at least visible in debug output.
                System.Diagnostics.Debug.WriteLine("GetListPropertiesFyi failed: " + ex);
            }
            return rlist;
        }
Exemplo n.º 13
0
        /// <summary>
        /// Downloads <paramref name="url"/> and, when HTML came back, fills
        /// <paramref name="product"/> from it and sets its <c>Valid</c> flag.
        /// </summary>
        /// <param name="product">Product to populate; left untouched when no HTML is available.</param>
        /// <param name="url">Address of the product page.</param>
        /// <returns>The status reported by the download.</returns>
        private WebExceptionStatus Analysic(Product product, string url)
        {
            WebExceptionStatus outException;
            var html = this.GetHtmlCode(url, _config.UseClearHtml, out outException);

            // A null result is treated like an empty one so LoadHtml is never handed null.
            if (!string.IsNullOrEmpty(html))
            {
                var doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(html);
                product.Analytics(doc, url, _config, false, _company.Domain);
                product.Valid = product.IsSuccessData(_config.CheckPrice);
            }
            return outException;
        }
Exemplo n.º 14
0
        /// <summary>
        /// Main crawl loop: takes URLs from the link queue until the queue is empty, the time
        /// budget is used up, or the visited-link limit is reached. Each visited page is parsed,
        /// analysed as a product and scanned for further links.
        /// </summary>
        /// <remarks>
        /// Cancellation is rethrown to the caller; any other exception is logged and the loop
        /// moves on to the next URL.
        /// </remarks>
        private void Crawl()
        {
            InitSession();

            while (_linkQueue.Count > 0 &&
                   (DateTime.Now - startCrawler).TotalHours < _config.MaxHourFindNew &&
                   _countVisited < _config.MaxLinksFindNew)
            {
                try
                {
                    this._tokenCrawler.ThrowIfCancellationRequested();
                    DelayCrawler();
                    _urlCurrent = _linkQueue.Dequeue();

                    SetRunningCompany();

                    string progress = string.Format("THR: {4} Cmp: {5} Q: {0} cVs: {1}  cNP: {2} TTP: {6} cC: {7} Url: {3}",
                                                    _linkQueue.Count, _countVisited, _totalProductBefore,
                                                    _urlCurrent, _indexThread,
                                                    _company.Domain.PadRight(50, ' '),
                                                    _totalProduct,
                                                    _countCompany);
                    LogData(progress);

                    if (IsNoVisitUrl(_urlCurrent))
                    {
                        continue;
                    }

                    _countVisited++;
                    var pageHtml = GetHtmlCode(_urlCurrent, _config.UseClearHtml);
                    PushLogVisited(_urlCurrent, false);
                    if (pageHtml == "")
                    {
                        continue;
                    }

                    var page = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                    page.LoadHtml(pageHtml);
                    AnalysicProduct(_urlCurrent, page);
                    ExtractionLink(page);
                }
                catch (OperationCanceledException)
                {
                    // Cancellation must reach the caller; it is not a per-URL failure.
                    throw;
                }
                catch (Exception ex01)
                {
                    _log.Error(ex01);
                }
            }

            CheckWarningOverMax();
            EndSession();
        }
Exemplo n.º 15
0
        /// <summary>
        /// Runs the product parser for the test link of a company's configuration and shows
        /// <c>frmShow</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the document handed to the parser is never loaded with HTML, and the
        /// formatted text is built but never passed to the form. This looks unfinished;
        /// confirm the intended behaviour before relying on it.
        /// </remarks>
        /// <param name="CompanyId">Identifier of the company whose configuration is used.</param>
        public static void ShowProduct(long CompanyId)
        {
            var company = new Entities.Company(CompanyId);
            var config  = new Configuration(CompanyId);
            var parser  = new ProductParse();
            var product = new ProductEntity();
            string detailUrl = config.LinkTest;

            // Empty document: nothing is downloaded in this method.
            var document = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
            parser.Analytics(product, document, config.LinkTest, config, config.Domain);

            string strDataShow = "" + string.Format("\r\n Name: {0}", product.Name);

            frmShow.Visible = true;
            frmShow.Show();
        }
Exemplo n.º 16
0
        /// <summary>
        /// Downloads a page and returns its HTML with <c>form</c> tags rewritten to <c>div</c>
        /// tags, optionally passing it through <c>Common.TidyCleanR</c> first.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="UseClearHtml">When true, the HTML is cleaned with <c>TidyCleanR</c>.</param>
        /// <param name="timeOut">Timeout value handed to the downloader.</param>
        /// <param name="loopTry">Retry value handed to the downloader.</param>
        /// <returns>The processed HTML, or an empty string when nothing was downloaded.</returns>
        public static string GetHtmlFromUrl(string url, bool UseClearHtml, int timeOut = 15, int loopTry = 2)
        {
            string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(url, timeOut, loopTry);

            // Treat null like "no content" so callers comparing against "" keep working
            // and Replace is never called on a null reference.
            if (string.IsNullOrEmpty(html))
            {
                return string.Empty;
            }

            if (UseClearHtml)
            {
                html = QT.Entities.Common.TidyCleanR(html) ?? string.Empty;
            }

            // NOTE(review): plain text replacement; it also rewrites any other tag whose name
            // starts with "form". Confirm that this is acceptable.
            return html.Replace("<form", "<div").Replace("</form", "</div");
        }
Exemplo n.º 17
0
 /// <summary>
 /// Click handler that starts a background task which queries the SaleNews database and,
 /// for every returned row, loads a fixed Google URL into an HTML document.
 /// </summary>
 /// <remarks>
 /// NOTE(review): this looks like an unfinished stub - the SQL text and the XPath are empty,
 /// the format string has no placeholder for its argument, and the task is not observed for
 /// failures. Confirm the intent before using it.
 /// </remarks>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void btnRun_Click(object sender, EventArgs e)
 {
     Task.Factory.StartNew(() =>
     {
         var database     = new SqlDb(@"Data Source=WIN-6ICNIQVFE0A;Initial Catalog=SaleNews;Integrated Security=True");
         var noParameters = new System.Data.SqlClient.SqlParameter[] { };
         DataTable rows   = database.GetTblData("", CommandType.Text, noParameters);

         foreach (DataRow rowInfo in rows.Rows)
         {
             string key = "";
             string url = string.Format(@"https://www.google.com/?gws_rd=ssl#safe=off&q=con+ch%C3%B3", key.Replace(" ", "-"));

             var document = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
             document.Load(url);
             document.DocumentNode.SelectNodes("");
         }
     });
 }
Exemplo n.º 18
0
 /// <summary>
 /// Click handler that downloads the test URL and shows the raw HTML in a dialog.
 /// Any failure is reported in an error message box.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void btnGetHTML_Click(object sender, EventArgs e)
 {
     try
     {
         // NOTE(review): the configuration is looked up but not used below; kept in case the
         // lookup is relied on as a check - confirm and remove if it is not.
         var config = this.raovatSqlAdapter.GetConfigByID((int)this.configXPathIDSpinEdit.Value);

         string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(urlTestTextBox.Text, 45, 2, true);

         // The parse result is not used; loading is kept so a download that cannot be loaded
         // still ends in the error box below, as before.
         GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
         doc.LoadHtml(html);

         // A form shown with ShowDialog is not disposed when it closes, so dispose it explicitly.
         using (FrmDataShow fr = new FrmDataShow(html))
         {
             fr.ShowDialog();
         }
     }
     catch (Exception ex)
     {
         MessageBox.Show(ex.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
     }
 }
Exemplo n.º 19
0
        /// <summary>
        /// Requests ten Google result pages for a keyword and collects the text of every node
        /// matching <c>//div[@class='ads-visurl']/cite</c>.
        /// </summary>
        /// <remarks>
        /// Best effort: a page that fails to download or parse is skipped and the remaining
        /// pages are still requested.
        /// </remarks>
        /// <param name="Keyword">Search keyword; it is URL-encoded before use.</param>
        /// <returns>The collected texts in page order; never null.</returns>
        public static List <string> GetWebsiteInGoogle(string Keyword)
        {
            const string xpath = @"//div[@class='ads-visurl']/cite";
            string encodedKeyword = System.Web.HttpUtility.UrlEncode(Keyword);
            List <string> listLinks = new List <string>();

            for (int i = 0; i < 10; i++)
            {
                string url = "https://www.google.com.vn/search?q=" + encodedKeyword + "&safe=off&start=" + (i * 10).ToString();
                try
                {
                    Uri            urlRoot = new Uri(url, UriKind.RelativeOrAbsolute);
                    HttpWebRequest oReq    = (HttpWebRequest)WebRequest.Create(urlRoot);
                    oReq.AllowAutoRedirect = true;
                    oReq.UserAgent         = @"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.152 Safari/537.22";
                    oReq.Timeout           = 3000;

                    // using blocks release the response and its stream even when parsing throws.
                    using (HttpWebResponse resp = (HttpWebResponse)oReq.GetResponse())
                    {
                        string contentType = resp.ContentType;
                        if (contentType == null || !contentType.StartsWith("text/html", StringComparison.InvariantCultureIgnoreCase))
                        {
                            continue;
                        }

                        // Resolve the charset only for HTML responses; fall back to UTF-8 when none is declared.
                        string   charset  = resp.CharacterSet;
                        Encoding encoding = string.IsNullOrEmpty(charset) ? Encoding.UTF8 : Encoding.GetEncoding(charset);

                        GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                        using (var resultStream = resp.GetResponseStream())
                        {
                            doc.Load(resultStream, encoding);
                        }

                        GABIZ.Base.HtmlAgilityPack.HtmlNodeCollection node = doc.DocumentNode.SelectNodes(xpath);
                        if (node != null)
                        {
                            foreach (GABIZ.Base.HtmlAgilityPack.HtmlNode item in node)
                            {
                                listLinks.Add(item.InnerText);
                            }
                        }
                    }
                }
                catch (Exception ex01)
                {
                    // Still best effort, but the failure is at least visible in debug output.
                    System.Diagnostics.Debug.WriteLine("GetWebsiteInGoogle: page " + i + " failed: " + ex01);
                }
            }
            return listLinks;
        }
Exemplo n.º 20
0
        /// <summary>
        /// Analyses a detail page. For TIN companies the page is only parsed. Otherwise a valid,
        /// not yet known product is either pushed as a new product or, when its duplicate hash
        /// is already registered, published as a duplicate.
        /// </summary>
        /// <param name="jobCrawl">Job whose URL the document belongs to.</param>
        /// <param name="doc">Parsed HTML of the page.</param>
        private void Analysic(JobFindNew jobCrawl, HtmlDocument doc)
        {
            if (_company.Status == Common.CompanyStatus.TIN)
            {
                var tinProduct = new Product();
                tinProduct.Analytics(doc, jobCrawl.Url, _config, false, _company.Domain);
                return;
            }

            var entity = new ProductEntity();
            var parser = new ProductParse();
            parser.Analytics(entity, doc, jobCrawl.Url, _config, _company.Domain);

            if (!entity.IsSuccessData(_config.CheckPrice))
            {
                return;
            }

            entity.Valid = false;
            if (IsExistsProduct(entity.ID))
            {
                return;
            }

            if (_dicDuplicate.ContainsKey(entity.GetHashDuplicate()))
            {
                // Same duplicate hash as an already registered product: report it, do not store it.
                var duplicate = new ProductDuplicate()
                {
                    CId   = _companyId,
                    Id    = entity.ID,
                    Hash  = entity.GetHashDuplicate(),
                    IdDup = _dicDuplicate[entity.GetHashDuplicate()],
                    Url   = entity.DetailUrl
                };
                _producerDuplicateProduct.PublishString(Newtonsoft.Json.JsonConvert.SerializeObject(duplicate), true);
                return;
            }

            entity.StatusChange.IsNew = true;
            PushChangeProduct(entity);
            _dicDuplicate.Add(entity.GetHashDuplicate(), entity.ID);
            _crcProductOldGroup.Add(entity.ID);
            _countNewProduct++;
        }
Exemplo n.º 21
0
        /// <summary>
        /// Collects the outer HTML of the first node matched by each configured
        /// short-description XPath and joins the pieces with "||||".
        /// </summary>
        /// <param name="html">HTML source to search.</param>
        /// <param name="configXPath">Configuration supplying <c>ShortDescriptionXPath</c>.</param>
        /// <returns>The joined fragments; an empty string when nothing matched.</returns>
        private string GetDescription(string html, Configuration configXPath)
        {
            var fragments = new List <string>();

            var document = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
            document.LoadHtml(html);

            var expressions = configXPath.ShortDescriptionXPath;
            if (expressions != null)
            {
                foreach (string expression in expressions)
                {
                    // Blank entries are ignored.
                    if (expression.Trim() == "")
                    {
                        continue;
                    }

                    var match = document.DocumentNode.SelectSingleNode(expression);
                    if (match != null)
                    {
                        fragments.Add(match.OuterHtml);
                    }
                }
            }

            return(string.Join("||||", fragments));
        }
Exemplo n.º 22
0
        /// <summary>
        /// Downloads every root link of every configured website, selects nodes
        /// with that website's XPath and returns the <c>href</c> value of each
        /// selected node.
        /// </summary>
        /// <returns>
        /// All collected <c>href</c> values in discovery order; empty when nothing matched.
        /// </returns>
        public List <string> GetProxy()
        {
            List <string> lstProxy = new List <string>();

            foreach (ConfigWebsite configItem in FactoryConfigWebsite.Instance().GetListConfigWebsite())
            {
                foreach (string url in configItem.RootLinks)
                {
                    GABIZ.Base.HtmlAgilityPack.HtmlDocument document = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                    document.LoadHtml(GetHtmlCode(url));

                    var nodes = document.DocumentNode.SelectNodes(configItem.XPath);
                    if (nodes == null)
                    {
                        continue;
                    }

                    foreach (var aNode in nodes)
                    {
                        // The XPath comes from configuration and is not guaranteed to
                        // select only elements that carry an href. Skip such nodes so
                        // that one bad match does not discard everything collected.
                        var href = aNode.Attributes["href"];
                        if (href == null || href.Value == null)
                        {
                            continue;
                        }

                        lstProxy.Add(href.Value);
                    }
                }
            }
            return(lstProxy);
        }
Exemplo n.º 23
0
        /// <summary>
        /// Converts the <c>&lt;tr&gt;</c> rows of an HTML fragment into a list of properties.
        /// A row without attributes sets the current group name (taken from its
        /// child node at index 1). Any other row yields one property whose name and
        /// value come from its child nodes at index 1 and 3.
        /// </summary>
        /// <param name="html">HTML containing the table rows.</param>
        /// <returns>Properties in document order, numbered from 1 via <c>STT</c>.</returns>
        private static List <PropertyEntyties> GetListPropertiesVatGia(string html)
        {
            var properties = new List <PropertyEntyties>();

            var document = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
            document.LoadHtml(html);

            var rows = document.DocumentNode.SelectNodes("//tr");
            if (rows == null)
            {
                return(properties);
            }

            string groupName = "";
            int    position  = 1;
            foreach (var row in rows)
            {
                if (row.Attributes.Count == 0)
                {
                    // Group header row: remember its name for the rows that follow.
                    groupName = row.ChildNodes[1].InnerText.Trim();
                    continue;
                }

                string propertyName  = row.ChildNodes[1].InnerText.Trim();
                string propertyValue = row.ChildNodes[3].InnerText.Trim();

                properties.Add(new PropertyEntyties()
                {
                    ID       = Common.GetID_Properties(propertyName + groupName),
                    IDType   = Common.GetID_Properties(groupName),
                    IDValue  = Common.GetID_Properties(propertyValue),
                    Name     = propertyName,
                    NameType = groupName,
                    Value    = propertyValue,
                    STT      = position++
                });
            }

            return(properties);
        }
Exemplo n.º 24
0
        /// <summary>
        /// For every row returned by <c>GetLinkTestCrawlerAllCompany</c>, downloads the
        /// company's test link, parses it with that company's configuration and
        /// builds an "update company set ConfigSuccess = ..." statement reflecting
        /// whether the parse succeeded. Errors for a single row are logged and the
        /// loop continues with the next row.
        /// </summary>
        public void Start()
        {
            string        updateTemplate = "update company set ConfigSuccess = {0} where id = {1}";
            List <string> pendingUpdates = new List <string>();

            QT.Entities.Server.ConnectionString = this.connectionString;
            ProductAdapter adapter = new ProductAdapter(new SqlDb(QT.Entities.Server.ConnectionString));

            foreach (DataRow companyRow in adapter.GetLinkTestCrawlerAllCompany().Rows)
            {
                try
                {
                    // One-second pause before each company.
                    Thread.Sleep(1000);

                    long    companyId = Convert.ToInt64(companyRow["Id"]);
                    string  domain    = Convert.ToString(companyRow["Domain"]);
                    string  testLink  = Convert.ToString(companyRow["LinkAutoTest"]);
                    var     config    = new Configuration(companyId);
                    Product product   = new Product();

                    string rawHtml = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(testLink, 45, 2);
                    string html    = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHtmlNomarlTag(rawHtml);
                    if (html == "")
                    {
                        continue;
                    }

                    GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                    doc.LoadHtml(html);
                    product.Analytics(doc, testLink, config, true, domain, null);

                    bool parsedOk = product.IsSuccessData(config.CheckPrice);
                    pendingUpdates.Add(string.Format(updateTemplate, parsedOk ? "1" : "0", companyId));

                    // NOTE(review): the statements are only accumulated; nothing in this
                    // method executes them (the original had an empty
                    // "more than 10 pending" branch here). Confirm whether a flush is missing.
                }
                catch (Exception ex01)
                {
                    log.Error(ex01);
                }
            }
        }
Exemplo n.º 25
0
 /// <summary>
 /// Key handler: on F10, downloads the URL in <c>urlTestTextBox</c> and evaluates
 /// every XPath found in the sending rich-text box against it. Each matched
 /// node's inner text is shown in a message box; an expression with no match
 /// shows a "NoData" warning. Any exception is shown as an error dialog.
 /// </summary>
 /// <param name="sender">Expected to be the <c>RichTextBox</c> holding the XPath list.</param>
 /// <param name="e">Key event data; only F10 is acted upon.</param>
 private void EventCheckXPaths(object sender, KeyEventArgs e)
 {
     if (e.KeyCode != Keys.F10)
     {
         return;
     }

     try
     {
         string pageUrl  = urlTestTextBox.Text;
         string pageHtml = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(pageUrl, 45, 2);
         GABIZ.Base.HtmlAgilityPack.HtmlDocument document = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
         document.LoadHtml(pageHtml);

         string configuredText = (sender as RichTextBox).Text.Trim();
         var    expressions    = configuredText.Split(SqlDb.arSplit, 100, StringSplitOptions.RemoveEmptyEntries);

         foreach (var expression in expressions)
         {
             var matches = document.DocumentNode.SelectNodes(expression);
             if (matches == null)
             {
                 MessageBox.Show("NoData", "NoData", MessageBoxButtons.OK, MessageBoxIcon.Warning);
                 continue;
             }

             foreach (var match in matches)
             {
                 MessageBox.Show(match.InnerText);
             }
         }
     }
     catch (Exception ex)
     {
         MessageBox.Show(ex.Message, "Error XPaths Config", MessageBoxButtons.OK, MessageBoxIcon.Error);
     }
 }
Exemplo n.º 26
0
 /// <summary>
 /// Click handler: downloads every URL listed in <c>urlTestTextBox</c>, extracts all
 /// anchor targets, keeps those accepted by the selected configuration's
 /// product-URL regexes and shows the distinct results, in first-seen order,
 /// in a <c>FrmDataShow</c> dialog. Any exception is shown as an error dialog.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void btnGetProductLInk_Click(object sender, EventArgs e)
 {
     try
     {
         var           config         = this.raovatSqlAdapter.GetConfigByID((int)this.configXPathIDSpinEdit.Value);
         List <string> lstLink        = Common.GetListXPathFromString(urlTestTextBox.Text);
         List <string> lstExtractLink = new List <string>();

         // Membership set for de-duplication: List.Contains inside the nested
         // loops was quadratic in the number of links. The list is kept so the
         // output preserves first-seen order.
         HashSet <string> seenLinks = new HashSet <string>();

         foreach (string str in lstLink)
         {
             GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
             string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(str, 45, 2, true);
             doc.LoadHtml(html);

             var nodes = doc.DocumentNode.SelectNodes(@"//a[@href]");
             if (nodes == null)
             {
                 continue;
             }

             foreach (var node in nodes)
             {
                 string url = Common.GetAbsoluteUrl(node.Attributes["href"].Value.Trim(), config.domain);
                 if (!QT.Entities.Common.CheckRegex(url, config.ProductUrlsRegex, config.NoProductUrlRegex, false))
                 {
                     continue;
                 }

                 string trimmedUrl = url.Trim();
                 if (seenLinks.Add(trimmedUrl))
                 {
                     lstExtractLink.Add(trimmedUrl);
                 }
             }
         }

         FrmDataShow fr = new FrmDataShow(QT.Entities.Common.ConvertToString(lstExtractLink, "\n"));
         fr.ShowDialog();
     }
     catch (Exception ex)
     {
         MessageBox.Show(ex.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
     }
 }
Exemplo n.º 27
0
        /// <summary>
        /// Re-analyses the detail URLs held in <c>DicDetailUrl</c> for the current company.
        /// First, every stored product of the company whose <c>DetailUrl</c> is not in
        /// <c>DicDetailUrl</c> is marked <c>Valid = 0</c>. Then each URL in <c>DicDetailUrl</c> is
        /// downloaded and parsed; successful products are inserted and their ids
        /// appended to <c>lstProductIDChangeImage</c>. Progress is appended to
        /// <c>richTextBox1</c> through <c>Invoke</c>, and a summary message box is shown at
        /// the end, including when an exception interrupted the run.
        /// </summary>
        public void Analysic()
        {
            try
            {
                ProductAdapter      adapter     = new ProductAdapter(sqldb);
                Configuration       xpathConfig = new Configuration(companyID);
                QT.Entities.Company owner       = new Entities.Company(companyID);

                DataTable storedProducts = sqldb.GetTblData(
                    "Select ID,DetailUrl From Product Where Company = @CompanyID",
                    CommandType.Text,
                    new SqlParameter[] {
                        SqlDb.CreateParamteterSQL("@CompanyID", companyID, SqlDbType.BigInt)
                    },
                    null,
                    true);

                // Pass 1: invalidate stored products that are no longer in the URL set.
                foreach (DataRow storedRow in storedProducts.Rows)
                {
                    long   storedId  = QT.Entities.Common.Obj2Int64(storedRow["ID"]);
                    string storedUrl = QT.Entities.Common.Obj2String(storedRow["DetailUrl"]);
                    if (DicDetailUrl.ContainsKey(storedUrl))
                    {
                        continue;
                    }

                    sqldb.RunQuery("update Product set Valid = 0 where Company = @CompanyID and ID = @productID", CommandType.Text, new SqlParameter[] {
                        sqldb.CreateParamteter("@CompanyID", companyID, SqlDbType.BigInt),
                        sqldb.CreateParamteter("@productID", storedId, SqlDbType.BigInt)
                    });
                }

                // Pass 2: download and parse every URL in the set.
                foreach (var entry in DicDetailUrl)
                {
                    string pageUrl = entry.Key.ToString();
                    long   pageId  = QT.Entities.Common.GetIDProduct(pageUrl);

                    if (this.bDeleteProductData)
                    {
                        sqldb.RunQuery("delete product where id = @id", CommandType.Text, new SqlParameter[] {
                            SqlDb.CreateParamteterSQL("@id", pageId, SqlDbType.BigInt)
                        });
                    }

                    Product parsed = new Product();
                    GABIZ.Base.HtmlAgilityPack.HtmlDocument page = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                    page.LoadHtml(GetHtmlCode(pageUrl, false));
                    parsed.Analytics(page, pageUrl, xpathConfig, false, owner.Domain, null);

                    string progressLine;
                    if (parsed.IsSuccessData(this.config.CheckPrice))
                    {
                        adapter.InsertProduct(parsed);
                        lstProductIDChangeImage.Add(pageId);
                        progressLine = "\r\nSuccess link: " + pageUrl;
                    }
                    else
                    {
                        progressLine = "\r\nNo product link: " + pageUrl;
                    }

                    this.Invoke(new Action(() =>
                    {
                        richTextBox1.AppendText(progressLine);
                    }));
                }
            }
            catch (Exception ex)
            {
                this.Invoke(new Action(() =>
                {
                    MessageBox.Show(ex.Message + ex.StackTrace);
                }));
            }

            int total     = DicDetailUrl.Count;
            int succeeded = lstProductIDChangeImage.Count;
            MessageBox.Show(string.Format("Crawler {0} \nSuccess Link {1} \nFail link {2}", total, succeeded, (total - succeeded)));
        }
Exemplo n.º 28
0
        /// <summary>
        /// Main crawl loop. Repeatedly takes a job from <c>queueWaitRun</c>, downloads its
        /// page, optionally extracts further links into the queue and hands product
        /// pages to <c>ProcessProductData</c>. The loop stops when <c>IsEnded</c> is set, when
        /// <c>eventCheckOutSide</c> returns true, or when the queue is empty and
        /// <c>AddJobToQueue</c> cannot refill it. Start, get-job and end events are raised
        /// when handlers are attached; <c>UpdateWhenEnd</c> and <c>CleanDataAfterCrawler</c> run
        /// after the loop.
        /// </summary>
        public void StartCrawler()
        {
            log.InfoFormat("START CRALWER:{0}", this.idCrawler);
            if (this.eventWhenStart != null)
            {
                this.eventWhenStart(this, "Started");
            }
            Job task = null;

            while (!this.IsEnded && !(this.eventCheckOutSide != null && this.eventCheckOutSide(this)))
            {
                task = queueWaitRun.GetJob();
                if (task == null)
                {
                    // Queue drained: try to refill it, otherwise the crawl is finished.
                    if (AddJobToQueue())
                    {
                        continue;
                    }
                    break;
                }

                if (this.eventWhenGetJob != null)
                {
                    this.eventWhenGetJob(this, task.ToString());
                }
                if (CheckStopCrawler(task))
                {
                    continue;
                }

                string html = GetHtmlOfWeb(task.url);
                if (html != "")
                {
                    GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                    doc.LoadHtml(html);

                    // Link extraction.
                    if (CheckExtractionLink(task))
                    {
                        var a_nodes = doc.DocumentNode.SelectNodes("//a[@href]");
                        if (a_nodes != null)
                        {
                            #region add link to process
                            for (int i = 0; i < a_nodes.Count; i++)
                            {
                                string s           = QT.Entities.Common.GetAbsoluteUrl(a_nodes[i].Attributes["href"].Value, this.Domain);
                                string compactLink = QT.Entities.Common.CompactUrl(s);

                                // NOTE(review): Math.Abs throws OverflowException for
                                // int.MinValue, and this line is outside the try below —
                                // confirm getCRC32 cannot return that value.
                                int s_crc = Math.Abs(GABIZ.Base.Tools.getCRC32(compactLink));
                                if (CheckRegexVisit(s) && !setAddedQueue.Exists(s_crc))
                                {
                                    try
                                    {
                                        // Add to the set of links already queued.
                                        this.setAddedQueue.Add(s_crc, s);

                                        // Push the new job onto the queue, one level deeper.
                                        this.PushQueue(new Job()
                                        {
                                            deep = task.deep + 1,
                                            url  = s
                                        });
                                    }
                                    catch (Exception ex2)
                                    {
                                        // Log message and exception separately: an exception
                                        // message must not be used as a format string, and
                                        // the stack trace is worth keeping.
                                        log.Error(ex2.Message, ex2);
                                    }
                                }
                            }
                            #endregion
                        }
                    }

                    // Product analysis.
                    if (CheckRegexProduct(QT.Entities.Common.CompactUrl(task.url)))
                    {
                        ProcessProductData(task, doc);
                    }
                }
                this.UpdateProcessedJob(task);
            }
            if (this.eventWhenEnd != null)
            {
                this.eventWhenEnd(this, "End");
            }

            UpdateWhenEnd();

            CleanDataAfterCrawler();
        }
Exemplo n.º 29
0
        /// <summary>
        /// Service start-up: creates the cancellation source, the RabbitMQ server
        /// handle and the SQL adapters, then starts <c>workerCount</c> workers. Each
        /// worker's job handler reads a save-product message, downloads the URL(s)
        /// it names, parses each page and inserts or updates the resulting product.
        /// </summary>
        /// <param name="args">Service start arguments (unused).</param>
        /// <remarks>
        /// Any exception raised during start-up is logged and rethrown. Inside a job
        /// handler only <see cref="OperationCanceledException"/> is caught (the handler
        /// then returns false); other exceptions propagate to the worker.
        /// </remarks>
        protected override void OnStart(string[] args)
        {
            log.Info("Start service");
            try
            {
                InitializeComponent();
                cancelTokenSource = new CancellationTokenSource();
                string rabbitMQServerName = ConfigurationManager.AppSettings["rabbitMQServerName"];
                workers        = new Worker[workerCount];
                rabbitMQServer = RabbitMQManager.GetRabbitMQServer(rabbitMQServerName);

                // NOTE(review): database credentials are embedded in source. They should
                // be moved to protected configuration and rotated; left unchanged here
                // because the replacement configuration keys are not visible.
                string connectToSQL        = @"Data Source=172.22.30.86,1455;Initial Catalog=QT_2;Persist Security Info=True;User ID=qt_vn;Password=@F4sJ=l9/ryJt9MT;connection timeout=200";
                string connectToConnection = @"Data Source=42.112.28.93;Initial Catalog=QT_2;Persist Security Info=True;User ID=wss_price;Password=HzlRt4$$axzG-*UlpuL2gYDu;connection timeout=200";
                CrawlerProductAdapter crawlerProductAdapter = new CrawlerProductAdapter(new SqlDb(connectToSQL));
                ProductAdapter        productAdapter        = new ProductAdapter(new SqlDb(connectToConnection));


                for (int i = 0; i < workerCount; i++)
                {
                    // Fixed: the placeholder was "{i}", which is not a valid composite
                    // format item; use the positional "{0}" like the log call below.
                    log.InfoFormat("Start worker {0}", i);
                    var worker = new Worker(AddProductToSqlJobName, false, rabbitMQServer);
                    workers[i] = worker;
                    var  token      = this.cancelTokenSource.Token;
                    Task workerTask = new Task(() =>
                    {
                        worker.JobHandler = (downloadImageJob) =>
                        {
                            try
                            {
                                token.ThrowIfCancellationRequested();

                                QT.Entities.CrawlerProduct.RabbitMQ.MsSaveProduct Mss = QT.Entities.CrawlerProduct.RabbitMQ.MsSaveProduct.GetDataFromMessage(downloadImageJob.Data);
                                string Url     = Mss.Url;
                                string Domain  = QT.Entities.Common.GetDomainFromUrl(Url);
                                long CompanyID = QT.Entities.Common.GetIDCompany(Domain);
                                QT.Entities.Configuration config = new QT.Entities.Configuration(CompanyID);

                                // NOTE(review): the branch and the Analytics call below use the
                                // _company field, not the company derived from the message URL —
                                // confirm this is intended.
                                if (_company.Status == Common.CompanyStatus.WEB_CRAWLERDOMAIN)
                                {
                                    // The page is downloaded and loaded, but nothing in this
                                    // branch uses the result afterwards.
                                    List <QT.Entities.Company> ls = new List <QT.Entities.Company>();
                                    QT.Entities.CrawlerDomain obj = new CrawlerDomain();
                                    string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(Url.Trim(), 15, 1);
                                    GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                                    html = html.Replace("<form", "<div");
                                    html = html.Replace("</form", "</div");
                                    doc.LoadHtml(html);
                                }
                                else
                                {
                                    int numberItemSaved = 0;
                                    string[] arLink     = Url.Trim().Split(SqlDb.arSplit, StringSplitOptions.RemoveEmptyEntries);
                                    foreach (var item in arLink)
                                    {
                                        QT.Entities.Product _product = new Product();
                                        string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(item, 45, 2);
                                        if (config.ContentAnanyticXPath.Count >= 1)
                                        {
                                            // Despite the name, the entries are used as plain-text
                                            // markers: cut the page from the first marker up to and
                                            // including the second one (when present).
                                            int i1 = 0, i2 = 0;
                                            i1     = html.IndexOf(config.ContentAnanyticXPath[0]);
                                            if (i1 >= 0)
                                            {
                                                html = html.Substring(i1);
                                                if (config.ContentAnanyticXPath.Count >= 2)
                                                {
                                                    i2 = html.IndexOf(config.ContentAnanyticXPath[1]);
                                                    if (i2 >= 0)
                                                    {
                                                        html = html.Substring(0, i2 + config.ContentAnanyticXPath[1].Length);
                                                    }
                                                }
                                            }
                                            html = html.Replace("<form", "<div");
                                            html = html.Replace("</form", "</div");
                                            html = Common.TidyCleanR(html);
                                        }

                                        _htmlSource = html;
                                        GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();

                                        // Repeated on purpose: covers the case where no content
                                        // markers are configured and the block above was skipped.
                                        html = html.Replace("<form", "<div");
                                        html = html.Replace("</form", "</div");
                                        doc.LoadHtml(html);

                                        List <Product> lstUpdateProduct = new List <Product>();
                                        List <Product> lstInsertProduct = new List <Product>();

                                        _product.Analytics(doc, item, config, true, _company.Domain);

                                        if (_product != null && _product.IsSuccessData(config.CheckPrice))
                                        {
                                            numberItemSaved++;
                                            if (productAdapter.CheckExistInDb(_product.ID))
                                            {
                                                lstUpdateProduct.Add(_product);
                                            }
                                            else
                                            {
                                                lstInsertProduct.Add(_product);
                                            }

                                            productAdapter.UpdateProductsChangeToDb(lstUpdateProduct);
                                            productAdapter.InsertListProduct(lstInsertProduct);

                                            productAdapter.PushQueueIndexCompany(config.CompanyID);
                                            productAdapter.PushQueueChangeChangeImage(new MQChangeImage()
                                            {
                                                ProductID = _product.ID,
                                                Type      = 1
                                            });

                                            log.InfoFormat("Saved {0} item product!", _product.Name);
                                        }
                                    }
                                }

                                return(true);
                            }
                            catch (OperationCanceledException)
                            {
                                log.Info("End worker");
                                return(false);
                            }
                        };
                        worker.Start();
                    }, token);
                    workerTask.Start();
                    log.InfoFormat("Worker {0} started", i);
                }
            }
            catch (Exception ex)
            {
                log.Error("Start error", ex);
                throw;
            }
        }
Exemplo n.º 30
0
        /// <summary>
        /// Endless crawl loop for one classified-ads website. Each pass seeds a fresh
        /// queue with the runner's root links, then walks it breadth-first while
        /// <c>Pause</c> is false. For every downloaded page it queues unseen links that
        /// match the visit or product regexes and, when the page URL itself matches
        /// the product regexes, parses it and inserts or updates it in MongoDB.
        /// After a pass it appends a wait message to <c>richTextBox1</c> and sleeps 10 seconds.
        /// </summary>
        /// <remarks>
        /// The method returns only from the <see cref="ThreadAbortException"/> handler;
        /// any other exception escapes the loop.
        /// </remarks>
        private void DoCrawler()
        {
            Dictionary <long, int[]>   categoryMap = this.raovatSqlAdapter.GetDicMapClassificationAndCategories(this.websiteRaoVat.id);
            Dictionary <int, string[]> cityMap     = this.raovatSqlAdapter.GetDicCityAndRegex();

            for (;;)
            {
                try
                {
                    int ignoredCount = 0;

                    // Initialise the work queue and the visited set for this pass.
                    Queue <JobCrawlerSale>    pending = new Queue <JobCrawlerSale>();
                    Dictionary <long, string> visited = new Dictionary <long, string>();
                    foreach (var rootUrl in this.RunnerCrawler.root_link)
                    {
                        pending.Enqueue(new JobCrawlerSale()
                        {
                            deep = 0,
                            url  = rootUrl
                        });
                    }
                    this.ShowQueue(pending.Count);

                    while (!this.Pause && pending.Count > 0)
                    {
                        JobCrawlerSale job = pending.Dequeue();

                        ShowUrlCurrent(job.url);
                        ShowQueue(pending.Count);

                        if (configXPath.TimeDelay > 0)
                        {
                            Thread.Sleep(configXPath.TimeDelay);
                        }

                        string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(job.url, 45, 2);
                        if (string.IsNullOrEmpty(html))
                        {
                            continue;
                        }

                        GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                        doc.LoadHtml(html);

                        // Link extraction.
                        var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
                        if (anchors != null)
                        {
                            foreach (var anchor in anchors)
                            {
                                string absoluteUrl = QT.Entities.Common.GetAbsoluteUrl(anchor.Attributes["href"].Value, this.websiteRaoVat.base_link);
                                string compactUrl  = QT.Entities.Common.CompactUrl(absoluteUrl);
                                long   urlKey      = Math.Abs(GABIZ.Base.Tools.getCRC64(compactUrl));
                                if (visited.ContainsKey(urlKey))
                                {
                                    continue;
                                }

                                visited.Add(urlKey, "");
                                ShowVisited(visited.Count);

                                bool matchesProduct = QT.Entities.Common.CheckRegex(compactUrl, configXPath.ProductUrlsRegex, configXPath.NoProductUrlRegex, false);
                                bool matchesVisit   = QT.Entities.Common.CheckRegex(compactUrl, configXPath.VisitUrlsRegex, configXPath.NoVisitUrlRegex, false);

                                // Visit links are depth-limited; links that match only the
                                // product regexes are queued regardless of depth.
                                bool shouldQueue = matchesVisit
                                    ? job.deep + 1 < this.RunnerCrawler.max_deep
                                    : matchesProduct;
                                if (!shouldQueue)
                                {
                                    continue;
                                }

                                pending.Enqueue(new JobCrawlerSale()
                                {
                                    url  = absoluteUrl,
                                    deep = job.deep + 1
                                });
                                ShowQueue(pending.Count);
                            }
                        }

                        // Data analysis: only for pages whose own URL is a product URL.
                        bool isProductPage = QT.Entities.Common.CheckRegex(
                            QT.Entities.Common.CompactUrl(job.url), configXPath.ProductUrlsRegex, configXPath.NoProductUrlRegex, false);
                        if (!isProductPage)
                        {
                            continue;
                        }

                        QT.Entities.RaoVat.HandlerContentOfHtml contentHandler = new Entities.RaoVat.HandlerContentOfHtml();
                        ProductSaleNew saleItem = new ProductSaleNew();
                        contentHandler.AnalyticsProductSaleNew(websiteRaoVat.domain, job.url, doc, configXPath
                                                               , saleItem, categoryMap, cityMap);

                        if (!saleItem.IsDetailSucess)
                        {
                            // NOTE(review): post-increment passes the count from before
                            // this page is added — confirm that is the intended display.
                            ShowIgone(ignoredCount++);
                            continue;
                        }

                        // Saving the classification is best-effort: failures are ignored.
                        try
                        {
                            this.raovatSqlAdapter.SaveClassification(saleItem.website_id, saleItem.web_category);
                        }
                        catch (Exception)
                        {
                        }

                        if (this.mongoDbAdapter.CheckExistsProductSalenew(saleItem.id))
                        {
                            this.mongoDbAdapter.UpdateProduct(saleItem);
                        }
                        else
                        {
                            this.mongoDbAdapter.InsertProduct(saleItem);
                        }
                        ShowProduct(saleItem);
                    }

                    this.Invoke(new Action(() =>
                    {
                        richTextBox1.AppendText("\n\rWait to next run!");
                    }));

                    Thread.Sleep(10000);
                }
                catch (ThreadAbortException)
                {
                    return;
                }
            }
        }