Esempio n. 1
0
 /// <summary>
 /// Gathers the image URLs produced by every XPath in <c>configXPath.ImageUrlsXPaths</c>
 /// and passes each one through <c>Common.GetAbsoluteUrl</c> together with <paramref name="uri"/>.
 /// </summary>
 /// <param name="uri">Page address; handed to <c>Common.GetAbsoluteUrl</c> and written to the error log.</param>
 /// <param name="document">Parsed HTML document to query.</param>
 /// <param name="configXPath">Crawl configuration supplying the image XPaths and the ID used when logging.</param>
 /// <returns>The (possibly empty) list of URLs, or <c>null</c> when an exception was caught and logged.</returns>
 public List <string> GetImage(Uri uri, HtmlDocument document, ConfigXPaths configXPath)
 {
     try
     {
         List<string> imageUrls = new List<string>();
         List<string> imageXPaths = configXPath.ImageUrlsXPaths;

         // Nothing configured: hand back the empty list.
         if (imageXPaths == null || imageXPaths.Count == 0)
         {
             return imageUrls;
         }

         foreach (string imageXPath in imageXPaths)
         {
             List<string> rawUrls = Common.GetTextInNode(document, imageXPath, configXPath);
             if (rawUrls == null)
             {
                 continue;
             }

             foreach (string rawUrl in rawUrls)
             {
                 imageUrls.Add(Common.GetAbsoluteUrl(rawUrl, uri));
             }
         }

         return imageUrls;
     }
     catch (Exception ex)
     {
         logErrorDb.SaveError(LogErrorToDb.errorParseImage, uri.AbsoluteUri, configXPath.ID, ex.Message);
         return null;
     }
 }
Esempio n. 2
0
 /// <summary>
 /// Maps a website category string to a category number using the rules in
 /// <c>config.RegexStringToCategory</c>.
 /// Each rule has the form <c>Regex:Int</c>, for example <c>.*ô tô.*:9</c>.
 /// The first rule whose regex matches <paramref name="WebCategory"/> and whose
 /// number part parses as an integer wins.
 /// </summary>
 /// <param name="url">Address of the page being processed; only used when logging an error.</param>
 /// <param name="config">Crawl configuration supplying the rule list and the ID used when logging.</param>
 /// <param name="WebCategory">Category text to test against each rule's regex.</param>
 /// <param name="defaultReturn">Value returned when no rule matches.</param>
 /// <returns>
 /// The number of the first matching rule, <paramref name="defaultReturn"/> when nothing matches,
 /// or 0 when an exception was caught and logged.
 /// </returns>
 public int RecognizeCategory(string url, ConfigXPaths config, string WebCategory, int defaultReturn = 0)
 {
     try
     {
         List<string> lstRegexToInt = config.RegexStringToCategory;
         if (lstRegexToInt == null || lstRegexToInt.Count == 0)
         {
             return defaultReturn;
         }

         foreach (string aRule in lstRegexToInt)
         {
             if (string.IsNullOrEmpty(aRule))
             {
                 continue;
             }

             // Split on the LAST colon: the regex part may itself contain ':' (e.g. "(?:a|b)"),
             // and cutting at the first colon would produce an invalid pattern that throws
             // and aborts every remaining rule.
             int separatorIndex = aRule.LastIndexOf(':');
             if (separatorIndex < 0)
             {
                 continue;
             }

             string pattern    = aRule.Substring(0, separatorIndex);
             string numberText = aRule.Substring(separatorIndex + 1);

             int iCat;
             if (Regex.IsMatch(WebCategory, pattern) && Int32.TryParse(numberText, out iCat))
             {
                 return iCat;
             }
         }

         return defaultReturn;
     }
     catch (Exception ex)
     {
         // Kept as in the original: a failure reports 0, not defaultReturn.
         this.logErrorDb.SaveError(LogErrorToDb.errorRegonizeCategory, url, config.ID, ex.Message);
         return 0;
     }
 }
Esempio n. 3
0
        /// <summary>
        /// Click handler: downloads the URL typed into the test box, runs the product parser on it
        /// with the configuration currently entered in the form, shows the parsed product in a
        /// dialog and, on request, stores the product and the downloaded HTML.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnTestProduct_Click(object sender, EventArgs e)
        {
            string urlTest = urlTestTextBox.Text;

            if (string.IsNullOrWhiteSpace(urlTest))
            {
                MessageBox.Show("Not url test");
                return;
            }

            // Download the page once. The original fetched it twice and threw the first copy away,
            // and also loaded it into an HtmlDocument that was never read afterwards.
            WebExceptionStatus status = WebExceptionStatus.Success;
            string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(urlTest, 45, 2, out status);

            ConfigXPaths config = this.raovatSqlAdapter.GetConfigByID((int)this.configXPathIDSpinEdit.Value);
            if (config == null)
            {
                // No stored configuration yet: start from an empty one.
                config = new ConfigXPaths()
                {
                    ID = -1
                };
            }

            if (!this.LoadFormToConfig(ref config))
            {
                return;
            }

            var product = new ProductSaleNew();
            this.hanlerContentOfHtml.AnalyticsProductSaleNew(config.domain, urlTest, config, product,
                                                             this.raovatSqlAdapter.GetDicMapClassificationAndCategories(config.website_id),
                                                             this.raovatSqlAdapter.GetDicCityAndRegex());

            FrmDataShow frmDataShow = new FrmDataShow(product.ToString());
            frmDataShow.btnSave.Click += new EventHandler(delegate(object obj, EventArgs eventArg)
            {
                if (MessageBox.Show("Save to Cassandra?", "Warning", MessageBoxButtons.YesNo, MessageBoxIcon.Warning) != System.Windows.Forms.DialogResult.Yes)
                {
                    return;
                }

                // Update when the product is already stored, insert otherwise; the HTML is saved either way.
                bool bExits = this.mongoDbAdapter.CheckExistsProductSalenew(product.id);
                if (bExits)
                {
                    mongoDbAdapter.UpdateProduct(product);
                }
                else
                {
                    mongoDbAdapter.InsertProduct(product);
                }
                mongoDbAdapter.SaveHtml(product.id, html, bExits);
            });
            frmDataShow.ShowDialog();
        }
Esempio n. 4
0
 /// <summary>
 /// Creates a crawler for one configuration: opens the SQL connection, builds the
 /// added-set and wait-queue on it, loads the configuration and the lookup
 /// dictionaries through the SQL adapter, and creates the Mongo adapter.
 /// </summary>
 /// <param name="ConfigID">ID of the crawl configuration, loaded with <c>GetConfigByID</c>.</param>
 /// <param name="TypeCrawler">
 /// Crawl mode. The original note on this constructor lists 0 = Full and 1 = RealTime;
 /// the value is handed to the set and the queue as <c>Convert.ToInt32(TypeCrawler)</c>.
 /// </param>
 /// <exception cref="System.ArgumentException">
 /// No configuration was found for <paramref name="ConfigID"/>.
 /// </exception>
 public SimpleCrawlerRaoVat(int ConfigID, ETypeCrawlRaoVat TypeCrawler)
 {
     this.sqlDb        = new SqlDb(QT.Entities.Server.ConnectionString);
     this.iTypeCrawler = TypeCrawler;

     int crawlerType = Convert.ToInt32(this.iTypeCrawler);
     this.setAddedQueue    = new SetCrawlerRaoVat(this.sqlDb, ConfigID, crawlerType);
     this.queueWaitRun     = new QueueCrawlerRaoVat(this.sqlDb, ConfigID, crawlerType);
     this.sqlRaoVatAdapter = new RaoVatSQLAdapter(this.sqlDb);

     this.configXPath = sqlRaoVatAdapter.GetConfigByID(ConfigID);
     if (this.configXPath == null)
     {
         // Fail with a clear message instead of a NullReferenceException on the next line.
         throw new ArgumentException("No crawl configuration found for ConfigID " + ConfigID + ".", "ConfigID");
     }

     this.Domain         = this.configXPath.domain;
     this.mongoDbAdapter = new MongoDbRaoVat();
     this.dicMapClassificationAndCategories = this.sqlRaoVatAdapter.GetDicMapClassificationAndCategories(this.configXPath.website_id);
     this.dicCityAndRegex = this.sqlRaoVatAdapter.GetDicCityAndRegex();
 }
Esempio n. 5
0
        /// <summary>
        /// Loads one crawl configuration from the <c>Config_Crawl</c> table.
        /// </summary>
        /// <param name="ConfigID">Value matched against the <c>ID</c> column.</param>
        /// <returns>
        /// A configuration built from the first row returned, or <c>null</c> when the query
        /// yields no row or when <c>rows.Info.TriedHosts.Count</c> is not exactly 1.
        /// </returns>
        public ConfigXPaths GetConfigByID(int ConfigID)
        {
            RowSet rows = this._session.Execute(string.Format("SELECT * FROM Config_Crawl WHERE ID = {0}", ConfigID), 1);

            // NOTE(review): rows are only read when exactly one host was tried; any other count
            // is treated as "not found". Confirm this is the intended condition.
            if (rows.Info.TriedHosts.Count != 1)
            {
                return null;
            }

            // Only the first row is used.
            foreach (Row row in rows)
            {
                ConfigXPaths config = new ConfigXPaths();
                config.ID = Convert.ToInt32(row.GetValue(typeof(int), CassandraColumn.id));

                // XPath lists, one column per extracted field.
                config.AddressXPaths     = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.address_xpaths));
                config.AvaiableXPaths    = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.avaiable_xpaths));
                config.CategoryXPaths    = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.category_xpaths));
                config.ContentXPaths     = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.content_xpaths));
                config.ImageUrlsXPaths   = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.image_url_xpaths));
                config.ProvinceXPaths    = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.province_xpaths));
                config.QualityXPaths     = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.quality_xpaths));
                config.ThumbUrlXPaths    = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.thumb_url_xpaths));
                config.TitleXPaths       = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.title_xpaths));
                // WebCategoryXPaths was assigned twice from the same column in the original; once is enough.
                config.WebCategoryXPaths = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.web_category_xpaths));
                config.PostDateXPaths    = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.post_date_xpaths));
                config.LastChangeXPaths  = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.last_change_xpaths));
                config.PhoneSalerXPaths  = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.phone_saler_xpaths));
                config.PriceXPaths       = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.price_xpaths));
                config.UserNameXPaths    = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.saler_name_xpaths));

                // Scalar settings.
                config.UrlTest       = row.GetValue<string>(CassandraColumn.url_test);
                config.TimeDelay     = Common.Obj2Int(row.GetValue(typeof(string), CassandraColumn.time_delay));
                config.ItemReCrawler = Common.Obj2Int(row.GetValue(typeof(string), CassandraColumn.item_recrawl));
                config.CategoryID    = Common.Obj2Int(row.GetValue(typeof(string), CassandraColumn.category_id));

                // URL filter regex lists and tag XPaths.
                config.NoVisitUrlRegex   = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.novisit_url_regex));
                config.NoProductUrlRegex = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.noproduct_url_regex));
                config.ProductUrlsRegex  = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.product_url_regex));
                config.VisitUrlsRegex    = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.visit_url_regex));
                config.tags_xpaths       = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.tags_xpaths));

                // extend_xpaths is stored one entry per line; at most 200 parts are produced.
                string extendXPathsText = row.GetValue<string>(CassandraColumn.extend_xpaths);
                if (!string.IsNullOrEmpty(extendXPathsText))
                {
                    config.extend_xpaths = extendXPathsText.Split(new char[] { '\n' }, 200, StringSplitOptions.RemoveEmptyEntries).ToList();
                }

                config.spe_car_carmaker_xpaths = Common.GetListXPathFromString(row.GetValue<string>(CassandraColumn.spe_car_carmaker_xpaths));
                return config;
            }

            return null;
        }
Esempio n. 6
0
        /// <summary>
        /// Reads the location text from the first XPath in <c>configXPath.XPaths08</c> that yields
        /// non-empty text, and returns whatever follows the last ':' in it.
        /// </summary>
        /// <param name="document">Parsed HTML document to query.</param>
        /// <param name="configXPath">Crawl configuration supplying the XPath list.</param>
        /// <returns>
        /// The text after the last ':' of the first non-empty match; an empty string when that text
        /// has no ':' or when no XPath produces any text.
        /// </returns>
        private string GetLocation(HtmlDocument document, ConfigXPaths configXPath)
        {
            List<string> xPaths = configXPath.XPaths08;
            if (xPaths == null || xPaths.Count == 0)
            {
                return "";
            }

            foreach (string xPath in xPaths)
            {
                var node = document.DocumentNode.SelectSingleNode(xPath);
                if (node == null)
                {
                    // XPath matched nothing on this page: try the next one instead of crashing.
                    continue;
                }

                string location = System.Text.RegularExpressions.Regex.Replace(node.InnerText.Trim(), "\r|\n|\t", "");
                if (string.IsNullOrEmpty(location))
                {
                    continue;
                }

                // The first non-empty text decides the result, even when it has no ':'.
                int colonIndex = location.LastIndexOf(':');
                return colonIndex >= 0 ? location.Substring(colonIndex + 1) : "";
            }

            return "";
        }
Esempio n. 7
0
        /// <summary>
        /// Saves the configuration entered in the form. When a configuration with the selected ID
        /// is already stored, the user is asked to confirm before it is overwritten. After a
        /// successful update the stored configuration is re-read and shown in the form.
        /// Any exception is reported in a message box.
        /// </summary>
        private void SaveData()
        {
            try
            {
                ConfigXPaths config = this.raovatSqlAdapter.GetConfigByID((int)configXPathIDSpinEdit.Value);
                bool alreadyStored = config != null;

                if (!alreadyStored)
                {
                    config    = new ConfigXPaths();
                    config.ID = (int)this.configXPathIDSpinEdit.Value;
                }

                if (!this.LoadFormToConfig(ref config))
                {
                    return;
                }

                // Overwriting an existing entry needs explicit confirmation (default button is "No").
                if (alreadyStored)
                {
                    System.Windows.Forms.DialogResult answer = MessageBox.Show(
                        "Đã tồn tại, muốn ghi đè không", "Warning",
                        MessageBoxButtons.YesNo, MessageBoxIcon.Warning, MessageBoxDefaultButton.Button2);
                    if (answer != System.Windows.Forms.DialogResult.Yes)
                    {
                        return;
                    }
                }

                if (!this.raovatSqlAdapter.UpdateConfigXPath(config))
                {
                    return;
                }

                // Re-read what was stored and refresh the form with it.
                // (A Redis "last update" write used to follow here; it is disabled.)
                config = this.raovatSqlAdapter.GetConfigByID(config.ID);
                LoadConfigToForm(config);
                MessageBox.Show("Updated!");
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
        }
Esempio n. 8
0
 /// <summary>
 /// Builds a normalised breadcrumb string from the nodes selected by the first XPath in
 /// <c>config.WebCategoryXPaths</c>: node texts are cleaned, joined with '>', '/' is turned
 /// into '>', the result is lower-cased, passed through <c>Common.ChuanHoaUnicode</c>, and
 /// a whitespace character directly before or after '>' is removed.
 /// </summary>
 /// <param name="url">Address of the page; only used when logging an error.</param>
 /// <param name="document">Parsed HTML document to query.</param>
 /// <param name="config">Crawl configuration supplying the XPaths and the ID used when logging.</param>
 /// <returns>The breadcrumb string, or an empty string when nothing is configured, nothing matches, or an exception was logged.</returns>
 public string GetWebCategory(string url, HtmlDocument document, ConfigXPaths config)
 {
     try
     {
         var categoryXPaths = config.WebCategoryXPaths;
         if (categoryXPaths == null || categoryXPaths.Count == 0)
         {
             return "";
         }

         // Only the first configured XPath is consulted.
         var crumbNodes = document.DocumentNode.SelectNodes(categoryXPaths[0]);
         if (crumbNodes == null)
         {
             return "";
         }

         string breadcrumb = "";
         foreach (var crumbNode in crumbNodes)
         {
             string crumb = Common.ChangeSpaceCharacter(crumbNode.InnerText);
             crumb = Common.RemoveDumplicateSpace(crumb).Trim();

             // A separator is only inserted once something has been accumulated.
             if (!string.IsNullOrEmpty(breadcrumb))
             {
                 breadcrumb = breadcrumb + ">";
             }
             breadcrumb = breadcrumb + crumb;
         }

         breadcrumb = Common.RemoveDumplicateSpace(breadcrumb).Replace("/", ">").Trim().ToLower();
         breadcrumb = Common.ChuanHoaUnicode(breadcrumb);
         breadcrumb = Regex.Replace(breadcrumb, @"\s>", ">");
         return Regex.Replace(breadcrumb, @">\s", ">");
     }
     catch (Exception ex)
     {
         this.logErrorDb.SaveError(LogErrorToDb.errorParseWebCategory, url, config.ID, ex.Message);
         return "";
     }
 }
Esempio n. 9
0
        /// <summary>
        /// Click handler: stores the configuration entered in the form under the ID selected in
        /// the spin edit, without asking for confirmation, and reports success in a message box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button6_Click_1(object sender, EventArgs e)
        {
            ConfigXPaths config = this.raovatSqlAdapter.GetConfigByID((int)configXPathIDSpinEdit.Value);

            if (config == null)
            {
                // Nothing stored under this ID yet: start from an empty configuration.
                // (The original was missing the ';' after the initializer and did not compile.)
                config = new ConfigXPaths()
                {
                    ID = (int)this.configXPathIDSpinEdit.Value
                };
            }

            if (this.LoadFormToConfig(ref config) && this.raovatSqlAdapter.UpdateConfigXPath(config))
            {
                MessageBox.Show("Updated!");
            }
        }
Esempio n. 10
0
 /// <summary>
 /// Reads the breadcrumb text from the node at <c>//nav/fieldset//span</c>, removes line
 /// breaks and tabs, rewrites <c>&amp;gt;</c> as <c>-&gt;</c>, and strips one leading and one
 /// trailing <c>-&gt;</c>.
 /// </summary>
 /// <param name="document">Parsed HTML document to query.</param>
 /// <param name="configXPath">
 /// Crawl configuration. <c>XPaths52</c> only acts as an on/off switch here: the XPath that is
 /// actually evaluated is hard-coded.
 /// </param>
 /// <returns>The cleaned breadcrumb, or an empty string when <c>XPaths52</c> is empty or the node is missing.</returns>
 private string GetCategory(HtmlDocument document, ConfigXPaths configXPath)
 {
     if (configXPath.XPaths52 == null || configXPath.XPaths52.Count == 0)
     {
         return "";
     }

     var node = document.DocumentNode.SelectSingleNode("//nav/fieldset//span");
     if (node == null)
     {
         return "";
     }

     string str = Regex.Replace(Regex.Replace(node.InnerText, "\n|\r|\t", ""), "&gt;", "->");

     // Strip the separator at BOTH ends. The original used "else if", so a trailing "->"
     // survived whenever a leading one had been removed.
     if (str.StartsWith("->", System.StringComparison.Ordinal))
     {
         str = str.Remove(0, 2);
     }
     if (str.EndsWith("->", System.StringComparison.Ordinal))
     {
         str = str.Remove(str.Length - 2, 2);
     }
     return str;
 }
Esempio n. 11
0
 /// <summary>
 /// Returns the title taken from the first XPath in <c>configXPath.TitleXPaths</c> whose first
 /// matching node has non-empty text. Line breaks and tabs become spaces, the text is passed
 /// through <c>Common.RemoveDumplicateSpace</c>, and the result is trimmed.
 /// </summary>
 /// <param name="url">Address of the page; only used when logging an error.</param>
 /// <param name="document">Parsed HTML document to query.</param>
 /// <param name="configXPath">Crawl configuration supplying the XPaths and the ID used when logging.</param>
 /// <returns>The cleaned title, or an empty string when nothing matches or an exception was logged.</returns>
 private string GetTitleSaleNew(string url, HtmlDocument document, ConfigXPaths configXPath)
 {
     try
     {
         foreach (var titleXPath in configXPath.TitleXPaths)
         {
             HtmlNodeCollection matches = document.DocumentNode.SelectNodes(titleXPath);
             if (matches == null || matches.Count == 0)
             {
                 continue;
             }

             string rawTitle = matches[0].InnerText;
             if (rawTitle == "")
             {
                 continue;
             }

             string singleLine = Regex.Replace(rawTitle, "\n|\t|\r", " ");
             return Common.RemoveDumplicateSpace(singleLine).Trim();
         }

         return "";
     }
     catch (Exception ex)
     {
         this.logErrorDb.SaveError(LogErrorToDb.errorParseTitleProduct, url, configXPath.ID, ex.Message);
         return "";
     }
 }
Esempio n. 12
0
 /// <summary>
 /// Concatenates the text found by every XPath in <c>configXPath.ContentXPaths</c>
 /// (each XPath's texts combined by <c>Common.ConvertToString</c> with a " " argument) and
 /// returns the result of <c>Common.ChuanHoaUnicode</c> on it.
 /// </summary>
 /// <param name="url">Address of the page; only used when logging an error.</param>
 /// <param name="document">Parsed HTML document to query.</param>
 /// <param name="configXPath">Crawl configuration supplying the XPaths and the ID used when logging.</param>
 /// <returns>The combined content text, or an empty string when an exception was caught and logged.</returns>
 public string GetContent(string url, HtmlDocument document, ConfigXPaths configXPath)
 {
     try
     {
         string combined = "";
         List<string> contentXPaths = configXPath.ContentXPaths;

         if (contentXPaths != null)
         {
             foreach (string contentXPath in contentXPaths)
             {
                 List<string> texts = Common.GetTextInNode(document, contentXPath, configXPath);
                 combined = combined + Common.ConvertToString(texts, " ");
             }
         }

         return Common.ChuanHoaUnicode(combined);
     }
     catch (Exception ex)
     {
         logErrorDb.SaveError(LogErrorToDb.errorParseContent, url, configXPath.ID, ex.Message);
         return "";
     }
 }
Esempio n. 13
0
        /// <summary>
        /// Re-parses one stored product from its URL. On success the product is updated and the
        /// document is flagged <c>processed</c>; otherwise <c>fail_count</c> is incremented, a
        /// <c>status</c> of 1 is moved to 2, and <c>is_solr_updated</c> / <c>updated_at</c> are reset.
        /// </summary>
        /// <param name="objSender">Not used by this method.</param>
        /// <param name="productNeedProcess">Stored product document; its <c>source_id</c>, <c>url</c>, <c>_id</c>, <c>status</c> and optional <c>fail_count</c> fields are read.</param>
        private void ProcessData_ReloadAllField(object objSender, BsonDocument productNeedProcess)
        {
            ProductSaleNew reparsed     = new ProductSaleNew();
            ConfigXPaths   sourceConfig = this.dicConfigXPath[productNeedProcess["source_id"].AsInt32];

            // The parser's return code is not used; success is read from the product itself.
            handerContent.AnalyticsProductSaleNew(sourceConfig.domain,
                                                  productNeedProcess["url"].AsString, sourceConfig, reparsed,
                                                  null, null);

            if (reparsed.IsDetailSucess)
            {
                // Store the refreshed product, then flag the document as processed.
                mon.UpdateProduct(reparsed);

                var byId          = Builders<BsonDocument>.Filter.Eq("_id", productNeedProcess["_id"].AsObjectId);
                var markProcessed = Builders<BsonDocument>.Update.Set("processed", true);

                // NOTE(review): the task returned by UpdateOneAsync is not awaited here.
                mon.colProduct.UpdateOneAsync(byId, markProcessed);
                return;
            }

            int currentStatus = productNeedProcess["status"].AsInt32;
            int failCount     = productNeedProcess.Contains("fail_count")
                ? productNeedProcess["fail_count"].AsInt32
                : 0;
            failCount++;

            if (failCount >= 1 && currentStatus == 1)
            {
                currentStatus = 2;
            }

            var failedById   = Builders<BsonDocument>.Filter.Eq("_id", productNeedProcess["_id"].AsObjectId);
            var recordFailed = Builders<BsonDocument>.Update
                               .Set("status", currentStatus)
                               .Set("fail_count", failCount)
                               .Set("is_solr_updated", false)
                               .CurrentDate("updated_at");

            // NOTE(review): the task returned by UpdateOneAsync is not awaited here.
            mon.colProduct.UpdateOneAsync(failedById, recordFailed);
        }
Esempio n. 14
0
 /// <summary>
 /// Returns the first positive price parsed from the nodes addressed by
 /// <c>configXPath.XPaths04</c>, using <c>Common.ParsePrice(text, true)</c>.
 /// </summary>
 /// <param name="document">Parsed HTML document to query.</param>
 /// <param name="configXPath">Crawl configuration supplying the XPath list.</param>
 /// <returns>The first price greater than 0, or 0 when none is found.</returns>
 private decimal GetPriceSaleNew(HtmlDocument document, ConfigXPaths configXPath)
 {
     var priceXPaths = configXPath.XPaths04;
     if (priceXPaths == null || priceXPaths.Count == 0)
     {
         return 0;
     }

     foreach (string priceXPath in priceXPaths)
     {
         var priceNode = document.DocumentNode.SelectSingleNode(priceXPath);
         if (priceNode == null)
         {
             continue;
         }

         string priceText = priceNode.InnerText.Trim();
         if (string.IsNullOrWhiteSpace(priceText))
         {
             continue;
         }

         decimal parsed = Common.ParsePrice(priceText, true);
         if (parsed > 0)
         {
             return parsed;
         }
     }

     return 0;
 }
Esempio n. 15
0
        /// <summary>
        /// Builds a comma-separated tag string from the texts found by every XPath in
        /// <c>configXPath.tags_xpaths</c>. Each text is passed through
        /// <c>Common.RemoveDumplicateSpace</c>, lower-cased and trimmed.
        /// </summary>
        /// <param name="document">Parsed HTML document to query.</param>
        /// <param name="configXPath">Crawl configuration supplying the tag XPaths.</param>
        /// <returns>The joined tags, or an empty string when nothing is configured or found.</returns>
        public string GetTagsString(HtmlDocument document, ConfigXPaths configXPath)
        {
            string        str = "";
            List<string> lst = configXPath.tags_xpaths;

            if (lst == null || lst.Count == 0)
            {
                return str;
            }

            foreach (var xPath in lst)
            {
                List<string> arData = Common.GetTextInNode(document, xPath, configXPath);
                if (arData == null)
                {
                    // The original read arData.Count before any null check; other callers in
                    // this code base treat a null result from GetTextInNode as "nothing found".
                    continue;
                }

                foreach (string rawTag in arData)
                {
                    string sItem = Common.RemoveDumplicateSpace(rawTag);
                    sItem = sItem.ToLower();
                    sItem = sItem.Trim();

                    // A comma is only inserted once something has been accumulated.
                    str = str + (string.IsNullOrEmpty(str) ? "" : ",") + sItem;
                }
            }
            return str;
        }
Esempio n. 16
0
 /// <summary>
 /// Returns the first text found by the first XPath in <c>configXPath.PhoneSalerXPaths</c>
 /// that yields any text at all.
 /// </summary>
 /// <param name="document">Parsed HTML document to query.</param>
 /// <param name="configXPath">Crawl configuration supplying the phone XPaths.</param>
 /// <returns>The first text found, or an empty string when nothing is found or an exception was logged.</returns>
 private string GetPhoneSaler(HtmlDocument document, ConfigXPaths configXPath)
 {
     try
     {
         List<string> phoneXPaths = configXPath.PhoneSalerXPaths;
         if (phoneXPaths == null || phoneXPaths.Count == 0)
         {
             return "";
         }

         foreach (string phoneXPath in phoneXPaths)
         {
             List<string> candidates = Common.GetTextInNode(document, phoneXPath, configXPath);
             if (candidates == null || candidates.Count == 0)
             {
                 continue;
             }
             return candidates[0];
         }

         return "";
     }
     catch (Exception ex)
     {
         log.ErrorFormat("Exception:{0}", ex.Message);
         return "";
     }
 }
Esempio n. 17
0
 /// <summary>
 /// Returns the province text from the first XPath in <c>configXPath.ProvinceXPaths</c>
 /// that yields non-empty text after <c>Common.ChuanHoaUnicode</c>.
 /// </summary>
 /// <param name="document">Parsed HTML document to query.</param>
 /// <param name="configXPath">Crawl configuration supplying the province XPaths.</param>
 /// <returns>The province text, or an empty string when nothing is found or an exception was logged.</returns>
 private string GetProvince(HtmlDocument document, ConfigXPaths configXPath)
 {
     try
     {
         List<string> lst = configXPath.ProvinceXPaths;
         if (lst == null || lst.Count == 0)
         {
             return "";
         }

         foreach (string xPath in lst)
         {
             // Use the XPath of this iteration. The original always evaluated lst[0],
             // so fallback XPaths after the first were never tried.
             string strText = Common.ChuanHoaUnicode(Common.GetTextOfXPath(document, xPath));
             if (!string.IsNullOrEmpty(strText))
             {
                 return strText;
             }
         }
         return "";
     }
     catch (Exception ex)
     {
         log.ErrorFormat("Exception:{0}", ex.Message);
         return "";
     }
 }
Esempio n. 18
0
 /// <summary>
 /// Extracts the "quality / condition" text. For the configuration with ID 1 the text of the
 /// node at the first XPath is split into lines and the entry following the one containing
 /// "Tình trạng:" is returned. For every other configuration the result of
 /// <c>GetDataFromNodes</c> is returned with line breaks and tabs removed, trimmed.
 /// </summary>
 /// <param name="document">Parsed HTML document to query.</param>
 /// <param name="configXPath">Crawl configuration supplying <c>QualityXPaths</c> and the ID.</param>
 /// <returns>The extracted text, or an empty string when nothing is found or an exception was logged.</returns>
 private string GetQuality(HtmlDocument document, ConfigXPaths configXPath)
 {
     try
     {
         List<string> qualityXPaths = configXPath.QualityXPaths;
         if (qualityXPaths == null || qualityXPaths.Count == 0)
         {
             return "";
         }

         // General case: every configuration except ID 1.
         if (configXPath.ID != 1)
         {
             string rawText = this.GetDataFromNodes(document, qualityXPaths, false);
             return Regex.Replace(rawText, "\n|\t|\r", "").Trim();
         }

         // Special case for configuration 1: the value sits in the entry after the label.
         var tableNode = document.DocumentNode.SelectSingleNode(qualityXPaths[0]);
         if (tableNode == null)
         {
             return "";
         }

         char[]   lineBreaks = new char[] { '\n', '\r', '\t' };
         string[] entries    = tableNode.InnerText.Replace("&gt;", "").Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);

         // The last entry can never be a usable label because nothing follows it.
         for (int index = 0; index < entries.Length - 1; index++)
         {
             if (entries[index].Contains("Tình trạng:"))
             {
                 return entries[index + 1];
             }
         }

         return "";
     }
     catch (Exception ex)
     {
         log.ErrorFormat("Exception:{0}", ex.Message);
         return "";
     }
 }
Esempio n. 19
0
 /// <summary>
 /// Returns the result of <c>ParseDate</c> for the configuration's <c>last_edit_xpaths</c>.
 /// </summary>
 /// <param name="url">Address of the page; forwarded to <c>ParseDate</c>.</param>
 /// <param name="document">Parsed HTML document; forwarded to <c>ParseDate</c>.</param>
 /// <param name="configXPath">Crawl configuration supplying the ID and the last-edit XPaths.</param>
 /// <returns>Whatever <c>ParseDate</c> returns for these arguments.</returns>
 private DateTime GetLastEdit(string url, HtmlDocument document, ConfigXPaths configXPath)
 {
     var configId       = configXPath.ID;
     var lastEditXPaths = configXPath.last_edit_xpaths;
     return ParseDate(url, document, configId, lastEditXPaths);
 }
Esempio n. 20
0
        /// <summary>
        /// Copies the values currently held by the form's controls into <paramref name="config"/>.
        /// </summary>
        /// <param name="config">Configuration object to fill; its properties are overwritten in place.</param>
        /// <returns>
        /// <c>true</c> when every value was copied; <c>false</c> when an exception occurred, in which
        /// case the exception message is shown in a message box and <paramref name="config"/> may be
        /// left partially updated.
        /// </returns>
        private bool LoadFormToConfig(ref ConfigXPaths config)
        {
            try
            {
                // XPath lists for the extracted fields.
                config.website_id              = Convert.ToInt32(CboWebSiteID.SelectedValue);
                config.TitleXPaths             = wssCommon.GetListXPathFromString(this.TitleXPathsTextBox.Text);
                config.PriceXPaths             = wssCommon.GetListXPathFromString(this.PriceXPathsTextBox.Text);
                config.PostDateXPaths          = wssCommon.GetListXPathFromString(this.PostDateXPathsTextBox.Text);
                config.LastChangeXPaths        = wssCommon.GetListXPathFromString(this.LastChangeXPathsTextBox.Text);
                config.ProvinceXPaths          = wssCommon.GetListXPathFromString(this.ProvinceXPathsTextBox.Text);
                config.PhoneSalerXPaths        = wssCommon.GetListXPathFromString(this.PhoneSalerXPathsTextBox.Text);
                config.AddressXPaths           = wssCommon.GetListXPathFromString(this.AddressXPathsTextBox.Text);
                config.ContentXPaths           = wssCommon.GetListXPathFromString(this.ContentXPathTextbox.Text);
                config.AvaiableXPaths          = wssCommon.GetListXPathFromString(this.AvaiableXPathsTextBox.Text);
                config.QualityXPaths           = wssCommon.GetListXPathFromString(this.QualityXPathsTextBox.Text);
                config.UserNameXPaths          = wssCommon.GetListXPathFromString(this.UserNameXPathsTextBox.Text);
                config.ThumbUrlXPaths          = wssCommon.GetListXPathFromString(this.txtThumbXPaths.Text);
                config.AllowExtractProductLink = ckExtractProduct.Checked;

                // "Reload" URL regex lists and their flag.
                config.ReloadVisitUrlsRegex    = wssCommon.GetListXPathFromString(this.ExtrationRegex_richTextBox4.Text);
                config.ReloadNoVisitUrlsRegex  = wssCommon.GetListXPathFromString(this.NoExtrationRegex_richTextBox4.Text);
                config.ReloadProductUrlsRegex  = wssCommon.GetListXPathFromString(this.ProductRegex_RichTextBox.Text);
                config.ReloadNoProductUrlRegex = wssCommon.GetListXPathFromString(this.NoProductRegex_RichTextBox.Text);
                config.RegexReloadLikeFull     = this.RegexReloadLikeFull_CheckEdit.Checked;

                // Image include / exclude regex lists.
                config.image_regex   = wssCommon.GetListXPathFromString(this.txtImage.Text);
                config.noimage_regex = wssCommon.GetListXPathFromString(this.txtNoRegexImage.Text);

                // Test URL, URL regex lists, numeric settings, ID and remaining XPath lists.
                config.UrlTest                 = this.urlTestTextBox.Text;
                config.VisitUrlsRegex          = wssCommon.GetListXPathFromString(this.visitUrlsRegexTextBox.Text);
                config.NoVisitUrlRegex         = wssCommon.GetListXPathFromString(this.noVisitUrlsRegexTextBox.Text);
                config.ProductUrlsRegex        = wssCommon.GetListXPathFromString(this.ProductUrlsRegexTextBox.Text);
                config.NoProductUrlRegex       = wssCommon.GetListXPathFromString(this.NoProductUrlRegexTextBox.Text);
                // NOTE(review): AllowExtractProductLink is assigned a second time here from the same control.
                config.AllowExtractProductLink = ckExtractProduct.Checked;
                config.TimeDelay               = (int)timeDelaySpinEdit.Value;
                config.ItemReCrawler           = (int)itemReCrawlerSpinEdit.Value;
                config.RootLink                = Common.GetListXPathFromString(rootLinkTextBox.Text);
                // The ID always comes from the spin edit, replacing whatever the caller had set.
                config.ID = Convert.ToInt32(configXPathIDSpinEdit.Value);
                config.ImageUrlsXPaths   = wssCommon.GetListXPathFromString(this.ImageUrlsXPathsTextBox.Text);
                config.WebCategoryXPaths = wssCommon.GetListXPathFromString(this.webCategoryXPathsTextBox.Text);
                config.extend_xpaths     = Common.GetListXPathFromString(this.textPropertyTextBox.Text);

                config.tags_xpaths = Common.GetListXPathFromString(txtTagXPaths.Text);

                // Push interval / auto-push flag and crawl depth settings.
                config.wss_interval_push       = Convert.ToInt32(minMinuteWaitSpin.Value);
                config.wss_allow_auto_push     = Convert.ToBoolean(allowAutoPushCheckBox.Checked);
                config.wss_deep_full_crawler   = Convert.ToInt32(wss_deep_full_crawler_TextBox.Value);
                config.wss_deep_reload_crawler = Convert.ToInt32(wss_deep_reload_crawler_TextBox.Value);

                // Category rules (see RegexStringToCategory) and display name.
                config.RegexStringToCategory = Common.GetListXPathFromString(txtRegexCatToID.Text);
                config.Name = txtNameConfig.Text;

                config.last_edit_xpaths    = wssCommon.GetListXPathFromString(this.txtLastEditXPaths.Text);
                config.last_comment_xpaths = wssCommon.GetListXPathFromString(this.richTextBox2.Text);

                config.domain            = this.txtDomain.Text;
                config.view_count_xpaths = wssCommon.GetListXPathFromString(this.txtViewCount.Text);
            }
            catch (Exception ex)
            {
                // Report the problem to the user; config may already be partially overwritten.
                MessageBox.Show(ex.Message);
                return(false);
            }

            return(true);
        }
Esempio n. 21
0
        /// <summary>
        /// Populates the form's controls from <paramref name="config"/>. Does nothing when
        /// <paramref name="config"/> is <c>null</c>.
        /// </summary>
        /// <param name="config">Configuration whose values are shown; may be <c>null</c>.</param>
        private void LoadConfigToForm(ConfigXPaths config)
        {
            if (config == null)
            {
                return;
            }
            // XPath lists for the extracted fields, plus the test URL.
            this.TitleXPathsTextBox.Text      = wssCommon.ConvertToString(config.TitleXPaths);
            this.PriceXPathsTextBox.Text      = wssCommon.ConvertToString(config.PriceXPaths);
            this.PostDateXPathsTextBox.Text   = wssCommon.ConvertToString(config.PostDateXPaths);
            this.LastChangeXPathsTextBox.Text = wssCommon.ConvertToString(config.LastChangeXPaths);
            this.ProvinceXPathsTextBox.Text   = wssCommon.ConvertToString(config.ProvinceXPaths);
            this.PhoneSalerXPathsTextBox.Text = wssCommon.ConvertToString(config.PhoneSalerXPaths);
            this.ContentXPathTextbox.Text     = wssCommon.ConvertToString(config.ContentXPaths);
            this.AddressXPathsTextBox.Text    = wssCommon.ConvertToString(config.AddressXPaths);
            this.AvaiableXPathsTextBox.Text   = wssCommon.ConvertToString(config.AvaiableXPaths);
            this.QualityXPathsTextBox.Text    = wssCommon.ConvertToString(config.QualityXPaths);
            this.ImageUrlsXPathsTextBox.Text  = wssCommon.ConvertToString(config.ImageUrlsXPaths);
            this.UserNameXPathsTextBox.Text   = wssCommon.ConvertToString(config.UserNameXPaths);
            this.urlTestTextBox.Text          = config.UrlTest;

            // "Reload" URL regex lists and their flag.
            this.ExtrationRegex_richTextBox4.Text      = wssCommon.ConvertToString(config.ReloadVisitUrlsRegex);
            this.NoExtrationRegex_richTextBox4.Text    = wssCommon.ConvertToString(config.ReloadNoVisitUrlsRegex);
            this.ProductRegex_RichTextBox.Text         = wssCommon.ConvertToString(config.ReloadProductUrlsRegex);
            this.NoProductRegex_RichTextBox.Text       = wssCommon.ConvertToString(config.ReloadNoProductUrlRegex);
            this.RegexReloadLikeFull_CheckEdit.Checked = config.RegexReloadLikeFull;

            // Image include / exclude regex lists.
            this.txtImage.Text        = wssCommon.ConvertToString(config.image_regex);
            this.txtNoRegexImage.Text = wssCommon.ConvertToString(config.noimage_regex);

            // URL regex lists and thumbnail XPaths.
            this.visitUrlsRegexTextBox.Text    = wssCommon.ConvertToString(config.VisitUrlsRegex);
            this.noVisitUrlsRegexTextBox.Text  = wssCommon.ConvertToString(config.NoVisitUrlRegex);
            this.ProductUrlsRegexTextBox.Text  = wssCommon.ConvertToString(config.ProductUrlsRegex);
            this.NoProductUrlRegexTextBox.Text = wssCommon.ConvertToString(config.NoProductUrlRegex);
            this.txtThumbXPaths.Text           = wssCommon.ConvertToString(config.ThumbUrlXPaths);

            // Numeric settings and root links.
            this.timeDelaySpinEdit.Value     = config.TimeDelay;
            this.itemReCrawlerSpinEdit.Value = config.ItemReCrawler;
            this.rootLinkTextBox.Text        = Common.ConvertToString(config.RootLink);


            this.webCategoryXPathsTextBox.Text = wssCommon.ConvertToString(config.WebCategoryXPaths);
            this.textPropertyTextBox.Text      = Common.ConvertToString(config.extend_xpaths);
            this.txtTagXPaths.Text             = Common.ConvertToString(config.tags_xpaths);
            this.ckExtractProduct.Checked      = config.AllowExtractProductLink;

            // Push settings.
            // NOTE(review): wss_last_push is shown here, but LoadFormToConfig never reads it back.
            this.allowAutoPushCheckBox.Checked = config.wss_allow_auto_push;
            this.minMinuteWaitSpin.Value       = config.wss_interval_push;
            this.dateTimePicker1.Value         = config.wss_last_push;

            // Crawl depth settings.
            this.wss_deep_full_crawler_TextBox.Value   = config.wss_deep_full_crawler;
            this.wss_deep_reload_crawler_TextBox.Value = config.wss_deep_reload_crawler;

            // Category rules and display name.
            this.txtRegexCatToID.Text = Common.ConvertToString(config.RegexStringToCategory);
            this.txtNameConfig.Text   = Common.Obj2String(config.Name);

            this.txtLastEditXPaths.Text = wssCommon.ConvertToString(config.last_edit_xpaths);
            this.richTextBox2.Text      = wssCommon.ConvertToString(config.last_comment_xpaths);
            this.txtViewCount.Text      = wssCommon.ConvertToString(config.view_count_xpaths);

            // Domain and website selection. config.ID is not written to any control here.
            this.txtDomain.Text             = config.domain;
            this.CboWebSiteID.SelectedValue = config.website_id;
        }
Esempio n. 22
0
        /// <summary>
        /// Classifies a crawled page by matching its category text against a known
        /// category pattern.
        /// </summary>
        /// <param name="c_url">URL of the page. Not used by this implementation.</param>
        /// <param name="document">Parsed HTML document, forwarded to GetCategory.</param>
        /// <param name="config">XPath configuration, forwarded to GetCategory.</param>
        /// <returns>
        /// <c>TypeCrawlerData.PhoneComputer</c> when the trimmed category matches the
        /// phone category pattern; otherwise <c>TypeCrawlerData.None</c>.
        /// </returns>
        public Model.SaleNews.TypeCrawlerData GetTypeOfLink(string c_url, GABIZ.Base.HtmlAgilityPack.HtmlDocument document, ConfigXPaths config)
        {
            // Guard before trimming: calling Trim() on a null category would throw
            // before the emptiness check ever ran.
            string category = GetCategory(document, config);
            if (string.IsNullOrEmpty(category))
            {
                return(Model.SaleNews.TypeCrawlerData.None);
            }

            // A whitespace-only category trims to "" and does not match the pattern.
            if (Regex.IsMatch(category.Trim(), "Nhật tảo->Mua bán->Điện thoại.*"))
            {
                return(Model.SaleNews.TypeCrawlerData.PhoneComputer);
            }

            return(Model.SaleNews.TypeCrawlerData.None);
        }
Esempio n. 23
0
 /// <summary>
 /// Returns the last-change timestamp for a page. This implementation does not
 /// inspect either argument and always yields <c>SqlDb.MinDateDb</c>.
 /// </summary>
 /// <param name="document">Parsed HTML document (unused).</param>
 /// <param name="configXPath">XPath configuration (unused).</param>
 /// <returns>The value of <c>SqlDb.MinDateDb</c>.</returns>
 private DateTime GetLastChange(HtmlDocument document, ConfigXPaths configXPath)
 {
     DateTime lastChange = SqlDb.MinDateDb;
     return lastChange;
 }
Esempio n. 24
0
        /// <summary>
        /// Repeatedly runs a <c>SimpleCrawlerRaoVat</c> for the given configuration until
        /// <c>bPause</c> is set or the thread is aborted. Each pass creates a fresh crawler,
        /// wires its progress events to the log and the UI counters, starts it, and then
        /// waits before the next pass.
        /// </summary>
        /// <param name="configID">Identifier of the crawl configuration passed to the crawler.</param>
        /// <param name="typeCrawler">Crawl mode passed to the crawler.</param>
        public void doCrawl(int configID, ETypeCrawlRaoVat typeCrawler)
        {
            // Pause between passes, and back-off after a failed pass.
            const int delayBetweenRunsMs = 20000;

            // NOTE(review): this lookup uses this.ConfigID rather than the configID
            // parameter, and its result is never read. Kept as-is because the calls may
            // have side effects; confirm whether it can be removed or should use configID.
            SqlDb            sqlDb         = new SqlDb(QT.Entities.Server.ConnectionStringCrawler);
            RaoVatSQLAdapter adapter       = new Entities.RaoVat.RaoVatSQLAdapter(sqlDb);
            ConfigXPaths     configuration = adapter.GetConfigByID(this.ConfigID);

            while (!this.bPause)
            {
                try
                {
                    SimpleCrawlerRaoVat crawler = new SimpleCrawlerRaoVat(configID, typeCrawler);

                    crawler.eventWhenStart   += (sender, mss) => WriteLog("eventWhenStart", mss);
                    crawler.eventWhenPushJob += (sender, mss) => WriteLog("eventWhenPushJob", mss);
                    crawler.eventWhenEnd     += (sender, mss) => WriteLog("eventWhenEnd", mss);

                    crawler.eventWhenGetJob += (sender, mss) =>
                    {
                        WriteLog("eventWhenGetJob", mss);

                        // Counter controls are updated through Invoke so the change runs
                        // on the thread that owns the control.
                        this.Invoke(new Action(() =>
                        {
                            this.spinnumberVisitedLink.Value = Convert.ToInt32(spinnumberVisitedLink.Value) + 1;
                        }));
                    };

                    crawler.eventWhenSuccessProduct += (sender, mss) =>
                    {
                        WriteLog("eventWhenSuccessProduct", mss);
                        this.Invoke(new Action(() =>
                        {
                            this.spinNumberProduct.Value = Convert.ToInt32(spinNumberProduct.Value) + 1;
                        }));
                    };

                    crawler.StartCrawler();
                    Thread.Sleep(delayBetweenRunsMs);
                }
                catch (ThreadAbortException)
                {
                    // Stop looping when the worker thread is being aborted.
                    break;
                }
                catch (Exception ex)
                {
                    // Pass the exception as an argument, never as the format string:
                    // braces in the message would otherwise be treated as placeholders.
                    // Logging the exception object also keeps the stack trace.
                    log.ErrorFormat("{0}", ex);
                    WriteLog("Exception", string.Format("Exception:{0}", ex.Message));
                    Thread.Sleep(delayBetweenRunsMs);
                }
            }
        }