/// <summary>
/// Reads one domain per line from the file named in <c>txtFolder</c>, looks each one up in the
/// Company table and lists in <c>gridControl1</c> every domain that is either missing from the
/// database or whose <c>TotalValid</c> column evaluates to 0.
/// </summary>
private void Run()
{
    // Result table bound to the grid once the whole file has been processed.
    var report = new DataTable();
    report.Columns.Add("ID", typeof(string));
    report.Columns.Add("Domain", typeof(string));
    report.Columns.Add("Detail", typeof(string));

    var adapter = new DBTableAdapters.CompanyTableAdapter();
    adapter.Connection.ConnectionString = connectionString;
    var lookup = new DB.CompanyDataTable();

    var input = new FileStream(txtFolder.Text, FileMode.Open, FileAccess.Read);
    int lineNumber = 0;

    using (var reader = new StreamReader(input, Encoding.UTF8))
    {
        for (string domain = reader.ReadLine(); domain != null; domain = reader.ReadLine())
        {
            // Echo the line (numbered from 0) to the log box; UI controls are touched through Invoke.
            string logEntry = lineNumber + ". " + domain + System.Environment.NewLine;
            this.Invoke(new Action(() => richTextBox1.AppendText(logEntry)));
            lineNumber++;

            lookup.Clear();
            long companyId = QT.Entities.Common.GetIDCompany(domain);
            adapter.FillBy_ID(lookup, companyId);

            // Decide whether this domain has to be reported, and why.
            string detail;
            if (lookup.Rows.Count == 0)
            {
                detail = "Not in Database";
            }
            else
            {
                int totalValid = QT.Entities.Common.Obj2Int(lookup.Rows[0]["TotalValid"].ToString());
                detail = totalValid == 0 ? "Total Valid = 0" : null;
            }

            if (detail != null)
            {
                DataRow row = report.NewRow();
                row["ID"] = companyId;
                row["Domain"] = domain;
                row["Detail"] = detail;
                report.Rows.Add(row);
            }
        }

        this.Invoke(new Action(() => { gridControl1.DataSource = report; }));
    }
}
/// <summary>
/// Creates a company object for <paramref name="id"/> and populates it from the first row
/// returned by <c>FillBy_ID</c>. When no row is returned, only <c>Name</c> is set, to
/// "Not In Database".
/// </summary>
/// <param name="id">Company identifier used for the database lookups.</param>
public Company(long id)
{
    this.MaxHourCrawlerReload = 7;
    ID = id;

    _adt = new DBTableAdapters.CompanyTableAdapter();
    _dt = new DB.CompanyDataTable();
    _adt.Connection.ConnectionString = Server.ConnectionString;
    _adt.FillBy_ID(_dt, ID);

    _adtProduct = new DBTableAdapters.ProductTableAdapter();
    _adtProduct.Connection.ConnectionString = Server.ConnectionString;

    // Nothing to copy when the company is unknown.
    if (_dt.Rows.Count <= 0)
    {
        Name = "Not In Database";
        return;
    }

    var row = _dt.Rows[0];

    // General company information.
    Name = row["Name"].ToString();
    Description = row["Description"].ToString();
    Website = row["Website"].ToString();
    Domain = row["Domain"].ToString();
    AddDate = Common.ObjectToDataTime(row["AddDate"].ToString());
    Phone = row["Phone"].ToString();
    Fax = row["Fax"].ToString();
    Yahoo = row["Yahoo"].ToString();
    Address = row["Address"].ToString();
    Status = Common.Obj2Byte(row["Status"].ToString());
    Image = row["Image"].ToString();
    PageRank = Common.Obj2Int(row["PageRank"].ToString());
    AlexaRank = Common.Obj2Int(row["AlexaRank"].ToString());
    TotalProduct = Common.Obj2Int(row["TotalProduct"].ToString());
    LastCrawler = Common.ObjectToDataTime(row["LastCrawler"].ToString());
    FullCrawlerDay = Common.Obj2Int(row["FullCrawlerDay"].ToString());

    // Data-feed settings: path/URL, last update time, update frequency (UpdateFreq is used as
    // the hours component of the TimeSpan) and feed type.
    DataFeedPath = Common.Obj2String(row["DataFeedUrl"].ToString());
    LastUpdateDataFeedTime = Common.ObjectToDataTime(row["LastUpdateDataFeed"]);
    UpdateDataFeedFrequency = new TimeSpan(Common.Obj2Int(row["UpdateFreq"]), 0, 0);
    CompanyDataFeedType = (DataFeedType)Common.Obj2Int(row["DataFeedType"]);

    // User and password for the data-feed URL.
    UserDatafeed = Common.Obj2String(row["UserDatafeed"].ToString());
    PasswordDatafeed = Common.Obj2String(row["PasswordDatafeed"].ToString());

    // A NULL NotVisibleProduct column is treated as false.
    if (row["NotVisibleProduct"] == DBNull.Value)
    {
        notVisibleProduct = false;
    }
    else
    {
        notVisibleProduct = Common.Obj2Bool(row["NotVisibleProduct"]);
    }

    AllowAutoPushNewProduct = Common.Obj2Bool(row["AllowAutoPushNewProduct"]);
    AllowAutoBlackLink = Common.Obj2Bool(row["AllowAutoBlackLink"]);
    ClearQueueWhenFN = Common.Obj2Bool(row["ClearQueueWhenFN"]);

    #region Get the company's manager type
    _adtmanagerTypeRCompany = new DBTableAdapters.ManagerTypeRCompanyTableAdapter();
    _adtmanagerTypeRCompany.Connection.ConnectionString = Server.ConnectionString;
    DB.ManagerTypeRCompanyDataTable managerTable = new DB.ManagerTypeRCompanyDataTable();
    try
    {
        _adtmanagerTypeRCompany.FillBy_IDCompany(managerTable, ID);
    }
    catch (Exception)
    {
        // Best-effort lookup: a failure here is ignored, and an empty table
        // makes IDManagerType fall back to 0 below.
    }

    if (managerTable.Rows.Count <= 0)
    {
        IDManagerType = 0;
    }
    else
    {
        IDManagerType = Common.Obj2Int(managerTable.Rows[0]["IDType"].ToString());
    }
    #endregion
}
/// <summary>
/// Looks up every domain entered in <c>memoEdit1</c> (one per line) in the Company table and
/// binds the ID, domain, address and phone of the companies found to <c>gridControl1</c>.
/// Progress, lookup failures and unknown domains are appended to <c>rbError</c>.
/// </summary>
/// <remarks>
/// The "ThanhPho" column is declared for the grid but is not populated by this method.
/// </remarks>
private void Run()
{
    DBTableAdapters.CompanyTableAdapter companyTableAdapter = new CompanyTableAdapter();
    companyTableAdapter.Connection.ConnectionString = _connectionString;
    DB.CompanyDataTable companyDataTable = new DB.CompanyDataTable();

    DataTable dtCompany = new DataTable();
    dtCompany.Columns.Add("ID", typeof(string));
    dtCompany.Columns.Add("Domain", typeof(string));
    dtCompany.Columns.Add("Address", typeof(string));
    dtCompany.Columns.Add("ThanhPho", typeof(string));
    dtCompany.Columns.Add("Phone", typeof(string));

    // Appends one line to the log box; UI controls are touched through Invoke.
    Action<string> log = message =>
        this.Invoke(new Action(() => { rbError.AppendText(message + System.Environment.NewLine); }));

    var listDomain = memoEdit1.Text.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    int i = 0;
    foreach (var item in listDomain)
    {
        i++;
        log(String.Format("{0}. {1}", i, item));

        companyDataTable.Clear();
        long idCompany = Common.GetIDCompany(item);
        try
        {
            companyTableAdapter.FillBy_ID(companyDataTable, idCompany);
        }
        catch (Exception exception)
        {
            log(String.Format("{0}. {1} Fill By ID error: {2}", i, item, exception));
            // The lookup itself failed, so do not also report the domain as missing from SQL.
            continue;
        }

        if (companyDataTable.Rows.Count > 0)
        {
            DataRow dataRow = dtCompany.NewRow();
            dataRow["ID"] = idCompany;
            dataRow["Domain"] = item;
            dataRow["Address"] = companyDataTable.Rows[0]["Address"];
            dataRow["Phone"] = companyDataTable.Rows[0]["Phone"];
            dtCompany.Rows.Add(dataRow);
        }
        else
        {
            // Message text is Vietnamese for "does not exist in SQL".
            log(String.Format("{0}. {1} Khong ton tai trong SQL", i, item));
        }
    }

    this.Invoke(new Action(() => { gridControl1.DataSource = dtCompany; }));
}
/// <summary>
/// Crawls outward from <c>rootUrl</c>: downloads each queued page, extracts its links and
/// queues every not-yet-seen link that <c>IsRelevantUrl</c> accepts. When <c>chkFind</c> is
/// checked, the host of every new non-relevant link whose company ID is not already known is
/// inserted into the Company table together with its Alexa rank.
/// The loop ends when the queue is empty or <c>finish</c> is set; while <c>pause</c> is set
/// the loop idles without dequeuing.
/// </summary>
void doCrawler()
{
    // How long to wait between checks of the pause flag, so a paused crawl does not spin a core.
    const int pausePollMilliseconds = 100;

    dtCom = new DB.CompanyDataTable();
    adtCom = new DBTableAdapters.CompanyTableAdapter();
    adtCom.Connection.ConnectionString = QT.Entities.Server.ConnectionString;
    if (adtCom.Connection.State == ConnectionState.Closed)
    {
        adtCom.Connection.Open();
    }

    if (chkFind.Checked == true)
    {
        // Build a sorted list of the company IDs already in the database so that
        // membership can be tested with BinarySearch.
        webCRC = new List<long>();
        adtCom.Fill(dtCom);
        foreach (var dr in dtCom)
        {
            int i0 = webCRC.BinarySearch(dr.ID);
            if (i0 < 0)
            {
                webCRC.Insert(~i0, dr.ID);
            }
        }
    }

    visitedCount = 0;
    crawlerLink = new Queue<string>();
    visitedCRC = new List<long>();
    rootUri = new Uri(rootUrl);
    crawlerLink.Enqueue(rootUrl);

    while (crawlerLink.Count > 0)
    {
        if (finish)
        {
            break;
        }

        if (pause)
        {
            Thread.Sleep(pausePollMilliseconds);
            continue;
        }

        string c_url = crawlerLink.Dequeue();
        try
        {
            string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(c_url, 45, 2);
            if (html != "")
            {
                GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(html);
                var a_nodes = doc.DocumentNode.SelectNodes("//a[@href]");
                if (a_nodes != null)
                {
                    #region add link to process
                    for (int i = 0; i < a_nodes.Count; i++)
                    {
                        string s = Common.GetAbsoluteUrl(a_nodes[i].Attributes["href"].Value, rootUri);
                        if (IsNoVisitUrl(s))
                        {
                            continue;
                        }

                        // visitedCRC is kept sorted; a negative result is the complement
                        // of the insertion position.
                        long s_crc = Tools.getCRC64(LinkCanonicalization.NormalizeLink(s));
                        int index = visitedCRC.BinarySearch(s_crc);
                        if (index >= 0)
                        {
                            continue;
                        }

                        if (IsRelevantUrl(s))
                        {
                            crawlerLink.Enqueue(s);
                        }
                        visitedCRC.Insert(~index, s_crc);

                        if (chkFind.Checked == true && !IsRelevantUrl(s))
                        {
                            Uri uri = new Uri(s);
                            TimeSpan timestartup = new TimeSpan(0, 1, 1, 0);
                            TimeSpan timeSleep = new TimeSpan(0, 1, 1, 0);
                            String domain = uri.Host.ToLower();
                            domain = domain.Replace("www.", "");
                            long idcom = Common.GetIDCompany(domain);
                            int index1 = webCRC.BinarySearch(idcom);
                            if (index1 < 0)
                            {
                                Alexa a = Common.GetRankAlexa(uri.Host);
                                // Delay taken from the txtDelay box, passed to Thread.Sleep as milliseconds.
                                Thread.Sleep(Common.Obj2Int(txtDelay.Text.Trim()));
                                countWeb++;
                                webCRC.Insert(~index1, idcom);
                                // The description literal is Vietnamese for "Found from ".
                                adtCom.Insert(
                                    idcom, "", "Tìm thấy từ " + txtURL.Text, domain, domain, DateTime.Now, "", "", "", "",
                                    Common.CompanyStatus.WEB_ADDNEWS, false, "", a.AlexaRankContries, a.AlexaRank,
                                    timestartup, timeSleep, 500, 0, DateTime.Now, DateTime.Now, 30, 0, 0, 0, "",
                                    DateTime.Now, "", 0, DateTime.Now, 0, "", "", true, false, false, true, true, true,
                                    null, null, false, "", 3);
                            }
                        }
                    }
                    #endregion
                }

                if (showLog)
                {
                    #region show log
                    this.Invoke((MethodInvoker) delegate
                    {
                        lblVisited.Text = visitedCount.ToString();
                        lblQueue.Text = crawlerLink.Count.ToString();
                        lblProduct.Text = countWeb.ToString();
                        txtUrlCurrent.Text = currentUrl;
                        // Elapsed time since start, shown as HH:mm:ss.
                        var xx = DateTime.Now - start;
                        DateTime mydate = new DateTime(xx.Ticks);
                        lblTime.Text = mydate.ToString("HH:mm:ss");
                        lblIgnored.Text = ignoredCount.ToString();
                    });
                    #endregion
                }
            }

            visitedCount++;
            currentUrl = c_url;
        }
        catch (Exception ex)
        {
            // Any failure on a page is logged to a per-host file and the crawl moves on.
            FileLog.WriteAppendText(DateTime.Now.ToString("dd/MM HH:mm:ss") + "\t, " + c_url + "\r\n" + ex.ToString(), rootUri.Host + ".csv");
        }
    }

    finish = true;
    crawlerLink.Clear();
    crawlerLink = null;
    this.timer1.Start();

    // NOTE(review): if this method runs on crawlerThread itself, Abort() targets the current
    // thread and the Join()/null assignment below will not be reached — confirm against the caller.
    if (crawlerThread != null && crawlerThread.IsAlive)
    {
        crawlerThread.Abort();
        crawlerThread.Join();
        crawlerThread = null;
    }
}