Exemple #1
0
        /// <summary>
        /// Reads the file whose path is in <c>txtFolder.Text</c> line by line, looks up the company
        /// for each line, and shows in <c>gridControl1</c> every line whose company either returns
        /// no row or has a <c>TotalValid</c> value of 0.
        /// </summary>
        /// <remarks>
        /// UI controls are only touched through <c>Invoke</c>.
        /// NOTE(review): presumably this runs on a worker thread — confirm against the caller.
        /// </remarks>
        private void Run()
        {
            // Result table bound to the grid once the whole file has been processed.
            DataTable dtfinal = new DataTable();
            dtfinal.Columns.Add("ID", typeof(string));
            dtfinal.Columns.Add("Domain", typeof(string));
            dtfinal.Columns.Add("Detail", typeof(string));

            WSS.DownloadImageByHand.DBTableAdapters.CompanyTableAdapter companyAdapter = new DBTableAdapters.CompanyTableAdapter();
            companyAdapter.Connection.ConnectionString = connectionString;
            WSS.DownloadImageByHand.DB.CompanyDataTable companyTable = new DB.CompanyDataTable();
            int i = 0;

            // Both the stream and the reader are scoped so the file handle is released even if
            // constructing the reader fails.
            using (var fileStream = new FileStream(txtFolder.Text, FileMode.Open, FileAccess.Read))
            using (var streamReader = new StreamReader(fileStream, Encoding.UTF8))
            {
                string line;
                while ((line = streamReader.ReadLine()) != null)
                {
                    // Echo the line (with its zero-based index) to the log box.
                    string logEntry = i + ". " + line + System.Environment.NewLine;
                    this.Invoke(new Action(() =>
                    {
                        richTextBox1.AppendText(logEntry);
                    }));
                    i++;

                    companyTable.Clear();
                    long idcompany = QT.Entities.Common.GetIDCompany(line);
                    companyAdapter.FillBy_ID(companyTable, idcompany);

                    // Decide whether this line must be reported, and why.
                    string detail = null;
                    if (companyTable.Rows.Count == 0)
                    {
                        detail = "Not in Database";
                    }
                    else
                    {
                        int totalvalid = QT.Entities.Common.Obj2Int(companyTable.Rows[0]["TotalValid"].ToString());
                        if (totalvalid == 0)
                        {
                            detail = "Total Valid = 0";
                        }
                    }

                    if (detail != null)
                    {
                        DataRow dr = dtfinal.NewRow();
                        dr["ID"]     = idcompany;
                        dr["Domain"] = line;
                        dr["Detail"] = detail;
                        dtfinal.Rows.Add(dr);
                    }
                }
            }

            this.Invoke(new Action(() =>
            {
                gridControl1.DataSource = dtfinal;
            }));
        }
Exemple #2
0
        /// <summary>
        /// Loads the company with the given ID through <c>FillBy_ID</c> and populates this instance
        /// from the first row returned. When no row is returned, only <c>Name</c> is set
        /// (to "Not In Database") and the remaining members keep their defaults.
        /// </summary>
        /// <param name="id">Company ID passed to <c>FillBy_ID</c> and <c>FillBy_IDCompany</c>.</param>
        public Company(long id)
        {
            this.MaxHourCrawlerReload = 7;

            ID   = id;
            _adt = new DBTableAdapters.CompanyTableAdapter();
            _dt  = new DB.CompanyDataTable();
            _adt.Connection.ConnectionString = Server.ConnectionString;
            _adt.FillBy_ID(_dt, ID);

            _adtProduct = new DBTableAdapters.ProductTableAdapter();
            _adtProduct.Connection.ConnectionString = Server.ConnectionString;

            if (_dt.Rows.Count == 0)
            {
                Name = "Not In Database";
                return;
            }

            // Every field below is read from the first (only expected) row.
            var row = _dt.Rows[0];

            Name        = row["Name"].ToString();
            Description = row["Description"].ToString();
            Website     = row["Website"].ToString();
            Domain      = row["Domain"].ToString();
            AddDate     = Common.ObjectToDataTime(row["AddDate"].ToString());
            Phone       = row["Phone"].ToString();
            Fax         = row["Fax"].ToString();
            Yahoo       = row["Yahoo"].ToString();
            Address     = row["Address"].ToString();
            Status      = Common.Obj2Byte(row["Status"].ToString());
            Image       = row["Image"].ToString();
            PageRank    = Common.Obj2Int(row["PageRank"].ToString());
            AlexaRank   = Common.Obj2Int(row["AlexaRank"].ToString());

            // Crawler settings.
            TimeDelay      = Common.Obj2Int(row["TimeDelay"].ToString());
            TotalProduct   = Common.Obj2Int(row["TotalProduct"].ToString());
            LastCrawler    = Common.ObjectToDataTime(row["LastCrawler"].ToString());
            FullCrawlerDay = Common.Obj2Int(row["FullCrawlerDay"].ToString());

            // NOTE(review): this reads the "LastCrawler" column, the same one used for LastCrawler
            // above. It looks like a copy-paste slip, but the table schema is not visible here —
            // confirm whether a dedicated column exists before changing it.
            LastFullCrawler = Common.ObjectToDataTime(row["LastCrawler"].ToString());

            // Data feed settings.
            DataFeedPath            = Common.Obj2String(row["DataFeedUrl"].ToString());
            LastUpdateDataFeedTime  = Common.ObjectToDataTime(row["LastUpdateDataFeed"]);
            // UpdateFreq is used as the hours component of the TimeSpan.
            UpdateDataFeedFrequency = new TimeSpan(Common.Obj2Int(row["UpdateFreq"]), 0, 0);
            CompanyDataFeedType     = (DataFeedType)Common.Obj2Int(row["DataFeedType"]);

            // User and password for the datafeed URL.
            UserDatafeed      = Common.Obj2String(row["UserDatafeed"].ToString());
            PasswordDatafeed  = Common.Obj2String(row["PasswordDatafeed"].ToString());
            // A NULL column value is treated as false.
            notVisibleProduct = (row["NotVisibleProduct"] == DBNull.Value) ? false : Common.Obj2Bool(row["NotVisibleProduct"]);

            AllowAutoPushNewProduct = Common.Obj2Bool(row["AllowAutoPushNewProduct"]);
            AllowAutoBlackLink      = Common.Obj2Bool(row["AllowAutoBlackLink"]);
            ClearQueueWhenFN        = Common.Obj2Bool(row["ClearQueueWhenFN"]);

            #region Get the manager type of the company
            _adtmanagerTypeRCompany = new DBTableAdapters.ManagerTypeRCompanyTableAdapter();
            _adtmanagerTypeRCompany.Connection.ConnectionString = Server.ConnectionString;
            DB.ManagerTypeRCompanyDataTable managerTable = new DB.ManagerTypeRCompanyDataTable();
            try
            {
                _adtmanagerTypeRCompany.FillBy_IDCompany(managerTable, ID);
            }
            catch (Exception ex)
            {
                // The lookup stays best-effort (IDManagerType falls back to 0 below), but the
                // failure is traced instead of being silently discarded.
                System.Diagnostics.Trace.TraceWarning("Company {0}: loading manager type failed: {1}", ID, ex);
            }

            if (managerTable.Rows.Count > 0)
            {
                IDManagerType = Common.Obj2Int(managerTable.Rows[0]["IDType"].ToString());
            }
            else
            {
                IDManagerType = 0;
            }
            #endregion
        }
Exemple #3
0
        /// <summary>
        /// Looks up every non-empty line of <c>memoEdit1.Text</c> as a company domain and binds a
        /// table of the found companies (ID, domain, address, phone) to <c>gridControl1</c>.
        /// Progress, lookup errors and unknown domains are appended to <c>rbError</c>.
        /// </summary>
        /// <remarks>
        /// UI controls are only written through <c>Invoke</c>.
        /// NOTE(review): presumably this runs on a worker thread — confirm against the caller.
        /// The "ThanhPho" column is created but is not filled by this method.
        /// </remarks>
        private void Run()
        {
            DBTableAdapters.CompanyTableAdapter companyTableAdapter = new CompanyTableAdapter();
            companyTableAdapter.Connection.ConnectionString = _connectionString;
            DB.CompanyDataTable companyDataTable = new DB.CompanyDataTable();
            DataTable           dtCompany        = new DataTable();

            dtCompany.Columns.Add("ID", typeof(string));
            dtCompany.Columns.Add("Domain", typeof(string));
            dtCompany.Columns.Add("Address", typeof(string));
            dtCompany.Columns.Add("ThanhPho", typeof(string));
            dtCompany.Columns.Add("Phone", typeof(string));

            var listDomain = memoEdit1.Text.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            int i          = 0;

            foreach (var item in listDomain)
            {
                i++;
                this.Invoke(new Action(() =>
                {
                    rbError.AppendText(String.Format("{0}. {1}", i, item) + System.Environment.NewLine);
                }));

                companyDataTable.Clear();
                long idCompany = Common.GetIDCompany(item);
                try
                {
                    companyTableAdapter.FillBy_ID(companyDataTable, idCompany);
                }
                catch (Exception exception)
                {
                    this.Invoke(new Action(() =>
                    {
                        rbError.AppendText(String.Format("{0}. {1} Fill By ID error: {2}", i, item, exception) + System.Environment.NewLine);
                    }));
                    // A failed lookup says nothing about whether the company exists, so do not
                    // fall through to the "not found" message below.
                    continue;
                }

                if (companyDataTable.Rows.Count == 0)
                {
                    this.Invoke(new Action(() =>
                    {
                        rbError.AppendText(String.Format("{0}. {1} Khong ton tai trong SQL", i, item) + System.Environment.NewLine);
                    }));
                    continue;
                }

                DataRow dataRow = dtCompany.NewRow();
                dataRow["ID"]      = idCompany;
                dataRow["Domain"]  = item;
                dataRow["Address"] = companyDataTable.Rows[0]["Address"];
                dataRow["Phone"]   = companyDataTable.Rows[0]["Phone"];
                dtCompany.Rows.Add(dataRow);
            }

            this.Invoke(new Action(() =>
            {
                gridControl1.DataSource = dtCompany;
            }));
        }
Exemple #4
0
        /// <summary>
        /// Crawls from <c>rootUrl</c>: URLs are taken from a FIFO queue, each page is downloaded,
        /// and every <c>a[href]</c> link not seen before is recorded in <c>visitedCRC</c>.
        /// Links accepted by <c>IsRelevantUrl</c> are queued for crawling. When <c>chkFind</c> is
        /// checked, the host of each other new link is inserted as a company row unless its
        /// company ID is already in <c>webCRC</c>.
        /// </summary>
        /// <remarks>
        /// <c>webCRC</c> and <c>visitedCRC</c> are kept sorted by inserting at the complement of the
        /// <c>BinarySearch</c> result, so membership tests stay binary searches.
        /// The loop stops when the queue is empty or <c>finish</c> is set; while <c>pause</c> is set
        /// nothing is dequeued.
        /// </remarks>
        void doCrawler()
        {
            // How long to sleep between checks while paused, instead of spinning on the flag.
            const int pausePollMilliseconds = 200;

            dtCom  = new DB.CompanyDataTable();
            adtCom = new DBTableAdapters.CompanyTableAdapter();
            adtCom.Connection.ConnectionString = QT.Entities.Server.ConnectionString;
            if (adtCom.Connection.State == ConnectionState.Closed)
            {
                // The connection is left open for the Insert calls made during the crawl.
                adtCom.Connection.Open();
            }
            if (chkFind.Checked == true)
            {
                // Preload the IDs of all known companies into a sorted list.
                webCRC = new List <long>();
                adtCom.Fill(dtCom);
                foreach (var dr in dtCom)
                {
                    int knownIndex = webCRC.BinarySearch(dr.ID);
                    if (knownIndex < 0)
                    {
                        webCRC.Insert(~knownIndex, dr.ID);
                    }
                }
            }

            visitedCount = 0;
            crawlerLink  = new Queue <string>();
            visitedCRC   = new List <long>();
            rootUri      = new Uri(rootUrl);
            crawlerLink.Enqueue(rootUrl);
            while (crawlerLink.Count > 0)
            {
                if (finish)
                {
                    break;
                }
                if (pause)
                {
                    // Yield the CPU while paused; the original loop busy-waited here.
                    Thread.Sleep(pausePollMilliseconds);
                    continue;
                }

                string c_url = crawlerLink.Dequeue();
                try
                {
                    // NOTE(review): 45 and 2 are presumably a timeout and a retry count — confirm
                    // against HTMLTransmitter.getHTML.
                    string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(c_url, 45, 2);

                    if (html != "")
                    {
                        GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                        doc.LoadHtml(html);

                        var a_nodes = doc.DocumentNode.SelectNodes("//a[@href]");
                        if (a_nodes != null)
                        {
                            #region add link to process
                            for (int i = 0; i < a_nodes.Count; i++)
                            {
                                string s = Common.GetAbsoluteUrl(a_nodes[i].Attributes["href"].Value, rootUri);
                                if (IsNoVisitUrl(s))
                                {
                                    continue;
                                }

                                long s_crc = Tools.getCRC64(LinkCanonicalization.NormalizeLink(s));
                                int  index = visitedCRC.BinarySearch(s_crc);
                                if (index >= 0)
                                {
                                    // Link already seen.
                                    continue;
                                }

                                if (IsRelevantUrl(s))
                                {
                                    crawlerLink.Enqueue(s);
                                }
                                visitedCRC.Insert(~index, s_crc);

                                if (chkFind.Checked != true)
                                {
                                    continue;
                                }
                                // IsRelevantUrl is deliberately called a second time, as in the
                                // original, in case it has side effects.
                                if (IsRelevantUrl(s))
                                {
                                    continue;
                                }

                                // A new link that is not queued for crawling: treat its host as a
                                // candidate company.
                                Uri      uri         = new Uri(s);
                                TimeSpan timestartup = new TimeSpan(0, 1, 1, 0);
                                TimeSpan timeSleep   = new TimeSpan(0, 1, 1, 0);
                                String   domain      = uri.Host.ToLower();
                                domain = domain.Replace("www.", "");

                                long idcom  = Common.GetIDCompany(domain);
                                int  index1 = webCRC.BinarySearch(idcom);
                                if (index1 >= 0)
                                {
                                    // Company already known.
                                    continue;
                                }

                                Alexa a = Common.GetRankAlexa(uri.Host);
                                // Delay (milliseconds, from txtDelay) after each rank lookup.
                                Thread.Sleep(Common.Obj2Int(txtDelay.Text.Trim()));
                                countWeb++;
                                webCRC.Insert(~index1, idcom);
                                adtCom.Insert(
                                    idcom,
                                    "",
                                    "Tìm thấy từ " + txtURL.Text,
                                    domain,
                                    domain,
                                    DateTime.Now,
                                    "",
                                    "",
                                    "",
                                    "",
                                    Common.CompanyStatus.WEB_ADDNEWS,
                                    false,
                                    "",
                                    a.AlexaRankContries,
                                    a.AlexaRank,
                                    timestartup,
                                    timeSleep,
                                    500,
                                    0,
                                    DateTime.Now,
                                    DateTime.Now,
                                    30,
                                    0,
                                    0,
                                    0, "", DateTime.Now, "", 0, DateTime.Now, 0, "", "", true, false, false, true, true, true, null, null, false, "", 3);
                            }
                            #endregion
                        }

                        if (showLog)
                        {
                            #region show log
                            this.Invoke((MethodInvoker) delegate
                            {
                                lblVisited.Text    = visitedCount.ToString();
                                lblQueue.Text      = crawlerLink.Count.ToString();
                                lblProduct.Text    = countWeb.ToString();
                                txtUrlCurrent.Text = currentUrl;
                                // Elapsed time since 'start', formatted through a DateTime.
                                var xx             = DateTime.Now - start;
                                DateTime mydate    = new DateTime(xx.Ticks);
                                lblTime.Text       = mydate.ToString("HH:mm:ss");
                                lblIgnored.Text    = ignoredCount.ToString();
                            });
                            #endregion
                        }
                    }
                    visitedCount++;
                    currentUrl = c_url;
                }
                catch (Exception ex)
                {
                    // Any failure while processing this URL is logged to a per-host file and the
                    // crawl moves on to the next queued URL.
                    FileLog.WriteAppendText(DateTime.Now.ToString("dd/MM HH:mm:ss") + "\t, " + c_url + "\r\n" + ex.ToString(), rootUri.Host + ".csv");
                }
            }

            finish = true;
            crawlerLink.Clear();
            crawlerLink = null;
            this.timer1.Start();
            // NOTE(review): if this method itself runs on crawlerThread, Abort() ends the current
            // thread at this point and the Join/null assignment never execute — confirm intent.
            if (crawlerThread != null)
            {
                if (crawlerThread.IsAlive)
                {
                    crawlerThread.Abort();
                    crawlerThread.Join();
                    crawlerThread = null;
                }
            }
        }