Пример #1
0
        /// <summary>
        /// Creates a spider with its default state: counters and flags set to fixed starting
        /// values, every URL/pattern list empty, proxy and backlink settings blank, fresh
        /// <c>SpiderResults</c> and <c>ProxyShuttler</c> instances, and a fixed set of
        /// host-name fragments added to <c>AvoidPatterns</c>.
        /// </summary>
        public M4Spider()
        {
            // Outbound-link counters.
            _numOutboundLinks = 0;
            _maxOutboundLinks = 99;

            // Behaviour switches.
            _verifyBackLinks = true;
            _insideLinks     = false;
            _avoidHttps      = true;

            // All URL and pattern lists start out empty.
            _avoidPatterns = new List<string>();
            _obLinks       = new List<string>();
            _spideredURLs  = new List<string>();
            _unSpidered    = new List<string>();

            // No proxy and no backlink domain configured yet.
            _proxyPort     = 0;
            _proxyPassword = "";
            _proxyLogin    = "";
            _proxyDomain   = "";
            _backLinkDN    = "";

            SR      = new SpiderResults();
            PrxShut = new ProxyShuttler();

            // Fragments added to AvoidPatterns by default, in this order.
            string[] defaultAvoidPatterns =
            {
                "google.",
                "yahoo.",
                "bing.",
                "altavista.",
                "princeton.edu",
                "amazon.com",
                "baidu.com",
                "planet-lab.edu"
            };

            foreach (string pattern in defaultAvoidPatterns)
            {
                AvoidPatterns.Add(pattern);
            }
        }
Пример #2
0
        /// <summary>
        /// Click handler for the spider button. Pulls domain names from <c>GetDN()</c> until it
        /// returns null. For each domain it optionally crawls the Alexa "linksin" page for
        /// backlinks, runs the Alexa, Compete and Quantcast rakers on the domain and on every
        /// backlink that does not contain the domain name, writes one <c>TrafficRoutine4</c> row
        /// per lookup, and refreshes the timing/progress controls.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btnSpider_Click(object sender, EventArgs e)
        {
            string Dname;
            int    cntr      = 0;
            double timeAvg   = 0;
            double timeTotal = 0;

            ProxyShuttler PrxShtlr = new ProxyShuttler();

            while ((Dname = GetDN()) != null)
            {
                DateTime startTime = DateTime.Now;
                M4Spider Spidey    = new M4Spider();
                Spidey.MaxOutboundLinks = 3;
                Spidey.ProxyDomain      = PrxShtlr.CurrentProxy.ToString();
                Spidey.AddUnspidered("http://www.alexa.com/site/linksin/" + Dname);
                Spidey.BackLinkDN = Dname;

                ArrayList BackLinksList = null;
                if (AV_links > 0)
                {
                    // NOTE(review): a failed crawl skips this domain without recording anything.
                    // This relies on GetDN() not handing the same domain back forever - confirm.
                    if (!Spidey.CrawlNext())
                    {
                        continue;
                    }

                    if (Spidey.SR1 != null)
                    {
                        BackLinksList = Spidey.SR1.HyperLinks;
                    }
                }

                if (BackLinksList == null)
                {
                    // No crawl results: fall back to a list holding only the domain itself.
                    // The backlink loop below skips that entry, so no backlink rows are written.
                    BackLinksList = new ArrayList();
                    BackLinksList.Add(Dname);
                }

                LinkRaker LR = new LinkRaker();
                LR.lb_Proxy = true; // While at work . . .
                LR.RunAlexa(Dname);
                LR.RunCompete(Dname);
                LR.RunQuantcast(Dname);

                DateTime StopTime = DateTime.Now;
                TimeSpan timeDif  = StopTime - startTime;
                double   procTime = timeDif.TotalSeconds;

                CycleTime("Parent Domain", Dname, (float)procTime);

                using (SqlConnection conn = new SqlConnection())
                {
                    conn.ConnectionString = Properties.Settings.Default.ConnStr;
                    conn.Open();
                    using (SqlCommand cmd = new SqlCommand())
                    {
                        cmd.Connection = conn;
                        FillTrafficRoutine4Insert(cmd, Dname, "", LR, procTime);
                        cmd.ExecuteNonQuery();
                    }
                }

                DateTime bckLnkStartTime = DateTime.Now;
                int      count           = 0;
                foreach (object ALobj in BackLinksList)
                {
                    string backLink = ALobj.ToString();

                    // Skip links that point back at the domain being processed.
                    if (backLink.IndexOf(Dname, StringComparison.OrdinalIgnoreCase) >= 0)
                    {
                        continue;
                    }
                    count++;

                    LinkRaker LR1 = new LinkRaker();
                    LR1.lb_Proxy = true; // While at work . . .
                    LR1.RunAlexa(backLink);
                    LR1.RunCompete(backLink);
                    // Must run on LR1: the row written below reads LR1.dnRec.QuantcastCtr.
                    LR1.RunQuantcast(backLink);

                    StopTime = DateTime.Now;
                    TimeSpan timeDifA    = StopTime - bckLnkStartTime;
                    double   subProcTime = timeDifA.TotalSeconds;
                    CycleTime("Backlink" + count.ToString() + " Domain", Dname, (float)subProcTime);

                    using (SqlConnection conn = new SqlConnection())
                    {
                        conn.ConnectionString = Properties.Settings.Default.ConnStr;
                        conn.Open();
                        using (SqlCommand cmd = new SqlCommand())
                        {
                            cmd.Connection = conn;
                            FillTrafficRoutine4Insert(cmd, Dname, webAddr(backLink), LR1, subProcTime);

                            // Restart the clock so the next CycleTime entry covers only the INSERT.
                            // The next backlink's lookup time is also measured from this point.
                            bckLnkStartTime = DateTime.Now;

                            cmd.ExecuteNonQuery();

                            StopTime    = DateTime.Now;
                            timeDifA    = StopTime - bckLnkStartTime;
                            subProcTime = timeDifA.TotalSeconds;
                            CycleTime("Backlink" + count.ToString() + " UpdateTime", Dname, (float)subProcTime);
                        }
                    }
                }

                // At this point timeDif still holds the parent-domain lookup time only.
                txtBxExecTime.Text = timeDif.TotalSeconds.ToString();
                UpdateDNprocessTime(Dname, timeDif);
                cntr++;
                label2.Text = cntr.ToString();

                // Total time for this domain including all backlink work, used for the average.
                StopTime = DateTime.Now;
                timeDif  = StopTime - startTime;

                timeTotal = timeTotal + timeDif.TotalSeconds;
                timeAvg   = timeTotal / cntr;

                label3.Text = timeAvg.ToString();

                // Let the form repaint between domains; this loop runs inside the click handler.
                Application.DoEvents();
            }

            MessageBox.Show("It is finished! Total: " + cntr.ToString() + " Domains cycled.");
        }

        /// <summary>
        /// Sets the command text and all parameters on <paramref name="cmd"/> for one INSERT
        /// into <c>TrafficRoutine4</c>, using the results held by <paramref name="raker"/>.
        /// </summary>
        /// <param name="cmd">Command to populate; the caller owns its connection and disposal.</param>
        /// <param name="domainName">Value for <c>@domname</c>.</param>
        /// <param name="backlinkDomain">Value for <c>@bl_domname</c>.</param>
        /// <param name="raker">Raker whose Alexa, Compete and Quantcast records are stored.</param>
        /// <param name="procTime">Value for <c>@timedif</c>, in seconds.</param>
        private void FillTrafficRoutine4Insert(SqlCommand cmd, string domainName, object backlinkDomain, LinkRaker raker, double procTime)
        {
            cmd.CommandText = "INSERT INTO TrafficRoutine4"
                              + " (traf4_traf3_seqno, traf4_domname, traf4_bl_domainname, traf4_compete_esttraffic, traf4_compete_perdiff,"
                              + " traf4_quantcast_esttraffic, traf4_alexa_tr, traf4_alexa_reach, traf4_alexa_search, traf4_alexa_country1,"
                              + " traf4_alexa_country1_per, traf4_alexa_country2, traf4_alexa_country2_per, traf4_alexa_country3, traf4_alexa_country3_per, traf4_procTime)"
                              + " VALUES (@seqno, @domname, @bl_domname, @compete_esttraf, @compete_perdiff, @quantcast_esttraf, @alexa_tr,"
                              + " @alexa_reach, @alexa_search, @alexa_ctry1, @alexa_ctry1_per, @alexa_ctry2, @alexa_ctry2_per, @alexa_ctry3, @alexa_ctry3_per, @timedif)";
            cmd.Parameters.AddWithValue("@seqno", traf3seqno);
            cmd.Parameters.AddWithValue("@domname", domainName);
            cmd.Parameters.AddWithValue("@bl_domname", backlinkDomain);
            cmd.Parameters.AddWithValue("@compete_esttraf", raker.CmptRec.CmptMonth1);
            cmd.Parameters.AddWithValue("@compete_perdiff", 0);
            cmd.Parameters.AddWithValue("@quantcast_esttraf", raker.dnRec.QuantcastCtr);
            cmd.Parameters.AddWithValue("@alexa_tr", raker.AlxRec.AlxHighRank);
            cmd.Parameters.AddWithValue("@alexa_reach", raker.AlxRec.AlxHighReach);
            cmd.Parameters.AddWithValue("@alexa_search", raker.AlxRec.AlxHighSearch);
            cmd.Parameters.AddWithValue("@alexa_ctry1", TextOrEmpty(raker.AlxRec.AlxCountry1));
            cmd.Parameters.AddWithValue("@alexa_ctry1_per", raker.AlxRec.AlxCntry1_per);
            cmd.Parameters.AddWithValue("@alexa_ctry2", TextOrEmpty(raker.AlxRec.AlxCountry2));
            cmd.Parameters.AddWithValue("@alexa_ctry2_per", raker.AlxRec.AlxCntry2_per);
            cmd.Parameters.AddWithValue("@alexa_ctry3", TextOrEmpty(raker.AlxRec.AlxCountry3));
            cmd.Parameters.AddWithValue("@alexa_ctry3_per", raker.AlxRec.AlxCntry3_per);
            cmd.Parameters.AddWithValue("@timedif", procTime);
        }

        /// <summary>
        /// Returns <c>value.ToString()</c>, or an empty string when the value or its text is null.
        /// </summary>
        private static string TextOrEmpty(object value)
        {
            if (value == null)
            {
                return "";
            }

            return value.ToString() ?? "";
        }