Esempio n. 1
0
        public void run()
        {
            long         id   = 0;
            Regular_News mreg = new Regular_News();

            while (stopflog)
            {
                string      sql = "select id,WWW from domain where id >" + id + "and id<" + (id + 10);
                data_domain mm  = new data_domain();

                SortedDictionary <long, string> msortWWW = mm.select(sql);
                id = mm.getlastid();
                mm.close();
                if (msortWWW == null)
                {
                    continue;
                }

                foreach (KeyValuePair <long, string> mk in msortWWW)
                {
                    string html = this.mfind.read_utf8(mk.Value);


                    if (html == null)
                    {
                        continue;
                    }
                    SortedDictionary <string, string> ml = this.mreg.getWWW(mk.Value, html, this.mreg.text_all);
                    if (ml == null && ml.Count == 0)
                    {
                        continue;
                    }

                    string title = mreg.gettitle();//获取网页的头

                    //保存到数据库

                    foreach (KeyValuePair <string, string> mkey in ml)
                    {
                        string html1 = this.mfind.read_utf8(mkey.Value);


                        if (html == null)
                        {
                            continue;
                        }
                        SortedDictionary <string, string> mll = this.mreg.getWWW(mkey.Value, html1, this.mreg.text_all);
                        foreach (KeyValuePair <string, string> kk in mll)
                        {
                            ml.Add(kk.Key, kk.Value);
                        }
                        mll = null;
                        //处理文章和网页


                        ml.Remove(mkey.Key);
                    }
                }
            }
        }
Esempio n. 2
0
        //验证是否重复
        public static bool checkwww(string www)
        {
            data_domain k = new data_domain();

            double h = k.readid(www);

            if (h == 0)
            {
                return(true);
            }
            else
            {
                return(false);
            }
        }
Esempio n. 3
0
        /*
         * public void run() {
         *
         *  bool folg=true;
         *  string tempw="http://www.qq.com/";
         * // string www = this.mdata.readone();
         *   string url1 = "";
         *  string data = "";
         *  SortedDictionary<string, string> mm = new SortedDictionary<string, string>();
         *  while(stopflog){
         *
         *      data_domain mdata = new data_domain();
         *      mdata.chagedatabase("linbei_spider");
         *      url1 = html_string.is_domain(tempw);
         *
         *
         *      int o=0;
         *      foreach (KeyValuePair<string, string> k in mm) {
         *
         *          Console.Write("正在保存数据{0}>>{1}\n",k.Key,url1);
         *          bool f=false;
         *         f =  mdata.insert(k.Key,o);
         *
         *         if (!f) {
         *             Console.Write("存储失败");
         *
         *         }
         *          o++;
         *
         *          if (folg)
         *          {
         *               string temp_www = html_string.is_domain(k.Key);
         *             if(url1 != temp_www){
         *
         *                  tempw = k.Key;
         *                  folg = false;
         *              }
         *
         *          }
         *
         *      }
         *
         *
         *      if (folg)
         *      {
         *
         *          tempw = mdata.last();
         *
         *          if (tempw == null) {
         *              tempw = "http://www.hao123.com/";
         *          }
         *          tempw = tempw.Trim();
         *      }
         *      else {
         *
         *          folg = true;
         *      }
         *      data = mfind.read_utf8(tempw);
         *      if (data == null) {
         *
         *          continue;
         *      }
         *      mdata.close();
         *      mm.Clear();
         *  mm = mreg.getWWW(tempw, data, this.mreg.text_4mu);
         *  }
         *
         *
         *
         *
         *
         * }*/
        public void run()
        {
            string[] mu = new string[] { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" };

            string[] domains = new string[] {
                "com", "cn", "com.cn", "net", "gov.cn", "gov", "edu.cn", "mil", "cc", "org", "win", "bid", "pro"
            };
            data_domain mdata = new data_domain();

            mdata.chagedatabase("linbei_spider");

            int[] mint = new int[8] {
                0, -1, -1, -1, -1, -1, -1, -1
            };
            string url = "";

            while (true)
            {
                if (mint[7] >= 35)
                {
                    break;
                }

                for (int i = 0; i < 7; i++)
                {
                    if (mint[i] > 35)
                    {
                        if (mint[i + 1] == -1)
                        {
                            mint[i + 1] = 0;
                        }
                        mint[i + 1]++;
                        mint[i] = 0;
                    }
                }
                Console.Write("" + mint[0] + mint[1] + mint[2] + mint[3] + mint[4] + mint[5] + mint[6] + mint[7] + "\n");
                for (int a = 0; a < 7; a++)
                {
                    if (mint[a] == -1)
                    {
                        break;
                    }
                    url = url + mu[mint[a]];
                }
                //int state = 1;
                for (int b = 0; b < 13; b++)
                {
                    string urls = "www." + url + "." + domains[b];
                    bool   f    = mdata.insert(urls, b);

                    if (!f)
                    {
                        Console.Write("存储失败");
                    }

/*
 *                  Console.Write(urls + "\n");
 *                  IPHostEntry hostInfo = null;
 *                  try
 *                  {
 *                      hostInfo = Dns.GetHostByName(urls);
 *
 *
 *                  }
 *                  catch (Exception e)
 *                  {
 *
 *                      state = 0;
 *                  }
 *                  if (hostInfo != null)
 *                  {
 *                      string ip = hostInfo.AddressList.ElementAt(0).ToString();
 *                      Console.Write(ip + "\n");
 *                  }
 *                  else
 *                  {
 *
 *                      state = 0;
 *                      Console.Write("不存在" + "\n");
 *                  }*/
                }



                url = "";
                mint[0]++;
            }

            mdata.close();
        }