Example #1
0
        //public List<string> GetAllMailbox(List<DeepWebPage> deepWebPages)
        //{
        //    List<string> Mailboxes = new List<string>();
        //    foreach (var dwp in deepWebPages)
        //    {

        //    }
        //    return Mailboxes;
        //}
        /// <summary>
        /// Downloads the page at <c>deepWebPage.webUrl</c> and extracts a single
        /// "name, mailbox" pair from its HTML.
        /// </summary>
        /// <remarks>
        /// For now a page is assumed to contain exactly one e-mail address. The name is
        /// the text that starts 11 characters after the first "user-name" marker and
        /// ends at the next "&lt;/h5&gt;"; the mailbox is the text between the e-mail
        /// label marker and the next "&lt;/strong&gt;".
        /// </remarks>
        /// <param name="deepWebPage">Page whose <c>webUrl</c> is downloaded and scanned.</param>
        /// <returns>
        /// A list holding one entry of the form <c>name + "\t" + mailbox + "\n"</c>,
        /// or an empty list when any of the expected markers is missing from the page.
        /// </returns>
        public List <string> GetSingleMailbox(DeepWebPage deepWebPage)
        {
            List <string> smailboxes = new List <string>();
            Byte[]        pageData;

            // WebClient is IDisposable; release it as soon as the download has finished.
            using (WebClient MyWebClient = new WebClient())
            {
                // Credentials used to authenticate the request to the Internet resource.
                MyWebClient.Credentials = CredentialCache.DefaultCredentials;
                // Download the raw bytes of the page.
                pageData = MyWebClient.DownloadData(deepWebPage.webUrl);
            }

            // Decode as UTF-8 when the bytes look like UTF-8, otherwise fall back to the
            // system default encoding (the original author expected GB2312 here).
            // NOTE(review): Encoding.Default is platform dependent - confirm it really
            // resolves to GB2312 on the target runtime.
            string htmlCode = IsUtf8(pageData)
                ? Encoding.UTF8.GetString(pageData)
                : Encoding.Default.GetString(pageData);

            string key1 = "<li>电子邮箱:<span><strong>"; // marker that directly precedes the mailbox text
            string key2 = "user-name";                   // marker that precedes the user name text
            int    nameOffset = 11;                      // characters skipped from the start of key2 to reach the name

            // Ordinal search: this is markup matching, not linguistic comparison.
            int pos  = htmlCode.IndexOf(key1, StringComparison.Ordinal);
            int pos1 = htmlCode.IndexOf(key2, StringComparison.Ordinal);
            if (pos < 0 || pos1 < 0)
            {
                // A marker is missing; computing offsets from -1 would yield garbage.
                return(smailboxes);
            }

            int mailStart = pos + key1.Length; // key1 is 23 characters long
            int nameStart = pos1 + nameOffset;
            if (nameStart > htmlCode.Length)
            {
                // The page ends before the name could start.
                return(smailboxes);
            }

            // Search forward from the start offsets instead of copying the document.
            int nameEnd = htmlCode.IndexOf("</h5>", nameStart, StringComparison.Ordinal);
            int mailEnd = htmlCode.IndexOf("</strong>", mailStart, StringComparison.Ordinal);
            if (nameEnd < 0 || mailEnd < 0)
            {
                // A closing tag is missing; Substring would be given a negative length.
                return(smailboxes);
            }

            string name    = htmlCode.Substring(nameStart, nameEnd - nameStart);
            string mailbox = htmlCode.Substring(mailStart, mailEnd - mailStart);

            smailboxes.Add(name + "\t" + mailbox + "\n");
            return(smailboxes);
        }
Example #2
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and collects every child-page
        /// URL found in it.
        /// </summary>
        /// <remarks>
        /// A child URL is any occurrence of the fixed prefix
        /// "http://pdc.hzau.edu.cn/jgfw/desktop/"; each one is captured as a
        /// fixed-width slice of 66 characters starting at the prefix. An occurrence
        /// with fewer than 66 characters left in the document is skipped, because it
        /// cannot be a complete URL of the expected width.
        /// </remarks>
        /// <param name="url">Address of the page to download and scan.</param>
        /// <returns>
        /// One <c>DeepWebPage</c> per captured URL, in document order, with only
        /// <c>webUrl</c> set; empty when the prefix never occurs.
        /// </returns>
        public List <DeepWebPage> getSonPage(String url)
        {
            List <DeepWebPage> deepWebPages = new List <DeepWebPage>();
            Byte[]             pageData;

            // WebClient is IDisposable; release it as soon as the download has finished.
            using (WebClient MyWebClient = new WebClient())
            {
                // Credentials used to authenticate the request to the Internet resource.
                MyWebClient.Credentials = CredentialCache.DefaultCredentials;
                // Download the raw bytes of the page.
                pageData = MyWebClient.DownloadData(url);
            }

            // Decode as UTF-8 when the bytes look like UTF-8, otherwise fall back to the
            // system default encoding (the original author expected GB2312 here).
            // NOTE(review): Encoding.Default is platform dependent - confirm it really
            // resolves to GB2312 on the target runtime.
            string htmlCode = IsUtf8(pageData)
                ? Encoding.UTF8.GetString(pageData)
                : Encoding.Default.GetString(pageData);

            string key1      = "http://pdc.hzau.edu.cn/jgfw/desktop/"; // prefix that marks a child URL
            int    urlLength = 66;                                     // fixed width of a captured URL (adjustable)

            // Ordinal search: URLs are matched as exact text, not linguistically.
            int pos = htmlCode.IndexOf(key1, StringComparison.Ordinal);
            while (pos > -1)
            {
                // Only capture when a full-width slice fits; Substring would otherwise
                // throw ArgumentOutOfRangeException near the end of the document.
                if (pos + urlLength <= htmlCode.Length)
                {
                    DeepWebPage deepWebPage = new DeepWebPage();
                    deepWebPage.webUrl = htmlCode.Substring(pos, urlLength);
                    deepWebPages.Add(deepWebPage);
                }

                // Resume the search right after the prefix just matched.
                pos += key1.Length;
                if (pos >= htmlCode.Length)
                {
                    break;
                }
                pos = htmlCode.IndexOf(key1, pos, StringComparison.Ordinal);
            }

            return(deepWebPages);
        }