Esempio n. 1
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> + <paramref name="conditions"/>, decodes it with
        /// <paramref name="encode"/> and returns the fragment that begins at the start marker and stops
        /// just before the end marker, with all "\n" characters removed.
        /// </summary>
        /// <param name="url">Base address of the page to download.</param>
        /// <param name="conditions">Text appended verbatim to <paramref name="url"/> (e.g. a query string).</param>
        /// <param name="encode">Encoding used to decode the downloaded bytes (e.g. UTF-8).</param>
        /// <param name="starts">Start marker; surrounding whitespace is ignored. The marker itself is kept in the result.</param>
        /// <param name="ends">End marker; surrounding whitespace is ignored. The marker itself is not included in the result.</param>
        /// <returns>
        /// The whole trimmed page when either marker is null/blank; the literal message
        /// "没找到当前指定的START" when the start marker is absent; otherwise the text from the start marker up to
        /// the first end marker that follows it (or up to the end of the page when no end marker follows).
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="encode"/> is null.</exception>
        /// <exception cref="Exception">The download failed; the original <see cref="WebException"/> is the inner exception.</exception>
        public static string Bind(string url, string conditions, Encoding encode, string starts, string ends)
        {
            if (encode == null)
            {
                throw new ArgumentNullException("encode");
            }

            string firstPage   = url + conditions;
            string startMarker = (starts ?? "").Trim();   // null markers are treated like blank ones
            string endMarker   = (ends ?? "").Trim();
            string keyText     = "";

            try
            {
                // NOTE(review): assumes CreateWebClient returns a fresh instance owned by this call — confirm
                // against DataMiningHelper before relying on the dispose.
                using (WebClient astoWebClient = DataMiningHelper.CreateWebClient())
                {
                    // Authenticate the request with the default credentials of the current security context.
                    astoWebClient.Credentials = CredentialCache.DefaultCredentials;

                    byte[] pageData = astoWebClient.DownloadData(firstPage);
                    string pageHtml = encode.GetString(pageData).Trim();

                    if (startMarker.Length == 0 || endMarker.Length == 0)
                    {
                        // Without both markers there is nothing to cut: return the whole page.
                        keyText = pageHtml;
                    }
                    else
                    {
                        // Ordinal search: markers are markup fragments, not linguistic text.
                        int startIndex = pageHtml.IndexOf(startMarker, StringComparison.Ordinal);
                        if (startIndex == -1)
                        {
                            return "没找到当前指定的START";
                        }

                        // Look for the end marker only after the start marker, so an end marker that
                        // happens to occur inside the start marker cannot truncate the result.
                        int endIndex = pageHtml.IndexOf(endMarker, startIndex + startMarker.Length, StringComparison.Ordinal);

                        // A missing end marker used to crash with ArgumentOutOfRangeException;
                        // fall back to everything from the start marker onwards.
                        keyText = endIndex == -1
                            ? pageHtml.Substring(startIndex)
                            : pageHtml.Substring(startIndex, endIndex - startIndex);
                    }
                }
            }
            catch (WebException webEx)
            {
                // Same exception type and message as before, but keep the original as InnerException
                // so the status code and stack trace are not lost.
                throw new Exception(webEx.Message, webEx);
            }

            return keyText.Replace("\n", "");
        }
Esempio n. 2
0
        /// <summary>
        /// Builds a <see cref="DataTable"/> of string columns from the table rows found in an HTML fragment.
        /// Every <c>&lt;tr&gt;…&lt;/tr&gt;</c> match except the first one is scanned with
        /// <paramref name="TbPattern"/>; each cell match is passed through
        /// <c>DataMiningHelper.RemoveHtml</c> and stored in the column at the same position.
        /// </summary>
        /// <param name="UrlHtml">HTML text to scan. Null or empty yields a table with columns but no rows.</param>
        /// <param name="Columns">Column names; each is trimmed and added as a string column, in order.</param>
        /// <param name="TbPattern">Regular expression that matches one cell inside a row.</param>
        /// <param name="bt">Not used by this method.</param>
        /// <returns>
        /// The populated table. Rows without any cell match are skipped; cells beyond the number of
        /// declared columns are ignored; all added rows are accepted (unchanged row state).
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="Columns"/> is null.</exception>
        public static DataTable GetData(string UrlHtml, string[] Columns, string TbPattern, bool bt)
        {
            if (Columns == null)
            {
                throw new System.ArgumentNullException("Columns");
            }

            DataTable dt = new DataTable();

            foreach (string columnName in Columns)
            {
                dt.Columns.Add(new System.Data.DataColumn(columnName.Trim(), typeof(System.String)));
            }

            if (string.IsNullOrEmpty(UrlHtml))
            {
                return(dt);
            }

            const string       rowPattern   = @"<tr[^>]*>[\s\S]*?<\/tr>";
            const RegexOptions matchOptions = RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture;

            // Select every <tr> element of the input.
            MatchCollection rowCollection = Regex.Matches(UrlHtml, rowPattern, matchOptions);

            // Index 0 is deliberately skipped, as in the original loop (presumably the header row — TODO confirm).
            for (int i = 1; i < rowCollection.Count; i++)
            {
                // Select the cells of the current row.
                MatchCollection columnCollection = Regex.Matches(rowCollection[i].Value, TbPattern, matchOptions);
                if (columnCollection.Count < 1)
                {
                    continue;
                }

                System.Data.DataRow dr = dt.NewRow();

                // A row may contain more cells than declared columns; writing past the last column
                // used to throw IndexOutOfRangeException, so the surplus cells are ignored.
                int cellCount = System.Math.Min(columnCollection.Count, Columns.Length);
                for (int j = 0; j < cellCount; j++)
                {
                    // Columns were added in the same order as the names, so the ordinal addresses
                    // the same column the name lookup did, and also works for blank names.
                    dr[j] = DataMiningHelper.RemoveHtml(columnCollection[j].Value);
                }

                dt.Rows.Add(dr);
            }

            // Accept all rows once at the end instead of re-walking the table after every insert.
            dt.AcceptChanges();
            return(dt);
        }