/// <summary>
/// Downloads the page at <paramref name="url"/> + <paramref name="conditions"/>, decodes it with
/// <paramref name="encode"/> and returns the slice between two marker strings with all
/// line-feed characters removed.
/// </summary>
/// <param name="url">Base address of the page.</param>
/// <param name="conditions">Text appended verbatim to <paramref name="url"/> (e.g. a query string).</param>
/// <param name="encode">Encoding used to decode the downloaded bytes.</param>
/// <param name="starts">Marker where the returned slice begins (the marker itself is kept). Trimmed before use.</param>
/// <param name="ends">Marker where the returned slice stops (the marker itself is dropped). Trimmed before use.</param>
/// <returns>
/// The whole trimmed page when either marker is null or blank; otherwise the text from the first
/// occurrence of <paramref name="starts"/> up to the first following occurrence of
/// <paramref name="ends"/>. If a marker cannot be found, a fixed message naming the missing
/// marker is returned instead of page content.
/// </returns>
/// <exception cref="Exception">
/// The download failed; the original <see cref="WebException"/> is available as InnerException.
/// </exception>
public static string Bind(string url, string conditions, Encoding encode, string starts, string ends)
{
    string firstPage = url + conditions;
    string keyText;

    try
    {
        byte[] pageData;

        // Dispose the client as soon as the download is done so its resources are not leaked.
        using (WebClient astoWebClient = DataMiningHelper.CreateWebClient())
        {
            // Authenticate the request with the default credentials of the current security context.
            astoWebClient.Credentials = CredentialCache.DefaultCredentials;
            pageData = astoWebClient.DownloadData(firstPage);
        }

        // Decode with the caller-supplied encoding and strip surrounding whitespace.
        string pageHtml = encode.GetString(pageData).Trim();

        // A null marker is treated like a blank one: no slicing is requested.
        string startMarker = starts == null ? "" : starts.Trim();
        string endMarker = ends == null ? "" : ends.Trim();

        if (startMarker.Length != 0 && endMarker.Length != 0)
        {
            // Markers are markup fragments, so compare ordinally rather than by culture rules.
            int startIndex = pageHtml.IndexOf(startMarker, StringComparison.Ordinal);
            if (startIndex == -1)
            {
                // Start marker not found: report it instead of returning page content.
                return "没找到当前指定的START";
            }

            // Drop everything in front of the start marker.
            string pageText = pageHtml.Substring(startIndex);

            int endIndex = pageText.IndexOf(endMarker, StringComparison.Ordinal);
            if (endIndex == -1)
            {
                // End marker not found: previously this crashed in Remove(-1); report it
                // the same way a missing start marker is reported.
                return "没找到当前指定的END";
            }

            // Drop the end marker and everything after it.
            keyText = pageText.Substring(0, endIndex);
        }
        else
        {
            keyText = pageHtml;
        }
    }
    catch (WebException webEx)
    {
        // Keep the exception type callers already see, but preserve the original as InnerException.
        throw new Exception(webEx.Message, webEx);
    }

    return keyText.Replace("\n", "");
}
/// <summary>
/// Builds a <see cref="DataTable"/> of string columns from HTML table markup: every
/// <c>&lt;tr&gt;</c> element except the first becomes one row, and the matches of
/// <paramref name="TbPattern"/> inside that element become its cell values.
/// </summary>
/// <param name="UrlHtml">HTML text to scan for <c>&lt;tr&gt;</c> elements.</param>
/// <param name="Columns">Column names for the table; each name is trimmed before use.</param>
/// <param name="TbPattern">Regular expression matching one cell inside a row.</param>
/// <param name="bt">Not used by this method; kept so existing callers keep compiling.</param>
/// <returns>
/// A table with one string column per entry of <paramref name="Columns"/>. Rows in which
/// <paramref name="TbPattern"/> matches nothing are skipped. Cells beyond the number of
/// columns are ignored; missing cells are left unset.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="UrlHtml"/> or <paramref name="Columns"/> is null.
/// </exception>
public static DataTable GetData(string UrlHtml, string[] Columns, string TbPattern, bool bt)
{
    if (Columns == null)
    {
        throw new ArgumentNullException("Columns");
    }
    if (UrlHtml == null)
    {
        throw new ArgumentNullException("UrlHtml");
    }

    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture;
    const string rowPattern = @"<tr[^>]*>[\s\S]*?<\/tr>";

    DataTable dt = new DataTable();

    // Trim each column name once; the same trimmed names are used to address cells below.
    string[] columnNames = new string[Columns.Length];
    for (int i = 0; i < Columns.Length; i++)
    {
        columnNames[i] = Columns[i].Trim();
        dt.Columns.Add(new DataColumn(columnNames[i], typeof(string)));
    }

    // Select every <tr>...</tr> element.
    MatchCollection rowCollection = Regex.Matches(UrlHtml, rowPattern, options);

    // Index 0 is skipped, as in the original code (presumably the header row - confirm with callers).
    for (int i = 1; i < rowCollection.Count; i++)
    {
        // Select the cells of this row.
        MatchCollection columnCollection = Regex.Matches(rowCollection[i].Value, TbPattern, options);
        if (columnCollection.Count == 0)
        {
            continue;
        }

        DataRow dr = dt.NewRow();

        // Never read past the declared columns: a row with extra cells used to throw
        // IndexOutOfRangeException here.
        int cellCount = Math.Min(columnCollection.Count, columnNames.Length);
        for (int j = 0; j < cellCount; j++)
        {
            dr[columnNames[j]] = DataMiningHelper.RemoveHtml(columnCollection[j].Value);
        }

        dt.Rows.Add(dr);
    }

    // Commit all added rows once, instead of re-walking the whole table after every row.
    dt.AcceptChanges();
    return dt;
}