/// <summary>
/// Downloads a single page and extracts one value per configured rule into a <see cref="DataPack"/>.
/// </summary>
/// <param name="pageUri">Address of the page to fetch; it must match <c>FormatedPageUriRule</c>.</param>
/// <param name="number">
/// Running counter owned by the caller. It is incremented when the address is rejected
/// and after a page has been downloaded and parsed.
/// </param>
/// <param name="func">Optional callback invoked with the populated pack; may be null.</param>
/// <returns>
/// The populated <see cref="DataPack"/>, or <c>null</c> when the address does not match the
/// page-address rule or the download fails.
/// </returns>
private DataPack GetPageData(string pageUri, ref int number, DataPackFunc func)
{
    // Reject addresses that do not match the configured page-address rule.
    if (!Regex.IsMatch(pageUri, this.FormatedPageUriRule))
    {
        ++number;
        state.FailCount++;
        return null;
    }

    // Prefix the base path when absoluteUriRegex matches the address.
    // NOTE(review): the field name suggests it matches absolute URIs, yet a match is what
    // triggers the prefixing - confirm what the pattern actually recognises.
    if (absoluteUriRegex.IsMatch(pageUri))
    {
        pageUri = GetBasePath(pageUri) + pageUri;
    }

    // Download the page HTML. The response, its stream and the reader are all disposed
    // deterministically so the underlying connection is released even when reading fails.
    string html;
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(pageUri);
        request.Timeout = this.RequestTimeOut;

        using (WebResponse response = request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, this.Encode))
        {
            html = reader.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Best effort: any failure while creating the request or reading the response is
        // counted as a failed page and reported to the caller as null.
        // NOTE(review): unlike the address-rejection path above, 'number' is not incremented
        // here - confirm whether callers rely on that difference.
        state.FailCount++;
        return null;
    }

#if DEBUG
    Console.WriteLine("\r\n------------------------------\r\n�õ�ƥ����б�����Ϊ:{0}", html);
#endif

    // Apply every rule to the HTML and store the first capture group under the rule's name.
    DataPack dp = new DataPack(Rules, pageUri);
    foreach (string propertyName in this.Rules)
    {
        // Regex.Match never returns null; when nothing matches, Groups[1].Value is the
        // empty string, so the property is still assigned (as an empty value).
        Match match = Regex.Match(html, this.Rules[propertyName]);
        dp[propertyName] = match.Groups[1].Value;
    }

#if DEBUG
    Console.WriteLine("\r\n-------------------------\r\n");
    foreach (KeyValuePair<string, string> pair in dp)
    {
        Console.WriteLine("{0}->{1}\r\n", pair.Key, pair.Value);
    }
#endif

    // Update the caller's counter now that this page has been processed.
    ++number;

#if DEBUG
    Console.WriteLine("flish");
#endif

    // Hand the result to the callback, if one was supplied.
    if (func != null)
    {
        func(dp);
    }

    // Count the success only after the callback has returned.
    state.SuccessCount++;
    return dp;
}