/// <summary>
/// Initializes an export object by storing every argument in its matching field.
/// </summary>
/// <param name="sender">Control kept in <c>m_sender</c>.</param>
/// <param name="senderDelegate">Delegate kept in <c>m_senderDelegate</c>.</param>
/// <param name="pType">Publish type kept in <c>m_pType</c>.</param>
/// <param name="FileName">File name kept in <c>m_FileName</c>.</param>
/// <param name="pData">Data table kept in <c>m_pData</c>.</param>
public cExport(ContainerControl sender, Delegate senderDelegate, cGlobalParas.PublishType pType,string FileName,System.Data.DataTable pData )
{
    this.m_sender = sender;
    this.m_senderDelegate = senderDelegate;
    this.m_pType = pType;
    this.m_FileName = FileName;
    this.m_pData = pData;
}
/// <summary>
/// Appends one tab-separated line to the daily log file of a task.
/// </summary>
/// <remarks>
/// The file is <c>Program.getPrjPath() + "Log\" + TaskName + yyyyMMdd + ".txt"</c>, opened in
/// append mode and written with the gb2312 encoding. A line is written only for
/// <c>GatherError</c> and <c>PublishError</c>; for any other log type the file is still
/// opened (and therefore created) but nothing is appended.
/// </remarks>
/// <param name="TaskName">Task name; used in the file name and as the first column.</param>
/// <param name="LogType">Kind of entry; selects the label written in the third column.</param>
/// <param name="strLog">Message text written in the last column.</param>
public void WriteLog(string TaskName,cGlobalParas.LogType LogType, string strLog)
{
    // One timestamp for the whole call, so the file name and the logged time cannot
    // disagree when the call straddles midnight.
    DateTime now = DateTime.Now;

    // Invariant culture keeps the Gregorian year/month/day that DateTime.Year/Month/Day report.
    string FileName = Program.getPrjPath() + "Log\\" + TaskName
        + now.ToString("yyyyMMdd", System.Globalization.CultureInfo.InvariantCulture)
        + ".txt";

    // NOTE(review): the two label literals below look mis-encoded in this copy of the
    // source; they are runtime text and are kept exactly as found.
    string label = null;
    if (LogType == cGlobalParas.LogType.GatherError)
    {
        label = "�ɼ�����";
    }
    else if (LogType == cGlobalParas.LogType.PublishError)
    {
        label = "��������";
    }

    string folder = Path.GetDirectoryName(FileName);
    if (!Directory.Exists(folder))
    {
        Directory.CreateDirectory(folder);
    }

    // using-blocks release the file handle even when a write fails.
    using (FileStream myStream = File.Open(FileName, FileMode.Append, FileAccess.Write, FileShare.Write))
    using (StreamWriter sw = new StreamWriter(myStream, System.Text.Encoding.GetEncoding("gb2312")))
    {
        if (label != null)
        {
            sw.WriteLine(TaskName + "\t" + now.ToString() + "\t" + label + "\t" + strLog);
        }
    }
}
/// <summary>
/// Initializes the event data with a message type, a run name and a run parameter.
/// </summary>
/// <param name="MessType">Message type; assigned to the base class member <c>MessType</c>.</param>
/// <param name="RunName">Value stored in <c>m_RunName</c>.</param>
/// <param name="RunPara">Value stored in <c>m_RunPara</c>.</param>
public cRunTaskEventArgs(cGlobalParas.MessageType MessType ,string RunName ,string RunPara)
{
    base.MessType = MessType;
    this.m_RunName = RunName;
    this.m_RunPara = RunPara;
}
/// <summary>
/// Initializes the event data with the given message type.
/// </summary>
/// <param name="MessType">Message type stored in <c>m_MessType</c>.</param>
public cListenInitializedEventArgs(cGlobalParas.MessageType MessType)
{
    this.m_MessType = MessType;
}
/// <summary>
/// Initializes the event data describing a task state transition.
/// </summary>
/// <param name="TaskID">Task identifier; assigned to the base class member <c>TaskID</c>.</param>
/// <param name="oldState">State before the change; stored in <c>m_OldState</c>.</param>
/// <param name="newState">State after the change; stored in <c>m_NewState</c>.</param>
public TaskStateChangedEventArgs(Int64 TaskID, cGlobalParas.TaskState oldState, cGlobalParas.TaskState newState)
{
    base.TaskID = TaskID;
    this.m_OldState = oldState;
    this.m_NewState = newState;
}
/// <summary>
/// Initializes the event data carrying a URL count update for a task.
/// </summary>
/// <param name="TaskID">Task identifier; assigned to the base class member <c>TaskID</c>.</param>
/// <param name="uType">Update type stored in <c>m_UType</c>.</param>
/// <param name="TrueUrlCount">Count stored in <c>m_TrueUrlCount</c>.</param>
public cGatherUrlCountArgs(Int64 TaskID, cGlobalParas.UpdateUrlCountType uType, int TrueUrlCount)
{
    base.TaskID = TaskID;
    this.m_TrueUrlCount = TrueUrlCount;
    this.m_UType = uType;
}
/// <summary>
/// Gathers the data of one page through <c>cGatherWeb.GetGatherData</c>, retrying after a failure.
/// </summary>
/// <remarks>
/// After a failed attempt the method logs the error, sleeps 3 seconds, logs a retry notice and
/// tries again. The exception is rethrown to the caller when more than <c>m_AgainNumber</c>
/// attempts have failed, or immediately when <c>m_Ignore404</c> is set and the exception
/// message contains "404".
/// NOTE(review): <paramref name="webCode"/>, <paramref name="cookie"/>,
/// <paramref name="startPos"/>, <paramref name="endPos"/> and <paramref name="sPath"/> are not
/// used; the call passes the fields m_WebCode, m_Cookie, m_gStartPos, m_gEndPos and m_SavePath
/// instead. Kept as found - confirm this is intended.
/// </remarks>
/// <param name="Url">Address of the page to gather.</param>
/// <param name="webCode">Unused (see remarks).</param>
/// <param name="cookie">Unused (see remarks).</param>
/// <param name="startPos">Unused (see remarks).</param>
/// <param name="endPos">Unused (see remarks).</param>
/// <param name="sPath">Unused (see remarks).</param>
/// <param name="IsAjax">Forwarded unchanged to <c>cGatherWeb.GetGatherData</c>.</param>
/// <returns>The table returned by <c>cGatherWeb.GetGatherData</c>.</returns>
private DataTable GetGatherData(string Url, cGlobalParas.WebCode webCode, string cookie, string startPos, string endPos, string sPath, bool IsAjax)
{
    cGatherWeb gWeb = new cGatherWeb();
    gWeb.CutFlag = m_TaskSplitData.CutFlag;

    int AgainTime = 0;

    while (true)
    {
        try
        {
            return gWeb.GetGatherData(Url, m_WebCode, m_Cookie, m_gStartPos, m_gEndPos, m_SavePath, IsAjax);
        }
        catch (System.Exception ex)
        {
            AgainTime++;

            // Give up when the retry budget is spent, or at once for an ignored 404.
            // "throw;" (not "throw ex;") keeps the original stack trace.
            if (AgainTime > m_AgainNumber || (m_Ignore404 == true && ex.Message.Contains("404")))
            {
                throw;
            }

            // NOTE(review): the message literals below look mis-encoded in this copy of the
            // source; they are runtime text and are kept exactly as found.
            e_Log(this, new cGatherTaskLogArgs(m_TaskID, ((int)cGlobalParas.LogType.Error).ToString() + "��ַ��" + Url + "���ʷ������������Ϣ��" + ex.Message + "���ȴ�3������\n", this.IsErrorLog));

            Thread.Sleep(3000);

            e_Log(this, new cGatherTaskLogArgs(m_TaskID, ((int)cGlobalParas.LogType.Warning).ToString() + Url + "���ڽ��е�" + AgainTime + "������\n", this.IsErrorLog));
        }
    }
}
//���ݵ�������ȡ��ҳ��ַ����һ������ public List<string> GetUrlsByRule(string Url, string UrlRule,cGlobalParas.WebCode webCode, string cookie) { string Url1; List<string> Urls=new List<string> (); if (UrlRule.Trim() == "") { Urls.Add(Url); return Urls; } //�ж���ַ�Ƿ���ڲ�����������ڲ�����ȡ����һ��������ַ if (Regex.IsMatch(Url, "{.*}")) { List<string> Urls1 = SplitWebUrl(Url ); //,IsUrlEncode ,UrlEncode Url1 = Urls1[0].ToString(); } else { Url1 = Url; } //������ַ��Դ�룬��������ȡ������ȡ��������ַ //string UrlSource= cTool.GetHtmlSource(Url1,true ); cGatherWeb gW = new cGatherWeb(); string UrlSource = gW.GetHtml(Url1, webCode, cookie, "", "", true, false); gW = null; if (UrlSource == "") { return null ; } //string Rule=@"(?<=href=[\W])" + cTool.RegexReplaceTrans(UrlRule) + @"(\S[^'"">]*)(?=[\s'""])"; string Rule = ""; if (UrlRule.StartsWith("<Regex:")) { Rule = @"(?<=[href=|src=|open(][\W])"; //����ǰ string strPre = UrlRule.Substring(UrlRule.IndexOf("<Regex:")+7, UrlRule.IndexOf(">")-7); Rule += strPre; //�����м����� string cma=@"(?<=<Common:)\S+?(?=>)"; Regex cmas = new Regex(cma, RegexOptions.IgnoreCase | RegexOptions.Multiline); MatchCollection cs = cmas.Matches(UrlRule); foreach (Match ma in cs) { Rule +=@"(\S*)" + ma.Value.ToString (); } //����� if (Regex.IsMatch(UrlRule, "<End:")) { string s = UrlRule.Substring(UrlRule.IndexOf("<End:") + 5, UrlRule.Length - UrlRule.IndexOf("<End:") - 6); Rule += @"(\S*)" + s; } else { Rule += @"(\S[^'"">]*)(?=[\s'""])"; } } else { Rule = @"(?<=[href=|src=|open(][\W])" + cTool.RegexReplaceTrans(UrlRule) + @"(\S[^'"">]*)(?=[\s'""])"; } Regex re = new Regex(Rule, RegexOptions.IgnoreCase | RegexOptions.Multiline); MatchCollection aa = re.Matches(UrlSource); DataTable d1 = new DataTable(); d1.Columns.Add("Name"); foreach (Match ma in aa) { //Urls.Add(ma.Value.ToString()); d1.Rows.Add(ma.Value.ToString()); } //����ʱ���ܻ��ȡ�ظ���ַ���б��������Ҫȥ�� //ȥ���ظ��� string[] strComuns = new string[d1.Columns.Count]; for (int m = 0; m < d1.Columns.Count; 
m++) { strComuns[m] = d1.Columns[m].ColumnName; } DataView dv = new DataView(d1); DataTable d2 = dv.ToTable(true, strComuns); for (int i = 0; i < d2.Rows.Count; i++) { if (string.Compare(d2.Rows[i][0].ToString ().Substring (0,4), "http", true) != 0) { string PreUrl = Url; if (d2.Rows[i][0].ToString().Substring(0, 1) == "/") { PreUrl = PreUrl.Substring(7, PreUrl.Length - 7); PreUrl = PreUrl.Substring(0, PreUrl.IndexOf("/")); PreUrl = "http://" + PreUrl; } else { Match a = Regex.Match(PreUrl, ".*/"); PreUrl = a.Groups[0].Value.ToString(); } Urls.Add(PreUrl + d2.Rows[i][0].ToString()); } else { Urls.Add(d2.Rows[i][0].ToString()); } } return Urls; }
/// <summary>
/// Appends one "Log" element to <c>tasks\plan\RunLog.xml</c> under the project path and saves
/// the file, creating the log file first when <c>IsExist()</c> reports it missing.
/// </summary>
/// <remarks>
/// Free-text values are XML-escaped before being placed in the fragment, so characters such as
/// '&amp;' or '&lt;' in a plan name, file path or parameter string cannot break the log file.
/// NOTE(review): <paramref name="lType"/> and <paramref name="rType"/> are written through
/// their default string conversion here, whereas <c>InsertLog</c> writes their integer values -
/// confirm which form the log reader expects.
/// </remarks>
/// <param name="lType">Log type written to the LogType element.</param>
/// <param name="PlanID">Written to the PlanID element.</param>
/// <param name="PlanName">Written to the PlanName element.</param>
/// <param name="rType">Run task type written to the TaskType element.</param>
/// <param name="FileName">Written to the FileName element.</param>
/// <param name="Para">Written to the FilePara element.</param>
public void InsertOnceLog(cGlobalParas.LogType lType, string PlanID, string PlanName, cGlobalParas.RunTaskType rType, string FileName, string Para)
{
    if (!IsExist())
    {
        NewLogFile();
    }

    cXmlIO xmlconfig = new cXmlIO(Program.getPrjPath() + "tasks\\plan\\RunLog.xml");

    string strXml = "<LogType>" + lType + "</LogType>"
        + "<PlanID>" + System.Security.SecurityElement.Escape(PlanID) + "</PlanID>"
        + "<PlanName>" + System.Security.SecurityElement.Escape(PlanName) + "</PlanName>"
        + "<FileName>" + System.Security.SecurityElement.Escape(FileName) + "</FileName>"
        + "<FilePara>" + System.Security.SecurityElement.Escape(Para) + "</FilePara>"
        + "<TaskType>" + rType + "</TaskType>"
        + "<RunTime>" + DateTime.Now.ToString() + "</RunTime>";

    xmlconfig.InsertElement("Logs", "Log", strXml);
    xmlconfig.Save();
    xmlconfig = null;
}
/// <summary>
/// Expands a single address through the navigation rules by calling <c>PUrlRule</c>
/// starting at level 1.
/// </summary>
/// <param name="Url">Starting address; wrapped in a one-element list for <c>PUrlRule</c>.</param>
/// <param name="nRules">Navigation rules forwarded to <c>PUrlRule</c>.</param>
/// <param name="webCode">Page encoding forwarded to <c>PUrlRule</c>.</param>
/// <param name="cookie">Cookie string forwarded to <c>PUrlRule</c>.</param>
/// <returns>The list produced by <c>PUrlRule</c>, or <c>null</c> when it throws.</returns>
public List<string> ParseUrlRule(string Url, List<cNavigRule> nRules,cGlobalParas.WebCode webCode, string cookie)
{
    // PUrlRule works on lists, so the single start address is wrapped.
    List<string> startUrls = new List<string>();
    startUrls.Add(Url);

    try
    {
        return PUrlRule(startUrls, 1, nRules, webCode, cookie);
    }
    catch (System.Exception)
    {
        // Any failure during expansion is reported to the caller as "no result".
        return null;
    }
}
/// <summary>
/// Downloads a page, extracts one value per cut flag with a generated regular expression,
/// assembles the values into rows, applies each flag's export rule and downloads the files
/// referenced by non-text columns.
/// </summary>
/// <remarks>
/// The result is also kept in the <c>tempData</c> field. Files are saved under
/// <paramref name="sPath"/>, or under <c>Program.getPrjPath() + "data\tem_file"</c> when that
/// is empty. The caller's <paramref name="Url"/> is never modified, and exceptions from
/// <c>GetHtml</c> and <c>DownloadFile</c> propagate with their original stack trace.
/// </remarks>
/// <param name="Url">Page address. An address containing a double quote or whitespace is rejected.</param>
/// <param name="webCode">Page encoding forwarded to <c>GetHtml</c>.</param>
/// <param name="cookie">Cookie string forwarded to <c>GetHtml</c>.</param>
/// <param name="startPos">Start marker forwarded to <c>GetHtml</c>.</param>
/// <param name="endPos">End marker forwarded to <c>GetHtml</c>.</param>
/// <param name="sPath">Folder for downloaded files; null or empty selects the default folder.</param>
/// <param name="IsAjax">Forwarded to <c>GetHtml</c>.</param>
/// <returns>
/// The gathered table, or <c>null</c> when there are no cut flags, the address is rejected,
/// or nothing in the page matches.
/// </returns>
public DataTable GetGatherData(string Url,cGlobalParas.WebCode webCode, string cookie, string startPos,string endPos,string sPath,bool IsAjax)
{
    tempData = new DataTable("tempData");

    int i;
    int j;
    string strCut = "";
    bool IsDownloadFile = false;

    // Without cut flags there is nothing to extract (and no expression to build).
    if (this.CutFlag.Count == 0)
    {
        tempData = null;
        return tempData;
    }

    // ---- Table layout: one string column per cut flag ----
    for (i = 0; i < this.CutFlag.Count; i++)
    {
        tempData.Columns.Add(new DataColumn(this.CutFlag[i].Title, typeof(string)));

        // Any non-text column means files have to be downloaded afterwards.
        if (this.CutFlag[i].DataType != (int)cGlobalParas.GDataType.Txt)
        {
            IsDownloadFile = true;
        }
    }

    // ---- Extraction expression: one alternative per cut flag ----
    for (i = 0; i < this.CutFlag.Count; i++)
    {
        strCut += "(?<" + this.CutFlag[i].Title + ">" + cTool.RegexReplaceTrans(this.CutFlag[i].StartPos) + ")";

        switch (this.CutFlag[i].LimitSign)
        {
            case (int)cGlobalParas.LimitSign.NoLimit:
                strCut += ".*?";
                break;
            case (int)cGlobalParas.LimitSign.NoWebSign:
                strCut += "[^<>]*?";
                break;
            case (int)cGlobalParas.LimitSign.OnlyCN:
                strCut += "[\\u4e00-\\u9fa5]*?";
                break;
            case (int)cGlobalParas.LimitSign.OnlyDoubleByte:
                strCut += "[^\\x00-\\xff]*?";
                break;
            case (int)cGlobalParas.LimitSign.OnlyNumber:
                strCut += "[\\d]*?";
                break;
            case (int)cGlobalParas.LimitSign.OnlyChar:
                strCut += "[\\x00-\\xff]*?";
                break;
            case (int)cGlobalParas.LimitSign.Custom:
                // The custom expression is used verbatim, without escaping.
                strCut += this.CutFlag[i].RegionExpression.ToString();
                break;
            default:
                strCut += "[\\S\\s]*?";
                break;
        }

        strCut += "(?=" + cTool.RegexReplaceTrans(this.CutFlag[i].EndPos) + ")|";
    }

    // Drop the trailing "|".
    strCut = strCut.Substring(0, strCut.Length - 1);

    // ---- Page source ----
    // An address containing a double quote or whitespace is treated as invalid.
    if (Regex.IsMatch(Url, "[\"\\s]"))
    {
        tempData = null;
        return tempData;
    }

    GetHtml(Url, webCode, cookie, startPos, endPos, true, IsAjax);

    Regex re = new Regex(strCut, RegexOptions.IgnoreCase | RegexOptions.Multiline);
    MatchCollection mc = re.Matches(this.WebpageSource);

    if (mc.Count == 0)
    {
        tempData = null;
        return tempData;
    }

    // ---- Row assembly ----
    // Matches arrive as one flat sequence. Each match still begins with the StartPos text of
    // the flag that produced it, which is how a match is attributed to a column. A row is
    // started at a match of column 0; later columns are filled in order and matches that
    // belong to no expected column are skipped.
    DataRow drNew = null;
    int rowTotal;
    int m = 0;   // index of the match currently examined

    while (m < mc.Count)
    {
        drNew = tempData.NewRow();

        for (i = 0; i < this.CutFlag.Count; i++)
        {
            if (m >= mc.Count)
            {
                // Matches exhausted: the remaining columns of this row stay empty.
                break;
            }

            if (i == 0)
            {
                // Skip forward to the next match that starts a row.
                while (!mc[m].Value.StartsWith(this.CutFlag[i].StartPos, StringComparison.CurrentCultureIgnoreCase))
                {
                    m++;
                    if (m >= mc.Count)
                    {
                        // No further row start: the row under construction is discarded.
                        goto ExitWhile;
                    }
                }

                drNew[i] = mc[m].Value.Substring(this.CutFlag[i].StartPos.Length);
                m++;
            }
            else if (mc[m].Value.StartsWith(this.CutFlag[i].StartPos, StringComparison.CurrentCultureIgnoreCase))
            {
                drNew[i] = mc[m].Value.Substring(this.CutFlag[i].StartPos.Length);
                m++;
            }
            else if (mc[m].Value.StartsWith(this.CutFlag[i - 1].StartPos, StringComparison.CurrentCultureIgnoreCase))
            {
                // A repeat of the previous column: skip it and retry this column.
                m++;
                i--;
            }
            else if (i < this.CutFlag.Count - 1
                && mc[m].Value.StartsWith(this.CutFlag[i + 1].StartPos, StringComparison.CurrentCultureIgnoreCase))
            {
                // The match belongs to the next column: leave this column empty and move on.
            }
            else
            {
                // Unrelated match: skip it and retry this column.
                m++;
                i--;
            }
        }

        tempData.Rows.Add(drNew);
        drNew = null;
    }

ExitWhile:
    // Only rows actually added count; a row abandoned through the goto above is not in the table.
    rowTotal = tempData.Rows.Count;

    // ---- Export rules, applied column by column ----
    // The loop is skipped entirely for an empty table, so an export expression is parsed
    // only when there is at least one row to apply it to.
    for (i = 0; i < this.CutFlag.Count && rowTotal > 0; i++)
    {
        string expr = this.CutFlag[i].ExportExpression;

        switch (this.CutFlag[i].ExportLimit)
        {
            case (int)cGlobalParas.ExportLimit.ExportNoLimit:
                break;

            case (int)cGlobalParas.ExportLimit.ExportNoWebSign:
                for (int index = 0; index < rowTotal; index++)
                {
                    tempData.Rows[index][i] = getTxt(tempData.Rows[index][i].ToString());
                }
                break;

            case (int)cGlobalParas.ExportLimit.ExportPrefix:
                for (int index = 0; index < rowTotal; index++)
                {
                    tempData.Rows[index][i] = expr + tempData.Rows[index][i].ToString();
                }
                break;

            case (int)cGlobalParas.ExportLimit.ExportReplace:
                {
                    // The substring arithmetic strips one character on each side of both
                    // halves around the first comma (presumably quotes - TODO confirm format).
                    int comma = expr.IndexOf(",");
                    string oStr = expr.Substring(1, comma - 2);
                    string nStr = expr.Substring(comma + 2, expr.Length - comma - 3);
                    for (int index = 0; index < rowTotal; index++)
                    {
                        tempData.Rows[index][i] = tempData.Rows[index][i].ToString().Replace(oStr, nStr);
                    }
                }
                break;

            case (int)cGlobalParas.ExportLimit.ExportSuffix:
                for (int index = 0; index < rowTotal; index++)
                {
                    tempData.Rows[index][i] = tempData.Rows[index][i].ToString() + expr;
                }
                break;

            case (int)cGlobalParas.ExportLimit.ExportTrimLeft:
                {
                    int lefti = int.Parse(expr.ToString());
                    for (int index = 0; index < rowTotal; index++)
                    {
                        string cell = tempData.Rows[index][i].ToString();
                        if (cell.Length > lefti)
                        {
                            tempData.Rows[index][i] = cell.Substring(lefti, cell.Length - lefti);
                        }
                    }
                }
                break;

            case (int)cGlobalParas.ExportLimit.ExportTrimRight:
                {
                    int righti = int.Parse(expr.ToString());
                    for (int index = 0; index < rowTotal; index++)
                    {
                        string cell = tempData.Rows[index][i].ToString();
                        if (cell.Length > righti)
                        {
                            tempData.Rows[index][i] = cell.Substring(0, cell.Length - righti);
                        }
                    }
                }
                break;

            case (int)cGlobalParas.ExportLimit.ExportTrim:
                for (int index = 0; index < rowTotal; index++)
                {
                    tempData.Rows[index][i] = tempData.Rows[index][i].ToString().Trim();
                }
                break;

            case (int)cGlobalParas.ExportLimit.ExportRegexReplace:
                {
                    // Same layout as ExportReplace, but the first half is a regex pattern.
                    int comma = expr.IndexOf(",");
                    string oStr = expr.Substring(1, comma - 2);
                    string nStr = expr.Substring(comma + 2, expr.Length - comma - 3);
                    for (int index = 0; index < rowTotal; index++)
                    {
                        tempData.Rows[index][i] = Regex.Replace(tempData.Rows[index][i].ToString(), oStr, nStr, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                    }
                }
                break;

            default:
                break;
        }
    }

    // ---- File download for non-text columns ----
    if (IsDownloadFile == true)
    {
        if (string.IsNullOrEmpty(sPath))
        {
            sPath = Program.getPrjPath() + "data\\tem_file";
        }

        if (!Directory.Exists(sPath))
        {
            Directory.CreateDirectory(sPath);
        }

        // Bases for relative file addresses, computed once from the page address so that
        // every file is resolved against the same, unmodified page address.
        int schemeEnd = Url.IndexOf("://", StringComparison.Ordinal);
        int hostStart = schemeEnd < 0 ? 0 : schemeEnd + 3;
        int pathStart = Url.IndexOf('/', hostStart);
        string siteRoot = pathStart < 0 ? Url : Url.Substring(0, pathStart);
        string pageDir = pathStart < 0 ? Url + "/" : Url.Substring(0, Url.LastIndexOf("/") + 1);

        // The local file name is the text after the last "/" of the file address.
        Regex lastSegment = new Regex(@"(?<=/)[^/]*", RegexOptions.IgnoreCase | RegexOptions.Multiline);

        for (i = 0; i < rowTotal; i++)
        {
            for (j = 0; j < this.CutFlag.Count; j++)
            {
                if (this.CutFlag[j].DataType == (int)cGlobalParas.GDataType.Txt)
                {
                    continue;
                }

                string FileUrl = tempData.Rows[i][j].ToString();
                if (FileUrl.Length == 0)
                {
                    // Column was not filled for this row: nothing to download.
                    continue;
                }

                MatchCollection urlstr = lastSegment.Matches(FileUrl);
                string DownloadFileName = urlstr.Count == 0
                    ? FileUrl
                    : urlstr[urlstr.Count - 1].ToString();
                DownloadFileName = sPath + "\\" + DownloadFileName;

                if (!FileUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase))
                {
                    FileUrl = FileUrl.StartsWith("/", StringComparison.Ordinal)
                        ? siteRoot + FileUrl
                        : pageDir + FileUrl;
                }

                DownloadFile(FileUrl, DownloadFileName);
            }
        }
    }

    return tempData;
}
/// <summary>
/// Downloads the source of a page and returns it, optionally stripped of line breaks and
/// cut down to the region between two markers. The result is also stored in
/// <c>m_WebpageSource</c>.
/// </summary>
/// <remarks>
/// When <paramref name="url"/> contains "&lt;POST&gt;...&lt;/POST&gt;", the text before the
/// tag is the request address and the text inside it is sent as an
/// application/x-www-form-urlencoded POST body (ASCII bytes); otherwise a GET is issued.
/// The request timeout is 12 seconds. Network and protocol errors are not caught here.
/// </remarks>
/// <param name="url">Page address, optionally followed by a POST payload tag.</param>
/// <param name="webCode">Encoding used to decode the response; <c>auto</c> uses the charset of the Content-Type header and falls back to <c>Encoding.Default</c>.</param>
/// <param name="cookie">Cookies as "name=value" pairs separated by ';' (a segment containing '&amp;' is further split on '&amp;'); null or empty sends none.</param>
/// <param name="startPos">Start marker (used as a regular expression) of the region to keep.</param>
/// <param name="endPos">End marker (used as a regular expression) of the region to keep. The cut is applied only when both markers are non-empty.</param>
/// <param name="IsCutnr">When true, carriage returns and line feeds are removed from the source.</param>
/// <param name="IsAjax">When true, the source is URL-decoded as UTF-8.</param>
/// <returns>The processed page source.</returns>
public string GetHtml(string url, cGlobalParas.WebCode webCode, string cookie, string startPos, string endPos,bool IsCutnr,bool IsAjax)
{
    // ---- Separate the address from an optional POST payload ----
    // One expression is used for both detection and extraction so the two cannot disagree
    // (payloads containing line breaks or a lower-case tag are handled).
    string requestUrl = url;
    string PostPara = null;
    Match postMatch = Regex.Match(url, @"<POST>([\S\s]*)</POST>", RegexOptions.IgnoreCase);
    if (postMatch.Success)
    {
        requestUrl = url.Substring(0, postMatch.Index);
        PostPara = postMatch.Groups[1].Value;
    }

    HttpWebRequest wReq = (HttpWebRequest)WebRequest.Create(requestUrl);
    wReq.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50215;)";

    // Set before any network activity so it also covers sending the POST body.
    wReq.Timeout = 12000;

    Match a = Regex.Match(url, @"(http://).[^/]*[?=/]", RegexOptions.IgnoreCase);
    wReq.Referer = a.Groups[0].Value.ToString();

    // ---- Cookies ----
    if (!string.IsNullOrEmpty(cookie))
    {
        CookieCollection cl = new CookieCollection();

        foreach (string sc in cookie.Split(';'))
        {
            string segment = sc.Trim();
            bool isCompound = segment.IndexOf("&") > 0;
            string[] pairs = isCompound ? segment.Split('&') : new string[] { segment };

            foreach (string rawPair in pairs)
            {
                string pair = rawPair.Trim();
                int eq = pair.IndexOf('=');
                if (eq <= 0)
                {
                    // Empty segment (e.g. trailing ';') or a pair without a name: skip it.
                    continue;
                }

                // Split on the first '=' only, so values that contain '=' stay intact.
                string name = pair.Substring(0, eq).Trim();
                string value = pair.Substring(eq + 1);
                if (!isCompound)
                {
                    value = value.Trim();
                }

                cl.Add(new Cookie(name, value, "/"));
            }
        }

        CookieContainer CookieCon = new CookieContainer();
        CookieCon.Add(wReq.RequestUri, cl);
        wReq.CookieContainer = CookieCon;
    }

    // ---- Request method and body ----
    if (PostPara != null)
    {
        // NOTE(review): ASCII encoding drops non-ASCII characters of the payload - confirm
        // payloads are always pre-encoded.
        byte[] pPara = Encoding.ASCII.GetBytes(PostPara);
        wReq.ContentType = "application/x-www-form-urlencoded";
        wReq.ContentLength = pPara.Length;
        wReq.Method = "POST";

        using (System.IO.Stream reqStream = wReq.GetRequestStream())
        {
            reqStream.Write(pPara, 0, pPara.Length);
        }
    }
    else
    {
        wReq.Method = "GET";
    }

    // ---- Response ----
    string strWebData = "";

    // Disposing the response releases the underlying connection even when reading fails.
    using (HttpWebResponse wResp = (HttpWebResponse)wReq.GetResponse())
    {
        Encoding wCode;
        switch (webCode)
        {
            case cGlobalParas.WebCode.auto:
                wCode = Encoding.Default;
                try
                {
                    string cType = wResp.ContentType.ToLowerInvariant();
                    Match charSetMatch = Regex.Match(cType, "(?<=charset=)[^<;]*", RegexOptions.IgnoreCase | RegexOptions.Multiline);
                    string webCharSet = charSetMatch.ToString().Trim().Trim('"', '\'');
                    if (webCharSet != "")
                    {
                        wCode = System.Text.Encoding.GetEncoding(webCharSet);
                    }
                }
                catch (ArgumentException)
                {
                    // Unknown charset name: keep the default encoding.
                    wCode = Encoding.Default;
                }
                catch (NotSupportedException)
                {
                    wCode = Encoding.Default;
                }
                break;
            case cGlobalParas.WebCode.gb2312:
                wCode = Encoding.GetEncoding("gb2312");
                break;
            case cGlobalParas.WebCode.gbk:
                wCode = Encoding.GetEncoding("gbk");
                break;
            case cGlobalParas.WebCode.utf8:
                wCode = Encoding.UTF8;
                break;
            default:
                wCode = Encoding.UTF8;
                break;
        }

        System.IO.Stream respStream = wResp.GetResponseStream();
        if (string.Equals(wResp.ContentEncoding, "gzip", StringComparison.OrdinalIgnoreCase))
        {
            respStream = new GZipStream(respStream, CompressionMode.Decompress);
        }

        // Disposing the reader also disposes the (possibly wrapped) response stream.
        using (System.IO.StreamReader reader = new System.IO.StreamReader(respStream, wCode))
        {
            strWebData = reader.ReadToEnd();
        }
    }

    // ---- Line-break removal ----
    if (IsCutnr == true)
    {
        strWebData = Regex.Replace(strWebData, "([\\r\\n])[\\s]+", "", RegexOptions.IgnoreCase | RegexOptions.Multiline);
        strWebData = Regex.Replace(strWebData, "\\n", "", RegexOptions.IgnoreCase | RegexOptions.Multiline);

        // Remove any carriage return left over by the two passes above. The previous
        // statement here discarded its result and searched for the literal characters
        // backslash-r-backslash-n, so it never removed anything.
        strWebData = strWebData.Replace("\r", "");
    }

    // ---- Optional region cut: applied only when both markers are given ----
    if (!string.IsNullOrEmpty(startPos) && !string.IsNullOrEmpty(endPos))
    {
        string Splitstr = "(" + startPos + ").*?(" + endPos + ")";
        Match aa = Regex.Match(strWebData, Splitstr);
        strWebData = aa.Groups[0].ToString();
    }

    if (IsAjax == true)
    {
        strWebData = System.Web.HttpUtility.UrlDecode(strWebData, Encoding.UTF8);
    }

    this.m_WebpageSource = strWebData;

    return strWebData;
}
/// <summary>
/// Records a finished task: loads the task through <c>cTaskRun.LoadSingleTask</c>, builds a
/// "Task" element from its summary values and inserts it under "Tasks" via <c>xmlConfig</c>,
/// then saves.
/// </summary>
/// <remarks>
/// The "data" folder under the project path is created first when it does not exist.
/// NOTE(review): TaskState is written through the default string conversion of
/// <paramref name="tSate"/>, while the other enum-like values are written as integers -
/// confirm which form the reader expects. Values are not XML-escaped.
/// </remarks>
/// <param name="TaskID">Identifier of the task to load and record.</param>
/// <param name="tSate">Gather result written to the TaskState element.</param>
/// <returns><paramref name="TaskID"/>, unchanged.</returns>
public Int64 InsertTaskComplete(Int64 TaskID,cGlobalParas.GatherResult tSate)
{
    // Make sure the fixed data folder exists.
    string dataFolder = Program.getPrjPath() + "data";
    if (System.IO.Directory.Exists(dataFolder) == false)
    {
        System.IO.Directory.CreateDirectory(dataFolder);
    }

    // Load the summary of the task being recorded.
    Task.cTaskRun t = new Task.cTaskRun();
    t.LoadSingleTask(TaskID);

    // Prepare the XML document that receives the new node.
    LoadTaskData();

    string taskXml =
        "<TaskID>" + t.GetTaskID (0) + "</TaskID>"
        + "<TaskName>" + t.GetTaskName(0) + "</TaskName>"
        + "<TaskState>" + tSate + "</TaskState>"
        + "<TaskType>" + (int)t.GetTaskType(0) + "</TaskType>"
        + "<RunType>" + (int)t.GetTaskRunType(0) + "</RunType>"
        + "<ExportFile>" + t.GetExportFile(0) + "</ExportFile>"
        + "<tempFile>" + t.GetTempFile(0) + "</tempFile>"
        + "<UrlCount>" + t.GetUrlCount(0) + "</UrlCount>"
        + "<GatheredUrlCount>" + t.GetGatheredUrlCount(0) + "</GatheredUrlCount>"
        + "<IsLogin>" + t.GetIsLogin(0) + "</IsLogin>"
        + "<PublishType>" + (int)t.GetPublishType(0) + "</PublishType>";

    xmlConfig.InsertElement("Tasks", "Task", taskXml);
    xmlConfig.Save ();

    return TaskID;
}
/// <summary>
/// Builds the CREATE TABLE statement for the publish target: one "text" column per column of
/// <c>m_pTaskData.PublishData</c>, in a table named <c>m_pTaskData.DataTableName</c>.
/// </summary>
/// <remarks>
/// Every database type gets the same column definition. For MySql a CHARACTER SET clause is
/// appended. Table and column names are concatenated into the statement as-is, unquoted.
/// </remarks>
/// <param name="dType">Target database type; only MySql changes the output.</param>
/// <param name="Encoding">MySql character set name; null or empty selects "utf8".</param>
/// <returns>The SQL text.</returns>
private string getCreateTablesql(cGlobalParas.DatabaseType dType ,string Encoding)
{
    System.Text.StringBuilder sql = new System.Text.StringBuilder();
    sql.Append("create table ");
    sql.Append(this.m_pTaskData.DataTableName);
    sql.Append("(");

    // All database types use the same "<name> text," column definition.
    int columnTotal = m_pTaskData.PublishData.Columns.Count;
    for (int col = 0; col < columnTotal; col++)
    {
        sql.Append(m_pTaskData.PublishData.Columns[col].ColumnName);
        sql.Append(" ");
        sql.Append("text");
        sql.Append(",");
    }

    // Remove the last character (the trailing comma when there is at least one column).
    sql.Length = sql.Length - 1;
    sql.Append(")");

    // MySql tables are created with an explicit character set.
    if (dType == cGlobalParas.DatabaseType.MySql)
    {
        if (string.IsNullOrEmpty(Encoding))
        {
            Encoding = "utf8";
        }

        sql.Append(" CHARACTER SET ");
        sql.Append(Encoding);
        sql.Append(" ");
    }

    return sql.ToString();
}
/// <summary>
/// Initializes the event data with the given message type.
/// </summary>
/// <param name="MessType">Message type; assigned to the base class member <c>MessType</c>.</param>
public cCommandEventArgs(cGlobalParas.MessageType MessType)
{
    base.MessType = MessType;
}
/// <summary>
/// Expands a list of addresses through the navigation rules, one level per call, recursing
/// until the last level has been applied.
/// </summary>
/// <remarks>
/// The rule whose <c>Level</c> equals <paramref name="Level"/> is applied to every address in
/// <paramref name="pUrl"/> through <c>GetUrlsByRule</c>; null results are skipped. When no
/// rule has that level an empty rule is used, for which <c>GetUrlsByRule</c> returns the
/// address itself. Recursion stops once <paramref name="Level"/> reaches the number of rules.
/// </remarks>
/// <param name="pUrl">Addresses to expand at this level.</param>
/// <param name="Level">Level of the rule to apply; the first level is 1.</param>
/// <param name="nRules">All navigation rules.</param>
/// <param name="webCode">Page encoding forwarded to <c>GetUrlsByRule</c>.</param>
/// <param name="cookie">Cookie string forwarded to <c>GetUrlsByRule</c>.</param>
/// <returns>
/// The addresses found at the deepest level. With no rules at all, a list holding only the
/// first address of <paramref name="pUrl"/> (empty when <paramref name="pUrl"/> is empty).
/// </returns>
public List<string> PUrlRule(List<string> pUrl,int Level, List<cNavigRule> nRules,cGlobalParas.WebCode webCode, string cookie)
{
    List<string> Urls = new List<string>();

    if (nRules.Count == 0)
    {
        if (pUrl.Count > 0)
        {
            Urls.Add(pUrl[0].ToString());
        }
        return Urls;
    }

    // Pick the rule that belongs to this level.
    string UrlRule = "";
    foreach (cNavigRule rule in nRules)
    {
        if (Level == rule.Level)
        {
            UrlRule = rule.NavigRule;
            break;
        }
    }

    // Expand every address of this level.
    foreach (string parentUrl in pUrl)
    {
        List<string> tmpUrls = GetUrlsByRule(parentUrl, UrlRule, webCode, cookie);
        if (tmpUrls != null)
        {
            Urls.AddRange(tmpUrls);
        }
    }

    // ">=" rather than "==": a call with Level already past the last rule must stop here
    // instead of recursing without end.
    if (Level >= nRules.Count)
    {
        return Urls;
    }

    return PUrlRule(Urls, Level + 1, nRules, webCode, cookie);
}
/// <summary>
/// Writes a new state for one plan into <c>tasks\plan\plan.xml</c> under the project path
/// and saves the file.
/// </summary>
/// <param name="PlanID">Identifier of the plan; passed as text to match the "ID" value.</param>
/// <param name="pState">New state; stored as its integer value in "PlanState".</param>
private void ModifyPlanState(Int64 PlanID, cGlobalParas.PlanState pState)
{
    string planFile = Program.getPrjPath() + "tasks\\plan\\plan.xml";
    string idText = PlanID.ToString ();
    string stateText = ((int)pState).ToString ();

    cXmlIO xmlConfig = new cXmlIO(planFile);
    xmlConfig.EditNodeValue("Plans", "ID", idText, "PlanState", stateText);
    xmlConfig.Save ();
    xmlConfig = null;
}
/// <summary>
/// Appends one "Log" element under "Logs" in <c>m_PlanFile</c> and saves it.
/// </summary>
/// <remarks>
/// The log type and run task type are written as their integer values. Free-text values are
/// XML-escaped before being placed in the fragment, so characters such as '&amp;' or '&lt;'
/// in a plan name, file path or parameter string cannot break the log file.
/// </remarks>
/// <param name="lType">Log type; its integer value is written to the LogType element.</param>
/// <param name="PlanID">Written to the PlanID element.</param>
/// <param name="PlanName">Written to the PlanName element.</param>
/// <param name="rType">Run task type; its integer value is written to the TaskType element.</param>
/// <param name="FileName">Written to the FileName element.</param>
/// <param name="Para">Written to the FilePara element.</param>
public void InsertLog(cGlobalParas.LogType lType,string PlanID,string PlanName, cGlobalParas.RunTaskType rType, string FileName, string Para)
{
    string strXml = "<LogType>" + ((int)lType).ToString () + "</LogType>"
        + "<PlanID>" + System.Security.SecurityElement.Escape(PlanID) + "</PlanID>"
        + "<PlanName>" + System.Security.SecurityElement.Escape(PlanName) + "</PlanName>"
        + "<FileName>" + System.Security.SecurityElement.Escape(FileName) + "</FileName>"
        + "<FilePara>" + System.Security.SecurityElement.Escape(Para) + "</FilePara>"
        + "<TaskType>" + ((int)rType).ToString () + "</TaskType>"
        + "<RunTime>" + DateTime.Now.ToString() + "</RunTime>";

    m_PlanFile.InsertElement("Logs", "Log", strXml);
    m_PlanFile.Save();
}