예제 #1
0
 /// <summary>
 /// Creates an export object and records every constructor argument in the
 /// matching <c>m_</c> member for later use.
 /// </summary>
 /// <param name="sender">Control stored in <c>m_sender</c>.</param>
 /// <param name="senderDelegate">Delegate stored in <c>m_senderDelegate</c>.</param>
 /// <param name="pType">Publish type stored in <c>m_pType</c>.</param>
 /// <param name="FileName">File name stored in <c>m_FileName</c>.</param>
 /// <param name="pData">Data table stored in <c>m_pData</c>.</param>
 public cExport(
     ContainerControl sender,
     Delegate senderDelegate,
     cGlobalParas.PublishType pType,
     string FileName,
     System.Data.DataTable pData)
 {
     this.m_sender = sender;
     this.m_senderDelegate = senderDelegate;
     this.m_pType = pType;
     this.m_FileName = FileName;
     this.m_pData = pData;
 }
예제 #2
0
        /// <summary>
        /// Appends one tab-separated line to the task's daily log file
        /// (<c>&lt;project path&gt;Log\&lt;TaskName&gt;&lt;yyyy&gt;&lt;MM&gt;&lt;dd&gt;.txt</c>, gb2312 encoded).
        /// </summary>
        /// <param name="TaskName">Task name; used both in the file name and as the first column of the line.</param>
        /// <param name="LogType">
        /// Only <c>GatherError</c> and <c>PublishError</c> produce a line. For any other value the
        /// file is still opened (and created if missing) but nothing is written.
        /// </param>
        /// <param name="strLog">Message text written as the last column.</param>
        public void WriteLog(string TaskName,cGlobalParas.LogType LogType, string strLog)
        {
            // Sample the clock once so the file name and the line timestamp can never
            // disagree when the call happens to straddle midnight.
            DateTime now = DateTime.Now;

            string FileName = Program.getPrjPath() + "Log\\" + TaskName + now.Year
                + now.Month.ToString("00")
                + now.Day.ToString("00")
                + ".txt";

            string logDir = Path.GetDirectoryName(FileName);
            if (!Directory.Exists(logDir))
            {
                Directory.CreateDirectory(logDir);
            }

            // Category label written as the third column; null means "nothing to write".
            string category = null;
            if (LogType == cGlobalParas.LogType.GatherError)
            {
                category = "�ɼ�����";
            }
            else if (LogType == cGlobalParas.LogType.PublishError)
            {
                category = "��������";
            }

            // using-blocks guarantee the handle is released even if the encoding lookup
            // or the write throws.
            using (FileStream myStream = File.Open(FileName, FileMode.Append, FileAccess.Write, FileShare.Write))
            using (StreamWriter sw = new StreamWriter(myStream, System.Text.Encoding.GetEncoding("gb2312")))
            {
                if (category != null)
                {
                    string Log = TaskName + "\t" + now.ToString() + "\t" + category + "\t" + strLog;
                    sw.WriteLine(Log);
                }
            }
        }
 /// <summary>
 /// Creates the event arguments: the message type goes to the base class's
 /// <c>MessType</c>, the two strings to <c>m_RunName</c> and <c>m_RunPara</c>.
 /// </summary>
 /// <param name="MessType">Message type assigned to <c>base.MessType</c>.</param>
 /// <param name="RunName">Value stored in <c>m_RunName</c>.</param>
 /// <param name="RunPara">Value stored in <c>m_RunPara</c>.</param>
 public cRunTaskEventArgs(
     cGlobalParas.MessageType MessType,
     string RunName,
     string RunPara)
 {
     base.MessType = MessType;
     this.m_RunName = RunName;
     this.m_RunPara = RunPara;
 }
 /// <summary>
 /// Creates the event arguments and records the message type in <c>m_MessType</c>.
 /// </summary>
 /// <param name="MessType">Message type stored in <c>m_MessType</c>.</param>
 public cListenInitializedEventArgs(
     cGlobalParas.MessageType MessType)
 {
     this.m_MessType = MessType;
 }
 /// <summary>
 /// Creates the event arguments describing a task state transition.
 /// </summary>
 /// <param name="TaskID">Task identifier assigned to <c>base.TaskID</c>.</param>
 /// <param name="oldState">State before the change; stored in <c>m_OldState</c>.</param>
 /// <param name="newState">State after the change; stored in <c>m_NewState</c>.</param>
 public TaskStateChangedEventArgs(
     Int64 TaskID,
     cGlobalParas.TaskState oldState,
     cGlobalParas.TaskState newState)
 {
     // The identifier lives on the base class rather than in a member of this class.
     base.TaskID = TaskID;
     this.m_OldState = oldState;
     this.m_NewState = newState;
 }
 /// <summary>
 /// Creates the event arguments carrying a URL-count update for a task.
 /// </summary>
 /// <param name="TaskID">Task identifier assigned to <c>base.TaskID</c>.</param>
 /// <param name="uType">Update type stored in <c>m_UType</c>.</param>
 /// <param name="TrueUrlCount">Count stored in <c>m_TrueUrlCount</c>.</param>
 public cGatherUrlCountArgs(
     Int64 TaskID,
     cGlobalParas.UpdateUrlCountType uType,
     int TrueUrlCount)
 {
     // The identifier lives on the base class rather than in a member of this class.
     base.TaskID = TaskID;
     this.m_TrueUrlCount = TrueUrlCount;
     this.m_UType = uType;
 }
        /// <summary>
        /// Retry wrapper around <c>cGatherWeb.GetGatherData</c>: gathers one page and, when the
        /// call throws, logs the failure, waits three seconds and tries again.
        /// </summary>
        /// <remarks>
        /// The exception is rethrown to the caller when the number of failures exceeds
        /// <c>m_AgainNumber</c>, or immediately when <c>m_Ignore404</c> is set and the exception
        /// message contains "404".
        /// NOTE(review): only <paramref name="Url"/> and <paramref name="IsAjax"/> are forwarded.
        /// The web code, cookie, start/end positions and save path come from the
        /// <c>m_WebCode</c>, <c>m_Cookie</c>, <c>m_gStartPos</c>, <c>m_gEndPos</c> and
        /// <c>m_SavePath</c> members, not from the parameters of the same meaning. This is kept
        /// as found; confirm whether it is intentional.
        /// </remarks>
        /// <param name="Url">Address of the page to gather.</param>
        /// <param name="webCode">Currently unused (see remarks).</param>
        /// <param name="cookie">Currently unused (see remarks).</param>
        /// <param name="startPos">Currently unused (see remarks).</param>
        /// <param name="endPos">Currently unused (see remarks).</param>
        /// <param name="sPath">Currently unused (see remarks).</param>
        /// <param name="IsAjax">Forwarded unchanged to <c>cGatherWeb.GetGatherData</c>.</param>
        /// <returns>Whatever <c>cGatherWeb.GetGatherData</c> returns for the page.</returns>
        private DataTable GetGatherData(string Url, cGlobalParas.WebCode webCode, string cookie, string startPos, string endPos, string sPath, bool IsAjax)
        {
            cGatherWeb gWeb = new cGatherWeb();
            gWeb.CutFlag = m_TaskSplitData.CutFlag;

            int AgainTime = 0;

            while (true)
            {
                try
                {
                    return gWeb.GetGatherData(Url, m_WebCode, m_Cookie, m_gStartPos, m_gEndPos, m_SavePath, IsAjax);
                }
                catch (System.Exception ex)
                {
                    AgainTime++;

                    // Give up when the retry budget is spent, or when 404 responses are to be
                    // ignored and this looks like one. The 404 test is only evaluated while
                    // retries remain, as in the original nesting.
                    // (Writing these failures to the error log is not implemented.)
                    if (AgainTime > m_AgainNumber
                        || (m_Ignore404 == true && ex.Message.Contains("404")))
                    {
                        // "throw;" keeps the original stack trace; "throw ex;" would reset it.
                        throw;
                    }

                    e_Log(this, new cGatherTaskLogArgs(m_TaskID, ((int)cGlobalParas.LogType.Error).ToString() + "��ַ��" + Url + "���ʷ������������Ϣ��" + ex.Message + "���ȴ�3������\n", this.IsErrorLog));

                    Thread.Sleep(3000);

                    e_Log(this, new cGatherTaskLogArgs(m_TaskID, ((int)cGlobalParas.LogType.Warning).ToString() + Url + "���ڽ��е�" + AgainTime + "������\n", this.IsErrorLog));

                    // Fall through to the top of the loop for the next attempt.
                }
            }
        }
        //���ݵ������򣬻�ȡ��ҳ��ַ����һ������
        public List<string> GetUrlsByRule(string Url, string UrlRule,cGlobalParas.WebCode webCode, string cookie)
        {
            string Url1;
            List<string> Urls=new List<string> ();

            if (UrlRule.Trim() == "")
            {
                Urls.Add(Url);
                return Urls;
            }

            //�ж���ַ�Ƿ���ڲ�����������ڲ�����ȡ����һ��������ַ
            if (Regex.IsMatch(Url, "{.*}"))
            {
                List<string> Urls1 = SplitWebUrl(Url );  //,IsUrlEncode ,UrlEncode
                Url1 = Urls1[0].ToString();
            }
            else
            {
                Url1 = Url;
            }

            //������ַ��Դ�룬��������ȡ������ȡ��������ַ
            //string UrlSource= cTool.GetHtmlSource(Url1,true );

            cGatherWeb gW = new cGatherWeb();
            string UrlSource = gW.GetHtml(Url1, webCode, cookie, "", "", true, false);
            gW = null;

            if (UrlSource == "")
            {
                return null ;
            }

            //string Rule=@"(?<=href=[\W])" + cTool.RegexReplaceTrans(UrlRule) + @"(\S[^'"">]*)(?=[\s'""])";
            string Rule = "";

            if (UrlRule.StartsWith("<Regex:"))
            {
                Rule = @"(?<=[href=|src=|open(][\W])";

                //����ǰ׺
                string strPre = UrlRule.Substring(UrlRule.IndexOf("<Regex:")+7, UrlRule.IndexOf(">")-7);
                Rule += strPre;

                //���������
                string cma=@"(?<=<Common:)\S+?(?=>)";

                Regex cmas = new Regex(cma, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                MatchCollection cs = cmas.Matches(UrlRule);
                foreach (Match ma in cs)
                {
                    Rule +=@"(\S*)" + ma.Value.ToString ();
                }

                //�����׺
                if (Regex.IsMatch(UrlRule, "<End:"))
                {
                    string s = UrlRule.Substring(UrlRule.IndexOf("<End:") + 5, UrlRule.Length - UrlRule.IndexOf("<End:") - 6);
                    Rule += @"(\S*)" + s;
                }
                else
                {
                    Rule += @"(\S[^'"">]*)(?=[\s'""])";
                }

            }
            else
            {
                Rule = @"(?<=[href=|src=|open(][\W])" + cTool.RegexReplaceTrans(UrlRule) + @"(\S[^'"">]*)(?=[\s'""])";
            }

            Regex re = new Regex(Rule, RegexOptions.IgnoreCase | RegexOptions.Multiline);
            MatchCollection aa = re.Matches(UrlSource);

            DataTable d1 = new DataTable();
            d1.Columns.Add("Name");

            foreach (Match ma in aa)
            {
                //Urls.Add(ma.Value.ToString());
                d1.Rows.Add(ma.Value.ToString());
            }

            //����ʱ���ܻ��ȡ�ظ���ַ���б��������Ҫȥ��
            //ȥ���ظ���

            string[] strComuns = new string[d1.Columns.Count];

            for (int m = 0; m < d1.Columns.Count; m++)
            {
                strComuns[m] = d1.Columns[m].ColumnName;
            }

            DataView dv = new DataView(d1);

            DataTable d2 = dv.ToTable(true, strComuns);

            for (int i = 0; i < d2.Rows.Count; i++)
            {
                if (string.Compare(d2.Rows[i][0].ToString ().Substring (0,4), "http", true) != 0)
                {
                    string PreUrl = Url;

                    if (d2.Rows[i][0].ToString().Substring(0, 1) == "/")
                    {
                        PreUrl = PreUrl.Substring(7, PreUrl.Length - 7);
                        PreUrl = PreUrl.Substring(0, PreUrl.IndexOf("/"));
                        PreUrl = "http://" + PreUrl;
                    }
                    else
                    {
                        Match a = Regex.Match(PreUrl, ".*/");
                        PreUrl = a.Groups[0].Value.ToString();
                    }

                    Urls.Add(PreUrl + d2.Rows[i][0].ToString());
                }
                else
                {
                    Urls.Add(d2.Rows[i][0].ToString());
                }

            }

            return Urls;
        }
        /// <summary>
        /// Adds one <c>Log</c> entry under <c>Logs</c> in <c>tasks\plan\RunLog.xml</c> (below the
        /// project path) and saves the file. The log file is created first if
        /// <c>IsExist()</c> reports it missing.
        /// </summary>
        /// <param name="lType">Written as the <c>LogType</c> element.</param>
        /// <param name="PlanID">Written as the <c>PlanID</c> element.</param>
        /// <param name="PlanName">Written as the <c>PlanName</c> element.</param>
        /// <param name="rType">Written as the <c>TaskType</c> element.</param>
        /// <param name="FileName">Written as the <c>FileName</c> element.</param>
        /// <param name="Para">Written as the <c>FilePara</c> element.</param>
        public void InsertOnceLog(cGlobalParas.LogType lType, string PlanID, string PlanName, cGlobalParas.RunTaskType rType, string FileName, string Para)
        {
            if (!IsExist())
            {
                NewLogFile();
            }

            cXmlIO xmlconfig = new cXmlIO(Program.getPrjPath() + "tasks\\plan\\RunLog.xml");

            // Caller-supplied text is escaped so that '&', '<', '>' or quotes (common in
            // file paths and URL parameters) cannot produce malformed XML.
            string strXml = "<LogType>" + lType + "</LogType>" +
                "<PlanID>" + System.Security.SecurityElement.Escape(PlanID) + "</PlanID>" +
                "<PlanName>" + System.Security.SecurityElement.Escape(PlanName) + "</PlanName>" +
                "<FileName>" + System.Security.SecurityElement.Escape(FileName) + "</FileName>" +
                "<FilePara>" + System.Security.SecurityElement.Escape(Para) + "</FilePara>" +
                "<TaskType>" + rType + "</TaskType>" +
                "<RunTime>" + DateTime.Now.ToString() + "</RunTime>";

            xmlconfig.InsertElement("Logs", "Log", strXml);
            xmlconfig.Save();
        }
예제 #10
0
        /// <summary>
        /// Resolves <paramref name="Url"/> through the navigation rules by handing it, wrapped
        /// in a single-element list, to <c>PUrlRule</c>.
        /// </summary>
        /// <param name="Url">Starting address.</param>
        /// <param name="nRules">Navigation rules forwarded to <c>PUrlRule</c>.</param>
        /// <param name="webCode">Page encoding forwarded to <c>PUrlRule</c>.</param>
        /// <param name="cookie">Cookie string forwarded to <c>PUrlRule</c>.</param>
        /// <returns>The list produced by <c>PUrlRule</c>, or <c>null</c> if it throws.</returns>
        public List<string> ParseUrlRule(string Url, List<cNavigRule> nRules,cGlobalParas.WebCode webCode, string cookie)
        {
            // PUrlRule works on a collection of addresses, so the single start address is
            // wrapped in a list to fit that interface.
            List<string> startUrls = new List<string>();
            startUrls.Add(Url);

            try
            {
                // The literal 1 is presumably the starting navigation level - confirm
                // against PUrlRule.
                return PUrlRule(startUrls, 1, nRules, webCode, cookie);
            }
            catch (System.Exception)
            {
                // Any failure while resolving is reported to the caller as "no result".
                return null;
            }
        }
예제 #11
0
        /// <summary>
        /// Downloads a page, extracts the fields described by <c>CutFlag</c> into a table
        /// (one column per field), applies each field's export rule and, when any field is
        /// not plain text, downloads the files the extracted values point to.
        /// </summary>
        /// <remarks>
        /// The result is also kept in the <c>tempData</c> member. The page is fetched through
        /// <c>GetHtml</c>, whose return value is ignored; matching runs against
        /// <c>WebpageSource</c> (presumably filled by <c>GetHtml</c> - confirm).
        /// </remarks>
        /// <param name="Url">Address of the page to gather.</param>
        /// <param name="webCode">Page encoding passed to <c>GetHtml</c>.</param>
        /// <param name="cookie">Cookie string passed to <c>GetHtml</c>.</param>
        /// <param name="startPos">Start marker passed to <c>GetHtml</c>.</param>
        /// <param name="endPos">End marker passed to <c>GetHtml</c>.</param>
        /// <param name="sPath">Directory for downloaded files; when empty, <c>data\tem_file</c> under the project path is used.</param>
        /// <param name="IsAjax">Passed to <c>GetHtml</c>.</param>
        /// <returns>
        /// The table of gathered rows, or <c>null</c> when <paramref name="Url"/> contains a
        /// double quote or whitespace, or when nothing on the page matches.
        /// </returns>
        public DataTable GetGatherData(string Url,cGlobalParas.WebCode webCode, string cookie, string startPos,string endPos,string sPath,bool IsAjax)
        {
            tempData = new DataTable("tempData");
            int i;
            int j;
            string strCut = "";
            bool IsDownloadFile = false;

            #region Build the table schema and the extraction pattern

            // One string column per field; any non-text field means files must be downloaded.
            for (i = 0; i < this.CutFlag.Count; i++)
            {
                tempData.Columns.Add(new DataColumn(this.CutFlag[i].Title, typeof(string)));

                if (this.CutFlag[i].DataType != (int)cGlobalParas.GDataType.Txt && IsDownloadFile == false)
                {
                    IsDownloadFile = true;
                }
            }

            // One alternative per field: (?<Title>start) + content class + (?=end), joined by '|'.
            for (i = 0; i < this.CutFlag.Count; i++)
            {
                strCut += "(?<" + this.CutFlag[i].Title + ">" + cTool.RegexReplaceTrans(this.CutFlag[i].StartPos) + ")";

                switch (this.CutFlag[i].LimitSign)
                {
                    case (int)cGlobalParas.LimitSign.NoLimit:
                        strCut += ".*?";
                        break;
                    case (int)cGlobalParas.LimitSign.NoWebSign:
                        strCut += "[^<>]*?";
                        break;
                    case (int)cGlobalParas.LimitSign.OnlyCN:
                        strCut += "[\\u4e00-\\u9fa5]*?";
                        break;
                    case (int)cGlobalParas.LimitSign.OnlyDoubleByte:
                        strCut += "[^\\x00-\\xff]*?";
                        break;
                    case (int)cGlobalParas.LimitSign.OnlyNumber:
                        strCut += "[\\d]*?";
                        break;
                    case (int)cGlobalParas.LimitSign.OnlyChar:
                        strCut += "[\\x00-\\xff]*?";
                        break;
                    case (int)cGlobalParas.LimitSign.Custom:
                        // The user's expression is used as-is (not escaped).
                        strCut += this.CutFlag[i].RegionExpression.ToString();
                        break;
                    default:
                        strCut += "[\\S\\s]*?";
                        break;
                }
                strCut += "(?=" + cTool.RegexReplaceTrans(this.CutFlag[i].EndPos) + ")|";
            }

            #endregion

            #region Fetch the page source

            // Drop the trailing '|'.
            strCut = strCut.Substring(0, strCut.Length - 1);

            // An address containing a double quote or whitespace is treated as invalid.
            if (Regex.IsMatch(Url, "[\"\\s]"))
            {
                tempData = null;
                return tempData;
            }

            // Exceptions from GetHtml propagate unchanged (the former catch/"throw ex;" only
            // reset the stack trace).
            GetHtml(Url, webCode, cookie, startPos, endPos, true, IsAjax);

            #endregion

            Regex re = new Regex(@strCut, RegexOptions.IgnoreCase | RegexOptions.Multiline);
            MatchCollection mc = re.Matches(this.WebpageSource);

            if (mc.Count == 0)
            {
                tempData = null;
                return tempData;
            }

            DataRow drNew = null;

            i = 0;

            #region Assemble the matches into rows

            // Matches arrive as one flat sequence; each value begins with the start marker of
            // the field it belongs to. The loop walks that sequence and fills one row at a
            // time, tolerating repeated and missing fields.

            int m = 0;   // index of the match currently being consumed

            while (m < mc.Count)
            {
                drNew = tempData.NewRow();

                for (i = 0; i < this.CutFlag.Count; i++)
                {

                    if (m < mc.Count)
                    {
                        if (i == 0)
                        {
                            // Skip forward to the next match of the first field.
                            while (!mc[m].Value.StartsWith(this.CutFlag[i].StartPos, StringComparison.CurrentCultureIgnoreCase))
                            {
                                m++;
                                if (m >= mc.Count)
                                {
                                    // No further record starts: leave both loops. The row under
                                    // construction is not added to the table.
                                    goto ExitWhile;
                                }
                            }

                            drNew[i] = mc[m].Value.Substring(this.CutFlag[i].StartPos.Length, mc[m].Value.Length - this.CutFlag[i].StartPos.Length);

                            m++;
                        }
                        else
                        {
                            if (mc[m].Value.StartsWith(this.CutFlag[i].StartPos, StringComparison.CurrentCultureIgnoreCase))
                            {
                                drNew[i] = mc[m].Value.Substring(this.CutFlag[i].StartPos.Length, mc[m].Value.Length - this.CutFlag[i].StartPos.Length);

                                m++;
                            }
                            else
                            {
                                if (mc[m].Value.StartsWith(this.CutFlag[i - 1].StartPos, StringComparison.CurrentCultureIgnoreCase))
                                {
                                    // Previous field matched again: skip it and retry this field.
                                    m++;
                                    i--;
                                }
                                else
                                {
                                    if (i < this.CutFlag.Count - 1)
                                    {
                                        if (mc[m].Value.StartsWith(this.CutFlag[i + 1].StartPos, StringComparison.CurrentCultureIgnoreCase))
                                        {
                                            // This field is missing from the page: leave the cell
                                            // unset and let the next field take the match.
                                        }
                                        else
                                        {
                                            m++;
                                            i--;
                                        }
                                    }
                                    else
                                    {
                                        m++;
                                        i--;
                                    }
                                }
                            }
                        }
                    }
                }
                tempData.Rows.Add(drNew);
                drNew = null;

            }

            #endregion

            #region Apply each field's export rule

            ExitWhile:

            for (i = 0; i < this.CutFlag.Count; i++)
            {

                switch (this.CutFlag[i].ExportLimit)
                {
                    case (int)cGlobalParas.ExportLimit.ExportNoLimit:

                        break;
                    case (int)cGlobalParas.ExportLimit.ExportNoWebSign:
                        for (int index = 0; index < tempData.Rows.Count; index++)
                        {
                            tempData.Rows[index][i] = getTxt(tempData.Rows[index][i].ToString());
                        }
                        break;
                    case (int)cGlobalParas.ExportLimit.ExportPrefix:
                        for (int index = 0; index < tempData.Rows.Count; index++)
                        {
                            tempData.Rows[index][i] = this.CutFlag[i].ExportExpression + tempData.Rows[index][i].ToString();
                        }
                        break;
                    case (int)cGlobalParas.ExportLimit.ExportReplace:
                        for (int index = 0; index < tempData.Rows.Count; index++)
                        {
                            // The expression is split at its first comma; one delimiter character
                            // is stripped from each end of both halves.
                            string oStr = this.CutFlag[i].ExportExpression.Substring(1, this.CutFlag[i].ExportExpression.IndexOf(",") - 2);
                            string nStr = this.CutFlag[i].ExportExpression.Substring(this.CutFlag[i].ExportExpression.IndexOf(",") + 2, this.CutFlag[i].ExportExpression.Length - this.CutFlag[i].ExportExpression.IndexOf(",") - 3);
                            tempData.Rows[index][i] = tempData.Rows[index][i].ToString().Replace(oStr, nStr);
                        }
                        break;
                    case (int)cGlobalParas.ExportLimit.ExportSuffix:
                        for (int index = 0; index < tempData.Rows.Count; index++)
                        {
                            tempData.Rows[index][i] = tempData.Rows[index][i].ToString() + this.CutFlag[i].ExportExpression;
                        }
                        break;
                    case (int)cGlobalParas.ExportLimit.ExportTrimLeft:
                        for (int index = 0; index < tempData.Rows.Count; index++)
                        {
                            int len = tempData.Rows[index][i].ToString().Length;
                            int lefti = int.Parse(this.CutFlag[i].ExportExpression.ToString());
                            if (tempData.Rows[index][i].ToString().Length > lefti)
                            {
                                tempData.Rows[index][i] = tempData.Rows[index][i].ToString().Substring(lefti, len - lefti);
                            }
                        }
                        break;
                    case (int)cGlobalParas.ExportLimit.ExportTrimRight:
                        for (int index = 0; index < tempData.Rows.Count; index++)
                        {
                            int len = tempData.Rows[index][i].ToString().Length;
                            int righti = int.Parse(this.CutFlag[i].ExportExpression.ToString());
                            if (tempData.Rows[index][i].ToString().Length > righti)
                            {
                                tempData.Rows[index][i] = tempData.Rows[index][i].ToString().Substring(0, len - righti);
                            }
                        }
                        break;
                    case (int)cGlobalParas.ExportLimit.ExportTrim:
                        for (int index = 0; index < tempData.Rows.Count; index++)
                        {
                            tempData.Rows[index][i] = tempData.Rows[index][i].ToString().Trim();
                        }
                        break;
                    case (int)cGlobalParas.ExportLimit.ExportRegexReplace:
                        for (int index = 0; index < tempData.Rows.Count; index++)
                        {
                            // Same expression layout as ExportReplace, but the first half is
                            // used as a regular expression.
                            string oStr = this.CutFlag[i].ExportExpression.Substring(1, this.CutFlag[i].ExportExpression.IndexOf(",") - 2);
                            string nStr = this.CutFlag[i].ExportExpression.Substring(this.CutFlag[i].ExportExpression.IndexOf(",") + 2, this.CutFlag[i].ExportExpression.Length - this.CutFlag[i].ExportExpression.IndexOf(",") - 3);
                            tempData.Rows[index][i] = Regex.Replace(tempData.Rows[index][i].ToString(), oStr, nStr, RegexOptions.IgnoreCase | RegexOptions.Multiline);

                        }
                        break;
                    default:

                        break;
                }

            }

            #endregion

            #region Download the files referenced by non-text fields

            if (IsDownloadFile == true)
            {
                if (sPath == "")
                {
                    sPath = Program.getPrjPath() + "data\\tem_file";
                }

                if (!Directory.Exists(sPath))
                {
                    Directory.CreateDirectory(sPath);
                }

                // Bases for relative links, computed once from the ORIGINAL page address.
                // (Previously Url was overwritten inside the loop, so every relative link
                // after the first was resolved against an already-truncated base.)
                Match rootMatch = Regex.Match(Url, @"^(\w+://)?[^/]+");
                string siteRoot = rootMatch.Success ? rootMatch.Value : Url;
                string pageDir = Url.Substring(0, Url.LastIndexOf("/") + 1);

                Regex pathSegment = new Regex(@"(?<=/)[^/]*", RegexOptions.IgnoreCase | RegexOptions.Multiline);

                // Iterate the rows actually stored: a row abandoned through ExitWhile is
                // never added to the table.
                for (i = 0; i < tempData.Rows.Count; i++)
                {
                    for (j = 0; j < this.CutFlag.Count; j++)
                    {
                        if (this.CutFlag[j].DataType == (int)cGlobalParas.GDataType.Txt)
                        {
                            continue;
                        }

                        string FileUrl = tempData.Rows[i][j].ToString();

                        // A field missing from the page leaves an empty cell: nothing to fetch.
                        if (FileUrl.Length == 0)
                        {
                            continue;
                        }

                        // Local file name = text after the last '/', or the whole value when
                        // it contains no '/'.
                        MatchCollection urlstr = pathSegment.Matches(FileUrl);
                        string DownloadFileName;
                        if (urlstr.Count == 0)
                            DownloadFileName = FileUrl;
                        else
                            DownloadFileName = urlstr[urlstr.Count - 1].ToString();
                        DownloadFileName = sPath + "\\" + DownloadFileName;

                        if (!FileUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase))
                        {
                            if (FileUrl.StartsWith("/", StringComparison.Ordinal))
                            {
                                // Root-relative link: scheme + host of the page.
                                FileUrl = siteRoot + FileUrl;
                            }
                            else
                            {
                                // Document-relative link: directory of the page.
                                FileUrl = pageDir + FileUrl;
                            }
                        }

                        DownloadFile(FileUrl, DownloadFileName);
                    }
                }
            }

            #endregion

            return tempData;
        }
예제 #12
0
        /// <summary>
        /// ��ȡָ����ַԴ��
        /// </summary>
        /// <param name="url">��ַ</param>
        /// <param name="webCode">��ҳ����</param>
        /// <param name="cookie">��ҳcookie</param>
        /// <param name="startPos">��ȡ��ҳԴ�����ʼλ��</param>
        /// <param name="endPos">��ȡ��ҳԴ�����ֹλ��</param>
        /// <param name="IsCutnr">�Ƿ��ȡ�س����з���Ĭ��Ϊtrue����ȡ</param>
        /// <returns></returns>
        public string GetHtml(string url, cGlobalParas.WebCode webCode, string cookie, string startPos, string endPos,bool IsCutnr,bool IsAjax)
        {
            //�ж���ҳ����
            Encoding wCode;
            string PostPara = "";

            CookieContainer CookieCon = new CookieContainer();

            HttpWebRequest wReq ;

            if (Regex.IsMatch(url, @"<POST>.*</POST>", RegexOptions.IgnoreCase))
            {
                wReq = (HttpWebRequest)WebRequest.Create(@url.Substring (0,url.IndexOf ("<POST>")));
            }
            else
            {
                wReq = (HttpWebRequest)WebRequest.Create(@url );
            }

            wReq.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50215;)";
            //wReq.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");
            //wReq.Headers.Add("Accept-Encoding", "gzip, deflate");
            //wReq.SendChunked = true;
            //wReq.TransferEncoding = "utf-8";

            Match a = Regex.Match(url, @"(http://).[^/]*[?=/]", RegexOptions.IgnoreCase);
            string url1 = a.Groups[0].Value.ToString();
            wReq.Referer = url1;

            //�ж��Ƿ���cookie
            if (cookie != "")
            {
                CookieCollection cl = new CookieCollection();

                //foreach (string sc in cookie.Split(';'))
                //{
                //    string ss = sc.Trim();
                //    cl.Add(new Cookie(ss.Split('=')[0].Trim(), ss.Split('=')[1].Trim(), "/"));
                //}

                foreach (string sc in cookie.Split(';'))
                {
                    string ss = sc.Trim();
                    if (ss.IndexOf("&") > 0)
                    {
                        foreach (string s1 in ss.Split('&'))
                        {
                            string s2 = s1.Trim();
                            string s4 = s2.Substring(s2.IndexOf("=")+1, s2.Length - s2.IndexOf("=")-1);

                            cl.Add(new Cookie(s2.Split('=')[0].Trim(), s4, "/"));
                        }
                    }
                    else
                    {
                        string s3 = sc.Trim();
                        cl.Add(new Cookie(s3.Split('=')[0].Trim(), s3.Split('=')[1].Trim(), "/"));
                    }
                }

                CookieCon.Add(new Uri(url), cl);
                wReq.CookieContainer = CookieCon;
            }

            //�ж��Ƿ���POST����
            if (Regex.IsMatch(url, @"(?<=<POST>)[\S\s]*(?=</POST>)", RegexOptions.IgnoreCase))
            {

                Match s = Regex.Match(url, @"(?<=<POST>).*(?=</POST>)", RegexOptions.IgnoreCase);
                PostPara = s.Groups[0].Value.ToString();
                byte[] pPara = Encoding.ASCII.GetBytes(PostPara);

                wReq.ContentType = "application/x-www-form-urlencoded";
                wReq.ContentLength = pPara.Length;

                wReq.Method = "POST";

                System.IO.Stream reqStream = wReq.GetRequestStream();
                reqStream.Write(pPara, 0, pPara.Length);
                reqStream.Close();

            }
            else
            {
                wReq.Method = "GET";

            }

            //����ҳ�泬ʱʱ��Ϊ12��
            wReq.Timeout = 12000;

            HttpWebResponse wResp = (HttpWebResponse)wReq.GetResponse();

            System.IO.Stream respStream = wResp.GetResponseStream();
            string strWebData = "";

            switch (webCode)
            {
                case cGlobalParas.WebCode.auto:
                    try
                    {
                        wCode = Encoding.Default;
                        string cType = wResp.ContentType.ToLower();
                        Match charSetMatch = Regex.Match(cType, "(?<=charset=)([^<]*)*", RegexOptions.IgnoreCase | RegexOptions.Multiline);
                        string webCharSet = charSetMatch.ToString();
                        wCode = System.Text.Encoding.GetEncoding(webCharSet);
                    }
                    catch
                    {
                        wCode = Encoding.Default;
                    }

                    break;
                case cGlobalParas.WebCode.gb2312:
                    wCode = Encoding.GetEncoding("gb2312");
                    break;
                case cGlobalParas.WebCode.gbk:
                    wCode = Encoding.GetEncoding("gbk");
                    break;
                case cGlobalParas.WebCode.utf8:
                    wCode = Encoding.UTF8;
                    break;
                default:
                    wCode = Encoding.UTF8;
                    break;
            }

            if (wResp.ContentEncoding == "gzip")
            {
                GZipStream myGZip = new GZipStream(respStream, CompressionMode.Decompress);
                System.IO.StreamReader reader;
                reader = new System.IO.StreamReader(myGZip, wCode);
                strWebData = reader.ReadToEnd();
                reader.Close();
                reader.Dispose();
            }
            else
            {
                System.IO.StreamReader reader;
                reader = new System.IO.StreamReader(respStream, wCode);
                strWebData = reader.ReadToEnd();
                reader.Close();
                reader.Dispose();
            }

            //ȥ���س����з���
            if (IsCutnr == true)
            {
                strWebData = Regex.Replace(strWebData, "([\\r\\n])[\\s]+", "", RegexOptions.IgnoreCase | RegexOptions.Multiline);
                strWebData = Regex.Replace(strWebData, "\\n", "", RegexOptions.IgnoreCase | RegexOptions.Multiline);
                strWebData.Replace("\\r\\n", "");
            }

            //��ȡ��ҳ��ı����ʽ,����Դ�����һ���ж�,�����û��Ƿ�ָ������ҳ����
            //Match charSetMatch = Regex.Match(strWebData, "<meta([^<]*)charset=([^<]*)\"", RegexOptions.IgnoreCase | RegexOptions.Multiline);
            //string webCharSet = charSetMatch.Groups[2].Value;
            //string charSet = webCharSet;

            //if (charSet != null && charSet != "" && Encoding.GetEncoding(charSet) != wCode)
            //{
            //    byte[] myDataBuffer;

            //    myDataBuffer = System.Text.Encoding.GetEncoding(charSet).GetBytes(strWebData);
            //    strWebData = Encoding.GetEncoding(charSet).GetString(myDataBuffer);

            //}

            //���ս�ȡ��ҳ����ʼ��־����ֹ��־���н�ȡ
            //�����ʼ����ֹ��ȡ��ʶ��һ��Ϊ�գ��򲻽��н�ȡ
            if (startPos != "" && endPos != "")
            {
                string Splitstr = "(" + startPos + ").*?(" + endPos + ")";

                Match aa = Regex.Match(strWebData, Splitstr);
                strWebData = aa.Groups[0].ToString();
            }

            if (IsAjax == true)
            {
                strWebData = System.Web.HttpUtility.UrlDecode(strWebData, Encoding.UTF8);
            }

            this.m_WebpageSource = strWebData;
            return strWebData;
        }
        /// <summary>
        /// Records a summary entry for a task: loads the task, builds a
        /// <c>Task</c> element describing it, inserts that element under
        /// <c>Tasks</c> in <c>xmlConfig</c> and saves the document.
        /// </summary>
        /// <param name="TaskID">Identifier passed to <c>cTaskRun.LoadSingleTask</c>.</param>
        /// <param name="tSate">Gather result written into the <c>TaskState</c> element.</param>
        /// <returns>The <paramref name="TaskID"/> that was passed in.</returns>
        public Int64 InsertTaskComplete(Int64 TaskID,cGlobalParas.GatherResult tSate)
        {
            // Make sure the "data" directory under the project path exists
            // before anything is written.
            string cPath = Program.getPrjPath() + "data";

            if (!System.IO.Directory.Exists(cPath))
            {
                System.IO.Directory.CreateDirectory(cPath);
            }

            // Load the task whose summary is about to be recorded.
            Task.cTaskRun t = new Task.cTaskRun();
            t.LoadSingleTask(TaskID);

            // Prepare the XML document (xmlConfig) that receives the new entry.
            LoadTaskData();

            // The fragment below is handed to InsertElement as markup
            // (presumably parsed as inner XML - confirm against cXmlIO), so
            // free-text values are escaped: a task name or file path containing
            // '&', '<' or '>' would otherwise produce malformed XML.
            // NOTE(review): tSate is written through its default string
            // conversion while the other project-typed values are cast to int;
            // left unchanged because readers of this file may depend on it.
            string txml = "<TaskID>" + t.GetTaskID(0) + "</TaskID>";
            txml += "<TaskName>" + System.Security.SecurityElement.Escape(t.GetTaskName(0)) + "</TaskName>";
            txml += "<TaskState>" + tSate + "</TaskState>";
            txml += "<TaskType>" + (int)t.GetTaskType(0) + "</TaskType>";
            txml += "<RunType>" + (int)t.GetTaskRunType(0) + "</RunType>";
            txml += "<ExportFile>" + System.Security.SecurityElement.Escape(t.GetExportFile(0)) + "</ExportFile>";
            txml += "<tempFile>" + System.Security.SecurityElement.Escape(t.GetTempFile(0)) + "</tempFile>";
            txml += "<UrlCount>" + t.GetUrlCount(0) + "</UrlCount>";
            txml += "<GatheredUrlCount>" + t.GetGatheredUrlCount(0) + "</GatheredUrlCount>";
            txml += "<IsLogin>" + t.GetIsLogin(0) + "</IsLogin>";
            txml += "<PublishType>" + (int)t.GetPublishType(0) + "</PublishType>";

            xmlConfig.InsertElement("Tasks", "Task", txml);
            xmlConfig.Save();

            return TaskID;
        }
        /// <summary>
        /// Builds the "create table" statement for the task's data table, declaring
        /// one <c>text</c> column per column of <c>m_pTaskData.PublishData</c>.
        /// </summary>
        /// <param name="dType">Target database type; only MySql changes the output.</param>
        /// <param name="Encoding">Character set name appended for MySql; "utf8" is used when null or empty.</param>
        /// <returns>The SQL text of the statement.</returns>
        private string getCreateTablesql(cGlobalParas.DatabaseType dType ,string Encoding)
        {
            string sql = "create table " + this.m_pTaskData.DataTableName + "(";

            // Every database type declares its columns the same way, so no
            // per-type branching is needed here.
            for (int col = 0; col < m_pTaskData.PublishData.Columns.Count; col++)
            {
                sql += m_pTaskData.PublishData.Columns[col].ColumnName + " text,";
            }

            // Drop the final character (the trailing comma) and close the column list.
            sql = sql.Remove(sql.Length - 1) + ")";

            if (dType != cGlobalParas.DatabaseType.MySql)
            {
                return sql;
            }

            // MySql additionally gets an explicit character set for the table.
            string charset = string.IsNullOrEmpty(Encoding) ? "utf8" : Encoding;
            return sql + " CHARACTER SET " + charset + " ";
        }
 /// <summary>
 /// Creates the event arguments and stores <paramref name="MessType"/> in the
 /// <c>MessType</c> member of the base class.
 /// </summary>
 /// <param name="MessType">Message type assigned to <c>base.MessType</c>.</param>
 public cCommandEventArgs(cGlobalParas.MessageType MessType)
 {
     base.MessType = MessType;
 }
예제 #16
0
        /// <summary>
        /// Expands a list of URLs through the navigation rules, one level per
        /// call, recursing until the last level has been applied.
        /// </summary>
        /// <remarks>
        /// The rule for a level is located by comparing <c>cNavigRule.Level</c>
        /// with <paramref name="Level"/>, not by its position in
        /// <paramref name="nRules"/>. If no rule matches, an empty rule string is
        /// passed to <c>GetUrlsByRule</c>.
        /// </remarks>
        /// <param name="pUrl">URLs to expand at this level.</param>
        /// <param name="Level">Navigation level to apply in this call.</param>
        /// <param name="nRules">All navigation rules; when empty, no expansion is done.</param>
        /// <param name="webCode">Page encoding forwarded to <c>GetUrlsByRule</c>.</param>
        /// <param name="cookie">Cookie string forwarded to <c>GetUrlsByRule</c>.</param>
        /// <returns>
        /// The URLs produced by the final level. With no rules, a list holding
        /// only the first entry of <paramref name="pUrl"/> (or an empty list when
        /// <paramref name="pUrl"/> is empty).
        /// </returns>
        public List<string> PUrlRule(List<string> pUrl,int Level, List<cNavigRule> nRules,cGlobalParas.WebCode webCode, string cookie)
        {
            List<string> Urls = new List<string>();

            // Without navigation rules there is nothing to expand: hand back the
            // first URL as-is. Guard against an empty input list, which used to
            // throw ArgumentOutOfRangeException on pUrl[0].
            if (nRules.Count == 0)
            {
                if (pUrl.Count > 0)
                {
                    Urls.Add(pUrl[0]);
                }
                return Urls;
            }

            // Pick the rule whose Level matches the level being processed.
            string UrlRule = "";
            foreach (cNavigRule rule in nRules)
            {
                if (rule.Level == Level)
                {
                    UrlRule = rule.NavigRule;
                    break;
                }
            }

            // Apply the rule to every incoming URL and collect the results.
            foreach (string url in pUrl)
            {
                List<string> tmpUrls = GetUrlsByRule(url, UrlRule, webCode, cookie);

                if (tmpUrls != null)
                {
                    Urls.AddRange(tmpUrls);
                }
            }

            // Stop once the last level has been reached. ">=" rather than "=="
            // so that a Level already beyond the number of rules terminates
            // instead of recursing without end.
            if (Level >= nRules.Count)
            {
                return Urls;
            }

            return PUrlRule(Urls, Level + 1, nRules, webCode, cookie);
        }
예제 #17
0
        /// <summary>
        /// Opens tasks\plan\plan.xml under the project path, sets the
        /// <c>PlanState</c> value of the plan whose <c>ID</c> equals
        /// <paramref name="PlanID"/>, and saves the file.
        /// </summary>
        /// <param name="PlanID">Identifier of the plan to update.</param>
        /// <param name="pState">New state; stored as its integer value.</param>
        private void ModifyPlanState(Int64 PlanID, cGlobalParas.PlanState pState)
        {
            string planFile = Program.getPrjPath() + "tasks\\plan\\plan.xml";
            string planKey = PlanID.ToString();
            string stateValue = ((int)pState).ToString();

            cXmlIO planXml = new cXmlIO(planFile);
            planXml.EditNodeValue("Plans", "ID", planKey, "PlanState", stateValue);
            planXml.Save();
        }
예제 #18
0
        /// <summary>
        /// Appends a <c>Log</c> element under <c>Logs</c> in <c>m_PlanFile</c>
        /// describing one plan run, then saves the file.
        /// </summary>
        /// <param name="lType">Log type; stored as its integer value.</param>
        /// <param name="PlanID">Identifier of the plan the entry belongs to.</param>
        /// <param name="PlanName">Name of the plan.</param>
        /// <param name="rType">Run task type; stored as its integer value.</param>
        /// <param name="FileName">Value written to the <c>FileName</c> element.</param>
        /// <param name="Para">Value written to the <c>FilePara</c> element.</param>
        public void InsertLog(cGlobalParas.LogType lType,string PlanID,string PlanName, cGlobalParas.RunTaskType rType, string FileName, string Para)
        {
            // The fragment is handed to InsertElement as markup (presumably
            // parsed as inner XML - confirm against cXmlIO), so caller-supplied
            // text is escaped; an unescaped '&', '<' or '>' in a plan name, file
            // path or parameter string would otherwise yield malformed XML.
            // NOTE(review): RunTime uses DateTime.Now.ToString(), whose format
            // follows the current culture; kept as-is because existing log
            // readers may rely on it.
            string strXml = "<LogType>" + ((int)lType).ToString() + "</LogType>" +
                "<PlanID>" + System.Security.SecurityElement.Escape(PlanID) + "</PlanID>" +
                "<PlanName>" + System.Security.SecurityElement.Escape(PlanName) + "</PlanName>" +
                "<FileName>" + System.Security.SecurityElement.Escape(FileName) + "</FileName>" +
                "<FilePara>" + System.Security.SecurityElement.Escape(Para) + "</FilePara>" +
                "<TaskType>" + ((int)rType).ToString() + "</TaskType>" +
                "<RunTime>" + DateTime.Now.ToString() + "</RunTime>";

            m_PlanFile.InsertElement("Logs", "Log", strXml);
            m_PlanFile.Save();
        }