Exemple #1
0
 /// <summary>
 /// Cleans a text fragment: turns <c>&lt;p&gt;</c> tags into line breaks, strips
 /// JavaScript <c>document.write(...)</c> wrappers, passes the result through
 /// <c>Often.OutTxt</c>, and finally collapses repeated line breaks and spaces.
 /// </summary>
 /// <param name="str">The raw text to clean. Must not be null (the first call dereferences it).</param>
 /// <returns>The cleaned text.</returns>
 private string Filtrate(string str)
 {
     str = str.Replace("<p>", "\r\n");

     // Strip document.write wrappers. Order matters: the complete opening and
     // closing forms are removed first, then whatever fragments are left over.
     str = str.Replace("document.write(\"", "");
     str = str.Replace("\");", "");
     str = str.Replace("document.write('", "");
     str = str.Replace("');", "");
     str = str.Replace("document.write", "");
     str = str.Replace("')", "");
     str = str.Replace("('", "");

     str = Often.OutTxt(str);

     // NOTE(review): if the literal below is an ordinary space, every space is removed
     // here and the double-space loop further down can never find a match - confirm
     // whether a different whitespace character (e.g. a non-breaking space) was intended.
     str = str.Replace(" ", "");

     // String.Contains(string) performs an ordinal search, which matches the ordinal
     // search done by String.Replace(string, string). The previous test,
     // IndexOf(string) > -1, is culture-sensitive and can report a match that spans
     // ignorable characters (for example a soft hyphen); Replace would not rewrite
     // such a match, so the loop could spin forever.
     while (str.Contains("\r\n\r\n"))
     {
         str = str.Replace("\r\n\r\n", "\r\n");
     }

     // Same ordinal test for runs of spaces.
     while (str.Contains("  "))
     {
         str = str.Replace("  ", " ");
     }

     return str;
 }
Exemple #2
0
 /// <summary>
 /// Event handler (named for a click on <c>bt09</c>): takes the value from
 /// <c>GetInput</c>, runs it through <c>Often.OutTxt</c> and then
 /// <c>Often.OutText</c>, and hands the result to <c>GetOut</c>.
 /// </summary>
 /// <param name="sender">Event source; not used.</param>
 /// <param name="e">Event data; not used.</param>
 private void bt09_Click(object sender, EventArgs e)
 {
     // Same call order as the nested form: innermost call first.
     var rawInput = GetInput();
     var afterOutTxt = Often.OutTxt(rawInput);
     var afterOutText = Often.OutText(afterOutTxt);

     GetOut(afterOutText);
 }
Exemple #3
0
        /// <summary>
        /// Gathers data based on the current list URL (<c>NowUrl</c>) and that URL's
        /// already-fetched content (<c>ListLinkContent</c>).
        /// </summary>
        /// <remarks>
        /// Each pass of the outer loop extracts content-page links from the current list page,
        /// fetches every link not rejected by <c>IsContLinkEcho</c>, builds one row in
        /// <c>gi.gdt</c> per content page (one column per entry of <c>gi.wsilist</c>), and then
        /// selects the next list page. The method returns as soon as <c>GatherCount</c> reaches
        /// <c>gi.ws.GatherMaxNum</c>, or when <c>NowUrl</c> or <c>ListLinkContent</c> becomes blank.
        /// Rows are written to an Excel file and the table is cleared whenever it holds more
        /// than 999 rows; rows still buffered when the method returns are not written here.
        /// </remarks>
        public void GatherOper()
        {
            // Position of the next entry to take from CentUrlList (used only when NextMode == 1).
            int cindex = 0;

            // Keep going while there is both a list URL and list-page content to work on.
            while (NowUrl.Trim() != "" && ListLinkContent.Trim() != "")
            {
                // Only extract links when IsListLinkEcho returns false for this list URL
                // (presumably "not already processed" - TODO confirm against IsListLinkEcho).
                if (!IsListLinkEcho(NowUrl))
                {
                    // Text in which content-page links are searched: either the part of the
                    // page selected by UrlRangeRule, or the whole page when no rule is set.
                    string ListLinkArea = "";
                    if (gi.ws.UrlRangeRule != "")
                    {
                        ListLinkArea = wapp.GatherApp.GetRegTxtString(ListLinkContent, gi.ws.UrlRangeRule, gi.ws.UrlRangeRuleIndex);
                        lout.OT(1, "获取内容页链接列表范围匹配内容。[" + ListLinkArea.Length.ToString() + "]");
                    }
                    else
                    {
                        ListLinkArea = ListLinkContent;
                    }
                    if (ListLinkArea.Trim() != "")
                    {
                        // All content-page links found by UrlRule in the selected area.
                        List <string> linklist = wapp.GatherApp.GetRegTxtList(ListLinkArea, gi.ws.UrlRule, gi.ws.UrlRuleIndex);
                        lout.OT(1, "获取内容页链接匹配数组[" + linklist.Count.ToString() + "]");
                        for (int x = 0; x < linklist.Count; x++)
                        {
                            // Both timestamps are reset to "now" for every link.
                            StartDate = DateTime.Now;
                            EndDate   = StartDate;
                            // Combine the extracted link with the current list URL
                            // (presumably resolving relative links - TODO confirm FormatUrl).
                            string links = wapp.GatherApp.FormatUrl(NowUrl, linklist[x].Trim());
                            // Skip links for which IsContLinkEcho returns true.
                            if (!IsContLinkEcho(links))
                            {
                                string actlink = links;
                                NowContLinkList.Add(actlink);
                                string errs  = "";
                                string conts = wapp.GatherApp.GetHttp(det, actlink, gi.ws.CodingMode, ref PageEncoding, ref PageCoding, ref errs);
                                // A fetch error is only logged; extraction below still runs
                                // on whatever GetHttp returned in conts.
                                if (errs != "")
                                {
                                    lout.OT(1, "获取内容页起始url内容失败,错误信息:" + errs + "。[Url:" + actlink + "]");
                                }
                                else
                                {
                                    lout.OT(1, "获取内容页起始url内容。[" + actlink + "][" + conts.Length.ToString() + "]");
                                }
                                // One row per content page; column si receives the value
                                // extracted for gi.wsilist[si].
                                DataRow nsdr = gi.gdt.NewRow();
                                for (int si = 0; si < gi.wsilist.Count; si++)
                                {
                                    // Optionally narrow the page to the part selected by
                                    // this field's ContentRangeRule.
                                    string sconts = "";
                                    if (gi.wsilist[si].ContentRangeRule != "")
                                    {
                                        sconts = wapp.GatherApp.GetRegTxtString(conts, gi.wsilist[si].ContentRangeRule, gi.wsilist[si].ContentRangeRuleIndex);
                                    }
                                    else
                                    {
                                        sconts = conts;
                                    }
                                    // Extract the field value with ContentRule and pass it through
                                    // GetUnCode. For ContentType == 2 the value additionally goes
                                    // through Often.OutTxt and FormatDate.
                                    string sis = "";
                                    if (gi.wsilist[si].ContentType == 2)
                                    {
                                        sis = wapp.GatherApp.FormatDate(Often.OutTxt(wapp.GatherApp.GetUnCode(wapp.GatherApp.GetRegsTxt(sconts, gi.wsilist[si].ContentRule, gi.wsilist[si].ContentRuleIndex), gi.wsilist[si].ContentCodeMode)));
                                    }
                                    else
                                    {
                                        sis = wapp.GatherApp.GetUnCode(wapp.GatherApp.GetRegsTxt(sconts, gi.wsilist[si].ContentRule, gi.wsilist[si].ContentRuleIndex), gi.wsilist[si].ContentCodeMode);
                                    }
                                    // NOTE(review): all three branches store the same value, so the
                                    // IsNum / IsDate results do not change what ends up in the row.
                                    if (gi.wsilist[si].ContentType == 1 && Often.IsNum(sis))
                                    {
                                        nsdr[si] = sis;
                                    }
                                    else if (gi.wsilist[si].ContentType == 2 && Often.IsDate(sis))
                                    {
                                        nsdr[si] = sis;
                                    }
                                    else
                                    {
                                        nsdr[si] = sis;
                                    }
                                    // Log only fields that produced a non-empty value.
                                    if (sis != "")
                                    {
                                        lout.OT(1, "[" + gi.wsilist[si].ListTitle + "(" + gi.wsilist[si].ListCode + ")]内容:[" + sis + "]采集成功!");
                                    }
                                }
                                gi.gdt.Rows.Add(nsdr);
                                GatherCount++;
                                // Stop the whole run once the configured maximum is reached. This
                                // returns before the flush below, so rows still in gi.gdt are not
                                // written by this method.
                                if (GatherCount >= gi.ws.GatherMaxNum)
                                {
                                    return;
                                }
                                // Flush the in-memory table to an Excel file once it holds more than
                                // 999 rows, then clear it. (The region label below reads "save the
                                // gathered in-memory table to the database"; the code writes Excel.)
                                #region 保存采集内存表中的信息到数据库
                                if (gi.gdt.Rows.Count > 999)
                                {
                                    string savefile = GetExcelSaveFileName();
                                    lout.OT(1, "保存数据到Excel文件操作开始[文件名:" + savefile + "]...");
                                    wapp.Excel ex = new wapp.Excel();
                                    ex.DataTableToExcel(gi.gdt);
                                    ex.WriteFile(savefile);
                                    gi.gdt.Clear();
                                    lout.OT(1, "保存数据到Excel文件操作结束[文件名:" + savefile + "]");
                                }
                                #endregion
                            }
                        }
                    }
                }
                // Record the list URL just handled and clear NowUrl; unless a next page is
                // chosen below, the blank NowUrl ends the outer loop.
                string listurl = NowUrl;
                NowUrlList.Add(NowUrl);
                NowUrl = "";
                // IsNext == 1: look for a following list page
                // (presumably "paging enabled" - TODO confirm the meaning of the flag).
                if (gi.ws.IsNext == 1)
                {
                    if (gi.ws.NextMode == 1)
                    {
                        // NextMode == 1: take the next URL from CentUrlList, in order.
                        // (Region label: list paging, single-page gather mode.)
                        #region 采集内容链接列表分页(列表分页为单页采集模式)
                        if (CentUrlList.Count > 0 && cindex < CentUrlList.Count)
                        {
                            NowUrl = CentUrlList[cindex].Trim();
                            cindex++;
                        }
                        #endregion
                    }
                    else
                    {
                        // Other modes: extract the next-page link from the current list page with
                        // NextRule, optionally narrowing the page first with NextRangeRule.
                        // (Region label: list paging, multi-page gather mode.)
                        #region 采集内容链接列表分页(列表分页为多页采集模式)
                        if (gi.ws.NextRangeRule != "")
                        {
                            string s = wapp.GatherApp.GetRegTxtString(ListLinkContent, gi.ws.NextRangeRule, gi.ws.NextRangeRuleIndex);
                            if (s != "")
                            {
                                s = wapp.GatherApp.GetRegsTxt(s, gi.ws.NextRule, gi.ws.NextRuleIndex);
                                if (s != "")
                                {
                                    NowUrl = wapp.GatherApp.GetUrls(wapp.GatherApp.FormatUrl(listurl, s));
                                }
                            }
                        }
                        else
                        {
                            string s = wapp.GatherApp.GetRegsTxt(ListLinkContent, gi.ws.NextRule, gi.ws.NextRuleIndex);
                            if (s != "")
                            {
                                NowUrl = wapp.GatherApp.GetUrls(wapp.GatherApp.FormatUrl(listurl, s));
                            }
                        }
                        #endregion
                    }
                    // Fetch the next list page when one was found and IsListLinkEcho returns false
                    // for it. In every other case, and on a fetch error, ListLinkContent is
                    // blanked, which ends the outer loop.
                    if (NowUrl != "" && !IsListLinkEcho(NowUrl))
                    {
                        string errs = "";
                        ListLinkContent = wapp.GatherApp.GetHttp(det, NowUrl, gi.ws.CodingMode, ref PageEncoding, ref PageCoding, ref errs);
                        if (errs != "")
                        {
                            ListLinkContent = "";
                            lout.OT(1, "列表页采集失败,错误信息:" + errs + ",[Url:" + NowUrl + "]");
                        }
                    }
                    else
                    {
                        ListLinkContent = "";
                    }
                }
                // Incremented once per pass of the outer loop.
                NowUrlCount++;
            }
        }