Esempio n. 1
0
        /// <summary>
        /// Automatic "next page" handler. The method currently does nothing: its former
        /// implementation was disabled and it has no return value through which a
        /// next-page URL could be reported.
        /// </summary>
        /// <param name="args">Link-parsing event arguments; not read by the current implementation.</param>
        private static void CustomParseLinkEvent_Next(CustomParseLinkEvent2Args args)
        {
            // Intentionally empty.
            //
            // History: the implementation used before 2015-09-30 matched the pattern
            //   <a .+ href='(.+)'>下一页</a>        ("下一页" is the "next page" link text)
            // against args.Html. If the captured href was not already a usable URL it was
            // rebuilt from args.Url (everything up to the last '/', then "/list.php" + href)
            // and logged to the console and to a URL log file. The result was returned as a
            // UrlInfo with Depth = args.Depth + 1, or null when no usable URL was found.
            // A later variant delegated the same pattern to an AutoNextPage helper.
            // Both variants returned a value, which this void signature does not allow,
            // so they were commented out; consult version control if they are needed again.
        }
Esempio n. 2
0
        /// <summary>
        /// Builds a new dictionary containing only those entries of
        /// <c>args.UrlDictionary</c> whose key (the href) matches <paramref name="patternStr"/>.
        /// The source dictionary is not modified.
        /// </summary>
        /// <param name="args">Event arguments carrying the href-to-text dictionary to filter.</param>
        /// <param name="patternStr">Regular expression an href must match to be kept.</param>
        /// <returns>
        /// The matching href/text pairs; an empty dictionary when <paramref name="args"/>
        /// or its <c>UrlDictionary</c> is null, or when nothing matches.
        /// </returns>
        private static Dictionary<string, string> CustomParseLinkE_MainList(CustomParseLinkEvent2Args args, string patternStr)
        {
            Dictionary<string, string> matched = new Dictionary<string, string>();

            // Tolerate missing input instead of failing with a NullReferenceException.
            if (args == null || args.UrlDictionary == null)
            {
                return matched;
            }

            // The pattern is loop-invariant: parse it once rather than once per entry.
            // An invalid or null pattern therefore throws here, before any entry is examined.
            Regex hrefFilter = new Regex(patternStr);

            foreach (var entry in args.UrlDictionary)
            {
                string href = entry.Key;

                // Empty hrefs are skipped outright; they are never tested against the pattern.
                if (!string.IsNullOrEmpty(href) && hrefFilter.IsMatch(href))
                {
                    matched.Add(href, entry.Value);
                }
            }

            return matched;
        }
Esempio n. 3
0
 // TODO: rewrite this method using ref or out parameters.
 /// <summary>
 /// Runs the two link-processing steps in order: first replaces
 /// <c>args.UrlDictionary</c> with the subset returned by
 /// <c>CustomParseLinkE_MainList</c>, then passes the arguments to
 /// <c>CustomParseLinkE_NextPageSdau</c> and returns its result.
 /// </summary>
 /// <param name="args">Event arguments whose <c>UrlDictionary</c> is overwritten by the filter step.</param>
 /// <returns>The dictionary returned by <c>CustomParseLinkE_NextPageSdau</c>.</returns>
 private static Dictionary<string, string> Master_CustomParseLinkEvent2(CustomParseLinkEvent2Args args)
 {
     const string keepPattern = "(view).+?([0-9]{5})";
     const string nextPagePattern = "<a .+ href='(.+)'>下一页</a>";
     const int hrefGroupIndex = 1;

     // Step 1 (remove): keep only the entries whose href matches keepPattern.
     Dictionary<string, string> filtered = CustomParseLinkE_MainList(args, keepPattern);
     args.UrlDictionary = filtered;

     // Step 2 (add): let the next-page step extend the filtered dictionary.
     Dictionary<string, string> result = CustomParseLinkE_NextPageSdau(args, nextPagePattern, hrefGroupIndex);
     return result;
 }
Esempio n. 4
0
 /// <summary>
 /// Searches <c>args.Html</c> for the first match of <paramref name="patternStr"/>,
 /// takes the capture group <paramref name="groupIndex"/> as a link target, resolves it
 /// to an absolute URL against <c>args.UrlInfo.UrlString</c>, and adds that URL to
 /// <c>args.UrlDictionary</c> with a freshly generated GUID string as its value.
 /// </summary>
 /// <param name="args">Event arguments providing the HTML, the page URL and the dictionary to extend.</param>
 /// <param name="patternStr">Regular expression that locates the link in the HTML.</param>
 /// <param name="groupIndex">Index of the capture group holding the href value.</param>
 /// <returns>
 /// <c>args.UrlDictionary</c> (extended in place when a new link was found), or an
 /// empty dictionary when <paramref name="args"/> is null.
 /// </returns>
 private static Dictionary<string, string> CustomParseLinkE_NextPageSdau(CustomParseLinkEvent2Args args, string patternStr, int groupIndex)
 {
     if (args == null)
     {
         // There is no dictionary to extend; return an empty one rather than
         // dereferencing the null argument.
         return new Dictionary<string, string>();
     }

     if (string.IsNullOrEmpty(args.Html))
     {
         return args.UrlDictionary;
     }

     Match match = new Regex(patternStr).Match(args.Html);
     if (!match.Success)
     {
         return args.UrlDictionary;
     }

     // An out-of-range or non-participating group yields an unsuccessful, empty capture.
     // Resolving "" against the base would silently re-add the current page, so skip it.
     Group hrefGroup = match.Groups[groupIndex];
     if (!hrefGroup.Success || hrefGroup.Value.Length == 0)
     {
         return args.UrlDictionary;
     }

     Uri baseUri = new Uri(args.UrlInfo.UrlString);

     // Resolving against the base handles both cases: an absolute href is kept as is,
     // a relative href is combined with the base URI. TryCreate avoids a
     // UriFormatException when the captured text is not a usable URI.
     Uri resolvedUri;
     if (Uri.TryCreate(baseUri, hrefGroup.Value, out resolvedUri))
     {
         string url = resolvedUri.AbsoluteUri;

         // Dictionary.Add throws on a duplicate key; keep the existing entry instead.
         if (!args.UrlDictionary.ContainsKey(url))
         {
             args.UrlDictionary.Add(url, Guid.NewGuid().ToString());
         }
     }

     return args.UrlDictionary;
 }