/// <summary>
/// Automatic "next page" link handler. It currently does nothing: the body holds no
/// executable statements and the method has no return value.
/// </summary>
/// <param name="args">Link-parsing event arguments. Not read by this method.</param>
/// <remarks>
/// Two earlier implementations were disabled in place by commenting them out and are
/// summarised here instead of being kept as dead code:
/// (1) the code used before 2015-09-30 matched the pattern
/// <c>&lt;a .+ href='(.+)'&gt;下一页&lt;/a&gt;</c> against the page HTML, built the next URL by
/// string concatenation with "/list.php", logged it to the console and to a file, and
/// returned a <c>UrlInfo</c> one level deeper;
/// (2) a later variant delegated to an <c>AutoNextPage</c> helper with the same pattern.
/// Both returned a value, which this <c>void</c> signature cannot do.
/// The same pattern is passed to <c>CustomParseLinkE_NextPageSdau</c> by
/// <c>Master_CustomParseLinkEvent2</c>.
/// </remarks>
private static void CustomParseLinkEvent_Next(CustomParseLinkEvent2Args args)
{
    // No executable statements: every previous implementation was commented out.
}
/// <summary>
/// Builds a new dictionary containing only those entries of <c>args.UrlDictionary</c>
/// whose key (the href) is non-empty and matches <paramref name="patternStr"/>.
/// </summary>
/// <param name="args">Event arguments supplying the <c>UrlDictionary</c> to filter. The source dictionary is only read here.</param>
/// <param name="patternStr">Regular expression tested against each href.</param>
/// <returns>A freshly allocated dictionary with the matching href/text pairs.</returns>
private static Dictionary<string, string> CustomParseLinkE_MainList(CustomParseLinkEvent2Args args, string patternStr)
{
    Dictionary<string, string> matched = new Dictionary<string, string>();

    foreach (var entry in args.UrlDictionary)
    {
        string link = entry.Key;

        // Entries without an href are never candidates.
        if (string.IsNullOrEmpty(link))
        {
            continue;
        }

        if (Regex.IsMatch(link, patternStr))
        {
            matched.Add(link, entry.Value);
        }
    }

    return matched;
}
// TODO: rewrite this method using ref or out.
/// <summary>
/// Link-parsing pipeline: first narrows <c>args.UrlDictionary</c> to the hrefs matching the
/// "view" pattern, then lets <c>CustomParseLinkE_NextPageSdau</c> append the "next page" link.
/// </summary>
/// <param name="args">Event arguments; its <c>UrlDictionary</c> is replaced by the filtered dictionary.</param>
/// <returns>The dictionary returned by <c>CustomParseLinkE_NextPageSdau</c>.</returns>
private static Dictionary<string, string> Master_CustomParseLinkEvent2(CustomParseLinkEvent2Args args)
{
    const string detailLinkPattern = "(view).+?([0-9]{5})";
    const string nextPagePattern = "<a .+ href='(.+)'>下一页</a>";
    const int nextPageHrefGroup = 1;

    // Removal step: drop every link that does not match the detail-page pattern.
    Dictionary<string, string> detailLinks = CustomParseLinkE_MainList(args, detailLinkPattern);
    args.UrlDictionary = detailLinks;

    // Addition step: add the "next page" link found in the page HTML.
    return CustomParseLinkE_NextPageSdau(args, nextPagePattern, nextPageHrefGroup);
}
/// <summary>
/// Searches <c>args.Html</c> for the first match of <paramref name="patternStr"/>, takes the
/// href from capture group <paramref name="groupIndex"/>, resolves it to an absolute URL
/// against <c>args.UrlInfo.UrlString</c>, and adds that URL to <c>args.UrlDictionary</c>.
/// </summary>
/// <param name="args">
/// Event arguments providing the page HTML, the page URL and the dictionary to extend.
/// May be <c>null</c>, in which case nothing is parsed.
/// </param>
/// <param name="patternStr">Regular expression that locates the "next page" anchor in the HTML.</param>
/// <param name="groupIndex">Index of the capture group in <paramref name="patternStr"/> that holds the href.</param>
/// <returns>
/// <c>args.UrlDictionary</c> (the same instance, with at most one entry added), or a new
/// empty dictionary when <paramref name="args"/> is <c>null</c>.
/// </returns>
private static Dictionary<string, string> CustomParseLinkE_NextPageSdau(CustomParseLinkEvent2Args args, string patternStr, int groupIndex)
{
    // Previously the null check guarded only the parsing; the final return still
    // dereferenced args and threw NullReferenceException.
    if (args == null)
    {
        return new Dictionary<string, string>();
    }

    if (string.IsNullOrEmpty(args.Html))
    {
        return args.UrlDictionary;
    }

    Match mat = Regex.Match(args.Html, patternStr);
    if (!mat.Success)
    {
        return args.UrlDictionary;
    }

    // An unmatched or out-of-range group yields an empty value; resolving "" against the
    // base would produce the current page URL, which is not a next-page link.
    Group hrefGroup = mat.Groups[groupIndex];
    if (!hrefGroup.Success || string.IsNullOrEmpty(hrefGroup.Value))
    {
        return args.UrlDictionary;
    }

    Uri baseUri = new Uri(args.UrlInfo.UrlString);

    // Uri.TryCreate handles both absolute and relative hrefs, and reports failure
    // instead of throwing on a malformed link taken from the page HTML.
    Uri nextUri;
    if (!Uri.TryCreate(baseUri, hrefGroup.Value, out nextUri))
    {
        return args.UrlDictionary;
    }

    string url = nextUri.AbsoluteUri;

    // Dictionary.Add throws on a duplicate key; keep the existing entry when the
    // URL is already queued.
    if (!args.UrlDictionary.ContainsKey(url))
    {
        args.UrlDictionary.Add(url, Guid.NewGuid().ToString());
    }

    return args.UrlDictionary;
}