/// <summary>
/// Guesses candidate "next page" URLs for <paramref name="baseUrl"/> using two simple heuristics.
/// </summary>
/// <remarks>
/// Candidates are appended in this order:
/// <list type="number">
/// <item>The URL with the number in its final path segment incremented by one. This candidate is
/// only produced when <c>RegexUtil.ExtractDigit</c> returns text that parses as an integer.</item>
/// <item>The URL with <c>_2</c> appended to the final path segment's name (before its extension).</item>
/// </list>
/// Only the final path segment is rewritten; identical text elsewhere in the URL (for example in
/// the host name or a parent directory) is left untouched.
/// </remarks>
/// <param name="baseUrl">URL of the page to derive the next-page candidates from.</param>
/// <returns>
/// The candidate URLs. Empty when <paramref name="baseUrl"/> is null or empty, or when it has no
/// final segment name to rewrite (for example when it ends with <c>/</c>).
/// </returns>
public static List<string> GetPageDownUrlAuto(string baseUrl)
{
    List<string> list = new List<string>();
    if (string.IsNullOrEmpty(baseUrl))
    {
        return list;
    }

    // Split the URL into: prefix (everything through the last '/'), the final segment's name,
    // and the suffix (from the last '.' of the final segment to the end, possibly empty).
    int lastSlash = baseUrl.LastIndexOf('/');
    string prefix = baseUrl.Substring(0, lastSlash + 1);
    string segment = baseUrl.Substring(lastSlash + 1);

    int dot = segment.LastIndexOf('.');
    string name = dot >= 0 ? segment.Substring(0, dot) : segment;
    string suffix = dot >= 0 ? segment.Substring(dot) : "";

    if (name.Length == 0)
    {
        // Nothing to paginate on; string.Replace would also reject an empty search value.
        return list;
    }

    // Heuristic 1: increment the number found in the final segment.
    // NOTE(review): ExtractDigit is defined outside this file; it is assumed to return the digit
    // text that appears in `name` (the original comment calls it "the last digit region").
    string pageStr = RegexUtil.ExtractDigit(name);
    int page;
    if (int.TryParse(pageStr, out page))
    {
        string incremented = name.Replace(pageStr, (page + 1).ToString());
        list.Add(prefix + incremented + suffix);
    }

    // Heuristic 2: append "_2" directly after the segment name.
    list.Add(prefix + name + "_2" + suffix);

    return list;
}
/// <summary>
/// Derives the URL of the page after <paramref name="pageDownUrl"/> by comparing it with
/// <paramref name="baseUrl"/>, finding the number in the part that differs, and incrementing it.
/// </summary>
/// <remarks>
/// Only simple pagination schemes (a single page counter that grows by one) are handled; anything
/// more elaborate needs case-by-case treatment.
/// </remarks>
/// <param name="baseUrl">URL of the first page.</param>
/// <param name="pageDownUrl">URL of the page that follows <paramref name="baseUrl"/>.</param>
/// <returns>
/// <paramref name="pageDownUrl"/> with its page number incremented by one, or
/// <paramref name="pageDownUrl"/> unchanged when the two URLs do not differ, the differing part
/// contains no digits, or the number does not fit in an <see cref="int"/>.
/// </returns>
public static string GetPageDownUrlManual(string baseUrl, string pageDownUrl)
{
    if (string.IsNullOrEmpty(pageDownUrl))
    {
        return pageDownUrl;
    }
    if (baseUrl == null)
    {
        baseUrl = "";
    }

    int shorter = baseUrl.Length < pageDownUrl.Length ? baseUrl.Length : pageDownUrl.Length;

    // Length of the leading part shared by both URLs.
    int prefixLen = 0;
    while (prefixLen < shorter && baseUrl[prefixLen] == pageDownUrl[prefixLen])
    {
        prefixLen++;
    }

    // Length of the trailing part shared by both URLs, never overlapping the shared prefix.
    int suffixLen = 0;
    while (suffixLen < shorter - prefixLen
           && baseUrl[baseUrl.Length - 1 - suffixLen] == pageDownUrl[pageDownUrl.Length - 1 - suffixLen])
    {
        suffixLen++;
    }

    // The part of pageDownUrl that differs from baseUrl is [diffStart, diffEnd).
    int diffStart = prefixLen;
    int diffEnd = pageDownUrl.Length - suffixLen;
    if (diffStart >= diffEnd)
    {
        return pageDownUrl;
    }

    // Locate the last run of ASCII digits inside the differing part.
    int digitEnd = diffEnd;
    while (digitEnd > diffStart && !(pageDownUrl[digitEnd - 1] >= '0' && pageDownUrl[digitEnd - 1] <= '9'))
    {
        digitEnd--;
    }
    if (digitEnd == diffStart)
    {
        return pageDownUrl;
    }

    // Extend the run to cover the whole number: the shared prefix/suffix may have cut through it
    // (e.g. "list_10" vs "list_11" only differ in the last digit).
    int digitStart = digitEnd;
    while (digitStart > 0 && pageDownUrl[digitStart - 1] >= '0' && pageDownUrl[digitStart - 1] <= '9')
    {
        digitStart--;
    }
    while (digitEnd < pageDownUrl.Length && pageDownUrl[digitEnd] >= '0' && pageDownUrl[digitEnd] <= '9')
    {
        digitEnd++;
    }

    string pageStr = pageDownUrl.Substring(digitStart, digitEnd - digitStart);
    int page;
    if (!int.TryParse(pageStr, out page) || page == int.MaxValue)
    {
        return pageDownUrl;
    }

    // Splice the incremented number back in place so that identical digits elsewhere in the URL
    // are not touched.
    return pageDownUrl.Substring(0, digitStart) + (page + 1).ToString() + pageDownUrl.Substring(digitEnd);
}