/// <summary>
/// Advances the <c>{pageNo}</c> URL placeholder to the next page and returns the
/// updated <c>UrlPathDictionary</c>.
/// </summary>
/// <remarks>
/// If <c>UrlPathDictionary</c> is null or empty it is replaced with a new dictionary
/// whose <c>{pageNo}</c> is "1". If it has entries but no <c>{pageNo}</c> key, the key
/// is set to "1". Otherwise the stored page number is incremented by one.
/// </remarks>
/// <returns>The <c>UrlPathDictionary</c> instance after the update.</returns>
/// <exception cref="System.FormatException">
/// The stored <c>{pageNo}</c> value is not a valid integer.
/// </exception>
protected override Dictionary<string, string> GetNextPageUrlDictionary()
{
    if (UrlPathDictionary?.Any() != true)
    {
        // Nothing stored yet: start crawling from the first page.
        UrlPathDictionary = new Dictionary<string, string> { { "{pageNo}", "1" } };
        return UrlPathDictionary;
    }

    if (UrlPathDictionary.TryGetValue("{pageNo}", out var currentPageNoStr))
    {
        // The value is written with ToInvariantString(), so read it back with the
        // invariant culture as well instead of the thread's current culture.
        var nextPageNo = long.Parse(currentPageNoStr, System.Globalization.CultureInfo.InvariantCulture) + 1;
        UrlPathDictionary.AddOrUpdate("{pageNo}", nextPageNo.ToInvariantString());
    }
    else
    {
        UrlPathDictionary.AddOrUpdate("{pageNo}", "1");
    }

    return UrlPathDictionary;
}
/// <summary>
/// Resolves the endpoint to request: <c>Domain</c> for the first page (or when no
/// page number is known), otherwise whatever the base implementation builds from
/// <paramref name="urlDictionary"/>.
/// </summary>
/// <param name="urlDictionary">
/// Placeholder values forwarded to <c>base.GetEndpoint</c> for pages after the first.
/// </param>
/// <returns>
/// <c>Domain</c> when <c>UrlPathDictionary</c> is null, has no <c>{pageNo}</c> entry,
/// or the page number is 1 or less; otherwise the base endpoint.
/// </returns>
/// <exception cref="System.FormatException">
/// The stored <c>{pageNo}</c> value is not a valid integer.
/// </exception>
protected override string GetEndpoint(Dictionary<string, string> urlDictionary)
{
    // NOTE(review): the page number is read from UrlPathDictionary rather than from
    // the urlDictionary argument. Kept as-is; confirm callers always pass the same instance.
    // UrlPathDictionary can be null before the first page is prepared; treat that the
    // same as a missing {pageNo} entry instead of throwing NullReferenceException.
    if (UrlPathDictionary is null
        || !UrlPathDictionary.TryGetValue("{pageNo}", out var currentPageNoStr))
    {
        return Domain;
    }

    // Page numbers are stored in invariant format, so parse them the same way.
    var currentPageNo = long.Parse(currentPageNoStr, System.Globalization.CultureInfo.InvariantCulture);
    if (currentPageNo <= 1)
    {
        return Domain;
    }

    return base.GetEndpoint(urlDictionary);
}
/// <summary>
/// Decides whether crawling should stop after the current page.
/// </summary>
/// <param name="postUrls">Post URLs passed to <c>IsContainStopAtPostUrl</c>.</param>
/// <returns>
/// <c>true</c> when <c>IsContainStopAtPostUrl</c> reports a match or the current
/// <c>{pageNo}</c> is 5 or greater; <c>false</c> when no page number is stored or the
/// page number is below 5.
/// </returns>
/// <exception cref="System.FormatException">
/// The stored <c>{pageNo}</c> value is not a valid integer.
/// </exception>
protected override bool IsStopCrawling(List<string> postUrls)
{
    // Page number at which crawling stops.
    const long LastPageNo = 5;

    if (IsContainStopAtPostUrl(postUrls))
    {
        return true;
    }

    // A null dictionary is handled like a missing {pageNo} entry rather than
    // letting TryGetValue throw NullReferenceException.
    if (UrlPathDictionary is null
        || !UrlPathDictionary.TryGetValue("{pageNo}", out var currentPageNoStr))
    {
        return false;
    }

    // Page numbers are stored in invariant format, so parse them the same way.
    var currentPageNo = long.Parse(currentPageNoStr, System.Globalization.CultureInfo.InvariantCulture);
    return currentPageNo >= LastPageNo;
}
/// <summary>
/// Steps the <c>{week}</c>/<c>{year}</c> URL placeholders one week back and returns
/// the updated <c>UrlPathDictionary</c>.
/// </summary>
/// <remarks>
/// If <c>UrlPathDictionary</c> is null or empty it is replaced with a new dictionary
/// holding the current <c>_weekCurr</c> and <c>_yearCurr</c> values, without stepping.
/// Otherwise, when a <c>{week}</c> entry exists: a stored week of 1 rolls over to week
/// 52 of the previous year (and adds 52 to <c>_weekNow</c>); any other stored week
/// decrements <c>_weekCurr</c>. Both placeholders are then rewritten from the fields.
/// </remarks>
/// <returns>The <c>UrlPathDictionary</c> instance after the update.</returns>
/// <exception cref="System.FormatException">
/// The stored <c>{week}</c> value is not a valid integer.
/// </exception>
protected override Dictionary<string, string> GetNextPageUrlDictionary()
{
    if (UrlPathDictionary?.Any() != true)
    {
        // Nothing stored yet: seed the placeholders from the current field values.
        UrlPathDictionary = new Dictionary<string, string>
        {
            { "{week}", _weekCurr.ToInvariantString() },
            { "{year}", _yearCurr.ToInvariantString() }
        };
        return UrlPathDictionary;
    }

    if (UrlPathDictionary.TryGetValue("{week}", out var currentWeekNoStr))
    {
        // Values are written with ToInvariantString(), so read them back with the
        // invariant culture instead of the thread's current culture.
        var currentWeekNo = long.Parse(currentWeekNoStr, System.Globalization.CultureInfo.InvariantCulture);
        if (currentWeekNo == 1)
        {
            // NOTE(review): rollover always lands on week 52; a year with 53 weeks
            // would never visit week 53 — confirm this is acceptable for the target site.
            _weekCurr = 52;
            _yearCurr--;
            // NOTE(review): _weekNow is shifted by a full year here; its consumer is
            // not visible in this block — presumably used to measure elapsed weeks.
            _weekNow += 52;
        }
        else
        {
            _weekCurr--;
        }
    }

    UrlPathDictionary.AddOrUpdate("{week}", _weekCurr.ToInvariantString());
    UrlPathDictionary.AddOrUpdate("{year}", _yearCurr.ToInvariantString());
    return UrlPathDictionary;
}