public static void Gather(IAdministratorInfo adminInfo, int siteId, int ruleId, string guid) { var cache = ProgressCache.Init(guid, "开始获取链接..."); var gatherRuleInfo = Main.GatherRuleRepository.GetGatherRuleInfo(ruleId); var siteInfo = Context.SiteApi.GetSiteInfo(siteId); var channelInfo = Context.ChannelApi.GetChannelInfo(siteId, gatherRuleInfo.ChannelId); if (channelInfo == null) { channelInfo = Context.ChannelApi.GetChannelInfo(siteId, siteId); gatherRuleInfo.ChannelId = siteId; } var regexUrlInclude = GetRegexString(gatherRuleInfo.UrlInclude); var regexTitleInclude = GetRegexString(gatherRuleInfo.TitleInclude); var regexContentExclude = GetRegexString(gatherRuleInfo.ContentExclude); var regexListArea = GetRegexArea(gatherRuleInfo.ListAreaStart, gatherRuleInfo.ListAreaEnd); var regexChannel = GetRegexChannel(gatherRuleInfo.ContentChannelStart, gatherRuleInfo.ContentChannelEnd); var regexContent = GetRegexContent(gatherRuleInfo.ContentContentStart, gatherRuleInfo.ContentContentEnd); var regexContent2 = string.Empty; if (!string.IsNullOrEmpty(gatherRuleInfo.ContentContentStart2) && !string.IsNullOrEmpty(gatherRuleInfo.ContentContentEnd2)) { regexContent2 = GetRegexContent(gatherRuleInfo.ContentContentStart2, gatherRuleInfo.ContentContentEnd2); } var regexContent3 = string.Empty; if (!string.IsNullOrEmpty(gatherRuleInfo.ContentContentStart3) && !string.IsNullOrEmpty(gatherRuleInfo.ContentContentEnd3)) { regexContent3 = GetRegexContent(gatherRuleInfo.ContentContentStart3, gatherRuleInfo.ContentContentEnd3); } var regexNextPage = GetRegexUrl(gatherRuleInfo.ContentNextPageStart, gatherRuleInfo.ContentNextPageEnd); var regexTitle = GetRegexTitle(gatherRuleInfo.ContentTitleStart, gatherRuleInfo.ContentTitleEnd); var contentAttributes = TranslateUtils.StringCollectionToStringList(gatherRuleInfo.ContentAttributes); var contentAttributesXml = TranslateUtils.ToNameValueCollection(gatherRuleInfo.ContentAttributesXml); var contentUrls = GetContentUrlList(gatherRuleInfo, 
regexListArea, regexUrlInclude, cache); cache.TotalCount = gatherRuleInfo.GatherNum > 0 ? gatherRuleInfo.GatherNum : contentUrls.Count; cache.IsSuccess = true; cache.Message = "开始采集内容..."; var contentTitleDict = new Dictionary <int, IList <string> >(); var channelIdAndContentIdList = new List <KeyValuePair <int, int> >(); foreach (var contentUrl in contentUrls) { if (GatherOneByUrl(siteInfo, channelInfo, gatherRuleInfo.IsSaveImage, gatherRuleInfo.IsSetFirstImageAsImageUrl, gatherRuleInfo.IsEmptyContentAllowed, gatherRuleInfo.IsSameTitleAllowed, gatherRuleInfo.IsChecked, gatherRuleInfo.Charset, contentUrl, gatherRuleInfo.CookieString, regexTitleInclude, regexContentExclude, gatherRuleInfo.ContentHtmlClearCollection, gatherRuleInfo.ContentHtmlClearTagCollection, gatherRuleInfo.ContentReplaceFrom, gatherRuleInfo.ContentReplaceTo, regexTitle, regexContent, regexContent2, regexContent3, regexNextPage, regexChannel, contentAttributes, contentAttributesXml, contentTitleDict, channelIdAndContentIdList, adminInfo, out var title, out var errorMessage)) { cache.SuccessCount++; cache.IsSuccess = true; cache.Message = $"成功采集内容:{title}"; }
/// <summary>
/// Creates a new progress entry and registers it in the cache under <paramref name="guid"/>.
/// </summary>
/// <param name="guid">Cache key for the entry. When null or empty, nothing is created or cached.</param>
/// <param name="message">Initial value of the entry's <c>Message</c>.</param>
/// <returns>
/// The newly created entry, or <c>null</c> when <paramref name="guid"/> is null or empty.
/// Callers must be prepared for the <c>null</c> result.
/// </returns>
public static ProgressCache Init(string guid, string message)
{
    // Without a key there is nothing to register the entry under.
    if (string.IsNullOrEmpty(guid))
    {
        return null;
    }

    var progress = new ProgressCache();
    progress.Status = StatusProgress;
    progress.IsSuccess = true;
    progress.Message = message;
    progress.FailureMessages = new List<string>();

    // Stored through CacheUtils.InsertHours with an hours argument of 1.
    CacheUtils.InsertHours(guid, progress, 1);

    return progress;
}
/// <summary>
/// Collects the content URLs extracted from every list page of a gather rule.
/// </summary>
/// <param name="gatherRuleInfo">
/// Rule supplying the list pages (via <c>GetGatherUrlList</c>), the charset, the cookie string
/// and the <c>IsOrderByDesc</c> flag.
/// </param>
/// <param name="regexListArea">List-area pattern forwarded to <c>GetContentUrls</c>.</param>
/// <param name="regexUrlInclude">URL-include pattern forwarded to <c>GetContentUrls</c>.</param>
/// <param name="cache">
/// Optional progress entry that receives status and failure messages. May be <c>null</c>
/// (<c>ProgressCache.Init</c> returns <c>null</c> for an empty guid), in which case the URLs
/// are still collected but no progress is reported.
/// </param>
/// <returns>
/// All content URLs found, in list-page order; the whole list is reversed when
/// <c>gatherRuleInfo.IsOrderByDesc</c> is set.
/// </returns>
public static List<string> GetContentUrlList(GatherRuleInfo gatherRuleInfo, string regexListArea, string regexUrlInclude, ProgressCache cache)
{
    var gatherUrls = GetGatherUrlList(gatherRuleInfo);
    var contentUrls = new List<string>();

    foreach (var gatherUrl in gatherUrls)
    {
        // Progress reporting is optional: guard every access so a missing cache
        // does not abort the run with a NullReferenceException.
        if (cache != null)
        {
            cache.IsSuccess = true;
            cache.Message = "获取链接:" + gatherUrl;
        }

        try
        {
            var urls = GetContentUrls(gatherUrl, gatherRuleInfo.Charset, gatherRuleInfo.CookieString, regexListArea, regexUrlInclude);
            contentUrls.AddRange(urls);
        }
        catch (Exception ex)
        {
            // Best effort: a failing list page is recorded (when a cache is available)
            // and the remaining pages are still processed.
            if (cache != null)
            {
                cache.IsSuccess = false;
                cache.Message = ex.Message;
                cache.FailureMessages.Add(ex.Message);
            }
        }
    }

    if (gatherRuleInfo.IsOrderByDesc)
    {
        contentUrls.Reverse();
    }

    return contentUrls;
}