/// <summary>
/// Parses a list page: runs <paramref name="Pattern"/> against <paramref name="Html"/> and converts
/// every regex match into an <see cref="Article"/> through <c>Match2Item</c>.
/// </summary>
/// <returns>
/// <c>null</c> when <paramref name="Html"/> or <paramref name="Pattern"/> is null, empty or whitespace.
/// Otherwise an <see cref="ArticleList"/> whose <c>Articles</c> is never null (it is empty when nothing
/// matched, when matching failed, or when matching timed out), whose <c>Count</c> equals the number of
/// articles, and whose <c>CurrentPage</c> is 1.
/// </returns>
/// <param name="Html">Raw HTML of the list page.</param>
/// <param name="Pattern">Regular expression applied with IgnoreCase | Multiline and a 10 second timeout.</param>
/// <param name="Url">URL of the page; passed to <c>Match2Item</c> and included in log messages. May be null.</param>
/// <param name="RecogNextPage">
/// NOTE(review): not read anywhere in this method body; kept only for signature compatibility.
/// </param>
public static ArticleList ParseList(string Html, string Pattern, string Url = null, bool RecogNextPage = true)
{
    // Input validation: nothing to parse without both the HTML and the pattern.
    if (string.IsNullOrWhiteSpace(Html) || string.IsNullOrWhiteSpace(Pattern))
    {
        return null;
    }

    ArticleList articleList = new ArticleList();

    // Handle Traditional Chinese characters first (per the original comment), before matching.
    Html = TextCleaner.ToSimplifyString(Html);

    #region Item collection
    // Start from an empty list so the result is well-formed even when nothing matches;
    // previously Items stayed null in that case and Items.Count below threw NullReferenceException.
    List<Article> Items = new List<Article>();

#if !NET40
    MatchCollection Matches = Regex.Matches(Html, Pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline, new TimeSpan(0, 0, 10));
#else
    MatchCollection Matches = RegexUtility.Matches(Html, Pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline, 10);
#endif

    if (Matches == null)
    {
        Logger.Warn(string.Format("正则ParseList失败,Url={0}", Url));
    }
    else
    {
        int MatchesCount = 0;
        try
        {
            // Reading Count is done inside the try because this is where the original code
            // expects a regex timeout to surface.
            MatchesCount = Matches.Count;
        }
#if !NET40
        catch (RegexMatchTimeoutException e)
#else
        catch (Exception e)
#endif
        {
            Logger.Error(string.Format("正则ParseList超时,Url={0}", Url), e);
            Matches = null;
        }

        if (Matches != null && MatchesCount > 0)
        {
            Items.Capacity = MatchesCount;

            foreach (Match m in Matches)
            {
                Article Item = new Article();
                Match2Item(m, ref Item, Url, true);

                // Match2Item receives Item by ref and may null it out; skip those.
                if (Item != null)
                {
                    Items.Add(Item);
                }
            }
        }
    }

    articleList.Articles = Items;
    articleList.Count = Items.Count;
    articleList.CurrentPage = 1;
    #endregion Item collection

    return articleList;
}