Ejemplo n.º 1
0
        /// <summary>
        /// Runs a regular expression over an HTML document and converts every match into an
        /// <c>Article</c> (via <c>Match2Item</c>), returning the results wrapped in an <c>ArticleList</c>.
        /// </summary>
        /// <returns>
        /// <c>null</c> when <paramref name="Html"/> or <paramref name="Pattern"/> is null, empty or whitespace.
        /// Otherwise an <c>ArticleList</c> whose <c>Articles</c> holds one entry per match that
        /// <c>Match2Item</c> left non-null, <c>Count</c> is the number of those entries and
        /// <c>CurrentPage</c> is 1. When nothing matches, the match collection is unavailable, or the
        /// regex times out, the list is empty and <c>Count</c> is 0.
        /// </returns>
        /// <param name="Html">HTML text to search. It is passed through <c>TextCleaner.ToSimplifyString</c> before matching.</param>
        /// <param name="Pattern">Regular expression applied with <c>IgnoreCase | Multiline</c> and a 10 second timeout.</param>
        /// <param name="Url">URL of the page; used in log messages and forwarded to <c>Match2Item</c>. May be <c>null</c>.</param>
        /// <param name="RecogNextPage">Not read by this method body; kept so existing callers continue to compile.</param>
        public static ArticleList ParseList(string Html, string Pattern, string Url = null, bool RecogNextPage = true)
        {
            // Input check: without both a document and a pattern there is nothing to parse.
            if (string.IsNullOrWhiteSpace(Html) || string.IsNullOrWhiteSpace(Pattern))
            {
                return(null);
            }

            ArticleList articleList = new ArticleList();

            // Handle Traditional Chinese characters first.
            Html = TextCleaner.ToSimplifyString(Html);
            #region Item collection

            // Start from an empty list so that the "no matches", "null collection" and "timeout"
            // paths all yield a valid, empty result instead of dereferencing null below.
            List <Article> Items = new List <Article>();
#if !NET40
            MatchCollection Matches = Regex.Matches(Html, Pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline, new TimeSpan(0, 0, 10));
#else
            MatchCollection Matches = RegexUtility.Matches(Html, Pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline, 10);
#endif
            int MatchesCount = 0;
            if (Matches == null)
            {
                Logger.Warn(string.Format("正则ParseList失败,Url={0}", Url));
            }
            else
            {
                try
                {
                    // MatchCollection is evaluated lazily; reading Count forces the whole scan,
                    // so this is where a match timeout surfaces.
                    MatchesCount = Matches.Count;
                }
#if !NET40
                catch (RegexMatchTimeoutException e)
#else
                catch (Exception e)
#endif
                {
                    Logger.Error(string.Format("正则ParseList超时,Url={0}", Url), e);
                    Matches = null;
                }
            }

            if (Matches != null && MatchesCount > 0)
            {
                // Pre-size for the common case where every match yields an item.
                Items.Capacity = MatchesCount;

                foreach (Match m in Matches)
                {
                    Article Item = new Article();
                    Match2Item(m, ref Item, Url, true);

                    // Match2Item takes the item by ref and may null it out; skip those.
                    if (Item != null)
                    {
                        Items.Add(Item);
                    }
                }
            }

            articleList.Articles    = Items;
            articleList.Count       = Items.Count;
            articleList.CurrentPage = 1;
            #endregion Item collection

            return(articleList);
        }