예제 #1
0
        /// <summary>
        /// Entry point of the crawler: stores the output queue, downloads the first index
        /// page and hands every URL captured by <c>regIndex</c> to <see cref="GetPostList"/>.
        /// </summary>
        /// <param name="queue">
        /// The <c>Queue&lt;Post&gt;</c> that finished posts are enqueued on. It is declared as
        /// <c>object</c> and cast here; an <see cref="InvalidCastException"/> is thrown if it
        /// is any other type.
        /// </param>
        public void Process(object queue)
        {
            csdnPostQueue = (Queue<Post>)queue;

            // Page 1 of the index is the only index page this method reads.
            string indexUrl  = string.Format(baseUrl, 1);
            string indexHtml = BlogFunUtlity.GetURLContents(indexUrl);

            foreach (Match entry in regIndex.Matches(indexHtml))
            {
                Console.WriteLine("Enqueue one item.");

                // Capture group 1 of regIndex holds the URL passed on to GetPostList.
                string entryUrl = entry.Groups[1].Value;
                GetPostList(entryUrl);
            }
        }
예제 #2
0
        /// <summary>
        /// Derives a blog's home URL from an article URL, walks the blog's "list/{page}"
        /// pages and passes every article link found there to <c>CheckItem</c>.
        /// </summary>
        /// <param name="url">
        /// An article URL containing the segment "details"; everything before that segment
        /// is used as the blog home URL. URLs without the segment are logged and skipped.
        /// </param>
        private void GetPostList(string url)
        {
            Console.WriteLine("Processing {0}", url);

            // IndexOf returns -1 when the segment is missing; Substring(0, -1) would throw,
            // so such URLs are skipped instead of aborting the whole crawl.
            int detailIndex = url.IndexOf("details", StringComparison.Ordinal);
            if (detailIndex < 0)
            {
                Console.WriteLine("Skipping {0}: no \"details\" segment found.", url);
                return;
            }

            // Built by concatenation rather than string.Format so that braces in the
            // scraped URL cannot be misread as format placeholders.
            string listUrlPrefix = url.Substring(0, detailIndex) + "list/";

            Regex           regPage          = new Regex("<a href=.*?list/(\\d+)\">.*?</a>");
            string          firstPageContent = BlogFunUtlity.GetURLContents(listUrlPrefix + 1);
            MatchCollection pagerLinks       = regPage.Matches(firstPageContent);

            // When no pager link is found, the page that was just downloaded is still
            // scanned, so the page count never drops below 1.
            int maxPage = 1;
            if (pagerLinks.Count > 0)
            {
                // The last pager link is taken as the highest page number. TryParse keeps
                // an over-long or otherwise unparsable number from throwing.
                int lastLinkedPage;
                string lastLinkedText = pagerLinks[pagerLinks.Count - 1].Groups[1].Value;
                if (int.TryParse(lastLinkedText, out lastLinkedPage) && lastLinkedPage > maxPage)
                {
                    maxPage = lastLinkedPage;
                }
            }

            Regex regItemInPage = new Regex("<div class=\"article_title\">[\\d\\D]*?<span class=\"link_title\">[\\d\\D]*?href=\"(.*?)\">([\\d\\D]*?)</a>");

            for (int page = 1; page <= maxPage; page++)
            {
                // Page 1 was already downloaded to read the pager; reuse it instead of
                // requesting the same URL a second time.
                string pageContent = page == 1
                    ? firstPageContent
                    : BlogFunUtlity.GetURLContents(listUrlPrefix + page);

                foreach (Match item in regItemInPage.Matches(pageContent))
                {
                    string title = item.Groups[2].Value.Trim();
                    if (title.Contains("font"))
                    {
                        // The title carries markup containing "font": keep only the text
                        // after the last '>' and trim it again, since stripping the markup
                        // can expose leading whitespace.
                        string[] titleParts = title.Split('>');
                        title = titleParts[titleParts.Length - 1].Trim();
                    }

                    // Group 1 is the link's href, which is appended to the site root here.
                    CheckItem(new BlogIndexItem(title, "http://blog.csdn.net" + item.Groups[1].Value.Trim()));
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Downloads the article at <c>item.URL</c>, extracts its body with <c>regContent</c>,
        /// collects the image and code fragments found in it, passes them through
        /// <c>ProcessImageCode</c>, splices each fragment's <c>Content</c> back into the body,
        /// and enqueues the result as a <c>Post</c> on <c>csdnPostQueue</c>.
        /// </summary>
        /// <param name="item">Index entry supplying the article URL and the post title.</param>
        private void processContent(BlogIndexItem item)
        {
            string content = BlogFunUtlity.GetURLContents(item.URL);

            // NOTE(review): when regContent does not match, group 1 is empty and an empty
            // post is still enqueued below - confirm that this is intended.
            Match  article = regContent.Match(content);
            string result  = article.Groups[1].Value;

            List<ContentSem> markList = new List<ContentSem>();

            foreach (Match imgItem in regImage.Matches(result))
            {
                // Only captures starting with "http:" are marked; the comparison is ordinal
                // because this is a URL scheme check, not linguistic text.
                Group source = imgItem.Groups[1];
                if (source.Value.StartsWith("http:", StringComparison.Ordinal))
                {
                    markList.Add(new ContentSem("img", source.Index, source.Length, source.Value));
                }
            }

            foreach (Match codeItem in regCode.Matches(result))
            {
                ContentSem cs = new ContentSem("code", codeItem.Groups[0].Index, codeItem.Groups[0].Length, codeItem.Groups[0].Value);

                // Code fragments whose content matches regImage are left out of the mark list.
                if (!regImage.IsMatch(cs.Content))
                {
                    markList.Add(cs);
                }
            }

            StringBuilder buffer = new StringBuilder();

            if (markList.Count > 0)
            {
                // OrderBy is deferred; materialize it once so ProcessImageCode and the
                // splice loop below walk the same sorted sequence without re-sorting.
                IEnumerable<ContentSem> orderList = markList.OrderBy(c => c.Index).ToList();
                ProcessImageCode(orderList);

                // Copy the text between marks verbatim and substitute each marked span
                // with the mark's Content.
                int index = 0;
                foreach (ContentSem listItem in orderList)
                {
                    buffer.Append(result.Substring(index, listItem.Index - index));
                    buffer.Append(listItem.Content);
                    index = listItem.Index + listItem.Length;
                }
                buffer.Append(result.Substring(index, result.Length - index));
            }
            else
            {
                buffer.Append(result);
            }

            Post csdnPost = new Post();
            csdnPost.Title   = item.Title;
            csdnPost.Content = buffer.ToString();

            // Enqueue under a lock on the queue instance itself.
            lock (csdnPostQueue)
            {
                csdnPostQueue.Enqueue(csdnPost);
            }

            Console.WriteLine("Processing {0} Done.", item.URL);
        }