Пример #1
0
        /// <summary>
        /// Entry point of the crawler: remembers the output queue, downloads the
        /// index page and hands every matched post URL to <c>GetPostList</c>.
        /// </summary>
        /// <param name="queue">
        /// The queue that receives processed posts. It is cast directly to
        /// <c>Queue&lt;Post&gt;</c>, so any other type raises an InvalidCastException.
        /// </param>
        public void Process(object queue)
        {
            csdnPostQueue = (Queue <Post>)queue;

            // The index address is baseUrl formatted with the number 1.
            string indexUrl  = string.Format(baseUrl, 1);
            string indexHtml = BlogFunUtlity.GetURLContents(indexUrl);

            foreach (Match entry in regIndex.Matches(indexHtml))
            {
                Console.WriteLine("Enqueue one item.");

                // Capture group 1 of regIndex carries the URL passed on to GetPostList.
                string postUrl = entry.Groups[1].Value;
                GetPostList(postUrl);
            }
        }
Пример #2
0
        /// <summary>
        /// Walks the article list pages of the blog that owns <paramref name="url"/>
        /// and passes every article found (title + absolute URL) to <c>CheckItem</c>.
        /// </summary>
        /// <param name="url">
        /// URL of one article; everything before the first "details" is used as the
        /// blog home URL, to which "list/{page}" is appended.
        /// </param>
        private void GetPostList(string url)
        {
            Console.WriteLine("Processing {0}", url);

            // Without a "details" segment the home URL cannot be derived; previously
            // Substring(0, -1) threw an ArgumentOutOfRangeException here.
            int detailIndex = url.IndexOf("details", StringComparison.Ordinal);
            if (detailIndex < 0)
            {
                Console.WriteLine("Skipping {0}: no \"details\" segment found.", url);
                return;
            }

            // Plain concatenation instead of string.Format, so braces in the URL
            // cannot be misread as format placeholders.
            string listUrlPrefix = url.Substring(0, detailIndex) + "list/";

            Regex regPage       = new Regex("<a href=.*?list/(\\d+)\">.*?</a>");
            Regex regItemInPage = new Regex("<div class=\"article_title\">[\\d\\D]*?<span class=\"link_title\">[\\d\\D]*?href=\"(.*?)\">([\\d\\D]*?)</a>");

            string          firstPageContent = BlogFunUtlity.GetURLContents(listUrlPrefix + 1);
            MatchCollection pagerLinks       = regPage.Matches(firstPageContent);

            // A list page without pager links is treated as a single-page blog, so the
            // already-downloaded first page is still scanned (it used to be skipped).
            // NOTE(review): assumes a missing pager means "one page" - confirm against the site.
            int maxPage = 1;
            if (pagerLinks.Count > 0)
            {
                // The last pager link is taken as the highest page number.
                int parsedPage;
                string lastPageText = pagerLinks[pagerLinks.Count - 1].Groups[1].Value;
                if (int.TryParse(lastPageText, out parsedPage) && parsedPage > maxPage)
                {
                    maxPage = parsedPage;
                }
            }

            for (int page = 1; page <= maxPage; page++)
            {
                // Page 1 was fetched above to read the pager; do not download it twice.
                string pageContent = page == 1
                    ? firstPageContent
                    : BlogFunUtlity.GetURLContents(listUrlPrefix + page);

                foreach (Match item in regItemInPage.Matches(pageContent))
                {
                    string title = item.Groups[2].Value.Trim();
                    if (title.Contains("font"))
                    {
                        // Title contains "font" (presumably a <font> tag): keep only
                        // the text after the last '>'.
                        string[] titleParts = title.Split('>');
                        title = titleParts[titleParts.Length - 1];
                    }

                    CheckItem(new BlogIndexItem(title, "http://blog.csdn.net" + item.Groups[1].Value.Trim()));
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Localises one image reference: downloads the image named by
        /// <c>sem.Content</c> into the current directory (unless a file with that name
        /// already exists), rewrites <c>sem.Content</c> to "/{year}/{month}/{filename}",
        /// and passes a "netdisk" upload command line to <c>BlogFunUtlity.ExecuteCmd</c>.
        /// </summary>
        /// <param name="sem">
        /// Image marker whose <c>Content</c> holds the image URL on entry. On success it is
        /// replaced by the post-relative path; on failure it keeps the (query-stripped) URL.
        /// </param>
        /// <remarks>
        /// Failures are written to <c>processCsdnLog</c> and the console; nothing is rethrown.
        /// </remarks>
        private void processImage(ContentSem sem)
        {
            const string csdnImageHost = "http://img.blog.csdn.net/";
            const string imageOnPost   = "/{0}/{1}/{2}";
            string uploadImgCMDPattern = "netdisk /e \"upload \\\"{0}\\\" \\app\\PublicFiles\\img-zuihoude\\{1}\\{2}\"";

            bool isCsdnImage = sem.Content.StartsWith(csdnImageHost, StringComparison.Ordinal);

            // Images on the CSDN image host may carry a query string; drop it.
            if (isCsdnImage && sem.Content.Contains("?"))
            {
                sem.Content = sem.Content.Split('?')[0];
            }

            // The local file name is the last path segment of the URL; URLs on the
            // CSDN image host additionally get ".jpg" appended.
            string[] segments = sem.Content.Split('/');
            string   filename = segments[segments.Length - 1];
            if (isCsdnImage)
            {
                filename = filename + ".jpg";
            }

            if (File.Exists(filename))
            {
                // Already downloaded earlier: only rewrite the reference.
                // NOTE(review): this check uses the unfiltered name while the download
                // below saves under filterTitle(filename) - confirm they agree.
                DateTime today = DateTime.Now;
                sem.Content = string.Format(imageOnPost, today.Year, today.Month, filename);
                return;
            }

            try
            {
                HttpWebRequest httpReq = (HttpWebRequest)WebRequest.Create(sem.Content);
                using (HttpWebResponse httpRes = (HttpWebResponse)httpReq.GetResponse())
                {
                    if (logImageHttpFailure(httpRes.StatusCode, sem.Content))
                    {
                        return;
                    }

                    filename = BlogFunUtlity.filterTitle(filename);
                    Console.WriteLine("Will Save File : {0}", filename);

                    // using-blocks close both streams even when the copy fails midway.
                    // FileMode.Create truncates an existing file, so a shorter download
                    // never leaves stale trailing bytes (OpenOrCreate did).
                    using (Stream responseStream = httpRes.GetResponseStream())
                    using (FileStream writer = new FileStream(filename, FileMode.Create, FileAccess.Write))
                    {
                        byte[] buffer = new byte[1024];
                        int    count;
                        while ((count = responseStream.Read(buffer, 0, buffer.Length)) > 0)
                        {
                            writer.Write(buffer, 0, count);
                        }
                    }
                }

                // Capture the date once so path and upload command cannot disagree.
                DateTime now = DateTime.Now;
                sem.Content = string.Format(imageOnPost, now.Year, now.Month, filename);

                // NOTE(review): the file name is embedded in a command line; this relies
                // on filterTitle having removed quote/shell characters - confirm.
                string fullPath = System.Environment.CurrentDirectory + "\\" + filename;
                string cmd      = string.Format(uploadImgCMDPattern, fullPath, now.Year, now.Month);
                processCsdnLog.Add(string.Format("Will execute command : {0}", cmd));
                Console.WriteLine("Will execute command : {0}", cmd);
                BlogFunUtlity.ExecuteCmd(cmd);
            }
            catch (WebException ex)
            {
                // GetResponse reports 4xx/5xx statuses by throwing, so the 404/403
                // messages have to be produced from the exception's response.
                bool reported = false;
                using (HttpWebResponse errorResponse = ex.Response as HttpWebResponse)
                {
                    if (errorResponse != null)
                    {
                        reported = logImageHttpFailure(errorResponse.StatusCode, sem.Content);
                    }
                }

                if (!reported)
                {
                    string message = sem.Content + " meets issue: " + ex.ToString();
                    processCsdnLog.Add(message);
                    Console.WriteLine(message);
                }
            }
            catch (Exception ex)
            {
                string message = sem.Content + " meets issue: " + ex.ToString();
                processCsdnLog.Add(message);
                Console.WriteLine(message);
            }
        }

        /// <summary>
        /// Logs the dedicated message for a 404 or 403 image response.
        /// </summary>
        /// <returns>true when the status was 404/403 and has been logged; otherwise false.</returns>
        private bool logImageHttpFailure(HttpStatusCode status, string imageUrl)
        {
            if (status == HttpStatusCode.NotFound)
            {
                processCsdnLog.Add(string.Format("Image {0} return 404 Not Found.", imageUrl));
                Console.WriteLine("Image {0} return 404 Not Found.", imageUrl);
                return true;
            }

            if (status == HttpStatusCode.Forbidden)
            {
                processCsdnLog.Add(string.Format("Image {0} return  403 Forbidden.", imageUrl));
                Console.WriteLine("Image {0} return 403 Forbidden.", imageUrl);
                return true;
            }

            return false;
        }
Пример #4
0
        /// <summary>
        /// Downloads one article, extracts its body with <c>regContent</c>, replaces the
        /// marked image/code spans with their processed content and enqueues the
        /// resulting <c>Post</c> on <c>csdnPostQueue</c>.
        /// </summary>
        /// <param name="item">Index entry supplying the article URL and title.</param>
        private void processContent(BlogIndexItem item)
        {
            string pageHtml = BlogFunUtlity.GetURLContents(item.URL);

            // Capture group 1 of regContent is the article body.
            string body = regContent.Match(pageHtml).Groups[1].Value;

            List <ContentSem> marks = new List <ContentSem>();

            // Mark every image whose captured value starts with "http:".
            foreach (Match imageMatch in regImage.Matches(body))
            {
                Group source = imageMatch.Groups[1];
                if (!source.Value.StartsWith("http:"))
                {
                    continue;
                }

                marks.Add(new ContentSem("img", source.Index, source.Length, source.Value));
            }

            // Mark code spans, except those that themselves contain an image match.
            foreach (Match codeMatch in regCode.Matches(body))
            {
                Group      whole    = codeMatch.Groups[0];
                ContentSem codeMark = new ContentSem("code", whole.Index, whole.Length, whole.Value);
                if (!regImage.IsMatch(codeMark.Content))
                {
                    marks.Add(codeMark);
                }
            }

            StringBuilder output = new StringBuilder();

            if (marks.Count == 0)
            {
                output.Append(body);
            }
            else
            {
                IEnumerable <ContentSem> ordered = marks.OrderBy(mark => mark.Index);
                ProcessImageCode(ordered);

                // Stitch the result together: untouched text between marks, then each
                // mark's (possibly rewritten) content, then the remaining tail.
                int cursor = 0;
                foreach (ContentSem mark in ordered)
                {
                    output.Append(body.Substring(cursor, mark.Index - cursor));
                    output.Append(mark.Content);
                    cursor = mark.Index + mark.Length;
                }
                output.Append(body.Substring(cursor));
            }

            Post post = new Post();
            post.Title   = item.Title;
            post.Content = output.ToString();

            // The queue is locked while enqueuing.
            lock (csdnPostQueue)
            {
                csdnPostQueue.Enqueue(post);
            }

            Console.WriteLine("Processing {0} Done.", item.URL);
        }