/// <summary>
/// Parses the HTML fragment of a single post, remembers the author in <c>TmpUser</c>,
/// adds a user-relation counter entry for replies, and saves the post as an <c>M01Topic</c>.
/// </summary>
/// <param name="post">HTML fragment of one post.</param>
/// <param name="levelOneUid">Uid of the author of the level "1" post; overwritten when this post is level "1".</param>
/// <param name="levelOnePid">Pid of the level "1" post; overwritten when this post is level "1".</param>
/// <param name="forum">Forum identifier stored on the saved topic.</param>
/// <param name="title">Thread title stored on the saved topic.</param>
/// <remarks>
/// The date header is split at '#' into date and level; a header without '#' raises
/// <see cref="IndexOutOfRangeException"/>.
/// </remarks>
private static void ProcessPost(string post, ref string levelOneUid, ref string levelOnePid, string forum, string title)
{
    var uid = WebTool.GetContent("userinfo.php?id=", "&", post);
    var uName = WebTool.StripTagsCharArray(WebTool.GetContent("<div class=\"fn\">", "</div>", post));
    var content = WebTool.GetContent("<div class=\"single-post-content\">", "<div class=\"single-post-content-sig\">", post);
    var pid = WebTool.GetContent("<div id=\"ct", "\"", post);

    // Header text has the form "<date> #<level>".
    var tmpar = WebTool.GetContent("<div class=\"date\">", "</div>", post).Split('#');
    var pdate = tmpar[0].Trim();
    var plevel = tmpar[1].Trim();

    var aid = string.Empty;
    var aowner = string.Empty;

    // Name of the quoted author, taken from the first blockquote ("<b>NAME wrote:</b>").
    var blockquote = WebTool.GetContent("<blockquote>", "</blockquote>", post);
    var replyTo = WebTool.GetContent("<b>", " wrote:</b>", blockquote);

    // First uid seen for a display name wins; later posts do not overwrite it.
    if (!TmpUser.ContainsKey(uName))
    {
        TmpUser.Add(uName, uid);
    }

    if (plevel == "1")
    {
        levelOnePid = pid;
        levelOneUid = uid;
    }
    else
    {
        aid = levelOnePid;
        aowner = levelOneUid;

        // The relation points at the quoted user when that name is known; with no quote,
        // or a quoted name that was never registered in TmpUser, it points at the thread
        // owner instead of failing on a missing dictionary key.
        var relationTarget = levelOneUid;
        string quotedUid;
        if (!string.IsNullOrEmpty(replyTo) && TmpUser.TryGetValue(replyTo, out quotedUid))
        {
            relationTarget = quotedUid;
        }

        //add counter
        ThriftTool.CounterAdd(uid, "M01UserRelaction", relationTarget, 1);
    }

    var topic = new M01Topic
    {
        Forum = forum,
        Pid = pid,
        Uid = uid,
        Content = content,
        Pdate = pdate,
        Plevel = plevel,
        Aid = aid,
        Aowner = aowner,
        Title = title
    };
    SaveTopic(topic);
}
/// <summary>
/// Processes a forum topic listing: walks the listing page by page, starting at
/// <paramref name="thisPage"/>, and queues every topic with fewer than 500 replies.
/// </summary>
/// <param name="forumUrl">Listing URL without the page parameter.</param>
/// <param name="thisPage">Page to start from; page 1 uses <paramref name="forumUrl"/> unchanged, other pages append "&amp;p=N".</param>
public static void ProcessForum(string forumUrl, int thisPage)
{
    // Pages are walked in a loop so the call depth does not grow with the page count.
    while (true)
    {
        var thisUrl = thisPage == 1 ? forumUrl : forumUrl + "&p=" + thisPage;

        //Get Content
        Utility.Logging("Processing Forum = " + thisUrl, Utility.DebugLevel.Info);
        var source = WebTool.GetHtmlAsyncUtf8(thisUrl);

        //Get Page
        var page = WebTool.GetContent("<p class=\"numbers\">", "</p>", source);
        var pageTotal = WebTool.GetContent("共", "頁", page);

        source = WebTool.GetContent("<table summary=\"文章列表\">", "</table>", source);
        source = WebTool.GetContent("<tbody>", "</tbody>", source).Replace("\n", string.Empty);

        //topic list: one entry per table row, ignoring fragments of 10 characters or fewer
        var arTopic = source
            .Split(new[] { "<tr>" }, StringSplitOptions.RemoveEmptyEntries)
            .Where(x => x.Trim().Length > 10)
            .ToArray();

        foreach (var s in arTopic)
        {
            var reply = WebTool.GetContent("<td width=\"7%\" class=\"reply\">", "</td>", s).Replace(",", string.Empty);

            // A row whose reply cell is missing or not numeric is skipped rather than
            // aborting the whole listing with a FormatException.
            int replyCount;
            if (!int.TryParse(reply, out replyCount))
            {
                Utility.Logging("ProcessForum: skipped row without a numeric reply count");
                continue;
            }

            // Topics with 500 or more replies are not queued.
            if (replyCount >= 500)
            {
                continue;
            }

            var url = WebTool.GetContent("<td class=\"subject\">", "</a>", s);
            var topicUrl = WebTool.GetContent("<a href=\"", "\"", url);
            var topicTitle = WebTool.StripTagsCharArray(url.Split(new[] { "</td>" }, StringSplitOptions.RemoveEmptyEntries)[0]).Trim();
            QueuePage(topicUrl, topicTitle);
        }

        //go to next page; a missing or non-numeric page total ends paging
        int totalPages;
        if (!int.TryParse(pageTotal, out totalPages) || thisPage >= totalPages)
        {
            return;
        }

        Thread.Sleep(ChangePage);
        thisPage++;
    }
}
/// <summary>
/// Processes a thread: handles every post on page <paramref name="thisPage"/> and then
/// recurses into the following pages until the last one.
/// </summary>
/// <param name="url">Thread URL without the page parameter; the forum id is read from its "f=" query value.</param>
/// <param name="thisPage">Page to process; page 1 uses <paramref name="url"/> unchanged, other pages append "&amp;p=N".</param>
/// <param name="aid">Pid of the thread's level "1" post as known so far (stored as <c>Aid</c> on replies).</param>
/// <param name="aowner">Uid of the author of the level "1" post as known so far (stored as <c>Aowner</c> on replies).</param>
/// <returns>
/// <c>true</c> when this page and all following pages were processed; <c>false</c> when any
/// exception was caught, including a page total that cannot be parsed as an integer.
/// </returns>
public static bool ProcessUrl(string url, int thisPage, string aid, string aowner)
{
    var thisUrl = thisPage == 1 ? url : url + "&p=" + thisPage;
    Utility.Logging("Processing Url = " + thisUrl, Utility.DebugLevel.Info);

    try
    {
        var forum = thisUrl.Split(new[] { "f=" }, StringSplitOptions.RemoveEmptyEntries)[1].Split('&')[0];

        //Get Content
        var source = WebTool.GetHtmlAsyncUtf8(thisUrl);
        source = WebTool.GetContent("<div class=\"forum-content\">", "<div class=\"sidebar\">", source);

        //Get Title
        var title = WebTool.GetContent("<h2 class=\"topic\">", "</h2>", source);

        //Get Page
        var page = WebTool.GetContent("<p class=\"numbers\">", "</p>", source);
        var pageTotal = WebTool.GetContent("共", "頁", page);

        //process post -begin-
        source = WebTool.GetContent("<div class=\"single-post\">", "<div class=\"pagination\">", source);
        var posts = source.Split(new[] { "<div class=\"single-post\">" }, StringSplitOptions.RemoveEmptyEntries);

        foreach (var s in posts)
        {
            // ProcessPost takes (levelOneUid, levelOnePid): the owner uid goes first and the
            // first-post pid second, so aowner/aid are passed in that order.
            ProcessPost(s, ref aowner, ref aid, forum, title);
        }
        //process post -end-

        if (thisPage >= int.Parse(pageTotal))
        {
            return true;
        }

        Thread.Sleep(ChangePage);
        return ProcessUrl(url, thisPage + 1, aid, aowner);
    }
    catch (Exception ex)
    {
        Utility.Logging("ProcessUrl Error:" + ex.Message);
        return false;
    }
}