/// <summary>
/// Crawls a single topic, starting at <paramref name="thisPage"/> and walking forward
/// until the page count reported by the pager is reached. Every post found on each
/// page is handed to <c>ProcessPost</c>.
/// </summary>
/// <param name="url">
/// Topic URL without a page argument. It must contain an "f=" query value; the text
/// between "f=" and the next '&amp;' is used as the forum id.
/// </param>
/// <param name="thisPage">
/// Page to start from. Page 1 is fetched from <paramref name="url"/> unchanged; any
/// other page appends a "p" query argument.
/// </param>
/// <param name="aid">
/// Post id of the topic's level-1 post, carried from page to page. It is overwritten
/// as soon as a post with level "1" is encountered.
/// </param>
/// <param name="aowner">
/// User id of the author of the level-1 post, carried from page to page. It is
/// overwritten as soon as a post with level "1" is encountered.
/// </param>
/// <remarks>
/// Any exception stops the crawl of this topic. Only the exception message is appended
/// to <c>TempLog</c>; nothing is rethrown.
/// </remarks>
public static void ProcessUrl(string url, int thisPage, string aid, string aowner)
{
    try
    {
        // Iterate over pages instead of recursing once per page, so long topics do not
        // grow the call stack. The whole loop stays inside one try/catch: a failure on
        // any page ends the crawl exactly as it did with the recursive form.
        for (var pageNumber = thisPage; ; pageNumber++)
        {
            var pageUrl = pageNumber == 1 ? url : url + "&p=" + pageNumber;

            var forum = pageUrl.Split(new[] { "f=" }, StringSplitOptions.RemoveEmptyEntries)[1].Split('&')[0];

            // Fetch the page; the raw HTML is kept in TempLog for diagnostics.
            var source = WebTool.GetHtmlUtf8(pageUrl);
            TempLog = source;
            source = WebTool.GetContent("<div class=\"forum-content\">", "<div class=\"sidebar\">", source);

            // Topic title.
            var title = WebTool.GetContent("<h2 class=\"topic\">", "</h2>", source);

            // Pager: the total page count is the text between the two markers below.
            var pager = WebTool.GetContent("<p class=\"numbers\">", "</p>", source);
            var pageTotal = WebTool.GetContent("共", "頁", pager);

            // Cut out the post area and split it into one chunk per post.
            source = WebTool.GetContent("<div class=\"single-post\">", "<div class=\"pagination\">", source);
            var posts = source.Split(new[] { "<div class=\"single-post\">" }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var post in posts)
            {
                // ProcessPost takes (ref levelOneUid, ref levelOnePid): the owner's user id
                // first, then the article's post id. Pass aowner/aid in that order so that
                // caller-supplied values keep their meaning when the crawl starts on a page
                // that does not contain the level-1 post.
                ProcessPost(post, ref aowner, ref aid, forum, title);
            }

            // A missing or non-numeric page count means there is nothing further to fetch;
            // treat it as the last page instead of relying on a parse exception.
            int totalPages;
            if (!int.TryParse(pageTotal, out totalPages) || pageNumber >= totalPages)
            {
                break;
            }
        }
    }
    catch (Exception ex)
    {
        // Best-effort crawl: record the failure and give up on this topic.
        TempLog += ex.Message;
    }
}
/// <summary>
/// Extracts the fields of one post from its HTML fragment, builds an
/// <c>M01Topic</c> from them and passes it to <c>SaveTopic</c>.
/// </summary>
/// <param name="post">HTML fragment of a single post.</param>
/// <param name="levelOneUid">
/// User id of the author of the level-1 post. Set from this post when its level is "1";
/// otherwise read to fill <c>Aowner</c>.
/// </param>
/// <param name="levelOnePid">
/// Post id of the level-1 post. Set from this post when its level is "1"; otherwise
/// read to fill <c>Aid</c>.
/// </param>
/// <param name="forum">Forum id stored on the saved record.</param>
/// <param name="title">Topic title stored on the saved record.</param>
/// <remarks>
/// The date block is split on '#': the part before it is stored as the post date and
/// the part after it as the post level. A date block without '#' makes the index
/// access throw.
/// </remarks>
private static void ProcessPost(string post, ref string levelOneUid, ref string levelOnePid, string forum, string title)
{
    var authorId = WebTool.GetContent("userinfo.php?id=", "&", post);
    var body = WebTool.GetContent("<div class=\"single-post-content\">", "<div class=\"single-post-content-sig\">", post);
    var postId = WebTool.GetContent("<div id=\"ct", "\"", post);

    var dateBlock = WebTool.GetContent("<div class=\"date\">", "</div>", post);
    var dateParts = dateBlock.Split('#');
    var postedAt = dateParts[0].Trim();
    var level = dateParts[1].Trim();

    string articleId;
    string articleOwner;

    if (level != "1")
    {
        // A reply: link it to the level-1 post seen earlier and to that post's author.
        articleId = levelOnePid;
        articleOwner = levelOneUid;

        // The relation counter is only incremented when CheckExist reports false for
        // this user. NOTE(review): the original author flagged this check as still
        // needing verification ("need check if new").
        var alreadyRecorded = ThriftTool.CheckExist(authorId, "M01UserRelaction");
        if (!alreadyRecorded)
        {
            ThriftTool.CounterAdd(authorId, "M01UserRelaction", levelOneUid, 1);
        }
    }
    else
    {
        // The level-1 post: remember it for the replies that follow. Its own
        // Aid/Aowner stay empty.
        levelOnePid = postId;
        levelOneUid = authorId;
        articleId = string.Empty;
        articleOwner = string.Empty;
    }

    var record = new M01Topic
    {
        Forum = forum,
        Pid = postId,
        Uid = authorId,
        Content = body,
        Pdate = postedAt,
        Plevel = level,
        Aid = articleId,
        Aowner = articleOwner,
        Title = title
    };

    SaveTopic(record);
}
/// <summary>
/// Walks a forum's topic listing, starting at <paramref name="thisPage"/> and moving
/// forward until the page count reported by the pager is reached. Every topic row
/// found is passed to <c>QueuePage</c> with its URL and title.
/// </summary>
/// <param name="forumUrl">Forum listing URL without a page argument.</param>
/// <param name="thisPage">
/// Page to start from. Page 1 is fetched from <paramref name="forumUrl"/> unchanged;
/// any other page appends a "p" query argument.
/// </param>
/// <remarks>
/// <c>ThriftTool.TransportClose()</c> is called once after each listing page has been
/// queued. Exceptions from fetching or parsing a page are not caught here.
/// </remarks>
public static void ProcessForum(string forumUrl, int thisPage)
{
    // Iterate over listing pages instead of recursing once per page, so large forums
    // do not grow the call stack.
    for (var pageNumber = thisPage; ; pageNumber++)
    {
        var pageUrl = pageNumber == 1 ? forumUrl : forumUrl + "&p=" + pageNumber;

        var source = WebTool.GetHtmlUtf8(pageUrl);

        // Pager: the total page count is the text between the two markers below.
        var pager = WebTool.GetContent("<p class=\"numbers\">", "</p>", source);
        var pageTotal = WebTool.GetContent("共", "頁", pager);

        // Narrow the HTML down to the body of the topic table and flatten it to one line.
        source = WebTool.GetContent("<table summary=\"文章列表\">", "</table>", source);
        source = WebTool.GetContent("<tbody>", "</tbody>", source).Replace("\n", string.Empty);

        // One chunk per table row; chunks of 10 or fewer characters after trimming are dropped.
        var rows = source
            .Split(new[] { "<tr>" }, StringSplitOptions.RemoveEmptyEntries)
            .Where(x => x.Trim().Length > 10)
            .ToArray();

        foreach (var row in rows)
        {
            var topicUrl = WebTool.GetContent("<a href=\"", "\"", row);

            // The title is the tag-stripped text of the row's first cell.
            var firstCell = row.Split(new[] { "</td>" }, StringSplitOptions.RemoveEmptyEntries)[0];
            var topicTitle = WebTool.StripTagsCharArray(firstCell).Trim();

            QueuePage(topicUrl, topicTitle);
        }

        ThriftTool.TransportClose();

        // A missing or non-numeric page count means there is nothing further to fetch;
        // stop cleanly instead of throwing after the rows have already been queued.
        int totalPages;
        if (!int.TryParse(pageTotal, out totalPages) || pageNumber >= totalPages)
        {
            break;
        }
    }
}