Esempio n. 1
0
        /// <summary>
        /// Crawls a topic starting at <paramref name="thisPage"/> and continues page by page
        /// until the page count reported by the site is reached, handing every post found
        /// to <c>ProcessPost</c>.
        /// </summary>
        /// <param name="url">Topic URL without the "&amp;p=" page parameter. The forum id is read from the text following "f=" in this URL.</param>
        /// <param name="thisPage">1-based page to start from; page 1 is fetched from <paramref name="url"/> unchanged.</param>
        /// <param name="aid">Carried across posts and pages; passed by reference to <c>ProcessPost</c> as its <c>levelOneUid</c> argument.</param>
        /// <param name="aowner">Carried across posts and pages; passed by reference to <c>ProcessPost</c> as its <c>levelOnePid</c> argument.</param>
        /// <remarks>
        /// Any exception raised while handling a page is appended to <c>TempLog</c> and stops the crawl;
        /// nothing is rethrown to the caller.
        /// </remarks>
        public static void ProcessUrl(string url, int thisPage, string aid, string aowner)
        {
            // Pages are walked in a loop rather than by one recursive call per page: the recursive
            // form kept every previous page's HTML alive on the stack and could overflow the stack
            // on very long topics, which the catch block below cannot intercept.
            while (true)
            {
                var thisUrl = thisPage != 1 ? url + "&p=" + thisPage : url;

                try
                {
                    var forum = thisUrl.Split(new[] { "f=" }, StringSplitOptions.RemoveEmptyEntries)[1].Split('&')[0];

                    // Fetch the page; the raw HTML replaces whatever TempLog held before.
                    var source = WebTool.GetHtmlUtf8(thisUrl);
                    TempLog = source;
                    source = WebTool.GetContent("<div class=\"forum-content\">", "<div class=\"sidebar\">", source);

                    // Topic title and pagination block.
                    var title = WebTool.GetContent("<h2 class=\"topic\">", "</h2>", source);
                    var page = WebTool.GetContent("<p class=\"numbers\">", "</p>", source);
                    var pageTotal = WebTool.GetContent("共", "頁", page);

                    // Narrow to the post list and split it into one fragment per post.
                    source = WebTool.GetContent("<div class=\"single-post\">", "<div class=\"pagination\">", source);
                    var posts = source.Split(new[] { "<div class=\"single-post\">" }, StringSplitOptions.RemoveEmptyEntries);

                    foreach (var post in posts)
                    {
                        // NOTE(review): aid lands in the levelOneUid slot and aowner in the levelOnePid
                        // slot, which looks swapped relative to the parameter names. It makes no
                        // difference when a crawl starts on page 1 with empty values; confirm against
                        // callers before changing the order.
                        ProcessPost(post, ref aid, ref aowner, forum, title);
                    }

                    // Stop once the last page reported by the site has been handled. A page count
                    // that cannot be parsed throws here and is logged by the catch below.
                    if (thisPage >= int.Parse(pageTotal))
                    {
                        return;
                    }
                }
                catch (Exception ex)
                {
                    // Best-effort crawl: record the failure and give up on the remaining pages.
                    TempLog += ex.Message;
                    return;
                }

                thisPage++;
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Extracts the fields of a single post from its HTML fragment, keeps the level-one
        /// tracking values up to date, and saves the result as an <c>M01Topic</c>.
        /// </summary>
        /// <param name="post">HTML fragment of one post.</param>
        /// <param name="levelOneUid">User id of the most recent level "1" post; overwritten when this post is level "1".</param>
        /// <param name="levelOnePid">Post id of the most recent level "1" post; overwritten when this post is level "1".</param>
        /// <param name="forum">Forum id stored on the topic.</param>
        /// <param name="title">Topic title stored on the topic.</param>
        private static void ProcessPost(string post, ref string levelOneUid, ref string levelOnePid, string forum, string title)
        {
            var uid = WebTool.GetContent("userinfo.php?id=", "&", post);
            var content = WebTool.GetContent("<div class=\"single-post-content\">", "<div class=\"single-post-content-sig\">", post);
            var pid = WebTool.GetContent("<div id=\"ct", "\"", post);

            // The date block is split on '#': the part before it is the date, the part after it the level.
            var dateParts = WebTool.GetContent("<div class=\"date\">", "</div>", post).Split('#');
            var pdate = dateParts[0].Trim();
            var plevel = dateParts[1].Trim();

            var isLevelOne = plevel == "1";

            if (isLevelOne)
            {
                // Remember this post so that later posts can refer back to it.
                levelOnePid = pid;
                levelOneUid = uid;
            }
            else if (!ThriftTool.CheckExist(uid, "M01UserRelaction"))
            {
                // Counter is only added when no entry exists yet for this user.
                ThriftTool.CounterAdd(uid, "M01UserRelaction", levelOneUid, 1);
            }

            // A level-one post has empty Aid/Aowner; any other post points at the tracked level-one post.
            SaveTopic(new M01Topic
            {
                Forum = forum,
                Pid = pid,
                Uid = uid,
                Content = content,
                Pdate = pdate,
                Plevel = plevel,
                Aid = isLevelOne ? string.Empty : levelOnePid,
                Aowner = isLevelOne ? string.Empty : levelOneUid,
                Title = title
            });
        }
Esempio n. 3
0
        /// <summary>
        /// Walks a forum's topic list starting at <paramref name="thisPage"/> and continues page by
        /// page until the page count reported by the site is reached, queuing every topic found
        /// via <c>QueuePage</c>.
        /// </summary>
        /// <param name="forumUrl">Forum URL without the "&amp;p=" page parameter.</param>
        /// <param name="thisPage">1-based page to start from; page 1 is fetched from <paramref name="forumUrl"/> unchanged.</param>
        /// <remarks>
        /// Exceptions are not handled here; in particular <c>int.Parse</c> throws when the page
        /// count extracted from the pagination block is not a number.
        /// </remarks>
        public static void ProcessForum(string forumUrl, int thisPage)
        {
            // Pages are walked in a loop rather than by one recursive call per page, so stack depth
            // and retained page HTML no longer grow with the number of forum pages.
            while (true)
            {
                var thisUrl = thisPage != 1 ? forumUrl + "&p=" + thisPage : forumUrl;

                var source = WebTool.GetHtmlUtf8(thisUrl);

                // Pagination block; the total page count is the text between "共" and "頁".
                var page = WebTool.GetContent("<p class=\"numbers\">", "</p>", source);
                var pageTotal = WebTool.GetContent("共", "頁", page);

                // Narrow to the topic table body and split it into rows.
                source = WebTool.GetContent("<table summary=\"文章列表\">", "</table>", source);
                source = WebTool.GetContent("<tbody>", "</tbody>", source).Replace("\n", string.Empty);
                var arTopic = source
                    .Split(new[] { "<tr>" }, StringSplitOptions.RemoveEmptyEntries)
                    .Where(x => x.Trim().Length > 10) // drop fragments too short to be a topic row
                    .ToArray();

                foreach (var row in arTopic)
                {
                    var topicUrl = WebTool.GetContent("<a href=\"", "\"", row);
                    // Title is the tag-stripped text of the row's first cell.
                    var firstCell = row.Split(new[] { "</td>" }, StringSplitOptions.RemoveEmptyEntries)[0];
                    var topicTitle = WebTool.StripTagsCharArray(firstCell).Trim();
                    QueuePage(topicUrl, topicTitle);
                }

                // Closed once per page, after that page's topics have been queued.
                ThriftTool.TransportClose();

                // Stop once the last page reported by the site has been handled.
                if (thisPage >= int.Parse(pageTotal))
                {
                    return;
                }

                thisPage++;
            }
        }