/// <summary>
/// Schedules one <see cref="Task.Run(Action)"/> work item per key of <c>thread_lists</c>.
/// Each work item constructs a <c>ThreeRiverThread</c> from the key and the given cookie and
/// enqueues it, paired with its <c>CreateTime</c>, into <c>_threeRiverThreads</c>.
/// The task that completes when all work items finish is stored in <c>_group_task</c>;
/// this method itself does not wait for it.
/// </summary>
/// <param name="cookie">Passed through unchanged to every <c>ThreeRiverThread</c> constructor.</param>
void GroupThreadList(CreditCookie cookie)
{
    var workers = thread_lists.Keys.Select(url => Task.Run(() =>
    {
        var parsed = new ThreeRiverThread(url, cookie);
        var entry = new KeyValuePair<DateTime, ThreeRiverThread>(parsed.CreateTime, parsed);
        _threeRiverThreads.Enqueue(entry);
    }));

    _group_task = Task.WhenAll(workers);
}
/// <summary>
/// Entry point: builds a <c>ThreeRiverColletion</c> starting from 2017-01-01, then writes one
/// indented JSON file per collected thread. Each file contains a "Time" property holding the
/// thread's <c>CreateTime</c> plus one array property per entry of <c>Books</c>.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // NOTE(review): credentials are hard-coded placeholders; supply real ones from configuration, not source.
    var credit_cookie = new CreditCookie("username", "password", "answer");
    var three_river_collection = new ThreeRiverColletion(new DateTime(2017, 1, 1), credit_cookie);

    foreach (var three_river_pair in three_river_collection)
    {
        var three_river = three_river_pair.Value;
        // Debugging aid — process a single thread instead:
        // var three_river = new ThreeRiverThread("http://www.lkong.net/thread-1879329-1-1.html", credit_cookie);

        var jobject = new JObject();
        jobject.Add("Time", three_river.CreateTime);
        foreach (var book in three_river.Books)
        {
            jobject.Add(book, JArray.FromObject(three_river[book]));
        }

        // Map the thread URL to a local path: data/<thread-name>.json
        var filename = three_river.Url.Replace("http://www.lkong.net/", "data/").Replace(".html", ".json");

        // File.WriteAllText does not create missing directories, so make sure the
        // target folder exists first (CreateDirectory is a no-op when it already does).
        var directory = Path.GetDirectoryName(filename);
        if (!string.IsNullOrEmpty(directory))
        {
            Directory.CreateDirectory(directory);
        }

        File.WriteAllText(filename, JsonConvert.SerializeObject(jobject, Formatting.Indented));
    }
}
/// <summary>
/// Loads the rank-list page at <c>ranklisturl</c>, feeds it and the following pages to
/// <c>ParseThreadList</c> until that method returns <c>false</c> or the pages run out, then
/// calls <c>FilterThreeRiverThreadList</c> and <c>GroupThreadList</c>.
/// </summary>
/// <param name="earliest">Passed to <c>ParseThreadList</c> for every page (presumably the oldest date of interest — confirm there).</param>
/// <param name="cookie">Optional cookie holder; when non-null its <c>CookieContainer</c> is attached to every request.</param>
public ThreeRiverColletion(DateTime earliest, CreditCookie cookie = null)
{
    // Fetch the page content, attaching the cookies when available.
    var webClient = new HtmlWeb()
    {
        PreRequest = (handler, request) =>
        {
            if (cookie != null)
            {
                handler.CookieContainer = cookie.CookieContainer;
            }
            return true;
        }
    };
    var doc = webClient.Load(ranklisturl);
    var root = doc.DocumentNode;

    // Collect all threads on the first page.
    if (ParseThreadList(root, earliest))
    {
        // Determine the number of pages.
        var pagecount = 1;
        var last = root.SelectSingleNode("//div[@class='pg']/a[@class='last']");
        if (last != null)
        {
            pagecount = int.Parse(last.InnerText.Trim().Replace("...", ""));
        }
        else
        {
            // SelectSingleNode returns null when the pager has no "last" anchor; previously
            // this crashed with a NullReferenceException. Fall back to the highest numbered
            // link inside the same pager, or stay at a single page if there is none.
            // NOTE(review): assumes the other pager anchors contain plain page numbers — confirm against the site markup.
            var pageLinks = root.SelectNodes("//div[@class='pg']/a");
            if (pageLinks != null)
            {
                foreach (var link in pageLinks)
                {
                    int number;
                    if (int.TryParse(link.InnerText.Trim(), out number) && number > pagecount)
                    {
                        pagecount = number;
                    }
                }
            }
        }

        for (var i = 2; i <= pagecount; ++i)
        {
            root = webClient.Load(ranklisturl + $"&page={i}").DocumentNode;

            // Collect all threads on this page; stop as soon as ParseThreadList reports false.
            if (!ParseThreadList(root, earliest))
            {
                break;
            }
        }
    }

    // Filter all collected threads.
    FilterThreeRiverThreadList();

    // Analyze all remaining threads.
    GroupThreadList(cookie);
}
/// <summary>
/// Stores <paramref name="url"/> in <c>Url</c>, downloads that page, hands the document root
/// to <c>Parse</c>, and finally calls <c>ParseFloorTime</c> on every entry of <c>_floors</c>.
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <param name="cookie">Optional cookie holder; when non-null its <c>CookieContainer</c> is attached to every request.</param>
public Thread(string url, CreditCookie cookie = null)
{
    Url = url;

    var client = new HtmlWeb();
    client.PreRequest = (handler, request) =>
    {
        // Nothing to attach without a cookie; let the request proceed as-is.
        if (cookie == null)
        {
            return true;
        }

        handler.CookieContainer = cookie.CookieContainer;
        return true;
    };

    var page = client.Load(url);
    Parse(client, page.DocumentNode);

    foreach (var entry in _floors)
    {
        ParseFloorTime(entry);
    }
}
/// <summary>
/// Runs the base constructor with the same arguments, then calls <c>ParseFirstFloor</c>.
/// </summary>
/// <param name="url">Address forwarded to the base constructor.</param>
/// <param name="cookie">Optional cookie holder forwarded to the base constructor.</param>
public ThreeRiverThread(string url, CreditCookie cookie = null)
    : base(url, cookie)
{
    ParseFirstFloor();
}
/// <summary>
/// Convenience overload: delegates to the two-argument constructor, passing
/// <see cref="DateTime.MinValue"/> as <c>earliest</c>.
/// </summary>
/// <param name="cookie">Optional cookie holder forwarded to the two-argument constructor.</param>
public ThreeRiverColletion(CreditCookie cookie = null)
    : this(DateTime.MinValue, cookie)
{
}