/// <summary>
/// Parses the HTML of a minor-gallery list page into a <see cref="DCInsideGallery"/>.
/// </summary>
/// <param name="html">Raw HTML of the list page.</param>
/// <returns>
/// A gallery whose <c>id</c>, <c>name</c>, <c>esno</c> and <c>cur_page</c> are read from the page,
/// whose <c>max_page</c> is set only when a <c>page_end</c> link exists, and whose <c>articles</c>
/// holds every table row that could be parsed completely (never null; empty when the page has
/// no <c>tbody</c>/<c>tr</c> or no row matches the expected layout).
/// </returns>
/// <remarks>
/// The page-level elements (<c>gallery_id</c>, <c>og:title</c>, <c>e_s_n_o</c>, paging box) are
/// dereferenced without a null check, exactly as before, so a page lacking one of them still
/// fails with a <see cref="NullReferenceException"/>.
/// </remarks>
public static DCInsideGallery ParseMinorGallery(string html)
{
    var gall = new DCInsideGallery();
    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(html);
    HtmlNode root = document.DocumentNode;

    gall.id = root.SelectSingleNode("//input[@id='gallery_id']").GetAttributeValue("value", "");
    gall.name = root.SelectSingleNode("//meta[@property='og:title']").GetAttributeValue("content", "");
    gall.esno = root.SelectSingleNode("//input[@id='e_s_n_o']").GetAttributeValue("value", "");
    gall.cur_page = root.SelectSingleNode("//div[@class='bottom_paging_box']/em").InnerText;

    // The "last page" link is optional; leave max_page untouched when it is absent.
    HtmlNode pageEnd = root.SelectSingleNode("//a[@class='page_end']");
    if (pageEnd != null)
    {
        gall.max_page = pageEnd.GetAttributeValue("href", "").Split('=').Last();
    }

    var pas = new List<DCInsidePageArticle>();

    // SelectNodes yields null (not an empty collection) when nothing matches, so both
    // the tbody lookup and the row lookup have to be guarded before enumerating.
    var tbodies = root.SelectNodes("//tbody");
    var rows = tbodies != null && tbodies.Count > 0 ? tbodies[0].SelectNodes("./tr") : null;
    if (rows != null)
    {
        foreach (var tr in rows)
        {
            DCInsidePageArticle pa;
            if (TryParseMinorArticleRow(tr, out pa))
            {
                pas.Add(pa);
            }
        }
    }

    gall.articles = pas.ToArray();
    return gall;
}

/// <summary>
/// Reads one list-table row into an article; returns false (row is skipped) when the row is
/// not a numbered article row or any required cell/attribute is missing or malformed.
/// </summary>
private static bool TryParseMinorArticleRow(HtmlNode tr, out DCInsidePageArticle article)
{
    article = default(DCInsidePageArticle);

    // Only rows whose first cell is an integer article number are collected.
    HtmlNode numberCell = tr.SelectSingleNode("./td[1]");
    if (numberCell == null)
    {
        return false;
    }

    string gallNum = numberCell.InnerText;
    int ignored;
    if (!int.TryParse(gallNum, out ignored))
    {
        return false;
    }

    HtmlNode classifyCell = tr.SelectSingleNode("./td[2]");
    HtmlNode typeIcon = tr.SelectSingleNode("./td[3]/a/em");
    HtmlNode titleLink = tr.SelectSingleNode("./td[3]/a");
    HtmlNode writerCell = tr.SelectSingleNode("./td[4]");
    HtmlNode dateCell = tr.SelectSingleNode("./td[5]");
    HtmlNode countCell = tr.SelectSingleNode("./td[6]");
    HtmlNode recommendCell = tr.SelectSingleNode("./td[7]");
    if (classifyCell == null || typeIcon == null || titleLink == null || writerCell == null
        || dateCell == null || countCell == null || recommendCell == null)
    {
        return false;
    }

    // The article type is the second token of the icon's class attribute.
    string[] iconClasses = typeIcon.GetAttributeValue("class", "").Split(' ');
    if (iconClasses.Length < 2)
    {
        return false;
    }

    // The timestamp comes from markup, not from the user, so parse it independently of
    // the host culture/calendar.
    DateTime written;
    if (!DateTime.TryParse(
            dateCell.GetAttributeValue("title", ""),
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.None,
            out written))
    {
        return false;
    }

    var pa = new DCInsidePageArticle();
    pa.no = gallNum;
    pa.classify = classifyCell.InnerText;
    pa.type = iconClasses[1];
    pa.title = HttpUtility.HtmlDecode(titleLink.InnerText);

    // The reply counter is optional; replay_num keeps its default when it is absent.
    HtmlNode replyNum = tr.SelectSingleNode(".//span[@class='reply_num']");
    if (replyNum != null)
    {
        pa.replay_num = replyNum.InnerText;
    }

    pa.nick = writerCell.GetAttributeValue("data-nick", "");
    pa.uid = writerCell.GetAttributeValue("data-uid", "");
    pa.ip = writerCell.GetAttributeValue("data-ip", "");

    // No data-ip value marks the writer as logged in; the fix_nik.gif icon marks it as fixed.
    if (pa.ip == "")
    {
        pa.islogined = true;
        HtmlNode nickIcon = tr.SelectSingleNode("./td[4]/a/img");
        if (nickIcon != null && nickIcon.GetAttributeValue("src", "").Contains("fix_nik.gif"))
        {
            pa.isfixed = true;
        }
    }

    pa.date = written;
    pa.count = countCell.InnerText;
    pa.recommend = recommendCell.InnerText;

    article = pa;
    return true;
}
/// <summary>
/// Downloads one page of comments for an article from the comment endpoint and
/// deserializes the response body into a <see cref="DCInsideComment"/>.
/// </summary>
/// <param name="g">Gallery supplying the <c>id</c> and <c>e_s_n_o</c> query values.</param>
/// <param name="article">Article supplying the <c>no</c> query value.</param>
/// <param name="page">Value sent as <c>comment_page</c>.</param>
/// <returns>The deserialized comment payload.</returns>
public static async Task<DCInsideComment> GetComments(DCInsideGallery g, DCInsidePageArticle article, string page)
{
    var request = NetTask.MakeDefault("https://gall.dcinside.com/board/comment/");

    // Sent with the XMLHttpRequest marker header.
    var headers = new Dictionary<string, string>();
    headers.Add("X-Requested-With", "XMLHttpRequest");
    request.Headers = headers;

    // Gallery id and article number are each sent twice, under two key names.
    var query = new Dictionary<string, string>();
    query.Add("id", g.id);
    query.Add("no", article.no);
    query.Add("cmt_id", g.id);
    query.Add("cmt_no", article.no);
    query.Add("e_s_n_o", g.esno);
    query.Add("comment_page", page);
    request.Query = query;

    var body = await NetTools.DownloadStringAsync(request);
    return JsonConvert.DeserializeObject<DCInsideComment>(body);
}
/// <summary>
/// Downloads a range of list pages for one gallery, merges the parsed articles
/// (de-duplicated by article number, sorted by number descending) and writes the
/// result to the working directory as JSON and as MessagePack.
/// </summary>
/// <param name="args">
/// <c>args[0]</c>: gallery id; <c>args[1]</c>: first page; <c>args[2]</c>: last page (inclusive).
/// The page values are converted with <see cref="Convert.ToInt32(string)"/>; missing or
/// non-numeric arguments are not validated here and surface as the underlying exception.
/// </param>
static void ProcessCollectArticles(string[] args)
{
    var firstPage = Convert.ToInt32(args[1]);
    var lastPage = Convert.ToInt32(args[2]);
    var galleryId = args[0];
    var totalPages = lastPage - firstPage + 1;

    // Ids that are not in the known gallery list are treated as minor galleries.
    bool is_minorg = !DCGalleryList.Instance.GalleryIds.Contains(galleryId);
    var result = new DCInsideGalleryModel();
    var articles = new List<DCInsidePageArticle>();

    using (var progressBar = new Console.ConsoleProgressBar())
    {
        for (var page = firstPage; page <= lastPage; page++)
        {
            var url = is_minorg
                ? $"https://gall.dcinside.com/mgallery/board/lists/?id={galleryId}&page={page}"
                : $"https://gall.dcinside.com/board/lists/?id={galleryId}&page={page}";

            Console.Instance.WriteLine($"Download URL: {url}");
            var html = NetTools.DownloadString(url);

            DCInsideGallery gall = null;
            if (is_minorg)
            {
                gall = DCInsideUtils.ParseMinorGallery(html);
            }
            else
            {
                gall = DCInsideUtils.ParseGallery(html);
            }

            // A minor-gallery parse that yields nothing is retried with the regular parser.
            if (is_minorg && (gall.articles == null || gall.articles.Length == 0))
            {
                gall = DCInsideUtils.ParseGallery(html);
            }

            // AddRange rejects null, and the check above shows articles may be null.
            if (gall.articles != null)
            {
                articles.AddRange(gall.articles);
            }

            progressBar.SetProgress(((page - firstPage + 1) / (float)totalPages) * 100);
        }

        // Keep the first occurrence of each article number; HashSet.Add reports whether
        // the value was new, so no separate Contains lookup is needed.
        var overlap = new HashSet<string>();
        var articles_trim = new List<DCInsidePageArticle>();
        foreach (var article in articles)
        {
            if (overlap.Add(article.no))
            {
                articles_trim.Add(article);
            }
        }

        // Highest article number first.
        articles_trim.Sort((x, y) => y.no.ToInt().CompareTo(x.no.ToInt()));

        result.is_minor_gallery = is_minorg;
        result.gallery_id = galleryId;
        result.articles = articles_trim;

        // Capture the stamp once so the JSON file and its "-index" companion share the
        // same suffix and can be matched up afterwards.
        var stamp = DateTime.Now.Ticks;

        File.WriteAllText($"list-{galleryId}-{stamp}.txt", JsonConvert.SerializeObject(result));

        var bbb = MessagePackSerializer.Serialize(result);
        File.WriteAllBytes($"list-{galleryId}-{stamp}-index.txt", bbb);
    }
}