/// <summary>
/// Fetches the list of regular galleries.
/// </summary>
/// <returns>
/// A dictionary, sorted by key, that maps each gallery display name to the
/// identifier captured from its <c>gallery_view('...')</c> handler.
/// </returns>
public static SortedDictionary<string, string> GetGalleryList()
{
    var galleries = new SortedDictionary<string, string>();
    var page = NetTools.DownloadString("http://wstatic.dcinside.com/gallery/gallindex_iframe_new_gallery.html");

    // Two markup shapes are scanned, in this order (the first name seen wins):
    //   1. the name sits inside a tag that follows the onmouseover element's opening tag;
    //   2. the name follows the onmouseover element's opening tag directly.
    var patterns = new[]
    {
        @"onmouseover=""gallery_view\('(\w+)'\);""\>[\s\S]*?\<.*?\>([\w\s]+)\<",
        @"onmouseover\=""gallery_view\('(\w+)'\);""\>\s*([\w\s]+)\<"
    };

    foreach (var pattern in patterns)
    {
        foreach (Match hit in Regex.Matches(page, pattern))
        {
            var label = hit.Groups[2].Value.Trim();
            if (string.IsNullOrEmpty(label))
            {
                continue;
            }

            // Drop a single leading dash marker, then re-trim.
            if (label[0] == '-')
            {
                label = label.Remove(0, 1).Trim();
            }

            if (galleries.ContainsKey(label))
            {
                continue;
            }

            galleries.Add(label, hit.Groups[1].Value);
        }
    }

    return galleries;
}
/// <summary>
/// Collects the minor gallery list: first the links found on the minor gallery
/// index page, then the links found on each per-category static page.
/// </summary>
/// <returns>
/// A dictionary, sorted by key, that maps each link's trimmed text to the part of
/// its <c>href</c> after the last '='. When a name occurs more than once, the
/// first occurrence is kept.
/// </returns>
public static SortedDictionary<string, string> GetMinorGalleryList()
{
    var dic = new SortedDictionary<string, string>();
    var html = NetTools.DownloadString("https://gall.dcinside.com/m");
    var document = new HtmlDocument();
    document.LoadHtml(html);

    // HtmlAgilityPack's SelectNodes returns null (not an empty collection) when
    // nothing matches, so every query result is null-checked before iterating.
    var mainLinks = document.DocumentNode.SelectNodes("//a[@onmouseout='thumb_hide();']");
    if (mainLinks != null)
    {
        foreach (var a in mainLinks)
        {
            var name = a.InnerText.Trim();
            // Skip duplicates instead of letting Add throw ArgumentException.
            if (!dic.ContainsKey(name))
            {
                dic.Add(name, a.GetAttributeValue("href", "").Split('=').Last());
            }
        }
    }

    // Each "more" button carries the category key in its data-lyr attribute.
    var under_name = new List<string>();
    var moreButtons = document.DocumentNode.SelectNodes("//button[@class='btn_cate_more']");
    if (moreButtons != null)
    {
        foreach (var b in moreButtons)
        {
            under_name.Add(b.GetAttributeValue("data-lyr", ""));
        }
    }

    int count = 1;
    foreach (var un in under_name)
    {
        // An earlier variant POSTed `under_name` to /ajax/minor_ajax/get_under_gall;
        // the static per-category page is fetched instead.
        var url = $"https://wstatic.dcinside.com/gallery/mgallindex_underground/{un}.html";

        // Re-download until a non-blank body arrives.
        // NOTE(review): the retry is unbounded and has no delay, as in the original.
        var subhtml = NetTools.DownloadString(url);
        while (string.IsNullOrWhiteSpace(subhtml))
        {
            Console.Instance.WriteLine($"[{count}/{under_name.Count}] Retry {un}...");
            subhtml = NetTools.DownloadString(url);
        }

        var document2 = new HtmlDocument();
        document2.LoadHtml(subhtml);

        var titleLinks = document2.DocumentNode.SelectNodes("//a[@class='list_title']");
        if (titleLinks != null)
        {
            foreach (var c in titleLinks)
            {
                var name = c.InnerText.Trim();
                if (!dic.ContainsKey(name))
                {
                    dic.Add(name, c.GetAttributeValue("href", "").Split('=').Last());
                }
            }
        }

        Console.Instance.WriteLine($"[{count}/{under_name.Count}] Complete {un}");
        count++;
    }

    return dic;
}
/// <summary>
/// Archives the first <c>args[0]</c> articles of the currently loaded gallery.
/// For each article the parsed board view is written as
/// <c>[no]-body-title.json</c>; when the article reports at least one comment,
/// its comments are written as <c>[no]-comments-title.json</c>.
/// Files go to <c>Archive/&lt;gallery_name&gt; (&lt;gallery_id&gt;)</c> next to the
/// executing assembly.
/// </summary>
/// <param name="args"><c>args[0]</c> is the number of articles to archive.</param>
static void ProcessArchive(string[] args)
{
    var counts = Convert.ToInt32(args[0]);
    var invalid = new string(Path.GetInvalidFileNameChars()) + new string(Path.GetInvalidPathChars());

    // The model is loop-invariant; read it once.
    var model = DCGalleryAnalyzer.Instance.Model;
    var sp = Path.Combine(
        Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location),
        "Archive",
        $"{model.gallery_name} ({model.gallery_id})");
    Directory.CreateDirectory(sp);

    for (int i = 0; i < counts; i++)
    {
        var article = DCGalleryAnalyzer.Instance.Articles[i];

        // Strip every character that is not allowed in a file name or path.
        var ttitle = $"{article.title}";
        foreach (char c in invalid)
        {
            ttitle = ttitle.Replace(c.ToString(), "");
        }

        string url;
        if (model.is_minor_gallery)
        {
            url = $"https://gall.dcinside.com/mgallery/board/view/?id={model.gallery_id}&no={article.no}";
        }
        else
        {
            url = $"https://gall.dcinside.com/board/view/?id={model.gallery_id}&no={article.no}";
        }

        var html = NetTools.DownloadString(url);
        var info = DCInsideUtils.ParseBoardView(html, model.is_minor_gallery);
        File.WriteAllText(
            Path.Combine(sp, $"[{article.no}]-body-{ttitle}.json"),
            JsonConvert.SerializeObject(info, Formatting.Indented));

        // The comment count may contain thousands separators; a missing count is
        // treated as "no comments" rather than dereferenced.
        int com;
        if (int.TryParse((info.CommentCount ?? "").Replace(",", ""), out com) && com > 0)
        {
            var comments = DCInsideUtils.GetAllComments(model.gallery_id, article.no).Result;
            File.WriteAllText(
                Path.Combine(sp, $"[{article.no}]-comments-{ttitle}.json"),
                JsonConvert.SerializeObject(comments, Formatting.Indented));
        }

        // i is zero-based, so i + 1 articles are finished at this point.
        Console.Instance.WriteLine($"{counts}중 {i + 1}개 완료");

        // Pause between articles (presumably to avoid hammering the server).
        Thread.Sleep(700);
    }
}
/// <summary>
/// Requests one page of comments for an article and deserializes the JSON reply.
/// </summary>
/// <param name="gall_id">Gallery id; sent as both <c>id</c> and <c>cmt_id</c>.</param>
/// <param name="article_id">Article number; sent as both <c>no</c> and <c>cmt_no</c>.</param>
/// <param name="page">Value sent as <c>comment_page</c>.</param>
/// <returns>The response body deserialized as a <see cref="DCInsideComment"/>.</returns>
public static async Task<DCInsideComment> GetComments(string gall_id, string article_id, string page)
{
    var request = NetTask.MakeDefault("https://gall.dcinside.com/board/comment/");

    var headers = new Dictionary<string, string>();
    headers.Add("X-Requested-With", "XMLHttpRequest");
    request.Headers = headers;

    var query = new Dictionary<string, string>();
    query.Add("id", gall_id);
    query.Add("no", article_id);
    query.Add("cmt_id", gall_id);
    query.Add("cmt_no", article_id);
    query.Add("e_s_n_o", DCInsideManager.Instance.ESNO);
    query.Add("comment_page", page);
    request.Query = query;

    var json = await NetTools.DownloadStringAsync(request);
    return JsonConvert.DeserializeObject<DCInsideComment>(json);
}
/// <summary>
/// Downloads a range of gallery list pages, merges their articles (duplicates by
/// article number removed, sorted by article number descending), and writes the
/// result twice: as JSON to <c>list-&lt;id&gt;-&lt;ticks&gt;.txt</c> and as MessagePack
/// to <c>list-&lt;id&gt;-&lt;ticks&gt;-index.txt</c>. Both files share one timestamp.
/// </summary>
/// <param name="args">
/// <c>args[0]</c> is the gallery id, <c>args[1]</c> the first page and
/// <c>args[2]</c> the last page (inclusive).
/// </param>
static void ProcessCollectArticles(string[] args)
{
    var first_page = Convert.ToInt32(args[1]);
    var ends = Convert.ToInt32(args[2]);
    var total_pages = ends - first_page + 1;

    // A gallery id that is not in the known id list is treated as a minor gallery.
    bool is_minorg = !DCGalleryList.Instance.GalleryIds.Contains(args[0]);

    var result = new DCInsideGalleryModel();
    var articles = new List<DCInsidePageArticle>();

    using (var progressBar = new Console.ConsoleProgressBar())
    {
        for (var page = first_page; page <= ends; page++)
        {
            var url = is_minorg
                ? $"https://gall.dcinside.com/mgallery/board/lists/?id={args[0]}&page={page}"
                : $"https://gall.dcinside.com/board/lists/?id={args[0]}&page={page}";

            Console.Instance.WriteLine($"Download URL: {url}");
            var html = NetTools.DownloadString(url);

            DCInsideGallery gall = is_minorg
                ? DCInsideUtils.ParseMinorGallery(html)
                : DCInsideUtils.ParseGallery(html);

            // If the minor-gallery parser found nothing, retry the same page with
            // the regular gallery parser.
            if (is_minorg && (gall == null || gall.articles == null || gall.articles.Length == 0))
            {
                gall = DCInsideUtils.ParseGallery(html);
            }

            // A page that yields no article array is skipped instead of letting
            // AddRange throw ArgumentNullException.
            if (gall != null && gall.articles != null)
            {
                articles.AddRange(gall.articles);
            }

            progressBar.SetProgress((page - first_page + 1) / (float)total_pages * 100);
        }

        // Keep only the first occurrence of each article number.
        var overlap = new HashSet<string>();
        var articles_trim = new List<DCInsidePageArticle>();
        foreach (var article in articles)
        {
            if (overlap.Add(article.no))
            {
                articles_trim.Add(article);
            }
        }

        // Highest article number first.
        articles_trim.Sort((x, y) => y.no.ToInt().CompareTo(x.no.ToInt()));

        result.is_minor_gallery = is_minorg;
        result.gallery_id = args[0];
        result.articles = articles_trim;

        // Read the clock once so the JSON file and its index file carry the same stamp.
        var stamp = DateTime.Now.Ticks;
        File.WriteAllText($"list-{args[0]}-{stamp}.txt", JsonConvert.SerializeObject(result));

        var bbb = MessagePackSerializer.Serialize(result);
        File.WriteAllBytes($"list-{args[0]}-{stamp}-index.txt", bbb);
    }
}
/// <summary>
/// Downloads the list page of the "hit" gallery, parses it with
/// <c>DCInsideUtils.ParseGallery</c>, and stores the parsed <c>esno</c> value in
/// <c>ESNO</c>.
/// </summary>
public DCInsideManager()
{
    var listPage = NetTools.DownloadString("https://gall.dcinside.com/board/lists?id=hit");
    var parsed = DCInsideUtils.ParseGallery(listPage);
    ESNO = parsed.esno;
}