Ejemplo n.º 1
0
        /// <summary>
        /// Fetches the list of regular galleries.
        /// </summary>
        /// <returns>
        /// A sorted map from gallery display name to gallery identifier, built from the
        /// downloaded gallery index page. When the same name is matched more than once,
        /// the first match is kept.
        /// </returns>
        public static SortedDictionary <string, string> GetGalleryList()
        {
            var page = NetTools.DownloadString("http://wstatic.dcinside.com/gallery/gallindex_iframe_new_gallery.html");

            // Two patterns are applied to the same page: one where the name sits inside a
            // nested tag after the anchor opens, and one where the name follows the anchor directly.
            var nestedHits = Regex.Matches(page, @"onmouseover=""gallery_view\('(\w+)'\);""\>[\s\S]*?\<.*?\>([\w\s]+)\<").Cast <Match>();
            var directHits = Regex.Matches(page, @"onmouseover\=""gallery_view\('(\w+)'\);""\>\s*([\w\s]+)\<").Cast <Match>();

            var galleries = new SortedDictionary <string, string>();

            // Nested-tag hits are processed first so they take precedence on duplicate names.
            foreach (var hit in nestedHits.Concat(directHits))
            {
                var title = hit.Groups[2].Value.Trim();
                if (string.IsNullOrEmpty(title))
                {
                    continue;
                }

                // Drop a single leading dash, if present, before using the name as a key.
                if (title[0] == '-')
                {
                    title = title.Remove(0, 1).Trim();
                }

                if (galleries.ContainsKey(title))
                {
                    continue;
                }

                galleries.Add(title, hit.Groups[1].Value);
            }

            return galleries;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Fetches the list of minor galleries from the minor-gallery index page and from
        /// each category sub-page referenced by the index page's "more" buttons.
        /// </summary>
        /// <returns>
        /// A sorted map from gallery display name to the last '='-separated segment of the
        /// link's href. When the same name appears more than once, the first occurrence is kept.
        /// </returns>
        public static SortedDictionary <string, string> GetMinorGalleryList()
        {
            // Upper bound on download attempts per sub-page so a permanently empty
            // response cannot stall the whole scan.
            const int maxAttempts = 10;

            var dic  = new SortedDictionary <string, string>();
            var html = NetTools.DownloadString("https://gall.dcinside.com/m");

            HtmlDocument document = new HtmlDocument();

            document.LoadHtml(html);

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var links = document.DocumentNode.SelectNodes("//a[@onmouseout='thumb_hide();']");
            if (links != null)
            {
                foreach (var a in links)
                {
                    var name = a.InnerText.Trim();

                    // Guard against duplicate names; Add would throw on an existing key.
                    if (!dic.ContainsKey(name))
                    {
                        dic.Add(name, a.GetAttributeValue("href", "").Split('=').Last());
                    }
                }
            }

            var under_name = new List <string>();

            var buttons = document.DocumentNode.SelectNodes("//button[@class='btn_cate_more']");
            if (buttons != null)
            {
                foreach (var b in buttons)
                {
                    under_name.Add(b.GetAttributeValue("data-lyr", ""));
                }
            }

            int count = 1;

            foreach (var un in under_name)
            {
                // The static mirror is used for the sub-page; an earlier approach POSTed
                // "under_name" to the minor_ajax/get_under_gall endpoint instead.
                string subhtml = null;
                for (int attempt = 1; attempt <= maxAttempts; attempt++)
                {
                    subhtml = NetTools.DownloadString($"https://wstatic.dcinside.com/gallery/mgallindex_underground/{un}.html");
                    if (!string.IsNullOrWhiteSpace(subhtml))
                    {
                        break;
                    }

                    Console.Instance.WriteLine($"[{count}/{under_name.Count}] Retry {un}...");
                }

                if (string.IsNullOrWhiteSpace(subhtml))
                {
                    // Give up on this sub-page but keep scanning the remaining ones.
                    Console.Instance.WriteLine($"[{count++}/{under_name.Count}] Skip {un} (empty response after {maxAttempts} attempts)");
                    continue;
                }

                HtmlDocument document2 = new HtmlDocument();
                document2.LoadHtml(subhtml);

                var titles = document2.DocumentNode.SelectNodes("//a[@class='list_title']");
                if (titles != null)
                {
                    foreach (var c in titles)
                    {
                        var name = c.InnerText.Trim();
                        if (!dic.ContainsKey(name))
                        {
                            dic.Add(name, c.GetAttributeValue("href", "").Split('=').Last());
                        }
                    }
                }

                Console.Instance.WriteLine($"[{count++}/{under_name.Count}] Complete {un}");
            }

            return(dic);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Downloads the first N articles of the currently loaded gallery and stores each
        /// article body (and its comments, when it has any) as indented JSON files under
        /// an "Archive" folder next to the executing assembly.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> is the number of articles to archive, taken from the start of
        /// <c>DCGalleryAnalyzer.Instance.Articles</c>.
        /// </param>
        static void ProcessArchive(string[] args)
        {
            var counts = Convert.ToInt32(args[0]);
            var model  = DCGalleryAnalyzer.Instance.Model;

            // Characters that must not appear in the generated file names.
            var invalid = (new string(Path.GetInvalidFileNameChars()) + new string(Path.GetInvalidPathChars())).ToCharArray();
            var sp      = Path.Combine(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "Archive",
                                       $"{model.gallery_name} ({model.gallery_id})");

            Directory.CreateDirectory(sp);

            for (int i = 0; i < counts; i++)
            {
                var article = DCGalleryAnalyzer.Instance.Articles[i];

                // Splitting on every invalid character and re-joining removes them all in one pass.
                var ttitle = string.Concat($"{article.title}".Split(invalid));

                string url;

                if (model.is_minor_gallery)
                {
                    url = $"https://gall.dcinside.com/mgallery/board/view/?id={model.gallery_id}&no={article.no}";
                }
                else
                {
                    url = $"https://gall.dcinside.com/board/view/?id={model.gallery_id}&no={article.no}";
                }

                var html = NetTools.DownloadString(url);
                var info = DCInsideUtils.ParseBoardView(html, model.is_minor_gallery);

                File.WriteAllText(Path.Combine(sp, $"[{article.no}]-body-{ttitle}.json"), JsonConvert.SerializeObject(info, Formatting.Indented));

                // The comment count may contain thousands separators; a missing or
                // unparsable count is treated as "no comments".
                int com;
                if (int.TryParse(info.CommentCount?.Replace(",", ""), out com) && com > 0)
                {
                    // Blocks on the asynchronous comment fetch because this method is synchronous.
                    var comments = DCInsideUtils.GetAllComments(model.gallery_id, article.no).Result;
                    File.WriteAllText(Path.Combine(sp, $"[{article.no}]-comments-{ttitle}.json"), JsonConvert.SerializeObject(comments, Formatting.Indented));
                }

                // i is zero-based, so the number of finished articles is i + 1.
                Console.Instance.WriteLine($"{counts}중 {i + 1}개 완료");

                // Pause between articles before issuing the next request.
                Thread.Sleep(700);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Downloads a range of list pages for a gallery, merges the articles found on
        /// them (dropping duplicates by article number), sorts them by article number in
        /// descending order, and writes the result both as JSON and as a MessagePack blob.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> is the gallery id, <c>args[1]</c> the first page and
        /// <c>args[2]</c> the last page (inclusive) to download.
        /// </param>
        static void ProcessCollectArticles(string[] args)
        {
            var gallery_id  = args[0];
            var first_page  = Convert.ToInt32(args[1]);
            var last_page   = Convert.ToInt32(args[2]);
            var total_pages = last_page - first_page + 1;

            // Any id that is not in the known gallery id list is treated as a minor gallery.
            bool is_minorg = !DCGalleryList.Instance.GalleryIds.Contains(gallery_id);

            var articles = new List <DCInsidePageArticle>();

            using (var progressBar = new Console.ConsoleProgressBar())
            {
                for (var page = first_page; page <= last_page; page++)
                {
                    string url;
                    if (is_minorg)
                    {
                        url = $"https://gall.dcinside.com/mgallery/board/lists/?id={gallery_id}&page={page}";
                    }
                    else
                    {
                        url = $"https://gall.dcinside.com/board/lists/?id={gallery_id}&page={page}";
                    }

                    Console.Instance.WriteLine($"Download URL: {url}");

                    var             html = NetTools.DownloadString(url);
                    DCInsideGallery gall;

                    if (is_minorg)
                    {
                        gall = DCInsideUtils.ParseMinorGallery(html);
                    }
                    else
                    {
                        gall = DCInsideUtils.ParseGallery(html);
                    }

                    // If the minor-gallery parser found nothing, retry with the regular parser.
                    if (is_minorg && (gall == null || gall.articles == null || gall.articles.Length == 0))
                    {
                        gall = DCInsideUtils.ParseGallery(html);
                    }

                    // A page that yields no articles is skipped instead of aborting the whole run.
                    if (gall != null && gall.articles != null)
                    {
                        articles.AddRange(gall.articles);
                    }

                    progressBar.SetProgress((page - first_page + 1) / (float)total_pages * 100);
                }

                // Keep only the first occurrence of each article number.
                var seen          = new HashSet <string>();
                var articles_trim = new List <DCInsidePageArticle>();
                foreach (var article in articles)
                {
                    if (seen.Add(article.no))
                    {
                        articles_trim.Add(article);
                    }
                }

                // Highest article number first.
                articles_trim.Sort((x, y) => y.no.ToInt().CompareTo(x.no.ToInt()));

                var result = new DCInsideGalleryModel();
                result.is_minor_gallery = is_minorg;
                result.gallery_id       = gallery_id;
                result.articles         = articles_trim;

                // Use one timestamp for both files so the JSON and index outputs share a name stem.
                var stamp = DateTime.Now.Ticks;

                File.WriteAllText($"list-{gallery_id}-{stamp}.txt", JsonConvert.SerializeObject(result));
                File.WriteAllBytes($"list-{gallery_id}-{stamp}-index.txt", MessagePackSerializer.Serialize(result));
            }
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Initializes the manager by downloading the list page of the "hit" board,
 /// parsing it as a gallery, and storing the parsed <c>esno</c> value in <c>ESNO</c>.
 /// </summary>
 public DCInsideManager()
 {
     var listHtml   = NetTools.DownloadString("https://gall.dcinside.com/board/lists?id=hit");
     var hitGallery = DCInsideUtils.ParseGallery(listHtml);

     ESNO = hitGallery.esno;
 }