/// <summary>
/// Parses every file found under the hard-coded archive directory as an extended-list-view
/// result page and writes all parsed articles to <c>ex-hentai-archive.json</c> in the
/// executable's directory.
/// </summary>
/// <remarks>
/// A page that yields a number of articles other than 25 is reported as <c>[Miss]</c>; a page
/// whose parsing throws is reported as <c>[Fail]</c> and skipped. The <c>[Fail] &lt;path&gt;</c>
/// line format is left untouched: <c>ProcessExtract2</c> strips exactly that prefix from the
/// lines it reads, presumably from a capture of this output.
/// </remarks>
static void ProcessExtract()
{
    const string archive = @"E:\2019\e-archive";

    // Index the archive directory and block until the listing has completed.
    var indexor = new FileIndexor();
    Task.Run(async () => await indexor.ListingDirectoryAsync(archive)).Wait();

    // Flatten the per-directory listing into one list of full file paths.
    var htmls = new List<string>();
    indexor.Enumerate((string path, List<FileInfo> files) =>
    {
        files.ForEach(file => htmls.Add(file.FullName));
    });

    var result = new List<EHentaiResultArticle>();
    using (var progressBar = new Console.ConsoleProgressBar())
    {
        int processed = 0;
        foreach (var html in htmls)
        {
            var content = File.ReadAllText(html);
            try
            {
                var exh = ExHentaiParser.ParseResultPageExtendedListView(content);
                result.AddRange(exh);
                if (exh.Count != 25)
                {
                    Console.Instance.WriteLine("[Miss] " + html);
                }
            }
            catch (Exception)
            {
                // Best effort: report the page and keep going with the remaining files.
                Console.Instance.WriteLine("[Fail] " + html);
            }

            processed++;
            progressBar.SetProgress(processed / (float)htmls.Count * 100);
        }
    }

    JsonSerializer serializer = new JsonSerializer();
    serializer.Converters.Add(new JavaScriptDateTimeConverter());
    serializer.NullValueHandling = NullValueHandling.Ignore;

    Monitor.Instance.Push("Write file: ex-hentai-archive.json");
    using (StreamWriter sw = new StreamWriter(Path.Combine(Path.GetDirectoryName(Application.ExecutablePath), "ex-hentai-archive.json")))
    using (JsonWriter writer = new JsonTextWriter(sw))
    {
        serializer.Serialize(writer, result);
    }
}
/// <summary>
/// Downloads and prints every comment page of a single article.
/// </summary>
/// <param name="args">
/// <c>args[0]</c> is the gallery id and <c>args[1]</c> the article number; both are placed
/// into the request URL and the comment request as-is.
/// </param>
/// <remarks>
/// When the static <c>ESNO</c> is an empty string, the article page is downloaded and parsed
/// first and <c>ESNO</c> is overwritten with the value found there.
/// </remarks>
static void ProcessParseComments(string[] args)
{
    LoadGalleryList();
    bool minorGallery = minor_galleries.ContainsValue(args[0]);

    if (ESNO == "")
    {
        string viewUrl = minorGallery
            ? $"https://gall.dcinside.com/mgallery/board/view/?id={args[0]}&no={args[1]}"
            : $"https://gall.dcinside.com/board/view/?id={args[0]}&no={args[1]}";

        var client = NetCommon.GetDefaultClient();
        if (PHPSESSID != "")
        {
            client.Headers.Add(System.Net.HttpRequestHeader.Cookie, "PHPSESSID=" + PHPSESSID);
        }

        var parsedView = DCParser.ParseBoardView(client.DownloadString(viewUrl), minorGallery);
        ESNO = parsedView.ESNO;
    }

    // A fresh request object is built for every call, exactly as before.
    Func<DCArticle> buildRequest = () => new DCArticle { OriginalGalleryName = args[0], Id = args[1], ESNO = ESNO };

    var comments = DCCommon.GetComments(buildRequest(), "1");
    Console.Instance.WriteLine(comments);

    using (var progressBar = new Console.ConsoleProgressBar())
    {
        int total = comments.total_cnt;
        int fetched = comments.comment_cnt;
        int page = 2;

        // Keep requesting pages until the reported total is reached or a page comes back empty.
        while (fetched < total)
        {
            progressBar.SetProgress((fetched / (float)total) * 100);

            comments = DCCommon.GetComments(buildRequest(), page.ToString());
            if (comments.comment_cnt == 0)
            {
                break;
            }

            Console.Instance.WriteLine(comments);
            fetched += comments.comment_cnt;
            page++;
        }
    }
}
/// <summary>
/// Parses every file directly inside the hard-coded page directory as an extended-list-view
/// result page and writes all parsed articles to <c>ex-hentai-archive2.json</c> in the
/// executable's directory.
/// </summary>
/// <remarks>
/// A page that yields a number of articles other than 25 is reported as <c>[Miss]</c>; a page
/// whose parsing throws is reported as <c>[Fail]</c> and skipped. Both reports name the file
/// path. Files are read one at a time rather than all up front.
/// </remarks>
static void ProcessExtract4()
{
    const string archive = @"C:\Tools\koromo-copy\Koromo Copy UX\bin\Debug\exhentai-page";

    var files = Directory.GetFiles(archive);
    var result = new List<EHentaiResultArticle>();

    using (var progressBar = new Console.ConsoleProgressBar())
    {
        int processed = 0;
        foreach (var file in files)
        {
            var content = File.ReadAllText(file);
            try
            {
                var exh = ExHentaiParser.ParseResultPageExtendedListView(content);
                result.AddRange(exh);
                if (exh.Count != 25)
                {
                    // Report the path, not the page body, so the offending file can be found.
                    Console.Instance.WriteLine("[Miss] " + file);
                }
            }
            catch (Exception)
            {
                // Best effort: report the page and keep going with the remaining files.
                Console.Instance.WriteLine("[Fail] " + file);
            }

            processed++;
            progressBar.SetProgress(processed / (float)files.Length * 100);
        }
    }

    JsonSerializer serializer = new JsonSerializer();
    serializer.Converters.Add(new JavaScriptDateTimeConverter());
    serializer.NullValueHandling = NullValueHandling.Ignore;

    // The message names the file that is actually written below.
    Monitor.Instance.Push("Write file: ex-hentai-archive2.json");
    using (StreamWriter sw = new StreamWriter(Path.Combine(Path.GetDirectoryName(Application.ExecutablePath), "ex-hentai-archive2.json")))
    using (JsonWriter writer = new JsonTextWriter(sw))
    {
        serializer.Serialize(writer, result);
    }
}
/// <summary>
/// Re-parses, with the minimal-list-view parser, every file named in
/// <c>e-parse-fail.txt</c> and writes the parsed articles to
/// <c>ex-hentai-archive2.json</c> in the executable's directory.
/// </summary>
/// <remarks>
/// Each line of the input is turned into a file path by removing the text
/// <c>"[Fail] "</c> from it. Pages that parse are reported as <c>[GET]</c>; pages whose
/// parsing throws are reported as <c>[Fail]</c> and skipped.
/// </remarks>
static void ProcessExtract2()
{
    const string target = @"e-parse-fail.txt";

    string[] failedLines = File.ReadAllLines(target);
    var collected = new List<EHentaiResultArticle>();

    using (var progressBar = new Console.ConsoleProgressBar())
    {
        for (int index = 0; index < failedLines.Length; index++)
        {
            string fileName = failedLines[index].Replace("[Fail] ", "");
            string pageText = File.ReadAllText(fileName);

            try
            {
                var parsed = ExHentaiParser.ParseResultPageMinimalListView(pageText);
                Console.Instance.WriteLine("[GET] " + parsed.Count + " Articles! - " + fileName);
                collected.AddRange(parsed);
            }
            catch (Exception)
            {
                Console.Instance.WriteLine("[Fail] " + fileName);
            }

            // index is zero-based, so index + 1 files are finished at this point.
            progressBar.SetProgress((index + 1) / (float)failedLines.Length * 100);
        }
    }

    var serializer = new JsonSerializer();
    serializer.Converters.Add(new JavaScriptDateTimeConverter());
    serializer.NullValueHandling = NullValueHandling.Ignore;

    Monitor.Instance.Push("Write file: ex-hentai-archive2.json");

    string outputPath = Path.Combine(Path.GetDirectoryName(Application.ExecutablePath), "ex-hentai-archive2.json");
    using (StreamWriter output = new StreamWriter(outputPath))
    using (JsonWriter jsonOutput = new JsonTextWriter(output))
    {
        serializer.Serialize(jsonOutput, collected);
    }
}
/// <summary>
/// Downloads a range of gallery list pages, collects the articles listed on them and writes
/// the result as JSON to <c>list-{gallery}-{ticks}.txt</c>.
/// </summary>
/// <param name="args">
/// <c>args[0]</c> is the gallery id, <c>args[1]</c> the first page and <c>args[2]</c> the
/// last page (inclusive); the page numbers are parsed with <c>Convert.ToInt32</c>.
/// </param>
/// <param name="rem">When true, <c>&amp;exception_mode=recommend</c> is appended to each list URL.</param>
/// <remarks>
/// A page that fails to download or parse is retried with a doubling delay. After the last
/// attempt the page is reported through <c>WriteErrorLine</c> and skipped, so a permanently
/// failing page no longer retries forever.
/// </remarks>
static void ProcessCollectArticles(string[] args, bool rem)
{
    const int maxAttempts = 6;
    const int initialRetryDelayMs = 1000;

    var firstPage = Convert.ToInt32(args[1]);
    var lastPage = Convert.ToInt32(args[2]);
    var totalPages = lastPage - firstPage + 1;

    LoadGalleryList();
    bool is_minorg = minor_galleries.ContainsValue(args[0]);

    var articles = new List<DCPageArticle>();

    using (var progressBar = new Console.ConsoleProgressBar())
    {
        for (var page = firstPage; page <= lastPage; page++)
        {
            var url = is_minorg
                ? $"https://gall.dcinside.com/mgallery/board/lists/?id={args[0]}&page={page}"
                : $"https://gall.dcinside.com/board/lists/?id={args[0]}&page={page}";
            if (rem)
            {
                url += "&exception_mode=recommend";
            }

            var retryDelayMs = initialRetryDelayMs;
            for (var attempt = 1; ; attempt++)
            {
                try
                {
                    Console.Instance.WriteLine($"Download URL: {url}");

                    var client = NetCommon.GetDefaultClient();
                    if (PHPSESSID != "")
                    {
                        client.Headers.Add(System.Net.HttpRequestHeader.Cookie, "PHPSESSID=" + PHPSESSID);
                    }
                    var html = client.DownloadString(url);

                    DCGallery gall = null;
                    if (is_minorg)
                    {
                        gall = DCParser.ParseMinorGallery(html);
                    }
                    else
                    {
                        gall = DCParser.ParseGallery(html);
                    }

                    articles.AddRange(gall.articles);
                    break;
                }
                catch (Exception e)
                {
                    if (attempt == maxAttempts)
                    {
                        // Give up on this page only; everything collected so far is still saved.
                        Console.Instance.WriteErrorLine($"[Error] page {page} skipped after {maxAttempts} attempts: {e.Message}");
                        break;
                    }

                    // Back off before retrying instead of immediately re-issuing the request.
                    Thread.Sleep(retryDelayMs);
                    retryDelayMs *= 2;
                }
            }

            progressBar.SetProgress(((page - firstPage + 1) / (float)totalPages) * 100);
        }

        var result = new DCGalleryModel();
        result.is_minor_gallery = is_minorg;
        result.gallery_id = args[0];
        result.articles = articles.ToArray();

        File.WriteAllText($"list-{args[0]}-{DateTime.Now.Ticks}.txt", JsonConvert.SerializeObject(result));
    }
}
/// <summary>
/// Crawls a range of gallery list pages, downloads every article that passes the filters,
/// downloads all comment pages of those articles, and writes everything to
/// <c>dc-{gallery}-{first}-{last}-{ticks}.txt</c>.
/// </summary>
/// <param name="args">
/// <c>args[0]</c> is the gallery id, <c>args[1]</c> the first list page and <c>args[2]</c>
/// the last list page (inclusive).
/// </param>
/// <param name="rem">When true, <c>&amp;exception_mode=recommend</c> is appended to each list URL.</param>
/// <param name="title">Null for no filter; otherwise the article title must contain <c>title[0]</c>.</param>
/// <param name="nick">Null for no filter; otherwise the article nick must contain <c>nick[0]</c>.</param>
/// <param name="ip">Null for no filter; otherwise the article ip must contain <c>ip[0]</c>.</param>
/// <param name="id">Null for no filter; otherwise the article uid must contain <c>id[0]</c>.</param>
/// <param name="login">When true, only articles with <c>islogined</c> set are kept.</param>
/// <param name="fix">When true, only articles with <c>isfixed</c> set are kept.</param>
/// <remarks>
/// The three phases share one progress bar. Requests are separated by fixed sleeps
/// (3 s between pages and articles, 2 s between comment pages).
/// </remarks>
static void ProcessFullParse(string[] args, bool rem, string[] title, string[] nick, string[] ip, string[] id, bool login, bool fix)
{
    using (var progressBar = new Console.ConsoleProgressBar())
    {
        Console.Instance.WriteLine("Parse gallery...");

        var firstPage = Convert.ToInt32(args[1]);
        var lastPage = Convert.ToInt32(args[2]);
        var totalPages = lastPage - firstPage + 1;

        LoadGalleryList();
        bool is_minorg = minor_galleries.ContainsValue(args[0]);

        var articles = new List<DCPageArticle>();

        //
        //  Parse Gallery
        //
        for (var page = firstPage; page <= lastPage; page++)
        {
            var url = is_minorg
                ? $"https://gall.dcinside.com/mgallery/board/lists/?id={args[0]}&page={page}"
                : $"https://gall.dcinside.com/board/lists/?id={args[0]}&page={page}";
            if (rem)
            {
                url += "&exception_mode=recommend";
            }

            var client = NetCommon.GetDefaultClient();
            if (PHPSESSID != "")
            {
                client.Headers.Add(System.Net.HttpRequestHeader.Cookie, "PHPSESSID=" + PHPSESSID);
            }
            var html = client.DownloadString(url);

            DCGallery gall = null;
            if (is_minorg)
            {
                gall = DCParser.ParseMinorGallery(html);
            }
            else
            {
                gall = DCParser.ParseGallery(html);
            }

            foreach (var article in gall.articles)
            {
                // Each filter only looks at the first element of its array.
                if (title != null && !article.title.Contains(title[0])) { continue; }
                if (nick != null && !article.nick.Contains(nick[0])) { continue; }
                if (ip != null && !article.ip.Contains(ip[0])) { continue; }
                if (id != null && !article.uid.Contains(id[0])) { continue; }
                if (login && !article.islogined) { continue; }
                if (fix && !article.isfixed) { continue; }

                articles.Add(article);
            }

            progressBar.SetProgress(((page - firstPage + 1) / (float)totalPages) * 100);
            Thread.Sleep(3000);
        }

        Console.Instance.WriteLine("Parse articles...");

        int acnt = 0;
        var _articles = new List<DCArticle>();

        //
        //  Parse Articles
        //
        foreach (var article in articles)
        {
            var url = is_minorg
                ? $"https://gall.dcinside.com/mgallery/board/view/?id={args[0]}&no={article.no}"
                : $"https://gall.dcinside.com/board/view/?id={args[0]}&no={article.no}";

            var client = NetCommon.GetDefaultClient();
            if (PHPSESSID != "")
            {
                client.Headers.Add(System.Net.HttpRequestHeader.Cookie, "PHPSESSID=" + PHPSESSID);
            }
            var html = client.DownloadString(url);

            var _article = DCParser.ParseBoardView(html, is_minorg);
            _articles.Add(_article);

            acnt++;
            progressBar.SetProgress(((acnt) / (float)(articles.Count)) * 100);
            Thread.Sleep(3000);
        }

        var info = new List<Tuple<DCArticle, List<DCComment>>>();

        // Nothing may have passed the filters, so only index into the list when it is non-empty.
        if (ESNO == "" && _articles.Count > 0)
        {
            ESNO = _articles[0].ESNO;
        }

        int ccnt = 0;

        //
        //  Parse Comments
        //
        foreach (var article in _articles)
        {
            var cc = new List<DCComment>();
            var comments = DCCommon.GetComments(new DCArticle { OriginalGalleryName = article.OriginalGalleryName, Id = article.Id, ESNO = article.ESNO }, "1");
            Thread.Sleep(2000);
            cc.Add(comments);

            int tcount = comments.total_cnt;
            int count = comments.comment_cnt;

            // Walk the remaining pages of THIS article: advance the page number every pass and
            // count each page's comments exactly once (same scheme as ProcessParseComments).
            for (int i = 2; count < tcount; i++)
            {
                comments = DCCommon.GetComments(new DCArticle { OriginalGalleryName = article.OriginalGalleryName, Id = article.Id, ESNO = article.ESNO }, i.ToString());
                if (comments.comment_cnt == 0)
                {
                    break;
                }
                count += comments.comment_cnt;
                cc.Add(comments);
                Thread.Sleep(2000);
            }

            info.Add(new Tuple<DCArticle, List<DCComment>>(article, cc));

            ccnt++;
            progressBar.SetProgress(((ccnt) / (float)(_articles.Count)) * 100);
        }

        var result = new Tuple<List<DCPageArticle>, List<Tuple<DCArticle, List<DCComment>>>>(articles, info);

        File.WriteAllText($"dc-{args[0]}-{args[1]}-{args[2]}-{DateTime.Now.Ticks}.txt", Monitor.SerializeObject(result));
    }
}
/// <summary>
/// Tests various features. The routine to run is selected by the integer in <c>args[0]</c>.
/// </summary>
/// <param name="args">Command arguments; only <c>args[0]</c> (the routine number, 0-13) is read.</param>
static void ProcessTest(string[] args)
{
    switch (args[0].ToInt32())
    {
        //
        //  Save and beautify metadatas
        //
        case 0:
            {
                var hiddendata = JsonConvert.DeserializeObject<List<HitomiArticle>>(File.ReadAllText(Path.Combine(Path.GetDirectoryName(Application.ExecutablePath), "hiddendata.json")));
                Func<HitomiArticle, NHitomiArticle> r2n = (ha) =>
                {
                    return new NHitomiArticle
                    {
                        Artists = ha.Artists,
                        Characters = ha.Characters,
                        Groups = ha.Groups,
                        Language = ha.Language,
                        Tags = ha.Tags,
                        Type = ha.Type,
                        DateTime = ha.DateTime,
                        Thumbnail = ha.Thumbnail,
                        Magic = ha.Magic,
                        Title = ha.Title
                    };
                };

                // Materialize once: the projection is serialized twice below.
                var jj = hiddendata.Select(x => r2n(x)).ToList();

                var json = JsonConvert.SerializeObject(jj, Formatting.Indented);
                using (var fs = new StreamWriter(new FileStream("hiddendata_beautify.json", FileMode.Create, FileAccess.Write)))
                {
                    fs.Write(json);
                }

                // Compress once and write the raw bytes.
                var json2 = JsonConvert.SerializeObject(jj, Formatting.None);
                File.WriteAllBytes("hiddendata_nonbeautify.json", json2.Zip());
            }
            break;

        //
        //  Load metadatas
        //
        case 1:
            {
                var bytes = File.ReadAllBytes("hiddendata_nonbeautify.json");
                var json = JsonConvert.DeserializeObject<List<NHitomiArticle>>(bytes.Unzip());
                Console.Instance.Write($"{json.Count}");
            }
            break;

        // Compress hiddendata.json
        case 2:
            {
                var str = File.ReadAllText("hiddendata.json");
                File.WriteAllBytes("hiddendata.compress", str.Zip());
            }
            break;

        // Compress metadata.json
        case 3:
            {
                var str = File.ReadAllText("metadata.json");
                File.WriteAllBytes("metadata.compress", str.Zip());
            }
            break;

        // Rewrite hiddendata.json without indentation
        case 4:
            {
                var str = JsonConvert.DeserializeObject<List<HitomiArticle>>(File.ReadAllText("hiddendata.json"));
                using (var fs = new StreamWriter(new FileStream("hiddendata.json", FileMode.Create, FileAccess.Write)))
                {
                    fs.Write(JsonConvert.SerializeObject(str, Formatting.None));
                }
            }
            break;

        // Decompress metadata.compress
        case 5:
            {
                var bytes = File.ReadAllBytes("metadata.compress");
                File.WriteAllText("metadata.json", bytes.Unzip());
            }
            break;

        // Decompress hiddendata.compress
        case 6:
            {
                var bytes = File.ReadAllBytes("hiddendata.compress");
                File.WriteAllText("hiddendata.json", bytes.Unzip());
            }
            break;

        // Print the tuples returned by HitomiExplore.exploreNullSpace
        case 7:
            {
                HitomiExplore.exploreNullSpace().ForEach(x => Console.Instance.WriteLine($"{x.Item1} {x.Item2} {x.Item3}"));
            }
            break;

        case 8:
            {
                // Update index-metadata.json
                HitomiData.Instance.LoadMetadataJson();
                HitomiData.Instance.LoadHiddendataJson();
                HitomiIndex.MakeIndex();
                var str = File.ReadAllBytes("index-metadata.json");
                File.WriteAllBytes("index-metadata.compress", str.ZipByte());
            }
            break;

        // Dump the in-memory index and the loaded metadata as JSON next to the executable
        case 9:
            {
                var x = new HitomiIndexDataModel();
                x.index = HitomiIndex.Instance.index;
                x.metadata = HitomiIndex.Instance.metadata_collection;

                JsonSerializer serializer = new JsonSerializer();
                serializer.Converters.Add(new Newtonsoft.Json.Converters.JavaScriptDateTimeConverter());
                serializer.NullValueHandling = NullValueHandling.Ignore;

                Monitor.Instance.Push("Write file: metadata-index.json");
                using (StreamWriter sw = new StreamWriter(Path.Combine(Path.GetDirectoryName(Application.ExecutablePath), "metadata-index.json")))
                using (JsonWriter writer = new JsonTextWriter(sw))
                {
                    serializer.Serialize(writer, x);
                }

                HitomiData.Instance.LoadMetadataJson();
                HitomiData.Instance.LoadHiddendataJson();

                // The file name now matches the announced one (it used to be a garbled paste).
                Monitor.Instance.Push("Write file: metadata-noptimized.json");
                using (StreamWriter sw = new StreamWriter(Path.Combine(Path.GetDirectoryName(Application.ExecutablePath), "metadata-noptimized.json")))
                using (JsonWriter writer = new JsonTextWriter(sw))
                {
                    serializer.Serialize(writer, HitomiData.Instance.metadata_collection);
                }
            }
            break;

        // Print every artist tag of the index
        case 10:
            {
                foreach (var x in HitomiIndex.Instance.tagdata_collection.artist)
                {
                    Console.Instance.Write(x.Tag + ", ");
                }
            }
            break;

        // Reload metadata and rebuild the tag data
        case 11:
            {
                HitomiData.Instance.LoadMetadataJson();
                HitomiData.Instance.LoadHiddendataJson();
                HitomiData.Instance.RebuildTagData();
            }
            break;

        case 12:
            {
                // Update HitomiTitle
                const int pageCount = 50;
                const string pageDirectory = "exhentai-page";

                // File.WriteAllText does not create missing directories.
                Directory.CreateDirectory(pageDirectory);

                // Step 1: download the newest result pages.
                for (int i = 0; i < pageCount; i++)
                {
                    try
                    {
                        var url3 = $"https://exhentai.org/?page={i}&f_doujinshi=on&f_manga=on&f_artistcg=on&f_gamecg=on&&f_cats=0&f_sname=on&f_stags=on&f_sh=on&advsearch=1&f_srdd=2&f_sname=on&f_stags=on&f_sdesc=on&f_sh=on";
                        var html = NetCommon.DownloadExHentaiString(url3);
                        File.WriteAllText($"{pageDirectory}/exhentai-{i}.html", html);
                        Monitor.Instance.Push($"[Paging] {i + 1}/{pageCount}");
                    }
                    catch (Exception e)
                    {
                        Console.Instance.WriteErrorLine($"[Error] {i} {e.Message}");
                    }
                    Thread.Sleep(100);
                }

                // Step 2: parse the pages from the directory they were just written to
                // (previously a machine-specific absolute path) into ex-hentai-archive2.json.
                {
                    var pageFiles = Directory.GetFiles(pageDirectory);
                    var result = new List<EHentaiResultArticle>();

                    using (var progressBar = new Console.ConsoleProgressBar())
                    {
                        int processed = 0;
                        foreach (var pageFile in pageFiles)
                        {
                            var content = File.ReadAllText(pageFile);
                            try
                            {
                                var exh = ExHentaiParser.ParseResultPageExtendedListView(content);
                                result.AddRange(exh);
                                if (exh.Count != 25)
                                {
                                    // Report the path, not the page body.
                                    Console.Instance.WriteLine("[Miss] " + pageFile);
                                }
                            }
                            catch (Exception)
                            {
                                Console.Instance.WriteLine("[Fail] " + pageFile);
                            }
                            processed++;
                            progressBar.SetProgress(processed / (float)pageFiles.Length * 100);
                        }
                    }

                    JsonSerializer serializer = new JsonSerializer();
                    serializer.Converters.Add(new JavaScriptDateTimeConverter());
                    serializer.NullValueHandling = NullValueHandling.Ignore;

                    Monitor.Instance.Push("Write file: ex-hentai-archive2.json");
                    using (StreamWriter sw = new StreamWriter(Path.Combine(Path.GetDirectoryName(Application.ExecutablePath), "ex-hentai-archive2.json")))
                    using (JsonWriter writer = new JsonTextWriter(sw))
                    {
                        serializer.Serialize(writer, result);
                    }
                }

                // Step 3: merge the new articles into the existing archive, keyed by the
                // number in the fifth '/'-separated segment of the URL.
                {
                    var xxx = JsonConvert.DeserializeObject<List<EHentaiResultArticle>>(File.ReadAllText("ex-hentai-archive.json"));
                    var zzz = JsonConvert.DeserializeObject<List<EHentaiResultArticle>>(File.ReadAllText("ex-hentai-archive2.json"));

                    var exists = new HashSet<int>();
                    xxx.ForEach(a => exists.Add(a.URL.Split('/')[4].ToInt32()));

                    foreach (var z in zzz)
                    {
                        var nn = z.URL.Split('/')[4].ToInt32();

                        // Add() returns false when the key is already present. Registering the
                        // key here also catches duplicates inside the new batch itself.
                        if (!exists.Add(nn))
                        {
                            Console.Instance.WriteLine("[Duplicate] " + nn);
                        }
                        else
                        {
                            xxx.Add(z);
                        }
                    }

                    JsonSerializer serializer = new JsonSerializer();
                    serializer.Converters.Add(new JavaScriptDateTimeConverter());
                    serializer.NullValueHandling = NullValueHandling.Ignore;

                    Monitor.Instance.Push("Write file: ex-hentai-archive3.json");
                    using (StreamWriter sw = new StreamWriter(Path.Combine(Path.GetDirectoryName(Application.ExecutablePath), "ex-hentai-archive3.json")))
                    using (JsonWriter writer = new JsonTextWriter(sw))
                    {
                        serializer.Serialize(writer, xxx);
                    }
                }

                HitomiTitle.MakeTitle();
            }
            break;

        case 13:
            // Fill type
            {
                var md = JsonConvert.DeserializeObject<List<HitomiArticle>>(File.ReadAllText("hiddendata.json"));
                var xxx = JsonConvert.DeserializeObject<List<EHentaiResultArticle>>(File.ReadAllText("ex-hentai-archive.json"));

                // Map the id segment of each archive URL to the lower-cased article type.
                var types = new Dictionary<string, string>();
                foreach (var xx in xxx)
                {
                    try
                    {
                        types.Add(xx.URL.Split('/')[4], xx.Type.ToLower());
                    }
                    catch (Exception)
                    {
                        // Deliberately best effort: entries that throw here (for example a
                        // repeated id) are skipped and the first mapping is kept.
                    }
                }

                for (int i = 0; i < md.Count; i++)
                {
                    if (md[i].Type == null || md[i].Type.Trim() == "")
                    {
                        var key = md[i].Magic.ToString();
                        string type;
                        if (types.TryGetValue(key, out type))
                        {
                            // Copy, modify, write back: also correct if HitomiArticle is a value type.
                            var x = md[i];
                            x.Type = type;
                            md[i] = x;
                        }
                        else
                        {
                            Console.Instance.WriteLine("[Fail] " + key);
                        }
                    }
                }

                JsonSerializer serializer = new JsonSerializer();
                serializer.Converters.Add(new JavaScriptDateTimeConverter());
                serializer.NullValueHandling = NullValueHandling.Ignore;

                // The message names the file that is actually written below.
                Monitor.Instance.Push("Write file: hiddendata2.json");
                using (StreamWriter sw = new StreamWriter(Path.Combine(Path.GetDirectoryName(Application.ExecutablePath), "hiddendata2.json")))
                using (JsonWriter writer = new JsonTextWriter(sw))
                {
                    serializer.Serialize(writer, md);
                }
            }
            break;
    }
}