/// <summary>
/// Exports the merged article set to an SQLite database (<c>data.db</c>) inside the
/// <paramref name="filename"/> directory and, unless <paramref name="skip_indexing"/> is set,
/// writes JSON frequency and co-occurrence indexes next to it.
/// </summary>
/// <param name="filename">
/// Output directory. It is created when missing; an existing <c>data.db</c> inside it is deleted
/// once the new rows have been built successfully.
/// </param>
/// <param name="skip_indexing">When <c>true</c>, only the database is written and no JSON index files are produced.</param>
/// <param name="language">
/// When non-null, only articles whose language is unknown/empty or equal to this value are exported.
/// </param>
/// <param name="include_exhentai">
/// When <c>true</c>, articles that are not present in <c>onHitomi</c> are exported from their
/// E-Hentai record; otherwise those articles are skipped.
/// </param>
public void ExtractRawDatabase(string filename = "rawdata", bool skip_indexing = false, string language = null, bool include_exhentai = false)
{
    // Formats a list as "|a|b|c|"; yields `fallback` when the list is missing, empty, or starts with "".
    string JoinPiped(IEnumerable<string> values, string fallback)
    {
        return values != null && values.Any() && values.First() != ""
            ? "|" + string.Join("|", values) + "|"
            : fallback;
    }

    // Maps the two E-Hentai tag spellings that are rewritten to their short form.
    string NormalizeTag(string tag)
    {
        if (tag == "lolicon") return "loli";
        if (tag == "shotacon") return "shota";
        return tag;
    }

    // Extracts the text of a leading "(...)" prefix of a title, or null when the title has none.
    string ClassOf(string title)
    {
        return title.StartsWith("(") ? title.Split("(")[1].Split(")")[0] : null;
    }

    // Build every row exactly once. The original kept a deferred query that was enumerated
    // both by InsertAll and by the indexing loop, repeating all the parsing (and the
    // diagnostic console output) a second time.
    var rows = articles
        .Where(id =>
        {
            if (language == null) return true;

            if (onHitomi.TryGetValue(id, out var hitomiIndex))
            {
                var md = HitomiData.Instance.metadata_collection[hitomiIndex];
                return string.IsNullOrEmpty(md.Language) || md.Language == language;
            }

            var ed = ehentaiArticles[onEH[id]];
            if (ed.Descripts == null || !ed.Descripts.ContainsKey("language")) return true;
            var declared = ed.Descripts["language"];
            return declared == null || declared.Count == 0 || declared.Contains(language);
        })
        .Select(id =>
        {
            if (onHitomi.TryGetValue(id, out var hitomiIndex))
            {
                var md = HitomiData.Instance.metadata_collection[hitomiIndex];
                var row = new HitomiColumnModel
                {
                    Id = id,
                    Artists = JoinPiped(md.Artists, "|N/A|"),
                    Characters = JoinPiped(md.Characters, null),
                    Groups = JoinPiped(md.Groups, null),
                    Series = JoinPiped(md.Parodies, null),
                    Title = md.Name,
                    Tags = JoinPiped(md.Tags, null),
                    Type = md.Type,
                    Language = string.IsNullOrEmpty(md.Language) ? "n/a" : md.Language,
                    Published = md.DateTime,
                    ExistOnHitomi = 1,
                };

                if (onEH.ContainsKey(id))
                {
                    // The article also exists on E-Hentai: take upload details from that record.
                    var eh = ehentaiArticles[ehIndex[md.ID.ToString()]];
                    row.Uploader = eh.Uploader;
                    row.Published = DateTime.Parse(eh.Published);
                    row.EHash = eh.URL.Split('/')[5];
                    row.Files = eh.Files.Split(' ')[0].ToInt();
                    if (eh.Title.StartsWith("("))
                    {
                        row.Class = ClassOf(eh.Title);
                    }
                }
                else if (row.Published == null)
                {
                    // No publish date anywhere: derive one from the id via datetimeEstimator.
                    row.Published = mindd.AddMinutes(datetimeEstimator.Predict(md.ID));
                }

                return row;
            }

            // E-Hentai-only article. Descripts maps a namespace ("artist", "female", "language", ...)
            // to a list of strings and may be missing entirely.
            var ed = ehentaiArticles[onEH[id]];
            var desc = ed.Descripts;
            var artist = desc != null && desc.ContainsKey("artist") ? desc["artist"] : null;
            var female = desc != null && desc.ContainsKey("female") ? desc["female"] : null;
            var parody = desc != null && desc.ContainsKey("parody") ? desc["parody"] : null;
            var character = desc != null && desc.ContainsKey("character") ? desc["character"] : null;
            var male = desc != null && desc.ContainsKey("male") ? desc["male"] : null;
            var misc = desc != null && desc.ContainsKey("misc") ? desc["misc"] : null;
            var languages = desc != null && desc.ContainsKey("language") ? desc["language"] : null;
            var group = desc != null && desc.ContainsKey("group") ? desc["group"] : null;

            var lang = "n/a";
            if (languages != null && languages.Count != 0)
            {
                var concrete = languages.Where(l => l != "translated").ToList();
                if (concrete.Count == 0)
                {
                    // Only "translated" is listed; print the URL as a diagnostic.
                    Console.WriteLine(ed.URL);
                }
                else
                {
                    lang = concrete[0];
                }
            }

            if (!include_exhentai)
            {
                return null;
            }

            var tags = new List<string>();
            if (female != null) tags.AddRange(female.Select(t => "female:" + NormalizeTag(t)));
            if (male != null) tags.AddRange(male.Select(t => "male:" + NormalizeTag(t)));
            if (misc != null) tags.AddRange(misc.Select(t => NormalizeTag(t)));

            return new HitomiColumnModel
            {
                Id = id,
                Artists = JoinPiped(artist, "|N/A|"),
                Characters = JoinPiped(character, null),
                Groups = JoinPiped(group, null),
                Series = JoinPiped(parody, null),
                Title = ed.Title,
                Tags = tags.Count > 0 ? "|" + string.Join("|", tags) + "|" : null,
                Type = ed.Type,
                Language = lang,
                ExistOnHitomi = 0,
                Uploader = ed.Uploader,
                Published = DateTime.Parse(ed.Published),
                EHash = ed.URL.Split('/')[5],
                Files = ed.Files.Split(' ')[0].ToInt(),
                Class = ClassOf(ed.Title),
                Thumbnail = ed.Thumbnail,
            };
        })
        .Where(row => row != null) // skipped E-Hentai-only articles
        .ToList();

    // Only touch the output directory after every row was built successfully.
    Directory.CreateDirectory(filename);
    var dbPath = filename + "/data.db";
    if (File.Exists(dbPath))
    {
        File.Delete(dbPath);
    }

    var db = new SQLiteConnection(dbPath);
    try
    {
        var info = db.GetTableInfo(typeof(HitomiColumnModel).Name);
        if (!info.Any())
        {
            db.CreateTable<HitomiColumnModel>();
        }
        db.InsertAll(rows);
    }
    finally
    {
        // Close the connection even when the insert fails.
        db.Close();
    }

    if (skip_indexing)
    {
        return;
    }

    var index = new IndexData();
    var tagIds = new Dictionary<string, int>(); // tag -> dense id, assigned in order of first appearance
    var tagsByArtist = new Dictionary<string, Dictionary<int, int>>();
    var tagsByGroup = new Dictionary<string, Dictionary<int, int>>();
    var tagsByUploader = new Dictionary<string, Dictionary<int, int>>();
    var tagsBySeries = new Dictionary<string, Dictionary<int, int>>();
    var tagsByCharacter = new Dictionary<string, Dictionary<int, int>>();
    var charactersBySeries = new Dictionary<string, Dictionary<string, int>>();
    var seriesByCharacter = new Dictionary<string, Dictionary<string, int>>();
    var charactersByCharacter = new Dictionary<string, Dictionary<string, int>>();
    var seriesBySeries = new Dictionary<string, Dictionary<string, int>>();

    // Splits a "|a|b|" value into its non-empty parts.
    string[] SplitPiped(string piped)
    {
        return piped.Split('|').Where(part => part != "").ToArray();
    }

    // Adds one occurrence to map[key].
    void Bump(Dictionary<string, int> map, string key)
    {
        map.TryGetValue(key, out var seen);
        map[key] = seen + 1;
    }

    // Counts every non-empty part of a piped value.
    void CountEach(Dictionary<string, int> map, string piped)
    {
        if (piped == null) return;
        foreach (var part in SplitPiped(piped))
        {
            Bump(map, part);
        }
    }

    // Counts a single scalar value; null and "" are ignored.
    void CountSingle(Dictionary<string, int> map, string value)
    {
        if (string.IsNullOrEmpty(value)) return;
        Bump(map, value);
    }

    // For each owner (artist, group, ...) counts how often each tag id occurs with it.
    void CountTagsFor(Dictionary<string, Dictionary<int, int>> target, string owners, string[] tags)
    {
        if (owners == null) return;
        foreach (var owner in SplitPiped(owners))
        {
            if (!target.TryGetValue(owner, out var counts))
            {
                target.Add(owner, counts = new Dictionary<int, int>());
            }
            foreach (var tag in tags)
            {
                if (!tagIds.TryGetValue(tag, out var tagId))
                {
                    tagIds.Add(tag, tagId = tagIds.Count);
                }
                counts.TryGetValue(tagId, out var seen);
                counts[tagId] = seen + 1;
            }
        }
    }

    // Counts co-occurrences key -> partner; with skipSelf a value is never paired with itself.
    void CountPairs(Dictionary<string, Dictionary<string, int>> target, string[] keys, string[] partners, bool skipSelf)
    {
        foreach (var key in keys)
        {
            if (!target.TryGetValue(key, out var counts))
            {
                target.Add(key, counts = new Dictionary<string, int>());
            }
            foreach (var partner in partners)
            {
                if (skipSelf && partner == key) continue;
                Bump(counts, partner);
            }
        }
    }

    foreach (var article in rows)
    {
        CountEach(index.tags, article.Tags);
        CountEach(index.artists, article.Artists);
        CountEach(index.groups, article.Groups);
        CountEach(index.series, article.Series);
        CountEach(index.characters, article.Characters);
        CountSingle(index.languages, article.Language);
        CountSingle(index.types, article.Type);
        CountSingle(index.uploaders, article.Uploader);
        CountSingle(index.classes, article.Class);

        // Articles without tags contribute to the plain counters only.
        if (string.IsNullOrEmpty(article.Tags))
        {
            continue;
        }

        var tags = SplitPiped(article.Tags);
        CountTagsFor(tagsByArtist, article.Artists, tags);
        CountTagsFor(tagsByGroup, article.Groups, tags);
        CountTagsFor(tagsByUploader, article.Uploader, tags);
        CountTagsFor(tagsBySeries, article.Series, tags);
        CountTagsFor(tagsByCharacter, article.Characters, tags);

        if (article.Series != null && article.Characters != null)
        {
            var series = SplitPiped(article.Series);
            var characters = SplitPiped(article.Characters);
            CountPairs(charactersBySeries, series, characters, false);
            CountPairs(seriesByCharacter, characters, series, false);
            CountPairs(seriesBySeries, series, series, true);
            CountPairs(charactersByCharacter, characters, characters, true);
        }
    }

    File.WriteAllText(filename + "/index.json", JsonConvert.SerializeObject(index));
    File.WriteAllText(filename + "/tag-index.json", JsonConvert.SerializeObject(tagIds));
    File.WriteAllText(filename + "/tag-artist.json", JsonConvert.SerializeObject(tagsByArtist));
    File.WriteAllText(filename + "/tag-group.json", JsonConvert.SerializeObject(tagsByGroup));
    File.WriteAllText(filename + "/tag-uploader.json", JsonConvert.SerializeObject(tagsByUploader));
    File.WriteAllText(filename + "/tag-series.json", JsonConvert.SerializeObject(tagsBySeries));
    File.WriteAllText(filename + "/tag-character.json", JsonConvert.SerializeObject(tagsByCharacter));
    // File names are kept as in the original: "character-series.json" is keyed by series,
    // "series-character.json" is keyed by character.
    File.WriteAllText(filename + "/character-series.json", JsonConvert.SerializeObject(charactersBySeries));
    File.WriteAllText(filename + "/series-character.json", JsonConvert.SerializeObject(seriesByCharacter));
    File.WriteAllText(filename + "/character-character.json", JsonConvert.SerializeObject(charactersByCharacter));
    File.WriteAllText(filename + "/series-series.json", JsonConvert.SerializeObject(seriesBySeries));
}
/// <summary>
/// Runs one full synchronization pass:
/// downloads the seed data files when they are missing, probes Hitomi for galleries around the
/// newest known id and saves the new ones, crawls the ExHentai result pages and merges them into
/// <c>ex-hentai-archive.json</c>, and finally writes the SQLite databases
/// (<c>hitomidata.db</c> plus one per language).
/// </summary>
static void ProcessStart()
{
    Console.Clear();
    Console.Title = "hsync";
    Console.WriteLine("hsync - DB Synchronization Manager");
    Console.WriteLine("Copyright (C) 2020. project violet-server.");
    Console.WriteLine($"Version: {Version.Text} (Build: {Internals.GetBuildDate().ToLongDateString()})");
    Console.WriteLine("");

    if (!File.Exists("hiddendata.json"))
    {
        Logs.Instance.Push("Welcome to hsync!\r\n\tDownload the necessary data before running the program!");
        download_data("https://github.com/project-violet/database/releases/download/rd2020.06.07/hiddendata.json", "hiddendata.json");
    }
    if (!File.Exists("metadata.json"))
    {
        download_data("https://github.com/project-violet/database/releases/download/rd2020.06.07/metadata.json", "metadata.json");
    }
    if (!File.Exists("ex-hentai-archive.json"))
    {
        download_data("https://github.com/project-violet/database/releases/download/rd2020.06.07/ex-hentai-archive.json", "ex-hentai-archive.json");
    }

    HitomiData.Instance.Load();
    var latest = HitomiData.Instance.metadata_collection.First().ID;

    SyncHitomi();
    SyncExHentai();
    BuildDatabases();

    // Probes the ids in [latest - 2000, latest + 2000) that are not known yet and stores the galleries found.
    void SyncHitomi()
    {
        const int range = 2000;

        var known = new HashSet<int>();
        foreach (var metadata in HitomiData.Instance.metadata_collection)
        {
            known.Add(metadata.ID);
        }

        // Keep the ids next to the URLs so a gallery URL can be rebuilt from the id later.
        var candidateIds = Enumerable.Range(latest - range, range * 2).Where(id => !known.Contains(id)).ToList();
        var gburls = candidateIds.Select(id => $"https://ltn.hitomi.la/galleryblock/{id}.html").ToList();

        var dcnt = 0;
        var ecnt = 0;
        Console.Write("Running galleryblock tester... ");
        List<string> htmls;
        using (var pb = new ProgressBar())
        {
            htmls = NetTools.DownloadStrings(gburls, "",
                () => { pb.Report(gburls.Count, Interlocked.Increment(ref dcnt), ecnt); },
                () => { pb.Report(gburls.Count, dcnt, Interlocked.Increment(ref ecnt)); });
        }
        Console.WriteLine("Complete");

        // Parse every downloaded gallery block once; `found` and `gurls` stay index-aligned.
        var found = new List<HitomiArticle>();
        var gurls = new List<string>(gburls.Count);
        for (int i = 0; i < gburls.Count; i++)
        {
            if (htmls[i] == null)
            {
                continue;
            }

            var block = HitomiParser.ParseGalleryBlock(htmls[i]);
            found.Add(block);
            if (block.Magic.Contains("-"))
            {
                gurls.Add("https://hitomi.la/" + block.Magic);
            }
            else
            {
                // Fixed: the original used the loop index here instead of the gallery id.
                gurls.Add("https://hitomi.la/galleries/" + candidateIds[i] + ".html");
            }
        }

        dcnt = 0;
        ecnt = 0;
        Console.Write("Running gallery tester... ");
        // Fixed: start with an empty list so the redirect pass below is safe when nothing was found.
        var htmls2 = new List<string>();
        if (gurls.Count != 0)
        {
            using (var pb = new ProgressBar())
            {
                // Fixed: progress is reported against the gallery URL count, not the gallery block count.
                htmls2 = NetTools.DownloadStrings(gurls, "",
                    () => { pb.Report(gurls.Count, Interlocked.Increment(ref dcnt), ecnt); },
                    () => { pb.Report(gurls.Count, dcnt, Interlocked.Increment(ref ecnt)); });
            }
        }
        Console.WriteLine("Complete");

        Console.Write("Check redirect gallery html... ");
        // Follow "Redirect" pages until a full pass changes nothing (redirects may chain).
        bool changed;
        do
        {
            changed = false;
            for (int i = 0; i < htmls2.Count; i++)
            {
                if (htmls2[i] == null)
                {
                    continue;
                }

                var node = htmls2[i].ToHtmlNode();
                var title = node.SelectSingleNode("//title");
                if (title != null && title.InnerText == "Redirect")
                {
                    htmls2[i] = NetTools.DownloadString(node.SelectSingleNode("//a").GetAttributeValue("href", ""));
                    changed = true;
                }
            }
        } while (changed);
        Console.WriteLine("Complete");

        // Groups and characters are only available on the gallery page; copy them over when it was fetched.
        for (int j = 0; j < found.Count; j++)
        {
            if (htmls2[j] == null)
            {
                continue;
            }

            var gallery = HitomiParser.ParseGallery(htmls2[j]);
            found[j].Groups = gallery.Groups;
            found[j].Characters = gallery.Characters;
        }

        Console.Write("Save to hiddendata.json... ");
        HitomiData.Instance.SaveWithNewData(found);
        Console.WriteLine("Complete");
    }

    // Crawls the ExHentai result pages and appends unseen galleries to ex-hentai-archive.json.
    void SyncExHentai()
    {
        var crawled = new List<EHentaiResultArticle>();

        for (int i = 0; i < 9999999; i++)
        {
            try
            {
                var url = $"https://exhentai.org/?page={i}&f_doujinshi=on&f_manga=on&f_artistcg=on&f_gamecg=on&&f_cats=0&f_sname=on&f_stags=on&f_sh=on&advsearch=1&f_srdd=2&f_sname=on&f_stags=on&f_sdesc=on&f_sh=on";

                string html;
                // Dispose the client after each page instead of leaking one per iteration.
                using (var wc = new WebClient())
                {
                    wc.Encoding = Encoding.UTF8;
                    wc.Headers.Add(HttpRequestHeader.Accept, "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
                    wc.Headers.Add(HttpRequestHeader.UserAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36");
                    // SECURITY(review): this session cookie is hard-coded in source; it should be
                    // loaded from configuration or the environment and the committed value rotated.
                    wc.Headers.Add(HttpRequestHeader.Cookie, "igneous=30e0c0a66;ipb_member_id=2742770;ipb_pass_hash=6042be35e994fed920ee7dd11180b65f;sl=dm_2");
                    html = wc.DownloadString(url);
                }

                try
                {
                    var exh = ExHentaiParser.ParseResultPageExtendedListView(html);
                    crawled.AddRange(exh);
                    if (exh.Count != 25)
                    {
                        Logs.Instance.PushWarning("[Miss] " + url);
                    }
                    // Stop once (after at least 500 pages) a page contains an id below the newest Hitomi id.
                    if (i > 500 && exh.Min(x => x.URL.Split('/')[4].ToInt()) < latest)
                    {
                        break;
                    }
                    Logs.Instance.Push("Parse exh page - " + i);
                }
                catch (Exception)
                {
                    Logs.Instance.PushError("[Fail] " + url);
                }
            }
            catch (Exception e)
            {
                Logs.Instance.PushError($"{i} {e.Message}");
            }

            // Throttle: short pause after every page, a one-minute pause after every 1000 pages.
            Thread.Sleep(100);
            if (i % 1000 == 999)
            {
                Thread.Sleep(60000);
            }
        }

        var archive = JsonConvert.DeserializeObject<List<EHentaiResultArticle>>(File.ReadAllText("ex-hentai-archive.json"));
        // Keep the previous archive as a timestamped backup before rewriting it.
        File.Move("ex-hentai-archive.json", $"ex-hentai-archive-{DateTime.Now.Ticks}.json");

        var archivedIds = new HashSet<int>();
        archive.ForEach(x => archivedIds.Add(x.URL.Split('/')[4].ToInt()));
        foreach (var article in crawled)
        {
            // A crawled id is appended every time it occurs, matching the original behaviour.
            if (!archivedIds.Contains(article.URL.Split('/')[4].ToInt()))
            {
                archive.Add(article);
            }
        }

        var serializer = new JsonSerializer();
        serializer.Converters.Add(new JavaScriptDateTimeConverter());
        serializer.NullValueHandling = NullValueHandling.Ignore;

        Logs.Instance.Push("Write file: ex-hentai-archive.json");
        using (StreamWriter sw = new StreamWriter("ex-hentai-archive.json"))
        using (JsonWriter writer = new JsonTextWriter(sw))
        {
            serializer.Serialize(writer, archive);
        }
    }

    // Reloads the metadata and writes hitomidata.db plus one database per language.
    void BuildDatabases()
    {
        HitomiData.Instance.metadata_collection.Clear();
        HitomiData.Instance.Load();

        var archive = JsonConvert.DeserializeObject<List<EHentaiResultArticle>>(File.ReadAllText("ex-hentai-archive.json"));

        Console.Write("Make database... ");

        // Gallery id (as text) -> index of its first occurrence in the archive.
        var archiveIndex = new Dictionary<string, int>();
        for (int i = 0; i < archive.Count; i++)
        {
            var galleryId = archive[i].URL.Split('/')[4];
            if (!archiveIndex.ContainsKey(galleryId))
            {
                archiveIndex.Add(galleryId, i);
            }
        }

        // Formats a list as "|a|b|c|"; yields `fallback` when the list is missing, empty, or starts with "".
        string JoinPiped(IEnumerable<string> values, string fallback)
        {
            return values != null && values.Any() && values.First() != ""
                ? "|" + string.Join("|", values) + "|"
                : fallback;
        }

        // Writes one database. With a non-null languageFilter only entries whose language is
        // unknown/empty or equal to the filter are included.
        void WriteDatabase(string path, string languageFilter)
        {
            var db = new SQLiteConnection(path);
            try
            {
                var info = db.GetTableInfo(typeof(HitomiColumnModel).Name);
                if (!info.Any())
                {
                    db.CreateTable<HitomiColumnModel>();
                }

                db.InsertAll(HitomiData.Instance.metadata_collection
                    .Where(md => languageFilter == null || string.IsNullOrEmpty(md.Language) || md.Language == languageFilter)
                    .Select(md =>
                    {
                        var row = new HitomiColumnModel
                        {
                            Id = md.ID,
                            // Fixed: the placeholder was "N/A|" (no leading pipe), unlike every other piped value.
                            Artists = JoinPiped(md.Artists, "|N/A|"),
                            Characters = JoinPiped(md.Characters, null),
                            Groups = JoinPiped(md.Groups, null),
                            Series = JoinPiped(md.Parodies, null),
                            Title = md.Name,
                            Tags = JoinPiped(md.Tags, null),
                            Type = md.Type,
                            Language = md.Language,
                        };

                        if (archiveIndex.TryGetValue(md.ID.ToString(), out var position))
                        {
                            var eh = archive[position];
                            row.Uploader = eh.Uploader;
                            row.Published = DateTime.Parse(eh.Published);
                            row.EHash = eh.URL.Split('/')[5];
                            row.Files = eh.Files.Split(' ')[0].ToInt();
                            if (eh.Title.StartsWith("("))
                            {
                                // Text of the leading "(...)" prefix of the title.
                                row.Class = eh.Title.Split("(")[1].Split(")")[0];
                            }
                        }

                        return row;
                    }));
            }
            finally
            {
                // Close the connection even when the insert fails.
                db.Close();
            }
        }

        WriteDatabase("hitomidata.db", null);
        Console.WriteLine("Complete-All");

        WriteDatabase("hitomidata-korean.db", "korean");
        Console.WriteLine("Complete-Korean");

        WriteDatabase("hitomidata-japanese.db", "japanese");
        Console.WriteLine("Complete-Japanese");

        WriteDatabase("hitomidata-english.db", "english");
        Console.WriteLine("Complete-English");

        WriteDatabase("hitomidata-chinese.db", "chinese");
        Console.WriteLine("Complete-Chinese");
    }
}
/// <summary>
/// Builds the database rows for every newly synchronized article id
/// (<c>newedDataHitomi</c> ∪ <c>newedDataEH</c>, minus ids that need no update),
/// deletes the stale rows for those ids from the <c>HitomiColumnModel</c> table and returns the new rows.
/// The result is computed once and cached in <c>_newedCache</c>; later calls return the cached list.
/// </summary>
/// <returns>The materialized replacement rows, ordered by ascending id.</returns>
private IEnumerable<HitomiColumnModel> getNewedHitomiColumnModels()
{
    if (_newedCache != null)
    {
        return _newedCache;
    }

    // Formats a list as "|a|b|c|"; yields `fallback` when the list is missing, empty, or starts with "".
    string JoinPiped(IEnumerable<string> values, string fallback)
    {
        return values != null && values.Any() && values.First() != ""
            ? "|" + string.Join("|", values) + "|"
            : fallback;
    }

    // Maps the two E-Hentai tag spellings that are rewritten to their short form.
    string NormalizeTag(string tag)
    {
        if (tag == "lolicon") return "loli";
        if (tag == "shotacon") return "shota";
        return tag;
    }

    var articles = new HashSet<int>();
    foreach (var n in newedDataEH)
    {
        articles.Add(n);
    }
    foreach (var n in newedDataHitomi)
    {
        articles.Add(n);
    }

    var ids = articles.ToList();
    // Drop ids already stored from both sites, and ids that are new on one site only
    // while the database already holds that site's data and nothing from the other site.
    ids.RemoveAll(id => existsBoth.Contains(id));
    ids.RemoveAll(id => newedDataEH.Contains(id) && existsEH.Contains(id) && !existsHitomi.Contains(id) && !newedDataHitomi.Contains(id));
    ids.RemoveAll(id => newedDataHitomi.Contains(id) && existsHitomi.Contains(id) && !existsEH.Contains(id) && !newedDataEH.Contains(id));
    ids.Sort();

    if (ids.Count == 0)
    {
        // Nothing to update: avoid issuing "IN ()" statements altogether.
        return _newedCache = new List<HitomiColumnModel>();
    }

    // Gallery id -> index into hitomiArticles (first occurrence wins).
    var onHitomi = new Dictionary<int, int>();
    for (int i = 0; i < hitomiArticles.Count; i++)
    {
        var id = 0;
        try
        {
            var magic = hitomiArticles[i].Magic;
            if (magic.Contains("-"))
            {
                id = Convert.ToInt32(magic.Split('-').Last().Split('.')[0]);
            }
            else if (magic.Contains("galleries"))
            {
                id = Convert.ToInt32(magic.Split('/').Last().Split('.')[0]);
            }
            else
            {
                id = Convert.ToInt32(magic);
            }
        }
        catch (Exception)
        {
            // Deliberate best-effort (kept from the original): an entry whose Magic cannot be
            // parsed is registered under id 0 instead of aborting the whole run.
        }

        if (!onHitomi.ContainsKey(id))
        {
            onHitomi.Add(id, i);
        }
    }

    // Gallery id -> index into eHentaiResultArticles (first occurrence wins).
    var onEH = new Dictionary<int, int>();
    if (eHentaiResultArticles != null)
    {
        for (int i = 0; i < eHentaiResultArticles.Count; i++)
        {
            var id = int.Parse(eHentaiResultArticles[i].URL.Split('/')[4]);
            if (!onEH.ContainsKey(id))
            {
                onEH.Add(id, i);
            }
        }
    }

    // Rows currently stored for the affected ids; read before they are deleted below.
    var exists = db.Query<HitomiColumnModel>($"SELECT * FROM HitomiColumnModel WHERE Id IN ({string.Join(",", ids)})");
    var onExists = new Dictionary<int, int>();
    for (int i = 0; i < exists.Count; i++)
    {
        if (!onExists.ContainsKey(exists[i].Id))
        {
            onExists.Add(exists[i].Id, i);
        }
    }

    // Build the rows eagerly. The original cached a deferred query, so every enumeration of
    // the "cache" re-ran the whole projection (parsing and console output included).
    var rows = new List<HitomiColumnModel>(ids.Count);
    foreach (var id in ids)
    {
        if (newedDataHitomi.Contains(id))
        {
            var md = HitomiLegalize.ArticleToMetadata(hitomiArticles[onHitomi[id]]);
            var row = new HitomiColumnModel
            {
                Id = id,
                Artists = JoinPiped(md.Artists, "|N/A|"),
                Characters = JoinPiped(md.Characters, null),
                Groups = JoinPiped(md.Groups, null),
                Series = JoinPiped(md.Parodies, null),
                Title = md.Name,
                Tags = JoinPiped(md.Tags, null),
                Type = md.Type,
                Language = string.IsNullOrEmpty(md.Language) ? "n/a" : md.Language,
                Published = md.DateTime,
                ExistOnHitomi = 1,
            };

            if (newedDataEH.Contains(id))
            {
                // Also new on E-Hentai: take upload details from the freshly crawled record.
                var eh = eHentaiResultArticles[onEH[md.ID]];
                row.Uploader = eh.Uploader;
                row.Published = DateTime.Parse(eh.Published);
                row.EHash = eh.URL.Split('/')[5];
                row.Files = eh.Files.Split(' ')[0].ToInt();
                if (eh.Title.StartsWith("("))
                {
                    row.Class = eh.Title.Split("(")[1].Split(")")[0];
                }
            }
            else if (existsEH.Contains(id))
            {
                // E-Hentai details were stored earlier: carry them over from the old row.
                // NOTE(review): Files and Thumbnail are not carried over here - confirm this is intended.
                var previous = exists[onExists[id]];
                row.EHash = previous.EHash;
                row.Uploader = previous.Uploader;
                row.Published = previous.Published;
                row.Class = previous.Class;
            }

            rows.Add(row);
            continue;
        }

        // E-Hentai-only update. Descripts maps a namespace ("artist", "female", "language", ...)
        // to a list of strings and may be missing entirely.
        var ed = eHentaiResultArticles[onEH[id]];
        var desc = ed.Descripts;
        var artist = desc != null && desc.ContainsKey("artist") ? desc["artist"] : null;
        var female = desc != null && desc.ContainsKey("female") ? desc["female"] : null;
        var parody = desc != null && desc.ContainsKey("parody") ? desc["parody"] : null;
        var character = desc != null && desc.ContainsKey("character") ? desc["character"] : null;
        var male = desc != null && desc.ContainsKey("male") ? desc["male"] : null;
        var misc = desc != null && desc.ContainsKey("misc") ? desc["misc"] : null;
        var languages = desc != null && desc.ContainsKey("language") ? desc["language"] : null;
        var group = desc != null && desc.ContainsKey("group") ? desc["group"] : null;

        var lang = "n/a";
        if (languages != null && languages.Count != 0)
        {
            var concrete = languages.Where(l => l != "translated").ToList();
            if (concrete.Count == 0)
            {
                // Only "translated" is listed; print the URL as a diagnostic.
                Console.WriteLine(ed.URL);
            }
            else
            {
                lang = concrete[0];
            }
        }

        var tags = new List<string>();
        if (female != null) tags.AddRange(female.Select(t => "female:" + NormalizeTag(t)));
        if (male != null) tags.AddRange(male.Select(t => "male:" + NormalizeTag(t)));
        if (misc != null) tags.AddRange(misc.Select(t => NormalizeTag(t)));

        rows.Add(new HitomiColumnModel
        {
            Id = id,
            Artists = JoinPiped(artist, "|N/A|"),
            Characters = JoinPiped(character, null),
            Groups = JoinPiped(group, null),
            Series = JoinPiped(parody, null),
            Title = ed.Title,
            Tags = tags.Count > 0 ? "|" + string.Join("|", tags) + "|" : null,
            Type = ed.Type,
            Language = lang,
            ExistOnHitomi = existsHitomi.Contains(id) ? 1 : 0,
            Uploader = ed.Uploader,
            Published = DateTime.Parse(ed.Published),
            EHash = ed.URL.Split('/')[5],
            Files = ed.Files.Split(' ')[0].ToInt(),
            Class = ed.Title.StartsWith("(") ? ed.Title.Split("(")[1].Split(")")[0] : null,
            Thumbnail = ed.Thumbnail,
        });
    }

    // Remove the stale rows only after every replacement row was built successfully.
    // This must run exactly once; the _newedCache check at the top guarantees that.
    db.Execute($"DELETE FROM HitomiColumnModel WHERE Id IN ({string.Join(",", ids)})");

    return _newedCache = rows;
}