/// <summary>
/// Imports the article pages saved under the application's "tmp" directory into
/// <c>ExtractManager.InhaUnivDB</c>, processing them in ascending order of their
/// numeric file name (the file name without extension is used as the article id
/// when building the article link).
/// </summary>
/// <remarks>
/// A file that cannot be read, parsed or stored is reported on the console and moved
/// to the "maytrash" directory. Files whose name is not an integer are reported and
/// left where they are instead of aborting the whole run.
/// </remarks>
public static void FilteringArticle()
{
    var trashDirectory = Path.Combine(Program.ApplicationPath, "maytrash");
    Directory.CreateDirectory(trashDirectory);

    var db = ExtractManager.InhaUnivDB;

    // Parse every file name exactly once. The previous comparator re-parsed names on
    // each comparison and threw out of Sort() as soon as one name was not numeric.
    var numbered = new List<KeyValuePair<int, string>>();
    foreach (var path in Directory.GetFiles(Path.Combine(Program.ApplicationPath, "tmp")))
    {
        int id;
        if (int.TryParse(Path.GetFileNameWithoutExtension(path), out id))
        {
            numbered.Add(new KeyValuePair<int, string>(id, path));
        }
        else
        {
            Console.WriteLine("[Skip] " + path);
        }
    }
    numbered.Sort((x, y) => x.Key.CompareTo(y.Key));

    foreach (var entry in numbered)
    {
        var file = entry.Value;
        try
        {
            var html = File.ReadAllText(file);
            var article = InhaUnivExtractor.Parse(html);
            Console.WriteLine($"{article.Classify}, {article.DateTime}, {article.Title}");
            // Use the file name text itself (not the parsed int) so the link matches the name exactly.
            article.Link = $"https://www.inha.ac.kr/bbs/kr/8/{Path.GetFileNameWithoutExtension(file)}/artclView.do";
            db.Add(article);
        }
        catch (Exception e)
        {
            Console.WriteLine("[Fail] " + file + " - " + e.Message);

            // File.Move throws when the destination already exists (e.g. a leftover from an
            // earlier run), so replace the stale copy and never let a failed move stop the loop.
            var quarantined = Path.Combine(trashDirectory, Path.GetFileName(file));
            try
            {
                if (File.Exists(quarantined))
                {
                    File.Delete(quarantined);
                }
                File.Move(file, quarantined);
            }
            catch (Exception moveError)
            {
                Console.WriteLine("[Fail] Could not move " + file + " - " + moveError.Message);
            }
        }
    }
}
/// <summary>
/// Runs one polling cycle: probes for new university articles, then checks every
/// configured department notice page, storing and (when <c>EnableServer</c> is set)
/// announcing each new item. Finally logs and increments the cycle counter.
/// </summary>
/// <remarks>
/// The university section reads the id of the last entry in
/// <c>ExtractManager.InhaUnivArticles</c>; failures while computing the probe range or
/// downloading the pages are not caught here and propagate to the caller
/// (presumably including an empty article list, via <c>Last()</c> - confirm).
/// Failures inside the department section are logged per department and do not stop
/// the cycle.
/// </remarks>
/// <returns>A task that completes when the cycle has finished.</returns>
public static async Task LoopInternal()
{
    // Inha Univ Article
    {
        // Number of article ids probed after the most recent known article.
        const int probeCount = 5;
        Func<int, string> articleUrl = id => $"https://www.inha.ac.kr/bbs/kr/8/{id}/artclView.do";

        // Links look like https://www.inha.ac.kr/bbs/kr/8/{id}/artclView.do, so the id is
        // the 7th '/'-separated segment (index 6).
        var lastId = Convert.ToInt32(ExtractManager.InhaUnivArticles.Last().Link.Split('/')[6]);
        var range = Enumerable.Range(lastId + 1, probeCount).ToList();
        var htmls = NetTools.DownloadStrings(range.Select(articleUrl).ToList());

        for (int i = 0; i < htmls.Count; i++)
        {
            try
            {
                var article = InhaUnivExtractor.Parse(htmls[i]);
                article.Link = articleUrl(range[i]);

                // Once a page has parsed, a failure to store or announce it is a real
                // error and must not disappear silently.
                try
                {
                    ExtractManager.InhaUnivArticles.Add(article);
                    ExtractManager.InhaUnivDB.Add(article);
                    Log.Logs.Instance.Push($"[Loop] New item is added. - IUA - {article.Title}");
                    if (EnableServer)
                    {
                        await BotManager.Instance.Notice(article.ToString(), "MSG-MAIN");
                    }
                }
                catch (Exception e)
                {
                    Log.Logs.Instance.PushError("[Loop] 'IUA' " + e.Message + "\r\n" + e.StackTrace);
                }
            }
            catch
            {
                // Best effort: a probed id that does not parse (presumably because the
                // article has not been published yet) is skipped without logging.
            }
        }
    }

    // Department Notices
    {
        // Pages are downloaded one department at a time, inside the loop.
        foreach (var department in DepartmentList.Lists)
        {
            try
            {
                // Item1 = department name, Item2 = page style code, Item3 = notice page URL.
                if (department.Item3 == "")
                {
                    continue;
                }

                var task = NetTask.MakeDefault(department.Item3);
                if (department.Item2 == "s5")
                {
                    // Style-5 pages are decoded with code page 51949 (EUC-KR).
                    task.Encoding = Encoding.GetEncoding(51949);
                }
                var html = NetTools.DownloadString(task);

                List<DepartmentDBModel> notices;
                switch (department.Item2)
                {
                    case "s1":
                        notices = DepartmentExtractor.ExtractStyle1(html, department.Item1);
                        break;
                    case "s2":
                        notices = DepartmentExtractor.ExtractStyle2(html, department.Item1);
                        break;
                    case "s3":
                        notices = DepartmentExtractor.ExtractStyle3(html, department.Item1);
                        break;
                    case "s4":
                        notices = DepartmentExtractor.ExtractStyle4(html, department.Item1);
                        break;
                    case "s5":
                        notices = DepartmentExtractor.ExtractStyle5(html, department.Item1);
                        break;
                    default:
                        // Previously this fell through with a null list and surfaced as an
                        // opaque NullReferenceException; report the actual problem instead.
                        Log.Logs.Instance.PushError("[Loop] '" + department.Item1 + "' Unknown page style '" + department.Item2 + "'.");
                        continue;
                }

                // Numbers of the notices already stored for this department.
                var knownNumbers = new HashSet<int>(
                    ExtractManager.DepartmentArticles
                        .Where(x => x.Department == department.Item1)
                        .Select(x => Convert.ToInt32(x.Number)));

                // Scan from the end of the extracted list for the last notice that is
                // already known; every entry after it is treated as new. When none is
                // known the index ends at -1 and the whole list is added.
                int lastKnown = notices.Count - 1;
                while (lastKnown >= 0 && !knownNumbers.Contains(Convert.ToInt32(notices[lastKnown].Number)))
                {
                    lastKnown--;
                }

                for (int i = lastKnown + 1; i < notices.Count; i++)
                {
                    ExtractManager.DepartmentArticles.Add(notices[i]);
                    ExtractManager.DepartmentDB.Add(notices[i]);
                    Log.Logs.Instance.Push($"[Loop] New item is added. - DN - {notices[i].Title}");
                    if (EnableServer)
                    {
                        await BotManager.Instance.Notice(notices[i].ToString(), "MSG-" + department.Item1);
                    }
                }
            }
            catch (Exception e)
            {
                // One broken department page must not prevent the others from being checked.
                Log.Logs.Instance.PushError("[Loop] '" + department.Item1 + "' " + e.Message + "\r\n" + e.StackTrace);
            }
        }
    }

    Log.Logs.Instance.Push("[Loop] Cycle " + Count.ToString());
    Count++;
}