/// <summary>
/// Hands <c>ProcessPage</c> to <c>DBHelpers.GetPages</c> for the previous calendar month:
/// from the first day of that month up to one second before the first day of the
/// following month.
/// </summary>
public void ArchivePrevMonth()
{
    DateTime lastMonth = DateTime.Now.AddMonths(-1);
    DateTime monthStart = new DateTime(lastMonth.Year, lastMonth.Month, 1);

    // The range is processed at most once, and only while its start lies in the past.
    if (monthStart >= DateTime.Now)
    {
        return;
    }

    DateTime monthEnd = monthStart.AddMonths(1).AddSeconds(-1);

    DBHelpers.GetPages(ProcessPage, null, monthStart, monthEnd);
}
/// <summary>
/// Starts the robot: queues indexing of the current month's pages on the thread pool,
/// calls <c>IndexEqueue</c>, then queues every site's <c>Run</c> and the statistics
/// refresh on the thread pool.
/// </summary>
/// <remarks>
/// Exceptions thrown while queueing the sites or refreshing statistics are passed to
/// <c>_logError</c> (message only) and are not rethrown.
/// </remarks>
public void Run()
{
    //IndexPrevMonth();
    //ArchivePrevMonth();
    //return;

    //index files
    //IndexFiles();

    //index current period
    ThreadPool.QueueUserWorkItem(x =>
    {
        // Take a single clock snapshot so year and month always belong to the same instant
        // (two separate DateTime.Now reads can straddle a year boundary).
        DateTime now = DateTime.Now;
        var startDate = new DateTime(now.Year, now.Month, 1);

        DBHelpers.GetPages(IndexFile, null, startDate, startDate.AddMonths(1));
    });

    // NOTE(review): fixed delay before IndexEqueue — presumably gives the work item above
    // time to start producing; confirm against IndexEqueue.
    Thread.Sleep(500);

    this.IndexEqueue();

    try
    {
        //run spiders
        foreach (Site site in BH.BoobenRobot.Site.Sites)
        {
            //site.LoadLabels();

            ThreadPool.QueueUserWorkItem(new WaitCallback(site.Run));
        }

        //refresh interface
        UpdateStat();

        ThreadPool.QueueUserWorkItem(new WaitCallback(this.UpdateStat));
    }
    catch (Exception ex)
    {
        _logError(ex.Message);
    }
}
/// <summary>
/// Adds a page for <paramref name="url"/> to <paramref name="pages"/> and stores
/// <paramref name="label"/> for it, unless <c>DBHelpers.HasLabel</c> already reports
/// that label for the URL or no document number can be derived from the URL.
/// </summary>
/// <param name="pages">List that receives the new page.</param>
/// <param name="url">Dashboard URL the page belongs to.</param>
/// <param name="label">Label to compare against and to save once the page is added.</param>
/// <param name="dashboardID">Optional dashboard identifier forwarded to the path/URL builders.</param>
public void CheckLabelAndAddPage(List<Page> pages, string url, string label, string dashboardID = null)
{
    if (DBHelpers.HasLabel(url, label))
    {
        return;
    }

    List<string> docNumbers = GetDocNumberByUrl(url);

    if (docNumbers == null || docNumbers.Count == 0)
    {
        return;
    }

    string firstDocNumber = docNumbers[0];

    // Advance while the file for the *next* page number exists and the URL has a stored page.
    int pageNumber = 1;

    while (File.Exists(GetFilePath(Code, dashboardID, firstDocNumber, pageNumber + 1))
           && DBHelpers.HasPage(url))
    {
        pageNumber++;
    }

    Page entry = new Page();
    entry.DashboardURL = url;
    entry.DashboardID = dashboardID;
    entry.URL = GetUrlByDocNumber(firstDocNumber, pageNumber, dashboardID);
    entry.DocNumber = firstDocNumber;
    entry.PageNumber = pageNumber;

    pages.Add(entry);

    DBHelpers.SaveLabel(this.Code, url, label);
}
/// <summary>
/// Endless crawl loop for this site. Each pass downloads the dashboards, collects the
/// pages returned by <c>OnDashboardLoaded</c>, processes every page (plus follow-up pages
/// while <c>NeedLoadNextPage</c> is set) and then calls <c>WaitNextUpdate</c>.
/// </summary>
/// <param name="data">Unused; the signature matches <see cref="WaitCallback"/>.</param>
/// <remarks>
/// A failed dashboard download is recorded in <c>LastErrorMessage</c> and skipped.
/// Any other failure clears the saved labels of the affected pages, records the error
/// and calls <c>WaitNextUpdate(true)</c> before the next pass.
/// </remarks>
public void Run(object data)
{
    while (true)
    {
        //this.LastErrorMessage = null;

        List<Page> pages = new List<Page>();

        try
        {
            //dashboards
            List<Page> dashboards = GetDashboards();

            //WriteLog("Download dashboards", dashboards.Count.ToString());

            // Running counter for the progress text (avoids an IndexOf scan per dashboard).
            int dashboardNumber = 0;

            foreach (Page page in dashboards)
            {
                dashboardNumber++;

                this.CurrentPage = page;
                this.Progress = "Dashboard: " + dashboardNumber.ToString() + " of " + dashboards.Count;

                try
                {
                    this.DownloadWebPage(page);
                }
                catch (Exception ex)
                {
                    // A single dashboard that fails to download is skipped, not fatal.
                    this.LastErrorMessage = ex.Message;

                    Thread.Sleep(this.PageDelay);

                    continue;
                }

                this.TotalSize += page.HtmlContent.Length;
                this.AmountPages++;

                pages.AddRange(this.OnDashboardLoaded(page));

                Thread.Sleep(this.PageDelay);
            }

            //WriteLog("Process pages", pages.Count.ToString());
        }
        catch (Exception ex)
        {
            // Dashboard phase failed: clear the labels of every page collected so far.
            this.ReportCrawlFailure(pages, false, ex);

            this.WaitNextUpdate(true);

            continue;
        }

        try
        {
            //pages
            for (int i = 0; i < pages.Count; i++)
            {
                Page page = pages[i];

                this.CurrentPage = page;
                this.Progress = "Article: " + (i + 1).ToString() + " of " + pages.Count;

                // NOTE(review): the first argument is always pages[0], not a per-page value —
                // confirm this is what ProcessPage expects.
                this.ProcessPage(pages[0], page);

                this.TotalSize += page.HtmlContent.Length;
                this.AmountPages++;

                while (page.NeedLoadNextPage)
                {
                    page.PageNumber++;

                    // Hard upper bound on follow-up pages.
                    if (page.PageNumber > 5000)
                    {
                        break;
                    }

                    page.URL = GetUrlByDocNumber(page.DocNumber, page.PageNumber, page.DashboardID);

                    this.CurrentPage = page;

                    this.ProcessPage(pages[0], page);

                    if (page.URL != page.RedirectURL) //avoid cycles
                    {
                        break;
                    }

                    this.TotalSize += page.HtmlContent.Length;
                    this.AmountPages++;

                    Thread.Sleep(this.PageDelay);
                }

                Thread.Sleep(this.PageDelay);
            }

            //WriteLog("Site " + Code + " sleep to ", DateTime.Now.Add(this.SiteDelay).ToString());

            //this.SaveLabels();

            this.WaitNextUpdate(false);
        }
        catch (Exception ex)
        {
            //WriteLog("Exception in site " + Code, ex.Message + ex.StackTrace);
            //WriteLog("Site " + Code + " sleep to ", DateTime.Now.Add(this.SiteDelay).ToString());

            // Page phase failed: clear labels from the current page onwards.
            this.ReportCrawlFailure(pages, true, ex);

            this.WaitNextUpdate(true);
        }
    }
}

/// <summary>
/// Records a crawl failure in <c>LastErrorMessage</c> and, when a current page is known,
/// resets the saved label (to an empty string) of the affected pages.
/// </summary>
/// <param name="pages">Pages collected during the current pass.</param>
/// <param name="fromCurrentPageOnly">
/// When true, labels are cleared starting at the position of <c>CurrentPage</c> in
/// <paramref name="pages"/>; when false, labels of all pages are cleared.
/// </param>
/// <param name="ex">The exception that aborted the pass.</param>
private void ReportCrawlFailure(List<Page> pages, bool fromCurrentPageOnly, Exception ex)
{
    if (this.CurrentPage == null)
    {
        this.LastErrorMessage = ex.Message;

        //MessageBox.Show(ex.Message + ex.StackTrace);
        return;
    }

    int firstIndex = 0;

    if (fromCurrentPageOnly)
    {
        firstIndex = pages.IndexOf(this.CurrentPage);

        // IndexOf yields -1 when CurrentPage is not in the list (e.g. a page left over
        // from an earlier phase or pass). Indexing pages[-1] would throw from inside the
        // catch handler and escape Run, so fall back to clearing every label.
        if (firstIndex < 0)
        {
            firstIndex = 0;
        }
    }

    //clear labels
    for (int i = firstIndex; i < pages.Count; i++)
    {
        DBHelpers.SaveLabel(this.Code, pages[i].DashboardURL, string.Empty);
    }

    this.LastErrorMessage = string.Format("Failed page: {0};{1}",
                                          this.GetUrlByDocNumber(this.CurrentPage.DocNumber,
                                                                 this.CurrentPage.PageNumber,
                                                                 this.CurrentPage.DashboardID),
                                          ex.Message);

    //MessageBox.Show(ex.Message + this.CurrentPage.ToString() + ex.StackTrace);
}
/// <summary>
/// Indexes the monthly zip archives from November 2016 up to (but not including) the
/// current month, then calls <c>IndexEqueue</c> and saves the index.
/// </summary>
/// <remarks>
/// Archives are looked up under <c>Site.FTRobot_PATH</c> as <c>yyyyMM.zip</c>, followed by
/// <c>yyyyMM_2.zip</c>, <c>yyyyMM_3.zip</c>, ... until the first missing file. Every entry
/// of every archive is passed to <c>IndexEntry</c> and registered via <c>DBHelpers.SavePage</c>.
/// The archive walk runs on a thread-pool thread.
/// </remarks>
private void IndexPrevMonth()
{
    //index previous periods
    DateTime baseDate = new DateTime(2016, 11, 1); //DateTime.Now.AddMonths(-1); //baseDate.AddMonths(16);

    DateTime startDate = new DateTime(baseDate.Year, baseDate.Month, 1);

    // One clock snapshot so year and month always belong to the same instant.
    DateTime now = DateTime.Now;
    DateTime endDate = new DateTime(now.Year, now.Month, 1); //startDate.AddMonths(1);

    ThreadPool.QueueUserWorkItem(x =>
    {
        while (true)
        {
            // Walk all parts of the month's archive. The loop previously broke out right
            // after the first archive, so the "_N" parts were never reached.
            for (int archiveIndex = 1; ; archiveIndex++)
            {
                string zipPath;

                if (archiveIndex == 1)
                {
                    zipPath = Site.FTRobot_PATH + startDate.Year.ToString("0000") + startDate.Month.ToString("00") + ".zip";
                }
                else
                {
                    zipPath = Site.FTRobot_PATH + startDate.Year.ToString("0000") + startDate.Month.ToString("00") + "_" + archiveIndex.ToString() + ".zip";
                }

                if (!File.Exists(zipPath))
                {
                    // First missing part ends this month.
                    break;
                }

                using (FileStream zipToOpen = new FileStream(zipPath, FileMode.Open))
                {
                    using (ZipArchive archive = new ZipArchive(zipToOpen, ZipArchiveMode.Read))
                    {
                        foreach (var ent in archive.Entries)
                        {
                            IndexEntry(ent, 0);

                            DBHelpers.SavePage("", ent.FullName);
                        }

                        //DBHelpers.GetPages(IndexFile, archive, startDate, startDate.AddMonths(1));
                    }
                }
            }

            startDate = startDate.AddMonths(1);

            if (startDate >= endDate)
            {
                break;
            }
        }
    });

    // NOTE(review): fixed delay before IndexEqueue — presumably lets the work item above
    // start first; confirm against IndexEqueue.
    Thread.Sleep(100);

    IndexEqueue();

    ////index current period
    //startDate = new DateTime(DateTime.Now.Year, DateTime.Now.Month, 1);
    //DBHelpers.GetPages(IndexFile, null, startDate, startDate.AddMonths(1));

    //save index
    _service.SaveIndex();
}
/// <summary>
/// When the service reports an empty instance (<c>LastNameIDRAM &lt;= 80</c>), indexes the
/// monthly zip archives from June 2015 through the current month and then calls
/// <c>IndexEqueue</c>. Does nothing otherwise.
/// </summary>
/// <remarks>
/// Archives are looked up under <c>Site.FTRobot_PATH</c> as <c>yyyyMM.zip</c>, followed by
/// <c>yyyyMM_2.zip</c>, <c>yyyyMM_3.zip</c>, ... until the first missing file. Each archive
/// is handed to <c>DBHelpers.GetPages</c> together with <c>IndexFile</c> and the month range.
/// The archive walk runs on a thread-pool thread.
/// A legacy, commented-out routine that indexed the files listed in Log.txt used to sit
/// at the top of this method; it had no runtime effect.
/// </remarks>
private void IndexFiles()
{
    var info = _service.GetInfo();

    // Only an empty instance is (re)indexed.
    if (info.LastNameIDRAM > 80)
    {
        return;
    }

    //index previous periods
    ThreadPool.QueueUserWorkItem(state =>
    {
        DateTime month = new DateTime(2015, 6, 1);

        while (true)
        {
            int part = 1;

            while (true)
            {
                string prefix = Site.FTRobot_PATH + month.Year.ToString("0000") + month.Month.ToString("00");

                string archivePath = part == 1
                    ? prefix + ".zip"
                    : prefix + "_" + part.ToString() + ".zip";

                if (!File.Exists(archivePath))
                {
                    // First missing part ends this month.
                    break;
                }

                using (FileStream zipStream = new FileStream(archivePath, FileMode.Open))
                using (ZipArchive zip = new ZipArchive(zipStream, ZipArchiveMode.Read))
                {
                    DBHelpers.GetPages(IndexFile, zip, month, month.AddMonths(1));
                }

                part++;
            }

            // The current month is still processed; the walk stops right after it.
            if (month >= new DateTime(DateTime.Now.Year, DateTime.Now.Month, 1))
            {
                break;
            }

            month = month.AddMonths(1);
        }
    });

    Thread.Sleep(100);

    IndexEqueue();

    ////index current period
    //startDate = new DateTime(DateTime.Now.Year, DateTime.Now.Month, 1);
    //DBHelpers.GetPages(IndexFile, null, startDate, startDate.AddMonths(1));

    //save index
    //client.SaveIndex();
}