Example #1
0
        /// <summary>
        /// Calls <c>DBHelpers.GetPages</c> with <c>ProcessPage</c> as the handler for the date
        /// range spanning the previous calendar month.
        /// </summary>
        public void ArchivePrevMonth()
        {
            DateTime previousMonth = DateTime.Now.AddMonths(-1);
            DateTime monthStart    = new DateTime(previousMonth.Year, previousMonth.Month, 1);

            // Exactly one month is handled per call; the range is only processed
            // when its start lies before the current moment.
            if (monthStart < DateTime.Now)
            {
                // Last whole second of the month that begins at monthStart
                DateTime monthEnd = monthStart.AddMonths(1).AddSeconds(-1);

                DBHelpers.GetPages(ProcessPage, null, monthStart, monthEnd);
            }
        }
Example #2
0
        /// <summary>
        /// Starts the robot: queues indexing of the current month on the thread pool, calls
        /// <c>IndexEqueue</c>, then queues <c>Run</c> of every registered site and refreshes
        /// the statistics (once synchronously, once as a queued work item).
        /// </summary>
        public void Run()
        {
            //index current period
            ThreadPool.QueueUserWorkItem(state =>
            {
                // Take a single clock snapshot: reading DateTime.Now separately for the year
                // and the month could combine values from two different instants when the
                // call straddles a month or year rollover.
                DateTime now = DateTime.Now;

                var startDate = new DateTime(now.Year, now.Month, 1);

                DBHelpers.GetPages(IndexFile, null, startDate, startDate.AddMonths(1));
            });

            // NOTE(review): presumably gives the queued work item time to start before
            // IndexEqueue runs - confirm.
            Thread.Sleep(500);

            this.IndexEqueue();

            try
            {
                //run spiders
                foreach (Site site in BH.BoobenRobot.Site.Sites)
                {
                    ThreadPool.QueueUserWorkItem(new WaitCallback(site.Run));
                }

                //refresh interface
                UpdateStat();

                ThreadPool.QueueUserWorkItem(new WaitCallback(this.UpdateStat));
            }
            catch (Exception ex)
            {
                // Only the message is reported; failures while queueing do not propagate
                _logError(ex.Message);
            }
        }
Example #3
0
        /// <summary>
        /// Unless <paramref name="label"/> is already recorded for <paramref name="url"/>,
        /// appends a new <c>Page</c> for the first document number resolved from the URL to
        /// <paramref name="pages"/> and saves the label.
        /// </summary>
        /// <param name="pages">List that receives the new page entry.</param>
        /// <param name="url">Dashboard URL the label belongs to.</param>
        /// <param name="label">Label value to check and store.</param>
        /// <param name="dashboardID">Optional dashboard identifier passed on to the path and URL helpers.</param>
        public void CheckLabelAndAddPage(List<Page> pages, string url, string label, string dashboardID = null)
        {
            if (DBHelpers.HasLabel(url, label))
            {
                return;
            }

            List<string> docNumbers = GetDocNumberByUrl(url);

            if (docNumbers == null || docNumbers.Count == 0)
            {
                return;
            }

            string firstDocNumber = docNumbers[0];
            int    pageNumber     = 1;

            // Move forward while the file of the following page exists and the URL is known as a page
            while (File.Exists(GetFilePath(Code, dashboardID, firstDocNumber, pageNumber + 1)) &&
                   DBHelpers.HasPage(url))
            {
                pageNumber++;
            }

            Page entry = new Page();

            entry.DashboardURL = url;
            entry.DashboardID  = dashboardID;
            entry.URL          = GetUrlByDocNumber(firstDocNumber, pageNumber, dashboardID);
            entry.DocNumber    = firstDocNumber;
            entry.PageNumber   = pageNumber;

            pages.Add(entry);

            DBHelpers.SaveLabel(this.Code, url, label);
        }
Example #4
0
        /// <summary>
        /// Endless crawl loop. Each cycle downloads the dashboards, collects the pages returned
        /// by <c>OnDashboardLoaded</c>, processes every page together with its follow-up pages,
        /// and finally calls <c>WaitNextUpdate</c> before starting the next cycle.
        /// </summary>
        /// <param name="data">Not used by this method.</param>
        public void Run(object data)
        {
            while (true)
            {
                List<Page> pages = new List<Page>();

                try
                {
                    //dashboards
                    List<Page> dashboards = GetDashboards();

                    // Running position for the progress text; a counter avoids a linear
                    // IndexOf lookup per dashboard and stays correct for equal entries.
                    int dashboardNumber = 0;

                    foreach (Page page in dashboards)
                    {
                        dashboardNumber++;

                        this.CurrentPage = page;
                        this.Progress    = "Dashboard: " + dashboardNumber.ToString() + " of " + dashboards.Count;

                        try
                        {
                            this.DownloadWebPage(page);
                        }
                        catch (Exception ex)
                        {
                            // A dashboard that fails to download is skipped; the cycle goes on
                            this.LastErrorMessage = ex.Message;

                            Thread.Sleep(this.PageDelay);

                            continue;
                        }

                        this.TotalSize += page.HtmlContent.Length;
                        this.AmountPages++;

                        pages.AddRange(this.OnDashboardLoaded(page));

                        Thread.Sleep(this.PageDelay);
                    }
                }
                catch (Exception ex)
                {
                    // Dashboard phase failed: clear the labels of every page collected so far
                    this.ReportCycleFailure(pages, 0, ex);

                    this.WaitNextUpdate(true);

                    continue;
                }

                try
                {
                    //pages
                    for (int i = 0; i < pages.Count; i++)
                    {
                        Page page = pages[i];

                        this.CurrentPage = page;
                        this.Progress    = "Article: " + (i + 1).ToString() + " of " + pages.Count;

                        this.ProcessPage(pages[0], page);

                        this.TotalSize += page.HtmlContent.Length;
                        this.AmountPages++;

                        while (page.NeedLoadNextPage)
                        {
                            page.PageNumber++;

                            // Hard upper bound on follow-up pages
                            if (page.PageNumber > 5000)
                            {
                                break;
                            }

                            page.URL = GetUrlByDocNumber(page.DocNumber, page.PageNumber, page.DashboardID);

                            this.CurrentPage = page;

                            this.ProcessPage(pages[0], page);

                            if (page.URL != page.RedirectURL) //avoid cycles
                            {
                                break;
                            }

                            this.TotalSize += page.HtmlContent.Length;
                            this.AmountPages++;

                            Thread.Sleep(this.PageDelay);
                        }

                        Thread.Sleep(this.PageDelay);
                    }

                    this.WaitNextUpdate(false);
                }
                catch (Exception ex)
                {
                    // Page phase failed: clear labels from the page that was being processed
                    // onwards. IndexOf yields -1 when that page is not in the list (for
                    // example when the list is empty); the helper clamps the start index.
                    this.ReportCycleFailure(pages, pages.IndexOf(this.CurrentPage), ex);

                    this.WaitNextUpdate(true);
                }
            }
        }

        /// <summary>
        /// Records a failed cycle. When a current page is set, the labels of
        /// <paramref name="pages"/> from <paramref name="firstIndex"/> onwards are reset to an
        /// empty string and the error message names the failed page; otherwise only the
        /// exception message is stored.
        /// </summary>
        /// <param name="pages">Pages collected in the current cycle.</param>
        /// <param name="firstIndex">Index of the first page whose label is cleared; negative values are treated as 0.</param>
        /// <param name="ex">Exception that aborted the cycle.</param>
        private void ReportCycleFailure(List<Page> pages, int firstIndex, Exception ex)
        {
            if (this.CurrentPage == null)
            {
                this.LastErrorMessage = ex.Message;

                return;
            }

            //clear labels
            // A negative start would index pages[-1] and throw from inside the catch handler
            for (int i = firstIndex < 0 ? 0 : firstIndex; i < pages.Count; i++)
            {
                DBHelpers.SaveLabel(this.Code, pages[i].DashboardURL, string.Empty);
            }

            this.LastErrorMessage = string.Format("Failed page: {0};{1}",
                                                  this.GetUrlByDocNumber(this.CurrentPage.DocNumber,
                                                                         this.CurrentPage.PageNumber,
                                                                         this.CurrentPage.DashboardID),
                                                  ex.Message);
        }
Example #5
0
        /// <summary>
        /// Queues a work item that walks month by month from November 2016 up to (but not
        /// including) the current month and, for each month whose <c>yyyyMM.zip</c> archive
        /// exists under <c>Site.FTRobot_PATH</c>, indexes every archive entry and saves it as
        /// a page. Afterwards calls <c>IndexEqueue</c> and saves the index.
        /// </summary>
        private void IndexPrevMonth()
        {
            //index previous periods
            DateTime baseDate = new DateTime(2016, 11, 1);

            // Single clock snapshot so year and month cannot come from different instants
            DateTime now = DateTime.Now;

            DateTime startDate = new DateTime(baseDate.Year, baseDate.Month, 1);
            DateTime endDate   = new DateTime(now.Year, now.Month, 1);

            ThreadPool.QueueUserWorkItem(x =>
            {
                // At least one month is always visited, as before
                do
                {
                    // Only the primary archive of a month is visited.
                    // NOTE(review): numbered follow-up archives (yyyyMM_2.zip, ...) were never
                    // reached by the previous loop either - confirm that skipping them is intended.
                    string zipPath = Site.FTRobot_PATH + startDate.Year.ToString("0000") + startDate.Month.ToString("00") + ".zip";

                    if (File.Exists(zipPath))
                    {
                        // The archive is only read, so open it read-only instead of
                        // requesting read/write access.
                        using (FileStream zipToOpen = File.OpenRead(zipPath))
                        using (ZipArchive archive = new ZipArchive(zipToOpen, ZipArchiveMode.Read))
                        {
                            foreach (var ent in archive.Entries)
                            {
                                IndexEntry(ent, 0);
                                DBHelpers.SavePage("", ent.FullName);
                            }
                        }
                    }

                    startDate = startDate.AddMonths(1);
                }
                while (startDate < endDate);
            });

            // NOTE(review): presumably gives the queued work item time to start before
            // IndexEqueue runs - confirm.
            Thread.Sleep(100);

            IndexEqueue();

            //save index
            _service.SaveIndex();
        }
Example #6
0
        /// <summary>
        /// When the service reports an empty instance (<c>LastNameIDRAM</c> of 80 or less),
        /// queues a work item that walks month by month from June 2015 through the current
        /// month and passes every existing monthly archive (<c>yyyyMM.zip</c>, then
        /// <c>yyyyMM_2.zip</c>, <c>yyyyMM_3.zip</c>, ...) to <c>DBHelpers.GetPages</c> with
        /// <c>IndexFile</c> as the handler; then calls <c>IndexEqueue</c>.
        /// The index is not saved by this method.
        /// </summary>
        private void IndexFiles()
        {
            var info = _service.GetInfo();

            if (info.LastNameIDRAM <= 80) //empty instance
            {
                //index previous periods
                ThreadPool.QueueUserWorkItem(x =>
                {
                    DateTime startDate = new DateTime(2015, 6, 1);

                    while (true)
                    {
                        string monthPrefix = Site.FTRobot_PATH + startDate.Year.ToString("0000") + startDate.Month.ToString("00");

                        // Visit the archives of this month in order until the first missing file
                        for (int archiveIndex = 1; ; archiveIndex++)
                        {
                            string zipPath = archiveIndex == 1
                                ? monthPrefix + ".zip"
                                : monthPrefix + "_" + archiveIndex.ToString() + ".zip";

                            if (!File.Exists(zipPath))
                            {
                                break;
                            }

                            // The archive is only read, so open it read-only instead of
                            // requesting read/write access.
                            using (FileStream zipToOpen = File.OpenRead(zipPath))
                            using (ZipArchive archive = new ZipArchive(zipToOpen, ZipArchiveMode.Read))
                            {
                                DBHelpers.GetPages(IndexFile, archive, startDate, startDate.AddMonths(1));
                            }
                        }

                        // Single clock snapshot per check so year and month cannot come from
                        // different instants at a month or year rollover.
                        DateTime now = DateTime.Now;

                        // The current month itself is still processed before the loop ends
                        if (startDate >= new DateTime(now.Year, now.Month, 1))
                        {
                            break;
                        }

                        startDate = startDate.AddMonths(1);
                    }
                });

                // NOTE(review): presumably gives the queued work item time to start before
                // IndexEqueue runs - confirm.
                Thread.Sleep(100);

                IndexEqueue();
            }
        }