Exemple #1
0
        /// <summary>
        /// Sets up the fixture: creates mocks for the HTML provider and the link
        /// extractor, then builds the <c>CrawlService</c> under test from the
        /// mocked objects.
        /// </summary>
        public CrawlServiceTests()
        {
            HtmlProvider = new Mock<IHTMLProvider>();
            LinkExtractor = new Mock<ILinkExtractor>();

            // The service under test only ever sees the mocked collaborators.
            target = new CrawlService(
                HtmlProvider.Object,
                LinkExtractor.Object);
        }
        /// <summary>
        /// Drains <c>_queue</c>: dequeues tasks one at a time and runs each against a
        /// single <c>CrawlService("site")</c> instance until a dequeue yields no task.
        /// </summary>
        /// <remarks>
        /// <c>_backgroundThread</c> is cleared on every exit path (empty queue,
        /// normal completion, or an exception escaping a task) so the field never
        /// keeps pointing at a worker that has already finished.
        /// NOTE(review): presumably the enqueuing code checks this field to decide
        /// whether to start a new worker - confirm against the caller.
        /// </remarks>
        private static void Execute()
        {
            Console.WriteLine("Starting Thread Execution");
            try
            {
                SearchTask task;
                _queue.TryDequeue(out task);

                // Nothing queued: skip creating the service altogether.
                if (task == null)
                {
                    return;
                }

                using (var service = new CrawlService("site"))
                {
                    while (task != null)
                    {
                        task.Process(service);
                        Console.WriteLine("Document Indexed " + task.Name);
                        _queue.TryDequeue(out task);
                    }
                }
            }
            finally
            {
                // Previously skipped on the early return and when a task threw,
                // which left a stale reference to a finished worker.
                _backgroundThread = null;
            }
        }
 /// <summary>
 /// Carries out this item's work against the supplied crawl service.
 /// Each concrete derived type provides its own implementation.
 /// </summary>
 /// <param name="service">Crawl service the implementation operates on.</param>
 public abstract void Process(CrawlService service);
 /// <summary>
 /// Asks the crawl service to remove the record for this item's <c>Url</c>.
 /// </summary>
 /// <param name="service">Crawl service whose <c>RemoveRecord</c> is invoked.</param>
 public override void Process(CrawlService service)
 {
     var urlToRemove = Url;
     service.RemoveRecord(urlToRemove);
 }
 /// <summary>
 /// Brings the crawl service's record for this item's <c>Url</c> up to date.
 /// </summary>
 /// <remarks>
 /// When <c>NoIndex</c> is set the record is removed. Otherwise a record is
 /// obtained via <c>GetRecordFromUrl</c>; if one is returned, its title,
 /// template, subjects, summary and date fields are filled in and it is added
 /// to the service. Any exception raised along the way is captured in a new
 /// record for the URL, whose "error" field holds the exception text.
 /// </remarks>
 /// <param name="service">Crawl service used to read, add and remove records.</param>
 public override void Process(CrawlService service)
 {
     try
     {
         if (NoIndex)
         {
             service.RemoveRecord(Url);
             return;
         }

         var indexEntry = service.GetRecordFromUrl(Url, "text", Title);
         if (indexEntry == null)
         {
             // No record came back for this URL; nothing to store.
             return;
         }

         indexEntry.SetString("title", Title);
         indexEntry.SetString("template", TemplateName);

         // Subjects are stored as one lower-cased, comma-separated string
         // (empty when there are none).
         var hasSubjects = Subjects != null && Subjects.Any();
         var subjectList = hasSubjects
             ? Subjects.ToSeparatedString(",").ToLower()
             : string.Empty;
         indexEntry.SetString("subjects", subjectList);

         indexEntry.SetString("summary", Summary);
         indexEntry.SetDate("date", Date);
         service.AddRecord(indexEntry);
     }
     catch (Exception failure)
     {
         // Record the failure against the URL instead of letting it propagate.
         var errorEntry = service.NewRecord(Url);
         errorEntry.SetString("error", failure.ToString());
         service.AddRecord(errorEntry);
     }
 }
 /// <summary>
 /// Clears the "site" crawl database, then walks every page beneath the site
 /// roots and queues each page whose template path starts with "/WebPage"
 /// for adding via <c>SearchBackgroundCrawler.QueueDocumentAdd</c>.
 /// </summary>
 /// <remarks>
 /// The walk is breadth-first: a page's children are appended to the work
 /// queue whether or not the page itself was queued. It runs inside the
 /// <c>CmsContext.Published</c> scope.
 /// </remarks>
 public void RebuildSearchIndex()
 {
     // Start from an empty database.
     using (var crawlService = new CrawlService("site"))
     {
         crawlService.ClearDatabase();
     }

     var pending = new Queue<Page>();
     using (CmsContext.Published)
     {
         var roots = CmsService.Instance.SelectItems<SiteRoot>("/Content/*/*{SiteRoot}");
         var topLevelPages = roots.SelectMany(root => root.GetChildren<Page>());
         pending.EnqueueRange(topLevelPages);

         while (pending.Count != 0)
         {
             var current = pending.Dequeue();

             var isWebPage = current.Template.Path.StartsWith("/WebPage");
             if (isWebPage)
             {
                 // The site name is the last segment of the page's site path.
                 var sitePath = CmsService.Instance.GetSitePath(current.Path);
                 var siteName = sitePath.Split('/').Last();

                 var thumb = current.GetValue<Image>("thumbnail");
                 var thumbUrl = thumb.Exists ? thumb.Url : string.Empty;

                 SearchBackgroundCrawler.QueueDocumentAdd(siteName, current, thumbUrl);
             }

             // Descend regardless of whether this page was queued.
             pending.EnqueueRange(current.GetChildren<Page>());
         }
     }
 }