/// <summary>
/// Test fixture setup: builds fresh mocks for the HTML provider and link
/// extractor, then constructs the <see cref="CrawlService"/> under test with them.
/// </summary>
public CrawlServiceTests()
{
    HtmlProvider = new Mock<IHTMLProvider>();
    LinkExtractor = new Mock<ILinkExtractor>();
    target = new CrawlService(HtmlProvider.Object, LinkExtractor.Object);
}
/// <summary>
/// Background worker body: drains queued <see cref="SearchTask"/> items, processing
/// each against a single shared <see cref="CrawlService"/>, then releases the
/// worker slot by clearing <c>_backgroundThread</c>.
/// </summary>
private static void Execute()
{
    Console.WriteLine("Starting Thread Execution");
    try
    {
        SearchTask task;
        _queue.TryDequeue(out task);
        if (task == null) return;
        using (var service = new CrawlService("site"))
        {
            while (task != null)
            {
                task.Process(service);
                Console.WriteLine("Document Indexed " + task.Name);
                _queue.TryDequeue(out task);
            }
        }
    }
    finally
    {
        // BUG FIX: the reset was previously the last statement, so it was skipped
        // both on the early return (empty queue) and when task.Process threw.
        // Either path left _backgroundThread pointing at a dead thread, and the
        // crawler could never spawn another worker. The finally guarantees the
        // slot is always released.
        _backgroundThread = null;
    }
}
/// <summary>
/// Applies this task's effect to the search index through the supplied
/// <paramref name="service"/> (concrete overrides in this file either remove a
/// record or add/update one).
/// </summary>
/// <param name="service">Crawl service used to mutate the search index.</param>
public abstract void Process(CrawlService service);
/// <summary>
/// Removal task: deletes the index record identified by <c>Url</c>.
/// </summary>
/// <param name="service">Crawl service whose record store is modified.</param>
public override void Process(CrawlService service) => service.RemoveRecord(Url);
/// <summary>
/// Indexing task for a crawled page: removes the record when the page is marked
/// no-index, otherwise writes the record's searchable fields (title, template,
/// subjects, summary, date) and stores it. Any failure is captured as an "error"
/// record for the same URL instead of propagating.
/// </summary>
/// <param name="service">Crawl service used to read and write index records.</param>
public override void Process(CrawlService service)
{
    try
    {
        if (NoIndex)
        {
            service.RemoveRecord(Url);
        }
        else
        {
            var record = service.GetRecordFromUrl(Url, "text", Title);
            if (record != null)
            {
                record.SetString("title", Title);
                record.SetString("template", TemplateName);
                // BUG FIX: ToLowerInvariant instead of ToLower — subjects are
                // machine-matched index keys, so lowercasing must not vary with the
                // server's current culture (e.g. the Turkish dotless-i problem).
                record.SetString("subjects", Subjects != null && Subjects.Any() ? Subjects.ToSeparatedString(",").ToLowerInvariant() : string.Empty);
                record.SetString("summary", Summary);
                record.SetDate("date", Date);
                service.AddRecord(record);
            }
        }
    }
    catch (Exception exc)
    {
        // Deliberate catch-all: indexing is best-effort, and the failure is
        // surfaced by storing the exception text on an "error" record for this URL
        // rather than aborting the crawl.
        var record = service.NewRecord(Url);
        record.SetString("error", exc.ToString());
        service.AddRecord(record);
    }
}
/// <summary>
/// Clears the search database, then walks every page tree under all site roots
/// (breadth-first) and queues each "/WebPage"-templated page for background
/// re-indexing.
/// </summary>
public void RebuildSearchIndex()
{
    // Wipe the existing index first so removed pages do not linger.
    using (var service = new CrawlService("site"))
    {
        service.ClearDatabase();
    }
    Queue<Page> pages = new Queue<Page>();
    using (CmsContext.Published)
    {
        var siteRoots = CmsService.Instance.SelectItems<SiteRoot>("/Content/*/*{SiteRoot}");
        pages.EnqueueRange(siteRoots.SelectMany(r => r.GetChildren<Page>()));
        while (pages.Count > 0)
        {
            var page = pages.Dequeue();
            // Ordinal: template paths are machine identifiers, so the prefix check
            // must be culture-independent (CA1310).
            if (page.Template.Path.StartsWith("/WebPage", StringComparison.Ordinal))
            {
                var site = CmsService.Instance.GetSitePath(page.Path).Split('/').Last();
                var thumbnail = page.GetValue<Image>("thumbnail");
                // BUG FIX: guard against GetValue returning null when the page has
                // no "thumbnail" field — previously thumbnail.Exists threw a
                // NullReferenceException and aborted the entire rebuild.
                SearchBackgroundCrawler.QueueDocumentAdd(site, page, thumbnail != null && thumbnail.Exists ? thumbnail.Url : string.Empty);
            }
            // Children are queued regardless of template so the whole tree is visited.
            pages.EnqueueRange(page.GetChildren<Page>());
        }
    }
}