/// <summary>
/// Creates a crawler that keeps a reference to an index update context.
/// </summary>
/// <param name="updateContext">Update context stored in <c>_updateContext</c>.</param>
/// <param name="runningContextId">Identifier assigned to <c>RunningContextId</c>.</param>
/// <param name="urlsToCrawl">URLs forwarded to the base constructor.</param>
/// <param name="logger">Logger stored in <c>m_Logger</c>.</param>
/// <param name="pipeline">Pipeline steps forwarded to the base constructor.</param>
public UpdateContextAwareCrawler(
    IndexUpdateContext updateContext,
    ShortID runningContextId,
    IEnumerable<Uri> urlsToCrawl,
    ILog logger,
    params IPipelineStep[] pipeline)
    : base(urlsToCrawl, pipeline)
{
    m_Logger = logger;
    RunningContextId = runningContextId;
    _updateContext = updateContext;
}
/// <summary>
/// Forwards the item to the base implementation only when its
/// "include in search results" field equals "1".
/// </summary>
/// <param name="item">The item to test and add.</param>
/// <param name="context">The update context passed through to the base call.</param>
protected override void AddItem(Item item, IndexUpdateContext context)
{
    bool includeInSearchResults = item["include in search results"] == "1";
    if (includeInSearchResults)
    {
        base.AddItem(item, context);
    }
}
/// <summary>
/// Logs how many URLs are queued (when a logger is set) and then runs the crawl.
/// </summary>
/// <param name="context">The update context handed to <c>Crawl</c>.</param>
public void Add(IndexUpdateContext context)
{
    if (_logger != null)
    {
        var queuedUrlCount = _urls.Count;
        _logger.InfoFormat("Crawler rebuild called, going to crawl {0} urls", queuedUrlCount);
    }

    Crawl(context);
}
/// <summary>
/// Indexes a single item version, provided the latest version of the same
/// item and language can be loaded from the version's database.
/// </summary>
/// <param name="version">The version to index; must not be null.</param>
/// <param name="context">The update context; must not be null.</param>
protected virtual void AddVersion(Item version, IndexUpdateContext context)
{
    Assert.ArgumentNotNull(version, "version");
    Assert.ArgumentNotNull(context, "context");

    Item newest = version.Database.GetItem(version.ID, version.Language, Sitecore.Data.Version.Latest);
    if (newest == null)
    {
        // Nothing is indexed when the latest version cannot be resolved.
        return;
    }

    this.IndexVersion(version, newest, context);
}
/// <summary>
/// Indexes a version of an item, skipping (with a warning) items whose
/// template fails the <c>IsNotNull()</c> check.
/// </summary>
/// <param name="item">
/// The item.
/// </param>
/// <param name="latestVersion">
/// The latest version.
/// </param>
/// <param name="context">
/// The context.
/// </param>
protected override void IndexVersion(Item item, Item latestVersion, IndexUpdateContext context)
{
    if (!item.Template.IsNotNull())
    {
        string warning = string.Format("Custom Database Crawler: Cannot update item version. Reason: Template is NULL in item '{0}'.", item.Paths.FullPath);
        Log.Warn(warning, this);
        return;
    }

    base.IndexVersion(item, latestVersion, context);
}
/// <summary>
/// Adds an item to the index inside its own update context and commits it.
/// Items rejected by <c>IsMatch</c> are ignored.
/// </summary>
/// <param name="item">The item to add; must not be null.</param>
public void AddItem(Item item)
{
    Assert.ArgumentNotNull(item, "item");

    if (!this.IsMatch(item))
    {
        return;
    }

    using (IndexUpdateContext updateContext = this._index.CreateUpdateContext())
    {
        this.AddItem(item, updateContext);
        updateContext.Commit();
    }
}
/// <summary>
/// Builds one document for the item (identifiers, all fields, special
/// fields, boost) and adds it to the update context.
/// </summary>
/// <param name="item">The item to index; must not be null.</param>
/// <param name="context">The update context receiving the document; must not be null.</param>
protected void IndexSharedData(Item item, IndexUpdateContext context)
{
    Assert.ArgumentNotNull(item, "item");
    Assert.ArgumentNotNull(context, "context");

    var sharedDocument = new Document();

    this.AddItemIdentifiers(item, sharedDocument);
    // The third argument is false here; IndexVersion-style callers pass true.
    this.AddAllFields(sharedDocument, item, false);
    this.AddSpecialFields(sharedDocument, item);
    this.AdjustBoost(sharedDocument, item);

    context.AddDocument(sharedDocument);
}
/// <summary>
/// Adds a single item version to the index inside its own update context
/// and commits it. Versions rejected by <c>IsMatch</c> are ignored.
/// </summary>
/// <param name="version">The version to add; must not be null.</param>
public void AddVersion(Item version)
{
    Assert.ArgumentNotNull(version, "version");

    if (!this.IsMatch(version))
    {
        return;
    }

    using (IndexUpdateContext updateContext = this._index.CreateUpdateContext())
    {
        this.AddVersion(version, updateContext);
        updateContext.Commit();
    }
}
/// <summary>
/// Adds the subtree starting at <paramref name="root"/> to the index inside
/// its own update context and commits it. Nothing happens unless
/// <c>root.Axes.IsDescendantOf(_root)</c> is true.
/// </summary>
/// <param name="root">The subtree root; must not be null.</param>
public void AddTree(Item root)
{
    Assert.ArgumentNotNull(root, "root");

    if (!root.Axes.IsDescendantOf(this._root))
    {
        return;
    }

    using (IndexUpdateContext updateContext = this._index.CreateUpdateContext())
    {
        this.AddTree(root, updateContext);
        updateContext.Commit();
    }
}
/// <summary>
/// Rebuilds the index: every registered crawler adds its content to one
/// shared update context, which is then optimized and committed.
/// </summary>
public void Rebuild()
{
    using (IndexUpdateContext updateContext = this.CreateUpdateContext())
    {
        foreach (IRamCrawler ramCrawler in this._crawlers)
        {
            ramCrawler.Add(updateContext);
        }

        updateContext.Optimize();
        updateContext.Commit();
    }
}
/// <summary>
/// Builds one document for an item version (version identifiers, all
/// fields, special fields, boost) and adds it to the update context.
/// </summary>
/// <param name="item">The version being indexed; must not be null.</param>
/// <param name="latestVersion">The latest version of the item; must not be null.</param>
/// <param name="context">The update context receiving the document; must not be null.</param>
protected virtual void IndexVersion(Item item, Item latestVersion, IndexUpdateContext context)
{
    Assert.ArgumentNotNull(item, "item");
    Assert.ArgumentNotNull(latestVersion, "latestVersion");
    Assert.ArgumentNotNull(context, "context");

    var versionDocument = new Document();

    this.AddVersionIdentifiers(item, latestVersion, versionDocument);
    this.AddAllFields(versionDocument, item, true);
    this.AddSpecialFields(versionDocument, item);
    this.AdjustBoost(versionDocument, item);

    context.AddDocument(versionDocument);
}
/// <summary>
/// Rebuilds the index: every registered remote crawler adds its content to
/// one shared update context, which is optimized and committed; afterwards
/// the path built from the remote indexing server and <c>_folder</c> is
/// copied to <c>Settings.IndexFolder</c>.
/// </summary>
public void Rebuild()
{
    using (IndexUpdateContext updateContext = this.CreateUpdateContext())
    {
        foreach (IRemoteCrawler remoteCrawler in this._crawlers)
        {
            remoteCrawler.Add(updateContext);
        }

        updateContext.Optimize();
        updateContext.Commit();
    }

    // NOTE(review): File.Copy copies a single file and its destination must be a
    // file name; both arguments here look like folder paths — confirm this works.
    string remoteIndexPath = Path.Combine(Config.RemoteIndexingServer, _folder);
    File.Copy(remoteIndexPath, Settings.IndexFolder);
}
/// <summary>
/// Indexes the virtual products: for each product and each of its
/// languages, every version of the latest item is passed to
/// <c>AddVirtualProduct</c>.
/// </summary>
/// <param name="catalogItem">The catalog item.</param>
/// <param name="productItems">The product items.</param>
/// <param name="context">The context.</param>
protected virtual void AddVirtualProducts(Item catalogItem, IEnumerable<Item> productItems, IndexUpdateContext context)
{
    foreach (Item product in productItems)
    {
        foreach (Language productLanguage in product.Languages)
        {
            Item newest = product.Database.GetItem(product.ID, productLanguage, Version.Latest);
            if (newest == null)
            {
                // No item could be loaded for this language; skip it.
                continue;
            }

            foreach (Item productVersion in newest.Versions.GetVersions(false))
            {
                this.AddVirtualProduct(catalogItem, productVersion, newest, context);
            }
        }
    }
}
/// <summary>
/// Adds the item to the index and, when it is a catalog item that resolves
/// to a site name, also indexes its virtual products under that site and
/// the item's database.
/// </summary>
/// <param name="item">The Sitecore item to index.</param>
/// <param name="latestVersion">The latest version.</param>
/// <param name="context">The context.</param>
protected override void IndexVersion(Item item, Item latestVersion, IndexUpdateContext context)
{
    // The item itself is always indexed, with no catalog item attached.
    this.IndexVersion(item, latestVersion, context, null);

    if (!this.IsCatalogItem(item))
    {
        return;
    }

    string siteName = SiteUtils.GetSiteByItem(item);
    if (string.IsNullOrEmpty(siteName))
    {
        return;
    }

    using (new SiteContextSwitcher(Factory.GetSite(siteName)))
    using (new SiteIndependentDatabaseSwitcher(item.Database))
    {
        this.AddVirtualProducts(item, this.GetVirtualProductsForIndexing(item), context);
    }
}
/// <summary>
/// Indexes every version of the item in every one of its languages,
/// provided the item passes <c>IsMatch</c>.
/// </summary>
/// <param name="item">The item to index; must not be null.</param>
/// <param name="context">The update context; must not be null.</param>
protected virtual void AddItem(Item item, IndexUpdateContext context)
{
    Assert.ArgumentNotNull(item, "item");
    Assert.ArgumentNotNull(context, "context");

    if (!this.IsMatch(item))
    {
        return;
    }

    foreach (Language itemLanguage in item.Languages)
    {
        Item newest = item.Database.GetItem(item.ID, itemLanguage, Sitecore.Data.Version.Latest);
        if (newest == null)
        {
            // No item could be loaded for this language; skip it.
            continue;
        }

        foreach (Item itemVersion in newest.Versions.GetVersions(false))
        {
            this.IndexVersion(itemVersion, newest, context);
        }
    }
}
/// <summary>
/// Adds <paramref name="root"/> and, recursively, all of its children to
/// the update context. The whole walk runs inside a
/// <c>LimitMemoryContext</c>.
/// </summary>
/// <param name="root">The subtree root; must not be null.</param>
/// <param name="context">The update context; must not be null.</param>
protected void AddTree(Item root, IndexUpdateContext context)
{
    Assert.ArgumentNotNull(root, "root");
    Assert.ArgumentNotNull(context, "context");

    using (new LimitMemoryContext(true))
    {
        this.AddItem(root, context);

        // Collect the child IDs first; each child is re-fetched by ID below
        // rather than reusing the instances from the child list.
        var childIds = new List<ID>();
        foreach (Item child in root.GetChildren(ChildListOptions.IgnoreSecurity))
        {
            childIds.Add(child.ID);
        }

        foreach (ID childId in childIds)
        {
            Item reloadedChild = root.Database.GetItem(childId);
            Assert.IsNotNull(reloadedChild, "Child item was not found.");
            this.AddTree(reloadedChild, context);
        }
    }
}
/// <summary>
/// Job body that rebuilds each index named in <c>indexNames</c>. Does
/// nothing when no job is in context. Any exception marks the job as
/// failed and records the exception text; the job state is set to
/// <c>Finished</c> either way.
/// </summary>
protected void RemoteBuild()
{
    var job = Context.Job;
    if (job == null)
    {
        return;
    }

    try
    {
        // NOTE(review): this value is read but never used below — confirm whether
        // the read is intentional (e.g. fails fast on bad config) before removing.
        var serverAddress = Util.Config.RemoteIndexingServer;

        // Copy PsExec to Target Address
        foreach (var indexName in this.indexNames)
        {
            var index = SearchManager.GetIndex(indexName);
            if (index != null)
            {
                using (IndexUpdateContext updateContext = new IndexUpdateContext(index))
                {
                    // NOTE(review): elements of SearchManager.Indexes are cast to ICrawler
                    // by the foreach — confirm the RemoteIndex type implements it.
                    foreach (ICrawler crawler in SearchManager.Indexes.Where(candidate => candidate.GetType() == Type.GetType("Sitecore.ItemBuckets.BigData.RemoteIndex.RemoteIndex, Sitecore.ItemBuckets.BigData")))
                    {
                        crawler.Add(updateContext);
                    }

                    updateContext.Optimize();
                    updateContext.Commit();
                }
            }

            // Counted once per index name, whether or not the index was found.
            var status = job.Status;
            status.Processed += 1L;
        }
    }
    catch (Exception exception)
    {
        job.Status.Failed = true;
        job.Status.Messages.Add(exception.ToString());
    }

    job.Status.State = JobState.Finished;
}
/// <summary>
/// Closes the searcher, then optimizes the index through the
/// <c>IndexWriter</c> held in the update context's non-public
/// <c>_writer</c> field (read via reflection), and commits the context.
/// </summary>
public override void Optimize()
{
    this.CloseSearcher();

    using (IndexUpdateContext updateContext = _index.CreateUpdateContext())
    {
        const BindingFlags privateInstanceField =
            BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.GetField;

        object rawWriter = typeof(IndexUpdateContext).InvokeMember("_writer", privateInstanceField, null, updateContext, null);
        IndexWriter indexWriter = rawWriter as IndexWriter;

        // The optimize step is skipped when the field does not hold an IndexWriter.
        if (indexWriter != null)
        {
            indexWriter.Optimize();
        }

        updateContext.Commit();
    }
}
/// <summary>
/// Indexes the virtual products: for each product and each of its
/// languages, every version of the latest item is passed to
/// <c>AddVirtualProduct</c>.
/// </summary>
/// <param name="catalogItem">The catalog item.</param>
/// <param name="productItems">The product items.</param>
/// <param name="context">The context.</param>
protected virtual void AddVirtualProducts(Item catalogItem, IEnumerable<Item> productItems, IndexUpdateContext context)
{
    foreach (Item product in productItems)
    {
        foreach (Language productLanguage in product.Languages)
        {
            Item newest = product.Database.GetItem(product.ID, productLanguage, Version.Latest);
            if (newest == null)
            {
                // No item could be loaded for this language; skip it.
                continue;
            }

            foreach (Item productVersion in newest.Versions.GetVersions(false))
            {
                this.AddVirtualProduct(catalogItem, productVersion, newest, context);
            }
        }
    }
}
/// <summary>
/// Runs one crawl against the given update context. Only one crawl runs at
/// a time: a call made while another crawl is in progress logs a message
/// (when a logger is set) and returns without doing anything.
/// </summary>
/// <remarks>
/// The index directory is backed up before crawling. If the crawl throws,
/// the exception is logged and the backup is restored; the exception is not
/// rethrown. The backup directory is deleted at the end unless
/// <c>_cancelled</c> is set.
/// </remarks>
/// <param name="context">The update context that receives the crawled documents.</param>
private void Crawl(IndexUpdateContext context)
{
    // Cheap check before taking the lock; repeated under the lock below.
    if (_isrunning)
    {
        // The logger is optional everywhere else in this class, so guard it here too.
        if (_logger != null)
        {
            _logger.InfoFormat("Crawler is already running, aborting");
        }
        return;
    }

    lock (_runninglock)
    {
        if (_isrunning)
        {
            if (_logger != null)
            {
                _logger.InfoFormat("Crawler is already running, aborting");
            }
            return;
        }

        // Resolve the directory before raising the running flag: this call is
        // outside the try/finally, so if it threw with the flag already set the
        // flag would never be cleared and every later crawl would be rejected.
        var dir = _directoryHelper.GetDirectoryName(_index);
        _isrunning = true;
        _cancelled = false;
        try
        {
            _directoryHelper.CreateDirectoryBackup(dir);

            // Remove documents previously indexed under this crawler's tags.
            GetIndexWriter(context).DeleteDocuments(new Term(BuiltinFields.Tags, ValueOrEmpty(Tags)));

            var runningContextId = ShortID.NewId();
            var urls = GetTransformedUrls().ToList();
            if (_logger != null)
            {
                urls.ForEach(url => _logger.InfoFormat("Starting url: {0}", url));
            }

            // Use the logging document processor only when debug logging is enabled.
            var documentProcessor = (_logger != null && _logger.IsDebugEnabled)
                ? new LogHtmlDocumentProcessor(_logger, _indexFilters, _followFilters)
                : new HtmlDocumentProcessor(_indexFilters, _followFilters);

            using (var c = new UpdateContextAwareCrawler(context, runningContextId, urls, new LogLoggerBridge(_logger), documentProcessor, this))
            {
                if (_logger != null)
                {
                    _logger.Info(String.Format("Crawler started: Using {0} threads", MaximumThreadCount));
                }

                c.AdhereToRobotRules = AdhereToRobotRules;
                c.MaximumThreadCount = MaximumThreadCount;
                c.UriSensitivity = UriSensitivity;

                // Limits are only applied when configured with a positive value.
                if (MaximumCrawlDepth > 0)
                {
                    c.MaximumCrawlDepth = MaximumCrawlDepth;
                }
                if (MaximumDocuments > 0)
                {
                    c.MaximumCrawlCount = MaximumDocuments;
                }
                if (MaximumCrawlTime.TotalMinutes > 0)
                {
                    c.MaximumCrawlTime = MaximumCrawlTime;
                }

                c.UseCookies = UseCookies;
                c.ExcludeFilter = new[] { new RegexFilter(new Regex(RegexExcludeFilter)) };

                c.AfterDownload += CrawlerAfterDownload;
                c.PipelineException += CrawlerPipelineException;
                c.DownloadException += CrawlerDownloadException;
                c.Cancelled += CrawlerCancelled;

                Event.RaiseEvent("SiteCrawler:Started", new CrawlStartedEventArgs(c));
                c.Crawl();
                Event.RaiseEvent("SiteCrawler:Finished", new CrawlFinishedEventArgs(c));
            }
        }
        catch (Exception crawlException)
        {
            if (_logger != null)
            {
                _logger.Error(GetExceptionLog(crawlException).ToString());
            }

            // A successful restore clears the cancelled flag, so the finally
            // block below will delete the backup directory.
            if (_directoryHelper.RestoreDirectoryBackup(dir))
            {
                _cancelled = false;
            }
        }
        finally
        {
            if (_logger != null)
            {
                _logger.Info("Crawler finished");
            }

            _isrunning = false;

            if (!_cancelled)
            {
                _directoryHelper.DeleteBackupDirectory(dir);
            }
        }
    }
}
/// <summary>
/// Indexes the version: builds one document (version identifiers, all
/// fields, special fields, boost, and virtual product identifiers when a
/// catalog item is given) and adds it to the update context.
/// </summary>
/// <param name="item">The item to proceed.</param>
/// <param name="latestVersion">The latest version.</param>
/// <param name="context">The context.</param>
/// <param name="catalogItem">The catalog item; may be null.</param>
protected virtual void IndexVersion(Item item, Item latestVersion, IndexUpdateContext context, Item catalogItem)
{
    Assert.ArgumentNotNull(item, "item");
    Assert.ArgumentNotNull(latestVersion, "latestVersion");
    Assert.ArgumentNotNull(context, "context");

    var versionDocument = new Document();

    this.AddVersionIdentifiers(item, latestVersion, versionDocument);
    this.AddAllFields(versionDocument, item, true);
    this.AddSpecialFields(versionDocument, item);
    this.AdjustBoost(versionDocument, item);

    // Virtual product identifiers are only added when a catalog item is supplied.
    bool hasCatalogItem = catalogItem != null;
    if (hasCatalogItem)
    {
        this.AddVirtualProductIdentifiers(versionDocument, item, catalogItem);
    }

    context.AddDocument(versionDocument);
}
/// <summary>
/// Job body that rebuilds each index named in <c>indexNames</c>. Does
/// nothing when no job is in context. Any exception marks the job as
/// failed and records the exception text; the job state is set to
/// <c>Finished</c> either way.
/// </summary>
protected void RemoteBuild()
{
    var job = Context.Job;
    if (job == null)
    {
        return;
    }

    try
    {
        // NOTE(review): this value is read but never used below — confirm whether
        // the read is intentional (e.g. fails fast on bad config) before removing.
        var serverAddress = Util.Config.RemoteIndexingServer;

        // Copy PsExec to Target Address
        foreach (var indexName in this.indexNames)
        {
            var index = SearchManager.GetIndex(indexName);
            if (index != null)
            {
                using (IndexUpdateContext updateContext = new IndexUpdateContext(index))
                {
                    // NOTE(review): elements of SearchHelper.GetIndexes() are cast to ICrawler
                    // by the foreach — confirm the RemoteIndex type implements it.
                    foreach (ICrawler crawler in SearchHelper.GetIndexes().Where(candidate => candidate.GetType() == Type.GetType("Sitecore.ItemBuckets.BigData.RemoteIndex.RemoteIndex, Sitecore.ItemBuckets.BigData")))
                    {
                        crawler.Add(updateContext);
                    }

                    updateContext.Optimize();
                    updateContext.Commit();
                }
            }

            // Counted once per index name, whether or not the index was found.
            var status = job.Status;
            status.Processed += 1L;
        }
    }
    catch (Exception exception)
    {
        job.Status.Failed = true;
        job.Status.Messages.Add(exception.ToString());
    }

    job.Status.State = JobState.Finished;
}
/// <summary>
/// Indexes a version of an item only when the item has a template;
/// otherwise logs a warning with the item's full path and skips it.
/// </summary>
/// <param name="item">The version being indexed.</param>
/// <param name="latestVersion">The latest version of the item.</param>
/// <param name="context">The update context.</param>
protected override void IndexVersion(Item item, Item latestVersion, IndexUpdateContext context)
{
    // checking if item has template
    if (item.Template == null)
    {
        string warning = string.Format("AdvancedDatabaseCrawler: Cannot update item version. Reason: Template is NULL in item '{0}'.", item.Paths.FullPath);
        Log.Warn(warning, this);
        return;
    }

    base.IndexVersion(item, latestVersion, context);
}
/// <summary>
/// Adds the virtual product by indexing the given version with the
/// catalog item attached.
/// </summary>
/// <param name="catalogItem">The catalog item.</param>
/// <param name="version">The version.</param>
/// <param name="latestVersion">The latest version.</param>
/// <param name="context">The context.</param>
protected virtual void AddVirtualProduct(Item catalogItem, Item version, Item latestVersion, IndexUpdateContext context)
{
    // Note the argument order: the catalog item goes last in IndexVersion.
    IndexVersion(version, latestVersion, context, catalogItem);
}
/// <summary>
/// Reads the value that <c>_writerField</c> yields for the given update
/// context and returns it as an <c>IndexWriter</c>.
/// </summary>
/// <param name="updateContext">The update context to read the writer from.</param>
/// <returns>The writer, or null when the value is not an <c>IndexWriter</c>.</returns>
public IndexWriter GetIndexWriter(IndexUpdateContext updateContext)
{
    // sigh
    object rawWriter = _writerField.GetValue(updateContext);
    return rawWriter as IndexWriter;
}
/// <summary>
/// Adds the whole tree under this crawler's <c>root</c> to the specified context.
/// </summary>
/// <param name="context">The context; must not be null.</param>
public void Add(IndexUpdateContext context)
{
    Assert.ArgumentNotNull(context, "context");

    var treeRoot = this.root;
    this.AddTree(treeRoot, context);
}