示例#1
0
 /// <summary>
 /// Creates a crawler that remembers the index update context, the running context id and a logger.
 /// </summary>
 /// <param name="updateContext">Index update context stored in <c>_updateContext</c>.</param>
 /// <param name="runningContextId">Identifier stored in <c>RunningContextId</c>.</param>
 /// <param name="urlsToCrawl">URLs forwarded unchanged to the base constructor.</param>
 /// <param name="logger">Logger stored in <c>m_Logger</c>.</param>
 /// <param name="pipeline">Pipeline steps forwarded unchanged to the base constructor.</param>
 public UpdateContextAwareCrawler(IndexUpdateContext updateContext, ShortID runningContextId, IEnumerable <Uri> urlsToCrawl, ILog logger, params IPipelineStep[] pipeline)
     : base(urlsToCrawl, pipeline)
 {
     m_Logger         = logger;
     RunningContextId = runningContextId;
     _updateContext   = updateContext;
 }
 /// <summary>
 /// Passes the item to the base implementation only when its
 /// "include in search results" field equals "1"; otherwise does nothing.
 /// </summary>
 /// <param name="item">The item being considered for indexing.</param>
 /// <param name="context">The index update context handed on to the base implementation.</param>
 protected override void AddItem(Item item, IndexUpdateContext context)
 {
     bool includeInResults = item["include in search results"] == "1";
     if (!includeInResults)
     {
         return;
     }

     base.AddItem(item, context);
 }
示例#3
0
 /// <summary>
 /// Logs how many URLs are about to be crawled (when a logger is set) and then starts the crawl.
 /// </summary>
 /// <param name="context">The index update context handed to <c>Crawl</c>.</param>
 public void Add(IndexUpdateContext context)
 {
     var log = _logger;
     if (log != null)
     {
         log.InfoFormat("Crawler rebuild called, going to crawl {0} urls", _urls.Count);
     }

     Crawl(context);
 }
示例#4
0
        /// <summary>
        /// Indexes one item version, provided the latest version for the same
        /// item ID and language can be loaded from the item's database.
        /// </summary>
        /// <param name="version">The item version to index; must not be null.</param>
        /// <param name="context">The index update context; must not be null.</param>
        protected virtual void AddVersion(Item version, IndexUpdateContext context)
        {
            Assert.ArgumentNotNull(version, "version");
            Assert.ArgumentNotNull(context, "context");

            Item newest = version.Database.GetItem(version.ID, version.Language, Sitecore.Data.Version.Latest);
            if (newest == null)
            {
                // Without a resolvable latest version there is nothing to index.
                return;
            }

            this.IndexVersion(version, newest, context);
        }
示例#5
0
 /// <summary>
 /// Indexes an item version through the base crawler, unless the item's
 /// template is missing, in which case a warning is logged instead.
 /// </summary>
 /// <param name="item">The item version to index.</param>
 /// <param name="latestVersion">The latest version of the item.</param>
 /// <param name="context">The index update context.</param>
 protected override void IndexVersion(Item item, Item latestVersion, IndexUpdateContext context)
 {
     if (!item.Template.IsNotNull())
     {
         // No template: report the item path and skip the base implementation.
         Log.Warn(string.Format("Custom Database Crawler: Cannot update item version. Reason: Template is NULL in item '{0}'.", item.Paths.FullPath), this);
         return;
     }

     base.IndexVersion(item, latestVersion, context);
 }
示例#6
0
 /// <summary>
 /// Indexes a single item in its own update context and commits it,
 /// but only when <c>IsMatch</c> accepts the item.
 /// </summary>
 /// <param name="item">The item to index; must not be null.</param>
 public void AddItem(Item item)
 {
     Assert.ArgumentNotNull(item, "item");
     if (!this.IsMatch(item))
     {
         return;
     }

     using (IndexUpdateContext updateContext = this._index.CreateUpdateContext())
     {
         this.AddItem(item, updateContext);
         updateContext.Commit();
     }
 }
示例#7
0
        /// <summary>
        /// Builds one document for the item (identifiers, all fields, special
        /// fields, boost) and adds it to the update context.
        /// </summary>
        /// <param name="item">The item to build the document from; must not be null.</param>
        /// <param name="context">The index update context that receives the document; must not be null.</param>
        protected void IndexSharedData(Item item, IndexUpdateContext context)
        {
            Assert.ArgumentNotNull(item, "item");
            Assert.ArgumentNotNull(context, "context");

            var sharedDocument = new Document();
            this.AddItemIdentifiers(item, sharedDocument);
            this.AddAllFields(sharedDocument, item, false);
            this.AddSpecialFields(sharedDocument, item);
            this.AdjustBoost(sharedDocument, item);

            context.AddDocument(sharedDocument);
        }
示例#8
0
 /// <summary>
 /// Indexes a single item version in its own update context and commits it,
 /// but only when <c>IsMatch</c> accepts the version.
 /// </summary>
 /// <param name="version">The item version to index; must not be null.</param>
 public void AddVersion(Item version)
 {
     Assert.ArgumentNotNull(version, "version");
     if (!this.IsMatch(version))
     {
         return;
     }

     using (IndexUpdateContext updateContext = this._index.CreateUpdateContext())
     {
         this.AddVersion(version, updateContext);
         updateContext.Commit();
     }
 }
示例#9
0
 /// <summary>
 /// Indexes the subtree under <paramref name="root"/> in its own update context and commits it,
 /// but only when <paramref name="root"/> is a descendant of this crawler's root.
 /// </summary>
 /// <param name="root">The top item of the subtree to index; must not be null.</param>
 public void AddTree(Item root)
 {
     Assert.ArgumentNotNull(root, "root");
     if (!root.Axes.IsDescendantOf(this._root))
     {
         return;
     }

     using (IndexUpdateContext updateContext = this._index.CreateUpdateContext())
     {
         this.AddTree(root, updateContext);
         updateContext.Commit();
     }
 }
 /// <summary>
 /// Lets every registered crawler add its content to one shared update context,
 /// then optimizes and commits that context.
 /// </summary>
 public void Rebuild()
 {
     using (IndexUpdateContext updateContext = this.CreateUpdateContext())
     {
         foreach (IRamCrawler ramCrawler in this._crawlers)
         {
             ramCrawler.Add(updateContext);
         }

         updateContext.Optimize();
         updateContext.Commit();
     }
 }
示例#11
0
        /// <summary>
        /// Builds one document for the item version (version identifiers, all
        /// fields, special fields, boost) and adds it to the update context.
        /// </summary>
        /// <param name="item">The item version to index; must not be null.</param>
        /// <param name="latestVersion">The latest version of the item; must not be null.</param>
        /// <param name="context">The index update context that receives the document; must not be null.</param>
        protected virtual void IndexVersion(Item item, Item latestVersion, IndexUpdateContext context)
        {
            Assert.ArgumentNotNull(item, "item");
            Assert.ArgumentNotNull(latestVersion, "latestVersion");
            Assert.ArgumentNotNull(context, "context");

            var versionDocument = new Document();
            this.AddVersionIdentifiers(item, latestVersion, versionDocument);
            this.AddAllFields(versionDocument, item, true);
            this.AddSpecialFields(versionDocument, item);
            this.AdjustBoost(versionDocument, item);

            context.AddDocument(versionDocument);
        }
示例#12
0
        /// <summary>
        /// Lets every registered remote crawler add its content to one shared update context,
        /// optimizes and commits it, and afterwards copies from the remote indexing
        /// server path to the local index folder.
        /// </summary>
        public void Rebuild()
        {
            using (IndexUpdateContext updateContext = this.CreateUpdateContext())
            {
                foreach (IRemoteCrawler remoteCrawler in this._crawlers)
                {
                    remoteCrawler.Add(updateContext);
                }

                updateContext.Optimize();
                updateContext.Commit();
            }

            // NOTE(review): File.Copy copies a single file; if either path is a directory
            // this call throws. Confirm what is meant to be copied here.
            string remoteSource = Path.Combine(Config.RemoteIndexingServer, _folder);
            File.Copy(remoteSource, Settings.IndexFolder);
        }
 /// <summary>
 /// For each product item and each of its languages, loads the latest version and
 /// indexes every version returned by <c>Versions.GetVersions(false)</c> as a
 /// virtual product of the catalog item.
 /// </summary>
 /// <param name="catalogItem">The catalog item the products are indexed under.</param>
 /// <param name="productItems">The product items to index.</param>
 /// <param name="context">The index update context.</param>
 protected virtual void AddVirtualProducts(Item catalogItem, IEnumerable<Item> productItems, IndexUpdateContext context)
 {
   foreach (Item product in productItems)
   {
     foreach (Language language in product.Languages)
     {
       Item latestVersion = product.Database.GetItem(product.ID, language, Version.Latest);
       if (latestVersion == null)
       {
         // Nothing could be loaded for this language; move on to the next one.
         continue;
       }

       foreach (Item version in latestVersion.Versions.GetVersions(false))
       {
         this.AddVirtualProduct(catalogItem, version, latestVersion, context);
       }
     }
   }
 }
        /// <summary>
        /// Indexes the item version and, when the item is a catalog item for which a
        /// site name can be resolved, also indexes its virtual products.
        /// </summary>
        /// <param name="item">The Sitecore item version to index.</param>
        /// <param name="latestVersion">The latest version of the item.</param>
        /// <param name="context">The index update context.</param>
        protected override void IndexVersion(Item item, Item latestVersion, IndexUpdateContext context)
        {
            this.IndexVersion(item, latestVersion, context, null);

            if (!this.IsCatalogItem(item))
            {
                return;
            }

            string siteName = SiteUtils.GetSiteByItem(item);
            if (string.IsNullOrEmpty(siteName))
            {
                return;
            }

            // Virtual products are collected and indexed with the site and the item's database switched in.
            using (new SiteContextSwitcher(Factory.GetSite(siteName)))
            using (new SiteIndependentDatabaseSwitcher(item.Database))
            {
                this.AddVirtualProducts(item, this.GetVirtualProductsForIndexing(item), context);
            }
        }
示例#15
0
 /// <summary>
 /// Indexes the versions of an item that <c>IsMatch</c> accepts: for each of the item's
 /// languages, loads the latest version and indexes every version returned by
 /// <c>Versions.GetVersions(false)</c>.
 /// </summary>
 /// <param name="item">The item to index; must not be null.</param>
 /// <param name="context">The index update context; must not be null.</param>
 protected virtual void AddItem(Item item, IndexUpdateContext context)
 {
     Assert.ArgumentNotNull(item, "item");
     Assert.ArgumentNotNull(context, "context");
     if (!this.IsMatch(item))
     {
         return;
     }

     foreach (Language language in item.Languages)
     {
         Item latestVersion = item.Database.GetItem(item.ID, language, Sitecore.Data.Version.Latest);
         if (latestVersion == null)
         {
             // Nothing could be loaded for this language; move on to the next one.
             continue;
         }

         foreach (Item version in latestVersion.Versions.GetVersions(false))
         {
             this.IndexVersion(version, latestVersion, context);
         }
     }
 }
示例#16
0
 /// <summary>
 /// Indexes <paramref name="root"/> and then, recursively, each of its children.
 /// </summary>
 /// <param name="root">The top item of the subtree; must not be null.</param>
 /// <param name="context">The index update context; must not be null.</param>
 protected void AddTree(Item root, IndexUpdateContext context)
 {
     Assert.ArgumentNotNull(root, "root");
     Assert.ArgumentNotNull(context, "context");
     using (new LimitMemoryContext(true))
     {
         this.AddItem(root, context);

         // Collect only the child IDs first; each child is fetched again by ID just before it is processed.
         List <ID> childIds = new List <ID>();
         foreach (Item child in root.GetChildren(ChildListOptions.IgnoreSecurity))
         {
             childIds.Add(child.ID);
         }

         foreach (ID childId in childIds)
         {
             Item childItem = root.Database.GetItem(childId);
             Assert.IsNotNull(childItem, "Child item was not found.");
             this.AddTree(childItem, context);
         }
     }
 }
            /// <summary>
            /// Job body: for every name in <c>indexNames</c>, looks up the index and, when found,
            /// lets each remote index add its content to a fresh update context, which is then
            /// optimized and committed. Does nothing when there is no current job.
            /// </summary>
            /// <remarks>
            /// Any exception marks the job as failed and is recorded in the job messages; the job
            /// state is set to finished in either case.
            /// </remarks>
            protected void RemoteBuild()
            {
                var job = Context.Job;
                if (job == null)
                {
                    return;
                }

                try
                {
                    // TODO (carried over from the original comment): copy PsExec to the target address.

                    // Resolve the remote index type once instead of once per candidate index.
                    // Type.GetType returns null when the type cannot be resolved, in which case nothing matches below.
                    var remoteIndexType = Type.GetType("Sitecore.ItemBuckets.BigData.RemoteIndex.RemoteIndex, Sitecore.ItemBuckets.BigData");

                    foreach (var indexName in this.indexNames)
                    {
                        var index = SearchManager.GetIndex(indexName);
                        if (index != null)
                        {
                            using (IndexUpdateContext context = new IndexUpdateContext(index))
                            {
                                foreach (ICrawler crawler in SearchManager.Indexes.Where(candidate => candidate.GetType() == remoteIndexType))
                                {
                                    crawler.Add(context);
                                }
                                context.Optimize();
                                context.Commit();
                            }
                        }

                        // Counted for every index name, whether or not the index was found.
                        job.Status.Processed += 1L;
                    }
                }
                catch (Exception exception)
                {
                    job.Status.Failed = true;
                    job.Status.Messages.Add(exception.ToString());
                }

                job.Status.State = JobState.Finished;
            }
示例#18
0
        /// <summary>
        /// Closes the searcher, then opens an update context, optimizes the index through the
        /// writer held in the context's non-public <c>_writer</c> field (when it can be read
        /// as an <c>IndexWriter</c>), and commits the context.
        /// </summary>
        public override void Optimize()
        {
            this.CloseSearcher();

            using (IndexUpdateContext updateContext = _index.CreateUpdateContext())
            {
                // The writer is read via reflection from the non-public instance field.
                BindingFlags privateFieldAccess = BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.GetField;
                object rawWriter = typeof(IndexUpdateContext).InvokeMember("_writer", privateFieldAccess, null, updateContext, null);

                IndexWriter indexWriter = rawWriter as IndexWriter;
                if (indexWriter != null)
                {
                    indexWriter.Optimize();
                }

                updateContext.Commit();
            }
        }
 /// <summary>
 /// For each product item and each of its languages, loads the latest version and
 /// indexes every version returned by <c>Versions.GetVersions(false)</c> as a
 /// virtual product of the catalog item.
 /// </summary>
 /// <param name="catalogItem">The catalog item the products are indexed under.</param>
 /// <param name="productItems">The product items to index.</param>
 /// <param name="context">The index update context.</param>
 protected virtual void AddVirtualProducts(Item catalogItem, IEnumerable <Item> productItems, IndexUpdateContext context)
 {
     foreach (Item product in productItems)
     {
         foreach (Language language in product.Languages)
         {
             Item latestVersion = product.Database.GetItem(product.ID, language, Version.Latest);
             if (latestVersion == null)
             {
                 // Nothing could be loaded for this language; move on to the next one.
                 continue;
             }

             foreach (Item version in latestVersion.Versions.GetVersions(false))
             {
                 this.AddVirtualProduct(catalogItem, version, latestVersion, context);
             }
         }
     }
 }
示例#20
0
        /// <summary>
        /// Runs one crawl over the transformed start URLs, using the given update context.
        /// </summary>
        /// <remarks>
        /// A call that finds <c>_isrunning</c> set logs a message (when a logger is available)
        /// and returns without crawling. Otherwise a backup of the index directory is created,
        /// documents matching the current tags term are deleted through the context's index
        /// writer, and the crawler is configured and run. If an exception escapes, it is logged
        /// and the directory backup is restored. Finally <c>_isrunning</c> is reset and the
        /// backup is deleted unless <c>_cancelled</c> is set.
        /// </remarks>
        /// <param name="context">The index update context handed to the crawler and used to reach the index writer.</param>
        private void Crawl(IndexUpdateContext context)
        {
            // First check happens before the lock is taken.
            if (_isrunning)
            {
                // The logger is optional everywhere else in this method, so it must be guarded here too.
                if (_logger != null)
                {
                    _logger.InfoFormat("Crawler is already running, aborting");
                }
                return;
            }

            lock (_runninglock)
            {
                // Re-check under the lock.
                if (_isrunning)
                {
                    if (_logger != null)
                    {
                        _logger.InfoFormat("Crawler is already running, aborting");
                    }
                    return;
                }
                _isrunning = true;

                var dir = _directoryHelper.GetDirectoryName(_index);

                _cancelled = false;
                try
                {
                    _directoryHelper.CreateDirectoryBackup(dir);
                    GetIndexWriter(context).DeleteDocuments(new Term(BuiltinFields.Tags, ValueOrEmpty(Tags)));

                    var runningContextId = ShortID.NewId();
                    var urls             = GetTransformedUrls().ToList();
                    if (_logger != null)
                    {
                        urls.ForEach(url => _logger.InfoFormat("Starting url: {0}", url));
                    }

                    // The logging document processor is used only when debug logging is enabled.
                    var documentProcessor = (_logger != null && _logger.IsDebugEnabled)
                            ? new LogHtmlDocumentProcessor(_logger, _indexFilters, _followFilters)
                            : new HtmlDocumentProcessor(_indexFilters, _followFilters);

                    using (var crawler = new UpdateContextAwareCrawler(context, runningContextId, urls, new LogLoggerBridge(_logger), documentProcessor, this))
                    {
                        if (_logger != null)
                        {
                            _logger.Info(String.Format("Crawler started: Using {0} threads", MaximumThreadCount));
                        }
                        crawler.AdhereToRobotRules = AdhereToRobotRules;
                        crawler.MaximumThreadCount = MaximumThreadCount;
                        crawler.UriSensitivity     = UriSensitivity;

                        // The three limits below are applied only when set to a positive value.
                        if (MaximumCrawlDepth > 0)
                        {
                            crawler.MaximumCrawlDepth = MaximumCrawlDepth;
                        }

                        if (MaximumDocuments > 0)
                        {
                            crawler.MaximumCrawlCount = MaximumDocuments;
                        }

                        if (MaximumCrawlTime.TotalMinutes > 0)
                        {
                            crawler.MaximumCrawlTime = MaximumCrawlTime;
                        }

                        crawler.UseCookies    = UseCookies;
                        crawler.ExcludeFilter = new[]
                        {
                            new RegexFilter(new Regex(RegexExcludeFilter))
                        };

                        crawler.AfterDownload     += CrawlerAfterDownload;
                        crawler.PipelineException += CrawlerPipelineException;
                        crawler.DownloadException += CrawlerDownloadException;
                        crawler.Cancelled         += CrawlerCancelled;

                        Event.RaiseEvent("SiteCrawler:Started", new CrawlStartedEventArgs(crawler));

                        crawler.Crawl();

                        Event.RaiseEvent("SiteCrawler:Finished", new CrawlFinishedEventArgs(crawler));
                    }
                }

                catch (Exception crawlException)
                {
                    if (_logger != null)
                    {
                        _logger.Error(GetExceptionLog(crawlException).ToString());
                    }
                    // A successful restore clears the cancelled flag, so the backup is deleted in the finally block.
                    if (_directoryHelper.RestoreDirectoryBackup(dir))
                    {
                        _cancelled = false;
                    }
                }
                finally
                {
                    if (_logger != null)
                    {
                        _logger.Info("Crawler finished");
                    }
                    _isrunning = false;
                    if (!_cancelled)
                    {
                        _directoryHelper.DeleteBackupDirectory(dir);
                    }
                }
            }
        }
    /// <summary>
    /// Builds one document for the item version (version identifiers, all fields, special
    /// fields, boost), adds virtual product identifiers when a catalog item is given,
    /// and adds the document to the update context.
    /// </summary>
    /// <param name="item">The item version to index; must not be null.</param>
    /// <param name="latestVersion">The latest version of the item; must not be null.</param>
    /// <param name="context">The index update context that receives the document; must not be null.</param>
    /// <param name="catalogItem">The catalog item, or null when the version is not indexed as a virtual product.</param>
    protected virtual void IndexVersion(Item item, Item latestVersion, IndexUpdateContext context, Item catalogItem)
    {
      Assert.ArgumentNotNull(item, "item");
      Assert.ArgumentNotNull(latestVersion, "latestVersion");
      Assert.ArgumentNotNull(context, "context");

      var versionDocument = new Document();
      this.AddVersionIdentifiers(item, latestVersion, versionDocument);
      this.AddAllFields(versionDocument, item, true);
      this.AddSpecialFields(versionDocument, item);
      this.AdjustBoost(versionDocument, item);

      bool hasCatalogItem = catalogItem != null;
      if (hasCatalogItem)
      {
        this.AddVirtualProductIdentifiers(versionDocument, item, catalogItem);
      }

      context.AddDocument(versionDocument);
    }
            /// <summary>
            /// Job body: for every name in <c>indexNames</c>, looks up the index and, when found,
            /// lets each remote index add its content to a fresh update context, which is then
            /// optimized and committed. Does nothing when there is no current job.
            /// </summary>
            /// <remarks>
            /// Any exception marks the job as failed and is recorded in the job messages; the job
            /// state is set to finished in either case.
            /// </remarks>
            protected void RemoteBuild()
            {
                var job = Context.Job;
                if (job == null)
                {
                    return;
                }

                try
                {
                    // TODO (carried over from the original comment): copy PsExec to the target address.

                    // Resolve the remote index type once instead of once per candidate index.
                    // Type.GetType returns null when the type cannot be resolved, in which case nothing matches below.
                    var remoteIndexType = Type.GetType("Sitecore.ItemBuckets.BigData.RemoteIndex.RemoteIndex, Sitecore.ItemBuckets.BigData");

                    foreach (var indexName in this.indexNames)
                    {
                        var index = SearchManager.GetIndex(indexName);
                        if (index != null)
                        {
                            using (IndexUpdateContext context = new IndexUpdateContext(index))
                            {
                                foreach (ICrawler crawler in SearchHelper.GetIndexes().Where(candidate => candidate.GetType() == remoteIndexType))
                                {
                                    crawler.Add(context);
                                }
                                context.Optimize();
                                context.Commit();
                            }
                        }

                        // Counted for every index name, whether or not the index was found.
                        job.Status.Processed += 1L;
                    }
                }
                catch (Exception exception)
                {
                    job.Status.Failed = true;
                    job.Status.Messages.Add(exception.ToString());
                }

                job.Status.State = JobState.Finished;
            }
        /// <summary>
        /// Indexes an item version through the base crawler, unless the item's
        /// template is null, in which case a warning is logged instead.
        /// </summary>
        /// <param name="item">The item version to index.</param>
        /// <param name="latestVersion">The latest version of the item.</param>
        /// <param name="context">The index update context.</param>
        protected override void IndexVersion(Item item, Item latestVersion, IndexUpdateContext context)
        {
            if (item.Template == null)
            {
                // No template: report the item path and skip the base implementation.
                Log.Warn(string.Format("AdvancedDatabaseCrawler: Cannot update item version. Reason: Template is NULL in item '{0}'.", item.Paths.FullPath), this);
                return;
            }

            base.IndexVersion(item, latestVersion, context);
        }
    /// <summary>
    /// Indexes the item version and, when the item is a catalog item for which a
    /// site name can be resolved, also indexes its virtual products.
    /// </summary>
    /// <param name="item">The Sitecore item version to index.</param>
    /// <param name="latestVersion">The latest version of the item.</param>
    /// <param name="context">The index update context.</param>
    protected override void IndexVersion(Item item, Item latestVersion, IndexUpdateContext context)
    {
      this.IndexVersion(item, latestVersion, context, null);

      if (!this.IsCatalogItem(item))
      {
        return;
      }

      string siteName = SiteUtils.GetSiteByItem(item);
      if (string.IsNullOrEmpty(siteName))
      {
        return;
      }

      // Virtual products are collected and indexed with the site and the item's database switched in.
      using (new SiteContextSwitcher(Factory.GetSite(siteName)))
      using (new SiteIndependentDatabaseSwitcher(item.Database))
      {
        this.AddVirtualProducts(item, this.GetVirtualProductsForIndexing(item), context);
      }
    }
 /// <summary>
 /// Indexes one product version as a virtual product by forwarding to the
 /// four-argument <c>IndexVersion</c> overload with the catalog item supplied.
 /// </summary>
 /// <param name="catalogItem">The catalog item passed as the last argument of <c>IndexVersion</c>.</param>
 /// <param name="version">The product version to index.</param>
 /// <param name="latestVersion">The latest version of the product.</param>
 /// <param name="context">The index update context.</param>
 protected virtual void AddVirtualProduct(Item catalogItem, Item version, Item latestVersion, IndexUpdateContext context)
 {
     this.IndexVersion(version, latestVersion, context, catalogItem);
 }
示例#26
0
 /// <summary>
 /// Reads the value of <c>_writerField</c> from the given update context.
 /// </summary>
 /// <param name="updateContext">The update context to read the field from.</param>
 /// <returns>The field value as an <c>IndexWriter</c>, or null when it is not one.</returns>
 public IndexWriter GetIndexWriter(IndexUpdateContext updateContext)
 {
     // NOTE(review): presumably the writer is not publicly exposed on the context, hence the field lookup; confirm.
     object fieldValue = _writerField.GetValue(updateContext);
     return fieldValue as IndexWriter;
 }
 /// <summary>
 /// Indexes one product version as a virtual product by forwarding to the
 /// four-argument <c>IndexVersion</c> overload with the catalog item supplied.
 /// </summary>
 /// <param name="catalogItem">The catalog item passed as the last argument of <c>IndexVersion</c>.</param>
 /// <param name="version">The product version to index.</param>
 /// <param name="latestVersion">The latest version of the product.</param>
 /// <param name="context">The index update context.</param>
 protected virtual void AddVirtualProduct(Item catalogItem, Item version, Item latestVersion, IndexUpdateContext context)
 {
   this.IndexVersion(version, latestVersion, context, catalogItem);
 }
        /// <summary>
        /// Indexes the tree that starts at this instance's <c>root</c> using the given update context.
        /// </summary>
        /// <param name="context">The index update context passed to <c>AddTree</c>; must not be null.</param>
        public void Add(IndexUpdateContext context)
        {
            Assert.ArgumentNotNull(context, "context");

            this.AddTree(this.root, context);
        }