Example #1
0
 /// <summary>
 /// Creates a rule from a name, a behavior, a target bucket and a set of conditions.
 /// </summary>
 /// <param name="name">Value assigned to <c>Name</c>.</param>
 /// <param name="behavior">Value assigned to <c>Behavior</c>.</param>
 /// <param name="targetBucket">Value assigned to <c>TargetBucket</c>.</param>
 /// <param name="conditions">Collection kept in the private <c>_conditions</c> field.</param>
 public UrlFilterBehaviorRule(
     string name,
     ResourceBehavior behavior,
     WorkBucket<Resource, BucketContext> targetBucket,
     ICollection<CrawlingCondition> conditions)
 {
     // The collection is stored by reference; no copy is made here.
     _conditions = conditions;

     Name = name;
     Behavior = behavior;
     TargetBucket = targetBucket;
 }
Example #2
0
        /// <summary>
        /// Executed handler that builds a <c>Crawler</c> for the selected file's context from the
        /// selected configuration: one work bucket per configured bucket (with its host mappings),
        /// one behavior rule per configured rule, then the starting URLs are queued.
        /// </summary>
        /// <remarks>
        /// Returns without doing anything when no configuration or no file is selected. Shows an
        /// error dialog and returns when the <c>NbRetry</c> application setting is missing or is
        /// not an integer. Shows an information dialog when the dispatcher reports no work.
        /// </remarks>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        private void CrawlingPlay_Executed(object sender, ExecutedRoutedEventArgs e)
        {
            // Both the selected configuration and the selected file are dereferenced below.
            if (Model.SelectedConfig == null || Model.SelectedFile == null)
            {
                return;
            }

            CrawlingContext context = Model.SelectedFile.Model;

            // Validate the setting before a crawler is attached to the context, so that a bad
            // configuration value does not leave a half-initialized crawler behind.
            int nbRetry;
            if (!Int32.TryParse(ConfigurationManager.AppSettings["NbRetry"], out nbRetry))
            {
                MessageBox.Show(this, "The 'NbRetry' application setting is missing or is not a valid integer.", "Error", MessageBoxButton.OK, MessageBoxImage.Error);
                return;
            }

            context.Crawler = new Crawler(context, null);

            // On completion, mark the context ready again and detach the crawler.
            context.Crawler.OnCompleted += (_, __) =>
            {
                context.Status  = CrawlingStatus.Ready;
                context.Crawler = null;
                RefreshActions();
            };

            // Maps each configured bucket to the work bucket created for it, so that rules can be
            // bound to the right work bucket below.
            Dictionary <CrawlingBucket, WorkBucket <Resource, BucketContext> > bucketMapping = new Dictionary <CrawlingBucket, WorkBucket <Resource, BucketContext> >();

            foreach (CrawlingBucket bucket in Model.SelectedConfig.Buckets)
            {
                BucketContext bucketContext = new BucketContext(nbRetry);
                foreach (CrawlingHostMapping mapping in bucket.HostMappings)
                {
                    bucketContext.Hosts[mapping.Host] = mapping.IPAddress;
                }

                WorkBucket <Resource, BucketContext> workBucket = context.Crawler.AddBucket(bucket.Name, bucket.NbThreads, bucketContext);
                bucketMapping.Add(bucket, workBucket);
            }

            foreach (CrawlingRule rule in Model.SelectedConfig.Rules)
            {
                // The indexer throws KeyNotFoundException if the rule targets a bucket that is
                // not part of Model.SelectedConfig.Buckets.
                WorkBucket <Resource, BucketContext> workBucket = bucketMapping[rule.TargetBucket];
                context.Crawler.AddBehaviorRule(rule.Name, rule.Behavior, workBucket, rule.Conditions);
            }

            context.Crawler.Reprocess();

            foreach (CrawlingStartingUrl startingUrl in Model.SelectedConfig.StartingUrls)
            {
                context.Crawler.AddUrlToProcess(startingUrl.Value);
            }

            if (context.Crawler.WorkDispatcher.IsWorking)
            {
                context.Status = CrawlingStatus.Processing;
                RefreshActions();
            }
            else
            {
                MessageBox.Show(this, "Nothing to do", "Information", MessageBoxButton.OK, MessageBoxImage.Information);
            }
        }
        /// <summary>
        /// Creates a <see cref="UrlFilterBehaviorRule"/> from the given arguments, registers it
        /// through <c>Add</c>, and returns it.
        /// </summary>
        /// <param name="name">Name passed to the new rule.</param>
        /// <param name="behavior">Behavior to associate with the new rule.</param>
        /// <param name="targetBucket">Work bucket passed to the new rule as its target.</param>
        /// <param name="conditions">Conditions passed to the new rule.</param>
        /// <returns>The rule that was created and added.</returns>
        public UrlFilterBehaviorRule AddUrlFilterRule(
            string name,
            ResourceBehavior behavior,
            WorkBucket<Resource, BucketContext> targetBucket,
            ICollection<CrawlingCondition> conditions)
        {
            UrlFilterBehaviorRule rule = new UrlFilterBehaviorRule(name, behavior, targetBucket, conditions);
            Add(rule);

            return rule;
        }
Example #4
0
 /// <summary>
 /// Adds a behavior rule by forwarding the arguments to
 /// <c>_behaviorRules.AddUrlFilterRule</c>. The rule object it returns is discarded.
 /// </summary>
 /// <param name="name">Name forwarded for the new rule.</param>
 /// <param name="behavior">Behavior forwarded for the new rule.</param>
 /// <param name="targetBucket">Target work bucket forwarded for the new rule.</param>
 /// <param name="conditions">Conditions forwarded for the new rule.</param>
 public void AddBehaviorRule(
     string name,
     ResourceBehavior behavior,
     WorkBucket<Resource, BucketContext> targetBucket,
     ICollection<CrawlingCondition> conditions)
 {
     _behaviorRules.AddUrlFilterRule(name, behavior, targetBucket, conditions);
 }