Пример #1
0
        /// <summary>
        /// Builds the view model for one crawling file: wraps every resource of
        /// <paramref name="model"/> in a <see cref="ResourceViewModel"/> and builds the
        /// "Resources" node tree with three sub-folders (by processing status, by HTTP
        /// status and by bucket).
        /// </summary>
        /// <param name="model">Crawling context backing this view model; stored in <c>Model</c>.</param>
        /// <param name="dispatcherQueue">Queue handed to each <see cref="ResourceViewModel"/> and to <c>Dispatch</c>.</param>
        public FileViewModel(CrawlingContext model, DispatcherQueue dispatcherQueue)
        {
            Model     = model;
            // One ResourceViewModel per model resource.
            // NOTE(review): Dispatch presumably marshals collection updates through the dispatcher queue - confirm.
            Resources = model.Resources
                        .Select(x => new ResourceViewModel(x, dispatcherQueue))
                        .Dispatch(dispatcherQueue);

            // Shared views reused by the nodes below: everything sorted by HTTP status then URL,
            // and the subset of that view whose status is Processed.
            var orderedResources   = Resources.OrderBy(x => x.HttpStatus).ThenBy(x => x.URL);
            var processedResources = orderedResources.Where(x => x.Status == ResourceStatus.Processed);

            // NOTE(review): the Node arguments look like (title, flag, icon, items, children) - confirm against Node.
            Nodes = new ObservableCollection <Node>
            {
                // Root folder; it is given no item list of its own (null).
                new Node("Resources", true, BitmapIcons.Folder, null,

                         // One child per ResourceStatus value, each built by CreateNodeSplitByDomain
                         // from the resources currently in that status.
                         new Node("By processing status", true, BitmapIcons.Folder, orderedResources,
                                  EnumHelper <ResourceStatus> .Values
                                  .Select(status => CreateNodeSplitByDomain(DescriptionExtractor.GetDescription(status), BitmapIcons.WorkStatuses.TryGetValue(status), orderedResources.Where(x => x.Status == status)))
                                  .ToArray()),

                         // Processed resources grouped by HTTP status, groups ordered by status;
                         // each group node also receives the result of SplitByDomain(grp).
                         new Node("By http status", true, BitmapIcons.Folder, processedResources,
                                  from resource in processedResources
                                  group resource by resource.HttpStatus into grp
                                  orderby grp.Key
                                  select new Node(DescriptionExtractor.GetDescription(grp.Key), false, BitmapIcons.GetImageFromHttpStatus(grp.Key), grp, SplitByDomain(grp))),

                         // All resources grouped by current bucket. A null bucket is mapped to ""
                         // and that group is filtered out; groups are ordered largest first, then by name.
                         new Node("By bucket", true, BitmapIcons.Folder, null,
                                  from resource in Resources
                                  group resource by resource.CurrentBucket ?? "" into grp
                                  where grp.Key != ""
                                  orderby grp.Count descending, grp.Key
                                  select new Node(grp.Key, false, BitmapIcons.Folder, grp)))
            };

            // Subscribe to model property changes; no matching unsubscribe is visible in this constructor.
            Model.PropertyChanged += Model_PropertyChanged;
        }
Пример #2
0
 /// <summary>
 /// Creates a crawler bound to <paramref name="context"/>. The context's resources are
 /// indexed by absolute URL, and a work dispatcher is created that processes resources
 /// through <c>ProcessResource</c>.
 /// </summary>
 /// <param name="context">Crawling context whose resources are indexed; stored in <c>Context</c>.</param>
 /// <param name="limitResources">Optional limit stored in <c>_limitResources</c>; <c>null</c> by default.</param>
 public Crawler(CrawlingContext context, int? limitResources = null)
 {
     _limitResources = limitResources;
     Context         = context;

     // Lookup table keyed by the absolute URI of each resource already in the context.
     Resources = Context.Resources.ToDictionary(resource => resource.Url.AbsoluteUri);

     var dispatcher = new WorkDispatcher <Resource, BucketContext>(ProcessResource);
     WorkDispatcher = dispatcher;

     // Track dispatcher property changes through TrackerOnPropertyChanged.
     dispatcher.PropertyChanged += TrackerOnPropertyChanged;
 }
Пример #3
0
        /// <summary>
        /// Wraps <paramref name="model"/> in a new <see cref="FileViewModel"/>, appends it to
        /// the list of open files and makes it the selected file.
        /// </summary>
        /// <param name="model">Crawling context to expose as a file.</param>
        private void AddFile(CrawlingContext model)
        {
            // The view model gets its own dispatcher queue built on this object's Dispatcher.
            var fileViewModel = new FileViewModel(model, new DispatcherQueue(Dispatcher));

            Model.Files.Add(fileViewModel);
            Model.SelectedFile = fileViewModel;
        }
Пример #4
0
        /// <summary>
        /// Creates a new <see cref="CrawlingContext"/> populated with the resources
        /// deserialized from <paramref name="resourcesNode"/>.
        /// </summary>
        /// <param name="resourcesNode">XML element passed to <c>DeserializeResourceCollection</c>.</param>
        /// <returns>A fresh context containing the deserialized resources.</returns>
        public static CrawlingContext DeserializeContext(XElement resourcesNode)
        {
            var result    = new CrawlingContext();
            var resources = DeserializeResourceCollection(resourcesNode);

            result.Resources.AddRange(resources);
            return result;
        }
Пример #5
0
        /// <summary>
        /// Handles the "play" command: creates a crawler for the selected file, configures its
        /// buckets, behavior rules and starting URLs from the selected configuration, then
        /// either marks the file as processing or reports that there is nothing to do.
        /// </summary>
        /// <param name="sender">Command source (unused).</param>
        /// <param name="e">Command arguments (unused).</param>
        private void CrawlingPlay_Executed(object sender, ExecutedRoutedEventArgs e)
        {
            // Both a configuration and a target file are required.
            if (Model.SelectedConfig == null || Model.SelectedFile == null)
            {
                return;
            }

            CrawlingContext context = Model.SelectedFile.Model;
            int             nbRetry = Int32.Parse(ConfigurationManager.AppSettings["NbRetry"]);

            // Keep a local reference to the crawler: the completion handler below resets
            // context.Crawler to null, so re-reading the property later in this method
            // could throw a NullReferenceException if the crawl finishes in the meantime.
            Crawler crawler = new Crawler(context, null);
            context.Crawler = crawler;

            crawler.OnCompleted += (_, __) =>
            {
                context.Status  = CrawlingStatus.Ready;
                context.Crawler = null;
                RefreshActions();
            };

            // Maps each configured bucket to the work bucket created for it, so rules can be
            // attached to the right work bucket afterwards.
            Dictionary <CrawlingBucket, WorkBucket <Resource, BucketContext> > bucketMapping = new Dictionary <CrawlingBucket, WorkBucket <Resource, BucketContext> >();

            foreach (CrawlingBucket bucket in Model.SelectedConfig.Buckets)
            {
                BucketContext bucketContext = new BucketContext(nbRetry);
                foreach (CrawlingHostMapping mapping in bucket.HostMappings)
                {
                    bucketContext.Hosts[mapping.Host] = mapping.IPAddress;
                }

                WorkBucket <Resource, BucketContext> workBucket = crawler.AddBucket(bucket.Name, bucket.NbThreads, bucketContext);
                bucketMapping.Add(bucket, workBucket);
            }

            foreach (CrawlingRule rule in Model.SelectedConfig.Rules)
            {
                // Throws KeyNotFoundException if a rule targets a bucket that is not part of the configuration.
                WorkBucket <Resource, BucketContext> workBucket = bucketMapping[rule.TargetBucket];
                crawler.AddBehaviorRule(rule.Name, rule.Behavior, workBucket, rule.Conditions);
            }

            crawler.Reprocess();
            //foreach (ResourceToProcess resourceToProcess in config.)
            //    context.Crawler.AddUrlToProcess(resourceToProcess.Url);
            foreach (CrawlingStartingUrl startingUrl in Model.SelectedConfig.StartingUrls)
            {
                crawler.AddUrlToProcess(startingUrl.Value);
            }

            if (crawler.WorkDispatcher.IsWorking)
            {
                context.Status = CrawlingStatus.Processing;
                RefreshActions();
            }
            else
            {
                MessageBox.Show(this, "Nothing to do", "Information", MessageBoxButton.OK, MessageBoxImage.Information);
            }
        }
Пример #6
0
        /// <summary>
        /// Handles the "stop" command: if the selected file has a crawler that is still
        /// working, marks the file as stopping and asks the crawler to stop.
        /// </summary>
        /// <param name="sender">Command source (unused).</param>
        /// <param name="e">Command arguments (unused).</param>
        private void CrawlingStop_Executed(object sender, ExecutedRoutedEventArgs e)
        {
            if (Model.SelectedFile == null)
            {
                return;
            }

            CrawlingContext context = Model.SelectedFile.Model;

            // Read the crawler once: the context's Crawler is reset to null when a crawl
            // completes, so it may be absent here or disappear between two reads.
            Crawler crawler = context.Crawler;
            if (crawler == null || !crawler.WorkDispatcher.IsWorking)
            {
                return;
            }

            context.Status = CrawlingStatus.Stopping;
            crawler.Stop();
            RefreshActions();
        }
Пример #7
0
        /// <summary>
        /// Saves <paramref name="file"/> to disk on a thread-pool thread. The user is asked
        /// for a target path when the file has none yet or when <paramref name="saveAs"/>
        /// is set; cancelling the dialog aborts the save.
        /// </summary>
        /// <param name="file">Crawling context whose resources are serialized.</param>
        /// <param name="saveAs"><c>true</c> to always prompt for the target path.</param>
        private void SaveFile(CrawlingContext file, bool saveAs)
        {
            FileInfo destination;

            if (file.FullPath != null && !saveAs)
            {
                // Known location and no "save as": reuse the current path.
                destination = file.FullPath;
            }
            else
            {
                SaveFileDialog dialog = new SaveFileDialog
                {
                    // Default file name
                    FileName   = file.FullPath == null ? "CrawlingResult.cwl" : file.FullPath.FullName,
                    // Default file extension
                    DefaultExt = ".cwl",
                    // Filter files by extension
                    Filter     = "Crawling result (.cwl)|*.cwl"
                };

                // Anything but an explicit confirmation cancels the save.
                if (dialog.ShowDialog() != true)
                {
                    return;
                }

                destination = new FileInfo(dialog.FileName);
            }

            if (destination == null)
            {
                return;
            }

            file.Status = CrawlingStatus.Saving;
            ThreadPool.QueueUserWorkItem(state =>
            {
                try
                {
                    ResourcesSerializer.SerializeResourceCollection(file.Resources).Save(destination.FullName);

                    // Only record the new location once the write has succeeded.
                    file.FullPath   = destination;
                    file.HasChanged = false;
                }
                catch (Exception ex)
                {
                    // The message box is shown through the dispatcher because this runs on a pool thread.
                    Dispatcher.Invoke((Action)(() => MessageBox.Show("Failed to save file " + destination.Name + ": " + ex.Message)));
                }
                finally
                {
                    // Success or failure, the file leaves the Saving state.
                    file.Status = CrawlingStatus.Ready;
                    RefreshActions();
                }
            });
        }
Пример #8
0
        /// <summary>
        /// Handles the "import URLs" command: prompts the user for URLs and adds each one
        /// (with its session id stripped) to the selected file as a resource that is ready
        /// to process, skipping URLs the file already contains. Does nothing unless the
        /// file is in the Ready state.
        /// </summary>
        /// <param name="sender">Command source (unused).</param>
        /// <param name="e">Command arguments (unused).</param>
        private void ImportUrls_Executed(object sender, ExecutedRoutedEventArgs e)
        {
            if (Model.SelectedFile == null)
            {
                return;
            }

            CrawlingContext context = Model.SelectedFile.Model;

            if (context.Status != CrawlingStatus.Ready)
            {
                return;
            }

            List <Uri> urls = ImportUrlsWindow.PromptUrls(this);

            // Defensive: treat a missing list (e.g. a cancelled prompt) as nothing to import.
            if (urls == null)
            {
                return;
            }

            HashSet <string> existingUrls = new HashSet <string>(context.Resources.Select(x => x.Url.AbsoluteUri));
            foreach (Uri url in urls)
            {
                Uri urlWithoutSessionId = url.WithoutSession();

                // HashSet.Add returns false when the value is already present, so one call both
                // tests for a duplicate and records the new URL.
                if (existingUrls.Add(urlWithoutSessionId.AbsoluteUri))
                {
                    Resource resource = new Resource(urlWithoutSessionId, ResourceBehavior.Ignore);
                    resource.Status = ResourceStatus.ReadyToProcess;
                    context.Resources.Add(resource);
                }
            }
        }
Пример #9
0
        /// <summary>
        /// Opens <paramref name="file"/>: registers a new crawling context in the Loading
        /// state right away, then reads and deserializes the resources on a thread-pool
        /// thread. A failure to read or parse the file is reported to the user instead of
        /// escaping the worker thread.
        /// </summary>
        /// <param name="file">File to load.</param>
        private void LoadFile(FileInfo file)
        {
            CrawlingContext context = new CrawlingContext();

            context.FullPath   = new FileInfo(file.FullName);
            context.HasChanged = false;
            context.Status     = CrawlingStatus.Loading;
            AddFile(context);

            ThreadPool.QueueUserWorkItem(_ =>
            {
                try
                {
                    XDocument doc             = XDocument.Load(file.FullName);
                    List <Resource> resources = ResourcesSerializer.DeserializeResourceCollection(doc.Root);
                    foreach (Resource resource in resources)
                    {
                        context.Resources.Add(resource);
                    }
                }
                catch (Exception ex)
                {
                    // An unhandled exception on a thread-pool thread terminates the process;
                    // report the failure through the dispatcher instead.
                    // NOTE(review): the context stays open with whatever was loaded so far - confirm this is acceptable.
                    Dispatcher.Invoke((Action)(() => MessageBox.Show("Failed to load file " + file.Name + ": " + ex.Message)));
                }
                finally
                {
                    // Never leave the file stuck in the Loading state.
                    context.Status = CrawlingStatus.Ready;
                    RefreshActions();
                }
            });
        }
Пример #10
0
        /// <summary>
        /// Handles the "generate report" command: asks for a target .xlsx file, then builds
        /// the Excel report for the selected file's resources on a thread-pool thread.
        /// A failure during generation or saving is reported to the user, and the file
        /// always returns to the Ready state.
        /// </summary>
        /// <param name="sender">Command source (unused).</param>
        /// <param name="e">Command arguments (unused).</param>
        private void ReportingGenerate_Executed(object sender, ExecutedRoutedEventArgs e)
        {
            if (Model.SelectedFile == null)
            {
                return;
            }

            CrawlingContext context = Model.SelectedFile.Model;

            string defaultFileName = context.FullPath != null ? Path.GetFileNameWithoutExtension(context.FullPath.FullName) : "Report";

            // Configure save file dialog box
            SaveFileDialog dlg = new SaveFileDialog();

            dlg.FileName   = defaultFileName;                 // Default file name
            dlg.DefaultExt = ".xlsx";                         // Default file extension
            dlg.Filter     = "Excel document (.xlsx)|*.xlsx"; // Filter files by extension

            // Process save file dialog box results
            if (dlg.ShowDialog() != true)
            {
                return;
            }

            // Capture the chosen path now so the worker thread does not touch the dialog.
            FileInfo target = new FileInfo(dlg.FileName);

            context.Status = CrawlingStatus.GeneratingReport;

            ThreadPool.QueueUserWorkItem(_ =>
            {
                try
                {
                    // ExcelPackage is disposable; release it even if a report or the save fails.
                    using (ExcelPackage pkg = new ExcelPackage())
                    {
                        ReportSerializer.AddReport(pkg, ReportGenerator.GenerateRequestsReport(context.Resources));
                        ReportSerializer.AddReport(pkg, ReportGenerator.GenerateCachingReport(context.Resources));
                        ReportSerializer.AddReport(pkg, ReportGenerator.GenerateReferenceSummaryReport(context.Resources));
                        ReportSerializer.AddReport(pkg, ReportGenerator.GenerateReferencesReport(context.Resources));
                        ReportSerializer.AddReport(pkg, ReportGenerator.GenerateRedirectionReport(context.Resources));
                        ReportSerializer.AddReport(pkg, ReportGenerator.GenerateContentReport(context.Resources));
                        ReportSerializer.AddReport(pkg, ReportGenerator.GenerateErrorsReport(context.Resources));
                        //foreach (ReportConfig reportConfig in Model.ReportConfigs)
                        //    ReportSerializer.AddReport(pkg, ReportGenerator.GenerateReport(reportConfig, context.Resources));

                        pkg.SaveAs(target);
                    }
                }
                catch (Exception ex)
                {
                    // An unhandled exception on a thread-pool thread terminates the process
                    // (e.g. the target workbook is open in Excel); report it instead.
                    Dispatcher.Invoke((Action)(() => MessageBox.Show("Failed to generate report " + target.Name + ": " + ex.Message)));
                }
                finally
                {
                    // Never leave the file stuck in the GeneratingReport state.
                    context.Status = CrawlingStatus.Ready;
                    RefreshActions();
                }
            });
        }