Exemplo n.º 1
0
        /// <summary>
        /// Creates the view model for one crawling context: wraps each resource of
        /// <paramref name="model"/> in a <see cref="ResourceViewModel"/>, builds the
        /// <c>Nodes</c> tree (resources grouped by processing status, by HTTP status and
        /// by bucket) and subscribes to the model's <c>PropertyChanged</c> event.
        /// </summary>
        /// <param name="model">Crawling context exposed by this view model; must not be null (its <c>Resources</c> are read immediately).</param>
        /// <param name="dispatcherQueue">Queue handed to every <see cref="ResourceViewModel"/> and to the <c>Dispatch</c> call on the resulting collection.</param>
        public FileViewModel(CrawlingContext model, DispatcherQueue dispatcherQueue)
        {
            Model     = model;
            // NOTE(review): Select/Dispatch/OrderBy/Where below are presumably operators of a
            // live (observable) collection library rather than plain LINQ-to-Objects — confirm.
            Resources = model.Resources
                        .Select(x => new ResourceViewModel(x, dispatcherQueue))
                        .Dispatch(dispatcherQueue);

            // Shared base queries reused by several branches of the tree.
            var orderedResources   = Resources.OrderBy(x => x.HttpStatus).ThenBy(x => x.URL);
            var processedResources = orderedResources.Where(x => x.Status == ResourceStatus.Processed);

            Nodes = new ObservableCollection <Node>
            {
                // Root node: carries no resource list of its own (null), only child nodes.
                new Node("Resources", true, BitmapIcons.Folder, null,

                         // One child per ResourceStatus value, each built by CreateNodeSplitByDomain.
                         new Node("By processing status", true, BitmapIcons.Folder, orderedResources,
                                  EnumHelper <ResourceStatus> .Values
                                  .Select(status => CreateNodeSplitByDomain(DescriptionExtractor.GetDescription(status), BitmapIcons.WorkStatuses.TryGetValue(status), orderedResources.Where(x => x.Status == status)))
                                  .ToArray()),

                         // Processed resources only, one child per distinct HTTP status, ordered by status.
                         new Node("By http status", true, BitmapIcons.Folder, processedResources,
                                  from resource in processedResources
                                  group resource by resource.HttpStatus into grp
                                  orderby grp.Key
                                  select new Node(DescriptionExtractor.GetDescription(grp.Key), false, BitmapIcons.GetImageFromHttpStatus(grp.Key), grp, SplitByDomain(grp))),

                         // All resources grouped by bucket name; a null bucket is mapped to "" and that
                         // group is filtered out. Largest groups first, ties ordered by bucket name.
                         new Node("By bucket", true, BitmapIcons.Folder, null,
                                  from resource in Resources
                                  group resource by resource.CurrentBucket ?? "" into grp
                                  where grp.Key != ""
                                  orderby grp.Count descending, grp.Key
                                  select new Node(grp.Key, false, BitmapIcons.Folder, grp)))
            };

            // NOTE(review): no matching unsubscription is visible in this snippet — confirm the
            // handler is detached elsewhere if the view model can outlive its usefulness.
            Model.PropertyChanged += Model_PropertyChanged;
        }
Exemplo n.º 2
0
 /// <summary>
 /// Creates a crawler bound to <paramref name="context"/>, indexing the context's existing
 /// resources by absolute URI and wiring up the work dispatcher.
 /// </summary>
 /// <param name="context">Crawling context whose resources are indexed by this crawler.</param>
 /// <param name="limitResources">Optional resource limit stored for later use; <c>null</c> when not supplied.</param>
 public Crawler(CrawlingContext context, int? limitResources = null)
 {
     _limitResources = limitResources;

     Context   = context;
     Resources = Context.Resources.ToDictionary(resource => resource.Url.AbsoluteUri);

     // Build and subscribe the dispatcher before publishing it through the property.
     var dispatcher = new WorkDispatcher <Resource, BucketContext>(ProcessResource);
     dispatcher.PropertyChanged += TrackerOnPropertyChanged;
     WorkDispatcher = dispatcher;
 }
Exemplo n.º 3
0
        /// <summary>
        /// Wraps <paramref name="model"/> in a <see cref="FileViewModel"/>, appends it to the
        /// window model's file list and makes it the selected file.
        /// </summary>
        /// <param name="model">Crawling context to expose as a new file entry.</param>
        private void AddFile(CrawlingContext model)
        {
            // The view model gets its own queue built on this window's dispatcher.
            var fileViewModel = new FileViewModel(model, new DispatcherQueue(Dispatcher));

            Model.Files.Add(fileViewModel);
            Model.SelectedFile = fileViewModel;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds a new <see cref="CrawlingContext"/> populated with the resources
        /// deserialized from <paramref name="resourcesNode"/>.
        /// </summary>
        /// <param name="resourcesNode">XML element passed to <c>DeserializeResourceCollection</c>.</param>
        /// <returns>A fresh context containing the deserialized resources.</returns>
        public static CrawlingContext DeserializeContext(XElement resourcesNode)
        {
            var result          = new CrawlingContext();
            var targetResources = result.Resources;

            targetResources.AddRange(DeserializeResourceCollection(resourcesNode));

            return result;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Command handler that starts a crawl on the selected file using the selected configuration:
        /// creates a <see cref="Crawler"/>, registers one work bucket per configured bucket, registers
        /// the behavior rules, reprocesses existing resources and enqueues the starting URLs.
        /// </summary>
        /// <remarks>
        /// Does nothing when no configuration or no file is selected. The crawler is held in a local
        /// variable for the whole setup because the completion handler resets
        /// <c>context.Crawler</c> to null; re-reading the property after work has been queued could
        /// therefore throw a <see cref="NullReferenceException"/>.
        /// Throws if the "NbRetry" application setting is missing or is not an integer.
        /// </remarks>
        /// <param name="sender">Command source (unused).</param>
        /// <param name="e">Command event data (unused).</param>
        private void CrawlingPlay_Executed(object sender, ExecutedRoutedEventArgs e)
        {
            if (Model.SelectedConfig == null || Model.SelectedFile == null)
            {
                return;
            }

            CrawlingContext context = Model.SelectedFile.Model;
            int             nbRetry = Int32.Parse(ConfigurationManager.AppSettings["NbRetry"]);

            // Keep a local reference: context.Crawler is cleared by the OnCompleted handler below.
            Crawler crawler = new Crawler(context, null);
            context.Crawler = crawler;

            crawler.OnCompleted += (_, __) =>
            {
                context.Status  = CrawlingStatus.Ready;
                context.Crawler = null;
                RefreshActions();
            };

            // Maps each configured bucket to the work bucket created for it, so rules can target it.
            Dictionary <CrawlingBucket, WorkBucket <Resource, BucketContext> > bucketMapping = new Dictionary <CrawlingBucket, WorkBucket <Resource, BucketContext> >();

            foreach (CrawlingBucket bucket in Model.SelectedConfig.Buckets)
            {
                BucketContext bucketContext = new BucketContext(nbRetry);
                foreach (CrawlingHostMapping mapping in bucket.HostMappings)
                {
                    bucketContext.Hosts[mapping.Host] = mapping.IPAddress;
                }

                WorkBucket <Resource, BucketContext> workBucket = crawler.AddBucket(bucket.Name, bucket.NbThreads, bucketContext);
                bucketMapping.Add(bucket, workBucket);
            }

            foreach (CrawlingRule rule in Model.SelectedConfig.Rules)
            {
                WorkBucket <Resource, BucketContext> workBucket = bucketMapping[rule.TargetBucket];
                crawler.AddBehaviorRule(rule.Name, rule.Behavior, workBucket, rule.Conditions);
            }

            crawler.Reprocess();

            foreach (CrawlingStartingUrl startingUrl in Model.SelectedConfig.StartingUrls)
            {
                crawler.AddUrlToProcess(startingUrl.Value);
            }

            if (crawler.WorkDispatcher.IsWorking)
            {
                context.Status = CrawlingStatus.Processing;
                RefreshActions();
            }
            else
            {
                MessageBox.Show(this, "Nothing to do", "Information", MessageBoxButton.OK, MessageBoxImage.Information);
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Command handler that asks the running crawler of the selected file to stop.
        /// </summary>
        /// <remarks>
        /// Does nothing when no file is selected, when the file has no crawler, or when the
        /// crawler's dispatcher is not working. The crawler is read once into a local because
        /// <c>context.Crawler</c> is reset to null when a crawl completes.
        /// </remarks>
        /// <param name="sender">Command source (unused).</param>
        /// <param name="e">Command event data (unused).</param>
        private void CrawlingStop_Executed(object sender, ExecutedRoutedEventArgs e)
        {
            if (Model.SelectedFile == null)
            {
                return;
            }

            CrawlingContext context = Model.SelectedFile.Model;

            // Single read: the property may be nulled between a check and a later use.
            Crawler crawler = context.Crawler;

            if (crawler == null || !crawler.WorkDispatcher.IsWorking)
            {
                return;
            }

            context.Status = CrawlingStatus.Stopping;
            crawler.Stop();
            RefreshActions();
        }
Exemplo n.º 7
0
        /// <summary>
        /// Saves the resources of <paramref name="file"/> to disk on a thread-pool thread.
        /// </summary>
        /// <remarks>
        /// A save dialog is shown when the file has no path yet or when <paramref name="saveAs"/>
        /// is true; cancelling the dialog aborts the save. While saving, the file status is
        /// <c>Saving</c>; it is set back to <c>Ready</c> whether the save succeeds or fails.
        /// Failures are reported to the user in a message box.
        /// </remarks>
        /// <param name="file">Crawling context to serialize.</param>
        /// <param name="saveAs">True to always prompt for a destination.</param>
        private void SaveFile(CrawlingContext file, bool saveAs)
        {
            FileInfo destination;

            if (file.FullPath != null && !saveAs)
            {
                // Known path and no "save as" request: overwrite in place.
                destination = file.FullPath;
            }
            else
            {
                SaveFileDialog dialog = new SaveFileDialog
                {
                    FileName   = file.FullPath == null ? "CrawlingResult.cwl" : file.FullPath.FullName,
                    DefaultExt = ".cwl",
                    Filter     = "Crawling result (.cwl)|*.cwl"
                };

                if (dialog.ShowDialog() != true)
                {
                    return;
                }

                destination = new FileInfo(dialog.FileName);
            }

            if (destination == null)
            {
                return;
            }

            file.Status = CrawlingStatus.Saving;

            ThreadPool.QueueUserWorkItem(_ =>
            {
                try
                {
                    XElement serialized = ResourcesSerializer.SerializeResourceCollection(file.Resources);
                    serialized.Save(destination.FullName);

                    file.FullPath   = destination;
                    file.HasChanged = false;
                }
                catch (Exception e)
                {
                    string message = "Failed to save file " + destination.Name + ": " + e.Message;
                    Dispatcher.Invoke((Action)(() => MessageBox.Show(message)));
                }
                finally
                {
                    file.Status = CrawlingStatus.Ready;
                    RefreshActions();
                }
            });
        }
Exemplo n.º 8
0
        /// <summary>
        /// Command handler that prompts for a list of URLs and adds each one not already present
        /// in the selected file as a new resource in the <c>ReadyToProcess</c> state.
        /// </summary>
        /// <remarks>
        /// Only runs when the selected file is in the <c>Ready</c> status. URLs are compared by
        /// absolute URI after <c>WithoutSession()</c> has been applied, so duplicates within the
        /// imported list are also skipped.
        /// </remarks>
        /// <param name="sender">Command source (unused).</param>
        /// <param name="e">Command event data (unused).</param>
        private void ImportUrls_Executed(object sender, ExecutedRoutedEventArgs e)
        {
            CrawlingContext context = Model.SelectedFile.Model;

            if (context.Status != CrawlingStatus.Ready)
            {
                return;
            }

            List <Uri>       importedUrls = ImportUrlsWindow.PromptUrls(this);
            HashSet <string> knownUrls    = new HashSet <string>(context.Resources.Select(x => x.Url.AbsoluteUri));

            foreach (Uri importedUrl in importedUrls)
            {
                Uri normalizedUrl = importedUrl.WithoutSession();

                // Add returns false when the URL is already known, which covers both
                // existing resources and duplicates earlier in the imported list.
                if (!knownUrls.Add(normalizedUrl.AbsoluteUri))
                {
                    continue;
                }

                Resource resource = new Resource(normalizedUrl, ResourceBehavior.Ignore)
                {
                    Status = ResourceStatus.ReadyToProcess
                };
                context.Resources.Add(resource);
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Opens <paramref name="file"/> as a new crawling context: the context is added to the
        /// window immediately in the <c>Loading</c> status and its resources are read from the
        /// XML file on a thread-pool thread.
        /// </summary>
        /// <remarks>
        /// Loading errors (missing file, malformed XML, deserialization failure) are caught and
        /// reported in a message box instead of escaping the thread-pool work item, where an
        /// unhandled exception would terminate the process. The status is always set back to
        /// <c>Ready</c>, so a failed load does not leave the file stuck in <c>Loading</c>.
        /// </remarks>
        /// <param name="file">File to load; must not be null.</param>
        private void LoadFile(FileInfo file)
        {
            CrawlingContext context = new CrawlingContext();

            context.FullPath   = new FileInfo(file.FullName);
            context.HasChanged = false;
            context.Status     = CrawlingStatus.Loading;
            AddFile(context);

            ThreadPool.QueueUserWorkItem(_ =>
            {
                try
                {
                    XDocument doc             = XDocument.Load(file.FullName);
                    List <Resource> resources = ResourcesSerializer.DeserializeResourceCollection(doc.Root);
                    foreach (Resource resource in resources)
                    {
                        context.Resources.Add(resource);
                    }
                }
                catch (Exception ex)
                {
                    // Same reporting style as SaveFile: marshal the message box to the UI thread.
                    Dispatcher.Invoke((Action)(() => MessageBox.Show("Failed to load file " + file.Name + ": " + ex.Message)));
                }
                finally
                {
                    context.Status = CrawlingStatus.Ready;
                    RefreshActions();
                }
            });
        }
Exemplo n.º 10
0
        /// <summary>
        /// Command handler that prompts for a destination and generates the Excel report of the
        /// selected file on a thread-pool thread.
        /// </summary>
        /// <remarks>
        /// Does nothing when no file is selected or the dialog is cancelled. While generating,
        /// the file status is <c>GeneratingReport</c>; it is always set back to <c>Ready</c>.
        /// Errors are caught and reported in a message box rather than escaping the thread-pool
        /// work item, where an unhandled exception would terminate the process. The Excel
        /// package is disposed once the report has been written.
        /// </remarks>
        /// <param name="sender">Command source (unused).</param>
        /// <param name="e">Command event data (unused).</param>
        private void ReportingGenerate_Executed(object sender, ExecutedRoutedEventArgs e)
        {
            if (Model.SelectedFile == null)
            {
                return;
            }

            CrawlingContext context = Model.SelectedFile.Model;

            string defaultFileName = context.FullPath != null ? Path.GetFileNameWithoutExtension(context.FullPath.FullName) : "Report";

            // Configure save file dialog box
            SaveFileDialog dlg = new SaveFileDialog();

            dlg.FileName   = defaultFileName;                 // Default file name
            dlg.DefaultExt = ".xlsx";                         // Default file extension
            dlg.Filter     = "Excel document (.xlsx)|*.xlsx"; // Filter files by extension

            // Process save file dialog box results
            if (dlg.ShowDialog() != true)
            {
                return;
            }

            // Capture the chosen path on the UI thread so the worker does not touch the dialog.
            FileInfo reportFile = new FileInfo(dlg.FileName);

            context.Status = CrawlingStatus.GeneratingReport;

            ThreadPool.QueueUserWorkItem(_ =>
            {
                try
                {
                    using (ExcelPackage pkg = new ExcelPackage())
                    {
                        ReportSerializer.AddReport(pkg, ReportGenerator.GenerateRequestsReport(context.Resources));
                        ReportSerializer.AddReport(pkg, ReportGenerator.GenerateCachingReport(context.Resources));
                        ReportSerializer.AddReport(pkg, ReportGenerator.GenerateReferenceSummaryReport(context.Resources));
                        ReportSerializer.AddReport(pkg, ReportGenerator.GenerateReferencesReport(context.Resources));
                        ReportSerializer.AddReport(pkg, ReportGenerator.GenerateRedirectionReport(context.Resources));
                        ReportSerializer.AddReport(pkg, ReportGenerator.GenerateContentReport(context.Resources));
                        ReportSerializer.AddReport(pkg, ReportGenerator.GenerateErrorsReport(context.Resources));

                        pkg.SaveAs(reportFile);
                    }
                }
                catch (Exception ex)
                {
                    // Same reporting style as SaveFile: marshal the message box to the UI thread.
                    Dispatcher.Invoke((Action)(() => MessageBox.Show("Failed to generate report " + reportFile.Name + ": " + ex.Message)));
                }
                finally
                {
                    context.Status = CrawlingStatus.Ready;
                    RefreshActions();
                }
            });
        }