Exemple #1
0
 /// <summary>
 /// Creates the page actor: records its own name, the supplied web socket and
 /// configuration, then hands <c>PreparePage</c> to
 /// <c>ActorTaskScheduler.RunTask</c>.
 /// </summary>
 /// <param name="webSocket">Value stored in <c>WebSocket</c> (presumably the browser endpoint to connect to; confirm against <c>PreparePage</c>).</param>
 /// <param name="configuration">Value stored in <c>Configuration</c>.</param>
 public PageActor(string webSocket, XpathConfigurationForPuppeteer configuration)
 {
     // State is assigned before the preparation task is scheduled.
     PageActorName = Self.Path.Name;
     WebSocket = webSocket;
     Configuration = configuration;

     // Page preparation is asynchronous, so it is started through the actor task scheduler.
     ActorTaskScheduler.RunTask(async () => await PreparePage());
 }
Exemple #2
0
 /// <summary>
 /// Creates the browser actor: records its own name and the supplied XPath
 /// configuration, and sets the page count to a fixed value.
 /// </summary>
 /// <param name="xpathConfiguration">Value stored in <c>XpathConfiguration</c>.</param>
 public BrowserActor(XpathConfigurationForPuppeteer xpathConfiguration)
 {
     // Hard-coded value assigned to AmountOfPages.
     const int defaultAmountOfPages = 3;

     BrowserActorName = Self.Path.Name;
     XpathConfiguration = xpathConfiguration;
     AmountOfPages = defaultAmountOfPages;

     // TODO Add Chromium instance handling
 }
Exemple #3
0
        /// <summary>
        /// Creates the project actor: spawns the URL parser, object parser, browser,
        /// URL tracker and object tracker child actors, then sends the project's
        /// start URLs to the URL tracker.
        /// </summary>
        /// <param name="projectDefinition">
        /// Supplies the scraper schemas, the input for the Puppeteer XPath
        /// configuration, and the start URLs.
        /// </param>
        public ProjectActor(ProjectDefinition projectDefinition)
        {
            ProjectActorName = Self.Path.Name;

            List<ScraperSchema> schemas = projectDefinition.ScraperSchemas;
            XpathConfigurationForPuppeteer puppeteerConfiguration =
                new XpathConfigurationForPuppeteer(projectDefinition);

            // Worker children.
            // TODO Add coordinator for scaling (applies to each of the three actors below)
            Context.ActorOf(Props.Create<UrlParserActor>(), ActorPaths.UrlParser.Name);
            Context.ActorOf(Props.Create<ObjectParserActor>(schemas), ActorPaths.ObjectParser.Name);
            Context.ActorOf(Props.Create<BrowserActor>(puppeteerConfiguration), ActorPaths.Browser.Name);

            #region Singletons
            // There can NEVER be more than ONE instance of each tracker below.
            IActorRef urlTrackerRef = Context.ActorOf(
                Props.Create<UrlTrackerActor>(projectDefinition),
                ActorPaths.UrlTracker.Name);
            Context.ActorOf(
                Props.Create<ObjectTrackerActor>(schemas),
                ActorPaths.ObjectTracker.Name);
            #endregion

            // START URL: send the project's start URLs to the URL tracker.
            urlTrackerRef.Tell(new UnprocessedUrlsMessage(projectDefinition.StartUrls));
        }