/// <summary>
/// Validates a project registration payload against <c>requestSchema.json</c>, registers a new
/// <see cref="ProjectCrawler"/> under the request id and sends the accepted request to all
/// connected clients as <c>RegisterProjectReply</c>.
/// </summary>
/// <param name="json">Serialized <see cref="PortiaRequest"/> received from the caller.</param>
/// <exception cref="HubException">
/// Thrown when the payload is null, malformed or fails schema validation, when a project with the
/// same id is already registered, or when adding the new crawler to the registry fails.
/// </exception>
public async Task RegisterProjectMessage(string json)
{
    if (json == null)
    {
        throw new HubException("The json provided was invalid");
    }

    // Loaded outside the try block below: a broken or missing schema file is a server-side fault
    // and must not be reported to the caller as an invalid payload.
    JSchema schema = JSchema.Parse(File.ReadAllText("requestSchema.json"));

    IList<string> errorMessages = new List<string>();
    PortiaRequest request;

    try
    {
        // Each reader is disposed explicitly so nothing is left open if deserialization throws.
        using (StringReader textReader = new StringReader(json))
        using (JsonTextReader jsonReader = new JsonTextReader(textReader))
        using (JSchemaValidatingReader jSchemaReader = new JSchemaValidatingReader(jsonReader))
        {
            jSchemaReader.Schema = schema;
            // Collect schema violations instead of letting the validating reader throw on the first one.
            jSchemaReader.ValidationEventHandler += (o, a) => errorMessages.Add(a.Message);

            JsonSerializer serializer = new JsonSerializer();
            request = serializer.Deserialize<PortiaRequest>(jSchemaReader);
        }
    }
    catch (JsonException ex)
    {
        // Malformed JSON is reported with the same HubException as a schema violation.
        throw new HubException("The json provided was invalid", ex);
    }

    // A payload such as an empty string deserializes to null; treat it as invalid rather than
    // failing with a NullReferenceException on request.Id below.
    if (errorMessages.Count > 0 || request == null)
    {
        throw new HubException("The json provided was invalid");
    }

    // Checked before constructing the crawler so a duplicate id never triggers the constructor's work.
    if (ProjectCrawlerById.ContainsKey(request.Id))
    {
        throw new HubException("Project already registered");
    }

    ProjectCrawler newProjectCrawler = new ProjectCrawler(request);

    if (!ProjectCrawlerById.TryAdd(newProjectCrawler.portiaRequest.Id, newProjectCrawler))
    {
        throw new HubException("Project registration failed");
    }

    await Clients.All.SendAsync("RegisterProjectReply", newProjectCrawler.portiaRequest);
}
// Should be in the puppeteerWrapper
public int maxConcurrentDownload = 3;

/// <summary>
/// Creates a crawler for the given request: stores the request, prepares an empty result array
/// for every job, collects the xpaths to wait for from every job node, and hands the start URLs
/// to the URL collection that matches the request's mode.
/// </summary>
/// <param name="request">The request describing the jobs and start URLs of the project.</param>
public ProjectCrawler(PortiaRequest request)
{
    portiaRequest = request;

    foreach (var job in portiaRequest.Jobs)
    {
        // Each job name gets its own, initially empty, JArray entry.
        dataByJobName.TryAdd(job.Name, new JArray());

        // Accumulate the xpaths of every node of this job into xpathsToWaitFor.
        foreach (var jobNode in job.Nodes)
        {
            xpathsToWaitFor.AddRange(GetAllXpath(jobNode));
        }
    }

    // Configure which crawler to use.
    if (portiaRequest.IsFixedListOfUrls)
    {
        // Static list: the start URLs go into the fixed list.
        FilterAndAddUrls(portiaRequest.StartUrls, ref urlsToFixedList);
    }
    else
    {
        // Traversable: the start URLs go into the current queue.
        FilterAndAddUrls(portiaRequest.StartUrls, ref currentQueuedUrls);
    }
}