示例#1
0
        /// <summary>
        /// Validates a JSON project request against the schema stored in <c>requestSchema.json</c>,
        /// registers a new <see cref="ProjectCrawler"/> for it in <c>ProjectCrawlerById</c> and
        /// sends the accepted request to all connected clients as <c>RegisterProjectReply</c>.
        /// </summary>
        /// <param name="json">The raw JSON text of the project request.</param>
        /// <returns>A task that completes once the reply has been sent to the clients.</returns>
        /// <exception cref="HubException">
        /// Thrown when the JSON is missing, malformed or fails schema validation, when a project
        /// with the same id is already registered, or when adding the project fails.
        /// </exception>
        public async Task RegisterProjectMessage(string json)
        {
            // A missing payload is reported the same way as any other invalid payload
            // instead of surfacing an ArgumentNullException from StringReader.
            if (json == null)
            {
                throw new HubException("The json provided was invalid");
            }

            IList<string> errorMessages = new List<string>();
            PortiaRequest request;

            // All three readers are IDisposable; dispose them deterministically.
            using (StringReader stringReader = new StringReader(json))
            using (JsonTextReader jsonReader = new JsonTextReader(stringReader))
            using (JSchemaValidatingReader jSchemaReader = new JSchemaValidatingReader(jsonReader))
            {
                // Schema loading stays outside the try block: a broken schema file is a
                // server-side problem and must not be reported as invalid client input.
                jSchemaReader.Schema = JSchema.Parse(File.ReadAllText("requestSchema.json"));

                // Schema violations are collected here rather than thrown by the reader.
                jSchemaReader.ValidationEventHandler += (o, a) => errorMessages.Add(a.Message);

                try
                {
                    JsonSerializer serializer = new JsonSerializer();
                    request = serializer.Deserialize<PortiaRequest>(jSchemaReader);
                }
                catch (JsonException)
                {
                    // Syntactically malformed JSON never reaches the validation handler;
                    // translate it into the same client-visible error.
                    throw new HubException("The json provided was invalid");
                }
            }

            // A literal "null" document deserializes to null; treat it as invalid too.
            if (request == null || errorMessages.Count > 0)
            {
                throw new HubException("The json provided was invalid");
            }

            if (ProjectCrawlerById.ContainsKey(request.Id))
            {
                throw new HubException("Project already registered");
            }

            ProjectCrawler newProjectCrawler = new ProjectCrawler(request);

            // TryAdd can still fail if the same id was added after the ContainsKey check above.
            if (!ProjectCrawlerById.TryAdd(newProjectCrawler.portiaRequest.Id, newProjectCrawler))
            {
                throw new HubException("Project registration failed");
            }

            await Clients.All.SendAsync("RegisterProjectReply", newProjectCrawler.portiaRequest);
        }
示例#2
0
        // NOTE(review): presumably the cap on simultaneous downloads (defaults to 3); its use is not visible here — confirm.
        public int maxConcurrentDownload     = 3; // Should be in the puppeteerWrapper
        /// <summary>
        /// Creates a crawler for the given request: stores the request, prepares an empty
        /// result array per job, gathers the xpaths to wait for, and hands the start URLs
        /// to the collection matching the request's crawl mode.
        /// </summary>
        /// <param name="request">The project request this crawler is built from.</param>
        public ProjectCrawler(PortiaRequest request)
        {
            portiaRequest = request;

            foreach (var job in portiaRequest.Jobs)
            {
                // Each job starts with its own empty JArray, keyed by the job's name.
                dataByJobName.TryAdd(job.Name, new JArray());

                // Accumulate the xpaths of every node of this job into xpathsToWaitFor.
                foreach (var jobNode in job.Nodes)
                {
                    xpathsToWaitFor.AddRange(GetAllXpath(jobNode));
                }
            }

            // Pick the target collection for the start URLs based on the crawl mode.
            if (!portiaRequest.IsFixedListOfUrls)
            {
                // Traversable crawl: the start URLs go to the queue of URLs.
                FilterAndAddUrls(portiaRequest.StartUrls, ref currentQueuedUrls);
            }
            else
            {
                // Static list: the start URLs go to the fixed list.
                FilterAndAddUrls(portiaRequest.StartUrls, ref urlsToFixedList);
            }
        }