Ejemplo n.º 1
0
        /// <summary>
        /// Entry point: fetches job-post files from the "jobposts-scraped" S3 bucket,
        /// stores their contents in <c>JobPostRepo</c>, and then starts the run.
        /// </summary>
        public static async Task Main()
        {
            var connector = new S3Connector(RegionEndpoint.EUWest1, "jobposts-scraped");

            // Upper bound passed to GetFiles via its "to" parameter.
            var upperBound = new DateTime(2020, 05, 08);
            IEnumerable<S3Object> fileKeys = await connector.GetFiles(to: upperBound);

            // Turn the listed S3 objects into JobPost instances and register them.
            IEnumerable<JobPost> loadedPosts = await connector.GetFileContents(fileKeys);
            JobPostRepo.Add(loadedPosts);

            Console.WriteLine("Starting the RUN()");
            var program = new Program();
            program.Run();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Checks that <c>JobPostRepo.Add</c> grows the repository by the number of
        /// job posts supplied (from zero to two).
        /// </summary>
        public void JobPostRepoAddTest()
        {
            // Arrange: two distinct job posts bundled into one batch.
            var fullStackPost = new JobPost("Best Job for you", "Full stack developer is needed");
            var dotNetPost = new JobPost("Best Job", ".Net developer is needed");
            var batch = new List<JobPost> { fullStackPost, dotNetPost };

            // NOTE(review): this precondition relies on the repository being empty
            // when the test starts — confirm no other test populates it first.
            Assert.Equal(0, JobPostRepo.Get().Count);

            // Act
            JobPostRepo.Add(batch);

            // Assert: both posts are now present.
            Assert.Equal(2, JobPostRepo.Get().Count);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Entry point: fetches job-post files from the "jobposts-scraped" S3 bucket,
        /// stores their contents in <c>JobPostRepo</c>, and then runs the pipeline.
        /// </summary>
        /// <param name="args">
        /// Optional command-line arguments. When both <c>args[0]</c> and <c>args[1]</c>
        /// are present and non-empty they are forwarded to <c>RunAsync</c> as its first
        /// and second argument (the second converted to an integer). Otherwise
        /// <c>RunAsync</c> is called with its default parameters.
        /// </param>
        /// <exception cref="FormatException">
        /// <c>args[1]</c> is supplied but is not a valid integer.
        /// </exception>
        public static async Task Main(string[] args)
        {
            const string bucketName = "jobposts-scraped";
            var s3Connector = new S3Connector(RegionEndpoint.EUWest1, bucketName);
            ICollection<S3Object> jobPostsKeys = await s3Connector.GetFiles(from: DateTime.Now);

            ICollection<JobPost> jobPostsObjs = await s3Connector.GetFileContents(jobPostsKeys);

            JobPostRepo.Add(jobPostsObjs);
            Console.WriteLine("Starting the RUN()");

            // Both arguments are read below, so at least two must exist. Checking only
            // "Length > 0" would throw IndexOutOfRangeException on args[1] when a
            // single argument is passed.
            bool hasBothArguments = args.Length > 1
                && !string.IsNullOrEmpty(args[0])
                && !string.IsNullOrEmpty(args[1]);

            if (hasBothArguments)
            {
                await new Program().RunAsync(args[0], Convert.ToInt32(args[1]));
            }
            else
            {
                await new Program().RunAsync();
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Runs the full pipeline against a Gremlin database: inserts skill nodes,
        /// processes job posts into skill relations, inserts those as edges, queries
        /// the skills related to "c#", prints timings and results to the console, and
        /// finally commits the collected metrics.
        /// </summary>
        /// <param name="endpoint">Host passed to the <c>GremlinDB</c> constructor.</param>
        /// <param name="port">Port passed to the <c>GremlinDB</c> constructor.</param>
        /// <returns>A task that completes when the run has finished.</returns>
        /// <remarks>
        /// Any exception raised during the run is caught and written to the console;
        /// it is not rethrown to the caller.
        /// </remarks>
        public async Task RunAsync(string endpoint = "localhost", int port = 8182)
        {
            try
            {
                // This uses the default Neptune and Gremlin port, 8182
                var gremlinDB = new GremlinDB(endpoint, port);

                // To start from an empty graph, drop the entire DB first:
                //gremlinDB.Drop();

                // Job posts previously stored in the repository.
                var jobPosts = JobPostRepo.Get();

                // Load CSV data for skills (assigned directly; no placeholder list needed).
                List<Skill> skills = LoadDataToMemory();

                // stepTimer is restarted for each phase; totalTimer keeps running
                // from the start of node insertion onwards.
                Stopwatch stepTimer = Stopwatch.StartNew();
                Stopwatch totalTimer = Stopwatch.StartNew();

                // Skills into DB as nodes.
                gremlinDB.InsertNodes(skills);
                Console.WriteLine(stepTimer.Elapsed);
                Console.WriteLine("\tEND inserting NODES\n");

                // Derive skill relations from the job posts.
                IJobPostProcessor jobPostProcessor = new JobPostProcessor();
                Console.WriteLine("Start processing JOB POSTS");
                stepTimer.Restart();
                var jobPostsSkills = jobPostProcessor.ProcessJobPosts(skills, jobPosts);
                Console.WriteLine(stepTimer.Elapsed);
                Console.WriteLine("\tEND iprocessing JOB POSTS\n");

                // Relations into DB as edges.
                Console.WriteLine("Start inserting EDGES");
                stepTimer.Restart();
                gremlinDB.InsertEdges(jobPostsSkills);
                Console.WriteLine(stepTimer.Elapsed);
                Console.WriteLine("\tEND inserting EDGES\n");

                // Get related skills.
                const string skillNameForSearch = "c#";
                const int limit = 10;

                Console.WriteLine("Start RELATED skills");
                var relatedSkills = gremlinDB.GetRelatedSkills(skillNameForSearch, limit);

                // Time elapsed since node insertion began, including the query above.
                Console.WriteLine(totalTimer.Elapsed);

                Console.WriteLine($"Top {limit} skills related to {skillNameForSearch}:\n");
                foreach (var skill in relatedSkills)
                {
                    Console.WriteLine($"Name: {skill.Name}, Category: {skill.Category}, Weight: {skill.Weight}");
                }

                Console.WriteLine("\n\nTotal number of skills: {0}", gremlinDB.CountNodes());

                await Helpers.Metrics.CommitDataAsync();

                Console.WriteLine("Finished");
            }
            catch (Exception e)
            {
                // Best-effort run: report the failure instead of propagating it.
                Console.WriteLine("{0}", e);
            }
        }