/// <summary>
/// Creates the unit of work on top of the given context.
/// </summary>
/// <param name="ctx">Context stored in <c>_ctx</c>; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="ctx"/> is null.</exception>
public UnitOfWork(ScraperContext ctx)
 {
     // Guard and assignment in one step: a null context is rejected before it is stored.
     _ctx = ctx ?? throw new ArgumentNullException(nameof(ctx));
 }
Exemple #2
0
        /// <summary>
        /// Fire-and-forget entry point kept for existing callers; simply runs <see cref="BeginAsync"/>.
        /// </summary>
        /// <remarks>
        /// This method is <c>async void</c>, so callers cannot await its completion or catch
        /// exceptions it raises. Call <see cref="BeginAsync"/> when either of those matters.
        /// </remarks>
        public async void Begin()
        {
            await BeginAsync();
        }

        /// <summary>
        /// Scrapes pending URIs one at a time until none are left.
        /// </summary>
        /// <remarks>
        /// Each iteration opens a fresh <c>ScraperContext</c>, picks one entry that is not yet
        /// scraped and has fewer than 3 attempts, scrapes it, stores every validated link found
        /// in its anchor elements as a new <c>ScrapedUri</c>, and saves the changes.
        /// </remarks>
        /// <returns>A task that completes once no eligible URI remains.</returns>
        public async System.Threading.Tasks.Task BeginAsync()
        {
            Console.WriteLine("Scraper Started");

            while (true)
            {
                using (var dbContext = new ScraperContext())
                {
                    ScrapedUri nextTarget = await dbContext.ScrapeUri
                        .Where(s => s.Scraped == false && s.ScrapeAttempts < 3)
                        .FirstOrDefaultAsync();

                    // Nothing left to process: leave the loop (the context is disposed by the using block).
                    if (nextTarget == null)
                    {
                        Console.WriteLine("No more Uris to scrape");
                        break;
                    }

                    WebPage webpage = await TryScrapeWebPage(nextTarget.AbsoluteUri);

                    if (webpage != null)
                    {
                        foreach (HtmlNode linkNode in webpage.Html.CssSelect("a"))
                        {
                            string link = linkNode.GetAttributeValue("href");
                            if (string.IsNullOrEmpty(link))
                            {
                                continue;
                            }

                            // Validate receives the page's own URI alongside the link; a null result means "skip".
                            Uri uri = LinkValidation.Validate(link, nextTarget.AbsoluteUri);
                            if (uri == null)
                            {
                                continue;
                            }

                            ScrapedUri scrapeUri = new ScrapedUri
                            {
                                AbsoluteUri    = uri.AbsoluteUri,
                                Scheme         = uri.Scheme,
                                Host           = uri.Host,
                                QueryParams    = uri.Query,
                                FileType       = GetFileType(uri.Segments[uri.Segments.Length - 1]),
                                ScrapeDataTime = DateTime.UtcNow
                            };

                            dbContext.Add(scrapeUri);
                            Console.WriteLine("Adding:" + scrapeUri.AbsoluteUri);
                        }

                        nextTarget.Scraped = true;
                    }

                    // The attempt is counted whether or not the page was retrieved, so a target
                    // that keeps failing stops being selected once it reaches 3 attempts.
                    nextTarget.ScrapeAttempts++;
                    await dbContext.SaveChangesAsync();
                }
            }

            Console.WriteLine("Scraper Finished");
        }
Exemple #3
0
 /// <summary>
 /// Seeds the <c>ScrapeUri</c> set with a starting URI when it contains no entries.
 /// </summary>
 public Scraper()
 {
     using (var dbContext = new ScraperContext())
     {
         // Any() only needs to find one row, whereas Count() has to count them all.
         if (!dbContext.ScrapeUri.Any())
         {
             Console.WriteLine("Adding start point");
             ScrapedUri scrapeUri = new ScrapedUri
             {
                 AbsoluteUri = "http://demo.com",
                 Scheme      = "http",
                 Host        = "demo.com",
                 QueryParams = ""
             };
             dbContext.Add(scrapeUri);

             // Add only starts tracking the entity; without SaveChanges the start point
             // would be dropped when the context is disposed at the end of the using block.
             dbContext.SaveChanges();
         }
     }
 }
Exemple #4
0
        /// <summary>
        /// Assembles the container: assembly types by interface, a SQL Server
        /// <c>ScraperContext</c>, the <c>ShowDal</c> repositories, a logger and the mapper.
        /// </summary>
        /// <param name="factory">Factory used to create the logger registered as <c>ILogger</c>.</param>
        /// <returns>The built container.</returns>
        public static IContainer BuildContainer(ILoggerFactory factory)
        {
            var containerBuilder = new ContainerBuilder();

            // Every type in this assembly that implements at least one interface is
            // registered under the interfaces it implements.
            var typesWithInterfaces = Assembly.GetExecutingAssembly()
                .GetTypes()
                .Where(type => type.GetInterfaces().Length > 0)
                .ToArray();
            containerBuilder.RegisterTypes(typesWithInterfaces).AsImplementedInterfaces();

            // The connection string is read from the environment variable named by ConnectionStringName.
            string connectionString = Environment.GetEnvironmentVariable(ConnectionStringName);
            var optionsBuilder = new DbContextOptionsBuilder<ScraperContext>();
            DbContextOptions<ScraperContext> options = optionsBuilder.UseSqlServer(connectionString).Options;

            // A single context instance is created here and exposed as DbContext.
            containerBuilder.RegisterInstance(new ScraperContext(options)).As<DbContext>();

            containerBuilder.RegisterType<WriteOnlyRepository<ShowDal>>().As<IWriteOnlyRepository<ShowDal>>();
            containerBuilder.RegisterType<ReadOnlyRepository<ShowDal>>().As<IReadOnlyRepository<ShowDal>>();

            containerBuilder.RegisterInstance(factory.CreateLogger(nameof(ContainerConfig))).As<ILogger>();

            containerBuilder
                .Register(context => new MapperConfiguration(mapping =>
                {
                    // Actor <-> ActorDal: Birthday corresponds to DateOfBirth.
                    mapping.CreateMap<Actor, ActorDal>()
                           .ForMember(dest => dest.DateOfBirth, opt => opt.MapFrom(src => src.Birthday));
                    mapping.CreateMap<ActorDal, Actor>()
                           .ForMember(dest => dest.Birthday, opt => opt.MapFrom(src => src.DateOfBirth));

                    // ShowWithCast <-> ShowDal: Updated corresponds to Timestamp.
                    mapping.CreateMap<ShowWithCast, ShowDal>()
                           .ForMember(dest => dest.Timestamp, opt => opt.MapFrom(src => src.Updated));
                    mapping.CreateMap<ShowDal, ShowWithCast>()
                           .ForMember(dest => dest.Updated, opt => opt.MapFrom(src => src.Timestamp));

                    mapping.CreateMap<Show, ShowDal>().ReverseMap();
                }))
                .AsImplementedInterfaces()
                .SingleInstance();

            // The mapper is created from the configuration provider registered just above.
            containerBuilder
                .Register(context => context.Resolve<IConfigurationProvider>().CreateMapper())
                .As<IMapper>();

            return containerBuilder.Build();
        }
Exemple #5
0
 /// <summary>
 /// Stores the supplied context in <c>ctx</c>.
 /// </summary>
 /// <param name="_ctx">Context to keep.</param>
 public SessionRepo(ScraperContext _ctx) => ctx = _ctx;
Exemple #6
0
 /// <summary>
 /// Stores the supplied context and creates the result and session repositories on it.
 /// </summary>
 /// <param name="_ctx">Context kept in <c>ctx</c> and handed to both repositories.</param>
 public ScraperService(ScraperContext _ctx)
 {
     ctx = _ctx;

     // Both repositories receive the same context instance that was just stored.
     resultRepo = new ScraperSingleResultRepo(_ctx);
     sessionRepo = new SessionRepo(_ctx);
 }
Exemple #7
0
        //TODO Remove for prod
        /// <summary>
        /// Issues <c>TRUNCATE TABLE ScrapeUri</c> through a short-lived <c>ScraperContext</c>.
        /// </summary>
        private void TruncateTable()
        {
            // The context is created only for this command, so dispose it as soon as the
            // command has run instead of leaving it undisposed.
            using (ScraperContext context = new ScraperContext())
            {
                context.Database.ExecuteSqlInterpolated($"TRUNCATE TABLE ScrapeUri");
            }
        }
Exemple #8
0
 /// <summary>
 /// Stores the supplied context in <c>_context</c>.
 /// </summary>
 /// <param name="context">Context to keep.</param>
 public ScrapeRepository(ScraperContext context) => _context = context;
 /// <summary>
 /// Stores the supplied context in <c>ctx</c>.
 /// </summary>
 /// <param name="_ctx">Context to keep.</param>
 public ScraperSingleResultRepo(ScraperContext _ctx) => ctx = _ctx;
 /// <summary>
 /// Stores the supplied context in <c>_context</c>.
 /// </summary>
 /// <param name="context">Context to keep.</param>
 public ScraperModelsController(ScraperContext context) => _context = context;
 /// <summary>
 /// Stores the supplied context in <c>_context</c>.
 /// </summary>
 /// <param name="context">Context to keep.</param>
 public IndexModel(ScraperContext context) => _context = context;
Exemple #12
0
 /// <summary>
 /// Stores the supplied data object and context in <c>_Idata</c> and <c>_context</c>.
 /// </summary>
 /// <param name="data">Value kept in <c>_Idata</c>.</param>
 /// <param name="context">Context kept in <c>_context</c>.</param>
 public ScraperController(IData data, ScraperContext context)
 {
     // Assigned left to right: _Idata first, then _context.
     (_Idata, _context) = (data, context);
 }
Exemple #13
0
 /// <summary>
 /// Stores the supplied context in <c>Ctx</c> and creates the scraper service and
 /// session repository on it.
 /// </summary>
 /// <param name="ctx">Context kept in <c>Ctx</c> and handed to both collaborators.</param>
 public ScraperController(ScraperContext ctx)
 {
     Ctx = ctx;

     // The service and the repository are created on the context that was just stored.
     scraperService = new ScraperService(ctx);
     sessionRepo = new SessionRepo(ctx);
 }
 /// <summary>
 /// Stores the supplied context in <c>_context</c>.
 /// </summary>
 /// <param name="context">Context to keep.</param>
 public ErrorModel(ScraperContext context) => _context = context;
Exemple #15
0
 /// <summary>
 /// Stores the supplied context in <c>_context</c>.
 /// </summary>
 /// <param name="context">Context to keep.</param>
 public ShowWithCastRepository(ScraperContext context) => _context = context;
Exemple #16
0
 /// <summary>
 /// Stores the supplied context in <c>_context</c>.
 /// </summary>
 /// <param name="context">Context to keep.</param>
 public OutputModel(ScraperContext context) => _context = context;