Example #1
0
        /// <summary>
        /// Assembles an <see cref="ISiteCrawler"/> for one site by resolving each crawler
        /// component from <c>Container</c> and handing the results to a
        /// <see cref="GeneralSiteCrawler"/>.
        /// </summary>
        /// <param name="siteParameter">
        /// Site configuration. Its <c>CustomProcessors</c> is queried (through
        /// <c>GetValueOrDefault</c>) with each component interface name to obtain the name
        /// passed to <c>Container.Resolve</c>.
        /// </param>
        /// <returns>A crawler built from the resolved page reader, page parser and data service.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="siteParameter"/> is <c>null</c>.
        /// </exception>
        public static ISiteCrawler Create(SiteParameter siteParameter)
        {
            if (siteParameter == null)
            {
                throw new ArgumentNullException(nameof(siteParameter));
            }

            // Look up every component name first, before anything is resolved.
            var processors     = siteParameter.CustomProcessors;
            var dataServiceKey = GetValueOrDefault(processors, "IDataService");
            var htmlReaderKey  = GetValueOrDefault(processors, "IHtmlReader");
            var pageParserKey  = GetValueOrDefault(processors, "IPageParser");
            var itemReaderKey  = GetValueOrDefault(processors, "IItemReader");
            var pageReaderKey  = GetValueOrDefault(processors, "IPageReader");

            // The data service receives a database context as its "context" constructor argument.
            var dbContext   = CrawlerDbHelper.GetContext();
            var dataService = Container.Resolve<IDataService>(
                dataServiceKey,
                new ParameterOverride("context", dbContext));

            // The HTML reader is resolved once and shared by the page reader and the page parser.
            var htmlReader = Container.Resolve<IHtmlReader>(htmlReaderKey);

            var siteOverride = new ParameterOverride("siteParameter", siteParameter);
            var htmlOverride = new ParameterOverride("htmlReader", htmlReader);

            var itemReader   = Container.Resolve<IItemReader>(itemReaderKey, siteOverride);
            var itemOverride = new ParameterOverride("itemReader", itemReader);

            var pageReader = Container.Resolve<IPageReader>(pageReaderKey, siteOverride, htmlOverride, itemOverride);
            var pageParser = Container.Resolve<IPageParser>(pageParserKey, siteOverride, htmlOverride);

            return new GeneralSiteCrawler(pageReader, pageParser, dataService);
        }
Example #2
0
        /// <summary>
        /// Program entry point. Initialises the crawler database helper from the
        /// <c>MySqlConStr</c> connection string, applies the optional logging settings
        /// (<c>MultiLogger.LogLevel</c>, <c>FileLogger.Path</c>), loads the list of
        /// <see cref="SiteParameter"/> entries from the JSON file named by the
        /// <c>ConfigurationFile</c> app setting, and runs one crawler per entry in order.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            string connectionString = ConfigurationManager.ConnectionStrings["MySqlConStr"].ConnectionString;

            CrawlerDbHelper.Init(connectionString);
            using (var context = CrawlerDbHelper.GetContext())
            {
                if (!context.Database.Exists())
                {
                    // TODO: Create database here
                }
            }

            string logLevel = ConfigurationManager.AppSettings["MultiLogger.LogLevel"];

            if (!string.IsNullOrWhiteSpace(logLevel))
            {
                // Enum.TryParse overwrites its out argument with default(LogType) when parsing
                // fails, so the Information fallback must be applied after the call rather
                // than as an initial value.
                LogType level;
                if (!Enum.TryParse <LogType>(logLevel, out level))
                {
                    level = LogType.Information;
                }

                Logging.LogLevel = level;
            }

            string fileLoggerPath = ConfigurationManager.AppSettings["FileLogger.Path"];

            if (!string.IsNullOrWhiteSpace(fileLoggerPath))
            {
                FileLogger fileLogger = new FileLogger(fileLoggerPath);
                Logging.Loggers.Add(fileLogger);
            }

            string configPath = ConfigurationManager.AppSettings["ConfigurationFile"];

            if (string.IsNullOrWhiteSpace(configPath) || !File.Exists(configPath))
            {
                Console.WriteLine("Configuration file missing. \nPress any key to exit...");
                Console.ReadKey();
                return;
            }

            string config = File.ReadAllText(configPath);

            // DeserializeObject yields null for an empty document or a JSON "null" literal;
            // treat that as "no sites configured" instead of failing in the loop below.
            List <SiteParameter> siteParameters =
                JsonConvert.DeserializeObject <List <SiteParameter> >(config) ?? new List <SiteParameter>();

            foreach (var parameter in siteParameters)
            {
                Logging.WriteEntry("Main", LogType.Information, $"Starting crawler for {parameter.SiteName}");
                var crawler = CrawlerFactory.Create(parameter);
                crawler.Crawl(parameter);
                Logging.WriteEntry("Main", LogType.Information, $"Crawling {parameter.SiteName} done.");
            }

            // Keep the console window open after a debug run so the output can be read.
            #if DEBUG
            Console.WriteLine("Press any key to exit...");
            Console.ReadKey();
            #endif
        }
Example #3
0
        /// <summary>
        /// Creates a crawler for <paramref name="siteParameter"/> wired with fixed component
        /// implementations: <see cref="DbDataService"/>, <see cref="RegexItemReader"/>,
        /// <see cref="HttpClientReader"/>, <see cref="SequentialPageReader"/> and
        /// <see cref="RegexPageParser"/>.
        /// </summary>
        /// <param name="siteParameter">
        /// Site configuration handed to the item reader, page reader and page parser.
        /// </param>
        public GeneralSiteCrawler(SiteParameter siteParameter)
        {
            dataService = new DbDataService(CrawlerDbHelper.GetContext());

            IItemReader regexItems = new RegexItemReader(siteParameter);
            IHtmlReader httpReader = new HttpClientReader();

            // The same HTML reader instance is shared by the page reader and the page parser.
            pageReader = new SequentialPageReader(siteParameter, httpReader, regexItems);
            pageParser = new RegexPageParser(siteParameter, httpReader);

            // Parser errors are recorded through the data service as CrawlerLog entries.
            pageParser.SetErrorHandler(
                (failedUrl, error) => dataService.AddLog(
                    new CrawlerLog
                    {
                        Url     = failedUrl,
                        LogTime = DateTime.Now,
                        Message = error.Message
                    }));
        }