/// <summary>
/// Builds an <see cref="ISiteCrawler"/> for a single site by resolving each of its
/// collaborators from <c>Container</c>. The registration name used for every
/// component is whatever <c>GetValueOrDefault</c> yields for the matching key
/// ("IDataService", "IHtmlReader", "IPageParser", "IItemReader", "IPageReader")
/// in the site's <c>CustomProcessors</c>.
/// </summary>
/// <param name="siteParameter">
/// Site settings; also handed to the item reader, page reader and page parser
/// as the <c>siteParameter</c> constructor override.
/// </param>
/// <returns>
/// A <see cref="GeneralSiteCrawler"/> composed of the resolved page reader,
/// page parser and data service.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="siteParameter"/> is <c>null</c>.
/// </exception>
public static ISiteCrawler Create(SiteParameter siteParameter)
{
    if (siteParameter == null)
    {
        throw new ArgumentNullException(nameof(siteParameter));
    }

    // Look up all registration names first, before anything is resolved.
    var processors = siteParameter.CustomProcessors;
    var dataServiceKey = GetValueOrDefault(processors, "IDataService");
    var htmlReaderKey = GetValueOrDefault(processors, "IHtmlReader");
    var pageParserKey = GetValueOrDefault(processors, "IPageParser");
    var itemReaderKey = GetValueOrDefault(processors, "IItemReader");
    var pageReaderKey = GetValueOrDefault(processors, "IPageReader");

    // The data service is built around a context obtained from CrawlerDbHelper.
    var contextOverride = new ParameterOverride("context", CrawlerDbHelper.GetContext());
    var dataService = Container.Resolve<IDataService>(dataServiceKey, contextOverride);

    // One HTML reader instance is shared by the page reader and the page parser.
    var htmlReaderOverride = new ParameterOverride(
        "htmlReader",
        Container.Resolve<IHtmlReader>(htmlReaderKey));

    var siteOverride = new ParameterOverride("siteParameter", siteParameter);

    var itemReaderOverride = new ParameterOverride(
        "itemReader",
        Container.Resolve<IItemReader>(itemReaderKey, siteOverride));

    var pageReader = Container.Resolve<IPageReader>(
        pageReaderKey,
        siteOverride,
        htmlReaderOverride,
        itemReaderOverride);

    var pageParser = Container.Resolve<IPageParser>(
        pageParserKey,
        siteOverride,
        htmlReaderOverride);

    return new GeneralSiteCrawler(pageReader, pageParser, dataService);
}
/// <summary>
/// Application entry point. Initializes <c>CrawlerDbHelper</c> from the
/// "MySqlConStr" connection string, configures logging from the
/// "MultiLogger.LogLevel" and "FileLogger.Path" app settings, loads the list of
/// site parameters from the JSON file named by the "ConfigurationFile" app
/// setting, and then crawls each configured site in turn.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    string connectionString = ConfigurationManager.ConnectionStrings["MySqlConStr"].ConnectionString;
    CrawlerDbHelper.Init(connectionString);

    using (var context = CrawlerDbHelper.GetContext())
    {
        if (!context.Database.Exists())
        {
            // TODO: Create database here
        }
    }

    string logLevel = ConfigurationManager.AppSettings["MultiLogger.LogLevel"];
    if (!string.IsNullOrWhiteSpace(logLevel))
    {
        // Enum.TryParse overwrites its out argument with default(LogType) when
        // parsing fails, so the Information fallback has to be applied explicitly.
        LogType level;
        if (!Enum.TryParse<LogType>(logLevel, out level))
        {
            level = LogType.Information;
        }

        Logging.LogLevel = level;
    }

    string fileLoggerPath = ConfigurationManager.AppSettings["FileLogger.Path"];
    if (!string.IsNullOrWhiteSpace(fileLoggerPath))
    {
        FileLogger fileLogger = new FileLogger(fileLoggerPath);
        Logging.Loggers.Add(fileLogger);
    }

    string configPath = ConfigurationManager.AppSettings["ConfigurationFile"];
    if (string.IsNullOrWhiteSpace(configPath) || !File.Exists(configPath))
    {
        Console.WriteLine("Configuration file missing. \nPress any key to exit...");
        Console.ReadKey();
        return;
    }

    string config = File.ReadAllText(configPath);
    List<SiteParameter> siteParameters = JsonConvert.DeserializeObject<List<SiteParameter>>(config);
    if (siteParameters == null)
    {
        // An empty file or a literal JSON null deserializes to null; treat it as
        // "nothing to crawl" instead of failing with a NullReferenceException.
        Console.WriteLine("Configuration file contains no site parameters.");
        siteParameters = new List<SiteParameter>();
    }

    foreach (var parameter in siteParameters)
    {
        Logging.WriteEntry("Main", LogType.Information, $"Starting crawler for {parameter.SiteName}");
        var crawler = CrawlerFactory.Create(parameter);
        crawler.Crawl(parameter);
        Logging.WriteEntry("Main", LogType.Information, $"Crawling {parameter.SiteName} done.");
    }

#if DEBUG
    // Keep the console window open when running a debug build.
    Console.WriteLine("Press any key to exit...");
    Console.ReadKey();
#endif
}
/// <summary>
/// Creates a crawler wired with the built-in components: a
/// <c>DbDataService</c> over a context from <c>CrawlerDbHelper.GetContext()</c>,
/// a <c>RegexItemReader</c>, an <c>HttpClientReader</c>, a
/// <c>SequentialPageReader</c> and a <c>RegexPageParser</c>.
/// </summary>
/// <param name="siteParameter">
/// Site settings passed to the item reader, page reader and page parser.
/// </param>
public GeneralSiteCrawler(SiteParameter siteParameter)
{
    dataService = new DbDataService(CrawlerDbHelper.GetContext());

    IItemReader items = new RegexItemReader(siteParameter);
    IHtmlReader html = new HttpClientReader();

    // The same HTML reader instance serves both the page reader and the parser.
    pageReader = new SequentialPageReader(siteParameter, html, items);
    pageParser = new RegexPageParser(siteParameter, html);

    // Parser errors are recorded through the data service as crawler log
    // entries carrying the URL, the current local time and the exception message.
    pageParser.SetErrorHandler(
        (failedUrl, error) => dataService.AddLog(
            new CrawlerLog
            {
                Url = failedUrl,
                LogTime = DateTime.Now,
                Message = error.Message
            }));
}