/// <summary>
/// Stores crawl settings for the host of <paramref name="urlItem"/>. If a row with the same
/// host already exists, its values are overwritten; otherwise <paramref name="settings"/>
/// itself is added as a new row.
/// </summary>
/// <param name="urlItem">Item whose <c>Host</c> identifies the settings row.</param>
/// <param name="settings">
/// Values to store. When no row exists yet, this instance is added to the context and its
/// <c>Host</c> is overwritten with <c>urlItem.Host</c>.
/// </param>
/// <exception cref="Exception">
/// Any failure is logged through <c>_logger.LogError</c> and rethrown unchanged.
/// </exception>
public void SetSettings(UrlItem urlItem, HostSetting settings)
{
    try
    {
        using (var ctx = CreateContext())
        {
            var existing = ctx.HostSettings.SingleOrDefault(s => s.Host == urlItem.Host);

            if (existing != null)
            {
                // The row was loaded through this context, so it must NOT be passed to Add():
                // on an Entity Framework-style set that would mark the loaded row for insert
                // again instead of updating it. Only copy the new values onto it.
                existing.Host = urlItem.Host; // ensure the host is correct
                existing.CrawlDelay = settings.CrawlDelay;
                existing.RobotsTxt = settings.RobotsTxt;
                existing.Disallow = settings.Disallow;
            }
            else
            {
                settings.Host = urlItem.Host; // ensure the host is correct
                ctx.HostSettings.Add(settings);
            }

            ctx.Commit();
        }

        // Reported only after the context was committed and disposed without an exception.
        _logger.SettingsStored(urlItem);
    }
    catch (Exception err)
    {
        _logger.LogError(urlItem, err);
        throw;
    }
}
/// <summary>
/// Inserts the initial values into the DB. Must be called from the Seed() method of the
/// Configuration class.
/// </summary>
/// <param name="ctx">
/// Context whose <c>CrawlRules</c> and <c>HostSettings</c> sets receive the default rows.
/// Rules are matched by <c>Name</c> and settings by <c>Host</c> when adding or updating.
/// </param>
internal static void SeedDefaults(CrawlerDbContext ctx)
{
    #region rules

    var linkRule = new CrawlRule
    {
        DataType = DataBlockType.Link,
        RegExpression = "(<a.*?>.*?</a>)",
        Name = "Link"
    };

    var picRule = new CrawlRule
    {
        DataType = DataBlockType.Picture,
        // Verbatim string is required here: in a regular string literal "\b" is the
        // backspace character (U+0008), not the regex word-boundary anchor.
        RegExpression = @"<(img)\b[^>]*>",
        Name = "Picture"
    };

    var videoRule = new CrawlRule
    {
        DataType = DataBlockType.Video,
        // NOTE(review): unlike the width/height groups, the "src" group also captures the
        // closing quote character. Left unchanged; confirm whether consumers rely on it.
        RegExpression = @"(?<=<iframe[^>]*?)(?:\s*width=[""'](?<width>[^""']+)[""']|\s*height=[""'](?<height>[^'""]+)[""']|\s*src=[""'](?<src>[^'""]+[""']))+[^>]*?>",
        Name = "Video"
    };

    ctx.CrawlRules.AddOrUpdate(r => r.Name, linkRule, picRule, videoRule);

    #endregion

    #region settings

    // Default settings are stored under an empty host name.
    var defaultSettings = new HostSetting
    {
        Host = string.Empty,
        CrawlDelay = 60,
        Disallow = string.Empty,
        RobotsTxt = string.Empty
    };

    ctx.HostSettings.AddOrUpdate(s => s.Host, defaultSettings);

    #endregion

    #region urls

    // Seeding of the default URL is currently disabled; the item is built but not stored.
    var defaultUrl = new UrlItem
    {
        Url = "http://binary-notes.ru",
        Host = "binary-notes.ru"
    };
    //ctx.UrlItems.AddOrUpdate(s => s.Url, defaultUrl);

    #endregion
}
/// <summary>
/// Verifies that settings saved for a host are returned by
/// <c>CrawlerSettingsRepository.GetSettings</c> for a URL item that carries the same host.
/// </summary>
public void Should_return_settings_for_host()
{
    using (_db.CreateTransaction())
    {
        const string host = "testhost.com";
        const string pageUrl = "http://sub.testhost.com/page?param=1¶m=2";

        // Arrange: put a settings row for the host into the database.
        HostSetting expected = new HostSetting
        {
            CrawlDelay = 60,
            Disallow = null,
            Host = host,
            RobotsTxt = string.Empty
        };

        using (var seedContext = _db.CreateDbContext())
        {
            seedContext.HostSettings.Add(expected);
            seedContext.Commit();
        }

        // Act: ask the repository for the settings of a URL on that host.
        UrlItem item = new UrlItem { Url = pageUrl, Host = host };
        var repository = new CrawlerSettingsRepository(Mock.Of<IActivityLogRepository>());
        var actual = repository.GetSettings(item);

        // Assert: every stored value comes back unchanged.
        Assert.NotNull(actual);
        Assert.Equal(expected.Host, actual.Host);
        Assert.Equal(expected.CrawlDelay, actual.CrawlDelay);
        Assert.Equal(expected.RobotsTxt, actual.RobotsTxt);
        Assert.Equal(expected.Disallow, actual.Disallow);
    }
}
/// <summary>
/// Enumerates the <c>HostSetting</c> instances and assigns the one whose <c>Name</c> equals
/// <paramref name="hostName"/> to <c>hostSetting</c> (declared outside this method).
/// </summary>
/// <param name="hostName">Name to compare against each instance's <c>Name</c>.</param>
private void RetrieveHostSetting(string hostName)
{
    try
    {
        HostSetting.StaticScope = ManagementHelper.GetScope(
            typeof(HostSetting),
            Catalog.Instance,
            Catalog.Database);

        foreach (HostSetting candidate in HostSetting.GetInstances())
        {
            if (hostName != candidate.Name)
            {
                continue;
            }

            // No early exit: if several instances share the name, the last one enumerated wins.
            hostSetting = candidate;
        }
    }
    finally
    {
        // The static scope is cleared whether or not the enumeration succeeded.
        HostSetting.StaticScope = null;
    }
}
/// <summary>
/// Verifies that <c>CrawlerSettingsRepository.SetSettings</c> writes a settings row for the
/// host that can be read back from the database with the same values.
/// </summary>
public void Should_store_host_settings()
{
    using (_db.CreateTransaction())
    {
        const string host = "testhost.com";
        const string pageUrl = "http://sub.testhost.com/page?param=1¶m=2";

        // Arrange: settings to store and the URL item that identifies the host.
        HostSetting expected = new HostSetting
        {
            CrawlDelay = 60,
            Disallow = null,
            Host = host,
            RobotsTxt = string.Empty
        };
        UrlItem item = new UrlItem { Url = pageUrl, Host = host };
        var repository = new CrawlerSettingsRepository(Mock.Of<IActivityLogRepository>());

        // Act: store the settings through the repository.
        repository.SetSettings(item, expected);

        // Assert: exactly one row exists for the host and it carries the stored values.
        using (var verifyContext = _db.CreateDbContext())
        {
            var actual = verifyContext.HostSettings.Single(s => s.Host == host);

            Assert.Equal(expected.Host, actual.Host);
            Assert.Equal(expected.CrawlDelay, actual.CrawlDelay);
            Assert.Equal(expected.RobotsTxt, actual.RobotsTxt);
            Assert.Equal(expected.Disallow, actual.Disallow);
        }
    }
}
/// <summary>
/// Convenience overload: reads <c>host</c> from <paramref name="hostSetting"/> and forwards
/// the call, with all other arguments unchanged, to the <c>Request</c> overload that accepts
/// the host value directly.
/// </summary>
/// <param name="hostSetting">Setting whose <c>host</c> member is used; it is dereferenced, so it must not be null.</param>
/// <param name="location">Passed through unchanged.</param>
/// <param name="data">Passed through unchanged.</param>
/// <param name="OnComplete">Optional callback, passed through unchanged; defaults to null.</param>
/// <param name="retry">Passed through unchanged; defaults to 0.</param>
/// <param name="retryDelay">Passed through unchanged; defaults to 0.</param>
public static void Request(
    HostSetting hostSetting,
    WebLocation location,
    IDictionary data,
    Action<WebResult> OnComplete = null,
    int retry = 0,
    float retryDelay = 0f)
{
    var host = hostSetting.host;
    Request(host, location, data, OnComplete, retry, retryDelay);
}
/// <summary>
/// Creates a <c>Web</c> instance from a host setting by chaining to the constructor overload
/// that takes the host value directly; this constructor has no body of its own.
/// </summary>
/// <param name="hostSetting">Setting whose <c>host</c> member is passed on; it is dereferenced, so it must not be null.</param>
/// <param name="location">Passed through unchanged to the chained constructor.</param>
/// <param name="data">Passed through unchanged to the chained constructor.</param>
/// <param name="retry">Passed through unchanged; defaults to 0. Presumably a retry count — confirm against the chained constructor.</param>
/// <param name="retryDelay">Passed through unchanged; defaults to 0. Presumably the delay between retries — confirm against the chained constructor.</param>
public Web(HostSetting hostSetting, WebLocation location, IDictionary data, int retry = 0, float retryDelay = 0f) : this(hostSetting.host, location, data, retry, retryDelay) { }