//-------------------------------------------//

/// <summary>
/// Construct the update manager: sets up the pause spinner, the start/end
/// task machines, the three scheduling needles, the default task scheduler
/// and the pool of worker thread handles.
/// </summary>
public ManagerUpdate() {

    // spinner used to yield while waiting
    Pause = new SpinWait();

    // task machines that log session start and end events
    OnStart = new TaskMachine();
    OnEnd = new TaskMachine();

    Log.Line("RUNNING Efz PROGRAM");

    #if INFO
    // when built with INFO, announce each named startup task as it runs
    OnStart.OnTask = new ActionSet<string>(s => {
        if (!string.IsNullOrEmpty(s)) {
            Log.Info("Started '" + s + "'");
        }
    });
    OnStart.AddOnDone(() => Log.Line());
    #endif

    // scheduling needles:
    //   Polling - 10 updates/second, time-related tasks
    //   Iterant - 1 update/second, low-resolution updates
    //   Control - continual, non-update tasks
    Polling = new NeedleRhythmic("Polling", 1000 / 10, 050);
    Iterant = new NeedleRhythmic("Iterant", 1000 / 01, 010);
    Control = new NeedleDynamic("Control");

    // route default task execution through the custom scheduler
    TaskScheduler = new EfzTaskScheduler();

    // worker thread stack size depends on the process bitness
    var workerStackSize = SystemInformation.Processor64Bit ?
        Global.Megabyte * 8 :
        Global.Megabyte * 4;

    // spin up ThreadCount-1 worker handles ...
    for (int index = 0; index < ThreadCount - 1; ++index) {
        new ThreadHandle(workerStackSize);
    }
    // ... plus the handle for the main process thread
    new ThreadHandle(0);

    // attach every needle to every handle
    foreach (ThreadHandle threadHandle in ThreadHandle.Handles.TakeItem()) {
        threadHandle.Add(Polling);
        threadHandle.Add(Control);
        threadHandle.Add(Iterant);
    }
    ThreadHandle.Handles.Release();

}
/// <summary>
/// Setup the session with a configuration Node. Reads crawler, url and
/// parse parameters (supplying defaults where unset), loads any configured
/// identities and root url files, then runs a task machine to start the
/// session and saves the configuration back.
/// </summary>
protected void OnConfigLoad(Configuration config) {

    _lock.Take();

    // bail out early if the session has already stopped
    if (!Running) {
        _lock.Release();
        return;
    }

    // configuration root node
    Node settings = config.Node;

    // watch that drives the periodic Update callback (1s, repeating, not yet running)
    _updater = new Watch(1000, true, Update, false);

    // number of crawlers to initialize with
    _crawlerCount = settings.Default((ThreadHandle.HandleCount + 1) / 2, "Crawler_Count");

    // url control parameters
    UrlControl.UrlBufferSize = settings.Default(20, "Url_Buffer");

    // parse control parameters
    ParseControl.HostNewScore = settings.Default(50, "Host_New_Score");
    ParseControl.HostParseScore = settings.Default(-20, "Host_Parse_Score");
    ParseControl.HostAttemptScore = settings.Default(-40, "Host_Attempt_Score");
    ParseControl.HostAssetScore = settings.Default(10, "Host_Asset_Score");
    ParseControl.HostMaxScore = settings.Default(1000, "Host_Max_Score");

    // crawler parameters
    CrawlerUrlBufferSize = settings.Default(1000, "Url_Crawl_Buffer");
    CrawlerByteBuffer = settings.Default(4096, "Byte_Buffer");
    CrawlerMaxBytes = (int)settings.Default(Global.Kilobyte * 100, "Max_Bytes");
    CrawlerMinBytes = (int)settings.Default(Global.Kilobyte * 55, "Min_Bytes");
    CrawlerMaxConnectTimeout = settings.Default(5000, "Max_Connect_Timeout");
    CrawlerMinConnectTimeout = settings.Default(3000, "Min_Connect_Timeout");
    CrawlerMaxProcessTimeout = settings.Default(10000, "Max_Process_Timeout");
    CrawlerMinProcessTimeout = settings.Default(7000, "Min_Process_Timeout");
    CrawlerMaxCookieCount = settings.Default(8, "Max_Cookie_Count");
    CrawlerMaxCookieSize = settings.Default(3435, "Max_Cookie_Size");

    // were any identities specified?
    Node identitiesNode = settings["Identities"];
    if (identitiesNode.ArraySet) {
        // build an identity from each dictionary entry in the array
        foreach (Node identityNode in identitiesNode.Array) {
            if (!identityNode.DictionarySet) continue;
            Identity identity = new Identity(true);
            // copy each configured header into the identity
            foreach (var header in identityNode.Dictionary) {
                identity.Add(header.Key, header.Value.String);
            }
            // NOTE(review): the populated identity is not visibly registered
            // anywhere here — presumably Identity(true) self-registers; confirm.
        }
    }

    // have the url root files been set?
    Node filesNode = settings["Files"];
    if (filesNode.ArraySet) {
        // yes, feed each unparsed root file path to the file extractor
        foreach (Node pathNode in filesNode.Array) {
            if (pathNode["Parsed"].Bool) continue;
            // AddUrlFile must run outside the lock; retake it afterwards
            _lock.Release();
            AddUrlFile(pathNode.String);
            _lock.Take();
        }
    }

    // task machine that starts the session; the updater begins once it completes
    TaskMachine startup = new TaskMachine();
    startup.AddOnDone(() => _updater.Run = true);
    startup.Add("Set up parse control", ParseControl.Start);
    startup.Add("Set up URL control", UrlControl.Start);
    #if INFO
    startup.OnTask = new ActionSet<string>(s => Log.Info(s));
    #endif
    startup.Run();

    config.Save();

    _lock.Release();

    Log.Debug("Crawler session configuration loaded.");
}