/// <summary>
/// Tries to create a new task machine.
/// </summary>
/// <param name="userTask">The user task to wrap in a machine.</param>
internal static void TryCreateTaskMachine(Task userTask)
{
    PSharpRuntime.Assert(PSharpRuntime.TaskScheduler is TaskWrapperScheduler,
        "Unable to wrap the task in a machine, because the task wrapper scheduler is not enabled.\n");

    TaskMachine taskMachine = new TaskMachine(
        PSharpRuntime.TaskScheduler as TaskWrapperScheduler, userTask);
    MachineId mid = taskMachine.Id;
    Output.Log("<CreateLog> TaskMachine({0}) is created.", mid.MVal);

    // run the machine on a task that notifies the bug finder when it starts and completes
    Task task = new Task(() =>
    {
        PSharpRuntime.BugFinder.NotifyTaskStarted();
        taskMachine.Run();
        PSharpRuntime.BugFinder.NotifyTaskCompleted();
    });

    // register the task with the runtime's set of machine tasks
    lock (PSharpRuntime.Lock)
    {
        PSharpRuntime.MachineTasks.Add(task);
    }

    PSharpRuntime.BugFinder.NotifyNewTaskCreated(task.Id, taskMachine);

    // route the task through the wrapper scheduler when intra-machine
    // concurrency is being explored; otherwise start it normally
    if (PSharpRuntime.Configuration.ScheduleIntraMachineConcurrency)
    {
        task.Start(PSharpRuntime.TaskScheduler);
    }
    else
    {
        task.Start();
    }

    PSharpRuntime.BugFinder.WaitForTaskToStart(task.Id);
    PSharpRuntime.BugFinder.Schedule();
}
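To make the scheduling side concrete, here is a minimal sketch of the kind of wrapper scheduler the task.Start(PSharpRuntime.TaskScheduler) branch hands tasks to. SketchTaskWrapperScheduler and its Execute helper are illustrative stand-ins, not the actual P# TaskWrapperScheduler, whose internals are not shown above; only the TaskScheduler overrides themselves are standard .NET API.

// A minimal sketch, assuming the wrapper scheduler queues tasks and lets the
// wrapping machine execute them; not the actual P# implementation.
using System.Collections.Generic;
using System.Threading.Tasks;

internal sealed class SketchTaskWrapperScheduler : TaskScheduler
{
    private readonly List<Task> pendingTasks = new List<Task>();

    protected override void QueueTask(Task task)
    {
        // record the queued task; in the real runtime this is roughly the
        // point where a user task could be handed to TryCreateTaskMachine
        lock (this.pendingTasks)
        {
            this.pendingTasks.Add(task);
        }
    }

    protected override bool TryExecuteTaskInline(Task task, bool taskWasPreviouslyQueued)
    {
        // refuse inlining so the scheduler keeps full control of execution order
        return false;
    }

    protected override IEnumerable<Task> GetScheduledTasks()
    {
        lock (this.pendingTasks)
        {
            return this.pendingTasks.ToArray();
        }
    }

    // hypothetical helper: lets the wrapping machine run a queued task
    internal void Execute(Task task)
    {
        base.TryExecuteTask(task);
    }
}

Disallowing inlining is the important design choice here: it forces every task through QueueTask, which is what gives the bug finder a single point at which to observe and serialize intra-machine work.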
/// <summary>
/// Sets up the session from a configuration node.
/// </summary>
protected void OnConfigLoad(Configuration config)
{
    _lock.Take();

    // skip if no longer running
    if (!Running)
    {
        _lock.Release();
        return;
    }

    // persist the config node
    Node node = config.Node;

    // set up the updater watch
    _updater = new Watch(1000, true, Update, false);

    // number of crawlers to initialize with
    _crawlerCount = node.Default((ThreadHandle.HandleCount + 1) / 2, "Crawler_Count");

    // url control parameters
    UrlControl.UrlBufferSize = node.Default(20, "Url_Buffer");

    // parse control parameters
    ParseControl.HostNewScore = node.Default(50, "Host_New_Score");
    ParseControl.HostParseScore = node.Default(-20, "Host_Parse_Score");
    ParseControl.HostAttemptScore = node.Default(-40, "Host_Attempt_Score");
    ParseControl.HostAssetScore = node.Default(10, "Host_Asset_Score");
    ParseControl.HostMaxScore = node.Default(1000, "Host_Max_Score");

    // crawler parameters
    CrawlerUrlBufferSize = node.Default(1000, "Url_Crawl_Buffer");
    CrawlerByteBuffer = node.Default(4096, "Byte_Buffer");
    CrawlerMaxBytes = (int)node.Default(Global.Kilobyte * 100, "Max_Bytes");
    CrawlerMinBytes = (int)node.Default(Global.Kilobyte * 55, "Min_Bytes");
    CrawlerMaxConnectTimeout = node.Default(5000, "Max_Connect_Timeout");
    CrawlerMinConnectTimeout = node.Default(3000, "Min_Connect_Timeout");
    CrawlerMaxProcessTimeout = node.Default(10000, "Max_Process_Timeout");
    CrawlerMinProcessTimeout = node.Default(7000, "Min_Process_Timeout");
    CrawlerMaxCookieCount = node.Default(8, "Max_Cookie_Count");
    CrawlerMaxCookieSize = node.Default(3435, "Max_Cookie_Size");

    // were any identities specified?
    if (node["Identities"].ArraySet)
    {
        // iterate the defined identities
        foreach (Node identityNode in node["Identities"].Array)
        {
            if (identityNode.DictionarySet)
            {
                // start the name-value collection
                Identity identity = new Identity(true);

                // iterate the headers in the identity
                foreach (KeyValuePair<string, Node> entry in identityNode.Dictionary)
                {
                    identity.Add(entry.Key, entry.Value.String);
                }
            }
        }
    }

    // have the url root files been set?
    if (node["Files"].ArraySet)
    {
        // yes, add the root file paths to the file extractor
        foreach (Node pathNode in node["Files"].Array)
        {
            if (!pathNode["Parsed"].Bool)
            {
                // release the lock while the file is processed
                _lock.Release();
                AddUrlFile(pathNode.String);
                _lock.Take();
            }
        }
    }

    // run the task machine to start the session
    TaskMachine tm = new TaskMachine();
    tm.AddOnDone(() => _updater.Run = true);
    tm.Add("Set up parse control", ParseControl.Start);
    tm.Add("Set up URL control", UrlControl.Start);
#if INFO
    tm.OnTask = new ActionSet<string>(s => Log.Info(s));
#endif
    tm.Run();

    config.Save();
    _lock.Release();

    Log.Debug("Crawler session configuration loaded.");
}
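Nearly every setting above is read through node.Default(fallback, key). The following is a minimal sketch of that lookup-or-default pattern, assuming the node stores its children in a dictionary and back-fills missing keys with the fallback, which would explain the config.Save() at the end of the method; SketchNode is a hypothetical stand-in for the crawler's real Node type.

// A minimal sketch of the lookup-or-default pattern; SketchNode is a
// hypothetical stand-in, not the crawler's actual Node implementation.
using System.Collections.Generic;

public class SketchNode
{
    private readonly Dictionary<string, object> children =
        new Dictionary<string, object>();

    // returns the value stored under key, or records and returns the fallback
    public T Default<T>(T fallback, string key)
    {
        object value;
        if (this.children.TryGetValue(key, out value) && value is T)
        {
            return (T)value;
        }

        // assumption: missing keys are back-filled so that a later Save()
        // persists the complete effective configuration
        this.children[key] = fallback;
        return fallback;
    }
}

Under that assumption, a first run against an empty config file would populate every "Crawler_Count"-style key with its hard-coded default, and subsequent runs would read the operator-edited values instead.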