예제 #1
0
        //-------------------------------------------//

        /// <summary>
        /// Initializes the update manager: creates the pause spinner, the start/end
        /// task machines, the three needles (Polling, Iterant, Control), the default
        /// task scheduler and the thread handles, then adds every needle to every
        /// thread handle.
        /// </summary>
        public ManagerUpdate()
        {
            // initialize the pause spinner
            Pause = new SpinWait();
            // initialize on start manager to log start events
            OnStart = new TaskMachine();
            // initialize on end manager to log end events
            OnEnd = new TaskMachine();

            Log.Line("RUNNING Efz PROGRAM");

      #if INFO
            // log the name of each non-empty start task, then emit a blank line once all are done
            OnStart.OnTask = new ActionSet <string>(s =>
            {
                if (!string.IsNullOrEmpty(s))
                {
                    Log.Info("Started '" + s + "'");
                }
            });
            OnStart.AddOnDone(() => Log.Line());
      #endif

            // create needles
            // NOTE(review): the second argument is presumably an interval in milliseconds and the
            // third a priority/tolerance value - confirm against NeedleRhythmic
            Polling = new NeedleRhythmic("Polling", 1000 / 10, 50); // 10 updates per second - time related tasks
            Iterant = new NeedleRhythmic("Iterant", 1000 / 1, 10);  //  1 update  per second - low resolution updates
            Control = new NeedleDynamic("Control");                 //     continual updates - non-update tasks

            // initialize the task scheduler to handle task execution by default
            TaskScheduler = new EfzTaskScheduler();

            // create and add thread handles (ThreadCount - 1 of them; the last slot is the main thread below)
            // NOTE(review): the instances are not stored here, so ThreadHandle presumably registers
            // itself in ThreadHandle.Handles on construction - confirm
            for (int i = ThreadCount - 2; i >= 0; --i)
            {
                new ThreadHandle(SystemInformation.Processor64Bit ? Global.Megabyte * 8 : Global.Megabyte * 4);
            }
            // main process thread handler
            new ThreadHandle(0);

            // add needles to handles
            // TakeItem() is paired with Release(); the release sits in a finally block so that a
            // failure while adding a needle cannot leave the shared handle collection un-released
            var handles = ThreadHandle.Handles.TakeItem();
            try
            {
                foreach (ThreadHandle handle in handles)
                {
                    handle.Add(Polling);
                    handle.Add(Control);
                    handle.Add(Iterant);
                }
            }
            finally
            {
                ThreadHandle.Handles.Release();
            }
        }
예제 #2
0
        /// <summary>
        /// Set up the session from a loaded configuration: reads the crawler, URL and
        /// parse-control parameters (falling back to defaults), builds any configured
        /// identities, queues unparsed URL root files, starts parse and URL control,
        /// and saves the configuration.
        /// </summary>
        /// <param name="config">The loaded configuration whose Node supplies the settings.</param>
        protected void OnConfigLoad(Configuration config)
        {
            _lock.Take();

            // tracks whether this method currently holds the lock, because the lock is
            // deliberately dropped around AddUrlFile below; the finally block must only
            // release a lock that is actually held
            bool lockHeld = true;

            try
            {
                // skip if no longer running
                if (!Running)
                {
                    return;
                }

                // reference the config node
                Node node = config.Node;

                // setup updater watch
                _updater = new Watch(1000, true, Update, false);

                // number of crawlers to initialize with
                _crawlerCount = node.Default((ThreadHandle.HandleCount + 1) / 2, "Crawler_Count");

                // url control parameters
                UrlControl.UrlBufferSize = node.Default(20, "Url_Buffer");

                // parse control parameters
                ParseControl.HostNewScore     = node.Default(50, "Host_New_Score");
                ParseControl.HostParseScore   = node.Default(-20, "Host_Parse_Score");
                ParseControl.HostAttemptScore = node.Default(-40, "Host_Attempt_Score");
                ParseControl.HostAssetScore   = node.Default(10, "Host_Asset_Score");
                ParseControl.HostMaxScore     = node.Default(1000, "Host_Max_Score");

                // crawler parameters
                CrawlerUrlBufferSize     = node.Default(1000, "Url_Crawl_Buffer");
                CrawlerByteBuffer        = node.Default(4096, "Byte_Buffer");
                CrawlerMaxBytes          = (int)node.Default(Global.Kilobyte * 100, "Max_Bytes");
                CrawlerMinBytes          = (int)node.Default(Global.Kilobyte * 55, "Min_Bytes");
                CrawlerMaxConnectTimeout = node.Default(5000, "Max_Connect_Timeout");
                CrawlerMinConnectTimeout = node.Default(3000, "Min_Connect_Timeout");
                CrawlerMaxProcessTimeout = node.Default(10000, "Max_Process_Timeout");
                CrawlerMinProcessTimeout = node.Default(7000, "Min_Process_Timeout");
                CrawlerMaxCookieCount    = node.Default(8, "Max_Cookie_Count");
                CrawlerMaxCookieSize     = node.Default(3435, "Max_Cookie_Size");

                // were any identities specified?
                if (node["Identities"].ArraySet)
                {
                    // iterate the defined identities
                    foreach (Node identityNode in node["Identities"].Array)
                    {
                        if (identityNode.DictionarySet)
                        {
                            // start the name value collection
                            // NOTE(review): the instance is not stored here, so the 'true' argument
                            // presumably registers the identity elsewhere - confirm against Identity
                            Identity identity = new Identity(true);

                            // iterate the headers in the identity
                            foreach (KeyValuePair <string, Node> entry in identityNode.Dictionary)
                            {
                                identity.Add(entry.Key, entry.Value.String);
                            }
                        }
                    }
                }

                // have the url root files been set?
                if (node["Files"].ArraySet)
                {
                    // yes, add the root file paths that are not flagged as parsed
                    foreach (Node pathNode in node["Files"].Array)
                    {
                        if (!pathNode["Parsed"].Bool)
                        {
                            // the lock is dropped for the duration of AddUrlFile
                            // NOTE(review): presumably AddUrlFile takes the lock itself - confirm
                            _lock.Release();
                            lockHeld = false;

                            AddUrlFile(pathNode.String);

                            _lock.Take();
                            lockHeld = true;
                        }
                    }
                }

                // run the task machine to start the session
                TaskMachine tm = new TaskMachine();

                // enable the updater watch once the start-up tasks are done
                tm.AddOnDone(() => _updater.Run = true);
                tm.Add("Set up parse control", ParseControl.Start);
                tm.Add("Set up URL control", UrlControl.Start);

      #if INFO
                tm.OnTask = new ActionSet <string>(s => Log.Info(s));
      #endif

                tm.Run();

                config.Save();
            }
            finally
            {
                // always give the lock back, including on the early return and on exceptions
                if (lockHeld)
                {
                    _lock.Release();
                }
            }

            Log.Debug("Crawler session configuration loaded.");
        }