/**
 * Builds a processor from the supplied initializer: an extractor, a categorizer
 * over the initializer's category list, a ranker on top of that categorizer and
 * a filter created from the initializer's constraints.
 *
 * initializer - supplies the category list, the constraints and the task id
 * frontier    - queue stored by reference (not copied) in queueFronier
 */
public HtmlPageCategorizationProcessor(Initializer initializer, Queue<Url> frontier)
 {
     // The frontier queue is shared with the caller, only the reference is kept.
     this.queueFronier = frontier;

     // Page analysis pipeline; the ranker works on top of the categorizer,
     // so the categorizer has to exist before the ranker is created.
     this.extractor   = new Extractor();
     this.categorizer = new Categorizer(initializer.getCategoryList());
     this.ranker      = new Ranker(this.categorizer);

     // NOTE(review): "http://" presumably is the url prefix the filter works with - confirm in Filter.
     this.filter = new Filter("http://", initializer.getContraints());

     this.taskId = initializer.getTaskId();
 }
        /**
         * Smoke test: feeds the content of "nana10.txt" to a freshly built
         * HtmlPageCategorizationProcessor as an html resource. Nothing is asserted,
         * the test only checks that processing completes.
         */
        public void Test1()
        {
            // Collaborators of the processor: an initializer with an empty task id
            // and an empty frontier queue.
            Initializer init = new Initializer("", constraints, categories);
            Queue<Url> frontier = new Queue<Url>();

            // The resource under test, its body is read from the local fixture file.
            string pageContent = getFileContent("nana10.txt");
            ResourceContent page = new ResourceContent(
                "www.nana10.co.il",
                ResourceType.HtmlResource,
                pageContent,
                200,
                0);

            HtmlPageCategorizationProcessor htmlProcessor =
                new HtmlPageCategorizationProcessor(init, frontier);
            htmlProcessor.process(page);
        }
Пример #3
0
        /**
         * Manual test driver: runs a single worker over a task queue that holds
         * one url, with an empty category list and an empty feedback queue.
         */
        public static void MainTest()
        {
            // Configuration handed to the worker.
            List<Category> noCategories = new List<Category>();
            Constraints crawlLimits = new Constraints(5, true, "", "www.facebook.com");
            Initializer init = new Initializer(crawlLimits, noCategories);

            // The only task the worker will get.
            Url firstTask = new Url("http://www.facebook.com/admirer4", 34243432, 35, "http://www.facebook.com/", 34243432);
            // Alternative task, currently disabled:
            //Url task2 = new Url("http://www.nana10.co.il/", 34223432, 35, "http://www.nana10.co.il/", 34223432);

            Queue<Url> pendingTasks = new Queue<Url>();
            pendingTasks.Enqueue(firstTask);
            //pendingTasks.Enqueue(task2);

            // The feedback queue starts empty and is passed to the worker as is.
            Queue<Url> feedBack = new Queue<Url>();

            new Worker(init, pendingTasks, feedBack).run();
        }
Пример #4
0
        /**
         * Creates a worker that takes its tasks from the given task queue and hands
         * the feedback queue to its html page processor. No new queues are created,
         * the passed instances are used directly - so they may need to be thread safe.
         *
         * initialData - initial data forwarded to the html page processor
         * tasks       - queue of urls to work on
         * feedback    - queue passed to the html page processor
         * workersDim  - stored in the processor's ThreadsDim
         * workerID    - stored in the processor's ThreadID
         */
        public Worker(Initializer initialData, Queue<Url> tasks, Queue<Url> feedback, int workersDim, int workerID)
        {
            // The queues are shared with the caller, only the references are kept.
            _tasks    = tasks;
            _feedback = feedback;

            // Default timer value.
            // NOTE(review): unit is not visible here, presumably milliseconds - confirm.
            _timer = 1000;

            // Fetchers (page downloaders): only the "http" protocol is registered.
            _fetchers = new FetcherManager();
            _fetchers.addProtocol("http", new HttpResourceFetcher());

            // Processors which will handle the downloaded resources.
            _processors = new ResourceProcessorManager();

            HtmlPageCategorizationProcessor pageProcessor =
                new HtmlPageCategorizationProcessor(initialData, feedback)
                {
                    ThreadID   = workerID,
                    ThreadsDim = workersDim
                };
            _processors.attachProcessor("PageProc", pageProcessor);
        }
Пример #5
0
 /**
  * Prepares the shared initializer object used by the system objects.
  * In manual mode an empty category list and fixed constraints are used, in
  * auto mode both are loaded from the storage system for the given task.
  * For any other mode the current categories and constraints are left untouched.
  */
 protected static void SetInitializer(String taskId)
 {
     System.Console.Write("$$$ Getting Constraints .. ");

     switch (_operationMode)
     {
         case operationMode_t.Manual:
             // Hard coded setup, the storage system is not consulted.
             _categories  = new List<Category>();
             _constraints = new Constraints(1, false, "", ".com");
             break;

         case operationMode_t.Auto:
             // Everything comes from the storage system, looked up by task id.
             _categories  = StorageSystem.StorageSystem.getInstance().getCategories(taskId);
             _constraints = StorageSystem.StorageSystem.getInstance().getRestrictions(taskId);
             break;
     }

     _initializer = new Initializer(taskId, _constraints, _categories);

     System.Console.WriteLine("SUCCESS");
 }