/// <summary>
/// Builds the HTML-page processing pipeline (extraction, categorization,
/// ranking, filtering) and attaches it to the shared frontier queue.
/// </summary>
/// <param name="initializer">Supplies the category list, crawl constraints and task id.</param>
/// <param name="frontier">Shared URL queue; stored directly, not copied.</param>
public HtmlPageCategorizationProcessor(Initializer initializer, Queue<Url> frontier)
{
    // Task identity and shared queue first; pipeline components after.
    taskId = initializer.getTaskId();
    queueFronier = frontier;

    extractor = new Extractor();
    categorizer = new Categorizer(initializer.getCategoryList());
    ranker = new Ranker(categorizer);
    // NOTE(review): the "http://" prefix presumably limits the filter to http
    // URLs — confirm against the Filter implementation.
    filter = new Filter("http://", initializer.getContraints());
}
/// <summary>
/// Smoke test: runs a locally stored copy of the nana10 home page through
/// the categorization processor with an empty frontier queue.
/// </summary>
public void Test1()
{
    Initializer initializer = new Initializer("", constraints, categories);
    Queue<Url> frontier = new Queue<Url>();

    // NOTE(review): the trailing 200 and 0 look like an HTTP status code and
    // a depth/rank value — confirm against the ResourceContent constructor.
    ResourceContent page = new ResourceContent(
        "www.nana10.co.il",
        ResourceType.HtmlResource,
        getFileContent("nana10.txt"),
        200,
        0);

    new HtmlPageCategorizationProcessor(initializer, frontier).process(page);
}
/// <summary>
/// Manual driver: enqueues a single Facebook URL as the only task and runs
/// one worker over it, collecting discovered URLs in a feedback queue.
/// </summary>
public static void MainTest()
{
    List<Category> categories = new List<Category>();
    Constraints constraints = new Constraints(5, true, "", "www.facebook.com");
    Initializer initializer = new Initializer(constraints, categories);

    // NOTE(review): the numeric Url arguments look like ids/timestamps and a
    // rank — confirm against the Url constructor.
    Url seedTask = new Url(
        "http://www.facebook.com/admirer4", 34243432, 35,
        "http://www.facebook.com/", 34243432);

    Queue<Url> tasks = new Queue<Url>();
    tasks.Enqueue(seedTask);
    Queue<Url> feedback = new Queue<Url>();

    Worker worker = new Worker(initializer, tasks, feedback);
    worker.run();
}
/// <summary>
/// Constructs a worker that consumes tasks from <paramref name="tasks"/> and
/// pushes discovered URLs to <paramref name="feedback"/>. The queues are used
/// directly (never copied), so they may need to be thread safe.
/// </summary>
/// <param name="initialData">Task configuration forwarded to the HTML processor.</param>
/// <param name="tasks">Queue of URLs for this worker to fetch and process.</param>
/// <param name="feedback">Queue receiving URLs found during processing.</param>
/// <param name="workersDim">Total number of workers; forwarded to the processor.</param>
/// <param name="workerID">This worker's index; forwarded to the processor.</param>
public Worker(Initializer initialData, Queue<Url> tasks, Queue<Url> feedback, int workersDim, int workerID)
{
    _tasks = tasks;
    _feedback = feedback;

    // Default timer value (presumably milliseconds — confirm where _timer is used).
    _timer = 1000;

    // Fetchers download resources; only the http protocol is registered here.
    HttpResourceFetcher httpFetcher = new HttpResourceFetcher();
    _fetchers = new FetcherManager();
    _fetchers.addProtocol("http", httpFetcher);

    // Processors handle downloaded resources; the HTML processor writes the
    // URLs it discovers into the feedback queue.
    HtmlPageCategorizationProcessor htmlProcessor =
        new HtmlPageCategorizationProcessor(initialData, feedback);
    htmlProcessor.ThreadID = workerID;
    htmlProcessor.ThreadsDim = workersDim;
    _processors = new ResourceProcessorManager();
    _processors.attachProcessor("PageProc", htmlProcessor);
}
/// <summary>
/// Populates the shared <c>_initializer</c> from either hard-coded values
/// (manual mode) or the storage system (auto mode), based on <c>_operationMode</c>.
/// </summary>
/// <param name="taskId">Task whose categories and restrictions are loaded in auto mode.</param>
protected static void SetInitializer(String taskId)
{
    System.Console.Write("$$$ Getting Constraints .. ");

    switch (_operationMode)
    {
        case operationMode_t.Manual:
            // Fixed test configuration; NOTE(review): Constraints argument
            // semantics presumed (depth, follow flag, seed, domain) — confirm.
            _categories = new List<Category>();
            _constraints = new Constraints(1, false, "", ".com");
            break;

        case operationMode_t.Auto:
            // Load both pieces of configuration from the storage system.
            _categories = StorageSystem.StorageSystem.getInstance().getCategories(taskId);
            _constraints = StorageSystem.StorageSystem.getInstance().getRestrictions(taskId);
            break;
    }

    _initializer = new Initializer(taskId, _constraints, _categories);
    System.Console.WriteLine("SUCCESS");
}