Beispiel #1
0
        /**
         * Test that displays runtime statistics on screen while feeding the
         * frontier with batches of 5000 urls. Feeding stops once the iteration
         * counter passes 40000; after that the statistics display keeps
         * refreshing once a second forever (the test never returns).
         */
        public void test4()
        {
            RuntimeStatistics.addToExtractedUrls(1);
            initQueuesTest2();
            RuntimeStatistics.addToFeedUrls(5000);

            // start the frontier scheduler and a single worker simulator
            RankFrointer rankFrontier   = new RankFrointer(feedback, serverQueues);
            Thread       frontierThread = new Thread(new ThreadStart(rankFrontier.sceduleTasks));

            frontierThread.Start();
            Thread workerThread = new Thread(new ThreadStart(workerSimulator3));

            workerThread.Start();

            int iteration = 5000;

            // feed the queues in 5000-url batches, refreshing the display
            // between batches, until 40000 urls have been queued
            while (true)
            {
                StatusDisplay.DisplayOnScreen(feedback, serverQueues);
                Thread.Sleep(1000);
                if (iteration > 40000)
                {
                    break;
                }
                fillQueue(iteration, iteration + 5000);
                iteration += 5000;
                RuntimeStatistics.addToFeedUrls(5000);
            }

            // feeding is done — keep the statistics display alive
            while (true)
            {
                StatusDisplay.DisplayOnScreen(feedback, serverQueues);
                Thread.Sleep(1000);
            }
        }
Beispiel #2
0
        /**
         * Tests multi-threaded workers: starts the frontier scheduler plus two
         * worker simulators, feeds the queues in 5000-url batches up to
         * 1000000 urls, then monitors both worker threads until they stop and
         * reports the total run time.
         */
        public void test6()
        {
            DateTime startTime = DateTime.Now;

            initQueuesTest3();
            RuntimeStatistics.addToExtractedUrls(1);
            RuntimeStatistics.addToFeedUrls(5000);
            RankFrointer rankFrontier   = new RankFrointer(feedback, serverQueues);
            Thread       frontierThread = new Thread(new ThreadStart(rankFrontier.sceduleTasks));

            frontierThread.Start();
            Thread workerThread = new Thread(new ThreadStart(workerSimulator4));

            workerThread.Start();

            Thread workerThread2 = new Thread(new ThreadStart(workerSimulator5));

            workerThread2.Start();

            int iteration = 5000;

            // feed the queues in 5000-url batches until 1000000 urls are queued
            while (true)
            {
                Thread.Sleep(10);
                fillQueue(iteration, iteration + 5000);
                iteration += 5000;
                RuntimeStatistics.addToFeedUrls(5000);
                StatusDisplay.DisplayOnScreen(feedback, serverQueues);
                if (iteration > 1000000)
                {
                    break;
                }
            }

            // monitor the two worker threads; when both have stopped, report
            // the total run time and finish the test.
            // (The original computed totalTime but discarded it and looped forever.)
            while (true)
            {
                Thread.Sleep(1000);
                StatusDisplay.DisplayOnScreen(feedback, serverQueues);
                ThreadState state = workerThread.ThreadState;
                Console.WriteLine("Workerthread is : " + state.ToString());
                ThreadState state2 = workerThread2.ThreadState;
                Console.WriteLine("Workerthread2 is : " + state2.ToString());
                if ((state == ThreadState.Stopped) && (state2 == ThreadState.Stopped))
                {
                    DateTime endTime   = DateTime.Now;
                    TimeSpan totalTime = endTime - startTime;
                    Console.WriteLine("Total run time : " + totalTime.TotalSeconds + " seconds");
                    break;
                }
            }
        }
Beispiel #3
0
        /**
         * Starts the frontier scheduler and a single worker simulator, then
         * refreshes the on-screen statistics once a second forever (the test
         * never returns).
         */
        public void test5()
        {
            initQueuesTest2();
            RuntimeStatistics.addToExtractedUrls(1);
            RuntimeStatistics.addToFeedUrls(5000);

            // spin up the frontier scheduling thread
            RankFrointer frontier  = new RankFrointer(feedback, serverQueues);
            Thread       scheduler = new Thread(new ThreadStart(frontier.sceduleTasks));
            scheduler.Start();

            // spin up one worker simulator
            Thread simulator = new Thread(new ThreadStart(workerSimulator4));
            simulator.Start();

            // refresh the statistics display once a second, indefinitely
            for ( ; ; )
            {
                Thread.Sleep(1000);
                StatusDisplay.DisplayOnScreen(feedback, serverQueues);
            }
        }
Beispiel #4
0
        /**
         * End-to-end pipeline test: fetches a set of hard-coded urls plus the
         * task's seed list, then for every fetched page runs link extraction,
         * filtering, ranking + frontier deployment, categorization, and result
         * storage. The per-stage TimeSpan locals are kept (though unused) so
         * stage timings can be inspected under a debugger, mirroring process().
         */
        public void Test2()
        {
            List <String> urls = new List <string>();

            urls.Add("http://www.autonews.com/");
            urls.Add("http://www.geonius.com/www/");
            urls.Add("http://en.wikipedia.org/wiki/Main_Page");
            urls.Add("http://www.computerworld.com/");

            // append the task-specific seed urls to the hard-coded ones
            // (the original fetched the seed list a second time and discarded it)
            List <string> seeds = StorageSystem.StorageSystem.getInstance().getSeedList(taskId);
            urls.AddRange(seeds);

            List <Category> _categories  = StorageSystem.StorageSystem.getInstance().getCategories(taskId);
            Constraints     _constraints = StorageSystem.StorageSystem.getInstance().getRestrictions(taskId);

            Filter      filter      = new Filter("http://", _constraints);
            Categorizer categorizer = new Categorizer(_categories);
            Ranker      ranker      = new Ranker(categorizer);
            Extractor   extractor   = new Extractor();

            HttpResourceFetcher httpfetcher = new HttpResourceFetcher();

            foreach (String url in urls)
            {
                DateTime        startTime = DateTime.Now;
                ResourceContent resource  = null;
                if (httpfetcher.canFetch(url))
                {
                    resource = httpfetcher.fetch(url, 10000, 100);
                }

                DateTime fetchEndTime = DateTime.Now;

                // skip urls that could not be fetched or returned no content
                if ((resource == null) || (resource.getResourceContent() == null))
                {
                    continue;
                }

                /*** 0. fetching the link from the internet ***/
                TimeSpan fetchingTime = fetchEndTime - startTime;

                //extract all the links in page
                List <LinkItem> listOfLinks = extractor.extractLinks(resource.getResourceUrl(), resource.getResourceContent());
                RuntimeStatistics.addToExtractedUrls(listOfLinks.Count);

                DateTime extEndTime = DateTime.Now;

                /*** 1. Extracting the link from the request ***/
                TimeSpan extRequest = extEndTime - fetchEndTime;

                //reset the dictionary in filter that contains the urls from the same page
                filter.resetDictionary();
                int filteredUrlsCount = 0;
                foreach (LinkItem item in listOfLinks)
                {
                    //Filter the links and return only links that can be crawled
                    List <String> links = new List <String>();
                    links.Add(item.getLink());
                    List <String> filteredLinks = filter.filterLinks(links);

                    //If filteredLinks is not empty
                    if (filteredLinks.Count > 0)
                    {
                        filteredUrlsCount++;
                        Url url1 = new Url(filteredLinks[0], hashUrl(filteredLinks[0]), ranker.rankUrl(resource, item),
                                           item.getDomainUrl(), hashUrl(item.getDomainUrl()));
                        deployLinksToFrontier(url1);
                        RuntimeStatistics.addToFeedUrls(1);
                    }
                }

                DateTime catStartTime = DateTime.Now;

                /*** 2. Ranking and deployment to the frontier ***/
                TimeSpan rankTotalRequest = catStartTime - extEndTime;

                //Ascribe the url to all the categories it is belonged to.
                List <Result> classifiedResults = categorizer.classifyContent(resource.getResourceContent(),
                                                                              resource.getResourceUrl());
                if (classifiedResults.Count != 0)
                {
                    RuntimeStatistics.addToCrawledUrls(1);
                }

                DateTime catEndTime = DateTime.Now;

                /*** 3. Classification of the current request ***/
                TimeSpan catTotalRequest = catEndTime - catStartTime;

                //Save all the classified results to storage
                foreach (Result classifiedResult in classifiedResults)
                {
                    Result result = new Result("0", classifiedResult.getUrl(), classifiedResult.getCategoryID(),
                                               resource.getRankOfUrl(), classifiedResult.getTrustMeter());
                    deployResourceToStorage(result);
                }

                DateTime endTime = DateTime.Now;

                /*** 4. deployment to the database (result) ***/
                TimeSpan deployRequest = endTime - catEndTime;

                /*** 5. Total processing time ***/
                TimeSpan totalRequest = endTime - startTime;
            }
        }
        /**
         * This method tries to process the given content assuming that the given content
         * can be processed via this processor.
         * Pipeline stages: link extraction, filtering + ranking + frontier
         * deployment, categorization, and storage of the classified results.
         * Each stage is timed; when timing debug output is enabled the stage
         * timings are appended to a per-thread log file.
         */
        public void process(ResourceContent resource)
        {
            DateTime startTime = DateTime.Now;

            //extract all the links in page
            List <LinkItem> listOfLinks = extractor.extractLinks(resource.getResourceUrl(), resource.getResourceContent());
            RuntimeStatistics.addToExtractedUrls(listOfLinks.Count);

            DateTime extEndTime = DateTime.Now;

            /*** 1. Extracting the link from the request ***/
            TimeSpan extRequest = extEndTime - startTime;

            //reset the dictionary in filter that contains the urls from the same page
            filter.resetDictionary();
            int filteredUrlsCount = 0;

            foreach (LinkItem item in listOfLinks)
            {
                //Filter the links and return only links that can be crawled
                List <String> links = new List <String>();
                links.Add(item.getLink());
                List <String> filteredLinks = filter.filterLinks(links);

                //If filteredLinks is not empty
                if (filteredLinks.Count > 0)
                {
                    filteredUrlsCount++;
                    Url url = new Url(filteredLinks[0], hashUrl(filteredLinks[0]), ranker.rankUrl(resource, item),
                                      item.getDomainUrl(), hashUrl(item.getDomainUrl()));
                    deployLinksToFrontier(url);
                    RuntimeStatistics.addToFeedUrls(1);
                }
            }

            DateTime catStartTime = DateTime.Now;

            /*** 2. Ranking and deployment to the frontier ***/
            TimeSpan rankTotalRequest = catStartTime - extEndTime;

            //Ascribe the url to all the categories it is belonged to.
            List <Result> classifiedResults = categorizer.classifyContent(resource.getResourceContent(),
                                                                          resource.getResourceUrl());

            if (classifiedResults.Count != 0)
            {
                RuntimeStatistics.addToCrawledUrls(1);
            }
            DateTime catEndTime = DateTime.Now;

            /*** 3. Classification of the current request ***/
            TimeSpan catTotalRequest = catEndTime - catStartTime;

            //Save all the results to Storage
            foreach (Result classifiedResult in classifiedResults)
            {
                Result result = new Result("0", classifiedResult.getUrl(), classifiedResult.getCategoryID(),
                                           resource.getRankOfUrl(), classifiedResult.getTrustMeter());
                deployResourceToStorage(result);
            }

            DateTime endTime = DateTime.Now;
            /*** 4. deployment to the database (result) ***/
            TimeSpan deployRequest = endTime - catEndTime;

            /*** $. Total processing time ***/
            TimeSpan totalRequest = endTime - startTime;

            // write request time to timing log file
            if (LogDebuggerControl.getInstance().enableTiming)
            {
                // using ensures the writer is flushed and closed even if a write throws
                using (StreamWriter sw = new
                                         StreamWriter("_DEBUG_INFO_PROCESSOR_TIMING@" + System.Threading.Thread.CurrentThread.ManagedThreadId + ".txt", true))
                {
                    sw.WriteLine(" TIMING FOR REQ - [] ");
                    sw.WriteLine(" - Extractor Time " + extRequest.TotalSeconds + " seconds ");
                    // BUGFIX: the ranker line previously logged extRequest again,
                    // leaving rankTotalRequest computed but never reported
                    sw.WriteLine(" - Ranker    Time " + rankTotalRequest.TotalSeconds + " seconds ");
                    sw.WriteLine(" - Categori. Time " + catTotalRequest.TotalSeconds + " seconds ");
                    sw.WriteLine(" - Deploy    Time " + deployRequest.TotalSeconds + " seconds ");
                    sw.WriteLine(" - Total Timing " + totalRequest.TotalSeconds + " seconds ");
                }
            }
        }