public ClassificationResult ClassifyText(string textToClassify)
        {
            // XXX TODO - OpenNLP people deprecated the method that takes a plain string. Is splitting here correct?
            // It seems to be, because not splitting gives us all categories with the same result basically (evenly split probabilities every time).
            var tokens        = textToClassify.Split(' ');
            var probabilities = Categorizer.categorize(tokens);
            var bestCategory  = Categorizer.getBestCategory(probabilities);

            // Map the OpenNLP category name back to our domain category; the highest
            // probability in the result array is reported as the confidence.
            return new ClassificationResult(MappedCategories[bestCategory], probabilities.Max());
        }
Beispiel #2
0
 public CategorizerTest()
 {
     // Arrange the system under test with one category per risk level.
     var riskCategories = new List<ICategory>
     {
         new LowRiskCategory(),
         new MediumRiskCategory(),
         new HighRiskCategory(),
     };

     Categorizer = new(riskCategories);
 }
Beispiel #3
0
        public YearResultsPresentation(int year, Expense[] expenses, Categorizer categorizer)
        {
            _year = year;

            var categorization = categorizer.Categorize(expenses);

            // Build one presentation per known category, pairing each category
            // with the expenses that were categorized under it.
            var results = new List<CategoryResultsPresentation>();
            foreach (var category in categorizer.GetAllCategories())
            {
                var matchingExpenses = categorization
                                       .Where(i => i.Category == category)
                                       .Select(i => i.Expense)
                                       .ToArray();

                results.Add(new CategoryResultsPresentation(category.Name, matchingExpenses));
            }

            Categories.AddRange(results);
        }
Beispiel #4
0
 public ParseVkJob(
     TelegramBotClient client,
     StateRepository stateRepository,
     NoticeRepository noticeRepository,
     VkParser parser,
     Categorizer categorizer,
     Geocoding geocoding,
     ILogger logger)
     : base(client, stateRepository, logger)
 {
     // The base class keeps the Telegram client, state repository and logger;
     // this job only stores its own collaborators.
     this.noticeRepository = noticeRepository;
     this.parser = parser;
     this.categorizer = categorizer;
     this.geocoding = geocoding;
 }
        public BasicCompiler(
            string instructionsFile,
            string printDataFile,
            string varAndArrayFile,
            string baseFile,
            string beforePrintDataFile,
            string finalOutputFile)
        {
            // All input/output files are managed through a single FileManager instance.
            this.fileManager = new FileManager(
                instructionsFile,
                printDataFile,
                varAndArrayFile,
                baseFile,
                beforePrintDataFile,
                finalOutputFile);

            // Construct the pipeline stages: symbol categorizer -> lexical analyzer -> syntax analyzer.
            this.asciiCategorizer = new Categorizer();
            this.lexical = new LexicalStateMachine();
            this.syntax = new SyntaxStateMachine(this.fileManager);

            // Chain the stages via events so each stage consumes the previous
            // stage's output as it is produced.
            this.asciiCategorizer.NotifySymbolCategorization += this.lexical.ConsumeCategorizedSymbolEvent;
            this.lexical.NotifyTokenIdentified += this.syntax.ConsumeIdentifiedTokenEvent;
        }
Beispiel #6
0
        public void Test2()
        {
            // End-to-end crawl exercise: fetch each URL, extract and filter its links,
            // rank/deploy them to the frontier, classify the page content, and store results.

            // Seed the crawl with a few fixed URLs plus the task's configured seed list.
            List<String> urls = new List<string>();
            urls.Add("http://www.autonews.com/");
            urls.Add("http://www.geonius.com/www/");
            urls.Add("http://en.wikipedia.org/wiki/Main_Page");
            urls.Add("http://www.computerworld.com/");
            List<string> seeds = StorageSystem.StorageSystem.getInstance().getSeedList(taskId);
            foreach (string seed in seeds)
            {
                urls.Add(seed);
            }

            // Task configuration: categories to classify into and crawl restrictions.
            List<Category> _categories = StorageSystem.StorageSystem.getInstance().getCategories(taskId);
            Constraints _constraints = StorageSystem.StorageSystem.getInstance().getRestrictions(taskId);
            // NOTE(review): a second, discarded getSeedList(taskId) call was removed here;
            // the seed list is already fetched and stored above.

            Filter filter = new Filter("http://", _constraints);
            Categorizer categorizer = new Categorizer(_categories);
            Ranker ranker = new Ranker(categorizer);
            Extractor extractor = new Extractor();

            HttpResourceFetcher httpfetcher = new HttpResourceFetcher();

            foreach (String url in urls)
            {
                DateTime startTime = DateTime.Now;
                ResourceContent resource = null;
                if (httpfetcher.canFetch(url))
                {
                    resource = httpfetcher.fetch(url, 10000, 100);
                }

                DateTime fetchEndTime = DateTime.Now;

                // Skip URLs that could not be fetched or returned no content.
                if ((resource == null) || (resource.getResourceContent() == null))
                {
                    continue;
                }

                /*** 0. fetching the link from the internet ***/
                TimeSpan fetchingTime = fetchEndTime - startTime;

                // Extract all the links in the page. (Previously a throwaway
                // List<LinkItem> was allocated and immediately overwritten.)
                List<LinkItem> listOfLinks = extractor.extractLinks(resource.getResourceUrl(), resource.getResourceContent());
                RuntimeStatistics.addToExtractedUrls(listOfLinks.Count);

                DateTime extEndTime = DateTime.Now;

                /*** 1. Extracting the link from the request ***/
                TimeSpan extRequest = extEndTime - fetchEndTime;

                // Reset the dictionary in filter that contains the urls from the same page.
                filter.resetDictionary();
                int filteredUrlsCount = 0;
                foreach (LinkItem item in listOfLinks)
                {
                    // Filter the links and return only links that can be crawled.
                    List<String> links = new List<String>();
                    links.Add(item.getLink());
                    List<String> filteredLinks = filter.filterLinks(links);

                    // If filteredLinks is not empty, rank the survivor and push it to the frontier.
                    if (filteredLinks.Count > 0)
                    {
                        filteredUrlsCount++;
                        Url url1 = new Url(filteredLinks[0], hashUrl(filteredLinks[0]), ranker.rankUrl(resource, item),
                                          item.getDomainUrl(), hashUrl(item.getDomainUrl()));
                        deployLinksToFrontier(url1);
                        RuntimeStatistics.addToFeedUrls(1);
                    }
                }

                DateTime catStartTime = DateTime.Now;

                /*** 2. Ranking and deployment to the frontier ***/
                TimeSpan rankTotalRequest = catStartTime - extEndTime;

                // Ascribe the url to all the categories it belongs to.
                List<Result> classifiedResults = categorizer.classifyContent(resource.getResourceContent(),
                                                                                resource.getResourceUrl());
                if (classifiedResults.Count != 0)
                {
                    RuntimeStatistics.addToCrawledUrls(1);
                }

                DateTime catEndTime = DateTime.Now;

                /*** 3. Classification of the current request ***/
                TimeSpan catTotalRequest = catEndTime - catStartTime;

                // Persist every classification result ("0" is a placeholder result id).
                foreach (Result classifiedResult in classifiedResults)
                {
                    Result result = new Result("0", classifiedResult.getUrl(), classifiedResult.getCategoryID(),
                                resource.getRankOfUrl(), classifiedResult.getTrustMeter());
                    deployResourceToStorage(result);
                }

                DateTime endTime = DateTime.Now;

                /*** 4. deployment to the database (result) ***/
                TimeSpan deployRequest = endTime - catEndTime;

                /*** 5. Total processing time ***/
                TimeSpan totalRequest = endTime - startTime;
            }
        }
Beispiel #7
0
        public void Test2()
        {
            // End-to-end crawl exercise: fetch each URL, extract/filter/rank its links,
            // classify the page content, and store the classification results.

            // Seed the crawl with a few fixed URLs plus the task's configured seed list.
            List <String> urls = new List <string>();

            urls.Add("http://www.autonews.com/");
            urls.Add("http://www.geonius.com/www/");
            urls.Add("http://en.wikipedia.org/wiki/Main_Page");
            urls.Add("http://www.computerworld.com/");
            List <string> seeds = StorageSystem.StorageSystem.getInstance().getSeedList(taskId);

            foreach (string seed in seeds)
            {
                urls.Add(seed);
            }

            // Task configuration: categories to classify into and crawl restrictions.
            List <Category> _categories  = StorageSystem.StorageSystem.getInstance().getCategories(taskId);
            Constraints     _constraints = StorageSystem.StorageSystem.getInstance().getRestrictions(taskId);
            // NOTE(review): a second, discarded getSeedList(taskId) call was removed here;
            // the seed list is already fetched and stored above.

            Filter      filter      = new Filter("http://", _constraints);
            Categorizer categorizer = new Categorizer(_categories);
            Ranker      ranker      = new Ranker(categorizer);
            Extractor   extractor   = new Extractor();

            HttpResourceFetcher httpfetcher = new HttpResourceFetcher();


            foreach (String url in urls)
            {
                DateTime        startTime = DateTime.Now;
                ResourceContent resource  = null;
                if (httpfetcher.canFetch(url))
                {
                    resource = httpfetcher.fetch(url, 10000, 100);
                }

                DateTime fetchEndTime = DateTime.Now;

                // Skip URLs that could not be fetched or returned no content.
                if ((resource == null) || (resource.getResourceContent() == null))
                {
                    continue;
                }

                /*** 0. fetching the link from the internet ***/
                TimeSpan fetchingTime = fetchEndTime - startTime;

                // Extract all the links in the page. (Previously a throwaway
                // List<LinkItem> was allocated and immediately overwritten.)
                List <LinkItem> listOfLinks = extractor.extractLinks(resource.getResourceUrl(), resource.getResourceContent());
                RuntimeStatistics.addToExtractedUrls(listOfLinks.Count);

                DateTime extEndTime = DateTime.Now;

                /*** 1. Extracting the link from the request ***/
                TimeSpan extRequest = extEndTime - fetchEndTime;

                // Reset the dictionary in filter that contains the urls from the same page.
                filter.resetDictionary();
                int filteredUrlsCount = 0;
                foreach (LinkItem item in listOfLinks)
                {
                    // Filter the links and return only links that can be crawled.
                    List <String> links = new List <String>();
                    links.Add(item.getLink());
                    List <String> filteredLinks = filter.filterLinks(links);

                    // If filteredLinks is not empty, rank the survivor and push it to the frontier.
                    if (filteredLinks.Count > 0)
                    {
                        filteredUrlsCount++;
                        Url url1 = new Url(filteredLinks[0], hashUrl(filteredLinks[0]), ranker.rankUrl(resource, item),
                                           item.getDomainUrl(), hashUrl(item.getDomainUrl()));
                        deployLinksToFrontier(url1);
                        RuntimeStatistics.addToFeedUrls(1);
                    }
                }

                DateTime catStartTime = DateTime.Now;

                /*** 2. Ranking and deployment to the frontier ***/
                TimeSpan rankTotalRequest = catStartTime - extEndTime;

                // Ascribe the url to all the categories it belongs to.
                List <Result> classifiedResults = categorizer.classifyContent(resource.getResourceContent(),
                                                                              resource.getResourceUrl());
                if (classifiedResults.Count != 0)
                {
                    RuntimeStatistics.addToCrawledUrls(1);
                }

                DateTime catEndTime = DateTime.Now;

                /*** 3. Classification of the current request ***/
                TimeSpan catTotalRequest = catEndTime - catStartTime;

                // Persist every classification result ("0" is a placeholder result id).
                foreach (Result classifiedResult in classifiedResults)
                {
                    Result result = new Result("0", classifiedResult.getUrl(), classifiedResult.getCategoryID(),
                                               resource.getRankOfUrl(), classifiedResult.getTrustMeter());
                    deployResourceToStorage(result);
                }

                DateTime endTime = DateTime.Now;

                /*** 4. deployment to the database (result) ***/
                TimeSpan deployRequest = endTime - catEndTime;

                /*** 5. Total processing time ***/
                TimeSpan totalRequest = endTime - startTime;
            }
        }
Beispiel #8
0
 public ResultsPresentation(Categorizer categorizer, ExpenseRepository expenseRepository)
 {
     // Store collaborators; presentation building happens later.
     _expenseRepository = expenseRepository;
     _categorizer = categorizer;
 }