Exemplo n.º 1
0
        /// <summary>
        /// Annotates the text of a labeled article and records every extracted
        /// person, organization, location and sport-specific word in the matching
        /// occurrence database under the article's category.
        /// </summary>
        /// <param name="article">Labeled article supplying the text and its category.</param>
        public void ExtendDatabaseWithArticle(LabeledArticle article)
        {
            Annotation parsed = _processor.Annotate(article.Article);

            // Extract all four word sets up front, before any database is touched.
            HashSet<string> personNames       = NLPCoreHelper.GetPersons(parsed);
            HashSet<string> organizationNames = NLPCoreHelper.GetOrganizations(parsed);
            HashSet<string> locationNames     = NLPCoreHelper.GetLocation(parsed);
            HashSet<string> sportTerms        = NLPCoreHelper.GetOccurence(parsed, _sportSpecificWords);

            var category = article.Category;

            foreach (string name in personNames)
            {
                PersonOccurenceDatabase.AddWord(name, category);
            }

            foreach (string name in organizationNames)
            {
                OrganizationOccurenceDatabase.AddWord(name, category);
            }

            foreach (string name in locationNames)
            {
                LocationsOccurenceDatabase.AddWord(name, category);
            }

            foreach (string term in sportTerms)
            {
                SportSpecificWordsOccurenceDatabase.AddWord(term, category);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds the raw feature vector for an article's text.
        /// </summary>
        /// <remarks>
        /// The result is the concatenation of the feature arrays returned by the
        /// person, organization, location and sport-specific word databases, in
        /// that order, with 0.5 subtracted from every element.
        /// NOTE(review): the 0.5 shift presumably centers the values around zero
        /// for the classifier - confirm against the database's value range.
        /// </remarks>
        /// <param name="article">Plain text of the article to featurize.</param>
        /// <returns>The shifted, concatenated feature vector.</returns>
        private double[] GetRawFeatures(string article)
        {
            Annotation parsed = _processor.Annotate(article);

            // Extract every word set before querying any database.
            HashSet<string> personNames       = NLPCoreHelper.GetPersons(parsed);
            HashSet<string> organizationNames = NLPCoreHelper.GetOrganizations(parsed);
            HashSet<string> locationNames     = NLPCoreHelper.GetLocation(parsed);
            HashSet<string> sportTerms        = NLPCoreHelper.GetOccurence(parsed, _sportSpecificWords);

            // Order of the groups defines the layout of the resulting vector.
            double[][] featureGroups =
            {
                PersonOccurenceDatabase.GetFeatures(personNames),
                OrganizationOccurenceDatabase.GetFeatures(organizationNames),
                LocationsOccurenceDatabase.GetFeatures(locationNames),
                SportSpecificWordsOccurenceDatabase.GetFeatures(sportTerms)
            };

            int totalLength = 0;
            foreach (double[] group in featureGroups)
            {
                totalLength += group.Length;
            }

            double[] features = new double[totalLength];
            int position = 0;

            // Copy each group into place, applying the shift while copying.
            foreach (double[] group in featureGroups)
            {
                foreach (double value in group)
                {
                    features[position] = value - 0.5;
                    position++;
                }
            }

            return features;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Creates a marker: loads the sport-specific word list from the model
        /// directory, rewrites that file with the lemmatized words, and sets up
        /// empty occurrence databases, the classifier network and the
        /// category/index lookup tables.
        /// </summary>
        /// <param name="processor">NLP processor used to annotate text.</param>
        /// <param name="modelPath">
        /// Prefix that is concatenated directly with "specificwords.txt" to locate
        /// the word list, so it must already end with a directory separator.
        /// </param>
        public Marker(NLPProcessor processor, string modelPath)
        {
            _processor = processor;
            _modelPath = modelPath;

            // Plain concatenation is kept (rather than Path.Combine) so the
            // resolved path stays exactly what existing callers rely on.
            string specificWordsPath = modelPath + "specificwords.txt";

            Annotation sportSpecificWords = _processor.Annotate(File.ReadAllText(specificWordsPath));
            _sportSpecificWords = NLPCoreHelper.GetLemmas(sportSpecificWords);

            StringBuilder builder = new StringBuilder();
            foreach (string word in _sportSpecificWords)
            {
                builder.AppendLine(word);
            }

            // File.WriteAllText truncates and overwrites an existing file, so no
            // prior File.Delete is needed; skipping it also avoids a window in
            // which the word list is missing from disk if the write fails.
            File.WriteAllText(specificWordsPath, builder.ToString());

            PersonOccurenceDatabase             = new WordOccurenceDatabase();
            OrganizationOccurenceDatabase       = new WordOccurenceDatabase();
            LocationsOccurenceDatabase          = new WordOccurenceDatabase();
            SportSpecificWordsOccurenceDatabase = new WordOccurenceDatabase();

            // Enumerate the categories once and reuse the result for both the
            // network sizing and the lookup tables.
            Array values = Enum.GetValues(typeof(SportCategory));
            _categoriesCount = values.Length;

            // Integer division: the hidden layer size is floor(10 * categories / 3).
            int hiddenLayerCount = (10 * _categoriesCount) / 3;

            // NOTE(review): the 4 * categories input size presumably matches the
            // four occurrence databases contributing one value per category each
            // - confirm against WordOccurenceDatabase.GetFeatures.
            _classifier = new ActivationNetwork(new SigmoidFunction(), 4 * _categoriesCount, hiddenLayerCount, _categoriesCount);
            _teacher    = new BackPropagationLearning(_classifier);

            // Two-way mapping between each category and its position in the enum's value list.
            _categoryIndex = new Dictionary<SportCategory, int>();
            _indexCategory = new Dictionary<int, SportCategory>();
            for (int index = 0; index < values.Length; index++)
            {
                SportCategory category = (SportCategory)values.GetValue(index);
                _categoryIndex.Add(category, index);
                _indexCategory.Add(index, category);
            }
        }