Пример #1
0
        /// <summary>
        /// Annotates the text of <paramref name="article"/> and records every extracted
        /// entity in the matching occurrence database under the article's category.
        /// </summary>
        /// <param name="article">
        /// Labeled article; its <c>Article</c> value is annotated and its <c>Category</c>
        /// is passed along with each word that gets registered.
        /// </param>
        /// <remarks>
        /// All four word sets are extracted before any database is touched, so a failure
        /// during extraction leaves the databases unchanged by this call.
        /// </remarks>
        public void ExtendDatabaseWithArticle(LabeledArticle article)
        {
            Annotation annotated = _processor.Annotate(article.Article);

            // Extraction phase: persons, organizations, locations, then the words
            // reported by GetOccurence for the _sportSpecificWords collection.
            HashSet<string> personNames = NLPCoreHelper.GetPersons(annotated);
            HashSet<string> organizationNames = NLPCoreHelper.GetOrganizations(annotated);
            HashSet<string> locationNames = NLPCoreHelper.GetLocation(annotated);
            HashSet<string> sportTerms = NLPCoreHelper.GetOccurence(annotated, _sportSpecificWords);

            // Registration phase: one database per kind of word, same order as extraction.
            foreach (string name in personNames)
            {
                PersonOccurenceDatabase.AddWord(name, article.Category);
            }

            foreach (string name in organizationNames)
            {
                OrganizationOccurenceDatabase.AddWord(name, article.Category);
            }

            foreach (string name in locationNames)
            {
                LocationsOccurenceDatabase.AddWord(name, article.Category);
            }

            foreach (string term in sportTerms)
            {
                SportSpecificWordsOccurenceDatabase.AddWord(term, article.Category);
            }
        }
Пример #2
0
        /// <summary>
        /// Builds the raw feature vector for an article text by concatenating the feature
        /// arrays of the four occurrence databases and subtracting 0.5 from every element.
        /// </summary>
        /// <param name="article">Article text that is handed to the annotation processor.</param>
        /// <returns>
        /// A new array holding, in order, the person, organization, location and
        /// sport-specific features, each reduced by 0.5.
        /// </returns>
        private double[] GetRawFeatures(string article)
        {
            Annotation annotated = _processor.Annotate(article);

            HashSet<string> personNames = NLPCoreHelper.GetPersons(annotated);
            HashSet<string> organizationNames = NLPCoreHelper.GetOrganizations(annotated);
            HashSet<string> locationNames = NLPCoreHelper.GetLocation(annotated);
            HashSet<string> sportTerms = NLPCoreHelper.GetOccurence(annotated, _sportSpecificWords);

            // The order of this list defines the layout of the resulting vector.
            double[][] featureGroups =
            {
                PersonOccurenceDatabase.GetFeatures(personNames),
                OrganizationOccurenceDatabase.GetFeatures(organizationNames),
                LocationsOccurenceDatabase.GetFeatures(locationNames),
                SportSpecificWordsOccurenceDatabase.GetFeatures(sportTerms)
            };

            int totalLength = 0;
            foreach (double[] group in featureGroups)
            {
                totalLength += group.Length;
            }

            double[] combined = new double[totalLength];

            // Append each group right after the previous one using a running offset.
            int offset = 0;
            foreach (double[] group in featureGroups)
            {
                Array.Copy(group, 0, combined, offset, group.Length);
                offset += group.Length;
            }

            // Shift every feature by -0.5.
            // NOTE(review): presumably centers values from a [0, 1] range around zero — confirm against GetFeatures.
            for (int index = 0; index < combined.Length; index++)
            {
                combined[index] = combined[index] - 0.5;
            }

            return combined;
        }