/// <summary>
/// Annotates the text of a labeled article and registers every extracted person,
/// organization, location and sport-specific word with the matching occurrence
/// database, passing the article's category along with each word.
/// </summary>
/// <param name="article">
/// Labeled article; its <c>Article</c> member is annotated and its <c>Category</c>
/// member is handed to each <c>AddWord</c> call.
/// </param>
public void ExtendDatabaseWithArticle(LabeledArticle article)
{
    Annotation annotated = _processor.Annotate(article.Article);

    // Run all four extractions before touching any database, so nothing is
    // written if an extraction step fails.
    HashSet<string> personNames = NLPCoreHelper.GetPersons(annotated);
    HashSet<string> organizationNames = NLPCoreHelper.GetOrganizations(annotated);
    HashSet<string> locationNames = NLPCoreHelper.GetLocation(annotated);
    HashSet<string> sportWords = NLPCoreHelper.GetOccurence(annotated, _sportSpecificWords);

    foreach (string name in personNames)
    {
        PersonOccurenceDatabase.AddWord(name, article.Category);
    }

    foreach (string name in organizationNames)
    {
        OrganizationOccurenceDatabase.AddWord(name, article.Category);
    }

    foreach (string name in locationNames)
    {
        LocationsOccurenceDatabase.AddWord(name, article.Category);
    }

    foreach (string word in sportWords)
    {
        SportSpecificWordsOccurenceDatabase.AddWord(word, article.Category);
    }
}
/// <summary>
/// Builds the raw feature vector for an article: the text is annotated, the
/// persons, organizations, locations and sport-specific words are extracted,
/// each set is converted to features by its occurrence database, and the four
/// feature arrays are concatenated in that order with 0.5 subtracted from
/// every element.
/// </summary>
/// <param name="article">Article text to annotate.</param>
/// <returns>
/// A new array holding person, organization, location and sport-specific
/// features (in that order), each value reduced by 0.5.
/// </returns>
private double[] GetRawFeatures(string article)
{
    Annotation annotated = _processor.Annotate(article);

    HashSet<string> personNames = NLPCoreHelper.GetPersons(annotated);
    HashSet<string> organizationNames = NLPCoreHelper.GetOrganizations(annotated);
    HashSet<string> locationNames = NLPCoreHelper.GetLocation(annotated);
    HashSet<string> sportWords = NLPCoreHelper.GetOccurence(annotated, _sportSpecificWords);

    // Order matters: it fixes the layout of the concatenated vector.
    double[][] parts =
    {
        PersonOccurenceDatabase.GetFeatures(personNames),
        OrganizationOccurenceDatabase.GetFeatures(organizationNames),
        LocationsOccurenceDatabase.GetFeatures(locationNames),
        SportSpecificWordsOccurenceDatabase.GetFeatures(sportWords),
    };

    int totalLength = 0;
    foreach (double[] part in parts)
    {
        totalLength += part.Length;
    }

    double[] combined = new double[totalLength];
    int cursor = 0;
    foreach (double[] part in parts)
    {
        foreach (double value in part)
        {
            // NOTE(review): the 0.5 shift presumably centers the values around
            // zero - confirm against the range GetFeatures produces.
            combined[cursor] = value - 0.5;
            cursor++;
        }
    }

    return combined;
}