/// <summary>
/// Annotates the text of a labeled article and records every extracted person,
/// organization, location and sport-specific word in the corresponding occurrence
/// database under the article's category.
/// </summary>
/// <param name="article">
/// Labeled article; its <c>Article</c> member is annotated and its <c>Category</c>
/// member is passed along with every word that is added.
/// </param>
public void ExtendDatabaseWithArticle(LabeledArticle article)
{
    Annotation annotated = _processor.Annotate(article.Article);

    // Run all four extractions before touching any database, so nothing is
    // written if one of the extraction steps fails.
    HashSet<string> personNames = NLPCoreHelper.GetPersons(annotated);
    HashSet<string> organizationNames = NLPCoreHelper.GetOrganizations(annotated);
    HashSet<string> locationNames = NLPCoreHelper.GetLocation(annotated);
    HashSet<string> matchedSportWords = NLPCoreHelper.GetOccurence(annotated, _sportSpecificWords);

    foreach (string name in personNames)
    {
        PersonOccurenceDatabase.AddWord(name, article.Category);
    }

    foreach (string name in organizationNames)
    {
        OrganizationOccurenceDatabase.AddWord(name, article.Category);
    }

    foreach (string name in locationNames)
    {
        LocationsOccurenceDatabase.AddWord(name, article.Category);
    }

    foreach (string word in matchedSportWords)
    {
        SportSpecificWordsOccurenceDatabase.AddWord(word, article.Category);
    }
}
/// <summary>
/// Builds the raw feature vector for an article text: the feature arrays of the
/// person, organization, location and sport-specific-word databases are concatenated
/// in that order, and 0.5 is then subtracted from every element.
/// </summary>
/// <param name="article">Plain article text to annotate.</param>
/// <returns>
/// A new array whose length is the sum of the four per-database feature array lengths.
/// </returns>
private double[] GetRawFeatures(string article)
{
    Annotation annotated = _processor.Annotate(article);

    HashSet<string> personNames = NLPCoreHelper.GetPersons(annotated);
    HashSet<string> organizationNames = NLPCoreHelper.GetOrganizations(annotated);
    HashSet<string> locationNames = NLPCoreHelper.GetLocation(annotated);
    HashSet<string> matchedSportWords = NLPCoreHelper.GetOccurence(annotated, _sportSpecificWords);

    // The order of this array defines the layout of the resulting vector.
    double[][] parts =
    {
        PersonOccurenceDatabase.GetFeatures(personNames),
        OrganizationOccurenceDatabase.GetFeatures(organizationNames),
        LocationsOccurenceDatabase.GetFeatures(locationNames),
        SportSpecificWordsOccurenceDatabase.GetFeatures(matchedSportWords)
    };

    int totalLength = 0;
    foreach (double[] part in parts)
    {
        totalLength += part.Length;
    }

    double[] features = new double[totalLength];

    // Append each part right after the previous one.
    int offset = 0;
    foreach (double[] part in parts)
    {
        part.CopyTo(features, offset);
        offset += part.Length;
    }

    // Shift every feature by -0.5.
    for (int position = 0; position < features.Length; position++)
    {
        features[position] = features[position] - 0.5;
    }

    return features;
}
/// <summary>
/// Creates a marker. The constructor lemmatizes the sport-specific word list stored
/// in "specificwords.txt", rewrites that file with one lemma per line, creates four
/// empty occurrence databases, builds the classifier network with its
/// back-propagation teacher, and fills the category/index lookup tables.
/// </summary>
/// <param name="processor">NLP processor used to annotate the word list (and stored for later use).</param>
/// <param name="modelPath">
/// Prefix that is concatenated as-is with "specificwords.txt"; presumably it has to
/// end with a directory separator already - confirm against callers.
/// </param>
public Marker(NLPProcessor processor, string modelPath)
{
    _processor = processor;
    _modelPath = modelPath;

    // Build the file name once. Plain concatenation is kept deliberately so the
    // resolved path is exactly the same as before.
    string specificWordsFile = modelPath + "specificwords.txt";

    Annotation sportSpecificWords = _processor.Annotate(File.ReadAllText(specificWordsFile));
    _sportSpecificWords = NLPCoreHelper.GetLemmas(sportSpecificWords);

    StringBuilder builder = new StringBuilder();
    foreach (string word in _sportSpecificWords)
    {
        builder.AppendLine(word);
    }

    // File.WriteAllText overwrites an existing file, so a preceding File.Delete is
    // unnecessary; dropping it removes the window in which the word list file does
    // not exist on disk.
    File.WriteAllText(specificWordsFile, builder.ToString());

    PersonOccurenceDatabase = new WordOccurenceDatabase();
    OrganizationOccurenceDatabase = new WordOccurenceDatabase();
    LocationsOccurenceDatabase = new WordOccurenceDatabase();
    SportSpecificWordsOccurenceDatabase = new WordOccurenceDatabase();

    // Enumerate the categories once and reuse the result for both the network
    // dimensions and the lookup tables.
    Array values = Enum.GetValues(typeof(SportCategory));
    _categoriesCount = values.Length;

    // Input size is 4 * category count: GetRawFeatures concatenates four feature
    // groups (presumably one value per category from each database - confirm).
    int hiddenLayerCount = (10 * _categoriesCount) / 3;
    _classifier = new ActivationNetwork(new SigmoidFunction(), 4 * _categoriesCount, hiddenLayerCount, _categoriesCount);
    _teacher = new BackPropagationLearning(_classifier);

    // Two-way mapping between a category and its position in the enum value list.
    _categoryIndex = new Dictionary<SportCategory, int>();
    _indexCategory = new Dictionary<int, SportCategory>();
    for (int index = 0; index < values.Length; index++)
    {
        SportCategory category = (SportCategory)values.GetValue(index);
        _categoryIndex.Add(category, index);
        _indexCategory.Add(index, category);
    }
}