/// <summary>
/// Produces one feature per enabled tag: the number of that tag's keywords that occur
/// among the leading words of the article, scaled so the largest feature equals 1.
/// </summary>
/// <param name="article">Article whose <c>Words</c> collection is scanned.</param>
/// <returns>
/// One value per entry of <c>Features</c>, in the same order. Values are divided by the
/// largest count unless that count is zero, in which case they are returned unscaled.
/// An empty list is returned when no features are enabled.
/// </returns>
public List<double> GetFeatures(PreprocessedArticle article)
{
    List<string> enabledFeatures = Features.Select(t => t.Name).ToList();
    List<double> features = new List<double>(enabledFeatures.Count);

    // Nothing to extract; returning early also avoids Max() throwing on an empty sequence.
    if (enabledFeatures.Count == 0)
    {
        return features;
    }

    // Only the first (Words.Count * PercentOfData) words take part in the lookup.
    // The set is identical for every keyword, so build it once instead of per keyword.
    int wordLimit = (int)(article.Words.Count * PercentOfData);
    HashSet<string> leadingWords = new HashSet<string>(article.Words.Take(wordLimit));

    foreach (string tag in enabledFeatures)
    {
        double feature = 0;
        foreach (string keyWord in KeyWords[tag])
        {
            if (leadingWords.Contains(keyWord))
            {
                feature += 1;
            }
        }

        features.Add(feature);
    }

    // Scale by the largest count; skip when every count is zero to avoid dividing by zero.
    double max = features.Max();
    if (!max.Equals(0.0))
    {
        for (int i = 0; i < features.Count; i++)
        {
            features[i] /= max;
        }
    }

    return features;
}
/// <summary>
/// Concatenates the feature values produced by every extractor in
/// <c>FeatureExtractors</c> into a single list.
/// </summary>
/// <param name="article">Article passed to each extractor's <c>GetFeatures</c>.</param>
/// <returns>
/// All extractor outputs appended one after another, in the order the extractors are enumerated.
/// </returns>
public List<double> GetFeaturesVector(PreprocessedArticle article)
{
    var vector = new List<double>();

    foreach (var extractor in FeatureExtractors)
    {
        var partial = extractor.GetFeatures(article);
        vector.AddRange(partial);
    }

    return vector;
}
/// <summary>
/// Produces one feature per enabled tag: the sum of
/// <c>SimilarityFunction.CalculateSimilarity</c> between each of the tag's keywords and
/// each of the leading words of the article, scaled by the largest feature.
/// </summary>
/// <param name="article">Article whose <c>Words</c> collection is compared against the keywords.</param>
/// <returns>
/// One value per entry of <c>Features</c>, in the same order. Values are divided by the
/// largest sum unless that sum is zero, in which case they are returned unscaled.
/// An empty list is returned when no features are enabled.
/// </returns>
public List<double> GetFeatures(PreprocessedArticle article)
{
    List<string> enabledFeatures = Features.Select(t => t.Name).ToList();
    List<double> features = new List<double>(enabledFeatures.Count);

    // Nothing to extract; returning early also avoids Max() throwing on an empty sequence.
    if (enabledFeatures.Count == 0)
    {
        return features;
    }

    // Only the first (Words.Count * PercentOfData) words are compared. Duplicates are kept
    // on purpose: every occurrence contributes to the sum. Materialise the prefix once
    // rather than re-enumerating it for each keyword.
    int wordLimit = (int)(article.Words.Count * PercentOfData);
    var leadingWords = article.Words.Take(wordLimit).ToList();

    foreach (string tag in enabledFeatures)
    {
        double feature = 0;
        foreach (string keyWord in KeyWords[tag])
        {
            feature += leadingWords.Sum(t => SimilarityFunction.CalculateSimilarity(keyWord, t));
        }

        features.Add(feature);
    }

    // Scale by the largest sum; skip when it is zero to avoid dividing by zero.
    double max = features.Max();
    if (!max.Equals(0.0))
    {
        for (int i = 0; i < features.Count; i++)
        {
            features[i] /= max;
        }
    }

    return features;
}