/// <summary>
        /// Builds one value per enabled feature: the number of that feature's
        /// keywords that occur among the leading words of the article, scaled
        /// so that the largest value in the result becomes 1.
        /// </summary>
        /// <param name="article">
        /// Article whose <c>Words</c> are inspected. Only the first
        /// <c>(int)(Words.Count * PercentOfData)</c> words are considered
        /// (PercentOfData is presumably a fraction in [0, 1] - confirm at its declaration).
        /// </param>
        /// <returns>
        /// A list with one entry per enabled feature, in the order of <c>Features</c>.
        /// Empty when no features are enabled; left unscaled (all zeros) when no
        /// keyword was found at all.
        /// </returns>
        public List <double> GetFeatures(PreprocessedArticle article)
        {
            List<string> enabledFeatures = Features.Select(t => t.Name).ToList();

            // Max() throws InvalidOperationException on an empty sequence, so
            // return early when there is nothing to compute.
            if (enabledFeatures.Count == 0)
            {
                return new List<double>();
            }

            // The inspected prefix of the article does not depend on the tag or
            // the keyword, so build the set of distinct leading words once
            // instead of re-running Take/Distinct for every keyword.
            int wordLimit = (int)(article.Words.Count * PercentOfData);
            HashSet<string> leadingWords = new HashSet<string>(article.Words.Take(wordLimit));

            List<double> features = new List<double>(enabledFeatures.Count);

            foreach (string tag in enabledFeatures)
            {
                double feature = 0;

                foreach (string keyWord in KeyWords[tag])
                {
                    if (leadingWords.Contains(keyWord))
                    {
                        feature += 1;
                    }
                }

                features.Add(feature);
            }

            // Counts are never negative, so max is either 0 (skip the division)
            // or strictly positive.
            double max = features.Max();

            if (max > 0)
            {
                for (int i = 0; i < features.Count; i++)
                {
                    features[i] /= max;
                }
            }

            return features;
        }
        /// <summary>
        /// Runs every extractor in <c>FeatureExtractors</c> against the article
        /// and concatenates their outputs, in extractor order, into one list.
        /// </summary>
        /// <param name="article">Article handed unchanged to each extractor.</param>
        /// <returns>A new list holding all extractor results back to back.</returns>
        public List <double> GetFeaturesVector(PreprocessedArticle article)
        {
            var combined = new List<double>();

            foreach (IFeatureExtractor extractor in FeatureExtractors)
            {
                var partial = extractor.GetFeatures(article);
                combined.AddRange(partial);
            }

            return combined;
        }
Beispiel #3
0
        /// <summary>
        /// Builds one value per enabled feature: the sum, over that feature's
        /// keywords, of <c>SimilarityFunction.CalculateSimilarity(keyword, word)</c>
        /// for every word in the leading part of the article. The result is then
        /// scaled so that the largest value becomes 1.
        /// </summary>
        /// <param name="article">
        /// Article whose <c>Words</c> are compared with the keywords. Only the
        /// first <c>(int)(Words.Count * PercentOfData)</c> words are considered
        /// (PercentOfData is presumably a fraction in [0, 1] - confirm at its declaration).
        /// </param>
        /// <returns>
        /// A list with one entry per enabled feature, in the order of <c>Features</c>.
        /// Empty when no features are enabled; left unscaled when the maximum is 0.
        /// </returns>
        public List <double> GetFeatures(PreprocessedArticle article)
        {
            List<string> enabledFeatures = Features.Select(t => t.Name).ToList();

            // Max() throws InvalidOperationException on an empty sequence, so
            // return early when there is nothing to compute.
            if (enabledFeatures.Count == 0)
            {
                return new List<double>();
            }

            // The inspected prefix is the same for every tag and keyword;
            // materialize it once rather than re-running Take per keyword.
            int wordLimit = (int)(article.Words.Count * PercentOfData);
            var leadingWords = article.Words.Take(wordLimit).ToList();

            List<double> features = new List<double>(enabledFeatures.Count);

            foreach (string tag in enabledFeatures)
            {
                double feature = 0;

                foreach (string keyWord in KeyWords[tag])
                {
                    feature += leadingWords.Sum(t => SimilarityFunction.CalculateSimilarity(keyWord, t));
                }

                // A zero sum needs no special casing: adding it stores 0 anyway.
                features.Add(feature);
            }

            double max = features.Max();

            // Exact zero check only guards the division below.
            if (!max.Equals(0.0))
            {
                for (int i = 0; i < features.Count; i++)
                {
                    features[i] /= max;
                }
            }

            return features;
        }