示例#1
0
        /// <summary>
        /// Extracts feature statistics from the given documents, runs feature selection on them,
        /// and restricts the joint feature/category counts to the selected features.
        /// </summary>
        /// <param name="dataset">Documents from which the feature statistics are extracted.</param>
        /// <param name="numberOfFeatures">Value forwarded to the feature selection step (defaults to 30).</param>
        /// <returns>
        /// The extracted statistics object, whose featureCategoryJointCount has been replaced by a
        /// dictionary containing only the entries whose key was selected.
        /// </returns>
        private FeaturesStatistics selectFeatures(List <Document> dataset, int numberOfFeatures = 30)
        {
            FeatureExtraction extractor = new FeatureExtraction();

            // Statistics about every feature found in the documents.
            FeaturesStatistics stats = extractor.extractFeatureStatistics(dataset);

            // The selection step returns a dictionary keyed by the features to keep
            // (the double values are presumably selection scores; they are not used here).
            Dictionary<string, double> keptFeatures = extractor.select(stats, numberOfFeatures);

            // Rebuild the joint-count table, skipping every feature that was not selected.
            var filteredJointCount = new Dictionary<string, Dictionary<string, int>>();
            foreach (var entry in stats.featureCategoryJointCount)
            {
                if (!keptFeatures.ContainsKey(entry.Key))
                {
                    continue;
                }

                filteredJointCount.Add(entry.Key, entry.Value);
            }

            // Swap the trimmed table into the statistics object before handing it back.
            stats.featureCategoryJointCount = filteredJointCount;
            return stats;
        }