/// <summary>
/// Gathers the required counts for the features and performs feature selection
/// on those counts. It returns a FeatureStats object that is later used
/// for calculating the probabilities of the model.
/// </summary>
/// <param name="dataset">The documents from which the feature statistics are extracted.</param>
/// <returns>
/// The extracted statistics, with every feature that was not chosen by the
/// chi-square selection removed from <c>featureCategoryJointCount</c>.
/// </returns>
private FeatureStats selectFeatures(IList<Document> dataset)
{
    FeatureExtraction featureExtractor = new FeatureExtraction();

    // The FeatureStats object contains statistics about all the features found in the documents.
    FeatureStats stats = featureExtractor.extractFeatureStats(dataset);

    // Pass the statistics to the feature selection algorithm; the keys of the
    // returned dictionary are the features that were selected.
    IDictionary<string, double?> selectedFeatures = featureExtractor.chisquare(stats, chisquareCriticalValue);

    // Collect the rejected features first: removing entries from a dictionary
    // while it is being enumerated can invalidate the enumerator.
    List<string> rejectedFeatures = new List<string>();
    foreach (KeyValuePair<string, IDictionary<string, int>> entry in stats.featureCategoryJointCount)
    {
        if (!selectedFeatures.ContainsKey(entry.Key))
        {
            rejectedFeatures.Add(entry.Key);
        }
    }

    // Clip the rejected features from the joint-count table itself (the outer
    // dictionary), not from the per-feature inner dictionaries.
    foreach (string feature in rejectedFeatures)
    {
        stats.featureCategoryJointCount.Remove(feature);
    }

    return stats;
}