/// <summary>
/// Creates an instance that keeps the supplied feature-set selector and model template
/// for later use.
/// </summary>
/// <param name="selector">Feature-set selector stored in <c>this.selector</c>.</param>
/// <param name="template">Model template stored in <c>this.template</c>.</param>
public TemplateModelFeatureSelecting(
    IFeatureSetSelectorDiscrete<DomainType, LabelType> selector,
    ITemplateModelDiscrete<DomainType, LabelType> template)
{
    // Parameters share their names with the members, hence the explicit "this." qualifier.
    this.template = template;
    this.selector = selector;
}
/// <summary>
/// Scores a candidate feature set. The data set is first restricted to the given features;
/// then, for each of <c>FoldCount</c> folds, it is split using <c>TrainingSetFraction</c>,
/// both parts are handed to <paramref name="template"/> for generate-and-test, and the
/// reported correct-label rate is recorded. The result is the mean of those rates.
/// </summary>
/// <param name="template">Template whose <c>GenerateAndTestDiscrete</c> is run on every fold.</param>
/// <param name="data_set">Data set the feature subset is selected from.</param>
/// <param name="feature_set">Indexes of the features to keep.</param>
/// <returns>Sum of the per-fold correct-label rates divided by <c>FoldCount</c>.</returns>
public double Evaluate(ITemplateModelDiscrete<DomainType, LabelType> template, IDataSet<DomainType, LabelType> data_set, ISet<int> feature_set)
{
    List<int> kept_features = new List<int>(feature_set);
    IDataSet<DomainType, LabelType> reduced_data = data_set.SelectFeatures(kept_features);
    double[] fold_rates = new double[this.FoldCount];

    // Folds run in parallel; each one writes only its own slot of fold_rates.
    // NOTE(review): every fold calls Split on the same reduced data set concurrently -
    // presumably Split is safe to call from several threads; confirm.
    Parallel.For(0, this.FoldCount, fold =>
    {
        Tuple<IDataSet<DomainType, LabelType>, IDataSet<DomainType, LabelType>> parts =
            reduced_data.Split(this.TrainingSetFraction);
        ReportDiscrete<DomainType, LabelType> fold_report =
            template.GenerateAndTestDiscrete(parts.Item1, parts.Item2);
        fold_rates[fold] = fold_report.CorrectLabelRate;
    });

    return ToolsMathCollection.Sum(fold_rates) / ((double)this.FoldCount);
}
/// <summary>
/// Searches for a feature set by repeated local moves. Starting from the empty set, each
/// pass collects the candidate sets produced by <c>RemoveFeature</c> and <c>AddFeature</c>,
/// scores every candidate with <c>feature_set_evaluator</c>, and adopts the candidate with a
/// strictly higher score than the best seen so far. The search stops after the first pass
/// that yields no improvement. A model is then generated from the data set restricted to
/// the selected features.
/// </summary>
/// <param name="template">Template used both for scoring candidates and for generating the final model.</param>
/// <param name="data_set">Data set whose features are being selected.</param>
/// <returns>
/// A tuple holding the generated model and the selected feature indexes in ascending order.
/// </returns>
public Tuple<IModelDiscrete<DomainType, LabelType>, IList<int>> SelectFeatureSet(ITemplateModelDiscrete<DomainType, LabelType> template, IDataSet<DomainType, LabelType> data_set)
{
    // NOTE(review): black_list is never added to in this method; it only has an effect if
    // RemoveFeature/AddFeature populate it themselves - confirm.
    ISet<ISet<int>> black_list = new HashSet<ISet<int>>();

    // Every feature index of the data set: the pool that AddFeature draws from.
    ISet<int> full_set = new HashSet<int>();
    for (int feature_index = 0; feature_index < data_set.FeatureCount; feature_index++)
    {
        full_set.Add(feature_index);
    }

    ISet<int> current_set = new HashSet<int>();
    double best_score = feature_set_evaluator.Evaluate(template, data_set, current_set);

    bool improvement = true;
    while (improvement)
    {
        // Candidates are derived from the set that was current at the start of this pass.
        List<ISet<int>> options = new List<ISet<int>>();
        options.AddRange(RemoveFeature(current_set, black_list));
        options.AddRange(AddFeature(current_set, black_list, full_set));

        improvement = false;
        foreach (ISet<int> option in options)
        {
            double score = feature_set_evaluator.Evaluate(template, data_set, option);

            // CompareTo only guarantees a negative value for "less than", so test the sign
            // rather than the exact value -1.
            if (best_score.CompareTo(score) < 0)
            {
                best_score = score;
                current_set = option;
                improvement = true;
            }
        }
    }

    List<int> current_list = new List<int>(current_set);
    current_list.Sort();

    IModelDiscrete<DomainType, LabelType> model = template.GenerateModelDiscrete(data_set.SelectFeatures(current_list));
    return new Tuple<IModelDiscrete<DomainType, LabelType>, IList<int>>(model, current_list);
}