Esempio n. 1
0
 /// <summary>
 /// Initializes the instance by storing the supplied feature-set selector and
 /// model template; no other work is done here.
 /// </summary>
 /// <param name="selector">Feature-set selector kept in <c>this.selector</c>.</param>
 /// <param name="template">Model template kept in <c>this.template</c>.</param>
 public TemplateModelFeatureSelecting(
     IFeatureSetSelectorDiscrete<DomainType, LabelType> selector,
     ITemplateModelDiscrete<DomainType, LabelType> template)
 {
     // NOTE(review): neither argument is validated; presumably both are expected
     // to be non-null — confirm against callers.
     this.template = template;
     this.selector = selector;
 }
        /// <summary>
        /// Scores a candidate feature set: restricts <paramref name="data_set"/> to the
        /// given features, runs <c>FoldCount</c> split/generate/test rounds in parallel,
        /// and averages the resulting correct-label rates.
        /// </summary>
        /// <param name="template">Template whose <c>GenerateAndTestDiscrete</c> is invoked once per fold.</param>
        /// <param name="data_set">Data set the feature subset is selected from.</param>
        /// <param name="feature_set">Feature indices handed to <c>SelectFeatures</c> (copied into a list first).</param>
        /// <returns>The sum of the per-fold <c>CorrectLabelRate</c> values divided by <c>FoldCount</c>.</returns>
        public double Evaluate(ITemplateModelDiscrete<DomainType, LabelType> template, IDataSet<DomainType, LabelType> data_set, ISet<int> feature_set)
        {
            List<int> chosen_features = new List<int>(feature_set);
            IDataSet<DomainType, LabelType> reduced = data_set.SelectFeatures(chosen_features);

            // One slot per fold; each parallel iteration writes only its own index.
            double[] fold_scores = new double[this.FoldCount];

            Parallel.For(0, this.FoldCount, fold =>
            {
                // Every fold requests its own split of the reduced data set.
                // NOTE(review): presumably Item1 is the training part and Item2 the
                // test part, and Split is safe to call concurrently — confirm.
                Tuple<IDataSet<DomainType, LabelType>, IDataSet<DomainType, LabelType>> parts = reduced.Split(this.TrainingSetFraction);
                IDataSet<DomainType, LabelType> first_part = parts.Item1;
                IDataSet<DomainType, LabelType> second_part = parts.Item2;

                fold_scores[fold] = template.GenerateAndTestDiscrete(first_part, second_part).CorrectLabelRate;
            });

            double total = ToolsMathCollection.Sum(fold_scores);
            return total / ((double)this.FoldCount);
        }
Esempio n. 3
0
        /// <summary>
        /// Searches for a feature subset by repeated local moves and builds a model on it.
        /// Starting from the empty set, each round collects the candidate sets returned by
        /// <c>RemoveFeature</c> and <c>AddFeature</c>, scores each with
        /// <c>feature_set_evaluator.Evaluate</c>, and adopts any candidate that scores
        /// strictly higher than the best score so far. The search stops after the first
        /// round in which no candidate improves the score.
        /// </summary>
        /// <param name="template">Template used both for scoring candidates and for generating the final model.</param>
        /// <param name="data_set">Data set whose <c>FeatureCount</c> defines the full feature index range.</param>
        /// <returns>
        /// A tuple of the model generated on the selected features and the selected
        /// feature indices in ascending order.
        /// </returns>
        public Tuple<IModelDiscrete<DomainType, LabelType>, IList<int>> SelectFeatureSet(ITemplateModelDiscrete<DomainType, LabelType> template, IDataSet<DomainType, LabelType> data_set)
        {
            // This method never adds to black_list itself; it is only passed to the
            // helpers. NOTE(review): whether the helpers populate it is not visible here.
            ISet<ISet<int>> black_list = new HashSet<ISet<int>>();
            ISet<int> full_set = new HashSet<int>();

            for (int feature_index = 0; feature_index < data_set.FeatureCount; feature_index++)
            {
                full_set.Add(feature_index);
            }

            // Baseline: the score of the empty feature set.
            ISet<int> current_set = new HashSet<int>();
            double best_score = feature_set_evaluator.Evaluate(template, data_set, current_set);
            bool improvement = true;

            while (improvement)
            {
                // Candidates are generated once per round from the set as it stands at
                // the start of the round, even if current_set is replaced mid-round.
                List<ISet<int>> options = new List<ISet<int>>();
                options.AddRange(RemoveFeature(current_set, black_list));
                options.AddRange(AddFeature(current_set, black_list, full_set));
                improvement = false;

                foreach (ISet<int> option in options)
                {
                    double score = feature_set_evaluator.Evaluate(template, data_set, option);

                    // CompareTo is only documented to return a negative value when the
                    // instance is smaller, so test the sign rather than equality with -1.
                    // CompareTo (rather than '<') keeps the original NaN ordering.
                    if (best_score.CompareTo(score) < 0)
                    {
                        best_score = score;
                        current_set = option;
                        improvement = true;
                    }
                }
            }

            // Sort the indices before selecting features and returning them.
            List<int> current_list = new List<int>(current_set);
            current_list.Sort();

            IModelDiscrete<DomainType, LabelType> model = template.GenerateModelDiscrete(data_set.SelectFeatures(current_list));

            return new Tuple<IModelDiscrete<DomainType, LabelType>, IList<int>>(model, current_list);
        }