Esempio n. 1
0
        //Helper, score a model on a single element
        private static double ScoreModelSingle <Ty>(this IFeatureSynthesizer <Ty> synth, Dictionary <string, int> classRanks, DiscreteEventSeries <Ty> item, int verbosity, string nameCategory = null)
        {
            int correctClass;

            if (!classRanks.TryGetValue(item.labels [synth.ClassificationCriterion], out correctClass))
            {
                if (verbosity >= 1)
                {
                    Console.WriteLine("Classifier does not contain data for " + item.labels [synth.ClassificationCriterion] + ".  Skipping this item.");
                }
                return(-1);
            }

            double[] scores = synth.SynthesizeFeaturesSumToOne(item);

            if (verbosity >= 2)
            {
                string toPrint;
                if (nameCategory != null)
                {
                    toPrint = item.labels[nameCategory] + " (" + item.labels [synth.ClassificationCriterion] + ")";
                }
                else
                {
                    toPrint = item.labels [synth.ClassificationCriterion];
                }
                toPrint += ": " + scores.FoldToString() + " (" + scores [correctClass] + ")";
                Console.WriteLine(toPrint);
            }

            return(scores [correctClass]);
        }
Esempio n. 2
0
        //Classify a single item: report its name label, the predicted class label,
        //and the maximum normalized feature score as a confidence value.
        public static string ClassifyItem <Ty>(IFeatureSynthesizer <Ty> synth, DiscreteEventSeries <Ty> item, string nameField)
        {
            double[] scores = synth.SynthesizeFeaturesSumToOne(item);

            double confidence = scores.Max();
            string prediction = synth.SynthesizeLabelFeature(item);

            //TODO don't report ambiguous cases.
            return(item.labels[nameField] + ": " + prediction + "" +
                   "(" + confidence + " confidence)");
        }
Esempio n. 3
0
 //Synthesize features for an item, normalized so they sum to one.
 //When normalization is degenerate (all features zero), falls back to the
 //uniform distribution 1/n over the feature vector.
 public static double[] SynthesizeFeaturesSumToOne <Ty>(this IFeatureSynthesizer <Ty> synth, DiscreteEventSeries <Ty> item)
 {
     double[] vals = synth.SynthesizeFeatures(item).NormalizeSumInPlace();
     //It can happen that all are 0, in which case NaN results.
     //Length guard fixes an IndexOutOfRangeException on an empty feature vector.
     if (vals.Length > 0 && Double.IsNaN(vals[0]))
     {
         //TODO Higher order function for this!
         for (int i = 0; i < vals.Length; i++)
         {
             vals[i] = 1.0 / vals.Length;
         }
     }
     return(vals);
 }
Esempio n. 4
0
        //Score a model type: for each category label, generate and train a model on the
        //training data, score it on the test data, and return the mean score.
        //Returns NaN when categoryLabels is empty (0/0).
        public static double ScoreModelType <Ty>(IEnumerable <string> categoryLabels, Func <string, IFeatureSynthesizer <Ty> > modelGenerator, DiscreteSeriesDatabase <Ty> trainingData, DiscreteSeriesDatabase <Ty> testData)
        {
            double sumScore = 0;
            int    count    = 0;

            foreach (string categoryLabel in categoryLabels)
            {
                //Train a model for this category label.
                IFeatureSynthesizer <Ty> model = modelGenerator(categoryLabel);
                model.Train(trainingData);
                sumScore += model.ScoreModel(testData);
                count++; //BUG FIX: count was never incremented, so the division below was always by zero.
            }

            return(sumScore / count);
        }
Esempio n. 5
0
        //Score a model over an entire test database.  Each schema feature label is mapped
        //to its index; items whose true class the model knows are scored in parallel and
        //the per-item scores are averaged.
        public static double ScoreModel <Ty> (this IFeatureSynthesizer <Ty> synth, DiscreteSeriesDatabase <Ty> testData, int verbosity, string nameCategory = null)
        {
            //Map each class label in the feature schema to its feature index.
            Dictionary <string, int> classRanks = synth.GetFeatureSchema()
                                                  .Select((label, rank) => new { label, rank })
                                                  .ToDictionary(p => p.label, p => p.rank);

            //Display schema
            if (verbosity >= 2)
            {
                Console.WriteLine(synth.GetFeatureSchema().FoldToString());
            }

            //Filter for items for which we have regressors for, then score and average.
            var scorable = testData.data.AsParallel()
                           .Where(item => classRanks.ContainsKey(item.labels.GetWithDefault(synth.ClassificationCriterion, "")));
            double score = scorable
                           .Select(i => ScoreModelSingle(synth, classRanks, i, verbosity, nameCategory))
                           .Average();

            if (verbosity >= 2)
            {
                Console.WriteLine("Total Score = " + score);
                Console.WriteLine("E[random model score] = " + (1.0 / classRanks.Count));
            }

            return(score);
        }
Esempio n. 6
0
 //Wrap an existing feature synthesizer; the wrapped synthesizer is stored for later use.
 //NOTE(review): class name suggests the wrapper z-score normalizes synthesized features — confirm elsewhere in the class.
 public ZScoreNomalizerSynthesizerWrapper(IFeatureSynthesizer <Ty> synth)
 {
     this.synth = synth;
 }
Esempio n. 7
0
 //Construct from a feature synthesizer and a vector classifier; both are stored for later use.
 //NOTE(review): name suggests synthesized feature vectors are fed to the classifier — confirm in the class body.
 public SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier(IFeatureSynthesizer <Ty> synthesizer, IProbabalisticClassifier classifier)
 {
     this.synthesizer = synthesizer;
     this.classifier  = classifier;
 }
Esempio n. 8
0
 //CLASSIFICATION:
 //Classify every item in the database (in parallel) and fold the per-item
 //classification strings into a single string.
 public static string ClassifyDataSet <Ty>(IFeatureSynthesizer <Ty> synth, DiscreteSeriesDatabase <Ty> db, string nameField)
 {
     var classifications = db.data.AsParallel().Select(item => ClassifyItem(synth, item, nameField));
     return(classifications.FoldToString());
 }
Esempio n. 9
0
        //Grid-search classifier hyperparameters on an 80/20 split of the regions database.
        //Prints one CSV line per configuration (k, minCutoff, kmerCount, smoothingAmount, score)
        //and returns the highest-scoring trained classifier.
        public static IFeatureSynthesizer <string> deriveOptimalClassifier()
        {
            //Load databases
            DiscreteSeriesDatabase <string> allData = LoadRegionsDatabase();

            //80% of the data trains; the remainder tests.
            Tuple <DiscreteSeriesDatabase <string>, DiscreteSeriesDatabase <string> > split = allData.SplitDatabase(.8);
            DiscreteSeriesDatabase <string> trainingData = split.Item1;
            DiscreteSeriesDatabase <string> testData     = split.Item2;

            string cat = "region";

            //Best configuration found so far.
            double bestScore = 0;
            IFeatureSynthesizer <string> bestClassifier = null;
            string bestInfoStr = null;

            //Preliminary scan

            int[] ks = new int[] { 2, 3, 4 };
            //int[] minCutoffs = new int[]{5, 10, 20};
            int[] minCutoffs       = new int[] { 10 };
            int[] kmerCounts       = new int[] { 10, 25, 50, 100 };
            int[] smoothingAmounts = new int[] { 1, 5, 10 };

            //CSV header line.
            Console.WriteLine("k minCutoff kmerCount smoothingAmount score".Split(' ').FoldToString("", "", ","));

            foreach (int k in ks)
            foreach (int minCutoff in minCutoffs)
            foreach (int kmerCount in kmerCounts)
            foreach (int smoothingAmount in smoothingAmounts)
            {
                //Train and score one candidate for this hyperparameter combination.
                IFeatureSynthesizer <string> candidate = new RegressorFeatureSynthesizerKmerFrequenciesVarK <string>(cat, minCutoff, smoothingAmount, kmerCount, k);
                candidate.Train(trainingData);

                double score = candidate.ScoreModel(testData);

                string infoStr = new double[] { k, minCutoff, kmerCount, smoothingAmount, score }.FoldToString("", "", ",");
                Console.WriteLine(infoStr);

                if (score > bestScore)
                {
                    bestScore      = score;
                    bestClassifier = candidate;
                    bestInfoStr    = infoStr;
                }
            }

            Console.WriteLine("Optimal Classifier:");
            Console.WriteLine(bestInfoStr);
            Console.WriteLine(bestClassifier);

            return(bestClassifier);
        }
Esempio n. 10
0
 //Score a model.  Value returned on [0, 1], where 1 represents a perfectly accurate model and 0 a completely inaccurate model.
 //Convenience overload: delegates to the full ScoreModel with verbosity 1.
 public static double ScoreModel <Ty> (this IFeatureSynthesizer <Ty> synth, DiscreteSeriesDatabase <Ty> testData)
 {
     return synth.ScoreModel(testData, 1);
 }
Esempio n. 11
0
 //Return the schema label corresponding to the item's strongest synthesized feature.
 public static string SynthesizeLabelFeature <Ty>(this IFeatureSynthesizer <Ty> synth, DiscreteEventSeries <Ty> item)
 {
     double[] features = synth.SynthesizeFeatures(item);
     int strongest = features.MaxIndex();
     return synth.GetFeatureSchema()[strongest];
 }