//Helper, score a model on a single element.
//Returns the normalized score the model assigns to the item's true class,
//or -1 when the classifier has no entry for that class (caller must skip it).
private static double ScoreModelSingle <Ty>(this IFeatureSynthesizer <Ty> synth, Dictionary <string, int> classRanks, DiscreteEventSeries <Ty> item, int verbosity, string nameCategory = null) {
	string trueClass = item.labels [synth.ClassificationCriterion];

	int correctClass;
	if (!classRanks.TryGetValue(trueClass, out correctClass)) {
		//Unknown class: report (when verbose) and signal the caller with -1.
		if (verbosity >= 1) {
			Console.WriteLine("Classifier does not contain data for " + trueClass + ". Skipping this item.");
		}
		return -1;
	}

	double[] featureScores = synth.SynthesizeFeaturesSumToOne(item);

	if (verbosity >= 2) {
		//Per-item trace: "[name] (class): scores (score of true class)".
		string trace = (nameCategory != null)
			? item.labels[nameCategory] + " (" + trueClass + ")"
			: trueClass;
		trace += ": " + featureScores.FoldToString() + " (" + featureScores [correctClass] + ")";
		Console.WriteLine(trace);
	}

	return featureScores [correctClass];
}
//Produce a one-line classification report for a single item:
//its display name, the predicted class, and the model's confidence
//(the largest of the sum-to-one feature scores).
public static string ClassifyItem <Ty>(IFeatureSynthesizer <Ty> synth, DiscreteEventSeries <Ty> item, string nameField) {
	double[] scores = synth.SynthesizeFeaturesSumToOne(item);
	double confidence = scores.Max();
	//TODO don't report ambiguous cases.
	//NOTE(review): the "" below yields "prediction(0.5 confidence)" with no
	//space — possibly a placeholder for " "; confirm intended output format.
	return item.labels[nameField] + ": " + synth.SynthesizeLabelFeature(item) + "" + "(" + confidence + " confidence)";
}
//Synthesize the item's feature vector normalized to sum to 1.
//When every raw feature is 0, NormalizeSumInPlace divides by zero and yields
//NaN for all entries; in that case fall back to a uniform distribution.
public static double[] SynthesizeFeaturesSumToOne <Ty>(this IFeatureSynthesizer <Ty> synth, DiscreteEventSeries <Ty> item) {
	double[] vals = synth.SynthesizeFeatures(item).NormalizeSumInPlace();
	//BUGFIX: guard the length so an empty feature vector no longer throws
	//IndexOutOfRangeException on vals[0]. Checking vals[0] alone suffices for
	//non-empty vectors: a zero sum makes every normalized entry NaN.
	if (vals.Length > 0 && Double.IsNaN(vals[0])) {
		double uniform = 1.0 / vals.Length;
		for (int i = 0; i < vals.Length; i++) {
			vals[i] = uniform;
		}
	}
	return vals;
}
//Train and score one model per category label, returning the mean test score
//across all labels (NaN when categoryLabels is empty).
public static double ScoreModelType <Ty>(IEnumerable <string> categoryLabels, Func <string, IFeatureSynthesizer <Ty> > modelGenerator, DiscreteSeriesDatabase <Ty> trainingData, DiscreteSeriesDatabase <Ty> testData) {
	double sumScore = 0;
	int count = 0;
	foreach (string categoryLabel in categoryLabels) {
		//Train a model for this category label.
		IFeatureSynthesizer <Ty> model = modelGenerator(categoryLabel);
		model.Train(trainingData);
		sumScore += model.ScoreModel(testData);
		count++; //BUGFIX: count was never incremented, so the result was always a division by zero.
	}
	return sumScore / count;
}
//Score a trained model against a labeled test database.
//verbosity >= 2 additionally prints the feature schema, a per-item trace,
//and summary statistics; nameCategory, when given, is the label key used to
//display each item's name in that trace.
//Returns the average, over scorable test items, of the normalized score the
//model assigns to each item's true class (1/classCount is chance level).
public static double ScoreModel <Ty> (this IFeatureSynthesizer <Ty> synth, DiscreteSeriesDatabase <Ty> testData, int verbosity, string nameCategory = null) {
	//Map each class name in the feature schema to its index in the score vector.
	Dictionary <string, int> classRanks = synth.GetFeatureSchema().Select((item, index) => new Tuple <string, int> (item, index)).ToDictionary(a => a.Item1, a => a.Item2);
	//Display schema
	if (verbosity >= 2) {
		Console.WriteLine(synth.GetFeatureSchema().FoldToString());
	}
	//NOTE(review): Average() throws InvalidOperationException when no test item
	//survives the filter (empty database or all-unknown classes) — confirm
	//callers never pass such data, or guard here.
	double score = testData.data.AsParallel()
		.Where(item => classRanks.ContainsKey(item.labels.GetWithDefault(synth.ClassificationCriterion, ""))) //Filter for items for which we have regressors for.
		.Select(i => ScoreModelSingle(synth, classRanks, i, verbosity, nameCategory)).Average(); //Score them and take the average.
	if (verbosity >= 2) {
		Console.WriteLine("Total Score = " + score);
		Console.WriteLine("E[random model score] = " + (1.0 / classRanks.Count));
	}
	return(score);
}
//Wrap an existing feature synthesizer whose raw features this wrapper will
//normalize. (NOTE(review): class name misspells "Normalizer"; renaming would
//break external references, so it is left as-is.)
public ZScoreNomalizerSynthesizerWrapper(IFeatureSynthesizer <Ty> synth) { this.synth = synth; }
//Compose a feature synthesizer (series -> feature vector) with a vector
//classifier (feature vector -> class probabilities) to classify event series.
//(NOTE(review): "Probabalistic" misspells "Probabilistic"; renaming would
//break external references, so it is left as-is.)
public SeriesFeatureSynthesizerToVectorProbabalisticClassifierEventSeriesProbabalisticClassifier(IFeatureSynthesizer <Ty> synthesizer, IProbabalisticClassifier classifier) { this.synthesizer = synthesizer; this.classifier = classifier; }
//CLASSIFICATION:
//Classify every series in the database (in parallel) and fold the
//per-item classification strings into one report.
public static string ClassifyDataSet <Ty>(IFeatureSynthesizer <Ty> synth, DiscreteSeriesDatabase <Ty> db, string nameField) {
	var itemReports = db.data.AsParallel().Select(item => ClassifyItem(synth, item, nameField));
	return itemReports.FoldToString();
}
//Grid-search classifier hyperparameters (k, minCutoff, kmerCount,
//smoothingAmount) on an 80/20 split of the regions database, printing one
//CSV row per configuration, and return the best-scoring trained classifier.
public static IFeatureSynthesizer <string> deriveOptimalClassifier() {
	//Load databases and split 80% training / 20% test.
	DiscreteSeriesDatabase <string> allData = LoadRegionsDatabase();

	Tuple <DiscreteSeriesDatabase <string>, DiscreteSeriesDatabase <string> > split = allData.SplitDatabase(.8);
	DiscreteSeriesDatabase <string> trainingData = split.Item1;
	DiscreteSeriesDatabase <string> testData = split.Item2;

	string cat = "region";

	double bestScore = 0;
	IFeatureSynthesizer <string> bestClassifier = null;
	string bestInfoStr = null;

	//Preliminary scan
	int[] ks = { 2, 3, 4 };
	//int[] minCutoffs = new int[]{5, 10, 20};
	int[] minCutoffs = { 10 };
	int[] kmerCounts = { 10, 25, 50, 100 };
	int[] smoothingAmounts = { 1, 5, 10 };

	string[] colNames = "k minCutoff kmerCount smoothingAmount score".Split(' ');
	Console.WriteLine(colNames.FoldToString("", "", ","));

	foreach (int k in ks) {
		foreach (int minCutoff in minCutoffs) {
			foreach (int kmerCount in kmerCounts) {
				foreach (int smoothingAmount in smoothingAmounts) {
					//Train and score one candidate configuration.
					IFeatureSynthesizer <string> candidate = new RegressorFeatureSynthesizerKmerFrequenciesVarK <string>(cat, minCutoff, smoothingAmount, kmerCount, k);
					candidate.Train(trainingData);
					double score = candidate.ScoreModel(testData);

					string infoStr = new double[] { k, minCutoff, kmerCount, smoothingAmount, score }.FoldToString("", "", ",");
					Console.WriteLine(infoStr);

					//Keep the best-scoring candidate seen so far.
					if (score > bestScore) {
						bestScore = score;
						bestClassifier = candidate;
						bestInfoStr = infoStr;
					}
				}
			}
		}
	}

	Console.WriteLine("Optimal Classifier:");
	Console.WriteLine(bestInfoStr);
	Console.WriteLine(bestClassifier);
	return bestClassifier;
}
//Score a model. Value returned on [0, 1], where 1 represents a perfectly accurate model and 0 a completely inaccurate model.
//Convenience overload: delegates to the verbosity-aware overload with verbosity 1.
public static double ScoreModel <Ty> (this IFeatureSynthesizer <Ty> synth, DiscreteSeriesDatabase <Ty> testData) {
	return synth.ScoreModel(testData, 1);
}
//Name the most strongly indicated class for an item: the feature-schema entry
//at the index of the item's largest synthesized feature value.
public static string SynthesizeLabelFeature <Ty>(this IFeatureSynthesizer <Ty> synth, DiscreteEventSeries <Ty> item) {
	int bestIndex = synth.SynthesizeFeatures(item).MaxIndex();
	return synth.GetFeatureSchema()[bestIndex];
}