/// <summary>
/// Manual driver for the feature-synthesizer pipeline: loads the regions
/// database, splits it, trains a variable-k kmer-frequency synthesizer on the
/// first part, prints the trained synthesizer, then scores and classifies the
/// second part, writing the classification result to the console.
/// </summary>
public static void TestNewDesign() {
    // Argument passed to both ScoreModel and ClassifyDataSet.
    //TODO may be good to use something unspecifiable in the file syntax such as "filename;"
    const string fileKey = "filename";

    DiscreteSeriesDatabase<string> regions = LoadRegionsDatabase();

    // Item1 of the split is used for training, Item2 for evaluation.
    // NOTE(review): .8 is presumably the training fraction; confirm against SplitDatabase.
    Tuple<DiscreteSeriesDatabase<string>, DiscreteSeriesDatabase<string>> parts = regions.SplitDatabase(.8);
    DiscreteSeriesDatabase<string> trainSet = parts.Item1;
    DiscreteSeriesDatabase<string> evalSet = parts.Item2;

    // Alternative synthesizers that have been tried here:
    //   new RegressorFeatureSynthesizerKmerFrequencies<string>("region", 4, 10, 100, 3);
    //   new RegressorFeatureSynthesizerFrequencies<string>("region", 4, 10, 100);
    IFeatureSynthesizer<string> model = new RegressorFeatureSynthesizerKmerFrequenciesVarK<string>("region", 8, 2, 100, 3);

    model.Train(trainSet);
    Console.WriteLine(model.ToString());

    // The return value of this ScoreModel overload is intentionally not used here.
    model.ScoreModel(evalSet, 2, fileKey);
    Console.WriteLine(ClassifyDataSet(model, evalSet, fileKey));

    //Console.WriteLine (regions.DatabaseLatexString("Regional Spanish Database"));
}
/// <summary>
/// Grid-searches the parameters of
/// <c>RegressorFeatureSynthesizerKmerFrequenciesVarK</c> for the "region"
/// category and returns the classifier with the highest
/// <c>ScoreModel</c> result on the held-out part of the regions database.
/// </summary>
/// <remarks>
/// One CSV row (k, minCutoff, kmerCount, smoothingAmount, score) is written to
/// the console per candidate, preceded by a header row and followed by a
/// summary of the winning candidate.
/// </remarks>
/// <returns>
/// The best-scoring trained classifier. <c>null</c> only if no candidate
/// produced a comparable (non-NaN) score.
/// </returns>
public static IFeatureSynthesizer<string> deriveOptimalClassifier() {
    //Load databases
    DiscreteSeriesDatabase<string> allData = LoadRegionsDatabase();

    // Item1 of the split is used for training, Item2 for scoring.
    Tuple<DiscreteSeriesDatabase<string>, DiscreteSeriesDatabase<string>> split = allData.SplitDatabase(.8);
    DiscreteSeriesDatabase<string> trainingData = split.Item1;
    DiscreteSeriesDatabase<string> testData = split.Item2;

    string cat = "region";

    // Seeded with negative infinity rather than 0 so that the best candidate
    // is still selected when every score is zero or negative; a 0 seed left
    // the result null in that case. NaN scores never compare greater and are
    // therefore never selected.
    double optimalScore = double.NegativeInfinity;
    IFeatureSynthesizer<string> optimalClassifier = null;
    string optimalInfoStr = null;

    //Preliminary scan
    int[] ks = new int[] { 2, 3, 4 };
    //int[] minCutoffs = new int[]{5, 10, 20};
    int[] minCutoffs = new int[] { 10 };
    int[] kmerCounts = new int[] { 10, 25, 50, 100 };
    int[] smoothingAmounts = new int[] { 1, 5, 10 };

    // CSV header matching the per-candidate rows printed below.
    string[] colNames = "k minCutoff kmerCount smoothingAmount score".Split(' ');
    Console.WriteLine(colNames.FoldToString("", "", ","));

    foreach (int k in ks) {
        foreach (int minCutoff in minCutoffs) {
            foreach (int kmerCount in kmerCounts) {
                foreach (int smoothingAmount in smoothingAmounts) {
                    // Constructor argument order differs from the loop nesting order.
                    IFeatureSynthesizer<string> classifier = new RegressorFeatureSynthesizerKmerFrequenciesVarK<string>(cat, minCutoff, smoothingAmount, kmerCount, k);
                    classifier.Train(trainingData);

                    double score = classifier.ScoreModel(testData);

                    string infoStr = new double[] { k, minCutoff, kmerCount, smoothingAmount, score }.FoldToString("", "", ",");
                    Console.WriteLine(infoStr);

                    // Strict comparison: on ties the earliest candidate wins.
                    if (score > optimalScore) {
                        optimalScore = score;
                        optimalClassifier = classifier;
                        optimalInfoStr = infoStr;
                    }
                }
            }
        }
    }

    Console.WriteLine("Optimal Classifier:");
    Console.WriteLine(optimalInfoStr);
    Console.WriteLine(optimalClassifier);

    return optimalClassifier;
}