コード例 #1
0
        /// <summary>
        /// Manual end-to-end run: loads the regions database, splits it in two, trains a
        /// RegressorFeatureSynthesizerKmerFrequenciesVarK on the first part, prints the trained
        /// synthesizer, scores it against the second part and prints the result of ClassifyDataSet.
        /// </summary>
        public static void TestNewDesign()
        {
            // String argument shared by the ScoreModel and ClassifyDataSet calls below.
            //TODO may be good to use something unspecifiable in the file syntax such as "filename;"
            const string nameCriterion = "filename";

            DiscreteSeriesDatabase <string> fullDatabase = LoadRegionsDatabase();

            // Item1 feeds training, Item2 feeds evaluation
            // (presumably .8 is the training fraction -- confirm against SplitDatabase).
            Tuple <DiscreteSeriesDatabase <string>, DiscreteSeriesDatabase <string> > parts = fullDatabase.SplitDatabase(.8);
            DiscreteSeriesDatabase <string> trainSet = parts.Item1;
            DiscreteSeriesDatabase <string> evalSet  = parts.Item2;

            // Alternative synthesizers previously tried here:
            //   new RegressorFeatureSynthesizerKmerFrequencies<string>("region", 4, 10, 100, 3)
            //   new RegressorFeatureSynthesizerFrequencies<string>("region", 4, 10, 100)
            // Declared through the interface so only interface members are used below.
            IFeatureSynthesizer <string> model = new RegressorFeatureSynthesizerKmerFrequenciesVarK <string>("region", 8, 2, 100, 3);
            model.Train(trainSet);

            Console.WriteLine(model.ToString());

            // The value returned by ScoreModel (if any) is not used here.
            model.ScoreModel(evalSet, 2, nameCriterion);
            Console.WriteLine(ClassifyDataSet(model, evalSet, nameCriterion));

            // Optional LaTeX dump of the whole database:
            //Console.WriteLine (fullDatabase.DatabaseLatexString("Regional Spanish Database"));
        }
コード例 #2
0
        /// <summary>
        /// Exhaustively tries every combination of k, minCutoff, kmerCount and smoothingAmount
        /// listed below, training a RegressorFeatureSynthesizerKmerFrequenciesVarK for the
        /// "region" category on the first part of the split regions database and scoring it on
        /// the second part. One CSV line (k,minCutoff,kmerCount,smoothingAmount,score) is printed
        /// per combination, followed by a summary of the best one.
        /// </summary>
        /// <returns>
        /// The classifier with the highest score; on ties the earliest combination wins.
        /// Null only when no combination produced a score greater than negative infinity
        /// (for example when every score is NaN).
        /// </returns>
        public static IFeatureSynthesizer <string> deriveOptimalClassifier()
        {
            //Load databases
            DiscreteSeriesDatabase <string> allData = LoadRegionsDatabase();

            // Item1 is used for training, Item2 for scoring
            // (presumably .8 is the training fraction -- confirm against SplitDatabase).
            Tuple <DiscreteSeriesDatabase <string>, DiscreteSeriesDatabase <string> > split = allData.SplitDatabase(.8);

            DiscreteSeriesDatabase <string> trainingData = split.Item1;
            DiscreteSeriesDatabase <string> testData     = split.Item2;

            string cat = "region";

            // Start below every real score so that the best candidate is kept even when all
            // scores are zero or negative. Seeding with 0 would silently yield a null result
            // in that case. NaN scores still never win because "NaN > x" is always false.
            double optimalScore = double.NegativeInfinity;
            IFeatureSynthesizer <string> optimalClassifier = null;
            string optimalInfoStr = null;

            //Preliminary scan

            int[] ks = new int[] { 2, 3, 4 };
            // A wider cutoff scan that was previously used: { 5, 10, 20 }
            int[] minCutoffs       = new int[] { 10 };
            int[] kmerCounts       = new int[] { 10, 25, 50, 100 };
            int[] smoothingAmounts = new int[] { 1, 5, 10 };

            // CSV header matching the per-combination lines printed inside the loops.
            string[] colNames = "k minCutoff kmerCount smoothingAmount score".Split(' ');

            Console.WriteLine(colNames.FoldToString("", "", ","));

            foreach (int k in ks)
            {
                foreach (int minCutoff in minCutoffs)
                {
                    foreach (int kmerCount in kmerCounts)
                    {
                        foreach (int smoothingAmount in smoothingAmounts)
                        {
                            // Note the constructor order: smoothingAmount precedes kmerCount.
                            IFeatureSynthesizer <string> classifier = new RegressorFeatureSynthesizerKmerFrequenciesVarK <string>(cat, minCutoff, smoothingAmount, kmerCount, k);
                            classifier.Train(trainingData);

                            double score = classifier.ScoreModel(testData);

                            string infoStr = new double[] { k, minCutoff, kmerCount, smoothingAmount, score }.FoldToString("", "", ",");

                            Console.WriteLine(infoStr);

                            // Strict comparison: the first combination reaching a score keeps it.
                            if (score > optimalScore)
                            {
                                optimalScore      = score;
                                optimalClassifier = classifier;
                                optimalInfoStr    = infoStr;
                            }
                        }
                    }
                }
            }

            // NOTE(review): the winner is selected on the same data it is scored on, so the
            // reported score is likely optimistic -- confirm whether a separate validation
            // split is wanted.
            Console.WriteLine("Optimal Classifier:");
            Console.WriteLine(optimalInfoStr);
            Console.WriteLine(optimalClassifier);

            return(optimalClassifier);
        }