Пример #1
0
        /// <summary>
        /// Runs the <c>longMCSave</c> function through <c>OptoGlobals.executor</c> and stores the two
        /// label sets it returns in <c>CVGeneratedLabels</c> and <c>GeneratedLabels</c>.
        /// This assumes that we are not using the dll interface, because we can get CV labels and save much more directly.
        /// </summary>
        /// <param name="doSave">Forwarded unchanged as the first argument of <c>longMCSave</c>; presumably controls whether the model is persisted (confirm against the script).</param>
        /// <param name="path">Path created up front via <c>OptoGlobals.CreateDirectoryAndThenFile</c> and then forwarded to <c>longMCSave</c>.</param>
        public virtual void SaveModelByLongEval(bool doSave, string path)
        {
            Prepare();
            setFeatures();
            object[] args = getObjArgs();
            object   argsOut;

            OptoGlobals.CreateDirectoryAndThenFile(path);

            // Two outputs are requested from longMCSave (second argument of Feval).
            OptoGlobals.executor.Feval("longMCSave", 2, out argsOut, doSave, path, getFunctionString(),
                                       _nLearners, Util.TwoDimListToSmoothArray(myTrX), Util.TwoDimListToSmoothArray(myTeX),
                                       OptoGlobals.trainingYIntArray, OptoGlobals.testingYIntArray, myBaseLabels, args);

            // Output 0 holds the cross-validation labels, output 1 the generated labels.
            // The former Multiclass / non-Multiclass branches were identical, so the results
            // are unpacked the same way for both cases.
            object[] parsedArgsOut = (object[])argsOut;
            CVGeneratedLabels = ListFromColumnArray((int[, ])parsedArgsOut[0]);
            GeneratedLabels   = ListFromColumnArray((int[, ])parsedArgsOut[1]);

            NullData();
        }
Пример #2
0
        /// <summary>
        /// Builds one combined feature row per sample: the numeric values from <paramref name="set"/>,
        /// followed by the value looked up in <c>CategoryValues</c> for each categorical entry, followed by
        /// one double per boolean entry. The combined rows are then passed through
        /// <c>OptoGlobals.NormalizeArray</c> with <c>OptoGlobals.SqueezedMinMaxNorm</c> and returned as arrays.
        /// </summary>
        /// <param name="set">Numeric feature rows. The rows are copied; the caller's lists are not modified.</param>
        /// <param name="catSet">Categorical values per row; must have at least <c>set.Count</c> rows, and every value must be present in <c>CategoryValues</c>.</param>
        /// <param name="boolSet">Boolean values per row; must have at least <c>set.Count</c> rows.</param>
        /// <returns>One <c>double[]</c> per input row, in input order.</returns>
        internal static List <double[]> setFromCollection(List <List <double> > set, List <List <String> > catSet,
                                                          List <List <Boolean> > boolSet)
        {
            List <List <Double> > ret = new List <List <Double> >(set.Count);

            for (int i = 0; i < set.Count; ++i)
            {
                // Work on a copy: appending to set[i] directly would grow the caller's row on every call.
                List <Double> x = new List <Double>(set[i]);
                for (int j = 0; j < catSet[i].Count; ++j)
                {
                    x.Add(CategoryValues[catSet[i][j]]);//Append normed Cats and Bools to the end here and below
                }
                for (int j = 0; j < boolSet[i].Count; ++j)
                {
                    // true maps to the "true" encoding (the two operands were previously swapped).
                    x.Add(boolSet[i][j] ? OptoGlobals.TrueDoubleVal : OptoGlobals.FalseDoubleVal);
                }

                ret.Add(x);
            }
            ret = OptoGlobals.NormalizeArray(ret, OptoGlobals.SqueezedMinMaxNorm, true);
            List <Double[]> realRet = new List <Double[]>(ret.Count);

            foreach (List <Double> r in ret)
            {
                realRet.Add(r.ToArray());
            }
            return(realRet);
        }
Пример #3
0
        /// <summary>
        /// Smoke test for the normalisation helpers: draws ten random samples from <c>OptoGlobals.RNG</c>,
        /// runs them through <c>OptoGlobals.StdDevNorm</c> and <c>OptoGlobals.MinMaxNorm</c>, and discards
        /// the normalised results. Useful for stepping through the helpers in a debugger.
        /// </summary>
        /// <returns>The ten raw (un-normalised) random samples.</returns>
        private static double[] NormTest()
        {
            const int sampleCount = 10;

            // Draw the samples in index order, matching the order of the original array initializer.
            double[] poot = new double[sampleCount];
            for (int i = 0; i < sampleCount; ++i)
            {
                poot[i] = OptoGlobals.RNG.NextDouble();
            }
            List <Double>         test = new List <double>(poot);
            List <List <Double> > stat = new List <List <double> >(sampleCount);

            // One independently computed stats list per sample position.
            for (int i = 0; i < sampleCount; ++i)
            {
                stat.Add(OptoGlobals.GetStats(poot));
            }
            List <Double>         t3       = OptoGlobals.StdDevNorm(test, stat);
            List <List <double> > devStats = new List <List <double> >(sampleCount);
            List <Double>         devTemp  = OptoGlobals.GetStats(t3.ToArray());

            // The same stats instance is shared by every position here, as before.
            for (int i = 0; i < sampleCount; ++i)
            {
                devStats.Add(devTemp);
            }

            // Results are intentionally unused; the calls themselves are what is being exercised.
            List <Double> t2 = OptoGlobals.MinMaxNorm(test, stat);
            List <Double> t4 = OptoGlobals.MinMaxNorm(t3, devStats);

            return(poot);
        }
Пример #4
0
 /// <summary>
 /// Writes the current population to the file at <c>_currentGenPath</c>,
 /// one <c>ToString()</c> line per member.
 /// </summary>
 private void dumpPopulationToFile()
 {
     OptoGlobals.CreateDirectoryAndThenFile(_currentGenPath);

     FileStream target = File.Create(_currentGenPath);
     using (StreamWriter writer = new StreamWriter(target))
     {
         foreach (Optimizer member in Population)
         {
             string line = member.ToString();
             writer.WriteLine(line);
         }
     }
 }
Пример #5
0
 /// <summary>
 /// Creates an evolver with a freshly constructed population and sets up its output files.
 /// </summary>
 /// <param name="PopSize">Number of individuals created for the initial population.</param>
 /// <param name="xType">Crossover type stored for later use.</param>
 /// <param name="fileStem">Prefix used to build the output-table and current-generation file paths.</param>
 /// <param name="loadFromFile">
 /// When true, the population is loaded from the current-generation file if it exists;
 /// otherwise the freshly created population is written to that file.
 /// </param>
 public OptimoEvolver(int PopSize, CrossoverType xType, string fileStem, bool loadFromFile = false)
 {
     ReadInCurrentGeneration = loadFromFile;
     CountFeatures           = OptoGlobals.IsDebugMode;

     string countFeaturesState;
     if (CountFeatures)
     {
         countFeaturesState = "On";
     }
     else
     {
         countFeaturesState = "Off";
     }
     Console.WriteLine("Count Features is " + countFeaturesState);

     generation  = 0;
     _cross      = xType;
     _popSize    = PopSize;
     _population = new List <T>(_popSize);
     FileStem    = fileStem;

     // Fill the (initially empty) population up to the requested size.
     while (_population.Count < _popSize)
     {
         _population.Add(new T());
     }

     _outputPath     = FileStem + "OutputTable.csv";
     _currentGenPath = FileStem + "/CurrentGenPop.csv";
     _lookup         = new Dictionary <string, Tuple <Double, Double> >();

     // Reuse an existing output table if there is one, otherwise create the file.
     if (!File.Exists(_outputPath))
     {
         OptoGlobals.CreateDirectoryAndThenFile(_outputPath);
     }
     else
     {
         readLookupFromFile();
     }

     if (FocusOnAllColumns)
     {
         SetPopToAllCols();
     }

     if (!ReadInCurrentGeneration)
     {
         return;
     }

     // Resume from the saved generation when available; otherwise seed the file.
     if (!File.Exists(_currentGenPath))
     {
         dumpPopulationToFile();
     }
     else
     {
         loadPopulationFromFile();
     }
 }
Пример #6
0
        /// <summary>
        /// Points <c>teXPath</c> at the Hackathon test file, reloads the testing X data from it,
        /// normalises it, recomputes the categorical values from the testing data alone, and rebuilds
        /// <c>DaedalusValidationSet</c> and <c>TestingXNormed</c> from the result.
        /// </summary>
        internal static void readInSpecialTestSet()
        {
            teXPath = "../../../Data/Hackathon/test.csv";

            // NOTE(review): the four trailing flags are hard-coded (true, false, true, false);
            // confirm their meaning against readInDataset's signature.
            Object[] loadedParts = readInDataset(ref xCols, ref xIgnore, xBlacklist, ref BooleanColumns, CategoricalColumns, teXPath, true, false, true, false) as Object[];

            // Element 0: numeric columns, 1: boolean columns, 2: categorical columns.
            TestingXRaw   = loadedParts[0] as List <List <Double> >;
            TestingXBools = loadedParts[1] as List <List <Boolean> >;
            TestingXCats  = loadedParts[2] as List <List <String> >;

            NumberOfFeatures = xCols.Count;
            TestingXNormed   = NormalizeArray(TestingXRaw, SqueezedMinMaxNorm, false);

            // Category values are derived from the testing data only in this path.
            Dictionary <String, List <Double> > categorySums = new Dictionary <string, List <double> >();
            gatherCategoricalMeans(TestingXNormed, TestingXCats, categorySums);
            meanSumsToCatValues(categorySums);

            List <double[]> combinedRows = OptoGlobals.setFromCollection(OptoGlobals.TestingXNormed, OptoGlobals.TestingXCats, OptoGlobals.TestingXBools);
            DaedalusValidationSet = combinedRows;

            OptoGlobals.TestingXNormed = Util.ListArrayToListList(DaedalusValidationSet);
        }
Пример #7
0
        /// <summary>
        /// Reads the dataset configuration file at <paramref name="globalPath"/>, loads the training and
        /// testing sets it names, and populates the static dataset state (raw and normalised X data,
        /// Y labels, class dictionary/list, column sets, Daedalus sets and predictor names).
        /// </summary>
        /// <remarks>
        /// Configuration file format: one value per line, read via <c>GetNextNonCommentedLine</c>
        /// (per the original notes, lines beginning with # are ignored), in this order:
        /// dataset name; class names file; training X path; training Y path; testing X path;
        /// testing Y path; X ignore list; Y ignore list; categorical column list; boolean column list.
        /// Per the original notes, ignore lists are comma separated and start with "w" when they are a
        /// whitelist (otherwise they are a blacklist).
        /// </remarks>
        /// <param name="globalPath">Path (relative or absolute) to the dataset configuration file.</param>
        /// <exception cref="InvalidCastException">A dataset could not be loaded as the expected type (for example because of a bad path).</exception>
        /// <exception cref="InvalidDataException">The training X file is empty, so no header row is available.</exception>
        internal static void ConfigureForDataset(string globalPath)
        {
            bool catBlackList = false, boolBlackList = false;

            globalPath = Path.GetFullPath(globalPath);
            Console.WriteLine(globalPath);

            // The configuration file is only read, so request read-only access.
            using (StreamReader fin = new StreamReader(new BufferedStream(new FileStream(globalPath, FileMode.Open, FileAccess.Read))))
            {
                datasetName = GetNextNonCommentedLine(fin).Trim();

                classNamesPath     = GetNextNonCommentedLine(fin).Trim();
                trXPath            = GetNextNonCommentedLine(fin).Trim();
                trYPath            = GetNextNonCommentedLine(fin).Trim();
                teXPath            = GetNextNonCommentedLine(fin).Trim();
                teYPath            = GetNextNonCommentedLine(fin).Trim();
                CategoricalColumns = new HashSet <int>();
                BooleanColumns     = new HashSet <int>();
                GenerateIgnoreList(GetNextNonCommentedLine(fin).Trim(), ref xIgnore, ref xBlacklist);
                GenerateIgnoreList(GetNextNonCommentedLine(fin).Trim(), ref yIgnore, ref yBlacklist);
                GenerateIgnoreList(GetNextNonCommentedLine(fin).Trim(), ref CategoricalColumns, ref catBlackList);
                GenerateIgnoreList(GetNextNonCommentedLine(fin).Trim(), ref BooleanColumns, ref boolBlackList);
            }

            // The header row of the training X file gives the total number of columns.
            int len;

            using (StreamReader fin = new StreamReader(new BufferedStream(new FileStream(trXPath, FileMode.Open, FileAccess.Read))))
            {
                char[] tokens    = { ',' };
                string firstLine = fin.ReadLine();
                if (firstLine == null)
                {
                    throw new InvalidDataException("Training X file '" + trXPath + "' is empty; a header row is required.");
                }
                string[] headers = firstLine.Split(tokens, StringSplitOptions.RemoveEmptyEntries);
                len = headers.Length;
            }

            getColumnList(catBlackList, CategoricalColumns, out CategoricalColumns, len);
            getColumnList(boolBlackList, BooleanColumns, out BooleanColumns, len);

            // Load the four datasets. Each result is validated immediately so that a failed load is
            // reported with the intended diagnostic instead of a NullReferenceException further down.
            Object[] tempX = readInDataset(ref xCols, ref xIgnore, xBlacklist, ref BooleanColumns, CategoricalColumns, trXPath, catBlackList, boolBlackList, true, false) as Object[];
            ThrowIfDatasetMissing(tempX);
            TrainingXRaw     = tempX[0] as List <List <Double> >;
            TrainingXBools   = tempX[1] as List <List <Boolean> >;
            TrainingXCats    = tempX[2] as List <List <String> >;
            ThrowIfDatasetMissing(TrainingXRaw);
            NumberOfFeatures = xCols.Count;

            TrainingYRaw     = readInDataset(ref yCols, ref yIgnore, yBlacklist, ref BooleanColumns, CategoricalColumns, trYPath, catBlackList, boolBlackList, false, false) as List <List <String> >;
            ThrowIfDatasetMissing(TrainingYRaw);
            TrainingYString  = Util.TwoDimListToSmoothArray(TrainingYRaw);

            tempX            = readInDataset(ref xCols, ref xIgnore, xBlacklist, ref BooleanColumns, CategoricalColumns, teXPath, catBlackList, boolBlackList, true, true) as Object[];
            ThrowIfDatasetMissing(tempX);
            TestingXRaw      = tempX[0] as List <List <Double> >;
            TestingXBools    = tempX[1] as List <List <Boolean> >;
            TestingXCats     = tempX[2] as List <List <String> >;
            ThrowIfDatasetMissing(TestingXRaw);

            TestingYRaw    = readInDataset(ref yCols, ref yIgnore, yBlacklist, ref BooleanColumns, CategoricalColumns, teYPath, catBlackList, boolBlackList, false, true) as List <List <String> >;
            ThrowIfDatasetMissing(TestingYRaw);
            TestingYString = Util.TwoDimListToSmoothArray(TestingYRaw);


            TrainingXNormed = NormalizeArray(TrainingXRaw, SqueezedMinMaxNorm, true);
            TestingXNormed  = NormalizeArray(TestingXRaw, SqueezedMinMaxNorm, false);

            // Categorical values are derived from both the training and the testing data.
            Dictionary <String, List <Double> > meanSumDict = new Dictionary <string, List <double> >();

            gatherCategoricalMeans(TrainingXNormed, TrainingXCats, meanSumDict);
            gatherCategoricalMeans(TestingXNormed, TestingXCats, meanSumDict);
            meanSumsToCatValues(meanSumDict);
            DaedalusTrainingSet = OptoGlobals.setFromCollection(OptoGlobals.TrainingXNormed, OptoGlobals.TrainingXCats, OptoGlobals.TrainingXBools);

            DaedalusValidationSet = OptoGlobals.setFromCollection(OptoGlobals.TestingXNormed, OptoGlobals.TestingXCats, OptoGlobals.TestingXBools);


            // Replace the numeric-only normalised sets with the combined (numeric + categorical + boolean) rows.
            OptoGlobals.TrainingXNormed = Util.ListArrayToListList(DaedalusTrainingSet);
            OptoGlobals.TestingXNormed  = Util.ListArrayToListList(DaedalusValidationSet);

            ClassDict = new Dictionary <string, int>();
            ClassList = new List <string>();
            int tempCl = 0;

            //ClassDict is a translator to convert string classes to integers.  ClassList is a list to do the same thing with integers.
            //ClassList[ClassDict["className"]] will yield "className", if it is in the dictionary.
            tempCl = buildClassListAndDict(tempCl, TrainingYRaw);//If Training and Testing sets are configured correctly, the next line is pointless.
            buildClassListAndDict(tempCl, TestingYRaw);


            NumberOfClasses = ClassList.Count;

            NumericalColumns = xCols.SetDifference(CategoricalColumns);
            NumericalColumns = NumericalColumns.SetDifference(BooleanColumns);


            testingYIntArray  = intArrayFromStringList(TestingYRaw);
            trainingYIntArray = intArrayFromStringList(TrainingYRaw);


            DaedalusTrainingY   = new List <int>(MyUtils.Util.Flatten2dArray(OptoGlobals.trainingYIntArray));
            DaedalusValidationY = new List <int>(MyUtils.Util.Flatten2dArray(OptoGlobals.testingYIntArray));


            AllPredictorNames = GetPredictorNames(xCols, trXPath);
        }

        /// <summary>
        /// Reports a failed dataset load on the console and throws when <paramref name="dataset"/> is null.
        /// </summary>
        /// <param name="dataset">Result of a dataset load (or of the cast applied to it).</param>
        /// <exception cref="InvalidCastException"><paramref name="dataset"/> is null.</exception>
        private static void ThrowIfDatasetMissing(object dataset)
        {
            if (dataset != null)
            {
                return;
            }

            const string message = "Something went horribly wrong loading data, one or more of the datasets is null.  Could be a bad path.";
            Console.WriteLine(message);
            throw new InvalidCastException(message);
        }
Пример #8
0
        /// <summary>
        /// Program entry point: loads (or creates) the computer tag file, parses command-line options,
        /// configures the dataset, and runs the naive Bayes and decision tree optimizer programs.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments. <c>args[1]</c>, when present, is the dataset configuration path.
        /// From index 2 onwards, option/value pairs are accepted (case-insensitive):
        /// <c>path</c>/<c>-p</c>, <c>gen</c>/<c>-g</c>, <c>save</c>/<c>-r</c>,
        /// <c>population</c>/<c>-pop</c>, <c>compub</c>/<c>-c</c>, <c>maxcomp</c>/<c>-m</c>.
        /// Unrecognised arguments are skipped.
        /// </param>
        /// <exception cref="ArgumentException">An option is the last argument and has no value after it.</exception>
        static void Main(string[] args)
        {
            string compTagFile = @"..\..\compTag.txt";

            if (System.IO.File.Exists(compTagFile))
            {
                OptoGlobals.LoadTagFromFile(compTagFile);
            }
            else
            {
                using (System.IO.StreamWriter fout = new System.IO.StreamWriter(compTagFile)){
                    fout.WriteLine("NoTag");
                }
            }

            OptoGlobals.IsDebugMode = false;
            String GlobalPath = "../../../Data/Hackathon/DataSetConfigSquish.csv";
            int    maxGen = 100, saveAfterGens = 25, popSize = 50, baseCompUB = 10, maxComp = 2000;

            // NOTE(review): args[0] is never read. In C#, args[0] is the first user-supplied argument
            // (not the program name) - confirm that starting at index 1 is intentional.
            if (args.Length >= 2)
            {
                GlobalPath = args[1];
            }
            for (int i = 2; i < args.Length; ++i)
            {
                // Invariant lower-casing keeps keyword matching independent of the current culture.
                switch (args[i].ToLowerInvariant())
                {
                case "path":
                case "-p":
                    GlobalPath = NextOptionValue(args, ref i);
                    break;

                case "gen":
                case "-g":
                    maxGen = Int32.Parse(NextOptionValue(args, ref i));
                    break;

                case "save":
                case "-r":
                    saveAfterGens = Int32.Parse(NextOptionValue(args, ref i));
                    break;

                case "population":
                case "-pop":
                    // Previously assigned to saveAfterGens, so the population size could never be set.
                    popSize = Int32.Parse(NextOptionValue(args, ref i));
                    break;

                case "compub":
                case "-c":
                    baseCompUB = Int32.Parse(NextOptionValue(args, ref i));
                    break;

                case "maxcomp":
                case "-m":
                    maxComp = Int32.Parse(NextOptionValue(args, ref i));
                    break;
                }
            }

            OptoGlobals.ConfigureForDataset(GlobalPath);

            // These locals are only consumed by the disabled Hunter vote check below. The ten draws are
            // kept so that the state of OptoGlobals.RNG seen by later code is unchanged.
            double[] poot = { OptoGlobals.RNG.NextDouble(), OptoGlobals.RNG.NextDouble(), OptoGlobals.RNG.NextDouble(), OptoGlobals.RNG.NextDouble(), OptoGlobals.RNG.NextDouble(),
                              OptoGlobals.RNG.NextDouble(), OptoGlobals.RNG.NextDouble(), OptoGlobals.RNG.NextDouble(), OptoGlobals.RNG.NextDouble(), OptoGlobals.RNG.NextDouble() };
            object   pootwrap = poot;
            //Hunter x = new Hunter();
            double   nerp;
            Daedalus D = new Daedalus();

            //x.Vote(pootwrap, out nerp);
            CTreeOptimizer.CTreeOptimizer.RewriteBitLengths();
            EvoOptimizerProgram <CTreeOptimizer.CTreeOptimizer> decisionTreeProgram = new EvoOptimizerProgram <CTreeOptimizer.CTreeOptimizer>();

            decisionTreeProgram.MaxGen         = maxGen;
            decisionTreeProgram.SaveAfterGens  = saveAfterGens;
            decisionTreeProgram.PopSize        = popSize;
            decisionTreeProgram.OutputBaseline = false;
            MulticlassNBOptimizer.MulticlassNBOptimizer.RewriteBits();
            EvoOptimizerProgram <MulticlassNBOptimizer.MulticlassNBOptimizer> naiveBayesProgram = new EvoOptimizerProgram <MulticlassNBOptimizer.MulticlassNBOptimizer>();

            naiveBayesProgram.MaxGen         = maxGen;
            naiveBayesProgram.SaveAfterGens  = saveAfterGens;
            naiveBayesProgram.PopSize        = popSize;
            naiveBayesProgram.OutputBaseline = false;

            //Configure the program here- set things like multi-threading, etc, if desired

            // Daedalus is configured here, but its Run() call is currently disabled below.
            D.MaxGen         = maxGen * 10;
            D.RecordInterval = saveAfterGens;
            D.PopSize        = popSize * 10;
            D.InitialComplexityUpperBound = baseCompUB;
            D.MaxCellComplexity           = maxComp;
            D.ConfigureCellDelegatesForDatabase();

            //System.Threading.Thread t = new System.Threading.Thread(() => D.Run());
            //t.Start();
            //D.Run();

            naiveBayesProgram.ConfigureAndRun();

            decisionTreeProgram.ConfigureAndRun();

            /*MulticlassNBOptimizer.MulticlassNBOptimizer bestNb = new MulticlassNBOptimizer.MulticlassNBOptimizer("1101110111010101001010100000101111000111000010101110101011011111111100011011100");
             * bestNb.Eval();
             *
             * OptoGlobals.readInSpecialTestSet();
             *
             * bestNb.Eval();
             * using(StreamWriter fout = new StreamWriter(new FileStream("mysubmission.csv", FileMode.Create))){
             *  bestNb.DumpLabelsToStream(fout);
             * }
             * SerializationChecks();
             */
        }

        /// <summary>
        /// Returns the value that follows the option at <paramref name="index"/> and advances
        /// <paramref name="index"/> past it.
        /// </summary>
        /// <param name="args">The full command-line argument array.</param>
        /// <param name="index">Index of the option keyword; on return, the index of its value.</param>
        /// <returns>The argument immediately after the option keyword.</returns>
        /// <exception cref="ArgumentException">The option is the last argument, so it has no value.</exception>
        private static string NextOptionValue(string[] args, ref int index)
        {
            if (index + 1 >= args.Length)
            {
                throw new ArgumentException("Missing value for command-line option '" + args[index] + "'.");
            }
            return args[++index];
        }