/// <summary>
/// Runs the "longMCSave" routine through <c>OptoGlobals.executor</c> and stores the two label
/// sets it returns in <c>CVGeneratedLabels</c> and <c>GeneratedLabels</c>.
/// This assumes that we are not using the dll interface, because we can get CV labels and save
/// much more directly.
/// </summary>
/// <param name="doSave">Forwarded unchanged to "longMCSave"; presumably tells the routine whether to persist the model (confirm against the script).</param>
/// <param name="path">Target path; it is created via <c>OptoGlobals.CreateDirectoryAndThenFile</c> before the call and forwarded to "longMCSave".</param>
public virtual void SaveModelByLongEval(bool doSave, string path)
{
    Prepare();
    setFeatures();
    object[] args = getObjArgs();
    object argsOut;

    OptoGlobals.CreateDirectoryAndThenFile(path);

    // Two outputs are requested: [0] is stored as the CV labels, [1] as the generated labels.
    OptoGlobals.executor.Feval("longMCSave", 2, out argsOut, doSave, path, getFunctionString(), _nLearners,
        Util.TwoDimListToSmoothArray(myTrX), Util.TwoDimListToSmoothArray(myTeX),
        OptoGlobals.trainingYIntArray, OptoGlobals.testingYIntArray, myBaseLabels, args);

    object[] parsedArgsOut = (object[])argsOut;

    // The original branched on Multiclass here, but both branches were identical,
    // so the results are unpacked the same way in every case.
    CVGeneratedLabels = ListFromColumnArray((int[,])parsedArgsOut[0]);
    GeneratedLabels = ListFromColumnArray((int[,])parsedArgsOut[1]);

    NullData();
}
/// <summary>
/// Builds one feature row per sample by appending the numeric value of every categorical entry
/// and every boolean entry to a copy of the sample's numeric features, normalizes the combined
/// rows with <c>OptoGlobals.SqueezedMinMaxNorm</c>, and returns each row as an array.
/// </summary>
/// <param name="set">Numeric features, one inner list per sample. The inner lists are not modified.</param>
/// <param name="catSet">Categorical features per sample; each value is looked up in <c>CategoryValues</c>.</param>
/// <param name="boolSet">Boolean features per sample; true becomes <c>OptoGlobals.TrueDoubleVal</c>, false becomes <c>OptoGlobals.FalseDoubleVal</c>.</param>
/// <returns>One double array per sample, in the same order as <paramref name="set"/>.</returns>
internal static List<double[]> setFromCollection(List<List<double>> set, List<List<String>> catSet, List<List<Boolean>> boolSet)
{
    List<List<Double>> ret = new List<List<Double>>(set.Count);
    for (int i = 0; i < set.Count; ++i)
    {
        // Work on a copy: the original appended straight onto set[i], which silently grew the
        // caller's rows and would append the same values again on a second call.
        List<Double> row = new List<Double>(set[i]);

        // Categorical values go after the numeric features...
        foreach (String category in catSet[i])
        {
            row.Add(CategoryValues[category]);
        }

        // ...followed by the booleans. The original had the two constants swapped,
        // encoding true as FalseDoubleVal and false as TrueDoubleVal.
        foreach (Boolean flag in boolSet[i])
        {
            row.Add(flag ? OptoGlobals.TrueDoubleVal : OptoGlobals.FalseDoubleVal);
        }

        ret.Add(row);
    }

    ret = OptoGlobals.NormalizeArray(ret, OptoGlobals.SqueezedMinMaxNorm, true);

    List<Double[]> realRet = new List<Double[]>(ret.Count);
    foreach (List<Double> r in ret)
    {
        realRet.Add(r.ToArray());
    }
    return realRet;
}
/// <summary>
/// Scratch routine that pushes ten random samples through <c>OptoGlobals.GetStats</c>,
/// <c>OptoGlobals.StdDevNorm</c> and <c>OptoGlobals.MinMaxNorm</c>. The normalized results are
/// not used; only the raw samples are returned.
/// </summary>
/// <returns>The ten random values drawn from <c>OptoGlobals.RNG</c>.</returns>
private static double[] NormTest()
{
    const int sampleCount = 10;

    // Draw the raw samples in order.
    double[] samples = new double[sampleCount];
    for (int k = 0; k < sampleCount; ++k)
    {
        samples[k] = OptoGlobals.RNG.NextDouble();
    }
    List<Double> sampleList = new List<Double>(samples);

    // One freshly computed stats entry per sample position.
    List<List<Double>> rawStats = new List<List<Double>>();
    for (int k = 0; k < sampleCount; ++k)
    {
        rawStats.Add(OptoGlobals.GetStats(samples));
    }

    List<Double> stdNormed = OptoGlobals.StdDevNorm(sampleList, rawStats);

    // The stats of the std-dev-normalized samples are computed once and shared by every entry.
    List<List<Double>> normedStats = new List<List<Double>>(sampleCount);
    List<Double> sharedNormedStats = OptoGlobals.GetStats(stdNormed.ToArray());
    for (int k = 0; k < sampleCount; ++k)
    {
        normedStats.Add(sharedNormedStats);
    }

    // Results are intentionally discarded; the calls are kept in their original order.
    OptoGlobals.MinMaxNorm(sampleList, rawStats);
    OptoGlobals.MinMaxNorm(stdNormed, normedStats);

    return samples;
}
/// <summary>
/// Writes the current population to <c>_currentGenPath</c>, one member per line, using each
/// member's <c>ToString()</c> output. The file is (re)created on every call.
/// </summary>
private void dumpPopulationToFile()
{
    OptoGlobals.CreateDirectoryAndThenFile(_currentGenPath);

    using (FileStream target = File.Create(_currentGenPath))
    using (StreamWriter writer = new StreamWriter(target))
    {
        foreach (Optimizer member in Population)
        {
            string serialized = member.ToString();
            writer.WriteLine(serialized);
        }
    }
}
/// <summary>
/// Creates an evolver with a freshly constructed population, sets up the output paths derived
/// from <paramref name="fileStem"/>, loads the lookup table if its file already exists, and
/// optionally loads or dumps the current generation.
/// </summary>
/// <param name="PopSize">Number of population members to create.</param>
/// <param name="xType">Crossover type stored for later use.</param>
/// <param name="fileStem">Prefix used to build the output table and current-generation file paths.</param>
/// <param name="loadFromFile">When true, the current generation is read from its file if present; otherwise the new population is written to that file.</param>
public OptimoEvolver(int PopSize, CrossoverType xType, string fileStem, bool loadFromFile = false)
{
    ReadInCurrentGeneration = loadFromFile;
    CountFeatures = OptoGlobals.IsDebugMode;

    string featureCountState = CountFeatures ? "On" : "Off";
    Console.WriteLine("Count Features is " + featureCountState);

    generation = 0;
    _cross = xType;
    _popSize = PopSize;
    _population = new List<T>(_popSize);
    FileStem = fileStem;

    // Seed the population with default-constructed members.
    int created = 0;
    while (created < _popSize)
    {
        _population.Add(new T());
        ++created;
    }

    _outputPath = FileStem + "OutputTable.csv";
    _currentGenPath = FileStem + "/CurrentGenPop.csv";
    _lookup = new Dictionary<string, Tuple<Double, Double>>();

    // Reuse earlier results when an output table exists; otherwise create its location.
    if (!File.Exists(_outputPath))
    {
        OptoGlobals.CreateDirectoryAndThenFile(_outputPath);
    }
    else
    {
        readLookupFromFile();
    }

    if (FocusOnAllColumns)
    {
        SetPopToAllCols();
    }

    if (!ReadInCurrentGeneration)
    {
        return;
    }

    if (File.Exists(_currentGenPath))
    {
        loadPopulationFromFile();
    }
    else
    {
        dumpPopulationToFile();
    }
}
/// <summary>
/// Replaces the testing set with the contents of the hard-coded Hackathon test CSV: reads it,
/// normalizes the numeric columns, recomputes the categorical values from this set alone, and
/// rebuilds <c>DaedalusValidationSet</c> and <c>TestingXNormed</c> from the combined rows.
/// </summary>
internal static void readInSpecialTestSet()
{
    teXPath = "../../../Data/Hackathon/test.csv";

    // readInDataset hands back the raw numeric, boolean and categorical parts in slots 0, 1 and 2.
    Object[] loadedParts = readInDataset(ref xCols, ref xIgnore, xBlacklist, ref BooleanColumns, CategoricalColumns, teXPath, true, false, true, false) as Object[];
    TestingXRaw = loadedParts[0] as List<List<Double>>;
    TestingXBools = loadedParts[1] as List<List<Boolean>>;
    TestingXCats = loadedParts[2] as List<List<String>>;
    NumberOfFeatures = xCols.Count;

    TestingXNormed = NormalizeArray(TestingXRaw, SqueezedMinMaxNorm, false);

    // Categorical values are derived from the normalized test rows only.
    Dictionary<String, List<Double>> categoryMeanSums = new Dictionary<String, List<Double>>();
    gatherCategoricalMeans(TestingXNormed, TestingXCats, categoryMeanSums);
    meanSumsToCatValues(categoryMeanSums);

    // Merge numeric, categorical and boolean features, then mirror the result back as list-of-lists.
    DaedalusValidationSet = OptoGlobals.setFromCollection(
        OptoGlobals.TestingXNormed,
        OptoGlobals.TestingXCats,
        OptoGlobals.TestingXBools);
    OptoGlobals.TestingXNormed = Util.ListArrayToListList(DaedalusValidationSet);
}
/// <summary>
/// Reads the dataset configuration file at <paramref name="globalPath"/>, loads the training and
/// testing X/Y files it names, normalizes the X data, derives the categorical values, and fills
/// in the static dataset state (class list/dictionary, column sets, integer label arrays and
/// predictor names).
/// </summary>
/// <remarks>
/// Configuration file format, one value per line, in this order. Per the original author's
/// notes, lines beginning with # are ignored, and a column list is comma separated and starts
/// with w if it is a whitelist (otherwise it is a blacklist):
///   1. Dataset name
///   2. Class names file
///   3. Training set X path
///   4. Training set Y path
///   5. Testing set X path
///   6. Testing set Y path
///   7. X ignore list
///   8. Y ignore list
///   9. Categorical column list
///  10. Boolean column list
/// </remarks>
/// <param name="globalPath">Path to the configuration file; it is resolved to a full path before use.</param>
/// <exception cref="InvalidCastException">A dataset could not be loaded in the expected form (possibly a bad path).</exception>
/// <exception cref="InvalidDataException">The training X file is empty, so the column count cannot be determined.</exception>
internal static void ConfigureForDataset(string globalPath)
{
    bool catBlackList = false, boolBlackList = false;

    globalPath = Path.GetFullPath(globalPath);
    Console.WriteLine(globalPath);

    // Open read-only: FileStream(path, FileMode.Open) alone asks for read/write access and
    // therefore fails on read-only files even though nothing is written here.
    using (StreamReader fin = new StreamReader(new BufferedStream(new FileStream(globalPath, FileMode.Open, FileAccess.Read))))
    {
        datasetName = GetNextNonCommentedLine(fin).Trim();
        classNamesPath = GetNextNonCommentedLine(fin).Trim();
        trXPath = GetNextNonCommentedLine(fin).Trim();
        trYPath = GetNextNonCommentedLine(fin).Trim();
        teXPath = GetNextNonCommentedLine(fin).Trim();
        teYPath = GetNextNonCommentedLine(fin).Trim();

        CategoricalColumns = new HashSet<int>();
        BooleanColumns = new HashSet<int>();

        GenerateIgnoreList(GetNextNonCommentedLine(fin).Trim(), ref xIgnore, ref xBlacklist);
        GenerateIgnoreList(GetNextNonCommentedLine(fin).Trim(), ref yIgnore, ref yBlacklist);
        GenerateIgnoreList(GetNextNonCommentedLine(fin).Trim(), ref CategoricalColumns, ref catBlackList);
        GenerateIgnoreList(GetNextNonCommentedLine(fin).Trim(), ref BooleanColumns, ref boolBlackList);
    }

    // The header of the training X file tells us how many columns there are in total.
    int len;
    using (StreamReader fin = new StreamReader(new BufferedStream(new FileStream(trXPath, FileMode.Open, FileAccess.Read))))
    {
        string firstLine = fin.ReadLine();
        if (firstLine == null)
        {
            throw new InvalidDataException("Training X file has no header line: " + trXPath);
        }
        char[] tokens = { ',' };
        len = firstLine.Split(tokens, StringSplitOptions.RemoveEmptyEntries).Length;
    }

    getColumnList(catBlackList, CategoricalColumns, out CategoricalColumns, len);
    getColumnList(boolBlackList, BooleanColumns, out BooleanColumns, len);

    // Now, load the datasets. Each one is validated as soon as it is read; the original only
    // checked for null at the very end, after every dataset had already been dereferenced.
    Object[] tempX = requireDataset(readInDataset(ref xCols, ref xIgnore, xBlacklist, ref BooleanColumns, CategoricalColumns, trXPath, catBlackList, boolBlackList, true, false) as Object[]);
    TrainingXRaw = requireDataset(tempX[0] as List<List<Double>>);
    TrainingXBools = tempX[1] as List<List<Boolean>>;
    TrainingXCats = tempX[2] as List<List<String>>;
    NumberOfFeatures = xCols.Count;

    TrainingYRaw = requireDataset(readInDataset(ref yCols, ref yIgnore, yBlacklist, ref BooleanColumns, CategoricalColumns, trYPath, catBlackList, boolBlackList, false, false) as List<List<String>>);
    TrainingYString = Util.TwoDimListToSmoothArray(TrainingYRaw);

    tempX = requireDataset(readInDataset(ref xCols, ref xIgnore, xBlacklist, ref BooleanColumns, CategoricalColumns, teXPath, catBlackList, boolBlackList, true, true) as Object[]);
    TestingXRaw = requireDataset(tempX[0] as List<List<Double>>);
    TestingXBools = tempX[1] as List<List<Boolean>>;
    TestingXCats = tempX[2] as List<List<String>>;

    TestingYRaw = requireDataset(readInDataset(ref yCols, ref yIgnore, yBlacklist, ref BooleanColumns, CategoricalColumns, teYPath, catBlackList, boolBlackList, false, true) as List<List<String>>);
    TestingYString = Util.TwoDimListToSmoothArray(TestingYRaw);

    TrainingXNormed = NormalizeArray(TrainingXRaw, SqueezedMinMaxNorm, true);
    TestingXNormed = NormalizeArray(TestingXRaw, SqueezedMinMaxNorm, false);

    // Categorical values are derived from both the training and the testing rows.
    Dictionary<String, List<Double>> meanSumDict = new Dictionary<String, List<Double>>();
    gatherCategoricalMeans(TrainingXNormed, TrainingXCats, meanSumDict);
    gatherCategoricalMeans(TestingXNormed, TestingXCats, meanSumDict);
    meanSumsToCatValues(meanSumDict);

    DaedalusTrainingSet = OptoGlobals.setFromCollection(OptoGlobals.TrainingXNormed, OptoGlobals.TrainingXCats, OptoGlobals.TrainingXBools);
    DaedalusValidationSet = OptoGlobals.setFromCollection(OptoGlobals.TestingXNormed, OptoGlobals.TestingXCats, OptoGlobals.TestingXBools);
    OptoGlobals.TrainingXNormed = Util.ListArrayToListList(DaedalusTrainingSet);
    OptoGlobals.TestingXNormed = Util.ListArrayToListList(DaedalusValidationSet);

    // ClassDict translates class names to integers; ClassList does the reverse, so
    // ClassList[ClassDict["className"]] yields "className" if it is in the dictionary.
    ClassDict = new Dictionary<string, int>();
    ClassList = new List<string>();
    int tempCl = 0;
    tempCl = buildClassListAndDict(tempCl, TrainingYRaw);
    // If the training and testing sets are configured correctly, this second pass adds nothing.
    buildClassListAndDict(tempCl, TestingYRaw);
    NumberOfClasses = ClassList.Count;

    NumericalColumns = xCols.SetDifference(CategoricalColumns);
    NumericalColumns = NumericalColumns.SetDifference(BooleanColumns);

    testingYIntArray = intArrayFromStringList(TestingYRaw);
    trainingYIntArray = intArrayFromStringList(TrainingYRaw);
    DaedalusTrainingY = new List<int>(MyUtils.Util.Flatten2dArray(OptoGlobals.trainingYIntArray));
    DaedalusValidationY = new List<int>(MyUtils.Util.Flatten2dArray(OptoGlobals.testingYIntArray));
    AllPredictorNames = GetPredictorNames(xCols, trXPath);
}

/// <summary>
/// Returns <paramref name="dataset"/> unchanged, or reports the failure on the console and
/// throws when it is null (a failed load or a failed <c>as</c> cast).
/// </summary>
private static TData requireDataset<TData>(TData dataset) where TData : class
{
    if (dataset == null)
    {
        Console.WriteLine("Something went horribly wrong loading data, one or more of the datasets is null. Could be a bad path.");
        throw new InvalidCastException();
    }
    return dataset;
}
/// <summary>
/// Program entry point: loads (or creates) the computer tag file, parses the command-line
/// options, configures the dataset, then runs the naive Bayes and decision tree optimizer
/// programs in that order.
/// </summary>
/// <param name="args">
/// Command-line arguments. <c>args[1]</c>, when present, is taken as the dataset configuration
/// path; options are parsed from index 2 onward, each followed by its value:
/// path/-p, gen/-g, save/-r, population/-pop, compub/-c, maxcomp/-m.
/// </param>
static void Main(string[] args)
{
    string compTagFile = @"..\..\compTag.txt";
    if (System.IO.File.Exists(compTagFile))
    {
        OptoGlobals.LoadTagFromFile(compTagFile);
    }
    else
    {
        using (System.IO.StreamWriter fout = new System.IO.StreamWriter(compTagFile))
        {
            fout.WriteLine("NoTag");
        }
    }
    OptoGlobals.IsDebugMode = false;

    String GlobalPath = "../../../Data/Hackathon/DataSetConfigSquish.csv";
    int maxGen = 100, saveAfterGens = 25, popSize = 50, baseCompUB = 10, maxComp = 2000;

    // NOTE(review): .NET does not put the program name in args, so args[0] is skipped here and
    // the first real argument is ignored. Left as-is in case existing launch scripts rely on it.
    if (args.Length >= 2)
    {
        GlobalPath = args[1];
    }
    for (int i = 2; i < args.Length; ++i)
    {
        // Invariant lower-casing keeps option matching independent of the machine's culture.
        switch (args[i].ToLowerInvariant())
        {
            case "path":
            case "-p":
                GlobalPath = nextOptionValue(args, ref i);
                break;
            case "gen":
            case "-g":
                maxGen = Int32.Parse(nextOptionValue(args, ref i));
                break;
            case "save":
            case "-r":
                saveAfterGens = Int32.Parse(nextOptionValue(args, ref i));
                break;
            case "population":
            case "-pop":
                // The original assigned this value to saveAfterGens, so the population size
                // could never be changed from the command line.
                popSize = Int32.Parse(nextOptionValue(args, ref i));
                break;
            case "compub":
            case "-c":
                baseCompUB = Int32.Parse(nextOptionValue(args, ref i));
                break;
            case "maxcomp":
            case "-m":
                maxComp = Int32.Parse(nextOptionValue(args, ref i));
                break;
        }
    }

    OptoGlobals.ConfigureForDataset(GlobalPath);

    // The original drew ten values here for an experiment that is now commented out. The draws
    // are kept so the random sequence seen by everything that follows is unchanged.
    for (int draw = 0; draw < 10; ++draw)
    {
        OptoGlobals.RNG.NextDouble();
    }

    Daedalus D = new Daedalus();

    CTreeOptimizer.CTreeOptimizer.RewriteBitLengths();
    EvoOptimizerProgram<CTreeOptimizer.CTreeOptimizer> decisionTreeProgram = new EvoOptimizerProgram<CTreeOptimizer.CTreeOptimizer>();
    decisionTreeProgram.MaxGen = maxGen;
    decisionTreeProgram.SaveAfterGens = saveAfterGens;
    decisionTreeProgram.PopSize = popSize;
    decisionTreeProgram.OutputBaseline = false;

    MulticlassNBOptimizer.MulticlassNBOptimizer.RewriteBits();
    EvoOptimizerProgram<MulticlassNBOptimizer.MulticlassNBOptimizer> naiveBayesProgram = new EvoOptimizerProgram<MulticlassNBOptimizer.MulticlassNBOptimizer>();
    naiveBayesProgram.MaxGen = maxGen;
    naiveBayesProgram.SaveAfterGens = saveAfterGens;
    naiveBayesProgram.PopSize = popSize;
    naiveBayesProgram.OutputBaseline = false;

    // Configure the program here - set things like multi-threading, etc., if desired.
    // D is configured but not run; its Run() calls are currently disabled.
    D.MaxGen = maxGen * 10;
    D.RecordInterval = saveAfterGens;
    D.PopSize = popSize * 10;
    D.InitialComplexityUpperBound = baseCompUB;
    D.MaxCellComplexity = maxComp;
    D.ConfigureCellDelegatesForDatabase();
    //System.Threading.Thread t = new System.Threading.Thread(() => D.Run());
    //t.Start();
    //D.Run();

    naiveBayesProgram.ConfigureAndRun();
    decisionTreeProgram.ConfigureAndRun();

    /*MulticlassNBOptimizer.MulticlassNBOptimizer bestNb = new MulticlassNBOptimizer.MulticlassNBOptimizer("1101110111010101001010100000101111000111000010101110101011011111111100011011100");
     * bestNb.Eval();
     *
     * OptoGlobals.readInSpecialTestSet();
     *
     * bestNb.Eval();
     * using(StreamWriter fout = new StreamWriter(new FileStream("mysubmission.csv", FileMode.Create))){
     *  bestNb.DumpLabelsToStream(fout);
     * }
     * SerializationChecks();
     */
}

/// <summary>
/// Advances <paramref name="index"/> to the value that follows the option at its current
/// position and returns that value; throws when the option is the last argument.
/// </summary>
private static string nextOptionValue(string[] args, ref int index)
{
    if (index + 1 >= args.Length)
    {
        throw new ArgumentException("Missing value for command-line option \"" + args[index] + "\".");
    }
    return args[++index];
}