/// <summary>
/// Program entry point. Reads the run settings from the application configuration,
/// substitutes defaults for any that are missing or blank, and hands them to
/// <c>Diabetes.Run</c>.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // Mirror all trace output to the console.
    Trace.Listeners.Add(new ConsoleTraceListener());
    Trace.TraceInformation("");

    // Pull every setting up front; blank or absent ones are defaulted below.
    string trainingPath = ConfigurationManager.AppSettings["TrainingSetPath"];
    string testingPath = ConfigurationManager.AppSettings["TestingSetPath"];
    string modelCountText = ConfigurationManager.AppSettings["NumberOfModels"];
    string seedText = ConfigurationManager.AppSettings["RandomSeed"];

    if (String.IsNullOrWhiteSpace(trainingPath))
    {
        trainingPath = "TrainingRatings.txt";
        Trace.TraceInformation("TrainingSetPath in config file is not set. Default to {0} in current directory.", trainingPath);
    }

    if (String.IsNullOrWhiteSpace(testingPath))
    {
        testingPath = "TestingRatings.txt";
        Trace.TraceInformation("TestingSetPath in config file is not set. Default to {0} in current directory.", testingPath);
    }

    if (String.IsNullOrWhiteSpace(modelCountText))
    {
        modelCountText = "1";
        Trace.TraceInformation("NumberOfModels in config file is not set. Default to {0}.", modelCountText);
    }

    // The seed is optional: a blank setting is passed on as null.
    int? seed;
    if (!String.IsNullOrWhiteSpace(seedText))
    {
        seed = Int32.Parse(seedText);
    }
    else
    {
        seed = null;
        Trace.TraceInformation("RandomSeed in config file is not set. Default to null.");
    }

    int modelCount = Int32.Parse(modelCountText);
    Diabetes.Run(trainingPath, testingPath, modelCount, seed);
    Trace.TraceInformation("");

    // Only pause for input when a debugger is attached.
    if (!System.Diagnostics.Debugger.IsAttached)
    {
        return;
    }

    Console.WriteLine("Done. Press enter to continue.");
    Console.Read();
}
/// <summary>
/// Computes the bucket boundaries for every attribute except the last one
/// (the target attribute), one boundary array per attribute column.
/// </summary>
/// <param name="data">Rows of raw attribute values; column <c>c</c> of each row belongs to attribute <c>c</c>.</param>
/// <returns>
/// A list whose entry <c>c</c> is the result of <c>Diabetes.BucketBoundaries</c>
/// applied to all values of attribute column <c>c</c>.
/// </returns>
private static List<double[]> CalculateBucketBoundaries(List<double[]> data)
{
    var boundariesPerAttribute = new List<double[]>();

    // The final column is the target attribute, so it is excluded from bucketing.
    for (int column = 0; column < Diabetes.NumAttributes - 1; column++)
    {
        // Materialise the column immediately so the captured loop variable is safe.
        double[] columnValues = (from row in data select row[column]).ToArray();
        boundariesPerAttribute.Add(Diabetes.BucketBoundaries(Diabetes.NumBuckets, columnValues));
    }

    return boundariesPerAttribute;
}
/// <summary>
/// Runs one bagging experiment: loads the training data, trains a <c>Bagging</c>
/// instance on it, then loads the testing data and invokes both
/// <c>TestNonBagging</c> and <c>Test</c> on it.
/// </summary>
/// <param name="trainingSetPath">Path of the training data file.</param>
/// <param name="testingSetPath">Path of the testing data file.</param>
/// <param name="numberOfModels">Number of models, forwarded to <c>Bagging.Train</c>.</param>
/// <param name="randomSeed">Optional random seed, forwarded to <c>Bagging.Train</c>; may be null.</param>
public static void Run(string trainingSetPath, string testingSetPath, int numberOfModels, int? randomSeed)
{
    Trace.TraceInformation("Starting bagging");
    Trace.TraceInformation("TrainingSetPath: {0}", trainingSetPath);
    Trace.TraceInformation("TestingSetPath: {0}", testingSetPath);
    Trace.TraceInformation("Models: {0}", numberOfModels);

    // Training data is loaded before testing data; the order of these loads is kept as in the original.
    var trainingSet = Diabetes.LoadData(trainingSetPath, Mode.Train);
    var ensemble = new Bagging();
    ensemble.Train(trainingSet, numberOfModels, randomSeed);

    var testingSet = Diabetes.LoadData(testingSetPath, Mode.Test);
    ensemble.TestNonBagging(testingSet);
    ensemble.Test(testingSet);
}
/// <summary>
/// Converts the training and testing data files into the text format consumed by
/// the existing naive Bayes (spam detection) implementation and writes one output
/// file for each.
/// </summary>
/// <remarks>
/// We don't want to change the naive Bayes implementation for spam detection, so the
/// target attribute is mapped onto its labels: value 0 becomes "ham", anything else "spam".
/// </remarks>
/// <param name="trainingInputFile">Path of the raw training data file.</param>
/// <param name="testingInputFile">Path of the raw testing data file.</param>
/// <param name="trainingOutputFile">Path the converted training data is written to.</param>
/// <param name="testingOutputFile">Path the converted testing data is written to.</param>
public static void GenerateNaiveBayesInputFiles(string trainingInputFile, string testingInputFile, string trainingOutputFile, string testingOutputFile)
{
    // Training data is loaded and written first, exactly as before; the testing
    // data is only loaded afterwards.
    Instances trainingInstances = Diabetes.LoadData(trainingInputFile, Mode.Train, Category.Buckets);
    File.WriteAllText(trainingOutputFile, FormatNaiveBayesInput(trainingInstances));

    Instances testingInstances = Diabetes.LoadData(testingInputFile, Mode.Test, Category.Buckets);
    File.WriteAllText(testingOutputFile, FormatNaiveBayesInput(testingInstances));
}

/// <summary>
/// Serialises a set of instances into naive Bayes input text: one line per instance,
/// "{index} {ham|spam}" followed by " {attribute}_{value} 1" for each non-target attribute.
/// </summary>
/// <param name="instances">The instances to serialise.</param>
/// <returns>The complete file content, one line per instance.</returns>
private static string FormatNaiveBayesInput(Instances instances)
{
    int targetAttributeIndex = Diabetes.NumAttributes - 1;
    StringBuilder output = new StringBuilder();

    for (int i = 0; i < instances.numInstances(); i++)
    {
        Instance instance = instances.instance(i);

        // Instance id and label: target value 0 maps to "ham", everything else to "spam".
        output.AppendFormat("{0} {1}", i, instance.value(targetAttributeIndex) == 0 ? "ham" : "spam");

        // Each attribute/value pair becomes a single token with a fixed count of 1.
        for (int j = 0; j < instances.numAttributes() - 1; j++)
        {
            output.AppendFormat(" {0}_{1} {2}", j, instance.value(j), 1);
        }

        output.AppendLine();
    }

    return output.ToString();
}
/// <summary>
/// Loads a delimited file of numeric rows and converts every row into an
/// <c>Instance</c> of a freshly defined <c>Instances</c> set.
/// </summary>
/// <remarks>
/// With <c>Category.Buckets</c> every non-target value is replaced by the index of the
/// bucket it falls into. The bucket boundaries are recomputed and stored in
/// <c>Diabetes.AllBucketBoundaries</c> only when <paramref name="mode"/> is
/// <c>Mode.Train</c>; any other mode reuses the stored boundaries. With any other
/// category, each column is converted by its dedicated <c>ValueFor...</c> helper.
/// Numbers are parsed with the invariant culture so the result does not depend on
/// the machine's regional settings. Blank lines are skipped.
/// </remarks>
/// <param name="filePath">Path of the data file to read.</param>
/// <param name="mode">Whether training or testing data is being loaded.</param>
/// <param name="category">How attribute values are encoded; defaults to <c>Category.Buckets</c>.</param>
/// <returns>The populated instances, one per non-blank line of the file.</returns>
/// <exception cref="InvalidOperationException">
/// Bucketed non-training data with at least one row is loaded before any bucket
/// boundaries have been computed.
/// </exception>
/// <exception cref="FormatException">A field cannot be parsed as a number.</exception>
public static Instances LoadData(string filePath, Mode mode, Category category = Category.Buckets)
{
    Trace.TraceInformation("Loading {0} data from {1}", Enum.GetName(mode.GetType(), mode), filePath);

    List<double[]> data = ReadNumericRows(filePath);

    // Set the attribute values, add to the Instances object
    Instances instances;
    if (category == Category.Buckets)
    {
        instances = Diabetes.DefineBucketAttributes(Diabetes.NumBuckets);

        // Calculate the bucket boundaries for each attribute except the target attribute
        if (mode == Mode.Train)
        {
            Diabetes.AllBucketBoundaries = Diabetes.CalculateBucketBoundaries(data);
        }
        else if (data.Count > 0 && Diabetes.AllBucketBoundaries == null)
        {
            // Fail with a clear message instead of a NullReferenceException further down.
            throw new InvalidOperationException("Bucket boundaries have not been calculated. Load the training data (Mode.Train) before loading any other data.");
        }

        // Put each value into a quartile/bucket
        for (int i = 0; i < data.Count; i++)
        {
            Instance instance = new Instance(Diabetes.NumAttributes);
            instance.setDataset(instances);

            for (int j = 0; j < Diabetes.NumAttributes - 1; j++)
            {
                int bucket = FindBucketIndex(data[i][j], Diabetes.AllBucketBoundaries[j]);

                // With no boundaries at all the attribute is left unset, as before.
                if (bucket >= 0)
                {
                    instance.setValue(j, bucket.ToString());
                }
            }

            Diabetes.ValueForDiabetes(instance, Diabetes.NumAttributes - 1, data[i][Diabetes.NumAttributes - 1]);
            instances.add(instance);
        }
    }
    else
    {
        instances = Diabetes.DefineCategoricalAttributes();
        foreach (double[] row in data)
        {
            Instance instance = new Instance(Diabetes.NumAttributes);
            instance.setDataset(instances);
            Diabetes.ValueForNumberOfTimesPregnant(instance, 0, row[0]);
            Diabetes.ValueForPlasmaGlucoseConcentrationt(instance, 1, row[1]);
            Diabetes.ValueForDiastolicBloodPressure(instance, 2, row[2]);
            Diabetes.ValueForTricepsSkinFoldThickness(instance, 3, row[3]);
            Diabetes.ValueForTwoHourSerumInsulin(instance, 4, row[4]);
            Diabetes.ValueForBmi(instance, 5, row[5]);
            Diabetes.ValueForDiabetesPedigreeFunction(instance, 6, row[6]);
            Diabetes.ValueForAge(instance, 7, row[7]);
            Diabetes.ValueForDiabetes(instance, 8, row[8]);
            instances.add(instance);
        }
    }

    return instances;
}

/// <summary>
/// Reads every non-blank line of the file, splits it on <c>Diabetes.Delimiter</c> and
/// parses the fields as culture-invariant doubles.
/// </summary>
/// <param name="filePath">Path of the data file to read.</param>
/// <returns>
/// One array of length <c>Diabetes.NumAttributes</c> per line; columns missing from a
/// short line keep the default value 0.
/// </returns>
private static List<double[]> ReadNumericRows(string filePath)
{
    List<double[]> rows = new List<double[]>();

    using (StreamReader reader = File.OpenText(filePath))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // A blank line (typically the trailing one) carries no data; parsing it would throw.
            if (String.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            string[] parts = line.Split(Diabetes.Delimiter);
            double[] row = new double[Diabetes.NumAttributes];
            for (int i = 0; i < parts.Length; i++)
            {
                // Data files are machine-readable, so never parse them with the UI culture.
                row[i] = Double.Parse(parts[i], System.Globalization.CultureInfo.InvariantCulture);
            }

            rows.Add(row);
        }
    }

    return rows;
}

/// <summary>
/// Determines which bucket a value falls into, given the boundaries between buckets.
/// </summary>
/// <param name="value">The raw attribute value.</param>
/// <param name="bucketBoundaries">Boundaries between consecutive buckets; each boundary is the inclusive upper bound of its bucket.</param>
/// <returns>
/// 0 when the value is at or below the first boundary; <c>k + 1</c> when it lies in
/// <c>(boundary[k], boundary[k + 1]]</c>; the number of boundaries when no earlier
/// bucket matches; -1 when there are no boundaries at all.
/// </returns>
private static int FindBucketIndex(double value, double[] bucketBoundaries)
{
    if (bucketBoundaries.Length == 0)
    {
        return -1;
    }

    if (value <= bucketBoundaries[0])
    {
        return 0;
    }

    for (int k = 0; k < bucketBoundaries.Length - 1; k++)
    {
        if (value > bucketBoundaries[k] && value <= bucketBoundaries[k + 1])
        {
            return k + 1;
        }
    }

    // Nothing matched (value above the last boundary, or NaN): use the final bucket.
    return bucketBoundaries.Length;
}