Beispiel #1
0
        /// <summary>
        /// Program entry point. Reads the run settings from the application configuration file,
        /// substitutes defaults for settings that are missing, and starts the experiment via
        /// <c>Diabetes.Run</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        /// <remarks>
        /// Recognised appSettings keys: TrainingSetPath, TestingSetPath, NumberOfModels, RandomSeed.
        /// If NumberOfModels or RandomSeed is present but is not a valid integer, an error is traced,
        /// the experiment is not run, and the process exit code is set to 1.
        /// </remarks>
        static void Main(string[] args)
        {
            Trace.Listeners.Add(new ConsoleTraceListener());

            Trace.TraceInformation("");

            string trainingSetPathConfig = ConfigurationManager.AppSettings["TrainingSetPath"];
            string testingSetPathConfig  = ConfigurationManager.AppSettings["TestingSetPath"];
            string numberOfModelsConfig  = ConfigurationManager.AppSettings["NumberOfModels"];
            string randomSeedConfig      = ConfigurationManager.AppSettings["RandomSeed"];
            int    numberOfModels        = 0;
            int?   randomSeed            = null;
            bool   configIsValid         = true;

            if (String.IsNullOrWhiteSpace(trainingSetPathConfig))
            {
                trainingSetPathConfig = "TrainingRatings.txt";
                Trace.TraceInformation("TrainingSetPath in config file is not set. Default to {0} in current directory.", trainingSetPathConfig);
            }

            if (String.IsNullOrWhiteSpace(testingSetPathConfig))
            {
                testingSetPathConfig = "TestingRatings.txt";
                Trace.TraceInformation("TestingSetPath in config file is not set. Default to {0} in current directory.", testingSetPathConfig);
            }

            if (String.IsNullOrWhiteSpace(numberOfModelsConfig))
            {
                numberOfModelsConfig = "1";
                Trace.TraceInformation("NumberOfModels in config file is not set. Default to {0}.", numberOfModelsConfig);
            }

            // Config values are machine-readable, so parse them with the invariant culture, and use
            // TryParse so a typo in the config file produces a clear message instead of an unhandled exception.
            if (!Int32.TryParse(numberOfModelsConfig, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out numberOfModels))
            {
                Trace.TraceError("NumberOfModels in config file is not a valid integer: {0}", numberOfModelsConfig);
                configIsValid = false;
            }

            if (String.IsNullOrWhiteSpace(randomSeedConfig))
            {
                // randomSeed keeps its initial null value
                Trace.TraceInformation("RandomSeed in config file is not set. Default to null.");
            }
            else
            {
                int parsedSeed;
                if (Int32.TryParse(randomSeedConfig, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out parsedSeed))
                {
                    randomSeed = parsedSeed;
                }
                else
                {
                    Trace.TraceError("RandomSeed in config file is not a valid integer: {0}", randomSeedConfig);
                    configIsValid = false;
                }
            }

            if (configIsValid)
            {
                Diabetes.Run(trainingSetPathConfig, testingSetPathConfig, numberOfModels, randomSeed);
            }
            else
            {
                // Signal the failure to the caller, but still fall through to the debugger pause below
                // so the error message stays visible.
                Environment.ExitCode = 1;
            }

            Trace.TraceInformation("");

            if (System.Diagnostics.Debugger.IsAttached)
            {
                Console.WriteLine("Done. Press enter to continue.");
                Console.Read();
            }
        }
Beispiel #2
0
        /// <summary>
        /// Computes the bucket boundaries of every attribute column except the last one
        /// (the target attribute).
        /// </summary>
        /// <param name="data">Rows of raw attribute values; each row is indexed by attribute.</param>
        /// <returns>
        /// One boundary array per non-target attribute, in attribute order, as produced by
        /// <c>Diabetes.BucketBoundaries</c> for <c>Diabetes.NumBuckets</c> buckets.
        /// </returns>
        private static List <double[]> CalculateBucketBoundaries(List <double[]> data)
        {
            // The final attribute is the target, so it gets no boundaries
            int predictorCount = Diabetes.NumAttributes - 1;
            List <double[]> boundariesPerAttribute = new List <double[]>();

            for (int column = 0; column < predictorCount; column++)
            {
                // Project the current column out of every row
                int      columnIndex  = column;
                double[] columnValues = data.Select(row => row[columnIndex]).ToArray();

                boundariesPerAttribute.Add(Diabetes.BucketBoundaries(Diabetes.NumBuckets, columnValues));
            }

            return boundariesPerAttribute;
        }
Beispiel #3
0
        /// <summary>
        /// Runs one bagging experiment: loads the training set, trains a <c>Bagging</c> instance on it,
        /// then loads the testing set and runs both test passes against it.
        /// </summary>
        /// <param name="trainingSetPath">Path of the training data file.</param>
        /// <param name="testingSetPath">Path of the testing data file.</param>
        /// <param name="numberOfModels">Number of models, passed through to <c>Bagging.Train</c>.</param>
        /// <param name="randomSeed">Optional random seed, passed through to <c>Bagging.Train</c>; may be null.</param>
        public static void Run(string trainingSetPath, string testingSetPath, int numberOfModels, int?randomSeed)
        {
            // Log the settings of this run
            Trace.TraceInformation("Starting bagging");
            Trace.TraceInformation("TrainingSetPath: {0}", trainingSetPath);
            Trace.TraceInformation("TestingSetPath: {0}", testingSetPath);
            Trace.TraceInformation("Models: {0}", numberOfModels);

            // Training phase
            Instances trainSet = Diabetes.LoadData(trainingSetPath, Mode.Train);
            Bagging   ensemble = new Bagging();
            ensemble.Train(trainSet, numberOfModels, randomSeed);

            // Evaluation phase: the testing data is loaded only after the training data has been loaded
            Instances testSet = Diabetes.LoadData(testingSetPath, Mode.Test);
            ensemble.TestNonBagging(testSet);
            ensemble.Test(testSet);
        }
Beispiel #4
0
        /// <summary>
        /// Converts the training and testing data files into the text format consumed by the
        /// naive Bayes spam-detection implementation and writes one output file for each.
        /// </summary>
        /// <param name="trainingInputFile">Path of the training data file to read.</param>
        /// <param name="testingInputFile">Path of the testing data file to read.</param>
        /// <param name="trainingOutputFile">Path the converted training data is written to.</param>
        /// <param name="testingOutputFile">Path the converted testing data is written to.</param>
        /// <remarks>
        /// The naive Bayes implementation for spam detection is reused unchanged, so a target
        /// attribute value of 0 is written as "ham" and any other value as "spam".
        /// </remarks>
        public static void GenerateNaiveBayesInputFiles(string trainingInputFile, string testingInputFile, string trainingOutputFile, string testingOutputFile)
        {
            // Training data. This is loaded before the testing data, matching the original order.
            Instances trainingInstances    = Diabetes.LoadData(trainingInputFile, Mode.Train, Category.Buckets);
            int       targetAttributeIndex = Diabetes.NumAttributes - 1;

            File.WriteAllText(trainingOutputFile, FormatNaiveBayesInput(trainingInstances, targetAttributeIndex));

            // Testing data
            Instances testingInstances = Diabetes.LoadData(testingInputFile, Mode.Test, Category.Buckets);

            File.WriteAllText(testingOutputFile, FormatNaiveBayesInput(testingInstances, targetAttributeIndex));
        }

        /// <summary>
        /// Serializes a set of instances, one line per instance, in the form
        /// "{index} {ham|spam} {attribute}_{value} 1 {attribute}_{value} 1 ...".
        /// </summary>
        /// <param name="instances">The instances to serialize.</param>
        /// <param name="targetAttributeIndex">Index of the target attribute that selects "ham" or "spam".</param>
        /// <returns>The serialized text, with every line terminated by a newline.</returns>
        private static string FormatNaiveBayesInput(Instances instances, int targetAttributeIndex)
        {
            StringBuilder output = new StringBuilder();

            for (int i = 0; i < instances.numInstances(); i++)
            {
                // Line header: instance index followed by its label
                output.AppendFormat("{0} {1}", i, instances.instance(i).value(targetAttributeIndex) == 0 ? "ham" : "spam");

                // Every attribute except the last becomes an "{attribute}_{value}" token followed by the constant 1
                for (int j = 0; j < instances.numAttributes() - 1; j++)
                {
                    output.AppendFormat(" {0}_{1} {2}", j, instances.instance(i).value(j), 1);
                }

                output.AppendLine();
            }

            return output.ToString();
        }
Beispiel #5
0
        /// <summary>
        /// Reads a delimited data file and converts every row into an instance, encoding the
        /// attributes either as bucket indices or through the per-attribute categorical mappings.
        /// </summary>
        /// <param name="filePath">
        /// Path of the data file. Every non-blank line must contain exactly
        /// <c>Diabetes.NumAttributes</c> numeric values separated by <c>Diabetes.Delimiter</c>.
        /// </param>
        /// <param name="mode">
        /// With bucket encoding, <c>Mode.Train</c> recomputes the bucket boundaries from this file and
        /// stores them in <c>Diabetes.AllBucketBoundaries</c>; any other mode reuses the stored boundaries.
        /// </param>
        /// <param name="category">
        /// <c>Category.Buckets</c> (the default) selects bucket encoding; any other value selects the
        /// categorical encoding.
        /// </param>
        /// <returns>The instances built from the file, in file order.</returns>
        /// <exception cref="FormatException">
        /// A line does not contain exactly <c>Diabetes.NumAttributes</c> values, or a value is not a valid number.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// Bucket encoding was requested but no bucket boundaries are available, because no training
        /// data has been loaded yet.
        /// </exception>
        public static Instances LoadData(string filePath, Mode mode, Category category = Category.Buckets)
        {
            Trace.TraceInformation("Loading {0} data from {1}", Enum.GetName(mode.GetType(), mode), filePath);

            List <double[]> data = new List <double[]>();

            // Read in each row
            using (StreamReader sr = File.OpenText(filePath))
            {
                string line;
                int    lineNumber = 0;

                while ((line = sr.ReadLine()) != null)
                {
                    lineNumber++;

                    // Tolerate blank lines, e.g. a trailing newline at the end of the file
                    if (line.Trim().Length == 0)
                    {
                        continue;
                    }

                    string[] parts = line.Split(Diabetes.Delimiter);

                    // A short row would silently leave attributes (including the target) at 0 and a
                    // long row would overflow the array, so reject both with a descriptive error.
                    if (parts.Length != Diabetes.NumAttributes)
                    {
                        throw new FormatException(String.Format(
                                                      System.Globalization.CultureInfo.InvariantCulture,
                                                      "{0}, line {1}: expected {2} values but found {3}.",
                                                      filePath, lineNumber, Diabetes.NumAttributes, parts.Length));
                    }

                    double[] row = new double[parts.Length];
                    for (int i = 0; i < parts.Length; i++)
                    {
                        // The invariant culture keeps "0.627" from being misread on machines whose
                        // culture uses ',' as the decimal separator.
                        row[i] = Double.Parse(parts[i], System.Globalization.CultureInfo.InvariantCulture);
                    }
                    data.Add(row);
                }
            }

            // Set the attribute values, add to the Instances object
            Instances instances;

            if (category == Category.Buckets)
            {
                instances = Diabetes.DefineBucketAttributes(Diabetes.NumBuckets);

                // Calculate the bucket boundaries for each attribute except the target attribute
                if (mode == Mode.Train)
                {
                    Diabetes.AllBucketBoundaries = Diabetes.CalculateBucketBoundaries(data);
                }

                if (Diabetes.AllBucketBoundaries == null)
                {
                    throw new InvalidOperationException("Bucket boundaries have not been calculated. Load the training data (Mode.Train) before loading any other data.");
                }

                // Put each value into a quartile/bucket
                for (int i = 0; i < data.Count; i++)
                {
                    Instance instance = new Instance(Diabetes.NumAttributes);
                    instance.setDataset(instances);

                    for (int j = 0; j < Diabetes.NumAttributes - 1; j++)
                    {
                        double[] bucketBoundaries = Diabetes.AllBucketBoundaries[j];

                        // With no boundaries there is nothing to compare against; the value is left unset
                        if (bucketBoundaries.Length == 0)
                        {
                            continue;
                        }

                        // The bucket is the index of the first boundary that is >= the value.
                        // A value above every boundary lands in the last bucket (index == boundary count).
                        // NOTE(review): this presumes the boundaries are in ascending order - confirm in BucketBoundaries.
                        double value  = data[i][j];
                        int    bucket = bucketBoundaries.Length;
                        for (int k = 0; k < bucketBoundaries.Length; k++)
                        {
                            if (value <= bucketBoundaries[k])
                            {
                                bucket = k;
                                break;
                            }
                        }

                        instance.setValue(j, bucket.ToString());
                    }

                    // The last attribute is the target and is never bucketed
                    Diabetes.ValueForDiabetes(instance, Diabetes.NumAttributes - 1, data[i][Diabetes.NumAttributes - 1]);
                    instances.add(instance);
                }
            }
            else
            {
                instances = Diabetes.DefineCategoricalAttributes();

                // Each column has its own mapping from the raw number to a categorical value
                foreach (double[] row in data)
                {
                    Instance instance = new Instance(Diabetes.NumAttributes);
                    instance.setDataset(instances);
                    Diabetes.ValueForNumberOfTimesPregnant(instance, 0, row[0]);
                    Diabetes.ValueForPlasmaGlucoseConcentrationt(instance, 1, row[1]);
                    Diabetes.ValueForDiastolicBloodPressure(instance, 2, row[2]);
                    Diabetes.ValueForTricepsSkinFoldThickness(instance, 3, row[3]);
                    Diabetes.ValueForTwoHourSerumInsulin(instance, 4, row[4]);
                    Diabetes.ValueForBmi(instance, 5, row[5]);
                    Diabetes.ValueForDiabetesPedigreeFunction(instance, 6, row[6]);
                    Diabetes.ValueForAge(instance, 7, row[7]);
                    Diabetes.ValueForDiabetes(instance, 8, row[8]);
                    instances.add(instance);
                }
            }

            return(instances);
        }