/// <summary>
        /// Calls ComputeGaussianDistribution on the rows of X{n}.txt, then asks the model for the
        /// probability of every row in Xval{n}.txt and compares it with the first value of the
        /// matching row in pval{n}.txt. Each pair is printed, followed by the summed absolute error.
        /// </summary>
        /// <param name="data_set_index">Number substituted into the X/Xval/pval file names.</param>
        public static void Test_CalcProbability_Xval(int data_set_index)
        {
            List <MLDataPoint> training_rows   = MLDataPointUtil.LoadDataSet(string.Format("X{0}.txt", data_set_index));
            List <MLDataPoint> validation_rows = MLDataPointUtil.LoadDataSet(string.Format("Xval{0}.txt", data_set_index));

            MultiVariateGaussianDistributionAD <MLDataPoint> model = new MultiVariateGaussianDistributionAD <MLDataPoint>();
            model.ComputeGaussianDistribution(training_rows);

            // Reference probabilities; row k is compared against validation row k.
            List <MLDataPoint> expected_rows = MLDataPointUtil.LoadDataSet(string.Format("pval{0}.txt", data_set_index));

            double accumulated_error = 0;
            int    k                 = 0;

            // The validation set drives the iteration count.
            while (k < validation_rows.Count)
            {
                double predicted = model.CalcProbability(validation_rows[k]);

                accumulated_error += System.Math.Abs(expected_rows[k][0] - predicted);
                Console.WriteLine("pval={0} correct_pval={1}", predicted, expected_rows[k]);

                k++;
            }

            Console.WriteLine("Total error: {0}", accumulated_error);
        }
        /// <summary>
        /// Picks a threshold via <c>SelectThreshold</c>, runs outlier detection on X{n}.txt with it,
        /// and prints the reference outliers from outliers{n}.txt next to the predicted ones.
        /// </summary>
        /// <param name="data_set_index">Number substituted into the data file names.</param>
        public static void Test_FindOutliers(int data_set_index)
        {
            double threshold, F1Score;

            SelectThreshold(data_set_index, out F1Score, out threshold);

            List <MLDataPoint> X = MLDataPointUtil.LoadDataSet(string.Format("X{0}.txt", data_set_index));
            List <MLDataPoint> correct_outliers = MLDataPointUtil.LoadDataSet(string.Format("outliers{0}.txt", data_set_index));

            Console.WriteLine("Correct Outliers:");
            foreach (MLDataPoint expected in correct_outliers)
            {
                // Only the first value of each reference row is shown.
                Console.WriteLine("{0}", expected[0]);
            }

            MultiVariateGaussianDistributionAD <MLDataPoint> algorithm = new MultiVariateGaussianDistributionAD <MLDataPoint>();

            algorithm.ComputeGaussianDistribution(X);
            List <int> outliers = algorithm.FindOutliers(X, threshold);

            Console.WriteLine("Predict Outliers:");
            foreach (int found in outliers)
            {
                // Printed incremented by one; presumably converts 0-based row indices to the
                // 1-based numbering used by the reference file - confirm against FindOutliers.
                Console.WriteLine("{0}", found + 1);
            }

            Console.WriteLine("Point Count: {0}", X.Count);

            Console.WriteLine("Threshold: {0}", threshold);

            Console.WriteLine("Predict Outliers Count: {0} Correct Outliers Count: {1}", outliers.Count, correct_outliers.Count);
        }
        /// <summary>
        /// Loads the X, Xval and yval files of data sets 1 and 2, then prints the row and
        /// column count that <c>MLDataPointUtil.GetSize</c> reports for each of them.
        /// </summary>
        public static void Test_LoadDataSet()
        {
            // File to load, paired by position with the format used to report its size.
            string[] file_names =
            {
                "X1.txt",
                "Xval1.txt",
                "yval1.txt",
                "X2.txt",
                "Xval2.txt",
                "yval2.txt"
            };
            string[] size_formats =
            {
                "size(X1)=[{0} {1}]",
                "size(Xval1)=[{0} {1}]",
                "size(yval1)=[{0} {1}]",
                "size(X2)=[{0} {1}]",
                "size(Xval2)=[{0} {1}]",
                "size(yval2)=[{0} {1}]"
            };

            // All files are loaded before anything is printed.
            List <MLDataPoint>[] loaded = new List <MLDataPoint>[file_names.Length];
            for (int k = 0; k < file_names.Length; k++)
            {
                loaded[k] = MLDataPointUtil.LoadDataSet(file_names[k]);
            }

            for (int k = 0; k < loaded.Length; k++)
            {
                int rows, cols;

                MLDataPointUtil.GetSize(loaded[k], out rows, out cols);
                Console.WriteLine(size_formats[k], rows, cols);
            }
        }
예제 #4
0
        /// <summary>
        /// Loads feature rows and labels from two files, converts them into
        /// <c>CDataRecord</c> instances and shows them in <c>lvTabularData</c>.
        /// </summary>
        /// <param name="X_filepath">Path passed to <c>MLDataPointUtil.LoadDataSet</c> for the feature rows.</param>
        /// <param name="y_filepath">
        /// Path passed to <c>IntDataTableUtil.LoadDataSet</c>; the first value of row i becomes
        /// the label of feature row i, so it needs at least as many rows as the feature file.
        /// </param>
        /// <remarks>
        /// The list view is always cleared first. When the feature file yields no rows the
        /// method returns with the list view left empty instead of failing on the first record.
        /// </remarks>
        private void ImportData(string X_filepath, string y_filepath)
        {
            // Reset the split percentages to 100 / 0 / 0.
            mTrainingPercent        = 100;
            mTestingPercent         = 0;
            mCrossValidationPercent = 0;

            lvTabularData.Items.Clear();
            lvTabularData.Columns.Clear();

            List <MLDataPoint> X_points = MLDataPointUtil.LoadDataSet(X_filepath);
            List <List <int> > y_points = IntDataTableUtil.LoadDataSet(y_filepath);

            // Nothing to display; the column layout below is derived from the first record.
            if (X_points.Count == 0)
            {
                return;
            }

            List <CDataRecord> X = new List <CDataRecord>(X_points.Count);

            for (int i = 0; i < X_points.Count; ++i)
            {
                CDataRecord X_i = new CDataRecord(X_points[i].Dimension);
                for (int j = 0; j < X_points[i].Dimension; ++j)
                {
                    // Features are stored starting at record index 1.
                    X_i[j + 1] = X_points[i][j];
                }
                X_i.Label = y_points[i][0].ToString();
                X.Add(X_i);
            }

            // Suspend repainting while columns and rows are added in bulk.
            lvTabularData.BeginUpdate();
            try
            {
                lvTabularData.Columns.Add("#");
                for (int i = 0; i < X[0].Dimension; ++i)
                {
                    lvTabularData.Columns.Add(string.Format("X[{0}]", i));
                }
                lvTabularData.Columns.Add("Y");
                lvTabularData.Columns.Add("Data Set");
                lvTabularData.Columns.Add("Predicted Y");

                foreach (CDataRecord X_i in X)
                {
                    ListViewItem item = new ListViewItem();

                    // 1-based row number shown in the "#" column.
                    item.Text = (lvTabularData.Items.Count + 1).ToString();

                    // NOTE(review): values are read from index 0 although they were written
                    // from index 1 above; whether that lines up depends on how
                    // CDataRecord.Dimension relates to the constructor argument - confirm.
                    for (int j = 0; j < X_i.Dimension; ++j)
                    {
                        item.SubItems.Add(X_i[j].ToString());
                    }

                    item.SubItems.Add(X_i.Label.ToString());

                    item.SubItems.Add(X_i.DataSetType.ToString());
                    item.SubItems.Add(X_i.PredictedLabel.ToString());
                    item.Tag = X_i;

                    item.ForeColor = Color.Green;
                    lvTabularData.Items.Add(item);
                }
            }
            finally
            {
                // Always resume painting, even if building a row failed.
                lvTabularData.EndUpdate();
            }
        }
        /// <summary>
        /// Loads X{n}, Xval{n} and yval{n}, turns the yval rows into boolean flags and
        /// forwards everything to the algorithm's own <c>SelectThreshold</c>.
        /// </summary>
        /// <param name="data_set_index">Number substituted into the data file names.</param>
        /// <param name="F1Score">Receives the score reported by the algorithm through its out parameter.</param>
        /// <param name="threshold">Receives the value returned by the algorithm.</param>
        protected static void SelectThreshold(int data_set_index, out double F1Score, out double threshold)
        {
            List <MLDataPoint> training_rows   = MLDataPointUtil.LoadDataSet(string.Format("X{0}.txt", data_set_index));
            List <MLDataPoint> validation_rows = MLDataPointUtil.LoadDataSet(string.Format("Xval{0}.txt", data_set_index));
            List <MLDataPoint> label_rows      = MLDataPointUtil.LoadDataSet(string.Format("yval{0}.txt", data_set_index));

            // A row is flagged true when its first value is above 0.5.
            bool[] flags = new bool[label_rows.Count];
            int    slot  = 0;

            foreach (MLDataPoint label_row in label_rows)
            {
                flags[slot] = label_row[0] > 0.5;
                slot++;
            }

            MultiVariateGaussianDistributionAD <MLDataPoint> detector = new MultiVariateGaussianDistributionAD <MLDataPoint>();

            threshold = detector.SelectThreshold(training_rows, validation_rows, flags, out F1Score);
        }
예제 #6
0
        /// <summary>
        /// Loads features from X1.txt and labels from y1.txt, converts them into
        /// <c>CDataRecord</c> instances and trains a <c>LinearSVM</c> with C = 100 on them.
        /// </summary>
        public static void Run_Classify2()
        {
            List <MLDataPoint> X_points = MLDataPointUtil.LoadDataSet("X1.txt");
            List <List <int> > y_points = IntDataTableUtil.LoadDataSet("y1.txt");

            List <CDataRecord> X = new List <CDataRecord>();

            for (int i = 0; i < X_points.Count; ++i)
            {
                CDataRecord X_i = new CDataRecord(X_points[i].Dimension);
                for (int j = 0; j < X_points[i].Dimension; ++j)
                {
                    // X_i index must start at 1, so feature j goes into slot j + 1.
                    // The slot is keyed by the feature index j, not the row index i.
                    X_i[j + 1] = X_points[i][j];
                }

                // The first value of label row i is the label of feature row i.
                X_i.Label = y_points[i][0].ToString();
                X.Add(X_i);
            }

            LinearSVM <CDataRecord> algorithm = new LinearSVM <CDataRecord>();

            algorithm.C = 100;

            algorithm.Train(X);
        }