/// <summary>
/// Cross-validation check: calls ComputeGaussianDistribution on the "X{n}.txt" points,
/// evaluates CalcProbability for every row of "Xval{n}.txt", and prints each value next
/// to the matching row of "pval{n}.txt" followed by the summed absolute error.
/// </summary>
/// <param name="data_set_index">Number substituted for {n} in the data file names.</param>
public static void Test_CalcProbability_Xval(int data_set_index)
{
    List<MLDataPoint> trainingPoints = MLDataPointUtil.LoadDataSet(string.Format("X{0}.txt", data_set_index));
    List<MLDataPoint> validationPoints = MLDataPointUtil.LoadDataSet(string.Format("Xval{0}.txt", data_set_index));

    MultiVariateGaussianDistributionAD<MLDataPoint> model = new MultiVariateGaussianDistributionAD<MLDataPoint>();
    model.ComputeGaussianDistribution(trainingPoints);

    // The reference probabilities are read only after the model has been computed.
    List<MLDataPoint> expectedPoints = MLDataPointUtil.LoadDataSet(string.Format("pval{0}.txt", data_set_index));

    double accumulatedError = 0;
    for (int row = 0; row < validationPoints.Count; row++)
    {
        double actual = model.CalcProbability(validationPoints[row]);

        // Only the first component of the reference row takes part in the error.
        accumulatedError += System.Math.Abs(expectedPoints[row][0] - actual);

        Console.WriteLine("pval={0} correct_pval={1}", actual, expectedPoints[row]);
    }

    Console.WriteLine("Total error: {0}", accumulatedError);
}
/// <summary>
/// Prints the reference outliers stored in "outliers{n}.txt" and then the outliers that
/// FindOutliers reports for "X{n}.txt" at the threshold chosen by SelectThreshold,
/// followed by the point count, the threshold and both outlier counts.
/// </summary>
/// <param name="data_set_index">Number substituted for {n} in the data file names.</param>
public static void Test_FindOutliers(int data_set_index)
{
    // The F1 score is required by the call but is not used afterwards.
    double chosenThreshold, bestF1;
    SelectThreshold(data_set_index, out bestF1, out chosenThreshold);

    List<MLDataPoint> points = MLDataPointUtil.LoadDataSet(string.Format("X{0}.txt", data_set_index));
    List<MLDataPoint> expectedOutliers = MLDataPointUtil.LoadDataSet(string.Format("outliers{0}.txt", data_set_index));

    Console.WriteLine("Correct Outliers:");
    foreach (MLDataPoint expected in expectedOutliers)
    {
        Console.WriteLine("{0}", expected[0]);
    }

    MultiVariateGaussianDistributionAD<MLDataPoint> detector = new MultiVariateGaussianDistributionAD<MLDataPoint>();
    detector.ComputeGaussianDistribution(points);
    List<int> predictedOutliers = detector.FindOutliers(points, chosenThreshold);

    Console.WriteLine("Predict Outliers:");
    foreach (int predicted in predictedOutliers)
    {
        // Each returned value is printed incremented by one.
        Console.WriteLine("{0}", predicted + 1);
    }

    Console.WriteLine("Point Count: {0}", points.Count);
    Console.WriteLine("Threshold: {0}", chosenThreshold);
    Console.WriteLine("Predict Outliers Count; {0} Correct Outliers Count: {1}", predictedOutliers.Count, expectedOutliers.Count);
}
/// <summary>
/// Loads the six sample files (X, Xval and yval for data sets 1 and 2) and prints the
/// row and column counts that MLDataPointUtil.GetSize reports for each of them.
/// </summary>
public static void Test_LoadDataSet()
{
    // Parallel tables: the file to load and the line printed for it.
    string[] fileNames =
    {
        "X1.txt",
        "Xval1.txt",
        "yval1.txt",
        "X2.txt",
        "Xval2.txt",
        "yval2.txt"
    };
    string[] sizeFormats =
    {
        "size(X1)=[{0} {1}]",
        "size(Xval1)=[{0} {1}]",
        "size(yval1)=[{0} {1}]",
        "size(X2)=[{0} {1}]",
        "size(Xval2)=[{0} {1}]",
        "size(yval2)=[{0} {1}]"
    };

    // Every file is loaded before anything is printed.
    List<List<MLDataPoint>> dataSets = new List<List<MLDataPoint>>();
    foreach (string fileName in fileNames)
    {
        dataSets.Add(MLDataPointUtil.LoadDataSet(fileName));
    }

    for (int k = 0; k < dataSets.Count; k++)
    {
        int rows, columns;
        MLDataPointUtil.GetSize(dataSets[k], out rows, out columns);
        Console.WriteLine(sizeFormats[k], rows, columns);
    }
}
/// <summary>
/// Replaces the contents of lvTabularData with the records read from a feature file and
/// a label file, and resets the split percentages to 100 / 0 / 0
/// (training / testing / cross validation).
/// </summary>
/// <param name="X_filepath">File passed to MLDataPointUtil.LoadDataSet for the feature rows.</param>
/// <param name="y_filepath">File passed to IntDataTableUtil.LoadDataSet; the first value of each row becomes the label.</param>
private void ImportData(string X_filepath, string y_filepath)
{
    mTrainingPercent = 100;
    mTestingPercent = 0;
    mCrossValidationPercent = 0;

    lvTabularData.Items.Clear();
    lvTabularData.Columns.Clear();

    List<MLDataPoint> featureRows = MLDataPointUtil.LoadDataSet(X_filepath);
    List<List<int>> labelRows = IntDataTableUtil.LoadDataSet(y_filepath);

    // Build one CDataRecord per feature row, labelled from the same row of the label file.
    List<CDataRecord> records = new List<CDataRecord>();
    for (int row = 0; row < featureRows.Count; row++)
    {
        CDataRecord record = new CDataRecord(featureRows[row].Dimension);
        for (int feature = 0; feature < featureRows[row].Dimension; feature++)
        {
            // Feature values are stored one slot higher than their source index.
            record[feature + 1] = featureRows[row][feature];
        }

        record.Label = labelRows[row][0].ToString();
        records.Add(record);
    }

    // Column layout: row number, one column per record slot, label, data set, prediction.
    lvTabularData.Columns.Add("#");
    int slotCount = records[0].Dimension;
    for (int slot = 0; slot < slotCount; slot++)
    {
        lvTabularData.Columns.Add(string.Format("X[{0}]", slot));
    }
    lvTabularData.Columns.Add("Y");
    lvTabularData.Columns.Add("Data Set");
    lvTabularData.Columns.Add("Predicted Y");

    foreach (CDataRecord record in records)
    {
        ListViewItem listItem = new ListViewItem();
        listItem.Text = (lvTabularData.Items.Count + 1).ToString();

        // NOTE(review): slots 0..Dimension-1 are displayed here, while the values above
        // were written to slots 1..Dimension - confirm this against CDataRecord's indexer.
        for (int slot = 0; slot < record.Dimension; slot++)
        {
            listItem.SubItems.Add(record[slot].ToString());
        }

        listItem.SubItems.Add(record.Label.ToString());
        listItem.SubItems.Add(record.DataSetType.ToString());
        listItem.SubItems.Add(record.PredictedLabel.ToString());
        listItem.Tag = record;
        listItem.ForeColor = Color.Green;
        lvTabularData.Items.Add(listItem);
    }
}
/// <summary>
/// Loads "X{n}.txt", "Xval{n}.txt" and "yval{n}.txt" and hands them to
/// MultiVariateGaussianDistributionAD.SelectThreshold.
/// </summary>
/// <param name="data_set_index">Number substituted for {n} in the data file names.</param>
/// <param name="F1Score">Receives the F1 score output by the algorithm's SelectThreshold call.</param>
/// <param name="threshold">Receives the value returned by the algorithm's SelectThreshold call.</param>
protected static void SelectThreshold(int data_set_index, out double F1Score, out double threshold)
{
    List<MLDataPoint> trainingPoints = MLDataPointUtil.LoadDataSet(string.Format("X{0}.txt", data_set_index));
    List<MLDataPoint> validationPoints = MLDataPointUtil.LoadDataSet(string.Format("Xval{0}.txt", data_set_index));
    List<MLDataPoint> rawLabels = MLDataPointUtil.LoadDataSet(string.Format("yval{0}.txt", data_set_index));

    // A label row counts as true when its first component is greater than 0.5.
    bool[] validationLabels = rawLabels.ConvertAll(labelRow => labelRow[0] > 0.5).ToArray();

    MultiVariateGaussianDistributionAD<MLDataPoint> detector = new MultiVariateGaussianDistributionAD<MLDataPoint>();
    threshold = detector.SelectThreshold(trainingPoints, validationPoints, validationLabels, out F1Score);
}
/// <summary>
/// Loads the feature rows from "X1.txt" and the labels from "y1.txt", converts them into
/// CDataRecord instances and trains a LinearSVM with C = 100 on the result.
/// </summary>
public static void Run_Classify2()
{
    List<MLDataPoint> X_points = MLDataPointUtil.LoadDataSet("X1.txt");
    List<List<int>> y_points = IntDataTableUtil.LoadDataSet("y1.txt");

    List<CDataRecord> X = new List<CDataRecord>();
    for (int i = 0; i < X_points.Count; ++i)
    {
        CDataRecord X_i = new CDataRecord(X_points[i].Dimension);
        for (int j = 0; j < X_points[i].Dimension; ++j)
        {
            // X_i index must start at 1, so feature j is stored in slot j + 1.
            // The slot has to follow the feature index j; using the row index i would
            // write every feature of a row into the same slot.
            X_i[j + 1] = X_points[i][j];
        }

        // The first value of the matching label row becomes the record's label.
        X_i.Label = y_points[i][0].ToString();
        X.Add(X_i);
    }

    LinearSVM<CDataRecord> algorithm = new LinearSVM<CDataRecord>();
    algorithm.C = 100;
    algorithm.Train(X);
}