/// <summary>
/// Demo entry point: loads the sample records, runs feature scaling on them, trains a
/// KernelSVM whose kernel is cast to GaussianKernel, and then prints every record's
/// feature values, stored label and the label returned by Predict.
/// </summary>
public static void Run_Classify()
{
    List<CDataRecord> samples = LoadSample();

    // Feature scaling is applied to the loaded records before they are used for training.
    DataTransformer<CDataRecord> scaler = new DataTransformer<CDataRecord>();
    scaler.DoFeaturesScaling(samples);

    KernelSVM<CDataRecord> svm = new KernelSVM<CDataRecord>();
    svm.C = 20;
    GaussianKernel gaussian = (GaussianKernel)svm.Kernel;
    gaussian.Sigma = 0.01;
    svm.Train(samples);
    Console.WriteLine("SVM(Gaussian Kernel) Model Built!");

    int position = 0;
    while (position < samples.Count)
    {
        CDataRecord current = samples[position];

        Console.WriteLine("rec: ");
        for (int feature = 0; feature < current.Dimension; feature++)
        {
            Console.WriteLine("X[" + feature + "] = [" + current[feature] + "] ");
        }

        Console.WriteLine("Label: [" + current.Label + "] ");
        Console.WriteLine("Predicted Label: " + svm.Predict(samples[position]));

        position++;
    }
}
/// <summary>
/// Demo entry point: loads the sample records, runs feature scaling on them, trains a
/// KernelSVM with a Gaussian kernel and, for every record, prints its features and label
/// followed by the key/score pairs returned by <c>Rank</c> for that record.
/// </summary>
public static void Run_Rank()
{
    List<CDataRecord> records = LoadSample();

    DataTransformer<CDataRecord> dt = new DataTransformer<CDataRecord>();
    dt.DoFeaturesScaling(records);

    KernelSVM<CDataRecord> algorithm = new KernelSVM<CDataRecord>();
    // NOTE(review): the original inline comments described both settings below as
    // "high bias"; how C and Sigma trade bias against variance depends on the
    // KernelSVM implementation - confirm before relying on that wording.
    algorithm.C = 20;
    ((GaussianKernel)algorithm.Kernel).Sigma = 0.01;
    algorithm.Train(records);
    Console.WriteLine("SVM (Gaussian Kernel) Model Built!");

    // Indexed loop on purpose: the list itself is handed to Rank on every iteration.
    for (int i = 0; i < records.Count; i++)
    {
        CDataRecord rec = records[i];

        Console.WriteLine("rec: ");
        for (int j = 0; j < rec.Dimension; ++j)
        {
            // Close the bracket after the index: prints "X[0] = 0.5" rather than "X[0 = 0.5]".
            Console.WriteLine("X[" + j + "] = " + rec[j]);
        }
        Console.WriteLine("Label: " + rec.Label);

        List<KeyValuePair<string, double>> ranks = algorithm.Rank(records, rec);
        for (int k = 0; k < ranks.Count; ++k)
        {
            Console.WriteLine("{0}: score = {1}", ranks[k].Key, ranks[k].Value);
        }
    }
}
/// <summary>
/// Loads a feature file and a label file, converts each row into a CDataRecord and
/// rebuilds the tabular list view (columns and rows) from those records.
/// </summary>
/// <param name="X_filepath">Path handed to MLDataPointUtil.LoadDataSet for the feature rows.</param>
/// <param name="y_filepath">Path handed to IntDataTableUtil.LoadDataSet for the label rows.</param>
private void ImportData(string X_filepath, string y_filepath)
{
    // A fresh import starts with everything assigned to the training set.
    mTrainingPercent = 100;
    mTestingPercent = 0;
    mCrossValidationPercent = 0;

    lvTabularData.Items.Clear();
    lvTabularData.Columns.Clear();

    List<MLDataPoint> X_points = MLDataPointUtil.LoadDataSet(X_filepath);
    List<List<int>> y_points = IntDataTableUtil.LoadDataSet(y_filepath);

    List<CDataRecord> X = new List<CDataRecord>(X_points.Count);
    for (int i = 0; i < X_points.Count; ++i)
    {
        CDataRecord X_i = new CDataRecord(X_points[i].Dimension);
        for (int j = 0; j < X_points[i].Dimension; ++j)
        {
            // Source value j is stored at record index j + 1.
            X_i[j + 1] = X_points[i][j];
        }
        // The first value of the matching label row becomes the record's label.
        X_i.Label = y_points[i][0].ToString();
        X.Add(X_i);
    }

    // Nothing was loaded: leave the view cleared instead of failing on X[0] below.
    if (X.Count == 0)
    {
        return;
    }

    // Column layout: row number, one column per record index, label, data set, prediction.
    lvTabularData.Columns.Add("#");
    for (int i = 0; i < X[0].Dimension; ++i)
    {
        lvTabularData.Columns.Add(string.Format("X[{0}]", i));
    }
    lvTabularData.Columns.Add("Y");
    lvTabularData.Columns.Add("Data Set");
    lvTabularData.Columns.Add("Predicted Y");

    for (int i = 0; i < X.Count; ++i)
    {
        CDataRecord X_i = X[i];

        ListViewItem item = new ListViewItem();
        item.Text = (lvTabularData.Items.Count + 1).ToString();
        for (int j = 0; j < X_i.Dimension; ++j)
        {
            item.SubItems.Add(X_i[j].ToString());
        }
        item.SubItems.Add(X_i.Label.ToString());
        item.SubItems.Add(X_i.DataSetType.ToString());
        item.SubItems.Add(X_i.PredictedLabel.ToString());

        // The record travels with its row so later handlers can read it back from Tag.
        item.Tag = X_i;
        item.ForeColor = Color.Green;
        lvTabularData.Items.Add(item);
    }
}
/// <summary>
/// Asks the user for training / testing / cross-validation percentages, normalizes them
/// so they add up to 100, assigns each row's record to a data set according to the row's
/// position in the list view, and refreshes the view.
/// </summary>
private void btnSplitData_Click(object sender, EventArgs e)
{
    // A form shown with ShowDialog is not disposed when it closes, so dispose it here.
    using (FrmDataSplitOption dlg = new FrmDataSplitOption())
    {
        dlg.TrainingPercent = mTrainingPercent;
        dlg.TestingPercent = mTestingPercent;
        dlg.CrossValidationPercent = mCrossValidationPercent;

        if (dlg.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        double training_percent = dlg.TrainingPercent;
        double testing_percent = dlg.TestingPercent;
        double crossvalidation_percent = dlg.CrossValidationPercent;
        double sum = training_percent + testing_percent + crossvalidation_percent;

        if (sum == 0)
        {
            // All three left at zero: fall back to "everything is training data".
            mTrainingPercent = 100;
            mTestingPercent = 0;
            mCrossValidationPercent = 0;
        }
        else
        {
            // Rescale so the three shares sum to 100.
            mTrainingPercent = training_percent * 100 / sum;
            mTestingPercent = testing_percent * 100 / sum;
            mCrossValidationPercent = crossvalidation_percent * 100 / sum;
        }
    }

    int item_count = lvTabularData.Items.Count;
    // Rows [0, training_end) are training, [training_end, testing_end) are testing,
    // and everything after that is cross-validation.
    int training_end = (int)(item_count * mTrainingPercent / 100);
    int testing_end = (int)(item_count * mTestingPercent / 100) + training_end;

    for (int item_index = 0; item_index < item_count; ++item_index)
    {
        CDataRecord rec = lvTabularData.Items[item_index].Tag as CDataRecord;
        if (item_index < training_end)
        {
            rec.DataSetType = DataSetTypes.Training;
        }
        else if (item_index < testing_end)
        {
            rec.DataSetType = DataSetTypes.Testing;
        }
        else
        {
            rec.DataSetType = DataSetTypes.CrossValidation;
        }
    }

    UpdateTabularDataView();
}
/// <summary>
/// Randomly permutes <paramref name="list"/> in place: walking from the last slot down to
/// the second, each slot is swapped with a randomly chosen slot at or before it.
/// </summary>
/// <param name="list">The records to reorder; the same list instance is modified.</param>
public void Shuffle(List<CDataRecord> list)
{
    Random rng = new Random();

    for (int last = list.Count - 1; last > 0; --last)
    {
        // Next(last + 1) yields an index in [0, last], so a slot may swap with itself.
        int pick = rng.Next(last + 1);

        CDataRecord held = list[pick];
        list[pick] = list[last];
        list[last] = held;
    }
}
/// <summary>
/// Collects, in row order, the records attached to the list view rows whose
/// DataSetType equals <paramref name="dstype"/>.
/// </summary>
/// <param name="dstype">The data set to select.</param>
/// <returns>A new list holding the matching records; empty when none match.</returns>
private List<CDataRecord> LoadDataSet(DataSetTypes dstype)
{
    List<CDataRecord> matches = new List<CDataRecord>();

    for (int row = 0; row < lvTabularData.Items.Count; ++row)
    {
        // Each row carries its record in Tag.
        CDataRecord candidate = lvTabularData.Items[row].Tag as CDataRecord;
        if (candidate.DataSetType == dstype)
        {
            matches.Add(candidate);
        }
    }

    return matches;
}
/// <summary>
/// Shuffles the records attached to the list view rows and rewrites every row (number,
/// feature cells, label, data set, predicted label, colour, Tag) from the record that
/// ends up at that position.
/// </summary>
private void btnShuffleData_Click(object sender, EventArgs e)
{
    int item_count = lvTabularData.Items.Count;
    if (item_count == 0)
    {
        return;
    }

    List<CDataRecord> data_set = new List<CDataRecord>(item_count);
    foreach (ListViewItem item in lvTabularData.Items)
    {
        data_set.Add(item.Tag as CDataRecord);
    }

    Shuffle(data_set);

    for (int k = 0; k < item_count; ++k)
    {
        ListViewItem item = lvTabularData.Items[k];
        CDataRecord rec = data_set[k];

        item.Tag = rec;
        item.Text = (k + 1).ToString();

        // SubItems[0] is the row-number cell itself, so record index j is shown in
        // SubItems[j + 1] - the same layout ImportData creates and
        // UpdateTabularDataView refreshes. Starting the copy at index 1 (as before)
        // shifted every feature, the label and the data set one column to the left
        // and never refreshed the predicted-label cell.
        int col_index = 1;
        for (int j = 0; j < rec.Dimension; ++j)
        {
            item.SubItems[col_index++].Text = rec[j].ToString();
        }
        item.SubItems[col_index++].Text = rec.Label.ToString();
        item.SubItems[col_index++].Text = rec.DataSetType.ToString();
        item.SubItems[col_index++].Text = rec.PredictedLabel.ToString();

        if (rec.DataSetType == DataSetTypes.Training)
        {
            item.ForeColor = Color.Green;
        }
        else if (rec.DataSetType == DataSetTypes.Testing)
        {
            item.ForeColor = Color.Red;
        }
        else
        {
            item.ForeColor = Color.Blue;
        }
    }
}
/// <summary>
/// Reads "database.xml" and turns every <c>record</c> element directly under the document
/// root into a four-feature CDataRecord (outlook, windy, temperature, humidity) labelled
/// with the element's <c>class</c> attribute.
/// </summary>
/// <returns>The records in document order.</returns>
public static List<CDataRecord> LoadSample()
{
    XmlDocument doc = new XmlDocument();
    doc.Load("database.xml");

    // The file is machine-written data, so numbers are parsed culture-independently;
    // otherwise a decimal-comma locale would misread values such as "85.5".
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    List<CDataRecord> records = new List<CDataRecord>();

    // Enumerate as XmlNode: ChildNodes may also contain comments or other non-element
    // nodes, and iterating them as XmlElement would throw InvalidCastException.
    foreach (XmlNode child in doc.DocumentElement.ChildNodes)
    {
        XmlElement xml_level1 = child as XmlElement;
        if (xml_level1 == null || xml_level1.Name != "record")
        {
            continue;
        }

        // Outlook is mapped to a random value inside one of three bands:
        // default [0, 0.333), "Overcast" [0.333, 0.666), "Rain" [0.666, 0.999).
        string outlook_text = xml_level1.Attributes["outlook"].Value;
        double outlook = DistributionModel.GetUniform() * 0.333;
        if (outlook_text == "Overcast")
        {
            outlook = 0.333 + DistributionModel.GetUniform() * 0.333;
        }
        else if (outlook_text == "Rain")
        {
            outlook = 0.666 + DistributionModel.GetUniform() * 0.333;
        }

        double temperature = double.Parse(xml_level1.Attributes["temperature"].Value, invariant);
        double humidity = double.Parse(xml_level1.Attributes["humidity"].Value, invariant);

        string windy_text = xml_level1.Attributes["windy"].Value;
        double windy = windy_text == "true" ? 1 : 0;

        string class_label = xml_level1.Attributes["class"].Value;

        CDataRecord rec = new CDataRecord(4);
        // Index must start at 1.
        rec[1] = outlook;
        rec[2] = windy;
        rec[3] = temperature;
        rec[4] = humidity;
        rec.Label = class_label;

        records.Add(rec);
    }

    return records;
}
/// <summary>
/// Sums, over every label in mClassFieldLabels, the value of a LinearSVMCostFunction
/// built from <paramref name="data_set"/> with 1/0 targets for that label and evaluated
/// at the label's entry in mTheta.
/// </summary>
/// <param name="data_set">The records to evaluate against.</param>
/// <returns>The summed cost, or -1 when <paramref name="data_set"/> is empty.</returns>
public override double ComputeCost(List<T> data_set)
{
    int rows = data_set.Count;
    if (rows == 0)
    {
        return -1;
    }

    // The first record's Dimension fixes the number of columns for every row.
    int cols = data_set[0].Dimension;

    double[,] features = new double[rows, cols];
    for (int r = 0; r < rows; ++r)
    {
        T sample = data_set[r];
        for (int c = 0; c < cols; ++c)
        {
            features[r, c] = sample[c];
        }
    }

    // One target buffer, refilled for each label before its cost function is built.
    int[] targets = new int[rows];
    double accumulated = 0;

    foreach (string label in mClassFieldLabels)
    {
        for (int r = 0; r < rows; ++r)
        {
            CDataRecord sample = data_set[r] as CDataRecord;
            if (sample.Label == label)
            {
                targets[r] = 1;
            }
            else
            {
                targets[r] = 0;
            }
        }

        LinearSVMCostFunction cost = new LinearSVMCostFunction(features, targets, cols, rows);
        cost.C = mC;
        accumulated += cost.Evaluate(mTheta[label]);
    }

    return accumulated;
}
/// <summary>
/// Rewrites every list view row from the record stored in its Tag: row number, one cell
/// per record index, label, data set, predicted label, and a colour that depends on the
/// record's data set (green = training, red = testing, blue = anything else).
/// </summary>
private void UpdateTabularDataView()
{
    int item_count = lvTabularData.Items.Count;

    for (int item_index = 0; item_index < item_count; ++item_index)
    {
        ListViewItem item = lvTabularData.Items[item_index];
        CDataRecord rec = item.Tag as CDataRecord;

        item.Text = (item_index + 1).ToString();

        // SubItems[0] is the row-number cell itself, so record index j goes to
        // SubItems[j + 1]; the label, data set and prediction cells follow directly.
        int col_index = 1;
        for (int j = 0; j < rec.Dimension; ++j)
        {
            item.SubItems[col_index++].Text = rec[j].ToString();
        }
        item.SubItems[col_index++].Text = rec.Label.ToString();
        item.SubItems[col_index++].Text = rec.DataSetType.ToString();
        item.SubItems[col_index++].Text = rec.PredictedLabel.ToString();

        if (rec.DataSetType == DataSetTypes.Training)
        {
            item.ForeColor = Color.Green;
        }
        else if (rec.DataSetType == DataSetTypes.Testing)
        {
            item.ForeColor = Color.Red;
        }
        else
        {
            item.ForeColor = Color.Blue;
        }
    }
}
/// <summary>
/// Demo entry point: loads features from "X1.txt" and labels from "y1.txt", converts each
/// row into a CDataRecord and trains a LinearSVM (C = 100) on the result.
/// </summary>
public static void Run_Classify2()
{
    List<MLDataPoint> X_points = MLDataPointUtil.LoadDataSet("X1.txt");
    List<List<int>> y_points = IntDataTableUtil.LoadDataSet("y1.txt");

    List<CDataRecord> X = new List<CDataRecord>(X_points.Count);
    for (int i = 0; i < X_points.Count; ++i)
    {
        CDataRecord X_i = new CDataRecord(X_points[i].Dimension);
        for (int j = 0; j < X_points[i].Dimension; ++j)
        {
            // X_i index must start at 1, so source value j goes to slot j + 1.
            // (Indexing by the row counter i wrote every value of a row into one slot.)
            X_i[j + 1] = X_points[i][j];
        }
        // The first value of the matching label row becomes the record's label.
        X_i.Label = y_points[i][0].ToString();
        X.Add(X_i);
    }

    LinearSVM<CDataRecord> algorithm = new LinearSVM<CDataRecord>();
    algorithm.C = 100;
    algorithm.Train(X);
}