示例#1
0
        /// <summary>
        /// Console demo: trains a Gaussian-kernel SVM on the records returned by
        /// LoadSample and prints, for every record, its feature values, its actual
        /// label and the label predicted by the trained model.
        /// </summary>
        public static void Run_Classify()
        {
            List<CDataRecord> samples = LoadSample();

            // Feature scaling is applied to the same list that is trained on below;
            // no return value is used, so it presumably rescales the records in place.
            DataTransformer<CDataRecord> scaler = new DataTransformer<CDataRecord>();
            scaler.DoFeaturesScaling(samples);

            KernelSVM<CDataRecord> svm = new KernelSVM<CDataRecord>();
            svm.C = 20;

            // The kernel is configured through a cast, so this throws if the
            // model's kernel is not a GaussianKernel.
            GaussianKernel gaussian = (GaussianKernel)svm.Kernel;
            gaussian.Sigma = 0.01;

            svm.Train(samples);

            Console.WriteLine("SVM(Gaussian Kernel) Model Built!");

            // Predictions are made on the training records themselves.
            foreach (CDataRecord sample in samples)
            {
                Console.WriteLine("rec: ");

                int feature = 0;
                while (feature < sample.Dimension)
                {
                    Console.WriteLine("X[" + feature + "] = " + "[" + sample[feature] + "] ");
                    ++feature;
                }

                Console.WriteLine("Label: " + "[" + sample.Label + "] ");
                Console.WriteLine("Predicted Label: " + svm.Predict(sample));
            }
        }
示例#2
0
        /// <summary>
        /// Console demo: trains a Gaussian-kernel SVM on the records returned by
        /// LoadSample and, for every record, prints its feature values, its label
        /// and the ranking returned by the model for that record.
        /// </summary>
        public static void Run_Rank()
        {
            List<CDataRecord> records = LoadSample();

            DataTransformer<CDataRecord> dt = new DataTransformer<CDataRecord>();

            dt.DoFeaturesScaling(records);

            KernelSVM<CDataRecord> algorithm = new KernelSVM<CDataRecord>();

            // NOTE(review): the previous comments labelled both settings "high bias".
            // In the usual soft-margin / RBF formulation a large C and a small sigma
            // both lower bias (and raise variance) -- confirm against KernelSVM's
            // definition of C and Sigma before relying on either reading.
            algorithm.C = 20;
            ((GaussianKernel)algorithm.Kernel).Sigma = 0.01;

            algorithm.Train(records);

            Console.WriteLine("SVM (Gaussian Kernel) Model Built!");

            // Index loop kept on purpose: the list is also handed to Rank on every pass.
            for (int i = 0; i < records.Count; i++)
            {
                CDataRecord rec = records[i];
                Console.WriteLine("rec: ");
                for (int j = 0; j < rec.Dimension; ++j)
                {
                    // Close the index bracket before the value ("X[0] = v");
                    // the old output read "X[0 = v]".
                    Console.WriteLine("X[" + j + "] = " + rec[j]);
                }
                Console.WriteLine("Label: " + rec.Label);

                List<KeyValuePair<string, double>> ranks = algorithm.Rank(records, rec);
                for (int k = 0; k < ranks.Count; ++k)
                {
                    Console.WriteLine("{0}: score = {1}", ranks[k].Key, ranks[k].Value);
                }
            }
        }
示例#3
0
        /// <summary>
        /// Loads a feature file and a label file, pairs them row by row into
        /// <see cref="CDataRecord"/> instances and repopulates the tabular list view.
        /// Also resets the split percentages to 100% training / 0% testing /
        /// 0% cross validation.
        /// </summary>
        /// <param name="X_filepath">Path passed to MLDataPointUtil.LoadDataSet for the feature rows.</param>
        /// <param name="y_filepath">Path passed to IntDataTableUtil.LoadDataSet; the first value of each row becomes the record label.</param>
        private void ImportData(string X_filepath, string y_filepath)
        {
            mTrainingPercent        = 100;
            mTestingPercent         = 0;
            mCrossValidationPercent = 0;

            lvTabularData.Items.Clear();
            lvTabularData.Columns.Clear();

            List<MLDataPoint> X_points = MLDataPointUtil.LoadDataSet(X_filepath);
            List<List<int>>   y_points = IntDataTableUtil.LoadDataSet(y_filepath);

            List<CDataRecord> X = new List<CDataRecord>(X_points.Count);

            for (int i = 0; i < X_points.Count; ++i)
            {
                int dimension = X_points[i].Dimension;

                CDataRecord X_i = new CDataRecord(dimension);
                for (int j = 0; j < dimension; ++j)
                {
                    // Record features are written starting at index 1.
                    X_i[j + 1] = X_points[i][j];
                }
                X_i.Label = y_points[i][0].ToString();
                X.Add(X_i);
            }

            lvTabularData.Columns.Add("#");
            // An empty data set has no first record to take the column count from;
            // indexing X[0] unconditionally used to throw here.
            if (X.Count > 0)
            {
                for (int i = 0; i < X[0].Dimension; ++i)
                {
                    lvTabularData.Columns.Add(string.Format("X[{0}]", i));
                }
            }
            lvTabularData.Columns.Add("Y");
            lvTabularData.Columns.Add("Data Set");
            lvTabularData.Columns.Add("Predicted Y");

            int m = X.Count;

            for (int i = 0; i < m; ++i)
            {
                CDataRecord X_i = X[i];

                // Row layout: running number, one sub-item per feature index,
                // then label, data-set type and predicted label.
                ListViewItem item = new ListViewItem();
                item.Text = (lvTabularData.Items.Count + 1).ToString();
                for (int j = 0; j < X_i.Dimension; ++j)
                {
                    item.SubItems.Add(X_i[j].ToString());
                }

                item.SubItems.Add(X_i.Label.ToString());

                item.SubItems.Add(X_i.DataSetType.ToString());
                item.SubItems.Add(X_i.PredictedLabel.ToString());

                // The record travels with its row so other handlers can read it back.
                item.Tag = X_i;

                item.ForeColor = Color.Green;
                lvTabularData.Items.Add(item);
            }
        }
示例#4
0
        /// <summary>
        /// Click handler: asks the user for training / testing / cross-validation
        /// percentages, normalizes them so they sum to 100, assigns every row's
        /// record to one of the three sets by row position and refreshes the view.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void btnSplitData_Click(object sender, EventArgs e)
        {
            // A form shown with ShowDialog is not disposed when it closes, so the
            // dialog is released explicitly via using.
            using (FrmDataSplitOption dlg = new FrmDataSplitOption())
            {
                dlg.TrainingPercent        = mTrainingPercent;
                dlg.TestingPercent         = mTestingPercent;
                dlg.CrossValidationPercent = mCrossValidationPercent;

                if (dlg.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                double training_percent        = dlg.TrainingPercent;
                double testing_percent         = dlg.TestingPercent;
                double crossvalidation_percent = dlg.CrossValidationPercent;
                double sum = training_percent + testing_percent + crossvalidation_percent;
                if (sum == 0)
                {
                    // Nothing entered: fall back to "everything is training data".
                    mTrainingPercent        = 100;
                    mTestingPercent         = 0;
                    mCrossValidationPercent = 0;
                }
                else
                {
                    // Rescale so the three shares add up to 100.
                    mTrainingPercent        = training_percent * 100 / sum;
                    mTestingPercent         = testing_percent * 100 / sum;
                    mCrossValidationPercent = crossvalidation_percent * 100 / sum;
                }
            }

            // Rows [0, training_point) are training, [training_point, testing_point)
            // are testing, and everything after that is cross validation.
            int item_count     = lvTabularData.Items.Count;
            int training_point = (int)(item_count * mTrainingPercent / 100);
            int testing_point  = (int)(item_count * mTestingPercent / 100) + training_point;
            for (int item_index = 0; item_index < item_count; ++item_index)
            {
                ListViewItem item = lvTabularData.Items[item_index];
                CDataRecord  rec  = item.Tag as CDataRecord;
                if (item_index < training_point)
                {
                    rec.DataSetType = DataSetTypes.Training;
                }
                else if (item_index < testing_point)
                {
                    rec.DataSetType = DataSetTypes.Testing;
                }
                else
                {
                    rec.DataSetType = DataSetTypes.CrossValidation;
                }
            }

            UpdateTabularDataView();
        }
示例#5
0
        /// <summary>
        /// Randomly permutes <paramref name="list"/> in place using a Fisher-Yates
        /// shuffle driven by a freshly created <see cref="Random"/>.
        /// </summary>
        /// <param name="list">The records to reorder; the list itself is modified.</param>
        public void Shuffle(List<CDataRecord> list)
        {
            Random picker = new Random();

            // Walk from the last slot down to slot 1, swapping each slot with a
            // randomly chosen slot at or before it.
            for (int last = list.Count - 1; last >= 1; --last)
            {
                int chosen = picker.Next(last + 1);

                CDataRecord held = list[chosen];
                list[chosen] = list[last];
                list[last]   = held;
            }
        }
示例#6
0
        /// <summary>
        /// Collects the records attached to the list view rows whose data-set type
        /// equals <paramref name="dstype"/>, in row order.
        /// </summary>
        /// <param name="dstype">The data-set type to select.</param>
        /// <returns>A new list holding the matching records (empty when none match).</returns>
        private List<CDataRecord> LoadDataSet(DataSetTypes dstype)
        {
            List<CDataRecord> selected = new List<CDataRecord>();

            int row_count = lvTabularData.Items.Count;
            for (int row = 0; row < row_count; ++row)
            {
                // Each row carries its record in Tag.
                CDataRecord candidate = lvTabularData.Items[row].Tag as CDataRecord;
                if (candidate.DataSetType == dstype)
                {
                    selected.Add(candidate);
                }
            }

            return selected;
        }
示例#7
0
        /// <summary>
        /// Click handler: shuffles the records attached to the list view rows and
        /// rewrites every row (number, features, label, data-set type, predicted
        /// label, colour and Tag) to show the record now assigned to that position.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void btnShuffleData_Click(object sender, EventArgs e)
        {
            if (lvTabularData.Items.Count == 0)
            {
                return;
            }

            List<CDataRecord> data_set = new List<CDataRecord>();

            foreach (ListViewItem item in lvTabularData.Items)
            {
                data_set.Add(item.Tag as CDataRecord);
            }

            Shuffle(data_set);

            int item_count = lvTabularData.Items.Count;

            for (int k = 0; k < item_count; ++k)
            {
                ListViewItem item = lvTabularData.Items[k];
                CDataRecord  rec  = data_set[k];

                item.Text = (k + 1).ToString();

                // SubItems[0] is the row-number text, so feature j belongs in
                // SubItems[j + 1] -- the layout ImportData creates and
                // UpdateTabularDataView writes. The previous loop started at
                // rec[1] / SubItems[1] and stopped one column short, which skipped
                // rec[0] and shifted label, data set and prediction one column left.
                int col_index = 1;
                for (int j = 0; j < rec.Dimension; ++j)
                {
                    item.SubItems[col_index++].Text = rec[j].ToString();
                }

                item.SubItems[col_index++].Text = rec.Label.ToString();
                item.SubItems[col_index++].Text = rec.DataSetType.ToString();
                item.SubItems[col_index++].Text = rec.PredictedLabel.ToString();
                if (rec.DataSetType == DataSetTypes.Training)
                {
                    item.ForeColor = Color.Green;
                }
                else if (rec.DataSetType == DataSetTypes.Testing)
                {
                    item.ForeColor = Color.Red;
                }
                else
                {
                    item.ForeColor = Color.Blue;
                }

                // Re-attach so the row and its record stay in sync after the shuffle.
                item.Tag = rec;
            }
        }
示例#8
0
        /// <summary>
        /// Reads "database.xml" from the current directory and converts every
        /// "record" element directly under the root into a four-feature
        /// <see cref="CDataRecord"/> (outlook, windy, temperature, humidity)
        /// labelled with the element's "class" attribute.
        /// </summary>
        /// <returns>The records in document order.</returns>
        public static List<CDataRecord> LoadSample()
        {
            XmlDocument doc = new XmlDocument();

            doc.Load("database.xml");

            List<CDataRecord> records = new List<CDataRecord>();

            XmlElement xml_root = doc.DocumentElement;

            // Iterate as XmlNode: ChildNodes can also contain comments and text,
            // and a foreach typed as XmlElement throws InvalidCastException on those.
            foreach (XmlNode xml_node in xml_root.ChildNodes)
            {
                XmlElement xml_level1 = xml_node as XmlElement;
                if (xml_level1 == null || xml_level1.Name != "record")
                {
                    continue;
                }

                string outlook_text = xml_level1.Attributes["outlook"].Value;

                // The categorical outlook is mapped onto a numeric band with random
                // jitter: default band starts at 0, "Overcast" at 0.333, "Rain" at 0.666.
                double outlook = DistributionModel.GetUniform() * 0.333;
                if (outlook_text == "Overcast")
                {
                    outlook = 0.333 + DistributionModel.GetUniform() * 0.333;
                }
                else if (outlook_text == "Rain")
                {
                    outlook = 0.666 + DistributionModel.GetUniform() * 0.333;
                }

                // The XML is machine-readable data, so numbers are parsed with the
                // invariant culture rather than the user's regional settings.
                double temperature = double.Parse(
                    xml_level1.Attributes["temperature"].Value,
                    System.Globalization.CultureInfo.InvariantCulture);
                double humidity = double.Parse(
                    xml_level1.Attributes["humidity"].Value,
                    System.Globalization.CultureInfo.InvariantCulture);
                string windy_text = xml_level1.Attributes["windy"].Value;
                double windy      = windy_text == "true" ? 1 : 0;

                string      class_label = xml_level1.Attributes["class"].Value;
                CDataRecord rec         = new CDataRecord(4);

                //index must start at 1
                rec[1] = outlook;
                rec[2] = windy;
                rec[3] = temperature;
                rec[4] = humidity;

                rec.Label = class_label;

                records.Add(rec);
            }
            return records;
        }
示例#9
0
        /// <summary>
        /// Sums the linear-SVM cost over all class labels in mClassFieldLabels.
        /// For each label the targets are 1 for records carrying that label and 0
        /// otherwise, and the cost is evaluated at that label's entry in mTheta
        /// with the regularization constant mC.
        /// </summary>
        /// <param name="data_set">The records to evaluate.</param>
        /// <returns>The summed cost, or -1 when <paramref name="data_set"/> is empty.</returns>
        public override double ComputeCost(List<T> data_set)
        {
            int rows = data_set.Count;
            if (rows == 0)
            {
                return -1;
            }

            // The first record fixes the number of feature columns.
            int cols = data_set[0].Dimension;

            double[,] features = new double[rows, cols];
            for (int r = 0; r < rows; ++r)
            {
                T sample = data_set[r];
                for (int c = 0; c < cols; ++c)
                {
                    features[r, c] = sample[c];
                }
            }

            // One target buffer, refilled for every class label.
            int[]  targets  = new int[rows];
            double cost_sum = 0;

            foreach (string class_label in mClassFieldLabels)
            {
                for (int r = 0; r < rows; ++r)
                {
                    CDataRecord labelled = data_set[r] as CDataRecord;
                    if (labelled.Label == class_label)
                    {
                        targets[r] = 1;
                    }
                    else
                    {
                        targets[r] = 0;
                    }
                }

                LinearSVMCostFunction cost_fn = new LinearSVMCostFunction(features, targets, cols, rows);
                cost_fn.C = mC;

                cost_sum += cost_fn.Evaluate(mTheta[class_label]);
            }

            return cost_sum;
        }
示例#10
0
        /// <summary>
        /// Rewrites every list view row from the record stored in its Tag: row
        /// number, feature values, label, data-set type, predicted label and the
        /// row colour (green = training, red = testing, blue = anything else).
        /// </summary>
        private void UpdateTabularDataView()
        {
            // The split points that used to be computed here were never read,
            // so they have been removed.
            int item_count = lvTabularData.Items.Count;

            for (int item_index = 0; item_index < item_count; ++item_index)
            {
                ListViewItem item = lvTabularData.Items[item_index];
                CDataRecord  rec  = item.Tag as CDataRecord;

                item.Text = (item_index + 1).ToString();

                // SubItems[0] is the row-number text, so feature j goes to SubItems[j + 1].
                int col_index = 1;
                for (int j = 0; j < rec.Dimension; ++j)
                {
                    item.SubItems[col_index++].Text = rec[j].ToString();
                }

                item.SubItems[col_index++].Text = rec.Label.ToString();
                item.SubItems[col_index++].Text = rec.DataSetType.ToString();
                item.SubItems[col_index++].Text = rec.PredictedLabel.ToString();

                if (rec.DataSetType == DataSetTypes.Training)
                {
                    item.ForeColor = Color.Green;
                }
                else if (rec.DataSetType == DataSetTypes.Testing)
                {
                    item.ForeColor = Color.Red;
                }
                else
                {
                    item.ForeColor = Color.Blue;
                }
            }
        }
示例#11
0
        /// <summary>
        /// Console demo: loads features from "X1.txt" and labels from "y1.txt",
        /// pairs them row by row into <see cref="CDataRecord"/> instances and
        /// trains a linear SVM (C = 100) on the result.
        /// </summary>
        public static void Run_Classify2()
        {
            List<MLDataPoint> X_points = MLDataPointUtil.LoadDataSet("X1.txt");
            List<List<int>>   y_points = IntDataTableUtil.LoadDataSet("y1.txt");

            List<CDataRecord> X = new List<CDataRecord>(X_points.Count);

            for (int i = 0; i < X_points.Count; ++i)
            {
                int dimension = X_points[i].Dimension;

                CDataRecord X_i = new CDataRecord(dimension);
                for (int j = 0; j < dimension; ++j)
                {
                    // X_i index must start at 1. The column index j selects the
                    // slot; the row index i was used here before, which wrote
                    // every feature of a row into the same slot.
                    X_i[j + 1] = X_points[i][j];
                }
                // The first value of the matching label row is the class label.
                X_i.Label = y_points[i][0].ToString();
                X.Add(X_i);
            }

            LinearSVM<CDataRecord> algorithm = new LinearSVM<CDataRecord>();

            algorithm.C = 100;

            algorithm.Train(X);
        }