Example #1
0
        /// <summary>
        /// Checks the accuracy so far: evaluates the weight vector <c>m_weights[instance]</c>
        /// against every row of <paramref name="features"/> and reports the fraction of rows
        /// whose thresholded output matches the target.
        /// </summary>
        /// <param name="instance">Index into <c>m_weights</c>; when <c>m_count</c> is greater than 2
        /// it is also the label value that is mapped to a target of 1 (all other labels map to 0).</param>
        /// <param name="features">Input rows; one weight is applied per feature column.</param>
        /// <param name="labels">Target rows; only column 0 is read.</param>
        /// <returns>1 minus (misclassified rows / total rows). With zero rows the division is 0/0,
        /// so the result is not a meaningful accuracy.</returns>
        private double GetAccuracy(int instance, VMatrix features, VMatrix labels)
        {
            // Hoist the loop-invariant lookups: the weight vector, its bias slot and the
            // matrix dimensions do not change while the rows are being scored.
            var weights = m_weights[instance];
            var biasIndex = weights.Length - 1;
            var rowCount = features.Rows();
            var colCount = features.Cols();
            var errors = 0;

            for (var row = 0; row < rowCount; row++)
            {
                // Fetch the row once instead of once per column.
                var inputs = features.Row(row);
                double net = 0;

                for (var col = 0; col < colCount; col++)
                {
                    net += weights[col] * inputs[col];
                }

                // add the bias (stored in the last slot of the weight vector)
                net += weights[biasIndex];

                // threshold the net value into a binary output
                var z = (net > 0 ? 1.0 : 0);
                var t = labels.Row(row)[0];
                if (m_count > 2)
                {
                    // more than two classes: this weight vector only answers "is it `instance`?"
                    t = (t == instance) ? 1.0 : 0;
                }

                if (t != z)
                {
                    errors++;
                }
            }

            return(1.0 - (1.0 * errors / rowCount));
        }
Example #2
0
        /// <summary>
        /// Performs one assignment pass: every row of <c>_features</c> is added to the cluster
        /// returned by <c>GetNearestCluster</c>, optionally logging each assignment to
        /// <c>_outputFile</c>.
        /// </summary>
        /// <returns>The sum of <c>GetSSE()</c> over all clusters after the assignments.</returns>
        private double TrainK()
        {
            if (_outputFile != null)
            {
                _outputFile.WriteLine("Assigning each row to the cluster of the nearest centroid...");
                _outputFile.WriteLine("The cluster assignments are:");
            }

            // hand each training row to its nearest cluster
            for (var row = 0; row < _features.Rows(); row++)
            {
                var nearest = GetNearestCluster(_features.Row(row));
                nearest.AddInstance(row);

                if (_outputFile == null)
                {
                    continue;
                }

                // log ten "row=cluster" pairs per line: a new tab-indented line is started
                // on every tenth row, otherwise the pairs are comma separated
                if (row % 10 != 0)
                {
                    _outputFile.Write(", ");
                }
                else
                {
                    _outputFile.WriteLine();
                    _outputFile.Write("\t");
                }

                _outputFile.Write(string.Format("{0}={1}", row, nearest.Number));
            }

            if (_outputFile != null)
            {
                // terminate the last line of assignments
                _outputFile.WriteLine();
            }

            // total the per-cluster errors
            var total = 0.0;
            foreach (var member in _clusters)
            {
                total += member.GetSSE();
            }

            return total;
        }
Example #3
0
        /// <summary>
        /// Clusters the given data using the algorithm named by <c>_algorithm</c>:
        /// "k" (k-means), or "single", "complete" or "average" (HAC variants).
        /// When <c>_outputFile</c> is set, progress and a final report are written to it,
        /// and it is closed before this method returns or throws.
        /// </summary>
        /// <param name="features">Data to cluster; wrapped in a full-size <c>VMatrix</c> view stored in <c>_features</c>.</param>
        /// <param name="labels">Labels; wrapped into <c>_labels</c> only when <c>labels.Data</c> is not null.</param>
        /// <exception cref="Exception">Thrown when <c>_algorithm</c> is not one of the supported names.</exception>
        public override void VTrain(VMatrix features, VMatrix labels)
        {
            try
            {
                _features = new VMatrix(features, 0, 0, features.Rows(), features.Cols());
                if (labels.Data != null)
                {
                    _labels = new VMatrix(labels, 0, 0, labels.Rows(), labels.Cols());
                }
                _clusters = new List<Cluster>();

                Console.Write("Algorithm: ");

                if (_algorithm == "k")
                {
                    RunKMeans();
                }
                else if (_algorithm == "single")
                {
                    InitializeHacClusters("HAC single");

                    // do/while: the training step always runs at least once, even when the
                    // number of clusters is already at or below _k.
                    var iteration = 0;
                    do
                    {
                        TrainSingle(iteration++);
                    } while (_clusters.Count > _k);
                }
                else if (_algorithm == "complete")
                {
                    InitializeHacClusters("HAC complete");

                    var iteration = 0;
                    do
                    {
                        TrainComplete(iteration++);
                    } while (_clusters.Count > _k);
                }
                else if (_algorithm == "average")
                {
                    InitializeHacClusters("HAC average");

                    var iteration = 0;
                    do
                    {
                        TrainAverage(iteration++);
                    } while (_clusters.Count > _k);
                }
                else
                {
                    throw new Exception("Invalid Algorithm - " + _algorithm);
                }

                if (_outputFile != null)
                {
                    WriteClusterReport();
                }
            }
            finally
            {
                // Close the log even when training throws, so the writer is not leaked and
                // whatever was logged before the failure is not lost.
                if (_outputFile != null)
                {
                    _outputFile.Close();
                }
            }
        }

        /// <summary>
        /// Runs k-means: creates <c>_k</c> clusters, then alternates an assignment pass
        /// (<c>TrainK</c>) with centroid recomputation until the value returned by
        /// <c>TrainK</c> is the same twice in a row.
        /// </summary>
        private void RunKMeans()
        {
            Console.WriteLine("k-means (k = " + _k + ")");

            // NOTE(review): the original code had a shuffle of the features commented out at
            // this point, so the data is used in its given order.

            // create the initial clusters (cluster number k is built from index k)
            for (var k = 0; k < _k; k++)
            {
                var cluster = new Cluster(k, _features, k, _ignore);
                _clusters.Add(cluster);
                if (_outputFile != null)
                {
                    cluster.PrintCentroid(_outputFile);
                }
            }

            var lastSsd = double.MinValue;

            while (true)
            {
                var ssd = TrainK();
                if (_outputFile != null)
                {
                    _outputFile.WriteLine(string.Format("Sum squared-distance of each row with its centroid={0}", ssd));
                }

                // Converged when the error is exactly unchanged; the clusters keep their
                // instances in that case because ClearInstances is not reached.
                if (ssd == lastSsd)
                {
                    break;
                }

                lastSsd = ssd;
                if (_outputFile != null)
                {
                    _outputFile.WriteLine("Recomputing the centroids of each cluster...");
                }
                foreach (var cluster in _clusters)
                {
                    cluster.Recalculate();
                    cluster.ClearInstances();
                    if (_outputFile != null)
                    {
                        cluster.PrintCentroid(_outputFile);
                    }
                }
            }
        }

        /// <summary>
        /// Shared setup for the HAC variants: logs the algorithm name, creates one cluster per
        /// row of <c>_features</c>, and fills the symmetric row-to-row matrix <c>_distances</c>.
        /// </summary>
        /// <param name="algorithmName">Name written to the log, followed by " (k = ...)".</param>
        private void InitializeHacClusters(string algorithmName)
        {
            if (_outputFile != null)
            {
                _outputFile.WriteLine(algorithmName + " (k = " + _k + ")");
            }

            var rowCount = _features.Rows();

            // create the initial clusters: every row starts in a cluster of its own
            for (var row = 0; row < rowCount; row++)
            {
                var cluster = new Cluster(0, _features, row, _ignore);
                cluster.AddInstance(row);
                _clusters.Add(cluster);
            }

            // create the distance matrix; a new array is zero-filled, so the diagonal
            // needs no explicit assignment and only the upper triangle is computed
            _distances = new double[rowCount, rowCount];

            for (var row = 0; row < rowCount; row++)
            {
                for (var row2 = row + 1; row2 < rowCount; row2++)
                {
                    var distance = _clusters[row].GetDistance(_features.Row(row2));
                    _distances[row, row2] = distance;
                    _distances[row2, row] = distance;
                }
            }
        }

        /// <summary>
        /// Writes the final report to <c>_outputFile</c>: cluster numbers, instance counts, the
        /// centroid value of every non-ignored column, per-cluster and total squared error, and
        /// the value returned by <c>GetDBI</c>. The caller must ensure <c>_outputFile</c> is not null.
        /// </summary>
        private void WriteClusterReport()
        {
            _outputFile.WriteLine();
            _outputFile.WriteLine("Cluster centroids:");

            _outputFile.Write("Cluster#\t\t\t");
            for (var c = 0; c < _clusters.Count; c++)
            {
                _outputFile.Write("\t\t" + c);
            }
            _outputFile.WriteLine();

            _outputFile.Write("# of instances:\t\t\t");
            for (var c = 0; c < _clusters.Count; c++)
            {
                _outputFile.Write("\t\t" + _clusters[c].Instances.Count);
            }
            _outputFile.WriteLine();

            _outputFile.WriteLine("==========================================================================================================");
            for (var col = 0; col < _features.Cols(); col++)
            {
                if (_ignore.Contains(col))
                {
                    continue;
                }

                _outputFile.Write(_features.AttrName(col));
                foreach (var cluster in _clusters)
                {
                    if (cluster.Centroid[col] == Matrix.MISSING)
                    {
                        _outputFile.Write("\t?");
                    }
                    else if (_features.ValueCount(col) < 2)
                    {
                        // continuous
                        _outputFile.Write(string.Format("\t{0:0.#####}", cluster.Centroid[col]));
                    }
                    else
                    {
                        // nominal: print the attribute value name for the centroid's value index
                        _outputFile.Write("\t" + _features.AttrValue(col, (int)cluster.Centroid[col]));
                    }
                }
                _outputFile.WriteLine();
            }

            double sse = 0;
            _outputFile.Write("Sum squared error:\t");
            foreach (var cluster in _clusters)
            {
                var error = cluster.GetSSE();
                sse += error;
                _outputFile.Write(string.Format("\t{0:0.#####}", error));
            }
            _outputFile.WriteLine();

            _outputFile.WriteLine("Number of clusters: " + _clusters.Count);
            _outputFile.WriteLine(string.Format("Total sum squared error: {0:0.#####}", sse));
            _outputFile.WriteLine(string.Format("DBI: {0}", GetDBI()));
        }
Example #4
0
 // Convenience overload: looks up row `row` of `features` and passes its values to the
 // SetInputs overload that accepts the row data directly.
 // NOTE(review): the previous comment here read "Move the inputs down one slot"; any such
 // shifting would have to happen in the overload called below - confirm there.
 private void SetInputs(VMatrix features, int row)
 {
     var rowValues = features.Row(row);
     SetInputs(rowValues);
 }
        /// <summary>
        /// Evaluates this learner on the given data by calling <c>Predict</c> on every row.
        /// For a continuous label (value count of 0) the root mean squared error is returned;
        /// for a nominal label the fraction of correct predictions is returned.
        /// </summary>
        /// <param name="features">Input rows; must have the same number of rows as <paramref name="labels"/> and at least one row.</param>
        /// <param name="labels">Targets; must have exactly one column.</param>
        /// <param name="confusion">Optional (may be null). For nominal labels it is resized to
        /// labelValues x labelValues and filled with target/prediction counts.</param>
        /// <returns>RMSE for continuous labels, otherwise correct predictions divided by the number of scored rows.</returns>
        /// <exception cref="Exception">Thrown when the row counts differ, the labels are not
        /// one-dimensional, there are no rows, or a nominal label value is at or above the label's value count.</exception>
        public double VMeasureAccuracy(VMatrix features, VMatrix labels, Matrix confusion)
        {
            if (features.Rows() != labels.Rows())
            {
                throw (new Exception("Expected the features and labels to have the same number of rows"));
            }
            if (labels.Cols() != 1)
            {
                throw (new Exception("Sorry, this method currently only supports one-dimensional labels"));
            }
            if (features.Rows() == 0)
            {
                throw (new Exception("Expected at least one row"));
            }

            // console column at which the running row index is rewritten in verbose mode
            var cl = 0;

            if (Parameters.Verbose)
            {
                Console.Write("VMeasureAccuracy ");
                cl = Console.CursorLeft;
            }

            // count = number of rows that contribute to the score;
            // begRow = first row that is scored (earlier rows are predicted but not scored)
            var count  = features.Rows();
            var begRow = 0;

            // A BPTT learner skips the first m_k - 1 rows when scoring.
            var learner = this as BPTT;
            if (learner != null)
            {
                begRow = learner.m_k - 1;
                count -= begRow;
            }

            var labelValues = labels.ValueCount(0);

            if (labelValues == 0)             // If the label is continuous...
            {
                // The label is continuous, so measure root mean squared error
                var pred = new double[1];
                var sse  = 0.0;
                for (var i = 0; i < features.Rows(); i++)
                {
                    if (Parameters.Verbose)
                    {
                        Console.SetCursorPosition(cl, Console.CursorTop);
                        Console.Write(i);
                    }

                    var feat = features.Row(i);
                    var targ = labels.Row(i);
                    pred[0] = 0.0;                     // make sure the prediction is not biased by a previous prediction
                    Predict(feat, pred);
                    if (i >= begRow)
                    {
                        var delta = targ[0] - pred[0];
                        sse += (delta * delta);
                    }
                }

                if (Parameters.Verbose)
                {
                    Console.WriteLine();
                }

                return(Math.Sqrt(sse / count));
            }
            else
            {
                // The label is nominal, so measure predictive accuracy
                if (confusion != null)
                {
                    confusion.SetSize(labelValues, labelValues);
                    for (var i = 0; i < labelValues; i++)
                    {
                        confusion.SetAttrName(i, labels.AttrValue(0, i));
                    }
                }
                var correctCount = 0;
                var prediction   = new double[1];
                for (var i = 0; i < features.Rows(); i++)
                {
                    if (Parameters.Verbose)
                    {
                        Console.SetCursorPosition(cl, Console.CursorTop);
                        Console.Write(i);
                    }

                    var feat = features.Row(i);
                    var lab  = labels.Get(i, 0);
                    if (lab != Matrix.MISSING)
                    {
                        var targ = (int)lab;
                        if (targ >= labelValues)
                        {
                            throw new Exception("The label is out of range");
                        }
                        prediction[0] = 0.0;           // same reset as the continuous branch: no carry-over from the previous row
                        Predict(feat, prediction);
                        if (i >= begRow)
                        {
                            var pred = (int)prediction[0];
                            if (confusion != null)
                            {
                                confusion.Set(targ, pred, confusion.Get(targ, pred) + 1);
                            }
                            if (pred == targ)
                            {
                                correctCount++;
                            }
                        }
                    }
                    else if (i >= begRow)
                    {
                        // A row with a missing label cannot be scored. Rows before begRow were
                        // already removed from count above, so only later rows are subtracted
                        // here; otherwise they would be subtracted twice.
                        count--;
                    }
                }

                if (Parameters.Verbose)
                {
                    Console.WriteLine();
                }

                return((double)correctCount / count);
            }
        }
Example #6
0
        /// <summary>
        /// Runs one training epoch for the weight vector <c>m_weights[instance]</c>: each row is
        /// classified by thresholding the weighted sum plus bias, and on a mismatch the weights
        /// and bias are adjusted by <c>(target - output) * m_rate</c> (times the input, for the
        /// non-bias weights).
        /// </summary>
        /// <param name="instance">Index into <c>m_weights</c>; when <c>m_count</c> is greater than 2
        /// it is also the label value that is mapped to a target of 1 (all other labels map to 0).</param>
        /// <param name="epoch">Epoch number; only used for logging.</param>
        /// <param name="features">Input rows.</param>
        /// <param name="labels">Target rows; only column 0 is read.</param>
        /// <returns>The fraction of rows that were misclassified during this epoch.</returns>
        private double TrainEpoch(int instance, int epoch, VMatrix features, VMatrix labels)
        {
            // With no output file, progress is echoed to the console instead.
            var toConsole = m_outputFile == null;

            if (toConsole)
            {
                Console.WriteLine(epoch);
            }

            // Hoist the loop-invariant lookups out of the row/column loops.
            var weights = m_weights[instance];
            var biasIndex = weights.Length - 1;
            var rowCount = features.Rows();
            var colCount = features.Cols();
            var errors = 0;

            for (var row = 0; row < rowCount; row++)
            {
                // Fetch the row once; it is used for both the net value and the weight update.
                var inputs = features.Row(row);
                double net = 0;

                // calculate the net value
                for (var col = 0; col < colCount; col++)
                {
                    net += weights[col] * inputs[col];
                }

                // add the bias (stored in the last slot of the weight vector)
                net += weights[biasIndex];

                var z = (net > 0 ? 1.0 : 0);
                var t = labels.Row(row)[0];
                if (m_count > 2)
                {
                    // more than two classes: this weight vector only answers "is it `instance`?"
                    t = (t == instance) ? 1.0 : 0;
                }

                // check to see if the predicted matches the actual
                if (z != t)
                {
                    errors++;

                    // common factor of every adjustment for this row
                    var step = (t - z) * m_rate;

                    // adjust the weights
                    for (var i = 0; i < biasIndex; i++)
                    {
                        weights[i] += step * inputs[i];
                    }

                    // adjust the bias weight (its input is implicitly 1)
                    weights[biasIndex] += step;
                }
            }

            // print the new weights
            if (toConsole)
            {
                for (var i = 0; i < biasIndex; i++)
                {
                    Console.Write(string.Format("{0}\t", weights[i]));
                }
                Console.WriteLine(weights[biasIndex]);
            }

            var error = 1.0 * errors / rowCount;

            if (toConsole)
            {
                Console.WriteLine(error);
                Console.WriteLine();
            }
            else
            {
                m_outputFile.WriteLine(string.Format("{0}\t{1}", epoch, error));
            }

            return(error);
        }