Reader for data files containing samples in libsvm's sparse format.
Inheritance: IDisposable
Exemplo n.º 1
0
        /// <summary>
        ///   Reads the iris_scale resource one sample at a time: first a dense
        ///   read, then a sparse read, then dense reads until the end of the
        ///   stream, checking labels, descriptions, sample lengths and the
        ///   total number of samples (150).
        /// </summary>
        /// 
        public void ReadSampleTest()
        {
            // http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html#iris

            // Suppose we are going to read a sparse sample file containing
            //  samples which have an actual dimension of 4. Since the samples
            //  are in a sparse format, each entry in the file will probably
            //  have a much smaller number of elements.
            int sampleSize = 4;

            // Both the stream and the reader are disposable, so they are
            //  released deterministically even when an assertion fails.
            using (MemoryStream file = new MemoryStream(
                Encoding.Default.GetBytes(Accord.Tests.IO.Properties.Resources.iris_scale)))
            // Create a new Sparse Sample Reader to read any given file,
            //  passing the correct dense sample size in the constructor
            using (SparseReader reader = new SparseReader(file, Encoding.Default, sampleSize))
            {
                // Declare some variables to receive each current sample
                int label;
                string description;
                double[] sample;

                // Read the first sample from the file in dense form
                var r = reader.ReadDense();
                sample = r.Item1;
                label = (int)r.Item2;
                description = reader.SampleDescriptions[0];

                Assert.AreEqual(1, label);
                Assert.AreEqual(String.Empty, description);

                Assert.AreEqual(4, sample.Length);
                Assert.AreEqual(-0.555556, sample[0], 0.0001);
                Assert.AreEqual(+0.250000, sample[1], 0.0001);
                Assert.AreEqual(-0.864407, sample[2], 0.0001);
                Assert.AreEqual(-0.916667, sample[3], 0.0001);

                // Read the second sample in sparse form. The asserts below
                //  expect the converted array to interleave index and value
                //  (index, value, index, value, ...), hence 8 elements.
                var s = reader.ReadSparse();
                sample = s.Item1.ToSparse();
                label = (int)s.Item2;
                // NOTE(review): this still reads index 0; if descriptions accumulate
                //  per read, the second sample's entry would be at index 1 - confirm.
                description = reader.SampleDescriptions[0];

                Assert.AreEqual(1, label);
                Assert.AreEqual(String.Empty, description);

                Assert.AreEqual(8, sample.Length);
                Assert.AreEqual(0, sample[0], 0.0001);
                Assert.AreEqual(-0.666667, sample[1], 0.0001);
                Assert.AreEqual(1, sample[2], 0.0001);
                Assert.AreEqual(-0.166667, sample[3], 0.0001);
                Assert.AreEqual(2, sample[4], 0.0001);
                Assert.AreEqual(-0.864407, sample[5], 0.0001);
                Assert.AreEqual(3, sample[6], 0.0001);
                Assert.AreEqual(-0.916667, sample[7], 0.0001);

                // Two samples have already been consumed above
                int count = 2;

                // Read all remaining samples from the file
                while (!reader.EndOfStream)
                {
                    // Clear first so that index 0 refers to the sample read next
                    reader.SampleDescriptions.Clear();
                    r = reader.ReadDense();
                    sample = r.Item1;
                    label = (int)r.Item2;
                    description = reader.SampleDescriptions[0];
                    Assert.IsTrue(label >= 0 && label <= 3);
                    Assert.IsTrue(description == String.Empty);
                    Assert.AreEqual(4, sample.Length);
                    count++;
                }

                Assert.AreEqual(150, count);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        ///   Creates a reader without specifying the sample size and checks
        ///   that it reports 4 dimensions for the iris_scale resource, then
        ///   reads every sample in dense form and validates labels,
        ///   descriptions and sample lengths.
        /// </summary>
        /// 
        public void GuessNumberOfDimensionsTest()
        {
            // Dispose the stream and the reader even if an assertion throws
            using (MemoryStream file = new MemoryStream(
                Encoding.Default.GetBytes(Resources.iris_scale)))
            using (SparseReader reader = new SparseReader(file, Encoding.Default))
            {
                // No sample size was given to the constructor
                Assert.AreEqual(4, reader.Dimensions);

                var r = reader.ReadDenseToEnd();
                double[][] samples = r.Item1;
                int[] labels = r.Item2.ToInt32();
                string[] descriptions = reader.SampleDescriptions.ToArray();

                Assert.AreEqual(150, samples.Length);

                // samples.Length was asserted to be 150 just above
                for (int i = 0; i < samples.Length; i++)
                {
                    Assert.IsTrue(labels[i] >= 0 && labels[i] <= 3);
                    Assert.IsTrue(descriptions[i] == String.Empty);
                    Assert.AreEqual(4, samples[i].Length);
                }
            }
        }
Exemplo n.º 3
0
        /// <summary>
        ///   Reads the whole a9a_train resource in dense form without giving
        ///   the reader a sample size and without querying its dimensions
        ///   beforehand, then checks that 26049 samples of length 123 with
        ///   labels -1 or +1 and empty descriptions were produced.
        /// </summary>
        /// 
        public void GuessDimensionsInMiddleRunTest()
        {
            // Dispose the stream and the reader even if an assertion throws
            using (MemoryStream file = new MemoryStream(
                Encoding.Default.GetBytes(Resources.a9a_train)))
            using (SparseReader reader = new SparseReader(file, Encoding.Default))
            {
                var r = reader.ReadDenseToEnd();
                double[][] samples = r.Item1;
                int[] labels = r.Item2.ToInt32();
                string[] descriptions = reader.SampleDescriptions.ToArray();

                Assert.AreEqual(26049, samples.Length);
                for (int i = 0; i < labels.Length; i++)
                {
                    Assert.IsTrue(labels[i] == -1 || labels[i] == 1);
                    Assert.IsTrue(descriptions[i] == String.Empty);
                    Assert.AreEqual(123, samples[i].Length);
                }
            }
        }
Exemplo n.º 4
0
        /// <summary>
        ///   Checks that a reader created without a sample size reports 123
        ///   dimensions for the a9a_train resource, then reads everything
        ///   through the out-parameter overload of ReadToEnd and validates
        ///   the 26049 resulting samples.
        /// </summary>
        /// 
        public void DimensionsTest()
        {
            // Dispose the stream and the reader even if an assertion throws
            using (MemoryStream file = new MemoryStream(
                Encoding.Default.GetBytes(Resources.a9a_train)))
            using (SparseReader reader = new SparseReader(file, Encoding.Default))
            {
                Assert.AreEqual(123, reader.Dimensions);

                // Out arguments need no initial value
                int[] labels;
                string[] descriptions;

                double[][] samples = reader.ReadToEnd(out labels, out descriptions);

                Assert.AreEqual(26049, samples.Length);
                for (int i = 0; i < labels.Length; i++)
                {
                    Assert.IsTrue(labels[i] == -1 || labels[i] == 1);
                    Assert.IsTrue(descriptions[i] == String.Empty);
                    Assert.AreEqual(123, samples[i].Length);
                }
            }
        }
Exemplo n.º 5
0
        /// <summary>
        ///   Reads the whole iris_scale resource in dense form using a reader
        ///   constructed with an explicit sample size of 4, then validates the
        ///   150 resulting samples, their labels and their descriptions.
        /// </summary>
        /// 
        public void ReadAllTest()
        {
            // Suppose we are going to read a sparse sample file containing
            //  samples which have an actual dimension of 4. Since the samples
            //  are in a sparse format, each entry in the file will probably
            //  have a much smaller number of elements.
            int sampleSize = 4;

            // Dispose the stream and the reader even if an assertion throws
            using (MemoryStream file = new MemoryStream(
                Encoding.Default.GetBytes(Resources.iris_scale)))
            // Create a new Sparse Sample Reader to read any given file,
            //  passing the correct dense sample size in the constructor
            using (SparseReader reader = new SparseReader(file, Encoding.Default, sampleSize))
            {
                // Read the sparse samples and store them in a dense vector array.
                //  Labels come back with the samples; the descriptions (or
                //  comments) are taken from the reader's SampleDescriptions.
                var r = reader.ReadDenseToEnd();
                double[][] samples = r.Item1;
                int[] labels = r.Item2.ToInt32();
                string[] descriptions = reader.SampleDescriptions.ToArray();

                Assert.AreEqual(150, samples.Length);

                // samples.Length was asserted to be 150 just above
                for (int i = 0; i < samples.Length; i++)
                {
                    Assert.IsTrue(labels[i] >= 0 && labels[i] <= 3);
                    Assert.IsTrue(descriptions[i] == String.Empty);
                    Assert.AreEqual(4, samples[i].Length);
                }
            }
        }
Exemplo n.º 6
0
        /// <summary>
        ///   Reads a problem specified in LibSVM's sparse format.
        /// </summary>
        /// 
        /// <param name="filename">The file name handed to the <c>SparseReader</c> constructor.</param>
        /// <param name="bias">When greater than zero, this value is assigned
        ///   to the reader's <c>Intercept</c> property before reading.</param>
        /// 
        /// <returns>A <c>Problem</c> holding the reader's dimensions, the
        ///   inputs that were read and their output labels.</returns>
        /// 
        public static Problem read_problem(string filename, double bias)
        {
            // Create a LibSVM's sparse data reader. The reader is disposable,
            // so release it (and the file behind it) as soon as we are done.
            using (var reader = new SparseReader(filename))
            {
                if (bias > 0)
                    reader.Intercept = bias;

                double[] y; // read inputs and labels
                double[][] x = reader.ReadToEnd(out y);

                return new Problem()
                {
                    Dimensions = reader.Dimensions,
                    Inputs = x,
                    Outputs = y,
                };
            }
        }
        /// <summary>
        ///   Trains one multiclass linear machine on the dense form of the
        ///   iris_scale resource and another on its sparse form, using the
        ///   same random seed, and checks that both end up with the same
        ///   number of models, equal weights and matching support vectors.
        /// </summary>
        /// 
        public void SparseLinearTest()
        {
            MulticlassSupportVectorMachine<Linear> svm1;
            MulticlassSupportVectorMachine<Linear, Sparse<double>> svm2;

            // First machine: learned from dense samples
            {
                Accord.Math.Random.Generator.Seed = 0;

                // Dispose the stream and the reader once learning has finished
                using (MemoryStream file = new MemoryStream(
                    Encoding.Default.GetBytes(Resources.iris_scale)))
                using (var reader = new SparseReader(file, Encoding.Default))
                {
                    var samples = reader.ReadDenseToEnd();
                    double[][] x = samples.Item1;
                    int[] y = samples.Item2.ToMulticlass();

                    var learner = new MulticlassSupportVectorLearning<Linear>()
                    {
                        Learner = (p) => new LinearDualCoordinateDescent<Linear>()
                    };

                    svm1 = learner.Learn(x, y);
                }
            }

            // Second machine: learned from sparse samples, same seed
            {
                Accord.Math.Random.Generator.Seed = 0;

                // Create a new Sparse Sample Reader over the same resource;
                //  no sample size is passed to the constructor here
                using (MemoryStream file = new MemoryStream(
                    Encoding.Default.GetBytes(Resources.iris_scale)))
                using (var reader = new SparseReader(file, Encoding.Default))
                {
                    var samples = reader.ReadSparseToEnd();
                    Sparse<double>[] x = samples.Item1;
                    int[] y = samples.Item2.ToMulticlass();

                    var learner = new MulticlassSupportVectorLearning<Linear, Sparse<double>>()
                    {
                        Learner = (p) => new LinearDualCoordinateDescent<Linear, Sparse<double>>()
                    };

                    svm2 = learner.Learn(x, y);
                }
            }

            Assert.AreEqual(svm1.Models.Length, svm2.Models.Length);
            for (int i = 0; i < svm1.Models.Length; i++)
            {
                var ma = svm1[i].Value;
                var mb = svm2[i].Value;

                Assert.IsTrue(ma.Weights.IsEqual(mb.Weights));
                Assert.AreEqual(ma.SupportVectors.Length, mb.SupportVectors.Length);
                for (int j = 0; j < ma.SupportVectors.Length; j++)
                {
                    // Expand the sparse vector to 4 elements before comparing
                    double[] expected = ma.SupportVectors[j];
                    double[] actual = mb.SupportVectors[j].ToDense(4);
                    Assert.IsTrue(expected.IsEqual(actual, 1e-5));
                }
            }
        }
Exemplo n.º 8
0
        /// <summary>
        ///   Loads a <see cref="DataTable"/> from a stream, choosing the
        ///   reader or serializer that corresponds to the given format.
        /// </summary>
        /// 
        /// <param name="stream">The stream to read the table from.</param>
        /// <param name="format">The format of the data in the stream.</param>
        /// 
        /// <returns>The table read from the stream.</returns>
        /// 
        /// <exception cref="NotSupportedException">
        ///   The format is <c>TableFormat.CSharp</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        ///   The format is not one of the values handled below.</exception>
        /// 
        public static DataTable Load(Stream stream, TableFormat format)
        {
            switch (format)
            {
                case TableFormat.SerializedXml:
                    {
                        XmlSerializer serializer = new XmlSerializer(typeof(DataTable));
                        return (DataTable)serializer.Deserialize(stream);
                    }

                case TableFormat.SerializedBin:
                    {
                        // NOTE(review): BinaryFormatter is unsafe on untrusted input;
                        // only use this branch with streams from a trusted source.
                        BinaryFormatter serializer = new BinaryFormatter();
                        return (DataTable)serializer.Deserialize(stream);
                    }

                case TableFormat.OctaveMatFile:
                    {
                        // Only the first field of the file is converted to a table
                        MatReader reader = new MatReader(stream);
                        return reader.Fields.First().Value.GetValue<double[,]>().ToTable();
                    }

                case TableFormat.OpenDocument:
                    {
                        // Only the first worksheet is returned
                        ExcelReader reader = new ExcelReader(stream, true);
                        string ws = reader.GetWorksheetList().First();
                        return reader.GetWorksheet(ws);
                    }

                case TableFormat.OlderExcel:
                    {
                        // Only the first worksheet is returned
                        ExcelReader reader = new ExcelReader(stream, false);
                        string ws = reader.GetWorksheetList().First();
                        return reader.GetWorksheet(ws);
                    }

                case TableFormat.Csv:
                    {
                        CsvReader reader = new CsvReader(stream, true);
                        return reader.ToTable();
                    }

                case TableFormat.Tsv:
                    {
                        // Same reader as CSV, with a tab as the third argument
                        CsvReader reader = new CsvReader(stream, true, '\t');
                        return reader.ToTable();
                    }

                case TableFormat.LibSVM:
                    {
                        SparseReader reader = new SparseReader(stream);
                        return reader.ToTable();
                    }

                case TableFormat.Idx:
                    {
                        IdxReader reader = new IdxReader(stream);
                        return reader.ReadToEndAsVectors().ToTable();
                    }

                case TableFormat.CSharp:
                    throw new NotSupportedException();

                default:
                    // Without this branch the method could fall off the end of
                    // the switch without returning, which does not compile.
                    throw new ArgumentOutOfRangeException("format");
            }
        }
Exemplo n.º 9
0
        /// <summary>
        ///   Reads a problem specified in LibSVM's sparse format.
        /// </summary>
        /// 
        /// <param name="filename">The file name handed to the <c>SparseReader</c> constructor.</param>
        /// <param name="bias">When greater than zero, this value is assigned
        ///   to the reader's <c>Intercept</c> property before reading.</param>
        /// 
        /// <returns>A <c>Problem</c> holding the reader's dimensions, the
        ///   dense inputs that were read and their output labels.</returns>
        /// 
        public static Problem read_problem(string filename, double bias)
        {
            // Create a LibSVM's sparse data reader. The reader is disposable,
            // so release it (and the file behind it) as soon as we are done.
            using (var reader = new SparseReader(filename))
            {
                if (bias > 0)
                    reader.Intercept = bias;

                // Item1 carries the input vectors, Item2 their labels
                var r = reader.ReadDenseToEnd();
                double[][] x = r.Item1;
                double[] y = r.Item2;

                return new Problem()
                {
                    Dimensions = reader.Dimensions,
                    Inputs = x,
                    Outputs = y,
                };
            }
        }
Exemplo n.º 10
0
        /// <summary>
        ///   Reads the "examples-sparse.txt" file, trains a linear support
        ///   vector machine on it with linear dual coordinate descent, and
        ///   shows a confusion matrix of the machine's answers for the
        ///   training inputs.
        /// </summary>
        /// 
        private static void cancer()
        {
            double[][] inputs;
            int[] outputs;
            SupportVectorMachine svm;

            // Create a new LibSVM sparse format data reader
            // to read the Wisconsin's Breast Cancer dataset.
            // The reader is disposed as soon as the data and the
            // dimensions are read, before any window is shown.
            //
            using (var reader = new SparseReader("examples-sparse.txt"))
            {
                // Read the classification problem into dense memory
                inputs = reader.ReadToEnd(sparse: false, labels: out outputs);

                // Create a new linear-SVM for the problem dimensions
                svm = new SupportVectorMachine(inputs: reader.Dimensions);
            }

            // The dataset has output labels as 4 and 2. We have to convert them
            // into negative and positive labels so they can be properly processed.
            //
            outputs = outputs.Apply(x => x == 2 ? -1 : +1);

            // Create a learning algorithm for the problem's dimensions
            var teacher = new LinearDualCoordinateDescent(svm, inputs, outputs)
            {
                Loss = Loss.L2,
                Complexity = 1000,
                Tolerance = 1e-5
            };

            // Learn the classification (the returned error is not used here)
            teacher.Run();

            // Compute the machine's answers for the learned inputs
            int[] answers = inputs.Apply(x => Math.Sign(svm.Compute(x)));

            // Create a confusion matrix to show the machine's performance
            var m = new ConfusionMatrix(predicted: answers, expected: outputs);

            // Show it onscreen
            DataGridBox.Show(new ConfusionMatrixView(m));
        }