public void ReadSampleTest()
{
    // Dataset source:
    // http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html#iris
    MemoryStream file = new MemoryStream(
        Encoding.Default.GetBytes(Accord.Tests.IO.Properties.Resources.iris_scale));

    // Suppose we are going to read a sparse sample file containing
    // samples which have an actual dimension of 4. Since the samples
    // are in a sparse format, each entry in the file will probably
    // have a much lesser number of elements.
    int sampleSize = 4;

    // Create a new Sparse Sample Reader to read any given file,
    // passing the correct dense sample size in the constructor
    SparseReader reader = new SparseReader(file, Encoding.Default, sampleSize);

    // Declare some variables to receive each current sample
    int label = 0;
    string description;
    double[] sample;

    // Read a single sample from the file in dense form
    var r = reader.ReadDense();
    sample = r.Item1;
    label = (int)r.Item2;
    description = reader.SampleDescriptions[0];

    // First iris sample: class 1, no description, 4 dense features
    Assert.AreEqual(1, label);
    Assert.AreEqual(String.Empty, description);
    Assert.AreEqual(4, sample.Length);
    Assert.AreEqual(-0.555556, sample[0], 0.0001);
    Assert.AreEqual(+0.250000, sample[1], 0.0001);
    Assert.AreEqual(-0.864407, sample[2], 0.0001);
    Assert.AreEqual(-0.916667, sample[3], 0.0001);

    // Read the next sample in sparse form. ToSparse() produces an
    // interleaved index/value array, hence 8 entries for 4 non-zeros:
    // even positions hold the indices (0..3), odd positions the values.
    var s = reader.ReadSparse();
    sample = s.Item1.ToSparse();
    label = (int)s.Item2;
    description = reader.SampleDescriptions[0];

    Assert.AreEqual(1, label);
    Assert.AreEqual(String.Empty, description);
    Assert.AreEqual(8, sample.Length);
    Assert.AreEqual(0, sample[0], 0.0001);
    Assert.AreEqual(-0.666667, sample[1], 0.0001);
    Assert.AreEqual(1, sample[2], 0.0001);
    Assert.AreEqual(-0.166667, sample[3], 0.0001);
    Assert.AreEqual(2, sample[4], 0.0001);
    Assert.AreEqual(-0.864407, sample[5], 0.0001);
    Assert.AreEqual(3, sample[6], 0.0001);
    Assert.AreEqual(-0.916667, sample[7], 0.0001);

    // Two samples have already been consumed above
    int count = 2;

    // Read all remaining samples from the file
    while (!reader.EndOfStream)
    {
        // Clear so the current sample's description is always at index 0
        reader.SampleDescriptions.Clear();
        r = reader.ReadDense();
        sample = r.Item1;
        label = (int)r.Item2;
        description = reader.SampleDescriptions[0];

        // Every iris sample: label in [0, 3], empty description, 4 features
        Assert.IsTrue(label >= 0 && label <= 3);
        Assert.IsTrue(description == String.Empty);
        Assert.AreEqual(4, sample.Length);
        count++;
    }

    // The iris dataset contains exactly 150 samples
    Assert.AreEqual(150, count);
}
public void GuessNumberOfDimensionsTest()
{
    // Load the iris dataset (libsvm sparse format) from the embedded resource
    var stream = new MemoryStream(Encoding.Default.GetBytes(Resources.iris_scale));

    // No sample size is given, so the reader must infer the
    // dense dimensionality from the data itself
    var reader = new SparseReader(stream, Encoding.Default);
    Assert.AreEqual(4, reader.Dimensions);

    // Materialize every sample as a dense vector, plus labels and descriptions
    var result = reader.ReadDenseToEnd();
    double[][] samples = result.Item1;
    int[] labels = result.Item2.ToInt32();
    string[] descriptions = reader.SampleDescriptions.ToArray();

    Assert.AreEqual(150, samples.Length);

    for (int i = 0; i < 150; i++)
    {
        Assert.IsTrue(labels[i] >= 0 && labels[i] <= 3);
        Assert.IsTrue(descriptions[i] == String.Empty);
        Assert.AreEqual(4, samples[i].Length);
    }
}
public void GuessDimensionsInMiddleRunTest()
{
    // Load the a9a (Adult) training set from the embedded resource
    var stream = new MemoryStream(Encoding.Default.GetBytes(Resources.a9a_train));

    // No explicit sample size: the reader has to discover the full
    // dimensionality while scanning the sparse entries
    var reader = new SparseReader(stream, Encoding.Default);

    var result = reader.ReadDenseToEnd();
    double[][] samples = result.Item1;
    int[] labels = result.Item2.ToInt32();
    string[] descriptions = reader.SampleDescriptions.ToArray();

    Assert.AreEqual(26049, samples.Length);

    for (int i = 0; i < labels.Length; i++)
    {
        // a9a is a binary problem with -1/+1 labels
        Assert.IsTrue(labels[i] == -1 || labels[i] == 1);
        Assert.IsTrue(descriptions[i] == String.Empty);
        Assert.AreEqual(123, samples[i].Length);
    }
}
public void DimensionsTest()
{
    // Load the a9a (Adult) training set from the embedded resource
    var stream = new MemoryStream(Encoding.Default.GetBytes(Resources.a9a_train));
    var reader = new SparseReader(stream, Encoding.Default);

    // The reader should report the problem's full dimensionality up front
    Assert.AreEqual(123, reader.Dimensions);

    // Read the whole file through the out-parameter overload
    int[] labels;
    string[] descriptions;
    double[][] samples = reader.ReadToEnd(out labels, out descriptions);

    Assert.AreEqual(26049, samples.Length);

    for (int i = 0; i < labels.Length; i++)
    {
        // a9a is a binary problem with -1/+1 labels
        Assert.IsTrue(labels[i] == -1 || labels[i] == 1);
        Assert.IsTrue(descriptions[i] == String.Empty);
        Assert.AreEqual(123, samples[i].Length);
    }
}
public void ReadAllTest()
{
    // Load the iris dataset (libsvm sparse format) from the embedded resource
    var stream = new MemoryStream(Encoding.Default.GetBytes(Resources.iris_scale));

    // The iris samples have a true (dense) dimension of 4; each sparse
    // file entry typically carries fewer elements than that.
    const int sampleSize = 4;

    // Tell the reader the dense sample size up front via the constructor
    var reader = new SparseReader(stream, Encoding.Default, sampleSize);

    // Read every sparse sample into a dense vector array, along with
    // its class label and optional per-sample description
    var result = reader.ReadDenseToEnd();
    double[][] samples = result.Item1;
    int[] labels = result.Item2.ToInt32();
    string[] descriptions = reader.SampleDescriptions.ToArray();

    Assert.AreEqual(150, samples.Length);

    for (int i = 0; i < 150; i++)
    {
        Assert.IsTrue(labels[i] >= 0 && labels[i] <= 3);
        Assert.IsTrue(descriptions[i] == String.Empty);
        Assert.AreEqual(4, samples[i].Length);
    }
}
/// <summary>
///   Reads a problem specified in LibSVM's sparse format.
/// </summary>
///
public static Problem read_problem(string filename, double bias)
{
    // Open the file using LibSVM's sparse data reader
    var reader = new SparseReader(filename);

    // A positive bias is exposed through the reader's intercept term
    if (bias > 0)
        reader.Intercept = bias;

    // Read the inputs and their labels in one pass
    double[] outputs;
    double[][] inputs = reader.ReadToEnd(out outputs);

    return new Problem()
    {
        Dimensions = reader.Dimensions,
        Inputs = inputs,
        Outputs = outputs,
    };
}
public void SparseLinearTest()
{
    // Train the same multiclass linear SVM twice — once on dense vectors,
    // once on sparse vectors — and verify both produce identical models.
    MulticlassSupportVectorMachine<Linear> svm1;
    MulticlassSupportVectorMachine<Linear, Sparse<double>> svm2;

    {
        // Fix the RNG seed so both runs see the same random decisions
        Accord.Math.Random.Generator.Seed = 0;

        MemoryStream file = new MemoryStream(
            Encoding.Default.GetBytes(Resources.iris_scale));

        var reader = new SparseReader(file, Encoding.Default);

        // Dense path: read the samples as full double[] vectors
        var samples = reader.ReadDenseToEnd();
        double[][] x = samples.Item1;
        int[] y = samples.Item2.ToMulticlass();

        var learner = new MulticlassSupportVectorLearning<Linear>()
        {
            Learner = (p) => new LinearDualCoordinateDescent<Linear>()
        };

        svm1 = learner.Learn(x, y);
    }

    {
        // Same seed again so the sparse run is deterministic and comparable
        Accord.Math.Random.Generator.Seed = 0;

        MemoryStream file = new MemoryStream(
            Encoding.Default.GetBytes(Resources.iris_scale));

        // Create a new Sparse Sample Reader to read any given file,
        // passing the correct dense sample size in the constructor
        var reader = new SparseReader(file, Encoding.Default);

        // Sparse path: keep the samples in Sparse<double> form
        var samples = reader.ReadSparseToEnd();
        Sparse<double>[] x = samples.Item1;
        int[] y = samples.Item2.ToMulticlass();

        var learner = new MulticlassSupportVectorLearning<Linear, Sparse<double>>()
        {
            Learner = (p) => new LinearDualCoordinateDescent<Linear, Sparse<double>>()
        };

        svm2 = learner.Learn(x, y);
    }

    // Both machines must contain the same number of inner binary models
    Assert.AreEqual(svm1.Models.Length, svm2.Models.Length);

    for (int i = 0; i < svm1.Models.Length; i++)
    {
        var ma = svm1[i].Value;
        var mb = svm2[i].Value;

        // Weights must match exactly between the dense and sparse models
        Assert.IsTrue(ma.Weights.IsEqual(mb.Weights));
        Assert.AreEqual(ma.SupportVectors.Length, mb.SupportVectors.Length);

        for (int j = 0; j < ma.SupportVectors.Length; j++)
        {
            // Densify the sparse support vector (iris has 4 dimensions)
            // before comparing within a small numeric tolerance
            double[] expected = ma.SupportVectors[j];
            double[] actual = mb.SupportVectors[j].ToDense(4);
            Assert.IsTrue(expected.IsEqual(actual, 1e-5));
        }
    }
}
/// <summary>
///   Loads a <see cref="DataTable"/> from the given stream, using the
///   reader appropriate for the specified <paramref name="format"/>.
/// </summary>
///
/// <param name="stream">The stream containing the table data.</param>
/// <param name="format">The storage format of the data in the stream.</param>
///
/// <returns>A <see cref="DataTable"/> with the contents of the stream.</returns>
///
/// <exception cref="NotSupportedException">
///   Thrown when <paramref name="format"/> is <see cref="TableFormat.CSharp"/>,
///   which cannot be loaded from a stream.</exception>
/// <exception cref="ArgumentOutOfRangeException">
///   Thrown when <paramref name="format"/> is not a recognized format.</exception>
///
public static DataTable Load(Stream stream, TableFormat format)
{
    switch (format)
    {
        case TableFormat.SerializedXml:
            {
                XmlSerializer serializer = new XmlSerializer(typeof(DataTable));
                return (DataTable)serializer.Deserialize(stream);
            }

        case TableFormat.SerializedBin:
            {
                // SECURITY: BinaryFormatter is unsafe on untrusted input and is
                // deprecated/removed in modern .NET; only use with trusted data.
                BinaryFormatter serializer = new BinaryFormatter();
                return (DataTable)serializer.Deserialize(stream);
            }

        case TableFormat.OctaveMatFile:
            {
                // Take the first field of the .mat file as a double matrix
                MatReader reader = new MatReader(stream);
                return reader.Fields.First().Value.GetValue<double[,]>().ToTable();
            }

        case TableFormat.OpenDocument:
            {
                // 'true' selects the newer (xlsx/ods-style) Excel engine
                ExcelReader reader = new ExcelReader(stream, true);
                string ws = reader.GetWorksheetList().First();
                return reader.GetWorksheet(ws);
            }

        case TableFormat.OlderExcel:
            {
                // 'false' selects the legacy (xls-style) Excel engine
                ExcelReader reader = new ExcelReader(stream, false);
                string ws = reader.GetWorksheetList().First();
                return reader.GetWorksheet(ws);
            }

        case TableFormat.Csv:
            {
                CsvReader reader = new CsvReader(stream, true);
                return reader.ToTable();
            }

        case TableFormat.Tsv:
            {
                // Same as CSV, but with a tab delimiter
                CsvReader reader = new CsvReader(stream, true, '\t');
                return reader.ToTable();
            }

        case TableFormat.LibSVM:
            {
                SparseReader reader = new SparseReader(stream);
                return reader.ToTable();
            }

        case TableFormat.Idx:
            {
                IdxReader reader = new IdxReader(stream);
                return reader.ReadToEndAsVectors().ToTable();
            }

        case TableFormat.CSharp:
            throw new NotSupportedException();

        default:
            // Without this, the method end would be reachable without a
            // return value (CS0161) and unknown enum values would be
            // silently unhandled.
            throw new ArgumentOutOfRangeException("format",
                "Unsupported table format: " + format);
    }
}
/// <summary>
///   Reads a problem specified in LibSVM's sparse format.
/// </summary>
///
public static Problem read_problem(string filename, double bias)
{
    // Open the file using LibSVM's sparse data reader
    var reader = new SparseReader(filename);

    // A positive bias is exposed through the reader's intercept term
    if (bias > 0)
        reader.Intercept = bias;

    // Read every sample as a dense vector together with its output value
    var result = reader.ReadDenseToEnd();

    return new Problem()
    {
        Dimensions = reader.Dimensions,
        Inputs = result.Item1,
        Outputs = result.Item2,
    };
}
private static void cancer()
{
    // Create a new LibSVM sparse format data reader
    // to read the Wisconsin's Breast Cancer dataset
    //
    var reader = new SparseReader("examples-sparse.txt");

    // Read the classification problem into dense memory
    int[] classes;
    double[][] samples = reader.ReadToEnd(sparse: false, labels: out classes);

    // The dataset uses 4 and 2 as output labels; map them onto the
    // -1/+1 labels required by the binary learning algorithm.
    //
    classes = classes.Apply(v => v == 2 ? -1 : +1);

    // Create a new linear-SVM sized for the problem's dimensions
    var svm = new SupportVectorMachine(inputs: reader.Dimensions);

    // Configure a linear dual coordinate descent teacher for this problem
    var teacher = new LinearDualCoordinateDescent(svm, samples, classes)
    {
        Loss = Loss.L2,
        Complexity = 1000,
        Tolerance = 1e-5
    };

    // Learn the classification
    double error = teacher.Run();

    // Compute the machine's answers for the training inputs
    int[] predicted = samples.Apply(v => Math.Sign(svm.Compute(v)));

    // Summarize the machine's performance in a confusion matrix
    var matrix = new ConfusionMatrix(predicted: predicted, expected: classes);

    // Show it onscreen
    DataGridBox.Show(new ConfusionMatrixView(matrix));
}