Example #1
0
 /// <summary>
 /// Builds a data table from the collected training data: one float column ("v0", "v1", ...) per
 /// vector component, followed by a string "target" column holding each row's classification
 /// </summary>
 /// <param name="output">The output stream to write the table to (optional)</param>
 /// <returns>The built data table, or null when there is no training data</returns>
 public IDataTable GetTable(Stream output = null)
 {
     // nothing to build without at least one training example
     if (!_trainingData.Any())
     {
         return null;
     }

     // the width of the first vector decides how many value columns the table gets
     var width        = _trainingData.First().Item1.Length;
     var tableBuilder = new DataTableBuilder();
     for (var column = 0; column < width; column++)
     {
         tableBuilder.AddColumn(ColumnType.Float, "v" + column);
     }
     tableBuilder.AddColumn(ColumnType.String, "target", true);

     foreach (var example in _trainingData)
     {
         // vector components first, classification in the final slot
         var row = new object[width + 1];
         for (var column = 0; column < width; column++)
         {
             row[column] = example.Item1[column];
         }
         row[width] = example.Item2;
         tableBuilder.AddRow(row);
     }
     return tableBuilder.Build(output);
 }
Example #2
0
        /// <summary>
        /// Builds a four row table, wraps it in a training data provider and checks the input and
        /// expected output that the provider returns for the row at index 1
        /// </summary>
        public void DataTableProvider()
        {
            var builder = new DataTableBuilder();

            builder.AddColumn(ColumnType.Float, "val1");
            builder.AddColumn(ColumnType.Double, "val2");
            builder.AddColumn(ColumnType.String, "val3");
            builder.AddColumn(ColumnType.String, "cls", true);

            builder.Add(0.5f, 1.1, "d", "a");
            builder.Add(0.2f, 1.5, "c", "b");
            builder.Add(0.7f, 0.5, "b", "c");
            builder.Add(0.2f, 0.6, "a", "d");

            var table          = builder.Build();
            var vectoriser     = table.GetVectoriser();
            var dataProvider   = _lap.NN.CreateTrainingDataProvider(table, vectoriser);

            // request a mini batch that contains only the row at index 1: (0.2f, 1.5, "c", "b")
            var miniBatch      = dataProvider.GetTrainingData(new[] { 1 });
            var input          = miniBatch.Input.Row(0).AsIndexable();
            var expectedOutput = miniBatch.ExpectedOutput.Row(0).AsIndexable();

            // expected value first, actual second, so that a failure is reported the right way round
            Assert.AreEqual(0.2f, input[0]);
            Assert.AreEqual(1.5f, input[1]);
            Assert.AreEqual(4, expectedOutput.Count);

            Assert.AreEqual("b", vectoriser.GetOutputLabel(2, expectedOutput.MaximumIndex()));
        }
Example #3
0
        /// <summary>
        /// Selects columns 1, 2 and 3 of a four column table and checks the shape, the target column
        /// index and the first numeric column of the resulting table
        /// </summary>
        public void SelectColumns()
        {
            var builder = new DataTableBuilder();

            builder.AddColumn(ColumnType.Float, "val1");
            builder.AddColumn(ColumnType.Double, "val2");
            builder.AddColumn(ColumnType.String, "cls", true);
            builder.AddColumn(ColumnType.String, "cls2");

            builder.Add(0.5f, 1.1, "a", "a2");
            builder.Add(0.2f, 1.5, "b", "b2");
            builder.Add(0.7f, 0.5, "c", "c2");
            builder.Add(0.2f, 0.6, "d", "d2");

            var table  = builder.Build();
            var table2 = table.SelectColumns(new[] { 1, 2, 3 });

            // expected value first, actual second, so that a failure is reported the right way round;
            // "cls" was column 2 of the source table and is expected at index 1 once "val1" is dropped
            Assert.AreEqual(1, table2.TargetColumnIndex);
            Assert.AreEqual(4, table2.RowCount);
            Assert.AreEqual(3, table2.ColumnCount);

            // column 0 of the selection is the original "val2" column
            var column = table2.GetNumericColumns(_lap, new[] { 0 }).First().AsIndexable();

            Assert.AreEqual(1.1f, column[0]);
            Assert.AreEqual(1.5f, column[1]);
        }
Example #4
0
        /// <summary>
        /// Writes a single row containing one value of every column type and checks that each
        /// field can be read back from the built table with its original value
        /// </summary>
        public void TestColumnTypes()
        {
            var builder = new DataTableBuilder();

            builder.AddColumn(ColumnType.Boolean, "boolean");
            builder.AddColumn(ColumnType.Byte, "byte");
            builder.AddColumn(ColumnType.Date, "date");
            builder.AddColumn(ColumnType.Double, "double");
            builder.AddColumn(ColumnType.Float, "float");
            builder.AddColumn(ColumnType.Int, "int");
            builder.AddColumn(ColumnType.Long, "long");
            builder.AddColumn(ColumnType.Null, "null");
            builder.AddColumn(ColumnType.String, "string");

            // captured once so the value written and the value asserted are the same instant
            var now = DateTime.Now;

            builder.Add(true, (byte)100, now, 1.0 / 3, 0.5f, int.MaxValue, long.MaxValue, null, "test");
            var dataTable = builder.Build();

            var firstRow = dataTable.GetRow(0);

            // expected value first, actual second, so that a failure is reported the right way round
            Assert.AreEqual(true, firstRow.GetField <bool>(0));
            Assert.AreEqual((byte)100, firstRow.GetField <byte>(1));
            Assert.AreEqual(now, firstRow.GetField <DateTime>(2));
            Assert.AreEqual(1.0 / 3, firstRow.GetField <double>(3));
            Assert.AreEqual(0.5f, firstRow.GetField <float>(4));
            Assert.AreEqual(int.MaxValue, firstRow.GetField <int>(5));
            Assert.AreEqual(long.MaxValue, firstRow.GetField <long>(6));
            Assert.IsNull(firstRow.GetField <object>(7));
            Assert.AreEqual("test", firstRow.GetField <string>(8));
        }
Example #5
0
        /// <summary>
        /// Builds a 33000 row table into a stream, writes its index to a second stream and checks that
        /// tables re-created from those streams (with and without the index) compare equal to the original
        /// </summary>
        public void TestIndexHydration()
        {
            var tableBuilder = new DataTableBuilder();

            tableBuilder.AddColumn(ColumnType.Boolean, "target", true);
            tableBuilder.AddColumn(ColumnType.Int, "val");
            tableBuilder.AddColumn(ColumnType.String, "label");

            var row = 0;
            while (row < 33000)
            {
                tableBuilder.Add(row % 2 == 0, row, row.ToString());
                row++;
            }

            using (var data = new MemoryStream())
            using (var index = new MemoryStream())
            {
                var original = tableBuilder.Build(data);
                original.WriteIndexTo(index);

                // rewind both streams and re-create the table from the data plus its index
                data.Position  = 0;
                index.Position = 0;
                var withIndex = Provider.CreateDataTable(data, index);
                _CompareTables(original, withIndex);

                // rewind the data stream and re-create the table again, passing null as the index stream
                data.Position = 0;
                var withoutIndex = Provider.CreateDataTable(data, null);
                _CompareTables(original, withoutIndex);
            }
        }
Example #6
0
        /// <summary>
        /// Expands each sparse classification into a dense row of float "term N" columns followed by a
        /// string "classification" column and builds a data table from those rows
        /// </summary>
        /// <param name="stream">Optional stream to write the data table to</param>
        /// <returns>The built data table</returns>
        public IDataTable ConvertToTable(Stream stream = null)
        {
            var termCount = GetMaximumIndex();
            var builder   = new DataTableBuilder();

            for (var term = 0; term < termCount; term++)
            {
                builder.AddColumn(ColumnType.Float, "term " + term.ToString());
            }
            builder.AddColumn(ColumnType.String, "classification", true);

            foreach (var classification in Classification)
            {
                var row = new object[termCount + 1];

                // start from an all-zero vector, then drop in the weights that are actually present
                for (var term = 0; term < termCount; term++)
                {
                    row[term] = 0f;
                }
                foreach (var entry in classification.Data)
                {
                    row[entry.Index] = entry.Weight;
                }

                // NOTE(review): a weight whose Index equals the value returned by GetMaximumIndex() is
                // written to this last slot and then replaced by the name - confirm that
                // GetMaximumIndex() returns an exclusive upper bound
                row[termCount] = classification.Name;
                builder.AddRow(row);
            }

            return builder.Build(stream);
        }
Example #7
0
        /// <summary>
        /// Trains a linear regression with gradient descent on data where the result is twice the
        /// value and checks that an input of 3 is predicted as (rounded) 6, both singly and in a batch
        /// </summary>
        public void TestRegression()
        {
            var builder = new DataTableBuilder();

            builder.AddColumn(ColumnType.Float, "value");
            builder.AddColumn(ColumnType.Float, "result", true);

            // simple linear relationship: result is twice value
            foreach (var value in new[] { 1f, 2f, 4f, 8f })
            {
                builder.AddRow(new object[] { value, value * 2f });
            }
            var table = builder.Build();

            // only the gradient descent trainer is exercised here
            var trainer   = table.CreateLinearRegressionTrainer(_lap);
            var model     = trainer.GradientDescent(20, 0.01f);
            var predictor = model.CreatePredictor(_lap);

            var single = predictor.Predict(3f);
            Assert.IsTrue(Math.Round(single) == 6f);

            // the second row of the batch repeats the single input checked above
            var inputs = new[] {
                new float[] { 10f },
                new float[] { 3f }
            };
            var batch = predictor.Predict(inputs);
            Assert.IsTrue(Math.Round(batch[1]) == 6f);
        }
Example #8
0
        /// <summary>
        /// Trains a multinomial logistic regression on the height/weight/foot-size sample and checks
        /// that an unlabelled row (6, 130, 8) is classified as "female"
        /// </summary>
        public void TestMultinomialLogisticRegression()
        {
            var trainingBuilder = new DataTableBuilder();

            trainingBuilder.AddColumn(ColumnType.Float, "height");

            // the two integer columns are explicitly flagged as continuous
            var weightColumn = trainingBuilder.AddColumn(ColumnType.Int, "weight");
            weightColumn.IsContinuous = true;
            var footSizeColumn = trainingBuilder.AddColumn(ColumnType.Int, "foot-size");
            footSizeColumn.IsContinuous = true;

            trainingBuilder.AddColumn(ColumnType.String, "gender", true);

            // sample data from: https://en.wikipedia.org/wiki/Naive_Bayes_classifier
            trainingBuilder.Add(6f, 180, 12, "male");
            trainingBuilder.Add(5.92f, 190, 11, "male");
            trainingBuilder.Add(5.58f, 170, 12, "male");
            trainingBuilder.Add(5.92f, 165, 10, "male");
            trainingBuilder.Add(5f, 100, 6, "female");
            trainingBuilder.Add(5.5f, 150, 8, "female");
            trainingBuilder.Add(5.42f, 130, 7, "female");
            trainingBuilder.Add(5.75f, 150, 9, "female");
            var trainingTable = trainingBuilder.Build();

            // the row to classify lives in a second builder that shares the training columns
            var queryBuilder = new DataTableBuilder(trainingBuilder.Columns);
            var queryRow     = queryBuilder.Add(6f, 130, 8, "?");

            var model      = trainingTable.TrainMultinomialLogisticRegression(_lap, 100, 0.1f);
            var classifier = model.CreateClassifier(_lap);
            var labels     = classifier.Classify(queryRow);

            Assert.IsTrue(labels.First() == "female");
        }
Example #9
0
        /// <summary>
        /// Trains a logistic regression on the hours-studied / passed sample and checks on which side
        /// of 0.5 the predicted probability falls for a handful of inputs
        /// </summary>
        public void TestLogisticRegression()
        {
            var builder = new DataTableBuilder();

            builder.AddColumn(ColumnType.Float, "hours");
            builder.AddColumn(ColumnType.Boolean, "pass", true);

            // sample data from: https://en.wikipedia.org/wiki/Logistic_regression
            var samples = new[] {
                new object[] { 0.5f, false },
                new object[] { 0.75f, false },
                new object[] { 1f, false },
                new object[] { 1.25f, false },
                new object[] { 1.5f, false },
                new object[] { 1.75f, false },
                new object[] { 1.75f, true },
                new object[] { 2f, false },
                new object[] { 2.25f, true },
                new object[] { 2.5f, false },
                new object[] { 2.75f, true },
                new object[] { 3f, false },
                new object[] { 3.25f, true },
                new object[] { 3.5f, false },
                new object[] { 4f, true },
                new object[] { 4.25f, true },
                new object[] { 4.5f, true },
                new object[] { 4.75f, true },
                new object[] { 5f, true },
                new object[] { 5.5f, true },
            };
            foreach (var sample in samples)
            {
                builder.AddRow(sample);
            }
            var table = builder.Build();

            var trainer   = table.CreateLogisticRegressionTrainer(_lap);
            var model     = trainer.GradientDescent(1000, 0.1f);
            var predictor = model.CreatePredictor(_lap);

            // single predictions: two hours should fail, four hours should pass
            var twoHours = predictor.Predict(2f);
            Assert.IsTrue(twoHours < 0.5f);

            var fourHours = predictor.Predict(4f);
            Assert.IsTrue(fourHours >= 0.5f);

            // batch prediction for one to five hours
            var hours = new[] {
                new float[] { 1f },
                new float[] { 2f },
                new float[] { 3f },
                new float[] { 4f },
                new float[] { 5f },
            };
            var batch = predictor.Predict(hours);

            Assert.IsTrue(batch[0] <= 0.5f);
            Assert.IsTrue(batch[1] <= 0.5f);
            Assert.IsTrue(batch[2] >= 0.5f);
            Assert.IsTrue(batch[3] >= 0.5f);
            Assert.IsTrue(batch[4] >= 0.5f);
        }
Example #10
0
        /// <summary>
        /// Builds a table with a single int column "val" that holds the values 0 to 9999, one per row
        /// </summary>
        IDataTable _GetSimpleTable()
        {
            const int rowCount = 10000;

            var tableBuilder = new DataTableBuilder();
            tableBuilder.AddColumn(ColumnType.Int, "val");

            for (var value = 0; value != rowCount; ++value)
            {
                tableBuilder.Add(value);
            }
            return tableBuilder.Build();
        }
Example #11
0
        /// <summary>
        /// Generates a data table containing XOR training data
        /// </summary>
        /// <returns>A four row table with float columns "X", "Y" and "XOR"</returns>
        public static IDataTable Get()
        {
            var table = new DataTableBuilder();

            table.AddColumn(ColumnType.Float, "X");
            table.AddColumn(ColumnType.Float, "Y");
            table.AddColumn(ColumnType.Float, "XOR", true);

            // walk the truth table with X varying fastest: (0,0) (1,0) (0,1) (1,1)
            var bits = new[] { 0.0f, 1.0f };
            foreach (var y in bits)
            {
                foreach (var x in bits)
                {
                    // XOR is 1 exactly when the two inputs differ
                    var xor = x != y ? 1.0f : 0.0f;
                    table.Add(x, y, xor);
                }
            }
            return table.Build();
        }
Example #12
0
        /// <summary>
        /// Checks that the column flagged in AddColumn is reported as the target column of the built
        /// table, along with the table's row and column counts
        /// </summary>
        public void TestTargetColumnIndex()
        {
            var builder = new DataTableBuilder();

            builder.AddColumn(ColumnType.String, "a");
            builder.AddColumn(ColumnType.String, "b", true);
            builder.AddColumn(ColumnType.String, "c");
            builder.Add("a", "b", "c");
            var table = builder.Build();

            // expected value first, actual second, so that a failure is reported the right way round
            Assert.AreEqual(1, table.TargetColumnIndex);
            Assert.AreEqual(1, table.RowCount);
            Assert.AreEqual(3, table.ColumnCount);
        }
Example #13
0
        /// <summary>
        /// Projects a four row table through a callback that returns null for the "b" row and checks
        /// that the projected table keeps every column but has only three rows
        /// </summary>
        public void TableFilter()
        {
            var builder = new DataTableBuilder();

            builder.AddColumn(ColumnType.Float, "val1");
            builder.AddColumn(ColumnType.Double, "val2");
            builder.AddColumn(ColumnType.String, "cls", true);

            builder.Add(0.5f, 1.1, "a");
            builder.Add(0.2f, 1.5, "b");
            builder.Add(0.7f, 0.5, "c");
            builder.Add(0.2f, 0.6, "d");

            var table          = builder.Build();

            // null is returned for the row whose class is "b"; every other row passes through unchanged
            var projectedTable = table.Project(r => r.GetField <string>(2) == "b" ? null : r.Data);

            // expected value first, actual second, so that a failure is reported the right way round
            Assert.AreEqual(table.ColumnCount, projectedTable.ColumnCount);
            Assert.AreEqual(3, projectedTable.RowCount);
        }
Example #14
0
        /// <summary>
        /// Splits a four row table into four folds and checks that every fold has three training
        /// rows and one validation row
        /// </summary>
        public void Fold()
        {
            var builder = new DataTableBuilder();

            builder.AddColumn(ColumnType.Float, "val1");
            builder.AddColumn(ColumnType.Double, "val2");
            builder.AddColumn(ColumnType.String, "cls", true);

            builder.Add(0.5f, 1.1, "a");
            builder.Add(0.2f, 1.5, "b");
            builder.Add(0.7f, 0.5, "c");
            builder.Add(0.2f, 0.6, "d");

            var table = builder.Build();
            var folds = table.Fold(4, 0, false).ToList();

            // expected value first, actual second, so that a failure is reported the right way round
            Assert.AreEqual(4, folds.Count);
            Assert.IsTrue(folds.All(r => r.Training.RowCount == 3 && r.Validation.RowCount == 1));
        }
Example #15
0
        /// <summary>
        /// Reads column 1 ("val2") of a small table through GetNumericColumns and checks its first
        /// two values
        /// </summary>
        public void GetNumericColumns2()
        {
            var builder = new DataTableBuilder();

            builder.AddColumn(ColumnType.Float, "val1");
            builder.AddColumn(ColumnType.Double, "val2");
            builder.AddColumn(ColumnType.String, "cls", true);

            builder.Add(0.5f, 1.1, "a");
            builder.Add(0.2f, 1.5, "b");
            builder.Add(0.7f, 0.5, "c");
            builder.Add(0.2f, 0.6, "d");

            var table  = builder.Build();
            var column = table.GetNumericColumns(new[] { 1 }).First();

            // expected value first, actual second, so that a failure is reported the right way round
            Assert.AreEqual(1.1f, column[0]);
            Assert.AreEqual(1.5f, column[1]);
        }
Example #16
0
        /// <summary>
        /// Reads column 1 ("val2") of a small table row by row through GetNumericRows and checks the
        /// value found in the first two rows
        /// </summary>
        public void GetNumericRows()
        {
            var builder = new DataTableBuilder();

            builder.AddColumn(ColumnType.Float, "val1");
            builder.AddColumn(ColumnType.Double, "val2");
            builder.AddColumn(ColumnType.String, "cls", true);

            builder.Add(0.5f, 1.1, "a");
            builder.Add(0.2f, 1.5, "b");
            builder.Add(0.7f, 0.5, "c");
            builder.Add(0.2f, 0.6, "d");

            var table = builder.Build();
            var rows  = table.GetNumericRows(_lap, new[] { 1 }).Select(r => r.AsIndexable()).ToList();

            // expected value first, actual second, so that a failure is reported the right way round
            Assert.AreEqual(1.1f, rows[0][0]);
            Assert.AreEqual(1.5f, rows[1][0]);
        }
Example #17
0
        /// <summary>
        /// Builds a ten row table with one column of every type and checks the statistics that the
        /// table analysis reports for the boolean, numeric and string columns
        /// </summary>
        public void TestDataTableAnalysis()
        {
            var builder = new DataTableBuilder();

            builder.AddColumn(ColumnType.Boolean, "boolean");
            builder.AddColumn(ColumnType.Byte, "byte");
            builder.AddColumn(ColumnType.Date, "date");
            builder.AddColumn(ColumnType.Double, "double");
            builder.AddColumn(ColumnType.Float, "float");
            builder.AddColumn(ColumnType.Int, "int");
            builder.AddColumn(ColumnType.Long, "long");
            builder.AddColumn(ColumnType.Null, "null");
            builder.AddColumn(ColumnType.String, "string");

            // every numeric column receives the values 1 to 10; the boolean column alternates
            for (var i = 1; i <= 10; i++)
            {
                builder.Add(i % 2 == 0, (byte)i, DateTime.Now, (double)i, (float)i, i, (long)i, null, i.ToString());
            }
            var table    = builder.Build();
            var analysis = table.GetAnalysis();

            // the value is not inspected; reading the property only makes sure that it can be evaluated
            var xml      = analysis.AsXml;

            // direct casts: a column analysed as the wrong kind fails here with an InvalidCastException
            // instead of a NullReferenceException in one of the assertions below
            var boolAnalysis = (INumericColumnInfo)analysis[0];

            Assert.IsTrue(boolAnalysis.NumDistinct == 2);
            Assert.IsTrue(boolAnalysis.Mean == 0.5);

            // columns 1, 3, 4, 5 and 6 are the byte, double, float, int and long columns
            var numericAnalysis = new[] { 1, 3, 4, 5, 6 }.Select(i => (INumericColumnInfo)analysis[i]).ToList();

            Assert.IsTrue(numericAnalysis.All(a => a.NumDistinct == 10));
            Assert.IsTrue(numericAnalysis.All(a => a.Min == 1));
            Assert.IsTrue(numericAnalysis.All(a => a.Max == 10));
            Assert.IsTrue(numericAnalysis.All(a => a.Mean == 5.5));
            Assert.IsTrue(numericAnalysis.All(a => a.Median.Value == 5));
            Assert.IsTrue(numericAnalysis.All(a => Math.Round(a.StdDev.Value) == 3));

            var stringAnalysis = (IStringColumnInfo)analysis[8];

            // the strings are "1" to "10", so the longest has two characters
            Assert.IsTrue(stringAnalysis.NumDistinct == 10);
            Assert.IsTrue(stringAnalysis.MaxLength == 2);
        }