/// <summary>
/// Builds a four-row table, wraps it in a training data provider obtained from
/// <c>_lap.NN</c> and checks the mini batch requested with the index array <c>{ 1 }</c>.
/// </summary>
/// <remarks>
/// The "cls" column is added with an extra <c>true</c> argument - presumably this marks it as
/// the classification target (TODO confirm against <c>DataTableBuilder.AddColumn</c>).
/// </remarks>
public void DataTableProvider()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Float, "val1");
    builder.AddColumn(ColumnType.Double, "val2");
    builder.AddColumn(ColumnType.String, "val3");
    builder.AddColumn(ColumnType.String, "cls", true);

    builder.Add(0.5f, 1.1, "d", "a");
    builder.Add(0.2f, 1.5, "c", "b");
    builder.Add(0.7f, 0.5, "b", "c");
    builder.Add(0.2f, 0.6, "a", "d");

    var table = builder.Build();
    var vectoriser = table.GetVectoriser();
    var dataProvider = _lap.NN.CreateTrainingDataProvider(table, vectoriser);

    // The values asserted below match the second row added above (0.2f, 1.5, "c", "b").
    var miniBatch = dataProvider.GetTrainingData(new[] { 1 });
    var input = miniBatch.Input.Row(0).AsIndexable();
    var expectedOutput = miniBatch.ExpectedOutput.Row(0).AsIndexable();

    // Expected value goes first so that a failure message reports expected/actual correctly.
    Assert.AreEqual(0.2f, input[0]);
    Assert.AreEqual(1.5f, input[1]);
    Assert.AreEqual(4, expectedOutput.Count);
    Assert.AreEqual("b", vectoriser.GetOutputLabel(2, expectedOutput.MaximumIndex()));
}
/// <summary>
/// Selects columns 1, 2 and 3 from a four-column table and checks the resulting target
/// column index, the row and column counts, and the first two values of the new first column.
/// </summary>
public void SelectColumns()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Float, "val1");
    builder.AddColumn(ColumnType.Double, "val2");
    builder.AddColumn(ColumnType.String, "cls", true);
    builder.AddColumn(ColumnType.String, "cls2");

    builder.Add(0.5f, 1.1, "a", "a2");
    builder.Add(0.2f, 1.5, "b", "b2");
    builder.Add(0.7f, 0.5, "c", "c2");
    builder.Add(0.2f, 0.6, "d", "d2");

    var table = builder.Build();
    var table2 = table.SelectColumns(new[] { 1, 2, 3 });

    // Expected value goes first so that a failure message reports expected/actual correctly.
    // "cls" was at index 2 in the source table; with column 0 dropped it is expected at index 1.
    Assert.AreEqual(1, table2.TargetColumnIndex);
    Assert.AreEqual(4, table2.RowCount);
    Assert.AreEqual(3, table2.ColumnCount);

    // Column 0 of the selection is the original "val2" column.
    var column = table2.GetNumericColumns(_lap, new[] { 0 }).First().AsIndexable();
    Assert.AreEqual(1.1f, column[0]);
    Assert.AreEqual(1.5f, column[1]);
}
/// <summary>
/// Generates a data table containing XOR training data
/// </summary>
/// <returns>
/// A table with three Float columns ("X", "Y" and "XOR") and one row per input combination.
/// </returns>
public static IDataTable Get()
{
    // Each entry is { X, Y, XOR }, listed in the order the rows are added.
    var truthTable = new[]
    {
        new[] { 0.0f, 0.0f, 0.0f },
        new[] { 1.0f, 0.0f, 1.0f },
        new[] { 0.0f, 1.0f, 1.0f },
        new[] { 1.0f, 1.0f, 0.0f }
    };

    var xorBuilder = new DataTableBuilder();
    xorBuilder.AddColumn(ColumnType.Float, "X");
    xorBuilder.AddColumn(ColumnType.Float, "Y");
    xorBuilder.AddColumn(ColumnType.Float, "XOR", true);

    foreach (var entry in truthTable)
    {
        xorBuilder.Add(entry[0], entry[1], entry[2]);
    }

    return xorBuilder.Build();
}
/// <summary>
/// Trains a multinomial logistic regression model on eight height/weight/foot-size samples
/// labelled "male" or "female" and checks that an unlabelled sample is classified as "female".
/// </summary>
public void TestMultinomialLogisticRegression()
{
    var trainingBuilder = new DataTableBuilder();
    trainingBuilder.AddColumn(ColumnType.Float, "height");
    trainingBuilder.AddColumn(ColumnType.Int, "weight").IsContinuous = true;
    trainingBuilder.AddColumn(ColumnType.Int, "foot-size").IsContinuous = true;
    trainingBuilder.AddColumn(ColumnType.String, "gender", true);

    // sample data from: https://en.wikipedia.org/wiki/Naive_Bayes_classifier
    trainingBuilder.Add(6f, 180, 12, "male");
    trainingBuilder.Add(5.92f, 190, 11, "male");
    trainingBuilder.Add(5.58f, 170, 12, "male");
    trainingBuilder.Add(5.92f, 165, 10, "male");
    trainingBuilder.Add(5f, 100, 6, "female");
    trainingBuilder.Add(5.5f, 150, 8, "female");
    trainingBuilder.Add(5.42f, 130, 7, "female");
    trainingBuilder.Add(5.75f, 150, 9, "female");
    var trainingTable = trainingBuilder.Build();

    // The sample to classify lives in a second builder that reuses the training columns;
    // its label is the placeholder "?".
    var queryBuilder = new DataTableBuilder(trainingBuilder.Columns);
    var unknownRow = queryBuilder.Add(6f, 130, 8, "?");

    var model = trainingTable.TrainMultinomialLogisticRegression(_lap, 100, 0.1f);
    var classifier = model.CreateClassifier(_lap);
    var classification = classifier.Classify(unknownRow);

    var firstResult = classification.First();
    Assert.IsTrue(firstResult == "female");
}
/// <summary>
/// Adds one row containing a value for each of nine column types and checks that every
/// field read back from the first row of the built table equals the value that was added.
/// </summary>
public void TestColumnTypes()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Boolean, "boolean");
    builder.AddColumn(ColumnType.Byte, "byte");
    builder.AddColumn(ColumnType.Date, "date");
    builder.AddColumn(ColumnType.Double, "double");
    builder.AddColumn(ColumnType.Float, "float");
    builder.AddColumn(ColumnType.Int, "int");
    builder.AddColumn(ColumnType.Long, "long");
    builder.AddColumn(ColumnType.Null, "null");
    builder.AddColumn(ColumnType.String, "string");

    // Captured once so the same instant is both stored and asserted.
    var now = DateTime.Now;
    builder.Add(true, (byte)100, now, 1.0 / 3, 0.5f, int.MaxValue, long.MaxValue, null, "test");

    var dataTable = builder.Build();
    var firstRow = dataTable.GetRow(0);

    // Expected value goes first so that a failure message reports expected/actual correctly.
    Assert.AreEqual(true, firstRow.GetField<bool>(0));
    Assert.AreEqual(100, firstRow.GetField<byte>(1));
    Assert.AreEqual(now, firstRow.GetField<DateTime>(2));
    Assert.AreEqual(1.0 / 3, firstRow.GetField<double>(3));
    Assert.AreEqual(0.5f, firstRow.GetField<float>(4));
    Assert.AreEqual(int.MaxValue, firstRow.GetField<int>(5));
    Assert.AreEqual(long.MaxValue, firstRow.GetField<long>(6));
    Assert.AreEqual(null, firstRow.GetField<object>(7));
    Assert.AreEqual("test", firstRow.GetField<string>(8));
}
/// <summary>
/// Builds a 33000-row table into a memory stream, writes its index to a second stream, then
/// re-creates the table from those streams (once with the index stream, once with
/// <c>null</c> in its place) and passes each result to <c>_CompareTables</c> together with
/// the original.
/// </summary>
public void TestIndexHydration()
{
    const int rowCount = 33000;

    var tableBuilder = new DataTableBuilder();
    tableBuilder.AddColumn(ColumnType.Boolean, "target", true);
    tableBuilder.AddColumn(ColumnType.Int, "val");
    tableBuilder.AddColumn(ColumnType.String, "label");

    var rowIndex = 0;
    while (rowIndex < rowCount)
    {
        var isEven = rowIndex % 2 == 0;
        tableBuilder.Add(isEven, rowIndex, rowIndex.ToString());
        rowIndex++;
    }

    using (var dataStream = new MemoryStream())
    {
        using (var indexStream = new MemoryStream())
        {
            var original = tableBuilder.Build(dataStream);
            original.WriteIndexTo(indexStream);

            // Rewind both streams before reading the table back.
            dataStream.Position = 0;
            indexStream.Position = 0;
            var withIndex = Provider.CreateDataTable(dataStream, indexStream);
            _CompareTables(original, withIndex);

            // Second pass: same data stream, no index stream supplied.
            dataStream.Position = 0;
            var withoutIndex = Provider.CreateDataTable(dataStream, null);
            _CompareTables(original, withoutIndex);
        }
    }
}
/// <summary>
/// Projects a four-row table through a callback that returns <c>null</c> for the row whose
/// "cls" field is "b" and the row's own data otherwise, then checks that the projected table
/// keeps the column count and has three rows.
/// </summary>
public void TableFilter()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Float, "val1");
    builder.AddColumn(ColumnType.Double, "val2");
    builder.AddColumn(ColumnType.String, "cls", true);

    builder.Add(0.5f, 1.1, "a");
    builder.Add(0.2f, 1.5, "b");
    builder.Add(0.7f, 0.5, "c");
    builder.Add(0.2f, 0.6, "d");

    var table = builder.Build();
    var projectedTable = table.Project(r => r.GetField<string>(2) == "b" ? null : r.Data);

    // Expected value goes first so that a failure message reports expected/actual correctly.
    Assert.AreEqual(table.ColumnCount, projectedTable.ColumnCount);
    Assert.AreEqual(3, projectedTable.RowCount);
}
/// <summary>
/// Calls <c>Fold(4, 0, false)</c> on a four-row table and checks that four folds are
/// produced, each with three training rows and one validation row.
/// </summary>
public void Fold()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Float, "val1");
    builder.AddColumn(ColumnType.Double, "val2");
    builder.AddColumn(ColumnType.String, "cls", true);

    builder.Add(0.5f, 1.1, "a");
    builder.Add(0.2f, 1.5, "b");
    builder.Add(0.7f, 0.5, "c");
    builder.Add(0.2f, 0.6, "d");

    var table = builder.Build();
    var folds = table.Fold(4, 0, false).ToList();

    // Expected value goes first so that a failure message reports expected/actual correctly.
    Assert.AreEqual(4, folds.Count);
    Assert.IsTrue(folds.All(r => r.Training.RowCount == 3 && r.Validation.RowCount == 1));
}
/// <summary>
/// Reads column 1 ("val2") of a four-row table through the <c>GetNumericColumns</c> overload
/// that takes only the column indices and checks its first two values.
/// </summary>
public void GetNumericColumns2()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Float, "val1");
    builder.AddColumn(ColumnType.Double, "val2");
    builder.AddColumn(ColumnType.String, "cls", true);

    builder.Add(0.5f, 1.1, "a");
    builder.Add(0.2f, 1.5, "b");
    builder.Add(0.7f, 0.5, "c");
    builder.Add(0.2f, 0.6, "d");

    var table = builder.Build();
    var column = table.GetNumericColumns(new[] { 1 }).First();

    // Expected value goes first so that a failure message reports expected/actual correctly.
    Assert.AreEqual(1.1f, column[0]);
    Assert.AreEqual(1.5f, column[1]);
}
/// <summary>
/// Reads the rows of a four-row table restricted to column 1 ("val2") via
/// <c>GetNumericRows</c> and checks the single value in each of the first two rows.
/// </summary>
public void GetNumericRows()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Float, "val1");
    builder.AddColumn(ColumnType.Double, "val2");
    builder.AddColumn(ColumnType.String, "cls", true);

    builder.Add(0.5f, 1.1, "a");
    builder.Add(0.2f, 1.5, "b");
    builder.Add(0.7f, 0.5, "c");
    builder.Add(0.2f, 0.6, "d");

    var table = builder.Build();
    var rows = table.GetNumericRows(_lap, new[] { 1 }).Select(r => r.AsIndexable()).ToList();

    // Expected value goes first so that a failure message reports expected/actual correctly.
    Assert.AreEqual(1.1f, rows[0][0]);
    Assert.AreEqual(1.5f, rows[1][0]);
}
/// <summary>
/// Creates a table with a single Int column named "val" and adds the values 0 to 9999,
/// one per row, in ascending order.
/// </summary>
/// <returns>The table produced by the builder's <c>Build</c> call.</returns>
IDataTable _GetSimpleTable()
{
    const int rowCount = 10000;

    var tableBuilder = new DataTableBuilder();
    tableBuilder.AddColumn(ColumnType.Int, "val");

    var value = 0;
    while (value < rowCount)
    {
        tableBuilder.Add(value);
        value++;
    }

    return tableBuilder.Build();
}
/// <summary>
/// Builds a one-row table of three string columns, with the second column added using the
/// extra <c>true</c> argument, and checks that the table reports target column index 1,
/// one row and three columns.
/// </summary>
public void TestTargetColumnIndex()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.String, "a");
    builder.AddColumn(ColumnType.String, "b", true);
    builder.AddColumn(ColumnType.String, "c");
    builder.Add("a", "b", "c");

    var table = builder.Build();

    // Expected value goes first so that a failure message reports expected/actual correctly.
    Assert.AreEqual(1, table.TargetColumnIndex);
    Assert.AreEqual(1, table.RowCount);
    Assert.AreEqual(3, table.ColumnCount);
}
/// <summary>
/// Builds a ten-row table covering nine column types (values derived from 1..10), runs
/// <c>GetAnalysis</c> on it and checks the statistics reported for the boolean column, the
/// five numeric columns (byte, double, float, int, long) and the string column.
/// </summary>
public void TestDataTableAnalysis()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Boolean, "boolean");
    builder.AddColumn(ColumnType.Byte, "byte");
    builder.AddColumn(ColumnType.Date, "date");
    builder.AddColumn(ColumnType.Double, "double");
    builder.AddColumn(ColumnType.Float, "float");
    builder.AddColumn(ColumnType.Int, "int");
    builder.AddColumn(ColumnType.Long, "long");
    builder.AddColumn(ColumnType.Null, "null");
    builder.AddColumn(ColumnType.String, "string");

    for (var i = 1; i <= 10; i++)
    {
        builder.Add(i % 2 == 0, (byte)i, DateTime.Now, (double)i, (float)i, i, (long)i, null, i.ToString());
    }

    var table = builder.Build();
    var analysis = table.GetAnalysis();

    // The value is not used; the property read is kept so the getter is still exercised.
    var xml = analysis.AsXml;

    // Direct casts: an unexpected column-info type fails here with an InvalidCastException
    // instead of a later NullReferenceException from an unchecked "as" result.
    var boolAnalysis = (INumericColumnInfo)analysis[0];
    Assert.IsTrue(boolAnalysis.NumDistinct == 2);
    Assert.IsTrue(boolAnalysis.Mean == 0.5);

    // Indices of the byte, double, float, int and long columns.
    var numericAnalysis = new[] { 1, 3, 4, 5, 6 }.Select(i => (INumericColumnInfo)analysis[i]).ToList();
    Assert.IsTrue(numericAnalysis.All(a => a.NumDistinct == 10));
    Assert.IsTrue(numericAnalysis.All(a => a.Min == 1));
    Assert.IsTrue(numericAnalysis.All(a => a.Max == 10));
    Assert.IsTrue(numericAnalysis.All(a => a.Mean == 5.5));
    Assert.IsTrue(numericAnalysis.All(a => a.Median.Value == 5));
    Assert.IsTrue(numericAnalysis.All(a => Math.Round(a.StdDev.Value) == 3));

    var stringAnalysis = (IStringColumnInfo)analysis[8];
    Assert.IsTrue(stringAnalysis.NumDistinct == 10);
    // "10" is the longest of the strings "1".."10".
    Assert.IsTrue(stringAnalysis.MaxLength == 2);
}