/// <summary>
/// Builds a 33,000 row table into a memory stream, writes its index to a second stream and
/// passes tables re-created from those streams (with and without the index stream) to
/// <c>_CompareTables</c> alongside the original
/// </summary>
public void TestIndexHydration()
{
    const int rowCount = 33000;

    var tableBuilder = new DataTableBuilder();
    tableBuilder.AddColumn(ColumnType.Boolean, "target", true);
    tableBuilder.AddColumn(ColumnType.Int, "val");
    tableBuilder.AddColumn(ColumnType.String, "label");

    for (var rowIndex = 0; rowIndex < rowCount; rowIndex++)
    {
        tableBuilder.Add(rowIndex % 2 == 0, rowIndex, rowIndex.ToString());
    }

    using (var tableData = new MemoryStream())
    using (var indexData = new MemoryStream())
    {
        var original = tableBuilder.Build(tableData);
        original.WriteIndexTo(indexData);

        // rewind both streams before handing them to the provider
        tableData.Seek(0, SeekOrigin.Begin);
        indexData.Seek(0, SeekOrigin.Begin);
        var withIndex = Provider.CreateDataTable(tableData, indexData);
        _CompareTables(original, withIndex);

        // second pass: same data stream, but no index stream supplied
        tableData.Seek(0, SeekOrigin.Begin);
        var withoutIndex = Provider.CreateDataTable(tableData, null);
        _CompareTables(original, withoutIndex);
    }
}
/// <summary>
/// Creates a new data table with the vectorised weighted classification outputs linked with each row's classification.
/// The table has one float column per vector element ("v0", "v1", ...) followed by a string "target" column.
/// </summary>
/// <param name="output">The output stream to write the table to (optional)</param>
/// <returns>The built table, or null when there is no training data</returns>
public IDataTable GetTable(Stream output = null)
{
    if (!_trainingData.Any())
    {
        return null;
    }

    // the first item determines how many float columns the table gets
    var vectorSize = _trainingData.First().Item1.Length;
    var tableBuilder = new DataTableBuilder();
    for (var column = 0; column < vectorSize; column++)
    {
        tableBuilder.AddColumn(ColumnType.Float, "v" + column);
    }
    tableBuilder.AddColumn(ColumnType.String, "target", true);

    foreach (var sample in _trainingData)
    {
        // vector elements first, classification in the final slot
        var row = new object[vectorSize + 1];
        for (var column = 0; column < vectorSize; column++)
        {
            row[column] = sample.Item1[column];
        }
        row[vectorSize] = sample.Item2;
        tableBuilder.AddRow(row);
    }

    return tableBuilder.Build(output);
}
/// <summary>
/// Builds a four column, four row table, selects columns 1, 2 and 3 and checks the target column
/// index, the row/column counts and the first two values of the first selected column
/// </summary>
public void SelectColumns()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Float, "val1");
    builder.AddColumn(ColumnType.Double, "val2");
    builder.AddColumn(ColumnType.String, "cls", true);
    builder.AddColumn(ColumnType.String, "cls2");

    builder.Add(0.5f, 1.1, "a", "a2");
    builder.Add(0.2f, 1.5, "b", "b2");
    builder.Add(0.7f, 0.5, "c", "c2");
    builder.Add(0.2f, 0.6, "d", "d2");
    var table = builder.Build();

    var table2 = table.SelectColumns(new[] { 1, 2, 3 });

    // expected value goes first so that a failure message labels the two values correctly
    Assert.AreEqual(1, table2.TargetColumnIndex);
    Assert.AreEqual(4, table2.RowCount);
    Assert.AreEqual(3, table2.ColumnCount);

    var column = table2.GetNumericColumns(_lap, new[] { 0 }).First().AsIndexable();
    Assert.AreEqual(1.1f, column[0]);
    Assert.AreEqual(1.5f, column[1]);
}
/// <summary>
/// Creates a training data provider over a small table and checks the input and expected output
/// of the mini batch requested with <c>new[] { 1 }</c>
/// </summary>
public void DataTableProvider()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Float, "val1");
    builder.AddColumn(ColumnType.Double, "val2");
    builder.AddColumn(ColumnType.String, "val3");
    builder.AddColumn(ColumnType.String, "cls", true);

    builder.Add(0.5f, 1.1, "d", "a");
    builder.Add(0.2f, 1.5, "c", "b");
    builder.Add(0.7f, 0.5, "b", "c");
    builder.Add(0.2f, 0.6, "a", "d");
    var table = builder.Build();

    var vectoriser = table.GetVectoriser();
    var dataProvider = _lap.NN.CreateTrainingDataProvider(table, vectoriser);
    var miniBatch = dataProvider.GetTrainingData(new[] { 1 });
    var input = miniBatch.Input.Row(0).AsIndexable();
    var expectedOutput = miniBatch.ExpectedOutput.Row(0).AsIndexable();

    // expected value goes first so that a failure message labels the two values correctly
    Assert.AreEqual(0.2f, input[0]);
    Assert.AreEqual(1.5f, input[1]);
    Assert.AreEqual(4, expectedOutput.Count);
    Assert.AreEqual("b", vectoriser.GetOutputLabel(2, expectedOutput.MaximumIndex()));
}
/// <summary>
/// Trains a multinomial logistic regression model on the height/weight/foot-size sample data
/// and checks that the row (6, 130, 8) is classified as "female"
/// </summary>
public void TestMultinomialLogisticRegression()
{
    var trainingBuilder = new DataTableBuilder();
    trainingBuilder.AddColumn(ColumnType.Float, "height");
    var weightColumn = trainingBuilder.AddColumn(ColumnType.Int, "weight");
    weightColumn.IsContinuous = true;
    var footSizeColumn = trainingBuilder.AddColumn(ColumnType.Int, "foot-size");
    footSizeColumn.IsContinuous = true;
    trainingBuilder.AddColumn(ColumnType.String, "gender", true);

    // sample data from: https://en.wikipedia.org/wiki/Naive_Bayes_classifier
    trainingBuilder.Add(6f, 180, 12, "male");
    trainingBuilder.Add(5.92f, 190, 11, "male");
    trainingBuilder.Add(5.58f, 170, 12, "male");
    trainingBuilder.Add(5.92f, 165, 10, "male");
    trainingBuilder.Add(5f, 100, 6, "female");
    trainingBuilder.Add(5.5f, 150, 8, "female");
    trainingBuilder.Add(5.42f, 130, 7, "female");
    trainingBuilder.Add(5.75f, 150, 9, "female");
    var trainingTable = trainingBuilder.Build();

    // the row to classify is created by a second builder that reuses the same columns
    var queryBuilder = new DataTableBuilder(trainingBuilder.Columns);
    var query = queryBuilder.Add(6f, 130, 8, "?");

    var model = trainingTable.TrainMultinomialLogisticRegression(_lap, 100, 0.1f);
    var classifier = model.CreateClassifier(_lap);
    var labels = classifier.Classify(query);

    Assert.IsTrue(labels.First() == "female");
}
/// <summary>
/// Fits a linear regression by gradient descent on data where the result is twice the value
/// and checks that the prediction for 3 rounds to 6, both for a single input and within a batch
/// </summary>
public void TestRegression()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Float, "value");
    builder.AddColumn(ColumnType.Float, "result", true);

    // simple linear relationship: result is twice value
    var samples = new[]
    {
        new object[] { 1f, 2f },
        new object[] { 2f, 4f },
        new object[] { 4f, 8f },
        new object[] { 8f, 16f }
    };
    foreach (var sample in samples)
    {
        builder.AddRow(sample);
    }
    var table = builder.Build();

    // only the gradient descent path is exercised; the equivalent check via Solve() is not run
    var trainer = table.CreateLinearRegressionTrainer(_lap);
    var model = trainer.GradientDescent(20, 0.01f);
    var predictor = model.CreatePredictor(_lap);

    var single = predictor.Predict(3f);
    Assert.IsTrue(Math.Round(single) == 6f);

    var batch = predictor.Predict(new[] { new[] { 10f }, new[] { 3f } });
    Assert.IsTrue(Math.Round(batch[1]) == 6f);
}
/// <summary>
/// Converts the sparse vectors to a data table: one float column per term ("term 0", "term 1", ...)
/// followed by a string "classification" target column
/// </summary>
/// <param name="stream">Optional stream to write the data table to</param>
public IDataTable ConvertToTable(Stream stream = null)
{
    var featureCount = GetMaximumIndex();

    var builder = new DataTableBuilder();
    for (var column = 0; column < featureCount; column++)
    {
        builder.AddColumn(ColumnType.Float, "term " + column.ToString());
    }
    builder.AddColumn(ColumnType.String, "classification", true);

    foreach (var vector in Classification)
    {
        // start from an all-zero row and then overwrite the slots that carry a weight
        var row = new object[featureCount + 1];
        for (var column = 0; column < featureCount; column++)
        {
            row[column] = 0f;
        }

        // NOTE(review): if GetMaximumIndex() returns the largest index (rather than a count),
        // a weight stored at that index lands in the final slot and is overwritten by the
        // classification name below - confirm against GetMaximumIndex()
        foreach (var entry in vector.Data)
        {
            row[entry.Index] = entry.Weight;
        }

        row[featureCount] = vector.Name;
        builder.AddRow(row);
    }

    return builder.Build(stream);
}
/// <summary>
/// Adds a column to the underlying data table builder
/// </summary>
/// <param name="name">The name of the new column</param>
/// <param name="type">The type of the new column</param>
/// <param name="isTarget">True if the column is the target column</param>
/// <exception cref="InvalidOperationException">Thrown if the header has already been written</exception>
public void AddColumn(string name, ColumnType type, bool isTarget = false)
{
    if (_hasWrittenHeader)
    {
        // a specific exception type with a message; callers that catch Exception still catch it
        throw new InvalidOperationException("Columns cannot be added after the header has been written");
    }

    // note the argument order: the builder takes the type before the name
    _dataTableBuilder.AddColumn(type, name, isTarget);
}
/// <summary>
/// Adds one column of each column type, writes a single row and checks that every field
/// of the first row reads back with the value that was written
/// </summary>
public void TestColumnTypes()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Boolean, "boolean");
    builder.AddColumn(ColumnType.Byte, "byte");
    builder.AddColumn(ColumnType.Date, "date");
    builder.AddColumn(ColumnType.Double, "double");
    builder.AddColumn(ColumnType.Float, "float");
    builder.AddColumn(ColumnType.Int, "int");
    builder.AddColumn(ColumnType.Long, "long");
    builder.AddColumn(ColumnType.Null, "null");
    builder.AddColumn(ColumnType.String, "string");

    var now = DateTime.Now;
    builder.Add(true, (byte)100, now, 1.0 / 3, 0.5f, int.MaxValue, long.MaxValue, null, "test");
    var dataTable = builder.Build();

    var firstRow = dataTable.GetRow(0);

    // expected value goes first so that a failure message labels the two values correctly
    Assert.AreEqual(true, firstRow.GetField<bool>(0));
    Assert.AreEqual(100, firstRow.GetField<byte>(1));
    Assert.AreEqual(now, firstRow.GetField<DateTime>(2));
    Assert.AreEqual(1.0 / 3, firstRow.GetField<double>(3));
    Assert.AreEqual(0.5f, firstRow.GetField<float>(4));
    Assert.AreEqual(int.MaxValue, firstRow.GetField<int>(5));
    Assert.AreEqual(long.MaxValue, firstRow.GetField<long>(6));
    Assert.AreEqual(null, firstRow.GetField<object>(7));
    Assert.AreEqual("test", firstRow.GetField<string>(8));
}
/// <summary>
/// Trains a logistic regression on the hours-studied / pass sample data and checks on which side
/// of 0.5 the predicted probabilities fall, for single inputs and for a batch
/// </summary>
public void TestLogisticRegression()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Float, "hours");
    builder.AddColumn(ColumnType.Boolean, "pass", true);

    // sample data from: https://en.wikipedia.org/wiki/Logistic_regression
    var samples = new[]
    {
        new object[] { 0.5f, false },
        new object[] { 0.75f, false },
        new object[] { 1f, false },
        new object[] { 1.25f, false },
        new object[] { 1.5f, false },
        new object[] { 1.75f, false },
        new object[] { 1.75f, true },
        new object[] { 2f, false },
        new object[] { 2.25f, true },
        new object[] { 2.5f, false },
        new object[] { 2.75f, true },
        new object[] { 3f, false },
        new object[] { 3.25f, true },
        new object[] { 3.5f, false },
        new object[] { 4f, true },
        new object[] { 4.25f, true },
        new object[] { 4.5f, true },
        new object[] { 4.75f, true },
        new object[] { 5f, true },
        new object[] { 5.5f, true }
    };
    foreach (var sample in samples)
    {
        builder.AddRow(sample);
    }
    var table = builder.Build();

    var trainer = table.CreateLogisticRegressionTrainer(_lap);
    var model = trainer.GradientDescent(1000, 0.1f);
    var predictor = model.CreatePredictor(_lap);

    // single predictions
    var afterTwoHours = predictor.Predict(2f);
    Assert.IsTrue(afterTwoHours < 0.5f);

    var afterFourHours = predictor.Predict(4f);
    Assert.IsTrue(afterFourHours >= 0.5f);

    // batch prediction for 1 to 5 hours
    var batch = predictor.Predict(new[]
    {
        new[] { 1f },
        new[] { 2f },
        new[] { 3f },
        new[] { 4f },
        new[] { 5f }
    });
    Assert.IsTrue(batch[0] <= 0.5f);
    Assert.IsTrue(batch[1] <= 0.5f);
    Assert.IsTrue(batch[2] >= 0.5f);
    Assert.IsTrue(batch[3] >= 0.5f);
    Assert.IsTrue(batch[4] >= 0.5f);
}
/// <summary>
/// Generates a data table containing XOR training data: float columns "X" and "Y"
/// plus a float target column "XOR", with one row per input combination
/// </summary>
/// <returns>The built data table</returns>
public static IDataTable Get()
{
    var xorBuilder = new DataTableBuilder();

    xorBuilder.AddColumn(ColumnType.Float, "X");
    xorBuilder.AddColumn(ColumnType.Float, "Y");
    xorBuilder.AddColumn(ColumnType.Float, "XOR", true);

    // rows are: X, Y, expected output
    xorBuilder.Add(0f, 0f, 0f);
    xorBuilder.Add(1f, 0f, 1f);
    xorBuilder.Add(0f, 1f, 1f);
    xorBuilder.Add(1f, 1f, 0f);

    var xorTable = xorBuilder.Build();
    return xorTable;
}
/// <summary>
/// Builds a one row table whose second column is flagged as the target and checks the
/// reported target column index, row count and column count
/// </summary>
public void TestTargetColumnIndex()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.String, "a");
    builder.AddColumn(ColumnType.String, "b", true);
    builder.AddColumn(ColumnType.String, "c");
    builder.Add("a", "b", "c");
    var table = builder.Build();

    // expected value goes first so that a failure message labels the two values correctly
    Assert.AreEqual(1, table.TargetColumnIndex);
    Assert.AreEqual(1, table.RowCount);
    Assert.AreEqual(3, table.ColumnCount);
}
/// <summary>
/// Registers a mock behavior that returns a two row customers table for commands whose text
/// starts with "select *" and checks that <c>SUT.DataAccess.GetAllUsers()</c> returns those rows
/// </summary>
public void CanSetupResultForSimpleForQuery()
{
    DataTable customers;
    using (var customerBuilder = new DataTableBuilder())
    {
        customerBuilder
            .AddColumn("userid", typeof(int))
            .AddColumn("email", typeof(string))
            .AddRow(1, "*****@*****.**")
            .AddRow(10, "*****@*****.**");
        customers = customerBuilder.DataTable;
    }

    // any command starting with "select *" is answered with the table built above
    Factory.AddBehavior(
        new MockCommandBehavior()
            .When(c => c.CommandText.StartsWith("select *"))
            .ReturnsData(customers));

    var result = SUT.DataAccess.GetAllUsers();

    Assert.AreEqual(2, result.Rows.Count);

    Assert.AreEqual(1, result.Rows[0][0]);
    Assert.AreEqual("*****@*****.**", result.Rows[0][1]);

    Assert.AreEqual(10, result.Rows[1][0]);
    Assert.AreEqual("*****@*****.**", result.Rows[1][1]);
}
/// <summary>
/// Requests numeric column 1 from a small table and checks its first two values
/// </summary>
public void GetNumericColumns2()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Float, "val1");
    builder.AddColumn(ColumnType.Double, "val2");
    builder.AddColumn(ColumnType.String, "cls", true);

    builder.Add(0.5f, 1.1, "a");
    builder.Add(0.2f, 1.5, "b");
    builder.Add(0.7f, 0.5, "c");
    builder.Add(0.2f, 0.6, "d");
    var table = builder.Build();

    var column = table.GetNumericColumns(new[] { 1 }).First();

    // expected value goes first so that a failure message labels the two values correctly
    Assert.AreEqual(1.1f, column[0]);
    Assert.AreEqual(1.5f, column[1]);
}
/// <summary>
/// Requests the numeric rows restricted to column 1 from a small table and checks the
/// first value of the first two rows
/// </summary>
public void GetNumericRows()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Float, "val1");
    builder.AddColumn(ColumnType.Double, "val2");
    builder.AddColumn(ColumnType.String, "cls", true);

    builder.Add(0.5f, 1.1, "a");
    builder.Add(0.2f, 1.5, "b");
    builder.Add(0.7f, 0.5, "c");
    builder.Add(0.2f, 0.6, "d");
    var table = builder.Build();

    var rows = table.GetNumericRows(_lap, new[] { 1 }).Select(r => r.AsIndexable()).ToList();

    // expected value goes first so that a failure message labels the two values correctly
    Assert.AreEqual(1.1f, rows[0][0]);
    Assert.AreEqual(1.5f, rows[1][0]);
}
/// <summary>
/// Projects a four row table with a callback that returns null for the row whose "cls" field
/// is "b", then checks that the column count is unchanged and three rows remain
/// </summary>
public void TableFilter()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Float, "val1");
    builder.AddColumn(ColumnType.Double, "val2");
    builder.AddColumn(ColumnType.String, "cls", true);

    builder.Add(0.5f, 1.1, "a");
    builder.Add(0.2f, 1.5, "b");
    builder.Add(0.7f, 0.5, "c");
    builder.Add(0.2f, 0.6, "d");
    var table = builder.Build();

    var projectedTable = table.Project(r => r.GetField<string>(2) == "b" ? null : r.Data);

    // expected value goes first so that a failure message labels the two values correctly
    Assert.AreEqual(table.ColumnCount, projectedTable.ColumnCount);
    Assert.AreEqual(3, projectedTable.RowCount);
}
/// <summary>
/// Splits a four row table with <c>Fold(4, 0, false)</c> and checks that four folds come back,
/// each with three training rows and one validation row
/// </summary>
public void Fold()
{
    var builder = new DataTableBuilder();
    builder.AddColumn(ColumnType.Float, "val1");
    builder.AddColumn(ColumnType.Double, "val2");
    builder.AddColumn(ColumnType.String, "cls", true);

    builder.Add(0.5f, 1.1, "a");
    builder.Add(0.2f, 1.5, "b");
    builder.Add(0.7f, 0.5, "c");
    builder.Add(0.2f, 0.6, "d");
    var table = builder.Build();

    var folds = table.Fold(4, 0, false).ToList();

    // expected value goes first so that a failure message labels the two values correctly
    Assert.AreEqual(4, folds.Count);
    Assert.IsTrue(folds.All(r => r.Training.RowCount == 3 && r.Validation.RowCount == 1));
}
/// <summary>
/// Builds a table with a single int column "val" holding the values 0 to 9999, one per row
/// </summary>
/// <returns>The built data table</returns>
IDataTable _GetSimpleTable()
{
    const int rowCount = 10000;

    var tableBuilder = new DataTableBuilder();
    tableBuilder.AddColumn(ColumnType.Int, "val");

    var value = 0;
    while (value < rowCount)
    {
        tableBuilder.Add(value);
        value++;
    }

    return tableBuilder.Build();
}
/// <summary>
/// Builds a ten row table with one column of each column type and checks the statistics
/// reported by the table analysis for the boolean, numeric and string columns
/// </summary>
public void TestDataTableAnalysis()
{
    var tableBuilder = new DataTableBuilder();
    tableBuilder.AddColumn(ColumnType.Boolean, "boolean");
    tableBuilder.AddColumn(ColumnType.Byte, "byte");
    tableBuilder.AddColumn(ColumnType.Date, "date");
    tableBuilder.AddColumn(ColumnType.Double, "double");
    tableBuilder.AddColumn(ColumnType.Float, "float");
    tableBuilder.AddColumn(ColumnType.Int, "int");
    tableBuilder.AddColumn(ColumnType.Long, "long");
    tableBuilder.AddColumn(ColumnType.Null, "null");
    tableBuilder.AddColumn(ColumnType.String, "string");

    // rows 1..10: the numeric columns all carry the row number
    for (var n = 1; n <= 10; n++)
    {
        tableBuilder.Add(n % 2 == 0, (byte)n, DateTime.Now, (double)n, (float)n, n, (long)n, null, n.ToString());
    }

    var analysis = tableBuilder.Build().GetAnalysis();

    // the XML form is read but its content is not inspected
    var xml = analysis.AsXml;

    // boolean column
    var booleanInfo = analysis[0] as INumericColumnInfo;
    Assert.IsTrue(booleanInfo.NumDistinct == 2);
    Assert.IsTrue(booleanInfo.Mean == 0.5);

    // byte, double, float, int and long columns
    var numericColumnIndices = new[] { 1, 3, 4, 5, 6 };
    var numericInfo = numericColumnIndices
        .Select(columnIndex => analysis[columnIndex] as INumericColumnInfo)
        .ToList();
    Assert.IsTrue(numericInfo.All(info => info.NumDistinct == 10));
    Assert.IsTrue(numericInfo.All(info => info.Min == 1));
    Assert.IsTrue(numericInfo.All(info => info.Max == 10));
    Assert.IsTrue(numericInfo.All(info => info.Mean == 5.5));
    Assert.IsTrue(numericInfo.All(info => info.Median.Value == 5));
    Assert.IsTrue(numericInfo.All(info => Math.Round(info.StdDev.Value) == 3));

    // string column
    var stringInfo = analysis[8] as IStringColumnInfo;
    Assert.IsTrue(stringInfo.NumDistinct == 10);
    Assert.IsTrue(stringInfo.MaxLength == 2);
}
/// <summary>
/// Registers mock behaviors that return a users table for "from customers" commands and an
/// orders table for "from orders" commands, then checks the tables in the data set returned
/// by <c>SUT.DataAccess.GetAllOrders()</c>
/// </summary>
public void CanFillDataSet()
{
    DataTable users;
    DataTable orders;

    using (var userBuilder = new DataTableBuilder())
    {
        userBuilder
            .AddColumn("customerid", typeof(int))
            .AddColumn("firstname", typeof(string))
            .AddColumn("lastname", typeof(string))
            .AddRow(1, "joe", "black")
            .AddRow(1, "kurt", "vonnegut");
        users = userBuilder.DataTable;
    }

    using (var orderBuilder = new DataTableBuilder())
    {
        orderBuilder
            .AddColumn("orderid", typeof(int))
            .AddColumn("userid", typeof(int))
            .AddColumn("total", typeof(double))
            .AddRow(100, 1, 10.10)
            .AddRow(101, 1, 10.20)
            .AddRow(202, 2, 20.10)
            .AddRow(203, 2, 20.20);
        orders = orderBuilder.DataTable;
    }

    // one behavior per query, matched on a fragment of the command text
    var customersBehavior = new MockCommandBehavior()
        .When(cmd => cmd.CommandText.Contains("from customers"))
        .ReturnsData(users);
    Factory.AddBehavior(customersBehavior);

    var ordersBehavior = new MockCommandBehavior()
        .When(cmd => cmd.CommandText.Contains("from orders"))
        .ReturnsData(orders);
    Factory.AddBehavior(ordersBehavior);

    using (var dataSet = SUT.DataAccess.GetAllOrders())
    {
        Assert.AreEqual(2, dataSet.Tables.Count);
        Assert.AreEqual(2, dataSet.Tables["customers"].Rows.Count);
        Assert.AreEqual(4, dataSet.Tables["orders"].Rows.Count);
    }
}
/// <summary>
/// Adds a column to the underlying builder and registers, for that column, a lookup from each
/// produced cell value to its filter value together with the supplied filter factory
/// </summary>
/// <param name="name">The column name passed to the builder</param>
/// <param name="getDataTableValue">Produces the cell value from the current instance</param>
/// <param name="getFitlerValue">Produces the filter value from the current instance; a <c>ColumnBase</c> result is replaced by its <c>Value</c></param>
/// <param name="getFilter">The filter factory stored against the new column</param>
/// <param name="isEmpty">Reports whether the current instance is empty</param>
/// <returns>The column returned by the builder</returns>
public DataColumn AddColumn <Type>(string name, Func <EntityType, Type> getDataTableValue, Func <EntityType, object> getFitlerValue, Func <EntityType, object, FilterBase> getFilter, Func <EntityType, bool> isEmpty)
{
    // cell value -> filter value, filled in the first time each cell value is produced
    var filterValueByCell = new Dictionary<object, object>();

    var column = _builder.AddColumn(
        name,
        typeof(Type),
        () =>
        {
            var cellValue = getDataTableValue(_instance);
            if (filterValueByCell.ContainsKey(cellValue))
            {
                return cellValue;
            }

            // unwrap ColumnBase so the lookup stores the underlying value
            var filterValue = getFitlerValue(_instance);
            var wrapped = filterValue as ColumnBase;
            if (wrapped != null)
            {
                filterValue = wrapped.Value;
            }

            filterValueByCell.Add(cellValue, filterValue);
            return cellValue;
        },
        () => isEmpty(_instance),
        delegate { });

    filterValues.Add(column, filterValueByCell);
    filters.Add(column, getFilter);
    return column;
}