/// <summary>
/// Builds a new table by running every row through a vectoriser. Each name in the
/// vectoriser's ColumnNames becomes a float column; when requested, the target column
/// is appended last as a string column flagged as the target.
/// </summary>
/// <param name="vectoriser">Vectoriser to apply; when null, GetVectoriser(useTargetColumnIndex) supplies one.</param>
/// <param name="useTargetColumnIndex">True to append the target column's value (read as a string) to each row.</param>
/// <param name="output">Optional stream handed to the DataTableWriter.</param>
/// <returns>The table produced by the writer.</returns>
public IDataTable ConvertToNumeric(IDataTableVectoriser vectoriser = null, bool useTargetColumnIndex = true, Stream output = null)
{
    var tableWriter = new DataTableWriter(output);
    if (vectoriser == null)
    {
        vectoriser = GetVectoriser(useTargetColumnIndex);
    }

    // one float column per vectoriser output
    foreach (var columnName in vectoriser.ColumnNames)
    {
        tableWriter.AddColumn(columnName, ColumnType.Float);
    }

    // the classification label column (optional) goes after the numeric columns
    var targetIndex = TargetColumnIndex;
    if (useTargetColumnIndex)
    {
        var targetColumn = _column[targetIndex];
        tableWriter.AddColumn(targetColumn.Name, ColumnType.String, true);
    }

    // vectorise each row and append the label when it is part of the output
    _Iterate((row, i) =>
    {
        var vectorised = vectoriser.GetInput(row).Data.AsEnumerable().Cast<object>();
        var values = useTargetColumnIndex
            ? vectorised.Concat(new object[] { row.GetField<string>(targetIndex) })
            : vectorised;

        var convertedRow = new DataTableRow(this, values.ToArray(), _rowConverter);
        tableWriter.AddRow(convertedRow);
        return true;
    });

    return tableWriter.GetDataTable();
}
/// <summary>
/// Parses one line of text, converts each parsed value against the writer column at the
/// same position and adds the converted values to the writer as a single row.
/// </summary>
/// <param name="line">Raw text line, split into values by _Parse.</param>
/// <param name="writer">Writer whose columns drive the conversion and which receives the row.</param>
private void _Add(string line, DataTableWriter writer)
{
    var columns = writer.Columns;
    var fields = _Parse(line);

    // Columns and fields are paired positionally. The column type is passed by reference,
    // so _Convert is able to update it (NOTE(review): confirm whether it actually does).
    var rowValues = columns
        .Zip(fields, (column, text) => _Convert(ref column._type, text))
        .ToList();

    writer.AddRow(rowValues);
}
/// <summary>
/// Creates a new table with this table's columns that contains only the rows
/// returned by GetRows for the supplied indices.
/// </summary>
/// <param name="rowIndex">Indices of the rows to copy; passed straight to GetRows.</param>
/// <param name="output">Optional stream handed to the DataTableWriter.</param>
/// <returns>The table produced by the writer.</returns>
public IDataTable CopyWithRows(IEnumerable<int> rowIndex, Stream output = null)
{
    var tableWriter = new DataTableWriter(_column, output);

    foreach (var selectedRow in GetRows(rowIndex))
    {
        tableWriter.AddRow(selectedRow);
    }

    return tableWriter.GetDataTable();
}
/// <summary>
/// Joins this table with another one side by side: the result has this table's columns
/// followed by the other table's columns, and each row is this table's row data followed
/// by the data of the other table's row at the same index.
/// </summary>
/// <param name="dataTable">Table whose columns and rows are appended on the right.</param>
/// <param name="output">Optional stream handed to the DataTableWriter.</param>
/// <returns>The table produced by the writer.</returns>
public IDataTable Zip(IDataTable dataTable, Stream output = null)
{
    var combinedColumns = _column.Concat(dataTable.Columns);
    var tableWriter = new DataTableWriter(combinedColumns, output);

    _Iterate((row, i) =>
    {
        if (i < dataTable.RowCount)
        {
            var leftData = row.Data;
            var rightData = dataTable.GetRow(i).Data;
            tableWriter.AddRow(leftData.Concat(rightData).ToList());
            return true;
        }

        // the other table has no row at this index; returning false presumably
        // ends the iteration (NOTE(review): confirm against _Iterate)
        return false;
    });

    return tableWriter.GetDataTable();
}
/// <summary>
/// Builds a new table by running every row through a vectoriser. Each name in the
/// vectoriser's ColumnNames becomes a float column, and the target column is appended
/// last as a string column flagged as the target.
/// </summary>
/// <param name="vectoriser">Vectoriser to apply; when null, GetVectoriser() supplies one.</param>
/// <param name="output">Optional stream handed to the DataTableWriter.</param>
/// <returns>The table produced by the writer.</returns>
public IDataTable ConvertToNumeric(IDataTableVectoriser vectoriser = null, Stream output = null)
{
    var tableWriter = new DataTableWriter(output);
    if (vectoriser == null)
    {
        vectoriser = GetVectoriser();
    }

    // one float column per vectoriser output
    foreach (var columnName in vectoriser.ColumnNames)
    {
        tableWriter.AddColumn(columnName, ColumnType.Float);
    }

    // the target column always goes last, stored as a string
    var targetIndex = TargetColumnIndex;
    var targetColumn = _column[targetIndex];
    tableWriter.AddColumn(targetColumn.Name, ColumnType.String, true);

    _Iterate(row =>
    {
        var vectorised = vectoriser.GetInput(row).AsEnumerable().Cast<object>();
        var label = new object[] { row.GetField<string>(targetIndex) };
        var values = vectorised.Concat(label);
        tableWriter.AddRow(values);
        return true;
    });

    return tableWriter.GetDataTable();
}
/// <summary>
/// Builds a new table by passing every row through <paramref name="mutator"/>.
/// Rows for which the mutator returns null are left out. The output columns are created
/// from the first non-null projected row: names and target flags are taken from this
/// table's columns at the same position, and each column type is derived from the runtime
/// type of the corresponding value (a null value yields ColumnType.Null).
/// </summary>
/// <param name="mutator">Maps a row to its projected values, or returns null to drop the row.</param>
/// <param name="output">Optional stream handed to the DataTableWriter.</param>
/// <returns>The table produced by the writer.</returns>
/// <exception cref="FormatException">A value in the first projected row has a type with no matching column type.</exception>
public IDataTable Project(Func<IRow, IReadOnlyList<object>> mutator, Stream output = null)
{
    // maps the runtime type of a projected value to its column type
    Func<Type, ColumnType> toColumnType = valueType =>
    {
        if (valueType == typeof(string)) { return ColumnType.String; }
        if (valueType == typeof(double)) { return ColumnType.Double; }
        if (valueType == typeof(float)) { return ColumnType.Float; }
        if (valueType == typeof(long)) { return ColumnType.Long; }
        if (valueType == typeof(int)) { return ColumnType.Int; }
        if (valueType == typeof(byte)) { return ColumnType.Byte; }
        if (valueType == typeof(DateTime)) { return ColumnType.Date; }
        if (valueType == typeof(bool)) { return ColumnType.Boolean; }
        if (valueType == typeof(FloatVector)) { return ColumnType.Vector; }
        if (valueType == typeof(FloatMatrix)) { return ColumnType.Matrix; }
        if (valueType == typeof(FloatTensor)) { return ColumnType.Tensor; }
        if (valueType == typeof(WeightedIndexList)) { return ColumnType.WeightedIndexList; }
        if (valueType == typeof(IndexList)) { return ColumnType.IndexList; }
        throw new FormatException();
    };

    var tableWriter = new DataTableWriter(output);
    var schemaPending = true;

    _Iterate((row, i) =>
    {
        var projected = mutator(row);
        if (projected == null)
        {
            // dropped row: nothing to write, keep iterating
            return true;
        }

        if (schemaPending)
        {
            // the first surviving row defines the output columns
            var position = 0;
            foreach (var value in projected)
            {
                var sourceColumn = Columns[position];
                var columnType = value == null
                    ? ColumnType.Null
                    : toColumnType(value.GetType());
                tableWriter.AddColumn(sourceColumn.Name, columnType, sourceColumn.IsTarget);
                ++position;
            }
            schemaPending = false;
        }

        tableWriter.AddRow(new DataTableRow(this, projected, _rowConverter));
        return true;
    });

    return tableWriter.GetDataTable();
}
/// <summary>
/// Builds a new table by passing every row through <paramref name="mutator"/>.
/// Rows for which the mutator returns null are left out. The output columns are created
/// from the first non-null projected row: names and target flags are taken from this
/// table's columns at the same position, and each column type is derived from the runtime
/// type of the corresponding value (a null value yields ColumnType.Null).
/// </summary>
/// <param name="mutator">Maps a row to its projected values, or returns null to drop the row.</param>
/// <param name="output">Optional stream handed to the DataTableWriter.</param>
/// <returns>The table produced by the writer.</returns>
/// <exception cref="FormatException">A value in the first projected row has a type with no matching column type.</exception>
public IDataTable Project(Func<IRow, IReadOnlyList<object>> mutator, Stream output = null)
{
    var tableWriter = new DataTableWriter(output);
    var schemaPending = true;

    _Iterate(row =>
    {
        var projected = mutator(row);
        if (projected == null)
        {
            // dropped row: nothing to write, keep iterating
            return true;
        }

        if (schemaPending)
        {
            // the first surviving row defines the output columns
            var position = 0;
            foreach (var value in projected)
            {
                var sourceColumn = Columns[position++];
                if (value == null)
                {
                    tableWriter.AddColumn(sourceColumn.Name, ColumnType.Null, sourceColumn.IsTarget);
                    continue;
                }

                var valueType = value.GetType();
                ColumnType resolved;
                if (valueType == typeof(string)) { resolved = ColumnType.String; }
                else if (valueType == typeof(double)) { resolved = ColumnType.Double; }
                else if (valueType == typeof(float)) { resolved = ColumnType.Float; }
                else if (valueType == typeof(long)) { resolved = ColumnType.Long; }
                else if (valueType == typeof(int)) { resolved = ColumnType.Int; }
                else if (valueType == typeof(byte)) { resolved = ColumnType.Byte; }
                else if (valueType == typeof(DateTime)) { resolved = ColumnType.Date; }
                else if (valueType == typeof(bool)) { resolved = ColumnType.Boolean; }
                else { throw new FormatException(); }

                tableWriter.AddColumn(sourceColumn.Name, resolved, sourceColumn.IsTarget);
            }
            schemaPending = false;
        }

        tableWriter.AddRow(projected);
        return true;
    });

    return tableWriter.GetDataTable();
}