/// <summary>
/// Writes a new table in which every row is replaced by the output of a vectoriser,
/// optionally followed by the value of the column at TargetColumnIndex read as a string.
/// </summary>
/// <param name="vectoriser">Vectoriser applied to each row; when null, GetVectoriser(useTargetColumnIndex) supplies one</param>
/// <param name="useTargetColumnIndex">When true, a string column named after the column at TargetColumnIndex is appended and filled from each row</param>
/// <param name="output">Stream passed straight to the DataTableWriter constructor</param>
/// <returns>The table returned by the writer's GetDataTable()</returns>
public IDataTable ConvertToNumeric(IDataTableVectoriser vectoriser = null, bool useTargetColumnIndex = true, Stream output = null)
{
    var tableWriter = new DataTableWriter(output);
    if (vectoriser == null) {
        vectoriser = GetVectoriser(useTargetColumnIndex);
    }

    // every vectoriser output becomes a float column
    foreach (var columnName in vectoriser.ColumnNames) {
        tableWriter.AddColumn(columnName, ColumnType.Float);
    }

    // the label column (if requested) goes last; the third AddColumn argument is true
    // (presumably the "is target" flag - confirm against DataTableWriter)
    var targetIndex = TargetColumnIndex;
    if (useTargetColumnIndex) {
        tableWriter.AddColumn(_column[targetIndex].Name, ColumnType.String, true);
    }

    _Iterate((row, i) => {
        var features = vectoriser.GetInput(row).Data.AsEnumerable().Cast<object>();

        // materialise the row, appending the label only when it was declared above
        var values = useTargetColumnIndex
            ? features.Concat(new object[] { row.GetField<string>(targetIndex) }).ToArray()
            : features.ToArray();

        tableWriter.AddRow(new DataTableRow(this, values, _rowConverter));
        return true;
    });

    return tableWriter.GetDataTable();
}
/// <summary>
/// Writes a new table in which every row is replaced by the output of a vectoriser
/// followed by the value of the column at TargetColumnIndex read as a string.
/// </summary>
/// <param name="vectoriser">Vectoriser applied to each row; when null, GetVectoriser() supplies one</param>
/// <param name="output">Stream passed straight to the DataTableWriter constructor</param>
/// <returns>The table returned by the writer's GetDataTable()</returns>
public IDataTable ConvertToNumeric(IDataTableVectoriser vectoriser = null, Stream output = null)
{
    var tableWriter = new DataTableWriter(output);
    if (vectoriser == null) {
        vectoriser = GetVectoriser();
    }

    // every vectoriser output becomes a float column
    foreach (var columnName in vectoriser.ColumnNames) {
        tableWriter.AddColumn(columnName, ColumnType.Float);
    }

    // the label column is always appended last, with the third AddColumn argument set to true
    var targetIndex = TargetColumnIndex;
    tableWriter.AddColumn(_column[targetIndex].Name, ColumnType.String, true);

    _Iterate(row => {
        var features = vectoriser.GetInput(row).AsEnumerable().Cast<object>();
        var label = row.GetField<string>(targetIndex);

        // the writer receives the lazy sequence: features first, then the label
        tableWriter.AddRow(features.Concat(new object[] { label }));
        return true;
    });

    return tableWriter.GetDataTable();
}
/// <summary>
/// Works out column names and column types from the supplied lines and returns
/// a DataTableWriter (over the given stream) with those columns already added.
/// </summary>
/// <param name="stream">Stream passed to the DataTableWriter constructor</param>
/// <param name="lines">Lines to inspect; the first one is parsed for names (lines.First() throws if the list is empty)</param>
/// <param name="checkForHeader">When true, hasHeader is overwritten: it becomes true only if every field of the first line is typed as a string</param>
/// <param name="hasHeader">Whether the first line is a header; read as given when checkForHeader is false</param>
/// <returns>A writer holding one column per (name, type) pair</returns>
private DataTableWriter _DetermineHeaders(Stream stream, List<string> lines, bool checkForHeader, ref bool hasHeader)
{
    var firstRow = _Parse(lines.First()).ToList();

    // a first line made up purely of strings is treated as a header
    if (checkForHeader) {
        hasHeader = firstRow.All(cell => _DetermineType(cell) == ColumnType.String);
    }

    // names come from the header, or are generated as _col0, _col1, ... when there is none
    var names = new List<string>();
    for (var position = 0; position < firstRow.Count; position++) {
        names.Add(hasHeader ? firstRow[position] : "_col" + position);
    }

    IReadOnlyList<ColumnType> types;
    if (_parseAsText) {
        // text mode: one string column per field of the first line
        types = Enumerable.Repeat(ColumnType.String, firstRow.Count).ToList();
    }
    else {
        // otherwise gather the detected type of every field, bucketed by field position,
        // and let _GetColumnType pick the column type for each bucket
        var dataLines = lines.Skip(hasHeader ? 1 : 0);
        var positionedCells = dataLines.SelectMany(
            line => _Parse(line).Select((cell, position) => Tuple.Create(cell, position))
        );
        var typesByPosition = positionedCells
            .GroupBy(entry => entry.Item2, entry => _DetermineType(entry.Item1))
            .OrderBy(bucket => bucket.Key)
        ;
        types = typesByPosition.Select(_GetColumnType).ToList();
    }

    // pair names with types; surplus entries on either side are ignored
    var writer = new DataTableWriter(stream);
    var columnCount = Math.Min(names.Count, types.Count);
    for (var column = 0; column < columnCount; column++) {
        writer.AddColumn(names[column], types[column]);
    }
    return writer;
}
/// <summary>
/// Builds a new table by passing every row through a mutator. Rows for which the
/// mutator returns null are left out. The output columns are declared from the first
/// non-null mutated row: names and target flags are taken from Columns at the same
/// position, and each column type is derived from the runtime type of the value.
/// </summary>
/// <param name="mutator">Called once per row; returns the values of the new row, or null to drop the row</param>
/// <param name="output">Stream passed straight to the DataTableWriter constructor</param>
/// <returns>The table returned by the writer's GetDataTable()</returns>
/// <exception cref="FormatException">A value in the first mutated row has a type with no matching ColumnType</exception>
public IDataTable Project(Func<IRow, IReadOnlyList<object>> mutator, Stream output = null)
{
    var writer = new DataTableWriter(output);
    var columnsDeclared = false;

    _Iterate((row, i) => {
        var mutatedRow = mutator(row);
        if (mutatedRow == null) {
            // nothing to write for this row; true is returned either way
            return true;
        }

        if (!columnsDeclared) {
            // NOTE(review): Columns[index] is read for every value, so a mutated row with
            // more values than Columns presumably fails here - confirm that is intended
            var index = 0;
            foreach (var item in mutatedRow) {
                var column = Columns[index];
                ColumnType columnType;
                if (item == null) {
                    columnType = ColumnType.Null;
                }
                else {
                    // exact type matches only: derived types are deliberately not accepted
                    var type = item.GetType();
                    if (type == typeof(string)) {
                        columnType = ColumnType.String;
                    }
                    else if (type == typeof(double)) {
                        columnType = ColumnType.Double;
                    }
                    else if (type == typeof(float)) {
                        columnType = ColumnType.Float;
                    }
                    else if (type == typeof(long)) {
                        columnType = ColumnType.Long;
                    }
                    else if (type == typeof(int)) {
                        columnType = ColumnType.Int;
                    }
                    else if (type == typeof(byte)) {
                        columnType = ColumnType.Byte;
                    }
                    else if (type == typeof(DateTime)) {
                        columnType = ColumnType.Date;
                    }
                    else if (type == typeof(bool)) {
                        columnType = ColumnType.Boolean;
                    }
                    else if (type == typeof(FloatVector)) {
                        columnType = ColumnType.Vector;
                    }
                    else if (type == typeof(FloatMatrix)) {
                        columnType = ColumnType.Matrix;
                    }
                    else if (type == typeof(FloatTensor)) {
                        columnType = ColumnType.Tensor;
                    }
                    else if (type == typeof(WeightedIndexList)) {
                        columnType = ColumnType.WeightedIndexList;
                    }
                    else if (type == typeof(IndexList)) {
                        columnType = ColumnType.IndexList;
                    }
                    else {
                        // say which value was rejected instead of throwing a bare exception
                        throw new FormatException("Project: unsupported value type " + type.FullName + " at column index " + index);
                    }
                }
                writer.AddColumn(column.Name, columnType, column.IsTarget);
                ++index;
            }
            columnsDeclared = true;
        }

        writer.AddRow(new DataTableRow(this, mutatedRow, _rowConverter));
        return true;
    });

    return writer.GetDataTable();
}
/// <summary>
/// Builds a new table by passing every row through a mutator. Rows for which the
/// mutator returns null are left out. The output columns are declared from the first
/// non-null mutated row: names and target flags are taken from Columns at the same
/// position, and each column type is derived from the runtime type of the value.
/// </summary>
/// <param name="mutator">Called once per row; returns the values of the new row, or null to drop the row</param>
/// <param name="output">Stream passed straight to the DataTableWriter constructor</param>
/// <returns>The table returned by the writer's GetDataTable()</returns>
/// <exception cref="FormatException">A value in the first mutated row has a type with no matching ColumnType</exception>
public IDataTable Project(Func<IRow, IReadOnlyList<object>> mutator, Stream output = null)
{
    var writer = new DataTableWriter(output);
    var columnsDeclared = false;

    _Iterate(row => {
        var mutatedRow = mutator(row);
        if (mutatedRow == null) {
            // nothing to write for this row; true is returned either way
            return true;
        }

        if (!columnsDeclared) {
            // NOTE(review): Columns[index] is read for every value, so a mutated row with
            // more values than Columns presumably fails here - confirm that is intended
            var index = 0;
            foreach (var item in mutatedRow) {
                var column = Columns[index];
                ColumnType columnType;
                if (item == null) {
                    columnType = ColumnType.Null;
                }
                else {
                    // exact type matches only
                    var type = item.GetType();
                    if (type == typeof(string)) {
                        columnType = ColumnType.String;
                    }
                    else if (type == typeof(double)) {
                        columnType = ColumnType.Double;
                    }
                    else if (type == typeof(float)) {
                        columnType = ColumnType.Float;
                    }
                    else if (type == typeof(long)) {
                        columnType = ColumnType.Long;
                    }
                    else if (type == typeof(int)) {
                        columnType = ColumnType.Int;
                    }
                    else if (type == typeof(byte)) {
                        columnType = ColumnType.Byte;
                    }
                    else if (type == typeof(DateTime)) {
                        columnType = ColumnType.Date;
                    }
                    else if (type == typeof(bool)) {
                        columnType = ColumnType.Boolean;
                    }
                    else {
                        // say which value was rejected instead of throwing a bare exception
                        throw new FormatException("Project: unsupported value type " + type.FullName + " at column index " + index);
                    }
                }
                writer.AddColumn(column.Name, columnType, column.IsTarget);
                ++index;
            }
            columnsDeclared = true;
        }

        writer.AddRow(mutatedRow);
        return true;
    });

    return writer.GetDataTable();
}