Пример #1
0
        /// <summary>
        /// Writes a numeric copy of this table: every row is passed through a vectoriser and
        /// stored as float columns, optionally followed by the classification label as a string column.
        /// </summary>
        /// <param name="vectoriser">Vectoriser to use; when null one is obtained from <c>GetVectoriser(useTargetColumnIndex)</c>.</param>
        /// <param name="useTargetColumnIndex">When true, the column at <c>TargetColumnIndex</c> is appended to each row as a string label.</param>
        /// <param name="output">Stream handed to the <c>DataTableWriter</c> (may be null).</param>
        /// <returns>The table produced by the writer.</returns>
        public IDataTable ConvertToNumeric(IDataTableVectoriser vectoriser = null, bool useTargetColumnIndex = true, Stream output = null)
        {
            var writer = new DataTableWriter(output);

            if (vectoriser == null)
            {
                vectoriser = GetVectoriser(useTargetColumnIndex);
            }

            // one float column per vectorised feature
            foreach (var featureName in vectoriser.ColumnNames)
            {
                writer.AddColumn(featureName, ColumnType.Float);
            }

            // the label column (if requested) goes last and is added with the target flag set
            var classColumnIndex = TargetColumnIndex;
            if (useTargetColumnIndex)
            {
                var labelColumn = _column[classColumnIndex];
                writer.AddColumn(labelColumn.Name, ColumnType.String, true);
            }

            // vectorise every row and append it to the writer
            _Iterate((row, index) =>
            {
                var values = vectoriser.GetInput(row).Data.AsEnumerable().Cast<object>();
                if (useTargetColumnIndex)
                {
                    var label = row.GetField<string>(classColumnIndex);
                    values = values.Concat(new object[] { label });
                }

                var numericRow = new DataTableRow(this, values.ToArray(), _rowConverter);
                writer.AddRow(numericRow);

                // always true: no row ends the iteration early
                return true;
            });

            return writer.GetDataTable();
        }
Пример #2
0
        /// <summary>
        /// Parses one line of text, converts each parsed field against the writer's
        /// corresponding column and appends the result to the writer as a new row.
        /// </summary>
        /// <param name="line">The raw line; it is split into fields by <c>_Parse</c>.</param>
        /// <param name="writer">Writer that supplies the columns and receives the converted row.</param>
        private void _Add(string line, DataTableWriter writer)
        {
            var columns = writer.Columns;
            var fields = _Parse(line);

            // Zip pairs columns with fields positionally and stops at the shorter sequence.
            // The column's _type field is passed by ref, so _Convert is able to modify it.
            var typedValues = columns.Zip(fields, (column, text) => _Convert(ref column._type, text));

            writer.AddRow(typedValues.ToList());
        }
Пример #3
0
        /// <summary>
        /// Creates a table with the same columns as this one that contains only the requested rows.
        /// </summary>
        /// <param name="rowIndex">Indices of the rows to copy; they are passed to <c>GetRows</c>.</param>
        /// <param name="output">Stream handed to the <c>DataTableWriter</c> (may be null).</param>
        /// <returns>The table produced by the writer.</returns>
        public IDataTable CopyWithRows(IEnumerable<int> rowIndex, Stream output = null)
        {
            var copy = new DataTableWriter(_column, output);
            var selectedRows = GetRows(rowIndex);

            foreach (var selected in selectedRows)
            {
                copy.AddRow(selected);
            }

            var result = copy.GetDataTable();
            return result;
        }
Пример #4
0
        /// <summary>
        /// Joins this table with <paramref name="dataTable"/> side by side: the columns of both tables
        /// are concatenated and row <c>i</c> of the result holds this table's row <c>i</c> followed by
        /// the other table's row <c>i</c>.
        /// </summary>
        /// <param name="dataTable">The table whose columns and rows are appended on the right.</param>
        /// <param name="output">Stream handed to the <c>DataTableWriter</c> (may be null).</param>
        /// <returns>The table produced by the writer.</returns>
        public IDataTable Zip(IDataTable dataTable, Stream output = null)
        {
            var combinedColumns = _column.Concat(dataTable.Columns);
            var writer = new DataTableWriter(combinedColumns, output);

            _Iterate((leftRow, position) =>
            {
                if (position < dataTable.RowCount)
                {
                    var leftValues = leftRow.Data;
                    var rightValues = dataTable.GetRow(position).Data;
                    writer.AddRow(leftValues.Concat(rightValues).ToList());
                    return true;
                }

                // the other table has no row at this position; returning false presumably
                // ends the iteration (confirm against _Iterate)
                return false;
            });

            return writer.GetDataTable();
        }
Пример #5
0
        /// <summary>
        /// Writes a numeric copy of this table: every row is passed through a vectoriser and stored
        /// as float columns, followed by the value of the target column as a string label.
        /// </summary>
        /// <param name="vectoriser">Vectoriser to use; when null one is obtained from <c>GetVectoriser()</c>.</param>
        /// <param name="output">Stream handed to the <c>DataTableWriter</c> (may be null).</param>
        /// <returns>The table produced by the writer.</returns>
        public IDataTable ConvertToNumeric(IDataTableVectoriser vectoriser = null, Stream output = null)
        {
            var writer = new DataTableWriter(output);

            if (vectoriser == null)
            {
                vectoriser = GetVectoriser();
            }

            // one float column per vectorised feature
            foreach (var featureName in vectoriser.ColumnNames)
            {
                writer.AddColumn(featureName, ColumnType.Float);
            }

            // the label column goes last and is added with the target flag set
            var classColumnIndex = TargetColumnIndex;
            var labelColumn = _column[classColumnIndex];
            writer.AddColumn(labelColumn.Name, ColumnType.String, true);

            _Iterate(row =>
            {
                var features = vectoriser.GetInput(row).AsEnumerable().Cast<object>();
                var label = row.GetField<string>(classColumnIndex);
                var numericRow = features.Concat(new object[] { label });

                writer.AddRow(numericRow);
                return true;
            });

            return writer.GetDataTable();
        }
Пример #6
0
        /// <summary>
        /// Builds a new table by passing every row through <paramref name="mutator"/>.
        /// Rows for which the mutator returns null are skipped.
        /// </summary>
        /// <remarks>
        /// The output schema is derived from the first non-null mutated row: each value's runtime
        /// type selects the column type, while the column name and target flag are taken from this
        /// table's column at the same position. The mutated row is therefore expected to contain no
        /// more values than this table has columns, since <c>Columns[index]</c> is read per value.
        /// </remarks>
        /// <param name="mutator">Maps a source row to the values of the projected row, or returns null to drop the row.</param>
        /// <param name="output">Stream handed to the <c>DataTableWriter</c> (may be null).</param>
        /// <returns>The table produced by the writer.</returns>
        /// <exception cref="FormatException">A value in the first projected row has a type with no matching column type.</exception>
        public IDataTable Project(Func<IRow, IReadOnlyList<object>> mutator, Stream output = null)
        {
            var isFirst = true;
            var writer = new DataTableWriter(output);

            _Iterate((row, i) =>
            {
                var mutatedRow = mutator(row);
                if (mutatedRow == null)
                {
                    // dropped row: keep iterating
                    return true;
                }

                if (isFirst)
                {
                    // the first surviving row defines the output columns
                    var index = 0;
                    foreach (var item in mutatedRow)
                    {
                        var column = Columns[index];
                        var columnType = _InferProjectedColumnType(item);
                        writer.AddColumn(column.Name, columnType, column.IsTarget);
                        ++index;
                    }
                    isFirst = false;
                }

                writer.AddRow(new DataTableRow(this, mutatedRow, _rowConverter));
                return true;
            });

            return writer.GetDataTable();
        }

        /// <summary>
        /// Maps a projected value to the column type used to store it; null maps to <c>ColumnType.Null</c>.
        /// </summary>
        /// <exception cref="FormatException">The value's runtime type has no matching column type.</exception>
        private static ColumnType _InferProjectedColumnType(object item)
        {
            if (item == null)
            {
                return ColumnType.Null;
            }

            var type = item.GetType();
            if (type == typeof(string))
            {
                return ColumnType.String;
            }
            if (type == typeof(double))
            {
                return ColumnType.Double;
            }
            if (type == typeof(float))
            {
                return ColumnType.Float;
            }
            if (type == typeof(long))
            {
                return ColumnType.Long;
            }
            if (type == typeof(int))
            {
                return ColumnType.Int;
            }
            if (type == typeof(byte))
            {
                return ColumnType.Byte;
            }
            if (type == typeof(DateTime))
            {
                return ColumnType.Date;
            }
            if (type == typeof(bool))
            {
                return ColumnType.Boolean;
            }
            if (type == typeof(FloatVector))
            {
                return ColumnType.Vector;
            }
            if (type == typeof(FloatMatrix))
            {
                return ColumnType.Matrix;
            }
            if (type == typeof(FloatTensor))
            {
                return ColumnType.Tensor;
            }
            if (type == typeof(WeightedIndexList))
            {
                return ColumnType.WeightedIndexList;
            }
            if (type == typeof(IndexList))
            {
                return ColumnType.IndexList;
            }

            // same exception type as before, but now it names the offending type
            throw new FormatException("Cannot infer a column type for values of type " + type.FullName);
        }
Пример #7
0
        /// <summary>
        /// Builds a new table by passing every row through <paramref name="mutator"/>.
        /// Rows for which the mutator returns null are skipped.
        /// </summary>
        /// <remarks>
        /// The output schema is derived from the first non-null projected row: each value's runtime
        /// type selects the column type, while the column name and target flag are taken from this
        /// table's column at the same position.
        /// </remarks>
        /// <param name="mutator">Maps a source row to the values of the projected row, or returns null to drop the row.</param>
        /// <param name="output">Stream handed to the <c>DataTableWriter</c> (may be null).</param>
        /// <returns>The table produced by the writer.</returns>
        /// <exception cref="FormatException">A value in the first projected row has a type with no matching column type.</exception>
        public IDataTable Project(Func<IRow, IReadOnlyList<object>> mutator, Stream output = null)
        {
            var schemaPending = true;
            var writer = new DataTableWriter(output);

            _Iterate(row =>
            {
                var projected = mutator(row);
                if (projected == null)
                {
                    // dropped row: keep iterating
                    return true;
                }

                if (schemaPending)
                {
                    // the first surviving row defines the output columns
                    var position = 0;
                    foreach (var value in projected)
                    {
                        var source = Columns[position];

                        // null values keep the Null column type; anything else is mapped by runtime type
                        var columnType = ColumnType.Null;
                        if (value != null)
                        {
                            var valueType = value.GetType();
                            if (valueType == typeof(string))
                            {
                                columnType = ColumnType.String;
                            }
                            else if (valueType == typeof(double))
                            {
                                columnType = ColumnType.Double;
                            }
                            else if (valueType == typeof(float))
                            {
                                columnType = ColumnType.Float;
                            }
                            else if (valueType == typeof(long))
                            {
                                columnType = ColumnType.Long;
                            }
                            else if (valueType == typeof(int))
                            {
                                columnType = ColumnType.Int;
                            }
                            else if (valueType == typeof(byte))
                            {
                                columnType = ColumnType.Byte;
                            }
                            else if (valueType == typeof(DateTime))
                            {
                                columnType = ColumnType.Date;
                            }
                            else if (valueType == typeof(bool))
                            {
                                columnType = ColumnType.Boolean;
                            }
                            else
                            {
                                throw new FormatException();
                            }
                        }

                        writer.AddColumn(source.Name, columnType, source.IsTarget);
                        position++;
                    }
                    schemaPending = false;
                }

                writer.AddRow(projected);
                return true;
            });

            return writer.GetDataTable();
        }