Esempio n. 1
0
        /// <summary>
        /// Builds a new table in which every row has been passed through a vectoriser,
        /// optionally followed by the target column's value as a string.
        /// </summary>
        /// <param name="vectoriser">Vectoriser to apply; when null one is obtained from <c>GetVectoriser(useTargetColumnIndex)</c>.</param>
        /// <param name="useTargetColumnIndex">When true, a trailing string column (added with the writer's third argument set to true) carries the value read from the target column.</param>
        /// <param name="output">Stream handed to the <c>DataTableWriter</c> constructor (may be null).</param>
        /// <returns>The table produced by the writer.</returns>
        public IDataTable ConvertToNumeric(IDataTableVectoriser vectoriser = null, bool useTargetColumnIndex = true, Stream output = null)
        {
            var tableWriter = new DataTableWriter(output);
            var encoder     = vectoriser ?? GetVectoriser(useTargetColumnIndex);

            // one float column for each name the vectoriser reports
            foreach (var featureName in encoder.ColumnNames)
            {
                tableWriter.AddColumn(featureName, ColumnType.Float);
            }

            // the target index is always read; the label column is only added on request
            var targetIndex = TargetColumnIndex;
            if (useTargetColumnIndex)
            {
                tableWriter.AddColumn(_column[targetIndex].Name, ColumnType.String, true);
            }

            // convert every row and hand it to the writer
            _Iterate((row, i) => {
                var features = encoder.GetInput(row).Data.AsEnumerable().Cast<object>();
                var values = useTargetColumnIndex
                    ? features.Concat(new object[] { row.GetField<string>(targetIndex) }).ToArray()
                    : features.ToArray();

                tableWriter.AddRow(new DataTableRow(this, values, _rowConverter));
                return true; // NOTE(review): presumably tells _Iterate to keep going - confirm
            });

            return tableWriter.GetDataTable();
        }
Esempio n. 2
0
        /// <summary>
        /// Builds a new table in which every row has been passed through a vectoriser
        /// and is followed by the target column's value as a string.
        /// </summary>
        /// <param name="vectoriser">Vectoriser to apply; when null one is obtained from <c>GetVectoriser()</c>.</param>
        /// <param name="output">Stream handed to the <c>DataTableWriter</c> constructor (may be null).</param>
        /// <returns>The table produced by the writer.</returns>
        public IDataTable ConvertToNumeric(IDataTableVectoriser vectoriser = null, Stream output = null)
        {
            var tableWriter = new DataTableWriter(output);
            var encoder     = vectoriser ?? GetVectoriser();

            // one float column for each name the vectoriser reports
            foreach (var featureName in encoder.ColumnNames)
            {
                tableWriter.AddColumn(featureName, ColumnType.Float);
            }

            // trailing string column named after the current target column
            var targetIndex = TargetColumnIndex;
            tableWriter.AddColumn(_column[targetIndex].Name, ColumnType.String, true);

            _Iterate(row => {
                var features = encoder.GetInput(row).AsEnumerable().Cast<object>();
                var label    = row.GetField<string>(targetIndex);

                // the sequence stays lazy; the writer enumerates it
                var values = features.Concat(new object[] { label });
                tableWriter.AddRow(values);
                return true; // NOTE(review): presumably tells _Iterate to keep going - confirm
            });

            return tableWriter.GetDataTable();
        }
Esempio n. 3
0
        /// <summary>
        /// Creates a <c>DataTableWriter</c> over <paramref name="stream"/> and registers one column
        /// per field, deriving the column names from the first line and the column types from the data lines.
        /// </summary>
        /// <param name="stream">Stream handed to the <c>DataTableWriter</c> constructor.</param>
        /// <param name="lines">Lines to inspect; the first one is always parsed, so the list must not be empty.</param>
        /// <param name="checkForHeader">When true, <paramref name="hasHeader"/> is overwritten with the detection result.</param>
        /// <param name="hasHeader">
        /// Whether the first line is a header. If <paramref name="checkForHeader"/> is true this is set to
        /// whether every field of the first line is classified as <c>ColumnType.String</c>; otherwise the
        /// supplied value is used unchanged.
        /// </param>
        /// <returns>The writer with its columns added.</returns>
        private DataTableWriter _DetermineHeaders(Stream stream, List <string> lines, bool checkForHeader, ref bool hasHeader)
        {
            var firstRowFields = _Parse(lines.First()).ToList();

            // a first line made up entirely of string fields is treated as a header
            if (checkForHeader)
            {
                hasHeader = firstRowFields.All(field => _DetermineType(field) == ColumnType.String);
            }

            // column names: the header text, or "_col<position>" when there is no header
            var columnNames = new List<string>(firstRowFields.Count);
            for (var position = 0; position < firstRowFields.Count; position++)
            {
                columnNames.Add(hasHeader ? firstRowFields[position] : "_col" + position);
            }

            // column types
            IReadOnlyList<ColumnType> inferredTypes;
            if (!_parseAsText)
            {
                // tag every field of every data line with its position, group the detected
                // types by position and let _GetColumnType reduce each group to one type
                var dataLines = lines.Skip(hasHeader ? 1 : 0);
                var positionedFields = dataLines.SelectMany(
                    line => _Parse(line).Select((text, position) => Tuple.Create(text, position)));

                inferredTypes = positionedFields
                    .GroupBy(field => field.Item2, field => _DetermineType(field.Item1))
                    .OrderBy(group => group.Key)
                    .Select(_GetColumnType)
                    .ToList();
            }
            else
            {
                // text mode: every column is a string
                inferredTypes = Enumerable.Repeat(ColumnType.String, firstRowFields.Count).ToList();
            }

            // Names and types are paired position by position and pairing stops at the shorter
            // list, so a name without a matching inferred type (for example when there are no
            // data lines after a header) produces no column.
            var writer      = new DataTableWriter(stream);
            var columnCount = Math.Min(columnNames.Count, inferredTypes.Count);
            for (var position = 0; position < columnCount; position++)
            {
                writer.AddColumn(columnNames[position], inferredTypes[position]);
            }

            return writer;
        }
Esempio n. 4
0
        /// <summary>
        /// Builds a new table by passing every row through <paramref name="mutator"/>.
        /// Rows for which the mutator returns null are left out. The output columns are declared
        /// from the first non-null result: each value's exact runtime type selects the column type
        /// (a null value gives <c>ColumnType.Null</c>), while the name and target flag are taken from
        /// the column at the same position in <c>Columns</c>.
        /// </summary>
        /// <param name="mutator">Maps a source row to the list of values for the output row, or null to drop the row.</param>
        /// <param name="output">Stream handed to the <c>DataTableWriter</c> constructor (may be null).</param>
        /// <returns>The table produced by the writer.</returns>
        /// <exception cref="FormatException">A value in the first projected row has a runtime type with no matching column type.</exception>
        public IDataTable Project(Func <IRow, IReadOnlyList <object> > mutator, Stream output = null)
        {
            var tableWriter   = new DataTableWriter(output);
            var schemaPending = true;

            // exact runtime type -> column type (derived types do not match)
            var columnTypeByValueType = new Dictionary<Type, ColumnType> {
                { typeof(string), ColumnType.String },
                { typeof(double), ColumnType.Double },
                { typeof(float), ColumnType.Float },
                { typeof(long), ColumnType.Long },
                { typeof(int), ColumnType.Int },
                { typeof(byte), ColumnType.Byte },
                { typeof(DateTime), ColumnType.Date },
                { typeof(bool), ColumnType.Boolean },
                { typeof(FloatVector), ColumnType.Vector },
                { typeof(FloatMatrix), ColumnType.Matrix },
                { typeof(FloatTensor), ColumnType.Tensor },
                { typeof(WeightedIndexList), ColumnType.WeightedIndexList },
                { typeof(IndexList), ColumnType.IndexList }
            };

            _Iterate((row, i) => {
                var projected = mutator(row);
                if (projected == null)
                {
                    // the mutator dropped this row
                    return true;
                }

                if (schemaPending)
                {
                    // NOTE(review): Columns is indexed by position in the projected row, so the
                    // projection is assumed to be no wider than the source table - confirm
                    for (var position = 0; position < projected.Count; position++)
                    {
                        var value        = projected[position];
                        var sourceColumn = Columns[position];

                        ColumnType columnType;
                        if (value == null)
                        {
                            columnType = ColumnType.Null;
                        }
                        else if (!columnTypeByValueType.TryGetValue(value.GetType(), out columnType))
                        {
                            throw new FormatException();
                        }

                        tableWriter.AddColumn(sourceColumn.Name, columnType, sourceColumn.IsTarget);
                    }

                    // only cleared once every column has been declared
                    schemaPending = false;
                }

                tableWriter.AddRow(new DataTableRow(this, projected, _rowConverter));
                return true; // NOTE(review): presumably tells _Iterate to keep going - confirm
            });

            return tableWriter.GetDataTable();
        }
Esempio n. 5
0
        /// <summary>
        /// Builds a new table by passing every row through <paramref name="mutator"/>.
        /// Rows for which the mutator returns null are left out. The output columns are declared
        /// from the first non-null result: each value's exact runtime type selects the column type
        /// (a null value gives <c>ColumnType.Null</c>), while the name and target flag are taken from
        /// the column at the same position in <c>Columns</c>.
        /// </summary>
        /// <param name="mutator">Maps a source row to the list of values for the output row, or null to drop the row.</param>
        /// <param name="output">Stream handed to the <c>DataTableWriter</c> constructor (may be null).</param>
        /// <returns>The table produced by the writer.</returns>
        /// <exception cref="FormatException">A value in the first projected row has a runtime type with no matching column type.</exception>
        public IDataTable Project(Func <IRow, IReadOnlyList <object> > mutator, Stream output = null)
        {
            var tableWriter   = new DataTableWriter(output);
            var schemaPending = true;

            // exact runtime type -> column type
            var columnTypeByValueType = new Dictionary<Type, ColumnType> {
                { typeof(string), ColumnType.String },
                { typeof(double), ColumnType.Double },
                { typeof(float), ColumnType.Float },
                { typeof(long), ColumnType.Long },
                { typeof(int), ColumnType.Int },
                { typeof(byte), ColumnType.Byte },
                { typeof(DateTime), ColumnType.Date },
                { typeof(bool), ColumnType.Boolean }
            };

            _Iterate(row => {
                var projected = mutator(row);
                if (projected == null)
                {
                    // the mutator dropped this row
                    return true;
                }

                if (schemaPending)
                {
                    // NOTE(review): Columns is indexed by position in the projected row, so the
                    // projection is assumed to be no wider than the source table - confirm
                    for (var position = 0; position < projected.Count; position++)
                    {
                        var value        = projected[position];
                        var sourceColumn = Columns[position];

                        ColumnType columnType;
                        if (value == null)
                        {
                            columnType = ColumnType.Null;
                        }
                        else if (!columnTypeByValueType.TryGetValue(value.GetType(), out columnType))
                        {
                            throw new FormatException();
                        }

                        tableWriter.AddColumn(sourceColumn.Name, columnType, sourceColumn.IsTarget);
                    }

                    // only cleared once every column has been declared
                    schemaPending = false;
                }

                tableWriter.AddRow(projected);
                return true; // NOTE(review): presumably tells _Iterate to keep going - confirm
            });

            return tableWriter.GetDataTable();
        }