Пример #1
0
        /// <summary>
        /// Runs a normaliser over this table using an existing normalisation model.
        /// </summary>
        /// <param name="normalisationModel">Model to apply; its <c>Type</c> is also passed to the normaliser.</param>
        /// <param name="output">Optional stream handed to the normaliser (null by default).</param>
        /// <returns>The table returned by the normaliser once this table has been processed.</returns>
        public IDataTable Normalise(DataTableNormalisation normalisationModel, Stream output = null)
        {
            var normalisationType = normalisationModel.Type;
            var rowProcessor      = new DataTableNormaliser(this, normalisationType, output, normalisationModel);

            // Feed this table through the normaliser before asking it for the result
            Process(rowProcessor);

            var normalisedTable = rowProcessor.GetDataTable();
            return normalisedTable;
        }
Пример #2
0
        /// <summary>
        /// Creates a normaliser and builds its normalisation model from the analysis of
        /// <paramref name="dataTable"/>, optionally restricted to a subset of columns.
        /// Continuous numeric columns get a per-column entry; vector columns with a positive
        /// X dimension get one entry per vector component.
        /// </summary>
        /// <param name="dataTable">Table to analyse; its columns are also passed to the writer.</param>
        /// <param name="type">Normalisation type stored in the model and forwarded to <c>_GetColumn</c>.</param>
        /// <param name="output">Stream handed to the <c>DataTableWriter</c>.</param>
        /// <param name="columnIndices">Column indices to consider, or null to consider every analysed column.</param>
        public DataTableNormaliser(IDataTable dataTable, NormalisationType type, Stream output, IEnumerable <int> columnIndices)
        {
            _writer = new DataTableWriter(dataTable.Columns, output);

            var analysis       = dataTable.GetAnalysis();
            var columnNormList = new List <DataTableNormalisation.Column>();

            // The column info is an in-memory sequence, so filter it with LINQ-to-Objects
            // rather than routing the predicate through IQueryable expression trees.
            var columns = analysis.ColumnInfo.AsEnumerable();
            if (columnIndices != null)
            {
                var columnSet = new HashSet <int>(columnIndices);
                columns = columns.Where(ci => columnSet.Contains(ci.ColumnIndex));
            }

            var vectorColumns = new List <(int ColumnIndex, int Size)>();

            foreach (var columnInfo in columns)
            {
                var column = dataTable.Columns[columnInfo.ColumnIndex];
                if (column.IsContinuous && columnInfo is INumericColumnInfo numericInfo)
                {
                    // Columns for which no normalisation entry is produced are left out of the model
                    var columnNorm = _GetColumn(type, numericInfo, columnInfo.ColumnIndex, column.Type);
                    if (columnNorm != null)
                    {
                        columnNormList.Add(columnNorm);
                    }
                }
                else if (column.Type == ColumnType.Vector && columnInfo is IDimensionsColumnInfo vector && vector.XDimension.HasValue && vector.XDimension.Value > 0)
                {
                    vectorColumns.Add((column.Index, vector.XDimension.Value));
                }
            }

            DataTableNormalisation.VectorColumn[] vectorColumnNormList = null;
            if (vectorColumns.Count > 0)
            {
                // One collector per vector component, grouped per vector column (parallel to vectorColumns)
                var collectors = vectorColumns.Select(vc => Enumerable.Range(0, vc.Size).Select(i => new NumberCollector(i)).ToList()).ToList();

                // Extra pass over the table to gather per-component statistics.
                // The two lists are walked by index so nothing is re-paired or allocated per row.
                dataTable.ForEach(row => {
                    for (var i = 0; i < vectorColumns.Count; i++)
                    {
                        var vectorAsRow = row.GetField <FloatVector>(vectorColumns[i].ColumnIndex).AsRow();
                        foreach (var collector in collectors[i])
                        {
                            collector.Process(vectorAsRow);
                        }
                    }
                });

                // NOTE(review): _GetColumn is null-checked for scalar columns above but not here,
                // so VectorColumns may contain null entries - confirm consumers tolerate that.
                vectorColumnNormList = collectors.Select((c, i) => new DataTableNormalisation.VectorColumn {
                    ColumnIndex   = vectorColumns[i].ColumnIndex,
                    VectorColumns = c.Select((nc, j) => _GetColumn(type, nc, j, ColumnType.Float)).ToArray()
                }).ToArray();
            }

            _normalisationModel = new DataTableNormalisation {
                Type = type,
                ColumnNormalisation       = columnNormList.ToArray(),
                VectorColumnNormalisation = vectorColumnNormList
            };
        }
Пример #3
0
        /// <summary>
        /// Creates a normaliser for <paramref name="dataTable"/>. When <paramref name="model"/> is supplied
        /// it is used as-is; otherwise a model is built from the table's analysis, with one entry for each
        /// continuous column that exposes numeric column info.
        /// </summary>
        /// <param name="dataTable">Table to normalise; its columns are also passed to the writer.</param>
        /// <param name="type">Normalisation type used when building a model (not read when a model is supplied).</param>
        /// <param name="output">Optional stream handed to the <c>DataTableWriter</c>.</param>
        /// <param name="model">Optional pre-built normalisation model.</param>
        /// <exception cref="NotImplementedException">A model is being built and <paramref name="type"/> is not a handled normalisation type.</exception>
        public DataTableNormaliser(IDataTable dataTable, NormalisationType type, Stream output = null, DataTableNormalisation model = null)
        {
            _table  = dataTable;
            _writer = new DataTableWriter(dataTable.Columns, output);

            // A supplied model wins: nothing to analyse
            if (model != null)
            {
                _normalisationModel = model;
                return;
            }

            var entries = new List <DataTableNormalisation.Column>();
            foreach (var info in dataTable.GetAnalysis().ColumnInfo)
            {
                var column = dataTable.Columns[info.ColumnIndex];
                if (!column.IsContinuous)
                {
                    continue;
                }

                var stats = info as INumericColumnInfo;
                if (stats == null)
                {
                    continue;
                }

                switch (type)
                {
                    case NormalisationType.Standard:
                        // Columns without a standard deviation are silently left out of the model
                        if (stats.StdDev.HasValue)
                        {
                            entries.Add(new DataTableNormalisation.Column(info.ColumnIndex, column.Type, stats.StdDev.Value, stats.Mean));
                        }
                        break;

                    case NormalisationType.Euclidean:
                        entries.Add(new DataTableNormalisation.Column(info.ColumnIndex, column.Type, stats.L2Norm));
                        break;

                    case NormalisationType.Manhattan:
                        entries.Add(new DataTableNormalisation.Column(info.ColumnIndex, column.Type, stats.L1Norm));
                        break;

                    case NormalisationType.FeatureScale:
                        entries.Add(new DataTableNormalisation.Column(info.ColumnIndex, column.Type, stats.Max - stats.Min, stats.Min));
                        break;

                    default:
                        throw new NotImplementedException();
                }
            }

            _normalisationModel = new DataTableNormalisation {
                Type = type,
                ColumnNormalisation = entries.ToArray()
            };
        }
Пример #4
0
        /// <summary>
        /// Creates a normaliser and builds its normalisation model from the analysis of
        /// <paramref name="dataTable"/>, optionally restricted to a subset of columns. Only continuous
        /// columns that expose numeric column info contribute an entry to the model.
        /// </summary>
        /// <param name="dataTable">Table to analyse; its columns are also passed to the writer.</param>
        /// <param name="type">Normalisation type that selects which statistics each entry is built from.</param>
        /// <param name="output">Stream handed to the <c>DataTableWriter</c>.</param>
        /// <param name="columnIndices">Column indices to consider, or null to consider every analysed column.</param>
        /// <exception cref="NotImplementedException"><paramref name="type"/> is not a handled normalisation type.</exception>
        public DataTableNormaliser(IDataTable dataTable, NormalisationType type, Stream output, IEnumerable <int> columnIndices)
        {
            _writer = new DataTableWriter(dataTable.Columns, output);

            var analysis       = dataTable.GetAnalysis();
            var columnNormList = new List <DataTableNormalisation.Column>();

            // The column info is an in-memory sequence, so filter it with LINQ-to-Objects
            // rather than routing the predicate through IQueryable expression trees.
            var columns = analysis.ColumnInfo.AsEnumerable();
            if (columnIndices != null)
            {
                var columnSet = new HashSet <int>(columnIndices);
                columns = columns.Where(ci => columnSet.Contains(ci.ColumnIndex));
            }

            foreach (var columnInfo in columns)
            {
                var column = dataTable.Columns[columnInfo.ColumnIndex];
                if (!column.IsContinuous || !(columnInfo is INumericColumnInfo numericInfo))
                {
                    continue;
                }

                DataTableNormalisation.Column columnNorm;
                switch (type)
                {
                    case NormalisationType.Standard:
                        // Columns without a standard deviation are left out of the model
                        if (!numericInfo.StdDev.HasValue)
                        {
                            continue;
                        }
                        columnNorm = new DataTableNormalisation.Column(columnInfo.ColumnIndex, column.Type, numericInfo.StdDev.Value, numericInfo.Mean);
                        break;

                    case NormalisationType.Euclidean:
                        columnNorm = new DataTableNormalisation.Column(columnInfo.ColumnIndex, column.Type, numericInfo.L2Norm);
                        break;

                    case NormalisationType.Manhattan:
                        columnNorm = new DataTableNormalisation.Column(columnInfo.ColumnIndex, column.Type, numericInfo.L1Norm);
                        break;

                    case NormalisationType.FeatureScale:
                        columnNorm = new DataTableNormalisation.Column(columnInfo.ColumnIndex, column.Type, numericInfo.Max - numericInfo.Min, numericInfo.Min);
                        break;

                    default:
                        throw new NotImplementedException();
                }
                columnNormList.Add(columnNorm);
            }

            _normalisationModel = new DataTableNormalisation {
                Type = type,
                ColumnNormalisation = columnNormList.ToArray()
            };
        }
Пример #5
0
 /// <summary>
 /// Creates a normaliser that uses an existing normalisation model.
 /// </summary>
 /// <param name="dataTable">Table whose columns are passed to the writer.</param>
 /// <param name="output">Stream handed to the <c>DataTableWriter</c>.</param>
 /// <param name="model">Normalisation model stored for later use.</param>
 public DataTableNormaliser(IDataTable dataTable, Stream output, DataTableNormalisation model)
 {
     var tableColumns = dataTable.Columns;

     _writer             = new DataTableWriter(tableColumns, output);
     _normalisationModel = model;
 }