Ejemplo n.º 1
0
        /// <summary>
        /// Creates a normaliser for <paramref name="dataTable"/>. When <paramref name="model"/> is supplied it is
        /// used unchanged; otherwise a normalisation model is derived from the table's column analysis.
        /// </summary>
        /// <param name="dataTable">Table to normalise. Its columns configure the writer and its analysis supplies the per-column statistics.</param>
        /// <param name="type">Kind of normalisation to derive when no model is supplied.</param>
        /// <param name="output">Stream passed to the <c>DataTableWriter</c>; may be null.</param>
        /// <param name="model">Existing normalisation model to reuse; when null a new one is computed.</param>
        /// <exception cref="NotImplementedException">
        /// No model was supplied, <paramref name="type"/> is not one of the four handled values, and at least one
        /// continuous numeric column was encountered.
        /// </exception>
        public DataTableNormaliser(IDataTable dataTable, NormalisationType type, Stream output = null, DataTableNormalisation model = null)
        {
            _table = dataTable;
            _writer = new DataTableWriter(dataTable.Columns, output);

            // A caller-supplied model short-circuits the analysis entirely.
            if (model != null)
            {
                _normalisationModel = model;
                return;
            }

            var tableAnalysis = dataTable.GetAnalysis();
            var normalisedColumns = new List<DataTableNormalisation.Column>();

            foreach (var info in tableAnalysis.ColumnInfo)
            {
                var tableColumn = dataTable.Columns[info.ColumnIndex];
                if (!tableColumn.IsContinuous)
                {
                    continue;
                }

                // Only columns whose analysis exposes numeric statistics can be normalised.
                var stats = info as INumericColumnInfo;
                if (stats == null)
                {
                    continue;
                }

                DataTableNormalisation.Column entry;
                switch (type)
                {
                    case NormalisationType.Standard:
                        // Without a standard deviation the column is left out of the model.
                        if (!stats.StdDev.HasValue)
                        {
                            continue;
                        }
                        entry = new DataTableNormalisation.Column(info.ColumnIndex, tableColumn.Type, stats.StdDev.Value, stats.Mean);
                        break;

                    case NormalisationType.Euclidean:
                        entry = new DataTableNormalisation.Column(info.ColumnIndex, tableColumn.Type, stats.L2Norm);
                        break;

                    case NormalisationType.Manhattan:
                        entry = new DataTableNormalisation.Column(info.ColumnIndex, tableColumn.Type, stats.L1Norm);
                        break;

                    case NormalisationType.FeatureScale:
                        entry = new DataTableNormalisation.Column(info.ColumnIndex, tableColumn.Type, stats.Max - stats.Min, stats.Min);
                        break;

                    default:
                        throw new NotImplementedException();
                }

                normalisedColumns.Add(entry);
            }

            _normalisationModel = new DataTableNormalisation
            {
                Type = type,
                ColumnNormalisation = normalisedColumns.ToArray()
            };
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates a normaliser for <paramref name="dataTable"/>, deriving the normalisation model from the table's
        /// column analysis and optionally restricting it to a subset of columns.
        /// </summary>
        /// <param name="dataTable">Table to normalise. Its columns configure the writer and its analysis supplies the per-column statistics.</param>
        /// <param name="type">Kind of normalisation to derive.</param>
        /// <param name="output">Stream passed to the <c>DataTableWriter</c>.</param>
        /// <param name="columnIndices">
        /// Indices of the columns to consider. When null, every analysed column is considered.
        /// </param>
        /// <exception cref="NotImplementedException">
        /// <paramref name="type"/> is not one of the four handled values and at least one selected continuous
        /// numeric column was encountered.
        /// </exception>
        public DataTableNormaliser(IDataTable dataTable, NormalisationType type, Stream output, IEnumerable <int> columnIndices)
        {
            _writer = new DataTableWriter(dataTable.Columns, output);

            var analysis       = dataTable.GetAnalysis();
            var columnNormList = new List <DataTableNormalisation.Column>();

            // Filter with a plain set lookup; the sequence is in memory, so wrapping it in an
            // IQueryable would only add expression-tree compilation overhead.
            var columnSet = columnIndices != null ? new HashSet <int>(columnIndices) : null;

            foreach (var columnInfo in analysis.ColumnInfo)
            {
                if (columnSet != null && !columnSet.Contains(columnInfo.ColumnIndex))
                {
                    continue;
                }

                var column = dataTable.Columns[columnInfo.ColumnIndex];
                if (!column.IsContinuous)
                {
                    continue;
                }

                // Only columns whose analysis exposes numeric statistics can be normalised.
                if (!(columnInfo is INumericColumnInfo numericInfo))
                {
                    continue;
                }

                DataTableNormalisation.Column columnNorm;
                switch (type)
                {
                    case NormalisationType.Standard:
                        // Without a standard deviation the column is left out of the model.
                        if (!numericInfo.StdDev.HasValue)
                        {
                            continue;
                        }
                        // StdDev is known to have a value here, so no fallback is needed.
                        columnNorm = new DataTableNormalisation.Column(columnInfo.ColumnIndex, column.Type, numericInfo.StdDev.Value, numericInfo.Mean);
                        break;

                    case NormalisationType.Euclidean:
                        columnNorm = new DataTableNormalisation.Column(columnInfo.ColumnIndex, column.Type, numericInfo.L2Norm);
                        break;

                    case NormalisationType.Manhattan:
                        columnNorm = new DataTableNormalisation.Column(columnInfo.ColumnIndex, column.Type, numericInfo.L1Norm);
                        break;

                    case NormalisationType.FeatureScale:
                        columnNorm = new DataTableNormalisation.Column(columnInfo.ColumnIndex, column.Type, numericInfo.Max - numericInfo.Min, numericInfo.Min);
                        break;

                    default:
                        throw new NotImplementedException();
                }

                columnNormList.Add(columnNorm);
            }

            _normalisationModel = new DataTableNormalisation {
                Type = type,
                ColumnNormalisation = columnNormList.ToArray()
            };
        }