/// <summary>
/// Creates a normaliser for the given data table. When an existing normalisation
/// model is supplied it is used as-is; otherwise a model is built from the table's
/// analysis, with one entry per continuous column that exposes numeric statistics.
/// </summary>
/// <param name="dataTable">Table whose columns are handed to the writer and, when no model is given, analysed.</param>
/// <param name="type">Normalisation to build; only consulted when <paramref name="model"/> is null.</param>
/// <param name="output">Stream passed through to the <c>DataTableWriter</c> (may be null).</param>
/// <param name="model">Optional pre-built normalisation model that bypasses the analysis step.</param>
/// <exception cref="NotImplementedException">
/// A continuous numeric column was found and <paramref name="type"/> is not one of
/// Standard, Euclidean, Manhattan or FeatureScale.
/// </exception>
public DataTableNormaliser(IDataTable dataTable, NormalisationType type, Stream output = null, DataTableNormalisation model = null)
{
    _table = dataTable;
    _writer = new DataTableWriter(dataTable.Columns, output);

    // A caller-supplied model wins: nothing needs to be analysed.
    if (model != null)
    {
        _normalisationModel = model;
        return;
    }

    var tableAnalysis = dataTable.GetAnalysis();
    var perColumn = new List<DataTableNormalisation.Column>();

    foreach (var info in tableAnalysis.ColumnInfo)
    {
        var index = info.ColumnIndex;
        var tableColumn = dataTable.Columns[index];

        // Only continuous columns are candidates for normalisation.
        if (!tableColumn.IsContinuous)
        {
            continue;
        }

        // ...and only those whose analysis carries numeric statistics.
        if (!(info is INumericColumnInfo stats))
        {
            continue;
        }

        DataTableNormalisation.Column entry;
        switch (type)
        {
            case NormalisationType.Standard:
                // Without a standard deviation the column is left out of the model.
                if (!stats.StdDev.HasValue)
                {
                    continue;
                }
                entry = new DataTableNormalisation.Column(index, tableColumn.Type, stats.StdDev.Value, stats.Mean);
                break;

            case NormalisationType.Euclidean:
                entry = new DataTableNormalisation.Column(index, tableColumn.Type, stats.L2Norm);
                break;

            case NormalisationType.Manhattan:
                entry = new DataTableNormalisation.Column(index, tableColumn.Type, stats.L1Norm);
                break;

            case NormalisationType.FeatureScale:
                entry = new DataTableNormalisation.Column(index, tableColumn.Type, stats.Max - stats.Min, stats.Min);
                break;

            default:
                throw new NotImplementedException();
        }

        perColumn.Add(entry);
    }

    _normalisationModel = new DataTableNormalisation
    {
        Type = type,
        ColumnNormalisation = perColumn.ToArray()
    };
}
/// <summary>
/// Creates a normaliser whose model is built from the table's analysis, optionally
/// restricted to a set of column indices. A column contributes to the model only if
/// it is continuous and its analysis exposes numeric statistics.
/// </summary>
/// <param name="dataTable">Table to analyse; its columns are also handed to the writer.</param>
/// <param name="type">Normalisation to build for each selected column.</param>
/// <param name="output">Stream passed through to the <c>DataTableWriter</c>.</param>
/// <param name="columnIndices">
/// Indices of the columns to consider; when null every analysed column is considered.
/// </param>
/// <exception cref="NotImplementedException">
/// A continuous numeric column was selected and <paramref name="type"/> is not one of
/// Standard, Euclidean, Manhattan or FeatureScale.
/// </exception>
public DataTableNormaliser(IDataTable dataTable, NormalisationType type, Stream output, IEnumerable<int> columnIndices)
{
    // Store the table exactly as the other constructor does; previously this overload
    // left _table unassigned, so instances built here disagreed with the sibling overload.
    _table = dataTable;
    _writer = new DataTableWriter(dataTable.Columns, output);

    var analysis = dataTable.GetAnalysis();
    var columnNormList = new List<DataTableNormalisation.Column>();

    // The analysis is an in-memory sequence, so plain LINQ-to-Objects is sufficient;
    // there is no need to route the filter through an IQueryable provider.
    var columns = analysis.ColumnInfo.AsEnumerable();
    if (columnIndices != null)
    {
        var columnSet = new HashSet<int>(columnIndices);
        columns = columns.Where(ci => columnSet.Contains(ci.ColumnIndex));
    }

    foreach (var columnInfo in columns)
    {
        var column = dataTable.Columns[columnInfo.ColumnIndex];
        if (!column.IsContinuous)
        {
            continue;
        }

        if (!(columnInfo is INumericColumnInfo numericInfo))
        {
            continue;
        }

        DataTableNormalisation.Column columnNorm;
        if (type == NormalisationType.Standard)
        {
            // Without a standard deviation the column is left out of the model.
            if (!numericInfo.StdDev.HasValue)
            {
                continue;
            }

            // StdDev is known to have a value here, so no fallback is required.
            columnNorm = new DataTableNormalisation.Column(columnInfo.ColumnIndex, column.Type, numericInfo.StdDev.Value, numericInfo.Mean);
        }
        else if (type == NormalisationType.Euclidean)
        {
            columnNorm = new DataTableNormalisation.Column(columnInfo.ColumnIndex, column.Type, numericInfo.L2Norm);
        }
        else if (type == NormalisationType.Manhattan)
        {
            columnNorm = new DataTableNormalisation.Column(columnInfo.ColumnIndex, column.Type, numericInfo.L1Norm);
        }
        else if (type == NormalisationType.FeatureScale)
        {
            columnNorm = new DataTableNormalisation.Column(columnInfo.ColumnIndex, column.Type, numericInfo.Max - numericInfo.Min, numericInfo.Min);
        }
        else
        {
            throw new NotImplementedException();
        }

        columnNormList.Add(columnNorm);
    }

    _normalisationModel = new DataTableNormalisation
    {
        Type = type,
        ColumnNormalisation = columnNormList.ToArray()
    };
}