Пример #1
0
        /// <summary>
        /// Appends a scaled version of <paramref name="colName"/> as a calculated column
        /// named <c>colName + "_scaled"</c> and returns the two parameters used for scaling.
        /// </summary>
        /// <param name="dff">Source data frame; the method works on a frame obtained by selecting all of its columns.</param>
        /// <param name="colName">Name of the column to scale.</param>
        /// <param name="transformer">
        /// <see cref="ColumnTransformer.MinMax"/> computes <c>(x - min) / (max - min)</c>;
        /// <see cref="ColumnTransformer.Standardizer"/> computes <c>(x - avg) / std</c>.
        /// </param>
        /// <param name="transformedColumnsOnly">
        /// When <c>true</c>, the result is built via <c>Create((newColName, null))</c>
        /// (presumably a frame holding only the scaled column - confirm against the DataFrame API).
        /// </param>
        /// <returns>
        /// The resulting frame and a two-element array: <c>{ min, max }</c> for MinMax,
        /// <c>{ avg, std }</c> for Standardizer.
        /// </returns>
        /// <exception cref="NotSupportedException">Any other <paramref name="transformer"/> value.</exception>
        private static (DataFrame edf, float[] fValues) ScaleColumn(DataFrame dff, string colName, ColumnTransformer transformer, bool transformedColumnsOnly)
        {
            var newColName = colName + "_scaled";

            // Select every column so the calculated column is added to this frame
            // (presumably a copy of dff - confirm against the DataFrame indexer).
            var df = dff[dff.Columns.ToArray()];
            var s = Series.FromDataFrame(df, colName);
            float param1;
            float param2;

            if (transformer == ColumnTransformer.MinMax)
            {
                // param1 = column minimum, param2 = column maximum.
                param1 = Convert.ToSingle(s.Aggregate<float>(Aggregation.Min));
                param2 = Convert.ToSingle(s.Aggregate<float>(Aggregation.Max));

                // NOTE(review): when max == min the floating-point division yields NaN/Infinity
                // rather than throwing; callers may want to guard constant columns.
                df.AddCalculatedColumn(newColName, (IDictionary<string, object> row, int i) =>
                {
                    var val = Convert.ToDouble(row[colName]);
                    return (val - param1) / (param2 - param1);
                });
            }
            else if (transformer == ColumnTransformer.Standardizer)
            {
                // param1 = column average, param2 = column standard deviation.
                param1 = Convert.ToSingle(s.Aggregate<float>(Aggregation.Avg));
                param2 = Convert.ToSingle(s.Aggregate<float>(Aggregation.Std));

                // NOTE(review): a zero standard deviation yields NaN/Infinity here as well.
                df.AddCalculatedColumn(newColName, (IDictionary<string, object> row, int i) =>
                {
                    var val = Convert.ToDouble(row[colName]);
                    return (val - param1) / (param2);
                });
            }
            else
            {
                throw new NotSupportedException("Column transformation is not supported.");
            }

            var scalingParams = new float[] { param1, param2 };

            if (transformedColumnsOnly)
            {
                var scaledOnly = df.Create((newColName, null));
                return (scaledOnly, scalingParams);
            }

            return (df, scalingParams);
        }
Пример #2
0
        /// <summary>
        /// Applies the requested <paramref name="transformer"/> to column <paramref name="colName"/>
        /// by dispatching to the matching encoding or scaling helper.
        /// </summary>
        /// <param name="df">Data frame that contains the column.</param>
        /// <param name="colName">Name of the column to transform.</param>
        /// <param name="transformer">Kind of transformation to apply.</param>
        /// <param name="transformedColumnsOnly">Forwarded unchanged to the selected helper.</param>
        /// <returns>
        /// A tuple of the resulting frame, the scaling parameters and the encoding labels:
        /// <list type="bullet">
        /// <item><description><see cref="ColumnTransformer.None"/>: the input frame, with both arrays <c>null</c>.</description></item>
        /// <item><description>Binary1, Binary2, Ordinal, OneHot, Dummy: <c>scaledValues</c> is <c>null</c>, <c>labels</c> comes from the encoder.</description></item>
        /// <item><description>MinMax, Standardizer: <c>scaledValues</c> comes from <c>ScaleColumn</c>, <c>labels</c> is <c>null</c>.</description></item>
        /// </list>
        /// </returns>
        /// <exception cref="NotSupportedException">The transformer value is not handled by any case.</exception>
        public static (DataFrame df, float[] scaledValues, string[] labels) TransformColumn(this DataFrame df, string colName, ColumnTransformer transformer, bool transformedColumnsOnly = false)
        {
            // Every section returns or throws, so no statement is needed after the switch.
            switch (transformer)
            {
                case ColumnTransformer.None:
                    return (df, null, null);

                case ColumnTransformer.Binary1:
                {
                    var (encoded, classLabels) = BinaryEncoding(df, colName, transformedColumnsOnly);
                    return (encoded, null, classLabels);
                }

                case ColumnTransformer.Binary2:
                {
                    var (encoded, classLabels) = BinaryEncoding2(df, colName, transformedColumnsOnly);
                    return (encoded, null, classLabels);
                }

                case ColumnTransformer.Ordinal:
                {
                    var (encoded, classLabels) = OrdinalEncoding(df, colName, transformedColumnsOnly);
                    return (encoded, null, classLabels);
                }

                case ColumnTransformer.OneHot:
                {
                    var (encoded, classLabels) = OneHotEncodeColumn(df, colName, transformedColumnsOnly);
                    return (encoded, null, classLabels);
                }

                case ColumnTransformer.Dummy:
                {
                    var (encoded, classLabels) = DummyEncodeColumn(df, colName, transformedColumnsOnly);
                    return (encoded, null, classLabels);
                }

                case ColumnTransformer.MinMax:
                case ColumnTransformer.Standardizer:
                {
                    // ScaleColumn distinguishes the two scalers itself, so the transformer is passed through.
                    var (scaled, scalingParams) = ScaleColumn(df, colName, transformer, transformedColumnsOnly);
                    return (scaled, scalingParams, null);
                }

                default:
                    throw new NotSupportedException("Data normalization is not supported.");
            }
        }