예제 #1
0
        // Normal constructor: configures the transformer from the estimator options and the input data.
        //   host    - environment on which this component registers its own host.
        //   options - supplies the time series column, grain columns, impute mode, the
        //             type-error suppression flag, and the column filter (mode + column names).
        //   input   - data view whose schema provides the candidate column names; it is also
        //             handed to CreateTransformerFromEstimator.
        internal TimeSeriesImputerTransformer(IHostEnvironment host, TimeSeriesImputerEstimator.Options options, IDataView input)
        {
            _host = host.Register(nameof(TimeSeriesImputerTransformer));

            _timeSeriesColumn = options.TimeSeriesColumn;
            _grainColumns = options.GrainColumns;
            _imputeMode = options.ImputeMode;
            _suppressTypeErrors = options.SupressTypeErrors;

            // Pick the candidate data columns according to the filter mode. The queries are
            // lazy; they are only enumerated by the ToArray call further down.
            IEnumerable<string> candidateNames;
            switch (options.FilterMode)
            {
                case TimeSeriesImputerEstimator.FilterMode.Exclude:
                    // Keep every schema column that is NOT named in the filter list.
                    candidateNames = input.Schema
                        .Where(column => !options.FilterColumns.Contains(column.Name))
                        .Select(column => column.Name);
                    break;

                case TimeSeriesImputerEstimator.FilterMode.Include:
                    // Keep only the schema columns that ARE named in the filter list.
                    candidateNames = input.Schema
                        .Where(column => options.FilterColumns.Contains(column.Name))
                        .Select(column => column.Name);
                    break;

                default:
                    // Any other mode: every schema column is a candidate.
                    candidateNames = input.Schema.Select(column => column.Name);
                    break;
            }

            // Time series and grain columns must never be part of the data columns.
            _dataColumns = candidateNames
                .Where(name => !(name == _timeSeriesColumn || _grainColumns.Contains(name)))
                .ToArray();

            // Build one array holding all column names in a fixed order:
            // [time series column][all grain columns][all data columns].
            int dataOffset = 1 + _grainColumns.Length;
            _allColumnNames = new string[dataOffset + _dataColumns.Length];
            _allColumnNames[0] = _timeSeriesColumn;
            _grainColumns.CopyTo(_allColumnNames, 1);
            _dataColumns.CopyTo(_allColumnNames, dataOffset);

            TransformerHandle = CreateTransformerFromEstimator(input);
        }
예제 #2
0
        // Factory method for SignatureLoadModel: rebuilds the transformer from a saved model.
        //   host - environment on which this component registers its own host.
        //   ctx  - load context whose Reader supplies the serialized fields.
        // Fails the host check with "CentOS7 is not supported" when running on CentOS7.
        // Note: this constructor does not assign _suppressTypeErrors.
        internal TimeSeriesImputerTransformer(IHostEnvironment host, ModelLoadContext ctx)
        {
            _host = host.Register(nameof(TimeSeriesImputerTransformer));
            _host.Check(!CommonExtensions.OsIsCentOS7(), "CentOS7 is not supported");

            // *** Binary format (in the order it is read below) ***
            // string : name of time series column
            // int    : length of grain column array
            // string : each column name in grain column array
            // int    : length of data column array
            // string : each column name in data column array
            // byte   : value of impute mode
            // bytes  : native transformer state, as read by ReadByteArray

            var reader = ctx.Reader;

            _timeSeriesColumn = reader.ReadString();

            int grainCount = reader.ReadInt32();
            var grains = new string[grainCount];
            for (int index = 0; index < grainCount; ++index)
            {
                grains[index] = reader.ReadString();
            }
            _grainColumns = grains;

            int dataCount = reader.ReadInt32();
            var dataNames = new string[dataCount];
            for (int index = 0; index < dataCount; ++index)
            {
                dataNames[index] = reader.ReadString();
            }
            _dataColumns = dataNames;

            _imputeMode = (TimeSeriesImputerEstimator.ImputationStrategy)reader.ReadByte();

            var nativeState = reader.ReadByteArray();

            // Combined column list: [time series column][all grain columns][all data columns].
            int dataOffset = 1 + _grainColumns.Length;
            _allColumnNames = new string[dataOffset + _dataColumns.Length];
            _allColumnNames[0] = _timeSeriesColumn;
            _grainColumns.CopyTo(_allColumnNames, 1);
            _dataColumns.CopyTo(_allColumnNames, dataOffset);

            TransformerHandle = CreateTransformerFromSavedData(nativeState);
        }
예제 #3
0
 /// <summary>
 /// Creates a <see cref="TimeSeriesImputerEstimator"/> that imputes missing rows and column data per grain. The imputation strategy is applied to
 /// a filtered list of columns in the IDataView. Columns that are excluded receive the default value for their data type when a row
 /// is imputed. Currently only float/double/string columns are supported for imputation strategies, and an empty string is considered "missing" for the
 /// purpose of this estimator.
 /// </summary>
 /// <param name="catalog">The transform catalog.</param>
 /// <param name="timeSeriesColumn">Column representing the time series. Should be of type <see cref="long"/> or <see cref="System.DateTime"/>.</param>
 /// <param name="grainColumns">List of columns to use as grains.</param>
 /// <param name="filterColumns">List of columns to filter. If <paramref name="filterMode"/> is <see cref="TimeSeriesImputerEstimator.FilterMode.Exclude"/>, then the columns in the list are ignored.
 /// If <paramref name="filterMode"/> is <see cref="TimeSeriesImputerEstimator.FilterMode.Include"/>, then the columns in the list are the only columns imputed.</param>
 /// <param name="filterMode">Whether the columns in <paramref name="filterColumns"/> are included or excluded. Defaults to <see cref="TimeSeriesImputerEstimator.FilterMode.Exclude"/>.</param>
 /// <param name="imputeMode">Mode of imputation for missing values in a column. Defaults to <see cref="TimeSeriesImputerEstimator.ImputationStrategy.ForwardFill"/>.</param>
 /// <param name="suppressTypeErrors">Suppress the errors that would occur if a column and impute mode are incompatible. If true, the column is skipped and the default value is used. If false, processing stops and an error is thrown. Defaults to false.</param>
 /// <returns>A new <see cref="TimeSeriesImputerEstimator"/> built from the catalog's environment and the supplied arguments.</returns>
 public static TimeSeriesImputerEstimator ReplaceMissingTimeSeriesValues(
     this TransformsCatalog catalog,
     string timeSeriesColumn,
     string[] grainColumns,
     string[] filterColumns,
     TimeSeriesImputerEstimator.FilterMode filterMode = TimeSeriesImputerEstimator.FilterMode.Exclude,
     TimeSeriesImputerEstimator.ImputationStrategy imputeMode = TimeSeriesImputerEstimator.ImputationStrategy.ForwardFill,
     bool suppressTypeErrors = false)
 {
     var environment = CatalogUtils.GetEnvironment(catalog);

     return new TimeSeriesImputerEstimator(
         environment,
         timeSeriesColumn,
         grainColumns,
         filterColumns,
         filterMode,
         imputeMode,
         suppressTypeErrors);
 }
예제 #4
0
 /// <summary>
 /// Creates a <see cref="TimeSeriesImputerEstimator"/> that imputes missing rows and column data per grain, operating on all columns in the IDataView.
 /// Currently only float/double/string columns are supported for imputation strategies, and an empty string is considered "missing" for the
 /// purpose of this estimator. Other column types have the default value placed if a row is imputed.
 /// </summary>
 /// <remarks>
 /// This overload passes a null filter column list with <see cref="TimeSeriesImputerEstimator.FilterMode.NoFilter"/> and
 /// passes <c>true</c> for type-error suppression.
 /// </remarks>
 /// <param name="catalog">The transform catalog.</param>
 /// <param name="timeSeriesColumn">Column representing the time series. Should be of type <see cref="long"/> or <see cref="System.DateTime"/>.</param>
 /// <param name="grainColumns">List of columns to use as grains.</param>
 /// <param name="imputeMode">Mode of imputation for missing values in a column. Defaults to <see cref="TimeSeriesImputerEstimator.ImputationStrategy.ForwardFill"/>.</param>
 /// <returns>A new <see cref="TimeSeriesImputerEstimator"/> built from the catalog's environment and the supplied arguments.</returns>
 public static TimeSeriesImputerEstimator ReplaceMissingTimeSeriesValues(
     this TransformsCatalog catalog,
     string timeSeriesColumn,
     string[] grainColumns,
     TimeSeriesImputerEstimator.ImputationStrategy imputeMode = TimeSeriesImputerEstimator.ImputationStrategy.ForwardFill)
 {
     var environment = CatalogUtils.GetEnvironment(catalog);

     return new TimeSeriesImputerEstimator(
         environment,
         timeSeriesColumn,
         grainColumns,
         null,
         TimeSeriesImputerEstimator.FilterMode.NoFilter,
         imputeMode,
         true);
 }