// Normal constructor. Captures the estimator options, works out which input columns are
// "data" columns (the ones the imputation strategy applies to), builds the combined
// column-name array, and creates the native transformer from the input data.
//
// host:    environment used to register this component's host.
// options: estimator options (time series column, grain columns, filter list/mode, impute mode,
//          type-error suppression flag).
// input:   data view whose schema supplies the candidate column names; it is also handed to
//          CreateTransformerFromEstimator.
internal TimeSeriesImputerTransformer(IHostEnvironment host, TimeSeriesImputerEstimator.Options options, IDataView input)
{
    _host = host.Register(nameof(TimeSeriesImputerTransformer));
    _timeSeriesColumn = options.TimeSeriesColumn;
    _grainColumns = options.GrainColumns;
    _imputeMode = options.ImputeMode;
    _suppressTypeErrors = options.SupressTypeErrors;

    // A null filter list is treated as an empty one. Previously a null list combined with
    // Exclude/Include only failed later, inside the lazily evaluated LINQ query, with an
    // ArgumentNullException about "source" that did not point at the real cause.
    // The set uses the default string comparer, i.e. the same equality the array-based
    // Contains used, while avoiding a linear scan of the filter list per schema column.
    var filterColumns = new HashSet<string>(options.FilterColumns ?? Array.Empty<string>());

    IEnumerable<string> tempDataColumns;
    if (options.FilterMode == TimeSeriesImputerEstimator.FilterMode.Exclude)
    {
        // Every schema column except the listed ones.
        tempDataColumns = input.Schema.Where(x => !filterColumns.Contains(x.Name)).Select(x => x.Name);
    }
    else if (options.FilterMode == TimeSeriesImputerEstimator.FilterMode.Include)
    {
        // Only the listed columns (that exist in the schema).
        tempDataColumns = input.Schema.Where(x => filterColumns.Contains(x.Name)).Select(x => x.Name);
    }
    else
    {
        // Any other filter mode: no filtering, all schema columns are candidates.
        tempDataColumns = input.Schema.Select(x => x.Name);
    }

    // Time series and Grain columns should never be included in the data columns
    _dataColumns = tempDataColumns.Where(x => x != _timeSeriesColumn && !_grainColumns.Contains(x)).ToArray();

    // 1 is for the time series column. Make one array in the correct order of all the columns.
    // Order is Timeseries column, All grain columns, All data columns.
    _allColumnNames = new string[1 + _grainColumns.Length + _dataColumns.Length];
    _allColumnNames[0] = _timeSeriesColumn;
    Array.Copy(_grainColumns, 0, _allColumnNames, 1, _grainColumns.Length);
    Array.Copy(_dataColumns, 0, _allColumnNames, 1 + _grainColumns.Length, _dataColumns.Length);

    // Must run last: the fields above are all populated before the native transformer is created.
    TransformerHandle = CreateTransformerFromEstimator(input);
}
// Factory method for SignatureLoadModel. Rebuilds the transformer from a saved model:
// reads the column configuration and impute mode from the load context, reassembles the
// combined column-name array, and recreates the native transformer from its saved state.
//
// host: environment used to register this component's host.
// ctx:  model load context whose Reader supplies the serialized fields.
internal TimeSeriesImputerTransformer(IHostEnvironment host, ModelLoadContext ctx)
{
    _host = host.Register(nameof(TimeSeriesImputerTransformer));
    _host.Check(!CommonExtensions.OsIsCentOS7(), "CentOS7 is not supported");

    // *** Binary format (in the order read below) ***
    // name of time series column
    // length of grain column array
    // all column names in grain column array
    // length of data column array
    // all column names in data column array
    // byte value of impute mode
    // C++ byte state array (read via ReadByteArray)
    // NOTE(review): this order must match the corresponding save routine, which is not
    // visible from here - confirm before changing anything.
    var reader = ctx.Reader;

    _timeSeriesColumn = reader.ReadString();

    int grainCount = reader.ReadInt32();
    _grainColumns = new string[grainCount];
    for (int g = 0; g < grainCount; g++)
    {
        _grainColumns[g] = reader.ReadString();
    }

    int dataCount = reader.ReadInt32();
    _dataColumns = new string[dataCount];
    for (int d = 0; d < dataCount; d++)
    {
        _dataColumns[d] = reader.ReadString();
    }

    _imputeMode = (TimeSeriesImputerEstimator.ImputationStrategy)reader.ReadByte();

    // Combined layout: time series column first, then every grain column, then every data column.
    int dataOffset = 1 + grainCount;
    _allColumnNames = new string[dataOffset + dataCount];
    _allColumnNames[0] = _timeSeriesColumn;
    _grainColumns.CopyTo(_allColumnNames, 1);
    _dataColumns.CopyTo(_allColumnNames, dataOffset);

    // The native state is the last item in the stream; it is consumed only after all
    // managed fields have been restored.
    byte[] nativeState = reader.ReadByteArray();
    TransformerHandle = CreateTransformerFromSavedData(nativeState);
}
/// <summary>
/// Creates a <see cref="TimeSeriesImputerEstimator"/> that imputes missing rows and column data per grain.
/// The imputation strategy is applied to a filtered list of columns in the IDataView; columns that are
/// excluded receive the default value for their data type when a row is imputed.
/// Currently only float/double/string columns are supported for imputation strategies, and an empty
/// string is considered "missing" for the purpose of this estimator.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="timeSeriesColumn">Column representing the time series. Should be of type <see cref="long"/> or <see cref="System.DateTime"/>.</param>
/// <param name="grainColumns">List of columns to use as grains.</param>
/// <param name="filterColumns">List of columns to filter. If <paramref name="filterMode"/> is
/// <see cref="TimeSeriesImputerEstimator.FilterMode.Exclude"/> then the columns in the list are ignored.
/// If <paramref name="filterMode"/> is <see cref="TimeSeriesImputerEstimator.FilterMode.Include"/> then the
/// columns in the list are the only columns imputed.</param>
/// <param name="filterMode">Whether the list <paramref name="filterColumns"/> should include or exclude those columns.</param>
/// <param name="imputeMode">Mode of imputation for missing values in a column. Defaults to forward fill when not passed.</param>
/// <param name="suppressTypeErrors">Suppress the errors that would occur if a column and impute mode are incompatible.
/// If true, the column is skipped and the default value is used. If false, processing stops and an error is thrown.</param>
/// <returns>The newly constructed <see cref="TimeSeriesImputerEstimator"/>.</returns>
public static TimeSeriesImputerEstimator ReplaceMissingTimeSeriesValues(
    this TransformsCatalog catalog,
    string timeSeriesColumn,
    string[] grainColumns,
    string[] filterColumns,
    TimeSeriesImputerEstimator.FilterMode filterMode = TimeSeriesImputerEstimator.FilterMode.Exclude,
    TimeSeriesImputerEstimator.ImputationStrategy imputeMode = TimeSeriesImputerEstimator.ImputationStrategy.ForwardFill,
    bool suppressTypeErrors = false)
{
    // The estimator is bound to the environment that backs the supplied catalog.
    var env = CatalogUtils.GetEnvironment(catalog);
    return new TimeSeriesImputerEstimator(env, timeSeriesColumn, grainColumns, filterColumns, filterMode, imputeMode, suppressTypeErrors);
}
/// <summary>
/// Creates a <see cref="TimeSeriesImputerEstimator"/> that imputes missing rows and column data per grain,
/// operating on all columns in the IDataView (no filter list is supplied and the filter mode is
/// <see cref="TimeSeriesImputerEstimator.FilterMode.NoFilter"/>).
/// Currently only float/double/string columns are supported for imputation strategies, and an empty
/// string is considered "missing" for the purpose of this estimator. Other column types will have the
/// default value placed if a row is imputed.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="timeSeriesColumn">Column representing the time series. Should be of type <see cref="long"/> or <see cref="System.DateTime"/>.</param>
/// <param name="grainColumns">List of columns to use as grains.</param>
/// <param name="imputeMode">Mode of imputation for missing values in a column. Defaults to forward fill when not passed.</param>
/// <returns>The newly constructed <see cref="TimeSeriesImputerEstimator"/>.</returns>
public static TimeSeriesImputerEstimator ReplaceMissingTimeSeriesValues(
    this TransformsCatalog catalog,
    string timeSeriesColumn,
    string[] grainColumns,
    TimeSeriesImputerEstimator.ImputationStrategy imputeMode = TimeSeriesImputerEstimator.ImputationStrategy.ForwardFill)
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // No filter list, NoFilter mode, and 'true' as the final argument - the position the
    // filtering overload uses for its suppressTypeErrors flag.
    return new TimeSeriesImputerEstimator(env, timeSeriesColumn, grainColumns, null, TimeSeriesImputerEstimator.FilterMode.NoFilter, imputeMode, true);
}