コード例 #1
0
        /// <summary>
        /// Helper backing the public facing API: wraps a single column mapping in an <see cref="Options"/>
        /// instance and forwards it to the <c>Create(env, options, input)</c> overload.
        /// </summary>
        /// <param name="env">Host Environment.</param>
        /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
        /// <param name="outputColumnName">Name of the output column.</param>
        /// <param name="inputColumnName">Name of the column to be transformed. When null, <paramref name="outputColumnName"/> is used as the source as well.</param>
        /// <param name="replaceWith">The replacement method to utilize.</param>
        /// <returns>Whatever the forwarded <c>Create</c> overload returns for the assembled options.</returns>
        private static IDataView Create(IHostEnvironment env, IDataView input, string outputColumnName, string inputColumnName = null,
                                        ReplacementKind replaceWith = ReplacementKind.DefaultValue)
        {
            // A single mapping; the source falls back to the output name when none is given.
            var column = new Column
            {
                Name = outputColumnName,
                Source = inputColumnName ?? outputColumnName
            };

            var options = new Options
            {
                Columns = new[] { column },
                ReplaceWith = replaceWith
            };

            return Create(env, options, input);
        }
コード例 #2
0
 /// <summary>
 /// Public facing convenience constructor: builds an <see cref="Arguments"/> instance describing a single
 /// column mapping and forwards it to the argument-based constructor.
 /// </summary>
 /// <param name="env">Host Environment.</param>
 /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
 /// <param name="name">Name of the output column.</param>
 /// <param name="source">Name of the column to be transformed. When null, <paramref name="name"/> is used as the source as well.</param>
 /// <param name="replacementKind">The replacement method to utilize.</param>
 public NAReplaceTransform(
     IHostEnvironment env,
     IDataView input,
     string name,
     string source = null,
     ReplacementKind replacementKind = ReplacementKind.DefaultValue)
     : this(
         env,
         new Arguments
         {
             Column = new[]
             {
                 new Column { Source = source ?? name, Name = name }
             },
             ReplacementKind = replacementKind
         },
         input)
 {
 }
コード例 #3
0
        /// <summary>
        /// Fills <paramref name="repValues"/> with one replacement value per entry of <c>Infos</c>, chosen by the
        /// column's own replacement kind or, when the column does not set one, by <c>args.ReplacementKind</c>.
        /// <list type="bullet">
        /// <item><description><c>SpecifiedValue</c> and <c>DefaultValue</c> are resolved immediately.</description></item>
        /// <item><description><c>Mean</c>, <c>Min</c> and <c>Max</c> are computed in a single cursor pass over the source data.</description></item>
        /// </list>
        /// For imputed columns, by-slot versus across-slot imputation comes from the column's <c>Slot</c> setting,
        /// falling back to <c>args.ImputeBySlot</c>. Vector columns of unknown size always use across-slot
        /// imputation (a warning is logged if by-slot was requested).
        /// </summary>
        /// <param name="args">Transform arguments; <c>args.Column</c> is indexed in parallel with <c>Infos</c>.</param>
        /// <param name="repValues">Receives the replacement value for every column, indexed like <c>Infos</c>.</param>
        /// <param name="slotIsDefault">
        /// Receives, for each imputed column whose replacement value is an array, the <see cref="BitArray"/>
        /// produced by <c>ComputeDefaultSlots</c>. All other entries are left null.
        /// </param>
        /// <remarks>
        /// Throws (via <c>Host.Except</c>) when Mean/Min/Max is requested for a column whose item type is not a
        /// number, time span or date time, or when an undefined replacement kind is encountered.
        /// </remarks>
        private void GetReplacementValues(Arguments args, out object[] repValues, out BitArray[] slotIsDefault)
        {
            repValues = new object[Infos.Length];
            slotIsDefault = new BitArray[Infos.Length];

            ReplacementKind?[] imputationModes = new ReplacementKind?[Infos.Length];

            // Indices (into Infos) of the columns that need a statistic computed from the data.
            List<int> columnsToImpute = null;
            // REVIEW: Would like to get rid of the sourceColumns list but seems to be the best way to provide
            // the cursor with what columns to cursor through.
            HashSet<int> sourceColumns = null;

            for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
            {
                // A per-column kind overrides the transform-wide one.
                ReplacementKind kind = args.Column[iinfo].Kind ?? args.ReplacementKind;
                switch (kind)
                {
                    case ReplacementKind.SpecifiedValue:
                        repValues[iinfo] = GetSpecifiedValue(args.Column[iinfo].ReplacementString, _types[iinfo], _isNAs[iinfo]);
                        break;

                    case ReplacementKind.DefaultValue:
                        repValues[iinfo] = GetDefault(_types[iinfo]);
                        break;

                    case ReplacementKind.Mean:
                    case ReplacementKind.Min:
                    case ReplacementKind.Max:
                        if (!_types[iinfo].ItemType.IsNumber && !_types[iinfo].ItemType.IsTimeSpan && !_types[iinfo].ItemType.IsDateTime)
                        {
                            throw Host.Except("Cannot perform mean imputations on non-numeric '{0}'", _types[iinfo].ItemType);
                        }
                        // Defer: the value is only known after a pass over the data.
                        imputationModes[iinfo] = kind;
                        Utils.Add(ref columnsToImpute, iinfo);
                        Utils.Add(ref sourceColumns, Infos[iinfo].Source);
                        break;

                    default:
                        Host.Assert(false);
                        throw Host.Except("Internal error, undefined ReplacementKind '{0}' assigned in NAReplaceTransform.", kind);
                }
            }

            // Exit if there are no columns needing a replacement value imputed.
            if (Utils.Size(columnsToImpute) == 0)
            {
                return;
            }

            // Impute values.
            using (var ch = Host.Start("Computing Statistics"))
            using (var cursor = Source.GetRowCursor(sourceColumns.Contains))
            {
                StatAggregator[] statAggregators = new StatAggregator[columnsToImpute.Count];
                for (int ii = 0; ii < columnsToImpute.Count; ii++)
                {
                    int iinfo = columnsToImpute[ii];

                    // args.Column runs parallel to Infos, so it must be indexed by iinfo. Indexing by ii
                    // (the position inside columnsToImpute) picks up another column's Slot setting whenever
                    // an earlier column is not imputed.
                    bool bySlot = args.Column[iinfo].Slot ?? args.ImputeBySlot;
                    if (_types[iinfo].IsVector && !_types[iinfo].IsKnownSizeVector && bySlot)
                    {
                        ch.Warning("By-slot imputation can not be done on variable-length column");
                        bySlot = false;
                    }

                    statAggregators[ii] = CreateStatAggregator(ch, _types[iinfo], imputationModes[iinfo], bySlot,
                                                               cursor, Infos[iinfo].Source);
                }

                // Single pass over the data; every aggregator sees every row.
                while (cursor.MoveNext())
                {
                    for (int ii = 0; ii < statAggregators.Length; ii++)
                    {
                        statAggregators[ii].ProcessRow();
                    }
                }

                for (int ii = 0; ii < statAggregators.Length; ii++)
                {
                    repValues[columnsToImpute[ii]] = statAggregators[ii].GetStat();
                }

                ch.Done();
            }

            // Construct the slotIsDefault bit arrays.
            for (int ii = 0; ii < columnsToImpute.Count; ii++)
            {
                int iinfo = columnsToImpute[ii];
                if (repValues[iinfo] is Array)
                {
                    // ComputeDefaultSlots<T> has to be closed over the column's raw item type, which is only
                    // known at run time; <int> is just a placeholder to obtain the generic method definition.
                    Func<ColumnType, int[], BitArray> func = ComputeDefaultSlots<int>;
                    var meth = func.GetMethodInfo().GetGenericMethodDefinition().MakeGenericMethod(_types[iinfo].ItemType.RawType);
                    slotIsDefault[iinfo] = (BitArray)meth.Invoke(this, new object[] { _types[iinfo], repValues[iinfo] });
                }
            }
        }
コード例 #4
0
        /// <summary>
        /// Public facing helper: describes a single column mapping in an <see cref="Arguments"/> instance and
        /// forwards it to the <c>Create(env, args, input)</c> overload.
        /// </summary>
        /// <param name="env">Host Environment.</param>
        /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
        /// <param name="name">Name of the output column.</param>
        /// <param name="source">Name of the column to be transformed. When null, <paramref name="name"/> is used as the source as well.</param>
        /// <param name="replaceWith">The replacement method to utilize.</param>
        /// <returns>The <see cref="IDataTransform"/> returned by the forwarded <c>Create</c> overload.</returns>
        public static IDataTransform Create(IHostEnvironment env, IDataView input, string name, string source = null, ReplacementKind replaceWith = ReplacementKind.DefaultValue)
        {
            // The source column defaults to the output column when the caller supplies none.
            var mapping = new Column
            {
                Source = source ?? name,
                Name = name
            };

            var arguments = new Arguments
            {
                Column = new[] { mapping },
                ReplaceWith = replaceWith
            };

            return Create(env, arguments, input);
        }