예제 #1
0
        /// <summary>
        /// Builds one <c>ScalingFactor</c> per configured column.
        /// The key is the index of the source column when the column is scaled in place
        /// (source and name are equal), otherwise the index of the named column in <c>Schema</c>.
        /// </summary>
        /// <returns>dictionary mapping a column index to its scaling factor</returns>
        Dictionary <int, ScalingFactor> GetScalingParameters()
        {
            var outputSchema = Schema;
            var inputSchema  = _input.Schema;
            var factors      = new Dictionary <int, ScalingFactor>();

            foreach (var column in _args.columns)
            {
                int sourceIndex = SchemaHelper.GetColumnIndex(inputSchema, column.Source);

                // Statistics are stored by column name, the name is read through the index found in the input schema.
                var statistics = _scalingStat[outputSchema[sourceIndex].Name];

                int targetIndex = column.Source == column.Name
                                    ? sourceIndex
                                    : SchemaHelper.GetColumnIndex(Schema, column.Name);

                factors[targetIndex] = new ScalingFactor(_host, sourceIndex, _args.scaling, statistics);
            }
            return(factors);
        }
예제 #2
0
        /// <summary>
        /// Builds a view which exposes every column except the ones listed in <paramref name="colNames"/>.
        /// Data is not copied.
        /// </summary>
        /// <param name="colNames">names of the columns to leave out</param>
        /// <returns>a new <c>DataFrameView</c> over the same source and rows</returns>
        public DataFrameView Drop(IEnumerable <string> colNames)
        {
            var dropped = new HashSet <int>(colNames.Select(name => SchemaHelper.GetColumnIndex(Schema, name)));
            Func <int, bool> isKept = position => !dropped.Contains(position);
            var remaining = Enumerable.Range(0, ColumnCount).Where(isKept);

            return(new DataFrameView(_src, _rows, remaining));
        }
예제 #3
0
        /// <summary>
        /// Walks through the data to compute the range of the label column.
        /// </summary>
        /// <param name="tr">transform whose schema holds the label column</param>
        /// <param name="label">name of the label column</param>
        /// <param name="nb">highest label + 1 for float labels (0 based), highest label for key labels (1 based)</param>
        /// <returns>
        /// (lowest label, highest label); for key labels the lowest label is always reported as 1.
        /// </returns>
        protected Tuple <int, int> MinMaxLabelOverDataSet(MultiToBinaryTransform tr, string label, out int nb)
        {
            int index = SchemaHelper.GetColumnIndex(tr.Schema, label);
            var ty    = tr.Schema[index].Type;

            switch (ty.RawKind())
            {
            case DataKind.Single:
                // float is 0 based
                var tf = MinMaxLabel <float>(tr, index);
                nb = (int)tf.Item2 + 1;
                return(new Tuple <int, int>((int)tf.Item1, (int)tf.Item2));

            case DataKind.SByte:
            case DataKind.Byte:
                // key is 1 based
                // The values are read as byte, so the unsigned kind must reach this branch as well
                // (the sibling cases below are UInt16 and UInt32); SByte is kept for compatibility.
                var tb = MinMaxLabel <byte>(tr, index);
                nb = tb.Item2;
                return(new Tuple <int, int>(1, (int)tb.Item2));

            case DataKind.UInt16:
                // key is 1 based
                var ts = MinMaxLabel <ushort>(tr, index);
                nb = ts.Item2;
                return(new Tuple <int, int>(1, (int)ts.Item2));

            case DataKind.UInt32:
                // key is 1 based
                var tu = MinMaxLabel <uint>(tr, index);
                nb = (int)tu.Item2;
                return(new Tuple <int, int>(1, (int)tu.Item2));

            default:
                throw Contracts.ExceptNotImpl("Type '{0}' not implemented", ty.RawKind());
            }
        }
        /// <summary>
        /// Debugging helper: reads every row of column <paramref name="labName"/> and fails if the view is empty.
        /// Float vectors, float scalars and uint keys are supported; float vectors must not be empty.
        /// </summary>
        /// <param name="viewI">view to walk through</param>
        /// <param name="labName">name of the column to read</param>
        /// <param name="oneO">if true, a float getter and a uint getter are also requested on the column</param>
        protected void DebugChecking0(IDataView viewI, string labName, bool oneO)
        {
            int index  = SchemaHelper.GetColumnIndex(viewI.Schema, labName);
            int nbRows = 0;

            using (var cursor = viewI.GetRowCursor(i => i == index))
            {
                if (oneO)
                {
                    // NOTE(review): both getters are requested on the same column; confirm that GetGetter
                    // returns null instead of throwing when the type does not match.
                    var getterFloat = cursor.GetGetter <float>(index);
                    var getterUInt  = cursor.GetGetter <uint>(index);
                    Contracts.Assert(getterFloat != null || getterUInt != null);
                }

                var ty = viewI.Schema[index].Type;
                if (ty.IsVector() && ty.AsVector().ItemType().RawKind() == DataKind.R4)
                {
                    var getter = cursor.GetGetter <VBuffer <float> >(index);
                    var value  = new VBuffer <float>();
                    while (cursor.MoveNext())
                    {
                        getter(ref value);
                        if (value.Length == 0 || value.Count == 0)
                        {
                            throw Host.Except("Issue.");
                        }
                        ++nbRows;
                    }
                }
                else if (!ty.IsVector() && ty.RawKind() == DataKind.R4)
                {
                    // The unused schema string formerly computed here was dropped.
                    var getter = cursor.GetGetter <float>(index);
                    var value  = 0f;
                    while (cursor.MoveNext())
                    {
                        getter(ref value);
                        ++nbRows;
                    }
                }
                else if (ty.IsKey() && ty.RawKind() == DataKind.U4)
                {
                    var  getter = cursor.GetGetter <uint>(index);
                    uint value  = 0;
                    while (cursor.MoveNext())
                    {
                        getter(ref value);
                        ++nbRows;
                    }
                }
                else
                {
                    throw Host.ExceptNotSupp();
                }
            }
            if (nbRows == 0)
            {
                throw Contracts.Except("View is empty.");
            }
        }
        /// <summary>
        /// Constructor.
        /// </summary>
        /// <param name="env">environment like ConsoleEnvironment</param>
        /// <param name="mapper">IValueMapper</param>
        /// <param name="source">source to replace</param>
        /// <param name="inputColumn">name of the input column; if null or empty, the last column of
        /// <paramref name="source"/> whose type matches the mapper input type is used</param>
        /// <param name="outputColumn">name of the output column</param>
        public TransformFromValueMapper(IHostEnvironment env, IValueMapper mapper, IDataView source,
                                        string inputColumn, string outputColumn = "output")
        {
            Contracts.AssertValue(env);
            Contracts.AssertValue(mapper);
            Contracts.AssertValue(source);
            _host = env;

            if (string.IsNullOrEmpty(inputColumn))
            {
                // Scan the schema backwards and stop at the first column compatible with the mapper input.
                var expected = mapper.InputType;
                int position = source.Schema.Count;
                while (--position >= 0)
                {
                    if (source.Schema[position].Type.SameSizeAndItemType(expected))
                    {
                        inputColumn = source.Schema[position].Name;
                        break;
                    }
                }
            }

            _input  = source;
            _mapper = mapper;

            // The returned index is not kept; presumably the lookup fails when the column is missing.
            SchemaHelper.GetColumnIndex(_input.Schema, inputColumn);

            _inputColumn  = inputColumn;
            _outputColumn = outputColumn;

            var extended = new ExtendedSchema(source.Schema, new[] { outputColumn }, new[] { mapper.OutputType });
            _schema    = ExtendedSchema.Create(extended);
            _transform = CreateMemoryTransform();
        }
예제 #6
0
        /// <summary>
        /// Builds a disposable value mapper: every call pushes one value into an input view,
        /// moves the output cursor forward and reads column <c>_outputColumn</c>.
        /// The cursor is handed over to the returned object as a disposable.
        /// </summary>
        ValueMapperDispose <TSrc, TDst> GetMapperDispose <TSrc, TDst>()
        {
            var firstView = _sourceToReplace ?? DataViewHelper.GetFirstView(_transform);
            var inputView = new InfiniteLoopViewCursorColumn <TSrc>(_inputIndex, firstView.Schema, ignoreOtherColumn: _ignoreOtherColumn);

            // This is extremely time consuming as the transform is serialized and deserialized.
            var outputView = _sourceToReplace == _transform.Source
                                ? ApplyTransformUtils.ApplyTransformToData(_computeEnv, _transform, inputView)
                                : ApplyTransformUtils.ApplyAllTransformsToData(_computeEnv, _transform, inputView,
                                                                               _sourceToReplace);
            int outputIndex = SchemaHelper.GetColumnIndex(outputView.Schema, _outputColumn);
            var cur         = outputView.GetRowCursor(i => i == outputIndex);
            var getter      = cur.GetGetter <TDst>(outputIndex);

            if (getter == null)
            {
                // The cursor is not handed over to anybody on this path, release it here.
                cur.Dispose();
                // typeof(TDst) is used because default(TDst).GetType() dereferences null for reference types.
                throw _env.Except("Unable to get a getter on the transform for type {0}", typeof(TDst));
            }
            return(new ValueMapperDispose <TSrc, TDst>((in TSrc src, ref TDst dst) =>
            {
                inputView.Set(in src);
                cur.MoveNext();
                getter(ref dst);
            }, new IDisposable[] { cur }));
예제 #7
0
        /// <summary>
        /// Constructor.
        /// </summary>
        /// <param name="env">environment like ConsoleEnvironment</param>
        /// <param name="transform">transform to convert</param>
        /// <param name="inputColumn">input column of the mapper</param>
        /// <param name="outputColumn">output column of the mapper</param>
        /// <param name="sourceToReplace">source to replace</param>
        /// <param name="conc">number of concurrency threads; a dedicated environment is created when it is positive</param>
        /// <param name="ignoreOtherColumn">ignore other columns instead of raising an exception if they are requested</param>
        public ValueMapperFromTransformFloat(IHostEnvironment env, IDataTransform transform,
                                             string inputColumn, string outputColumn,
                                             IDataView sourceToReplace = null, int conc = 1,
                                             bool ignoreOtherColumn    = false)
        {
            Contracts.AssertValue(env);
            Contracts.AssertValue(transform);

            _env               = env;
            _transform         = transform;
            _sourceToReplace   = sourceToReplace;
            _outputColumn      = outputColumn;
            _ignoreOtherColumn = ignoreOtherColumn;
            _toDispose         = new List <IDisposable>();

            // The input column is looked up in the replaced source if any, in the first view of the pipeline otherwise.
            var entryView = _sourceToReplace ?? DataViewHelper.GetFirstView(transform);
            _inputIndex = SchemaHelper.GetColumnIndex(entryView.Schema, inputColumn);

            int outputIndex = SchemaHelper.GetColumnIndex(transform.Schema, outputColumn);
            _outputType = _transform.Schema[outputIndex].Type;

            _disposeEnv = conc > 0;
            if (_disposeEnv)
            {
                _computeEnv = new PassThroughEnvironment(env, conc: conc, verbose: false);
            }
            else
            {
                _computeEnv = env;
            }
        }
예제 #8
0
        /// <summary>
        /// Creates the sorting transform for a sort column holding values of type <typeparamref name="TValue"/>.
        /// </summary>
        /// <returns>a <c>SortInDataFrameState</c> bound to the index of <c>_sortColumn</c> in the source schema</returns>
        private IDataTransform CreateTransform <TValue>()
            where TValue : IComparable <TValue>
        {
            int sortIndex = SchemaHelper.GetColumnIndex(Source.Schema, _sortColumn);
            var state     = new SortInDataFrameState <TValue>(Host, Source, sortIndex, _reverse, _numThreads);

            return(state);
        }
예제 #9
0
        /// <summary>
        /// Trains the nearest neighbors predictor for labels of type <typeparamref name="TLabel"/>.
        /// </summary>
        /// <param name="ch">channel used to raise exceptions</param>
        /// <param name="data">training data with feature, label and optional weight roles</param>
        /// <returns>predictor built from the trees and the merged labels</returns>
        private INearestNeighborsPredictor TrainPredictorLabel <TLabel>(IChannel ch, RoleMappedData data)
            where TLabel : IComparable <TLabel>
        {
            int featureIndex = data.Schema.Feature.Value.Index;
            int labelIndex   = data.Schema.Label.Value.Index;
            // Third argument set to true: a missing id column is reported as -1 (see the test below).
            int idIndex      = SchemaHelper.GetColumnIndex(data.Schema.Schema, _args.colId, true);
            int weightIndex  = data.Schema.Weight == null ? -1 : data.Schema.Weight.Value.Index;

            // The set of column indexes formerly built here was never read and has been removed.
            if (idIndex != -1)
            {
                var colType = data.Schema.Schema[idIndex].Type;
                if (colType.IsVector() || colType.RawKind() != DataKind.Int64)
                {
                    throw ch.Except("Column '{0}' must be of type '{1}' not '{2}'", _args.colId, DataKind.Int64, colType);
                }
            }

            Dictionary <long, Tuple <TLabel, float> > merged;
            var kdtrees = NearestNeighborsBuilder.NearestNeighborsBuild <TLabel>(ch, data.Data, featureIndex, labelIndex,
                                                                                 idIndex, weightIndex, out merged, _args);

            return(CreateTrainedPredictor(kdtrees.Trees, merged));
        }
예제 #10
0
 /// <summary>
 /// Returns the index of column <paramref name="name"/> in <c>Schema</c>, or -1 if no name is given.
 /// </summary>
 /// <param name="ch">exception context, not used by this method</param>
 /// <param name="name">column name, may be null or empty</param>
 int GetColumnIndex(IExceptionContext ch, string name)
 {
     return(string.IsNullOrEmpty(name) ? -1 : SchemaHelper.GetColumnIndex(Schema, name));
 }
예제 #11
0
        /// <summary>
        /// Maps, for every configured column, the index of the column <c>Name</c> to the index of the column <c>Source</c>.
        /// Both indexes are looked up in <c>_schema</c>.
        /// </summary>
        Dictionary <int, int> BuildMapping()
        {
            var mapping = new Dictionary <int, int>();

            foreach (var column in _args.columns)
            {
                int target = SchemaHelper.GetColumnIndex(_schema, column.Name);
                int origin = SchemaHelper.GetColumnIndex(_schema, column.Source);
                mapping[target] = origin;
            }
            return(mapping);
        }
예제 #12
0
        /// <summary>
        /// Returns a cursor on the inner transform; the sort column, if any, is always requested
        /// in addition to the columns selected by <paramref name="needCol"/>.
        /// </summary>
        /// <param name="needCol">tells which columns are required</param>
        /// <param name="rand">must be null when a sort column is defined</param>
        protected override RowCursor GetRowCursorCore(Func <int, bool> needCol, Random rand = null)
        {
            bool sorted = !string.IsNullOrEmpty(_sortColumn);

            Host.Check(!sorted || rand == null, "Random access is not allowed on sorted data. (5)");
            Host.AssertValue(_transform, "_transform");

            // -1 never matches a column index, the predicate then reduces to needCol.
            int sortIndex = sorted ? SchemaHelper.GetColumnIndex(Source.Schema, _sortColumn) : -1;
            Func <int, bool> required = i => i == sortIndex || needCol(i);

            return(_transform.GetRowCursor(required, rand));
        }
예제 #13
0
 /// <summary>
 /// Converts column names into column indexes.
 /// </summary>
 /// <param name="schema">schema the columns belong to</param>
 /// <param name="subsetColumns">column names; if null, every column which is not hidden is returned</param>
 /// <returns>column indexes, in the order of <paramref name="subsetColumns"/> when it is specified</returns>
 public static int[] GetColumnsIndex(Schema schema, IEnumerable <string> subsetColumns = null)
 {
     IEnumerable <int> indices = subsetColumns != null
                                     ? subsetColumns.Select(name => SchemaHelper.GetColumnIndex(schema, name))
                                     : Enumerable.Range(0, schema.Count).Where(position => !schema[position].IsHidden);

     return(indices.ToArray());
 }
예제 #14
0
        /// <summary>
        /// Returns a cursor on the inner transform for the columns computed by
        /// <c>SchemaHelper.ColumnsNeeded</c> from <paramref name="columnsNeeded"/> and the sort column.
        /// </summary>
        /// <param name="columnsNeeded">columns requested by the caller</param>
        /// <param name="rand">must be null when a sort column is defined</param>
        protected override DataViewRowCursor GetRowCursorCore(IEnumerable <DataViewSchema.Column> columnsNeeded, Random rand = null)
        {
            bool sorted = !string.IsNullOrEmpty(_sortColumn);

            Host.Check(!sorted || rand == null, "Random access is not allowed on sorted data. (5)");
            Host.AssertValue(_transform, "_transform");
            if (sorted)
            {
                // The index is not used afterwards; presumably the lookup fails when the sort column is missing.
                SchemaHelper.GetColumnIndex(Source.Schema, _sortColumn);
            }

            var required = SchemaHelper.ColumnsNeeded(columnsNeeded, _transform.Schema, _sortColumn);
            return(_transform.GetRowCursor(required, rand));
        }
        /// <summary>
        /// Returns the mapper of the predictor, preceded by the preprocessing transform if there is one.
        /// Only preprocessing outputs of type float or vector of floats are supported.
        /// </summary>
        /// <returns>a mapper from <typeparamref name="TSrc"/> to <typeparamref name="TDst"/></returns>
        public ValueMapper <TSrc, TDst> GetMapper <TSrc, TDst>()
        {
            if (_preProcess == null)
            {
                if (_postProcess == null)
                {
                    return((_predictor as IValueMapper).GetMapper <TSrc, TDst>());
                }
                throw _host.ExceptNotImpl();
            }

            var          valuemapper = _preProcess as IValueMapper;
            DataViewType outType;
            if (valuemapper != null)
            {
                outType = valuemapper.OutputType;
            }
            else
            {
                // The transform is not a value mapper: the type is read from the schema of its source.
                int index = SchemaHelper.GetColumnIndex(_preProcess.Source.Schema, _inputColumn);
                outType = _preProcess.Source.Schema[index].Type;
            }

            if (outType.IsVector())
            {
                switch (outType.AsVector().ItemType().RawKind())
                {
                case DataKind.Single:
                    return(GetMapperWithTransform <TSrc, VBuffer <float>, TDst>(_preProcess));

                default:
                    throw _host.ExceptNotSupp("Type '{0}' is not handled yet.", outType);
                }
            }

            // outType is used here and not valuemapper.OutputType: valuemapper is null
            // when the type was read from the schema above.
            switch (outType.RawKind())
            {
            case DataKind.Single:
                return(GetMapperWithTransform <TSrc, float, TDst>(_preProcess));

            default:
                throw _host.ExceptNotSupp("Type '{0}' is not handled yet.", outType);
            }
        }
예제 #16
0
            /// <summary>
            /// Returns the getters of columns PredictedLabel (bool), Score (float) and Probability (float), in that order.
            /// </summary>
            /// <param name="cursor">cursor to read from</param>
            public Delegate[] GetCursorGetter(RowCursor cursor)
            {
                int predictedLabel = SchemaHelper.GetColumnIndex(cursor.Schema, "PredictedLabel");
                int score          = SchemaHelper.GetColumnIndex(cursor.Schema, "Score");
                int probability    = SchemaHelper.GetColumnIndex(cursor.Schema, "Probability");

                var getters = new Delegate[3];
                getters[0] = cursor.GetGetter <bool>(predictedLabel);
                getters[1] = cursor.GetGetter <float>(score);
                getters[2] = cursor.GetGetter <float>(probability);
                return(getters);
            }
예제 #17
0
 /// <summary>
 /// Returns a set of cursors on the inner transform; the sort column, if any, is always requested
 /// in addition to the columns selected by <paramref name="needCol"/>.
 /// </summary>
 /// <param name="needCol">tells which columns are required</param>
 /// <param name="n">forwarded to the inner transform</param>
 /// <param name="rand">must be null when a sort column is defined</param>
 public override RowCursor[] GetRowCursorSet(Func <int, bool> needCol, int n, Random rand = null)
 {
     bool unsorted = string.IsNullOrEmpty(_sortColumn);

     Host.Check(unsorted || rand == null, "Random access is not allowed on sorted data. (6)");
     Host.AssertValue(_transform, "_transform");
     if (unsorted)
     {
         return(_transform.GetRowCursorSet(needCol, n, rand));
     }

     int sortIndex = SchemaHelper.GetColumnIndex(Source.Schema, _sortColumn);
     Func <int, bool> required = i => i == sortIndex || needCol(i);
     return(_transform.GetRowCursorSet(required, n, rand));
 }
예제 #18
0
 /// <summary>
 /// Returns a set of cursors on the inner transform. When a sort column is defined, the requested
 /// columns are first completed by <c>SchemaHelper.ColumnsNeeded</c>.
 /// </summary>
 /// <param name="columnsNeeded">columns requested by the caller</param>
 /// <param name="n">forwarded to the inner transform</param>
 /// <param name="rand">must be null when a sort column is defined</param>
 public override DataViewRowCursor[] GetRowCursorSet(IEnumerable <DataViewSchema.Column> columnsNeeded, int n, Random rand = null)
 {
     bool unsorted = string.IsNullOrEmpty(_sortColumn);

     Host.Check(unsorted || rand == null, "Random access is not allowed on sorted data. (6)");
     Host.AssertValue(_transform, "_transform");
     if (unsorted)
     {
         return(_transform.GetRowCursorSet(columnsNeeded, n, rand));
     }

     // The index is not used afterwards; presumably the lookup fails when the sort column is missing.
     SchemaHelper.GetColumnIndex(Source.Schema, _sortColumn);

     var required = SchemaHelper.ColumnsNeeded(columnsNeeded, _transform.Schema, _sortColumn);
     return(_transform.GetRowCursorSet(required, n, rand));
 }
        /// <summary>
        /// Creates the value mapper of a binary classification scorer and stores it
        /// in <c>_mapperBinaryClassification</c> and <c>_valueMapper</c>.
        /// </summary>
        /// <param name="scorer">scorer to convert into a mapper</param>
        void _CreateMapper(IDataScorerTransform scorer)
        {
            _mapperBinaryClassification = null;
            var scorerSchema = scorer.Schema;

            // The indexes are not kept; presumably each lookup fails when the column is missing.
            foreach (var required in new[] { "PredictedLabel", "Score", "Probability" })
            {
                SchemaHelper.GetColumnIndex(scorerSchema, required);
            }

            var transformMapper = new ValueMapperFromTransform <TRowValue, PredictionTypeForBinaryClassification>(_env, scorer);

            _mapperBinaryClassification = transformMapper.GetMapper <TRowValue, PredictionTypeForBinaryClassification>();
            _valueMapper = transformMapper;
        }
 /// <summary>
 /// Creates a view which adds column <paramref name="dst"/> to <paramref name="input"/>.
 /// </summary>
 /// <param name="env">environment</param>
 /// <param name="name">registration name of the host</param>
 /// <param name="input">input data</param>
 /// <param name="src">name of the source column, it must exist in the input schema</param>
 /// <param name="dst">name of the added column</param>
 /// <param name="typeSrc">type of the source column</param>
 /// <param name="typeDst">type of the added column</param>
 /// <param name="mapper">mapper stored for the conversion of the source values</param>
 public LambdaColumnPassThroughView(IHostEnvironment env, string name, IDataView input,
                                    string src, string dst, ColumnType typeSrc, ColumnType typeDst,
                                    ValueMapper <TSrc, TDst> mapper)
 {
     _host      = env.Register(name);
     _source    = input;
     _mapper    = mapper;
     _columnDst = dst;
     _columnSrc = src;
     _typeDst   = typeDst;
     _typeSrc   = typeSrc;
     _newSchema = Schema.Create(new ExtendedSchema(_source.Schema, new[] { dst }, new[] { typeDst }));
     _srcIndex  = SchemaHelper.GetColumnIndex(_source.Schema, _columnSrc);
     // The exception used to be created and silently dropped; it is now raised when the lookup
     // reports a missing column with a negative index.
     if (_srcIndex < 0)
     {
         throw _host.Except("Unable to find column '{0}' in input schema.", _columnSrc);
     }
 }
            /// <summary>
            /// Returns a cursor which computes the new column when <paramref name="predicate"/> accepts
            /// the index of the input column, and a cursor which only exposes the schema otherwise.
            /// </summary>
            /// <param name="predicate">tells which columns are required</param>
            /// <param name="rand">forwarded to the source</param>
            public RowCursor GetRowCursor(Func <int, bool> predicate, Random rand = null)
            {
                // NOTE(review): the predicate is tested on the input column index, confirm this is the intended column.
                int inputIndex = SchemaHelper.GetColumnIndex(Source.Schema, _parent.InputName);

                if (!predicate(inputIndex))
                {
                    // The new column is not required. We do not need to compute it. But we need to keep the same schema.
                    return(new SameCursor(Source.GetRowCursor(predicate, rand), Schema));
                }

                var inner = Source.GetRowCursor(i => PredicatePropagation(i, inputIndex, predicate), rand);
                return(new MemoryCursor <TSrc, TDst>(this, inner, inputIndex));
            }
        /// <summary>
        /// Debugging helper: checks that every row of column <paramref name="labName"/> is a vector of
        /// length <paramref name="count"/> holding a single stored value equal to 1, and that
        /// the view has at least 10 rows. Any failure raises an exception with message "Issue.".
        /// </summary>
        /// <param name="viewI">view to walk through</param>
        /// <param name="labName">name of the column to check</param>
        /// <param name="count">expected length of the vectors</param>
        protected void DebugChecking0Vfloat(IDataView viewI, string labName, int count)
        {
            int index      = SchemaHelper.GetColumnIndex(viewI.Schema, labName);
            var columnType = viewI.Schema[index].Type;

            Contracts.Assert(columnType.IsKey() || columnType.IsVector() || columnType.RawKind() == DataKind.R4);
            using (var cursor = viewI.GetRowCursor(i => i == index))
            {
                var getter = cursor.GetGetter <VBuffer <float> >(index);
                var buffer = new VBuffer <float>();
                int rows   = 0;

                for (; cursor.MoveNext(); ++rows)
                {
                    getter(ref buffer);

                    // Empty or too large.
                    bool empty = buffer.Length == 0 || buffer.Count == 0;
                    if (empty || buffer.Length > count || buffer.Count > count)
                    {
                        throw Host.Except("Issue.");
                    }
                    // Exactly one stored value in a vector of the expected length.
                    if (!(buffer.Length == count && buffer.Count == 1))
                    {
                        // The value is read a second time before failing, as the original code does.
                        getter(ref buffer);
                        throw Host.Except("Issue.");
                    }
                    if (buffer.Values[0] != 1)
                    {
                        throw Host.Except("Issue.");
                    }
                    if (buffer.Indices[0] >= count)
                    {
                        getter(ref buffer);
                        throw Host.Except("Issue.");
                    }
                    if (buffer.Indices[0] < 0)
                    {
                        throw Host.Except("Issue.");
                    }
                }
                if (rows < 10)
                {
                    throw Host.Except("Issue.");
                }
            }
        }
        /// <summary>
        /// Creates the transform after checking the arguments and the consistency of the value mappers.
        /// </summary>
        /// <param name="env">environment</param>
        /// <param name="args">arguments, <c>PostProcess</c> is called on them</param>
        /// <param name="input">input data, it must contain column <c>args.inputColumn</c></param>
        /// <param name="toShake">value mappers; at least one, as many as output columns, all with the same output type</param>
        public ShakeInputTransform(IHostEnvironment env, Arguments args, IDataView input, IValueMapper[] toShake)
        {
            Contracts.CheckValue(env, "env");
            _host = env.Register("ShakeInputTransform");
            _host.CheckValue(args, "args");
            args.PostProcess();
            _host.CheckValue(toShake, "toShake");
            _host.CheckValue(input, "input");
            _host.CheckValue(args.inputColumn, "inputColumn");
            _host.CheckValue(args.inputFeaturesInt, "inputFeatures");
            _host.CheckValue(args.outputColumns, "outputColumns");

            _toShake = toShake;
            _input   = input;

            // The index is not kept; presumably the lookup fails when the column is missing.
            SchemaHelper.GetColumnIndex(input.Schema, args.inputColumn);

            if (toShake.Length != args.outputColumns.Length)
            {
                throw _host.ExceptParam("outputColumns", "toShake and outputColumns must have the same length");
            }
            // Without this check an empty array fails below with an IndexOutOfRangeException on _toShake[0].
            if (toShake.Length == 0)
            {
                throw _host.ExceptParam("toShake", "toShake must contain at least one value mapper");
            }

            for (int i = 1; i < _toShake.Length; ++i)
            {
                if (_toShake[i].OutputType.IsVector() && _toShake[i - 1].OutputType.IsVector())
                {
                    // Two vectors only need to share the same item type.
                    if (_toShake[i].OutputType.ItemType() != _toShake[i - 1].OutputType.ItemType())
                    {
                        throw _host.Except("All value mappers must be the same type.");
                    }
                }
                else if (_toShake[i].OutputType != _toShake[i - 1].OutputType)
                {
                    throw _host.Except("All value mappers must be the same type.");
                }
            }
            if (_toShake[0].OutputType.IsVector())
            {
                var vec = _toShake[0].OutputType.AsVector();
                if (vec.ItemType().IsVector())
                {
                    throw _host.ExceptNotSupp("Unable to handle vectors of vectors as outputs of the mapper.");
                }
            }

            _args      = args;
            _transform = CreateTemplatedTransform();
        }
        /// <summary>
        /// Creates the final predictor from the trained scalar predictors.
        /// The label transform is rebuilt with <c>MapLabelsAndInsertTransform</c> and takes over
        /// the state of <paramref name="trans"/> through <c>Steal</c>.
        /// </summary>
        /// <param name="ch">channel used to raise exceptions</param>
        /// <param name="data">training data, the kind of its label column selects the predictor type</param>
        /// <param name="trans">label transform used during training</param>
        /// <param name="count">forwarded to <c>MapLabelsAndInsertTransform</c></param>
        /// <param name="args">not read by this method, the field <c>_args</c> is used instead</param>
        /// <param name="predictors">trained scalar predictors</param>
        /// <param name="reclassPredictor">not read by this method, the field <c>_reclassPredictor</c> is used instead</param>
        /// <returns>the final predictor</returns>
        protected TVectorPredictor CreateFinalPredictor(IChannel ch, RoleMappedData data,
                                                        MultiToBinaryTransform trans, int count, Arguments args,
                                                        TScalarPredictor[] predictors, IPredictor reclassPredictor)
        {
            // We create the final predictor. We remove every unneeded transform.
            string dstName, labName;
            var    trans_ = trans;

            trans = MapLabelsAndInsertTransform(ch, data, out dstName, out labName, count, false, _args);
            trans.Steal(trans_);
            // The index is not kept; presumably the lookup fails when the label column is missing.
            SchemaHelper.GetColumnIndex(trans.Schema, labName);
            var initialLabKind = data.Schema.Label.Value.Type.RawKind();

            TVectorPredictor predictor;

            switch (initialLabKind)
            {
            case DataKind.Single:
                var p4 = MultiToRankerPredictor.Create(Host, trans.GetClasses <float>(), predictors, _reclassPredictor, _args.singleColumn, false);
                predictor = p4 as TVectorPredictor;
                break;

            case DataKind.SByte:
            case DataKind.Byte:
                // Classes are read as byte, so the unsigned kind must reach this branch as well
                // (the sibling cases are UInt16 and UInt32); SByte is kept for compatibility.
                var pu1 = MultiToRankerPredictor.Create(Host, trans.GetClasses <byte>(), predictors, _reclassPredictor, _args.singleColumn, true);
                predictor = pu1 as TVectorPredictor;
                break;

            case DataKind.UInt16:
                var pu2 = MultiToRankerPredictor.Create(Host, trans.GetClasses <ushort>(), predictors, _reclassPredictor, _args.singleColumn, true);
                predictor = pu2 as TVectorPredictor;
                break;

            case DataKind.UInt32:
                var pu4 = MultiToRankerPredictor.Create(Host, trans.GetClasses <uint>(), predictors, _reclassPredictor, _args.singleColumn, true);
                predictor = pu4 as TVectorPredictor;
                break;

            default:
                throw ch.ExceptNotSupp("Unsupported type for a multi class label.");
            }

            Host.Assert(predictor != null);
            return(predictor);
        }
예제 #25
0
        /// <summary>
        /// Returns a cursor which computes the nearest neighbors when the added column is required
        /// (its index is the number of columns of the input schema), and a cursor which only exposes
        /// the schema otherwise.
        /// </summary>
        /// <param name="predicate">tells which columns are required</param>
        /// <param name="rand">forwarded to the input</param>
        public RowCursor GetRowCursor(Func <int, bool> predicate, Random rand = null)
        {
            ComputeNearestNeighbors();
            _host.AssertValue(_input, "_input");

            int addedColumn = _input.Schema.Count;
            if (!predicate(addedColumn))
            {
                // The new column is not required. We do not need to compute it. But we need to keep the same schema.
                return(new SameCursor(_input.GetRowCursor(predicate, rand), Schema));
            }

            int featureIndex = SchemaHelper.GetColumnIndex(Schema, _args.column);
            var inner        = _input.GetRowCursor(i => PredicatePropagation(i, featureIndex, predicate), rand);
            return(new NearestNeighborsCursor(inner, this, predicate, featureIndex));
        }
예제 #26
0
        /// <summary>
        /// Computes the output schema: the input schema followed by one column for every configured
        /// column whose name differs from its source. An added column has the type of its source.
        /// </summary>
        /// <returns>the input schema itself when no column is added</returns>
        Schema ComputeExtendedSchema()
        {
            Func <string, ColumnType> getType = (string col) =>
            {
                var schema = _input.Schema;
                int index  = SchemaHelper.GetColumnIndex(schema, col);
                return(schema[index].Type);
            };

            // Materialized once: the lazy query used to be enumerated for the test, the names and the types.
            var added = _args.columns.Where(c => c.Name != c.Source).ToArray();

            if (added.Length == 0)
            {
                return(_input.Schema);
            }
            return(Schema.Create(new ExtendedSchema(_input.Schema,
                                                    added.Select(c => c.Name).ToArray(),
                                                    added.Select(c => getType(c.Source)).ToArray())));
        }
            /// <summary>
            /// Returns a set of cursors which compute the new column when <paramref name="predicate"/>
            /// accepts the index of the input column, and cursors which only expose the schema otherwise.
            /// </summary>
            /// <param name="predicate">tells which columns are required</param>
            /// <param name="n">forwarded to the source</param>
            /// <param name="rand">forwarded to the source</param>
            public RowCursor[] GetRowCursorSet(Func <int, bool> predicate, int n, Random rand = null)
            {
                // NOTE(review): the predicate is tested on the input column index, confirm this is the intended column.
                int inputIndex = SchemaHelper.GetColumnIndex(Source.Schema, _parent.InputName);

                if (!predicate(inputIndex))
                {
                    // The new column is not required. We do not need to compute it. But we need to keep the same schema.
                    return(Source.GetRowCursorSet(predicate, n, rand)
                           .Select(c => new SameCursor(c, Schema))
                           .ToArray());
                }

                var inners = Source.GetRowCursorSet(i => PredicatePropagation(i, inputIndex, predicate), n, rand);
                return(inners.Select(c => new MemoryCursor <TSrc, TDst>(this, c, inputIndex)).ToArray());
            }
        /// <summary>
        /// Create a SplitTrainTestTransform transform.
        /// </summary>
        /// <param name="env">environment</param>
        /// <param name="args">arguments, <c>PostProcess</c> is called on them before they are checked</param>
        /// <param name="input">input data, it must not already contain column <c>args.newColumn</c></param>
        public SplitTrainTestTransform(IHostEnvironment env, Arguments args, IDataView input)
            : base(env, RegistrationName, input)
        {
            Host.CheckValue(args, "args");
            args.PostProcess();
            // Messages below were aligned with the conditions they report.
            Host.CheckUserArg(args.poolRows >= 0, "poolRows must be >= 0");
            Host.CheckUserArg(!string.IsNullOrEmpty(args.newColumn), "newColumn cannot be empty");
            Host.CheckUserArg(args.ratios != null, "ratios cannot be null");
            Host.CheckUserArg(args.ratios.Length > 1, "Number of ratios must be > 1");
            // NOTE(review): when both filename and tag are specified, neither length is checked; confirm this is intended.
            Host.CheckUserArg(args.filename == null || args.tag != null || args.filename.Length == args.ratios.Length, "filenames must be either empty either an array of the same size as ratios");
            Host.CheckUserArg(args.tag == null || args.filename != null || args.tag.Length == args.ratios.Length, "tags must be either empty either an array of the same size as ratios");
            Host.CheckUserArg(!args.numThreads.HasValue || args.numThreads.Value > 0, "numThreads must be > 0.");
            var sum = args.fratios.Sum();

            Host.CheckUserArg(Math.Abs(sum - 1f) < 1e-5, "Sum of ratios must be 1.");
            // Third argument set to true: a missing column is reported as -1, which is the expected case here.
            int col = SchemaHelper.GetColumnIndex(input.Schema, args.newColumn, true);

            Host.Check(col == -1, $"Column '{args.newColumn}' should not exist.");

            _newColumn    = args.newColumn;
            _shuffleInput = args.shuffleInput;
            _poolRows     = args.poolRows;
            _filenames    = args.filename;
            _seed         = args.seed;
            _seedShuffle  = args.seedShuffle;
            _ratios       = args.fratios;
            _cacheFile    = args.cacheFile;
            _reuse        = args.reuse;
            _tags         = args.tag;

            var saveSettings = args.saverSettings as ICommandLineComponentFactory;

            Host.CheckValue(saveSettings, nameof(saveSettings));
            _saverSettings = string.Format("{0}{{{1}}}", saveSettings.Name, saveSettings.GetSettingsString());
            _saverSettings = _saverSettings.Replace("{}", "");

            // The saver is only created to check that the settings can be parsed.
            var saver = ComponentCreation.CreateSaver(Host, _saverSettings);

            if (saver == null)
            {
                throw Host.Except("Cannot parse '{0}'", _saverSettings);
            }

            _pipedTransform = AppendToPipeline(input);
        }
예제 #29
0
        /// <summary>
        /// Creates the state of the transform after checking that the features column is a vector of floats.
        /// </summary>
        private IDataTransform CreateTemplatedTransform()
        {
            int featureIndex = SchemaHelper.GetColumnIndex(Source.Schema, _args.features);
            var featureType  = Source.Schema[featureIndex].Type;

            if (!featureType.IsVector())
            {
                throw Host.Except("Features must be a vector.");
            }
            if (featureType.AsVector().ItemType().RawKind() == DataKind.Single)
            {
                return(new OpticsState(Host, this, Source, _args));
            }
            throw Host.Except("Features must be a vector a floats.");
        }
예제 #30
0
 /// <summary>
 /// Creates the transform. Exactly one column must be specified; the output schema is
 /// the input schema followed by that column with type R4.
 /// </summary>
 /// <param name="env">environment</param>
 /// <param name="args">arguments</param>
 /// <param name="input">input data, it must contain the time column and the source column</param>
 public DeTrendTransform(IHostEnvironment env, Arguments args, IDataView input)
     : base(env, RegistrationName, input)
 {
     Host.CheckValue(args, "args");
     _args = args;
     if (_args.columns == null || _args.columns.Length != 1)
     {
         // The exception used to be created without being thrown; the code then failed on args.columns[0].
         throw Host.ExceptUserArg(nameof(_args.columns), "One column must be specified.");
     }
     // The indexes are not kept; presumably each lookup fails when the column is missing.
     SchemaHelper.GetColumnIndex(input.Schema, args.timeColumn);
     SchemaHelper.GetColumnIndex(input.Schema, args.columns[0].Source);
     _schema = Schema.Create(new ExtendedSchema(input.Schema,
                                                new[] { _args.columns[0].Name },
                                                new[] { NumberType.R4 /*input.Schema.GetColumnType(index)*/ }));
     _trend     = null;
     _transform = null;
     _lock      = new object();
 }