/// <summary>
        /// Builds a transform whose schema is the schema of <paramref name="source"/> extended
        /// with one column named <paramref name="outputColumn"/> of the mapper's output type.
        /// </summary>
        /// <param name="env">host environment (for example a ConsoleEnvironment)</param>
        /// <param name="mapper">value mapper; its output type is the type of the added column</param>
        /// <param name="source">data view whose schema is extended</param>
        /// <param name="inputColumn">name of the input column; when null or empty, the last column of
        /// <paramref name="source"/> whose type has the same size and item type as the mapper's
        /// input type is selected</param>
        /// <param name="outputColumn">name of the output column</param>
        public TransformFromValueMapper(IHostEnvironment env, IValueMapper mapper, IDataView source,
                                        string inputColumn, string outputColumn = "output")
        {
            Contracts.AssertValue(env);
            Contracts.AssertValue(mapper);
            Contracts.AssertValue(source);
            _host = env;

            if (string.IsNullOrEmpty(inputColumn))
            {
                // No explicit column: walk the schema from the last column backwards and keep
                // the first one whose type matches the mapper's input type.
                var expectedType = mapper.InputType;
                int col = source.Schema.Count;
                while (--col >= 0)
                {
                    if (source.Schema[col].Type.SameSizeAndItemType(expectedType))
                    {
                        inputColumn = source.Schema[col].Name;
                        break;
                    }
                }
            }

            _input  = source;
            _mapper = mapper;

            // The returned index is not stored.
            // NOTE(review): presumably this call validates that the column exists - confirm.
            SchemaHelper.GetColumnIndex(_input.Schema, inputColumn);

            _inputColumn  = inputColumn;
            _outputColumn = outputColumn;

            var extended = new ExtendedSchema(source.Schema, new[] { outputColumn }, new[] { mapper.OutputType });
            _schema    = ExtendedSchema.Create(extended);
            _transform = CreateMemoryTransform();
        }
Example #2
0
        /// <summary>
        /// Builds the output schema: the source schema extended with one column per entry of
        /// <c>_args.columns</c>, each new column taking the type of its source column.
        /// </summary>
        /// <returns>the extended schema</returns>
        Schema BuildSchema()
        {
            var inputSchema = Source.Schema;

            var addedNames = (from column in _args.columns
                              select column.Name).ToArray();
            var addedTypes = (from column in _args.columns
                              select SchemaHelper.GetColumnType(inputSchema, column.Source)).ToArray();

            return Schema.Create(new ExtendedSchema(inputSchema, addedNames, addedTypes));
        }
Example #3
0
        /// <summary>
        /// Runs the common core-schema checks on a new <see cref="ExtendedSchema"/>, then passes a
        /// date-only string and a date-time string (with milliseconds) to <c>TryParse</c> together
        /// with the short timestamp tag and the expected <see cref="DateTime"/> values.
        /// </summary>
        public void TestExtendedSchema()
        {
            var extended = new ExtendedSchema();
            TestCoreSchemaCommon(extended);

            var expectedDate     = new DateTime(2002, 12, 14);
            var expectedDateTime = new DateTime(2002, 12, 14, 21, 59, 43, 234);

            TryParse(extended, "2002-12-14", ExtendedSchema.TimestampShortTag, expectedDate);
            TryParse(extended, "2002-12-14 21:59:43.234", ExtendedSchema.TimestampShortTag, expectedDateTime);
        }
Example #4
0
 /// <summary>
 /// Loading constructor: reads the arguments from <paramref name="ctx"/> and declares two
 /// additional columns, the cluster (Int32) and the score (Single).
 /// </summary>
 /// <param name="host">host forwarded to the base class</param>
 /// <param name="ctx">context the arguments are read from</param>
 /// <param name="input">data view whose schema is extended</param>
 private OpticsTransform(IHost host, ModelLoadContext ctx, IDataView input)
     : base(host, input)
 {
     Host.CheckValue(input, "input");
     Host.CheckValue(ctx, "ctx");

     _args = new Arguments();
     _args.Read(ctx, Host);

     // NOTE(review): the public constructor declares two columns per epsilon when several
     // epsilons are given, whereas this one always declares two - confirm this is intended.
     var inputSchema = input.Schema;
     var addedNames  = new string[] { _args.outCluster, _args.outScore };
     var addedTypes  = new DataViewType[] { NumberDataViewType.Int32, NumberDataViewType.Single };

     _schema    = ExtendedSchema.Create(new ExtendedSchema(inputSchema, addedNames, addedTypes));
     _transform = CreateTemplatedTransform();
 }
 /// <summary>
 /// Loading constructor: reads the arguments from <paramref name="ctx"/> and declares three
 /// additional columns, the ordering (Int64), the reachability distance (Single) and the
 /// core distance (Single).
 /// </summary>
 /// <param name="host">host forwarded to the base class</param>
 /// <param name="ctx">context the arguments are read from</param>
 /// <param name="input">data view whose schema is extended</param>
 private OpticsOrderingTransform(IHost host, ModelLoadContext ctx, IDataView input)
     : base(host, input)
 {
     Host.CheckValue(input, "input");
     Host.CheckValue(ctx, "ctx");

     _args = new Arguments();
     _args.Read(ctx, Host);

     var inputSchema = input.Schema;
     var addedNames  = new string[]
     {
         _args.outOrdering,
         _args.outReachabilityDistance,
         _args.outCoreDistance
     };
     var addedTypes = new DataViewType[]
     {
         NumberDataViewType.Int64,
         NumberDataViewType.Single,
         NumberDataViewType.Single
     };

     _schema    = ExtendedSchema.Create(new ExtendedSchema(inputSchema, addedNames, addedTypes));
     _transform = CreateTemplatedTransform();
 }
        /// <summary>
        /// Loading constructor. Reads from <paramref name="ctx"/>, in this order: the input column
        /// name, the input column type, the output column name, the pre-processing transform, the
        /// predictor, a flag telling whether a post-processing transform follows and, when the flag
        /// is set, that transform.
        /// </summary>
        /// <param name="host">host used for checks and for loading the sub-models</param>
        /// <param name="ctx">context the model is read from</param>
        private PrePostProcessPredictor(IHost host, ModelLoadContext ctx)
        {
            Contracts.CheckValue(host, nameof(host));
            _host = host;
            _host.CheckValue(ctx, nameof(ctx));

            _inputColumn = ctx.Reader.ReadString();
            var type = SchemaHelper.ReadType(ctx);

            _outputColumn = ctx.Reader.ReadString();

            DataViewSchema schema;
            IDataView      data;

            // A temporary single-column view of the stored input type is built so that the
            // pre-processing transform can be loaded on top of it. Only Single and vectors of
            // Single are supported.
            if (type.IsVector())
            {
                switch (type.AsVector().ItemType().RawKind())
                {
                case DataKind.Single:
                    schema = ExtendedSchema.Create(new ExtendedSchema((ISchema)null, new[] { _inputColumn }, new[] { new VectorDataViewType(NumberDataViewType.Single) }));
                    data   = new TemporaryViewCursorColumn <VBuffer <float> >(default(VBuffer <float>), 0, schema);
                    break;

                default:
                    throw Contracts.Except("Unable to create a temporary view from type '{0}'", type);
                }
            }
            else
            {
                switch (type.RawKind())
                {
                case DataKind.Single:
                    schema = ExtendedSchema.Create(new ExtendedSchema((ISchema)null, new[] { _inputColumn }, new[] { NumberDataViewType.Single }));
                    data   = new TemporaryViewCursorColumn <float>(default(float), 0, schema);
                    break;

                default:
                    throw Contracts.Except("Unable to create a temporary view from type '{0}'", type);
                }
            }

            ctx.LoadModel <IDataTransform, SignatureLoadDataTransform>(_host, out _preProcess, "_preProcess", data);
            ctx.LoadModel <IPredictor, SignatureLoadModel>(_host, out _predictor, "_predictor");
            var hasPost = ctx.Reader.ReadBoolByte();

            // The view wrapping the predictor is handed to the loader of the post-processing
            // transform, so it has to exist before that transform is loaded. It was previously
            // created afterwards, which passed a null reference to the loader.
            var mapper = _predictor as IValueMapper;
            if (mapper == null)
            {
                throw _host.Except("The loaded predictor must implement IValueMapper.");
            }
            _transformFromPredictor = new TransformFromValueMapper(_host, mapper, _preProcess, _inputColumn, _outputColumn);

            if (hasPost)
            {
                ctx.LoadModel <IDataTransform, SignatureLoadDataTransform>(_host, out _postProcess, "_postProcess", _transformFromPredictor);
            }
            else
            {
                _postProcess = null;
            }
        }
Example #7
0
 /// <summary>
 /// Creates a view whose schema is the schema of <paramref name="input"/> extended with
 /// column <paramref name="dst"/> of type <paramref name="typeDst"/>.
 /// </summary>
 /// <param name="env">environment used to register the host</param>
 /// <param name="name">registration name of the host</param>
 /// <param name="input">source data view</param>
 /// <param name="src">name of the source column, looked up in the input schema</param>
 /// <param name="dst">name of the added column</param>
 /// <param name="typeSrc">type of the source column</param>
 /// <param name="typeDst">type of the added column</param>
 /// <param name="mapper">mapper from the source value to the destination value</param>
 public LambdaColumnPassThroughView(IHostEnvironment env, string name, IDataView input,
                                    string src, string dst, DataViewType typeSrc, DataViewType typeDst,
                                    ValueMapper <TSrc, TDst> mapper)
 {
     _host      = env.Register(name);
     _source    = input;
     _mapper    = mapper;
     _columnDst = dst;
     _columnSrc = src;
     _typeDst   = typeDst;
     _typeSrc   = typeSrc;
     _newSchema = ExtendedSchema.Create(new ExtendedSchema(_source.Schema, new[] { dst }, new[] { typeDst }));
     _srcIndex  = SchemaHelper.GetColumnIndex(_source.Schema, _columnSrc);

     // The exception used to be created unconditionally and then dropped, which did nothing.
     // A negative index can never designate a column, so it is reported here.
     // NOTE(review): GetColumnIndex may already throw on a missing column - if so this guard
     // is only a safety net.
     if (_srcIndex < 0)
     {
         throw _host.Except("Unable to find column '{0}' in input schema.", _columnSrc);
     }
 }
Example #8
0
        /// <summary>
        /// Validates the arguments and declares the output columns. With at most one epsilon, two
        /// columns are added (cluster as Int32, score as Single). With several epsilons, one such
        /// pair is added per epsilon and the column names are suffixed with the epsilon position.
        /// </summary>
        /// <param name="env">environment forwarded to the base class</param>
        /// <param name="args">transform arguments; <c>newColumnsNumber</c> is updated here</param>
        /// <param name="input">data view whose schema is extended</param>
        public OpticsTransform(IHostEnvironment env, Arguments args, IDataView input)
            : base(env, RegistrationName, input)
        {
            Host.CheckValue(args, "args");
            args.PostProcess();

            bool hasNegativeEpsilon = args.epsilonsDouble != null && args.epsilonsDouble.Any(eps => eps < 0);
            Contracts.Check(!hasNegativeEpsilon, "Parameter epsilon, if passed, must be positive.");
            Contracts.Check(!(args.minPoints <= 0), "Parameter minPoints must be positive.");

            _args = args;

            int epsilonsCount = args.epsilonsDouble == null ? 0 : args.epsilonsDouble.Count();

            string[]       addedNames;
            DataViewType[] addedTypes;

            if (args.epsilonsDouble != null && epsilonsCount > 1)
            {
                // One (cluster, score) pair per epsilon, stored at consecutive positions.
                addedNames = new string[2 * epsilonsCount];
                addedTypes = new DataViewType[2 * epsilonsCount];

                for (int k = 0; k < epsilonsCount; ++k)
                {
                    int clusterSlot = 2 * k;
                    int scoreSlot   = clusterSlot + 1;

                    addedNames[clusterSlot] = String.Format("{0}_{1}", args.outCluster, k);
                    addedTypes[clusterSlot] = NumberDataViewType.Int32;
                    addedNames[scoreSlot]   = String.Format("{0}_{1}", args.outScore, k);
                    addedTypes[scoreSlot]   = NumberDataViewType.Single;
                }
            }
            else
            {
                addedNames = new string[] { args.outCluster, args.outScore };
                addedTypes = new DataViewType[] { NumberDataViewType.Int32, NumberDataViewType.Single };
            }

            args.newColumnsNumber = addedNames.Length / 2;
            _schema    = ExtendedSchema.Create(new ExtendedSchema(input.Schema, addedNames, addedTypes));
            _transform = CreateTemplatedTransform();
        }
Example #9
0
 /// <summary>
 /// Validates the arguments and declares two additional columns, the cluster (Int32) and the
 /// score (Single).
 /// </summary>
 /// <param name="env">environment forwarded to the base class</param>
 /// <param name="args">transform arguments; epsilon must not be negative and minPoints must be
 /// strictly positive</param>
 /// <param name="input">data view whose schema is extended</param>
 public DBScanTransform(IHostEnvironment env, Arguments args, IDataView input)
     : base(env, RegistrationName, input)
 {
     Host.CheckValue(args, "args");
     Contracts.Check(!(args.epsilon < 0), "Parameter epsilon must be positive or null.");
     Contracts.Check(!(args.minPoints <= 0), "Parameter minPoints must be positive.");

     _args = args;

     var inputSchema = input.Schema;
     var addedNames  = new string[] { args.outCluster, args.outScore };
     var addedTypes  = new DataViewType[] { NumberDataViewType.Int32, NumberDataViewType.Single };

     _schema    = ExtendedSchema.Create(new ExtendedSchema(inputSchema, addedNames, addedTypes));
     _transform = CreateTemplatedTransform();
 }
Example #10
0
        /// <summary>
        /// Computes the output schema. Every column of <c>_args.columns</c> whose name differs from
        /// its source adds a new column with the type of the source column. When no column
        /// qualifies, the input schema is returned unchanged.
        /// </summary>
        /// <returns>the input schema, extended when at least one column has a new name</returns>
        DataViewSchema ComputeExtendedSchema()
        {
            var inputSchema = _input.Schema;
            var renamed     = _args.columns.Where(c => c.Name != c.Source).ToArray();

            if (renamed.Length == 0)
            {
                return inputSchema;
            }

            var addedNames = renamed.Select(c => c.Name).ToArray();
            var addedTypes = renamed
                             .Select(c => inputSchema[SchemaHelper.GetColumnIndex(inputSchema, c.Source)].Type)
                             .ToArray();

            return ExtendedSchema.Create(new ExtendedSchema(inputSchema, addedNames, addedTypes));
        }
Example #11
0
 /// <summary>
 /// Validates the arguments and declares one additional column of type Single, named after
 /// the single entry of <c>args.columns</c>. The trend predictor and the transform are left
 /// null by this constructor.
 /// </summary>
 /// <param name="env">environment forwarded to the base class</param>
 /// <param name="args">transform arguments; exactly one column must be specified</param>
 /// <param name="input">data view whose schema is extended</param>
 public DeTrendTransform(IHostEnvironment env, Arguments args, IDataView input)
     : base(env, RegistrationName, input)
 {
     Host.CheckValue(args, "args");
     _args = args;
     if (_args.columns == null || _args.columns.Length != 1)
     {
         // The exception was previously created but never thrown, so an invalid column list
         // went on to fail on the index access below.
         throw Host.ExceptUserArg(nameof(_args.columns), "One column must be specified.");
     }

     // The returned indices are not stored.
     // NOTE(review): presumably these calls validate that both columns exist - confirm.
     SchemaHelper.GetColumnIndex(input.Schema, args.timeColumn);
     SchemaHelper.GetColumnIndex(input.Schema, args.columns[0].Source);

     // The added column is always Single, whatever the type of the source column.
     _schema = ExtendedSchema.Create(new ExtendedSchema(input.Schema,
                                                        new[] { _args.columns[0].Name },
                                                        new[] { NumberDataViewType.Single }));
     _trend     = null;
     _transform = null;
     _lock      = new object();
 }
Example #12
0
        /// <summary>
        /// Loading constructor: reads the arguments and the trend predictor from
        /// <paramref name="ctx"/>, declares one additional column of type Single and builds the
        /// transform from the loaded trend.
        /// </summary>
        /// <param name="host">host forwarded to the base class and used to load the trend</param>
        /// <param name="ctx">context the arguments and the trend predictor are read from</param>
        /// <param name="input">data view whose schema is extended</param>
        private DeTrendTransform(IHost host, ModelLoadContext ctx, IDataView input) :
            base(host, input)
        {
            Host.CheckValue(input, "input");
            Host.CheckValue(ctx, "ctx");
            _args = new Arguments();
            _args.Read(ctx, Host);

            ctx.LoadModel <IPredictor, SignatureLoadModel>(host, out _trend, "trend");

            if (_args.columns == null || _args.columns.Length != 1)
            {
                // The exception was previously created but never thrown, so an invalid column
                // list went on to fail on the index access below.
                throw Host.ExceptUserArg(nameof(_args.columns), "One column must be specified.");
            }

            // The returned index is not stored.
            // NOTE(review): presumably this call validates that the column exists - confirm.
            SchemaHelper.GetColumnIndex(input.Schema, _args.columns[0].Source);

            // The added column is always Single, whatever the type of the source column.
            _schema = ExtendedSchema.Create(new ExtendedSchema(input.Schema,
                                                               new[] { _args.columns[0].Name },
                                                               new[] { NumberDataViewType.Single }));
            _lock      = new object();
            _transform = BuildTransform(_trend);
        }
Example #13
0
            /// <summary>
            /// Checks that the first column of <paramref name="args"/> is a vector with at most one
            /// dimension and declares one additional vector column with the same item type. When the
            /// input size is known (positive), the output size is
            /// <c>TotalCumulated[degree](size)</c>; otherwise it is 0.
            /// </summary>
            /// <param name="host">environment used to register the host</param>
            /// <param name="input">data view whose schema is extended</param>
            /// <param name="args">transform arguments; only the first column is used here</param>
            /// <param name="multiplication">multiplication operator, stored for later use</param>
            public PolynomialState(IHostEnvironment host, IDataView input, Arguments args, Func <TInput, TInput, TInput> multiplication)
            {
                _host = host.Register("PolynomialState");
                _host.CheckValue(input, "input");

                _input          = input;
                _args           = args;
                _multiplication = multiplication;

                var firstColumn = _args.columns[0];
                var inputSchema = input.Schema;

                using (var channel = _host.Start("PolynomialState"))
                {
                    _inputCol = SchemaHelper.GetColumnIndexDC(inputSchema, firstColumn.Source);

                    var columnType = inputSchema[_inputCol.Index].Type;
                    if (!columnType.IsVector())
                    {
                        throw _host.Except("Input column type must be a vector.");
                    }

                    var vectorType = columnType.AsVector();
                    int rank       = vectorType.DimCount();
                    if (rank > 1)
                    {
                        throw _host.Except("Input column type must be a vector of one dimension.");
                    }

                    // A vector without dimension has size 0, which is kept as is.
                    int outputSize = 0;
                    if (rank > 0)
                    {
                        outputSize = vectorType.GetDim(0);
                    }
                    if (outputSize > 0)
                    {
                        outputSize = TotalCumulated[_args.degree](outputSize);
                    }
                    channel.Trace("PolynomialTransform {0}->{1}.", rank, outputSize);

                    // The added column keeps the item type of the input; only its size changes.
                    var outputType = new VectorDataViewType(vectorType.ItemType(), outputSize);
                    _schema = ExtendedSchema.Create(new ExtendedSchema(inputSchema,
                                                                       new[] { firstColumn.Name },
                                                                       new[] { outputType }));
                }
            }
Example #14
0
 /// <summary>
 /// Computes the output schema: the input schema extended with two vector columns of size
 /// <c>_args.k</c>, the distances (Single) and the neighbour ids (Int64).
 /// </summary>
 /// <returns>the extended schema</returns>
 DataViewSchema ComputeExtendedSchema()
 {
     var inputSchema = _input.Schema;
     var addedNames  = new string[] { _args.distColumn, _args.idNeighborsColumn };
     var addedTypes  = new DataViewType[]
     {
         new VectorDataViewType(NumberDataViewType.Single, _args.k),
         new VectorDataViewType(NumberDataViewType.Int64, _args.k)
     };

     return ExtendedSchema.Create(new ExtendedSchema(inputSchema, addedNames, addedTypes));
 }
            /// <summary>
            /// Validates the value mappers and the shaking values, then declares one vector column
            /// per value mapper. With <c>concatenate</c>, the size of each output is multiplied by the
            /// product of the numbers of shaking values; with <c>add</c>, the size is unchanged
            /// (1 for a scalar output).
            /// </summary>
            /// <param name="host">environment used to register the host</param>
            /// <param name="input">data view whose schema is extended</param>
            /// <param name="toShake">value mappers; a vector output must have at most one dimension</param>
            /// <param name="args">transform arguments</param>
            public ShakeInputState(IHostEnvironment host, IDataView input, IValueMapper[] toShake, Arguments args)
            {
                _host = host.Register("ShakeInputState");
                _host.CheckValue(input, "input");

                _input   = input;
                _lock    = new object();
                _args    = args;
                _toShake = toShake;

                foreach (var mapper in toShake)
                {
                    var produced = mapper.OutputType;
                    if (produced.IsVector() && produced.AsVector().DimCount() > 1)
                    {
                        throw _host.Except("If a ValueMapper return a vector, it should have one dimension or zero.");
                    }
                }

                _inputCol      = SchemaHelper.GetColumnIndexDC(_input.Schema, _args.inputColumn);
                _shakingValues = ExtractShakingValues();
                if (_shakingValues.Length != _args.inputFeaturesInt.Length)
                {
                    throw _host.Except("Shaking Values and columns to shake do not have the same dimension {0} and '{1}'.", _args.inputFeaturesInt.Length, _args.values);
                }

                // Number of copies of each output kept in the new column: the number of
                // combinations of shaking values when concatenating, a single one when adding.
                int factor;
                switch (_args.aggregation)
                {
                case ShakeAggregation.concatenate:
                    factor = 1;
                    foreach (var values in _shakingValues)
                    {
                        factor *= values.Length;
                    }
                    if (factor == 0)
                    {
                        throw _host.Except("No shaking values ('{0}')", _args.values);
                    }
                    break;

                case ShakeAggregation.add:
                    factor = 1;
                    break;

                default:
                    throw _host.ExceptNotSupp("Unknown aggregatino strategy {0}", _args.aggregation);
                }

                var addedTypes = new List <DataViewType>();
                foreach (var mapper in toShake)
                {
                    var produced = mapper.OutputType;
                    VectorDataViewType vectorType;
                    if (produced.IsVector())
                    {
                        // A vector without dimension keeps an unknown (0) size.
                        int length = produced.AsVector().DimCount() == 0 ? 0 : produced.AsVector().GetDim(0) * factor;
                        vectorType = new VectorDataViewType(produced.ItemType().AsPrimitive(), length);
                    }
                    else
                    {
                        vectorType = new VectorDataViewType(produced.AsPrimitive(), factor);
                    }
                    addedTypes.Add(vectorType);
                }

                _schema = ExtendedSchema.Create(new ExtendedSchema(input.Schema, args.outputColumns, addedTypes.ToArray()));
            }