/// <summary>
/// Constructor. Wraps an <see cref="IValueMapper"/> as a transform which adds one column
/// (<paramref name="outputColumn"/>, typed with the mapper's output type) to <paramref name="source"/>.
/// </summary>
/// <param name="env">environment like ConsoleEnvironment</param>
/// <param name="mapper">IValueMapper</param>
/// <param name="source">source to replace</param>
/// <param name="inputColumn">name of the input column; when null or empty, the last column of
/// <paramref name="source"/> whose type has the same size and item type as the mapper input type is used</param>
/// <param name="outputColumn">name of the output column</param>
public TransformFromValueMapper(IHostEnvironment env, IValueMapper mapper, IDataView source, string inputColumn, string outputColumn = "output")
{
    // Public entry point: validate in every build configuration, not only when asserts are enabled.
    Contracts.CheckValue(env, nameof(env));
    Contracts.CheckValue(mapper, nameof(mapper));
    Contracts.CheckValue(source, nameof(source));
    _host = env;

    if (string.IsNullOrEmpty(inputColumn))
    {
        // No column given: scan from the end so that the last compatible column wins.
        var inputType = mapper.InputType;
        for (int i = source.Schema.Count - 1; i >= 0; --i)
        {
            if (source.Schema[i].Type.SameSizeAndItemType(inputType))
            {
                inputColumn = source.Schema[i].Name;
                break;
            }
        }

        if (string.IsNullOrEmpty(inputColumn))
        {
            throw Contracts.Except("Unable to find a column in the input schema matching the mapper input type '{0}'.", inputType);
        }
    }

    _input = source;
    _mapper = mapper;

    // The returned index is not needed here; the lookup is kept so that it still runs
    // against the input schema at construction time, as before.
    SchemaHelper.GetColumnIndex(_input.Schema, inputColumn);

    _inputColumn = inputColumn;
    _outputColumn = outputColumn;
    _schema = ExtendedSchema.Create(new ExtendedSchema(source.Schema, new[] { outputColumn }, new[] { mapper.OutputType }));
    _transform = CreateMemoryTransform();
}
/// <summary>
/// Builds the output schema: the source schema extended with one column per entry of
/// <c>_args.columns</c>. Each new column is named after <c>Name</c> and receives the type
/// <c>SchemaHelper.GetColumnType</c> reports for the entry's <c>Source</c> column.
/// </summary>
/// <returns>the extended schema</returns>
Schema BuildSchema()
{
    var sourceSchema = Source.Schema;
    var columns = _args.columns;

    var addedNames = columns.Select(col => col.Name).ToArray();
    var addedTypes = columns.Select(col => SchemaHelper.GetColumnType(sourceSchema, col.Source)).ToArray();

    return Schema.Create(new ExtendedSchema(sourceSchema, addedNames, addedTypes));
}
/// <summary>
/// Runs the common core-schema checks against a fresh <c>ExtendedSchema</c>, then verifies
/// that a date-only string and a date-time string with milliseconds both resolve to the
/// short timestamp tag with the expected <see cref="DateTime"/> value.
/// </summary>
public void TestExtendedSchema()
{
    var schema = new ExtendedSchema();
    TestCoreSchemaCommon(schema);

    // Date only.
    var expectedDate = new DateTime(2002, 12, 14);
    TryParse(schema, "2002-12-14", ExtendedSchema.TimestampShortTag, expectedDate);

    // Date, time and milliseconds.
    var expectedDateTime = new DateTime(2002, 12, 14, 21, 59, 43, 234);
    TryParse(schema, "2002-12-14 21:59:43.234", ExtendedSchema.TimestampShortTag, expectedDateTime);
}
/// <summary>
/// Deserialization constructor: reads the arguments from <paramref name="ctx"/> and rebuilds
/// the output schema and the underlying transform.
/// </summary>
/// <param name="host">host forwarded to the base class</param>
/// <param name="ctx">model load context the arguments are read from</param>
/// <param name="input">input data view</param>
private OpticsTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, input)
{
    Host.CheckValue(input, nameof(input));
    Host.CheckValue(ctx, nameof(ctx));

    _args = new Arguments();
    _args.Read(ctx, Host);

    // NOTE(review): exactly two columns (outCluster, outScore) are added here — confirm this
    // matches the schema that was in use when the model was saved.
    var addedNames = new string[] { _args.outCluster, _args.outScore };
    var addedTypes = new DataViewType[] { NumberDataViewType.Int32, NumberDataViewType.Single };
    _schema = ExtendedSchema.Create(new ExtendedSchema(input.Schema, addedNames, addedTypes));

    _transform = CreateTemplatedTransform();
}
/// <summary>
/// Deserialization constructor: reads the arguments from <paramref name="ctx"/> and rebuilds
/// the output schema (ordering, reachability distance, core distance) and the underlying transform.
/// </summary>
/// <param name="host">host forwarded to the base class</param>
/// <param name="ctx">model load context the arguments are read from</param>
/// <param name="input">input data view</param>
private OpticsOrderingTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, input)
{
    Host.CheckValue(input, nameof(input));
    Host.CheckValue(ctx, nameof(ctx));

    _args = new Arguments();
    _args.Read(ctx, Host);

    // Three added columns: an Int64 ordering followed by two Single distances.
    var addedNames = new string[]
    {
        _args.outOrdering,
        _args.outReachabilityDistance,
        _args.outCoreDistance
    };
    var addedTypes = new DataViewType[]
    {
        NumberDataViewType.Int64,
        NumberDataViewType.Single,
        NumberDataViewType.Single
    };
    _schema = ExtendedSchema.Create(new ExtendedSchema(input.Schema, addedNames, addedTypes));

    _transform = CreateTemplatedTransform();
}
/// <summary>
/// Deserialization constructor: reloads the pre-processing transform, the predictor and the
/// optional post-processing transform from <paramref name="ctx"/>.
/// </summary>
/// <param name="host">host used for checks and for loading the sub-models</param>
/// <param name="ctx">model load context to read from</param>
private PrePostProcessPredictor(IHost host, ModelLoadContext ctx)
{
    Contracts.CheckValue(host, nameof(host));
    _host = host;
    _host.CheckValue(ctx, nameof(ctx));

    // Stream layout as consumed here: input column name, input column type, output column name.
    _inputColumn = ctx.Reader.ReadString();
    var type = SchemaHelper.ReadType(ctx);
    _outputColumn = ctx.Reader.ReadString();

    // A temporary one-column view with the saved input type is needed as the input
    // of the pre-processing transform being reloaded. Only Single (scalar or vector) is supported.
    DataViewSchema schema;
    IDataView data;
    if (type.IsVector())
    {
        switch (type.AsVector().ItemType().RawKind())
        {
            case DataKind.Single:
                schema = ExtendedSchema.Create(new ExtendedSchema((ISchema)null, new[] { _inputColumn }, new[] { new VectorDataViewType(NumberDataViewType.Single) }));
                data = new TemporaryViewCursorColumn<VBuffer<float>>(default(VBuffer<float>), 0, schema);
                break;
            default:
                throw Contracts.Except("Unable to create a temporary view from type '{0}'", type);
        }
    }
    else
    {
        switch (type.RawKind())
        {
            case DataKind.Single:
                schema = ExtendedSchema.Create(new ExtendedSchema((ISchema)null, new[] { _inputColumn }, new[] { NumberDataViewType.Single }));
                data = new TemporaryViewCursorColumn<float>(default(float), 0, schema);
                break;
            default:
                throw Contracts.Except("Unable to create a temporary view from type '{0}'", type);
        }
    }

    ctx.LoadModel<IDataTransform, SignatureLoadDataTransform>(_host, out _preProcess, "_preProcess", data);
    ctx.LoadModel<IPredictor, SignatureLoadModel>(_host, out _predictor, "_predictor");
    var hasPost = ctx.Reader.ReadBoolByte();

    // The predictor transform must exist BEFORE the post-processing transform is loaded,
    // because it is handed to LoadModel as that transform's input. Building it does not read
    // from ctx, so the order of reads from the stream is unchanged.
    var mapper = _predictor as IValueMapper;
    if (mapper == null)
    {
        throw _host.Except("The loaded predictor does not implement IValueMapper.");
    }
    _transformFromPredictor = new TransformFromValueMapper(_host, mapper, _preProcess, _inputColumn, _outputColumn);

    if (hasPost)
    {
        ctx.LoadModel<IDataTransform, SignatureLoadDataTransform>(_host, out _postProcess, "_postProcess", _transformFromPredictor);
    }
    else
    {
        _postProcess = null;
    }
}
/// <summary>
/// Creates a view over <paramref name="input"/> which exposes one extra column
/// <paramref name="dst"/> of type <paramref name="typeDst"/>, associated with the source
/// column <paramref name="src"/> and the given <paramref name="mapper"/>.
/// </summary>
/// <param name="env">environment used to register a host</param>
/// <param name="name">registration name of the host</param>
/// <param name="input">input data view</param>
/// <param name="src">name of the source column, must exist in the input schema</param>
/// <param name="dst">name of the added column</param>
/// <param name="typeSrc">type of the source column</param>
/// <param name="typeDst">type of the added column</param>
/// <param name="mapper">mapping from a source value to a destination value</param>
public LambdaColumnPassThroughView(IHostEnvironment env, string name, IDataView input, string src, string dst, DataViewType typeSrc, DataViewType typeDst, ValueMapper<TSrc, TDst> mapper)
{
    _host = env.Register(name);
    _source = input;
    _mapper = mapper;
    _columnDst = dst;
    _columnSrc = src;
    _typeDst = typeDst;
    _typeSrc = typeSrc;
    _newSchema = ExtendedSchema.Create(new ExtendedSchema(_source.Schema, new[] { dst }, new[] { typeDst }));

    _srcIndex = SchemaHelper.GetColumnIndex(_source.Schema, _columnSrc);
    // The exception used to be created and silently dropped. It is now thrown when the lookup
    // yields no valid index; if the helper already throws on a missing column, this guard is
    // simply never reached.
    if (_srcIndex < 0)
    {
        throw _host.Except("Unable to find column '{0}' in input schema.", _columnSrc);
    }
}
/// <summary>
/// Creates the transform from user arguments. Validates the arguments, then extends the input
/// schema with a (cluster, score) pair of columns: a single pair when at most one epsilon is
/// given, otherwise one pair per epsilon, suffixed with the epsilon's position.
/// </summary>
/// <param name="env">host environment</param>
/// <param name="args">transform arguments; <c>PostProcess</c> is called on them and
/// <c>newColumnsNumber</c> is overwritten with the number of column pairs</param>
/// <param name="input">input data view</param>
public OpticsTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(env, RegistrationName, input)
{
    Host.CheckValue(args, "args");
    args.PostProcess();

    bool anyNegativeEpsilon = args.epsilonsDouble != null && args.epsilonsDouble.Any(eps => eps < 0);
    Contracts.Check(!anyNegativeEpsilon, "Parameter epsilon, if passed, must be positive.");
    Contracts.Check(!(args.minPoints <= 0), "Parameter minPoints must be positive.");

    _args = args;

    int epsilonsCount = args.epsilonsDouble == null ? 0 : args.epsilonsDouble.Count();

    string[] newColumnNames;
    DataViewType[] newColumnTypes;
    if (epsilonsCount > 1)
    {
        // Two columns (cluster id, score) for every epsilon value.
        newColumnNames = new string[2 * epsilonsCount];
        newColumnTypes = new DataViewType[2 * epsilonsCount];
        for (int k = 0; k < epsilonsCount; ++k)
        {
            int clusterSlot = 2 * k;
            int scoreSlot = 2 * k + 1;
            newColumnNames[clusterSlot] = String.Format("{0}_{1}", args.outCluster, k);
            newColumnNames[scoreSlot] = String.Format("{0}_{1}", args.outScore, k);
            newColumnTypes[clusterSlot] = NumberDataViewType.Int32;
            newColumnTypes[scoreSlot] = NumberDataViewType.Single;
        }
    }
    else
    {
        // Zero or one epsilon: a single, unsuffixed pair of columns.
        newColumnNames = new string[] { args.outCluster, args.outScore };
        newColumnTypes = new DataViewType[] { NumberDataViewType.Int32, NumberDataViewType.Single };
    }

    args.newColumnsNumber = newColumnNames.Length / 2;
    _schema = ExtendedSchema.Create(new ExtendedSchema(input.Schema, newColumnNames, newColumnTypes));
    _transform = CreateTemplatedTransform();
}
/// <summary>
/// Creates the transform from user arguments. Rejects a negative <c>epsilon</c> and a
/// non-positive <c>minPoints</c>, then extends the input schema with the cluster (Int32)
/// and score (Single) columns.
/// </summary>
/// <param name="env">host environment</param>
/// <param name="args">transform arguments</param>
/// <param name="input">input data view</param>
public DBScanTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(env, RegistrationName, input)
{
    Host.CheckValue(args, "args");

    Contracts.Check(!(args.epsilon < 0), "Parameter epsilon must be positive or null.");
    Contracts.Check(!(args.minPoints <= 0), "Parameter minPoints must be positive.");

    _args = args;

    var addedNames = new string[] { args.outCluster, args.outScore };
    var addedTypes = new DataViewType[] { NumberDataViewType.Int32, NumberDataViewType.Single };
    _schema = ExtendedSchema.Create(new ExtendedSchema(input.Schema, addedNames, addedTypes));

    _transform = CreateTemplatedTransform();
}
/// <summary>
/// Computes the output schema. Every entry of <c>_args.columns</c> whose <c>Name</c> differs
/// from its <c>Source</c> adds a column named <c>Name</c> with the type of the input column
/// <c>Source</c>. When no entry qualifies, the input schema is returned unchanged.
/// </summary>
/// <returns>the input schema, possibly extended with the new columns</returns>
DataViewSchema ComputeExtendedSchema()
{
    var inputSchema = _input.Schema;

    // Materialize once: the filtered sequence is consumed several times below and a deferred
    // query would re-run the filter on every pass.
    var added = _args.columns.Where(c => c.Name != c.Source).ToArray();
    if (added.Length == 0)
    {
        return inputSchema;
    }

    var names = added.Select(c => c.Name).ToArray();
    var types = added
        .Select(c => inputSchema[SchemaHelper.GetColumnIndex(inputSchema, c.Source)].Type)
        .ToArray();

    return ExtendedSchema.Create(new ExtendedSchema(inputSchema, names, types));
}
/// <summary>
/// Creates the transform from user arguments. Exactly one column must be specified; the
/// output schema is the input schema plus that column's <c>Name</c>, typed as Single.
/// The trend and the underlying transform are left unset (null) by this constructor.
/// </summary>
/// <param name="env">host environment</param>
/// <param name="args">transform arguments</param>
/// <param name="input">input data view</param>
public DeTrendTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(env, RegistrationName, input)
{
    Host.CheckValue(args, "args");
    _args = args;

    // The exception must be thrown: merely creating it lets execution continue and fail
    // further down on columns[0] with an unrelated error.
    if (_args.columns == null || _args.columns.Length != 1)
    {
        throw Host.ExceptUserArg(nameof(_args.columns), "One column must be specified.");
    }

    // Results are unused; the lookups are kept so both columns are still resolved against
    // the input schema at construction time.
    SchemaHelper.GetColumnIndex(input.Schema, args.timeColumn);
    SchemaHelper.GetColumnIndex(input.Schema, args.columns[0].Source);

    _schema = ExtendedSchema.Create(new ExtendedSchema(input.Schema, new[] { _args.columns[0].Name }, new[] { NumberDataViewType.Single }));
    _trend = null;
    _transform = null;
    _lock = new object();
}
/// <summary>
/// Deserialization constructor: reads the arguments and the trend predictor from
/// <paramref name="ctx"/>, then rebuilds the output schema and the transform.
/// </summary>
/// <param name="host">host forwarded to the base class and used to load the trend model</param>
/// <param name="ctx">model load context to read from</param>
/// <param name="input">input data view</param>
private DeTrendTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, input)
{
    Host.CheckValue(input, "input");
    Host.CheckValue(ctx, "ctx");

    _args = new Arguments();
    _args.Read(ctx, Host);
    ctx.LoadModel<IPredictor, SignatureLoadModel>(host, out _trend, "trend");

    // The exception must be thrown: merely creating it lets execution continue and fail
    // further down on columns[0] with an unrelated error.
    if (_args.columns == null || _args.columns.Length != 1)
    {
        throw Host.ExceptUserArg(nameof(_args.columns), "One column must be specified.");
    }

    // Result is unused; the lookup is kept so the source column is still resolved against
    // the input schema at construction time.
    SchemaHelper.GetColumnIndex(input.Schema, _args.columns[0].Source);

    _schema = ExtendedSchema.Create(new ExtendedSchema(input.Schema, new[] { _args.columns[0].Name }, new[] { NumberDataViewType.Single }));
    _lock = new object();
    _transform = BuildTransform(_trend);
}
/// <summary>
/// Builds the state of the polynomial transform for the first column of <c>args.columns</c>.
/// The input column must be a vector with at most one dimension; the output schema is the
/// input schema plus one vector column with the same item type and a size given by
/// <c>TotalCumulated[degree]</c> applied to the input size (0 when the input size is unknown).
/// </summary>
/// <param name="host">environment used to register a host</param>
/// <param name="input">input data view</param>
/// <param name="args">transform arguments</param>
/// <param name="multiplication">multiplication operator for the item type</param>
public PolynomialState(IHostEnvironment host, IDataView input, Arguments args, Func<TInput, TInput, TInput> multiplication)
{
    _host = host.Register("PolynomialState");
    _host.CheckValue(input, "input");
    _input = input;
    _args = args;
    _multiplication = multiplication;

    var column = _args.columns[0];
    var inputSchema = input.Schema;

    using (var ch = _host.Start("PolynomialState"))
    {
        _inputCol = SchemaHelper.GetColumnIndexDC(inputSchema, column.Source);
        var type = inputSchema[_inputCol.Index].Type;
        if (!type.IsVector())
        {
            throw _host.Except("Input column type must be a vector.");
        }

        var vectorType = type.AsVector();
        int dim = vectorType.DimCount();
        if (dim > 1)
        {
            throw _host.Except("Input column type must be a vector of one dimension.");
        }

        // Without any dimension the size stays 0; a non-positive input size is kept as is.
        int size = 0;
        if (dim > 0)
        {
            int inputSize = vectorType.GetDim(0);
            size = inputSize > 0 ? TotalCumulated[_args.degree](inputSize) : inputSize;
        }

        ch.Trace("PolynomialTransform {0}->{1}.", dim, size);

        // The input schema is extended with one column whose item type is the input item type.
        var outputType = new VectorDataViewType(vectorType.ItemType(), size);
        _schema = ExtendedSchema.Create(new ExtendedSchema(inputSchema, new[] { column.Name }, new[] { outputType }));
    }
}
/// <summary>
/// Computes the output schema: the input schema plus two vector columns of length
/// <c>_args.k</c>, <c>_args.distColumn</c> (Single items) and
/// <c>_args.idNeighborsColumn</c> (Int64 items).
/// </summary>
/// <returns>the extended schema</returns>
DataViewSchema ComputeExtendedSchema()
{
    var addedNames = new string[] { _args.distColumn, _args.idNeighborsColumn };
    var addedTypes = new DataViewType[]
    {
        new VectorDataViewType(NumberDataViewType.Single, _args.k),
        new VectorDataViewType(NumberDataViewType.Int64, _args.k)
    };

    var extended = new ExtendedSchema(_input.Schema, addedNames, addedTypes);
    return ExtendedSchema.Create(extended);
}
/// <summary>
/// Builds the state of the shake-input transform. Validates the value mappers and the shaking
/// values, then extends the input schema with one vector column per mapper
/// (named by <c>args.outputColumns</c>). With the <c>concatenate</c> aggregation the output
/// length is multiplied by the number of shaking combinations; with <c>add</c> it is not.
/// </summary>
/// <param name="host">environment used to register a host</param>
/// <param name="input">input data view</param>
/// <param name="toShake">value mappers to evaluate; a vector output must have at most one dimension</param>
/// <param name="args">transform arguments</param>
public ShakeInputState(IHostEnvironment host, IDataView input, IValueMapper[] toShake, Arguments args)
{
    _host = host.Register("ShakeInputState");
    _host.CheckValue(input, "input");
    _input = input;
    _lock = new object();
    _args = args;
    _toShake = toShake;

    for (int i = 0; i < toShake.Length; ++i)
    {
        var mapperOutput = toShake[i].OutputType;
        if (mapperOutput.IsVector() && mapperOutput.AsVector().DimCount() > 1)
        {
            throw _host.Except("If a ValueMapper return a vector, it should have one dimension or zero.");
        }
    }

    _inputCol = SchemaHelper.GetColumnIndexDC(_input.Schema, _args.inputColumn);
    _shakingValues = ExtractShakingValues();
    if (_shakingValues.Length != _args.inputFeaturesInt.Length)
    {
        throw _host.Except("Shaking Values and columns to shake do not have the same dimension {0} and '{1}'.", _args.inputFeaturesInt.Length, _args.values);
    }

    // Factor applied to every output length: the number of shaking combinations when results
    // are concatenated, 1 when they are added together.
    int repeat;
    switch (_args.aggregation)
    {
        case ShakeAggregation.concatenate:
            repeat = 1;
            foreach (var values in _shakingValues)
            {
                repeat *= values.Length;
            }
            if (repeat == 0)
            {
                throw _host.Except("No shaking values ('{0}')", _args.values);
            }
            break;
        case ShakeAggregation.add:
            repeat = 1;
            break;
        default:
            // Message text is kept verbatim, spelling included.
            throw _host.ExceptNotSupp("Unknown aggregatino strategy {0}", _args.aggregation);
    }

    var colTypes = new List<DataViewType>();
    foreach (var mapper in toShake)
    {
        var outType = mapper.OutputType;
        VectorDataViewType vt;
        if (outType.IsVector())
        {
            var itemType = outType.ItemType().AsPrimitive();
            // A vector without dimension keeps an unknown (0) length.
            int length = outType.AsVector().DimCount() == 0 ? 0 : outType.AsVector().GetDim(0) * repeat;
            vt = new VectorDataViewType(itemType, length);
        }
        else
        {
            vt = new VectorDataViewType(outType.AsPrimitive(), repeat);
        }
        colTypes.Add(vt);
    }

    _schema = ExtendedSchema.Create(new ExtendedSchema(input.Schema, args.outputColumns, colTypes.ToArray()));
}