/// <summary>
/// Wraps a transform into a value mapper going from one input column to one output column.
/// </summary>
/// <param name="env">environment like ConsoleEnvironment</param>
/// <param name="transform">transform to convert</param>
/// <param name="inputColumn">input column of the mapper, searched in the schema of
/// <paramref name="sourceToReplace"/> when it is given, in the first view of the transform otherwise</param>
/// <param name="outputColumn">output column of the mapper, searched in the schema of the transform</param>
/// <param name="sourceToReplace">source to replace, null to rely on DataViewHelper.GetFirstView</param>
/// <param name="conc">number of concurrency threads, a strictly positive value creates a dedicated
/// PassThroughEnvironment, any other value reuses <paramref name="env"/></param>
/// <param name="ignoreOtherColumn">ignore other columns instead of raising an exception if they are requested</param>
public ValueMapperFromTransformFloat(IHostEnvironment env, IDataTransform transform, string inputColumn, string outputColumn,
                                     IDataView sourceToReplace = null, int conc = 1, bool ignoreOtherColumn = false)
{
    Contracts.AssertValue(env);
    Contracts.AssertValue(transform);

    _env = env;
    _transform = transform;
    _sourceToReplace = sourceToReplace;
    _outputColumn = outputColumn;
    _ignoreOtherColumn = ignoreOtherColumn;
    _toDispose = new List<IDisposable>();

    // The input column belongs to the view feeding the pipeline.
    var entryView = _sourceToReplace ?? DataViewHelper.GetFirstView(transform);
    _inputIndex = SchemaHelper.GetColumnIndex(entryView.Schema, inputColumn);

    // The output column belongs to the schema of the transform itself.
    int outputIndex = SchemaHelper.GetColumnIndex(transform.Schema, outputColumn);
    _outputType = _transform.Schema[outputIndex].Type;

    _disposeEnv = conc > 0;
    if (_disposeEnv)
    {
        _computeEnv = new PassThroughEnvironment(env, conc: conc, verbose: false);
    }
    else
    {
        _computeEnv = env;
    }
}
/// <summary>
/// Enumerates the columns of every view met in a pipeline.
/// </summary>
/// <param name="trans">pipeline to walk through, see <see cref="IDataTransform"/></param>
/// <param name="begin">forwarded as is to TagHelper.EnumerateAllViews, can be null</param>
/// <returns>one <see cref="GaphViewNode"/> per column holding the column name, the view,
/// the position of the view and the column type</returns>
public static IEnumerable<GaphViewNode> EnumerateVariables(IDataTransform trans, IDataView[] begin = null)
{
    var seen = new HashSet<string>();
    foreach (var entry in TagHelper.EnumerateAllViews(trans, begin))
    {
        var schema = entry.Item1.Schema;
        for (int col = 0; col < schema.ColumnCount; ++col)
        {
            var name = schema.GetColumnName(col);

            // First name among name, name_1, name_2, ... which was not met before.
            // NOTE(review): this unique name is only recorded in the set,
            // it is not stored in the node returned below.
            var candidate = name;
            for (int suffix = 1; seen.Contains(candidate); ++suffix)
            {
                candidate = $"{name}_{suffix}";
            }
            seen.Add(candidate);

            yield return new GaphViewNode()
            {
                variableName = name,
                view = entry.Item1,
                position = entry.Item2,
                variableType = schema.GetColumnType(col)
            };
        }
    }
}
/// <summary>
/// Applies the transform to <paramref name="data"/>, reverses the outcome
/// and asserts the round trip is equal to the initial data.
/// </summary>
/// <param name="data">bytes sent through the transform</param>
/// <param name="dataTransform">transform under test</param>
async UniTask Test(byte[] data, IDataTransform dataTransform)
{
    var forward = await dataTransform.ApplyAsync(data);
    var roundTrip = await dataTransform.ReverseAsync(forward);

    Assert.AreEqual(data, roundTrip);
}
/// <summary>
/// Rebuilds the transform from the content of a model context.
/// </summary>
/// <param name="env">host environment, given to the base constructor</param>
/// <param name="ctx">model context to read from, cannot be null</param>
/// <param name="name">registration name, given to the base constructor</param>
/// <param name="input">input data view</param>
private protected SequentialTransformBase(IHostEnvironment env, ModelLoadContext ctx, string name, IDataView input)
    : base(env, name, input)
{
    Host.CheckValue(ctx, nameof(ctx));

    // *** Binary format ***
    // int: _windowSize
    // int: _initialWindowSize
    // int (string ID): _inputColumnName
    // int (string ID): _outputColumnName
    // ColumnType: _transform.Schema.GetColumnType(0)
    var reader = ctx.Reader;

    var window = reader.ReadInt32();
    Host.CheckDecode(window >= 0);

    var initialWindow = reader.ReadInt32();
    Host.CheckDecode(initialWindow >= 0);

    var inputName = ctx.LoadNonEmptyString();
    var outputName = ctx.LoadNonEmptyString();

    // Properties are only set once every value was read and checked.
    InputColumnName = inputName;
    OutputColumnName = outputName;
    InitialWindowSize = initialWindow;
    WindowSize = window;

    // The output column type follows in the same stream.
    // NOTE(review): the method name suggests the type can be null, it is forwarded unchecked.
    var typeLoader = new BinarySaver(Host, new BinarySaver.Arguments());
    ColumnType outputType = typeLoader.LoadTypeDescriptionOrNull(reader.BaseStream);

    _transform = CreateLambdaTransform(Host, input, InputColumnName, OutputColumnName,
                                       InitFunction, WindowSize > 0, outputType);
}
/// <summary>
/// Builds a pipeline ending with a ChainTrans transform and hands it over to
/// TestTransformHelper.SerializationTestTransform.
/// </summary>
public void TestChainTransformSerialize()
{
    using (var host = EnvHelper.NewTestEnvironment())
    {
        var rows = new[]
        {
            new ExampleA() { X = new float[] { 1, 10, 100 } },
            new ExampleA() { X = new float[] { 2, 3, 5 } }
        };

        IDataView loader = host.CreateStreamingDataView(rows);
        IDataTransform pipeline = host.CreateTransform("Scaler{col=X4:X}", loader);
        pipeline = host.CreateTransform("ChainTrans{ xf1=Scaler{col=X2:X} xf2=Poly{col=X3:X2} }", pipeline);

        // We create a specific folder in build/UnitTest which will contain the output.
        var testName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var modelFile = FileHelper.GetOutputFile("outModelFilePath.zip", testName);
        var firstDump = FileHelper.GetOutputFile("outData.txt", testName);
        var secondDump = FileHelper.GetOutputFile("outData2.txt", testName);

        TestTransformHelper.SerializationTestTransform(host, modelFile, pipeline, loader, firstDump, secondDump);
    }
}
/// <summary>
/// Creates the transform from its arguments.
/// </summary>
/// <param name="env">host environment, cannot be null</param>
/// <param name="args">arguments, <c>lambda</c> must be strictly positive; when <c>column</c> is specified
/// it must exist in the input schema and <c>classValue</c> must be specified too</param>
/// <param name="input">input data view, cannot be null</param>
public ResampleTransform(IHostEnvironment env, Arguments args, IDataView input)
{
    Contracts.CheckValue(env, "env");
    _host = env.Register("ResampleTransform");

    // Checks values are valid.
    _host.CheckValue(args, "args");
    _host.CheckValue(input, "input");
    _host.Check(args.lambda > 0, "lambda must be > 0");

    _input = input;
    _args = args;
    _cacheReplica = null;

    bool hasColumn = !string.IsNullOrEmpty(_args.column);
    if (hasColumn)
    {
        // Only the existence of the column matters, the index itself is not kept.
        int ignored;
        bool found = _input.Schema.TryGetColumnIndex(_args.column, out ignored);
        if (!found)
        {
            throw _host.Except("Unable to find column '{0}' in\n'{1}'.", _args.column, SchemaHelper.ToString(_input.Schema));
        }

        if (string.IsNullOrEmpty(_args.classValue))
        {
            throw _host.Except("Class value cannot be null.");
        }
    }

    _transform = CreateTemplatedTransform();
}
/// <summary>
/// Rebuilds the transform from the content of a model context.
/// </summary>
/// <param name="host">host, given to the base constructor and used to load the trend predictor</param>
/// <param name="ctx">model context holding the arguments and the model stored under "trend"</param>
/// <param name="input">input data view, must contain the source column</param>
/// <exception cref="System.Exception">raised through Host.ExceptUserArg when the arguments do not
/// hold exactly one column or when the source column is missing from the input schema</exception>
private DeTrendTransform(IHost host, ModelLoadContext ctx, IDataView input) : base(host, input)
{
    Host.CheckValue(input, "input");
    Host.CheckValue(ctx, "ctx");

    _args = new Arguments();
    _args.Read(ctx, Host);
    ctx.LoadModel<IPredictor, SignatureLoadModel>(host, out _trend, "trend");

    // ExceptUserArg only builds the exception: it has to be thrown explicitly,
    // otherwise the validation is silently skipped and _args.columns[0] fails below.
    if (_args.columns == null || _args.columns.Length != 1)
    {
        throw Host.ExceptUserArg(nameof(_args.columns), "One column must be specified.");
    }

    int index;
    if (!input.Schema.TryGetColumnIndex(_args.columns[0].Source, out index))
    {
        throw Host.ExceptUserArg(nameof(Column1x1.Source));
    }

    // The output column is always declared as R4, whatever the type of the source column is.
    _schema = Schema.Create(new ExtendedSchema(input.Schema,
                                               new[] { _args.columns[0].Name },
                                               new[] { NumberType.R4 /*input.Schema.GetColumnType(index)*/ }));
    _lock = new object();
    _transform = BuildTransform(_trend);
}
/// <summary>
/// Walks a pipeline backwards from <paramref name="end"/> and collects the transforms able to save themselves as ONNX.
/// </summary>
/// <param name="ch">channel receiving a warning when the walk stops on a transform which cannot be saved</param>
/// <param name="end">last view of the pipeline</param>
/// <param name="source">view the walk ended on; after an early stop, the first view which is not a transform</param>
/// <param name="trueEnd">the view wrapped by <paramref name="end"/> when it is a CompositeDataLoader
/// exposing a view, <paramref name="end"/> itself otherwise</param>
/// <param name="transforms">collected transforms, in pipeline order (each one is inserted at the head)</param>
private void GetPipe(IChannel ch, IDataView end, out IDataView source, out IDataView trueEnd, out LinkedList<ITransformCanSaveOnnx> transforms)
{
    Host.AssertValue(end);

    var composite = end as CompositeDataLoader;
    trueEnd = composite?.View ?? end;
    source = trueEnd;
    transforms = new LinkedList<ITransformCanSaveOnnx>();

    for (var current = source as IDataTransform; current != null; current = source as IDataTransform)
    {
        var exportable = current as ITransformCanSaveOnnx;
        if (exportable == null || !exportable.CanSaveOnnx)
        {
            ch.Warning("Had to stop walkback of pipeline at {0} since it cannot save itself as ONNX.", current.GetType().Name);

            // Nothing more is collected but source still has to reach the first view which is not a transform.
            for (var rest = source as IDataTransform; rest != null; rest = source as IDataTransform)
            {
                source = rest.Source;
            }
            return;
        }

        transforms.AddFirst(exportable);
        source = current.Source;
    }

    Host.AssertValue(source);
}
/// <summary>
/// Attempt to apply the data transform to a different data view source.
/// A transform implementing <see cref="ITransformTemplate"/> is rebound through
/// <see cref="ITransformTemplate.ApplyToData"/>. Any other transform is saved into an in-memory
/// repository and loaded back on top of the new source.
/// </summary>
/// <param name="env">The host to use</param>
/// <param name="transform">The transform to apply.</param>
/// <param name="newSource">The data view to apply the transform to.</param>
/// <returns>The resulting data view.</returns>
public static IDataTransform ApplyTransformToData(IHostEnvironment env, IDataTransform transform, IDataView newSource)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(transform, nameof(transform));
    env.CheckValue(newSource, nameof(newSource));

    var template = transform as ITransformTemplate;
    if (template != null)
    {
        return template.ApplyToData(env, newSource);
    }

    // Revert to serialization.
    IDataTransform rebound;
    using (var buffer = new MemoryStream())
    {
        using (var writer = RepositoryWriter.CreateNew(buffer, env))
        {
            ModelSaveContext.SaveModel(writer, transform, "model");
            writer.Commit();
        }

        // Reads the repository back from its beginning.
        buffer.Position = 0;
        using (var reader = RepositoryReader.Open(buffer, env))
        {
            ModelLoadContext.LoadModel<IDataTransform, SignatureLoadDataTransform>(env, out rebound, reader, "model", newSource);
        }
    }
    return rebound;
}
/// <summary>
/// Creates the transform from its arguments. The trend and the inner transform are left null by this constructor.
/// </summary>
/// <param name="env">host environment, given to the base constructor</param>
/// <param name="args">arguments, exactly one column must be specified, the time column and
/// the source column must both exist in the input schema</param>
/// <param name="input">input data view</param>
/// <exception cref="System.Exception">raised through Host.ExceptUserArg when one of the conditions
/// on <paramref name="args"/> is not met</exception>
public DeTrendTransform(IHostEnvironment env, Arguments args, IDataView input) : base(env, RegistrationName, input)
{
    Host.CheckValue(args, "args");
    _args = args;

    // ExceptUserArg only builds the exception: it has to be thrown explicitly,
    // otherwise the validation is silently skipped and _args.columns[0] fails below.
    if (_args.columns == null || _args.columns.Length != 1)
    {
        throw Host.ExceptUserArg(nameof(_args.columns), "One column must be specified.");
    }

    int index;
    if (!input.Schema.TryGetColumnIndex(args.timeColumn, out index))
    {
        throw Host.ExceptUserArg(nameof(_args.timeColumn));
    }
    if (!input.Schema.TryGetColumnIndex(args.columns[0].Source, out index))
    {
        throw Host.ExceptUserArg(nameof(Column1x1.Source));
    }

    // The output column is always declared as R4, whatever the type of the source column is.
    _schema = Schema.Create(new ExtendedSchema(input.Schema,
                                               new[] { _args.columns[0].Name },
                                               new[] { NumberType.R4 /*input.Schema.GetColumnType(index)*/ }));
    _trend = null;
    _transform = null;
    _lock = new object();
}
/// <summary>
/// Guesses or validates the inputs of a pipeline.
/// When <paramref name="inputs"/> is null, it receives the name of every variable
/// found in a view placed first in the graph. Otherwise every given name must be one of them.
/// </summary>
/// <param name="trans">pipeline to inspect</param>
/// <param name="inputs">input names, filled when null, checked otherwise</param>
/// <param name="begin">forwarded to <see cref="EnumerateVariables"/></param>
/// <exception cref="System.Exception">if a name appears twice or if a given name is not a first-position variable</exception>
public static void GuessInputs(IDataTransform trans, ref string[] inputs, IDataView[] begin = null)
{
    var firstVariables = EnumerateVariables(trans, begin)
        .Where(c => c.position == TagHelper.GraphPositionEnum.first)
        .ToArray();

    bool guessed = inputs == null;
    if (guessed)
    {
        inputs = firstVariables.Select(c => c.variableName).ToArray();
    }

    // Guessed or given, the names must be distinct.
    var distinct = new HashSet<string>(inputs);
    if (distinct.Count != inputs.Length)
    {
        throw Contracts.Except($"One column is duplicated.");
    }

    if (guessed)
    {
        return;
    }

    // Given names must all be known variables.
    var known = new HashSet<string>(firstVariables.Select(c => c.variableName));
    foreach (var inp in inputs)
    {
        if (known.Contains(inp))
        {
            continue;
        }
        throw Contracts.Except($"Unable to find column '{inp}' in {string.Join(", ", known.OrderBy(c => c))}.");
    }
}
/// <summary>
/// Combines two transforms.
/// </summary>
/// <param name="firstTransform">first transform, cannot be null</param>
/// <param name="secondTransform">second transform, cannot be null</param>
/// <exception cref="ArgumentNullException">if one of the transforms is null</exception>
public CombinedDataTransform(IDataTransform firstTransform, IDataTransform secondTransform)
{
    if (firstTransform == null)
    {
        throw new ArgumentNullException(nameof(firstTransform));
    }
    if (secondTransform == null)
    {
        throw new ArgumentNullException(nameof(secondTransform));
    }

    _firstTransform = firstTransform;
    _secondTransform = secondTransform;
}
// Factory method for SignatureLoadDataTransform: rebuilds a chain of transforms from a model context.
// Each saved transform is loaded on top of the previous one, the first one on top of 'input',
// and the last one is returned.
private static IDataTransform Create(IHostEnvironment env, ModelLoadContext ctx, IDataView input)
{
    // env is dereferenced below (env.Register), so it is validated like the other arguments.
    Contracts.CheckValue(env, nameof(env));
    Contracts.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel(GetVersionInfo());
    Contracts.CheckValue(input, nameof(input));

    IDataTransform res = null;
    var h = env.Register(RegistrationName);
    using (var ch = h.Start("Loading Model"))
    {
        // *** Binary format ***
        // number of row functions
        // row functions (each in a separate folder)
        var numFunctions = ctx.Reader.ReadInt32();
        Contracts.CheckDecode(numFunctions > 0);

        for (int i = 0; i < numFunctions; i++)
        {
            var modelName = string.Format("Model_{0:000}", i);
            ctx.LoadModel<IDataTransform, SignatureLoadDataTransform>(env, out res, modelName, input);
            // The transform just loaded becomes the source of the next one.
            input = res;
        }
    }
    return res;
}
/// <summary>
/// Applies the transform to TestByteData, reverses the outcome
/// and asserts the round trip is equal to TestByteData.
/// </summary>
/// <param name="dataTransform">transform under test</param>
public void DataTransform_ApplyReverse(IDataTransform dataTransform)
{
    var forward = dataTransform.Apply(TestByteData);
    var roundTrip = dataTransform.Reverse(forward);

    Assert.AreEqual(TestByteData, roundTrip);
}
/// <summary>
/// Prepares the fixture: an input of five rows (an integer and a GUID, both as strings),
/// a provider on top of it, two exporters and the transform under test linking them.
/// </summary>
public void Initialize()
{
    // set up the input
    _dataInput = new DataInput
    {
        ColumnNames = new[] { "Integer", "GUID" },
        Rows = new List<IList<string>>()
    };

    var rowIndex = 0;
    while (rowIndex < 5)
    {
        _dataInput.Rows.Add(new List<string>
        {
            rowIndex.ToString(),
            Guid.NewGuid().ToString()
        });
        rowIndex++;
    }

    _provider = new TestDataProvider(_dataInput);
    _exporters = new[] { new TestDataExporter(), new TestDataExporter() };
    _target = new DataTransform
    {
        Input = _provider,
        Output = _exporters
    };
}
/// <summary>
/// Wraps a transform into a value mapper going from one input column to one output column.
/// </summary>
/// <param name="env">environment like ConsoleEnvironment</param>
/// <param name="transform">transform to convert</param>
/// <param name="inputColumn">input column of the mapper, searched in the schema of
/// <paramref name="sourceToReplace"/> when it is given, in the first view of the transform otherwise</param>
/// <param name="outputColumn">output column of the mapper, searched in the schema of the transform</param>
/// <param name="sourceToReplace">source to replace, null to rely on DataViewHelper.GetFirstView</param>
/// <param name="conc">number of concurrency threads, a strictly positive value creates a dedicated
/// PassThroughEnvironment, any other value reuses <paramref name="env"/></param>
/// <param name="ignoreOtherColumn">ignore other columns instead of raising an exception if they are requested</param>
public ValueMapperFromTransformFloat(IHostEnvironment env, IDataTransform transform, string inputColumn, string outputColumn,
                                     IDataView sourceToReplace = null, int conc = 1, bool ignoreOtherColumn = false)
{
    Contracts.AssertValue(env);
    Contracts.AssertValue(transform);

    _env = env;
    _transform = transform;
    _sourceToReplace = sourceToReplace;
    _outputColumn = outputColumn;
    _ignoreOtherColumn = ignoreOtherColumn;
    _toDispose = new List<IDisposable>();

    // The input column belongs to the view feeding the pipeline.
    var entryView = _sourceToReplace ?? DataViewHelper.GetFirstView(transform);
    int position;
    bool found = entryView.Schema.TryGetColumnIndex(inputColumn, out position);
    if (!found)
    {
        throw env.Except("Unable to find column '{0}' in input schema '{1}'.", inputColumn, SchemaHelper.ToString(entryView.Schema));
    }
    _inputIndex = position;

    // The output column belongs to the schema of the transform itself.
    found = transform.Schema.TryGetColumnIndex(outputColumn, out position);
    if (!found)
    {
        throw env.Except("Unable to find column '{0}' in output schema '{1}'.", outputColumn, SchemaHelper.ToString(transform.Schema));
    }
    _outputType = _transform.Schema.GetColumnType(position);

    _disposeEnv = conc > 0;
    if (_disposeEnv)
    {
        _computeEnv = new PassThroughEnvironment(env, conc: conc, verbose: false);
    }
    else
    {
        _computeEnv = env;
    }
}
/// <summary>
/// Builds a transform which adds the output of a value mapper as a new column.
/// </summary>
/// <param name="env">environment like ConsoleEnvironment</param>
/// <param name="mapper">IValueMapper</param>
/// <param name="source">source to replace</param>
/// <param name="inputColumn">name of the input column; when null or empty, the last column of
/// <paramref name="source"/> sharing the input type of the mapper is used</param>
/// <param name="outputColumn">name of the output column</param>
public TransformFromValueMapper(IHostEnvironment env, IValueMapper mapper, IDataView source, string inputColumn, string outputColumn = "output")
{
    Contracts.AssertValue(env);
    Contracts.AssertValue(mapper);
    Contracts.AssertValue(source);
    _host = env;

    if (string.IsNullOrEmpty(inputColumn))
    {
        // No column was given: scans the schema backwards for a column with the expected type.
        var expected = mapper.InputType;
        int col = source.Schema.Count;
        while (--col >= 0)
        {
            if (source.Schema[col].Type.SameSizeAndItemType(expected))
            {
                inputColumn = source.Schema[col].Name;
                break;
            }
        }
    }

    _input = source;
    _mapper = mapper;

    // The returned index is not kept.
    // NOTE(review): presumably called to fail early on an unknown column - confirm with SchemaHelper.
    SchemaHelper.GetColumnIndex(_input.Schema, inputColumn);

    _inputColumn = inputColumn;
    _outputColumn = outputColumn;

    var extended = new ExtendedSchema(source.Schema, new[] { outputColumn }, new[] { mapper.OutputType });
    _schema = ExtendedSchema.Create(extended);
    _transform = CreateMemoryTransform();
}
/// <summary>
/// Replaces the current source by a transform built on top of it.
/// </summary>
/// <param name="tr">transform whose source must be the current source of this object</param>
/// <exception cref="System.Exception">raised through Contracts.ExceptNotSupp if the transform
/// was built on another source</exception>
public void AddTransform(IDataTransform tr)
{
    bool otherSource = tr.Source != Source;
    if (otherSource)
    {
        throw Contracts.ExceptNotSupp("Source of the transform must be this StreamingDataFrame.");
    }

    _source = tr;
}
/// <summary>
/// Creates a serialization provider from a base provider and a data transform.
/// </summary>
/// <param name="baseProvider">underlying serialization provider, cannot be null</param>
/// <param name="dataTransform">data transform, cannot be null</param>
/// <exception cref="ArgumentNullException">if one of the arguments is null</exception>
public DataTransformSerializationProvider(ISerializationProvider baseProvider, IDataTransform dataTransform)
{
    if (baseProvider == null)
    {
        throw new ArgumentNullException(nameof(baseProvider));
    }
    if (dataTransform == null)
    {
        throw new ArgumentNullException(nameof(dataTransform));
    }

    _baseProvider = baseProvider;
    _dataTransform = dataTransform;
}
/// <summary>
/// Checks the constructor of DataTransformSerializationProvider raises
/// <see cref="ArgumentNullException"/> for the given pair of arguments.
/// </summary>
/// <param name="serializationProvider">base provider given to the constructor</param>
/// <param name="dataTransform">data transform given to the constructor</param>
public void SerializationProvider_ThrowsArgumentNullException(ISerializationProvider serializationProvider, IDataTransform dataTransform)
{
    Assert.Throws<ArgumentNullException>(() =>
    {
        // Only the construction matters, the instance is discarded.
        new DataTransformSerializationProvider(serializationProvider, dataTransform);
    });
}
/// <summary>
/// Saves a pipeline into a file through TrainUtils.SaveModel, no predictor is saved.
/// </summary>
/// <param name="env">host environment</param>
/// <param name="tr">pipeline to save</param>
/// <param name="outModelFilePath">destination file, created or overwritten</param>
public static void SaveModel(IHostEnvironment env, IDataTransform tr, string outModelFilePath)
{
    using (var channel = env.Start("SaveModel"))
    {
        using (var output = File.Create(outModelFilePath))
        {
            var examples = env.CreateExamples(tr, null);
            TrainUtils.SaveModel(env, channel, output, null, examples);
        }
    }
}
/// <summary>
/// Builds the data transform attached to a sequential transformer.
/// </summary>
/// <param name="host">not used by this constructor, the host of <paramref name="parent"/> is used instead</param>
/// <param name="parent">transformer providing the host, the column names, the window size and the output type</param>
/// <param name="input">input data view</param>
/// <param name="mapper">row mapper providing the output columns of the bindings</param>
public SequentialDataTransform(IHost host, SequentialTransformerBase<TInput, TOutput, TState> parent, IDataView input, IRowMapper mapper)
    : base(parent.Host, input)
{
    _parent = parent;

    // Statements are kept in this order: the lambda transform is created before the mapper is stored.
    bool hasBuffer = _parent.WindowSize > 0;
    _transform = CreateLambdaTransform(_parent.Host, input,
                                       _parent.InputColumnName, _parent.OutputColumnName,
                                       InitFunction, hasBuffer, _parent.OutputColumnType);

    _mapper = mapper;

    var inputSchema = Schema.Create(input.Schema);
    _bindings = new ColumnBindings(inputSchema, _mapper.GetOutputColumns());
}
/// <summary>
/// Associates a transform with a tag and the arguments it was built from.
/// </summary>
/// <param name="tag">tag of the transform, asserted to be non empty</param>
/// <param name="argsString">arguments as a string, can be null</param>
/// <param name="transform">the transform itself, asserted to be non null</param>
public TransformEx(string tag, string argsString, IDataTransform transform)
{
    Contracts.AssertNonEmpty(tag);
    Contracts.AssertValueOrNull(argsString);
    Contracts.AssertValue(transform, "transform");

    Transform = transform;
    ArgsString = argsString;
    Tag = tag;
}
/// <summary>
/// Wraps a transform into a value mapper.
/// </summary>
/// <param name="env">environment like ConsoleEnvironment</param>
/// <param name="transform">transform to convert</param>
/// <param name="sourceToReplace">source to replace</param>
public ValueMapperDataFrameFromTransform(IHostEnvironment env, IDataTransform transform, IDataView sourceToReplace = null)
{
    Contracts.AssertValue(env);
    Contracts.AssertValue(transform);

    _env = env;
    _transform = transform;
    _sourceToReplace = sourceToReplace;

    // The flag is always false here, so the given environment is the compute environment.
    _disposeEnv = false;
    if (_disposeEnv)
    {
        _computeEnv = new PassThroughEnvironment(env, verbose: false);
    }
    else
    {
        _computeEnv = env;
    }
}
/// <summary>
/// Rebuilds the transform from the content of a model context.
/// The schema is the input schema extended with the cluster column (Int32) and the score column (Single).
/// </summary>
/// <param name="host">host, given to the base constructor</param>
/// <param name="ctx">model context the arguments are read from</param>
/// <param name="input">input data view</param>
private OpticsTransform(IHost host, ModelLoadContext ctx, IDataView input) : base(host, input)
{
    Host.CheckValue(input, "input");
    Host.CheckValue(ctx, "ctx");

    _args = new Arguments();
    _args.Read(ctx, Host);

    var addedNames = new string[] { _args.outCluster, _args.outScore };
    var addedTypes = new DataViewType[] { NumberDataViewType.Int32, NumberDataViewType.Single };
    _schema = ExtendedSchema.Create(new ExtendedSchema(input.Schema, addedNames, addedTypes));

    _transform = CreateTemplatedTransform();
}
/// <summary>
/// Rebuilds the transform from the content of a model context.
/// The schema is the input schema extended with the ordering column (I8),
/// the reachability distance column (R4) and the core distance column (R4).
/// </summary>
/// <param name="host">host, given to the base constructor</param>
/// <param name="ctx">model context the arguments are read from</param>
/// <param name="input">input data view</param>
private OpticsOrderingTransform(IHost host, ModelLoadContext ctx, IDataView input) : base(host, input)
{
    Host.CheckValue(input, "input");
    Host.CheckValue(ctx, "ctx");

    _args = new Arguments();
    _args.Read(ctx, Host);

    var addedNames = new string[] { _args.outOrdering, _args.outReachabilityDistance, _args.outCoreDistance };
    var addedTypes = new ColumnType[] { NumberType.I8, NumberType.R4, NumberType.R4 };
    _schema = Schema.Create(new ExtendedSchema(input.Schema, addedNames, addedTypes));

    _transform = CreateTemplatedTransform();
}
/// <summary>
/// Rebuilds the predictor from the content of a model context: the input column name, its type,
/// the output column name, the pre-processing transform, the predictor and, when a flag says so,
/// the post-processing transform.
/// </summary>
/// <param name="host">host, cannot be null</param>
/// <param name="ctx">model context to read from, cannot be null</param>
/// <exception cref="System.Exception">if the input type is neither a float nor a vector of floats</exception>
private PrePostProcessPredictor(IHost host, ModelLoadContext ctx)
{
    Contracts.CheckValue(host, nameof(host));
    _host = host;
    _host.CheckValue(ctx, nameof(ctx));

    _inputColumn = ctx.Reader.ReadString();
    var type = SchemaHelper.ReadType(ctx);
    _outputColumn = ctx.Reader.ReadString();

    // A temporary view with a single column of the saved type gives the pre-processing
    // transform a source to be loaded on.
    DataViewSchema schema;
    IDataView data;
    if (type.IsVector())
    {
        switch (type.AsVector().ItemType().RawKind())
        {
            case DataKind.Single:
                schema = ExtendedSchema.Create(new ExtendedSchema((ISchema)null, new[] { _inputColumn }, new[] { new VectorDataViewType(NumberDataViewType.Single) }));
                data = new TemporaryViewCursorColumn<VBuffer<float>>(default(VBuffer<float>), 0, schema);
                break;
            default:
                throw Contracts.Except("Unable to create a temporary view from type '{0}'", type);
        }
    }
    else
    {
        switch (type.RawKind())
        {
            case DataKind.Single:
                schema = ExtendedSchema.Create(new ExtendedSchema((ISchema)null, new[] { _inputColumn }, new[] { NumberDataViewType.Single }));
                data = new TemporaryViewCursorColumn<float>(default(float), 0, schema);
                break;
            default:
                throw Contracts.Except("Unable to create a temporary view from type '{0}'", type);
        }
    }

    ctx.LoadModel<IDataTransform, SignatureLoadDataTransform>(_host, out _preProcess, "_preProcess", data);
    ctx.LoadModel<IPredictor, SignatureLoadModel>(_host, out _predictor, "_predictor");
    var hasPost = ctx.Reader.ReadBoolByte();

    // The post-processing transform is loaded on top of the predictor transform, which therefore
    // has to exist first. It used to be created last, so a null source was given to the loader.
    _transformFromPredictor = new TransformFromValueMapper(_host, _predictor as IValueMapper, _preProcess, _inputColumn, _outputColumn);

    if (hasPost)
    {
        ctx.LoadModel<IDataTransform, SignatureLoadDataTransform>(_host, out _postProcess, "_postProcess", _transformFromPredictor);
    }
    else
    {
        _postProcess = null;
    }
}
// Chains two mappers: the first one is obtained from a ValueMapperFromTransformFloat built on 'trans'
// (with _inputColumn used both as input and output column and trans.Source as the source to replace),
// the second one is the mapper of _predictor seen as an IValueMapper.
// The intermediate value 'middle' is declared once outside the returned delegate and captured by it,
// so every call of the delegate writes into the same variable.
// NOTE(review): the closing brace of this method is not part of the visible text, the code line is left untouched.
ValueMapper <TSrc, TDst> GetMapperWithTransform <TSrc, TMiddle, TDst>(IDataTransform trans) { var mapperPreVM = new ValueMapperFromTransformFloat <TMiddle>(_host, trans, _inputColumn, _inputColumn, trans.Source); var mapperPre = mapperPreVM.GetMapper <TSrc, TMiddle>(); var mapperPred = (_predictor as IValueMapper).GetMapper <TMiddle, TDst>(); TMiddle middle = default(TMiddle); return((in TSrc src, ref TDst dst) => { mapperPre(in src, ref middle); mapperPred(in middle, ref dst); });
/// <summary>
/// Rebuilds the transform from the content of a model context.
/// </summary>
/// <param name="host">host, cannot be null</param>
/// <param name="ctx">model context the arguments are read from, cannot be null</param>
/// <param name="input">input data view, cannot be null</param>
private ULabelToR4LabelTransform(IHost host, ModelLoadContext ctx, IDataView input)
{
    Contracts.CheckValue(host, "host");
    Contracts.CheckValue(input, "input");

    _host = host;
    _input = input;

    // Same checks again, this time through the host.
    _host.CheckValue(input, "input");
    _host.CheckValue(ctx, "ctx");

    var loaded = new Arguments();
    _args = loaded;
    _args.Read(ctx, _host);

    _transform = CreateTemplatedTransform();
}
/// <summary>
/// Wraps a transform into a value mapper.
/// </summary>
/// <param name="env">environment like ConsoleEnvironment</param>
/// <param name="transform">transform to convert</param>
/// <param name="sourceToReplace">source to replace</param>
/// <param name="conc">number of concurrency threads, a strictly positive value creates a dedicated
/// PassThroughEnvironment, any other value reuses <paramref name="env"/></param>
public ValueMapperFromTransform(IHostEnvironment env, IDataTransform transform, IDataView sourceToReplace = null, int conc = 1)
{
    Contracts.AssertValue(env);
    Contracts.AssertValue(transform);

    _env = env;
    _transform = transform;
    _sourceToReplace = sourceToReplace;

    _disposeEnv = conc > 0;
    if (_disposeEnv)
    {
        _computeEnv = new PassThroughEnvironment(env, conc: conc, verbose: false);
    }
    else
    {
        _computeEnv = env;
    }

    // Called last, once every other field is set.
    _valueMapperDispose = GetMapperDispose();
}
// Used when the transform object is already in memory together with its environment object.
// Delegates to the complete constructor with no primary stream and no storage.
internal TransformInstance(
    int classType,
    string name,
    IDataTransform instance,
    TransformEnvironment environment)
    : this(classType, name, instance, environment, null, null)
{
}
/// <summary>
/// Adds a data transform at the end of the inner list.
/// </summary>
/// <param name="dataTransform">transform to add, stored without any check</param>
public void AppendDataTransform(IDataTransform dataTransform)
{
    this.InnerList.Add(dataTransform);
}
// Complete constructor: every piece of a TransformInstance is given by the caller
// and stored as is, null values included.
internal TransformInstance(
    int classType,
    string name,
    IDataTransform instance,
    TransformEnvironment environment,
    Stream primaryStream,
    StorageInfo storage)
{
    _classType = classType;
    typeName = name;

    transformReference = instance;
    transformEnvironment = environment;

    transformPrimaryStream = primaryStream;
    transformStorage = storage;
}
// Stores the four pieces of information describing a transform initialization:
// the transform instance, the label of the data space instance, the path of the
// transformed stream and the label of the transform instance.
internal TransformInitializationEventArgs(
    IDataTransform instance,
    string dataSpaceInstanceLabel,
    string transformedStreamPath,
    string transformInstanceLabel)
{
    transformLabel = transformInstanceLabel;
    streamPath = transformedStreamPath;
    dataSpaceLabel = dataSpaceInstanceLabel;
    dataInstance = instance;
}