/// <summary>
/// Builds a row-to-row mapper on top of the wrapped transform.
/// Each call of the returned delegate pushes <c>src</c> into a single-row input view,
/// advances a cursor opened on the transformed view and copies the current row into <c>dst</c>.
/// </summary>
/// <returns>
/// A <see cref="ValueMapperDispose{TRowInput, TRowOutput}"/> which receives the mapping delegate
/// together with the cursor the delegate relies on.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Raised when the getters for <typeparamref name="TRowOutput"/> cannot be created
/// on the output schema.
/// </exception>
ValueMapperDispose<TRowInput, TRowOutput> GetMapperDispose()
{
    var firstView = _sourceToReplace ?? DataViewHelper.GetFirstView(_transform);

    // The result is not used below. NOTE(review): the call is kept because it may
    // validate TRowOutput and throw; confirm before removing it.
    var schema = SchemaDefinition.Create(typeof(TRowOutput), SchemaDefinition.Direction.Read);

    var inputView = new InfiniteLoopViewCursorRow<TRowInput>(
        null, firstView.Schema, overwriteRowGetter: GetterSetterHelper.GetGetter<TRowInput>());

    // This is extremely time consuming as the transform is serialized and deserialized.
    var outputView = _sourceToReplace == _transform.Source
        ? ApplyTransformUtils.ApplyTransformToData(_computeEnv, _transform, inputView)
        : ApplyTransformUtils.ApplyAllTransformsToData(_computeEnv, _transform, inputView, _sourceToReplace);

    // We assume all columns are needed, otherwise they should be removed.
    // The cursor must outlive this method: the returned delegate advances it on every call
    // and it is handed over to ValueMapperDispose. It is therefore NOT wrapped in a using
    // block; it is only disposed here when the mapper cannot be built.
    var cur = outputView.GetRowCursor(i => true);
    Delegate[] dels;
    try
    {
        dels = new TRowOutput().GetCursorGetter(cur);
    }
    catch (InvalidOperationException e)
    {
        // Render the schema before releasing the cursor.
        var schemaText = SchemaHelper.ToString(cur.Schema);
        cur.Dispose();
        throw new InvalidOperationException($"Unable to create getter for the schema\n{schemaText}", e);
    }
    catch
    {
        cur.Dispose();
        throw;
    }

    return new ValueMapperDispose<TRowInput, TRowOutput>(
        (in TRowInput src, ref TRowOutput dst) =>
        {
            inputView.Set(in src);
            cur.MoveNext();
            dst.Set(dels);
        },
        new IDisposable[] { cur });
}
/// <summary>
/// Builds a value-to-value mapper on top of the wrapped transform.
/// Each call of the returned delegate pushes <c>src</c> into a single-column input view,
/// advances a cursor opened on the transformed view and reads the output column into <c>dst</c>.
/// </summary>
/// <typeparam name="TSrc">type of the input column</typeparam>
/// <typeparam name="TDst">type of the output column</typeparam>
/// <returns>
/// A <see cref="ValueMapperDispose{TSrc, TDst}"/> which receives the mapping delegate
/// together with the cursor the delegate relies on.
/// </returns>
ValueMapperDispose<TSrc, TDst> GetMapperDispose<TSrc, TDst>()
{
    var firstView = _sourceToReplace ?? DataViewHelper.GetFirstView(_transform);
    var inputView = new InfiniteLoopViewCursorColumn<TSrc>(
        _inputIndex, firstView.Schema, ignoreOtherColumn: _ignoreOtherColumn);

    // This is extremely time consuming as the transform is serialized and deserialized.
    var outputView = _sourceToReplace == _transform.Source
        ? ApplyTransformUtils.ApplyTransformToData(_computeEnv, _transform, inputView)
        : ApplyTransformUtils.ApplyAllTransformsToData(_computeEnv, _transform, inputView, _sourceToReplace);

    if (!outputView.Schema.TryGetColumnIndex(_outputColumn, out int outputIndex))
    {
        throw _env.Except("Unable to find column '{0}' in output schema.", _outputColumn);
    }

    // Only the requested output column is activated on the cursor.
    var cur = outputView.GetRowCursor(i => i == outputIndex);
    try
    {
        var getter = cur.GetGetter<TDst>(outputIndex);
        if (getter == null)
        {
            // typeof(TDst) instead of default(TDst).GetType(): the latter dereferences null
            // whenever TDst is a reference type or a nullable value type.
            throw _env.Except("Unable to get a getter on the transform for type {0}", typeof(TDst));
        }

        // The cursor is handed to ValueMapperDispose because the delegate keeps using it
        // after this method returns.
        return new ValueMapperDispose<TSrc, TDst>(
            (in TSrc src, ref TDst dst) =>
            {
                inputView.Set(in src);
                cur.MoveNext();
                getter(ref dst);
            },
            new IDisposable[] { cur });
    }
    catch
    {
        // The mapper could not be built: nobody else owns the cursor, release it here.
        cur.Dispose();
        throw;
    }
}
/// <summary>
/// Constructor. Resolves the input column against the first view of the pipeline
/// (or <paramref name="sourceToReplace"/> when given) and the output column against
/// the schema of <paramref name="transform"/>.
/// </summary>
/// <param name="env">environment like ConsoleEnvironment</param>
/// <param name="transform">transform to convert</param>
/// <param name="inputColumn">input column of the mapper</param>
/// <param name="outputColumn">output column of the mapper</param>
/// <param name="sourceToReplace">source to replace</param>
/// <param name="conc">number of concurrency threads; when strictly positive, a dedicated
/// <c>PassThroughEnvironment</c> is created with that concurrency for the computation,
/// otherwise <paramref name="env"/> is used directly</param>
/// <param name="ignoreOtherColumn">ignore other columns instead of raising an exception if they are requested</param>
public ValueMapperFromTransformFloat(IHostEnvironment env, IDataTransform transform, string inputColumn, string outputColumn,
                                     IDataView sourceToReplace = null, int conc = 1, bool ignoreOtherColumn = false)
{
    // Check* rather than Assert*: this is a public entry point and the arguments
    // must be validated in every build configuration, not only in debug builds.
    Contracts.CheckValue(env, nameof(env));
    Contracts.CheckValue(transform, nameof(transform));

    _env = env;
    _transform = transform;
    _sourceToReplace = sourceToReplace;
    _outputColumn = outputColumn;
    _ignoreOtherColumn = ignoreOtherColumn;
    _toDispose = new List<IDisposable>();

    // The input column is looked up in the view the mapper will feed.
    var firstView = _sourceToReplace ?? DataViewHelper.GetFirstView(transform);
    if (!firstView.Schema.TryGetColumnIndex(inputColumn, out int inputIndex))
    {
        throw env.Except("Unable to find column '{0}' in input schema '{1}'.",
                         inputColumn, SchemaHelper.ToString(firstView.Schema));
    }
    _inputIndex = inputIndex;

    // The output column is looked up in the schema produced by the transform.
    if (!transform.Schema.TryGetColumnIndex(outputColumn, out int outputIndex))
    {
        throw env.Except("Unable to find column '{0}' in output schema '{1}'.",
                         outputColumn, SchemaHelper.ToString(transform.Schema));
    }
    _outputType = _transform.Schema.GetColumnType(outputIndex);

    // _disposeEnv records whether _computeEnv was created here (conc > 0)
    // or is the caller's environment.
    _disposeEnv = conc > 0;
    _computeEnv = _disposeEnv ? new PassThroughEnvironment(env, conc: conc, verbose: false) : env;
}
/// <summary>
/// Constructor. Resolves the input column against the first view of the pipeline
/// (or <paramref name="sourceToReplace"/> when given) and the output column against
/// the schema of <paramref name="transform"/>.
/// </summary>
/// <param name="env">environment like ConsoleEnvironment</param>
/// <param name="transform">transform to convert</param>
/// <param name="inputColumn">input column of the mapper</param>
/// <param name="outputColumn">output column of the mapper</param>
/// <param name="sourceToReplace">source to replace</param>
/// <param name="conc">number of concurrency threads; when strictly positive, a dedicated
/// <c>PassThroughEnvironment</c> is created with that concurrency for the computation,
/// otherwise <paramref name="env"/> is used directly</param>
/// <param name="ignoreOtherColumn">ignore other columns instead of raising an exception if they are requested</param>
public ValueMapperFromTransformFloat(IHostEnvironment env, IDataTransform transform, string inputColumn, string outputColumn,
                                     IDataView sourceToReplace = null, int conc = 1, bool ignoreOtherColumn = false)
{
    // Check* rather than Assert*: this is a public entry point and the arguments
    // must be validated in every build configuration, not only in debug builds.
    Contracts.CheckValue(env, nameof(env));
    Contracts.CheckValue(transform, nameof(transform));

    _env = env;
    _transform = transform;
    _sourceToReplace = sourceToReplace;
    _outputColumn = outputColumn;
    _ignoreOtherColumn = ignoreOtherColumn;
    _toDispose = new List<IDisposable>();

    // The input column is looked up in the view the mapper will feed.
    // NOTE(review): SchemaHelper.GetColumnIndex presumably throws when the column
    // is missing; confirm against its implementation.
    var firstView = _sourceToReplace ?? DataViewHelper.GetFirstView(transform);
    _inputIndex = SchemaHelper.GetColumnIndex(firstView.Schema, inputColumn);

    // The output column is looked up in the schema produced by the transform.
    int outputIndex = SchemaHelper.GetColumnIndex(transform.Schema, outputColumn);
    _outputType = _transform.Schema[outputIndex].Type;

    // _disposeEnv records whether _computeEnv was created here (conc > 0)
    // or is the caller's environment.
    _disposeEnv = conc > 0;
    _computeEnv = _disposeEnv ? new PassThroughEnvironment(env, conc: conc, verbose: false) : env;
}
ValueMapper <DataFrame, DataFrame> GetMapperRow() { var firstView = _sourceToReplace ?? DataViewHelper.GetFirstView(_transform); var schema = firstView.Schema; var inputView = new InfiniteLoopViewCursorDataFrame(null, firstView.Schema); // This is extremely time consuming as the transform is serialized and deserialized. var outputView = _sourceToReplace == _transform.Source ? ApplyTransformUtils.ApplyTransformToData(_computeEnv, _transform, inputView) : ApplyTransformUtils.ApplyAllTransformsToData(_computeEnv, _transform, inputView, _sourceToReplace); // We assume all columns are needed, otherwise they should be removed. using (var cur = outputView.GetRowCursor(i => true)) { var getRowFiller = DataFrame.GetRowFiller(cur); return((in DataFrame src, ref DataFrame dst) => { if (dst is null) { dst = new DataFrame(outputView.Schema, src.Length); } else if (!dst.CheckSharedSchema(outputView.Schema)) { throw _env.Except($"DataFrame does not share the same schema, expected {SchemaHelper.ToString(outputView.Schema)}."); } dst.Resize(src.Length); inputView.Set(src); for (int i = 0; i < src.Length; ++i) { cur.MoveNext(); getRowFiller(dst, i); } });