Beispiel #1
0
        /// <summary>
        /// Builds a row-to-row mapper on top of the transform. Every call of the returned mapper
        /// pushes <c>src</c> into a single-row input view, advances the output cursor by one row
        /// and copies the cursor values into <c>dst</c> through the getters created here.
        /// </summary>
        /// <returns>
        /// A <c>ValueMapperDispose</c> which receives the output cursor in its list of disposables;
        /// the cursor must stay alive as long as the mapper is used.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// Raised when the getters cannot be created for the output schema.
        /// </exception>
        ValueMapperDispose <TRowInput, TRowOutput> GetMapperDispose()
        {
            var firstView = _sourceToReplace ?? DataViewHelper.GetFirstView(_transform);
            // NOTE(review): 'schema' is not read afterwards; the call is kept in case
            // SchemaDefinition.Create validates TRowOutput - confirm before removing it.
            var schema    = SchemaDefinition.Create(typeof(TRowOutput), SchemaDefinition.Direction.Read);
            var inputView = new InfiniteLoopViewCursorRow <TRowInput>(null, firstView.Schema,
                                                                      overwriteRowGetter: GetterSetterHelper.GetGetter <TRowInput>());

            // This is extremely time consuming as the transform is serialized and deserialized.
            var outputView = _sourceToReplace == _transform.Source
                                ? ApplyTransformUtils.ApplyTransformToData(_computeEnv, _transform, inputView)
                                : ApplyTransformUtils.ApplyAllTransformsToData(_computeEnv, _transform, inputView, _sourceToReplace);

            // We assume all columns are needed, otherwise they should be removed.
            // The cursor is deliberately not wrapped in a using block: the returned mapper captures it
            // and receives it in its list of disposables. A using block would dispose it as soon as
            // this method returns, i.e. before the mapper is called for the first time.
            var cur = outputView.GetRowCursor(i => true);

            Delegate[] dels;
            try
            {
                dels = new TRowOutput().GetCursorGetter(cur);
            }
            catch (InvalidOperationException e)
            {
                // Build the message before disposing: it reads the cursor schema.
                var message = $"Unable to create getter for the schema\n{SchemaHelper.ToString(cur.Schema)}";
                cur.Dispose();
                throw new InvalidOperationException(message, e);
            }
            catch
            {
                // Ownership was not handed over yet, release the cursor ourselves.
                cur.Dispose();
                throw;
            }

            return(new ValueMapperDispose <TRowInput, TRowOutput>((in TRowInput src, ref TRowOutput dst) =>
            {
                inputView.Set(in src);
                cur.MoveNext();
                dst.Set(dels);
            }, new IDisposable[] { cur }));
Beispiel #2
0
        /// <summary>
        /// Builds a column-to-column mapper on top of the transform. Every call of the returned mapper
        /// pushes <c>src</c> into a single-value input view, advances the output cursor by one row
        /// and reads the output column into <c>dst</c>.
        /// </summary>
        /// <typeparam name="TSrc">type of the input column value</typeparam>
        /// <typeparam name="TDst">type of the output column value</typeparam>
        /// <returns>
        /// A <c>ValueMapperDispose</c> which receives the output cursor in its list of disposables.
        /// </returns>
        ValueMapperDispose <TSrc, TDst> GetMapperDispose <TSrc, TDst>()
        {
            var firstView = _sourceToReplace ?? DataViewHelper.GetFirstView(_transform);
            var inputView = new InfiniteLoopViewCursorColumn <TSrc>(_inputIndex, firstView.Schema, ignoreOtherColumn: _ignoreOtherColumn);

            // This is extremely time consuming as the transform is serialized and deserialized.
            var outputView = _sourceToReplace == _transform.Source
                                ? ApplyTransformUtils.ApplyTransformToData(_computeEnv, _transform, inputView)
                                : ApplyTransformUtils.ApplyAllTransformsToData(_computeEnv, _transform, inputView,
                                                                               _sourceToReplace);

            if (!outputView.Schema.TryGetColumnIndex(_outputColumn, out int outputIndex))
            {
                throw _env.Except("Unable to find column '{0}' in output schema.", _outputColumn);
            }

            // Only the requested output column is activated on the cursor.
            var cur    = outputView.GetRowCursor(i => i == outputIndex);
            var getter = cur.GetGetter <TDst>(outputIndex);

            if (getter == null)
            {
                // The cursor is not handed over to a mapper on this path, release it here.
                cur.Dispose();
                // typeof(TDst) instead of default(TDst).GetType(): the latter dereferences null
                // for reference (and nullable) types and would hide this error behind a
                // NullReferenceException.
                throw _env.Except("Unable to get a getter on the transform for type {0}", typeof(TDst));
            }
            return(new ValueMapperDispose <TSrc, TDst>((in TSrc src, ref TDst dst) =>
            {
                inputView.Set(in src);
                cur.MoveNext();
                getter(ref dst);
            }, new IDisposable[] { cur }));
Beispiel #3
0
        /// <summary>
        /// Wraps a transform so that it can be used as a mapper from one input column
        /// to one output column.
        /// </summary>
        /// <param name="env">host environment, for instance a ConsoleEnvironment</param>
        /// <param name="transform">transform exposed through the mapper</param>
        /// <param name="inputColumn">name of the column consumed by the mapper</param>
        /// <param name="outputColumn">name of the column produced by the mapper</param>
        /// <param name="sourceToReplace">source to replace; when null, the input column is looked up
        /// in the view returned by DataViewHelper.GetFirstView</param>
        /// <param name="conc">number of concurrency threads; a strictly positive value makes the mapper
        /// compute inside its own PassThroughEnvironment</param>
        /// <param name="ignoreOtherColumn">ignore other columns instead of raising an exception if they are requested</param>
        public ValueMapperFromTransformFloat(IHostEnvironment env, IDataTransform transform,
                                             string inputColumn, string outputColumn,
                                             IDataView sourceToReplace = null, int conc = 1,
                                             bool ignoreOtherColumn    = false)
        {
            Contracts.AssertValue(env);
            Contracts.AssertValue(transform);

            _env = env;
            _transform = transform;
            _sourceToReplace = sourceToReplace;
            _outputColumn = outputColumn;
            _ignoreOtherColumn = ignoreOtherColumn;
            _toDispose = new List <IDisposable>();

            // The input column is resolved against the view which feeds the pipeline.
            var sourceView = _sourceToReplace ?? DataViewHelper.GetFirstView(transform);
            if (!sourceView.Schema.TryGetColumnIndex(inputColumn, out int inputPosition))
            {
                throw env.Except("Unable to find column '{0}' in input schema '{1}'.",
                                 inputColumn, SchemaHelper.ToString(sourceView.Schema));
            }
            _inputIndex = inputPosition;

            // The output column is resolved against the schema of the transform itself.
            if (!transform.Schema.TryGetColumnIndex(outputColumn, out int outputPosition))
            {
                throw env.Except("Unable to find column '{0}' in output schema '{1}'.",
                                 outputColumn, SchemaHelper.ToString(transform.Schema));
            }
            _outputType = _transform.Schema.GetColumnType(outputPosition);

            // A dedicated environment is only created when a positive concurrency is requested.
            if (conc > 0)
            {
                _disposeEnv = true;
                _computeEnv = new PassThroughEnvironment(env, conc: conc, verbose: false);
            }
            else
            {
                _disposeEnv = false;
                _computeEnv = env;
            }
        }
Beispiel #4
0
        /// <summary>
        /// Wraps a transform so that it can be used as a mapper from one input column
        /// to one output column.
        /// </summary>
        /// <param name="env">host environment, for instance a ConsoleEnvironment</param>
        /// <param name="transform">transform exposed through the mapper</param>
        /// <param name="inputColumn">name of the column consumed by the mapper</param>
        /// <param name="outputColumn">name of the column produced by the mapper</param>
        /// <param name="sourceToReplace">source to replace; when null, the input column is looked up
        /// in the view returned by DataViewHelper.GetFirstView</param>
        /// <param name="conc">number of concurrency threads; a strictly positive value makes the mapper
        /// compute inside its own PassThroughEnvironment</param>
        /// <param name="ignoreOtherColumn">ignore other columns instead of raising an exception if they are requested</param>
        public ValueMapperFromTransformFloat(IHostEnvironment env, IDataTransform transform,
                                             string inputColumn, string outputColumn,
                                             IDataView sourceToReplace = null, int conc = 1,
                                             bool ignoreOtherColumn    = false)
        {
            Contracts.AssertValue(env);
            Contracts.AssertValue(transform);

            _env = env;
            _transform = transform;
            _sourceToReplace = sourceToReplace;
            _outputColumn = outputColumn;
            _ignoreOtherColumn = ignoreOtherColumn;
            _toDispose = new List <IDisposable>();

            // The input column is resolved against the view which feeds the pipeline.
            var sourceView = _sourceToReplace ?? DataViewHelper.GetFirstView(transform);
            int inputPosition = SchemaHelper.GetColumnIndex(sourceView.Schema, inputColumn);
            _inputIndex = inputPosition;

            // The output column is resolved against the schema of the transform itself.
            int outputPosition = SchemaHelper.GetColumnIndex(transform.Schema, outputColumn);
            _outputType = _transform.Schema[outputPosition].Type;

            // A dedicated environment is only created when a positive concurrency is requested.
            if (conc > 0)
            {
                _disposeEnv = true;
                _computeEnv = new PassThroughEnvironment(env, conc: conc, verbose: false);
            }
            else
            {
                _disposeEnv = false;
                _computeEnv = env;
            }
        }
Beispiel #5
0
        /// <summary>
        /// Builds a mapper which runs the transform on a whole DataFrame: the source frame is plugged
        /// into the input view, then the output cursor is advanced once per source row and each row
        /// is written into the destination frame.
        /// </summary>
        /// <returns>
        /// A delegate taking the source DataFrame and filling (or allocating, when null) the destination DataFrame.
        /// </returns>
        ValueMapper <DataFrame, DataFrame> GetMapperRow()
        {
            var firstView = _sourceToReplace ?? DataViewHelper.GetFirstView(_transform);
            // NOTE(review): 'schema' is not read in the visible part of this method.
            var schema    = firstView.Schema;

            var inputView = new InfiniteLoopViewCursorDataFrame(null, firstView.Schema);

            // This is extremely time consuming as the transform is serialized and deserialized.
            var outputView = _sourceToReplace == _transform.Source
                                ? ApplyTransformUtils.ApplyTransformToData(_computeEnv, _transform, inputView)
                                : ApplyTransformUtils.ApplyAllTransformsToData(_computeEnv, _transform, inputView, _sourceToReplace);

            // We assume all columns are needed, otherwise they should be removed.
            // NOTE(review): the lambda returned below captures 'cur', but this using block disposes
            // the cursor when the method returns, i.e. before the lambda is ever invoked.
            // Confirm the cursor remains usable after Dispose, or move ownership to the caller.
            using (var cur = outputView.GetRowCursor(i => true))
            {
                var getRowFiller = DataFrame.GetRowFiller(cur);

                return((in DataFrame src, ref DataFrame dst) =>
                {
                    // Allocate the destination on first use, otherwise require a matching schema.
                    if (dst is null)
                    {
                        dst = new DataFrame(outputView.Schema, src.Length);
                    }
                    else if (!dst.CheckSharedSchema(outputView.Schema))
                    {
                        throw _env.Except($"DataFrame does not share the same schema, expected {SchemaHelper.ToString(outputView.Schema)}.");
                    }
                    dst.Resize(src.Length);

                    inputView.Set(src);
                    // One cursor step per source row; the result of MoveNext is not checked.
                    for (int i = 0; i < src.Length; ++i)
                    {
                        cur.MoveNext();
                        getRowFiller(dst, i);
                    }
                });