コード例 #1
0
        /// <summary>
        /// Loads iris.txt, applies the resample transform, writes the result as CSV and
        /// checks the first character of the ordered output lines never decreases.
        /// </summary>
        public void TestI_ResampleSerialization()
        {
            // Resolve the input data set and an output file named after this test.
            var testName   = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var irisPath   = FileHelper.GetTestFile("iris.txt");
            var resultPath = FileHelper.GetOutputFile("outputDataFilePath.txt", testName);

            // The environment is created without a using statement.
            var env = EnvHelper.NewTestEnvironment(conc: 1);

            // Load the text file, apply the resample transform and dump the result as CSV.
            var textLoader = env.CreateLoader("Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 " +
                                              "col=Pwidth:R4:4 header=+ sep=tab}",
                                              new MultiFileSource(irisPath));
            var resampled = env.CreateTransform("resample{lambda=1 c=-}", textLoader);
            DataViewHelper.ToCsv(env, resampled, resultPath);

            // Ignore everything before the first line starting with "Label",
            // then order the remaining lines (header included).
            var ordered = File.ReadAllLines(resultPath)
                              .SkipWhile(line => !line.StartsWith("Label"))
                              .OrderBy(line => line)
                              .ToArray();

            // Compare the first character of every pair of consecutive lines.
            for (int next = 1; next < ordered.Length; ++next)
            {
                char before = ordered[next - 1][0];
                char after  = ordered[next][0];
                if (before > after)
                {
                    throw new Exception("The output is not sorted.");
                }
            }
        }
コード例 #2
0
        /// <summary>
        /// Builds a row-to-row mapper: each call feeds one <c>TRowInput</c> into an input view,
        /// advances a cursor over the transformed view and copies the current row into a <c>TRowOutput</c>.
        /// </summary>
        /// <returns>A <c>ValueMapperDispose</c> holding the mapping delegate and the cursor it reads from.</returns>
        /// <exception cref="InvalidOperationException">Raised when the getters cannot be created for the cursor schema.</exception>
        ValueMapperDispose <TRowInput, TRowOutput> GetMapperDispose()
        {
            // The input schema comes from the replaced source when there is one,
            // otherwise from the first view found by DataViewHelper.GetFirstView.
            var firstView = _sourceToReplace ?? DataViewHelper.GetFirstView(_transform);
            // NOTE(review): 'schema' is not referenced in the visible part of this method.
            var schema    = SchemaDefinition.Create(typeof(TRowOutput), SchemaDefinition.Direction.Read);
            // View whose content is overwritten through inputView.Set before every cursor move.
            var inputView = new InfiniteLoopViewCursorRow <TRowInput>(null, firstView.Schema,
                                                                      overwriteRowGetter: GetterSetterHelper.GetGetter <TRowInput>());

            // This is extremely time consuming as the transform is serialized and deserialized.
            // Only the last transform is re-applied when the replaced source is its direct source,
            // otherwise the whole chain is re-applied.
            var outputView = _sourceToReplace == _transform.Source
                                ? ApplyTransformUtils.ApplyTransformToData(_computeEnv, _transform, inputView)
                                : ApplyTransformUtils.ApplyAllTransformsToData(_computeEnv, _transform, inputView, _sourceToReplace);

            // We assume all columns are needed, otherwise they should be removed.
            // NOTE(review): 'cur' is declared in a using statement but is captured by the returned
            // delegate and also handed to ValueMapperDispose; leaving the using block through
            // 'return' disposes it, so the delegate appears to run on a disposed cursor - confirm.
            using (var cur = outputView.GetRowCursor(i => true))
            {
                Delegate[] dels;
                try
                {
                    // One getter per column, created by the output row type itself.
                    dels = new TRowOutput().GetCursorGetter(cur);
                }
                catch (InvalidOperationException e)
                {
                    // Rethrown with the cursor schema added to the message; the original exception is kept as inner exception.
                    throw new InvalidOperationException($"Unable to create getter for the schema\n{SchemaHelper.ToString(cur.Schema)}", e);
                }

                return(new ValueMapperDispose <TRowInput, TRowOutput>((in TRowInput src, ref TRowOutput dst) =>
                {
                    // Push the new input row, advance the cursor, then read the output row.
                    inputView.Set(in src);
                    cur.MoveNext();
                    dst.Set(dels);
                }, new IDisposable[] { cur }));
コード例 #3
0
        // Test: builds a ValueMappingEstimator from a string-to-int lookup table and a
        // single-row data view with three string columns (A, B, C).
        public void ValueMapOneValueTest()
        {
            // One input row.
            var data = new[] { new TestClass()
                               {
                                   A = "bar", B = "test", C = "foo"
                               } };
            var dataView = ML.Data.LoadFromEnumerable(data);

            // Lookup table: keys and values are paired by position ("foo" with 1, "bar" with 2, ...).
            var keys = new List <string>()
            {
                "foo", "bar", "test", "wahoo"
            };
            var values = new List <int>()
            {
                1, 2, 3, 4
            };

            // The lookup table is stored as a data view with the default key/value column names.
            // NOTE(review): the trailing 'false' presumably means the values are not treated as key types - confirm.
            var lookupMap = DataViewHelper.CreateDataView(Env, keys, values,
                                                          ValueMappingTransformer.DefaultKeyColumnName,
                                                          ValueMappingTransformer.DefaultValueColumnName, false);

            // NOTE(review): the tuples are presumably (output column, input column) pairs - confirm.
            var estimator = new ValueMappingEstimator <string, int>(Env, lookupMap,
                                                                    lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
                                                                    lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
                                                                    new[] { ("D", "A"), ("E", "B"), ("F", "C") });
コード例 #4
0
        /// <summary>
        /// Creates a mapper on top of a transform, reading one input column and exposing one output column.
        /// </summary>
        /// <param name="env">host environment (like ConsoleEnvironment), also used to raise exceptions</param>
        /// <param name="transform">transform to convert into a mapper</param>
        /// <param name="inputColumn">name of the mapper input column, it must exist in the input schema</param>
        /// <param name="outputColumn">name of the mapper output column, it must exist in the transform schema</param>
        /// <param name="sourceToReplace">source to replace, when null the first view of the transform is used</param>
        /// <param name="conc">number of concurrency threads, a dedicated environment is created when it is positive</param>
        /// <param name="ignoreOtherColumn">ignore other columns instead of raising an exception if they are requested</param>
        public ValueMapperFromTransformFloat(IHostEnvironment env, IDataTransform transform,
                                             string inputColumn, string outputColumn,
                                             IDataView sourceToReplace = null, int conc = 1,
                                             bool ignoreOtherColumn = false)
        {
            Contracts.AssertValue(env);
            Contracts.AssertValue(transform);

            // Keep the arguments for later use.
            _env = env;
            _transform = transform;
            _sourceToReplace = sourceToReplace;
            _outputColumn = outputColumn;
            _ignoreOtherColumn = ignoreOtherColumn;
            _toDispose = new List<IDisposable>();

            // The input column is searched in the replaced source if any, in the first view otherwise.
            var inputView = _sourceToReplace ?? DataViewHelper.GetFirstView(transform);
            if (!inputView.Schema.TryGetColumnIndex(inputColumn, out int inputIndex))
            {
                throw env.Except("Unable to find column '{0}' in input schema '{1}'.",
                                 inputColumn, SchemaHelper.ToString(inputView.Schema));
            }
            _inputIndex = inputIndex;

            // The output column is searched in the schema of the transform itself.
            if (!transform.Schema.TryGetColumnIndex(outputColumn, out int outputIndex))
            {
                throw env.Except("Unable to find column '{0}' in output schema '{1}'.",
                                 outputColumn, SchemaHelper.ToString(transform.Schema));
            }
            _outputType = _transform.Schema.GetColumnType(outputIndex);

            // A positive concurrency level creates a dedicated environment, flagged through _disposeEnv.
            _disposeEnv = conc > 0;
            _computeEnv = _disposeEnv ? new PassThroughEnvironment(env, conc: conc, verbose: false) : env;
        }
コード例 #5
0
        /// <summary>
        /// Builds a value-to-value mapper: each call feeds one <c>TSrc</c> value into the input column,
        /// advances a cursor over the transformed view and reads the output column into a <c>TDst</c>.
        /// </summary>
        /// <returns>A <c>ValueMapperDispose</c> holding the mapping delegate and the cursor it reads from.</returns>
        ValueMapperDispose <TSrc, TDst> GetMapperDispose <TSrc, TDst>()
        {
            // The input schema comes from the replaced source when there is one,
            // otherwise from the first view found by DataViewHelper.GetFirstView.
            var firstView = _sourceToReplace ?? DataViewHelper.GetFirstView(_transform);
            // View exposing column _inputIndex; its value is overwritten through inputView.Set.
            var inputView = new InfiniteLoopViewCursorColumn <TSrc>(_inputIndex, firstView.Schema, ignoreOtherColumn: _ignoreOtherColumn);

            // This is extremely time consuming as the transform is serialized and deserialized.
            // Only the last transform is re-applied when the replaced source is its direct source,
            // otherwise the whole chain is re-applied.
            var outputView = _sourceToReplace == _transform.Source
                                ? ApplyTransformUtils.ApplyTransformToData(_computeEnv, _transform, inputView)
                                : ApplyTransformUtils.ApplyAllTransformsToData(_computeEnv, _transform, inputView,
                                                                               _sourceToReplace);
            int index;

            // The output column is looked up again in the schema of the rebuilt view.
            if (!outputView.Schema.TryGetColumnIndex(_outputColumn, out index))
            {
                throw _env.Except("Unable to find column '{0}' in output schema.", _outputColumn);
            }
            // Copied into a separate local which is the one captured by the lambda below.
            int newOutputIndex = index;
            // Only the output column is requested from the cursor.
            var cur            = outputView.GetRowCursor(i => i == newOutputIndex);
            var getter         = cur.GetGetter <TDst>(newOutputIndex);

            if (getter == null)
            {
                // NOTE(review): default(TDst).GetType() throws NullReferenceException when TDst is a
                // reference type (default is null); typeof(TDst) would avoid it - confirm.
                throw _env.Except("Unable to get a getter on the transform for type {0}", default(TDst).GetType());
            }
            return(new ValueMapperDispose <TSrc, TDst>((in TSrc src, ref TDst dst) =>
            {
                // Push the new input value, advance the cursor, then read the output value.
                inputView.Set(in src);
                cur.MoveNext();
                getter(ref dst);
            }, new IDisposable[] { cur }));
コード例 #6
0
        /// <summary>
        /// Creates a mapper on top of a transform, reading one input column and exposing one output column.
        /// </summary>
        /// <param name="env">host environment (like ConsoleEnvironment)</param>
        /// <param name="transform">transform to convert into a mapper</param>
        /// <param name="inputColumn">name of the mapper input column, looked up in the input schema</param>
        /// <param name="outputColumn">name of the mapper output column, looked up in the transform schema</param>
        /// <param name="sourceToReplace">source to replace, when null the first view of the transform is used</param>
        /// <param name="conc">number of concurrency threads, a dedicated environment is created when it is positive</param>
        /// <param name="ignoreOtherColumn">ignore other columns instead of raising an exception if they are requested</param>
        public ValueMapperFromTransformFloat(IHostEnvironment env, IDataTransform transform,
                                             string inputColumn, string outputColumn,
                                             IDataView sourceToReplace = null, int conc = 1,
                                             bool ignoreOtherColumn = false)
        {
            Contracts.AssertValue(env);
            Contracts.AssertValue(transform);

            // Keep the arguments for later use.
            _env = env;
            _transform = transform;
            _sourceToReplace = sourceToReplace;
            _outputColumn = outputColumn;
            _ignoreOtherColumn = ignoreOtherColumn;
            _toDispose = new List<IDisposable>();

            // The input column is searched in the replaced source if any, in the first view otherwise.
            var inputView = _sourceToReplace ?? DataViewHelper.GetFirstView(transform);
            int inputIndex = SchemaHelper.GetColumnIndex(inputView.Schema, inputColumn);
            _inputIndex = inputIndex;

            // The output column is searched in the schema of the transform itself.
            int outputIndex = SchemaHelper.GetColumnIndex(transform.Schema, outputColumn);
            _outputType = _transform.Schema[outputIndex].Type;

            // A positive concurrency level creates a dedicated environment, flagged through _disposeEnv.
            _disposeEnv = conc > 0;
            _computeEnv = _disposeEnv ? new PassThroughEnvironment(env, conc: conc, verbose: false) : env;
        }
コード例 #7
0
        /// <summary>
        /// Initializes the window, binds a 4x4 grid of zeros to the data grid and
        /// shows a message box whenever a cell edit is ending.
        /// </summary>
        public MainWindow()
        {
            InitializeComponent();

            // A freshly allocated int[4, 4] only contains zeros.
            var cells = new int[4, 4];
            CDataGrid.ItemsSource = DataViewHelper.GetBindable2DArray(cells);

            CDataGrid.CellEditEnding += (sender, args) => { MessageBox.Show("Pydor"); };
        }
コード例 #8
0
        /// <summary>
        /// Create a <see cref="ValueMappingEstimator"/>, which converts value types into keys, loading the keys to use from <paramref name="keyValuePairs"/>.
        /// </summary>
        /// <typeparam name="TInputType">The key type.</typeparam>
        /// <typeparam name="TOutputType">The value type.</typeparam>
        /// <param name="catalog">The conversion transform's catalog</param>
        /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
        /// The output data types can be primitives or vectors of numeric, text, boolean, <see cref="System.DateTime"/>, <see cref="System.DateTimeOffset"/> or <see cref="DataViewRowId"/> types.</param>
        /// <param name="keyValuePairs">Specifies the mapping that will be performed. The keys will be mapped to the values as specified in the <paramref name="keyValuePairs"/>.</param>
        /// <param name="inputColumnName">Name of the column to transform.
        /// If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
        /// The input data types can be primitives or vectors of numeric, text, boolean, <see cref="System.DateTime"/>, <see cref="System.DateTimeOffset"/> or <see cref="DataViewRowId"/> types.
        /// </param>
        /// <param name="treatValuesAsKeyType">Whether to treat the values as a key.</param>
        /// <returns>An instance of the <see cref="ValueMappingEstimator"/></returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[MapValue](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs)]
        /// ]]></format>
        /// </example>
        public static ValueMappingEstimator <TInputType, TOutputType> MapValue <TInputType, TOutputType>(
            this TransformsCatalog.ConversionTransforms catalog,
            string outputColumnName,
            IEnumerable <KeyValuePair <TInputType, TOutputType> > keyValuePairs,
            string inputColumnName    = null,
            bool treatValuesAsKeyType = false)
        {
            // Split the pairs into two sequences of keys and values.
            // Both are lazy projections: keyValuePairs is enumerated each time one of them is consumed.
            var keys   = keyValuePairs.Select(pair => pair.Key);
            var values = keyValuePairs.Select(pair => pair.Value);

            // The mapping is stored as a data view with the default key/value column names.
            var lookupMap = DataViewHelper.CreateDataView(catalog.GetEnvironment(), keys, values,
                                                          ValueMappingTransformer.DefaultKeyColumnName,
                                                          ValueMappingTransformer.DefaultValueColumnName, treatValuesAsKeyType);

            // A single (output, input) column pair; the input falls back to the output name when null.
            return(new ValueMappingEstimator <TInputType, TOutputType>(catalog.GetEnvironment(), lookupMap,
                                                                       lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
                                                                       lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
                                                                       new[] { (outputColumnName, inputColumnName ?? outputColumnName) }));
コード例 #9
0
        /// <summary>
        /// Builds a DataFrame-to-DataFrame mapper: the source frame is pushed into an input view and
        /// one output row is filled per source row by advancing a cursor over the transformed view.
        /// </summary>
        /// <returns>The mapping delegate.</returns>
        ValueMapper <DataFrame, DataFrame> GetMapperRow()
        {
            // The input schema comes from the replaced source when there is one,
            // otherwise from the first view found by DataViewHelper.GetFirstView.
            var firstView = _sourceToReplace ?? DataViewHelper.GetFirstView(_transform);
            // NOTE(review): 'schema' is not referenced in the visible part of this method.
            var schema    = firstView.Schema;

            // View whose content is replaced through inputView.Set on every call of the mapper.
            var inputView = new InfiniteLoopViewCursorDataFrame(null, firstView.Schema);

            // This is extremely time consuming as the transform is serialized and deserialized.
            // Only the last transform is re-applied when the replaced source is its direct source,
            // otherwise the whole chain is re-applied.
            var outputView = _sourceToReplace == _transform.Source
                                ? ApplyTransformUtils.ApplyTransformToData(_computeEnv, _transform, inputView)
                                : ApplyTransformUtils.ApplyAllTransformsToData(_computeEnv, _transform, inputView, _sourceToReplace);

            // We assume all columns are needed, otherwise they should be removed.
            // NOTE(review): 'cur' is declared in a using statement but is captured by the returned
            // delegate; leaving the using block through 'return' disposes it, so the delegate
            // appears to run on a disposed cursor - confirm.
            using (var cur = outputView.GetRowCursor(i => true))
            {
                var getRowFiller = DataFrame.GetRowFiller(cur);

                return((in DataFrame src, ref DataFrame dst) =>
                {
                    // Allocate the destination on first use, otherwise require a matching schema.
                    if (dst is null)
                    {
                        dst = new DataFrame(outputView.Schema, src.Length);
                    }
                    else if (!dst.CheckSharedSchema(outputView.Schema))
                    {
                        throw _env.Except($"DataFrame does not share the same schema, expected {SchemaHelper.ToString(outputView.Schema)}.");
                    }
                    dst.Resize(src.Length);

                    // Push the source frame, then fill one destination row per cursor move.
                    inputView.Set(src);
                    for (int i = 0; i < src.Length; ++i)
                    {
                        cur.MoveNext();
                        getRowFiller(dst, i);
                    }
                });
コード例 #10
0
        /// <summary>
        /// Computes and logs the statistics requested in <c>_args</c> (schema, row count,
        /// min/max/sum/sum2/nb and histograms per column) for the input view.
        /// The work runs under <c>_lock</c> and only while <c>_statistics</c> is null;
        /// <c>_statistics</c> is set to the input view once the computation is done.
        /// </summary>
        /// <remarks>
        /// Raises an exception from the channel when a requested column has an unsupported type,
        /// and a not-implemented exception when <c>_args.saveInFile</c> is set.
        /// </remarks>
        private void ComputeStatistics()
        {
            lock (_lock)
            {
                if (_statistics == null)
                {
                    var stats = new Dictionary <string, List <ColumnStatObs> >();

                    using (var ch = _host.Start("Computing statistics"))
                    {
                        // The begin/end markers are only emitted for the non-JSON output.
                        if (!_args.jsonFormat)
                        {
                            ch.Info("Begin DescribeTransform {0}", _args.name);
                        }

                        if (_args.showSchema)
                        {
                            if (_args.jsonFormat)
                            {
                                ch.Info("    <{0}>{{\"DataViewSchema\":\"{1}\"}},</{0}>", _args.name, SchemaHelper.ToString(_input.Schema));
                            }
                            else
                            {
                                ch.Info("    <{0}>DataViewSchema: {1}</{0}>", _args.name, SchemaHelper.ToString(_input.Schema));
                            }
                        }

                        if (_args.dimension)
                        {
                            var nbRows = DataViewHelper.ComputeRowCount(_input);
                            if (_args.jsonFormat)
                            {
                                ch.Info("    <{0}>{{\"NbRows\":\"{1}\"}},</{0}>", _args.name, nbRows);
                            }
                            else
                            {
                                ch.Info("    <{0}>NbRows: {1}</{0}>", _args.name, nbRows);
                            }
                        }

                        // Requested columns: the ones for plain statistics followed by the ones for histograms.
                        var sch        = _input.Schema;
                        var indexesCol = new List <int>();
                        var textCols   = new List <string>();
                        if (_args.columns != null)
                        {
                            textCols.AddRange(_args.columns);
                        }
                        if (_args.hists != null)
                        {
                            textCols.AddRange(_args.hists);
                        }

                        // Resolve every requested column and reject the types which are not handled below.
                        for (int i = 0; i < textCols.Count; ++i)
                        {
                            int index = SchemaHelper.GetColumnIndex(sch, textCols[i]);
                            var ty    = sch[index].Type;
                            if (!(ty == NumberDataViewType.Single || ty == NumberDataViewType.UInt32 ||
                                  ty == NumberDataViewType.Int32 || ty == TextDataViewType.Instance ||
                                  ty == BooleanDataViewType.Instance || ty == NumberDataViewType.Int64 ||
                                  (ty.IsKey() && ty.AsKey().RawKind() == DataKind.UInt32) ||
                                  (ty.IsVector() && ty.AsVector().ItemType() == NumberDataViewType.Single)))
                            {
                                // 'i' indexes textCols (columns followed by hists), not _args.columns:
                                // reading _args.columns[i] could fail or report the wrong column name.
                                throw ch.Except("Unsupported type {0} (schema={1}).", textCols[i], SchemaHelper.ToString(sch));
                            }
                            indexesCol.Add(index);
                        }

                        // Computation
                        var required        = new HashSet <int>(indexesCol);
                        var requiredIndexes = required.OrderBy(c => c).ToArray();
                        // NOTE(review): the cursor columns are selected from 'Schema' and not from '_input.Schema' - confirm both agree.
                        using (var cur = _input.GetRowCursor(Schema.Where(i => required.Contains(i.Index))))
                        {
                            // One flag and one getter slot per required column; a getter stays null
                            // when the column type does not match.
                            bool[] isText  = requiredIndexes.Select(c => sch[c].Type == TextDataViewType.Instance).ToArray();
                            bool[] isBool  = requiredIndexes.Select(c => sch[c].Type == BooleanDataViewType.Instance).ToArray();
                            bool[] isFloat = requiredIndexes.Select(c => sch[c].Type == NumberDataViewType.Single).ToArray();
                            bool[] isUint  = requiredIndexes.Select(c => sch[c].Type == NumberDataViewType.UInt32 || sch[c].Type.RawKind() == DataKind.UInt32).ToArray();
                            bool[] isInt   = requiredIndexes.Select(c => sch[c].Type == NumberDataViewType.Int32 || sch[c].Type.RawKind() == DataKind.Int32).ToArray();
                            bool[] isInt8  = requiredIndexes.Select(c => sch[c].Type == NumberDataViewType.Int64 || sch[c].Type.RawKind() == DataKind.Int64).ToArray();

                            ValueGetter <bool>[] boolGetters = requiredIndexes.Select(i => sch[i].Type == BooleanDataViewType.Instance || sch[i].Type.RawKind() == DataKind.Boolean ? cur.GetGetter <bool>(SchemaHelper._dc(i, cur)) : null).ToArray();
                            ValueGetter <uint>[] uintGetters = requiredIndexes.Select(i => sch[i].Type == NumberDataViewType.UInt32 || sch[i].Type.RawKind() == DataKind.UInt32 ? cur.GetGetter <uint>(SchemaHelper._dc(i, cur)) : null).ToArray();
                            ValueGetter <ReadOnlyMemory <char> >[] textGetters = requiredIndexes.Select(i => sch[i].Type == TextDataViewType.Instance ? cur.GetGetter <ReadOnlyMemory <char> >(SchemaHelper._dc(i, cur)) : null).ToArray();
                            ValueGetter <float>[]            floatGetters      = requiredIndexes.Select(i => sch[i].Type == NumberDataViewType.Single ? cur.GetGetter <float>(SchemaHelper._dc(i, cur)) : null).ToArray();
                            ValueGetter <VBuffer <float> >[] vectorGetters     = requiredIndexes.Select(i => sch[i].Type.IsVector() ? cur.GetGetter <VBuffer <float> >(SchemaHelper._dc(i, cur)) : null).ToArray();
                            ValueGetter <int>[]  intGetters  = requiredIndexes.Select(i => sch[i].Type == NumberDataViewType.Int32 || sch[i].Type.RawKind() == DataKind.Int32 ? cur.GetGetter <int>(SchemaHelper._dc(i, cur)) : null).ToArray();
                            ValueGetter <long>[] int8Getters = requiredIndexes.Select(i => sch[i].Type == NumberDataViewType.Int64 || sch[i].Type.RawKind() == DataKind.Int64 ? cur.GetGetter <long>(SchemaHelper._dc(i, cur)) : null).ToArray();

                            var cols   = _args.columns == null ? null : new HashSet <string>(_args.columns);
                            var hists  = _args.hists == null ? null : new HashSet <string>(_args.hists);
                            var schema = _input.Schema;

                            // Create the list of statistics to maintain for every required column.
                            for (int i = 0; i < schema.Count; ++i)
                            {
                                string name = schema[i].Name;
                                if (!required.Contains(i))
                                {
                                    continue;
                                }
                                stats[name] = new List <ColumnStatObs>();
                                var t = stats[name];
                                if (cols != null && cols.Contains(name))
                                {
                                    t.Add(new ColumnStatObs(ColumnStatObs.StatKind.min));
                                    t.Add(new ColumnStatObs(ColumnStatObs.StatKind.max));
                                    t.Add(new ColumnStatObs(ColumnStatObs.StatKind.sum));
                                    t.Add(new ColumnStatObs(ColumnStatObs.StatKind.sum2));
                                    t.Add(new ColumnStatObs(ColumnStatObs.StatKind.nb));
                                }
                                if (hists != null && hists.Contains(name))
                                {
                                    t.Add(new ColumnStatObs(ColumnStatObs.StatKind.hist));
                                }
                            }

                            // Buffers reused for every row.
                            float value  = 0;
                            var   tvalue = new ReadOnlyMemory <char>();
                            var   vector = new VBuffer <float>();
                            uint  uvalue = 0;
                            var   bvalue = true;
                            var   int4   = (int)0;
                            var   int8   = (long)0;

                            // Single pass over the data: every statistic of a column receives every value.
                            while (cur.MoveNext())
                            {
                                for (int i = 0; i < requiredIndexes.Length; ++i)
                                {
                                    string name = cur.Schema[requiredIndexes[i]].Name;
                                    if (!stats.TryGetValue(name, out var columnStats))
                                    {
                                        continue;
                                    }
                                    if (isFloat[i])
                                    {
                                        floatGetters[i](ref value);
                                        foreach (var t in columnStats)
                                        {
                                            t.Update(value);
                                        }
                                    }
                                    else if (isBool[i])
                                    {
                                        boolGetters[i](ref bvalue);
                                        foreach (var t in columnStats)
                                        {
                                            t.Update(bvalue);
                                        }
                                    }
                                    else if (isText[i])
                                    {
                                        textGetters[i](ref tvalue);
                                        foreach (var t in columnStats)
                                        {
                                            t.Update(tvalue.ToString());
                                        }
                                    }
                                    else if (isUint[i])
                                    {
                                        uintGetters[i](ref uvalue);
                                        foreach (var t in columnStats)
                                        {
                                            t.Update(uvalue);
                                        }
                                    }
                                    else if (isInt[i])
                                    {
                                        intGetters[i](ref int4);
                                        foreach (var t in columnStats)
                                        {
                                            t.Update((double)int4);
                                        }
                                    }
                                    else if (isInt8[i])
                                    {
                                        int8Getters[i](ref int8);
                                        foreach (var t in columnStats)
                                        {
                                            t.Update((double)int8);
                                        }
                                    }
                                    else
                                    {
                                        // Remaining case after the type validation above: a vector of floats.
                                        vectorGetters[i](ref vector);
                                        foreach (var t in columnStats)
                                        {
                                            t.Update(vector);
                                        }
                                    }
                                }
                            }
                        }

                        // Output: one line per column (always the case for JSON), or one line per statistic.
                        if (_args.oneRowPerColumn || _args.jsonFormat)
                        {
                            var rows = new List <string>();
                            rows.Add(string.Format("<{0}>{1}", _args.name, _args.jsonFormat ? "[" : ""));
                            foreach (var col in stats.OrderBy(c => c.Key))
                            {
                                if (_args.jsonFormat)
                                {
                                    rows.Add(string.Format("    {{\"Column\": \"{0}\", \"stat\": {1}}},", col.Key,
                                                           string.Join(", ", col.Value.Select(c => c.ToString(true)))));
                                }
                                else
                                {
                                    rows.Add(string.Format("    <{2}>Column '{0}': {1}</{2}>", col.Key,
                                                           string.Join(", ", col.Value.Select(c => c.ToString(false))), _args.name));
                                }
                            }
                            rows.Add(string.Format("{1}</{0}>", _args.name, _args.jsonFormat ? "]" : ""));
                            ch.Info(string.Join("\n", rows));
                        }
                        else
                        {
                            var rows = new List <string>();
                            foreach (var col in stats.OrderBy(c => c.Key))
                            {
                                rows.Add(string.Format("     [{1}] Column '{0}'", col.Key, _args.name));
                                foreach (var st in col.Value)
                                {
                                    rows.Add(string.Format("        {0}", st.ToString(false)));
                                }
                            }
                            ch.Info(string.Join("\n", rows));
                        }

                        // Save
                        if (!string.IsNullOrEmpty(_args.saveInFile))
                        {
                            throw ch.ExceptNotImpl("Unable to save into \"{0}\"", _args.saveInFile);
                        }

                        if (!_args.jsonFormat)
                        {
                            ch.Info("End DescribeTransform {0}", _args.name);
                        }
                    }
                    // Marks the statistics as computed so that the next call returns immediately.
                    _statistics = _input;
                }
            }
        }