/// <summary>
/// Loads the iris test file, runs it through the resample transform, dumps the result as CSV and
/// checks that the dumped lines, once ordered, never decrease on their first character.
/// </summary>
/// <exception cref="Exception">Thrown when two consecutive ordered lines are out of order on their first character.</exception>
public void TestI_ResampleSerialization()
{
    var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var dataFilePath = FileHelper.GetTestFile("iris.txt");
    var outputDataFilePath = FileHelper.GetOutputFile("outputDataFilePath.txt", methodName);

    // The environment is not wrapped in a using statement here.
    var env = EnvHelper.NewTestEnvironment(conc: 1);

    const string loaderSettings =
        "Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 " +
        "col=Pwidth:R4:4 header=+ sep=tab}";

    var loader = env.CreateLoader(loaderSettings, new MultiFileSource(dataFilePath));
    var resampled = env.CreateTransform("resample{lambda=1 c=-}", loader);
    DataViewHelper.ToCsv(env, resampled, outputDataFilePath);

    // Everything before the first line starting with "Label" is dropped, the rest is ordered.
    string[] ordered = File.ReadAllLines(outputDataFilePath)
                           .SkipWhile(line => !line.StartsWith("Label"))
                           .OrderBy(line => line)
                           .ToArray();

    // Compare each line with its successor on the first character only.
    bool outOfOrder = ordered
        .Zip(ordered.Skip(1), (previous, next) => previous[0] > next[0])
        .Any(isDecreasing => isDecreasing);

    if (outOfOrder)
    {
        throw new Exception("The output is not sorted.");
    }
}
// Builds a row-to-row mapper: an InfiniteLoopViewCursorRow<TRowInput> is used as the input view, the transform
// (or the whole chain, when _sourceToReplace differs from _transform.Source) is re-applied on top of it, and a
// cursor requesting every column is opened on the result. The returned delegate sets the input row, advances the
// cursor once and fills dst from the getters obtained through TRowOutput.GetCursorGetter.
// An InvalidOperationException raised while creating the getters is rethrown with the cursor schema in the message.
// NOTE(review): the local 'schema' is not used in the visible part of this method.
// NOTE(review): 'cur' is declared in a using statement, yet the return statement inside that using hands the same
// cursor to the returned delegate and to the IDisposable[] array. The cursor therefore looks disposed before the
// mapper is ever invoked - confirm whether the using is intended (the column-based GetMapperDispose<TSrc, TDst>
// opens its cursor without a using).
// NOTE(review): this snippet is cut off before the closing braces of the using block and of the method.
ValueMapperDispose <TRowInput, TRowOutput> GetMapperDispose() { var firstView = _sourceToReplace ?? DataViewHelper.GetFirstView(_transform); var schema = SchemaDefinition.Create(typeof(TRowOutput), SchemaDefinition.Direction.Read); var inputView = new InfiniteLoopViewCursorRow <TRowInput>(null, firstView.Schema, overwriteRowGetter: GetterSetterHelper.GetGetter <TRowInput>()); // This is extremely time consuming as the transform is serialized and deserialized. var outputView = _sourceToReplace == _transform.Source ? ApplyTransformUtils.ApplyTransformToData(_computeEnv, _transform, inputView) : ApplyTransformUtils.ApplyAllTransformsToData(_computeEnv, _transform, inputView, _sourceToReplace); // We assume all columns are needed, otherwise they should be removed. using (var cur = outputView.GetRowCursor(i => true)) { Delegate[] dels; try { dels = new TRowOutput().GetCursorGetter(cur); } catch (InvalidOperationException e) { throw new InvalidOperationException($"Unable to create getter for the schema\n{SchemaHelper.ToString(cur.Schema)}", e); } return(new ValueMapperDispose <TRowInput, TRowOutput>((in TRowInput src, ref TRowOutput dst) => { inputView.Set(in src); cur.MoveNext(); dst.Set(dels); }, new IDisposable[] { cur }));
// Test setup for a string -> int value mapping: a single TestClass row (A="bar", B="test", C="foo") is loaded as
// a data view, and a lookup view built from the keys foo/bar/test/wahoo and the values 1/2/3/4 backs a
// ValueMappingEstimator<string, int> that maps column A to D, B to E and C to F.
// The last argument of CreateDataView is passed as false; what that flag controls is not visible here.
// NOTE(review): this snippet is cut off after the estimator is created; the fit/transform and assertion part
// of the test is not visible.
public void ValueMapOneValueTest() { var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } }; var dataView = ML.Data.LoadFromEnumerable(data); var keys = new List <string>() { "foo", "bar", "test", "wahoo" }; var values = new List <int>() { 1, 2, 3, 4 }; var lookupMap = DataViewHelper.CreateDataView(Env, keys, values, ValueMappingTransformer.DefaultKeyColumnName, ValueMappingTransformer.DefaultValueColumnName, false); var estimator = new ValueMappingEstimator <string, int>(Env, lookupMap, lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName], lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName], new[] { ("D", "A"), ("E", "B"), ("F", "C") });
/// <summary>
/// Creates a value mapper on top of an existing transform, reading one input column
/// and producing one output column.
/// </summary>
/// <param name="env">environment like ConsoleEnvironment</param>
/// <param name="transform">transform to convert</param>
/// <param name="inputColumn">input column of the mapper, looked up in the schema of the first view</param>
/// <param name="outputColumn">output column of the mapper, looked up in the schema of the transform</param>
/// <param name="sourceToReplace">source to replace; when null the first view of the transform is used</param>
/// <param name="conc">number of concurrency threads; a value above zero creates a dedicated
/// <c>PassThroughEnvironment</c>, otherwise <paramref name="env"/> is used for the computation</param>
/// <param name="ignoreOtherColumn">ignore other columns instead of raising an exception if they are requested</param>
public ValueMapperFromTransformFloat(IHostEnvironment env, IDataTransform transform,
                                     string inputColumn, string outputColumn,
                                     IDataView sourceToReplace = null, int conc = 1,
                                     bool ignoreOtherColumn = false)
{
    Contracts.AssertValue(env);
    Contracts.AssertValue(transform);

    _env = env;
    _transform = transform;
    _sourceToReplace = sourceToReplace;
    _outputColumn = outputColumn;
    _ignoreOtherColumn = ignoreOtherColumn;
    _toDispose = new List<IDisposable>();

    // The input column is resolved against the replaced source when there is one,
    // otherwise against the first view of the transform.
    var rootView = _sourceToReplace ?? DataViewHelper.GetFirstView(transform);
    if (!rootView.Schema.TryGetColumnIndex(inputColumn, out int inputPosition))
    {
        throw env.Except("Unable to find column '{0}' in input schema '{1}'.",
                         inputColumn, SchemaHelper.ToString(rootView.Schema));
    }
    _inputIndex = inputPosition;

    // The output column is resolved against the schema of the transform itself.
    if (!transform.Schema.TryGetColumnIndex(outputColumn, out int outputPosition))
    {
        throw env.Except("Unable to find column '{0}' in output schema '{1}'.",
                         outputColumn, SchemaHelper.ToString(transform.Schema));
    }
    _outputType = _transform.Schema.GetColumnType(outputPosition);

    // A dedicated environment is created (and flagged for disposal) only when conc is positive.
    _disposeEnv = conc > 0;
    if (_disposeEnv)
    {
        _computeEnv = new PassThroughEnvironment(env, conc: conc, verbose: false);
    }
    else
    {
        _computeEnv = env;
    }
}
// Builds a column-to-column mapper: an InfiniteLoopViewCursorColumn<TSrc> on _inputIndex is used as the input
// view, the transform (or the whole chain, when _sourceToReplace differs from _transform.Source) is re-applied
// on top of it, and a cursor requesting only _outputColumn is opened on the result. The returned delegate sets
// the input value, advances the cursor once and reads the output value through the getter. The cursor is passed
// to ValueMapperDispose in the IDisposable[] array.
// Throws (through _env.Except) when _outputColumn is missing from the output schema or when no getter is returned.
// NOTE(review): in the null-getter error path, default(TDst).GetType() dereferences null whenever TDst is a
// reference type, so a NullReferenceException would replace the intended message; typeof(TDst) may be what
// was meant - confirm.
// NOTE(review): this snippet is cut off before the closing brace of the method.
ValueMapperDispose <TSrc, TDst> GetMapperDispose <TSrc, TDst>() { var firstView = _sourceToReplace ?? DataViewHelper.GetFirstView(_transform); var inputView = new InfiniteLoopViewCursorColumn <TSrc>(_inputIndex, firstView.Schema, ignoreOtherColumn: _ignoreOtherColumn); // This is extremely time consuming as the transform is serialized and deserialized. var outputView = _sourceToReplace == _transform.Source ? ApplyTransformUtils.ApplyTransformToData(_computeEnv, _transform, inputView) : ApplyTransformUtils.ApplyAllTransformsToData(_computeEnv, _transform, inputView, _sourceToReplace); int index; if (!outputView.Schema.TryGetColumnIndex(_outputColumn, out index)) { throw _env.Except("Unable to find column '{0}' in output schema.", _outputColumn); } int newOutputIndex = index; var cur = outputView.GetRowCursor(i => i == newOutputIndex); var getter = cur.GetGetter <TDst>(newOutputIndex); if (getter == null) { throw _env.Except("Unable to get a getter on the transform for type {0}", default(TDst).GetType()); } return(new ValueMapperDispose <TSrc, TDst>((in TSrc src, ref TDst dst) => { inputView.Set(in src); cur.MoveNext(); getter(ref dst); }, new IDisposable[] { cur }));
/// <summary>
/// Creates a value mapper on top of an existing transform, reading one input column
/// and producing one output column.
/// </summary>
/// <param name="env">environment like ConsoleEnvironment</param>
/// <param name="transform">transform to convert</param>
/// <param name="inputColumn">input column of the mapper, looked up in the schema of the first view</param>
/// <param name="outputColumn">output column of the mapper, looked up in the schema of the transform</param>
/// <param name="sourceToReplace">source to replace; when null the first view of the transform is used</param>
/// <param name="conc">number of concurrency threads; a value above zero creates a dedicated
/// <c>PassThroughEnvironment</c>, otherwise <paramref name="env"/> is used for the computation</param>
/// <param name="ignoreOtherColumn">ignore other columns instead of raising an exception if they are requested</param>
public ValueMapperFromTransformFloat(IHostEnvironment env, IDataTransform transform,
                                     string inputColumn, string outputColumn,
                                     IDataView sourceToReplace = null, int conc = 1,
                                     bool ignoreOtherColumn = false)
{
    Contracts.AssertValue(env);
    Contracts.AssertValue(transform);

    _env = env;
    _transform = transform;
    _sourceToReplace = sourceToReplace;
    _outputColumn = outputColumn;
    _ignoreOtherColumn = ignoreOtherColumn;
    _toDispose = new List<IDisposable>();

    // The input column is resolved against the replaced source when there is one,
    // otherwise against the first view of the transform.
    var rootView = _sourceToReplace ?? DataViewHelper.GetFirstView(transform);
    _inputIndex = SchemaHelper.GetColumnIndex(rootView.Schema, inputColumn);

    // The output column is resolved against the schema of the transform itself.
    int outputPosition = SchemaHelper.GetColumnIndex(transform.Schema, outputColumn);
    _outputType = _transform.Schema[outputPosition].Type;

    // A dedicated environment is created (and flagged for disposal) only when conc is positive.
    _disposeEnv = conc > 0;
    if (_disposeEnv)
    {
        _computeEnv = new PassThroughEnvironment(env, conc: conc, verbose: false);
    }
    else
    {
        _computeEnv = env;
    }
}
/// <summary>
/// Initializes the window: binds a 4x4 grid of zeros to <c>CDataGrid</c> and shows a message box
/// whenever a cell edit is ending.
/// </summary>
public MainWindow()
{
    // A freshly allocated int[4, 4] is zero-filled, which matches the explicit all-zero initializer.
    var zeroGrid = new int[4, 4];

    InitializeComponent();

    CDataGrid.ItemsSource = DataViewHelper.GetBindable2DArray(zeroGrid);
    CDataGrid.CellEditEnding += (sender, args) => MessageBox.Show("Pydor");
}
// Reviewer notes for MapValue (the method's own XML documentation follows):
// - The method projects keyValuePairs into a key sequence and a value sequence, builds a two-column lookup
//   data view from them (DefaultKeyColumnName / DefaultValueColumnName) and returns a ValueMappingEstimator
//   that maps inputColumnName (or outputColumnName when inputColumnName is null) to outputColumnName.
// - NOTE(review): the summary says the estimator "converts value types into keys", while the code builds a
//   key -> value lookup; values are presumably only treated as key types when treatValuesAsKeyType is true - confirm.
// - NOTE(review): keyValuePairs feeds two deferred Select projections, so a lazily produced sequence would
//   presumably be enumerated twice; no null check on keyValuePairs is visible here.
// - NOTE(review): this snippet is cut off before the closing brace of the method.
/// <summary> /// Create a <see cref="ValueMappingEstimator"/>, which converts value types into keys, loading the keys to use from <paramref name="keyValuePairs"/>. /// </summary> /// <typeparam name="TInputType">The key type.</typeparam> /// <typeparam name="TOutputType">The value type.</typeparam> /// <param name="catalog">The conversion transform's catalog</param> /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. /// The output data types can be primitives or vectors of numeric, text, boolean, <see cref="System.DateTime"/>, <see cref="System.DateTimeOffset"/> or <see cref="DataViewRowId"/> types.</param> /// <param name="keyValuePairs">Specifies the mapping that will be performed. The keys will be mapped to the values as specified in the <paramref name="keyValuePairs"/>.</param> /// <param name="inputColumnName">Name of the column to transform. /// If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source. /// The input data types can be primitives or vectors of numeric, text, boolean, <see cref="System.DateTime"/>, <see cref="System.DateTimeOffset"/> or <see cref="DataViewRowId"/> types. 
/// </param> /// <param name="treatValuesAsKeyType">Whether to treat the values as a key.</param> /// <returns>An instance of the <see cref="ValueMappingEstimator"/></returns> /// <example> /// <format type="text/markdown"> /// <![CDATA[ /// [!code-csharp[MapValue](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs)] /// ]]></format> /// </example> public static ValueMappingEstimator <TInputType, TOutputType> MapValue <TInputType, TOutputType>( this TransformsCatalog.ConversionTransforms catalog, string outputColumnName, IEnumerable <KeyValuePair <TInputType, TOutputType> > keyValuePairs, string inputColumnName = null, bool treatValuesAsKeyType = false) { var keys = keyValuePairs.Select(pair => pair.Key); var values = keyValuePairs.Select(pair => pair.Value); var lookupMap = DataViewHelper.CreateDataView(catalog.GetEnvironment(), keys, values, ValueMappingTransformer.DefaultKeyColumnName, ValueMappingTransformer.DefaultValueColumnName, treatValuesAsKeyType); return(new ValueMappingEstimator <TInputType, TOutputType>(catalog.GetEnvironment(), lookupMap, lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName], lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName], new[] { (outputColumnName, inputColumnName ?? outputColumnName) }));
// Builds a DataFrame-to-DataFrame mapper: an InfiniteLoopViewCursorDataFrame is used as the input view, the
// transform (or the whole chain, when _sourceToReplace differs from _transform.Source) is re-applied on top of
// it, and a cursor requesting every column is opened on the result. The returned delegate allocates dst when it
// is null (otherwise it checks that dst shares the output schema and throws through _env.Except if not),
// resizes dst to src.Length, sets the input frame, then advances the cursor once per source row and fills the
// matching row of dst.
// NOTE(review): the local 'schema' is not used in the visible part of this method.
// NOTE(review): 'cur' is declared in a using statement, yet the delegate returned from inside that using keeps
// calling cur.MoveNext(). The cursor therefore looks disposed before the mapper is ever invoked - confirm
// whether the using is intended.
// NOTE(review): this snippet is cut off before the closing braces of the using block and of the method.
ValueMapper <DataFrame, DataFrame> GetMapperRow() { var firstView = _sourceToReplace ?? DataViewHelper.GetFirstView(_transform); var schema = firstView.Schema; var inputView = new InfiniteLoopViewCursorDataFrame(null, firstView.Schema); // This is extremely time consuming as the transform is serialized and deserialized. var outputView = _sourceToReplace == _transform.Source ? ApplyTransformUtils.ApplyTransformToData(_computeEnv, _transform, inputView) : ApplyTransformUtils.ApplyAllTransformsToData(_computeEnv, _transform, inputView, _sourceToReplace); // We assume all columns are needed, otherwise they should be removed. using (var cur = outputView.GetRowCursor(i => true)) { var getRowFiller = DataFrame.GetRowFiller(cur); return((in DataFrame src, ref DataFrame dst) => { if (dst is null) { dst = new DataFrame(outputView.Schema, src.Length); } else if (!dst.CheckSharedSchema(outputView.Schema)) { throw _env.Except($"DataFrame does not share the same schema, expected {SchemaHelper.ToString(outputView.Schema)}."); } dst.Resize(src.Length); inputView.Set(src); for (int i = 0; i < src.Length; ++i) { cur.MoveNext(); getRowFiller(dst, i); } });
/// <summary>
/// Computes and logs the statistics requested in <c>_args</c> for the input view.
/// The work runs under <c>_lock</c> and only while <c>_statistics</c> is still null;
/// <c>_statistics</c> is set to <c>_input</c> once the run completes.
/// </summary>
/// <remarks>
/// Columns listed in <c>_args.columns</c> get min, max, sum, sum2 and nb observers; columns listed in
/// <c>_args.hists</c> get a hist observer. Depending on <c>_args</c>, the schema and the row count are
/// logged as well. An exception created through the channel is thrown when a requested column has an
/// unsupported type, and a not-implemented exception is thrown when <c>_args.saveInFile</c> is not empty.
/// </remarks>
private void ComputeStatistics()
{
    lock (_lock)
    {
        // Already computed by a previous call.
        if (_statistics != null)
        {
            return;
        }

        var stats = new Dictionary<string, List<ColumnStatObs>>();
        using (var ch = _host.Start("Computing statistics"))
        {
            if (!_args.jsonFormat)
            {
                ch.Info("Begin DescribeTransform {0}", _args.name);
            }

            if (_args.showSchema)
            {
                if (_args.jsonFormat)
                {
                    ch.Info(" <{0}>{{\"DataViewSchema\":\"{1}\"}},</{0}>", _args.name, SchemaHelper.ToString(_input.Schema));
                }
                else
                {
                    ch.Info(" <{0}>DataViewSchema: {1}</{0}>", _args.name, SchemaHelper.ToString(_input.Schema));
                }
            }

            if (_args.dimension)
            {
                var nbRows = DataViewHelper.ComputeRowCount(_input);
                if (_args.jsonFormat)
                {
                    ch.Info(" <{0}>{{\"NbRows\":\"{1}\"}},</{0}>", _args.name, nbRows);
                }
                else
                {
                    ch.Info(" <{0}>NbRows: {1}</{0}>", _args.name, nbRows);
                }
            }

            var sch = _input.Schema;
            var indexesCol = new List<int>();

            // Requested columns: plain statistics first, then histograms.
            var textCols = new List<string>();
            if (_args.columns != null)
            {
                textCols.AddRange(_args.columns);
            }
            if (_args.hists != null)
            {
                textCols.AddRange(_args.hists);
            }

            for (int i = 0; i < textCols.Count; ++i)
            {
                int index = SchemaHelper.GetColumnIndex(sch, textCols[i]);
                var ty = sch[index].Type;
                bool supported = ty == NumberDataViewType.Single
                                 || ty == NumberDataViewType.UInt32
                                 || ty == NumberDataViewType.Int32
                                 || ty == TextDataViewType.Instance
                                 || ty == BooleanDataViewType.Instance
                                 || ty == NumberDataViewType.Int64
                                 || (ty.IsKey() && ty.AsKey().RawKind() == DataKind.UInt32)
                                 || (ty.IsVector() && ty.AsVector().ItemType() == NumberDataViewType.Single);
                if (!supported)
                {
                    // i indexes textCols (columns followed by hists), so the name must be read from
                    // textCols: _args.columns may be null or shorter than textCols.
                    throw ch.Except("Unsupported type {0} (schema={1}).", textCols[i], SchemaHelper.ToString(sch));
                }
                indexesCol.Add(index);
            }

            // Computation
            var required = new HashSet<int>(indexesCol);
            var requiredIndexes = required.OrderBy(c => c).ToArray();
            using (var cur = _input.GetRowCursor(Schema.Where(i => required.Contains(i.Index))))
            {
                // Type flags, aligned with requiredIndexes.
                bool[] isText = requiredIndexes.Select(c => sch[c].Type == TextDataViewType.Instance).ToArray();
                bool[] isBool = requiredIndexes.Select(c => sch[c].Type == BooleanDataViewType.Instance).ToArray();
                bool[] isFloat = requiredIndexes.Select(c => sch[c].Type == NumberDataViewType.Single).ToArray();
                bool[] isUint = requiredIndexes.Select(c => sch[c].Type == NumberDataViewType.UInt32 || sch[c].Type.RawKind() == DataKind.UInt32).ToArray();
                bool[] isInt = requiredIndexes.Select(c => sch[c].Type == NumberDataViewType.Int32 || sch[c].Type.RawKind() == DataKind.Int32).ToArray();
                bool[] isInt8 = requiredIndexes.Select(c => sch[c].Type == NumberDataViewType.Int64 || sch[c].Type.RawKind() == DataKind.Int64).ToArray();

                // Getters, aligned with requiredIndexes; an entry is null when the column has another type.
                ValueGetter<bool>[] boolGetters = requiredIndexes
                    .Select(i => sch[i].Type == BooleanDataViewType.Instance || sch[i].Type.RawKind() == DataKind.Boolean
                                 ? cur.GetGetter<bool>(SchemaHelper._dc(i, cur))
                                 : null)
                    .ToArray();
                ValueGetter<uint>[] uintGetters = requiredIndexes
                    .Select(i => sch[i].Type == NumberDataViewType.UInt32 || sch[i].Type.RawKind() == DataKind.UInt32
                                 ? cur.GetGetter<uint>(SchemaHelper._dc(i, cur))
                                 : null)
                    .ToArray();
                ValueGetter<ReadOnlyMemory<char>>[] textGetters = requiredIndexes
                    .Select(i => sch[i].Type == TextDataViewType.Instance
                                 ? cur.GetGetter<ReadOnlyMemory<char>>(SchemaHelper._dc(i, cur))
                                 : null)
                    .ToArray();
                ValueGetter<float>[] floatGetters = requiredIndexes
                    .Select(i => sch[i].Type == NumberDataViewType.Single
                                 ? cur.GetGetter<float>(SchemaHelper._dc(i, cur))
                                 : null)
                    .ToArray();
                ValueGetter<VBuffer<float>>[] vectorGetters = requiredIndexes
                    .Select(i => sch[i].Type.IsVector()
                                 ? cur.GetGetter<VBuffer<float>>(SchemaHelper._dc(i, cur))
                                 : null)
                    .ToArray();
                ValueGetter<int>[] intGetters = requiredIndexes
                    .Select(i => sch[i].Type == NumberDataViewType.Int32 || sch[i].Type.RawKind() == DataKind.Int32
                                 ? cur.GetGetter<int>(SchemaHelper._dc(i, cur))
                                 : null)
                    .ToArray();
                ValueGetter<long>[] int8Getters = requiredIndexes
                    .Select(i => sch[i].Type == NumberDataViewType.Int64 || sch[i].Type.RawKind() == DataKind.Int64
                                 ? cur.GetGetter<long>(SchemaHelper._dc(i, cur))
                                 : null)
                    .ToArray();

                var cols = _args.columns == null ? null : new HashSet<string>(_args.columns);
                var hists = _args.hists == null ? null : new HashSet<string>(_args.hists);

                // One list of observers per required column.
                var schema = _input.Schema;
                for (int i = 0; i < schema.Count; ++i)
                {
                    string name = schema[i].Name;
                    if (!required.Contains(i))
                    {
                        continue;
                    }

                    var observers = new List<ColumnStatObs>();
                    stats[name] = observers;
                    if (cols != null && cols.Contains(name))
                    {
                        observers.Add(new ColumnStatObs(ColumnStatObs.StatKind.min));
                        observers.Add(new ColumnStatObs(ColumnStatObs.StatKind.max));
                        observers.Add(new ColumnStatObs(ColumnStatObs.StatKind.sum));
                        observers.Add(new ColumnStatObs(ColumnStatObs.StatKind.sum2));
                        observers.Add(new ColumnStatObs(ColumnStatObs.StatKind.nb));
                    }
                    if (hists != null && hists.Contains(name))
                    {
                        observers.Add(new ColumnStatObs(ColumnStatObs.StatKind.hist));
                    }
                }

                // Buffers reused across rows.
                float value = 0;
                var tvalue = new ReadOnlyMemory<char>();
                var vector = new VBuffer<float>();
                uint uvalue = 0;
                var bvalue = true;
                int int4 = 0;
                long int8 = 0;

                while (cur.MoveNext())
                {
                    for (int i = 0; i < requiredIndexes.Length; ++i)
                    {
                        string name = cur.Schema[requiredIndexes[i]].Name;
                        List<ColumnStatObs> observers;
                        if (!stats.TryGetValue(name, out observers))
                        {
                            continue;
                        }

                        if (isFloat[i])
                        {
                            floatGetters[i](ref value);
                            foreach (var t in observers)
                            {
                                t.Update(value);
                            }
                        }
                        else if (isBool[i])
                        {
                            boolGetters[i](ref bvalue);
                            foreach (var t in observers)
                            {
                                t.Update(bvalue);
                            }
                        }
                        else if (isText[i])
                        {
                            textGetters[i](ref tvalue);
                            foreach (var t in observers)
                            {
                                t.Update(tvalue.ToString());
                            }
                        }
                        else if (isUint[i])
                        {
                            uintGetters[i](ref uvalue);
                            foreach (var t in observers)
                            {
                                t.Update(uvalue);
                            }
                        }
                        else if (isInt[i])
                        {
                            intGetters[i](ref int4);
                            foreach (var t in observers)
                            {
                                t.Update((double)int4);
                            }
                        }
                        else if (isInt8[i])
                        {
                            int8Getters[i](ref int8);
                            foreach (var t in observers)
                            {
                                t.Update((double)int8);
                            }
                        }
                        else
                        {
                            // Remaining supported case: vector of floats.
                            vectorGetters[i](ref vector);
                            foreach (var t in observers)
                            {
                                t.Update(vector);
                            }
                        }
                    }
                }
            }

            // Reporting
            if (_args.oneRowPerColumn || _args.jsonFormat)
            {
                var rows = new List<string>();
                rows.Add(string.Format("<{0}>{1}", _args.name, _args.jsonFormat ? "[" : ""));
                foreach (var col in stats.OrderBy(c => c.Key))
                {
                    if (_args.jsonFormat)
                    {
                        rows.Add(string.Format(" {{\"Column\": \"{0}\", \"stat\": {1}}},", col.Key,
                                               string.Join(", ", col.Value.Select(c => c.ToString(true)))));
                    }
                    else
                    {
                        rows.Add(string.Format(" <{2}>Column '{0}': {1}</{2}>", col.Key,
                                               string.Join(", ", col.Value.Select(c => c.ToString(false))), _args.name));
                    }
                }
                rows.Add(string.Format("{1}</{0}>", _args.name, _args.jsonFormat ? "]" : ""));
                ch.Info(string.Join("\n", rows));
            }
            else
            {
                var rows = new List<string>();
                foreach (var col in stats.OrderBy(c => c.Key))
                {
                    rows.Add(string.Format(" [{1}] Column '{0}'", col.Key, _args.name));
                    foreach (var st in col.Value)
                    {
                        rows.Add(string.Format(" {0}", st.ToString(false)));
                    }
                }
                ch.Info(string.Join("\n", rows));
            }

            // Save
            if (!string.IsNullOrEmpty(_args.saveInFile))
            {
                throw ch.ExceptNotImpl("Unable to save into \"{0}\"", _args.saveInFile);
            }

            if (!_args.jsonFormat)
            {
                ch.Info("End DescribeTransform {0}", _args.name);
            }
        }

        _statistics = _input;
    }
}