/// <summary>
/// Chains a map transform and a filter transform over the breast cancer examples,
/// re-applies the chain to a freshly created data view and saves the "Label" and
/// "OutField" columns of the result to "lambda-output.tsv".
/// </summary>
public void LambdaTransformCreate()
{
    using (var env = new ConsoleEnvironment(42))
    {
        var examples = ReadBreastCancerExamples();
        var sourceView = env.CreateDataView(examples);

        // Map step: OutField becomes the ';'-joined feature values.
        var joined = LambdaTransform.CreateMap<BreastCancerExample, LambdaOutput>(
            env,
            sourceView,
            (input, output) => { output.OutField = string.Join(";", input.Features); });

        // Filter step: predicate on Label == 0, with no state object.
        var filtered = LambdaTransform.CreateFilter<BreastCancerExample, object>(
            env,
            joined,
            (input, state) => input.Label == 0,
            null);

        // The filter reports no row count here.
        Assert.Null(filtered.GetRowCount(false));

        // test re-apply
        var reapplied = env.CreateDataView(examples);
        reapplied = ApplyTransformUtils.ApplyAllTransformsToData(env, filtered, reapplied);

        var textSaver = new TextSaver(env, new TextSaver.Arguments());
        Assert.True(reapplied.Schema.TryGetColumnIndex("Label", out int labelIndex));
        Assert.True(reapplied.Schema.TryGetColumnIndex("OutField", out int outFieldIndex));

        using (var stream = File.Create(GetOutputPath(OutputRelativePath, "lambda-output.tsv")))
        {
            textSaver.SaveData(stream, reapplied, labelIndex, outFieldIndex);
        }
    }
}
/// <summary>
/// Builds a stateful lambda map transform from a single input column to a single output column.
/// </summary>
/// <param name="host">Host passed through to <c>LambdaTransform.CreateMap</c>.</param>
/// <param name="input">Data view the transform is created over.</param>
/// <param name="inputColumnName">Column name assigned to the first column of the input schema definition.</param>
/// <param name="outputColumnName">Column name assigned to the first column of the output schema definition.</param>
/// <param name="initFunction">State initializer passed through to <c>LambdaTransform.CreateMap</c>.</param>
/// <param name="hasBuffer">Selects <c>MapFunction</c> when true, <c>MapFunctionWithoutBuffer</c> otherwise.</param>
/// <param name="outputColTypeOverride">When non-null, replaces the column type of the output column.</param>
/// <returns>The transform returned by <c>LambdaTransform.CreateMap</c>.</returns>
private static IDataTransform CreateLambdaTransform(IHost host, IDataView input, string inputColumnName, string outputColumnName, Action<TState> initFunction, bool hasBuffer, ColumnType outputColTypeOverride)
{
    var sourceDefinition = SchemaDefinition.Create(typeof(DataBox<TInput>));
    sourceDefinition[0].ColumnName = inputColumnName;

    var resultDefinition = SchemaDefinition.Create(typeof(DataBox<TOutput>));
    resultDefinition[0].ColumnName = outputColumnName;
    if (outputColTypeOverride != null)
    {
        resultDefinition[0].ColumnType = outputColTypeOverride;
    }

    // The cast on the first operand gives the conditional a delegate type, so both method groups convert.
    Action<DataBox<TInput>, DataBox<TOutput>, TState> mapper = hasBuffer
        ? (Action<DataBox<TInput>, DataBox<TOutput>, TState>)MapFunction
        : MapFunctionWithoutBuffer;

    return LambdaTransform.CreateMap(host, input, mapper, initFunction, sourceDefinition, resultDefinition);
}
/// <summary>
/// Creates the lambda map over <paramref name="input"/>, re-applies it onto an empty data view
/// that carries the same schema, and wraps the result in a <c>TransformWrapper</c>.
/// </summary>
/// <param name="input">Data view the map is created over; its schema is reused for the empty view.</param>
/// <returns>A <c>TransformWrapper</c> around the re-applied transform chain.</returns>
public TransformWrapper Fit(IDataView input)
{
    var mapping = LambdaTransform.CreateMap(_env, input, _action);

    // NOTE(review): presumably the last argument marks where the chain walk stops — confirm against ApplyTransformUtils.
    var detached = ApplyTransformUtils.ApplyAllTransformsToData(
        _env,
        mapping,
        new EmptyDataView(_env, input.Schema),
        input);

    return new TransformWrapper(_env, detached);
}
/// <summary>
/// Builds a stateful lambda map transform. When a confidence-interval minimum column name is
/// supplied, the output schema has three columns (value, interval min, interval max);
/// otherwise it has a single output column.
/// </summary>
/// <param name="host">Host passed through to <c>LambdaTransform.CreateMap</c>.</param>
/// <param name="input">Data view the transform is created over.</param>
/// <param name="inputColumnName">Column name assigned to the first column of the input schema definition.</param>
/// <param name="outputColumnName">Column name assigned to the first column of the output schema definition.</param>
/// <param name="forecastingConfidenceIntervalMinOutputColumnName">Name of the second output column; a null or empty value selects the single-column output.</param>
/// <param name="forecastingConfidenceIntervalMaxOutputColumnName">Name of the third output column; only used in the three-column case.</param>
/// <param name="initFunction">State initializer passed through to <c>LambdaTransform.CreateMap</c>.</param>
/// <param name="hasBuffer">Selects <c>MapFunction</c> when true, <c>MapFunctionWithoutBuffer</c> otherwise.</param>
/// <param name="outputColTypeOverride">When non-null, replaces the column type of every output column.</param>
/// <returns>The transform returned by <c>LambdaTransform.CreateMap</c>.</returns>
private static IDataTransform CreateLambdaTransform(IHost host, IDataView input, string inputColumnName, string outputColumnName, string forecastingConfidenceIntervalMinOutputColumnName, string forecastingConfidenceIntervalMaxOutputColumnName, Action<TState> initFunction, bool hasBuffer, DataViewType outputColTypeOverride)
{
    var sourceDefinition = SchemaDefinition.Create(typeof(DataBox<TInput>));
    sourceDefinition[0].ColumnName = inputColumnName;

    // Single-column output: no confidence-interval columns requested.
    if (string.IsNullOrEmpty(forecastingConfidenceIntervalMinOutputColumnName))
    {
        SchemaDefinition plainDefinition = SchemaDefinition.Create(typeof(DataBox<TOutput>));
        plainDefinition[0].ColumnName = outputColumnName;
        if (outputColTypeOverride != null)
        {
            plainDefinition[0].ColumnType = outputColTypeOverride;
        }

        Action<DataBox<TInput>, DataBox<TOutput>, TState> plainMapper = hasBuffer
            ? (Action<DataBox<TInput>, DataBox<TOutput>, TState>)MapFunction
            : MapFunctionWithoutBuffer;

        return LambdaTransform.CreateMap(host, input, plainMapper, initFunction, sourceDefinition, plainDefinition);
    }

    // Three-column output: value plus confidence-interval bounds.
    SchemaDefinition intervalDefinition = SchemaDefinition.Create(typeof(DataBoxForecastingWithConfidenceIntervals<TOutput>));
    intervalDefinition[0].ColumnName = outputColumnName;
    if (outputColTypeOverride != null)
    {
        // Same order as the original chained assignment, which assigns right to left.
        intervalDefinition[2].ColumnType = outputColTypeOverride;
        intervalDefinition[1].ColumnType = outputColTypeOverride;
        intervalDefinition[0].ColumnType = outputColTypeOverride;
    }

    intervalDefinition[1].ColumnName = forecastingConfidenceIntervalMinOutputColumnName;
    intervalDefinition[2].ColumnName = forecastingConfidenceIntervalMaxOutputColumnName;

    Action<DataBox<TInput>, DataBoxForecastingWithConfidenceIntervals<TOutput>, TState> intervalMapper = hasBuffer
        ? (Action<DataBox<TInput>, DataBoxForecastingWithConfidenceIntervals<TOutput>, TState>)MapFunction
        : MapFunctionWithoutBuffer;

    return LambdaTransform.CreateMap(host, input, intervalMapper, initFunction, sourceDefinition, intervalDefinition);
}
/// <summary>
/// Trains an SDCA multi-class model on the Iris data with a custom lambda step inserted into
/// the pipeline, then checks that the first 20 predictions match the input labels.
/// </summary>
void Extensibility()
{
    var irisPath = GetDataPath(IrisDataPath);

    using (var env = new LocalEnvironment())
    {
        var loader = TextLoader.ReadFile(env, MakeIrisTextLoaderArgs(), new MultiFileSource(irisPath));

        // Custom step: copies every field, but PetalLength falls back to SepalLength
        // when SepalLength is not greater than 3.
        Action<IrisData, IrisData> customStep = (src, dst) =>
        {
            dst.Label = src.Label;
            dst.PetalLength = src.SepalLength > 3 ? src.PetalLength : src.SepalLength;
            dst.PetalWidth = src.PetalWidth;
            dst.SepalLength = src.SepalLength;
            dst.SepalWidth = src.SepalWidth;
        };

        var lambda = LambdaTransform.CreateMap(env, loader, customStep);
        var term = TermTransform.Create(env, lambda, "Label");
        var concat = new ConcatTransform(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
            .Transform(term);

        var trainer = new SdcaMultiClassTrainer(
            env,
            new SdcaMultiClassTrainer.Arguments { MaxIterations = 100, Shuffle = true, NumThreads = 1 });

        // Cache the training data only when the trainer asks for it.
        IDataView trainData;
        if (trainer.Info.WantCaching)
        {
            trainData = new CacheDataView(env, concat, prefetch: null);
        }
        else
        {
            trainData = concat;
        }

        var trainRoles = new RoleMappedData(trainData, label: "Label", feature: "Features");

        // Auto-normalization.
        NormalizeTransform.CreateIfNeeded(env, ref trainRoles, trainer);

        var predictor = trainer.Train(new Runtime.TrainContext(trainRoles));

        var scoreRoles = new RoleMappedData(concat, label: "Label", feature: "Features");
        IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);

        var keyToValue = new KeyToValueTransform(env, "PredictedLabel").Transform(scorer);
        var engine = env.CreatePredictionEngine<IrisData, IrisPrediction>(keyToValue);

        var testLoader = TextLoader.ReadFile(env, MakeIrisTextLoaderArgs(), new MultiFileSource(irisPath));
        var testRows = testLoader.AsEnumerable<IrisData>(env, false);

        foreach (var row in testRows.Take(20))
        {
            var prediction = engine.Predict(row);
            Assert.True(prediction.PredictedLabel == row.Label);
        }
    }
}
/// <summary>
/// Reads column 1 of a two-row streaming view directly, then through an
/// <c>ExtendedCacheTransform</c>, and checks that both rows are returned each time.
/// </summary>
/// <param name="nt">Thread count given to the cache transform; a value of 1 also creates the test environment with <c>conc: 1</c>, any other value uses <c>conc: 0</c>.</param>
/// <param name="async">Forwarded to the cache transform's <c>async</c> argument.</param>
/// <exception cref="Exception">Thrown when a row count or value differs from the expectation.</exception>
static void TestCacheTransformSimple(int nt, bool async)
{
    using (var host = EnvHelper.NewTestEnvironment(conc: nt == 1 ? 1 : 0))
    {
        var inputs = new InputOutput[]
        {
            new InputOutput() { X = new float[] { 0, 1 }, Y = 1 },
            new InputOutput() { X = new float[] { 0, 1 }, Y = 0 }
        };
        var data = host.CreateStreamingDataView(inputs);

        // Sanity check: the raw view yields the rows in insertion order (Y = 1, then Y = 0).
        using (var cursor = data.GetRowCursor(i => true))
        {
            var sortedValues = new List<int>();
            var sortColumnGetter = cursor.GetGetter<int>(1);
            while (cursor.MoveNext())
            {
                int got = 0;
                sortColumnGetter(ref got);
                sortedValues.Add(got);
            }

            if (sortedValues.Count != 2)
            {
                throw new Exception("Issue with dimension: expected 2 rows in the input view, got " + sortedValues.Count + ".");
            }
            if (sortedValues[0] != 1)
            {
                throw new Exception("Issue with values: expected 1 in row 0 of the input view, got " + sortedValues[0] + ".");
            }
            if (sortedValues[1] != 0)
            {
                throw new Exception("Issue with values: expected 0 in row 1 of the input view, got " + sortedValues[1] + ".");
            }
        }

        var args = new ExtendedCacheTransform.Arguments { numTheads = nt, async = async };
        var transformedData = new ExtendedCacheTransform(host, args, data);
        var lastTransform = transformedData;

        // NOTE(review): the map below is built over `data` and its result is discarded; the cursor
        // that follows reads the cache transform directly. Confirm whether chaining was intended.
        LambdaTransform.CreateMap<InputOutput, InputOutput, EnvHelper.EmptyState>(
            host,
            data,
            (input, output, state) =>
            {
                output.X = input.X;
                output.Y = input.Y;
            },
            (EnvHelper.EmptyState state) => { });

        // Only the row count is checked after caching; row order is not asserted here.
        using (var cursor = lastTransform.GetRowCursor(i => true))
        {
            var sortedValues = new List<int>();
            var sortColumnGetter = cursor.GetGetter<int>(1);
            while (cursor.MoveNext())
            {
                int got = 0;
                sortColumnGetter(ref got);
                sortedValues.Add(got);
            }

            if (sortedValues.Count != 2)
            {
                throw new Exception("Issue with dimension: expected 2 rows from the cache transform, got " + sortedValues.Count + ".");
            }
        }
    }
}
/// <summary>
/// Reads column 1 of a two-row streaming view directly (expecting insertion order 1, 0), then
/// through a <c>SortInDataFrameTransform</c> sorting on "Y" (expecting ascending order 0, 1).
/// </summary>
/// <exception cref="Exception">Thrown when a row count or value differs from the expectation.</exception>
public void TestSortInDataFrameTransformSimple()
{
    using (var host = EnvHelper.NewTestEnvironment())
    {
        var inputs = new InputOutput[]
        {
            new InputOutput() { X = new float[] { 0, 1 }, Y = 1 },
            new InputOutput() { X = new float[] { 0, 1 }, Y = 0 }
        };
        var data = host.CreateStreamingDataView(inputs);

        // Before sorting: rows come back in insertion order (Y = 1, then Y = 0).
        using (var cursor = data.GetRowCursor(i => true))
        {
            var sortedValues = new List<int>();
            var sortColumnGetter = cursor.GetGetter<int>(1);
            while (cursor.MoveNext())
            {
                int got = 0;
                sortColumnGetter(ref got);
                sortedValues.Add(got);
            }

            if (sortedValues.Count != 2)
            {
                throw new Exception("Issue with dimension: expected 2 rows in the input view, got " + sortedValues.Count + ".");
            }
            if (sortedValues[0] != 1)
            {
                throw new Exception("Issue with values: expected 1 in row 0 of the input view, got " + sortedValues[0] + ".");
            }
            if (sortedValues[1] != 0)
            {
                throw new Exception("Issue with values: expected 0 in row 1 of the input view, got " + sortedValues[1] + ".");
            }
        }

        var args = new SortInDataFrameTransform.Arguments { sortColumn = "Y" };
        var transformedData = new SortInDataFrameTransform(host, args, data);
        var sorted = transformedData;

        // NOTE(review): the map below is built over `data` and its result is discarded; the cursor
        // that follows reads the sort transform directly. Confirm whether chaining was intended.
        LambdaTransform.CreateMap<InputOutput, InputOutput, EnvHelper.EmptyState>(
            host,
            data,
            (input, output, state) =>
            {
                output.X = input.X;
                output.Y = input.Y;
            },
            (state) => { });

        // After sorting on "Y": rows come back in ascending order (0, then 1).
        using (var cursor = sorted.GetRowCursor(i => true))
        {
            var sortedValues = new List<int>();
            var sortColumnGetter = cursor.GetGetter<int>(1);
            while (cursor.MoveNext())
            {
                int got = 0;
                sortColumnGetter(ref got);
                sortedValues.Add(got);
            }

            if (sortedValues.Count != 2)
            {
                throw new Exception("Issue with dimension: expected 2 rows from the sort transform, got " + sortedValues.Count + ".");
            }
            if (sortedValues[0] != 0)
            {
                throw new Exception("Issue with values: expected 0 in row 0 after sorting, got " + sortedValues[0] + ".");
            }
            if (sortedValues[1] != 1)
            {
                throw new Exception("Issue with values: expected 1 in row 1 after sorting, got " + sortedValues[1] + ".");
            }
        }
    }
}
/// <summary>
/// Wraps a lambda map transform (X[0] + 1, X[1] - 1) in a <c>ValueMapperFromTransformFloat</c>,
/// applies the resulting mapper to a fixed input vector once per input row, and checks the
/// mapped values as well as that the input vector was left untouched.
/// </summary>
/// <exception cref="Exception">Thrown when a dimension or value differs from the expectation.</exception>
public void TestTransform2ValueMapperMultiThread()
{
    using (var env = EnvHelper.NewTestEnvironment())
    {
        var host = env.Register("unittest");

        var inputs = new[]
        {
            new InputOutput { X = new float[] { 0, 1 }, Y = 10 },
            new InputOutput { X = new float[] { 2, 3 }, Y = 100 }
        };
        var data = host.CreateStreamingDataView(inputs);

        var trv = LambdaTransform.CreateMap(
            host,
            data,
            (InputOutput src, InputOutput dst, EnvHelper.EmptyState state) =>
            {
                dst.X = new float[] { src.X[0] + 1f, src.X[1] - 1f };
            },
            (EnvHelper.EmptyState state) => { });

        var ino = new InputOutput { X = new float[] { -5, -5 }, Y = 3 };
        var inob = new VBuffer<float>(2, ino.X);
        var ans = new VBuffer<float>();

        using (var valueMapper = new ValueMapperFromTransformFloat<VBuffer<float>>(host, trv, "X", "X", ignoreOtherColumn: true))
        {
            var mapper = valueMapper.GetMapper<VBuffer<float>, VBuffer<float>>();

            var listy = new List<int>();
            var listx = new List<float>();

            // The mapper always receives the same vector (-5, -5); only Y is taken from the input rows.
            for (int i = 0; i < inputs.Length; ++i)
            {
                mapper(in inob, ref ans);
                if (ans.Count != 2)
                {
                    throw new Exception("Issue with dimension.");
                }

                listx.AddRange(ans.GetValues().ToArray());
                listy.Add(inputs[i].Y);
            }

            if (listy.Count != 2)
            {
                throw new Exception("Issue with dimension.");
            }
            if (listy[0] != 10 || listy[1] != 100)
            {
                throw new Exception("Issue with values.");
            }

            // Each call maps (-5, -5) to (-4, -6).
            if (listx.Count != 4)
            {
                throw new Exception("Issue with dimension.");
            }
            if (listx[0] != -4)
            {
                throw new Exception("Issue with values.");
            }
            if (listx[1] != -6)
            {
                throw new Exception("Issue with values.");
            }
            if (listx[2] != -4)
            {
                throw new Exception("Issue with values.");
            }
            if (listx[3] != -6)
            {
                throw new Exception("Issue with values.");
            }

            // The input buffer must not have been modified by the mapper.
            if (inob.Count != 2)
            {
                throw new Exception("Issue with dimension.");
            }
            if (inob.Values[0] != -5)
            {
                throw new Exception("Values were overwritten.");
            }
            // Fixed: the original checked index 0 twice, so the second element was never verified.
            if (inob.Values[1] != -5)
            {
                throw new Exception("Values were overwritten.");
            }
        }
    }
}
/// <summary>
/// Exercises the CursorChannel attribute through lambda map and filter transforms in four
/// scenarios: a correctly attributed IChannel field, a non-IChannel field carrying the attribute,
/// two attributed fields on one type, and a type where only one of two IChannel fields is attributed.
/// The two error scenarios are expected to raise a marked <see cref="InvalidOperationException"/>.
/// </summary>
public void CursorChannelExposedInMapTransform()
{
    using (var env = new ConsoleEnvironment(0))
    {
        // Correct use of CursorChannel attribute.
        var data1 = Utils.CreateArray(10, new OneIChannelWithAttribute());
        var idv1 = env.CreateDataView(data1);
        Assert.Null(data1[0].Channel);

        var map1 = LambdaTransform.CreateMap<OneIChannelWithAttribute, OneIChannelWithAttribute>(env, idv1,
            (input, output) => { output.OutField = input.OutField + input.OutField; });
        map1.GetRowCursor(col => true);

        var filter1 = LambdaTransform.CreateFilter<OneIChannelWithAttribute, object>(env, idv1,
            (input, state) =>
            {
                Assert.NotNull(input.Channel);
                return false;
            }, null);
        filter1.GetRowCursor(col => true).MoveNext();

        // Error case: non-IChannel field marked with attribute.
        var data2 = Utils.CreateArray(10, new OneStringWithAttribute());
        var idv2 = env.CreateDataView(data2);
        Assert.Null(data2[0].Channel);

        var filter2 = LambdaTransform.CreateFilter<OneStringWithAttribute, object>(env, idv2,
            (input, state) =>
            {
                Assert.Null(input.Channel);
                return false;
            }, null);
        try
        {
            filter2.GetRowCursor(col => true).MoveNext();
            Assert.True(false, "Throw an error if attribute is applied to a field that is not an IChannel.");
        }
        catch (InvalidOperationException ex)
        {
            Assert.True(ex.IsMarked());
        }

        var map2 = LambdaTransform.CreateMap<OneStringWithAttribute, OneStringWithAttribute>(env, idv2,
            (input, output) => { output.OutField = input.OutField + input.OutField; });
        try
        {
            map2.GetRowCursor(col => true);
            Assert.True(false, "Throw an error if attribute is applied to a field that is not an IChannel.");
        }
        catch (InvalidOperationException ex)
        {
            Assert.True(ex.IsMarked());
        }

        // Error case: multiple fields marked with attributes.
        var data3 = Utils.CreateArray(10, new TwoIChannelsWithAttributes());
        var idv3 = env.CreateDataView(data3);
        Assert.Null(data3[0].ChannelOne);
        Assert.Null(data3[2].ChannelTwo);

        var filter3 = LambdaTransform.CreateFilter<TwoIChannelsWithAttributes, object>(env, idv3,
            (input, state) =>
            {
                Assert.Null(input.ChannelOne);
                Assert.Null(input.ChannelTwo);
                return false;
            }, null);
        try
        {
            filter3.GetRowCursor(col => true).MoveNext();
            // Fixed: the message previously described the non-IChannel case instead of this one.
            Assert.True(false, "Throw an error if attribute is applied to multiple fields.");
        }
        catch (InvalidOperationException ex)
        {
            Assert.True(ex.IsMarked());
        }

        var map3 = LambdaTransform.CreateMap<TwoIChannelsWithAttributes, TwoIChannelsWithAttributes>(env, idv3,
            (input, output) => { output.OutField = input.OutField + input.OutField; });
        try
        {
            map3.GetRowCursor(col => true);
            Assert.True(false, "Throw an error if attribute is applied to multiple fields.");
        }
        catch (InvalidOperationException ex)
        {
            Assert.True(ex.IsMarked());
        }

        // Correct case: non-marked IChannel field is not touched.
        var example4 = new TwoIChannelsOnlyOneWithAttribute();
        Assert.Null(example4.ChannelTwo);
        Assert.Null(example4.ChannelOne);
        var idv4 = env.CreateDataView(Utils.CreateArray(10, example4));

        var map4 = LambdaTransform.CreateMap<TwoIChannelsOnlyOneWithAttribute, TwoIChannelsOnlyOneWithAttribute>(env, idv4,
            (input, output) => { });
        map4.GetRowCursor(col => true);

        var filter4 = LambdaTransform.CreateFilter<TwoIChannelsOnlyOneWithAttribute, object>(env, idv4,
            (input, state) =>
            {
                Assert.Null(input.ChannelOne);
                Assert.NotNull(input.ChannelTwo);
                return false;
            }, null);
        // Fixed: the original advanced filter1 here, so filter4's assertions never ran.
        filter4.GetRowCursor(col => true).MoveNext();
    }
}