/// <summary>
/// Chains a lambda map and a lambda filter over the breast-cancer examples, re-applies the
/// resulting transforms to a fresh data view, and saves the "Label" and "OutField" columns
/// to lambda-output.tsv.
/// </summary>
public void LambdaTransformCreate()
{
    using (var env = new ConsoleEnvironment(42))
    {
        var examples = ReadBreastCancerExamples();
        var source = env.CreateDataView(examples);

        // Map step: OutField receives the input features joined with ';'.
        var mapped = LambdaTransform.CreateMap<BreastCancerExample, LambdaOutput>(
            env,
            source,
            (input, output) =>
            {
                output.OutField = string.Join(";", input.Features);
            });

        // Filter step: the predicate is "Label == 0"; no state initializer is supplied.
        var filtered = LambdaTransform.CreateFilter<BreastCancerExample, object>(
            env,
            mapped,
            (input, state) => input.Label == 0,
            null);

        // The filter is expected to report no row count.
        Assert.Null(filtered.GetRowCount(false));

        // Test re-apply: run the same transform chain over a second view of the same examples.
        var reapplied = env.CreateDataView(examples);
        reapplied = ApplyTransformUtils.ApplyAllTransformsToData(env, filtered, reapplied);

        var saver = new TextSaver(env, new TextSaver.Arguments());

        // Both the original label column and the mapped column must be present in the schema.
        Assert.True(reapplied.Schema.TryGetColumnIndex("Label", out int labelIndex));
        Assert.True(reapplied.Schema.TryGetColumnIndex("OutField", out int outFieldIndex));

        using (var stream = File.Create(GetOutputPath(OutputRelativePath, "lambda-output.tsv")))
        {
            saver.SaveData(stream, reapplied, labelIndex, outFieldIndex);
        }
    }
}
/// <summary>
/// Trains an averaged perceptron (two iterations) on an explicitly cached breast-cancer
/// pipeline and checks how often the lambda filter's state callback was invoked.
/// </summary>
public void TrainAveragedPerceptronWithCache()
{
    var mlContext = new MLContext(0);
    var trainPath = GetDataPath(TestDatasets.breastCancer.trainFilename);
    var reader = TextLoader.Create(mlContext, new TextLoader.Options(), new MultiFileSource(trainPath));

    // Incremented by the state callback handed to the pass-through filter below.
    var callbackCount = 0;
    IDataView pipeline = LambdaTransform.CreateFilter<object, object>(
        mlContext,
        reader,
        (row, state) => true,
        state => { callbackCount++; });

    var labelConverter = mlContext.Transforms.Conversion.ConvertType("Label", outputKind: DataKind.Boolean);
    pipeline = labelConverter.Fit(pipeline).Transform(pipeline);

    // The baseline result of this was generated with everything cached in memory. As auto-cache is removed,
    // an explicit step of caching is required to make this test ok.
    var cached = mlContext.Data.Cache(pipeline);

    var options = new AveragedPerceptronTrainer.Options { NumberOfIterations = 2 };
    var estimator = mlContext.BinaryClassification.Trainers.AveragedPerceptron(options);

    var model = estimator.Fit(cached);
    model.Transform(cached);

    // Despite the two training iterations, the callback must have fired exactly once
    // (presumably because the explicit cache shields the filter from repeated cursoring).
    Assert.Equal(1, callbackCount);
}
/// <summary>
/// Trains an averaged perceptron (two iterations) directly on a lambda-filtered breast-cancer
/// loader and checks how often the filter's state callback was invoked.
/// </summary>
public void TrainAveragedPerceptronWithCache()
{
    var env = new MLContext(0);
    var trainPath = GetDataPath("breast-cancer.txt");
    var reader = TextLoader.Create(env, new TextLoader.Arguments(), new MultiFileSource(trainPath));

    // Incremented by the state callback handed to the pass-through filter below.
    var callbackCount = 0;
    var filtered = LambdaTransform.CreateFilter<object, object>(
        env,
        reader,
        (row, state) => true,
        state => { callbackCount++; });

    var trainer = new AveragedPerceptronTrainer(env, "Label", "Features", numIterations: 2);
    var model = trainer.Fit(filtered);
    model.Transform(filtered);

    // Despite the two training iterations, the callback must have fired exactly once.
    Assert.Equal(1, callbackCount);
}
/// <summary>
/// Trains an averaged perceptron (two iterations) on an explicitly cached, lambda-filtered
/// breast-cancer loader and checks how often the filter's state callback was invoked.
/// </summary>
public void TrainAveragedPerceptronWithCache()
{
    var env = new MLContext(0);
    var trainPath = GetDataPath("breast-cancer.txt");
    var reader = TextLoader.Create(env, new TextLoader.Arguments(), new MultiFileSource(trainPath));

    // Incremented by the state callback handed to the pass-through filter below.
    var callbackCount = 0;
    var filtered = LambdaTransform.CreateFilter<object, object>(
        env,
        reader,
        (row, state) => true,
        state => { callbackCount++; });

    // The baseline result of this was generated with everything cached in memory. As auto-cache is removed,
    // an explicit step of caching is required to make this test ok.
    var cached = env.Data.Cache(filtered);

    var trainer = new AveragedPerceptronTrainer(env, "Label", "Features", numIterations: 2);
    var model = trainer.Fit(cached);
    model.Transform(cached);

    // Despite the two training iterations, the callback must have fired exactly once
    // (presumably because the explicit cache shields the filter from repeated cursoring).
    Assert.Equal(1, callbackCount);
}
/// <summary>
/// Builds a lambda filter over the breast-cancer examples, re-applies it to a fresh data view,
/// and saves the "Label" column to lambda-output.tsv.
/// </summary>
public void LambdaTransformCreate()
{
    var env = new MLContext(seed: 42);
    var examples = ReadBreastCancerExamples();
    var source = env.CreateDataView(examples);

    // The predicate is "Label == 0"; no state initializer is supplied.
    var filtered = LambdaTransform.CreateFilter<BreastCancerExample, object>(
        env,
        source,
        (input, state) => input.Label == 0,
        null);

    // The filter is expected to report no row count.
    Assert.Null(filtered.GetRowCount());

    // Test re-apply: run the same transform over a second view of the same examples.
    var reapplied = env.CreateDataView(examples);
    reapplied = ApplyTransformUtils.ApplyAllTransformsToData(env, filtered, reapplied);

    var saver = new TextSaver(env, new TextSaver.Arguments());

    // The label column must still be present in the re-applied schema.
    Assert.True(reapplied.Schema.TryGetColumnIndex("Label", out int labelIndex));

    using (var stream = File.Create(GetOutputPath(OutputRelativePath, "lambda-output.tsv")))
    {
        saver.SaveData(stream, reapplied, labelIndex);
    }
}
/// <summary>
/// Exercises lambda filters over row types whose fields carry (or lack) the cursor-channel
/// attribute, covering four cases: one correctly marked IChannel field, a marked non-IChannel
/// field, two marked fields, and a type where only one of two IChannel fields is marked.
/// </summary>
public void CursorChannelExposedInMapTransform()
{
    var env = new MLContext(seed: 0);

    // Correct use of CursorChannel attribute.
    var data1 = Utils.CreateArray(10, new OneIChannelWithAttribute());
    var idv1 = env.CreateDataView(data1);
    Assert.Null(data1[0].Channel);

    var filter1 = LambdaTransform.CreateFilter<OneIChannelWithAttribute, object>(env, idv1,
        (input, state) =>
        {
            // Inside the filter the marked field must have been populated.
            Assert.NotNull(input.Channel);
            return false;
        }, null);
    filter1.GetRowCursorForAllColumns().MoveNext();

    // Error case: non-IChannel field marked with attribute.
    var data2 = Utils.CreateArray(10, new OneStringWithAttribute());
    var idv2 = env.CreateDataView(data2);
    Assert.Null(data2[0].Channel);

    var filter2 = LambdaTransform.CreateFilter<OneStringWithAttribute, object>(env, idv2,
        (input, state) =>
        {
            Assert.Null(input.Channel);
            return false;
        }, null);
    try
    {
        filter2.GetRowCursorForAllColumns().MoveNext();
        Assert.True(false, "Throw an error if attribute is applied to a field that is not an IChannel.");
    }
    catch (InvalidOperationException ex)
    {
        Assert.True(ex.IsMarked());
    }

    // Error case: multiple fields marked with attributes.
    var data3 = Utils.CreateArray(10, new TwoIChannelsWithAttributes());
    var idv3 = env.CreateDataView(data3);
    Assert.Null(data3[0].ChannelOne);
    Assert.Null(data3[2].ChannelTwo);

    var filter3 = LambdaTransform.CreateFilter<TwoIChannelsWithAttributes, object>(env, idv3,
        (input, state) =>
        {
            Assert.Null(input.ChannelOne);
            Assert.Null(input.ChannelTwo);
            return false;
        }, null);
    try
    {
        filter3.GetRowCursorForAllColumns().MoveNext();
        // The message previously repeated the "not an IChannel" text from the case above.
        Assert.True(false, "Throw an error if attribute is applied to multiple fields.");
    }
    catch (InvalidOperationException ex)
    {
        Assert.True(ex.IsMarked());
    }

    // Correct case: non-marked IChannel field is not touched.
    var example4 = new TwoIChannelsOnlyOneWithAttribute();
    Assert.Null(example4.ChannelTwo);
    Assert.Null(example4.ChannelOne);
    var idv4 = env.CreateDataView(Utils.CreateArray(10, example4));

    var filter4 = LambdaTransform.CreateFilter<TwoIChannelsOnlyOneWithAttribute, object>(env, idv4,
        (input, state) =>
        {
            Assert.Null(input.ChannelOne);
            Assert.NotNull(input.ChannelTwo);
            return false;
        }, null);

    // Cursor over filter4 (the original re-ran filter1 here, so this case was never executed).
    filter4.GetRowCursorForAllColumns().MoveNext();
}
/// <summary>
/// Exercises lambda maps and lambda filters over row types whose fields carry (or lack) the
/// cursor-channel attribute, covering four cases: one correctly marked IChannel field, a marked
/// non-IChannel field, two marked fields, and a type where only one of two IChannel fields is
/// marked.
/// </summary>
public void CursorChannelExposedInMapTransform()
{
    using (var env = new ConsoleEnvironment(0))
    {
        // Correct use of CursorChannel attribute.
        var data1 = Utils.CreateArray(10, new OneIChannelWithAttribute());
        var idv1 = env.CreateDataView(data1);
        Assert.Null(data1[0].Channel);

        var map1 = LambdaTransform.CreateMap<OneIChannelWithAttribute, OneIChannelWithAttribute>(env, idv1,
            (input, output) =>
            {
                output.OutField = input.OutField + input.OutField;
            });
        // Opening the cursor is the whole check for the map: it must not throw.
        map1.GetRowCursor(col => true);

        var filter1 = LambdaTransform.CreateFilter<OneIChannelWithAttribute, object>(env, idv1,
            (input, state) =>
            {
                // Inside the filter the marked field must have been populated.
                Assert.NotNull(input.Channel);
                return false;
            }, null);
        filter1.GetRowCursor(col => true).MoveNext();

        // Error case: non-IChannel field marked with attribute.
        var data2 = Utils.CreateArray(10, new OneStringWithAttribute());
        var idv2 = env.CreateDataView(data2);
        Assert.Null(data2[0].Channel);

        var filter2 = LambdaTransform.CreateFilter<OneStringWithAttribute, object>(env, idv2,
            (input, state) =>
            {
                Assert.Null(input.Channel);
                return false;
            }, null);
        try
        {
            filter2.GetRowCursor(col => true).MoveNext();
            Assert.True(false, "Throw an error if attribute is applied to a field that is not an IChannel.");
        }
        catch (InvalidOperationException ex)
        {
            Assert.True(ex.IsMarked());
        }

        var map2 = LambdaTransform.CreateMap<OneStringWithAttribute, OneStringWithAttribute>(env, idv2,
            (input, output) =>
            {
                output.OutField = input.OutField + input.OutField;
            });
        try
        {
            map2.GetRowCursor(col => true);
            Assert.True(false, "Throw an error if attribute is applied to a field that is not an IChannel.");
        }
        catch (InvalidOperationException ex)
        {
            Assert.True(ex.IsMarked());
        }

        // Error case: multiple fields marked with attributes.
        var data3 = Utils.CreateArray(10, new TwoIChannelsWithAttributes());
        var idv3 = env.CreateDataView(data3);
        Assert.Null(data3[0].ChannelOne);
        Assert.Null(data3[2].ChannelTwo);

        var filter3 = LambdaTransform.CreateFilter<TwoIChannelsWithAttributes, object>(env, idv3,
            (input, state) =>
            {
                Assert.Null(input.ChannelOne);
                Assert.Null(input.ChannelTwo);
                return false;
            }, null);
        try
        {
            filter3.GetRowCursor(col => true).MoveNext();
            // The message previously repeated the "not an IChannel" text from the case above.
            Assert.True(false, "Throw an error if attribute is applied to multiple fields.");
        }
        catch (InvalidOperationException ex)
        {
            Assert.True(ex.IsMarked());
        }

        var map3 = LambdaTransform.CreateMap<TwoIChannelsWithAttributes, TwoIChannelsWithAttributes>(env, idv3,
            (input, output) =>
            {
                output.OutField = input.OutField + input.OutField;
            });
        try
        {
            map3.GetRowCursor(col => true);
            Assert.True(false, "Throw an error if attribute is applied to a multiple fields.");
        }
        catch (InvalidOperationException ex)
        {
            Assert.True(ex.IsMarked());
        }

        // Correct case: non-marked IChannel field is not touched.
        var example4 = new TwoIChannelsOnlyOneWithAttribute();
        Assert.Null(example4.ChannelTwo);
        Assert.Null(example4.ChannelOne);
        var idv4 = env.CreateDataView(Utils.CreateArray(10, example4));

        var map4 = LambdaTransform.CreateMap<TwoIChannelsOnlyOneWithAttribute, TwoIChannelsOnlyOneWithAttribute>(env, idv4,
            (input, output) => { });
        map4.GetRowCursor(col => true);

        var filter4 = LambdaTransform.CreateFilter<TwoIChannelsOnlyOneWithAttribute, object>(env, idv4,
            (input, state) =>
            {
                Assert.Null(input.ChannelOne);
                Assert.NotNull(input.ChannelTwo);
                return false;
            }, null);

        // Cursor over filter4 (the original re-ran filter1 here, so this case was never executed).
        filter4.GetRowCursor(col => true).MoveNext();
    }
}