/// <summary>
/// Builds a lambda filter over the breast cancer examples, checks that the
/// filtered view does not report a row count, then re-applies the filter to a
/// fresh view of the same data and saves the "Label" column as text.
/// </summary>
public void LambdaTransformCreate()
{
    var mlContext = new MLContext(seed: 42);
    var examples = ReadBreastCancerExamples();
    var sourceView = mlContext.CreateDataView(examples);

    // The filter predicate tests whether Label equals zero; no state object is supplied.
    var labelFilter = LambdaTransform.CreateFilter<BreastCancerExample, object>(
        mlContext,
        sourceView,
        (example, unused) => example.Label == 0,
        null);
    Assert.Null(labelFilter.GetRowCount());

    // Test re-apply: run the same transform chain over a freshly created view.
    var reapplied = ApplyTransformUtils.ApplyAllTransformsToData(
        mlContext,
        labelFilter,
        mlContext.CreateDataView(examples));

    var textSaver = new TextSaver(mlContext, new TextSaver.Arguments());
    Assert.True(reapplied.Schema.TryGetColumnIndex("Label", out int labelIndex));

    var outputPath = GetOutputPath(OutputRelativePath, "lambda-output.tsv");
    using (var outputStream = File.Create(outputPath))
    {
        textSaver.SaveData(outputStream, reapplied, labelIndex);
    }
}
/// <summary>
/// Save schema associations of role/column-name in <paramref name="rep"/>.
/// </summary>
/// <param name="env">Environment used to construct the data view builder and the text saver.</param>
/// <param name="ch">Channel through which <paramref name="schema"/> is asserted.</param>
/// <param name="schema">Schema whose role/column-name pairs are written out.</param>
/// <param name="rep">Repository writer in which the role mapping entry is created.</param>
internal static void SaveRoleMappings(IHostEnvironment env, IChannel ch, RoleMappedSchema schema, RepositoryWriter rep)
{
    // REVIEW: Should we also save this stuff, for instance, in some portion of the
    // score command or transform?
    Contracts.AssertValue(env);
    env.AssertValue(ch);
    ch.AssertValue(schema);

    var viewBuilder = new ArrayDataViewBuilder(env);

    // OrderBy is a stable sort, so when one role is filled by several columns
    // those columns keep their relative order. The sequence is materialized
    // once so that both projections below see the same ordering.
    var orderedMappings = schema.GetColumnRoleNames()
        .OrderBy(mapping => mapping.Key.Value)
        .ToArray();

    string[] roleNames = orderedMappings.Select(mapping => mapping.Key.Value).ToArray();
    string[] columnNames = orderedMappings.Select(mapping => mapping.Value).ToArray();

    viewBuilder.AddColumn("Role", roleNames);
    viewBuilder.AddColumn("Column", columnNames);

    using (var entry = rep.CreateEntry(DirTrainingInfo, RoleMappingFile))
    {
        // REVIEW: It seems very important that the role mappings be easily human
        // interpretable and even manipulable, but relying on the text saver/loader
        // means that special characters like '\n' won't be reinterpretable.
        // Names containing such characters are not expected in practice.
        var textSaver = new TextSaver(env, new TextSaver.Arguments() { Dense = true, Silent = true });
        var mappingView = viewBuilder.GetDataView();
        var allColumns = Utils.GetIdentityPermutation(mappingView.Schema.ColumnCount);
        textSaver.SaveData(entry.Stream, mappingView, allColumns);
    }
}
/// <summary>
/// Saves the data view to a stream in text format, writing every column of
/// the view's schema in schema order. No schema block is written to the output.
/// </summary>
/// <param name="view">Data view to save.</param>
/// <param name="st">Stream the text is written to.</param>
/// <param name="sep">Column separator.</param>
/// <param name="header">Whether to write a header line.</param>
/// <param name="silent">Suppress any info output (not warnings or errors).</param>
/// <param name="host">Host used as the environment for the saver; when null, a new <c>ConsoleEnvironment</c> is created.</param>
public static void ViewToCsv(IDataView view, Stream st, string sep = ",", bool header = true,
                             bool silent = false, IHost host = null)
{
    // Fall back to a console environment when the caller does not provide a host.
    IHostEnvironment env = (IHostEnvironment)host ?? new ConsoleEnvironment();

    var saver = new TextSaver(env, new TextSaver.Arguments()
    {
        Separator = sep,
        OutputSchema = false,
        OutputHeader = header,
        Silent = silent
    });

    // Identity mapping: save every column, in the order it appears in the schema.
    var columns = new int[view.Schema.Count];
    for (int i = 0; i < columns.Length; ++i)
    {
        columns[i] = i;
    }

    saver.SaveData(st, view, columns);
}