Beispiel #1
0
        /// <summary>
        /// Creates a lambda filter (predicate <c>Label == 0</c>) over the breast-cancer
        /// examples, re-applies its transforms to a freshly created data view, and saves
        /// the "Label" column of the result to <c>lambda-output.tsv</c>.
        /// </summary>
        public void LambdaTransformCreate()
        {
            var mlContext = new MLContext(seed: 42);
            var examples = ReadBreastCancerExamples();
            var sourceView = mlContext.CreateDataView(examples);

            var labelFilter = LambdaTransform.CreateFilter<BreastCancerExample, object>(
                mlContext,
                sourceView,
                (input, state) => input.Label == 0,
                null);

            // The filter is expected to report no row count.
            Assert.Null(labelFilter.GetRowCount());

            // Re-apply: run the same transform chain over a second view of the same data.
            var freshView = mlContext.CreateDataView(examples);
            var reapplied = ApplyTransformUtils.ApplyAllTransformsToData(mlContext, labelFilter, freshView);

            var textSaver = new TextSaver(mlContext, new TextSaver.Arguments());

            // The "Label" column must be present; its index selects the column to save.
            Assert.True(reapplied.Schema.TryGetColumnIndex("Label", out int labelIndex));

            var outputPath = GetOutputPath(OutputRelativePath, "lambda-output.tsv");
            using (var output = File.Create(outputPath))
            {
                textSaver.SaveData(output, reapplied, labelIndex);
            }
        }
        /// <summary>
        /// Save schema associations of role/column-name in <paramref name="rep"/>.
        /// </summary>
        /// <remarks>
        /// The mappings are written as a two-column ("Role", "Column") text table, ordered
        /// by role name, into the entry <c>DirTrainingInfo</c>/<c>RoleMappingFile</c>.
        /// </remarks>
        /// <param name="env">Host environment used to build the in-memory view and the text saver.</param>
        /// <param name="ch">Channel used for assertions.</param>
        /// <param name="schema">Role-mapped schema whose role/column-name pairs are saved.</param>
        /// <param name="rep">Repository writer in which the role-mapping entry is created.</param>
        internal static void SaveRoleMappings(IHostEnvironment env, IChannel ch, RoleMappedSchema schema, RepositoryWriter rep)
        {
            // REVIEW: Should we also save this stuff, for instance, in some portion of the
            // score command or transform?
            Contracts.AssertValue(env);
            env.AssertValue(ch);
            ch.AssertValue(schema);
            // rep is dereferenced below, so validate it like the other arguments.
            ch.AssertValue(rep);

            // OrderBy is stable, so there is no danger in it "reordering" columns
            // when a role is filled by multiple columns. Materialize once so both
            // columns are projected from the same ordering.
            var orderedRoles = schema.GetColumnRoleNames().OrderBy(r => r.Key.Value).ToArray();

            var builder = new ArrayDataViewBuilder(env);
            builder.AddColumn("Role", orderedRoles.Select(r => r.Key.Value).ToArray());
            builder.AddColumn("Column", orderedRoles.Select(r => r.Value).ToArray());

            using (var entry = rep.CreateEntry(DirTrainingInfo, RoleMappingFile))
            {
                // REVIEW: It seems very important that we have the role mappings
                // be easily human interpretable and even manipulable, but relying on the
                // text saver/loader means that special characters like '\n' won't be reinterpretable.
                // Column names containing such characters are considered unlikely enough
                // in practice that this trade-off is accepted.
                var saver = new TextSaver(env, new TextSaver.Arguments()
                {
                    Dense = true,
                    Silent = true
                });
                var view = builder.GetDataView();
                saver.SaveData(entry.Stream, view, Utils.GetIdentityPermutation(view.Schema.ColumnCount));
            }
        }
        /// <summary>
        /// Saves the dataframe in a stream as text format.
        /// </summary>
        /// <param name="view">data view to save; all of its columns are written</param>
        /// <param name="st">stream the text output is written to</param>
        /// <param name="sep">column separator</param>
        /// <param name="header">add header</param>
        /// <param name="silent">Suppress any info output (not warnings or errors)</param>
        /// <param name="host">host environment given to the saver; when null a new
        /// <see cref="ConsoleEnvironment"/> is created</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="view"/> or <paramref name="st"/> is null.
        /// </exception>
        public static void ViewToCsv(IDataView view, Stream st, string sep = ",", bool header = true,
                                     bool silent = false, IHost host = null)
        {
            // Fail fast with a descriptive exception before any environment or saver is created.
            if (view == null)
            {
                throw new System.ArgumentNullException(nameof(view));
            }
            if (st == null)
            {
                throw new System.ArgumentNullException(nameof(st));
            }

            IHostEnvironment env = host;

            if (env == null)
            {
                // NOTE(review): the fallback environment is created per call and is not
                // released here; confirm whether ConsoleEnvironment requires disposal.
                env = new ConsoleEnvironment();
            }

            var saver = new TextSaver(env, new TextSaver.Arguments()
            {
                Separator    = sep,
                OutputSchema = false,
                OutputHeader = header,
                Silent       = silent
            });

            // Select every column of the view, in schema order.
            var columns = new int[view.Schema.Count];

            for (int i = 0; i < columns.Length; ++i)
            {
                columns[i] = i;
            }

            saver.SaveData(st, view, columns);
        }