Esempio n. 1
0
        /// <summary>
        /// Factory method matching the SignatureDataTransform signature.
        /// Validates its inputs, runs <c>TrainCore</c> to fill a score buffer, and then asks
        /// <c>CreateDropSlotsColumn</c> for the drop-slots column description.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="args">Transform arguments; must not be null and must pass <c>args.Check</c>.</param>
        /// <param name="input">Input data view; must not be null.</param>
        /// <returns>
        /// A <c>DropSlotsTransform</c> over <paramref name="input"/> when a drop-slots column is produced;
        /// otherwise whatever <c>NopTransform.CreateIfNeeded</c> returns for <paramref name="input"/>.
        /// </returns>
        public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
        {
            // env is checked with the static contract helper because no host exists yet.
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register(RegistrationName);

            host.CheckValue(args, nameof(args));
            host.CheckValue(input, nameof(input));
            args.Check(host);

            // TrainCore fills this buffer by reference.
            VBuffer<Single> scores = default(VBuffer<Single>);
            TrainCore(host, input, args, ref scores);

            using (var ch = host.Start("Dropping Slots"))
            {
                int selectedCount;
                var dropColumn = CreateDropSlotsColumn(args, ref scores, out selectedCount);

                if (dropColumn != null)
                {
                    ch.Info(MessageSensitivity.Schema, "Selected {0} slots out of {1} in column '{2}'", selectedCount, scores.Length, args.FeatureColumn);

                    var dropArgs = new DropSlotsTransform.Arguments
                    {
                        Column = new[] { dropColumn }
                    };
                    return new DropSlotsTransform(host, dropArgs, input);
                }

                // A null column description means there is nothing to drop.
                ch.Info("No features are being dropped.");
                return NopTransform.CreateIfNeeded(host, input);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Create method corresponding to SignatureDataTransform.
        /// Validates the arguments, de-duplicates the requested column names, obtains scores from
        /// <c>MutualInformationFeatureSelectionUtils.TrainCore</c>, derives a threshold with
        /// <c>ComputeThreshold</c>, and builds the drop-slots columns with <c>CreateDropSlotsColumns</c>.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="args">
        /// Transform arguments; requires at least one column, <c>SlotsInOutput &gt; 0</c>,
        /// a non-whitespace label column and <c>NumBins &gt; 1</c>.
        /// </param>
        /// <param name="input">Input data view; must not be null.</param>
        /// <returns>
        /// A <c>DropSlotsTransform</c> over <paramref name="input"/> when at least one drop-slots column
        /// is produced; otherwise whatever <c>NopTransform.CreateIfNeeded</c> returns.
        /// </returns>
        public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register(RegistrationName);

            host.CheckValue(args, nameof(args));
            host.CheckValue(input, nameof(input));
            host.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column));
            host.CheckUserArg(args.SlotsInOutput > 0, nameof(args.SlotsInOutput));
            host.CheckNonWhiteSpace(args.LabelColumn, nameof(args.LabelColumn));
            host.Check(args.NumBins > 1, "numBins must be greater than 1.");

            using (var ch = host.Start("Selecting Slots"))
            {
                ch.Info("Computing mutual information");
                var sw = Stopwatch.StartNew();

                // Duplicate column names are reported once and then ignored.
                var colSet = new HashSet<string>();
                foreach (var col in args.Column)
                {
                    if (!colSet.Add(col))
                    {
                        ch.Warning("Column '{0}' specified multiple time.", col);
                    }
                }

                var colArr = colSet.ToArray();
                // One entry per distinct column; passed to TrainCore and reported in the log below.
                var colSizes = new int[colArr.Length];
                var scores = MutualInformationFeatureSelectionUtils.TrainCore(
                    host, input, args.LabelColumn, colArr, args.NumBins, colSizes);
                sw.Stop();
                ch.Info("Finished mutual information computation in {0}", sw.Elapsed);

                ch.Info("Selecting features to drop");
                var threshold = ComputeThreshold(scores, args.SlotsInOutput, out int tiedScoresToKeep);

                var columns = CreateDropSlotsColumns(colArr, colArr.Length, scores, threshold, tiedScoresToKeep, out int[] selectedCount);

                if (columns.Count <= 0)
                {
                    ch.Info("No features are being dropped.");
                    var nop = NopTransform.CreateIfNeeded(host, input);
                    // This is a normal completion too, so the channel is marked done here
                    // exactly as on the path that builds a DropSlotsTransform.
                    ch.Done();
                    return nop;
                }

                for (int i = 0; i < selectedCount.Length; i++)
                {
                    ch.Info("Selected {0} slots out of {1} in column '{2}'", selectedCount[i], colSizes[i], colArr[i]);
                }
                ch.Info("Total number of slots selected: {0}", selectedCount.Sum());

                var dsArgs = new DropSlotsTransform.Arguments();
                dsArgs.Column = columns.ToArray();
                var ds = new DropSlotsTransform(host, dsArgs, input);
                // Done() comes after construction so a throwing constructor does not leave
                // the channel marked as completed.
                ch.Done();
                return ds;
            }
        }
        /// <summary>
        /// Create method corresponding to SignatureDataTransform.
        /// Validates the arguments, obtains per-column scores and sizes from
        /// <c>CountFeatureSelectionUtils.Train</c>, and builds the drop-slots columns with
        /// <c>CreateDropSlotsColumns</c>.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="args">Transform arguments; requires at least one column and <c>Count &gt; 0</c>.</param>
        /// <param name="input">Input data view; must not be null.</param>
        /// <returns>
        /// A <c>DropSlotsTransform</c> over <paramref name="input"/> when at least one drop-slots column
        /// is produced; otherwise whatever <c>NopTransform.CreateIfNeeded</c> returns.
        /// </returns>
        public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register(RegistrationName);

            host.CheckValue(args, nameof(args));
            host.CheckValue(input, nameof(input));
            host.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column));
            host.CheckUserArg(args.Count > 0, nameof(args.Count));

            int[] colSizes;
            var scores = CountFeatureSelectionUtils.Train(host, input, args.Column, out colSizes);
            var size = args.Column.Length;

            using (var ch = host.Start("Dropping Slots"))
            {
                int[] selectedCount;
                var columns = CreateDropSlotsColumns(args, size, scores, out selectedCount);

                if (columns.Count <= 0)
                {
                    ch.Info("No features are being dropped.");
                    var nop = NopTransform.CreateIfNeeded(host, input);
                    // This is a normal completion too, so the channel is marked done here
                    // exactly as on the path that builds a DropSlotsTransform.
                    ch.Done();
                    return nop;
                }

                for (int i = 0; i < selectedCount.Length; i++)
                {
                    ch.Info(MessageSensitivity.Schema, "Selected {0} slots out of {1} in column '{2}'", selectedCount[i], colSizes[i], args.Column[i]);
                }
                ch.Info("Total number of slots selected: {0}", selectedCount.Sum());

                var dsArgs = new DropSlotsTransform.Arguments();
                dsArgs.Column = columns.ToArray();

                // Build the transform before calling Done(): if the constructor throws, the
                // channel must not already have been marked as completed.
                var ds = new DropSlotsTransform(host, dsArgs, input);
                ch.Done();
                return ds;
            }
        }
Esempio n. 4
0
 /// <summary>
 /// Returns the result of <c>NopTransform.CreateIfNeeded</c> applied to the data view of
 /// <paramref name="data"/>; this override adds no computation of its own.
 /// </summary>
 /// <param name="data">Role-mapped data whose <c>Data</c> view is passed through.</param>
 internal override IDataTransform GetPerInstanceMetricsCore(RoleMappedData data)
     => NopTransform.CreateIfNeeded(Host, data.Data);