예제 #1
0
        /// <summary>
        /// Produces temporary column names that do not collide with any column found in
        /// <paramref name="schema"/>. The outer array has one entry per element of
        /// <paramref name="columns"/>; each inner array holds one generated name per source of that column.
        /// </summary>
        public static string[][] GenerateUniqueSourceNames(IHostEnvironment env, ManyToOneColumn[] columns, Schema schema)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(columns, nameof(columns));
            env.CheckValue(schema, nameof(schema));

            var result = new string[columns.Length][];

            // The counter is shared by all columns, so no generated name is handed out twice.
            int counter = 0;
            for (int c = 0; c < columns.Length; c++)
            {
                var column = columns[c];
                env.CheckUserArg(column != null, nameof(WordHashBagProducingTransformer.Arguments.Column));
                env.CheckUserArg(!string.IsNullOrWhiteSpace(column.Name), nameof(column.Name));
                env.CheckUserArg(Utils.Size(column.Source) > 0 &&
                              column.Source.All(src => !string.IsNullOrWhiteSpace(src)), nameof(column.Source));

                var names = new string[column.Source.Length];
                for (int s = 0; s < names.Length; s++)
                {
                    string candidate;
                    int unused;

                    // Keep advancing the counter until the candidate is not a column of the schema.
                    do
                    {
                        candidate = string.Format("_tmp{0:000}", counter++);
                    }
                    while (schema.TryGetColumnIndex(candidate, out unused));

                    names[s] = candidate;
                }

                result[c] = names;
            }

            return result;
        }
예제 #2
0
        /// <summary>
        /// Drops every row that has a missing value in at least one of the given <paramref name="columns"/>.
        /// </summary>
        /// <param name="input">The input data.</param>
        /// <param name="columns">Names of the columns to filter on. A row that has a missing value in any of
        /// these columns is dropped from the dataset. At least one name is required.</param>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[FilterRowsByMissingValues](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByMissingValues.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public IDataView FilterRowsByMissingValues(IDataView input, params string[] columns)
        {
            _env.CheckValue(input, nameof(input));

            bool hasColumns = Utils.Size(columns) > 0;
            _env.CheckUserArg(hasColumns, nameof(columns));

            var filter = new NAFilter(_env, input, complement: false, columns);
            return filter;
        }
예제 #3
0
        /// <summary>
        /// Validates <paramref name="columns"/> and, for every column that has more than one source,
        /// concatenates those sources into a column with the column's name. When no column has more
        /// than one source, <paramref name="input"/> is returned as is.
        /// </summary>
        public static IDataView ApplyConcatOnSources(IHostEnvironment env, ManyToOneColumn[] columns, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(columns, nameof(columns));
            env.CheckValue(input, nameof(input));

            var toConcat = new List<ColumnConcatenatingTransformer.ColumnInfo>();
            foreach (var column in columns)
            {
                env.CheckUserArg(column != null, nameof(WordBagBuildingTransformer.Options.Columns));
                env.CheckUserArg(!string.IsNullOrWhiteSpace(column.Name), nameof(column.Name));
                env.CheckUserArg(Utils.Size(column.Source) > 0, nameof(column.Source));
                env.CheckUserArg(column.Source.All(src => !string.IsNullOrWhiteSpace(src)), nameof(column.Source));

                // A single-source column needs no concatenation.
                if (column.Source.Length <= 1)
                {
                    continue;
                }

                toConcat.Add(new ColumnConcatenatingTransformer.ColumnInfo(column.Name, column.Source));
            }

            if (toConcat.Count == 0)
            {
                return input;
            }

            var transformer = new ColumnConcatenatingTransformer(env, toConcat.ToArray());
            return transformer.Transform(input);
        }
예제 #4
0
        /// <summary>
        /// Builds a <see cref="SkipTakeFilter"/> from <paramref name="args"/>. An unset skip or take value
        /// falls back to <c>Arguments.DefaultSkip</c> or <c>Arguments.DefaultTake</c> respectively; both
        /// resulting values must be non-negative.
        /// </summary>
        public static SkipTakeFilter Create(IHostEnvironment env, Arguments args, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(args, nameof(args));

            long skipCount = args.Skip.HasValue ? args.Skip.Value : Arguments.DefaultSkip;
            long takeCount = args.Take.HasValue ? args.Take.Value : Arguments.DefaultTake;

            env.CheckUserArg(skipCount >= 0, nameof(args.Skip), "should be non-negative");
            env.CheckUserArg(takeCount >= 0, nameof(args.Take), "should be non-negative");

            return new SkipTakeFilter(skipCount, takeCount, env, input);
        }
        /// <summary>
        /// Loads a session from <paramref name="modelPath"/>. When <c>IsSavedModel</c> reports true the path
        /// must be an existing directory and is loaded with <c>LoadTFSession</c>; otherwise it must be an
        /// existing file and is loaded with <c>LoadTFSessionByModelFilePath</c>, to which
        /// <paramref name="metaGraph"/> is forwarded.
        /// </summary>
        internal static Session GetSession(IHostEnvironment env, string modelPath, bool metaGraph = false)
        {
            Contracts.Check(env != null, nameof(env));

            if (!IsSavedModel(env, modelPath))
            {
                // Not a saved model: the path has to point at a file.
                env.CheckUserArg(File.Exists(modelPath), nameof(modelPath));
                return LoadTFSessionByModelFilePath(env, modelPath, metaGraph);
            }

            // Saved model: the path has to point at a directory.
            env.CheckUserArg(Directory.Exists(modelPath), nameof(modelPath));
            return LoadTFSession(env, modelPath);
        }
예제 #6
0
        /// <summary>
        /// Loads a session from <paramref name="modelPath"/>. When <c>IsSavedModel</c> reports true the path
        /// must be an existing directory and is loaded directly; otherwise it must be an existing file,
        /// whose bytes are read and handed to <c>LoadTFSession</c> together with the path.
        /// </summary>
        internal static TFSession GetSession(IHostEnvironment env, string modelPath)
        {
            Contracts.Check(env != null, nameof(env));

            if (!IsSavedModel(env, modelPath))
            {
                // Not a saved model: the path has to point at a file whose content is the model.
                env.CheckUserArg(File.Exists(modelPath), nameof(modelPath));
                byte[] modelBytes = File.ReadAllBytes(modelPath);
                return LoadTFSession(env, modelBytes, modelPath);
            }

            // Saved model: the path has to point at a directory.
            env.CheckUserArg(Directory.Exists(modelPath), nameof(modelPath));
            return LoadTFSession(env, modelPath);
        }
예제 #7
0
        /// <summary>
        /// Validates the root cause localization input: <paramref name="src"/> and its slices must be
        /// non-null, there must be at least one slice, and at least one slice must carry the anomaly
        /// timestamp of <paramref name="src"/>.
        /// </summary>
        /// <param name="host">The environment used to raise the argument exceptions.</param>
        /// <param name="src">The input to validate.</param>
        private static void CheckRootCauseInput(IHostEnvironment host, RootCauseLocalizationInput src)
        {
            // Fail with an argument exception instead of a NullReferenceException on missing input.
            host.CheckValue(src, nameof(src));
            host.CheckValue(src.Slices, nameof(src.Slices));

            // The condition accepts a single slice, so the message must say "at least one".
            host.CheckUserArg(src.Slices.Count >= 1, nameof(src.Slices), "Must have at least one item");

            bool containsAnomalyTimestamp = false;

            foreach (MetricSlice slice in src.Slices)
            {
                if (slice.TimeStamp.Equals(src.AnomalyTimestamp))
                {
                    containsAnomalyTimestamp = true;
                    // One match is enough; no need to scan the remaining slices.
                    break;
                }
            }
            host.CheckUserArg(containsAnomalyTimestamp, nameof(src.Slices), "Has no points in the given anomaly timestamp");
        }
예제 #8
0
 /// <summary>
 /// Builds a <see cref="SkipTakeFilter"/> that skips <c>options.Count</c> rows and uses
 /// <c>Options.DefaultTake</c> as the take value. The count must be non-negative.
 /// </summary>
 public static SkipTakeFilter Create(IHostEnvironment env, SkipOptions options, IDataView input)
 {
     Contracts.CheckValue(env, nameof(env));
     env.CheckValue(options, nameof(options));

     bool countIsValid = options.Count >= 0;
     env.CheckUserArg(countIsValid, nameof(options.Count), "should be non-negative");

     var filter = new SkipTakeFilter(options.Count, Options.DefaultTake, env, input);
     return filter;
 }
예제 #9
0
        /// <summary>
        /// Returns the scorer to use and stores it in <c>_scorer</c>. A non-null <paramref name="scorer"/>
        /// is used as is; otherwise a default scorer is created for the predictor tagged
        /// <c>args.taggedPredictor</c>. <paramref name="sourceCtx"/> is always set to <paramref name="input"/>.
        /// </summary>
        IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input, out IDataView sourceCtx, IDataScorerTransform scorer)
        {
            // Assigned before the checks, exactly as the out parameter was in the original.
            sourceCtx = input;

            Contracts.CheckValue(env, "env");
            env.CheckValue(args, "args");
            env.CheckValue(input, "input");
            env.CheckUserArg(!string.IsNullOrWhiteSpace(args.taggedPredictor), "taggedPredictor",
                             "The input tag is required.");

            // A scorer supplied by the caller wins over building a default one.
            if (scorer != null)
            {
                _scorer = scorer;
                return scorer;
            }

            var taggedPredictor = TagHelper.GetTaggedPredictor(env, input, args.taggedPredictor);
            string featureName = TrainUtils.MatchNameOrDefaultOrNull(env, input.Schema,
                "featureColumn", args.featureColumn, DefaultColumnNames.Features);
            string groupName = TrainUtils.MatchNameOrDefaultOrNull(env, input.Schema,
                "groupColumn", args.groupColumn, DefaultColumnNames.GroupId);

            // The result is not used here; the call is kept in place.
            // NOTE(review): presumably it is made for its validation of customColumnPair — confirm.
            TrainUtils.CheckAndGenerateCustomColumns(env, args.customColumnPair);

            _scorer = PredictorHelper.CreateDefaultScorer(_host, input, featureName, groupName, taggedPredictor);
            return _scorer;
        }
        /// <summary>
        /// Returns an ordered list of <see cref="RootCause"/>s. The order reflects how likely each prepared cause is to be the root cause, most likely first.
        /// </summary>
        /// <param name="catalog">The anomaly detection catalog.</param>
        /// <param name="src">Root cause's input. The data is an instance of <see cref="Microsoft.ML.TimeSeries.RootCauseLocalizationInput"/>.</param>
        /// <param name="beta">A user-chosen weight used when the score of each root cause item is calculated. Must be in [0,1]. With a larger beta, a root cause point with a large difference between value and expected value gets a high score; with a smaller beta, root cause items with a high relative change get a high score.</param>
        /// <param name="rootCauseThreshold">A threshold that decides whether a point is a root cause. Must be in [0,1].
        /// A point whose delta is equal to or larger than rootCauseThreshold multiplied by the anomaly dimension point's delta is treated as a root cause. Different thresholds produce different results; choose one according to your data and requirements.</param>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[LocalizeRootCauseMultipleDimensions](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseMultipleDimensions.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public static List<RootCause> LocalizeRootCauses(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.5, double rootCauseThreshold = 0.95)
        {
            IHostEnvironment env = CatalogUtils.GetEnvironment(catalog);

            // Validate the input first, then the two tuning parameters.
            CheckRootCauseInput(env, src);

            bool betaInRange = 0 <= beta && beta <= 1;
            env.CheckUserArg(betaInRange, nameof(beta), "Must be in [0,1]");

            bool thresholdInRange = 0 <= rootCauseThreshold && rootCauseThreshold <= 1;
            env.CheckUserArg(thresholdInRange, nameof(rootCauseThreshold), "Must be in [0,1]");

            // Collect the possible causes.
            return new RootCauseAnalyzer(src, beta, rootCauseThreshold).AnalyzePossibleCauses();
        }
        /// <summary>
        /// Trains a predictor with the trainer described by <c>args.Trainer</c> on <paramref name="input"/>
        /// and returns a scorer for that predictor over the same input.
        /// </summary>
        public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(args, nameof(args));
            env.CheckValue(input, nameof(input));
            env.CheckUserArg(args.Trainer.IsGood(), nameof(args.Trainer),
                             "Trainer cannot be null. If your model is already trained, please use ScoreTransform instead.");

            var trainHost = env.Register("TrainAndScoreTransform");

            using (var channel = trainHost.Start("Train"))
            {
                channel.Trace("Constructing trainer");
                ITrainer learner = args.Trainer.CreateInstance(trainHost);
                var customColumns = TrainUtils.CheckAndGenerateCustomColumns(env, args.CustomColumn);

                // The feature and group column names are resolved while the training data is built.
                string featureName;
                string groupName;
                var trainingData = CreateDataFromArgs(channel, input, args, out featureName, out groupName);

                var trained = TrainUtils.Train(trainHost, channel, trainingData, learner, args.Trainer.Kind, null,
                    args.Calibrator, args.MaxCalibrationExamples, null);

                channel.Done();

                return ScoreUtils.GetScorer(args.Scorer, trained, input, featureName, groupName, customColumns, env, trainingData.Schema);
            }
        }
예제 #12
0
 /// <summary>
 /// Builds a <see cref="SkipTakeFilter"/> that uses <c>Arguments.DefaultSkip</c> as the skip value and
 /// takes <c>args.Count</c> rows. The count must be non-negative.
 /// </summary>
 public static SkipTakeFilter Create(IHostEnvironment env, TakeArguments args, IDataView input)
 {
     Contracts.CheckValue(env, nameof(env));
     env.CheckValue(args, nameof(args));

     bool countIsValid = args.Count >= 0;
     env.CheckUserArg(countIsValid, nameof(args.Count), "should be non-negative");

     var filter = new SkipTakeFilter(Arguments.DefaultSkip, args.Count, env, input);
     return filter;
 }
예제 #13
0
        /// <summary>
        /// Checks that <paramref name="modelFile"/> is a non-blank path to an existing file, reads the
        /// file's bytes and loads a session from them through <c>TensorFlowUtils.LoadTFSession</c>.
        /// </summary>
        private static TFSession CheckFileAndRead(IHostEnvironment env, string modelFile)
        {
            env.CheckNonWhiteSpace(modelFile, nameof(modelFile));

            bool fileExists = File.Exists(modelFile);
            env.CheckUserArg(fileExists, nameof(modelFile));

            byte[] modelBytes = File.ReadAllBytes(modelFile);
            return TensorFlowUtils.LoadTFSession(env, modelBytes, modelFile);
        }
예제 #14
0
        /// <summary>
        /// Validates the arguments, instantiates the trainer described by <c>args.Trainer</c> and
        /// delegates to the five-argument <c>Create</c> overload with a null last argument.
        /// </summary>
        public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(args, nameof(args));

            bool hasTrainer = args.Trainer.IsGood();
            env.CheckUserArg(hasTrainer, nameof(args.Trainer),
                "Trainer cannot be null. If your model is already trained, please use ScoreTransform instead.");
            env.CheckValue(input, nameof(input));

            var trainer = args.Trainer.CreateInstance(env);
            return Create(env, args, trainer, input, null);
        }
예제 #15
0
        // Factory method for SignatureDataTransform.
        // Fits a CountTargetEncodingEstimator on the input and returns the result of its
        // training-pipeline transformation of that same input.
        internal static IDataTransform Create(IHostEnvironment env, Options options, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(options, nameof(options));

            bool hasColumns = Utils.Size(options.Columns) > 0;
            env.CheckUserArg(hasColumns, nameof(options.Columns));

            var fitted = new CountTargetEncodingEstimator(env, options).Fit(input);

            // NOTE(review): both conversions use 'as', so a type mismatch surfaces as a
            // NullReferenceException on the next line or as a null return value.
            var trainingTimeTransformer = fitted as ITransformerWithDifferentMappingAtTrainingTime;
            var transformed = trainingTimeTransformer.TransformForTrainingPipeline(input);

            return transformed as IDataTransform;
        }
예제 #16
0
        /// <summary>
        /// Validates the arguments, instantiates the trainer described by <c>args.Trainer</c> and
        /// delegates to the five-argument <c>Create</c> overload. <paramref name="mapperFactory"/> may be null.
        /// </summary>
        public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input, IComponentFactory<IPredictor, ISchemaBindableMapper> mapperFactory)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(args, nameof(args));

            bool hasTrainer = args.Trainer.IsGood();
            env.CheckUserArg(hasTrainer, nameof(args.Trainer),
                "Trainer cannot be null. If your model is already trained, please use ScoreTransform instead.");
            env.CheckValue(input, nameof(input));
            env.CheckValueOrNull(mapperFactory);

            var trainer = args.Trainer.CreateInstance(env);
            return Create(env, args, trainer, input, mapperFactory);
        }
        /// <summary>
        /// Validates <paramref name="columns"/> and builds an estimator chain that holds one
        /// <c>ColumnConcatenatingEstimator</c> per column with more than one source. Columns with a
        /// single source add nothing to the chain.
        /// </summary>
        public static IEstimator<ITransformer> GetConcatEstimator(IHostEnvironment env, ManyToOneColumn[] columns)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(columns, nameof(columns));

            var chain = new EstimatorChain<ITransformer>();

            foreach (var column in columns)
            {
                env.CheckUserArg(column != null, nameof(WordBagBuildingTransformer.Options.Columns));
                env.CheckUserArg(!string.IsNullOrWhiteSpace(column.Name), nameof(column.Name));
                env.CheckUserArg(Utils.Size(column.Source) > 0, nameof(column.Source));
                env.CheckUserArg(column.Source.All(src => !string.IsNullOrWhiteSpace(src)), nameof(column.Source));

                // A single-source column needs no concatenation.
                if (column.Source.Length <= 1)
                {
                    continue;
                }

                var concat = new ColumnConcatenatingEstimator(env, column.Name, column.Source);
                chain = chain.Append<ITransformer>(concat);
            }

            return chain;
        }
예제 #18
0
        // Constructor whose body is a catalogue of contract-check call shapes: parameter names given
        // through nameof versus string literals, exceptions that are created but not thrown, and
        // messages built from literals, interpolation, string.Format, variables and concatenation.
        // NOTE(review): the trailing "Should fail" / "Should succeed" / "Should not fail" remarks look
        // like expectations for a static analyzer that inspects these calls, which would make this a
        // test fixture rather than production code — confirm before changing any statement or moving lines.
        public TypeName(IHostEnvironment env, float p, int foo)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckParam(0 <= p && p <= 1, nameof(p), "Should be in range [0,1]");
            env.CheckParam(0 <= p && p <= 1, "p");                   // Should fail.
            env.CheckParam(0 <= p && p <= 1, nameof(p) + nameof(p)); // Should fail.
            env.CheckValue(paramName: nameof(p), val: "p");          // Should succeed despite confusing order.
            env.CheckValue(paramName: "p", val: nameof(p));          // Should fail despite confusing order.
            env.CheckValue("p", nameof(p));
            env.CheckUserArg(foo > 5, "foo", "Nice");
            env.CheckUserArg(foo > 5, nameof(foo), "Nice");
            env.Except();                                           // Not throwing or doing anything with the exception, so should fail.
            Contracts.ExceptParam(nameof(env), "What a silly env"); // Should also fail.
            if (false)
            {
                throw env.Except(); // Should not fail.
            }
            if (false)
            {
                throw env.ExceptParam(nameof(env), "What a silly env"); // Should not fail.
            }
            if (false)
            {
                throw env.ExceptParam("env", "What a silly env"); // Should fail due to name error.
            }
            // Here the created exception is stored in a local instead of being discarded.
            var e = env.Except();

            // The remaining calls vary only in how the message argument is produced.
            env.Check(true, $"Hello {foo} is cool");
            env.Check(true, "Hello it is cool");
            string coolMessage = "Hello it is cool";

            env.Check(true, coolMessage);
            env.Check(true, string.Format("Hello {0} is cool", foo));
            env.Check(true, Messages.CoolMessage);
            env.CheckDecode(true, "Not suspicious, no ModelLoadContext");
            Contracts.Check(true, "Fine: " + nameof(env));
            Contracts.Check(true, "Less fine: " + env.GetType().Name);
            Contracts.CheckUserArg(0 <= p && p <= 1,
                                   "p", "On a new line");
        }
예제 #19
0
        /// <summary>
        /// Validates <paramref name="columns"/> and, when at least one column has more than one source,
        /// wraps <paramref name="input"/> in a <c>ConcatTransform</c> that concatenates the sources of
        /// each such column into the column's name. Otherwise <paramref name="input"/> is returned as is.
        /// </summary>
        public static IDataView ApplyConcatOnSources(IHostEnvironment env, ManyToOneColumn[] columns, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(columns, nameof(columns));
            env.CheckValue(input, nameof(input));

            var toConcat = new List<ConcatTransform.Column>();

            foreach (var column in columns)
            {
                env.CheckUserArg(column != null, nameof(WordBagTransform.Arguments.Column));
                env.CheckUserArg(!string.IsNullOrWhiteSpace(column.Name), nameof(column.Name));
                env.CheckUserArg(Utils.Size(column.Source) > 0, nameof(column.Source));
                env.CheckUserArg(column.Source.All(src => !string.IsNullOrWhiteSpace(src)), nameof(column.Source));

                // A single-source column needs no concatenation.
                if (column.Source.Length <= 1)
                {
                    continue;
                }

                var concatColumn = new ConcatTransform.Column { Source = column.Source, Name = column.Name };
                toConcat.Add(concatColumn);
            }

            if (toConcat.Count == 0)
            {
                return input;
            }

            var concatArgs = new ConcatTransform.Arguments { Column = toConcat.ToArray() };
            return new ConcatTransform(env, concatArgs, input);
        }
예제 #20
0
        /// <summary>
        /// Creates a <see cref="RootCause"/>, which localizes root causes using a decision tree algorithm.
        /// </summary>
        /// <param name="catalog">The anomaly detection catalog.</param>
        /// <param name="src">Root cause's input. The data is an instance of <see cref="Microsoft.ML.TimeSeries.RootCauseLocalizationInput"/>.</param>
        /// <param name="beta">A user-chosen weight used when the score of each root cause item is calculated. Must be in [0,1]. With a larger beta, a root cause point with a large difference between value and expected value gets a high score; with a smaller beta, root cause items with a high relative change get a high score.</param>
        /// <param name="anomalyDeltaThreshold">A threshold that decides whether a point is a root cause. A point whose delta is equal to or larger than anomalyDeltaThreshold multiplied by the anomaly dimension point's delta is treated as a root cause. Different thresholds produce different results; choose one according to your data and requirements. This method does not range-check the value.</param>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[LocalizeRootCause](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.5, double anomalyDeltaThreshold = 0.95)
        {
            IHostEnvironment env = CatalogUtils.GetEnvironment(catalog);

            // Validate the input first, then beta.
            CheckRootCauseInput(env, src);

            bool betaInRange = 0 <= beta && beta <= 1;
            env.CheckUserArg(betaInRange, nameof(beta), "Must be in [0,1]");

            // Find the root cause.
            return new RootCauseAnalyzer(src, beta, anomalyDeltaThreshold).Analyze();
        }
            /// <summary>
            /// Validates the arguments, registers a host under <paramref name="name"/> and stores the
            /// options, the server factory and the host on the instance.
            /// </summary>
            /// <param name="env">The environment to register with; must not be null.</param>
            /// <param name="options">The options; must not be null and must not request a negative degree of parallelism.</param>
            /// <param name="name">The registration name; must not be null or white space.</param>
            protected ImplBase(IHostEnvironment env, TOptions options, string name)
            {
                Contracts.CheckValue(env, nameof(env));
                env.CheckValue(options, nameof(options));

                // Written as a negation so that a null Parallel value passes the check.
                env.CheckUserArg(!(options.Parallel < 0), nameof(options.Parallel), "Degree of parallelism must be non-negative (or null)");

                // Validate the name before its first use in Register below.
                env.CheckNonWhiteSpace(name, nameof(name));

                // Capture the environment options from args.
                env = env.Register(name, options.RandomSeed, options.Verbose);

                Host           = env.Register(name);
                ImplOptions    = options;
                _serverFactory = options.Server;
                Utils.CheckOptionalUserDirectory(options.OutputModelFile, nameof(options.OutputModelFile));
            }
예제 #22
0
            /// <summary>
            /// Validates the arguments, registers a host under <paramref name="name"/> and stores the
            /// arguments, the server factory and the host on the instance.
            /// The degree of concurrency is passed in <paramref name="conc"/>. If it is null, the value
            /// of args.Parallel is used. If that is null as well, null is handed to Register
            /// (the original comment states that zero, meaning "automatic", is then used).
            /// </summary>
            /// <param name="env">The environment to register with; must not be null.</param>
            /// <param name="args">The arguments; must not be null.</param>
            /// <param name="name">The registration name; must not be null or white space.</param>
            /// <param name="conc">Optional degree of concurrency; must be null or non-negative.</param>
            protected ImplBase(IHostEnvironment env, TArgs args, string name, int? conc = null)
            {
                Contracts.CheckValue(env, nameof(env));
                env.CheckValue(args, nameof(args));
                env.CheckParam(conc == null || conc >= 0, nameof(conc), "Degree of concurrency must be non-negative (or null)");

                conc = conc ?? args.Parallel;

                // Written as a negation so that a null value passes the check.
                env.CheckUserArg(!(conc < 0), nameof(args.Parallel), "Degree of parallelism must be non-negative (or null)");

                // Validate the name before its first use in Register below.
                env.CheckNonWhiteSpace(name, nameof(name));

                // Capture the environment options from args.
                env = env.Register(name, args.RandomSeed, args.Verbose, conc);

                Host           = env.Register(name);
                Args           = args;
                _serverFactory = args.Server;
                Utils.CheckOptionalUserDirectory(args.OutputModelFile, nameof(args.OutputModelFile));
            }
예제 #23
0
        /// <summary>
        /// Validates the sweeper arguments, creates the numeric value generators for the swept
        /// parameters and the first-batch sweeper, and initializes the simplex bookkeeping.
        /// </summary>
        /// <param name="env">The host environment; must not be null.</param>
        /// <param name="args">The sweeper arguments; must not be null. At least two numeric
        /// parameters must be swept.</param>
        public NelderMeadSweeper(IHostEnvironment env, Arguments args)
        {
            Contracts.CheckValue(env, nameof(env));

            // args is dereferenced by every check below, so reject null up front.
            env.CheckValue(args, nameof(args));

            env.CheckUserArg(-1 < args.DeltaInsideContraction, nameof(args.DeltaInsideContraction), "Must be greater than -1");
            env.CheckUserArg(args.DeltaInsideContraction < 0, nameof(args.DeltaInsideContraction), "Must be less than 0");
            env.CheckUserArg(0 < args.DeltaOutsideContraction, nameof(args.DeltaOutsideContraction), "Must be greater than 0");
            env.CheckUserArg(args.DeltaReflection > args.DeltaOutsideContraction, nameof(args.DeltaReflection), "Must be greater than " + nameof(args.DeltaOutsideContraction));
            env.CheckUserArg(args.DeltaExpansion > args.DeltaReflection, nameof(args.DeltaExpansion), "Must be greater than " + nameof(args.DeltaReflection));
            env.CheckUserArg(0 < args.GammaShrink && args.GammaShrink < 1, nameof(args.GammaShrink), "Must be between 0 and 1");
            env.CheckValue(args.FirstBatchSweeper, nameof(args.FirstBatchSweeper), "First Batch Sweeper Contains Null Value");

            // Enumerating a null collection would otherwise surface as a NullReferenceException.
            env.CheckUserArg(args.SweptParameters != null, nameof(args.SweptParameters), "Nelder-Mead sweeper needs at least two parameters to sweep over.");

            _args = args;

            _sweepParameters = new List<IValueGenerator>();
            foreach (var sweptParameter in args.SweptParameters)
            {
                var parameter = sweptParameter.CreateComponent(env);
                // REVIEW: ideas about how to support discrete values:
                // 1. assign each discrete value a random number (1-n) to make mirroring possible
                // 2. each time we need to mirror a discrete value, sample from the remaining value
                // 2.1. make the sampling non-uniform by learning "weights" for the different discrete values based on
                // the metric values that we get when using them. (For example, if, for a given discrete value, we get a bad result,
                // we lower its weight, but if we get a good result we increase its weight).
                var parameterNumeric = parameter as INumericValueGenerator;
                env.CheckUserArg(parameterNumeric != null, nameof(args.SweptParameters), "Nelder-Mead sweeper can only sweep over numeric parameters");
                _sweepParameters.Add(parameterNumeric);
            }

            _initSweeper = args.FirstBatchSweeper.CreateComponent(env, _sweepParameters.ToArray());
            _dim         = _sweepParameters.Count;
            env.CheckUserArg(_dim > 1, nameof(args.SweptParameters), "Nelder-Mead sweeper needs at least two parameters to sweep over.");

            _simplexVertices           = new SortedList<IRunResult, Float[]>(new SimplexVertexComparer());
            _stage                     = OptimizationStage.NeedReflectionPoint;
            _pendingSweeps             = new List<KeyValuePair<ParameterSet, Float[]>>();
            _pendingSweepsNotSubmitted = new Queue<KeyValuePair<ParameterSet, Float[]>>();
        }
예제 #24
0
        /// <summary>
        /// Loads a predictor, and the training schema when one is stored, from the model file named by
        /// <c>args.InputModelFile</c> and returns a scorer for that predictor over <paramref name="input"/>.
        /// </summary>
        /// <param name="env">The host environment; must not be null.</param>
        /// <param name="args">The arguments; must not be null and must name an input model file.</param>
        /// <param name="input">The data to score; must not be null.</param>
        public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));

            // Both are dereferenced below; reject null before the model file is opened.
            env.CheckValue(args, nameof(args));
            env.CheckValue(input, nameof(input));
            env.CheckUserArg(!string.IsNullOrWhiteSpace(args.InputModelFile), nameof(args.InputModelFile), "The input model file is required.");

            IPredictor       predictor;
            RoleMappedSchema trainSchema = null;

            using (var file = env.OpenInputFile(args.InputModelFile))
            using (var strm = file.OpenReadStream())
            using (var rep = RepositoryReader.Open(strm, env))
            {
                ModelLoadContext.LoadModel<IPredictor, SignatureLoadModel>(env, out predictor, rep, ModelFileUtils.DirPredictor);
                trainSchema = ModelFileUtils.LoadRoleMappedSchemaOrNull(env, rep);
            }

            string feat = TrainUtils.MatchNameOrDefaultOrNull(env, input.Schema,
                                                              nameof(args.FeatureColumn), args.FeatureColumn, DefaultColumnNames.Features);
            string group = TrainUtils.MatchNameOrDefaultOrNull(env, input.Schema,
                                                               nameof(args.GroupColumn), args.GroupColumn, DefaultColumnNames.GroupId);
            var customCols = TrainUtils.CheckAndGenerateCustomColumns(env, args.CustomColumn);

            return ScoreUtils.GetScorer(args.Scorer, predictor, input, feat, group, customCols, env, trainSchema);
        }
예제 #25
0
        /// <summary>
        /// Resolves the predictor to use and forwards to the base implementation. A non-null
        /// <paramref name="overwritePredictor"/> is used as is; otherwise the predictor tagged
        /// <c>args.taggedPredictor</c> is looked up, which requires that tag to be non-blank.
        /// </summary>
        protected override IDataTransform Create(IHostEnvironment env, PredictTransform.Arguments args_, IDataView input, out IDataView sourceCtx, IPredictor overwritePredictor)
        {
            Contracts.CheckValue(env, "env");
            env.CheckValue(args_, "args_");

            // A failed conversion yields null, which the next check reports under the name "args".
            var args = args_ as Arguments;
            env.CheckValue(args, "args");
            env.CheckValue(input, "input");

            IPredictor predictor = overwritePredictor;
            if (predictor == null)
            {
                // The tag is only required when no predictor was supplied by the caller.
                env.CheckUserArg(!string.IsNullOrWhiteSpace(args.taggedPredictor), "taggedPredictor",
                    "The input tag is required.");
                predictor = TagHelper.GetTaggedPredictor(env, input, args.taggedPredictor);
            }

            return base.Create(env, args, input, out sourceCtx, predictor);
        }
예제 #26
0
        /// <summary>
        /// Validates the command arguments, registers the host and copies the arguments into the
        /// instance fields. When <c>args.Parallel</c> is not set, the processor count is used.
        /// </summary>
        /// <param name="env">The host environment; must not be null.</param>
        /// <param name="args">The command arguments; must not be null.</param>
        public SumupPerformanceCommand(IHostEnvironment env, Arguments args)
        {
            Contracts.CheckValue(env, nameof(env));

            // args is dereferenced on the very next line, so it has to be validated here rather
            // than after the host has been registered.
            env.CheckValue(args, nameof(args));

            // Capture the environment options from args.
            env.CheckUserArg(!args.Parallel.HasValue || args.Parallel > 0, nameof(args.Parallel), "If defined must be positive");

            _host = env.Register("FastTreeSumupPerformance", args.RandomSeed, args.Verbose, args.Parallel);

            _host.CheckUserArg(Enum.IsDefined(typeof(IntArrayType), args.Type) && args.Type != IntArrayType.Current, nameof(args.Type), "Value not defined");
            _host.CheckUserArg(args.Length >= 0, nameof(args.Length), "Must be non-negative");
            _host.CheckUserArg(args.Count >= 0, nameof(args.Count), "Must be non-negative");
            _host.CheckUserArg(args.Bins > 0, nameof(args.Bins), "Must be positive");
            _host.CheckUserArg(args.Seconds > 0, nameof(args.Seconds), "Must be positive");

            _type     = args.Type;
            _len      = args.Length;
            _count    = args.Count;
            _bins     = args.Bins;
            _parallel = args.Parallel ?? Environment.ProcessorCount;
            _param    = args.Parameter;
            _seconds  = args.Seconds;
        }
        /// <summary>
        /// Detects the predictable interval (or period) of the input values by adopting techniques of Fourier analysis.
        /// Returns -1 if no such pattern is found, that is, the input values do not follow a seasonal fluctuation,
        /// and also when the input yields no values at all.
        /// </summary>
        /// <param name="host">The detect seasonality host environment.</param>
        /// <param name="input">Input DataView. The data is an instance of <see cref="Microsoft.ML.IDataView"/>.</param>
        /// <param name="inputColumnName">Name of column to process. The column data must be <see cref="System.Double"/>.</param>
        /// <param name="seasonalityWindowSize">An upper bound on the number of values to be considered in the input values.
        /// When set to -1, use the whole input to fit model; when set to a positive integer, use this number as batch size.
        /// NOTE(review): the argument check also accepts 0, in which case a single value is read — confirm
        /// whether 0 should be rejected.</param>
        /// <param name="randomessThreshold">Randomness threshold, ranging from [0, 1]. It specifies how confident the caller
        /// is that the input follows a predictable pattern recurring as seasonal data.
        /// </param>
        /// <returns>The detected period if seasonality period exists, otherwise return -1.</returns>
        public int DetectSeasonality(
            IHostEnvironment host,
            IDataView input,
            string inputColumnName,
            int seasonalityWindowSize,
            double randomessThreshold)
        {
            host.CheckValue(input, nameof(input));
            host.CheckValue(inputColumnName, nameof(inputColumnName));
            host.CheckUserArg(seasonalityWindowSize == -1 || seasonalityWindowSize >= 0, nameof(seasonalityWindowSize));

            var column = input.Schema.GetColumnOrNull(inputColumnName);

            host.CheckUserArg(column.HasValue, nameof(inputColumnName));

            var valueCache = seasonalityWindowSize == -1 ? new List<double>() : new List<double>(seasonalityWindowSize);

            // The cursor is disposable: release it as soon as the values have been buffered
            // instead of leaving it open for the rest of the computation.
            using (var rowCursor = input.GetRowCursor(new List<DataViewSchema.Column>() { column.Value }))
            {
                var valueDelegate = rowCursor.GetGetter<double>(column.Value);
                double valueRef = 0;

                while (rowCursor.MoveNext())
                {
                    valueDelegate.Invoke(ref valueRef);
                    valueCache.Add(valueRef);
                    if (seasonalityWindowSize != -1 && valueCache.Count >= seasonalityWindowSize)
                    {
                        break;
                    }
                }
            }

            int length = valueCache.Count;

            // Nothing was read, so there is no pattern to detect; skip the transforms on empty buffers.
            if (length == 0)
            {
                return -1;
            }

            double[] fftRe   = new double[length];
            double[] fftIm   = new double[length];
            double[] inputRe = valueCache.ToArray();

            // The input is purely real, so its imaginary part is all zeros.
            FftUtils.ComputeForwardFft(inputRe, new double[length], fftRe, fftIm, length);

            var energies = fftRe.Select((m, i) => new Complex(m, fftIm[i])).ToArray();

            /* Periodogram indicates the square of "energy" on the  frequency domain.
             * Specifically, periodogram[j] = a[j]^2+b[j]^2, where a and b are Fourier Coefficients for cosine and sine,
             * x(t) = a0+sum(a[j]cos(2Pi * f[j]t)+b[j]sin(2Pi * f[j]t)
             */
            var periodogram = energies.Select(t => t * Complex.Conjugate(t)).ToArray();

            FindBestTwoFrequencies(periodogram, length, out var bestFreq, out var secondFreq);

            double[] ifftRe = new double[length];
            double[] ifftIm = new double[length];
            FftUtils.ComputeBackwardFft(
                periodogram.Select(t => t.Real).ToArray(),
                periodogram.Select(t => t.Imaginary).ToArray(),
                ifftRe,
                ifftIm,
                length);
            var values = ifftRe.Select((t, i) => new Complex(t, ifftIm[i])).ToArray();

            int period = FindActualPeriod(values, bestFreq, secondFreq, length, randomessThreshold);

            // Any negative result is reported as -1.
            return period < 0 ? -1 : period;
        }
 /// <summary>
 /// Checks that <paramref name="modelFile"/> is a non-blank path to an existing file and returns
 /// the file's content as a byte array.
 /// </summary>
 private static byte[] CheckFileAndRead(IHostEnvironment env, string modelFile)
 {
     env.CheckNonWhiteSpace(modelFile, nameof(modelFile));

     bool fileExists = File.Exists(modelFile);
     env.CheckUserArg(fileExists, nameof(modelFile));

     byte[] content = File.ReadAllBytes(modelFile);
     return content;
 }