private static VBuffer <Float>[] GaussianMatrix(int k, int d, int seed)
        {
            // Build a k-by-d matrix whose entries are i.i.d. standard Gaussian
            // draws; the explicit seed keeps the matrix reproducible.
            var matrix = Zeros(k, d);
            var rng    = new SysRandom(seed);

            // REVIEW: drawing one sample at a time is slow for large matrices;
            // a vectorized generator (e.g. MKL's random number generation)
            // would be much faster.
            for (var row = 0; row < k; ++row)
            {
                var values = matrix[row].Values;
                for (var col = 0; col < d; ++col)
                {
                    values[col] = (Float)Stats.SampleFromGaussian(rng);
                }
            }
            return matrix;
        }
        /// <summary>
        /// Draws n random integers in [0, N-1].
        /// They can be distinct or not.
        /// The function is not efficient if n is close to N and distinct is true
        /// (rejection sampling: collisions become frequent as the set fills up).
        /// </summary>
        /// <param name="n">Number of integers to draw.</param>
        /// <param name="N">Exclusive upper bound; values are drawn from [0, N-1].</param>
        /// <param name="distinct">If true, all drawn values are pairwise distinct.</param>
        /// <param name="rand">Optional random source; a new time-seeded one is created when null.</param>
        public static int[] RandomIntegers(int n, int N, bool distinct = false, IRandom rand = null)
        {
            var res = new int[n];

            if (rand == null)
            {
                rand = new SysRandom();
            }
            if (distinct)
            {
                if (n > N)
                {
                    throw new DataValueError($"Cannot draw more than {N} distinct values.");
                }
                // HashSet<T>.Add returns false for duplicates, so one call both
                // tests membership and records the value (avoids the
                // Contains-then-Add double lookup).
                var hash = new HashSet <int>();
                int nb   = 0;
                while (nb < n)
                {
                    int i = rand.Next(N);
                    if (hash.Add(i))
                    {
                        res[nb] = i;
                        ++nb;
                    }
                }
            }
            else
            {
                for (int i = 0; i < n; ++i)
                {
                    res[i] = rand.Next(N);
                }
            }
            return(res);
        }
Esempio n. 3
0
        /// <summary>
        /// Sample child configuration from configuration centered at parent, using fitness proportional mutation.
        /// </summary>
        /// <param name="parent">Starting parent configuration (used as mean in multivariate Gaussian).</param>
        /// <param name="fitness">Numeric value indicating how good a configuration parent is.</param>
        /// <param name="n">Count of how many items currently in history.</param>
        /// <param name="previousRuns">Run history.</param>
        /// <param name="rMean">Mean metric value of previous random runs.</param>
        /// <param name="rVar">Metric value empirical variance of previous random runs.</param>
        /// <param name="isMetricMaximizing">Flag for if we are minimizing or maximizing values.</param>
        /// <returns>A mutated version of parent (i.e., point sampled near parent).</returns>
        private ParameterSet SampleChild(ParameterSet parent, double fitness, int n, IRunResult[] previousRuns, double rMean, double rVar, bool isMetricMaximizing)
        {
            Float[]       child = SweeperProbabilityUtils.ParameterSetAsFloatArray(_host, _sweepParameters, parent, false);
            List <int>    numericParamIndices = new List <int>();
            List <double> numericParamValues  = new List <double>();
            int           loopCount           = 0;
            // One RNG per call, created lazily since it is only needed on the Beta
            // path. Allocating a fresh time-seeded Random on every retry iteration
            // (as before) risks identical seeds on older runtimes and wastes allocations.
            SysRandom rng = null;

            // Interleave uniform random samples, according to proportion defined.
            if (_spu.SampleUniform() <= _args.ProportionRandom)
            {
                ParameterSet ps = _randomSweeper.ProposeSweeps(1)[0];
                _randomParamSets.Add(ps);
                return(ps);
            }

            do
            {
                // BUGFIX: reset the per-iteration numeric-parameter buffers. Without
                // this, every retry of the do-loop appends duplicate indices/values,
                // inflating mu, stddevs, and the bandwidth matrix on each pass.
                numericParamIndices.Clear();
                numericParamValues.Clear();

                for (int i = 0; i < _sweepParameters.Length; i++)
                {
                    // This allows us to query possible values of this parameter.
                    var sweepParam = _sweepParameters[i];

                    if (sweepParam is DiscreteValueGenerator parameterDiscrete)
                    {
                        // Sample categorical parameter.
                        double[] categoryWeights = _args.LegacyDpBehavior
                            ? CategoriesToWeightsOld(parameterDiscrete, previousRuns)
                            : CategoriesToWeights(parameterDiscrete, previousRuns);
                        child[i] = SampleCategoricalDist(1, categoryWeights)[0];
                    }
                    else
                    {
                        var parameterNumeric = sweepParam as INumericValueGenerator;
                        _host.Check(parameterNumeric != null, "KDO sweeper can only sweep over discrete and numeric parameters");
                        numericParamIndices.Add(i);
                        numericParamValues.Add(child[i]);
                    }
                }

                if (numericParamIndices.Count > 0)
                {
                    if (!_args.Beta)
                    {
                        // Sample point from multivariate Gaussian, centered on parent values, with mutation proportional to fitness.
                        double[] mu           = numericParamValues.ToArray();
                        // Map fitness through the normal CDF of the random-run metric
                        // distribution; mirror around the mean when minimizing so that
                        // better fitness always yields a smaller mutation spread.
                        double   correctedVal = isMetricMaximizing
                            ? 1.0 - SweeperProbabilityUtils.NormalCdf(fitness, rMean, rVar)
                            : 1.0 - SweeperProbabilityUtils.NormalCdf(2 * rMean - fitness, rMean, rVar);
                        double     bandwidthScale  = Math.Max(_args.MinimumMutationSpread, correctedVal);
                        double[]   stddevs         = Enumerable.Repeat(_args.Simple ? 0.2 : bandwidthScale, mu.Length).ToArray();
                        double[][] bandwidthMatrix = BuildBandwidthMatrix(n, stddevs);
                        double[]   sampledPoint    = SampleDiagonalCovMultivariateGaussian(1, mu, bandwidthMatrix)[0];
                        for (int j = 0; j < sampledPoint.Length; j++)
                        {
                            child[numericParamIndices[j]] = (Float)Corral(sampledPoint[j]);
                        }
                    }
                    else
                    {
                        // If Beta flag set, sample from independent Beta distributions instead.
                        if (rng == null)
                        {
                            rng = new SysRandom();
                        }
                        double alpha = 1 + 15 * fitness;
                        foreach (int index in numericParamIndices)
                        {
                            // Clamp away from {0, 1} so the derived beta parameter stays finite.
                            const double epsCutoff = 1e-10;
                            double       eps       = Math.Min(Math.Max(child[index], epsCutoff), 1 - epsCutoff);
                            double       beta      = alpha / eps - alpha;
                            child[index] = (Float)Stats.SampleFromBeta(rng, alpha, beta);
                        }
                    }
                }

                // Don't get stuck at local point.
                loopCount++;
                if (loopCount > 10)
                {
                    return(_randomSweeper.ProposeSweeps(1, null)[0]);
                }
            } while (_alreadySeenConfigs.Contains(child));

            _alreadySeenConfigs.Add(child);
            return(SweeperProbabilityUtils.FloatArrayAsParameterSet(_host, _sweepParameters, child, false));
        }
Esempio n. 4
0
        public void FactoryExampleTest()
        {
            // End-to-end test: train a linear SVM on the adult dataset via the
            // command-line driver, reload the saved model, and verify that
            // predictions produced through the instance factory match those
            // produced through the standard test-data pipeline.
            var    dataset          = TestDatasets.adultText;
            string dataFilename     = GetDataPath(dataset.trainFilename);
            string testDataFilename = GetDataPath(dataset.testFilename);

            //********* Training a model *********//
            string       modelFilename = Path.GetTempFileName();
            TLCArguments cmd           = new TLCArguments();

            Assert.True(CmdParser.ParseArguments(dataset.extraSettings, cmd));
            cmd.command           = Command.Train;
            cmd.modelfile         = modelFilename;
            cmd.datafile          = dataFilename;
            cmd.instancesSettings = dataset.settings;
            cmd.classifierName    = TestLearners.linearSVM.Trainer;
            RunExperiments.Run(cmd);

            // Load and make predictions with a previously saved model.

            IDataModel dataModel;
            IDataStats dataStats;
            var        predictor = (IDistributionPredictor <Instance, Float, Float>)PredictorUtils.LoadPredictor(
                out dataModel, out dataStats, modelFilename);
            var instanceFactory = ReflectionUtilsOld.CreateInstanceOld <IInstanceFactory, SignatureInstances>(
                cmd.instancesClass, cmd.instancesSettings, null, dataModel);

            bool         headerSkip    = true;
            List <Float> outputs       = new List <Float>();
            List <Float> probabilities = new List <Float>();

            // First pass: parse the raw CSV test file by hand, build the sparse
            // feature strings, and score each row through the instance factory.
            using (StreamReader reader = new StreamReader(testDataFilename))
            {
                List <string> features = new List <string>();
                string        text;
                long          line = 0;
                while ((text = reader.ReadLine()) != null)
                {
                    ++line;
                    if (string.IsNullOrWhiteSpace(text))
                    {
                        continue;
                    }

                    // Every non-blank row (including the header) must have 15 columns.
                    string[] cols = text.Split(',');
                    Assert.True(cols.Length == 15);

                    if (headerSkip)
                    {
                        // skip header line
                        headerSkip = false;
                        continue;
                    }

                    features.Clear();
                    // Add in the "max dimensionality"
                    features.Add("15");
                    for (int col = 0; col < cols.Length; col++)
                    {
                        string s = cols[col].Trim();
                        switch (col)
                        {
                        case 0:
                        case 2:
                        case 4:
                        case 10:
                        case 11:
                        case 12:
                        case 14:
                            // numeric feature or label -- add if non-zero
                            Float val = InstancesUtils.FloatParse(s);
                            if (val == 0) // Beware of NaNs - they should be recorded!
                            {
                                continue;
                            }
                            break;
                        }
                        // Sparse "index:value" encoding; categorical columns fall
                        // through the switch and are always added.
                        features.Add(col + ":" + s);
                    }

                    Instance instance = instanceFactory.ProduceInstance(line, features.ToArray());
                    Float    rawOutput, probability;
                    probability = predictor.PredictDistribution(instance, out rawOutput);
                    outputs.Add(rawOutput);
                    probabilities.Add(probability);
                }
            }

            // Second pass: score the same test file through the standard pipeline
            // to obtain the reference outputs.
            List <Float> originalOutputs       = new List <Float>();
            List <Float> originalProbabilities = new List <Float>();
            var          env       = new LocalEnvironment(SysRandom.Wrap(RunExperiments.GetRandom(cmd)));
            Instances    instances = RunExperiments.CreateTestData(cmd, testDataFilename, dataModel, null, env);

            foreach (Instance instance in instances)
            {
                Float rawOutput, probability;
                probability = predictor.PredictDistribution(instance, out rawOutput);
                originalOutputs.Add(rawOutput);
                originalProbabilities.Add(probability);
            }

            // Factory-produced predictions must match the pipeline-produced ones exactly.
            CollectionAssert.Equal(outputs, originalOutputs);
            CollectionAssert.Equal(probabilities, originalProbabilities);

            File.Delete(modelFilename);

            Done();
        }