private static VBuffer<Float>[] GaussianMatrix(int k, int d, int seed)
{
    var rv = Zeros(k, d);
    var rng = new SysRandom(seed);
    // REVIEW: use a faster Gaussian random matrix generator.
    // MKL has fast vectorized random number generation.
    for (var i = 0; i < k; ++i)
    {
        for (var j = 0; j < d; ++j)
        {
            // Not fast for large matrix generation.
            rv[i].Values[j] = (Float)Stats.SampleFromGaussian(rng);
        }
    }
    return rv;
}
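// A minimal sketch of one faster alternative flagged by the REVIEW comment above:
// the polar (Marsaglia) Box-Muller transform produces two Gaussian samples per
// accepted uniform pair, avoiding one transcendental call per sample. This is a
// hypothetical helper, not part of the existing codebase, and it assumes SysRandom
// aliases System.Random as elsewhere in this file.
private static void FillGaussian(Float[] buffer, SysRandom rng)
{
    int i = 0;
    while (i < buffer.Length)
    {
        // Draw a point uniformly from the unit disk (rejection step).
        double u, v, s;
        do
        {
            u = 2 * rng.NextDouble() - 1;
            v = 2 * rng.NextDouble() - 1;
            s = u * u + v * v;
        } while (s >= 1 || s == 0);

        // Each accepted pair yields two independent standard normal deviates.
        double scale = Math.Sqrt(-2 * Math.Log(s) / s);
        buffer[i++] = (Float)(u * scale);
        if (i < buffer.Length)
            buffer[i++] = (Float)(v * scale);
    }
}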
/// <summary>
/// Draws n random integers in [0, N-1].
/// They can be distinct or not.
/// The function is not efficient if n is close to N and distinct is true.
/// </summary>
public static int[] RandomIntegers(int n, int N, bool distinct = false, IRandom rand = null)
{
    var res = new int[n];
    if (rand == null)
    {
        rand = new SysRandom();
    }
    if (distinct)
    {
        if (n > N)
        {
            throw new DataValueError($"Cannot draw more than {N} distinct values.");
        }
        var hash = new HashSet<int>();
        int nb = 0;
        int i;
        while (nb < n)
        {
            i = rand.Next(N);
            if (hash.Contains(i))
            {
                continue;
            }
            hash.Add(i);
            res[nb] = i;
            ++nb;
        }
    }
    else
    {
        for (int i = 0; i < n; ++i)
        {
            res[i] = rand.Next(N);
        }
    }
    return res;
}
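// A minimal sketch of a way around the inefficiency noted in the summary above:
// when n is close to N, rejection sampling wastes many draws, whereas a partial
// Fisher-Yates shuffle runs in O(N) regardless of n. Hypothetical helper, shown
// for illustration only; it assumes IRandom exposes Next(int) as used above.
public static int[] DistinctRandomIntegers(int n, int N, IRandom rand)
{
    var pool = new int[N];
    for (int i = 0; i < N; ++i)
        pool[i] = i;
    // Swap a random remaining element into position i; after n steps the first
    // n slots hold a uniformly random n-subset of [0, N-1].
    for (int i = 0; i < n; ++i)
    {
        int j = i + rand.Next(N - i);
        int tmp = pool[i];
        pool[i] = pool[j];
        pool[j] = tmp;
    }
    var res = new int[n];
    Array.Copy(pool, res, n);
    return res;
}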
/// <summary>
/// Sample a child configuration from a configuration centered at parent, using fitness-proportional mutation.
/// </summary>
/// <param name="parent">Starting parent configuration (used as the mean of the multivariate Gaussian).</param>
/// <param name="fitness">Numeric value indicating how good the parent configuration is.</param>
/// <param name="n">Count of how many items are currently in the history.</param>
/// <param name="previousRuns">Run history.</param>
/// <param name="rMean">Mean metric value of previous random runs.</param>
/// <param name="rVar">Empirical variance of the metric values of previous random runs.</param>
/// <param name="isMetricMaximizing">Flag for whether we are minimizing or maximizing metric values.</param>
/// <returns>A mutated version of parent (i.e., a point sampled near parent).</returns>
private ParameterSet SampleChild(ParameterSet parent, double fitness, int n, IRunResult[] previousRuns,
    double rMean, double rVar, bool isMetricMaximizing)
{
    Float[] child = SweeperProbabilityUtils.ParameterSetAsFloatArray(_host, _sweepParameters, parent, false);
    List<int> numericParamIndices = new List<int>();
    List<double> numericParamValues = new List<double>();
    int loopCount = 0;

    // Interleave uniform random samples, according to the proportion defined.
    if (_spu.SampleUniform() <= _args.ProportionRandom)
    {
        ParameterSet ps = _randomSweeper.ProposeSweeps(1)[0];
        _randomParamSets.Add(ps);
        return ps;
    }

    do
    {
        for (int i = 0; i < _sweepParameters.Length; i++)
        {
            // This allows us to query possible values of this parameter.
            var sweepParam = _sweepParameters[i];
            if (sweepParam is DiscreteValueGenerator parameterDiscrete)
            {
                // Sample categorical parameter.
                double[] categoryWeights = _args.LegacyDpBehavior
                    ? CategoriesToWeightsOld(parameterDiscrete, previousRuns)
                    : CategoriesToWeights(parameterDiscrete, previousRuns);
                child[i] = SampleCategoricalDist(1, categoryWeights)[0];
            }
            else
            {
                var parameterNumeric = sweepParam as INumericValueGenerator;
                _host.Check(parameterNumeric != null, "KDO sweeper can only sweep over discrete and numeric parameters");
                numericParamIndices.Add(i);
                numericParamValues.Add(child[i]);
            }
        }

        if (numericParamIndices.Count > 0)
        {
            if (!_args.Beta)
            {
                // Sample a point from a multivariate Gaussian, centered on the parent values,
                // with mutation proportional to fitness.
                double[] mu = numericParamValues.ToArray();
                double correctedVal = isMetricMaximizing
                    ? 1.0 - SweeperProbabilityUtils.NormalCdf(fitness, rMean, rVar)
                    : 1.0 - SweeperProbabilityUtils.NormalCdf(2 * rMean - fitness, rMean, rVar);
                double bandwidthScale = Math.Max(_args.MinimumMutationSpread, correctedVal);
                double[] stddevs = Enumerable.Repeat(_args.Simple ? 0.2 : bandwidthScale, mu.Length).ToArray();
                double[][] bandwidthMatrix = BuildBandwidthMatrix(n, stddevs);
                double[] sampledPoint = SampleDiagonalCovMultivariateGaussian(1, mu, bandwidthMatrix)[0];
                for (int j = 0; j < sampledPoint.Length; j++)
                    child[numericParamIndices[j]] = (Float)Corral(sampledPoint[j]);
            }
            else
            {
                // If the Beta flag is set, sample from independent Beta distributions instead.
                SysRandom rng = new SysRandom();
                double alpha = 1 + 15 * fitness;
                foreach (int index in numericParamIndices)
                {
                    const double epsCutoff = 1e-10;
                    double eps = Math.Min(Math.Max(child[index], epsCutoff), 1 - epsCutoff);
                    double beta = alpha / eps - alpha;
                    child[index] = (Float)Stats.SampleFromBeta(rng, alpha, beta);
                }
            }
        }

        // Don't get stuck at a local point.
        loopCount++;
        if (loopCount > 10)
            return _randomSweeper.ProposeSweeps(1, null)[0];
    } while (_alreadySeenConfigs.Contains(child));

    _alreadySeenConfigs.Add(child);
    return SweeperProbabilityUtils.FloatArrayAsParameterSet(_host, _sweepParameters, child, false);
}
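// A minimal sketch (under the assumption that numeric parameters are scaled to
// (0, 1), as the epsCutoff clamping above suggests) of why the Beta branch sets
// beta = alpha / eps - alpha: with that choice the Beta mean alpha / (alpha + beta)
// equals eps, so the child stays centered on the parent value, and a larger
// fitness (larger alpha) concentrates the distribution more tightly around it.
// Hypothetical helper, not part of the sweeper.
private static void CheckBetaCentering(double fitness, double parentValue)
{
    double alpha = 1 + 15 * fitness;
    double beta = alpha / parentValue - alpha;
    // Mean of Beta(alpha, beta) is alpha / (alpha + beta), which simplifies to parentValue.
    double mean = alpha / (alpha + beta);
    // Variance shrinks as alpha grows, i.e., as fitness improves.
    double variance = alpha * beta / ((alpha + beta) * (alpha + beta) * (alpha + beta + 1));
    Console.WriteLine($"mean={mean}, variance={variance}");
}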
public void FactoryExampleTest()
{
    var dataset = TestDatasets.adultText;
    string dataFilename = GetDataPath(dataset.trainFilename);
    string testDataFilename = GetDataPath(dataset.testFilename);

    //********* Training a model *********//
    string modelFilename = Path.GetTempFileName();
    TLCArguments cmd = new TLCArguments();
    Assert.True(CmdParser.ParseArguments(dataset.extraSettings, cmd));
    cmd.command = Command.Train;
    cmd.modelfile = modelFilename;
    cmd.datafile = dataFilename;
    cmd.instancesSettings = dataset.settings;
    cmd.classifierName = TestLearners.linearSVM.Trainer;
    RunExperiments.Run(cmd);

    // Load and make predictions with a previously saved model.
    IDataModel dataModel;
    IDataStats dataStats;
    var predictor = (IDistributionPredictor<Instance, Float, Float>)PredictorUtils.LoadPredictor(
        out dataModel, out dataStats, modelFilename);
    var instanceFactory = ReflectionUtilsOld.CreateInstanceOld<IInstanceFactory, SignatureInstances>(
        cmd.instancesClass, cmd.instancesSettings, null, dataModel);

    bool headerSkip = true;
    List<Float> outputs = new List<Float>();
    List<Float> probabilities = new List<Float>();
    using (StreamReader reader = new StreamReader(testDataFilename))
    {
        List<string> features = new List<string>();
        string text;
        long line = 0;
        while ((text = reader.ReadLine()) != null)
        {
            ++line;
            if (string.IsNullOrWhiteSpace(text))
            {
                continue;
            }
            string[] cols = text.Split(',');
            Assert.True(cols.Length == 15);
            if (headerSkip)
            {
                // Skip the header line.
                headerSkip = false;
                continue;
            }
            features.Clear();
            // Add in the "max dimensionality".
            features.Add("15");
            for (int col = 0; col < cols.Length; col++)
            {
                string s = cols[col].Trim();
                switch (col)
                {
                    case 0:
                    case 2:
                    case 4:
                    case 10:
                    case 11:
                    case 12:
                    case 14:
                        // Numeric feature or label -- add only if non-zero.
                        Float val = InstancesUtils.FloatParse(s);
                        if (val == 0) // Beware of NaNs - they should be recorded!
                        {
                            continue;
                        }
                        break;
                }
                features.Add(col + ":" + s);
            }
            Instance instance = instanceFactory.ProduceInstance(line, features.ToArray());
            Float rawOutput, probability;
            probability = predictor.PredictDistribution(instance, out rawOutput);
            outputs.Add(rawOutput);
            probabilities.Add(probability);
        }
    }

    List<Float> originalOutputs = new List<Float>();
    List<Float> originalProbabilities = new List<Float>();
    var env = new LocalEnvironment(SysRandom.Wrap(RunExperiments.GetRandom(cmd)));
    Instances instances = RunExperiments.CreateTestData(cmd, testDataFilename, dataModel, null, env);
    foreach (Instance instance in instances)
    {
        Float rawOutput, probability;
        probability = predictor.PredictDistribution(instance, out rawOutput);
        originalOutputs.Add(rawOutput);
        originalProbabilities.Add(probability);
    }

    CollectionAssert.Equal(outputs, originalOutputs);
    CollectionAssert.Equal(probabilities, originalProbabilities);

    File.Delete(modelFilename);
    Done();
}
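// For illustration only (hypothetical row, in the usual adult-dataset column order):
// given a test line such as
//   "39, State-gov, 77516, Bachelors, 13, ..., 40, United-States, <=50K"
// the loop above hands the factory a sparse "index:value" array like
//   { "15", "0:39", "1:State-gov", "2:77516", "3:Bachelors", "4:13", ..., "14:<=50K" }
// where "15" is the max dimensionality, zero-valued numeric columns are dropped,
// and non-numeric columns fall through the switch and are always recorded.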