/// <summary>
/// Generates a synthetic dataset by sampling a random function from a sparse
/// Gaussian Process prior and then corrupting a random subset of the outputs
/// with additive noise.
/// </summary>
/// <param name="numData">Number of data points to generate.</param>
/// <param name="proportionCorrupt">Fraction in [0, 1] of points to corrupt (rounded up).</param>
/// <returns>The input vectors and their (possibly corrupted) sampled output values.</returns>
public static (Vector[] dataX, double[] dataY) GenerateRandomData(int numData, double proportionCorrupt)
{
    // Fixed seed (for both the local RNG and Infer.NET's global RNG) so the
    // generated dataset is reproducible across runs.
    int randomSeed = 9876;
    Random rng = new Random(randomSeed);
    Rand.Restart(randomSeed);

    InferenceEngine engine = Utilities.GetInferenceEngine();

    // The points to evaluate
    Vector[] randomInputs = Utilities.VectorRange(0, 1, numData, null);
    var gaussianProcessGenerator = new GaussianProcessRegressor(randomInputs);

    // The basis
    Vector[] basis = Utilities.VectorRange(0, 1, 6, rng);

    // The kernel: squared-exponential (log length scale -1) plus white noise
    var kf = new SummationKernel(new SquaredExponential(-1)) + new WhiteNoise();

    // Fill in the sparse GP prior
    GaussianProcess gp = new GaussianProcess(new ConstantFunction(0), kf);
    gaussianProcessGenerator.Prior.ObservedValue = new SparseGP(new SparseGPFixed(gp, basis));

    // Infer the posterior Sparse GP, and sample a random function from it
    SparseGP sgp = engine.Infer<SparseGP>(gaussianProcessGenerator.F);
    var randomFunc = sgp.Sample();

    double[] randomOutputs = new double[randomInputs.Length];
    int numCorrupted = (int)Math.Ceiling(numData * proportionCorrupt);

    // BUG FIX 1: the range upper bound was randomInputs.Length + 1, which could
    // select the out-of-range index randomInputs.Length and so corrupt one fewer
    // point than requested.
    // BUG FIX 2: the shuffled query was left deferred, so every Contains() call
    // below re-enumerated it and reshuffled with fresh random keys, producing a
    // different "subset" on each loop iteration. Materialize it exactly once so
    // the chosen subset is stable.
    int[] subset = Enumerable.Range(0, randomInputs.Length)
        .OrderBy(x => rng.Next())
        .Take(numCorrupted)
        .ToArray();

    // get random data
    for (int i = 0; i < randomInputs.Length; i++)
    {
        double post = randomFunc.Evaluate(randomInputs[i]);

        // Corrupt this data point if it was selected for the corrupted subset
        if (subset.Contains(i))
        {
            double sign = rng.NextDouble() > 0.5 ? 1 : -1;
            double distance = rng.NextDouble();
            post = (sign * distance) + post;
        }

        randomOutputs[i] = post;
    }

    Console.WriteLine("Model complete: Generated {0} points with {1} corrupted", numData, numCorrupted);

    return (randomInputs, randomOutputs);
}
/// <summary>
/// Fits a Gaussian Process regressor to a dataset twice — first with a standard
/// Gaussian likelihood, then with a Student-T likelihood — printing the model
/// evidence for each fit.
/// </summary>
/// <param name="useSynthetic">
/// When true, fits synthetically generated data; otherwise fits the AIS dataset.
/// </param>
static void FitDataset(bool useSynthetic)
{
    Vector[] trainingInputs;
    double[] trainingOutputs;

    if (useSynthetic)
    {
        (trainingInputs, trainingOutputs) = GaussianProcessDataGenerator.GenerateRandomData(30, 0.3);
    }
    else
    {
        var trainingData = Utilities.LoadAISDataset();
        trainingInputs = trainingData.Select(tup => Vector.FromArray(new double[1] { tup.x })).ToArray();
        trainingOutputs = trainingData.Select(tup => tup.y).ToArray();
    }

    InferenceEngine engine = Utilities.GetInferenceEngine();

    // First fit standard GP, then fit Student-T GP
    foreach (var useStudentTLikelihood in new[] { false, true })
    {
        var regressor = new GaussianProcessRegressor(trainingInputs, useStudentTLikelihood, trainingOutputs);

        // Log length scale estimated as -1; WhiteNoise takes the log standard
        // deviation, hence log(variance) / 2.
        var noiseVariance = 0.8;
        var kernel = new SummationKernel(new SquaredExponential(-1)) + new WhiteNoise(Math.Log(noiseVariance) / 2);
        GaussianProcess gp = new GaussianProcess(new ConstantFunction(0), kernel);

        // Convert SparseGP to full Gaussian Process by evaluating at all the training points
        regressor.Prior.ObservedValue = new SparseGP(new SparseGPFixed(gp, trainingInputs.ToArray()));

        double logOdds = engine.Infer<Bernoulli>(regressor.Evidence).LogOdds;
        Console.WriteLine("{0} evidence = {1}", kernel, logOdds.ToString("g4"));

        // Infer the posterior Sparse GP
        SparseGP sgp = engine.Infer<SparseGP>(regressor.F);

#if NETFULL
        string datasetName = useSynthetic ? "Synthetic" : "AIS";
        Utilities.PlotPredictions(sgp, trainingInputs, trainingOutputs, useStudentTLikelihood, datasetName);
#endif
    }
}