Example #1
        public static (Vector[] dataX, double[] dataY) GenerateRandomData(int numData, double proportionCorrupt)
        {
            int randomSeed = 9876;

            Random rng = new Random(randomSeed);

            Rand.Restart(randomSeed);

            InferenceEngine engine = Utilities.GetInferenceEngine();

            // The points to evaluate
            Vector[] randomInputs = Utilities.VectorRange(0, 1, numData, null);

            var gaussianProcessGenerator = new GaussianProcessRegressor(randomInputs);

            // The basis
            Vector[] basis = Utilities.VectorRange(0, 1, 6, rng);

            // The kernel
            var kf = new SummationKernel(new SquaredExponential(-1)) + new WhiteNoise();

            // Fill in the sparse GP prior
            GaussianProcess gp = new GaussianProcess(new ConstantFunction(0), kf);

            gaussianProcessGenerator.Prior.ObservedValue = new SparseGP(new SparseGPFixed(gp, basis));

            // Infer the posterior Sparse GP, and sample a random function from it
            SparseGP sgp = engine.Infer<SparseGP>(gaussianProcessGenerator.F);
            var randomFunc = sgp.Sample();

            double[] randomOutputs = new double[randomInputs.Length];
            int numCorrupted = (int)Math.Ceiling(numData * proportionCorrupt);
            // Materialize the random index subset once, so the same indices are corrupted on every membership check
            var subset = new HashSet<int>(Enumerable.Range(0, randomInputs.Length).OrderBy(x => rng.Next()).Take(numCorrupted));

            // get random data
            for (int i = 0; i < randomInputs.Length; i++)
            {
                double post = randomFunc.Evaluate(randomInputs[i]);
                // Corrupt this data point if it was chosen for the corrupted subset
                if (subset.Contains(i))
                {
                    double sign = rng.NextDouble() > 0.5 ? 1 : -1;
                    double distance = rng.NextDouble();
                    post += sign * distance;
                }

                randomOutputs[i] = post;
            }

            Console.WriteLine("Model complete: Generated {0} points with {1} corrupted", numData, numCorrupted);

            return (randomInputs, randomOutputs);
        }
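
For reference, a minimal call site might look like the sketch below. This is an illustration only: it assumes GenerateRandomData is exposed on the GaussianProcessDataGenerator class referenced in Example #2 and that the Infer.NET Vector type is in scope.

            // Sketch only: generate 30 synthetic points, corrupting roughly 30% of them
            (Vector[] dataX, double[] dataY) = GaussianProcessDataGenerator.GenerateRandomData(30, 0.3);
            Console.WriteLine("Generated {0} inputs and {1} outputs", dataX.Length, dataY.Length);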
Example #2
        static void FitDataset(bool useSynthetic)
        {
            Vector[] trainingInputs;
            double[] trainingOutputs;

            if (!useSynthetic)
            {
                var trainingData = Utilities.LoadAISDataset();
                trainingInputs = trainingData.Select(tup => Vector.FromArray(new double[] { tup.x })).ToArray();
                trainingOutputs = trainingData.Select(tup => tup.y).ToArray();
            }
            else
            {
                (trainingInputs, trainingOutputs) = GaussianProcessDataGenerator.GenerateRandomData(30, 0.3);
            }

            InferenceEngine engine = Utilities.GetInferenceEngine();

            // First fit standard GP, then fit Student-T GP
            foreach (var useStudentTLikelihood in new[] { false, true })
            {
                var gaussianProcessRegressor = new GaussianProcessRegressor(trainingInputs, useStudentTLikelihood, trainingOutputs);

                // Log length scale estimated as -1
                var noiseVariance = 0.8;
                var kf = new SummationKernel(new SquaredExponential(-1)) + new WhiteNoise(Math.Log(noiseVariance) / 2);
                GaussianProcess gp = new GaussianProcess(new ConstantFunction(0), kf);

                // Convert SparseGP to full Gaussian Process by evaluating at all the training points
                gaussianProcessRegressor.Prior.ObservedValue = new SparseGP(new SparseGPFixed(gp, trainingInputs.ToArray()));
                double logOdds = engine.Infer<Bernoulli>(gaussianProcessRegressor.Evidence).LogOdds;
                Console.WriteLine("{0} evidence = {1}", kf, logOdds.ToString("g4"));

                // Infer the posterior Sparse GP
                SparseGP sgp = engine.Infer<SparseGP>(gaussianProcessRegressor.F);

#if NETFULL
                string datasetName = useSynthetic ? "Synthetic" : "AIS";
                Utilities.PlotPredictions(sgp, trainingInputs, trainingOutputs, useStudentTLikelihood, datasetName);
#endif
            }
        }
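
A possible entry point tying the two examples together (a sketch, under the assumption that FitDataset lives in the program's main class and that Utilities.LoadAISDataset can find the AIS dataset on disk):

        static void Main()
        {
            // Sketch only: fit the synthetic data first, then the real AIS dataset
            FitDataset(useSynthetic: true);
            FitDataset(useSynthetic: false);
        }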