コード例 #1
0
        /// <summary>
        /// Runs the online experiment. For every resident, each instance is first predicted with the
        /// resident's current posteriors, the resident's holdout subset is re-scored, and only then is
        /// the model retrained on that single instance.
        /// </summary>
        /// <param name="dataSet">Data set whose instances are presented one at a time.</param>
        /// <param name="holdoutSet">Holdout set that is re-scored before every training step.</param>
        /// <param name="priors">Priors used to initialise each resident's posteriors.</param>
        public void RunOnline(DataSet dataSet, DataSet holdoutSet, Marginals priors)
        {
            using (new CodeTimer("Running online experiment: " + Name))
            {
                Console.WriteLine();

                Metrics = new MetricsCollection();
                HoldoutMetrics = new HoldoutMetricsCollection { Metrics = new Metrics[dataSet.NumberOfResidents][] };

                PosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][];
                HoldoutPosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][][];
                IndividualPosteriors = new Marginals[dataSet.NumberOfResidents];

                for (int i = 0; i < dataSet.NumberOfResidents; i++)
                {
                    var numberOfInstances = dataSet.NumberOfInstances[i];
                    var collection = new List<Metrics>();

                    HoldoutPosteriorActivities[i] = new Bernoulli[numberOfInstances][];
                    IndividualPosteriors[i] = new Marginals(priors);
                    PosteriorActivities[i] = new Bernoulli[numberOfInstances];

                    // The holdout subset does not depend on the instance index, so fetch it once per resident.
                    var holdoutSetForResident = holdoutSet.GetSubSet(i);

                    for (int j = 0; j < numberOfInstances; j++)
                    {
                        var datum = dataSet.GetSubSet(i, j);

                        // Predict the incoming instance and the holdout subset before this instance is learned.
                        PosteriorActivities[i][j] = TestModel.Test(datum, IndividualPosteriors[i])[0][0];
                        HoldoutPosteriorActivities[i][j] = TestModel.Test(holdoutSetForResident, IndividualPosteriors[i])[0];

                        var holdoutMetrics = new Metrics { Name = Name, Estimates = HoldoutPosteriorActivities[i][j], TrueLabels = holdoutSet.Labels[i] };

                        // Now retrain using this label; the result becomes the prior for the next instance.
                        IndividualPosteriors[i] = TrainModel.Train(datum, IndividualPosteriors[i], 10);

                        collection.Add(holdoutMetrics);
                    }

                    Metrics.Add(new Metrics { Name = Name, Estimates = PosteriorActivities[i], TrueLabels = dataSet.Labels[i] }, true);

                    HoldoutMetrics.Metrics[i] = collection.ToArray();

                    // Average() throws on an empty sequence, so a resident without instances reports NaN instead.
                    double averageHoldoutAccuracy = collection.Count > 0
                        ? collection.Average(ia => ia.AverageAccuracy)
                        : double.NaN;

                    Console.WriteLine("{0,20}, Resident {1}, Hold out accuracy {2:N2}", Name, i, averageHoldoutAccuracy);
                }

                HoldoutMetrics.RecomputeAggregateMetrics();
                Metrics.RecomputeAggregateMetrics();
            }
        }
コード例 #2
0
        /// <summary>
        /// Runs the active experiment. For every resident, up to <paramref name="numberOfSelections"/>
        /// rounds are performed: the resident's data and holdout subsets are scored with the current
        /// posteriors, the active learner picks the instance with the largest value of information,
        /// and the model is retrained on that single instance.
        /// </summary>
        /// <param name="dataSet">Data set the active learner selects instances from.</param>
        /// <param name="holdoutSet">Holdout set scored at the start of every round.</param>
        /// <param name="numberOfSelections">Maximum number of selection rounds per resident.</param>
        /// <param name="priors">Priors used to initialise each resident's posteriors.</param>
        /// <exception cref="InvalidOperationException">Thrown when <c>ActiveLearners</c> is null.</exception>
        public void RunActive(DataSet dataSet, DataSet holdoutSet, int numberOfSelections, Marginals priors)
        {
            if (ActiveLearners == null)
            {
                throw new InvalidOperationException("Active Learner not provided");
            }

            using (new CodeTimer("Running active experiment: " + Name))
            {
                Console.WriteLine();

                HoldoutMetrics = new HoldoutMetricsCollection { Metrics = new Metrics[dataSet.NumberOfResidents][] };

                PosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][];
                HoldoutPosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][][];
                IndividualPosteriors = new Marginals[dataSet.NumberOfResidents];

                for (int i = 0; i < dataSet.NumberOfResidents; i++)
                {
                    var collection = new List<Metrics>();
                    IndividualPosteriors[i] = new Marginals(priors);
                    HoldoutPosteriorActivities[i] = new Bernoulli[numberOfSelections][];

                    var dataSetForResident = dataSet.GetSubSet(i);
                    var holdoutSetForResident = holdoutSet.GetSubSet(i);

                    for (int j = 0; j < numberOfSelections; j++)
                    {
                        PosteriorActivities[i] = TestModel.Test(dataSetForResident, IndividualPosteriors[i])[0];
                        HoldoutPosteriorActivities[i][j] = TestModel.Test(holdoutSetForResident, IndividualPosteriors[i])[0];

                        if (ActiveLearners[i].Unlabelled.Count == 0)
                        {
                            Console.WriteLine("Empty unlabelled set");
                            break;
                        }

                        // The VOI value is required by the out signature but is not used here.
                        int index;
                        double val;
                        ActiveLearners[i].GetArgMaxVOI(PosteriorActivities[i], IndividualPosteriors[i], out index, out val);

                        // Now retrain using this label
                        ActiveLearners[i].UpdateModel(index);
                        IndividualPosteriors[i] = TrainModel.Train(dataSet.GetSubSet(i, index), IndividualPosteriors[i], 50);

                        // These metrics use the holdout estimates computed before the training step above.
                        var metrics = new Metrics { Name = Name, Estimates = HoldoutPosteriorActivities[i][j], TrueLabels = holdoutSet.Labels[i] };
                        collection.Add(metrics);
                    }

                    HoldoutMetrics.Metrics[i] = collection.ToArray();

                    // Average() throws on an empty sequence (no rounds ran, or the unlabelled pool was
                    // empty in the first round), so report NaN in that case.
                    double averageHoldoutAccuracy = collection.Count > 0
                        ? collection.Average(ia => ia.AverageAccuracy)
                        : double.NaN;

                    Console.WriteLine("{0,20}, Resident {1}, \n\t\tClass ratio       {5}, \n\t\tHold out accuracy {2:N2}, \n\t\tAccuracies {3} \n\t\tBriers     {4}\n",
                        Name, i, averageHoldoutAccuracy.ToString("N2"),
                        string.Join(", ", collection.Select(ia => ia.AverageAccuracy.ToString("N2"))),
                        string.Join(", ", collection.Select(ia => ia.BrierScore.ToString("N2"))),
                        holdoutSet.Labels[i].Average().ToString("N2")
                        );
                }

                HoldoutMetrics.RecomputeAggregateMetrics();
            }
        }
コード例 #3
0
 /// <summary>
 /// Trains on the whole data set in one call and stores the result in <c>Posteriors</c>.
 /// </summary>
 /// <param name="dataSet">Data set passed to the training model as a whole.</param>
 /// <param name="priors">Priors passed to the training model.</param>
 /// <param name="niter">Iteration count forwarded to <c>TrainModel.Train</c>; defaults to 1.</param>
 public void RunBatch(DataSet dataSet, Marginals priors, int niter = 1)
 {
     var batchPosteriors = TrainModel.Train(dataSet, priors, niter);
     Posteriors = batchPosteriors;
 }
コード例 #4
0
        /// <summary>
        /// Observes the data set's features and labels together with copies of the prior weight means
        /// and precisions, runs inference, and returns the resulting posteriors.
        /// </summary>
        /// <param name="dataSet">Data set supplying the observed features and labels.</param>
        /// <param name="priors">Priors whose weight means and precisions are copied before being observed.</param>
        /// <param name="numberOfIterations">Number of inference iterations; defaults to 10.</param>
        /// <returns>Marginals holding the posterior weights, weight means and weight precisions.</returns>
        public Marginals Train(DataSet dataSet, Marginals priors, int numberOfIterations = 10)
        {
            var observedFeatures = dataSet.Features;
            var priorMeans = DistributionArrayHelpers.Copy(priors.WeightMeans);
            var priorPrecisions = DistributionArrayHelpers.Copy(priors.WeightPrecisions);
            var observedLabels = dataSet.Labels;

            SetObservedVariables(observedFeatures, priorMeans, priorPrecisions, observedLabels);

            Gaussian[][] weightPosteriors;
            Gaussian[] meanPosteriors;
            Gamma[] precisionPosteriors;

            #if !USE_PRECOMPILED_ALGORITHM
            // Inference-engine path: set the iteration count, then query each marginal.
            engine.Algorithm.DefaultNumberOfIterations = numberOfIterations;
            weightPosteriors = engine.Infer<Gaussian[][]>(weights);
            meanPosteriors = engine.Infer<Gaussian[]>(weightMeans);
            precisionPosteriors = engine.Infer<Gamma[]>(weightPrecisions);
            #else
            // Precompiled path: execute the algorithm, then read the marginals by variable name.
            algorithm.Execute(numberOfIterations);
            weightPosteriors = algorithm.Marginal<Gaussian[][]>(weights.Name);
            meanPosteriors = algorithm.Marginal<Gaussian[]>(weightMeans.Name);
            precisionPosteriors = algorithm.Marginal<Gamma[]>(weightPrecisions.Name);
            #endif

            return new Marginals
            {
                Weights = weightPosteriors,
                WeightMeans = meanPosteriors,
                WeightPrecisions = precisionPosteriors
            };
        }
コード例 #5
0
        /// <summary>
        /// Builds a data set with one row per entry of <paramref name="subjects"/>, collecting the
        /// stored instances whose subject id maps to that row.
        /// </summary>
        /// <param name="subjects">Subject ids, in output order. If an id occurs more than once, its instances go to the last occurrence and the earlier rows stay empty.</param>
        /// <param name="addbias">When true, a constant feature of 1.0 is appended to every feature vector.</param>
        /// <param name="selected">Indices of the features to keep; null or empty keeps every feature.</param>
        /// <param name="keepProportion">Fraction of each subject's instances to keep, counted from the front; the count is rounded with <c>Convert.ToInt32</c>.</param>
        /// <returns>A data set holding the selected features and labels for every subject.</returns>
        public DataSet GetDataSet(IEnumerable<int> subjects, bool addbias, HashSet<int> selected, double keepProportion = 1.0)
        {
            // Materialise once so the sequence is not re-enumerated for every Count()/ElementAt() call.
            var subjectIds = subjects.ToArray();

            // Subject id -> output row; later duplicates overwrite earlier ones.
            var rowOfSubject = new Dictionary<int, int>();
            for (int ss = 0; ss < subjectIds.Length; ++ss)
            {
                rowOfSubject[subjectIds[ss]] = ss;
            }

            var feats = new List<double[]>[subjectIds.Length];
            var labs = new List<bool>[subjectIds.Length];
            for (int ss = 0; ss < subjectIds.Length; ++ss)
            {
                feats[ss] = new List<double[]>();
                labs[ss] = new List<bool>();
            }

            bool keepAllFeatures = selected == null || selected.Count == 0;

            // Single pass over the stored instances; visiting them in index order keeps each
            // subject's instances in their original relative order.
            for (int ii = 0; ii < N; ++ii)
            {
                int row;
                if (!rowOfSubject.TryGetValue(s[ii], out row))
                {
                    continue;
                }

                int featureCount = x[ii].Count();
                var thisfeat = new List<double>(featureCount + 1);

                for (int ff = 0; ff < featureCount; ++ff)
                {
                    if (keepAllFeatures || selected.Contains(ff))
                    {
                        thisfeat.Add(x[ii][ff]);
                    }
                }

                if (addbias)
                {
                    thisfeat.Add(1.0);
                }

                feats[row].Add(thisfeat.ToArray());
                labs[row].Add(y[ii]);
            }

            var features = new double[subjectIds.Length][][];
            var labels = new bool[subjectIds.Length][];

            for (int ss = 0; ss < subjectIds.Length; ++ss)
            {
                var nKeep = Convert.ToInt32(keepProportion * feats[ss].Count);
                features[ss] = feats[ss].Take(nKeep).ToArray();
                labels[ss] = labs[ss].Take(nKeep).ToArray();
            }

            return new DataSet
            {
                Features = features,
                Labels = labels
            };
        }
コード例 #6
0
        /// <summary>
        /// Observes the data set's features together with copies of the prior weight means and
        /// precisions, and returns the inferred activity posteriors.
        /// </summary>
        /// <param name="dataSet">Data set supplying the observed features.</param>
        /// <param name="priors">Priors whose weight means and precisions are copied before being observed.</param>
        /// <returns>The posterior over <c>activities</c> as a jagged array of Bernoulli distributions.</returns>
        public Bernoulli[][] Test(DataSet dataSet, Marginals priors)
        {
            var observedFeatures = dataSet.Features;
            var priorMeans = DistributionArrayHelpers.Copy(priors.WeightMeans);
            var priorPrecisions = DistributionArrayHelpers.Copy(priors.WeightPrecisions);

            SetObservedVariables(observedFeatures, priorMeans, priorPrecisions);

            Bernoulli[][] activityPosteriors;

            #if !USE_PRECOMPILED_ALGORITHM
            activityPosteriors = engine.Infer<Bernoulli[][]>(activities);
            #else
            // Precompiled path: a single execution, then read the marginal by variable name.
            algorithm.Execute(1);
            activityPosteriors = algorithm.Marginal<Bernoulli[][]>(activities.Name);
            #endif

            return activityPosteriors;
        }
コード例 #7
0
        /// <summary>
        /// Observes the data set's features and labels together with copies of the prior weight means
        /// and precisions, then infers the <c>evidence</c> variable with a single iteration.
        /// </summary>
        /// <param name="dataSet">Data set supplying the observed features and labels.</param>
        /// <param name="priors">Priors whose weight means and precisions are copied before being observed.</param>
        /// <returns>The inferred Bernoulli distribution over <c>evidence</c>.</returns>
        public Bernoulli ComputeEvidence(DataSet dataSet, Marginals priors)
        {
            var observedFeatures = dataSet.Features;
            var priorMeans = DistributionArrayHelpers.Copy(priors.WeightMeans);
            var priorPrecisions = DistributionArrayHelpers.Copy(priors.WeightPrecisions);
            var observedLabels = dataSet.Labels;

            SetObservedVariables(observedFeatures, priorMeans, priorPrecisions, observedLabels);

            engine.Algorithm.DefaultNumberOfIterations = 1;

            var evidencePosterior = engine.Infer<Bernoulli>(evidence);
            return evidencePosterior;
        }
コード例 #8
0
ファイル: ToyData.cs プロジェクト: IRC-SPHERE/ActiveTransfer
        /// <summary>
        /// Generates binary-labelled toy data for every resident and stores it in either
        /// <c>DataSet</c> or <c>HoldoutSet</c>.
        /// </summary>
        /// <param name="noisyExampleProportion">
        /// Threshold for a per-instance uniform draw: an instance gets random features in [-0.5, 0.5)
        /// only when the draw exceeds this value; otherwise all its non-bias features are zero.
        /// </param>
        /// <param name="numberOfInstances">Instances generated per resident; 0 returns without generating anything.</param>
        /// <param name="holdout">If set to <c>true</c> the result is stored as the holdout set, otherwise as the main data set.</param>
        /// <exception cref="InvalidOperationException">Thrown when <c>NumberOfActivities</c> is not 2.</exception>
        public void Generate(double noisyExampleProportion, int numberOfInstances, bool holdout = false)
        {
            if (NumberOfActivities != 2)
            {
                throw new InvalidOperationException("This version of the function is for binary data only");
            }

            if (numberOfInstances == 0)
            {
                return;
            }

            if (Weights == null)
            {
                ComputeWeights();
            }

            int width = NumberOfFeatures + (UseBias ? 1 : 0);
            var allFeatures = new double[NumberOfResidents][][];
            var allLabels = new bool[NumberOfResidents][];

            for (int resident = 0; resident < NumberOfResidents; resident++)
            {
                var residentFeatures = new double[numberOfInstances][];
                var residentLabels = new bool[numberOfInstances];

                for (int instance = 0; instance < numberOfInstances; instance++)
                {
                    // One draw per instance decides whether its features are random or all zero.
                    bool hasRandomFeatures = Rand.Double() > noisyExampleProportion;

                    var row = new double[width];
                    var contributions = new double[width];

                    for (int k = 0; k < width; k++)
                    {
                        // The draw is made for the bias column as well, keeping the random stream unchanged.
                        double drawn = hasRandomFeatures ? Rand.Double() - 0.5 : 0.0;

                        // The column at index NumberOfFeatures (present only with a bias) is fixed at -1.
                        if (k == NumberOfFeatures)
                        {
                            row[k] = -1;
                        }
                        else
                        {
                            row[k] = drawn;
                        }

                        contributions[k] = Weights[resident][k] * row[k];
                    }

                    // The score is sampled from a Gaussian centred on the weighted sum; the label is its sign.
                    double score = new Gaussian(contributions.Sum(), 1).Sample();

                    residentFeatures[instance] = row;
                    residentLabels[instance] = score > 0;
                }

                allFeatures[resident] = residentFeatures;
                allLabels[resident] = residentLabels;
            }

            var generated = new DataSet { Features = allFeatures, Labels = allLabels };

            if (holdout)
            {
                HoldoutSet = generated;
            }
            else
            {
                DataSet = generated;
            }
        }
コード例 #9
0
ファイル: DataSet.cs プロジェクト: IRC-SPHERE/ActiveTransfer
        /// <summary>
        /// Splits every resident's instances into a leading training part and a trailing test part.
        /// </summary>
        /// <param name="trainProportion">Fraction in [0, 1] of each resident's instances assigned to the training set; the per-resident count is rounded up.</param>
        /// <param name="trainSet">Receives the first instances of every resident.</param>
        /// <param name="testSet">Receives the remaining instances of every resident.</param>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="trainProportion"/> is NaN or outside [0, 1].</exception>
        public void SplitTrainTest(double trainProportion, out DataSet trainSet, out DataSet testSet)
        {
            // Written as a negated in-range test so that NaN, for which every comparison is false, is rejected too.
            if (!(trainProportion >= 0.0 && trainProportion <= 1.0))
            {
                throw new ArgumentOutOfRangeException("trainProportion");
            }

            // Number of training instances per resident, derived from the label counts.
            var counts = Labels.Select(ia => (int)Math.Ceiling(trainProportion * (double)ia.Length)).ToArray();

            trainSet = new DataSet
            {
                Features = Features.Select((ia, i) => ia.Take(counts[i]).ToArray()).ToArray(),
                Labels = Labels.Select((ia, i) => ia.Take(counts[i]).ToArray()).ToArray()
            };

            testSet = new DataSet
            {
                Features = Features.Select((ia, i) => ia.Skip(counts[i]).ToArray()).ToArray(),
                Labels = Labels.Select((ia, i) => ia.Skip(counts[i]).ToArray()).ToArray()
            };
        }
コード例 #10
0
ファイル: DataSet.cs プロジェクト: IRC-SPHERE/ActiveTransfer
        /// <summary>
        /// Gets the subset made of the given instance indices for each of the given residents.
        /// </summary>
        /// <returns>A data set with one row per requested resident, each holding the requested instances in the order given.</returns>
        /// <param name="residents">Residents to include; row <c>i</c> of the result corresponds to <c>residents[i]</c>.</param>
        /// <param name="indices">Instance indices to copy; the same indices are applied to every requested resident.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="residents"/> or <paramref name="indices"/> is null.</exception>
        public DataSet GetSubSet(IList<int> residents, IList<int> indices)
        {
            if (residents == null)
            {
                throw new ArgumentNullException("residents");
            }

            if (indices == null)
            {
                throw new ArgumentNullException("indices");
            }

            // One row per requested resident. A single fixed row made any call with more than one
            // resident index past the end of the result arrays.
            var features = new double[residents.Count][][];
            var labels = new bool[residents.Count][];

            for (int i = 0; i < residents.Count; i++)
            {
                int resident = residents[i];
                features[i] = new double[indices.Count][];
                labels[i] = new bool[indices.Count];

                for (int j = 0; j < indices.Count; j++)
                {
                    int index = indices[j];

                    // Feature vectors are shared by reference with this data set, not copied.
                    features[i][j] = Features[resident][index];
                    labels[i][j] = Labels[resident][index];
                }
            }

            return new DataSet { Features = features, Labels = labels };
        }