Example #1
0
        /// <summary>
        /// Creates the learners.
        /// </summary>
        /// <returns>The learners.</returns>
        /// <param name="dataSet">Data set.</param>
        /// <typeparam name="TLearner">The type of the learner.</typeparam>
        public static IList <IActiveLearner> CreateLearners <TLearner>(DataSet dataSet, BinaryModel trainModel, BinaryModel testModel, BinaryModel evidenceModel, bool reverse = false)
            where TLearner : IActiveLearner
        {
            var learners = new List <IActiveLearner>();

            for (int i = 0; i < dataSet.NumberOfResidents; i++)
            {
                var learner = (IActiveLearner)Activator.CreateInstance <TLearner>();
                learner.DataSet = dataSet.GetSubSet(i);
                if (learner is IReversableLearner && reverse)
                {
                    ((IReversableLearner)learner).Reversed = true;
                }

                if (learner is ActiveLearner)
                {
                    var activeLearner = (ActiveLearner)learner;
                    activeLearner.TrainModel = trainModel;
                    activeLearner.TestModel  = testModel;
                    activeLearner.RiskMatrix = new[, ] {
                        { 0.0, 1.0 }, { 1.0, 0.0 }
                    };
                    activeLearner.Gains = Enumerable.Range(0, 2).Select(ii => 1.0 / 3.0).ToArray();
                    activeLearner.Costs = Enumerable.Range(0, 2).Select(ii => 1.0).ToArray();
                }
                else if (learner is ActiveEvidence)
                {
                    var activeLearner = (ActiveEvidence)learner;
                    activeLearner.TrainModel    = trainModel;
                    activeLearner.TestModel     = testModel;
                    activeLearner.EvidenceModel = evidenceModel;
                }

                learners.Add(learner);
            }

            return(learners);
        }
Example #2
0
        /// <summary>
        /// Runs the active experiment.
        /// </summary>
        /// <param name="dataSet">Data set.</param>
        /// <param name="holdoutSet">Holdout set.</param>
        /// <param name="numberOfSelections">Number of selections.</param>
        /// <param name="priors">Priors.</param>
        public void RunActive(DataSet dataSet, DataSet holdoutSet, int numberOfSelections, Marginals priors)
        {
            if (ActiveLearners == null)
            {
                throw new InvalidOperationException("Active Learner not provided");
            }

            using (new CodeTimer("Running active experiment: " + Name))
            {
                Console.WriteLine();

                HoldoutMetrics = new HoldoutMetricsCollection {
                    Metrics = new Metrics[dataSet.NumberOfResidents][]
                };

                // Metrics = new MetricsCollection(numberOfSelections);
                PosteriorActivities        = new Bernoulli[dataSet.NumberOfResidents][];
                HoldoutPosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][][];
                IndividualPosteriors       = new Marginals[dataSet.NumberOfResidents];

                var accuracy = new double[dataSet.NumberOfResidents][];

                for (int i = 0; i < dataSet.NumberOfResidents; i++)
                {
                    HoldoutMetrics.Metrics[i] = new Metrics[numberOfSelections];

                    var collection = new List <Metrics>();
                    IndividualPosteriors[i] = new Marginals(priors);

                    // Test on holdout set
                    HoldoutPosteriorActivities[i] = new Bernoulli[numberOfSelections][];
                    accuracy[i] = new double[numberOfSelections];

                    var dataSetForResident    = dataSet.GetSubSet(i);
                    var holdoutSetForResident = holdoutSet.GetSubSet(i);
                    // ActiveLearners[i].Transfer(i, 1);

                    // var individualPosteriors = new Marginals(priors);
                    for (int j = 0; j < numberOfSelections; j++)
                    {
                        PosteriorActivities[i]           = TestModel.Test(dataSetForResident, IndividualPosteriors[i])[0];
                        HoldoutPosteriorActivities[i][j] = TestModel.Test(holdoutSetForResident, IndividualPosteriors[i])[0];

                        if (ActiveLearners[i].Unlabelled.Count == 0)
                        {
                            Console.WriteLine("Empty unlabelled set");
                            break;
                        }

                        // int index = ActiveLearner.GetValueOfInformation(i).ArgMax();
                        int    index;
                        double val;
                        ActiveLearners[i].GetArgMaxVOI(PosteriorActivities[i], IndividualPosteriors[i], out index, out val);

                        // Console.WriteLine("Index {0,4}, VOI {1:N4}", index, value);

                        // Now retrain using this label
                        ActiveLearners[i].UpdateModel(index);
                        //IndividualPosteriors [i] = TrainModel.Train( dataSet.GetSubSet(i, ActiveLearners [i].Labelled.ToList()), priors, 10);
                        IndividualPosteriors[i] = TrainModel.Train(dataSet.GetSubSet(i, index), IndividualPosteriors[i], 50);

                        var metrics = new Metrics {
                            Name = Name, Estimates = HoldoutPosteriorActivities[i][j], TrueLabels = holdoutSet.Labels[i]
                        };
                        accuracy[i][j] = metrics.AverageAccuracy;

                        collection.Add(metrics);
                    }

                    // PrintPredictions(posteriorActivities.Select(ia => ia[0]).ToArray(), testLabels.Select(ia => ia[0]).ToArray());
                    HoldoutMetrics.Metrics[i] = collection.ToArray();

                    Console.WriteLine("{0,20}, Resident {1}, \n\t\tClass ratio       {5}, \n\t\tHold out accuracy {2:N2}, \n\t\tAccuracies {3} \n\t\tBriers     {4}\n",
                                      Name, i, collection.Average(ia => ia.AverageAccuracy).ToString("N2"),
                                      string.Join(", ", collection.Select(ia => ia.AverageAccuracy.ToString("N2"))),
                                      string.Join(", ", collection.Select(ia => ia.BrierScore.ToString("N2"))),
                                      holdoutSet.Labels[i].Average().ToString("N2")
                                      );
                }

                HoldoutMetrics.RecomputeAggregateMetrics();
            }
        }
Example #3
0
        /// <summary>
        /// Runs the online experiment.
        /// </summary>
        /// <param name="dataSet">Data set.</param>
        /// <param name="holdoutSet">Holdout set.</param>
        /// <param name="priors">Priors.</param>
        public void RunOnline(DataSet dataSet, DataSet holdoutSet, Marginals priors)
        {
            using (new CodeTimer("Running online experiment: " + Name))
            {
                Console.WriteLine();

                Metrics        = new MetricsCollection();
                HoldoutMetrics = new HoldoutMetricsCollection {
                    Metrics = new Metrics[dataSet.NumberOfResidents][]
                };

                PosteriorActivities        = new Bernoulli[dataSet.NumberOfResidents][];
                HoldoutPosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][][];
                IndividualPosteriors       = new Marginals[dataSet.NumberOfResidents];

                var accuracy = new double[dataSet.NumberOfResidents][];

                for (int i = 0; i < dataSet.NumberOfResidents; i++)
                {
                    var collection = new List <Metrics>();
                    HoldoutPosteriorActivities[i] = new Bernoulli[dataSet.NumberOfInstances[i]][];
                    accuracy[i] = new double[dataSet.NumberOfInstances[i]];

                    IndividualPosteriors[i] = new Marginals(priors);
                    PosteriorActivities[i]  = new Bernoulli[dataSet.NumberOfInstances[i]];

                    for (int j = 0; j < dataSet.NumberOfInstances[i]; j++)
                    {
                        var datum = dataSet.GetSubSet(i, j);
                        PosteriorActivities[i][j]        = TestModel.Test(datum, IndividualPosteriors[i])[0][0];
                        HoldoutPosteriorActivities[i][j] = TestModel.Test(holdoutSet.GetSubSet(i), IndividualPosteriors[i])[0];

                        // Test on holdout set
                        var holdoutMetrics = new Metrics {
                            Name = Name, Estimates = HoldoutPosteriorActivities[i][j], TrueLabels = holdoutSet.Labels[i]
                        };
                        accuracy[i][j] = holdoutMetrics.AverageAccuracy;

                        // PrintPrediction(i, temp[0][0], testLabels[0][i], testScores[0][i]);

                        // Now retrain using this label
                        IndividualPosteriors[i] = TrainModel.Train(datum, IndividualPosteriors[i], 10);

                        collection.Add(holdoutMetrics);
                    }

                    // PrintPredictions(posteriorActivities.Select(ia => ia[0]).ToArray(), testLabels.Select(ia => ia[0]).ToArray());
                    Metrics.Add(new Metrics {
                        Name = Name, Estimates = PosteriorActivities[i], TrueLabels = dataSet.Labels[i]
                    }, true);

                    HoldoutMetrics.Metrics[i] = collection.ToArray();

                    Console.WriteLine("{0,20}, Resident {1}, Hold out accuracy {2:N2}", Name, i, collection.Average(ia => ia.AverageAccuracy));
                }

                HoldoutMetrics.RecomputeAggregateMetrics();
                Metrics.RecomputeAggregateMetrics();

                // Console.WriteLine("Accuracies " + string.Join(", ", accuracy.ColumnAverage().Select(x => x.ToString("N2"))));
                // Console.WriteLine("Std. dev.  " + string.Join(", ", accuracy.ColumnStandardDeviation().Select(x => x.ToString("N2"))));
                // Console.WriteLine("Accuracies " + string.Join(", ", HoldoutMetrics.AverageAccuracy.Select(x => x.ToString("N2"))));
            }
        }
        private double ExpectedEvidence(int index, Marginals priors)
        {
            var niter = 1;

            //var pp = hypothesisActivityPosteriors[index];
            bool trueLabel = DataSet.Labels[0][index];


            var labelled = new HashSet <int>(Labelled);

            labelled.Add(index);

            //evidenceData.Labels[0] = evidenceData.Labels[0].Select( ll => !ll ).ToArray();


            // Learn as if positive
            DataSet.Labels[0][index] = true;

            Marginals positivePosteriors = priors;

            try
            {
                if (Reversed)
                {
                    positivePosteriors = priors;
                }
                else
                {
                    positivePosteriors = TrainModel.Train(DataSet.GetSubSet(0, index), priors, niter);
                }
            }
            catch (ImproperMessageException)
            {
                // As fallback use priors
            }

            var positivePriorEvidence = EvidenceModel.ComputeEvidence(DataSet.GetSubSet(0, labelled.ToList()), priors);
            var positivePostrEvidence = EvidenceModel.ComputeEvidence(DataSet.GetSubSet(0, labelled.ToList()), positivePosteriors);



            // Learn as if negative
            DataSet.Labels[0][index] = false;

            Marginals negativePosteriors = priors;

            try
            {
                if (Reversed)
                {
                    negativePosteriors = priors;
                }
                else
                {
                    negativePosteriors = TrainModel.Train(DataSet.GetSubSet(0, index), priors, niter);
                }
            }
            catch (ImproperMessageException)
            {
                // As fallback use priors
            }

            var negativePriorEvidence = EvidenceModel.ComputeEvidence(DataSet.GetSubSet(0, labelled.ToList()), priors);
            var negativePostrEvidence = EvidenceModel.ComputeEvidence(DataSet.GetSubSet(0, labelled.ToList()), negativePosteriors);



            DataSet.Labels[0][index] = trueLabel;



            var returns = new List <double>();

            returns.Add(
                (positivePriorEvidence.LogOdds) /
                (negativePriorEvidence.LogOdds)
                );
            //return Math.Max( returns.Last(), 1.0 / returns.Last() );

            returns.Add(
                (positivePostrEvidence.LogOdds) /
                (negativePostrEvidence.LogOdds)
                );
            //return Math.Max( returns.Last(), 1.0 / returns.Last() );

            returns.Add(
                (positivePriorEvidence.LogOdds + positivePostrEvidence.GetLogProbTrue()) /
                (negativePriorEvidence.LogOdds + negativePostrEvidence.GetLogProbTrue())
                );
            return(Math.Max(returns.Last(), 1.0 / returns.Last()));
        }
Example #5
0
        /// <summary>
        /// Runs the online experiment.
        /// </summary>
        /// <param name="dataSet">Data set.</param>
        /// <param name="holdoutSet">Holdout set.</param>
        /// <param name="priors">Priors.</param>
        public void RunOnline(DataSet dataSet, DataSet holdoutSet, Marginals priors)
        {
            using (new CodeTimer("Running online experiment: " + Name))
            {
                Console.WriteLine();

                Metrics = new MetricsCollection();
                HoldoutMetrics = new HoldoutMetricsCollection { Metrics = new Metrics[dataSet.NumberOfResidents][] };

                PosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][];
                HoldoutPosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][][];
                IndividualPosteriors = new Marginals[dataSet.NumberOfResidents];

                var accuracy = new double[dataSet.NumberOfResidents][];

                for (int i = 0; i < dataSet.NumberOfResidents; i++)
                {
                    var collection = new List<Metrics>();
                    HoldoutPosteriorActivities[i] = new Bernoulli[dataSet.NumberOfInstances[i]][];
                    accuracy[i] = new double[dataSet.NumberOfInstances[i]];

                    IndividualPosteriors[i] = new Marginals(priors);
                    PosteriorActivities[i] = new Bernoulli[dataSet.NumberOfInstances[i]];

                    for (int j = 0; j < dataSet.NumberOfInstances[i]; j++)
                    {
                        var datum = dataSet.GetSubSet(i, j);
                        PosteriorActivities[i][j] = TestModel.Test(datum, IndividualPosteriors[i])[0][0];
                        HoldoutPosteriorActivities[i][j] = TestModel.Test(holdoutSet.GetSubSet(i), IndividualPosteriors[i])[0];

                        // Test on holdout set
                        var holdoutMetrics = new Metrics { Name = Name, Estimates = HoldoutPosteriorActivities[i][j], TrueLabels = holdoutSet.Labels[i] };
                        accuracy[i][j] = holdoutMetrics.AverageAccuracy;

                        // PrintPrediction(i, temp[0][0], testLabels[0][i], testScores[0][i]);

                        // Now retrain using this label
                        IndividualPosteriors[i] = TrainModel.Train(datum, IndividualPosteriors[i], 10);

                        collection.Add(holdoutMetrics);
                    }

                    // PrintPredictions(posteriorActivities.Select(ia => ia[0]).ToArray(), testLabels.Select(ia => ia[0]).ToArray());
                    Metrics.Add(new Metrics { Name = Name, Estimates = PosteriorActivities[i], TrueLabels = dataSet.Labels[i] }, true);

                    HoldoutMetrics.Metrics[i] = collection.ToArray();

                    Console.WriteLine("{0,20}, Resident {1}, Hold out accuracy {2:N2}", Name, i, collection.Average(ia => ia.AverageAccuracy));
                }

                HoldoutMetrics.RecomputeAggregateMetrics();
                Metrics.RecomputeAggregateMetrics();

                // Console.WriteLine("Accuracies " + string.Join(", ", accuracy.ColumnAverage().Select(x => x.ToString("N2"))));
                // Console.WriteLine("Std. dev.  " + string.Join(", ", accuracy.ColumnStandardDeviation().Select(x => x.ToString("N2"))));
                // Console.WriteLine("Accuracies " + string.Join(", ", HoldoutMetrics.AverageAccuracy.Select(x => x.ToString("N2"))));
            }
        }
Example #6
0
        /// <summary>
        /// Runs the active experiment.
        /// </summary>
        /// <param name="dataSet">Data set.</param>
        /// <param name="holdoutSet">Holdout set.</param>
        /// <param name="numberOfSelections">Number of selections.</param>
        /// <param name="priors">Priors.</param>
        public void RunActive(DataSet dataSet, DataSet holdoutSet, int numberOfSelections, Marginals priors)
        {
            if (ActiveLearners == null)
            {
                throw new InvalidOperationException("Active Learner not provided");
            }

            using (new CodeTimer("Running active experiment: " + Name))
            {
                Console.WriteLine();

                HoldoutMetrics = new HoldoutMetricsCollection { Metrics = new Metrics[dataSet.NumberOfResidents][] };

                // Metrics = new MetricsCollection(numberOfSelections);
                PosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][];
                HoldoutPosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][][];
                IndividualPosteriors = new Marginals[dataSet.NumberOfResidents];

                var accuracy = new double[dataSet.NumberOfResidents][];

                for (int i = 0; i < dataSet.NumberOfResidents; i++)
                {
                    HoldoutMetrics.Metrics[i] = new Metrics[numberOfSelections];

                    var collection = new List<Metrics>();
                    IndividualPosteriors[i] = new Marginals(priors);

                    // Test on holdout set
                    HoldoutPosteriorActivities[i] = new Bernoulli[numberOfSelections][];
                    accuracy[i] = new double[numberOfSelections];

                    var dataSetForResident = dataSet.GetSubSet(i);
                    var holdoutSetForResident = holdoutSet.GetSubSet(i);
                    // ActiveLearners[i].Transfer(i, 1);

                    // var individualPosteriors = new Marginals(priors);
                    for (int j = 0; j < numberOfSelections; j++)
                    {
                        PosteriorActivities[i] = TestModel.Test(dataSetForResident, IndividualPosteriors[i])[0];
                        HoldoutPosteriorActivities[i][j] = TestModel.Test(holdoutSetForResident, IndividualPosteriors[i])[0];

                        if (ActiveLearners[i].Unlabelled.Count == 0)
                        {
                            Console.WriteLine("Empty unlabelled set");
                            break;
                        }

                        // int index = ActiveLearner.GetValueOfInformation(i).ArgMax();
                        int index;
                        double val;
                        ActiveLearners[i].GetArgMaxVOI(PosteriorActivities[i], IndividualPosteriors[i], out index, out val);

                        // Console.WriteLine("Index {0,4}, VOI {1:N4}", index, value);

                        // Now retrain using this label
                        ActiveLearners[i].UpdateModel(index);
                        //IndividualPosteriors [i] = TrainModel.Train( dataSet.GetSubSet(i, ActiveLearners [i].Labelled.ToList()), priors, 10);
                        IndividualPosteriors[i] = TrainModel.Train(dataSet.GetSubSet(i, index), IndividualPosteriors[i], 50);

                        var metrics = new Metrics { Name = Name, Estimates = HoldoutPosteriorActivities[i][j], TrueLabels = holdoutSet.Labels[i] };
                        accuracy[i][j] = metrics.AverageAccuracy;

                        collection.Add(metrics);
                    }

                    // PrintPredictions(posteriorActivities.Select(ia => ia[0]).ToArray(), testLabels.Select(ia => ia[0]).ToArray());
                    HoldoutMetrics.Metrics[i] = collection.ToArray();

                    Console.WriteLine("{0,20}, Resident {1}, \n\t\tClass ratio       {5}, \n\t\tHold out accuracy {2:N2}, \n\t\tAccuracies {3} \n\t\tBriers     {4}\n",
                        Name, i, collection.Average(ia => ia.AverageAccuracy).ToString("N2"),
                        string.Join(", ", collection.Select(ia => ia.AverageAccuracy.ToString("N2"))),
                        string.Join(", ", collection.Select(ia => ia.BrierScore.ToString("N2"))),
                        holdoutSet.Labels[i].Average().ToString("N2")
                        );
                }

                HoldoutMetrics.RecomputeAggregateMetrics();

            }
        }
Example #7
0
        private double JAll_j(int index, Bernoulli[] activityPosteriors, Marginals priors)
        {
            // var prevProbs = activityPosteriors.Select(ia => new Bernoulli(ia)).ToArray();
            hypothesisActivityPosteriors = activityPosteriors.Select(ia => new Bernoulli(ia)).ToArray();

            // Get datum
            // var datum = DataSet.GetSubSet(0, index);
            bool trueLabel = DataSet.Labels[0][index];

            // Create copies of the Labelled an Unlabelled sets
            var labelled   = new HashSet <int>(Labelled);
            var unlabelled = new HashSet <int>(Unlabelled);

            labelled.Add(index);
            unlabelled.Remove(index);

            // datum.Labels[0][0] = true;
            DataSet.Labels[0][index] = true;

            // Learn as if positive
            // var positivePosteriors = TrainModel.Train(datum, priors, 1);
            Marginals positivePosteriors = priors;

            try
            {
                positivePosteriors = TrainModel.Train(DataSet.GetSubSet(0, index), priors, 1);
            }
            catch (ImproperMessageException)
            {
                // As fallback use priors
            }

            // recompute probabilities
            CalculateProbabilities(positivePosteriors);

            //var jjposl = JL(labelled);
            //var jjposu = JU(unlabelled);
            var Jjpos = (JAll()) * (1.0 - hypothesisActivityPosteriors[index].GetMean());

            // Restore posteriors

            labelled.Add(index);
            unlabelled.Remove(index);

            // datum.Labels[0][0] = false;
            DataSet.Labels[0][index] = false;

            // Learn as if negative
            // var negativePosteriors = TrainModel.Train(datum, priors, 1);
            Marginals negativePosteriors = priors;

            try
            {
                negativePosteriors = TrainModel.Train(DataSet.GetSubSet(0, index), priors, 1);
            }
            catch (ImproperMessageException)
            {
                // As fallback use priors
            }

            // recompute probabilities
            CalculateProbabilities(negativePosteriors);

            //var jjnegl = JL(labelled);
            //var jjnegu = JU(unlabelled);
            var Jjneg = (JAll()) * (hypothesisActivityPosteriors[index].GetMean());

            // restore posteriors
            // activityPosteriors = prevProbs;
            DataSet.Labels[0][index] = trueLabel;

            var voi = Jjpos + Jjneg;

            return(voi);
        }
Example #8
0
 /// <summary>
 /// Gets the probability of the given index.
 /// </summary>
 /// <returns>The probability of.</returns>
 /// <param name="index">Index.</param>
 private Bernoulli GetProbabilityOf(int index, Marginals priors)
 {
     return(TestModel.Test(DataSet.GetSubSet(index), priors)[0][0]);
 }