/// <summary>
/// Computes the value of information for labelling the instance at <paramref name="index"/>:
/// the reduction in total risk (jall minus the expected risk after labelling) minus the labelling cost.
/// </summary>
/// <param name="jall">Current total risk over all instances.</param>
/// <param name="index">Index of the candidate instance.</param>
/// <param name="activityPosteriors">Current activity posteriors.</param>
/// <param name="priors">Priors.</param>
private double VOI(double jall, int index, Bernoulli[] activityPosteriors, Marginals priors)
{
    double labelProbability = activityPosteriors[index].GetMean();
    double expectedRiskAfterLabelling = JAll_j(index, activityPosteriors, priors);
    double labellingCost = Cj(labelProbability, index);
    return jall - expectedRiskAfterLabelling - labellingCost;
}
/// <summary>
/// Uncertainty sampling: selects the unlabelled instance whose posterior activity
/// probability is closest to 0.5 (or furthest, when <c>Reversed</c> is set).
/// </summary>
/// <param name="activityPosteriors">Activity posteriors.</param>
/// <param name="priors">Priors.</param>
/// <param name="argMax">Index of the selected instance (-1 if none).</param>
/// <param name="maxVal">Distance from 0.5 of the selected instance.</param>
public override void GetArgMaxVOI(Bernoulli[] activityPosteriors, Marginals priors, out int argMax, out double maxVal)
{
    // Single pass over the unlabelled pool.
    argMax = -1;
    maxVal = Reversed ? double.NegativeInfinity : double.PositiveInfinity;
    bool found = false;

    foreach (var candidate in Unlabelled)
    {
        double distanceFromBoundary = Math.Abs(0.5 - activityPosteriors[candidate].GetMean());

        // Reversed selects the MOST certain instance; otherwise the LEAST certain.
        bool better = Reversed
            ? distanceFromBoundary > maxVal
            : distanceFromBoundary < maxVal;

        if (better || !found)
        {
            argMax = candidate;
            maxVal = distanceFromBoundary;
            found = true;
        }
    }
}
/// <summary>
/// Runs an active-selection test: actively selects <paramref name="numActivelySelected"/>
/// instances one at a time (updating the model after each), then scores the remaining
/// unlabelled instances with the final posteriors and prints summary metrics.
/// </summary>
/// <param name="numActivelySelected">Number of instances to select actively.</param>
/// <param name="priors">Priors.</param>
public void VOITest(int numActivelySelected, Marginals priors)
{
    var onlineEstimates = new List<Bernoulli>();
    var onlineTargets = new List<bool>();
    Metrics metrics = null;

    // Snapshot the running estimates/targets into a Metrics object.
    Func<Metrics> buildMetrics = () => new Metrics
    {
        Name = "active",
        Estimates = onlineEstimates.Select(ia => new Bernoulli(ia)).ToArray(),
        TrueLabels = onlineTargets.ToArray()
    };

    for (int step = 0; step < numActivelySelected; ++step)
    {
        CalculateProbabilities(priors);

        int argMax;
        double maxVal;
        GetArgMaxVOI(hypothesisActivityPosteriors, priors, out argMax, out maxVal);

        // Move the selected instance from the unlabelled to the labelled pool
        // and retrain on it.
        Unlabelled.Remove(argMax);
        Labelled.Add(argMax);
        UpdateModel(argMax);

        onlineEstimates.Add(GetProbabilityOf(argMax, priors));
        onlineTargets.Add(DataSet.Labels[0][argMax]);
        metrics = buildMetrics();
    }

    // Score whatever is left with the final posteriors.
    if (Unlabelled.Any())
    {
        CalculateProbabilities(priors);
        foreach (var index in Unlabelled)
        {
            onlineEstimates.Add(hypothesisActivityPosteriors[index]);
            onlineTargets.Add(DataSet.Labels[0][index]);
        }

        metrics = buildMetrics();
    }

    if (metrics != null)
    {
        metrics.PrintSummary();
    }
}
/// <summary>
/// Prints, per feature, the posterior weight mean/precision alongside the community weight mean/precision.
/// </summary>
/// <param name="posteriors">Posteriors.</param>
/// <param name="communityWeights">Community weights.</param>
public static void PrintWeightPriors(Marginals posteriors, Gaussian[] communityWeights)
{
    int featureCount = posteriors.WeightMeans.Length;
    for (int feature = 0; feature < featureCount; feature++)
    {
        Console.WriteLine(
            "Feature {0} Posterior Mean {1}, Posterior Precision {2}, Weight mean {3}, weight precision {4}",
            feature,
            posteriors.WeightMeans[feature],
            posteriors.WeightPrecisions[feature],
            communityWeights[feature].GetMean(),
            communityWeights[feature].Precision);
    }
}
/// <summary>
/// Runs batch training over the whole data set and stores the result in <c>Posteriors</c>.
/// </summary>
/// <param name="dataSet">Data set.</param>
/// <param name="priors">Priors.</param>
/// <param name="niter">Number of training iterations.</param>
public void RunBatch(DataSet dataSet, Marginals priors, int niter = 1)
{
    Posteriors = TrainModel.Train(dataSet, priors, niter);
}
/// <summary>
/// Computes the model evidence for the given data set under the given priors.
/// </summary>
/// <param name="dataSet">Data set.</param>
/// <param name="priors">Priors.</param>
public Bernoulli ComputeEvidence(DataSet dataSet, Marginals priors)
{
    var meanCopies = DistributionArrayHelpers.Copy(priors.WeightMeans);
    var precisionCopies = DistributionArrayHelpers.Copy(priors.WeightPrecisions);
    SetObservedVariables(dataSet.Features, meanCopies, precisionCopies, dataSet.Labels);

    // A single iteration is enough for the evidence computation.
    engine.Algorithm.DefaultNumberOfIterations = 1;
    return engine.Infer<Bernoulli>(evidence);
}
/// <summary>
/// Tests the specified data set under the given priors and returns the activity posteriors.
/// </summary>
/// <param name="dataSet">Data set.</param>
/// <param name="priors">Priors.</param>
public Bernoulli[][] Test(DataSet dataSet, Marginals priors)
{
    SetObservedVariables(
        dataSet.Features,
        DistributionArrayHelpers.Copy(priors.WeightMeans),
        DistributionArrayHelpers.Copy(priors.WeightPrecisions));

#if !USE_PRECOMPILED_ALGORITHM
    var result = engine.Infer<Bernoulli[][]>(activities);
#else
    algorithm.Execute(1);
    var result = algorithm.Marginal<Bernoulli[][]>(activities.Name);
#endif

    return result;
}
/// <summary>
/// Selects the unlabelled instance with the largest value of information
/// (smallest when <c>Reversed</c> is set).
/// </summary>
/// <param name="activityPosteriors">Activity posteriors.</param>
/// <param name="priors">Priors.</param>
/// <param name="argMax">Index of the selected instance (-1 if the unlabelled pool is empty).</param>
/// <param name="maxVal">Value of information of the selected instance.</param>
public override void GetArgMaxVOI(Bernoulli[] activityPosteriors, Marginals priors, out int argMax, out double maxVal)
{
    // Work on copies of the posteriors so that the hypothesised updates in VOI/JAll_j
    // do not disturb the caller's array.
    hypothesisActivityPosteriors = activityPosteriors.Select(ia => new Bernoulli(ia)).ToArray();
    double jall = JAll();

    // Snapshot the pool up front; VOI retrains hypothetically per candidate.
    var unlabelled = Unlabelled.ToArray();

    argMax = -1;
    maxVal = Reversed ? double.PositiveInfinity : double.NegativeInfinity;

    // Fix: the previous version also filled `signs` and `vois` arrays that were
    // never read (only referenced from commented-out diagnostics); that dead
    // work and its allocations have been removed.
    foreach (var index in unlabelled)
    {
        double voi = VOI(jall, index, activityPosteriors, priors);

        bool better = Reversed ? voi < maxVal : voi > maxVal;
        if (better || argMax < 0)
        {
            maxVal = voi;
            argMax = index;
        }
    }
}
/// <summary>
/// Runs the suite of active-transfer experiments (random, uncertainty, certainty,
/// VOI-max and VOI-min selection) on the third data set and plots holdout metrics.
/// </summary>
/// <param name="trainModel">The train model.</param>
/// <param name="testModel">The test model.</param>
/// <param name="data">The datasets.</param>
/// <param name="title">Title used for logging and plots.</param>
/// <param name="priors">Priors.</param>
public static void ActiveTransfer(BinaryModel trainModel, BinaryModel testModel, IList<ToyData> data, string title, Marginals priors)
{
    var activeData = data[2];

    // One learner set per selection strategy; the trailing `true` flips the strategy.
    var learners = new Dictionary<string, IList<IActiveLearner>>
    {
        { "Random", Utils.CreateLearners<RandomLearner>(activeData.DataSet, trainModel, testModel, null) },
        { "US", Utils.CreateLearners<UncertainActiveLearner>(activeData.DataSet, trainModel, testModel, null) },
        { "CS", Utils.CreateLearners<UncertainActiveLearner>(activeData.DataSet, trainModel, testModel, null, true) },
        { "VOI+", Utils.CreateLearners<ActiveLearner>(activeData.DataSet, trainModel, testModel, null) },
        { "VOI-", Utils.CreateLearners<ActiveLearner>(activeData.DataSet, trainModel, testModel, null, true) }
    };

    var experiments = new List<Experiment>();
    foreach (var pair in learners)
    {
        Console.WriteLine("Testing {0} ({1})", title, pair.Key);

        var experiment = new Experiment
        {
            TrainModel = trainModel,
            TestModel = testModel,
            Name = pair.Key,
            ActiveLearners = pair.Value
        };

        experiment.RunActive(activeData.DataSet, activeData.HoldoutSet, ActiveSteps, priors);
        experiments.Add(experiment);
    }

    Utils.PlotHoldoutMetrics(experiments, title, "", true);
}
/// <summary>
/// Trains on the specified data set under the given priors and returns the posterior marginals.
/// </summary>
/// <param name="dataSet">Data set.</param>
/// <param name="priors">Priors.</param>
/// <param name="numberOfIterations">Number of inference iterations.</param>
public Marginals Train(DataSet dataSet, Marginals priors, int numberOfIterations = 10)
{
    SetObservedVariables(
        dataSet.Features,
        DistributionArrayHelpers.Copy(priors.WeightMeans),
        DistributionArrayHelpers.Copy(priors.WeightPrecisions),
        dataSet.Labels);

#if !USE_PRECOMPILED_ALGORITHM
    engine.Algorithm.DefaultNumberOfIterations = numberOfIterations;
    var weightPosteriors = engine.Infer<Gaussian[][]>(weights);
    var meanPosteriors = engine.Infer<Gaussian[]>(weightMeans);
    var precisionPosteriors = engine.Infer<Gamma[]>(weightPrecisions);
#else
    algorithm.Execute(numberOfIterations);
    var weightPosteriors = algorithm.Marginal<Gaussian[][]>(weights.Name);
    var meanPosteriors = algorithm.Marginal<Gaussian[]>(weightMeans.Name);
    var precisionPosteriors = algorithm.Marginal<Gamma[]>(weightPrecisions.Name);
#endif

    return new Marginals
    {
        Weights = weightPosteriors,
        WeightMeans = meanPosteriors,
        WeightPrecisions = precisionPosteriors
    };
}
/// <summary>
/// Scores a candidate instance by comparing model evidence under the two hypothesised
/// labels (positive vs negative), each evaluated against priors and against posteriors
/// retrained with that hypothesised label.
/// NOTE(review): the instance label is temporarily overwritten and restored at the end;
/// if ComputeEvidence throws, the true label is NOT restored — consider try/finally.
/// </summary>
/// <param name="index">Index of the candidate instance.</param>
/// <param name="priors">Priors.</param>
/// <returns>max(r, 1/r) of the final evidence ratio, so values are always >= 1.</returns>
private double ExpectedEvidence(int index, Marginals priors)
{
    var niter = 1;
    bool trueLabel = DataSet.Labels[0][index];

    // Labelled set as it would look after acquiring this instance.
    var labelled = new HashSet<int>(Labelled);
    labelled.Add(index);

    // Learn as if positive
    DataSet.Labels[0][index] = true;
    Marginals positivePosteriors = priors;
    try
    {
        if (Reversed)
        {
            // Reversed mode skips retraining and keeps the priors.
            positivePosteriors = priors;
        }
        else
        {
            positivePosteriors = TrainModel.Train(DataSet.GetSubSet(0, index), priors, niter);
        }
    }
    catch (ImproperMessageException)
    {
        // As fallback use priors
    }

    var positivePriorEvidence = EvidenceModel.ComputeEvidence(DataSet.GetSubSet(0, labelled.ToList()), priors);
    var positivePostrEvidence = EvidenceModel.ComputeEvidence(DataSet.GetSubSet(0, labelled.ToList()), positivePosteriors);

    // Learn as if negative
    DataSet.Labels[0][index] = false;
    Marginals negativePosteriors = priors;
    try
    {
        if (Reversed)
        {
            negativePosteriors = priors;
        }
        else
        {
            negativePosteriors = TrainModel.Train(DataSet.GetSubSet(0, index), priors, niter);
        }
    }
    catch (ImproperMessageException)
    {
        // As fallback use priors
    }

    var negativePriorEvidence = EvidenceModel.ComputeEvidence(DataSet.GetSubSet(0, labelled.ToList()), priors);
    var negativePostrEvidence = EvidenceModel.ComputeEvidence(DataSet.GetSubSet(0, labelled.ToList()), negativePosteriors);

    // Restore the ground-truth label before returning.
    DataSet.Labels[0][index] = trueLabel;

    // Three candidate scores were experimented with; only the last is returned.
    var returns = new List<double>();
    returns.Add(
        (positivePriorEvidence.LogOdds)
        / (negativePriorEvidence.LogOdds)
    );
    returns.Add(
        (positivePostrEvidence.LogOdds)
        / (negativePostrEvidence.LogOdds)
    );
    returns.Add(
        (positivePriorEvidence.LogOdds + positivePostrEvidence.GetLogProbTrue())
        / (negativePriorEvidence.LogOdds + negativePostrEvidence.GetLogProbTrue())
    );

    return(Math.Max(returns.Last(), 1.0 / returns.Last()));
}
/// <summary>
/// Evidence-based selection: scores the 10 most uncertain unlabelled instances
/// with <c>ExpectedEvidence</c> and selects the one with the smallest score.
/// Throws if the unlabelled pool is empty (unchanged from the original).
/// </summary>
/// <param name="activityPosteriors">Activity posteriors (unused; probabilities are recomputed from the priors).</param>
/// <param name="priors">Priors.</param>
/// <param name="argMax">Index of the selected instance.</param>
/// <param name="maxVal">Evidence score of the selected instance.</param>
public override void GetArgMaxVOI(MicrosoftResearch.Infer.Distributions.Bernoulli[] activityPosteriors, Marginals priors, out int argMax, out double maxVal)
{
    CalculateProbabilities(priors);

    // Shuffle first so ties on uncertainty are broken at random (OrderBy is a
    // stable sort), then keep the 10 instances closest to the decision boundary.
    var candidates = Unlabelled
        .OrderBy(_ => rng.NextDouble())
        .OrderBy(uu => Math.Abs(hypothesisActivityPosteriors[uu].GetMean() - 0.5))
        .Take(10);

    var evidences = new Dictionary<int, double>();
    foreach (var index in candidates)
    {
        evidences.Add(index, ExpectedEvidence(index, priors));
    }

    // Fix: the ordering was previously enumerated twice (ordered.First() for the
    // key and again for the value), re-running the sort; evaluate it once.
    var best = evidences.OrderBy(ee => ee.Value).First();
    argMax = best.Key;
    maxVal = best.Value;
}
/// <summary>
/// Trains the model on the given data set starting from the supplied priors.
/// </summary>
/// <param name="dataSet">Data set.</param>
/// <param name="priors">Priors.</param>
/// <param name="numberOfIterations">Number of inference iterations.</param>
/// <returns>The posterior marginals over weights, weight means and weight precisions.</returns>
public Marginals Train(DataSet dataSet, Marginals priors, int numberOfIterations = 10)
{
    var priorMeans = DistributionArrayHelpers.Copy(priors.WeightMeans);
    var priorPrecisions = DistributionArrayHelpers.Copy(priors.WeightPrecisions);
    SetObservedVariables(dataSet.Features, priorMeans, priorPrecisions, dataSet.Labels);

    var result = new Marginals();
#if !USE_PRECOMPILED_ALGORITHM
    engine.Algorithm.DefaultNumberOfIterations = numberOfIterations;
    result.Weights = engine.Infer<Gaussian[][]>(weights);
    result.WeightMeans = engine.Infer<Gaussian[]>(weightMeans);
    result.WeightPrecisions = engine.Infer<Gamma[]>(weightPrecisions);
#else
    algorithm.Execute(numberOfIterations);
    result.Weights = algorithm.Marginal<Gaussian[][]>(weights.Name);
    result.WeightMeans = algorithm.Marginal<Gaussian[]>(weightMeans.Name);
    result.WeightPrecisions = algorithm.Marginal<Gamma[]>(weightPrecisions.Name);
#endif
    return result;
}
/// <summary>
/// Computes the model evidence for <paramref name="dataSet"/> under <paramref name="priors"/>.
/// </summary>
/// <param name="dataSet">Data set.</param>
/// <param name="priors">Priors.</param>
public Bernoulli ComputeEvidence(DataSet dataSet, Marginals priors)
{
    SetObservedVariables(
        dataSet.Features,
        DistributionArrayHelpers.Copy(priors.WeightMeans),
        DistributionArrayHelpers.Copy(priors.WeightPrecisions),
        dataSet.Labels);

    // Evidence only needs a single inference pass.
    engine.Algorithm.DefaultNumberOfIterations = 1;
    var evidencePosterior = engine.Infer<Bernoulli>(evidence);
    return evidencePosterior;
}
/// <summary>
/// Random selection baseline: picks a uniformly random instance from the unlabelled pool.
/// </summary>
/// <param name="activityPosteriors">Activity posteriors (unused).</param>
/// <param name="priors">Priors (unused).</param>
/// <param name="argMax">Index of the randomly chosen instance.</param>
/// <param name="maxVal">Always 0.0; there is no score for random selection.</param>
public override void GetArgMaxVOI(Bernoulli[] activityPosteriors, Marginals priors, out int argMax, out double maxVal)
{
    int chosen = random.Next(Unlabelled.Count);
    argMax = Unlabelled.ElementAt(chosen);
    maxVal = 0.0;
}
/// <summary>
/// Value of information for labelling instance <paramref name="index"/>: risk reduction
/// (current total risk minus expected post-labelling risk) less the labelling cost.
/// </summary>
/// <param name="jall">Current total risk.</param>
/// <param name="index">Candidate instance index.</param>
/// <param name="activityPosteriors">Current activity posteriors.</param>
/// <param name="priors">Priors.</param>
private double VOI(double jall, int index, Bernoulli[] activityPosteriors, Marginals priors)
{
    var meanActivity = activityPosteriors[index].GetMean();
    return jall - JAll_j(index, activityPosteriors, priors) - Cj(meanActivity, index);
}
/// <summary>
/// Runs the real-data active/transfer-learning experiments: trains a community model on
/// the source data, then runs the active-learning suite on the target data both with and
/// without transferring the community posteriors.
/// </summary>
/// <param name="trainModel">Train model.</param>
/// <param name="testModel">Test model.</param>
/// <param name="evidenceModel">Evidence model.</param>
/// <param name="testVOI">Whether to include VOI-based learners.</param>
/// <param name="testActiveEvidence">Whether to include evidence-based learners.</param>
public void Run(BinaryModel trainModel, BinaryModel testModel, BinaryModel evidenceModel, bool testVOI, bool testActiveEvidence)
{
    const int NumberOfResidents = 7;
    const double KeepProportion = 1.0;

    // First 48 features; presumably the feature set is fixed upstream — TODO confirm.
    var selectedFeatures = new HashSet<int>(Enumerable.Range(0, 48));

    // Source provides the target/test pool (ted); Target provides the training pool (trd).
    var ted = Source.GetDataSet(Enumerable.Range(1, 14), AddBias, selectedFeatures, KeepProportion);
    var trd = Target.GetDataSet(Enumerable.Range(1, 25), AddBias, selectedFeatures, KeepProportion);
    // var ted = Source.GetDataSet( Enumerable.Range( 1, 1 ), AddBias, selectedFeatures, KeepProportion );
    // var trd = Target.GetDataSet( Enumerable.Range( 1, 20 ), AddBias, selectedFeatures, KeepProportion );
    // var hod = Target.GetDataSet( Enumerable.Range( 1 + NumberOfResidents * 1, NumberOfResidents ) );

    DataSet testSet;
    DataSet holdoutSet;
    ted.SplitTrainTest(0.5, out testSet, out holdoutSet);

    var NumFeatures = trd.Features.First().First().Count();

    var trainData = new ToyData
    {
        NumberOfResidents = trd.NumberOfResidents,
        NumberOfFeatures = NumFeatures,
        NumberOfActivities = 2,
        UseBias = false,
        DataSet = trd
    };

    var testData = new ToyData
    {
        NumberOfResidents = NumberOfResidents,
        NumberOfFeatures = NumFeatures,
        NumberOfActivities = 2,
        UseBias = false,
        DataSet = testSet,
        HoldoutSet = holdoutSet
    };

    // Broad Gaussian(0,1) / Gamma(1,1) priors over the weights.
    var priors = new Marginals
    {
        WeightMeans = DistributionArrayHelpers.CreateGaussianArray(trainData.NumberOfFeatures, 0.0, 1.0).ToArray(),
        WeightPrecisions = DistributionArrayHelpers.CreateGammaArray(trainData.NumberOfFeatures, 1.0, 1.0).ToArray()
    };

    // TODO: Create meta-features that allow us to do the first form of transfer learning

    // Train the community model
    var communityExperiment = new Experiment
    {
        TrainModel = trainModel,
        TestModel = testModel,
        EvidenceModel = evidenceModel,
        Name = "Community"
    };
    communityExperiment.RunBatch(trainData.DataSet, priors);

    // communityExperiment.Posteriors.WeightPrecisions = priors.WeightPrecisions;
    // if (false)
    // {
    //     Utils.PlotPosteriors(communityExperiment.Posteriors.WeightMeans, communityExperiment.Posteriors.WeightPrecisions, null, "Community weights", "Feature", ShowPlots);
    //     Utils.PlotPosteriors(communityExperiment.Posteriors.WeightMeans, communityExperiment.Posteriors.WeightPrecisions, null, "Community weights (prior precision)", "Feature", ShowPlots);
    // }

    // Print top features
    // var topWeights = communityExperiment.Posteriors.WeightMeans.Zip(communityExperiment.Posteriors.WeightPrecisions, (m, p) => new { m, p }).Select((ia, i) => new { ia, i })
    //     .OrderByDescending(x => Math.Abs(x.ia.m.GetMean())).ToList();
    // Console.WriteLine("Top 20 weights:\n {0}", string.Join("\n", topWeights.Take(20).Select(pair => string.Format("{0}: {1}", pair.i, new Gaussian(pair.ia.m.GetMean(), pair.ia.p.GetMean())))));
    // communityExperiment.Posteriors.WeightPrecisions = DistributionArrayHelpers.Copy( priors.WeightPrecisions ).ToArray();

    // Transfer only the learnt weight means; precisions are reset to the priors.
    var sourcePosteriors = new Marginals
    {
        WeightMeans = communityExperiment.Posteriors.WeightMeans,
        WeightPrecisions = priors.WeightPrecisions, // communityExperiment.Posteriors.WeightMeans,
        Weights = null
    };

    // Select half the features
    /*
    trainData.DataSet.Features = trainData.DataSet.Features.Select(
        ia => ia.Select(
            ib => topWeights.Take(topWeights.Count / 2).Select(pair => ib[pair.i]).ToArray())
        .ToArray())
        .ToArray();
    // Retrain using these weights
    */

    // if (false)
    // {
    //     // Do online learning
    //     var onlineExperiment = new Experiment { TrainModel = trainModel, TestModel = testModel, Name = "Online" };
    //     onlineExperiment.RunOnline(testData.DataSet, testData.HoldoutSet, priors);
    //     // Do transfer learning
    //     var personalisationExperiment = new Experiment { TrainModel = trainModel, TestModel = testModel, Name = "Community" };
    //     personalisationExperiment.RunOnline(testData.DataSet, testData.HoldoutSet, communityExperiment.Posteriors);
    //     // Plot cumulative metrics
    //     Utils.PlotCumulativeMetrics(new [] { onlineExperiment, personalisationExperiment }, "Active", ShowPlots);
    // }

    // ACTIVE MODEL: run the active-learning suite once without and once with transfer.
    foreach (var doTransfer in new[] { false, true })
    {
        var experiments = new List<Experiment>();
        var learners = CreateLearners(trainModel, testModel, evidenceModel, testData, testVOI, testActiveEvidence);
        foreach (var learner in learners)
        {
            Console.WriteLine("Testing Active{0} Learning ({1})", doTransfer ? " Real Transfer" : "Real Online", learner.Key);
            var experiment = new Experiment
            {
                TrainModel = trainModel,
                TestModel = testModel,
                Name = learner.Key,
                ActiveLearners = learner.Value
            };
            experiment.RunActive(testData.DataSet, testData.HoldoutSet, ActiveSteps, doTransfer ? sourcePosteriors : priors);
            experiments.Add(experiment);
            if (false)
            {
                // Disabled diagnostic plot of the learnt individual posteriors.
                Utils.PlotPosteriors(
                    experiment.IndividualPosteriors[0].WeightMeans,
                    experiment.IndividualPosteriors[0].WeightPrecisions,
                    null,
                    "Posterior weights for " + learner.Key + " " + (doTransfer ? " (transfer)" : ""),
                    "Feature",
                    ShowPlots);
            }
        }

        Utils.PlotHoldoutMetrics(experiments, doTransfer ? "Real Active Transfer" : "Real Active", "", ShowPlots);
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="Marginals"/> class as a deep copy
/// of another instance; null members stay null.
/// </summary>
/// <param name="marginals">Marginals.</param>
public Marginals(Marginals marginals)
{
    if (marginals.Weights == null)
    {
        Weights = null;
    }
    else
    {
        Weights = marginals.Weights.Select(ia => DistributionArrayHelpers.Copy(ia).ToArray()).ToArray();
    }

    if (marginals.WeightMeans == null)
    {
        WeightMeans = null;
    }
    else
    {
        WeightMeans = DistributionArrayHelpers.Copy(marginals.WeightMeans).ToArray();
    }

    if (marginals.WeightPrecisions == null)
    {
        WeightPrecisions = null;
    }
    else
    {
        WeightPrecisions = DistributionArrayHelpers.Copy(marginals.WeightPrecisions).ToArray();
    }
}
/// <summary>
/// Runs the toy-data experiments: generates data for three phases (community training,
/// transfer evaluation, active evaluation with label noise), trains a community model,
/// and optionally runs the transfer, active and active-transfer experiment suites.
/// </summary>
/// <param name="trainModel">Train model.</param>
/// <param name="testModel">Test model.</param>
/// <param name="testTransfer">Whether to run the transfer-learning comparison.</param>
/// <param name="testActive">Whether to run the active-learning comparison.</param>
/// <param name="testActiveTransfer">Whether to run active learning on top of transfer.</param>
public static void Run(BinaryModel trainModel, BinaryModel testModel, bool testTransfer, bool testActive, bool testActiveTransfer)
{
    // NOTE(review): phase-1 and phase-2 priors are currently identical — confirm intended.
    var phase1PriorMean = new Gaussian(4, 1);
    var phase1PriorPrecision = new Gamma(1, 1);
    var phase2PriorMean = new Gaussian(4, 1);
    var phase2PriorPrecision = new Gamma(1, 1);

    // Generate data for three phases: i == 0 community, i == 1 transfer, i == 2 active (noisy).
    var data = new List<ToyData>();
    for (int i = 0; i < 3; i++)
    {
        var toy = new ToyData
        {
            // NumberOfInstances = 200,
            // NumberOfHoldoutInstances = i == 0 ? 0 : 1000,
            NumberOfResidents = 5,
            NumberOfFeatures = NumberOfFeatures,
            NumberOfActivities = 2,
            UseBias = false,
            TruePriorMean = i == 0 ? phase1PriorMean : phase2PriorMean,
            TruePriorPrecision = i == 0 ? phase1PriorPrecision : phase2PriorPrecision
        };

        // Only the last phase gets noisy examples.
        toy.Generate(i == 2 ? NoisyExampleProportion : 0.0, 200);
        if (i != 0)
        {
            // no need for holdout data in training set
            toy.Generate(0.0, 1000, true);
        }

        data.Add(toy);
    }

    var priors = new Marginals
    {
        WeightMeans = DistributionArrayHelpers.CreateGaussianArray(NumberOfFeatures, 0, 1).ToArray(),
        WeightPrecisions = DistributionArrayHelpers.CreateGammaArray(NumberOfFeatures, 1, 1).ToArray()
    };

    Console.WriteLine("Data Generated");

    // TODO: Create meta-features that allow us to do the first form of transfer learning

    // Train the community model on phase-0 data.
    Console.WriteLine("Training Community Model");
    var communityExperiment = new Experiment
    {
        TrainModel = trainModel,
        TestModel = testModel,
        Name = "Community"
    };
    communityExperiment.RunBatch(data[0].DataSet, priors);

    // PrintWeightPriors(communityExperiment.Posteriors, trainData.CommunityWeights);
    // Utils.PlotPosteriors(communityExperiment.Posteriors.Weights, data[0].Weights);
    // Utils.PlotPosteriors(communityExperiment.Posteriors.WeightMeans, communityExperiment.Posteriors.WeightPrecisions, null, "Community weights", "Feature");
    // return;

    if (testTransfer)
    {
        // Do online learning
        // Console.WriteLine("Testing Online Model");
        var onlineExperiment = new Experiment
        {
            TrainModel = trainModel,
            TestModel = testModel,
            Name = "Online"
        };
        onlineExperiment.RunOnline(data[1].DataSet, data[1].HoldoutSet, priors);

        // Do transfer learning
        // Console.WriteLine("Testing Community Model");
        var personalisationExperiment = new Experiment
        {
            TrainModel = trainModel,
            TestModel = testModel,
            Name = "Community"
        };
        personalisationExperiment.RunOnline(data[1].DataSet, data[1].HoldoutSet, communityExperiment.Posteriors);

        // Plot cumulative metrics
        Utils.PlotCumulativeMetrics(new[] { onlineExperiment, personalisationExperiment }, "Toy Transfer");
    }
    else
    {
        Console.WriteLine("Skipping Transfer Learning");
    }

    // ACTIVE MODEL
    if (testActive)
    {
        ActiveTransfer(trainModel, testModel, data, "Toy Active", priors);
    }
    else
    {
        Console.WriteLine("Skipping Active Learning");
    }

    if (testActiveTransfer)
    {
        Console.WriteLine("Note that the transfer learning is very effective here, so the active learning doesn't add much");
        ActiveTransfer(trainModel, testModel, data, "Toy Active Transfer", communityExperiment.Posteriors);
    }
    else
    {
        Console.WriteLine("Skipping Active Transfer Learning");
    }

    // Now create different costs for acquiring labels - want to demonstrate that we choose from all 3 possible labels
}
/// <summary>
/// Runs every active-learning strategy (random, uncertainty, certainty, VOI+ and VOI-)
/// over the third data set and plots the holdout metrics under the given title.
/// </summary>
/// <param name="trainModel">The train model.</param>
/// <param name="testModel">The test model.</param>
/// <param name="data">The datasets.</param>
/// <param name="title">Title for logging and the output plot.</param>
/// <param name="priors">Priors.</param>
public static void ActiveTransfer(BinaryModel trainModel, BinaryModel testModel, IList<ToyData> data, string title, Marginals priors)
{
    // Map each strategy name to its learner set; the trailing `true` reverses the strategy.
    var learners = new Dictionary<string, IList<IActiveLearner>>();
    learners["Random"] = Utils.CreateLearners<RandomLearner>(data[2].DataSet, trainModel, testModel, null);
    learners["US"] = Utils.CreateLearners<UncertainActiveLearner>(data[2].DataSet, trainModel, testModel, null);
    learners["CS"] = Utils.CreateLearners<UncertainActiveLearner>(data[2].DataSet, trainModel, testModel, null, true);
    learners["VOI+"] = Utils.CreateLearners<ActiveLearner>(data[2].DataSet, trainModel, testModel, null);
    learners["VOI-"] = Utils.CreateLearners<ActiveLearner>(data[2].DataSet, trainModel, testModel, null, true);

    var experiments = new List<Experiment>();
    foreach (var kvp in learners)
    {
        Console.WriteLine("Testing {0} ({1})", title, kvp.Key);

        var experiment = new Experiment
        {
            TrainModel = trainModel,
            TestModel = testModel,
            Name = kvp.Key,
            ActiveLearners = kvp.Value
        };
        experiment.RunActive(data[2].DataSet, data[2].HoldoutSet, ActiveSteps, priors);

        experiments.Add(experiment);
    }

    Utils.PlotHoldoutMetrics(experiments, title, "", true);
}
/// <summary>
/// Gets the posterior activity probability for a single instance under the given priors.
/// </summary>
/// <returns>The activity posterior for the instance.</returns>
/// <param name="index">Index of the instance.</param>
/// <param name="priors">Priors.</param>
private Bernoulli GetProbabilityOf(int index, Marginals priors)
{
    var predictions = TestModel.Test(DataSet.GetSubSet(index), priors);
    return predictions[0][0];
}
/// <summary>
/// Expected total risk after hypothetically labelling instance <paramref name="index"/>:
/// retrains once with the label forced positive and once forced negative, and returns the
/// risk of each outcome weighted by the (recomputed) posterior probability of that outcome.
/// The instance's true label is restored before returning.
/// NOTE(review): the weighting uses hypothesisActivityPosteriors AFTER
/// CalculateProbabilities has updated it under the hypothesised label — confirm this is
/// intended rather than the pre-update probability.
/// </summary>
/// <param name="index">Index of the candidate instance.</param>
/// <param name="activityPosteriors">Current activity posteriors (copied, not mutated).</param>
/// <param name="priors">Priors.</param>
private double JAll_j(int index, Bernoulli[] activityPosteriors, Marginals priors)
{
    // var prevProbs = activityPosteriors.Select(ia => new Bernoulli(ia)).ToArray();
    hypothesisActivityPosteriors = activityPosteriors.Select(ia => new Bernoulli(ia)).ToArray();

    // Get datum
    // var datum = DataSet.GetSubSet(0, index);
    bool trueLabel = DataSet.Labels[0][index];

    // Create copies of the Labelled and Unlabelled sets.
    // NOTE(review): these copies are never read after the commented-out JL/JU calls
    // below were removed — dead locals.
    var labelled = new HashSet<int>(Labelled);
    var unlabelled = new HashSet<int>(Unlabelled);
    labelled.Add(index);
    unlabelled.Remove(index);

    // Learn as if positive
    // datum.Labels[0][0] = true;
    DataSet.Labels[0][index] = true;
    // var positivePosteriors = TrainModel.Train(datum, priors, 1);
    Marginals positivePosteriors = priors;
    try
    {
        positivePosteriors = TrainModel.Train(DataSet.GetSubSet(0, index), priors, 1);
    }
    catch (ImproperMessageException)
    {
        // As fallback use priors
    }

    // recompute probabilities under the positive hypothesis
    CalculateProbabilities(positivePosteriors);
    //var jjposl = JL(labelled);
    //var jjposu = JU(unlabelled);
    var Jjpos = (JAll()) * (1.0 - hypothesisActivityPosteriors[index].GetMean());

    // Restore posteriors
    // NOTE(review): these two lines repeat the Add/Remove above (no-ops on a HashSet);
    // they look like they were meant to restore the sets — harmless since the sets are unused.
    labelled.Add(index);
    unlabelled.Remove(index);

    // Learn as if negative
    // datum.Labels[0][0] = false;
    DataSet.Labels[0][index] = false;
    // var negativePosteriors = TrainModel.Train(datum, priors, 1);
    Marginals negativePosteriors = priors;
    try
    {
        negativePosteriors = TrainModel.Train(DataSet.GetSubSet(0, index), priors, 1);
    }
    catch (ImproperMessageException)
    {
        // As fallback use priors
    }

    // recompute probabilities under the negative hypothesis
    CalculateProbabilities(negativePosteriors);
    //var jjnegl = JL(labelled);
    //var jjnegu = JU(unlabelled);
    var Jjneg = (JAll()) * (hypothesisActivityPosteriors[index].GetMean());

    // restore the ground-truth label
    // activityPosteriors = prevProbs;
    DataSet.Labels[0][index] = trueLabel;

    var voi = Jjpos + Jjneg;
    return(voi);
}
/// <summary>
/// Evidence-based selection: takes the 10 unlabelled instances closest to the decision
/// boundary (random tie-break via an initial shuffle — OrderBy is stable), scores each
/// with <c>ExpectedEvidence</c>, and selects the minimum-score instance.
/// Throws if the unlabelled pool is empty (unchanged from the original).
/// </summary>
/// <param name="activityPosteriors">Activity posteriors (unused; probabilities are recomputed from the priors).</param>
/// <param name="priors">Priors.</param>
/// <param name="argMax">Index of the selected instance.</param>
/// <param name="maxVal">Evidence score of the selected instance.</param>
public override void GetArgMaxVOI(MicrosoftResearch.Infer.Distributions.Bernoulli[] activityPosteriors, Marginals priors, out int argMax, out double maxVal)
{
    CalculateProbabilities(priors);

    var shortlist = Unlabelled
        .OrderBy(_ => rng.NextDouble())
        .OrderBy(uu => Math.Abs(hypothesisActivityPosteriors[uu].GetMean() - 0.5))
        .Take(10);

    var evidences = new Dictionary<int, double>();
    foreach (var index in shortlist)
    {
        var evidence = ExpectedEvidence(index, priors);
        evidences.Add(index, evidence);
    }

    // Fix: previously ordered.First() was evaluated twice (deferred OrderBy re-sorts
    // on every enumeration); evaluate the winner once.
    var winner = evidences.OrderBy(ee => ee.Value).First();
    argMax = winner.Key;
    maxVal = winner.Value;
}
/// <summary>
/// Expected total risk after hypothetically labelling instance <paramref name="index"/>:
/// retrains once with the label forced positive and once forced negative, and returns the
/// risk of each outcome weighted by the posterior probability of that outcome. The
/// instance's true label is restored before returning.
/// NOTE(review): each weight uses hypothesisActivityPosteriors AFTER
/// CalculateProbabilities has updated it under the hypothesised label — confirm intended.
/// </summary>
/// <param name="index">Index of the candidate instance.</param>
/// <param name="activityPosteriors">Current activity posteriors (copied, not mutated).</param>
/// <param name="priors">Priors.</param>
private double JAll_j(int index, Bernoulli[] activityPosteriors, Marginals priors)
{
    // Work on copies so the caller's posteriors are untouched.
    hypothesisActivityPosteriors = activityPosteriors.Select(ia => new Bernoulli(ia)).ToArray();
    bool trueLabel = DataSet.Labels[0][index];

    // Fix: removed the `labelled`/`unlabelled` HashSet copies (and the duplicated
    // Add/Remove pair) — they were only consumed by commented-out JL/JU calls and
    // were never read.

    // Learn as if positive
    DataSet.Labels[0][index] = true;
    Marginals positivePosteriors = priors;
    try
    {
        positivePosteriors = TrainModel.Train(DataSet.GetSubSet(0, index), priors, 1);
    }
    catch (ImproperMessageException)
    {
        // As fallback use priors
    }

    // recompute probabilities under the positive hypothesis
    CalculateProbabilities(positivePosteriors);
    var Jjpos = JAll() * (1.0 - hypothesisActivityPosteriors[index].GetMean());

    // Learn as if negative
    DataSet.Labels[0][index] = false;
    Marginals negativePosteriors = priors;
    try
    {
        negativePosteriors = TrainModel.Train(DataSet.GetSubSet(0, index), priors, 1);
    }
    catch (ImproperMessageException)
    {
        // As fallback use priors
    }

    // recompute probabilities under the negative hypothesis
    CalculateProbabilities(negativePosteriors);
    var Jjneg = JAll() * hypothesisActivityPosteriors[index].GetMean();

    // restore the ground-truth label
    DataSet.Labels[0][index] = trueLabel;

    return Jjpos + Jjneg;
}
/// <summary>
/// Scores a candidate instance by comparing model evidence under the two hypothesised
/// labels (positive vs negative), each evaluated against the priors and against
/// posteriors retrained with that hypothesised label.
/// NOTE(review): the instance label is temporarily overwritten and restored at the end;
/// if ComputeEvidence throws, the true label is NOT restored — consider try/finally.
/// </summary>
/// <param name="index">Index of the candidate instance.</param>
/// <param name="priors">Priors.</param>
/// <returns>max(r, 1/r) of the final evidence ratio, so values are always >= 1.</returns>
private double ExpectedEvidence(int index, Marginals priors)
{
    var niter = 1;
    bool trueLabel = DataSet.Labels[0][index];

    // Labelled set as it would look after acquiring this instance.
    var labelled = new HashSet<int>(Labelled);
    labelled.Add(index);

    // Learn as if positive
    DataSet.Labels[0][index] = true;
    Marginals positivePosteriors = priors;
    try
    {
        // Reversed mode skips retraining and keeps the priors.
        if (Reversed)
            positivePosteriors = priors;
        else
            positivePosteriors = TrainModel.Train(DataSet.GetSubSet(0, index), priors, niter);
    }
    catch (ImproperMessageException)
    {
        // As fallback use priors
    }

    var positivePriorEvidence = EvidenceModel.ComputeEvidence(DataSet.GetSubSet(0, labelled.ToList()), priors);
    var positivePostrEvidence = EvidenceModel.ComputeEvidence(DataSet.GetSubSet(0, labelled.ToList()), positivePosteriors);

    // Learn as if negative
    DataSet.Labels[0][index] = false;
    Marginals negativePosteriors = priors;
    try
    {
        if (Reversed)
            negativePosteriors = priors;
        else
            negativePosteriors = TrainModel.Train(DataSet.GetSubSet(0, index), priors, niter);
    }
    catch (ImproperMessageException)
    {
        // As fallback use priors
    }

    var negativePriorEvidence = EvidenceModel.ComputeEvidence(DataSet.GetSubSet(0, labelled.ToList()), priors);
    var negativePostrEvidence = EvidenceModel.ComputeEvidence(DataSet.GetSubSet(0, labelled.ToList()), negativePosteriors);

    // Restore the ground-truth label before returning.
    DataSet.Labels[0][index] = trueLabel;

    // Three candidate scores were experimented with; only the last is returned.
    var returns = new List<double>();
    returns.Add(
        (positivePriorEvidence.LogOdds)
        / (negativePriorEvidence.LogOdds)
    );
    returns.Add(
        (positivePostrEvidence.LogOdds)
        / (negativePostrEvidence.LogOdds)
    );
    returns.Add(
        (positivePriorEvidence.LogOdds + positivePostrEvidence.GetLogProbTrue())
        / (negativePriorEvidence.LogOdds + negativePostrEvidence.GetLogProbTrue())
    );

    return Math.Max(returns.Last(), 1.0 / returns.Last());
}
/// <summary>
/// Gets the argument maximising the value of information. Base implementation:
/// concrete learners must override this; calling the base always throws.
/// </summary>
/// <param name="activityPosteriors">Activity posteriors.</param>
/// <param name="priors">Priors.</param>
/// <param name="argMax">Argument max.</param>
/// <param name="maxVal">Max value.</param>
/// <exception cref="NotImplementedException">Always thrown by the base class.</exception>
public virtual void GetArgMaxVOI(Bernoulli[] activityPosteriors, Marginals priors, out int argMax, out double maxVal)
{
    throw new NotImplementedException();
}
/// <summary>
/// Runs the online experiment: for each resident, sequentially predicts each instance,
/// evaluates on the holdout set, then retrains on that instance's label.
/// </summary>
/// <param name="dataSet">Data set.</param>
/// <param name="holdoutSet">Holdout set.</param>
/// <param name="priors">Priors.</param>
public void RunOnline(DataSet dataSet, DataSet holdoutSet, Marginals priors)
{
    using (new CodeTimer("Running online experiment: " + Name))
    {
        Console.WriteLine();

        // (Re)initialise per-resident result containers.
        Metrics = new MetricsCollection();
        HoldoutMetrics = new HoldoutMetricsCollection { Metrics = new Metrics[dataSet.NumberOfResidents][] };
        PosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][];
        HoldoutPosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][][];
        IndividualPosteriors = new Marginals[dataSet.NumberOfResidents];
        var accuracy = new double[dataSet.NumberOfResidents][];

        for (int i = 0; i < dataSet.NumberOfResidents; i++)
        {
            var collection = new List<Metrics>();
            HoldoutPosteriorActivities[i] = new Bernoulli[dataSet.NumberOfInstances[i]][];
            accuracy[i] = new double[dataSet.NumberOfInstances[i]];
            IndividualPosteriors[i] = new Marginals(priors);
            PosteriorActivities[i] = new Bernoulli[dataSet.NumberOfInstances[i]];

            for (int j = 0; j < dataSet.NumberOfInstances[i]; j++)
            {
                var datum = dataSet.GetSubSet(i, j);

                // Predict the current instance with the posteriors learnt so far.
                PosteriorActivities[i][j] = TestModel.Test(datum, IndividualPosteriors[i])[0][0];
                HoldoutPosteriorActivities[i][j] = TestModel.Test(holdoutSet.GetSubSet(i), IndividualPosteriors[i])[0];

                // Test on holdout set
                var holdoutMetrics = new Metrics { Name = Name, Estimates = HoldoutPosteriorActivities[i][j], TrueLabels = holdoutSet.Labels[i] };
                accuracy[i][j] = holdoutMetrics.AverageAccuracy;
                // PrintPrediction(i, temp[0][0], testLabels[0][i], testScores[0][i]);

                // Now retrain using this label
                IndividualPosteriors[i] = TrainModel.Train(datum, IndividualPosteriors[i], 10);
                collection.Add(holdoutMetrics);
            }

            // PrintPredictions(posteriorActivities.Select(ia => ia[0]).ToArray(), testLabels.Select(ia => ia[0]).ToArray());
            Metrics.Add(new Metrics { Name = Name, Estimates = PosteriorActivities[i], TrueLabels = dataSet.Labels[i] }, true);
            HoldoutMetrics.Metrics[i] = collection.ToArray();
            Console.WriteLine("{0,20}, Resident {1}, Hold out accuracy {2:N2}", Name, i, collection.Average(ia => ia.AverageAccuracy));
        }

        HoldoutMetrics.RecomputeAggregateMetrics();
        Metrics.RecomputeAggregateMetrics();
        // Console.WriteLine("Accuracies " + string.Join(", ", accuracy.ColumnAverage().Select(x => x.ToString("N2"))));
        // Console.WriteLine("Std. dev. " + string.Join(", ", accuracy.ColumnStandardDeviation().Select(x => x.ToString("N2"))));
        // Console.WriteLine("Accuracies " + string.Join(", ", HoldoutMetrics.AverageAccuracy.Select(x => x.ToString("N2"))));
    }
}
/// <summary>
/// Tests the specified data set under the given priors and returns the inferred
/// activity posteriors, indexed [resident][instance].
/// </summary>
/// <param name="dataSet">Data set.</param>
/// <param name="priors">Priors.</param>
/// <returns>Jagged array of Bernoulli activity posteriors.</returns>
public Bernoulli[][] Test(DataSet dataSet, Marginals priors)
{
    // Observe features and weight priors; the priors are copied first —
    // presumably defensive copies so inference cannot mutate the caller's marginals (TODO confirm).
    SetObservedVariables(
        dataSet.Features,
        DistributionArrayHelpers.Copy(priors.WeightMeans),
        DistributionArrayHelpers.Copy(priors.WeightPrecisions));
// Two inference paths: a live engine, or a precompiled algorithm selected at build time.
#if !USE_PRECOMPILED_ALGORITHM
    var posteriorActivities = engine.Infer<Bernoulli[][]>(activities);
#else
    algorithm.Execute(1);
    var posteriorActivities = algorithm.Marginal<Bernoulli[][]>(activities.Name);
#endif
    return posteriorActivities;
}
/// <summary>
/// Calculates the activity posteriors for the whole data set under the given priors
/// and caches them in <c>hypothesisActivityPosteriors</c>.
/// </summary>
/// <param name="priors">Priors to test under.</param>
public virtual void CalculateProbabilities(Marginals priors)
{
    var testResults = TestModel.Test(DataSet, priors);
    hypothesisActivityPosteriors = testResults[0];
}
/// <summary>
/// Runs the active experiment: for each resident, repeatedly selects the unlabelled
/// instance maximising the value of information, retrains on it, and records holdout metrics.
/// </summary>
/// <param name="dataSet">Data set.</param>
/// <param name="holdoutSet">Holdout set.</param>
/// <param name="numberOfSelections">Number of selections.</param>
/// <param name="priors">Priors.</param>
/// <exception cref="InvalidOperationException">If no active learners were provided.</exception>
public void RunActive(DataSet dataSet, DataSet holdoutSet, int numberOfSelections, Marginals priors)
{
    if (ActiveLearners == null)
    {
        throw new InvalidOperationException("Active Learner not provided");
    }

    using (new CodeTimer("Running active experiment: " + Name))
    {
        Console.WriteLine();
        HoldoutMetrics = new HoldoutMetricsCollection { Metrics = new Metrics[dataSet.NumberOfResidents][] };
        // Metrics = new MetricsCollection(numberOfSelections);
        PosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][];
        HoldoutPosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][][];
        IndividualPosteriors = new Marginals[dataSet.NumberOfResidents];
        var accuracy = new double[dataSet.NumberOfResidents][];

        for (int i = 0; i < dataSet.NumberOfResidents; i++)
        {
            // NOTE(review): this pre-allocation is overwritten by collection.ToArray() below.
            HoldoutMetrics.Metrics[i] = new Metrics[numberOfSelections];
            var collection = new List<Metrics>();
            IndividualPosteriors[i] = new Marginals(priors);

            // Test on holdout set
            HoldoutPosteriorActivities[i] = new Bernoulli[numberOfSelections][];
            accuracy[i] = new double[numberOfSelections];
            var dataSetForResident = dataSet.GetSubSet(i);
            var holdoutSetForResident = holdoutSet.GetSubSet(i);
            // ActiveLearners[i].Transfer(i, 1);
            // var individualPosteriors = new Marginals(priors);

            for (int j = 0; j < numberOfSelections; j++)
            {
                PosteriorActivities[i] = TestModel.Test(dataSetForResident, IndividualPosteriors[i])[0];
                HoldoutPosteriorActivities[i][j] = TestModel.Test(holdoutSetForResident, IndividualPosteriors[i])[0];

                if (ActiveLearners[i].Unlabelled.Count == 0)
                {
                    Console.WriteLine("Empty unlabelled set");
                    break;
                }

                // int index = ActiveLearner.GetValueOfInformation(i).ArgMax();
                int index;
                double val;
                ActiveLearners[i].GetArgMaxVOI(PosteriorActivities[i], IndividualPosteriors[i], out index, out val);
                // Console.WriteLine("Index {0,4}, VOI {1:N4}", index, value);

                // Now retrain using this label
                ActiveLearners[i].UpdateModel(index);
                //IndividualPosteriors [i] = TrainModel.Train( dataSet.GetSubSet(i, ActiveLearners [i].Labelled.ToList()), priors, 10);
                IndividualPosteriors[i] = TrainModel.Train(dataSet.GetSubSet(i, index), IndividualPosteriors[i], 50);

                var metrics = new Metrics { Name = Name, Estimates = HoldoutPosteriorActivities[i][j], TrueLabels = holdoutSet.Labels[i] };
                accuracy[i][j] = metrics.AverageAccuracy;
                collection.Add(metrics);
            }

            // PrintPredictions(posteriorActivities.Select(ia => ia[0]).ToArray(), testLabels.Select(ia => ia[0]).ToArray());
            HoldoutMetrics.Metrics[i] = collection.ToArray();
            Console.WriteLine("{0,20}, Resident {1}, \n\t\tClass ratio {5}, \n\t\tHold out accuracy {2:N2}, \n\t\tAccuracies {3} \n\t\tBriers {4}\n",
                Name, i,
                collection.Average(ia => ia.AverageAccuracy).ToString("N2"),
                string.Join(", ", collection.Select(ia => ia.AverageAccuracy.ToString("N2"))),
                string.Join(", ", collection.Select(ia => ia.BrierScore.ToString("N2"))),
                holdoutSet.Labels[i].Average().ToString("N2")
            );
        }

        HoldoutMetrics.RecomputeAggregateMetrics();
    }
}
/// <summary>
/// Runs the online experiment: for each resident, sequentially predicts each instance,
/// evaluates on the holdout set, then retrains on that instance's label.
/// </summary>
/// <param name="dataSet">Data set.</param>
/// <param name="holdoutSet">Holdout set.</param>
/// <param name="priors">Priors.</param>
public void RunOnline(DataSet dataSet, DataSet holdoutSet, Marginals priors)
{
    using (new CodeTimer("Running online experiment: " + Name))
    {
        Console.WriteLine();

        // (Re)initialise per-resident result containers.
        Metrics = new MetricsCollection();
        HoldoutMetrics = new HoldoutMetricsCollection { Metrics = new Metrics[dataSet.NumberOfResidents][] };
        PosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][];
        HoldoutPosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][][];
        IndividualPosteriors = new Marginals[dataSet.NumberOfResidents];
        var accuracy = new double[dataSet.NumberOfResidents][];

        for (int i = 0; i < dataSet.NumberOfResidents; i++)
        {
            var collection = new List<Metrics>();
            HoldoutPosteriorActivities[i] = new Bernoulli[dataSet.NumberOfInstances[i]][];
            accuracy[i] = new double[dataSet.NumberOfInstances[i]];
            IndividualPosteriors[i] = new Marginals(priors);
            PosteriorActivities[i] = new Bernoulli[dataSet.NumberOfInstances[i]];

            for (int j = 0; j < dataSet.NumberOfInstances[i]; j++)
            {
                var datum = dataSet.GetSubSet(i, j);

                // Predict the current instance with the posteriors learnt so far.
                PosteriorActivities[i][j] = TestModel.Test(datum, IndividualPosteriors[i])[0][0];
                HoldoutPosteriorActivities[i][j] = TestModel.Test(holdoutSet.GetSubSet(i), IndividualPosteriors[i])[0];

                // Test on holdout set
                var holdoutMetrics = new Metrics { Name = Name, Estimates = HoldoutPosteriorActivities[i][j], TrueLabels = holdoutSet.Labels[i] };
                accuracy[i][j] = holdoutMetrics.AverageAccuracy;
                // PrintPrediction(i, temp[0][0], testLabels[0][i], testScores[0][i]);

                // Now retrain using this label
                IndividualPosteriors[i] = TrainModel.Train(datum, IndividualPosteriors[i], 10);
                collection.Add(holdoutMetrics);
            }

            // PrintPredictions(posteriorActivities.Select(ia => ia[0]).ToArray(), testLabels.Select(ia => ia[0]).ToArray());
            Metrics.Add(new Metrics { Name = Name, Estimates = PosteriorActivities[i], TrueLabels = dataSet.Labels[i] }, true);
            HoldoutMetrics.Metrics[i] = collection.ToArray();
            Console.WriteLine("{0,20}, Resident {1}, Hold out accuracy {2:N2}", Name, i, collection.Average(ia => ia.AverageAccuracy));
        }

        HoldoutMetrics.RecomputeAggregateMetrics();
        Metrics.RecomputeAggregateMetrics();
        // Console.WriteLine("Accuracies " + string.Join(", ", accuracy.ColumnAverage().Select(x => x.ToString("N2"))));
        // Console.WriteLine("Std. dev. " + string.Join(", ", accuracy.ColumnStandardDeviation().Select(x => x.ToString("N2"))));
        // Console.WriteLine("Accuracies " + string.Join(", ", HoldoutMetrics.AverageAccuracy.Select(x => x.ToString("N2"))));
    }
}
/// <summary>
/// Runs a value-of-information test: actively selects <paramref name="numActivelySelected"/>
/// instances one at a time (retraining after each), then scores any remaining unlabelled
/// instances, and finally prints a summary of the combined online metrics.
/// </summary>
/// <param name="numActivelySelected">Number of instances to select actively.</param>
/// <param name="priors">Priors used for testing and probability estimates.</param>
public void VOITest(int numActivelySelected, Marginals priors)
{
    var onlineEstimates = new List<Bernoulli>();
    var onlineTargets = new List<bool>();
    Metrics metrics = null;

    for (int jj = 0; jj < numActivelySelected; ++jj)
    {
        CalculateProbabilities(priors);
        //Console.WriteLine( "\nJL: {0}", JL() );
        //Console.WriteLine( "JU: {0}", JU() );

        int argMax;
        double maxVal;
        GetArgMaxVOI(hypothesisActivityPosteriors, priors, out argMax, out maxVal);

        // Move the chosen instance from the unlabelled to the labelled pool and retrain.
        Unlabelled.Remove(argMax);
        Labelled.Add(argMax);
        UpdateModel(argMax);

        onlineEstimates.Add(GetProbabilityOf(argMax, priors));
        onlineTargets.Add(DataSet.Labels[0][argMax]);
    }

    // Fix: the original rebuilt this Metrics object on every loop iteration although only
    // the final one was ever used; build it once after the selection loop instead.
    if (onlineEstimates.Count > 0)
    {
        metrics = new Metrics
        {
            Name = "active",
            Estimates = onlineEstimates.Select(ia => new Bernoulli(ia)).ToArray(),
            TrueLabels = onlineTargets.ToArray()
        };
        // metrics.PrintSummary();
    }

    if (Unlabelled.Any())
    {
        // Score whatever was never actively selected using the current model.
        CalculateProbabilities(priors);
        foreach (var index in Unlabelled)
        {
            onlineEstimates.Add(hypothesisActivityPosteriors[index]);
            onlineTargets.Add(DataSet.Labels[0][index]);
        }

        metrics = new Metrics
        {
            Name = "active",
            Estimates = onlineEstimates.Select(ia => new Bernoulli(ia)).ToArray(),
            TrueLabels = onlineTargets.ToArray()
        };
    }

    if (metrics != null)
    {
        metrics.PrintSummary();
    }
}
/// <summary>
/// Calculates the activity posteriors for the whole data set under the given priors,
/// storing the result for resident 0 in <c>hypothesisActivityPosteriors</c>.
/// </summary>
/// <param name="priors">Priors to test under.</param>
public void CalculateProbabilities(Marginals priors)
{
    var allPosteriors = TestModel.Test(DataSet, priors);
    hypothesisActivityPosteriors = allPosteriors[0];
}
/// <summary>
/// Gets the activity posterior for a single instance under the given priors.
/// </summary>
/// <returns>The Bernoulli posterior for the instance.</returns>
/// <param name="index">Index.</param>
/// <param name="priors">Priors.</param>
private Bernoulli GetProbabilityOf(int index, Marginals priors)
{
    var singleInstance = DataSet.GetSubSet(index);
    var posteriors = TestModel.Test(singleInstance, priors);
    return posteriors[0][0];
}
/// <summary>
/// Runs the toy-data experiments: generates synthetic data, trains a community model,
/// then optionally runs transfer, active, and active-transfer experiments against it.
/// (The previous summary incorrectly described a constructor.)
/// </summary>
/// <param name="trainModel">Train model.</param>
/// <param name="testModel">Test model.</param>
/// <param name="testTransfer">Whether to run the transfer-learning experiment.</param>
/// <param name="testActive">Whether to run the active-learning experiment.</param>
/// <param name="testActiveTransfer">Whether to run the active-transfer experiment.</param>
public static void Run(BinaryModel trainModel, BinaryModel testModel, bool testTransfer, bool testActive, bool testActiveTransfer)
{
    var phase1PriorMean = new Gaussian(4, 1);
    var phase1PriorPrecision = new Gamma(1, 1);
    var phase2PriorMean = new Gaussian(4, 1);
    var phase2PriorPrecision = new Gamma(1, 1);

    // Generate data for 5 individuals; set 2 gets noisy examples (NoisyExampleProportion).
    var data = new List<ToyData>();
    for (int i = 0; i < 3; i++)
    {
        var toy = new ToyData
        {
            // NumberOfInstances = 200,
            // NumberOfHoldoutInstances = i == 0 ? 0 : 1000,
            NumberOfResidents = 5,
            NumberOfFeatures = NumberOfFeatures,
            NumberOfActivities = 2,
            UseBias = false,
            TruePriorMean = i == 0 ? phase1PriorMean : phase2PriorMean,
            TruePriorPrecision = i == 0 ? phase1PriorPrecision : phase2PriorPrecision
        };
        toy.Generate(i == 2 ? NoisyExampleProportion : 0.0, 200);
        if (i != 0)
        {
            // no need for holdout data in training set
            toy.Generate(0.0, 1000, true);
        }
        data.Add(toy);
    }

    var priors = new Marginals
    {
        WeightMeans = DistributionArrayHelpers.CreateGaussianArray(NumberOfFeatures, 0, 1).ToArray(),
        WeightPrecisions = DistributionArrayHelpers.CreateGammaArray(NumberOfFeatures, 1, 1).ToArray()
    };

    Console.WriteLine("Data Generated");

    // TODO: Create meta-features that allow us to do the first form of transfer learning

    // Train the community model
    Console.WriteLine("Training Community Model");
    var communityExperiment = new Experiment { TrainModel = trainModel, TestModel = testModel, Name = "Community" };
    communityExperiment.RunBatch(data[0].DataSet, priors);
    // PrintWeightPriors(communityExperiment.Posteriors, trainData.CommunityWeights);
    // Utils.PlotPosteriors(communityExperiment.Posteriors.Weights, data[0].Weights);
    // Utils.PlotPosteriors(communityExperiment.Posteriors.WeightMeans, communityExperiment.Posteriors.WeightPrecisions, null, "Community weights", "Feature");
    // return;

    if (testTransfer)
    {
        // Do online learning
        // Console.WriteLine("Testing Online Model");
        var onlineExperiment = new Experiment { TrainModel = trainModel, TestModel = testModel, Name = "Online" };
        onlineExperiment.RunOnline(data[1].DataSet, data[1].HoldoutSet, priors);

        // Do transfer learning
        // Console.WriteLine("Testing Community Model");
        var personalisationExperiment = new Experiment { TrainModel = trainModel, TestModel = testModel, Name = "Community" };
        personalisationExperiment.RunOnline(data[1].DataSet, data[1].HoldoutSet, communityExperiment.Posteriors);

        // Plot cumulative metrics
        Utils.PlotCumulativeMetrics(new[] { onlineExperiment, personalisationExperiment }, "Toy Transfer");
    }
    else
    {
        Console.WriteLine("Skipping Transfer Learning");
    }

    // ACTIVE MODEL
    if (testActive)
    {
        ActiveTransfer(trainModel, testModel, data, "Toy Active", priors);
    }
    else
    {
        Console.WriteLine("Skipping Active Learning");
    }

    if (testActiveTransfer)
    {
        Console.WriteLine("Note that the transfer learning is very effective here, so the active learning doesn't add much");
        ActiveTransfer(trainModel, testModel, data, "Toy Active Transfer", communityExperiment.Posteriors);
    }
    else
    {
        Console.WriteLine("Skipping Active Transfer Learning");
    }

    // Now create different costs for acquiring labels - want to demonstrate that we choose from all 3 possible labels
}
/// <summary>
/// Runs the real-data experiments: loads source/target data sets, trains a community
/// model on the source data, then runs active learning (online and transfer variants)
/// on the target data. (The previous summary incorrectly described a constructor.)
/// </summary>
/// <param name="trainModel">Train model.</param>
/// <param name="testModel">Test model.</param>
/// <param name="evidenceModel">Evidence model.</param>
/// <param name="testVOI">Whether to include VOI-based learners (passed to CreateLearners).</param>
/// <param name="testActiveEvidence">Whether to include evidence-based learners (passed to CreateLearners).</param>
public void Run(BinaryModel trainModel, BinaryModel testModel, BinaryModel evidenceModel, bool testVOI, bool testActiveEvidence)
{
    const int NumberOfResidents = 7;
    const double KeepProportion = 1.0;
    var selectedFeatures = new HashSet<int>(Enumerable.Range(0, 48));
    var ted = Source.GetDataSet(Enumerable.Range(1, 14), AddBias, selectedFeatures, KeepProportion);
    var trd = Target.GetDataSet(Enumerable.Range(1, 25), AddBias, selectedFeatures, KeepProportion);
    // var ted = Source.GetDataSet( Enumerable.Range( 1, 1 ), AddBias, selectedFeatures, KeepProportion );
    // var trd = Target.GetDataSet( Enumerable.Range( 1, 20 ), AddBias, selectedFeatures, KeepProportion );
    // var hod = Target.GetDataSet( Enumerable.Range( 1 + NumberOfResidents * 1, NumberOfResidents ) );

    // Split the source set 50/50 into test and holdout.
    DataSet testSet;
    DataSet holdoutSet;
    ted.SplitTrainTest(0.5, out testSet, out holdoutSet);

    var NumFeatures = trd.Features.First().First().Count();
    var trainData = new ToyData
    {
        NumberOfResidents = trd.NumberOfResidents,
        NumberOfFeatures = NumFeatures,
        NumberOfActivities = 2,
        UseBias = false,
        DataSet = trd
    };
    var testData = new ToyData
    {
        NumberOfResidents = NumberOfResidents,
        NumberOfFeatures = NumFeatures,
        NumberOfActivities = 2,
        UseBias = false,
        DataSet = testSet,
        HoldoutSet = holdoutSet
    };

    var priors = new Marginals
    {
        WeightMeans = DistributionArrayHelpers.CreateGaussianArray(trainData.NumberOfFeatures, 0.0, 1.0).ToArray(),
        WeightPrecisions = DistributionArrayHelpers.CreateGammaArray(trainData.NumberOfFeatures, 1.0, 1.0).ToArray()
    };

    // TODO: Create meta-features that allow us to do the first form of transfer learning

    // Train the community model
    var communityExperiment = new Experiment { TrainModel = trainModel, TestModel = testModel, EvidenceModel = evidenceModel, Name = "Community" };
    communityExperiment.RunBatch(trainData.DataSet, priors);
    // communityExperiment.Posteriors.WeightPrecisions = priors.WeightPrecisions;
    // if (false)
    // {
    //     Utils.PlotPosteriors(communityExperiment.Posteriors.WeightMeans, communityExperiment.Posteriors.WeightPrecisions, null, "Community weights", "Feature", ShowPlots);
    //     Utils.PlotPosteriors(communityExperiment.Posteriors.WeightMeans, communityExperiment.Posteriors.WeightPrecisions, null, "Community weights (prior precision)", "Feature", ShowPlots);
    // }

    // Print top features
    // var topWeights = communityExperiment.Posteriors.WeightMeans.Zip(communityExperiment.Posteriors.WeightPrecisions, (m, p) => new { m, p }).Select((ia, i) => new { ia, i })
    //     .OrderByDescending(x => Math.Abs(x.ia.m.GetMean())).ToList();
    // Console.WriteLine("Top 20 weights:\n {0}", string.Join("\n", topWeights.Take(20).Select(pair => string.Format("{0}: {1}", pair.i, new Gaussian(pair.ia.m.GetMean(), pair.ia.p.GetMean())))));
    // //communityExperiment.Posteriors.WeightPrecisions = DistributionArrayHelpers.Copy( priors.WeightPrecisions ).ToArray();

    // Transfer only the community weight means; precisions are kept at the prior values.
    var sourcePosteriors = new Marginals
    {
        WeightMeans = communityExperiment.Posteriors.WeightMeans,
        WeightPrecisions = priors.WeightPrecisions, //communityExperiment.Posteriors.WeightMeans,
        Weights = null
    };

    // Select half the features
    /*
     * trainData.DataSet.Features = trainData.DataSet.Features.Select(
     *     ia => ia.Select(
     *         ib => topWeights.Take(topWeights.Count / 2).Select(pair => ib[pair.i]).ToArray())
     *     .ToArray())
     *     .ToArray();
     *
     * // Retrain using these weights
     */

    // if (false)
    // {
    //     // Do online learning
    //     var onlineExperiment = new Experiment
    //     {
    //         TrainModel = trainModel,
    //         TestModel = testModel,
    //         Name = "Online"
    //     };
    //     onlineExperiment.RunOnline(testData.DataSet, testData.HoldoutSet, priors);
    //     // Do transfer learning
    //     var personalisationExperiment = new Experiment
    //     {
    //         TrainModel = trainModel,
    //         TestModel = testModel,
    //         Name = "Community"
    //     };
    //     personalisationExperiment.RunOnline(testData.DataSet, testData.HoldoutSet, communityExperiment.Posteriors);
    //     // Plot cumulative metrics
    //     Utils.PlotCumulativeMetrics(new [] { onlineExperiment, personalisationExperiment }, "Active", ShowPlots);
    // }

    // ACTIVE MODEL
    // Run every learner twice: once from flat priors (online), once from community posteriors (transfer).
    foreach (var doTransfer in new[] { false, true })
    {
        var experiments = new List<Experiment>();
        var learners = CreateLearners(trainModel, testModel, evidenceModel, testData, testVOI, testActiveEvidence);
        foreach (var learner in learners)
        {
            Console.WriteLine("Testing Active{0} Learning ({1})", doTransfer ? " Real Transfer" : "Real Online", learner.Key);
            var experiment = new Experiment
            {
                TrainModel = trainModel,
                TestModel = testModel,
                Name = learner.Key,
                ActiveLearners = learner.Value
            };
            experiment.RunActive(testData.DataSet, testData.HoldoutSet, ActiveSteps, doTransfer ? sourcePosteriors : priors);
            experiments.Add(experiment);

            // NOTE(review): dead code — permanently disabled by if (false); consider removing.
            if (false)
            {
                Utils.PlotPosteriors(
                    experiment.IndividualPosteriors[0].WeightMeans,
                    experiment.IndividualPosteriors[0].WeightPrecisions,
                    null,
                    "Posterior weights for " + learner.Key + " " + (doTransfer ? " (transfer)" : ""),
                    "Feature",
                    ShowPlots);
            }
        }

        Utils.PlotHoldoutMetrics(experiments, doTransfer ? "Real Active Transfer" : "Real Active", "", ShowPlots);
    }
}
/// <summary>
/// Runs the active experiment: for each resident, repeatedly selects the unlabelled
/// instance maximising the value of information, retrains on it, and records holdout metrics.
/// </summary>
/// <param name="dataSet">Data set.</param>
/// <param name="holdoutSet">Holdout set.</param>
/// <param name="numberOfSelections">Number of selections.</param>
/// <param name="priors">Priors.</param>
/// <exception cref="InvalidOperationException">If no active learners were provided.</exception>
public void RunActive(DataSet dataSet, DataSet holdoutSet, int numberOfSelections, Marginals priors)
{
    if (ActiveLearners == null)
    {
        throw new InvalidOperationException("Active Learner not provided");
    }

    using (new CodeTimer("Running active experiment: " + Name))
    {
        Console.WriteLine();
        HoldoutMetrics = new HoldoutMetricsCollection { Metrics = new Metrics[dataSet.NumberOfResidents][] };
        // Metrics = new MetricsCollection(numberOfSelections);
        PosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][];
        HoldoutPosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][][];
        IndividualPosteriors = new Marginals[dataSet.NumberOfResidents];
        var accuracy = new double[dataSet.NumberOfResidents][];

        for (int i = 0; i < dataSet.NumberOfResidents; i++)
        {
            // NOTE(review): this pre-allocation is overwritten by collection.ToArray() below.
            HoldoutMetrics.Metrics[i] = new Metrics[numberOfSelections];
            var collection = new List<Metrics>();
            IndividualPosteriors[i] = new Marginals(priors);

            // Test on holdout set
            HoldoutPosteriorActivities[i] = new Bernoulli[numberOfSelections][];
            accuracy[i] = new double[numberOfSelections];
            var dataSetForResident = dataSet.GetSubSet(i);
            var holdoutSetForResident = holdoutSet.GetSubSet(i);
            // ActiveLearners[i].Transfer(i, 1);
            // var individualPosteriors = new Marginals(priors);

            for (int j = 0; j < numberOfSelections; j++)
            {
                PosteriorActivities[i] = TestModel.Test(dataSetForResident, IndividualPosteriors[i])[0];
                HoldoutPosteriorActivities[i][j] = TestModel.Test(holdoutSetForResident, IndividualPosteriors[i])[0];

                if (ActiveLearners[i].Unlabelled.Count == 0)
                {
                    Console.WriteLine("Empty unlabelled set");
                    break;
                }

                // int index = ActiveLearner.GetValueOfInformation(i).ArgMax();
                int index;
                double val;
                ActiveLearners[i].GetArgMaxVOI(PosteriorActivities[i], IndividualPosteriors[i], out index, out val);
                // Console.WriteLine("Index {0,4}, VOI {1:N4}", index, value);

                // Now retrain using this label
                ActiveLearners[i].UpdateModel(index);
                //IndividualPosteriors [i] = TrainModel.Train( dataSet.GetSubSet(i, ActiveLearners [i].Labelled.ToList()), priors, 10);
                IndividualPosteriors[i] = TrainModel.Train(dataSet.GetSubSet(i, index), IndividualPosteriors[i], 50);

                var metrics = new Metrics { Name = Name, Estimates = HoldoutPosteriorActivities[i][j], TrueLabels = holdoutSet.Labels[i] };
                accuracy[i][j] = metrics.AverageAccuracy;
                collection.Add(metrics);
            }

            // PrintPredictions(posteriorActivities.Select(ia => ia[0]).ToArray(), testLabels.Select(ia => ia[0]).ToArray());
            HoldoutMetrics.Metrics[i] = collection.ToArray();
            Console.WriteLine("{0,20}, Resident {1}, \n\t\tClass ratio {5}, \n\t\tHold out accuracy {2:N2}, \n\t\tAccuracies {3} \n\t\tBriers {4}\n",
                Name, i,
                collection.Average(ia => ia.AverageAccuracy).ToString("N2"),
                string.Join(", ", collection.Select(ia => ia.AverageAccuracy.ToString("N2"))),
                string.Join(", ", collection.Select(ia => ia.BrierScore.ToString("N2"))),
                holdoutSet.Labels[i].Average().ToString("N2")
            );
        }

        HoldoutMetrics.RecomputeAggregateMetrics();
    }
}