/// <summary>
/// Sets the priors of CBCC.
/// </summary>
/// <remarks>
/// Every prior is taken from <paramref name="priors"/> when the corresponding field is supplied.
/// Otherwise a default is used: uniform distributions for the background label probabilities and
/// the true label constraints, and <c>CommunityProbPriorObserved</c> /
/// <c>CommunityScoreMatrixPriorObserved</c> for the community priors.
/// </remarks>
/// <param name="workerCount">The number of workers.</param>
/// <param name="priors">
/// The priors; may be null. A non-null value is cast to <c>CBCCPosteriors</c>, so passing any other
/// type makes the cast throw.
/// </param>
protected override void SetPriors(int workerCount, BCCPosteriors priors)
{
    int labelCount = c.SizeAsInt;

    WorkerCount.ObservedValue = workerCount;
    NoiseMatrix.ObservedValue = PositiveDefiniteMatrix.IdentityScaledBy(labelCount, NoisePrecision);

    CBCCPosteriors cbccPriors = (CBCCPosteriors)priors;
    bool hasPriors = cbccPriors != null;

    if (hasPriors && cbccPriors.BackgroundLabelProb != null)
    {
        BackgroundLabelProbPrior.ObservedValue = cbccPriors.BackgroundLabelProb;
    }
    else
    {
        BackgroundLabelProbPrior.ObservedValue = Dirichlet.Uniform(labelCount);
    }

    if (hasPriors && cbccPriors.CommunityProb != null)
    {
        CommunityProbPrior.ObservedValue = cbccPriors.CommunityProb;
    }
    else
    {
        CommunityProbPrior.ObservedValue = CommunityProbPriorObserved;
    }

    if (hasPriors && cbccPriors.CommunityScoreMatrix != null)
    {
        CommunityScoreMatrixPrior.ObservedValue = cbccPriors.CommunityScoreMatrix;
    }
    else
    {
        CommunityScoreMatrixPrior.ObservedValue = CommunityScoreMatrixPriorObserved;
    }

    if (hasPriors && cbccPriors.TrueLabelConstraint != null)
    {
        TrueLabelConstraint.ObservedValue = cbccPriors.TrueLabelConstraint;
    }
    else
    {
        TrueLabelConstraint.ObservedValue = Util.ArrayInit(TaskCount, t => Discrete.Uniform(labelCount));
    }
}
/// <summary>
/// Sets the priors of BCC.
/// </summary>
/// <remarks>
/// Each prior is taken from <paramref name="priors"/> when the corresponding field is supplied.
/// A missing field (or a null <paramref name="priors"/>) falls back to the default: a uniform
/// background label distribution, the result of <c>GetConfusionMatrixPrior()</c> replicated for
/// every worker, and uniform true label constraints. This avoids writing a null observed value
/// when only part of the priors is available.
/// </remarks>
/// <param name="workerCount">The number of workers.</param>
/// <param name="priors">The priors; may be null or only partially populated.</param>
protected virtual void SetPriors(int workerCount, BCCPosteriors priors)
{
    int numClasses = c.SizeAsInt;
    WorkerCount.ObservedValue = workerCount;

    if (priors != null && priors.BackgroundLabelProb != null)
    {
        BackgroundLabelProbPrior.ObservedValue = priors.BackgroundLabelProb;
    }
    else
    {
        BackgroundLabelProbPrior.ObservedValue = Dirichlet.Uniform(numClasses);
    }

    if (priors != null && priors.WorkerConfusionMatrix != null)
    {
        ConfusionMatrixPrior.ObservedValue = priors.WorkerConfusionMatrix;
    }
    else
    {
        // The default prior is only fetched when it is actually needed.
        var confusionMatrixPrior = GetConfusionMatrixPrior();
        ConfusionMatrixPrior.ObservedValue = Util.ArrayInit(
            workerCount,
            worker => Util.ArrayInit(numClasses, lab => confusionMatrixPrior[lab]));
    }

    if (priors != null && priors.TrueLabelConstraint != null)
    {
        TrueLabelConstraint.ObservedValue = priors.TrueLabelConstraint;
    }
    else
    {
        TrueLabelConstraint.ObservedValue = Util.ArrayInit(TaskCount, t => Discrete.Uniform(numClasses));
    }
}
/// <summary>
/// Infers the posteriors of BCC using the attached data and priors.
/// </summary>
/// <param name="taskIndices">The matrix of the task indices (columns) of each worker (rows).</param>
/// <param name="workerLabels">
/// The matrix of the labels (columns) of each worker (rows). A null first row signals prediction
/// mode, in which case the worker label predictions are inferred as well.
/// </param>
/// <param name="priors">The priors of the BCC parameters; passed on to <c>SetPriors</c>.</param>
/// <returns>
/// The posteriors queried from the engine: evidence, background label probabilities, worker
/// confusion matrices, true labels, the true label marginals divided by their prior and, in
/// prediction mode only, the worker predictions.
/// </returns>
public virtual BCCPosteriors Infer(int[][] taskIndices, int[][] workerLabels, BCCPosteriors priors)
{
    int workerCount = workerLabels.Length;
    SetPriors(workerCount, priors);
    AttachData(taskIndices, workerLabels, null);

    var result = new BCCPosteriors();
    Engine.NumberOfIterations = NumberOfIterations;
    result.Evidence = Engine.Infer<Bernoulli>(Evidence);
    result.BackgroundLabelProb = Engine.Infer<Dirichlet>(BackgroundLabelProb);
    result.WorkerConfusionMatrix = Engine.Infer<Dirichlet[][]>(WorkerConfusionMatrix);
    result.TrueLabel = Engine.Infer<Discrete[]>(TrueLabel);
    result.TrueLabelConstraint = Engine.Infer<Discrete[]>(TrueLabel, QueryTypes.MarginalDividedByPrior);

    // Prediction mode is indicated by none of the workers having a label.
    // We can just look at the first one; with no workers at all there is nothing to predict,
    // and indexing the empty array would throw.
    if (workerCount > 0 && workerLabels[0] == null)
    {
        result.WorkerPrediction = Engine.Infer<Discrete[][]>(WorkerLabel);
    }

    return result;
}
/// <summary>
/// Infers the posteriors of CBCC using the attached data.
/// </summary>
/// <param name="taskIndices">The matrix of the task indices (columns) of each worker (rows).</param>
/// <param name="workerLabels">The matrix of the labels (columns) of each worker (rows).</param>
/// <param name="priors">
/// The priors; may be null. A non-null value is cast to <c>CBCCPosteriors</c> and its worker score
/// matrix and worker community constraints are attached together with the data.
/// </param>
/// <returns>A <c>CBCCPosteriors</c> holding the marginals queried from the engine.</returns>
public override BCCPosteriors Infer(int[][] taskIndices, int[][] workerLabels, BCCPosteriors priors)
{
    var communityPriors = (CBCCPosteriors)priors;

    // Without priors there are no constraints to attach.
    VectorGaussian[][] workerScoreConstraint = null;
    Discrete[] workerCommunityConstraint = null;
    if (communityPriors != null)
    {
        workerScoreConstraint = communityPriors.WorkerScoreMatrixConstraint;
        workerCommunityConstraint = communityPriors.WorkerCommunityConstraint;
    }

    SetPriors(workerLabels.Length, priors);
    AttachData(taskIndices, workerLabels, workerScoreConstraint, workerCommunityConstraint);

    Engine.NumberOfIterations = NumberOfIterations;

    // Initializers run top to bottom, so the engine is queried in the same order as before.
    var posteriors = new CBCCPosteriors
    {
        Evidence = Engine.Infer<Bernoulli>(Evidence),
        BackgroundLabelProb = Engine.Infer<Dirichlet>(BackgroundLabelProb),
        WorkerConfusionMatrix = Engine.Infer<Dirichlet[][]>(WorkerConfusionMatrix),
        TrueLabel = Engine.Infer<Discrete[]>(TrueLabel),
        TrueLabelConstraint = Engine.Infer<Discrete[]>(TrueLabel, QueryTypes.MarginalDividedByPrior),
        CommunityScoreMatrix = Engine.Infer<VectorGaussian[][]>(CommunityScoreMatrix),
        CommunityConfusionMatrix = Engine.Infer<Dirichlet[][]>(CommunityConfusionMatrix),
        WorkerScoreMatrixConstraint = Engine.Infer<VectorGaussian[][]>(ScoreMatrix, QueryTypes.MarginalDividedByPrior),
        CommunityProb = Engine.Infer<Dirichlet>(CommunityProb),
        Community = Engine.Infer<Discrete[]>(Community),
        WorkerCommunityConstraint = Engine.Infer<Discrete[]>(Community, QueryTypes.MarginalDividedByPrior)
    };

    return posteriors;
}
/// <summary>
/// Infers the posteriors of BCC using the attached data and priors.
/// </summary>
/// <param name="taskIndices">The matrix of the task indices (columns) of each worker (rows).</param>
/// <param name="workerLabels">
/// The matrix of the labels (columns) of each worker (rows). A null first row signals prediction
/// mode, in which case the worker label predictions are inferred as well.
/// </param>
/// <param name="priors">The priors of the BCC parameters; passed on to <c>SetPriors</c>.</param>
/// <returns>
/// The posteriors queried from the engine: evidence, background label probabilities, worker
/// confusion matrices, true labels, the true label marginals divided by their prior and, in
/// prediction mode only, the worker predictions.
/// </returns>
public virtual BCCPosteriors Infer(int[][] taskIndices, int[][] workerLabels, BCCPosteriors priors)
{
    int workerCount = workerLabels.Length;
    SetPriors(workerCount, priors);
    AttachData(taskIndices, workerLabels, null);

    var result = new BCCPosteriors();
    Engine.NumberOfIterations = NumberOfIterations;
    result.Evidence = Engine.Infer<Bernoulli>(Evidence);
    result.BackgroundLabelProb = Engine.Infer<Dirichlet>(BackgroundLabelProb);
    result.WorkerConfusionMatrix = Engine.Infer<Dirichlet[][]>(WorkerConfusionMatrix);
    result.TrueLabel = Engine.Infer<Discrete[]>(TrueLabel);
    result.TrueLabelConstraint = Engine.Infer<Discrete[]>(TrueLabel, QueryTypes.MarginalDividedByPrior);

    // Prediction mode is indicated by none of the workers having a label.
    // We can just look at the first one; with no workers at all there is nothing to predict,
    // and indexing the empty array would throw.
    if (workerCount > 0 && workerLabels[0] == null)
    {
        result.WorkerPrediction = Engine.Infer<Discrete[][]>(WorkerLabel);
    }

    return result;
}
/// <summary>
/// Updates the inference results.
/// </summary>
/// <remarks>
/// <para>Look-ahead mode stores the true labels and worker confusion matrices in the look-ahead
/// collections. Prediction mode stores the per-worker predictions. Every other mode stores the
/// BCC results, then the CBCC results when <c>IsCommunityModel</c> is set, then the model
/// evidence.</para>
/// <para>For a community model the posteriors type is verified before anything is written, so a
/// mismatch does not leave the results half updated.</para>
/// </remarks>
/// <param name="posteriors">The inferred posteriors.</param>
/// <param name="mode">The run mode.</param>
/// <exception cref="System.ArgumentException">
/// The results are being updated for a community model but <paramref name="posteriors"/> is not a
/// <c>CBCCPosteriors</c>.
/// </exception>
protected virtual void UpdateResults(BCCPosteriors posteriors, RunMode mode)
{
    if (mode == RunMode.LookAheadExperiment)
    {
        for (int t = 0; t < posteriors.TrueLabel.Length; t++)
        {
            LookAheadTrueLabel[Mapping.TaskIndexToId[t]] = posteriors.TrueLabel[t];
        }

        for (int w = 0; w < posteriors.WorkerConfusionMatrix.Length; w++)
        {
            LookAheadWorkerConfusionMatrix[Mapping.WorkerIndexToId[w]] = posteriors.WorkerConfusionMatrix[w];
        }
    }
    else if (mode == RunMode.Prediction)
    {
        for (int w = 0; w < posteriors.WorkerConfusionMatrix.Length; w++)
        {
            string workerId = Mapping.WorkerIndexToId[w];
            WorkerPrediction[workerId] = new Dictionary<string, Discrete>();
            for (int tw = 0; tw < posteriors.WorkerPrediction[w].Length; tw++)
            {
                // NOTE(review): tw is a position in worker w's prediction array but is used as a
                // task index here - confirm both orderings agree for the data passed in prediction mode.
                WorkerPrediction[workerId][Mapping.TaskIndexToId[tw]] = posteriors.WorkerPrediction[w][tw];
            }
        }
    }
    else
    {
        // Resolve the community posteriors first: failing here keeps the stored results untouched.
        CBCCPosteriors cbccPosteriors = null;
        if (IsCommunityModel)
        {
            cbccPosteriors = posteriors as CBCCPosteriors;
            if (cbccPosteriors == null)
            {
                throw new System.ArgumentException(
                    "Community model results require posteriors of type CBCCPosteriors.",
                    "posteriors");
            }
        }

        // Update results for BCC
        BackgroundLabelProb = posteriors.BackgroundLabelProb;
        for (int w = 0; w < posteriors.WorkerConfusionMatrix.Length; w++)
        {
            string workerId = Mapping.WorkerIndexToId[w];
            WorkerConfusionMatrix[workerId] = posteriors.WorkerConfusionMatrix[w];
            WorkerConfusionMatrixMean[workerId] = posteriors.WorkerConfusionMatrix[w].Select(cm => cm.GetMean()).ToArray();
        }

        for (int t = 0; t < posteriors.TrueLabel.Length; t++)
        {
            TrueLabel[Mapping.TaskIndexToId[t]] = posteriors.TrueLabel[t];
        }

        for (int t = 0; t < posteriors.TrueLabelConstraint.Length; t++)
        {
            TrueLabelConstraint[Mapping.TaskIndexToId[t]] = posteriors.TrueLabelConstraint[t];
        }

        // Update results for CBCC
        if (cbccPosteriors != null)
        {
            CommunityConfusionMatrix = cbccPosteriors.CommunityConfusionMatrix;
            for (int w = 0; w < cbccPosteriors.WorkerScoreMatrixConstraint.Length; w++)
            {
                string workerId = Mapping.WorkerIndexToId[w];
                WorkerScoreMatrixConstraint[workerId] = cbccPosteriors.WorkerScoreMatrixConstraint[w];
                CommunityConstraint[workerId] = cbccPosteriors.WorkerCommunityConstraint[w];
                WorkerCommunity[workerId] = cbccPosteriors.Community[w];
            }

            CommunityProb = cbccPosteriors.CommunityProb;
            CommunityScoreMatrix = cbccPosteriors.CommunityScoreMatrix;
        }

        this.ModelEvidence = posteriors.Evidence;
    }
}
/// <summary>
/// Packages the currently stored results as priors for a subsequent inference run.
/// </summary>
/// <remarks>
/// Workers and tasks are enumerated through <c>Mapping</c>. An entry with no stored result gets a
/// uniform distribution instead. The community fields are filled in only when
/// <c>IsCommunityModel</c> is set.
/// </remarks>
/// <returns>
/// A <c>CBCCPosteriors</c> for a community model, otherwise a <c>BCCPosteriors</c>.
/// </returns>
BCCPosteriors ToPriors()
{
    int labelCount = Mapping.LabelCount;
    int taskCount = Mapping.TaskCount;
    int workerCount = Mapping.WorkerCount;

    CBCCPosteriors communityPriors = new CBCCPosteriors();
    BCCPosteriors result;
    if (IsCommunityModel)
    {
        result = communityPriors;
    }
    else
    {
        result = new BCCPosteriors();
    }

    result.BackgroundLabelProb = BackgroundLabelProb;

    result.WorkerConfusionMatrix = Util.ArrayInit(workerCount, workerIndex =>
    {
        string workerId = Mapping.WorkerIndexToId[workerIndex];
        if (!WorkerConfusionMatrix.ContainsKey(workerId))
        {
            return Util.ArrayInit(labelCount, label => Dirichlet.Uniform(labelCount));
        }

        return Util.ArrayInit(labelCount, label => WorkerConfusionMatrix[workerId][label]);
    });

    result.TrueLabelConstraint = Util.ArrayInit(taskCount, taskIndex =>
    {
        string taskId = Mapping.TaskIndexToId[taskIndex];
        if (!TrueLabelConstraint.ContainsKey(taskId))
        {
            return Discrete.Uniform(labelCount);
        }

        return TrueLabelConstraint[taskId];
    });

    if (IsCommunityModel)
    {
        communityPriors.CommunityConfusionMatrix = CommunityConfusionMatrix;

        communityPriors.WorkerScoreMatrixConstraint = Util.ArrayInit(workerCount, workerIndex =>
        {
            string workerId = Mapping.WorkerIndexToId[workerIndex];
            if (!WorkerScoreMatrixConstraint.ContainsKey(workerId))
            {
                return Util.ArrayInit(labelCount, label => VectorGaussian.Uniform(labelCount));
            }

            return Util.ArrayInit(labelCount, label => WorkerScoreMatrixConstraint[workerId][label]);
        });

        communityPriors.CommunityProb = CommunityProb;
        communityPriors.CommunityScoreMatrix = CommunityScoreMatrix;

        communityPriors.WorkerCommunityConstraint = Util.ArrayInit(workerCount, workerIndex =>
        {
            string workerId = Mapping.WorkerIndexToId[workerIndex];
            if (!CommunityConstraint.ContainsKey(workerId))
            {
                return Discrete.Uniform(CommunityCount);
            }

            return CommunityConstraint[workerId];
        });
    }

    result.Evidence = ModelEvidence;
    return result;
}
/// <summary>
/// Updates the inference results.
/// </summary>
/// <remarks>
/// <para>Look-ahead mode stores the true labels and worker confusion matrices in the look-ahead
/// collections. Prediction mode stores the per-worker predictions. Every other mode stores the
/// BCC results, then the CBCC results when <c>IsCommunityModel</c> is set, then the model
/// evidence.</para>
/// <para>For a community model the posteriors type is verified before anything is written, so a
/// mismatch does not leave the results half updated.</para>
/// </remarks>
/// <param name="posteriors">The inferred posteriors.</param>
/// <param name="mode">The run mode.</param>
/// <exception cref="System.ArgumentException">
/// The results are being updated for a community model but <paramref name="posteriors"/> is not a
/// <c>CBCCPosteriors</c>.
/// </exception>
protected virtual void UpdateResults(BCCPosteriors posteriors, RunMode mode)
{
    if (mode == RunMode.LookAheadExperiment)
    {
        for (int t = 0; t < posteriors.TrueLabel.Length; t++)
        {
            LookAheadTrueLabel[Mapping.TaskIndexToId[t]] = posteriors.TrueLabel[t];
        }

        for (int w = 0; w < posteriors.WorkerConfusionMatrix.Length; w++)
        {
            LookAheadWorkerConfusionMatrix[Mapping.WorkerIndexToId[w]] = posteriors.WorkerConfusionMatrix[w];
        }
    }
    else if (mode == RunMode.Prediction)
    {
        for (int w = 0; w < posteriors.WorkerConfusionMatrix.Length; w++)
        {
            string workerId = Mapping.WorkerIndexToId[w];
            WorkerPrediction[workerId] = new Dictionary<string, Discrete>();
            for (int tw = 0; tw < posteriors.WorkerPrediction[w].Length; tw++)
            {
                // NOTE(review): tw is a position in worker w's prediction array but is used as a
                // task index here - confirm both orderings agree for the data passed in prediction mode.
                WorkerPrediction[workerId][Mapping.TaskIndexToId[tw]] = posteriors.WorkerPrediction[w][tw];
            }
        }
    }
    else
    {
        // Resolve the community posteriors first: failing here keeps the stored results untouched.
        CBCCPosteriors cbccPosteriors = null;
        if (IsCommunityModel)
        {
            cbccPosteriors = posteriors as CBCCPosteriors;
            if (cbccPosteriors == null)
            {
                throw new System.ArgumentException(
                    "Community model results require posteriors of type CBCCPosteriors.",
                    "posteriors");
            }
        }

        // Update results for BCC
        BackgroundLabelProb = posteriors.BackgroundLabelProb;
        for (int w = 0; w < posteriors.WorkerConfusionMatrix.Length; w++)
        {
            string workerId = Mapping.WorkerIndexToId[w];
            WorkerConfusionMatrix[workerId] = posteriors.WorkerConfusionMatrix[w];
            WorkerConfusionMatrixMean[workerId] = posteriors.WorkerConfusionMatrix[w].Select(cm => cm.GetMean()).ToArray();
        }

        for (int t = 0; t < posteriors.TrueLabel.Length; t++)
        {
            TrueLabel[Mapping.TaskIndexToId[t]] = posteriors.TrueLabel[t];
        }

        for (int t = 0; t < posteriors.TrueLabelConstraint.Length; t++)
        {
            TrueLabelConstraint[Mapping.TaskIndexToId[t]] = posteriors.TrueLabelConstraint[t];
        }

        // Update results for CBCC
        if (cbccPosteriors != null)
        {
            CommunityConfusionMatrix = cbccPosteriors.CommunityConfusionMatrix;
            for (int w = 0; w < cbccPosteriors.WorkerScoreMatrixConstraint.Length; w++)
            {
                string workerId = Mapping.WorkerIndexToId[w];
                WorkerScoreMatrixConstraint[workerId] = cbccPosteriors.WorkerScoreMatrixConstraint[w];
                CommunityConstraint[workerId] = cbccPosteriors.WorkerCommunityConstraint[w];
                WorkerCommunity[workerId] = cbccPosteriors.Community[w];
            }

            CommunityProb = cbccPosteriors.CommunityProb;
            CommunityScoreMatrix = cbccPosteriors.CommunityScoreMatrix;
        }

        this.ModelEvidence = posteriors.Evidence;
    }
}
/// <summary>
/// Runs a BCC or CBCC model on the given data and stores the results.
/// </summary>
/// <remarks>
/// The steps are: build the data mapping if there is none yet, (re)create the model when its
/// label, task or community count does not match, select the priors for the run mode, run
/// inference, store the results, and optionally update the accuracy and serialize the posteriors.
/// </remarks>
/// <param name="modelName">
/// The base name for serialization: posteriors are written to <c>modelName + ".xml"</c>, and the
/// name is also passed to the community posteriors (de)serialization.
/// </param>
/// <param name="data">The data converted into per-worker task indices and labels for inference.</param>
/// <param name="fullData">The data used to build the mapping when no mapping exists yet.</param>
/// <param name="model">The model to run; a <c>CBCC</c> instance selects the community model path.</param>
/// <param name="mode">The run mode.</param>
/// <param name="calculateAccuracy">Whether to call <c>UpdateAccuracy</c> after inference.</param>
/// <param name="numCommunities">The number of communities (used for the mapping and the community model).</param>
/// <param name="serialize">Whether to serialize the posteriors to XML.</param>
/// <param name="serializeCommunityPosteriors">Whether to serialize the community posteriors (community model only).</param>
/// <exception cref="System.ArgumentNullException"><paramref name="model"/> is null.</exception>
public void RunBCC(string modelName, IList<Datum> data, IList<Datum> fullData, BCC model, RunMode mode, bool calculateAccuracy, int numCommunities = -1, bool serialize = false, bool serializeCommunityPosteriors = false)
{
    // Reject a missing model before any state (mapping, gold labels) is modified.
    if (model == null)
    {
        throw new System.ArgumentNullException("model");
    }

    CBCC communityModel = model as CBCC;
    IsCommunityModel = communityModel != null;

    if (this.Mapping == null)
    {
        this.Mapping = new DataMapping(fullData, numCommunities);
        this.FullMapping = Mapping;
        this.GoldLabels = this.Mapping.GetGoldLabelsPerTaskId();
    }

    // The model has to be rebuilt whenever its dimensions do not match the mapping.
    bool createModel = (Mapping.LabelCount != model.LabelCount) || (Mapping.TaskCount != model.TaskCount);

    if (IsCommunityModel)
    {
        CommunityCount = numCommunities;
        createModel = createModel || (numCommunities != communityModel.CommunityCount);
        if (createModel)
        {
            communityModel.CreateModel(Mapping.TaskCount, Mapping.LabelCount, numCommunities);
        }
    }
    else if (createModel)
    {
        model.CreateModel(Mapping.TaskCount, Mapping.LabelCount);
    }

    BCCPosteriors priors = null;
    if (mode == RunMode.Prediction)
    {
        // Prediction reuses the stored results as priors.
        priors = ToPriors();
    }
    else
    {
        ClearResults();
        if (mode == RunMode.LoadAndUseCommunityPriors && IsCommunityModel)
        {
            priors = DeserializeCommunityPosteriors(modelName, numCommunities);
        }
    }

    // Get data structures
    int[][] taskIndices = Mapping.GetTaskIndicesPerWorkerIndex(data);
    int[][] workerLabels = Mapping.GetLabelsPerWorkerIndex(data);

    if (mode == RunMode.Prediction)
    {
        // Signal prediction mode by setting all labels to null
        workerLabels = workerLabels.Select(arr => (int[])null).ToArray();
    }

    // Call inference
    BCCPosteriors posteriors = model.Infer(taskIndices, workerLabels, priors);

    UpdateResults(posteriors, mode);

    if (calculateAccuracy)
    {
        UpdateAccuracy();
    }

    if (serialize)
    {
        using (FileStream stream = new FileStream(modelName + ".xml", FileMode.Create))
        {
            var serializer = new System.Xml.Serialization.XmlSerializer(IsCommunityModel ? typeof(CBCCPosteriors) : typeof(BCCPosteriors));
            serializer.Serialize(stream, posteriors);
        }
    }

    if (serializeCommunityPosteriors && IsCommunityModel)
    {
        SerializeCommunityPosteriors(modelName);
    }
}
/// <summary>
/// Infers the posteriors of CBCC using the attached data.
/// </summary>
/// <param name="taskIndices">The matrix of the task indices (columns) of each worker (rows).</param>
/// <param name="workerLabels">The matrix of the labels (columns) of each worker (rows).</param>
/// <param name="priors">
/// The priors; may be null. A non-null value is cast to <c>CBCCPosteriors</c> and its worker score
/// matrix and worker community constraints are attached together with the data.
/// </param>
/// <returns>A <c>CBCCPosteriors</c> holding the marginals queried from the engine.</returns>
public override BCCPosteriors Infer(int[][] taskIndices, int[][] workerLabels, BCCPosteriors priors)
{
    var communityPriors = (CBCCPosteriors)priors;

    // Without priors there are no constraints to attach.
    VectorGaussian[][] workerScoreConstraint = null;
    Discrete[] workerCommunityConstraint = null;
    if (communityPriors != null)
    {
        workerScoreConstraint = communityPriors.WorkerScoreMatrixConstraint;
        workerCommunityConstraint = communityPriors.WorkerCommunityConstraint;
    }

    SetPriors(workerLabels.Length, priors);
    AttachData(taskIndices, workerLabels, workerScoreConstraint, workerCommunityConstraint);

    Engine.NumberOfIterations = NumberOfIterations;

    // Initializers run top to bottom, so the engine is queried in the same order as before.
    var posteriors = new CBCCPosteriors
    {
        Evidence = Engine.Infer<Bernoulli>(Evidence),
        BackgroundLabelProb = Engine.Infer<Dirichlet>(BackgroundLabelProb),
        WorkerConfusionMatrix = Engine.Infer<Dirichlet[][]>(WorkerConfusionMatrix),
        TrueLabel = Engine.Infer<Discrete[]>(TrueLabel),
        TrueLabelConstraint = Engine.Infer<Discrete[]>(TrueLabel, QueryTypes.MarginalDividedByPrior),
        CommunityScoreMatrix = Engine.Infer<VectorGaussian[][]>(CommunityScoreMatrix),
        CommunityConfusionMatrix = Engine.Infer<Dirichlet[][]>(CommunityConfusionMatrix),
        WorkerScoreMatrixConstraint = Engine.Infer<VectorGaussian[][]>(ScoreMatrix, QueryTypes.MarginalDividedByPrior),
        CommunityProb = Engine.Infer<Dirichlet>(CommunityProb),
        Community = Engine.Infer<Discrete[]>(Community),
        WorkerCommunityConstraint = Engine.Infer<Discrete[]>(Community, QueryTypes.MarginalDividedByPrior)
    };

    return posteriors;
}
/// <summary>
/// Sets the priors of CBCC.
/// </summary>
/// <remarks>
/// Every prior is taken from <paramref name="priors"/> when the corresponding field is supplied.
/// Otherwise a default is used: uniform distributions for the background label probabilities and
/// the true label constraints, and <c>CommunityProbPriorObserved</c> /
/// <c>CommunityScoreMatrixPriorObserved</c> for the community priors.
/// </remarks>
/// <param name="workerCount">The number of workers.</param>
/// <param name="priors">
/// The priors; may be null. A non-null value is cast to <c>CBCCPosteriors</c>, so passing any other
/// type makes the cast throw.
/// </param>
protected override void SetPriors(int workerCount, BCCPosteriors priors)
{
    int labelCount = c.SizeAsInt;

    WorkerCount.ObservedValue = workerCount;
    NoiseMatrix.ObservedValue = PositiveDefiniteMatrix.IdentityScaledBy(labelCount, NoisePrecision);

    CBCCPosteriors cbccPriors = (CBCCPosteriors)priors;

    if (cbccPriors == null || cbccPriors.BackgroundLabelProb == null)
    {
        BackgroundLabelProbPrior.ObservedValue = Dirichlet.Uniform(labelCount);
    }
    else
    {
        BackgroundLabelProbPrior.ObservedValue = cbccPriors.BackgroundLabelProb;
    }

    if (cbccPriors == null || cbccPriors.CommunityProb == null)
    {
        CommunityProbPrior.ObservedValue = CommunityProbPriorObserved;
    }
    else
    {
        CommunityProbPrior.ObservedValue = cbccPriors.CommunityProb;
    }

    if (cbccPriors == null || cbccPriors.CommunityScoreMatrix == null)
    {
        CommunityScoreMatrixPrior.ObservedValue = CommunityScoreMatrixPriorObserved;
    }
    else
    {
        CommunityScoreMatrixPrior.ObservedValue = cbccPriors.CommunityScoreMatrix;
    }

    if (cbccPriors == null || cbccPriors.TrueLabelConstraint == null)
    {
        TrueLabelConstraint.ObservedValue = Util.ArrayInit(TaskCount, t => Discrete.Uniform(labelCount));
    }
    else
    {
        TrueLabelConstraint.ObservedValue = cbccPriors.TrueLabelConstraint;
    }
}