/// <summary>
/// Re-keys a square grid of tweet lists by display name, producing a nested
/// row-name / column-name dictionary.
/// </summary>
/// <param name="array">
/// The tweet lists, indexed as [row label index, column label index].
/// </param>
/// <param name="mapping">
/// The data mapping that supplies the label count and the label display strings.
/// </param>
/// <param name="rowLabel">
/// The text appended in parentheses to every row name.
/// </param>
/// <param name="columnLabel">
/// The text appended in parentheses to every column name.
/// </param>
/// <returns>
/// A dictionary from row name to a dictionary from column name to the tweet list of that cell.
/// The lists are the instances held in <paramref name="array"/>, not copies.
/// </returns>
public static Dictionary<string, Dictionary<string, List<Tweet>>> GetTweetMatrix(
    List<Tweet>[,] array,
    CrowdDataMapping mapping,
    string rowLabel,
    string columnLabel)
{
    Debug.Assert(
        array.GetLength(0) == mapping.LabelCount && array.GetLength(1) == mapping.LabelCount,
        "Inconsistent arguments");

    var matrix = new Dictionary<string, Dictionary<string, List<Tweet>>>();

    for (var rowIndex = 0; rowIndex < mapping.LabelCount; rowIndex++)
    {
        var rowValue = mapping.LabelIndexToValue[rowIndex];
        var rowKey = $"{mapping.LabelValueToString[rowValue]} ({rowLabel})";

        var cells = new Dictionary<string, List<Tweet>>();
        for (var columnIndex = 0; columnIndex < mapping.LabelCount; columnIndex++)
        {
            var columnValue = mapping.LabelIndexToValue[columnIndex];
            var columnKey = $"{mapping.LabelValueToString[columnValue]} ({columnLabel})";

            // Indexer assignment: a repeated display name overwrites the earlier cell.
            cells[columnKey] = array[rowIndex, columnIndex];
        }

        matrix[rowKey] = cells;
    }

    return matrix;
}
/// <summary>
/// Builds column-oriented table data for a confusion matrix or conditional probability table.
/// </summary>
/// <param name="array">
/// The square matrix of values, indexed as [row label index, column label index].
/// </param>
/// <param name="mapping">
/// The data mapping that supplies the label count and the label display strings.
/// </param>
/// <param name="rowLabel">
/// The text appended in parentheses to every row name.
/// </param>
/// <param name="columnLabel">
/// The text appended in parentheses to every column name.
/// </param>
/// <param name="asPercentages">
/// When true, each entry is divided by the sum of its row and wrapped in a <c>Percentage</c>;
/// when false, the raw value is stored.
/// </param>
/// <returns>
/// A dictionary whose empty-string key holds the list of row names and whose remaining keys are
/// the column names, each mapping to that column's entries in row order.
/// </returns>
/// <remarks>
/// Row sums are not checked for zero, so with <paramref name="asPercentages"/> set the division for
/// a row that sums to zero is passed to <c>Percentage</c> unguarded.
/// </remarks>
public static Dictionary<string, List<object>> GetConfusionMatrix(
    double[,] array,
    CrowdDataMapping mapping,
    string rowLabel,
    string columnLabel,
    bool asPercentages = false)
{
    Debug.Assert(
        array.GetLength(0) == mapping.LabelCount && array.GetLength(1) == mapping.LabelCount,
        "Inconsistent arguments");

    var table = new Dictionary<string, List<object>>();
    var names = mapping.LabelIndexToString;
    var rowTotals = new double[mapping.LabelCount];
    var rowHeaders = new List<object>();

    // First pass: collect the row headers and the per-row totals used for normalisation.
    for (var r = 0; r < mapping.LabelCount; r++)
    {
        rowHeaders.Add($"{names[r]} ({rowLabel})");

        var total = 0.0;
        for (var c = 0; c < mapping.LabelCount; c++)
        {
            total += array[r, c];
        }

        rowTotals[r] = total;
    }

    // The header column is stored under the empty key, ahead of the data columns.
    table[string.Empty] = rowHeaders;

    // Second pass: one list per column, entries in row order.
    for (var c = 0; c < mapping.LabelCount; c++)
    {
        var header = $"{names[c]} ({columnLabel})";
        var cells = new List<object>();

        for (var r = 0; r < mapping.LabelCount; r++)
        {
            var cell = array[r, c];
            cells.Add(asPercentages ? (object)new Percentage(cell / rowTotals[r]) : cell);
        }

        table[header] = cells;
    }

    return table;
}
/// <summary>
/// Converts each worker's conditional probability table into confusion-matrix table data built
/// from the means of its Dirichlet rows.
/// </summary>
/// <param name="workerCpts">
/// The worker conditional probability tables: one Dirichlet per row, keyed by worker.
/// </param>
/// <param name="mapping">
/// The data mapping, passed through to <c>GetConfusionMatrix</c>.
/// </param>
/// <param name="asPercentages">
/// Passed through to <c>GetConfusionMatrix</c> to request entries as percentages.
/// </param>
/// <returns>
/// A dictionary with the same keys as <paramref name="workerCpts"/>, each mapping to the table data
/// returned by <c>GetConfusionMatrix</c> for that worker's mean matrix.
/// </returns>
public static Dictionary<string, Dictionary<string, List<object>>> GetWorkerCpts(
    Dictionary<string, Dirichlet[]> workerCpts,
    CrowdDataMapping mapping,
    bool asPercentages = false)
{
    return workerCpts.ToDictionary(
        entry => entry.Key,
        entry =>
        {
            var rows = entry.Value;

            // The matrix is taken to be square, sized by the number of Dirichlet rows.
            var size = rows.Length;
            var rowMeans = rows.Select(row => row.GetMean()).ToArray();
            var meanMatrix = Util.ArrayInit(size, size, (r, c) => rowMeans[r][c]);

            return GetConfusionMatrix(
                meanMatrix,
                mapping,
                ConfusionMatrixRowLabel,
                ConfusionMatrixColLabel,
                asPercentages);
        });
}
/// <summary>
/// Builds per-row plot data with error bars for a conditional probability table.
/// </summary>
/// <param name="cpt">
/// The conditional probability table: one Dirichlet per row.
/// </param>
/// <param name="mapping">
/// The data mapping that supplies the label count and the label display strings.
/// </param>
/// <param name="rowLabel">
/// The text appended in parentheses to every row name.
/// </param>
/// <param name="columnLabel">
/// The column label. It is accepted for signature symmetry but not read by this method.
/// </param>
/// <returns>
/// A dictionary from row name to the points produced by <c>DirichletWithErrorBars</c> for that row.
/// </returns>
public static Dictionary<string, PointWithBounds<string>[]> GetCptWithBounds(
    Dirichlet[] cpt,
    CrowdDataMapping mapping,
    string rowLabel,
    string columnLabel)
{
    // Every row must exist and have exactly one dimension per label.
    Debug.Assert(
        cpt.Length == mapping.LabelCount && !cpt.Any(row => row.Dimension != mapping.LabelCount),
        "Inconsistent arguments");

    var plotData = new Dictionary<string, PointWithBounds<string>[]>();
    var names = mapping.LabelIndexToString;

    for (var rowIndex = 0; rowIndex < mapping.LabelCount; rowIndex++)
    {
        var key = $"{names[rowIndex]} ({rowLabel})";
        var points = DirichletWithErrorBars(cpt[rowIndex], names);
        plotData[key] = points;
    }

    return plotData;
}
/// <summary>
/// Gets the prominent workers for a data set, i.e. the workers with the highest
/// <c>Metric.Count</c> value as reported by <c>GetMetrics</c> for their labels.
/// </summary>
/// <param name="data">
/// The data whose crowd labels are grouped by worker.
/// </param>
/// <param name="maxNumberWorkers">
/// The maximum number of worker ids to return.
/// </param>
/// <returns>
/// The set of ids of the top-ranked workers; it holds at most <paramref name="maxNumberWorkers"/> ids.
/// </returns>
private static HashSet<string> GetProminentWorkers(CrowdData data, int maxNumberWorkers = 20)
{
    var mapping = new CrowdDataMapping(data, LabelValuesToString);
    var countsByWorker = new Dictionary<string, object>();

    foreach (var workerGroup in data.CrowdLabels.GroupBy(label => label.WorkerId))
    {
        // De-duplicate with the worker/tweet comparer before keying by tweet id,
        // as ToDictionary throws on a repeated key.
        var labelsByTweet = workerGroup
            .Distinct(CrowdData.WorkerTweetEqualityComparer.Instance)
            .ToDictionary(label => label.TweetId, label => label.WorkerLabel);

        countsByWorker[workerGroup.Key] = GetMetrics(mapping, labelsByTweet)[Metric.Count];
    }

    // The stored counts are boxed, so they are unboxed to int for ranking.
    var mostActive = countsByWorker
        .OrderByDescending(entry => (int)entry.Value)
        .Take(maxNumberWorkers)
        .Select(entry => entry.Key);

    return new HashSet<string>(mostActive);
}
/// <summary>
/// Creates a <see cref="BiasedWorkerModelRunner"/>; all arguments are forwarded unchanged to the
/// base constructor.
/// </summary>
/// <param name="dataMapping">
/// The data mapping.
/// </param>
/// <param name="model">
/// The biased worker model to run.
/// </param>
/// <param name="trainingRunner">
/// The training runner; optional, defaults to null.
/// </param>
public BiasedWorkerModelRunner(
    CrowdDataMapping dataMapping,
    BiasedWorkerModel model,
    ModelRunnerBase trainingRunner = null)
    : base(dataMapping, model, trainingRunner)
{
}
/// <summary>
/// Initializes the runner with its model and an optional training runner, after passing the data
/// mapping to the base constructor.
/// </summary>
/// <param name="dataMapping">
/// The data mapping.
/// </param>
/// <param name="model">
/// The model, stored in <c>Model</c>.
/// </param>
/// <param name="trainingRunner">
/// The training runner, stored in <c>TrainingRunner</c>. This should be null if
/// (a) we are running training, or
/// (b) we are running validation and the model has no training.
/// </param>
protected ModelRunnerBase(
    CrowdDataMapping dataMapping,
    ModelBase model,
    ModelRunnerBase trainingRunner = null)
    : base(dataMapping)
{
    Model = model;
    TrainingRunner = trainingRunner;
}
/// <summary>
/// Creates a <see cref="HonestWorkerRunner"/>; all arguments are forwarded unchanged to the
/// base constructor.
/// </summary>
/// <param name="dataMapping">
/// The data mapping.
/// </param>
/// <param name="model">
/// The honest worker model to run.
/// </param>
/// <param name="trainingRunner">
/// The training runner; optional, defaults to null.
/// </param>
public HonestWorkerRunner(
    CrowdDataMapping dataMapping,
    HonestWorkerModel model,
    ModelRunnerBase trainingRunner = null)
    : base(dataMapping, model, trainingRunner)
{
}
/// <summary>
/// Initializes a new <see cref="RunnerBase"/> and stores the supplied mapping in <c>DataMapping</c>.
/// </summary>
/// <param name="dataMapping">
/// The mapping between data and indices.
/// </param>
protected RunnerBase(CrowdDataMapping dataMapping)
{
    DataMapping = dataMapping;
}
/// <summary>
/// Compares predictions against true labels and computes the count, accuracy, confusion matrix,
/// tweet matrix and average recall.
/// </summary>
/// <param name="dataMapping">
/// The data mapping; it supplies the label count, the label-value-to-index lookup and the data set.
/// </param>
/// <param name="predictions">
/// The predicted label value for each id. True labels whose id has no prediction are ignored.
/// </param>
/// <param name="trueLabels">
/// The true label value for each id. If null, the gold labels of the data set are used.
/// </param>
/// <returns>
/// A dictionary holding:
/// <c>Metric.Count</c> - the number of true labels that have a prediction;
/// <c>Metric.Accuracy</c> - the fraction of those predicted correctly;
/// <c>Metric.ConfusionMatrix</c> - counts indexed as [true label index, predicted label index];
/// <c>Metric.TweetMatrix</c> - the tweets falling into each confusion-matrix cell (populated only
/// when the data set is a <c>CrowdDataWithText</c> that contains the tweet);
/// <c>Metric.AverageRecall</c> - the unweighted mean of the per-class recalls.
/// </returns>
/// <remarks>
/// No zero-denominator guards are applied: the accuracy is NaN when no true label has a prediction,
/// and the average recall is NaN when any class has no predicted true labels.
/// </remarks>
public static Dictionary<Metric, object> GetMetrics(
    CrowdDataMapping dataMapping,
    Dictionary<string, int> predictions,
    Dictionary<string, int> trueLabels = null)
{
    var result = new Dictionary<Metric, object>();
    var labelCount = dataMapping.LabelCount;

    // Tweet text is only available when the data set is a CrowdDataWithText; otherwise this is null.
    var data = dataMapping.Data as CrowdDataWithText;

    var confusionMatrix = Util.ArrayInit(labelCount, labelCount, (i, j) => 0.0);
    var tweetMatrix = Util.ArrayInit(labelCount, labelCount, (i, j) => new List<Tweet>());
    var correct = 0.0;
    var trueLabelCount = 0;

    if (trueLabels == null)
    {
        trueLabels = dataMapping.Data.GoldLabels;
    }

    foreach (var kvp in trueLabels)
    {
        // Resolved before the prediction lookup so an unmapped true label value still fails fast.
        var trueLabel = dataMapping.LabelValueToIndex[kvp.Value];

        // Single lookup instead of ContainsKey followed by the indexer.
        if (!predictions.TryGetValue(kvp.Key, out var predictedValue))
        {
            continue;
        }

        trueLabelCount++;
        var predictedLabel = dataMapping.LabelValueToIndex[predictedValue];
        confusionMatrix[trueLabel, predictedLabel] += 1.0;

        if (data != null && data.Tweets.TryGetValue(kvp.Key, out var tweet))
        {
            tweetMatrix[trueLabel, predictedLabel].Add(tweet);
        }

        if (trueLabel == predictedLabel)
        {
            correct++;
        }
    }

    result[Metric.Count] = trueLabelCount;
    result[Metric.Accuracy] = correct / trueLabelCount;
    result[Metric.ConfusionMatrix] = confusionMatrix;
    result[Metric.TweetMatrix] = tweetMatrix;

    // Average recall: per-class recall is the diagonal entry divided by its row sum.
    double sumRec = 0;
    for (var i = 0; i < labelCount; i++)
    {
        double classSum = 0;
        for (var j = 0; j < labelCount; j++)
        {
            classSum += confusionMatrix[i, j];
        }

        sumRec += confusionMatrix[i, i] / classSum;
    }

    result[Metric.AverageRecall] = sumRec / labelCount;

    return result;
}