예제 #1
0
        /// <summary>
        /// Arranges a square grid of tweet lists into a nested dictionary keyed first by
        /// row name and then by column name.
        /// </summary>
        /// <param name="array">
        /// The tweet lists, indexed as [row label index, column label index]. Both dimensions
        /// are expected to equal the mapping's label count (verified by a debug assertion only).
        /// </param>
        /// <param name="mapping">
        /// The data mapping used to translate label indices into display strings.
        /// </param>
        /// <param name="rowLabel">
        /// Text appended in parentheses to every row name.
        /// </param>
        /// <param name="columnLabel">
        /// Text appended in parentheses to every column name.
        /// </param>
        /// <returns>
        /// A dictionary from row name to a dictionary from column name to the tweet list held in
        /// that cell. The lists are the same instances as in <paramref name="array"/>, not copies.
        /// </returns>
        public static Dictionary<string, Dictionary<string, List<Tweet>>> GetTweetMatrix(
            List<Tweet>[,] array,
            CrowdDataMapping mapping,
            string rowLabel,
            string columnLabel)
        {
            // The condition is kept inside the call so that it is not evaluated in release builds.
            Debug.Assert(
                array.GetLength(0) == mapping.LabelCount && array.GetLength(1) == mapping.LabelCount,
                "Inconsistent arguments");

            var matrix = new Dictionary<string, Dictionary<string, List<Tweet>>>();

            for (var rowIndex = 0; rowIndex < mapping.LabelCount; rowIndex++)
            {
                var rowValue = mapping.LabelIndexToValue[rowIndex];
                var rowKey = $"{mapping.LabelValueToString[rowValue]} ({rowLabel})";
                var cells = new Dictionary<string, List<Tweet>>();

                for (var columnIndex = 0; columnIndex < mapping.LabelCount; columnIndex++)
                {
                    var columnValue = mapping.LabelIndexToValue[columnIndex];
                    var columnKey = $"{mapping.LabelValueToString[columnValue]} ({columnLabel})";

                    // Indexer assignment: a repeated name replaces the earlier entry instead of throwing.
                    cells[columnKey] = array[rowIndex, columnIndex];
                }

                matrix[rowKey] = cells;
            }

            return matrix;
        }
예제 #2
0
        /// <summary>
        /// Converts a square matrix of values into column-oriented table data: one list of row
        /// names plus one list of cell values per label column.
        /// </summary>
        /// <param name="array">
        /// The matrix, indexed as [row label index, column label index]. Both dimensions are
        /// expected to equal the mapping's label count (verified by a debug assertion only).
        /// </param>
        /// <param name="mapping">
        /// The data mapping that supplies the label count and the label display strings.
        /// </param>
        /// <param name="rowLabel">
        /// Text appended in parentheses to every row name.
        /// </param>
        /// <param name="columnLabel">
        /// Text appended in parentheses to every column name.
        /// </param>
        /// <param name="asPercentages">
        /// When true, each cell is divided by the sum of its row and wrapped in a
        /// <see cref="Percentage"/>; when false, the raw value is stored. There is no guard
        /// against a row whose sum is zero, so ordinary floating-point division rules apply.
        /// </param>
        /// <returns>
        /// A dictionary whose entry under the empty-string key holds the row names, and whose
        /// remaining entries map each column name to that column's cells in row order.
        /// </returns>
        public static Dictionary<string, List<object>> GetConfusionMatrix(
            double[,] array,
            CrowdDataMapping mapping,
            string rowLabel,
            string columnLabel,
            bool asPercentages = false)
        {
            // The condition is kept inside the call so that it is not evaluated in release builds.
            Debug.Assert(
                array.GetLength(0) == mapping.LabelCount && array.GetLength(1) == mapping.LabelCount,
                "Inconsistent arguments");

            var table = new Dictionary<string, List<object>>();
            var labelNames = mapping.LabelIndexToString;
            var rowTotals = new double[mapping.LabelCount];
            var rowHeaders = new List<object>();

            // First pass: collect the row names and the total of every row.
            for (var r = 0; r < mapping.LabelCount; r++)
            {
                string header = $"{labelNames[r]} ({rowLabel})";
                rowHeaders.Add(header);

                var total = 0.0;
                for (var c = 0; c < mapping.LabelCount; c++)
                {
                    total += array[r, c];
                }

                rowTotals[r] = total;
            }

            table[string.Empty] = rowHeaders;

            // Second pass: emit one list per column, optionally normalised by the row totals.
            for (var c = 0; c < mapping.LabelCount; c++)
            {
                var cells = new List<object>();
                var columnKey = $"{labelNames[c]} ({columnLabel})";

                for (var r = 0; r < mapping.LabelCount; r++)
                {
                    var cell = array[r, c];
                    cells.Add(asPercentages ? (object)new Percentage(cell / rowTotals[r]) : cell);
                }

                table[columnKey] = cells;
            }

            return table;
        }
예제 #3
0
 /// <summary>
 /// Builds confusion-matrix table data for every worker from the means of that worker's
 /// Dirichlet rows.
 /// </summary>
 /// <param name="workerCpts">
 /// The worker conditional probability tables, keyed by worker; each value holds one
 /// Dirichlet per row.
 /// </param>
 /// <param name="mapping">
 /// The data mapping, passed through to <c>GetConfusionMatrix</c>.
 /// </param>
 /// <param name="asPercentages">Whether the entries are produced as percentages.</param>
 /// <returns>
 /// A dictionary with the same keys as <paramref name="workerCpts"/>, each mapped to the
 /// table data that <c>GetConfusionMatrix</c> produces for that worker's mean matrix.
 /// </returns>
 public static Dictionary<string, Dictionary<string, List<object>>> GetWorkerCpts(
     Dictionary<string, Dirichlet[]> workerCpts,
     CrowdDataMapping mapping,
     bool asPercentages = false)
 {
     return workerCpts.ToDictionary(
         entry => entry.Key,
         entry =>
         {
             var rows = entry.Value;

             // The matrix is square, sized by the number of Dirichlet rows for this worker.
             var size = rows.Length;
             var rowMeans = rows.Select(row => row.GetMean()).ToArray();
             var meanMatrix = Util.ArrayInit(size, size, (i, j) => rowMeans[i][j]);

             return GetConfusionMatrix(
                 meanMatrix,
                 mapping,
                 ConfusionMatrixRowLabel,
                 ConfusionMatrixColLabel,
                 asPercentages);
         });
 }
예제 #4
0
        /// <summary>
        /// Gets conditional probability table plot data with bounds: one array of points per
        /// table row, produced by <c>DirichletWithErrorBars</c>.
        /// </summary>
        /// <param name="cpt">
        /// The conditional probability table, one Dirichlet per row. Its length and each
        /// Dirichlet's dimension are expected to equal the mapping's label count (verified by a
        /// debug assertion only).
        /// </param>
        /// <param name="mapping">
        /// The data mapping that supplies the label count and the label display strings.
        /// </param>
        /// <param name="rowLabel">
        /// Text appended in parentheses to every row name.
        /// </param>
        /// <param name="columnLabel">
        /// The column label. Not used by this method.
        /// </param>
        /// <returns>
        /// A dictionary from row name to the points returned by <c>DirichletWithErrorBars</c>
        /// for that row.
        /// </returns>
        public static Dictionary<string, PointWithBounds<string>[]> GetCptWithBounds(
            Dirichlet[] cpt,
            CrowdDataMapping mapping,
            string rowLabel,
            string columnLabel)
        {
            // The condition is kept inside the call so that it is not evaluated in release builds.
            Debug.Assert(
                cpt.Length == mapping.LabelCount && cpt.All(mat => mat.Dimension == mapping.LabelCount),
                "Inconsistent arguments");

            var rowsByName = new Dictionary<string, PointWithBounds<string>[]>();
            var labelNames = mapping.LabelIndexToString;

            for (var rowIndex = 0; rowIndex < mapping.LabelCount; rowIndex++)
            {
                var key = $"{labelNames[rowIndex]} ({rowLabel})";
                var points = DirichletWithErrorBars(cpt[rowIndex], labelNames);
                rowsByName[key] = points;
            }

            return rowsByName;
        }
예제 #5
0
        /// <summary>
        ///     Gets the prominent workers for a data set, i.e. the workers with the highest
        ///     <c>Metric.Count</c> value as reported by <c>GetMetrics</c>.
        /// </summary>
        /// <param name="data">
        ///     The crowd data whose labels are grouped by worker.
        /// </param>
        /// <param name="maxNumberWorkers">
        ///     The maximum number of worker ids to return.
        /// </param>
        /// <returns>
        ///     The ids of at most <paramref name="maxNumberWorkers"/> workers, chosen in
        ///     descending order of their count.
        /// </returns>
        private static HashSet<string> GetProminentWorkers(CrowdData data, int maxNumberWorkers = 20)
        {
            var mapping = new CrowdDataMapping(data, LabelValuesToString);

            // Values stay boxed, exactly as GetMetrics returns them; they are unboxed when sorting.
            var countByWorker = new Dictionary<string, object>();

            foreach (var workerGroup in data.CrowdLabels.GroupBy(label => label.WorkerId))
            {
                // De-duplicate with the worker/tweet comparer before keying by tweet id.
                var labelByTweet = workerGroup
                    .Distinct(CrowdData.WorkerTweetEqualityComparer.Instance)
                    .ToDictionary(label => label.TweetId, label => label.WorkerLabel);

                countByWorker[workerGroup.Key] = GetMetrics(mapping, labelByTweet)[Metric.Count];
            }

            var topWorkerIds = countByWorker
                .OrderByDescending(entry => (int)entry.Value)
                .Take(maxNumberWorkers)
                .Select(entry => entry.Key);

            return new HashSet<string>(topWorkerIds);
        }
 /// <summary>
 /// Creates a <see cref="BiasedWorkerModelRunner"/>. All arguments are forwarded unchanged
 /// to the base class constructor; no additional initialization happens here.
 /// </summary>
 /// <param name="dataMapping">The data mapping.</param>
 /// <param name="model">The biased worker model to run.</param>
 /// <param name="trainingRunner">The training runner; optional, defaults to null.</param>
 public BiasedWorkerModelRunner(
     CrowdDataMapping dataMapping,
     BiasedWorkerModel model,
     ModelRunnerBase trainingRunner = null)
     : base(dataMapping, model, trainingRunner)
 {
 }
예제 #7
0
 /// <summary>
 /// Initializes a new instance of the <see cref="ModelRunnerBase"/> class, passing the data
 /// mapping to the base constructor and storing the model and training runner.
 /// </summary>
 /// <param name="dataMapping">The data mapping.</param>
 /// <param name="model">The model.</param>
 /// <param name="trainingRunner">
 /// The training runner. This should be null if
 /// (a) we are running training, or
 /// (b) we are running validation and the model has no training.
 /// </param>
 protected ModelRunnerBase(
     CrowdDataMapping dataMapping,
     ModelBase model,
     ModelRunnerBase trainingRunner = null)
     : base(dataMapping)
 {
     Model = model;
     TrainingRunner = trainingRunner;
 }
예제 #8
0
 /// <summary>
 /// Creates a <see cref="HonestWorkerRunner"/>. All arguments are forwarded unchanged to the
 /// base class constructor; no additional initialization happens here.
 /// </summary>
 /// <param name="dataMapping">The data mapping.</param>
 /// <param name="model">The honest worker model to run.</param>
 /// <param name="trainingRunner">The training runner; optional, defaults to null.</param>
 public HonestWorkerRunner(
     CrowdDataMapping dataMapping,
     HonestWorkerModel model,
     ModelRunnerBase trainingRunner = null)
     : base(dataMapping, model, trainingRunner)
 {
 }
예제 #9
0
 /// <summary>
 /// Initializes a new instance of the <see cref="RunnerBase"/> class and stores the
 /// supplied data mapping.
 /// </summary>
 /// <param name="dataMapping">The mapping between data and indices.</param>
 protected RunnerBase(CrowdDataMapping dataMapping)
 {
     DataMapping = dataMapping;
 }
예제 #10
0
        /// <summary>
        /// Computes evaluation metrics by comparing predictions against true labels.
        /// </summary>
        /// <remarks>
        /// Only items present in both <paramref name="trueLabels"/> and
        /// <paramref name="predictions"/> are counted. The results are stored under:
        /// <c>Metric.Count</c> (number of counted items, an int),
        /// <c>Metric.Accuracy</c> (fraction of counted items predicted correctly),
        /// <c>Metric.ConfusionMatrix</c> (counts indexed as [true label, predicted label]),
        /// <c>Metric.TweetMatrix</c> (the tweets falling into each confusion-matrix cell, filled
        /// only when the mapping's data is a <see cref="CrowdDataWithText"/>), and
        /// <c>Metric.AverageRecall</c> (mean over all labels of the per-label recall).
        /// No guard is applied to the divisions: the accuracy is NaN when no item is counted,
        /// and the average recall is NaN when any label has no counted true items.
        /// </remarks>
        /// <param name="dataMapping">
        ///     The data mapping.
        /// </param>
        /// <param name="predictions">
        ///     The predicted label value for each item key.
        /// </param>
        /// <param name="trueLabels">
        ///     The true labels. If null, then the gold labels are used.
        /// </param>
        /// <returns>
        /// The dictionary of metric values.
        /// </returns>
        public static Dictionary<Metric, object> GetMetrics(CrowdDataMapping dataMapping, Dictionary<string, int> predictions, Dictionary<string, int> trueLabels = null)
        {
            var result          = new Dictionary<Metric, object>();
            var labelCount      = dataMapping.LabelCount;
            var data            = dataMapping.Data as CrowdDataWithText;
            var confusionMatrix = Util.ArrayInit(labelCount, labelCount, (i, j) => 0.0);
            var tweetMatrix     = Util.ArrayInit(labelCount, labelCount, (i, j) => new List<Tweet>());
            var correct         = 0.0;

            var trueLabelCount = 0;

            if (trueLabels == null)
            {
                trueLabels = dataMapping.Data.GoldLabels;
            }

            foreach (var kvp in trueLabels)
            {
                // Resolved before the prediction check, as before, so an unmapped true label
                // value still fails here even when the item has no prediction.
                var trueLabel = dataMapping.LabelValueToIndex[kvp.Value];

                // Single lookup instead of ContainsKey followed by the indexer.
                int predictedValue;
                if (!predictions.TryGetValue(kvp.Key, out predictedValue))
                {
                    continue;
                }

                trueLabelCount++;
                var predictedLabel = dataMapping.LabelValueToIndex[predictedValue];

                confusionMatrix[trueLabel, predictedLabel] += 1.0;

                // Tweets are only available when the underlying data carries text.
                if (data != null && data.Tweets.ContainsKey(kvp.Key))
                {
                    tweetMatrix[trueLabel, predictedLabel].Add(data.Tweets[kvp.Key]);
                }

                if (trueLabel == predictedLabel)
                {
                    correct++;
                }
            }

            result[Metric.Count]           = trueLabelCount;
            result[Metric.Accuracy]        = correct / trueLabelCount;
            result[Metric.ConfusionMatrix] = confusionMatrix;
            result[Metric.TweetMatrix]     = tweetMatrix;

            // Average recall: for each true label, the diagonal count divided by the row total,
            // averaged over all labels.
            double sumRec = 0;

            for (var i = 0; i < labelCount; i++)
            {
                double classSum = 0;
                for (var j = 0; j < labelCount; j++)
                {
                    classSum += confusionMatrix[i, j];
                }

                sumRec += confusionMatrix[i, i] / classSum;
            }

            result[Metric.AverageRecall] = sumRec / labelCount;

            return result;
        }