Exemple #1
0
        /// <summary>
        /// Run Dawid-Skene on the data.
        /// </summary>
        /// <param name="data">The data.</param>
        /// <param name="fullData">The full data.</param>
        /// <param name="calculateAccuracy">Whether to calculate accuracy</param>
        /// <returns>A results instance</returns>
        public Results RunDawidSkene(IList <Datum> data, IList <Datum> fullData, bool calculateAccuracy)
        {
            // NOTE(review): DawidSkene and Labeling are not defined in this section; they
            // presumably come from a separately integrated Dawid-Skene implementation.
            Console.WriteLine("--- Dawid Skene ---");

            // Reset the per-run state and build the mappings for both data sets.
            PredictedLabel = new Dictionary<string, int?>();
            Mapping = new DataMapping(data);
            var goldSource = new DataMapping(fullData);

            // Hand every observation to the estimator with both labels rendered as strings.
            var observations = data
                .Select(d => new Labeling(d.WorkerId, d.TaskId, d.WorkerLabel.ToString(), d.GoldLabel.ToString()))
                .ToList();
            var estimator = new DawidSkene(observations, null, null);

            // The estimator may order its classes differently from Mapping, so record, for each
            // zero-based label, the position of its string form among the estimator's class keys.
            // Array.IndexOf yields -1 for a label that has no matching class key.
            var estimatorIndexOfLabel = new int[Mapping.LabelCount];
            var estimatorClasses = estimator.classes.Keys.ToArray();
            for (int label = 0; label < estimatorIndexOfLabel.Length; ++label)
            {
                var labelText = (label + Mapping.LabelMin).ToString();
                estimatorIndexOfLabel[label] = Array.IndexOf(estimatorClasses, labelText);
            }

            // Gold labels are read from the full data; non-null values are translated through the map.
            // NOTE(review): this indexes the map directly with the gold value, so it assumes
            // GetGoldLabelsPerTaskId returns zero-based labels - confirm against DataMapping.
            GoldLabels = goldSource.GetGoldLabelsPerTaskId().ToDictionary(
                entry => entry.Key,
                entry => entry.Value != null ? (int?)estimatorIndexOfLabel[entry.Value.Value] : (int?)null);

            // NOTE(review): 10 is presumably the number of estimation iterations - confirm.
            estimator.Estimate(10);

            // Wrap each row of class probabilities in a Discrete and key it by task id.
            // NOTE(review): assumes row i of the estimator output belongs to Mapping's task index i.
            var posteriors = estimator.GetObjectClassProbabilities().Select(row => new Discrete(row)).ToArray();
            TrueLabel = Enumerable.Range(0, posteriors.Length).ToDictionary(
                taskIndex => Mapping.TaskIndexToId[taskIndex],
                taskIndex => posteriors[taskIndex]);

            if (calculateAccuracy)
            {
                UpdateAccuracy();
            }

            return this;
        }
Exemple #2
0
        /// <summary>
        /// Runs the majority vote method on the data.
        /// </summary>
        /// <param name="data">The data</param>
        /// <param name="fullData">The full data</param>
        /// <param name="calculateAccuracy">Compute the accuracy (true).</param>
        /// <param name="useVoteDistribution">The true label is sampled from the vote distribution (true) or it is
        /// taken as the mode of the vote counts (false).
        /// In the latter case, ties are broken by sampling from the most voted classes.</param>
        /// <returns>The updated results</returns>
        public Results RunMajorityVote(IList <Datum> data, IList <Datum> fullData, bool calculateAccuracy, bool useVoteDistribution)
        {
            // Reset the per-run state and rebuild both mappings.
            PredictedLabel = new Dictionary<string, int?>();
            Mapping = new DataMapping(data);

            // Gold labels are taken from the full data set, unmodified.
            FullMapping = new DataMapping(fullData);
            GoldLabels = FullMapping.GetGoldLabelsPerTaskId();

            // One belief per task index: either the vote distribution as supplied by Mapping,
            // or a point mass on the majority vote (null when the task has no majority vote).
            IEnumerable<Discrete> beliefsPerTask;
            if (useVoteDistribution)
            {
                beliefsPerTask = Mapping.GetVoteDistribPerTaskIndex();
            }
            else
            {
                beliefsPerTask = Mapping.GetMajorityVotesPerTaskIndex()
                    .Select(vote => vote != null ? Discrete.PointMass(vote.Value, Mapping.LabelCount) : (Discrete)null)
                    .ToArray();
            }

            // Key each belief by the task id found at the same position in Mapping.
            TrueLabel = beliefsPerTask
                .Select((belief, taskIndex) => new { TaskId = Mapping.TaskIndexToId[taskIndex], Belief = belief })
                .ToDictionary(pair => pair.TaskId, pair => pair.Belief);

            if (calculateAccuracy)
            {
                UpdateAccuracy();
            }

            return this;
        }
        /// <summary>
        /// Run Dawid-Skene on the data.
        /// </summary>
        /// <param name="data">The data.</param>
        /// <param name="fullData">The full data.</param>
        /// <param name="calculateAccuracy">Whether to calculate accuracy</param>
        /// <returns>A results instance</returns>
        public Results RunDawidSkene(IList<Datum> data, IList<Datum> fullData, bool calculateAccuracy)
        {
            // NOTE(review): DawidSkene and Labeling are not defined in this section; they
            // presumably come from a separately integrated Dawid-Skene implementation.
            Console.WriteLine("--- Dawid Skene ---");

            // Start from a clean prediction table and fresh mappings for both data sets.
            PredictedLabel = new Dictionary<string, int?>();
            Mapping = new DataMapping(data);
            var goldSource = new DataMapping(fullData);

            // Convert every datum into the estimator's input record; labels are passed as strings.
            var observations = data
                .Select(d => new Labeling(d.WorkerId, d.TaskId, d.WorkerLabel.ToString(), d.GoldLabel.ToString()))
                .ToList();
            var estimator = new DawidSkene(observations, null, null);

            // Build a lookup from zero-based label to the position of that label's string form
            // among the estimator's class keys, since the two orderings may differ.
            // Array.IndexOf yields -1 for a label that has no matching class key.
            var estimatorIndexOfLabel = new int[Mapping.LabelCount];
            var estimatorClasses = estimator.classes.Keys.ToArray();
            for (int label = 0; label < estimatorIndexOfLabel.Length; ++label)
            {
                var labelText = (label + Mapping.LabelMin).ToString();
                estimatorIndexOfLabel[label] = Array.IndexOf(estimatorClasses, labelText);
            }

            // Translate the non-null gold labels of the full data through the lookup.
            // NOTE(review): this indexes the lookup directly with the gold value, so it assumes
            // GetGoldLabelsPerTaskId returns zero-based labels - confirm against DataMapping.
            GoldLabels = goldSource.GetGoldLabelsPerTaskId().ToDictionary(
                entry => entry.Key,
                entry => entry.Value != null ? (int?)estimatorIndexOfLabel[entry.Value.Value] : (int?)null);

            // NOTE(review): 10 is presumably the number of estimation iterations - confirm.
            estimator.Estimate(10);

            // Turn each row of class probabilities into a Discrete, then key it by task id.
            // NOTE(review): assumes row i of the estimator output belongs to Mapping's task index i.
            var posteriors = estimator.GetObjectClassProbabilities().Select(row => new Discrete(row)).ToArray();
            TrueLabel = Enumerable.Range(0, posteriors.Length).ToDictionary(
                taskIndex => Mapping.TaskIndexToId[taskIndex],
                taskIndex => posteriors[taskIndex]);

            if (calculateAccuracy)
            {
                UpdateAccuracy();
            }

            return this;
        }
        /// <summary>
        /// Runs the majority vote method on the data.
        /// </summary>
        /// <param name="data">The data</param>
        /// <param name="fullData">The full data</param>
        /// <param name="calculateAccuracy">Compute the accuracy (true).</param>
        /// <param name="useVoteDistribution">The true label is sampled from the vote distribution (true) or it is
        /// taken as the mode of the vote counts (false).
        /// In the latter case, ties are broken by sampling from the most voted classes.</param>
        /// <returns>The updated results</returns>
        public Results RunMajorityVote(IList<Datum> data, IList<Datum> fullData, bool calculateAccuracy, bool useVoteDistribution)
        {
            // Start from a clean prediction table and a fresh mapping of the working data.
            PredictedLabel = new Dictionary<string, int?>();
            Mapping = new DataMapping(data);

            // The full data set supplies the gold labels, used as returned.
            FullMapping = new DataMapping(fullData);
            GoldLabels = FullMapping.GetGoldLabelsPerTaskId();

            // Choose the per-task belief: Mapping's vote distribution, or a point mass on the
            // majority vote, with null kept for tasks whose majority vote is null.
            IEnumerable<Discrete> beliefsPerTask;
            if (useVoteDistribution)
            {
                beliefsPerTask = Mapping.GetVoteDistribPerTaskIndex();
            }
            else
            {
                beliefsPerTask = Mapping.GetMajorityVotesPerTaskIndex()
                    .Select(vote => vote != null ? Discrete.PointMass(vote.Value, Mapping.LabelCount) : (Discrete)null)
                    .ToArray();
            }

            // Pair each belief with the task id at the same index and store the result.
            TrueLabel = beliefsPerTask
                .Select((belief, taskIndex) => new { TaskId = Mapping.TaskIndexToId[taskIndex], Belief = belief })
                .ToDictionary(pair => pair.TaskId, pair => pair.Belief);

            if (calculateAccuracy)
            {
                UpdateAccuracy();
            }

            return this;
        }