/// <summary>
/// Infers the task labels with the Dawid-Skene estimator and stores the outcome on this instance.
/// </summary>
/// <param name="data">The judgements used to build the labelings and the task/label mapping.</param>
/// <param name="fullData">The data set from which the gold labels are read.</param>
/// <param name="calculateAccuracy">When true, <c>UpdateAccuracy</c> is invoked after inference.</param>
/// <returns>This results instance.</returns>
public Results RunDawidSkene(IList<Datum> data, IList<Datum> fullData, bool calculateAccuracy)
{
    // NOTE(review): DawidSkene and Labeling are not declared in the visible source; presumably they
    // come from the separately downloaded Dawid-Skene implementation - confirm it is in the project.
    Console.WriteLine("--- Dawid Skene ---");

    PredictedLabel = new Dictionary<string, int?>();
    Mapping = new DataMapping(data);
    var goldSource = new DataMapping(fullData);

    var judgements = data
        .Select(d => new Labeling(d.WorkerId, d.TaskId, d.WorkerLabel.ToString(), d.GoldLabel.ToString()))
        .ToList();
    DawidSkene estimator = new DawidSkene(judgements, null, null);

    // The estimator may order its classes differently from our mapping, so build a lookup from
    // our label index to the estimator's class index. A label the estimator does not know maps
    // to -1, because that is what Array.IndexOf returns when nothing matches.
    var estimatorClasses = estimator.classes.Keys.ToArray();
    int[] toEstimatorIndex = Enumerable.Range(0, Mapping.LabelCount)
        .Select(offset => Array.IndexOf(estimatorClasses, (offset + Mapping.LabelMin).ToString()))
        .ToArray();

    // Gold labels are re-expressed in the estimator's class indexing; missing gold labels stay null.
    GoldLabels = goldSource.GetGoldLabelsPerTaskId().ToDictionary(
        entry => entry.Key,
        entry => entry.Value != null ? (int?)toEstimatorIndex[entry.Value.Value] : (int?)null);

    // NOTE(review): 10 is presumably the number of estimation iterations - confirm against DawidSkene.Estimate.
    estimator.Estimate(10);

    var posteriors = estimator.GetObjectClassProbabilities().Select(p => new Discrete(p)).ToArray();

    // Key each posterior by the task id found at the same position in the mapping.
    TrueLabel = Enumerable.Range(0, posteriors.Length)
        .ToDictionary(taskIndex => Mapping.TaskIndexToId[taskIndex], taskIndex => posteriors[taskIndex]);

    if (calculateAccuracy)
    {
        UpdateAccuracy();
    }

    return this;
}
/// <summary>
/// Infers the task labels from the workers' votes and stores the outcome on this instance.
/// </summary>
/// <param name="data">The judgements used to build the task/label mapping and the votes.</param>
/// <param name="fullData">The data set from which the gold labels are read.</param>
/// <param name="calculateAccuracy">When true, <c>UpdateAccuracy</c> is invoked after inference.</param>
/// <param name="useVoteDistribution">When true, each task's label is the vote distribution returned by
/// <c>Mapping.GetVoteDistribPerTaskIndex()</c>. When false, each task's majority vote is turned into a
/// point mass over <c>Mapping.LabelCount</c> labels; how ties are resolved is decided by
/// <c>Mapping.GetMajorityVotesPerTaskIndex()</c>.</param>
/// <returns>This results instance.</returns>
public Results RunMajorityVote(IList<Datum> data, IList<Datum> fullData, bool calculateAccuracy, bool useVoteDistribution)
{
    PredictedLabel = new Dictionary<string, int?>();
    Mapping = new DataMapping(data);
    FullMapping = new DataMapping(fullData);
    GoldLabels = FullMapping.GetGoldLabelsPerTaskId();

    // A task without a majority vote keeps a null distribution.
    var perTaskDistributions = useVoteDistribution
        ? Mapping.GetVoteDistribPerTaskIndex()
        : Mapping.GetMajorityVotesPerTaskIndex()
            .Select(vote => vote != null ? Discrete.PointMass(vote.Value, Mapping.LabelCount) : (Discrete)null)
            .ToArray();

    // Key each distribution by the task id found at the same position in the mapping.
    TrueLabel = perTaskDistributions
        .Select((distribution, taskIndex) => new { TaskId = Mapping.TaskIndexToId[taskIndex], Distribution = distribution })
        .ToDictionary(pair => pair.TaskId, pair => pair.Distribution);

    if (calculateAccuracy)
    {
        UpdateAccuracy();
    }

    return this;
}
/// <summary>
/// Runs the Dawid-Skene estimator over the supplied judgements and records the inferred labels.
/// </summary>
/// <param name="data">The judgements handed to the estimator and used to build the mapping.</param>
/// <param name="fullData">The data set that supplies the gold labels.</param>
/// <param name="calculateAccuracy">Whether <c>UpdateAccuracy</c> is called once inference is done.</param>
/// <returns>This results instance.</returns>
public Results RunDawidSkene(IList<Datum> data, IList<Datum> fullData, bool calculateAccuracy)
{
    // NOTE(review): the DawidSkene and Labeling types are not declared in the visible source;
    // presumably they belong to an externally obtained Dawid-Skene implementation - confirm.
    Console.WriteLine("--- Dawid Skene ---");

    PredictedLabel = new Dictionary<string, int?>();
    Mapping = new DataMapping(data);
    var goldMapping = new DataMapping(fullData);

    var workerLabelings = data
        .Select(datum => new Labeling(
            datum.WorkerId,
            datum.TaskId,
            datum.WorkerLabel.ToString(),
            datum.GoldLabel.ToString()))
        .ToList();
    DawidSkene dawidSkene = new DawidSkene(workerLabelings, null, null);

    // The estimator's class order can differ from ours, so translate our label index into the
    // estimator's class index. Array.IndexOf yields -1 for a label the estimator has not seen.
    int[] classIndexOfLabel = new int[Mapping.LabelCount];
    var classNames = dawidSkene.classes.Keys.ToArray();
    for (int label = 0; label < Mapping.LabelCount; label++)
    {
        classIndexOfLabel[label] = Array.IndexOf(classNames, (label + Mapping.LabelMin).ToString());
    }

    // Store gold labels in the estimator's class indexing, leaving absent gold labels as null.
    var goldPerTask = goldMapping.GetGoldLabelsPerTaskId();
    GoldLabels = goldPerTask.ToDictionary(
        gold => gold.Key,
        gold => gold.Value != null ? (int?)classIndexOfLabel[gold.Value.Value] : (int?)null);

    // NOTE(review): the argument 10 is presumably an iteration count - verify against DawidSkene.Estimate.
    dawidSkene.Estimate(10);

    var classProbabilities = dawidSkene.GetObjectClassProbabilities();
    var inferred = classProbabilities.Select(row => new Discrete(row)).ToArray();

    // Pair each inferred distribution with the task id at the same index of the mapping.
    TrueLabel = inferred
        .Select((distribution, taskIndex) => new { TaskId = Mapping.TaskIndexToId[taskIndex], Distribution = distribution })
        .ToDictionary(item => item.TaskId, item => item.Distribution);

    if (calculateAccuracy)
    {
        UpdateAccuracy();
    }

    return this;
}
/// <summary>
/// Applies the majority-vote baseline to the supplied judgements and records the inferred labels.
/// </summary>
/// <param name="data">The judgements from which the mapping and the votes are built.</param>
/// <param name="fullData">The data set that supplies the gold labels.</param>
/// <param name="calculateAccuracy">Whether <c>UpdateAccuracy</c> is called once inference is done.</param>
/// <param name="useVoteDistribution">True to take each task's label from
/// <c>Mapping.GetVoteDistribPerTaskIndex()</c>; false to convert each task's majority vote from
/// <c>Mapping.GetMajorityVotesPerTaskIndex()</c> into a point mass (tie handling is whatever that
/// method implements).</param>
/// <returns>This results instance.</returns>
public Results RunMajorityVote(IList<Datum> data, IList<Datum> fullData, bool calculateAccuracy, bool useVoteDistribution)
{
    PredictedLabel = new Dictionary<string, int?>();

    Mapping = new DataMapping(data);
    FullMapping = new DataMapping(fullData);
    GoldLabels = FullMapping.GetGoldLabelsPerTaskId();

    // Tasks that have no majority vote are represented by a null distribution.
    var labelPerTaskIndex = useVoteDistribution
        ? Mapping.GetVoteDistribPerTaskIndex()
        : Mapping.GetMajorityVotesPerTaskIndex()
            .Select(majority => majority != null
                ? Discrete.PointMass(majority.Value, Mapping.LabelCount)
                : (Discrete)null)
            .ToArray();

    // Pair each distribution with the task id at the same index of the mapping.
    var labelledTasks = labelPerTaskIndex.Select(
        (label, index) => new { Id = Mapping.TaskIndexToId[index], Label = label });
    TrueLabel = labelledTasks.ToDictionary(task => task.Id, task => task.Label);

    if (calculateAccuracy)
    {
        UpdateAccuracy();
    }

    return this;
}