//getMaxWeightedGroupByWorkerStatistics
public static ObjectCountingAggregatedResult getAggregatedResultUsingWorkerStatistics(List<SatyamResult> SatyamResults,
    int MinResults = TaskConstants.OBJECT_COUNTING_MTURK_MIN_RESULTS_TO_AGGREGATE,
    int MaxResults = TaskConstants.OBJECT_COUNTING_MTURK_MAX_RESULTS_TO_AGGREGATE,
    double MAX_ABSOLUTE_COUNT_DEVIATION_LOWERBOUND = TaskConstants.OBJECT_COUNTING_MTURK_MAX_ABSOLUTE_COUNT_DEVIATION_LOWERBOUND,
    double MAX_DEVIATION_FRACTION = TaskConstants.OBJECT_COUNTING_MTURK_MAX_DEVIATION_FRACTION,
    double SUPER_MAJORITY_VALUE = TaskConstants.OBJECT_COUNTING_MTURK_SUPER_MAJORITY_VALUE)
{
    if (SatyamResults.Count < MinResults)
    {
        return null;
    }

    // Hierarchically cluster the reported counts; resultsCount maps each raw count to the results that reported it.
    Dictionary<int, List<SatyamResult>> resultsCount = new Dictionary<int, List<SatyamResult>>();
    Dictionary<double, List<SatyamResult>> clusteredCounts =
        ClusterCountsSatyamResultByMaxDeviation(SatyamResults, MAX_DEVIATION_FRACTION, MAX_ABSOLUTE_COUNT_DEVIATION_LOWERBOUND, out resultsCount);
    List<double> mergedCounts = clusteredCounts.Keys.ToList();

    double finalCount = -1;
    int maxCount = 0;

    // Now check whether there is consensus by super majority among the merged values.
    if (clusteredCounts.Count == 1) // if there is only one cluster we are done!!
    {
        finalCount = mergedCounts[0];
        maxCount = clusteredCounts[finalCount].Count;
    }
    else
    {
        // Find the cluster with the largest combined worker success rate; if a group has no statistics,
        // fall back to the cluster with the most results.
        maxCount = -1;
        int index = 0;
        double maxSuccessRate = -1;
        int maxSuccRateIndex = 0;
        WorkerStatisticsAccess wsa = new WorkerStatisticsAccess();
        bool useWorkerStatistics = true;
        for (int i = 0; i < clusteredCounts.Count; i++)
        {
            // Track the largest cluster as the fallback choice.
            if (maxCount < clusteredCounts[mergedCounts[i]].Count)
            {
                maxCount = clusteredCounts[mergedCounts[i]].Count;
                index = i;
            }

            // Score the cluster by worker success rate: successRate first accumulates the probability
            // that every worker in the cluster is wrong, so 1 - successRate is the probability that
            // at least one of them is right.
            //if (!useWorkerStatistics) continue;
            int ResultHasWorkerStatistics = 0;
            double successRate = 1;
            for (int j = 0; j < clusteredCounts[mergedCounts[i]].Count; j++)
            {
                WorkerStatisticsTableEntry stats = wsa.getWorkerStatistics(
                    clusteredCounts[mergedCounts[i]][j].amazonInfo.WorkerID, TaskConstants.Counting_Image_MTurk);
                if (stats != null)
                {
                    successRate *= (1 - stats.SuccessFraction);
                }
                else
                {
                    successRate *= 0.93; // default factor for workers with no recorded statistics
                }
                ResultHasWorkerStatistics++;
            }
            //if (ResultHasWorkerStatistics == 0)
            //{
            //    useWorkerStatistics = false;
            //    continue;
            //}
            //successRate = Math.Pow(successRate, 1 / (double)ResultHasWorkerStatistics);
            successRate = 1 - successRate;
            if (maxSuccessRate < successRate)
            {
                maxSuccessRate = successRate;
                maxSuccRateIndex = i;
            }
        }
        wsa.close();

        if (useWorkerStatistics)
        {
            finalCount = mergedCounts[maxSuccRateIndex];
        }
        else
        {
            finalCount = mergedCounts[index];
            Console.WriteLine("Not using statistics");
        }
    }

    // There was no consensus: the winning cluster is below the super-majority threshold
    // and we have not yet collected the maximum number of results.
    if (maxCount < SatyamResults.Count * SUPER_MAJORITY_VALUE && SatyamResults.Count < MaxResults)
    {
        return null;
    }

    ObjectCountingAggregatedResult aggresult = new ObjectCountingAggregatedResult();
    ObjectCountingAggregatedResultMetaData meta = new ObjectCountingAggregatedResultMetaData();
    meta.TotalCount = SatyamResults.Count;
    meta.CountsHistogram = new Dictionary<string, int>();
    foreach (KeyValuePair<int, List<SatyamResult>> entry in resultsCount)
    {
        meta.CountsHistogram.Add(entry.Key.ToString(), entry.Value.Count);
    }
    aggresult.Count = finalCount;
    aggresult.metaData = meta;
    return aggresult;
}
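// ---------------------------------------------------------------------------------------------
// Illustrative sketch only (not part of the original source): shows how the worker-statistics
// weighting above behaves. Each cluster is scored by 1 - Π(1 - SuccessFraction), i.e. the
// probability that at least one worker in the cluster is correct, so a single highly reliable
// worker can outweigh several mediocre ones. The method name and the numbers below are made up.
private static double exampleClusterWeight(double[] workerSuccessFractions)
{
    // Probability that every worker in the cluster is wrong.
    double allWrong = 1;
    foreach (double p in workerSuccessFractions)
    {
        allWrong *= (1 - p);
    }
    // Probability that at least one worker is right.
    return 1 - allWrong;
}
// exampleClusterWeight(new double[] { 0.5, 0.5, 0.5 }) == 0.875  (three mediocre workers)
// exampleClusterWeight(new double[] { 0.9 })           == 0.900  (one reliable worker edges them out)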
public static ObjectCountingAggregatedResult getAggregatedResult(List<ObjectCountingResult> results,
    int MinResults = TaskConstants.OBJECT_COUNTING_MTURK_MIN_RESULTS_TO_AGGREGATE,
    int MaxResults = TaskConstants.OBJECT_COUNTING_MTURK_MAX_RESULTS_TO_AGGREGATE,
    double MAX_ABSOLUTE_COUNT_DEVIATION_LOWERBOUND = TaskConstants.OBJECT_COUNTING_MTURK_MAX_ABSOLUTE_COUNT_DEVIATION_LOWERBOUND,
    double MAX_DEVIATION_FRACTION = TaskConstants.OBJECT_COUNTING_MTURK_MAX_DEVIATION_FRACTION,
    double SUPER_MAJORITY_VALUE = TaskConstants.OBJECT_COUNTING_MTURK_SUPER_MAJORITY_VALUE)
{
    if (results.Count < MinResults)
    {
        return null;
    }

    // Build a histogram of the raw counts reported by the workers.
    Dictionary<int, int> resultCounts = new Dictionary<int, int>();
    foreach (ObjectCountingResult result in results)
    {
        if (!resultCounts.ContainsKey(result.Count))
        {
            resultCounts.Add(result.Count, 0);
        }
        resultCounts[result.Count]++;
    }

    // First, hierarchically cluster the counts.
    Dictionary<double, int> clusteredCounts =
        ClusterCountsByMaxDeviation(resultCounts, MAX_DEVIATION_FRACTION, MAX_ABSOLUTE_COUNT_DEVIATION_LOWERBOUND);
    List<double> mergedCounts = clusteredCounts.Keys.ToList();

    double finalCount = -1;
    int maxCount = 0;

    // Now check whether there is consensus by super majority among the merged values.
    if (clusteredCounts.Count == 1) // if there is only one cluster we are done!!
    {
        finalCount = mergedCounts[0];
        maxCount = clusteredCounts[finalCount];
    }
    else
    {
        // Otherwise pick the cluster with the largest number of results.
        int index = 0;
        getMaxGroup(clusteredCounts, out maxCount, out index);
        finalCount = mergedCounts[index];
    }

    // There was no consensus: the winning cluster is below the super-majority threshold
    // and we have not yet collected the maximum number of results.
    if (maxCount < results.Count * SUPER_MAJORITY_VALUE && results.Count < MaxResults)
    {
        return null;
    }

    ObjectCountingAggregatedResult aggresult = new ObjectCountingAggregatedResult();
    ObjectCountingAggregatedResultMetaData meta = new ObjectCountingAggregatedResultMetaData();
    meta.TotalCount = results.Count;
    meta.CountsHistogram = new Dictionary<string, int>();
    foreach (KeyValuePair<int, int> entry in resultCounts)
    {
        meta.CountsHistogram.Add(entry.Key.ToString(), entry.Value);
    }
    aggresult.Count = finalCount;
    aggresult.metaData = meta;
    return aggresult;
}
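// ---------------------------------------------------------------------------------------------
// Hypothetical usage sketch (not part of the original source): aggregating five worker counts
// with the default thresholds. It assumes ObjectCountingResult exposes a settable int Count,
// as implied by getAggregatedResult above; the method and variable names below are illustrative.
private static void exampleAggregateCounts()
{
    List<ObjectCountingResult> results = new List<ObjectCountingResult>();
    foreach (int c in new int[] { 10, 10, 11, 10, 25 })
    {
        results.Add(new ObjectCountingResult { Count = c });
    }

    ObjectCountingAggregatedResult agg = getAggregatedResult(results);
    if (agg != null)
    {
        // With the default deviation thresholds, 10/10/11/10 would presumably merge into one
        // cluster that reaches a super majority, while 25 stays an outlier.
        Console.WriteLine("aggregated count: " + agg.Count + ", total results: " + agg.metaData.TotalCount);
    }
    else
    {
        Console.WriteLine("no consensus yet; wait for more results");
    }
}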