// Sentence selection: keeps only the r top-ranked sentences in Variable.Sentences.
// (Original author's caveat: this drops sentences, so it is not methodologically sound.)
// Variable.Data is left unchanged; only Variable.Sentences changes, and every kept
// sentence has its ID overwritten with its rank (0 .. r-1).
//   r        - number of sentences to keep; values >= the current sentence count are
//              rejected with a console message and nothing is modified.
//   selector - Most/Least score a sentence by the summed NumberOfTrueLabel of its annotations,
//              Best/Worst by the summed SimilarityMeasure.Compare against its BinaryGold.
//              Most/Best rank descending, Least/Worst ascending. Any other value leaves
//              all scores at 0 and performs no sort.
static private void SentenceSelectionForBoku(int r, Selector selector)
{
    if (r >= Variable.Sentences.Count)
    {
        Console.WriteLine("Wrong Sentence Number: " + r);
        return;
    }

    bool scoreByLabelCount = selector == Selector.Most || selector == Selector.Least;
    bool scoreByGoldSimilarity = selector == Selector.Best || selector == Selector.Worst;

    // Score every sentence over all annotations of all annotators.
    IDictionary<Sentence, double> scoreBySentence = new Dictionary<Sentence, double>();
    foreach (Sentence candidate in Variable.Sentences)
    {
        scoreBySentence.Add(candidate, 0);
        double total = 0;
        foreach (Annotator annotator in Variable.Annotators)
        {
            foreach (Annotation annotation in Variable.Data[annotator][candidate])
            {
                if (scoreByLabelCount)
                {
                    total += annotation.NumberOfTrueLabel;
                }
                else if (scoreByGoldSimilarity)
                {
                    total += SimilarityMeasure.Compare(annotation, candidate.BinaryGold);
                }
            }
        }
        scoreBySentence[candidate] = total;
    }

    // Rank the sentences in the direction the selector asks for.
    List<KeyValuePair<Sentence, double>> ranked = new List<KeyValuePair<Sentence, double>>(scoreBySentence);
    Comparison<KeyValuePair<Sentence, double>> ordering = null;
    switch (selector)
    {
        case Selector.Most:
        case Selector.Best:
            ordering = (left, right) => right.Value.CompareTo(left.Value);
            break;
        case Selector.Least:
        case Selector.Worst:
            ordering = (left, right) => left.Value.CompareTo(right.Value);
            break;
    }
    if (ordering != null)
    {
        ranked.Sort(ordering);
    }

    // Rebuild the global sentence list from the first r ranked entries.
    Variable.Sentences.Clear();
    for (int rank = 0; rank < r; ++rank)
    {
        Sentence kept = ranked[rank].Key;
        kept.ID = rank;
        Variable.Sentences.Add(kept);
    }
}
// Computes the work quality of every annotator and writes it to "Result/WorkLoad.csv".
// For each annotation, SimilarityMeasure.Compare(sentence.PreciseResult, annotation) is added
// to the annotator's running totals and to the global totals in Variable. The annotators are
// then written one per row, ordered by PercentOfTotalSimilarity, highest first.
//
// NOTE(review): Variable.TotalSimilarity and Variable.TotalNumberOfAnnotatedTimes are reset
// here, but the per-annotator Similarity totals are only incremented. If this method can run
// more than once on the same Annotator objects the per-annotator figures would accumulate;
// confirm they are reset elsewhere.
static public void SimilarityOfAnnotator()
{
    Variable.TotalSimilarity = 0;
    Variable.TotalNumberOfAnnotatedTimes = 0;

    foreach (Annotator annotator in Variable.Annotators)
    {
        foreach (Sentence sentence in Variable.Sentences)
        {
            foreach (Annotation annotation in Variable.Data[annotator][sentence])
            {
                double similarity = SimilarityMeasure.Compare(sentence.PreciseResult, annotation);
                annotator.Similarity.TotalSimilarity += similarity;
                ++annotator.Similarity.NumberOfAnnotatedSentences;
                Variable.TotalSimilarity += similarity;
                ++Variable.TotalNumberOfAnnotatedTimes;
            }
        }
    }

    // Only the ordering by percentage is used by the report below.
    List<Annotator> sortedByPercent = new List<Annotator>(Variable.Annotators);
    sortedByPercent.Sort(delegate(Annotator s1, Annotator s2)
    {
        return s2.Similarity.PercentOfTotalSimilarity.CompareTo(s1.Similarity.PercentOfTotalSimilarity);
    });

    // Output. The using block closes the file even if a property getter throws mid-report.
    using (StreamWriter workLoad = new StreamWriter("Result/WorkLoad.csv"))
    {
        // Explicit "\n" line ends and the trailing comma in the header are kept so the
        // file stays byte-compatible with what earlier runs produced.
        workLoad.Write("PercentOfTotalSimilarity:\n"
            + "name,average,number,total,percentOfTotalSimilarity,percentOfTotalWorkload,differenceBetweenSimilarityAndWorkload,\n");

        foreach (Annotator s in sortedByPercent)
        {
            workLoad.Write(s.ID + "," + s.Similarity.AverageSimilarity
                + "," + s.Similarity.NumberOfAnnotatedSentences
                + "," + s.Similarity.TotalSimilarity
                + "," + s.Similarity.PercentOfTotalSimilarity
                + "," + s.Similarity.PercentOfWorkload
                + "," + s.Similarity.differenceBetweenSimilarityAndWorkload);
            workLoad.Write("\n");
        }
    }
}
// Compares one algorithm's result with a series of gold standards and writes the outcome to
// "Result/<algorithm>SimilarityWith<golds>Gold.csv".
//   algorithm - "Independent", "Precise", "TreeForAll" or "TreeForSen"; selects which result
//               property of each sentence is compared. Any other value yields a similarity
//               of 0 per sentence.
//   golds     - "IndependentGold" (uses sentence.IndependentGold[i]) or "DependentGold"
//               (uses sentence.DependentGoldStandard[i]). Any other value writes only the
//               header and the summary line.
// For each gold index i in [0, NumberOfAnnotationsPerSentenceAfterGrouping + 2) one row
// "i,<average similarity over all sentences>" is written, followed by a summary row holding
// the mean of those per-gold averages.
static public void GenerateSimilarityWithGolds(string algorithm, string golds)
{
    // Choose once how a single sentence is scored against gold number goldIndex.
    Func<Sentence, int, double> score = null;
    bool knownGoldSeries = false;
    switch (golds)
    {
        case "IndependentGold":
            knownGoldSeries = true;
            switch (algorithm)
            {
                case "Independent":
                    score = (s, goldIndex) => SimilarityMeasure.Compare(s.IndependentGold[goldIndex], s.IndependentResult);
                    break;
                case "Precise":
                    score = (s, goldIndex) => SimilarityMeasure.Compare(s.IndependentGold[goldIndex], s.PreciseResult);
                    break;
                case "TreeForAll":
                    score = (s, goldIndex) => SimilarityMeasure.Compare(s.IndependentGold[goldIndex], s.TreeForAllResult);
                    break;
                case "TreeForSen":
                    score = (s, goldIndex) => SimilarityMeasure.Compare(s.IndependentGold[goldIndex], s.TreeForSenResult);
                    break;
            }
            break;
        case "DependentGold":
            knownGoldSeries = true;
            switch (algorithm)
            {
                case "Independent":
                    score = (s, goldIndex) => SimilarityMeasure.Compare(s.DependentGoldStandard[goldIndex], s.IndependentResult);
                    break;
                case "Precise":
                    score = (s, goldIndex) => SimilarityMeasure.Compare(s.DependentGoldStandard[goldIndex], s.PreciseResult);
                    break;
                case "TreeForAll":
                    score = (s, goldIndex) => SimilarityMeasure.Compare(s.DependentGoldStandard[goldIndex], s.TreeForAllResult);
                    break;
                case "TreeForSen":
                    score = (s, goldIndex) => SimilarityMeasure.Compare(s.DependentGoldStandard[goldIndex], s.TreeForSenResult);
                    break;
            }
            break;
    }

    // The using block guarantees the file is closed even when a comparison throws.
    using (StreamWriter similarityFile = new StreamWriter("Result/" + algorithm + "SimilarityWith" + golds + "Gold.csv"))
    {
        double similarityPerGold = 0;
        similarityFile.WriteLine(golds + "," + algorithm);

        if (knownGoldSeries)
        {
            for (int r = 0; r < Variable.NumberOfAnnotationsPerSentenceAfterGrouping + 2; ++r)
            {
                double similarityPerSen = 0;
                if (score != null)
                {
                    foreach (Sentence sentence in Variable.Sentences)
                    {
                        similarityPerSen += score(sentence, r);
                    }
                }
                similarityPerSen /= Variable.Sentences.Count;
                similarityFile.WriteLine(r + "," + similarityPerSen);
                similarityPerGold += similarityPerSen;
            }
        }

        // Fix: the divisor must be parenthesised. Previously "... / N + 2" sat inside a string
        // concatenation, so the total was divided by N only and the character "2" was
        // appended to the written text. The loop above runs N + 2 times, so the mean
        // divides by N + 2.
        similarityFile.WriteLine("similarity per gold:,"
            + (similarityPerGold / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + 2)));
    }
}
// Prints the accuracy of every algorithm under each requested similarity measure.
// An algorithm is identified by the Name of an entry in threads; its result for a sentence is
// fetched with GetResultFromAlgorithmName. For each measure the per-sentence scores are
// averaged over Variable.Sentences within an annotator group, then averaged over all groups
// in GroupVariable.AnnotatorGroups, and the figure is printed via Function.ConsoleWriteLine.
//   threads             - list of algorithm threads. NOTE: this list is modified - when it
//                         contains "PeTM" or "PeT", one extra entry named "PeMV" is appended.
//   SimilaritySelectors - measures to report; selectors other than Same, Compare,
//                         BinaryResultAndNumericGold, Dice and Jaccard contribute nothing
//                         and print nothing.
static private void ObtainAccuracy(IList<Thread> threads, SimilaritySelector[] SimilaritySelectors)
{
    bool hasPersonalityRun = Array.Exists(threads.ToArray(), delegate(Thread candidate)
    {
        return candidate.Name == "PeTM" || candidate.Name == "PeT";
    });
    if (hasPersonalityRun)
    {
        // Do-nothing thread that exists only so the accuracy of the PersonalityMV result is
        // computed as well; this method only reads its Name.
        Thread personalityMVThread = new Thread(new ThreadStart(delegate() { }));
        personalityMVThread.Name = "PeMV";
        threads.Add(personalityMVThread);
    }

    foreach (Thread algorithmThread in threads)
    {
        IDictionary<SimilaritySelector, double> averageByMeasure = new Dictionary<SimilaritySelector, double>();
        for (int i = 0; i < SimilaritySelectors.Length; ++i)
        {
            averageByMeasure.Add(SimilaritySelectors[i], 0);
        }

        for (int groupIndex = 0; groupIndex < GroupVariable.AnnotatorGroups.Length; ++groupIndex)
        {
            IDictionary<SimilaritySelector, double> groupTotals = new Dictionary<SimilaritySelector, double>();
            for (int i = 0; i < SimilaritySelectors.Length; ++i)
            {
                groupTotals.Add(SimilaritySelectors[i], 0);
            }

            foreach (Sentence sentence in Variable.Sentences)
            {
                Result result = sentence.AnnotaitonGroups[groupIndex].GetResultFromAlgorithmName(algorithmThread.Name);
                foreach (SimilaritySelector measure in SimilaritySelectors)
                {
                    if (measure == SimilaritySelector.Same)
                    {
                        groupTotals[measure] += Convert.ToDouble(result.Equals(sentence.BinaryGold));
                    }
                    else if (measure == SimilaritySelector.Compare)
                    {
                        groupTotals[measure] += SimilarityMeasure.Compare(result, sentence.BinaryGold);
                    }
                    else if (measure == SimilaritySelector.BinaryResultAndNumericGold)
                    {
                        groupTotals[measure] += SimilarityMeasure.BinaryAndNumeric(result, sentence.NumericGold);
                    }
                    else if (measure == SimilaritySelector.Dice)
                    {
                        groupTotals[measure] += SimilarityMeasure.DicePlusANumber(result, sentence.BinaryGold);
                    }
                    else if (measure == SimilaritySelector.Jaccard)
                    {
                        groupTotals[measure] += SimilarityMeasure.JaccardPlusANumber(result, sentence.BinaryGold);
                    }
                }
            }

            // Fold this group's per-sentence mean into the running sum over groups.
            foreach (SimilaritySelector measure in SimilaritySelectors)
            {
                averageByMeasure[measure] += groupTotals[measure] / Variable.Sentences.Count;
            }
        }

        foreach (SimilaritySelector measure in SimilaritySelectors)
        {
            averageByMeasure[measure] /= GroupVariable.AnnotatorGroups.Length;

            string label = null;
            switch (measure)
            {
                case SimilaritySelector.Same:
                    label = "Same: ";
                    break;
                case SimilaritySelector.Compare:
                    label = "Compare: ";
                    break;
                case SimilaritySelector.Jaccard:
                    label = "Jaccard: ";
                    break;
                case SimilaritySelector.Dice:
                    label = "Dice: ";
                    break;
                case SimilaritySelector.BinaryResultAndNumericGold:
                    label = "Binary&Numeric: ";
                    break;
            }
            if (label != null)
            {
                Function.ConsoleWriteLine(algorithmThread.Name + label + averageByMeasure[measure]);
            }
        }
    }
}
// Annotation selection: for every sentence keeps only the annotations of the r top-ranked
// annotators and deletes the sentence from the data of all lower-ranked ones.
// Unlike the *ForBoku selectors this modifies Variable.Data itself, and annotators whose data
// becomes empty are removed from both Variable.Data and Variable.Annotators.
//   r        - annotators to keep per sentence; also stored in
//              Variable.NumberOfAnnotationsPerSentence.
//   selector - Most/Least score an annotator by the summed NumberOfTrueLabel of their
//              annotations of the sentence, Best/Worst by the summed SimilarityMeasure.Compare
//              against the sentence's BinaryGold. Most/Best rank descending, Least/Worst
//              ascending. Any other value leaves scores at 0 and performs no sort.
static private void AnnotationSelectionForMasa(int r, Selector selector)
{
    Variable.NumberOfAnnotationsPerSentence = r;

    bool scoreByLabelCount = selector == Selector.Most || selector == Selector.Least;
    bool scoreByGoldSimilarity = selector == Selector.Best || selector == Selector.Worst;

    Comparison<KeyValuePair<Annotator, double>> ordering = null;
    switch (selector)
    {
        case Selector.Most:
        case Selector.Best:
            ordering = (left, right) => right.Value.CompareTo(left.Value);
            break;
        case Selector.Least:
        case Selector.Worst:
            ordering = (left, right) => left.Value.CompareTo(right.Value);
            break;
    }

    foreach (Sentence sentence in Variable.Sentences)
    {
        // Original author's note: each annotator is forced to annotate a given sentence at
        // most once, so for one sentence selecting annotations equals selecting annotators.
        IDictionary<Annotator, double> scoreByAnnotator = new Dictionary<Annotator, double>();
        foreach (Annotator annotator in Variable.Annotators)
        {
            if (!Variable.Data[annotator].ContainsKey(sentence))
            {
                continue;
            }

            scoreByAnnotator.Add(annotator, 0);
            double total = 0;
            foreach (Annotation annotation in Variable.Data[annotator][sentence])
            {
                if (scoreByLabelCount)
                {
                    total += annotation.NumberOfTrueLabel;
                }
                else if (scoreByGoldSimilarity)
                {
                    total += SimilarityMeasure.Compare(annotation, sentence.BinaryGold);
                }
            }
            scoreByAnnotator[annotator] = total;
        }

        List<KeyValuePair<Annotator, double>> ranked = new List<KeyValuePair<Annotator, double>>(scoreByAnnotator);
        if (ordering != null)
        {
            ranked.Sort(ordering);
        }

        // Everyone ranked below position r loses this sentence (Variable.Data is modified).
        for (int position = r; position < ranked.Count; ++position)
        {
            Variable.Data[ranked[position].Key].Remove(sentence);
        }

        // Drop annotators that have no sentences left; iterate a snapshot because the
        // annotator collection is modified inside the loop.
        foreach (Annotator annotator in Variable.Annotators.ToArray())
        {
            if (Variable.Data[annotator].Count == 0)
            {
                Variable.Data.Remove(annotator);
                Variable.Annotators.Remove(annotator);
            }
        }
    }
}
// Annotator selection: keeps only the r top-ranked annotators in Variable.Annotators.
// Variable.Data is left unchanged; only Variable.Annotators (and
// Variable.NumberOfAnnotationsPerSentence, which is set to r) change.
//   r        - number of annotators to keep. The request is rejected with a console message,
//              and nothing is modified, when r is negative, when r >= Variable.Data.Count
//              (the original check), or when r exceeds the number of annotators currently in
//              Variable.Annotators.
//   selector - Most/Least score an annotator by the summed NumberOfTrueLabel of all their
//              annotations, Best/Worst by the summed SimilarityMeasure.Compare against each
//              sentence's BinaryGold. Most/Best rank descending, Least/Worst ascending.
//              Any other value leaves scores at 0 and performs no sort.
static private void AnnotatorSelectionForBoku(int r, Selector selector)
{
    // The ranking below is built from Variable.Annotators, which can be smaller than
    // Variable.Data because this method shrinks the former without touching the latter.
    // Checking only Data.Count let such an r through, and the rebuild loop then threw
    // ArgumentOutOfRangeException after Variable.Annotators had already been cleared.
    // A negative r used to empty the list silently.
    if (r < 0 || r >= Variable.Data.Count || r > Variable.Annotators.Count)
    {
        Console.WriteLine("Wrong Annotator Number: " + r);
        return;
    }

    Variable.NumberOfAnnotationsPerSentence = r;

    // Score every annotator over all of their annotations.
    IDictionary<Annotator, double> annotatorAndValue = new Dictionary<Annotator, double>();
    foreach (Annotator annotator in Variable.Annotators)
    {
        annotatorAndValue.Add(annotator, 0);
        foreach (Sentence sentence in Variable.Sentences)
        {
            foreach (Annotation annotation in Variable.Data[annotator][sentence])
            {
                switch (selector)
                {
                    case Selector.Most:
                    case Selector.Least:
                        annotatorAndValue[annotator] += annotation.NumberOfTrueLabel;
                        break;
                    case Selector.Best:
                    case Selector.Worst:
                        annotatorAndValue[annotator] += SimilarityMeasure.Compare(annotation, sentence.BinaryGold);
                        break;
                }
            }
        }
    }

    List<KeyValuePair<Annotator, double>> sortedElements = new List<KeyValuePair<Annotator, double>>(annotatorAndValue);
    switch (selector)
    {
        case Selector.Most:
        case Selector.Best:
            // Highest score first.
            sortedElements.Sort(delegate(KeyValuePair<Annotator, double> s1, KeyValuePair<Annotator, double> s2)
            {
                return s2.Value.CompareTo(s1.Value);
            });
            break;
        case Selector.Least:
        case Selector.Worst:
            // Lowest score first.
            sortedElements.Sort(delegate(KeyValuePair<Annotator, double> s1, KeyValuePair<Annotator, double> s2)
            {
                return s1.Value.CompareTo(s2.Value);
            });
            break;
    }

    // Rebuild the annotator list from the first r ranked entries.
    Variable.Annotators.Clear();
    for (int i = 0; i < r; ++i)
    {
        Variable.Annotators.Add(sortedElements[i].Key);
    }
}
// Compares one algorithm's result with a single gold standard, averaged over all sentences.
//   algorithm - "Independent", "Precise", "TreeForAll" or "TreeForSen"; selects which result
//               property of each sentence is compared.
//   gold      - "IndAndDepGold" compares against sentence.IndResultAndDepResult,
//               "DepMVGold" against sentence.BinaryGold.
// Unrecognised values for either argument leave the sum at 0.
// NOTE(review): the average is computed but never returned, stored or written anywhere, so
// the method has no visible effect from here - confirm whether output was intended.
static public void GenerateSimilarityWithGold(string algorithm, string gold)
{
    // Pick the per-sentence comparison once instead of repeating the loop in every branch.
    Func<Sentence, double> score = null;
    if (gold == "IndAndDepGold")
    {
        if (algorithm == "Independent")
        {
            score = s => SimilarityMeasure.Compare(s.IndResultAndDepResult, s.IndependentResult);
        }
        else if (algorithm == "Precise")
        {
            score = s => SimilarityMeasure.Compare(s.IndResultAndDepResult, s.PreciseResult);
        }
        else if (algorithm == "TreeForAll")
        {
            score = s => SimilarityMeasure.Compare(s.IndResultAndDepResult, s.TreeForAllResult);
        }
        else if (algorithm == "TreeForSen")
        {
            score = s => SimilarityMeasure.Compare(s.IndResultAndDepResult, s.TreeForSenResult);
        }
    }
    else if (gold == "DepMVGold")
    {
        if (algorithm == "Independent")
        {
            score = s => SimilarityMeasure.Compare(s.BinaryGold, s.IndependentResult);
        }
        else if (algorithm == "Precise")
        {
            score = s => SimilarityMeasure.Compare(s.BinaryGold, s.PreciseResult);
        }
        else if (algorithm == "TreeForAll")
        {
            score = s => SimilarityMeasure.Compare(s.BinaryGold, s.TreeForAllResult);
        }
        else if (algorithm == "TreeForSen")
        {
            score = s => SimilarityMeasure.Compare(s.BinaryGold, s.TreeForSenResult);
        }
    }

    double similarityPerSen = 0;
    if (score != null)
    {
        foreach (Sentence sentence in Variable.Sentences)
        {
            similarityPerSen += score(sentence);
        }
    }
    similarityPerSen /= Variable.Sentences.Count;
}