/// <summary>
/// Formula 2 of the paper. For every gold source annotation in the training sentences,
/// counts how many times each target annotation of the selected group occurs alongside it,
/// stores the counts in <c>MLEConstant.Pr_T_S</c>, and finally replaces each per-source
/// table with the result of <c>GeneralFunction.SortDictionary</c>.
/// </summary>
/// <param name="groupindex">Index into each sentence's <c>TargetWorkerTargetAnnotationDicGroup</c>.</param>
static public void Pr_T_S(int groupindex)
{
    // The table has to be emptied every time the group changes, which is why it is
    // (re)created here rather than in MLEConstant's constructor.
    MLEConstant.Pr_T_S = new Dictionary<SourceAnnotation, IDictionary<TargetAnnotation, double>>();

    // Accumulate raw occurrence counts.
    foreach (Sentence sentence in TrainConstant.SentenceList)
    {
        SourceAnnotation gold = sentence.GoldSourceAnnotation;

        IDictionary<TargetAnnotation, double> counts;
        if (!MLEConstant.Pr_T_S.TryGetValue(gold, out counts))
        {
            counts = new Dictionary<TargetAnnotation, double>();
            MLEConstant.Pr_T_S.Add(gold, counts);
        }

        foreach (TargetAnnotation observed in sentence.TargetWorkerTargetAnnotationDicGroup[groupindex].Values)
        {
            double seen;
            if (counts.TryGetValue(observed, out seen))
            {
                counts[observed] = seen + 1;
            }
            else
            {
                counts.Add(observed, 1);
            }
        }
    }

    // Ordering by count is enough; there is no need to divide by Pr_S, because for
    // every T the corresponding Pr_S is the same.
    // The keys are snapshotted first so entries can be reassigned while iterating.
    SourceAnnotation[] sources = MLEConstant.Pr_T_S.Keys.ToArray();
    for (int k = 0; k < sources.Length; ++k)
    {
        MLEConstant.Pr_T_S[sources[k]] = GeneralFunction.SortDictionary(MLEConstant.Pr_T_S[sources[k]]);
    }
}
/// <summary>
/// Reads the file "{experiment}/EkmanData{startIndex}-{endIndex}.csv" and registers the
/// Ekman labels found in it as source annotations on both the worker and the sentence.
/// </summary>
/// <remarks>
/// Each row is split on commas. Cell 0 is used to construct the worker; the remaining cells
/// are consumed in consecutive groups of <c>Constant.EkmanLabelArray.Length + 1</c> cells,
/// one group per sentence from <paramref name="startIndex"/> to <paramref name="endIndex"/>.
/// Cells whose text is not one of the recognised label names are ignored.
/// </remarks>
/// <param name="experiment">Corpus whose string form is the directory part of the file path.</param>
/// <param name="startIndex">Index in <paramref name="sentences"/> of the first sentence covered by the file.</param>
/// <param name="endIndex">Index in <paramref name="sentences"/> of the last sentence covered by the file.</param>
/// <param name="sentences">Sentences that receive the annotations.</param>
/// <param name="sourceWorkerList">Known workers; workers not yet contained are appended.</param>
static private void initializeEkmanAsSource(Corpus experiment, int startIndex, int endIndex, IList<Sentence> sentences, ref IList<SourceWorker> sourceWorkerList)
{
    string[] rows = File.ReadAllLines(experiment + "/EkmanData" + startIndex + "-" + endIndex + ".csv");

    for (int r = 0; r < rows.Length; ++r)
    {
        string[] cells = rows[r].Split(',');

        SourceWorker worker = new SourceWorker(cells[0]);
        if (sourceWorkerList.Contains(worker))
        {
            // A worker that is already known is not added again; reuse the stored instance.
            SourceWorker probe = worker;
            worker = sourceWorkerList.First(x => x.Equals(probe));
        }
        else
        {
            sourceWorkerList.Add(worker);
        }

        int cellsPerSentence = Constant.EkmanLabelArray.Length + 1;
        int sentenceCount = endIndex - startIndex + 1;

        for (int offset = 0; offset < sentenceCount; ++offset)
        {
            List<Label> chosen = new List<Label>();

            // Cells of this sentence: a contiguous run starting right after the previous group.
            int firstCell = offset * cellsPerSentence + 1;
            for (int c = firstCell; c < firstCell + cellsPerSentence; ++c)
            {
                switch (cells[c])
                {
                    case "Anger": chosen.Add(Label.Anger); break;
                    case "Sadness": chosen.Add(Label.Sadness); break;
                    case "Joy": chosen.Add(Label.Joy); break;
                    case "Disgust": chosen.Add(Label.Disgust); break;
                    case "Surprise": chosen.Add(Label.Surprise); break;
                    case "Fear": chosen.Add(Label.Fear); break;
                }
            }

            // Pick the matching Sentence out of the sentence list and attach the annotation.
            Sentence sentence = sentences[startIndex + offset];
            SourceAnnotation annotation = new SourceAnnotation(chosen.ToArray());
            worker.SentenceSourceAnnotationDic.Add(sentence, annotation);
            sentence.SourceWorkerSourceAnnotationDic.Add(worker, annotation);
        }
    }
}
/// <summary>
/// Initializes the LoveSample data with Ekman labels as the source taxonomy and
/// Nakamura labels as the target taxonomy.
/// </summary>
/// <remarks>
/// Reads "LoveSample/EkmanGold-sample.csv"; row <c>j</c> supplies the gold source annotation
/// of <c>TrainConstant.SentenceList[j]</c>. Only the first
/// <c>Constant.EkmanLabelArray.Length</c> cells of a row are inspected, and cells that do not
/// match a known lower-case label name are ignored. Afterwards the Nakamura target
/// annotations are loaded through <c>initializeNakaAsTarget</c>.
/// </remarks>
static public void _InitializeLoveSample()
{
    Constant.SourceTaxonomy.LabelArray = Constant.EkmanLabelArray;
    Constant.TargetTaxonomy.LabelArray = Constant.NakaLabelArray;

    string[] data = File.ReadAllLines("LoveSample/EkmanGold-sample.csv");
    for (int j = 0; j < data.Length; ++j)
    {
        string[] labels = data[j].Split(',');

        // A fresh annotation per row; it is only used as a scratch holder for the truth flags.
        SourceAnnotation sourceAnnotation = new SourceAnnotation();
        for (int i = 0; i < Constant.EkmanLabelArray.Length; ++i)
        {
            switch (labels[i])
            {
                case "joy":
                    sourceAnnotation.LabelAndTruthDic[Label.Joy] = true;
                    break;
                case "anger":
                    sourceAnnotation.LabelAndTruthDic[Label.Anger] = true;
                    break;
                case "sadness":
                    sourceAnnotation.LabelAndTruthDic[Label.Sadness] = true;
                    break;
                case "fear":
                    sourceAnnotation.LabelAndTruthDic[Label.Fear] = true;
                    break;
                case "disgust":
                    sourceAnnotation.LabelAndTruthDic[Label.Disgust] = true;
                    break;
                case "surprise":
                    sourceAnnotation.LabelAndTruthDic[Label.Surprise] = true;
                    break;
            }
        }

        // Store a copy so the sentence does not share the scratch dictionary.
        TrainConstant.SentenceList[j].GoldSourceAnnotation.LabelAndTruthDic =
            new Dictionary<Label, bool>(sourceAnnotation.LabelAndTruthDic);
    }

    // NOTE(review): 0 and 62 are presumably the first/last sentence index of the sample — confirm against initializeNakaAsTarget.
    initializeNakaAsTarget(Corpus.LoveSample, 0, 62, TrainConstant.SentenceList, ref TrainConstant.TargetWorkerList);

    // NOTE(review): 41 is a hard-coded value for this sample; its origin is not visible here.
    SpaceConstant.TargetWorkerNumberPerSentence = 41;
}
/// <summary>
/// Reads the file "{corpus}/NakaData{startIndex}-{endIndex}.csv" and registers the
/// Nakamura labels found in it as source annotations on both the worker and the sentence.
/// </summary>
/// <remarks>
/// Each row is split on commas. Cell 0 is the user name; the remaining cells are consumed in
/// consecutive groups of <c>Constant.NakaLabelArray.Length + 1</c> cells, one group per
/// sentence from <paramref name="startIndex"/> to <paramref name="endIndex"/>. Cells whose
/// text is not one of the recognised label names are ignored.
/// </remarks>
/// <param name="corpus">Corpus whose string form is the directory part of the file path.</param>
/// <param name="startIndex">Index in <paramref name="sentences"/> of the first sentence covered by the file.</param>
/// <param name="endIndex">Index in <paramref name="sentences"/> of the last sentence covered by the file.</param>
/// <param name="sentences">Sentences that receive the annotations.</param>
/// <param name="sourceWorkerList">Known workers; workers not yet contained are appended.</param>
static private void initializeNakaAsSource(Corpus corpus, int startIndex, int endIndex, IList<Sentence> sentences, ref IList<SourceWorker> sourceWorkerList)
{
    // No separate "data-sample" file is needed: sentences outside the sentence sample
    // are never reached by the loops below.
    string[] rows = File.ReadAllLines(corpus + "/NakaData" + startIndex + "-" + endIndex + ".csv");

    for (int r = 0; r < rows.Length; ++r)
    {
        string[] cells = rows[r].Split(',');

        // cells[0] is the user name.
        SourceWorker worker = new SourceWorker(cells[0]);
        if (sourceWorkerList.Contains(worker))
        {
            // A worker that is already known is not added again; reuse the stored instance.
            SourceWorker probe = worker;
            worker = sourceWorkerList.First(x => x.Equals(probe));
        }
        else
        {
            sourceWorkerList.Add(worker);
        }

        int cellsPerSentence = Constant.NakaLabelArray.Length + 1;
        int sentenceCount = endIndex - startIndex + 1;

        for (int offset = 0; offset < sentenceCount; ++offset)
        {
            List<Label> chosen = new List<Label>();

            // Cells of this sentence: a contiguous run starting right after the previous group.
            int firstCell = offset * cellsPerSentence + 1;
            for (int c = firstCell; c < firstCell + cellsPerSentence; ++c)
            {
                switch (cells[c])
                {
                    case "happiness": chosen.Add(Label.喜Joy); break;
                    case "fondness": chosen.Add(Label.好Fondness); break;
                    case "relief": chosen.Add(Label.安Relief); break;
                    case "anger": chosen.Add(Label.怒Anger); break;
                    case "sadness": chosen.Add(Label.哀Sadness); break;
                    case "fear": chosen.Add(Label.怖Fear); break;
                    case "shame": chosen.Add(Label.恥Shame); break;
                    case "disgust": chosen.Add(Label.厭Disgust); break;
                    case "excitement": chosen.Add(Label.昂Excitement); break;
                    case "surprise": chosen.Add(Label.驚Surprise); break;
                }
            }

            // Pick the matching Sentence out of the sentence list and attach the annotation.
            Sentence sentence = sentences[startIndex + offset];
            SourceAnnotation annotation = new SourceAnnotation(chosen.ToArray());
            worker.SentenceSourceAnnotationDic.Add(sentence, annotation);
            sentence.SourceWorkerSourceAnnotationDic.Add(worker, annotation);
        }
    }
}