/// <summary>
/// Reads the Ekman annotation CSV for the sentence range [startIndex, endIndex] and links
/// every worker found in it to the sentences of that range.
/// </summary>
/// <remarks>
/// Each CSV row belongs to one worker: column 0 is handed to the SourceWorker constructor, and
/// it is followed by one group of (Constant.EkmanLabelArray.Length + 1) columns per sentence.
/// Cells whose text is not one of the recognised Ekman label names are skipped.
/// </remarks>
/// <param name="experiment">Corpus value used as the leading part of the CSV path.</param>
/// <param name="startIndex">Index in <paramref name="sentences"/> of the first annotated sentence.</param>
/// <param name="endIndex">Index in <paramref name="sentences"/> of the last annotated sentence (inclusive).</param>
/// <param name="sentences">Sentence list that the annotations are attached to.</param>
/// <param name="sourceWorkerList">Known workers; workers not yet in the list are appended.</param>
private static void initializeEkmanAsSource(Corpus experiment, int startIndex, int endIndex, IList<Sentence> sentences, ref IList<SourceWorker> sourceWorkerList)
{
    string csvPath = experiment + "/EkmanData" + startIndex + "-" + endIndex + ".csv";

    foreach (string line in File.ReadAllLines(csvPath))
    {
        string[] cells = line.Split(',');

        // A worker that is already known is not added again; the stored instance is reused.
        SourceWorker annotator = new SourceWorker(cells[0]);
        if (sourceWorkerList.Contains(annotator))
        {
            annotator = sourceWorkerList.First(known => known.Equals(annotator));
        }
        else
        {
            sourceWorkerList.Add(annotator);
        }

        int sentenceCount = endIndex - startIndex + 1;
        int columnsPerSentence = Constant.EkmanLabelArray.Length + 1;

        for (int offset = 0; offset < sentenceCount; ++offset)
        {
            // Collect the labels found in this sentence's group of columns.
            List<Label> chosen = new List<Label>();
            for (int column = 1; column <= columnsPerSentence; ++column)
            {
                Label parsed;
                if (tryParseEkmanLabel(cells[offset * columnsPerSentence + column], out parsed))
                {
                    chosen.Add(parsed);
                }
            }

            // Fetch the matching Sentence from the sentence list and register the annotation
            // on both sides of the worker <-> sentence relation.
            Sentence sentence = sentences[startIndex + offset];
            SourceAnnotation annotation = new SourceAnnotation(chosen.ToArray());
            annotator.SentenceSourceAnnotationDic.Add(sentence, annotation);
            sentence.SourceWorkerSourceAnnotationDic.Add(annotator, annotation);
        }
    }
}

/// <summary>
/// Maps an Ekman label name as written in the CSV to its Label value.
/// </summary>
/// <returns>true when <paramref name="text"/> is a recognised label name; otherwise false.</returns>
private static bool tryParseEkmanLabel(string text, out Label label)
{
    switch (text)
    {
        case "Anger":
            label = Label.Anger;
            return true;
        case "Sadness":
            label = Label.Sadness;
            return true;
        case "Joy":
            label = Label.Joy;
            return true;
        case "Disgust":
            label = Label.Disgust;
            return true;
        case "Surprise":
            label = Label.Surprise;
            return true;
        case "Fear":
            label = Label.Fear;
            return true;
        default:
            label = default(Label);
            return false;
    }
}
/// <summary>
/// Keeps at most <see cref="MaxNakamuraAnnotationsPerSentence"/> worker annotations per sentence
/// and then removes every worker whose annotation dictionary is empty.
/// Only workers who annotated with the Nakamura taxonomy are filtered; this is done for both the
/// Train and the NotTrain data, whether Nakamura is the source or the target taxonomy.
/// When neither taxonomy is Nakamura, nothing is modified.
/// </summary>
public static void FilterTargetWorker()
{
    if (Constant.SourceTaxonomy.Name == TaxonomyType.Nakamura)
    {
        trimSourceAnnotations(TrainConstant.SentenceList, TrainConstant.SourceWorkerList);
        trimSourceAnnotations(NotTrainConstant.SentenceList, NotTrainConstant.SourceWorkerList);
    }
    else if (Constant.TargetTaxonomy.Name == TaxonomyType.Nakamura)
    {
        trimTargetAnnotations(TrainConstant.SentenceList, TrainConstant.TargetWorkerList);
        trimTargetAnnotations(NotTrainConstant.SentenceList, NotTrainConstant.TargetWorkerList);
    }
}

/// <summary>Maximum number of worker annotations kept for a single sentence.</summary>
private const int MaxNakamuraAnnotationsPerSentence = 30;

/// <summary>
/// Removes the source annotations beyond the per-sentence limit, then drops source workers
/// that have no annotated sentence left.
/// </summary>
/// <param name="sentences">Sentences whose source annotations are trimmed.</param>
/// <param name="workers">Worker list from which workers without annotations are removed.</param>
private static void trimSourceAnnotations(IEnumerable<Sentence> sentences, ICollection<SourceWorker> workers)
{
    foreach (Sentence sentence in sentences)
    {
        // Starting from Count - 1 means it does not matter whether the sentence was annotated
        // 40 or 41 times. Walking backwards keeps the lower positions untouched while removing.
        // NOTE(review): which entries sit at positions >= the limit depends on the dictionary's
        // enumeration order - confirm the dictionary type keeps the intended order.
        for (int i = sentence.SourceWorkerSourceAnnotationDic.Count - 1; i >= MaxNakamuraAnnotationsPerSentence; --i)
        {
            SourceWorker surplusWorker = sentence.SourceWorkerSourceAnnotationDic.ElementAt(i).Key;
            sentence.SourceWorkerSourceAnnotationDic.Remove(surplusWorker);
            surplusWorker.SentenceSourceAnnotationDic.Remove(sentence);
        }
    }

    // Iterate over a snapshot because the list itself is modified inside the loop.
    foreach (SourceWorker candidate in workers.ToArray())
    {
        if (candidate.SentenceSourceAnnotationDic.Count == 0)
        {
            workers.Remove(candidate);
        }
    }
}

/// <summary>
/// Removes the target annotations beyond the per-sentence limit, then drops target workers
/// that have no annotated sentence left.
/// </summary>
/// <param name="sentences">Sentences whose target annotations are trimmed.</param>
/// <param name="workers">Worker list from which workers without annotations are removed.</param>
private static void trimTargetAnnotations(IEnumerable<Sentence> sentences, ICollection<TargetWorker> workers)
{
    foreach (Sentence sentence in sentences)
    {
        // Same backwards walk as for source annotations; see the ordering caveat there.
        // NOTE(review): relies on the dictionary's enumeration order - confirm it is the intended one.
        for (int i = sentence.TargetWorkerTargetAnnotationDic.Count - 1; i >= MaxNakamuraAnnotationsPerSentence; --i)
        {
            TargetWorker surplusWorker = sentence.TargetWorkerTargetAnnotationDic.ElementAt(i).Key;
            sentence.TargetWorkerTargetAnnotationDic.Remove(surplusWorker);
            surplusWorker.SentenceTargetAnnotationDic.Remove(sentence);
        }
    }

    // Iterate over a snapshot because the list itself is modified inside the loop.
    foreach (TargetWorker candidate in workers.ToArray())
    {
        if (candidate.SentenceTargetAnnotationDic.Count == 0)
        {
            workers.Remove(candidate);
        }
    }
}
/// <summary>
/// Reads the Nakamura annotation CSV for the sentence range [startIndex, endIndex] and links
/// every worker found in it to the sentences of that range.
/// </summary>
/// <remarks>
/// Each CSV row belongs to one worker: column 0 is the user name, followed by one group of
/// (Constant.NakaLabelArray.Length + 1) columns per sentence. Cells whose text is not one of
/// the recognised Nakamura label names are skipped.
/// </remarks>
/// <param name="corpus">Corpus value used as the leading part of the CSV path.</param>
/// <param name="startIndex">Index in <paramref name="sentences"/> of the first annotated sentence.</param>
/// <param name="endIndex">Index in <paramref name="sentences"/> of the last annotated sentence (inclusive).</param>
/// <param name="sentences">Sentence list that the annotations are attached to.</param>
/// <param name="sourceWorkerList">Known workers; workers not yet in the list are appended.</param>
private static void initializeNakaAsSource(Corpus corpus, int startIndex, int endIndex, IList<Sentence> sentences, ref IList<SourceWorker> sourceWorkerList)
{
    // No separate data sample is needed: sentences outside the sentence sample are never
    // reached by the loops below.
    string csvPath = corpus + "/NakaData" + startIndex + "-" + endIndex + ".csv";

    foreach (string line in File.ReadAllLines(csvPath))
    {
        string[] cells = line.Split(',');

        // cells[0] is the user name. A worker that is already known is not added again;
        // the stored instance is reused.
        SourceWorker annotator = new SourceWorker(cells[0]);
        if (sourceWorkerList.Contains(annotator))
        {
            annotator = sourceWorkerList.First(known => known.Equals(annotator));
        }
        else
        {
            sourceWorkerList.Add(annotator);
        }

        int sentenceCount = endIndex - startIndex + 1;
        int columnsPerSentence = Constant.NakaLabelArray.Length + 1;

        for (int offset = 0; offset < sentenceCount; ++offset)
        {
            // Collect the labels found in this sentence's group of columns.
            List<Label> chosen = new List<Label>();
            for (int column = 1; column <= columnsPerSentence; ++column)
            {
                Label parsed;
                if (tryParseNakaLabel(cells[offset * columnsPerSentence + column], out parsed))
                {
                    chosen.Add(parsed);
                }
            }

            // Fetch the matching Sentence from the sentence list and register the annotation
            // on both sides of the worker <-> sentence relation.
            Sentence sentence = sentences[startIndex + offset];
            SourceAnnotation annotation = new SourceAnnotation(chosen.ToArray());
            annotator.SentenceSourceAnnotationDic.Add(sentence, annotation);
            sentence.SourceWorkerSourceAnnotationDic.Add(annotator, annotation);
        }
    }
}

/// <summary>
/// Maps a Nakamura label name as written in the CSV to its Label value.
/// </summary>
/// <returns>true when <paramref name="text"/> is a recognised label name; otherwise false.</returns>
private static bool tryParseNakaLabel(string text, out Label label)
{
    switch (text)
    {
        case "happiness":
            label = Label.喜Joy;
            return true;
        case "fondness":
            label = Label.好Fondness;
            return true;
        case "relief":
            label = Label.安Relief;
            return true;
        case "anger":
            label = Label.怒Anger;
            return true;
        case "sadness":
            label = Label.哀Sadness;
            return true;
        case "fear":
            label = Label.怖Fear;
            return true;
        case "shame":
            label = Label.恥Shame;
            return true;
        case "disgust":
            label = Label.厭Disgust;
            return true;
        case "excitement":
            label = Label.昂Excitement;
            return true;
        case "surprise":
            label = Label.驚Surprise;
            return true;
        default:
            label = default(Label);
            return false;
    }
}