Ejemplo n.º 1
0
        /// <summary>
        /// Reads the Ekman annotation file "{experiment}/EkmanData{startIndex}-{endIndex}.csv" and
        /// links every worker found in it with the sentences in the range [startIndex, endIndex].
        /// </summary>
        /// <remarks>
        /// The first cell of each row is passed to the <c>SourceWorker</c> constructor; the remaining
        /// cells are consumed in groups of (Constant.EkmanLabelArray.Length + 1) cells, one group per
        /// sentence. Cells that are not one of the six label names handled below are ignored.
        /// </remarks>
        /// <param name="experiment">Corpus whose string form is the directory prefix of the CSV file.</param>
        /// <param name="startIndex">Index in <paramref name="sentences"/> of the first annotated sentence.</param>
        /// <param name="endIndex">Index in <paramref name="sentences"/> of the last annotated sentence (inclusive).</param>
        /// <param name="sentences">Sentence list that the annotations are attached to.</param>
        /// <param name="sourceWorkerList">Known workers; workers seen for the first time are appended.</param>
        static private void initializeEkmanAsSource(Corpus experiment, int startIndex, int endIndex, IList <Sentence> sentences, ref IList <SourceWorker> sourceWorkerList)
        {
            string[] rows = File.ReadAllLines(experiment + "/EkmanData" + startIndex + "-" + endIndex + ".csv");
            foreach (string line in rows)
            {
                string[]     cells     = line.Split(',');
                SourceWorker annotator = new SourceWorker(cells[0]);
                if (sourceWorkerList.Contains(annotator))
                {
                    // A worker that is already registered is reused instead of being added again.
                    annotator = sourceWorkerList.First(known => known.Equals(annotator));
                }
                else
                {
                    sourceWorkerList.Add(annotator);
                }

                int columnsPerSentence = Constant.EkmanLabelArray.Length + 1;
                int sentenceCount      = endIndex - startIndex + 1;
                for (int offset = 0; offset < sentenceCount; ++offset)
                {
                    // Collect the labels found in this sentence's group of columns.
                    List <Label> chosen      = new List <Label>();
                    int          firstColumn = offset * columnsPerSentence + 1;
                    int          lastColumn  = (offset + 1) * columnsPerSentence;
                    for (int column = firstColumn; column <= lastColumn; ++column)
                    {
                        switch (cells[column])
                        {
                        case "Anger":
                            chosen.Add(Label.Anger);
                            break;

                        case "Sadness":
                            chosen.Add(Label.Sadness);
                            break;

                        case "Joy":
                            chosen.Add(Label.Joy);
                            break;

                        case "Disgust":
                            chosen.Add(Label.Disgust);
                            break;

                        case "Surprise":
                            chosen.Add(Label.Surprise);
                            break;

                        case "Fear":
                            chosen.Add(Label.Fear);
                            break;

                        default:
                            // Any other cell content contributes no label.
                            break;
                        }
                    }

                    // Take the matching Sentence out of the sentence list and link it with the worker in both directions.
                    Sentence         annotated  = sentences[startIndex + offset];
                    SourceAnnotation annotation = new SourceAnnotation(chosen.ToArray());
                    annotator.SentenceSourceAnnotationDic.Add(annotated, annotation);
                    annotated.SourceWorkerSourceAnnotationDic.Add(annotator, annotation);
                }
            }
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Filters only the workers who annotated with the Nakamura taxonomy, for both the Train and
 /// the NotTrain data, regardless of whether Nakamura is the source or the target taxonomy.
 /// </summary>
 /// <remarks>
 /// For every sentence, annotations at enumeration positions 30 and above are dropped from the
 /// sentence and from the corresponding worker; workers left without any annotation are then
 /// removed from the worker list. The Train data is fully processed before the NotTrain data.
 /// When the source taxonomy is Nakamura only the source side is filtered; otherwise the target
 /// side is filtered if the target taxonomy is Nakamura; otherwise nothing happens.
 /// </remarks>
 static public void FilterTargetWorker()
 {
     if (Constant.SourceTaxonomy.Name == TaxonomyType.Nakamura)
     {
         TrimSourceAnnotations(TrainConstant.SentenceList, TrainConstant.SourceWorkerList);
         TrimSourceAnnotations(NotTrainConstant.SentenceList, NotTrainConstant.SourceWorkerList);
     }
     else if (Constant.TargetTaxonomy.Name == TaxonomyType.Nakamura)
     {
         TrimTargetAnnotations(TrainConstant.SentenceList, TrainConstant.TargetWorkerList);
         TrimTargetAnnotations(NotTrainConstant.SentenceList, NotTrainConstant.TargetWorkerList);
     }
 }

 // Maximum number of worker annotations kept per sentence.
 private const int MaxAnnotationsPerSentence = 30;

 /// <summary>
 /// Keeps at most <see cref="MaxAnnotationsPerSentence"/> source annotations per sentence, then
 /// removes from <paramref name="workers"/> every source worker that has no annotation left.
 /// </summary>
 static private void TrimSourceAnnotations(IEnumerable <Sentence> sentences, ICollection <SourceWorker> workers)
 {
     foreach (Sentence sentence in sentences)
     {
         // Starting from .Count means it does not matter whether the sentence was annotated 40 or 41 times.
         // NOTE(review): which entries sit at positions >= 30 depends on the enumeration order of the
         // annotation dictionary - confirm that order is the intended one.
         for (int i = sentence.SourceWorkerSourceAnnotationDic.Count - 1; i >= MaxAnnotationsPerSentence; --i)
         {
             SourceWorker sourceWorker = sentence.SourceWorkerSourceAnnotationDic.ElementAt(i).Key;
             sentence.SourceWorkerSourceAnnotationDic.Remove(sourceWorker);
             sourceWorker.SentenceSourceAnnotationDic.Remove(sentence);
         }
     }
     // Iterate over a snapshot so that the list can be modified while scanning it.
     foreach (SourceWorker sourceWorker in workers.ToArray())
     {
         if (sourceWorker.SentenceSourceAnnotationDic.Count == 0)
         {
             workers.Remove(sourceWorker);
         }
     }
 }

 /// <summary>
 /// Keeps at most <see cref="MaxAnnotationsPerSentence"/> target annotations per sentence, then
 /// removes from <paramref name="workers"/> every target worker that has no annotation left.
 /// </summary>
 static private void TrimTargetAnnotations(IEnumerable <Sentence> sentences, ICollection <TargetWorker> workers)
 {
     foreach (Sentence sentence in sentences)
     {
         // NOTE(review): which entries sit at positions >= 30 depends on the enumeration order of the
         // annotation dictionary - confirm that order is the intended one.
         for (int i = sentence.TargetWorkerTargetAnnotationDic.Count - 1; i >= MaxAnnotationsPerSentence; --i)
         {
             TargetWorker targetWorker = sentence.TargetWorkerTargetAnnotationDic.ElementAt(i).Key;
             sentence.TargetWorkerTargetAnnotationDic.Remove(targetWorker);
             targetWorker.SentenceTargetAnnotationDic.Remove(sentence);
         }
     }
     // Iterate over a snapshot so that the list can be modified while scanning it.
     foreach (TargetWorker targetWorker in workers.ToArray())
     {
         if (targetWorker.SentenceTargetAnnotationDic.Count == 0)
         {
             workers.Remove(targetWorker);
         }
     }
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Reads the Nakamura annotation file "{corpus}/NakaData{startIndex}-{endIndex}.csv" and
        /// links every worker found in it with the sentences in the range [startIndex, endIndex].
        /// </summary>
        /// <remarks>
        /// The first cell of each row is the user name; the remaining cells are consumed in groups
        /// of (Constant.NakaLabelArray.Length + 1) cells, one group per sentence. Cells that are not
        /// one of the ten label names handled below are ignored.
        /// </remarks>
        /// <param name="corpus">Corpus whose string form is the directory prefix of the CSV file.</param>
        /// <param name="startIndex">Index in <paramref name="sentences"/> of the first annotated sentence.</param>
        /// <param name="endIndex">Index in <paramref name="sentences"/> of the last annotated sentence (inclusive).</param>
        /// <param name="sentences">Sentence list that the annotations are attached to.</param>
        /// <param name="sourceWorkerList">Known workers; workers seen for the first time are appended.</param>
        static private void initializeNakaAsSource(Corpus corpus, int startIndex, int endIndex, IList <Sentence> sentences, ref IList <SourceWorker> sourceWorkerList)
        {
            // A separate data sample is not needed: sentences outside the sentence sample are never reached by the loops below.
            string[] rows = File.ReadAllLines(corpus + "/NakaData" + startIndex + "-" + endIndex + ".csv");
            foreach (string line in rows)
            {
                string[]     cells     = line.Split(',');   // cells[0] is the user name
                SourceWorker annotator = new SourceWorker(cells[0]);
                if (sourceWorkerList.Contains(annotator))
                {
                    // A worker that is already registered is reused instead of being added again.
                    annotator = sourceWorkerList.First(known => known.Equals(annotator));
                }
                else
                {
                    sourceWorkerList.Add(annotator);
                }

                int columnsPerSentence = Constant.NakaLabelArray.Length + 1;
                int sentenceCount      = endIndex - startIndex + 1;
                for (int offset = 0; offset < sentenceCount; ++offset)
                {
                    // Collect the labels found in this sentence's group of columns.
                    List <Label> chosen      = new List <Label>();
                    int          firstColumn = offset * columnsPerSentence + 1;
                    int          lastColumn  = (offset + 1) * columnsPerSentence;
                    for (int column = firstColumn; column <= lastColumn; ++column)
                    {
                        switch (cells[column])
                        {
                        case "happiness":
                            chosen.Add(Label.喜Joy);
                            break;

                        case "fondness":
                            chosen.Add(Label.好Fondness);
                            break;

                        case "relief":
                            chosen.Add(Label.安Relief);
                            break;

                        case "anger":
                            chosen.Add(Label.怒Anger);
                            break;

                        case "sadness":
                            chosen.Add(Label.哀Sadness);
                            break;

                        case "fear":
                            chosen.Add(Label.怖Fear);
                            break;

                        case "shame":
                            chosen.Add(Label.恥Shame);
                            break;

                        case "disgust":
                            chosen.Add(Label.厭Disgust);
                            break;

                        case "excitement":
                            chosen.Add(Label.昂Excitement);
                            break;

                        case "surprise":
                            chosen.Add(Label.驚Surprise);
                            break;

                        default:
                            // Any other cell content contributes no label.
                            break;
                        }
                    }

                    // Take the matching Sentence out of the sentence list and link it with the worker in both directions.
                    Sentence         annotated  = sentences[startIndex + offset];
                    SourceAnnotation annotation = new SourceAnnotation(chosen.ToArray());
                    annotator.SentenceSourceAnnotationDic.Add(annotated, annotation);
                    annotated.SourceWorkerSourceAnnotationDic.Add(annotator, annotation);
                }
            }
        }