Beispiel #1
0
        /// <summary>
        /// Runs the SPDS steps for every annotator group and scores the results of all sentences
        /// whose ID is at or above <c>SupervisedVariable.NumberOfTraningSentences</c> against
        /// their gold annotations.
        /// </summary>
        /// <remarks>
        /// Returns immediately when <c>SupervisedFunction.IsNumberOfTraningSentencesValid()</c> is false.
        /// For each group the binary result file is written through <c>Function.WriteBinaryResultFile</c>.
        /// NOTE(review): the averaged similarity totals are computed but never read afterwards
        /// inside this method - confirm whether they were meant to be reported somewhere.
        /// </remarks>
        static public void RunSPDS()
        {
            if (!SupervisedFunction.IsNumberOfTraningSentencesValid())
            {
                return;
            }

            // Running sums of the per-group averages, one per similarity measure.
            double totalEuclidean          = 0;
            double totalIndependentDice    = 0;
            double totalDependentDice      = 0;
            double totalIndependentCompare = 0;
            double totalDependentCompare   = 0;
            double totalDependentJaccard   = 0;
            double totalBinaryAndNumeric   = 0;

            for (int g = 0; g < GroupVariable.AnnotatorGroups.Length; ++g)
            {
                // The returned array is not used here; the call is kept as it was - TODO confirm
                // whether it is needed for a side effect.
                GroupFunction.DescendLabelsByNumber(g);

                // Give every evaluated sentence fresh, empty result holders for this group.
                foreach (Sentence sentence in Variable.Sentences)
                {
                    if (sentence.ID >= SupervisedVariable.NumberOfTraningSentences)
                    {
                        sentence.AnnotaitonGroups[g].SPDSNumResult = new NumericResult();
                        sentence.AnnotaitonGroups[g].SPDSResult    = new Result();
                    }
                }

                // Run the per-pair pipeline in its fixed order.
                foreach (LabelPair pair in PDSFunction.GenerateBilabels(g))
                {
                    InitializeTrainingSijAndPj(pair);
                    CalculatePij(pair, g);
                    CalculatePj(pair);
                    CalculatePAkjl(pair, g);
                    CalculateSij(pair, g);
                    ObtainLabelResult(pair, g);
                }

                // Output only; the results were computed by the steps above.
                Function.WriteBinaryResultFile("SPDS", g);

                double sumEuclidean          = 0;
                double sumIndependentDice    = 0;
                double sumDependentDice      = 0;
                double sumIndependentCompare = 0;
                double sumDependentCompare   = 0;
                double sumDependentJaccard   = 0;
                double sumBinaryAndNumeric   = 0;

                foreach (Sentence sentence in Variable.Sentences)
                {
                    if (sentence.ID >= SupervisedVariable.NumberOfTraningSentences)
                    {
                        var groupResult = sentence.AnnotaitonGroups[g];

                        sumEuclidean          += SimilarityMeasure.Euclidean(groupResult.SPDSNumResult, sentence.NumericGold);
                        sumIndependentDice    += SimilarityMeasure.DicePlusANumber(groupResult.SPDSNumResult.ToBinaryResult(), sentence.NumericGold.ToBinaryResult());
                        sumIndependentCompare += SimilarityMeasure.Compare(groupResult.SPDSNumResult.ToBinaryResult(), sentence.NumericGold.ToBinaryResult());
                        sumDependentCompare   += SimilarityMeasure.Compare(groupResult.SPDSResult, sentence.BinaryGold);
                        sumDependentDice      += SimilarityMeasure.DicePlusANumber(groupResult.SPDSResult, sentence.BinaryGold);
                        sumDependentJaccard   += SimilarityMeasure.JaccardPlusANumber(groupResult.SPDSResult, sentence.BinaryGold);
                        sumBinaryAndNumeric   += SimilarityMeasure.BinaryAndNumeric(groupResult.SPDSResult, sentence.NumericGold);
                    }
                }

                // Sentence count minus the training-sentence count: the divisor for this group's averages.
                var evaluatedSentenceCount = Variable.Sentences.Count - SupervisedVariable.NumberOfTraningSentences;

                totalEuclidean          += sumEuclidean / evaluatedSentenceCount;
                totalIndependentDice    += sumIndependentDice / evaluatedSentenceCount;
                totalDependentDice      += sumDependentDice / evaluatedSentenceCount;
                totalIndependentCompare += sumIndependentCompare / evaluatedSentenceCount;
                totalDependentCompare   += sumDependentCompare / evaluatedSentenceCount;
                totalDependentJaccard   += sumDependentJaccard / evaluatedSentenceCount;
                totalBinaryAndNumeric   += sumBinaryAndNumeric / evaluatedSentenceCount;
            }

            // Average the per-group means over all annotator groups.
            var groupCount = GroupVariable.AnnotatorGroups.Length;

            totalEuclidean          /= groupCount;
            totalIndependentDice    /= groupCount;
            totalDependentDice      /= groupCount;
            totalIndependentCompare /= groupCount;
            totalDependentCompare   /= groupCount;
            totalDependentJaccard   /= groupCount;
            totalBinaryAndNumeric   /= groupCount;
        }
Beispiel #2
0
 /// <summary>
 /// Counts, for every label pair of the given annotator group, how often each of the four
 /// true/false combinations of the two labels occurs per sentence, adds the joint-entropy
 /// terms of those combinations to each pair's <c>Weight</c>, and returns the outcome of
 /// <c>MinimumWeightedPerfectMatching</c> over the weighted pairs.
 /// </summary>
 /// <param name="group">
 /// Index into each sentence's <c>AnnotaitonGroups</c>; also passed to
 /// <c>GroupFunction.DescendLabelsByNumber</c> to obtain the label order.
 /// </param>
 /// <returns>The result of <c>MinimumWeightedPerfectMatching(labelPairList, true)</c>.</returns>
 static public LabelPair[] GenerateBilabels(int group)
 {
     Label[] labelArray = GroupFunction.DescendLabelsByNumber(group);

     // Both collections are handed to InitializeEmptyLabelDic by reference; it presumably
     // populates them (the original note says the pair list holds 45 unordered pairs).
     // Only the pair list is used afterwards.
     IDictionary <Label, double> labelFloatDic = new Dictionary <Label, double>();
     IList <LabelPair>           labelPairList = new List <LabelPair>();
     Function.InitializeEmptyLabelDic(ref labelFloatDic, ref labelPairList, labelArray);

     // Gather the co-occurrence counts, one pass per annotation.
     HashSet <Label> visitedLabels   = new HashSet <Label>();
     int             annotationCount = 0;
     foreach (Sentence sentence in Variable.Sentences)
     {
         foreach (Annotation annotation in sentence.AnnotaitonGroups[group].AnnotatorAnnotationDic.Values)
         {
             ++annotationCount;
             visitedLabels.Clear();
             foreach (Label label1 in labelArray)
             {
                 visitedLabels.Add(label1);
                 bool firstIsSet = annotation.Labels[label1];
                 foreach (Label label2 in labelArray)
                 {
                     // Each pair is handled once, with label1 preceding label2 in labelArray.
                     if (visitedLabels.Contains(label2))
                     {
                         continue;
                     }

                     LabelPair labelPair   = labelPairList.First(lp => lp.First == label1 && lp.Second == label2);
                     bool      secondIsSet = annotation.Labels[label2];

                     // Select the per-sentence counter matching this true/false combination.
                     var sentenceCounts = firstIsSet
                         ? (secondIsSet ? labelPair.Label1TrueLabel2TrueSentenceAndFreq : labelPair.Label1TrueLabel2FalseSentenceAndFreq)
                         : (secondIsSet ? labelPair.Label1FalseLabel2TrueSentenceAndFreq : labelPair.Label1FalseLabel2FalseSentenceAndFreq);

                     if (sentenceCounts.ContainsKey(sentence))
                     {
                         ++sentenceCounts[sentence];
                     }
                     else
                     {
                         sentenceCounts.Add(sentence, 1);
                     }
                 }
             }
         }
     }

     // Joint entropy: add -p * log2(p) for every non-empty cell, in the fixed order
     // true/true, true/false, false/true, false/false.
     foreach (LabelPair labelPair in labelPairList)
     {
         // Converting to double first guarantees a floating-point division below even if
         // the frequency properties are integral.
         double[] cellFrequencies =
         {
             labelPair.Label1TrueLabel2TrueFrequency,
             labelPair.Label1TrueLabel2FalseFrequency,
             labelPair.Label1FalseLabel2TrueFrequency,
             labelPair.Label1FalseLabel2FalseFrequency
         };
         foreach (double frequency in cellFrequencies)
         {
             // Empty cells are skipped: log(0) is undefined and they contribute nothing.
             if (frequency != 0)
             {
                 double probability = frequency / annotationCount;
                 labelPair.Weight += -probability * Math.Log(probability, 2);
             }
         }
     }

     return(MinimumWeightedPerfectMatching(labelPairList, true));
 }
        static private void Initialize(int group)
        {
            Label[] labelArray = GroupFunction.DescendLabelsByNumber(group);
            #region 初始化
            TDDSVariable.Sij = new Sij(1);
            //整体的信息,用于构造互信息参数,求树(所有句子一棵树)
            IDictionary <Label, double>     labelFloatDic  = new Dictionary <Label, double>();
            IDictionary <LabelPair, double> labelPairFloat = new Dictionary <LabelPair, double>();//前后无序,45个,用于初始化
            //Function.InitializeEmptyLabelDic(ref labelFloatDic, ref labelPairFloat, labelArray);
            IDictionary <Label, double>     numberOfLabelTrue              = new Dictionary <Label, double>(labelFloatDic);
            IDictionary <Label, double>     numberOfLabelFalse             = new Dictionary <Label, double>(labelFloatDic);
            IDictionary <LabelPair, double> numberOfLabel1TrueLabel2True   = new Dictionary <LabelPair, double>(labelPairFloat);
            IDictionary <LabelPair, double> numberOfLabel1TrueLabel2False  = new Dictionary <LabelPair, double>(labelPairFloat);
            IDictionary <LabelPair, double> numberOfLabel1FalseLabel2True  = new Dictionary <LabelPair, double>(labelPairFloat);
            IDictionary <LabelPair, double> numberOfLabel1FalseLabel2False = new Dictionary <LabelPair, double>(labelPairFloat);

            IDictionary <Sentence, IDictionary <Label, double> >     ProbabilityOfLabelTrue              = new Dictionary <Sentence, IDictionary <Label, double> >();
            IDictionary <Sentence, IDictionary <Label, double> >     ProbabilityOfLabelFalse             = new Dictionary <Sentence, IDictionary <Label, double> >();
            IDictionary <Sentence, IDictionary <LabelPair, double> > ProbabilityOfLabel1TrueLabel2True   = new Dictionary <Sentence, IDictionary <LabelPair, double> >();
            IDictionary <Sentence, IDictionary <LabelPair, double> > ProbabilityOfLabel1TrueLabel2False  = new Dictionary <Sentence, IDictionary <LabelPair, double> >();
            IDictionary <Sentence, IDictionary <LabelPair, double> > ProbabilityOfLabel1FalseLabel2True  = new Dictionary <Sentence, IDictionary <LabelPair, double> >();
            IDictionary <Sentence, IDictionary <LabelPair, double> > ProbabilityOfLabel1FalseLabel2False = new Dictionary <Sentence, IDictionary <LabelPair, double> >();

            //每句的信息,用于树的具体值(每个句子对应的树的值不同)
            foreach (Sentence sentence in Variable.Sentences)
            {
                ProbabilityOfLabelTrue.Add(sentence, new Dictionary <Label, double>(labelFloatDic));
                ProbabilityOfLabelFalse.Add(sentence, new Dictionary <Label, double>(labelFloatDic));
                ProbabilityOfLabel1TrueLabel2True.Add(sentence, new Dictionary <LabelPair, double>(labelPairFloat));
                ProbabilityOfLabel1TrueLabel2False.Add(sentence, new Dictionary <LabelPair, double>(labelPairFloat));
                ProbabilityOfLabel1FalseLabel2True.Add(sentence, new Dictionary <LabelPair, double>(labelPairFloat));
                ProbabilityOfLabel1FalseLabel2False.Add(sentence, new Dictionary <LabelPair, double>(labelPairFloat));
            }
            #endregion
            #region 求互信息的参数
            int N = 0;
            foreach (Sentence sentence in Variable.Sentences)
            {
                foreach (Annotation annotation in sentence.AnnotaitonGroups[group].AnnotatorAnnotationDic.Values)
                {
                    ++N;
                    IList <Label> traversedLabels = new List <Label>();
                    foreach (Label label1 in labelArray)
                    {
                        traversedLabels.Add(label1);
                        if (annotation.Labels[label1])
                        {
                            ++numberOfLabelTrue[label1];
                            ++ProbabilityOfLabelTrue[sentence][label1];
                            foreach (Label label2 in labelArray)
                            {
                                if (!traversedLabels.Contains(label2))
                                {
                                    if (annotation.Labels[label2])
                                    {
                                        ++numberOfLabel1TrueLabel2True[new LabelPair(label1, label2)];
                                        ++ProbabilityOfLabel1TrueLabel2True[sentence][new LabelPair(label1, label2)];
                                    }
                                    else
                                    {
                                        ++numberOfLabel1TrueLabel2False[new LabelPair(label1, label2)];
                                        ++ProbabilityOfLabel1TrueLabel2False[sentence][new LabelPair(label1, label2)];
                                    }
                                }
                            }
                        }
                        else
                        {
                            ++numberOfLabelFalse[label1];
                            ++ProbabilityOfLabelFalse[sentence][label1];
                            foreach (Label label2 in labelArray)
                            {
                                if (!traversedLabels.Contains(label2))
                                {
                                    if (annotation.Labels[label2])
                                    {
                                        ++numberOfLabel1FalseLabel2True[new LabelPair(label1, label2)];
                                        ++ProbabilityOfLabel1FalseLabel2True[sentence][new LabelPair(label1, label2)];
                                    }
                                    else
                                    {
                                        ++numberOfLabel1FalseLabel2False[new LabelPair(label1, label2)];
                                        ++ProbabilityOfLabel1FalseLabel2False[sentence][new LabelPair(label1, label2)];
                                    }
                                }
                            }
                        }
                    }
                }
            }
            #endregion
            #region 求树(全部一棵树)
            IList <KeyValuePair <LabelPair, double> > tree = TDDSFunction.GenerateIMTree(numberOfLabelTrue, numberOfLabelFalse,
                                                                                         numberOfLabel1TrueLabel2True, numberOfLabel1TrueLabel2False,
                                                                                         numberOfLabel1FalseLabel2True, numberOfLabel1FalseLabel2False, N, labelArray);//此处是导致多线程结果不同的原因:虽然换组时Variable.LabelArray不会变化,但原先sentence中用于CommonTree和DistinctTree计算的成员属性没有做区分。
            #endregion
            #region 初始化Sij
            if (TDDSVariable.SmoothTree == Smoothing.None)
            {
                foreach (Sentence sentence in Variable.Sentences)
                {
                    foreach (Label label in labelArray)
                    {
                        ProbabilityOfLabelTrue[sentence][label]  /= Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
                        ProbabilityOfLabelFalse[sentence][label] /= Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
                    }
                    foreach (LabelPair labelPair in labelPairFloat.Keys)
                    {
                        ProbabilityOfLabel1TrueLabel2True[sentence][labelPair]   /= Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
                        ProbabilityOfLabel1TrueLabel2False[sentence][labelPair]  /= Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
                        ProbabilityOfLabel1FalseLabel2True[sentence][labelPair]  /= Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
                        ProbabilityOfLabel1FalseLabel2False[sentence][labelPair] /= Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
                    }
                }
            }
            else
            {
                IDictionary <Smoothing, double[]> smoothingNumber = Function.SmoothingNumber(Variable.NumberOfAnnotationsPerSentenceAfterGrouping);
                foreach (Sentence sentence in Variable.Sentences)
                {
                    foreach (Label label in labelArray)
                    {
                        ProbabilityOfLabelTrue[sentence][label]  = (ProbabilityOfLabelTrue[sentence][label] + smoothingNumber[TDDSVariable.SmoothTree][0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothingNumber[TDDSVariable.SmoothTree][1]);
                        ProbabilityOfLabelFalse[sentence][label] = (ProbabilityOfLabelFalse[sentence][label] + smoothingNumber[TDDSVariable.SmoothTree][0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothingNumber[TDDSVariable.SmoothTree][1]);;
                    }
                    foreach (LabelPair labelPair in labelPairFloat.Keys)
                    {
                        ProbabilityOfLabel1TrueLabel2True[sentence][labelPair]   = (ProbabilityOfLabel1TrueLabel2True[sentence][labelPair] + smoothingNumber[TDDSVariable.SmoothTree][0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothingNumber[TDDSVariable.SmoothTree][1]);
                        ProbabilityOfLabel1TrueLabel2False[sentence][labelPair]  = (ProbabilityOfLabel1TrueLabel2False[sentence][labelPair] + smoothingNumber[TDDSVariable.SmoothTree][0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothingNumber[TDDSVariable.SmoothTree][1]);
                        ProbabilityOfLabel1FalseLabel2True[sentence][labelPair]  = (ProbabilityOfLabel1FalseLabel2True[sentence][labelPair] + smoothingNumber[TDDSVariable.SmoothTree][0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothingNumber[TDDSVariable.SmoothTree][1]);
                        ProbabilityOfLabel1FalseLabel2False[sentence][labelPair] = (ProbabilityOfLabel1FalseLabel2False[sentence][labelPair] + smoothingNumber[TDDSVariable.SmoothTree][0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothingNumber[TDDSVariable.SmoothTree][1]);
                    }
                }
            }
            foreach (Sentence sentence in Variable.Sentences)
            {
                for (int l = 0; l < Math.Pow(2, Variable.LabelArray.Length); ++l)
                {
                    Labelset Labelset = new Labelset(Variable.LabelArray, l);
                    TDDSVariable.Sij.Value[sentence].Add(Labelset, 1);
                    if (TDDSVariable.SmoothTree == Smoothing.None)
                    {
                        if (Labelset.Labels[tree[0].Key.First])
                        {
                            if (ProbabilityOfLabelTrue[sentence][tree[0].Key.First] != 0)
                            {
                                TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabelTrue[sentence][tree[0].Key.First];//应该除,除后准确率更高,原因未知
                            }
                        }
                        else
                        {
                            if (ProbabilityOfLabelFalse[sentence][tree[0].Key.First] != 0)
                            {
                                TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabelFalse[sentence][tree[0].Key.First];
                            }
                        }
                    }
                    else
                    {
                        if (Labelset.Labels[tree[0].Key.First])
                        {
                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabelTrue[sentence][tree[0].Key.First];
                        }
                        else
                        {
                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabelFalse[sentence][tree[0].Key.First];
                        }
                    }
                    foreach (KeyValuePair <LabelPair, double> labelPairAndValue in tree)
                    {
                        LabelPair labelPair = labelPairAndValue.Key;
                        if (TDDSVariable.SmoothTree == Smoothing.None)
                        {
                            if (Labelset.Labels[labelPair.First])
                            {
                                if (ProbabilityOfLabelTrue[sentence][labelPair.First] != 0)//考虑分母为0的情况
                                {
                                    if (Labelset.Labels[labelPair.Second])
                                    {
                                        if (ProbabilityOfLabel1TrueLabel2True[sentence].ContainsKey(labelPair))
                                        {
                                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1TrueLabel2True[sentence][labelPair] / ProbabilityOfLabelTrue[sentence][labelPair.First];
                                        }
                                        else
                                        {
                                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1TrueLabel2True[sentence][labelPair.Reverse] / ProbabilityOfLabelTrue[sentence][labelPair.First];
                                        }
                                    }
                                    else
                                    {
                                        if (ProbabilityOfLabel1TrueLabel2False[sentence].ContainsKey(labelPair))
                                        {
                                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1TrueLabel2False[sentence][labelPair] / ProbabilityOfLabelTrue[sentence][labelPair.First];
                                        }
                                        else
                                        {
                                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1FalseLabel2True[sentence][labelPair.Reverse] / ProbabilityOfLabelTrue[sentence][labelPair.First];
                                        }
                                    }
                                }
                                else
                                {
                                    TDDSVariable.Sij.Value[sentence][new Labelset(Variable.LabelArray, l)] *= 0;//此处应该是0,不是1
                                    break;
                                }
                            }
                            else
                            {
                                if (ProbabilityOfLabelFalse[sentence][labelPair.First] != 0)//考虑分母为0的情况
                                {
                                    if (Labelset.Labels[labelPair.Second])
                                    {
                                        if (ProbabilityOfLabel1FalseLabel2True[sentence].ContainsKey(labelPair))
                                        {
                                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1FalseLabel2True[sentence][labelPair] / ProbabilityOfLabelFalse[sentence][labelPair.First];
                                        }
                                        else
                                        {
                                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1TrueLabel2False[sentence][labelPair.Reverse] / ProbabilityOfLabelFalse[sentence][labelPair.First];
                                        }
                                    }
                                    else
                                    {
                                        if (ProbabilityOfLabel1FalseLabel2False[sentence].ContainsKey(labelPair))
                                        {
                                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1FalseLabel2False[sentence][labelPair] / ProbabilityOfLabelFalse[sentence][labelPair.First];
                                        }
                                        else
                                        {
                                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1FalseLabel2False[sentence][labelPair.Reverse] / ProbabilityOfLabelFalse[sentence][labelPair.First];
                                        }
                                    }
                                }
                                else
                                {
                                    TDDSVariable.Sij.Value[sentence][new Labelset(Variable.LabelArray, l)] *= 0;
                                    break;
                                }
                            }
                        }
                        else
                        {
                            if (Labelset.Labels[labelPair.First])
                            {
                                if (Labelset.Labels[labelPair.Second])
                                {
                                    if (ProbabilityOfLabel1TrueLabel2True[sentence].ContainsKey(labelPair))
                                    {
                                        TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1TrueLabel2True[sentence][labelPair] / ProbabilityOfLabelTrue[sentence][labelPair.First];
                                    }
                                    else
                                    {
                                        TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1TrueLabel2True[sentence][labelPair.Reverse] / ProbabilityOfLabelTrue[sentence][labelPair.First];
                                    }
                                }
                                else
                                {
                                    if (ProbabilityOfLabel1TrueLabel2False[sentence].ContainsKey(labelPair))
                                    {
                                        TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1TrueLabel2False[sentence][labelPair] / ProbabilityOfLabelTrue[sentence][labelPair.First];
                                    }
                                    else
                                    {
                                        TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1FalseLabel2True[sentence][labelPair.Reverse] / ProbabilityOfLabelTrue[sentence][labelPair.First];
                                    }
                                }
                            }
                            else
                            {
                                if (Labelset.Labels[labelPair.Second])
                                {
                                    if (ProbabilityOfLabel1FalseLabel2True[sentence].ContainsKey(labelPair))
                                    {
                                        TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1FalseLabel2True[sentence][labelPair] / ProbabilityOfLabelFalse[sentence][labelPair.First];
                                    }
                                    else
                                    {
                                        TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1TrueLabel2False[sentence][labelPair.Reverse] / ProbabilityOfLabelFalse[sentence][labelPair.First];
                                    }
                                }
                                else
                                {
                                    if (ProbabilityOfLabel1FalseLabel2False[sentence].ContainsKey(labelPair))
                                    {
                                        TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1FalseLabel2False[sentence][labelPair] / ProbabilityOfLabelFalse[sentence][labelPair.First];
                                    }
                                    else
                                    {
                                        TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1FalseLabel2False[sentence][labelPair.Reverse] / ProbabilityOfLabelFalse[sentence][labelPair.First];
                                    }
                                }
                            }
                        }
                    }
                }
            }
            //for (int i = 0; i < Variable.Sentences.Count; ++i)
            //{
            //    double all = 0;
            //    for (int j = 0; j < DependentVariable.NumberOfIntlabel; ++j)
            //    {
            //        all += TDDSVariable.Sij.Value[sentence][new Labelset(bilabel.ToArray(), j)];
            //    }
            //    for (int j = 0; j < DependentVariable.NumberOfIntlabel; ++j)
            //    {
            //        TDDSVariable.Sij.Value[sentence][new Labelset(bilabel.ToArray(), j)] /= all;
            //    }
            //}
            #endregion
            //Variable.OutputFile.WriteLine(TDDSVariable.Sij.ToString(DependentVariable.NumberOfIntlabel));
            //Variable.OutputFile.Close();
            //double[] ii = new double[Variable.Sentences.Count];
            //foreach (Sentence Sentence in Variable.Sentences)
            //{
            //    for (int l = 0; l < DependentVariable.NumberOfIntlabel; ++l)
            //    {
            //        ii[Sentence.ID] += TDDSVariable.Sij.Value[Sentence.ID, l];
            //    }
            //}
        }
Beispiel #4
0
 /// <summary>
 /// Builds the initial <see cref="Sij"/> table for one annotator group: for every sentence,
 /// a normalized score for each labelset, computed as the product of per-label probabilities
 /// conditioned on that label's parents in the graph returned by <c>NDDSFunction.BuildBN</c>.
 /// </summary>
 /// <param name="groupIndex">Index of the annotator group whose annotations are counted.</param>
 /// <param name="threshold">Passed through unchanged to <c>NDDSFunction.BuildBN</c>.</param>
 /// <param name="independentEstimation">Passed through unchanged to <c>NDDSFunction.BuildBN</c>.</param>
 /// <returns>
 /// A new <see cref="Sij"/> whose <c>Value[sentence]</c> holds only labelsets with a non-zero
 /// product; the stored values of each sentence are divided by their sum.
 /// </returns>
 static public Sij Initialize(int groupIndex, double threshold, IndependenceEstimation independentEstimation)
 {
     #region Initialization
     Sij sij = new Sij(1);
     #endregion
     Label[] labelArray = GroupFunction.DescendLabelsByNumber(groupIndex);
     Graph   BN         = NDDSFunction.BuildBN(groupIndex, labelArray, independentEstimation, threshold);
     #region Collect the parent labels (conditions) of every label (event) from BN
     IDictionary <Label, IList <Label> > LabelsAndPas = new Dictionary <Label, IList <Label> >();
     foreach (Label label in labelArray)
     {
         LabelsAndPas.Add(label, new List <Label>());
     }
     foreach (KeyValuePair <LabelPair, bool> hasRelationship in BN.AdjMatrix)
     {
         if (hasRelationship.Value)
         {
             // An adjacency entry (First, Second) set to true makes First a parent of Second.
             LabelsAndPas[hasRelationship.Key.Second].Add(hasRelationship.Key.First);
         }
     }
     #endregion
     #region Estimate the parameters used for the joint probability
     IDictionary <Sentence, IDictionary <LabelAndWitness, double> > Probability = new Dictionary <Sentence, IDictionary <LabelAndWitness, double> >();
     foreach (Sentence sentence in Variable.Sentences)
     {
         Probability.Add(sentence, new Dictionary <LabelAndWitness, double>());
     }
     IDictionary <Smoothing, double[]> smoothingNumber = Function.SmoothingNumber(2);
     foreach (KeyValuePair <Label, IList <Label> > labelAndPas in LabelsAndPas)
     {
         if (labelAndPas.Value.Count == 0)
         {
             // Label without parents: count true/false over all annotations of the sentence.
             foreach (Sentence sentence in Variable.Sentences)
             {
                 double numberOfLabelTrue  = 0;
                 double numberOfLabelFalse = 0;
                 foreach (Annotation annotation in sentence.AnnotaitonGroups[groupIndex].AnnotatorAnnotationDic.Values)
                 {
                     if (annotation.Labels[labelAndPas.Key])
                     {
                         ++numberOfLabelTrue;
                     }
                     else
                     {
                         ++numberOfLabelFalse;
                     }
                 }
                 // Both outcomes go through the same estimator. The "true" case previously added
                 // the raw count to the smoothed ratio because of a misplaced parenthesis.
                 Probability[sentence].Add(new LabelAndWitness(new Labelset(labelAndPas.Key, false), new Labelset()), EstimateLabelProbability(numberOfLabelFalse, smoothingNumber));
                 Probability[sentence].Add(new LabelAndWitness(new Labelset(labelAndPas.Key, true), new Labelset()), EstimateLabelProbability(numberOfLabelTrue, smoothingNumber));
             }
         }
         else
         {
             // Label with parents: one pair of estimates per true/false assignment of the parents.
             double numberOfParentAssignments = Math.Pow(2, labelAndPas.Value.Count);
             for (int i = 0; i < numberOfParentAssignments; ++i)
             {
                 Labelset parentAssignment = new Labelset(labelAndPas.Value, i);
                 foreach (Sentence sentence in Variable.Sentences)
                 {
                     double numberOfLabelTrue  = 0;
                     double numberOfLabelFalse = 0;
                     foreach (Annotation annotation in sentence.AnnotaitonGroups[groupIndex].AnnotatorAnnotationDic.Values)
                     {
                         if (annotation.IsAccordingToLabelset(parentAssignment))
                         {
                             if (annotation.Labels[labelAndPas.Key])
                             {
                                 ++numberOfLabelTrue;
                             }
                             else
                             {
                                 ++numberOfLabelFalse;
                             }
                         }
                     }
                     // NOTE(review): the divisor is Variable.NumberOfAnnotationsPerSentenceAfterGrouping,
                     // not the number of annotations matching parentAssignment - confirm this is intended.
                     Probability[sentence].Add(new LabelAndWitness(new Labelset(labelAndPas.Key, false), parentAssignment), EstimateLabelProbability(numberOfLabelFalse, smoothingNumber));
                     Probability[sentence].Add(new LabelAndWitness(new Labelset(labelAndPas.Key, true), parentAssignment), EstimateLabelProbability(numberOfLabelTrue, smoothingNumber));
                 }
             }
         }
     }
     #endregion
     #region Compute Sij
     IDictionary <Sentence, double> denominator = new Dictionary <Sentence, double>();// normalization constant per sentence
     double numberOfLabelsets = Math.Pow(2, Variable.LabelArray.Length);
     foreach (Sentence sentence in Variable.Sentences)
     {
         sij.Value.Add(sentence, new Dictionary <Labelset, double>());
         denominator.Add(sentence, 0);
         for (int l = 0; l < numberOfLabelsets; ++l)
         {
             Labelset candidate = new Labelset(Variable.LabelArray, l);
             double   value     = 1;
             foreach (Label label in labelArray)
             {
                 Labelset singleLabelAnnotation = new Labelset(label, candidate.Labels[label]);
                 Labelset subLabelset           = new Labelset(LabelsAndPas[label], candidate);
                 value *= Probability[sentence][new LabelAndWitness(singleLabelAnnotation, subLabelset)];
             }
             // Labelsets with a zero product are not stored at all.
             if (value != 0)
             {
                 sij.Value[sentence].Add(candidate, value);
                 denominator[sentence] += value;
             }
         }
     }
     #endregion
     #region Normalize
     foreach (Sentence sentence in Variable.Sentences)
     {
         // Snapshot the keys because the dictionary's values are rewritten inside the loop.
         // A sentence whose denominator is 0 has no stored labelsets, so no division by zero occurs.
         foreach (Labelset labelset in sij.Value[sentence].Keys.ToArray())
         {
             sij.Value[sentence][labelset] /= denominator[sentence];
         }
     }
     #endregion
     return(sij);
 }

 /// <summary>
 /// Turns an annotation count into a probability estimate, using the smoothing selected by
 /// <c>NDDSVariable.SmoothBN</c>: <c>(count + s[0]) / (N + s[1])</c> when smoothing is enabled,
 /// otherwise <c>count / N</c>, where N is <c>Variable.NumberOfAnnotationsPerSentenceAfterGrouping</c>.
 /// </summary>
 /// <param name="count">Number of annotations in which the outcome was observed.</param>
 /// <param name="smoothingNumber">Smoothing terms per smoothing kind; index 0 is added to the count, index 1 to N.</param>
 /// <returns>The (optionally smoothed) relative frequency.</returns>
 static private double EstimateLabelProbability(double count, IDictionary <Smoothing, double[]> smoothingNumber)
 {
     if (NDDSVariable.SmoothBN != Smoothing.None)
     {
         double[] smoothing = smoothingNumber[NDDSVariable.SmoothBN];
         return((count + smoothing[0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothing[1]));
     }
     return(count / Variable.NumberOfAnnotationsPerSentenceAfterGrouping);
 }