Exemplo n.º 1
0
        /// <summary>
        /// Initializes <c>TDDSVariable.Sij</c> for one annotation group.
        /// Steps, in order: (1) count single-label and label-pair true/false outcomes over every annotation of the
        /// group, both corpus-wide and per sentence; (2) pass the corpus-wide counts to
        /// <c>TDDSFunction.GenerateIMTree</c> to obtain a list of label pairs (the "tree"); (3) turn the per-sentence
        /// counts into probabilities, plain or smoothed depending on <c>TDDSVariable.SmoothTree</c>; (4) for every
        /// sentence and every label set, store in Sij the product of a root term (taken from <c>tree[0].Key.First</c>)
        /// and one pair/single probability ratio per tree entry.
        /// </summary>
        /// <param name="group">Index into <c>sentence.AnnotaitonGroups</c> selecting the annotation group to read; also passed to <c>GroupFunction.DescendLabelsByNumber</c>.</param>
        static private void Initialize(int group)
        {
            Label[] labelArray = GroupFunction.DescendLabelsByNumber(group);
            #region 初始化
            // Region: initialization.
            TDDSVariable.Sij = new Sij(1);
            // Corpus-wide information, used to build the mutual-information parameters and derive the tree (one tree shared by all sentences).
            IDictionary <Label, double>     labelFloatDic  = new Dictionary <Label, double>();
            IDictionary <LabelPair, double> labelPairFloat = new Dictionary <LabelPair, double>();// unordered pairs, 45 of them, used for initialization
            // NOTE(review): with the call below commented out, nothing in this method populates labelFloatDic or
            // labelPairFloat, so every dictionary copied from them starts empty and the later loops over
            // labelPairFloat.Keys iterate over nothing. Confirm whether that is intended, or whether `Dictionary`
            // here is a project type that tolerates missing keys.
            //Function.InitializeEmptyLabelDic(ref labelFloatDic, ref labelPairFloat, labelArray);
            // Corpus-wide counters: one per label for true/false, one per label pair for each of the four true/false combinations.
            IDictionary <Label, double>     numberOfLabelTrue              = new Dictionary <Label, double>(labelFloatDic);
            IDictionary <Label, double>     numberOfLabelFalse             = new Dictionary <Label, double>(labelFloatDic);
            IDictionary <LabelPair, double> numberOfLabel1TrueLabel2True   = new Dictionary <LabelPair, double>(labelPairFloat);
            IDictionary <LabelPair, double> numberOfLabel1TrueLabel2False  = new Dictionary <LabelPair, double>(labelPairFloat);
            IDictionary <LabelPair, double> numberOfLabel1FalseLabel2True  = new Dictionary <LabelPair, double>(labelPairFloat);
            IDictionary <LabelPair, double> numberOfLabel1FalseLabel2False = new Dictionary <LabelPair, double>(labelPairFloat);

            // Per-sentence counterparts of the counters above. They hold raw counts first and are converted to
            // probabilities in place further down.
            IDictionary <Sentence, IDictionary <Label, double> >     ProbabilityOfLabelTrue              = new Dictionary <Sentence, IDictionary <Label, double> >();
            IDictionary <Sentence, IDictionary <Label, double> >     ProbabilityOfLabelFalse             = new Dictionary <Sentence, IDictionary <Label, double> >();
            IDictionary <Sentence, IDictionary <LabelPair, double> > ProbabilityOfLabel1TrueLabel2True   = new Dictionary <Sentence, IDictionary <LabelPair, double> >();
            IDictionary <Sentence, IDictionary <LabelPair, double> > ProbabilityOfLabel1TrueLabel2False  = new Dictionary <Sentence, IDictionary <LabelPair, double> >();
            IDictionary <Sentence, IDictionary <LabelPair, double> > ProbabilityOfLabel1FalseLabel2True  = new Dictionary <Sentence, IDictionary <LabelPair, double> >();
            IDictionary <Sentence, IDictionary <LabelPair, double> > ProbabilityOfLabel1FalseLabel2False = new Dictionary <Sentence, IDictionary <LabelPair, double> >();

            // Per-sentence information, used for the concrete values on the tree (each sentence has its own values for the tree).
            foreach (Sentence sentence in Variable.Sentences)
            {
                ProbabilityOfLabelTrue.Add(sentence, new Dictionary <Label, double>(labelFloatDic));
                ProbabilityOfLabelFalse.Add(sentence, new Dictionary <Label, double>(labelFloatDic));
                ProbabilityOfLabel1TrueLabel2True.Add(sentence, new Dictionary <LabelPair, double>(labelPairFloat));
                ProbabilityOfLabel1TrueLabel2False.Add(sentence, new Dictionary <LabelPair, double>(labelPairFloat));
                ProbabilityOfLabel1FalseLabel2True.Add(sentence, new Dictionary <LabelPair, double>(labelPairFloat));
                ProbabilityOfLabel1FalseLabel2False.Add(sentence, new Dictionary <LabelPair, double>(labelPairFloat));
            }
            #endregion
            #region 求互信息的参数
            // Region: compute the mutual-information parameters (the counts).
            // N is the total number of annotations visited; it is passed to GenerateIMTree below.
            int N = 0;
            foreach (Sentence sentence in Variable.Sentences)
            {
                foreach (Annotation annotation in sentence.AnnotaitonGroups[group].AnnotatorAnnotationDic.Values)
                {
                    ++N;
                    // Labels already used as label1 (including the current one). The inner loops skip them, so each
                    // pair (label1, label2) is counted once, with label1 earlier in labelArray than label2.
                    IList <Label> traversedLabels = new List <Label>();
                    foreach (Label label1 in labelArray)
                    {
                        traversedLabels.Add(label1);
                        if (annotation.Labels[label1])
                        {
                            // label1 is true in this annotation.
                            ++numberOfLabelTrue[label1];
                            ++ProbabilityOfLabelTrue[sentence][label1];
                            foreach (Label label2 in labelArray)
                            {
                                if (!traversedLabels.Contains(label2))
                                {
                                    if (annotation.Labels[label2])
                                    {
                                        ++numberOfLabel1TrueLabel2True[new LabelPair(label1, label2)];
                                        ++ProbabilityOfLabel1TrueLabel2True[sentence][new LabelPair(label1, label2)];
                                    }
                                    else
                                    {
                                        ++numberOfLabel1TrueLabel2False[new LabelPair(label1, label2)];
                                        ++ProbabilityOfLabel1TrueLabel2False[sentence][new LabelPair(label1, label2)];
                                    }
                                }
                            }
                        }
                        else
                        {
                            // label1 is false in this annotation.
                            ++numberOfLabelFalse[label1];
                            ++ProbabilityOfLabelFalse[sentence][label1];
                            foreach (Label label2 in labelArray)
                            {
                                if (!traversedLabels.Contains(label2))
                                {
                                    if (annotation.Labels[label2])
                                    {
                                        ++numberOfLabel1FalseLabel2True[new LabelPair(label1, label2)];
                                        ++ProbabilityOfLabel1FalseLabel2True[sentence][new LabelPair(label1, label2)];
                                    }
                                    else
                                    {
                                        ++numberOfLabel1FalseLabel2False[new LabelPair(label1, label2)];
                                        ++ProbabilityOfLabel1FalseLabel2False[sentence][new LabelPair(label1, label2)];
                                    }
                                }
                            }
                        }
                    }
                }
            }
            #endregion
            #region 求树(全部一棵树)
            // Region: derive the tree (a single tree for the whole corpus).
            // Original author's note on this call: this is the reason results differ under multi-threading —
            // although Variable.LabelArray does not change when switching groups, the member properties on sentence
            // that were originally used for the CommonTree and DistinctTree calculations were not kept separate.
            IList <KeyValuePair <LabelPair, double> > tree = TDDSFunction.GenerateIMTree(numberOfLabelTrue, numberOfLabelFalse,
                                                                                         numberOfLabel1TrueLabel2True, numberOfLabel1TrueLabel2False,
                                                                                         numberOfLabel1FalseLabel2True, numberOfLabel1FalseLabel2False, N, labelArray);
            #endregion
            #region 初始化Sij
            // Region: initialize Sij.
            // Step 1: convert the per-sentence counts into probabilities, in place.
            if (TDDSVariable.SmoothTree == Smoothing.None)
            {
                // No smoothing: divide each count by Variable.NumberOfAnnotationsPerSentenceAfterGrouping.
                foreach (Sentence sentence in Variable.Sentences)
                {
                    foreach (Label label in labelArray)
                    {
                        ProbabilityOfLabelTrue[sentence][label]  /= Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
                        ProbabilityOfLabelFalse[sentence][label] /= Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
                    }
                    // Only pairs that are keys of labelPairFloat are normalized here.
                    foreach (LabelPair labelPair in labelPairFloat.Keys)
                    {
                        ProbabilityOfLabel1TrueLabel2True[sentence][labelPair]   /= Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
                        ProbabilityOfLabel1TrueLabel2False[sentence][labelPair]  /= Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
                        ProbabilityOfLabel1FalseLabel2True[sentence][labelPair]  /= Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
                        ProbabilityOfLabel1FalseLabel2False[sentence][labelPair] /= Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
                    }
                }
            }
            else
            {
                // Smoothing: (count + s[0]) / (annotations per sentence + s[1]), where s is the array that
                // Function.SmoothingNumber returns for the selected TDDSVariable.SmoothTree.
                IDictionary <Smoothing, double[]> smoothingNumber = Function.SmoothingNumber(Variable.NumberOfAnnotationsPerSentenceAfterGrouping);
                foreach (Sentence sentence in Variable.Sentences)
                {
                    foreach (Label label in labelArray)
                    {
                        ProbabilityOfLabelTrue[sentence][label]  = (ProbabilityOfLabelTrue[sentence][label] + smoothingNumber[TDDSVariable.SmoothTree][0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothingNumber[TDDSVariable.SmoothTree][1]);
                        ProbabilityOfLabelFalse[sentence][label] = (ProbabilityOfLabelFalse[sentence][label] + smoothingNumber[TDDSVariable.SmoothTree][0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothingNumber[TDDSVariable.SmoothTree][1]);;
                    }
                    // Only pairs that are keys of labelPairFloat are smoothed here.
                    foreach (LabelPair labelPair in labelPairFloat.Keys)
                    {
                        ProbabilityOfLabel1TrueLabel2True[sentence][labelPair]   = (ProbabilityOfLabel1TrueLabel2True[sentence][labelPair] + smoothingNumber[TDDSVariable.SmoothTree][0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothingNumber[TDDSVariable.SmoothTree][1]);
                        ProbabilityOfLabel1TrueLabel2False[sentence][labelPair]  = (ProbabilityOfLabel1TrueLabel2False[sentence][labelPair] + smoothingNumber[TDDSVariable.SmoothTree][0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothingNumber[TDDSVariable.SmoothTree][1]);
                        ProbabilityOfLabel1FalseLabel2True[sentence][labelPair]  = (ProbabilityOfLabel1FalseLabel2True[sentence][labelPair] + smoothingNumber[TDDSVariable.SmoothTree][0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothingNumber[TDDSVariable.SmoothTree][1]);
                        ProbabilityOfLabel1FalseLabel2False[sentence][labelPair] = (ProbabilityOfLabel1FalseLabel2False[sentence][labelPair] + smoothingNumber[TDDSVariable.SmoothTree][0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothingNumber[TDDSVariable.SmoothTree][1]);
                    }
                }
            }
            // Step 2: fill Sij for every sentence and every label set index l in [0, 2^Variable.LabelArray.Length).
            // NOTE(review): this part enumerates label sets over Variable.LabelArray, while the counting above
            // iterated labelArray (from GroupFunction.DescendLabelsByNumber) — confirm both cover the same labels.
            foreach (Sentence sentence in Variable.Sentences)
            {
                for (int l = 0; l < Math.Pow(2, Variable.LabelArray.Length); ++l)
                {
                    Labelset Labelset = new Labelset(Variable.LabelArray, l);
                    // Start the product at 1.
                    TDDSVariable.Sij.Value[sentence].Add(Labelset, 1);
                    // Root term: marginal probability of the first label of the first tree entry, chosen by
                    // whether that label is true or false in this label set.
                    if (TDDSVariable.SmoothTree == Smoothing.None)
                    {
                        // Unsmoothed probabilities can be 0; a zero root marginal is skipped here rather than multiplied in.
                        if (Labelset.Labels[tree[0].Key.First])
                        {
                            if (ProbabilityOfLabelTrue[sentence][tree[0].Key.First] != 0)
                            {
                                TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabelTrue[sentence][tree[0].Key.First];// original author's note: should divide; accuracy is higher after dividing, reason unknown
                            }
                        }
                        else
                        {
                            if (ProbabilityOfLabelFalse[sentence][tree[0].Key.First] != 0)
                            {
                                TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabelFalse[sentence][tree[0].Key.First];
                            }
                        }
                    }
                    else
                    {
                        // Smoothed branch multiplies unconditionally (no zero check).
                        if (Labelset.Labels[tree[0].Key.First])
                        {
                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabelTrue[sentence][tree[0].Key.First];
                        }
                        else
                        {
                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabelFalse[sentence][tree[0].Key.First];
                        }
                    }
                    // Edge terms: for each tree entry multiply by
                    //   P(First = a, Second = b) / P(First = a)
                    // where a and b are the truth values of the two labels in this label set.
                    // When the per-sentence dictionary has no entry for labelPair, the reversed pair is looked up
                    // instead; in the mixed true/false cases the roles of label1/label2 swap with the pair order,
                    // which is why the opposite True/False dictionary is read for labelPair.Reverse.
                    foreach (KeyValuePair <LabelPair, double> labelPairAndValue in tree)
                    {
                        LabelPair labelPair = labelPairAndValue.Key;
                        if (TDDSVariable.SmoothTree == Smoothing.None)
                        {
                            if (Labelset.Labels[labelPair.First])
                            {
                                if (ProbabilityOfLabelTrue[sentence][labelPair.First] != 0)// guard against a zero denominator
                                {
                                    if (Labelset.Labels[labelPair.Second])
                                    {
                                        if (ProbabilityOfLabel1TrueLabel2True[sentence].ContainsKey(labelPair))
                                        {
                                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1TrueLabel2True[sentence][labelPair] / ProbabilityOfLabelTrue[sentence][labelPair.First];
                                        }
                                        else
                                        {
                                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1TrueLabel2True[sentence][labelPair.Reverse] / ProbabilityOfLabelTrue[sentence][labelPair.First];
                                        }
                                    }
                                    else
                                    {
                                        if (ProbabilityOfLabel1TrueLabel2False[sentence].ContainsKey(labelPair))
                                        {
                                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1TrueLabel2False[sentence][labelPair] / ProbabilityOfLabelTrue[sentence][labelPair.First];
                                        }
                                        else
                                        {
                                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1FalseLabel2True[sentence][labelPair.Reverse] / ProbabilityOfLabelTrue[sentence][labelPair.First];
                                        }
                                    }
                                }
                                else
                                {
                                    // Zero denominator: the whole product is forced to 0 and the remaining tree entries are skipped.
                                    TDDSVariable.Sij.Value[sentence][new Labelset(Variable.LabelArray, l)] *= 0;// original author's note: this should be 0, not 1
                                    break;
                                }
                            }
                            else
                            {
                                if (ProbabilityOfLabelFalse[sentence][labelPair.First] != 0)// guard against a zero denominator
                                {
                                    if (Labelset.Labels[labelPair.Second])
                                    {
                                        if (ProbabilityOfLabel1FalseLabel2True[sentence].ContainsKey(labelPair))
                                        {
                                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1FalseLabel2True[sentence][labelPair] / ProbabilityOfLabelFalse[sentence][labelPair.First];
                                        }
                                        else
                                        {
                                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1TrueLabel2False[sentence][labelPair.Reverse] / ProbabilityOfLabelFalse[sentence][labelPair.First];
                                        }
                                    }
                                    else
                                    {
                                        if (ProbabilityOfLabel1FalseLabel2False[sentence].ContainsKey(labelPair))
                                        {
                                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1FalseLabel2False[sentence][labelPair] / ProbabilityOfLabelFalse[sentence][labelPair.First];
                                        }
                                        else
                                        {
                                            TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1FalseLabel2False[sentence][labelPair.Reverse] / ProbabilityOfLabelFalse[sentence][labelPair.First];
                                        }
                                    }
                                }
                                else
                                {
                                    // Zero denominator: the whole product is forced to 0 and the remaining tree entries are skipped.
                                    TDDSVariable.Sij.Value[sentence][new Labelset(Variable.LabelArray, l)] *= 0;
                                    break;
                                }
                            }
                        }
                        else
                        {
                            // Smoothed branch: same four cases as above, without the zero-denominator checks.
                            if (Labelset.Labels[labelPair.First])
                            {
                                if (Labelset.Labels[labelPair.Second])
                                {
                                    if (ProbabilityOfLabel1TrueLabel2True[sentence].ContainsKey(labelPair))
                                    {
                                        TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1TrueLabel2True[sentence][labelPair] / ProbabilityOfLabelTrue[sentence][labelPair.First];
                                    }
                                    else
                                    {
                                        TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1TrueLabel2True[sentence][labelPair.Reverse] / ProbabilityOfLabelTrue[sentence][labelPair.First];
                                    }
                                }
                                else
                                {
                                    if (ProbabilityOfLabel1TrueLabel2False[sentence].ContainsKey(labelPair))
                                    {
                                        TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1TrueLabel2False[sentence][labelPair] / ProbabilityOfLabelTrue[sentence][labelPair.First];
                                    }
                                    else
                                    {
                                        TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1FalseLabel2True[sentence][labelPair.Reverse] / ProbabilityOfLabelTrue[sentence][labelPair.First];
                                    }
                                }
                            }
                            else
                            {
                                if (Labelset.Labels[labelPair.Second])
                                {
                                    if (ProbabilityOfLabel1FalseLabel2True[sentence].ContainsKey(labelPair))
                                    {
                                        TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1FalseLabel2True[sentence][labelPair] / ProbabilityOfLabelFalse[sentence][labelPair.First];
                                    }
                                    else
                                    {
                                        TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1TrueLabel2False[sentence][labelPair.Reverse] / ProbabilityOfLabelFalse[sentence][labelPair.First];
                                    }
                                }
                                else
                                {
                                    if (ProbabilityOfLabel1FalseLabel2False[sentence].ContainsKey(labelPair))
                                    {
                                        TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1FalseLabel2False[sentence][labelPair] / ProbabilityOfLabelFalse[sentence][labelPair.First];
                                    }
                                    else
                                    {
                                        TDDSVariable.Sij.Value[sentence][Labelset] *= ProbabilityOfLabel1FalseLabel2False[sentence][labelPair.Reverse] / ProbabilityOfLabelFalse[sentence][labelPair.First];
                                    }
                                }
                            }
                        }
                    }
                }
            }
            // Disabled code: per-sentence normalization of Sij so that the values over all label sets sum to 1.
            //for (int i = 0; i < Variable.Sentences.Count; ++i)
            //{
            //    double all = 0;
            //    for (int j = 0; j < DependentVariable.NumberOfIntlabel; ++j)
            //    {
            //        all += TDDSVariable.Sij.Value[sentence][new Labelset(bilabel.ToArray(), j)];
            //    }
            //    for (int j = 0; j < DependentVariable.NumberOfIntlabel; ++j)
            //    {
            //        TDDSVariable.Sij.Value[sentence][new Labelset(bilabel.ToArray(), j)] /= all;
            //    }
            //}
            #endregion
            // Disabled code: debug output of Sij and a per-sentence sum of its values.
            //Variable.OutputFile.WriteLine(TDDSVariable.Sij.ToString(DependentVariable.NumberOfIntlabel));
            //Variable.OutputFile.Close();
            //double[] ii = new double[Variable.Sentences.Count];
            //foreach (Sentence Sentence in Variable.Sentences)
            //{
            //    for (int l = 0; l < DependentVariable.NumberOfIntlabel; ++l)
            //    {
            //        ii[Sentence.ID] += TDDSVariable.Sij.Value[Sentence.ID, l];
            //    }
            //}
        }
        /// <summary>
        /// Initializes <c>TDDSVariable.Sij</c> from every annotation in <c>Variable.Data</c> (all annotators, no grouping).
        /// Steps, in order: (1) count single-label and label-pair true/false outcomes, corpus-wide and per sentence;
        /// (2) pass the corpus-wide counts to <c>TDDSFunction.GenerateIMTree</c> to obtain a list of label pairs
        /// (the "tree"); (3) for every sentence and every label set index in [0, 2^Variable.LabelArray.Length), store
        /// in Sij the root term multiplied by one pair-count / single-count ratio per tree entry.
        /// </summary>
        /// <remarks>
        /// The root term tests and reads the same label, <c>tree[0].Key.First</c>. The per-sentence dictionaries keep
        /// raw counts in this overload; only the root term is divided by
        /// <c>Variable.NumberOfAnnotationsPerSentenceAfterGrouping</c>.
        /// </remarks>
        static private void Initialize()
        {
            #region Initialization
            TDDSVariable.Sij = new Sij(1);
            // Corpus-wide information, used to derive the tree (one tree shared by all sentences)
            // and to build the mutual-information parameters.
            IDictionary <Label, double>     labelFloatDic  = new Dictionary <Label, double>();
            IDictionary <LabelPair, double> labelPairFloat = new Dictionary <LabelPair, double>();// unordered pairs, 45 of them, used for initialization
            // NOTE(review): with the call below commented out, nothing in this method populates labelFloatDic or
            // labelPairFloat, so every dictionary copied from them starts empty. Confirm whether that is intended,
            // or whether `Dictionary` here is a project type that tolerates missing keys.
            //Function.InitializeEmptyLabelDic(ref labelFloatDic, ref labelPairFloat, Variable.LabelArray);

            // Corpus-wide counters: one per label for true/false, one per label pair for each true/false combination.
            IDictionary <Label, double>     numberOfLabelTrue              = new Dictionary <Label, double>(labelFloatDic);
            IDictionary <Label, double>     numberOfLabelFalse             = new Dictionary <Label, double>(labelFloatDic);
            IDictionary <LabelPair, double> numberOfLabel1TrueLabel2True   = new Dictionary <LabelPair, double>(labelPairFloat);
            IDictionary <LabelPair, double> numberOfLabel1TrueLabel2False  = new Dictionary <LabelPair, double>(labelPairFloat);
            IDictionary <LabelPair, double> numberOfLabel1FalseLabel2True  = new Dictionary <LabelPair, double>(labelPairFloat);
            IDictionary <LabelPair, double> numberOfLabel1FalseLabel2False = new Dictionary <LabelPair, double>(labelPairFloat);

            // Per-sentence counterparts of the counters above (raw counts despite the "Probability" names).
            IDictionary <Sentence, IDictionary <Label, double> >     ProbabilityOfLabelTrue              = new Dictionary <Sentence, IDictionary <Label, double> >();
            IDictionary <Sentence, IDictionary <Label, double> >     ProbabilityOfLabelFalse             = new Dictionary <Sentence, IDictionary <Label, double> >();
            IDictionary <Sentence, IDictionary <LabelPair, double> > ProbabilityOfLabel1TrueLabel2True   = new Dictionary <Sentence, IDictionary <LabelPair, double> >();
            IDictionary <Sentence, IDictionary <LabelPair, double> > ProbabilityOfLabel1TrueLabel2False  = new Dictionary <Sentence, IDictionary <LabelPair, double> >();
            IDictionary <Sentence, IDictionary <LabelPair, double> > ProbabilityOfLabel1FalseLabel2True  = new Dictionary <Sentence, IDictionary <LabelPair, double> >();
            IDictionary <Sentence, IDictionary <LabelPair, double> > ProbabilityOfLabel1FalseLabel2False = new Dictionary <Sentence, IDictionary <LabelPair, double> >();

            // Per-sentence information, used for the concrete values on the tree (each sentence has its own values).
            foreach (Sentence sentence in Variable.Sentences)
            {
                ProbabilityOfLabelTrue.Add(sentence, new Dictionary <Label, double>(labelFloatDic));
                ProbabilityOfLabelFalse.Add(sentence, new Dictionary <Label, double>(labelFloatDic));
                ProbabilityOfLabel1TrueLabel2True.Add(sentence, new Dictionary <LabelPair, double>(labelPairFloat));
                ProbabilityOfLabel1TrueLabel2False.Add(sentence, new Dictionary <LabelPair, double>(labelPairFloat));
                ProbabilityOfLabel1FalseLabel2True.Add(sentence, new Dictionary <LabelPair, double>(labelPairFloat));
                ProbabilityOfLabel1FalseLabel2False.Add(sentence, new Dictionary <LabelPair, double>(labelPairFloat));
            }
            #endregion
            #region Compute the mutual-information parameters (counts)
            // N is the total number of annotations visited; it is passed to GenerateIMTree below.
            int N = 0;
            foreach (Annotator annotator in Variable.Annotators)
            {
                foreach (Sentence sentence in Variable.Sentences)
                {
                    foreach (Annotation annotation in Variable.Data[annotator][sentence])
                    {
                        ++N;
                        // Labels already used as label1 (including the current one). The inner loops skip them,
                        // so each pair (label1, label2) is counted once, with label1 earlier in the array.
                        IList <Label> traversedLabels = new List <Label>();
                        foreach (Label label1 in Variable.LabelArray)
                        {
                            traversedLabels.Add(label1);
                            if (annotation.Labels[label1])
                            {
                                ++numberOfLabelTrue[label1];
                                ++ProbabilityOfLabelTrue[sentence][label1];
                                foreach (Label label2 in Variable.LabelArray)
                                {
                                    if (!traversedLabels.Contains(label2))
                                    {
                                        LabelPair pair = new LabelPair(label1, label2);
                                        if (annotation.Labels[label2])
                                        {
                                            ++numberOfLabel1TrueLabel2True[pair];
                                            ++ProbabilityOfLabel1TrueLabel2True[sentence][pair];
                                        }
                                        else
                                        {
                                            ++numberOfLabel1TrueLabel2False[pair];
                                            ++ProbabilityOfLabel1TrueLabel2False[sentence][pair];
                                        }
                                    }
                                }
                            }
                            else
                            {
                                ++numberOfLabelFalse[label1];
                                ++ProbabilityOfLabelFalse[sentence][label1];
                                foreach (Label label2 in Variable.LabelArray)
                                {
                                    if (!traversedLabels.Contains(label2))
                                    {
                                        LabelPair pair = new LabelPair(label1, label2);
                                        if (annotation.Labels[label2])
                                        {
                                            ++numberOfLabel1FalseLabel2True[pair];
                                            ++ProbabilityOfLabel1FalseLabel2True[sentence][pair];
                                        }
                                        else
                                        {
                                            ++numberOfLabel1FalseLabel2False[pair];
                                            ++ProbabilityOfLabel1FalseLabel2False[sentence][pair];
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            #endregion
            #region Derive the tree (a single tree for the whole corpus)
            IList <KeyValuePair <LabelPair, double> > tree = TDDSFunction.GenerateIMTree(numberOfLabelTrue, numberOfLabelFalse,
                                                                                         numberOfLabel1TrueLabel2True, numberOfLabel1TrueLabel2False,
                                                                                         numberOfLabel1FalseLabel2True, numberOfLabel1FalseLabel2False, N, Variable.LabelArray);
            #endregion
            #region Initialize Sij
            // The number of label sets does not change inside the loops, so compute it once.
            double numberOfLabelsets = Math.Pow(2, Variable.LabelArray.Length);
            foreach (Sentence sentence in Variable.Sentences)
            {
                for (int l = 0; l < numberOfLabelsets; ++l)
                {
                    Annotation annotation = new Annotation(l);
                    // One key instance per label set, reused for the final store.
                    Labelset labelset = new Labelset(Variable.LabelArray, l);

                    // Root term. The truth value tested and the count read must belong to the same label,
                    // tree[0].Key.First; the previous code tested tree[1].Key.First here, which mixed two labels
                    // and also failed for a tree with a single entry.
                    double value;
                    if (annotation.Labels[tree[0].Key.First])
                    {
                        value = ProbabilityOfLabelTrue[sentence][tree[0].Key.First] / Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
                    }
                    else
                    {
                        value = ProbabilityOfLabelFalse[sentence][tree[0].Key.First] / Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
                    }

                    // Edge terms: for each tree entry multiply by count(First = a, Second = b) / count(First = a),
                    // where a and b are the truth values of the two labels in this label set.
                    foreach (KeyValuePair <LabelPair, double> ap in tree)
                    {
                        if (annotation.Labels[ap.Key.First])
                        {
                            if (ProbabilityOfLabelTrue[sentence][ap.Key.First] != 0)// guard against a zero denominator
                            {
                                if (annotation.Labels[ap.Key.Second])
                                {
                                    value *= ProbabilityOfLabel1TrueLabel2True[sentence][ap.Key] / ProbabilityOfLabelTrue[sentence][ap.Key.First];
                                }
                                else
                                {
                                    value *= ProbabilityOfLabel1TrueLabel2False[sentence][ap.Key] / ProbabilityOfLabelTrue[sentence][ap.Key.First];
                                }
                            }
                            else
                            {
                                // Zero denominator: force the product to 0 (not 1) and skip the remaining entries.
                                value *= 0;
                                break;
                            }
                        }
                        else
                        {
                            if (ProbabilityOfLabelFalse[sentence][ap.Key.First] != 0)// guard against a zero denominator
                            {
                                if (annotation.Labels[ap.Key.Second])
                                {
                                    value *= ProbabilityOfLabel1FalseLabel2True[sentence][ap.Key] / ProbabilityOfLabelFalse[sentence][ap.Key.First];
                                }
                                else
                                {
                                    value *= ProbabilityOfLabel1FalseLabel2False[sentence][ap.Key] / ProbabilityOfLabelFalse[sentence][ap.Key.First];
                                }
                            }
                            else
                            {
                                // Zero denominator: force the product to 0 and skip the remaining entries.
                                value *= 0;
                                break;
                            }
                        }
                    }

                    // Single store of the finished product for this sentence / label set.
                    TDDSVariable.Sij.Value[sentence][labelset] = value;
                }
            }
            #endregion
        }
Exemplo n.º 3
0
 /// <summary>
 /// Builds <c>DTDDSVariable.Sij</c> from the annotations of one annotation group.
 /// For every sentence it (1) orders the labels by how often they are switched on, (2) counts how often
 /// each label and each label pair is on/off, (3) obtains a tree from <c>TDDSFunction.GenerateIMTree</c>,
 /// and (4) fills Sij for every integer-coded label set with the frequency of the first tree entry's
 /// <c>First</c> label multiplied by one conditional ratio per tree entry.
 /// </summary>
 /// <param name="group">Index into <c>sentence.AnnotaitonGroups</c> selecting the annotations that are counted.</param>
 static private void Initialize(int group)
 {
     #region Initialization
     DTDDSVariable.Sij = new Sij(1);
     // Per-sentence counts used for the concrete tree values (each sentence gets its own tree values).
     IDictionary<Sentence, IDictionary<Label, double>>     NumberOfLabelTrue              = new Dictionary<Sentence, IDictionary<Label, double>>();
     IDictionary<Sentence, IDictionary<Label, double>>     NumberOfLabelFalse             = new Dictionary<Sentence, IDictionary<Label, double>>();
     IDictionary<Sentence, IDictionary<LabelPair, double>> NumberOfLabel1TrueLabel2True   = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
     IDictionary<Sentence, IDictionary<LabelPair, double>> NumberOfLabel1TrueLabel2False  = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
     IDictionary<Sentence, IDictionary<LabelPair, double>> NumberOfLabel1FalseLabel2True  = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
     IDictionary<Sentence, IDictionary<LabelPair, double>> NumberOfLabel1FalseLabel2False = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
     foreach (Sentence sentence in Variable.Sentences)
     {
         // Count, per label, how many annotations of this group switch it on.
         IDictionary<Label, int> numberOfEachLabel = new Dictionary<Label, int>();
         foreach (Label label in Variable.LabelArray)
         {
             numberOfEachLabel.Add(label, 0);
         }
         foreach (Annotation annotation in sentence.AnnotaitonGroups[group].AnnotatorAnnotationDic.Values)
         {
             foreach (Label label in Variable.LabelArray)
             {
                 if (annotation.Labels[label])
                 {
                     ++numberOfEachLabel[label];
                 }
             }
         }

         // Order the labels by descending count; this becomes the sentence's own label order.
         List<KeyValuePair<Label, int>> sortedLabel = new List<KeyValuePair<Label, int>>(numberOfEachLabel);
         sortedLabel.Sort(delegate(KeyValuePair<Label, int> s1, KeyValuePair<Label, int> s2)
         {
             return(s2.Value.CompareTo(s1.Value));
         });

         // Size the array from the actual label set. The previous hard-coded length of 10 overflowed
         // with more than 10 labels and left default-valued tail entries with fewer than 10.
         sentence.LabelArray = new Label[Variable.LabelArray.Length];
         for (int a = 0; a < sentence.LabelArray.Length; ++a)
         {
             sentence.LabelArray[a] = sortedLabel[a].Key;
         }

         // Templates copied into each per-sentence table.
         // Original note on the pair template: order-insensitive pairs, 45 entries, used for initialisation.
         // NOTE(review): the seeding call below is commented out, so both templates are empty here. If these
         // are System.Collections.Generic dictionaries, the ++ on a missing key further down would throw
         // KeyNotFoundException - confirm how the tables are expected to be seeded.
         IDictionary<Label, double>     labelFloatDic  = new Dictionary<Label, double>();
         IDictionary<LabelPair, double> labelPairFloat = new Dictionary<LabelPair, double>();
         //Function.InitializeEmptyLabelDic(ref labelFloatDic, ref labelPairFloat, sentence.LabelArray);
         NumberOfLabelTrue.Add(sentence, new Dictionary<Label, double>(labelFloatDic));
         NumberOfLabelFalse.Add(sentence, new Dictionary<Label, double>(labelFloatDic));
         NumberOfLabel1TrueLabel2True.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
         NumberOfLabel1TrueLabel2False.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
         NumberOfLabel1FalseLabel2True.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
         NumberOfLabel1FalseLabel2False.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
     }
     #endregion
     #region Counts used as mutual-information parameters
     foreach (Sentence sentence in Variable.Sentences)
     {
         foreach (Annotation annotation in sentence.AnnotaitonGroups[group].AnnotatorAnnotationDic.Values)
         {
             // Labels already used as label1; pairing only with labels not yet seen means every pair
             // is counted once, keyed in the sentence's own label order.
             IList<Label> traversedLabels = new List<Label>();
             foreach (Label label1 in sentence.LabelArray)
             {
                 traversedLabels.Add(label1);
                 bool label1On = annotation.Labels[label1];
                 if (label1On)
                 {
                     ++NumberOfLabelTrue[sentence][label1];
                 }
                 else
                 {
                     ++NumberOfLabelFalse[sentence][label1];
                 }

                 foreach (Label label2 in sentence.LabelArray)
                 {
                     if (traversedLabels.Contains(label2))
                     {
                         continue;
                     }

                     // Pick the joint-count table matching the on/off state of both labels.
                     IDictionary<LabelPair, double> jointCounts;
                     if (annotation.Labels[label2])
                     {
                         jointCounts = label1On ? NumberOfLabel1TrueLabel2True[sentence] : NumberOfLabel1FalseLabel2True[sentence];
                     }
                     else
                     {
                         jointCounts = label1On ? NumberOfLabel1TrueLabel2False[sentence] : NumberOfLabel1FalseLabel2False[sentence];
                     }
                     ++jointCounts[new LabelPair(label1, label2)];
                 }
             }
         }
     }
     #endregion
     #region Generate a tree for every sentence
     foreach (Sentence sentence in Variable.Sentences)
     {
         sentence.Tree = TDDSFunction.GenerateIMTree(NumberOfLabelTrue[sentence], NumberOfLabelFalse[sentence],
                                                     NumberOfLabel1TrueLabel2True[sentence], NumberOfLabel1TrueLabel2False[sentence],
                                                     NumberOfLabel1FalseLabel2True[sentence], NumberOfLabel1FalseLabel2False[sentence], Variable.NumberOfAnnotationsPerSentenceAfterGrouping, sentence.LabelArray);
     }
     #endregion
     //Dependent.Tree.Distinct.FunctionOfDistinct.NumberOfIncompletedTreeSentence();
     IDictionary<Smoothing, double[]> smoothingNumber = Function.SmoothingNumber(Variable.LabelArray.Length);
     #region Initialise Sij
     foreach (Sentence sentence in Variable.Sentences)
     {
         // One entry per integer-coded label set; computed once per sentence instead of on every loop test.
         double numberOfLabelsets = Math.Pow(2, Variable.LabelArray.Length);
         for (int j = 0; j < numberOfLabelsets; ++j)
         {
             Annotation annotation = new Annotation(j);
             // Single key instance reused for every read/write of this entry.
             Labelset labelset = new Labelset(Variable.LabelArray, j);

             // Starting factor: relative frequency of the on/off state of the first tree entry's First label.
             IDictionary<Label, double> startCounts = annotation.Labels[sentence.Tree[0].Key.First]
                 ? NumberOfLabelTrue[sentence]
                 : NumberOfLabelFalse[sentence];
             DTDDSVariable.Sij.Value[sentence][labelset] = startCounts[sentence.Tree[0].Key.First] / Variable.NumberOfAnnotationsPerSentenceAfterGrouping;

             foreach (KeyValuePair<LabelPair, double> labelPairAndValue in sentence.Tree)
             {
                 // The count tables hold each pair in one orientation only; use whichever one is stored.
                 LabelPair ap;
                 if (NumberOfLabel1TrueLabel2True[sentence].ContainsKey(labelPairAndValue.Key))
                 {
                     ap = labelPairAndValue.Key;
                 }
                 else
                 {
                     ap = labelPairAndValue.Key.Reverse;
                 }

                 bool firstOn = annotation.Labels[ap.First];
                 double firstCount = firstOn ? NumberOfLabelTrue[sentence][ap.First] : NumberOfLabelFalse[sentence][ap.First];

                 if (DTDDSVariable.SmoothTree == Smoothing.None && firstCount == 0)
                 {
                     // Zero denominator without smoothing: the whole product is forced to zero
                     // and the remaining tree entries are skipped.
                     DTDDSVariable.Sij.Value[sentence][labelset] *= 0;
                     break;
                 }

                 // Joint-count table matching the on/off state of both labels of the pair.
                 IDictionary<LabelPair, double> jointCounts;
                 if (annotation.Labels[ap.Second])
                 {
                     jointCounts = firstOn ? NumberOfLabel1TrueLabel2True[sentence] : NumberOfLabel1FalseLabel2True[sentence];
                 }
                 else
                 {
                     jointCounts = firstOn ? NumberOfLabel1TrueLabel2False[sentence] : NumberOfLabel1FalseLabel2False[sentence];
                 }

                 double numerator = jointCounts[ap];
                 double denominator = firstCount;
                 if (DTDDSVariable.SmoothTree != Smoothing.None)
                 {
                     // Element 0 is added to the joint count, element 1 to the single-label count.
                     double[] smoothing = smoothingNumber[DTDDSVariable.SmoothTree];
                     numerator += smoothing[0];
                     denominator += smoothing[1];
                 }
                 DTDDSVariable.Sij.Value[sentence][labelset] *= numerator / denominator;
             }
         }
     }
     //for (int i = 0; i < Variable.Sentences.Count; ++i)
     //{
     //    double all = 0;
     //    for (int j = 0; j < DependentVariable.NumberOfIntlabel; ++j)
     //    {
     //        all += DTDDSVariable.Sij.Value[sentence][new Labelset(bilabel.ToArray(), j)];
     //    }
     //    for (int j = 0; j < DependentVariable.NumberOfIntlabel; ++j)
     //    {
     //        DTDDSVariable.Sij.Value[sentence][new Labelset(bilabel.ToArray(), j)] /= all;
     //    }
     //}
     #endregion
     //Variable.OutputFile.WriteLine(DTDDSVariable.Sij.ToString(DependentVariable.NumberOfIntlabel));
     //Variable.OutputFile.Close();
 }
Exemplo n.º 4
0
        /// <summary>
        /// Builds <c>DTDDSVariable.Sij</c> from every annotation in <c>Variable.Data</c>.
        /// Per sentence it counts how often each label and each label pair is on/off, orders the labels by
        /// how often they are switched on, obtains a tree from <c>TDDSFunction.GenerateIMTree</c>, and fills
        /// Sij for every integer-coded label set with the frequency of the first tree entry's <c>First</c>
        /// label multiplied by one conditional ratio per tree entry.
        /// </summary>
        /// <remarks>
        /// Original author's note: once TreeForAll has been run, the per-sentence trees do not need to be
        /// computed again.
        /// </remarks>
        static private void Initialize()
        {
            #region Initialization
            DTDDSVariable.Sij = new Sij(1);

            // Templates copied into each per-sentence table below.
            // Original note on the pair template: order-insensitive pairs, 45 entries, used for initialisation.
            // NOTE(review): the seeding call is commented out, so both templates are empty here. If these are
            // System.Collections.Generic dictionaries, the ++ on a missing key below would throw
            // KeyNotFoundException - confirm how the tables are expected to be seeded.
            IDictionary<Label, double> labelFloatDic = new Dictionary<Label, double>();
            IDictionary<LabelPair, double> labelPairFloat = new Dictionary<LabelPair, double>();
            //Function.InitializeEmptyLabelDic(ref labelFloatDic, ref labelPairFloat, Variable.LabelArray);

            // Per-sentence counts used for the concrete tree values (each sentence gets its own tree values).
            IDictionary<Sentence, IDictionary<Label, double>> NumberOfLabelTrue = new Dictionary<Sentence, IDictionary<Label, double>>();
            IDictionary<Sentence, IDictionary<Label, double>> NumberOfLabelFalse = new Dictionary<Sentence, IDictionary<Label, double>>();
            IDictionary<Sentence, IDictionary<LabelPair, double>> NumberOfLabel1TrueLabel2True = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
            IDictionary<Sentence, IDictionary<LabelPair, double>> NumberOfLabel1TrueLabel2False = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
            IDictionary<Sentence, IDictionary<LabelPair, double>> NumberOfLabel1FalseLabel2True = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
            IDictionary<Sentence, IDictionary<LabelPair, double>> NumberOfLabel1FalseLabel2False = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
            foreach (Sentence current in Variable.Sentences)
            {
                NumberOfLabelTrue.Add(current, new Dictionary<Label, double>(labelFloatDic));
                NumberOfLabelFalse.Add(current, new Dictionary<Label, double>(labelFloatDic));
                NumberOfLabel1TrueLabel2True.Add(current, new Dictionary<LabelPair, double>(labelPairFloat));
                NumberOfLabel1TrueLabel2False.Add(current, new Dictionary<LabelPair, double>(labelPairFloat));
                NumberOfLabel1FalseLabel2True.Add(current, new Dictionary<LabelPair, double>(labelPairFloat));
                NumberOfLabel1FalseLabel2False.Add(current, new Dictionary<LabelPair, double>(labelPairFloat));
            }
            #endregion
            #region Counts used as mutual-information parameters
            foreach (Annotator annotator in Variable.Annotators)
            {
                foreach (Sentence current in Variable.Sentences)
                {
                    foreach (Annotation annotation in Variable.Data[annotator][current])
                    {
                        // Labels already used as the first element; pairing only with labels not yet
                        // seen means each pair is counted once, keyed in Variable.LabelArray order.
                        List<Label> seen = new List<Label>();
                        foreach (Label first in Variable.LabelArray)
                        {
                            seen.Add(first);
                            bool firstOn = annotation.Labels[first];
                            if (firstOn)
                            {
                                ++NumberOfLabelTrue[current][first];
                            }
                            else
                            {
                                ++NumberOfLabelFalse[current][first];
                            }

                            foreach (Label second in Variable.LabelArray)
                            {
                                if (seen.Contains(second))
                                {
                                    continue;
                                }

                                bool secondOn = annotation.Labels[second];
                                LabelPair pair = new LabelPair(first, second);
                                if (firstOn && secondOn)
                                {
                                    ++NumberOfLabel1TrueLabel2True[current][pair];
                                }
                                else if (firstOn)
                                {
                                    ++NumberOfLabel1TrueLabel2False[current][pair];
                                }
                                else if (secondOn)
                                {
                                    ++NumberOfLabel1FalseLabel2True[current][pair];
                                }
                                else
                                {
                                    ++NumberOfLabel1FalseLabel2False[current][pair];
                                }
                            }
                        }
                    }
                }
            }
            #endregion
            #region Generate a tree for every sentence
            foreach (Sentence current in Variable.Sentences)
            {
                // Count, per label, how many annotations of this sentence switch it on.
                IDictionary<Label, int> onCounts = new Dictionary<Label, int>();
                foreach (Label label in Variable.LabelArray)
                {
                    onCounts.Add(label, 0);
                }
                foreach (Annotator annotator in Variable.Annotators)
                {
                    foreach (Annotation annotation in Variable.Data[annotator][current])
                    {
                        foreach (Label label in Variable.LabelArray)
                        {
                            if (annotation.Labels[label])
                            {
                                ++onCounts[label];
                            }
                        }
                    }
                }

                // Descending by count; the result becomes the sentence's own label order.
                List<KeyValuePair<Label, int>> ranked = new List<KeyValuePair<Label, int>>(onCounts);
                ranked.Sort(delegate(KeyValuePair<Label, int> left, KeyValuePair<Label, int> right)
                {
                    return right.Value.CompareTo(left.Value);
                });
                sentence_LabelArray_fill:
                current.LabelArray = new Label[Variable.LabelArray.Length];
                for (int position = 0; position < Variable.LabelArray.Length; ++position)
                {
                    current.LabelArray[position] = ranked[position].Key;
                }

                current.Tree = TDDSFunction.GenerateIMTree(
                    NumberOfLabelTrue[current], NumberOfLabelFalse[current],
                    NumberOfLabel1TrueLabel2True[current], NumberOfLabel1TrueLabel2False[current],
                    NumberOfLabel1FalseLabel2True[current], NumberOfLabel1FalseLabel2False[current],
                    Variable.NumberOfAnnotationsPerSentenceAfterGrouping, current.LabelArray);
            }
            #endregion
            //NumberOfIncompletedTreeSentence();
            #region Initialise Sij
            foreach (Sentence current in Variable.Sentences)
            {
                // One entry per integer-coded label set.
                double labelsetTotal = Math.Pow(2, Variable.LabelArray.Length);
                for (int code = 0; code < labelsetTotal; ++code)
                {
                    Annotation annotation = new Annotation(code);
                    Labelset entryKey = new Labelset(Variable.LabelArray, code);

                    // Starting factor: relative frequency of the on/off state of the first tree entry's First label.
                    if (annotation.Labels[current.Tree[0].Key.First])
                    {
                        DTDDSVariable.Sij.Value[current][entryKey] = NumberOfLabelTrue[current][current.Tree[0].Key.First] / Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
                    }
                    else
                    {
                        DTDDSVariable.Sij.Value[current][entryKey] = NumberOfLabelFalse[current][current.Tree[0].Key.First] / Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
                    }

                    foreach (KeyValuePair<LabelPair, double> edge in current.Tree)
                    {
                        Label first = edge.Key.First;
                        Label second = edge.Key.Second;
                        // The count tables are keyed in Variable.LabelArray order, so the tree's pair may only be
                        // stored in swapped orientation.
                        LabelPair swapped = new LabelPair(second, first);

                        bool firstOn = annotation.Labels[first];
                        double firstCount = firstOn ? NumberOfLabelTrue[current][first] : NumberOfLabelFalse[current][first];
                        if (firstCount == 0)
                        {
                            // Zero denominator: the product is forced to zero and the remaining entries are skipped.
                            DTDDSVariable.Sij.Value[current][entryKey] *= 0;
                            break;
                        }

                        // "direct" is looked up with the tree's own pair; "mirrored" with the swapped pair, where
                        // the on/off roles of the two labels are exchanged as well.
                        IDictionary<LabelPair, double> direct;
                        IDictionary<LabelPair, double> mirrored;
                        if (annotation.Labels[second])
                        {
                            if (firstOn)
                            {
                                direct = NumberOfLabel1TrueLabel2True[current];
                                mirrored = NumberOfLabel1TrueLabel2True[current];
                            }
                            else
                            {
                                direct = NumberOfLabel1FalseLabel2True[current];
                                mirrored = NumberOfLabel1TrueLabel2False[current];
                            }
                        }
                        else if (firstOn)
                        {
                            direct = NumberOfLabel1TrueLabel2False[current];
                            mirrored = NumberOfLabel1FalseLabel2True[current];
                        }
                        else
                        {
                            direct = NumberOfLabel1FalseLabel2False[current];
                            mirrored = NumberOfLabel1FalseLabel2False[current];
                        }

                        double jointCount = direct.ContainsKey(edge.Key) ? direct[edge.Key] : mirrored[swapped];
                        DTDDSVariable.Sij.Value[current][entryKey] *= jointCount / firstCount;
                    }
                }
            }
            #endregion
        }