/// <summary>
/// Fills <c>TDDSVariable.Sij</c> for one annotation group, using a single tree that is
/// shared by all sentences.
/// </summary>
/// <remarks>
/// 1. Counts, corpus-wide and per sentence, how often each label and each label pair is
///    on/off in the annotations of <paramref name="group"/> (the original comments call
///    these the mutual-information parameters).
/// 2. Passes the corpus-wide counts to <c>TDDSFunction.GenerateIMTree</c> to obtain the tree.
/// 3. Converts the per-sentence counts to relative frequencies, smoothed unless
///    <c>TDDSVariable.SmoothTree</c> is <c>Smoothing.None</c>.
/// 4. For every sentence and every label set, stores the product of the root frequency and
///    joint/parent along each tree edge. The stored values are not renormalised per sentence.
/// </remarks>
/// <param name="group">Index into each sentence's <c>AnnotaitonGroups</c>.</param>
private static void Initialize(int group)
{
    Label[] labelArray = GroupFunction.DescendLabelsByNumber(group);
    TDDSVariable.Sij = new Sij(1);

    // Zero-valued templates: one entry per label and one per unordered pair, oriented the
    // same way the counting loop below builds its keys (earlier label in labelArray first).
    // Without these entries every "++dict[key]" below would throw KeyNotFoundException.
    IDictionary<Label, double> labelFloatDic = new Dictionary<Label, double>();
    IDictionary<LabelPair, double> labelPairFloat = new Dictionary<LabelPair, double>();
    for (int i = 0; i < labelArray.Length; ++i)
    {
        labelFloatDic[labelArray[i]] = 0;
        for (int j = i + 1; j < labelArray.Length; ++j)
        {
            labelPairFloat[new LabelPair(labelArray[i], labelArray[j])] = 0;
        }
    }

    // Corpus-wide counts: these feed the tree construction (one tree for all sentences).
    IDictionary<Label, double> numberOfLabelTrue = new Dictionary<Label, double>(labelFloatDic);
    IDictionary<Label, double> numberOfLabelFalse = new Dictionary<Label, double>(labelFloatDic);
    IDictionary<LabelPair, double> numberOfLabel1TrueLabel2True = new Dictionary<LabelPair, double>(labelPairFloat);
    IDictionary<LabelPair, double> numberOfLabel1TrueLabel2False = new Dictionary<LabelPair, double>(labelPairFloat);
    IDictionary<LabelPair, double> numberOfLabel1FalseLabel2True = new Dictionary<LabelPair, double>(labelPairFloat);
    IDictionary<LabelPair, double> numberOfLabel1FalseLabel2False = new Dictionary<LabelPair, double>(labelPairFloat);

    // Per-sentence counts (later frequencies): these give each sentence its own values on the shared tree.
    IDictionary<Sentence, IDictionary<Label, double>> probabilityOfLabelTrue = new Dictionary<Sentence, IDictionary<Label, double>>();
    IDictionary<Sentence, IDictionary<Label, double>> probabilityOfLabelFalse = new Dictionary<Sentence, IDictionary<Label, double>>();
    IDictionary<Sentence, IDictionary<LabelPair, double>> probabilityTrueTrue = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
    IDictionary<Sentence, IDictionary<LabelPair, double>> probabilityTrueFalse = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
    IDictionary<Sentence, IDictionary<LabelPair, double>> probabilityFalseTrue = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
    IDictionary<Sentence, IDictionary<LabelPair, double>> probabilityFalseFalse = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
    foreach (Sentence sentence in Variable.Sentences)
    {
        probabilityOfLabelTrue.Add(sentence, new Dictionary<Label, double>(labelFloatDic));
        probabilityOfLabelFalse.Add(sentence, new Dictionary<Label, double>(labelFloatDic));
        probabilityTrueTrue.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
        probabilityTrueFalse.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
        probabilityFalseTrue.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
        probabilityFalseFalse.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
    }

    // Count label and label-pair states over every annotation of the group.
    int N = 0; // total number of annotations seen
    foreach (Sentence sentence in Variable.Sentences)
    {
        IDictionary<Label, double> sentenceTrue = probabilityOfLabelTrue[sentence];
        IDictionary<Label, double> sentenceFalse = probabilityOfLabelFalse[sentence];
        IDictionary<LabelPair, double> sentenceTrueTrue = probabilityTrueTrue[sentence];
        IDictionary<LabelPair, double> sentenceTrueFalse = probabilityTrueFalse[sentence];
        IDictionary<LabelPair, double> sentenceFalseTrue = probabilityFalseTrue[sentence];
        IDictionary<LabelPair, double> sentenceFalseFalse = probabilityFalseFalse[sentence];

        foreach (Annotation annotation in sentence.AnnotaitonGroups[group].AnnotatorAnnotationDic.Values)
        {
            ++N;
            for (int i = 0; i < labelArray.Length; ++i)
            {
                Label label1 = labelArray[i];
                bool firstOn = annotation.Labels[label1];
                if (firstOn)
                {
                    ++numberOfLabelTrue[label1];
                    ++sentenceTrue[label1];
                }
                else
                {
                    ++numberOfLabelFalse[label1];
                    ++sentenceFalse[label1];
                }

                // Each unordered pair is visited once: label2 always comes after label1.
                for (int j = i + 1; j < labelArray.Length; ++j)
                {
                    Label label2 = labelArray[j];
                    bool secondOn = annotation.Labels[label2];
                    LabelPair pair = new LabelPair(label1, label2);

                    IDictionary<LabelPair, double> corpusJoint;
                    IDictionary<LabelPair, double> sentenceJoint;
                    if (firstOn)
                    {
                        corpusJoint = secondOn ? numberOfLabel1TrueLabel2True : numberOfLabel1TrueLabel2False;
                        sentenceJoint = secondOn ? sentenceTrueTrue : sentenceTrueFalse;
                    }
                    else
                    {
                        corpusJoint = secondOn ? numberOfLabel1FalseLabel2True : numberOfLabel1FalseLabel2False;
                        sentenceJoint = secondOn ? sentenceFalseTrue : sentenceFalseFalse;
                    }

                    ++corpusJoint[pair];
                    ++sentenceJoint[pair];
                }
            }
        }
    }

    // Build the single shared tree.
    // Original author's note: this is where multi-threaded runs diverge. Variable.LabelArray
    // does not change between groups, but the Sentence members used for the CommonTree and
    // DistinctTree computations were not kept separate.
    IList<KeyValuePair<LabelPair, double>> tree = TDDSFunction.GenerateIMTree(
        numberOfLabelTrue,
        numberOfLabelFalse,
        numberOfLabel1TrueLabel2True,
        numberOfLabel1TrueLabel2False,
        numberOfLabel1FalseLabel2True,
        numberOfLabel1FalseLabel2False,
        N,
        labelArray);

    // Smoothing constants. With Smoothing.None both stay 0, so the formula
    // (count + additive) / denominator reduces exactly to count / annotationsPerSentence.
    bool unsmoothed = TDDSVariable.SmoothTree == Smoothing.None;
    double additive = 0;
    double extraMass = 0;
    if (!unsmoothed)
    {
        double[] smoothing = Function.SmoothingNumber(Variable.NumberOfAnnotationsPerSentenceAfterGrouping)[TDDSVariable.SmoothTree];
        additive = smoothing[0];
        extraMass = smoothing[1];
    }

    double denominator = Variable.NumberOfAnnotationsPerSentenceAfterGrouping + extraMass;

    // Turn per-sentence counts into frequencies.
    foreach (Sentence sentence in Variable.Sentences)
    {
        IDictionary<Label, double> sentenceTrue = probabilityOfLabelTrue[sentence];
        IDictionary<Label, double> sentenceFalse = probabilityOfLabelFalse[sentence];
        foreach (Label label in labelArray)
        {
            sentenceTrue[label] = (sentenceTrue[label] + additive) / denominator;
            sentenceFalse[label] = (sentenceFalse[label] + additive) / denominator;
        }

        IDictionary<LabelPair, double> sentenceTrueTrue = probabilityTrueTrue[sentence];
        IDictionary<LabelPair, double> sentenceTrueFalse = probabilityTrueFalse[sentence];
        IDictionary<LabelPair, double> sentenceFalseTrue = probabilityFalseTrue[sentence];
        IDictionary<LabelPair, double> sentenceFalseFalse = probabilityFalseFalse[sentence];
        foreach (LabelPair pair in labelPairFloat.Keys)
        {
            sentenceTrueTrue[pair] = (sentenceTrueTrue[pair] + additive) / denominator;
            sentenceTrueFalse[pair] = (sentenceTrueFalse[pair] + additive) / denominator;
            sentenceFalseTrue[pair] = (sentenceFalseTrue[pair] + additive) / denominator;
            sentenceFalseFalse[pair] = (sentenceFalseFalse[pair] + additive) / denominator;
        }
    }

    // Initialise Sij: one weight per (sentence, label set).
    double labelsetCount = Math.Pow(2, Variable.LabelArray.Length); // hoisted loop bound
    foreach (Sentence sentence in Variable.Sentences)
    {
        IDictionary<Label, double> sentenceTrue = probabilityOfLabelTrue[sentence];
        IDictionary<Label, double> sentenceFalse = probabilityOfLabelFalse[sentence];
        IDictionary<LabelPair, double> sentenceTrueTrue = probabilityTrueTrue[sentence];
        IDictionary<LabelPair, double> sentenceTrueFalse = probabilityTrueFalse[sentence];
        IDictionary<LabelPair, double> sentenceFalseTrue = probabilityFalseTrue[sentence];
        IDictionary<LabelPair, double> sentenceFalseFalse = probabilityFalseFalse[sentence];
        Label root = tree[0].Key.First;

        for (int l = 0; l < labelsetCount; ++l)
        {
            Labelset labelset = new Labelset(Variable.LabelArray, l);
            double weight = 1;

            // Root factor. Without smoothing a zero root frequency is skipped here; the
            // zero-parent check in the edge loop below is what drives the weight to 0.
            // Original author's note: "should divide; accuracy is higher after dividing,
            // reason unknown".
            double rootProbability = labelset.Labels[root] ? sentenceTrue[root] : sentenceFalse[root];
            if (!unsmoothed || rootProbability != 0)
            {
                weight *= rootProbability;
            }

            foreach (KeyValuePair<LabelPair, double> edge in tree)
            {
                LabelPair pair = edge.Key;
                bool firstOn = labelset.Labels[pair.First];
                double parent = firstOn ? sentenceTrue[pair.First] : sentenceFalse[pair.First];

                // Guard against a zero denominator (only possible without smoothing).
                // The weight must become 0 here, not stay at 1.
                if (unsmoothed && parent == 0)
                {
                    weight *= 0;
                    break;
                }

                bool secondOn = labelset.Labels[pair.Second];

                // "direct" is keyed by the edge as given; "mirrored" is the table to read
                // when only the reversed pair is stored, so the on/off roles swap with it.
                IDictionary<LabelPair, double> direct;
                IDictionary<LabelPair, double> mirrored;
                if (firstOn)
                {
                    direct = secondOn ? sentenceTrueTrue : sentenceTrueFalse;
                    mirrored = secondOn ? sentenceTrueTrue : sentenceFalseTrue;
                }
                else
                {
                    direct = secondOn ? sentenceFalseTrue : sentenceFalseFalse;
                    mirrored = secondOn ? sentenceTrueFalse : sentenceFalseFalse;
                }

                double joint;
                if (!direct.TryGetValue(pair, out joint))
                {
                    joint = mirrored[pair.Reverse];
                }

                weight *= joint / parent;
            }

            TDDSVariable.Sij.Value[sentence].Add(labelset, weight);
        }
    }
}
/// <summary>
/// Fills <c>TDDSVariable.Sij</c> from all annotators' data (no grouping), using a single
/// tree that is shared by all sentences.
/// </summary>
/// <remarks>
/// Counts label and label-pair states corpus-wide (for <c>TDDSFunction.GenerateIMTree</c>)
/// and per sentence, then stores for every sentence and label set the product of
/// rootCount / <c>Variable.NumberOfAnnotationsPerSentenceAfterGrouping</c> and
/// jointCount / parentCount along each tree edge. No smoothing is applied; a zero parent
/// count makes the whole weight 0.
/// </remarks>
private static void Initialize()
{
    TDDSVariable.Sij = new Sij(1);
    Label[] labels = Variable.LabelArray;

    // Zero-valued templates: one entry per label and one per unordered pair, oriented the
    // same way the counting loop below builds its keys (earlier label in the array first).
    // Without these entries every "++dict[key]" below would throw KeyNotFoundException.
    IDictionary<Label, double> labelFloatDic = new Dictionary<Label, double>();
    IDictionary<LabelPair, double> labelPairFloat = new Dictionary<LabelPair, double>();
    for (int i = 0; i < labels.Length; ++i)
    {
        labelFloatDic[labels[i]] = 0;
        for (int j = i + 1; j < labels.Length; ++j)
        {
            labelPairFloat[new LabelPair(labels[i], labels[j])] = 0;
        }
    }

    // Corpus-wide counts: these feed the tree construction (one tree for all sentences).
    IDictionary<Label, double> numberOfLabelTrue = new Dictionary<Label, double>(labelFloatDic);
    IDictionary<Label, double> numberOfLabelFalse = new Dictionary<Label, double>(labelFloatDic);
    IDictionary<LabelPair, double> numberOfLabel1TrueLabel2True = new Dictionary<LabelPair, double>(labelPairFloat);
    IDictionary<LabelPair, double> numberOfLabel1TrueLabel2False = new Dictionary<LabelPair, double>(labelPairFloat);
    IDictionary<LabelPair, double> numberOfLabel1FalseLabel2True = new Dictionary<LabelPair, double>(labelPairFloat);
    IDictionary<LabelPair, double> numberOfLabel1FalseLabel2False = new Dictionary<LabelPair, double>(labelPairFloat);

    // Per-sentence counts: these give each sentence its own values on the shared tree.
    IDictionary<Sentence, IDictionary<Label, double>> countTrue = new Dictionary<Sentence, IDictionary<Label, double>>();
    IDictionary<Sentence, IDictionary<Label, double>> countFalse = new Dictionary<Sentence, IDictionary<Label, double>>();
    IDictionary<Sentence, IDictionary<LabelPair, double>> countTrueTrue = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
    IDictionary<Sentence, IDictionary<LabelPair, double>> countTrueFalse = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
    IDictionary<Sentence, IDictionary<LabelPair, double>> countFalseTrue = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
    IDictionary<Sentence, IDictionary<LabelPair, double>> countFalseFalse = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
    foreach (Sentence sentence in Variable.Sentences)
    {
        countTrue.Add(sentence, new Dictionary<Label, double>(labelFloatDic));
        countFalse.Add(sentence, new Dictionary<Label, double>(labelFloatDic));
        countTrueTrue.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
        countTrueFalse.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
        countFalseTrue.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
        countFalseFalse.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
    }

    // Count label and label-pair states over every annotation of every annotator.
    int N = 0; // total number of annotations seen
    foreach (Annotator annotator in Variable.Annotators)
    {
        foreach (Sentence sentence in Variable.Sentences)
        {
            foreach (Annotation annotation in Variable.Data[annotator][sentence])
            {
                ++N;
                for (int i = 0; i < labels.Length; ++i)
                {
                    Label label1 = labels[i];
                    bool firstOn = annotation.Labels[label1];
                    if (firstOn)
                    {
                        ++numberOfLabelTrue[label1];
                        ++countTrue[sentence][label1];
                    }
                    else
                    {
                        ++numberOfLabelFalse[label1];
                        ++countFalse[sentence][label1];
                    }

                    // Each unordered pair is visited once: label2 always comes after label1.
                    for (int j = i + 1; j < labels.Length; ++j)
                    {
                        Label label2 = labels[j];
                        bool secondOn = annotation.Labels[label2];
                        LabelPair pair = new LabelPair(label1, label2);

                        IDictionary<LabelPair, double> corpusJoint;
                        IDictionary<LabelPair, double> sentenceJoint;
                        if (firstOn)
                        {
                            corpusJoint = secondOn ? numberOfLabel1TrueLabel2True : numberOfLabel1TrueLabel2False;
                            sentenceJoint = secondOn ? countTrueTrue[sentence] : countTrueFalse[sentence];
                        }
                        else
                        {
                            corpusJoint = secondOn ? numberOfLabel1FalseLabel2True : numberOfLabel1FalseLabel2False;
                            sentenceJoint = secondOn ? countFalseTrue[sentence] : countFalseFalse[sentence];
                        }

                        ++corpusJoint[pair];
                        ++sentenceJoint[pair];
                    }
                }
            }
        }
    }

    // Build the single shared tree.
    IList<KeyValuePair<LabelPair, double>> tree = TDDSFunction.GenerateIMTree(
        numberOfLabelTrue,
        numberOfLabelFalse,
        numberOfLabel1TrueLabel2True,
        numberOfLabel1TrueLabel2False,
        numberOfLabel1FalseLabel2True,
        numberOfLabel1FalseLabel2False,
        N,
        labels);

    // Initialise Sij: one weight per (sentence, label set).
    double labelsetCount = Math.Pow(2, labels.Length); // hoisted loop bound
    foreach (Sentence sentence in Variable.Sentences)
    {
        IDictionary<Label, double> sentenceTrue = countTrue[sentence];
        IDictionary<Label, double> sentenceFalse = countFalse[sentence];
        IDictionary<LabelPair, double> sentenceTrueTrue = countTrueTrue[sentence];
        IDictionary<LabelPair, double> sentenceTrueFalse = countTrueFalse[sentence];
        IDictionary<LabelPair, double> sentenceFalseTrue = countFalseTrue[sentence];
        IDictionary<LabelPair, double> sentenceFalseFalse = countFalseFalse[sentence];

        // The root is the first label of the first edge. The same label is used both to
        // test the on/off state and to read the count (previously the state was tested
        // on tree[1] while the count was read for tree[0]).
        Label root = tree[0].Key.First;

        for (int l = 0; l < labelsetCount; ++l)
        {
            Annotation annotation = new Annotation(l);
            double rootCount = annotation.Labels[root] ? sentenceTrue[root] : sentenceFalse[root];
            double weight = rootCount / Variable.NumberOfAnnotationsPerSentenceAfterGrouping;

            foreach (KeyValuePair<LabelPair, double> edge in tree)
            {
                LabelPair pair = edge.Key;
                bool firstOn = annotation.Labels[pair.First];
                double parent = firstOn ? sentenceTrue[pair.First] : sentenceFalse[pair.First];

                // Guard against a zero denominator: the weight must become 0 here, not 1.
                if (parent == 0)
                {
                    weight *= 0;
                    break;
                }

                bool secondOn = annotation.Labels[pair.Second];

                // "direct" is keyed by the edge as given; "mirrored" is the table to read
                // when only the reversed pair is stored, so the on/off roles swap with it.
                IDictionary<LabelPair, double> direct;
                IDictionary<LabelPair, double> mirrored;
                if (firstOn)
                {
                    direct = secondOn ? sentenceTrueTrue : sentenceTrueFalse;
                    mirrored = secondOn ? sentenceTrueTrue : sentenceFalseTrue;
                }
                else
                {
                    direct = secondOn ? sentenceFalseTrue : sentenceFalseFalse;
                    mirrored = secondOn ? sentenceTrueFalse : sentenceFalseFalse;
                }

                double joint;
                if (!direct.TryGetValue(pair, out joint))
                {
                    joint = mirrored[pair.Reverse];
                }

                weight *= joint / parent;
            }

            TDDSVariable.Sij.Value[sentence][new Labelset(labels, l)] = weight;
        }
    }
}
/// <summary>
/// Fills <c>DTDDSVariable.Sij</c> for one annotation group, building a separate tree for
/// every sentence.
/// </summary>
/// <remarks>
/// For each sentence: orders the labels by how often they are on in the group's annotations
/// (most frequent first) and stores that order in <c>sentence.LabelArray</c>; counts label
/// and label-pair states; builds <c>sentence.Tree</c> with <c>TDDSFunction.GenerateIMTree</c>;
/// then stores, for every label set, the product of
/// rootCount / <c>Variable.NumberOfAnnotationsPerSentenceAfterGrouping</c> and the
/// (optionally smoothed) joint/parent ratio along each tree edge. The stored values are
/// not renormalised per sentence.
/// </remarks>
/// <param name="group">Index into each sentence's <c>AnnotaitonGroups</c>.</param>
private static void Initialize(int group)
{
    DTDDSVariable.Sij = new Sij(1);

    // Per-sentence counts: each sentence's tree gets its own values.
    IDictionary<Sentence, IDictionary<Label, double>> numberOfLabelTrue = new Dictionary<Sentence, IDictionary<Label, double>>();
    IDictionary<Sentence, IDictionary<Label, double>> numberOfLabelFalse = new Dictionary<Sentence, IDictionary<Label, double>>();
    IDictionary<Sentence, IDictionary<LabelPair, double>> numberTrueTrue = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
    IDictionary<Sentence, IDictionary<LabelPair, double>> numberTrueFalse = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
    IDictionary<Sentence, IDictionary<LabelPair, double>> numberFalseTrue = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
    IDictionary<Sentence, IDictionary<LabelPair, double>> numberFalseFalse = new Dictionary<Sentence, IDictionary<LabelPair, double>>();

    foreach (Sentence sentence in Variable.Sentences)
    {
        // Order the labels by how many of this sentence's annotations switch them on.
        IDictionary<Label, int> numberOfEachLabel = new Dictionary<Label, int>();
        foreach (Label label in Variable.LabelArray)
        {
            numberOfEachLabel.Add(label, 0);
        }

        foreach (Annotation annotation in sentence.AnnotaitonGroups[group].AnnotatorAnnotationDic.Values)
        {
            foreach (Label label in Variable.LabelArray)
            {
                if (annotation.Labels[label])
                {
                    ++numberOfEachLabel[label];
                }
            }
        }

        List<KeyValuePair<Label, int>> sortedLabel = new List<KeyValuePair<Label, int>>(numberOfEachLabel);
        sortedLabel.Sort(delegate(KeyValuePair<Label, int> s1, KeyValuePair<Label, int> s2)
        {
            return s2.Value.CompareTo(s1.Value); // descending by count
        });

        // Sized from the real label count (previously hard-coded to 10, which overflowed
        // for more labels and left default slots for fewer).
        Label[] orderedLabels = new Label[Variable.LabelArray.Length];
        for (int a = 0; a < orderedLabels.Length; ++a)
        {
            orderedLabels[a] = sortedLabel[a].Key;
        }

        sentence.LabelArray = orderedLabels;

        // Zero-valued templates: one entry per label and one per unordered pair, oriented
        // the same way the counting loop below builds its keys (earlier label in
        // sentence.LabelArray first). Without these entries every "++dict[key]" would
        // throw KeyNotFoundException.
        IDictionary<Label, double> labelFloatDic = new Dictionary<Label, double>();
        IDictionary<LabelPair, double> labelPairFloat = new Dictionary<LabelPair, double>();
        for (int i = 0; i < orderedLabels.Length; ++i)
        {
            labelFloatDic[orderedLabels[i]] = 0;
            for (int j = i + 1; j < orderedLabels.Length; ++j)
            {
                labelPairFloat[new LabelPair(orderedLabels[i], orderedLabels[j])] = 0;
            }
        }

        numberOfLabelTrue.Add(sentence, new Dictionary<Label, double>(labelFloatDic));
        numberOfLabelFalse.Add(sentence, new Dictionary<Label, double>(labelFloatDic));
        numberTrueTrue.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
        numberTrueFalse.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
        numberFalseTrue.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
        numberFalseFalse.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
    }

    // Count label and label-pair states per sentence.
    foreach (Sentence sentence in Variable.Sentences)
    {
        Label[] orderedLabels = sentence.LabelArray;
        IDictionary<Label, double> sentenceTrue = numberOfLabelTrue[sentence];
        IDictionary<Label, double> sentenceFalse = numberOfLabelFalse[sentence];
        IDictionary<LabelPair, double> sentenceTrueTrue = numberTrueTrue[sentence];
        IDictionary<LabelPair, double> sentenceTrueFalse = numberTrueFalse[sentence];
        IDictionary<LabelPair, double> sentenceFalseTrue = numberFalseTrue[sentence];
        IDictionary<LabelPair, double> sentenceFalseFalse = numberFalseFalse[sentence];

        foreach (Annotation annotation in sentence.AnnotaitonGroups[group].AnnotatorAnnotationDic.Values)
        {
            for (int i = 0; i < orderedLabels.Length; ++i)
            {
                Label label1 = orderedLabels[i];
                bool firstOn = annotation.Labels[label1];
                if (firstOn)
                {
                    ++sentenceTrue[label1];
                }
                else
                {
                    ++sentenceFalse[label1];
                }

                // Each unordered pair is visited once: label2 always comes after label1.
                for (int j = i + 1; j < orderedLabels.Length; ++j)
                {
                    Label label2 = orderedLabels[j];
                    bool secondOn = annotation.Labels[label2];
                    LabelPair pair = new LabelPair(label1, label2);

                    IDictionary<LabelPair, double> joint;
                    if (firstOn)
                    {
                        joint = secondOn ? sentenceTrueTrue : sentenceTrueFalse;
                    }
                    else
                    {
                        joint = secondOn ? sentenceFalseTrue : sentenceFalseFalse;
                    }

                    ++joint[pair];
                }
            }
        }
    }

    // Build one tree per sentence.
    foreach (Sentence sentence in Variable.Sentences)
    {
        sentence.Tree = TDDSFunction.GenerateIMTree(
            numberOfLabelTrue[sentence],
            numberOfLabelFalse[sentence],
            numberTrueTrue[sentence],
            numberTrueFalse[sentence],
            numberFalseTrue[sentence],
            numberFalseFalse[sentence],
            Variable.NumberOfAnnotationsPerSentenceAfterGrouping,
            sentence.LabelArray);
    }

    // Smoothing constants. With Smoothing.None both stay 0, so the formula
    // (joint + additive) / (parent + extraMass) reduces exactly to joint / parent.
    IDictionary<Smoothing, double[]> smoothingNumber = Function.SmoothingNumber(Variable.LabelArray.Length);
    bool unsmoothed = DTDDSVariable.SmoothTree == Smoothing.None;
    double additive = 0;
    double extraMass = 0;
    if (!unsmoothed)
    {
        double[] smoothing = smoothingNumber[DTDDSVariable.SmoothTree];
        additive = smoothing[0];
        extraMass = smoothing[1];
    }

    // Initialise Sij: one weight per (sentence, label set).
    double labelsetCount = Math.Pow(2, Variable.LabelArray.Length); // hoisted loop bound
    foreach (Sentence sentence in Variable.Sentences)
    {
        IDictionary<Label, double> sentenceTrue = numberOfLabelTrue[sentence];
        IDictionary<Label, double> sentenceFalse = numberOfLabelFalse[sentence];
        IDictionary<LabelPair, double> sentenceTrueTrue = numberTrueTrue[sentence];
        IDictionary<LabelPair, double> sentenceTrueFalse = numberTrueFalse[sentence];
        IDictionary<LabelPair, double> sentenceFalseTrue = numberFalseTrue[sentence];
        IDictionary<LabelPair, double> sentenceFalseFalse = numberFalseFalse[sentence];
        Label root = sentence.Tree[0].Key.First;

        for (int j = 0; j < labelsetCount; ++j)
        {
            Annotation annotation = new Annotation(j);

            // The root factor is never smoothed, in either mode.
            double rootCount = annotation.Labels[root] ? sentenceTrue[root] : sentenceFalse[root];
            double weight = rootCount / Variable.NumberOfAnnotationsPerSentenceAfterGrouping;

            foreach (KeyValuePair<LabelPair, double> edge in sentence.Tree)
            {
                // Use whichever orientation of the edge the count tables are keyed by.
                // NOTE(review): when the reversed pair is chosen, the factor below
                // conditions on the edge's Second label instead of its First. Confirm
                // this is intended.
                LabelPair pair = edge.Key;
                if (!sentenceTrueTrue.ContainsKey(pair))
                {
                    pair = pair.Reverse;
                }

                bool firstOn = annotation.Labels[pair.First];
                double parent = firstOn ? sentenceTrue[pair.First] : sentenceFalse[pair.First];

                // Guard against a zero denominator (only checked without smoothing).
                if (unsmoothed && parent == 0)
                {
                    weight *= 0;
                    break;
                }

                bool secondOn = annotation.Labels[pair.Second];
                double joint;
                if (firstOn)
                {
                    joint = secondOn ? sentenceTrueTrue[pair] : sentenceTrueFalse[pair];
                }
                else
                {
                    joint = secondOn ? sentenceFalseTrue[pair] : sentenceFalseFalse[pair];
                }

                weight *= (joint + additive) / (parent + extraMass);
            }

            DTDDSVariable.Sij.Value[sentence][new Labelset(Variable.LabelArray, j)] = weight;
        }
    }
}
/// <summary>
/// Fills <c>DTDDSVariable.Sij</c> from all annotators' data (no grouping), building a
/// separate tree for every sentence.
/// </summary>
/// <remarks>
/// Counts label and label-pair states per sentence in <c>Variable.LabelArray</c> order,
/// orders each sentence's labels by frequency into <c>sentence.LabelArray</c>, builds
/// <c>sentence.Tree</c> with <c>TDDSFunction.GenerateIMTree</c>, and stores for every label
/// set the product of rootCount / <c>Variable.NumberOfAnnotationsPerSentenceAfterGrouping</c>
/// and jointCount / parentCount along each tree edge. No smoothing is applied; a zero
/// parent count makes the whole weight 0.
/// Original author's note: once TreeForAll has been run, the per-sentence trees do not
/// need to be computed again.
/// </remarks>
private static void Initialize()
{
    DTDDSVariable.Sij = new Sij(1);
    Label[] labels = Variable.LabelArray;

    // Zero-valued templates: one entry per label and one per unordered pair, oriented the
    // same way the counting loop below builds its keys (earlier label in the array first).
    // Without these entries every "++dict[key]" below would throw KeyNotFoundException.
    IDictionary<Label, double> labelFloatDic = new Dictionary<Label, double>();
    IDictionary<LabelPair, double> labelPairFloat = new Dictionary<LabelPair, double>();
    for (int i = 0; i < labels.Length; ++i)
    {
        labelFloatDic[labels[i]] = 0;
        for (int j = i + 1; j < labels.Length; ++j)
        {
            labelPairFloat[new LabelPair(labels[i], labels[j])] = 0;
        }
    }

    // Per-sentence counts: each sentence's tree gets its own values.
    IDictionary<Sentence, IDictionary<Label, double>> numberOfLabelTrue = new Dictionary<Sentence, IDictionary<Label, double>>();
    IDictionary<Sentence, IDictionary<Label, double>> numberOfLabelFalse = new Dictionary<Sentence, IDictionary<Label, double>>();
    IDictionary<Sentence, IDictionary<LabelPair, double>> numberTrueTrue = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
    IDictionary<Sentence, IDictionary<LabelPair, double>> numberTrueFalse = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
    IDictionary<Sentence, IDictionary<LabelPair, double>> numberFalseTrue = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
    IDictionary<Sentence, IDictionary<LabelPair, double>> numberFalseFalse = new Dictionary<Sentence, IDictionary<LabelPair, double>>();
    foreach (Sentence sentence in Variable.Sentences)
    {
        numberOfLabelTrue.Add(sentence, new Dictionary<Label, double>(labelFloatDic));
        numberOfLabelFalse.Add(sentence, new Dictionary<Label, double>(labelFloatDic));
        numberTrueTrue.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
        numberTrueFalse.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
        numberFalseTrue.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
        numberFalseFalse.Add(sentence, new Dictionary<LabelPair, double>(labelPairFloat));
    }

    // Count label and label-pair states over every annotation of every annotator.
    foreach (Annotator annotator in Variable.Annotators)
    {
        foreach (Sentence sentence in Variable.Sentences)
        {
            foreach (Annotation annotation in Variable.Data[annotator][sentence])
            {
                for (int i = 0; i < labels.Length; ++i)
                {
                    Label label1 = labels[i];
                    bool firstOn = annotation.Labels[label1];
                    if (firstOn)
                    {
                        ++numberOfLabelTrue[sentence][label1];
                    }
                    else
                    {
                        ++numberOfLabelFalse[sentence][label1];
                    }

                    // Each unordered pair is visited once: label2 always comes after label1.
                    for (int j = i + 1; j < labels.Length; ++j)
                    {
                        Label label2 = labels[j];
                        bool secondOn = annotation.Labels[label2];
                        LabelPair pair = new LabelPair(label1, label2);

                        IDictionary<LabelPair, double> joint;
                        if (firstOn)
                        {
                            joint = secondOn ? numberTrueTrue[sentence] : numberTrueFalse[sentence];
                        }
                        else
                        {
                            joint = secondOn ? numberFalseTrue[sentence] : numberFalseFalse[sentence];
                        }

                        ++joint[pair];
                    }
                }
            }
        }
    }

    // Build one tree per sentence, over that sentence's labels ordered by frequency.
    foreach (Sentence sentence in Variable.Sentences)
    {
        IDictionary<Label, int> numberOfEachLabel = new Dictionary<Label, int>();
        foreach (Label label in labels)
        {
            numberOfEachLabel.Add(label, 0);
        }

        foreach (Annotator annotator in Variable.Annotators)
        {
            foreach (Annotation annotation in Variable.Data[annotator][sentence])
            {
                foreach (Label label in labels)
                {
                    if (annotation.Labels[label])
                    {
                        ++numberOfEachLabel[label];
                    }
                }
            }
        }

        List<KeyValuePair<Label, int>> sortedLabel = new List<KeyValuePair<Label, int>>(numberOfEachLabel);
        sortedLabel.Sort(delegate(KeyValuePair<Label, int> s1, KeyValuePair<Label, int> s2)
        {
            return s2.Value.CompareTo(s1.Value); // descending by count
        });

        Label[] orderedLabels = new Label[labels.Length];
        for (int a = 0; a < orderedLabels.Length; ++a)
        {
            orderedLabels[a] = sortedLabel[a].Key;
        }

        sentence.LabelArray = orderedLabels;
        sentence.Tree = TDDSFunction.GenerateIMTree(
            numberOfLabelTrue[sentence],
            numberOfLabelFalse[sentence],
            numberTrueTrue[sentence],
            numberTrueFalse[sentence],
            numberFalseTrue[sentence],
            numberFalseFalse[sentence],
            Variable.NumberOfAnnotationsPerSentenceAfterGrouping,
            sentence.LabelArray);
    }

    // Initialise Sij: one weight per (sentence, label set).
    double labelsetCount = Math.Pow(2, labels.Length); // hoisted loop bound
    foreach (Sentence sentence in Variable.Sentences)
    {
        IDictionary<Label, double> sentenceTrue = numberOfLabelTrue[sentence];
        IDictionary<Label, double> sentenceFalse = numberOfLabelFalse[sentence];
        IDictionary<LabelPair, double> sentenceTrueTrue = numberTrueTrue[sentence];
        IDictionary<LabelPair, double> sentenceTrueFalse = numberTrueFalse[sentence];
        IDictionary<LabelPair, double> sentenceFalseTrue = numberFalseTrue[sentence];
        IDictionary<LabelPair, double> sentenceFalseFalse = numberFalseFalse[sentence];
        Label root = sentence.Tree[0].Key.First;

        for (int l = 0; l < labelsetCount; ++l)
        {
            Annotation annotation = new Annotation(l);
            double rootCount = annotation.Labels[root] ? sentenceTrue[root] : sentenceFalse[root];
            double weight = rootCount / Variable.NumberOfAnnotationsPerSentenceAfterGrouping;

            foreach (KeyValuePair<LabelPair, double> edge in sentence.Tree)
            {
                LabelPair pair = edge.Key;
                bool firstOn = annotation.Labels[pair.First];
                double parent = firstOn ? sentenceTrue[pair.First] : sentenceFalse[pair.First];

                // Guard against a zero denominator: the weight becomes 0.
                if (parent == 0)
                {
                    weight *= 0;
                    break;
                }

                bool secondOn = annotation.Labels[pair.Second];

                // The counts are keyed in Variable.LabelArray order while the tree is built
                // over sentence.LabelArray, so an edge may only be stored reversed.
                // "mirrored" is the table to read in that case (the on/off roles swap).
                IDictionary<LabelPair, double> direct;
                IDictionary<LabelPair, double> mirrored;
                if (firstOn)
                {
                    direct = secondOn ? sentenceTrueTrue : sentenceTrueFalse;
                    mirrored = secondOn ? sentenceTrueTrue : sentenceFalseTrue;
                }
                else
                {
                    direct = secondOn ? sentenceFalseTrue : sentenceFalseFalse;
                    mirrored = secondOn ? sentenceTrueFalse : sentenceFalseFalse;
                }

                double joint;
                if (!direct.TryGetValue(pair, out joint))
                {
                    joint = mirrored[new LabelPair(pair.Second, pair.First)];
                }

                weight *= joint / parent;
            }

            DTDDSVariable.Sij.Value[sentence][new Labelset(labels, l)] = weight;
        }
    }
}