// Computes mcj (consistency: the probability that character c has labelset j).
// For every character in ConsistencyVariable.Characters the sij values of all of the
// character's sentences are summed per labelset, and each sum is then divided by the
// number of sentences belonging to that character.
//   sij  - per-sentence, per-labelset values that are accumulated.
//   time - passed to the Mcj constructor unchanged.
// Returns the newly built Mcj.
static public Mcj CalculateMcj(Sij sij, int time)
{
    Mcj mcj = new Mcj(time);

    // Pass 1: accumulate the per-labelset sums for each character.
    foreach (Character character in ConsistencyVariable.Characters)
    {
        mcj.Value.Add(character, new Dictionary<Labelset, double>());
        var totals = mcj.Value[character];

        foreach (Sentence sentence in character.Sentences)
        {
            var distribution = sij.Value[sentence];
            foreach (Labelset labelset in distribution.Keys)
            {
                double contribution = distribution[labelset];
                if (!totals.ContainsKey(labelset))
                {
                    totals.Add(labelset, contribution);
                }
                else
                {
                    totals[labelset] += contribution;
                }
            }
        }
    }

    // Pass 2: turn the sums into averages over the character's sentences.
    foreach (Character character in ConsistencyVariable.Characters)
    {
        var totals = mcj.Value[character];

        // Snapshot the keys because the values are rewritten while looping.
        foreach (Labelset labelset in totals.Keys.ToArray())
        {
            totals[labelset] /= character.Sentences.Count;
        }
    }

    return mcj;
}
// Stores, for every sentence in Variable.Sentences, the result built from
// sij.CalculateJointBestLabelset(sentence) in the result slot of the given annotation
// group that belongs to the named algorithm.
//   sij        - source of the best labelset per sentence.
//   algorithm  - one of "JDDS", "SDDS", "IDDS", "DTDDS", "TDDS", "NDDS", "PeT";
//                any other value (including null) leaves everything untouched.
//   groupIndex - index into sentence.AnnotaitonGroups.
// For "SDDS", sentences whose ID is below SupervisedVariable.NumberOfTraningSentences
// are skipped.
static public void ObtainBinaryResult(Sij sij, string algorithm, int groupIndex)
{
    // Pick the per-sentence store operation once, then apply it to every sentence.
    Action<Sentence> store;
    switch (algorithm)
    {
        case "JDDS":
            store = s => s.AnnotaitonGroups[groupIndex].JDDSResult = new Result(sij.CalculateJointBestLabelset(s));
            break;
        case "SDDS":
            store = s =>
            {
                if (s.ID < SupervisedVariable.NumberOfTraningSentences)
                {
                    return;
                }
                s.AnnotaitonGroups[groupIndex].SDDSResult = new Result(sij.CalculateJointBestLabelset(s));
            };
            break;
        case "IDDS":
            store = s => s.AnnotaitonGroups[groupIndex].IDDSResult = new Result(sij.CalculateJointBestLabelset(s));
            break;
        case "DTDDS":
            store = s => s.AnnotaitonGroups[groupIndex].DTDDSResult = new Result(sij.CalculateJointBestLabelset(s));
            break;
        case "TDDS":
            store = s => s.AnnotaitonGroups[groupIndex].TDDSResult = new Result(sij.CalculateJointBestLabelset(s));
            break;
        case "NDDS":
            store = s => s.AnnotaitonGroups[groupIndex].NDDSResult = new Result(sij.CalculateJointBestLabelset(s));
            break;
        case "PeT":
            store = s => s.AnnotaitonGroups[groupIndex].PeTResult = new Result(sij.CalculateJointBestLabelset(s));
            break;
        default:
            // Unknown algorithm name: nothing to do.
            return;
    }

    foreach (Sentence sentence in Variable.Sentences)
    {
        store(sentence);
    }
}
// Runs the NDDS pipeline once per annotator group: builds the initial Sij via
// Initialize(groupIndex, threshold, independentEstimation), hands it to
// CoreFunction.Intgerate (which may replace it through the ref parameter), stores the
// binary results and writes the result file for that group.
//   threshold             - forwarded to Initialize.
//   independentEstimation - forwarded to Initialize.
static public void RunNDDS(double threshold, IndependenceEstimation independentEstimation)
{
    const string algorithm = "NDDS";

    int group = 0;
    while (group < GroupVariable.AnnotatorGroups.Length)
    {
        Sij sij = Initialize(group, threshold, independentEstimation);
        CoreFunction.Intgerate(Variable.LabelArray, group, ref sij);

        DDSFunction.ObtainBinaryResult(sij, algorithm, group);
        Function.WriteBinaryResultFile(algorithm, group);

        ++group;
    }
}
// Runs the IDDS pipeline once per annotator group: builds the initial Sij via
// Initialize(groupIndex), hands it to CoreFunction.Intgerate (which may replace it
// through the ref parameter), stores the binary results and writes the result file
// for that group.
static public void RunIDDS()
{
    const string algorithm = "IDDS";

    int group = 0;
    while (group < GroupVariable.AnnotatorGroups.Length)
    {
        Sij sij = Initialize(group);
        CoreFunction.Intgerate(Variable.LabelArray, group, ref sij);

        DDSFunction.ObtainBinaryResult(sij, algorithm, group);
        Function.WriteBinaryResultFile(algorithm, group);

        ++group;
    }
}
// Runs the JDDS pipeline once per annotator group: builds the initial Sij with
// CoreFunction.InitializeSij over Variable.LabelArray, runs CoreFunction.Intgerate
// on it, stores the binary results and writes the result file for that group.
static public void RunJDDS()
{
    const string algorithm = "JDDS";

    // Walk through every group of the current grouping (group size).
    int group = 0;
    while (group < GroupVariable.AnnotatorGroups.Length)
    {
        Sij sij = CoreFunction.InitializeSij(Variable.LabelArray, group);

        // The iteration happens inside this call.
        CoreFunction.Intgerate(Variable.LabelArray, group, ref sij);

        DDSFunction.ObtainBinaryResult(sij, algorithm, group);
        Function.WriteBinaryResultFile(algorithm, group);

        ++group;
    }
}
// Recomputes Sij and Pdata for the next round.
//   sij   - replaced by a new Sij whose Time is the old Time + 1; its values are then
//           filled with numerator / denominator per sentence and labelset.
//   pj    - per-labelset factor multiplied into each numerator.
//   pakjl - per-annotator factor, indexed [annotator][true labelset][annotated labelset].
//   pdata - replaced by a new Pdata (Time + 1, previous Value); -log10(denominator)
//           of every sentence is then added to its Value.
// Returns true when pdata.MondifiedValue is 0 or pdata.Time has reached 10.
// The number of labelsets is 2^Variable.LabelArray.Length; it is computed once here
// instead of in every loop condition.
// NOTE(review): sentence.ID is used as the row index, so IDs are assumed to be
// 0..Variable.Sentences.Count-1 — confirm against the loader.
static public bool CalculatePdataAndSij(ref Sij sij, Pj pj, PAkjl pakjl, ref Pdata pdata)
{
    pdata = new Pdata(++pdata.Time, pdata.Value);
    sij = new Sij(++sij.Time);

    int sentenceCount = Variable.Sentences.Count;
    int labelsetCount = (int)Math.Pow(2, Variable.LabelArray.Length);

    // Numerators start at 1 because they are built up as products.
    double[,] numerator = new double[sentenceCount, labelsetCount];
    for (int i = 0; i < sentenceCount; ++i)
    {
        for (int j = 0; j < labelsetCount; ++j)
        {
            numerator[i, j] = 1;
        }
    }

    foreach (Sentence sentence in Variable.Sentences)
    {
        for (int j = 0; j < labelsetCount; ++j) // true labelset
        {
            // Only used as a lookup key below, so one instance per j is enough.
            Labelset trueLabelset = new Labelset(Variable.LabelArray, j);

            foreach (Annotator annotator in Variable.Annotators) // annotator
            {
                foreach (Annotation annotation in Variable.Data[annotator][sentence])
                {
                    numerator[sentence.ID, j] *= pakjl.Value[annotator][trueLabelset][new Labelset(Variable.LabelArray, annotation.IntLabel)];
                }
            }
            numerator[sentence.ID, j] *= pj.Value[trueLabelset];
        }
    }

    // Denominator of a sentence = sum of its numerators over all labelsets.
    double[] denominator = new double[sentenceCount];
    for (int i = 0; i < sentenceCount; ++i)
    {
        for (int q = 0; q < labelsetCount; ++q)
        {
            denominator[i] += numerator[i, q];
        }
    }

    // Compute Pdata and Sij.
    foreach (Sentence sentence in Variable.Sentences)
    {
        for (int j = 0; j < labelsetCount; ++j)
        {
            // A fresh Labelset per entry: the indexer may store the key.
            sij.Value[sentence][new Labelset(Variable.LabelArray, j)] = numerator[sentence.ID, j] / denominator[sentence.ID];
        }
        pdata.Value += -Math.Log10(denominator[sentence.ID]);
    }

    //Variable.OutputFile.WriteLine(pdata.ToString());
    //Variable.OutputFile.WriteLine(sij.ToString(DependentVariable.NumberOfIntlabel));
    return pdata.MondifiedValue == 0 || pdata.Time == 10;
}
// Builds the initial Sije (time 1) from the Sij returned by NDDSFunction.Initialize.
// For each sentence and labelset the sij value is split into a strong part
// (value * labelset.HowStrong) and a weak part (value * labelset.HowWeak). Each part is
// then normalised by the per-sentence total of that part (p(t|e)) and, when both totals
// are non-zero, multiplied by mce.Value[sentence.Character][will] (p(t|e) * p(x)).
//   mce        - per-character strong/weak weights.
//   groupIndex - annotator group passed to NDDSFunction.Initialize.
// Fix: the weak total previously accumulated valueOfStrong, so the weak column was
// normalised by the strong total.
static private Sije InitializeSijeWithBN(Mce mce, int groupIndex)
{
    Sije sije = new Sije(1);
    Sij sij = NDDSFunction.Initialize(groupIndex, Math.Pow(10, -1), IndependenceEstimation.MutualInformation);

    foreach (Sentence sentence in Variable.Sentences)
    {
        sije.Value.Add(sentence, new Dictionary<Labelset, IDictionary<Will, double>>());

        // Per-sentence normalisation totals for the strong and the weak part.
        IDictionary<Will, double> willDenominator = new Dictionary<Will, double>();
        willDenominator.Add(Will.strong, 0);
        willDenominator.Add(Will.weak, 0);

        foreach (Labelset labelset in sij.Value[sentence].Keys.ToArray())
        {
            double valueOfStrong = sij.Value[sentence][labelset] * labelset.HowStrong;
            double valueOfWeak = sij.Value[sentence][labelset] * labelset.HowWeak;

            if (sije.Value[sentence].ContainsKey(labelset))
            {
                sije.Value[sentence][labelset][Will.strong] += valueOfStrong;
                sije.Value[sentence][labelset][Will.weak] += valueOfWeak;
            }
            else
            {
                sije.Value[sentence].Add(labelset, new Dictionary<Will, double>());
                sije.Value[sentence][labelset].Add(Will.strong, valueOfStrong);
                sije.Value[sentence][labelset].Add(Will.weak, valueOfWeak);
            }

            willDenominator[Will.strong] += valueOfStrong;
            willDenominator[Will.weak] += valueOfWeak;
        }

        // p(t|e)
        foreach (Labelset labelset in sije.Value[sentence].Keys.ToArray())
        {
            if (willDenominator[Will.strong] != 0)
            {
                sije.Value[sentence][labelset][Will.strong] /= willDenominator[Will.strong];
            }
            if (willDenominator[Will.weak] != 0)
            {
                sije.Value[sentence][labelset][Will.weak] /= willDenominator[Will.weak];
            }
        }

        // p(t|e) * p(x); skipped when either total is 0.
        if (willDenominator[Will.strong] != 0 && willDenominator[Will.weak] != 0)
        {
            foreach (Labelset labelset in sije.Value[sentence].Keys.ToArray())
            {
                sije.Value[sentence][labelset][Will.strong] *= mce.Value[sentence.Character][Will.strong];
                sije.Value[sentence][labelset][Will.weak] *= mce.Value[sentence.Character][Will.weak];
            }
        }
    }

    return sije;
}
// Adds the labelset-pair counts derived from sij to an existing per-character-pair
// frequency table and then halves every entry of the table.
//   addOne            - true: each observed pair contributes 1; false: it contributes
//                       the best labelset's value (first sentence) or
//                       smallerValue(former, later) (following sentences).
//   sij               - sij.SortLabelsets(sentence)[0] is taken as the sentence's
//                       best labelset and its value.
//   LabelsetFrequency - table that is updated in place.
// The first sentence is paired with a placeholder Character("##") / Labelset(true).
// Fix: a character pair missing from the table used to throw KeyNotFoundException;
// its inner dictionary is now created on demand.
// NOTE(review): a fresh Character("##") is used as a key, so lookups rely on
// Character implementing value equality — confirm.
static public void UpdateLabelsetPairFrequencyForCharacter(bool addOne, Sij sij, ref IDictionary<Tuple<Character, Character>, IDictionary<Tuple<Labelset, Labelset>, double>> LabelsetFrequency)
{
    // Beginning: placeholder -> first sentence.
    Tuple<Character, Character> characterPair = Tuple.Create(new Character("##"), Variable.Sentences.First().Character);
    KeyValuePair<Labelset, double> laterBestLabelset = sij.SortLabelsets(Variable.Sentences[0])[0];
    Tuple<Labelset, Labelset> LabelsetPair = Tuple.Create(new Labelset(true), laterBestLabelset.Key);
    double addend = addOne ? 1 : laterBestLabelset.Value;

    if (!LabelsetFrequency.ContainsKey(characterPair))
    {
        LabelsetFrequency.Add(characterPair, new Dictionary<Tuple<Labelset, Labelset>, double>());
    }
    if (LabelsetFrequency[characterPair].ContainsKey(LabelsetPair))
    {
        LabelsetFrequency[characterPair][LabelsetPair] += addend;
    }
    else
    {
        LabelsetFrequency[characterPair].Add(LabelsetPair, addend);
    }

    // Middle: every pair of consecutive sentences.
    KeyValuePair<Labelset, double> formerBestLabelset;
    for (int i = 1; i < Variable.Sentences.Count; ++i)
    {
        // Character pair; counted whether or not the speaker changes.
        characterPair = Tuple.Create(Variable.Sentences[i - 1].Character, Variable.Sentences[i].Character);
        formerBestLabelset = sij.SortLabelsets(Variable.Sentences[i - 1])[0];
        laterBestLabelset = sij.SortLabelsets(Variable.Sentences[i])[0];
        LabelsetPair = Tuple.Create(formerBestLabelset.Key, laterBestLabelset.Key);
        addend = addOne ? 1 : smallerValue(formerBestLabelset.Value, laterBestLabelset.Value);

        if (!LabelsetFrequency.ContainsKey(characterPair))
        {
            LabelsetFrequency.Add(characterPair, new Dictionary<Tuple<Labelset, Labelset>, double>());
        }
        if (LabelsetFrequency[characterPair].ContainsKey(LabelsetPair))
        {
            LabelsetFrequency[characterPair][LabelsetPair] += addend;
        }
        else
        {
            LabelsetFrequency[characterPair].Add(LabelsetPair, addend);
        }
    }

    // Counts: everything was counted once too often, so divide it out
    // (i.e. the whole story is only counted once).
    foreach (Tuple<Character, Character> cp in LabelsetFrequency.Keys)
    {
        // Snapshot the inner keys because the values are rewritten while looping.
        foreach (Tuple<Labelset, Labelset> i in LabelsetFrequency[cp].Keys.ToArray())
        {
            LabelsetFrequency[cp][i] /= 2;
        }
    }
}
// Runs the IDS pipeline once per annotator group. For each group the IDS result slots
// of every sentence are reset, then every label of Variable.LabelArray is processed on
// its own (InitializeSij + Intgerate over a one-element label array) and folded into
// the results by ObtainLabelResult; finally the group's result file is written.
static public void RunIDS()
{
    int group = 0;
    while (group < GroupVariable.AnnotatorGroups.Length)
    {
        // Start from empty numeric and binary results for this group.
        foreach (Sentence sentence in Variable.Sentences)
        {
            sentence.AnnotaitonGroups[group].IDSNumResult = new NumericResult();
            sentence.AnnotaitonGroups[group].IDSResult = new Result();
        }

        // One independent run per label.
        foreach (Label label in Variable.LabelArray)
        {
            Sij sij = CoreFunction.InitializeSij(new Label[] { label }, group);
            CoreFunction.Intgerate(new Label[] { label }, group, ref sij);
            ObtainLabelResult(sij, group);
        }

        Function.WriteBinaryResultFile("IDS", group);
        ++group;
    }
}
// Runs the PDS pipeline once per annotator group. For each group the PDS result slots
// of every sentence are reset, the label pairs returned by GenerateBilabels are each
// processed on their own (InitializeSij + Intgerate over the pair's labels) and folded
// into the results by ObtainLabelResult; finally the group's result file is written.
static public void RunPDS()
{
    int group = 0;
    while (group < GroupVariable.AnnotatorGroups.Length)
    {
        // Start from empty numeric and binary results for this group.
        foreach (Sentence sentence in Variable.Sentences)
        {
            sentence.AnnotaitonGroups[group].PDSNumResult = new NumericResult();
            sentence.AnnotaitonGroups[group].PDSResult = new Result();
        }

        // One independent run per label pair.
        foreach (LabelPair bilabel in GenerateBilabels(group))
        {
            Sij sij = CoreFunction.InitializeSij(bilabel.ToArray(), group);
            CoreFunction.Intgerate(bilabel.ToArray(), group, ref sij);
            ObtainLabelResult(bilabel, group, sij);
        }

        Function.WriteBinaryResultFile("PDS", group);
        ++group;
    }
}
// Computes Pje, i.e. p(t|e): for every labelset, the strong part
// (sij value * labelset.HowStrong) and the weak part (sij value * labelset.HowWeak) are
// summed over all sentences and then divided by the grand total of the respective part.
// A part whose grand total is 0 is left unnormalised.
//   sij  - per-sentence, per-labelset values.
//   time - passed to the Pje constructor unchanged.
static private Pje CalculatePje(Sij sij, int time)
{
    Pje pje = new Pje(time);

    // Grand totals used as normalisers.
    double strongTotal = 0;
    double weakTotal = 0;

    foreach (Sentence sentence in sij.Value.Keys)
    {
        var distribution = sij.Value[sentence];
        foreach (Labelset labelset in distribution.Keys)
        {
            double strongPart = distribution[labelset] * labelset.HowStrong;
            double weakPart = distribution[labelset] * labelset.HowWeak;

            if (!pje.Value.ContainsKey(labelset))
            {
                pje.Value.Add(labelset, new Dictionary<Will, double>());
                pje.Value[labelset].Add(Will.strong, strongPart);
                pje.Value[labelset].Add(Will.weak, weakPart);
            }
            else
            {
                pje.Value[labelset][Will.strong] += strongPart;
                pje.Value[labelset][Will.weak] += weakPart;
            }

            strongTotal += strongPart;
            weakTotal += weakPart;
        }
    }

    // p(t|e): snapshot the keys because the values are rewritten while looping.
    foreach (Labelset labelset in pje.Value.Keys.ToArray())
    {
        if (strongTotal != 0)
        {
            pje.Value[labelset][Will.strong] /= strongTotal;
        }
        if (weakTotal != 0)
        {
            pje.Value[labelset][Will.weak] /= weakTotal;
        }
    }

    return pje;
}
// Recomputes PAkjl (the "pi" table) from sij.
//   sij   - per-sentence, per-labelset values used as weights.
//   pakjl - replaced by a new PAkjl whose Time is the old Time + 1; for each annotator,
//           true labelset j and annotated labelset l the entry is set to
//           numerator[j, l] / denominator[j], where both are sums of sij values over
//           the annotator's annotations. Rows whose denominator is 0 are not written.
// The number of labelsets is 2^Variable.LabelArray.Length; it is computed once here,
// and lookup-only Labelset keys and repeated sij lookups are evaluated once per use.
static public void CalculatePAkjl(Sij sij, ref PAkjl pakjl)
{
    pakjl = new PAkjl(++pakjl.Time);

    int labelsetCount = (int)Math.Pow(2, Variable.LabelArray.Length);

    IDictionary<Annotator, double[,]> numerator = new Dictionary<Annotator, double[,]>(); // numerators
    IDictionary<Annotator, double[]> denominator = new Dictionary<Annotator, double[]>();   // denominators
    foreach (Annotator annotator in Variable.Annotators) // annotator
    {
        numerator.Add(annotator, new double[labelsetCount, labelsetCount]);
        denominator.Add(annotator, new double[labelsetCount]);
    }

    // Accumulate numerators and denominators.
    foreach (Annotator annotator in Variable.Annotators)
    {
        double[,] annotatorNumerator = numerator[annotator];
        double[] annotatorDenominator = denominator[annotator];

        for (int j = 0; j < labelsetCount; ++j) // true labelset
        {
            // Only used as a lookup key, so one instance per j is enough.
            Labelset trueLabelset = new Labelset(Variable.LabelArray, j);

            foreach (Sentence sentence in Variable.Sentences)
            {
                foreach (Annotation annotation in Variable.Data[annotator][sentence])
                {
                    double weight = sij.Value[sentence][trueLabelset];
                    annotatorNumerator[j, annotation.IntLabel] += weight;
                    annotatorDenominator[j] += weight;
                }
            }
        }
    }

    // Compute pi.
    foreach (Annotator annotator in Variable.Annotators) // annotator
    {
        for (int j = 0; j < labelsetCount; ++j) // true labelset
        {
            // Some results simply never occur in any sentence; skip them.
            if (denominator[annotator][j] != 0)
            {
                var row = pakjl.Value[annotator][new Labelset(Variable.LabelArray, j)];
                for (int l = 0; l < labelsetCount; ++l) // labelset chosen by the annotator
                {
                    row[new Labelset(Variable.LabelArray, l)] = numerator[annotator][j, l] / denominator[annotator][j];
                }
            }
        }
    }
    //Variable.OutputFile.WriteLine(pajl.ToString(DependentVariable.NumberOfIntlabel));
}
// Runs the PeT pipeline once per annotator group and reports the personality accuracy.
//   PorS        - when PorSForJointje.P, Pje is recomputed with CalculatePje in every
//                 round; otherwise pje stays null.
//   SmoothingBE - forwarded to PersonalityFunction.CalculateBExy.
//   bnOrNot     - BnOrNot.Yes: the initial Sij comes from NDDSFunction.Initialize;
//                 otherwise from CoreFunction.InitializeSij.
// Each group iterates at most Variable.ConvergeTimeThreshold rounds and stops early
// when CalculatePdataAndSij returns true.
// NOTE(review): CalculateMce is passed groupIndex while CalculatePje is passed
// convergeTime — confirm which argument CalculateMce expects.
// NOTE(review): if Variable.ConvergeTimeThreshold is below 1 the loop never runs and
// mce is still null when mce.EstimatedPersonality is read — confirm it is always >= 1.
static public void RunPeT(PorSForJointje PorS, Smoothing SmoothingBE, BnOrNot bnOrNot)
{
    double[] accuracyOfPersonalityForEachGroup = new double[GroupVariable.AnnotatorGroups.Length];

    for (int groupIndex = 0; groupIndex < GroupVariable.AnnotatorGroups.Length; ++groupIndex)
    {
        // Simulated annotator labelling of character personality; computed once and then fixed.
        IDictionary<Annotator, IDictionary<Character, IDictionary<Will, double>>> okcx = PersonalityFunction.CalculateOkcx(groupIndex);

        Sij sij = bnOrNot == BnOrNot.Yes
            ? NDDSFunction.Initialize(groupIndex, Math.Pow(10, -1), IndependenceEstimation.MutualInformation)
            : CoreFunction.InitializeSij(Variable.LabelArray, groupIndex);

        Mce mce = null;
        Pje pje = null; // p(t|e)
        Pdata pdata = null;
        IList<double> Pdatas = new List<double>();

        bool finished = false;
        int convergeTime = 1;
        while (!finished && convergeTime <= Variable.ConvergeTimeThreshold)
        {
            mce = CalculateMce(sij, groupIndex);
            PersonalityFunction.WriteMVResultFile(mce, groupIndex);

            PAkjl pakjl = CoreFunction.CalculatePAkjl(Variable.LabelArray, sij, convergeTime, groupIndex);
            BEkef bekef = PersonalityFunction.CalculateBExy(mce, okcx, SmoothingBE, convergeTime, groupIndex);

            if (PorS == PorSForJointje.P)
            {
                pje = CalculatePje(sij, convergeTime);
            }

            // old/new
            finished = CalculatePdataAndSij(ref sij, pakjl, bekef, pje, mce, okcx, ref pdata, Pdatas, groupIndex);
            ++convergeTime;
        }

        DDSFunction.ObtainBinaryResult(sij, "PeT", groupIndex);
        Function.WriteBinaryResultFile("PeT", groupIndex);
        accuracyOfPersonalityForEachGroup[groupIndex] = PersonalityPaperFunction.AccuracyOfPersonalityForEachGroup(PersonalityVariable.TruePersonality, mce.EstimatedPersonality);
    }

    Function.ConsoleWriteLine("Accuracy Of PeT: " + PersonalityPaperFunction.AccuracyOfPersonality(accuracyOfPersonalityForEachGroup));
}
// Adds the labelset-pair counts derived from sij to an existing frequency table and
// then halves every entry of the table.
//   addOne            - true: each observed pair contributes 1; false: it contributes
//                       the best labelset's value (first sentence) or
//                       smallerValue(former, later) (following sentences).
//   sij               - sij.SortLabelsets(sentence)[0] is taken as the sentence's
//                       best labelset and its value.
//   LabelsetFrequency - table that is updated in place (passed by ref).
// The first sentence is paired with a placeholder Labelset(true).
static public void UpdateLabelsetPairFrequencyForSentence(bool addOne, Sij sij, ref IDictionary<Tuple<Labelset, Labelset>, double> LabelsetFrequency)
{
    double existing;

    // Beginning: placeholder -> first sentence.
    KeyValuePair<Labelset, double> later = sij.SortLabelsets(Variable.Sentences[0])[0];
    Tuple<Labelset, Labelset> pair = Tuple.Create(new Labelset(true), later.Key);
    double increment = addOne ? 1 : later.Value;
    if (LabelsetFrequency.TryGetValue(pair, out existing))
    {
        LabelsetFrequency[pair] = existing + increment;
    }
    else
    {
        LabelsetFrequency.Add(pair, increment);
    }

    // Middle: every pair of consecutive sentences.
    for (int index = 1; index < Variable.Sentences.Count; ++index)
    {
        KeyValuePair<Labelset, double> former = sij.SortLabelsets(Variable.Sentences[index - 1])[0];
        later = sij.SortLabelsets(Variable.Sentences[index])[0];
        pair = Tuple.Create(former.Key, later.Key);

        if (addOne)
        {
            increment = 1;
        }
        else
        {
            increment = smallerValue(former.Value, later.Value);
        }

        if (LabelsetFrequency.TryGetValue(pair, out existing))
        {
            LabelsetFrequency[pair] = existing + increment;
        }
        else
        {
            LabelsetFrequency.Add(pair, increment);
        }
    }

    // Counts: everything was counted once too often, so divide it out.
    // Snapshot the keys because the values are rewritten while looping.
    foreach (Tuple<Labelset, Labelset> key in LabelsetFrequency.Keys.ToArray())
    {
        LabelsetFrequency[key] /= 2;
    }
}
// Writes "Result/<algorithm>Result.csv": for each sentence the labelset index with the
// largest sij value is selected (ties keep the lowest index; index 0 with value 0 when
// no value is positive), stored on the sentence and written as one row.
//   sij       - per-sentence, per-labelset values.
//   algorithm - "JDDS" stores into PreciseResult, "TDDS" into TreeForAllResult,
//               "DTDDS" into TreeForSenResult; any other name stores nothing but still
//               produces the header and one empty line per sentence.
// The writer is wrapped in a using block so the file is closed even when a lookup or
// write throws; the original only closed it on the success path.
static public void ObtainResult(Sij sij, string algorithm)
{
    using (StreamWriter resultFile = new StreamWriter("Result/" + algorithm + "Result.csv"))
    {
        Function.InitialResultFile(resultFile);

        // Number of labelsets: 2^(number of labels); computed once.
        int labelsetCount = (int)Math.Pow(2, Variable.LabelArray.Length);

        foreach (Sentence sentence in Variable.Sentences)
        {
            int bestResult = 0;
            double bestResultValue = 0;
            for (int j = 0; j < labelsetCount; ++j)
            {
                double candidate = sij.Value[sentence][new Labelset(Variable.LabelArray, j)];
                if (candidate > bestResultValue)
                {
                    bestResult = j;
                    bestResultValue = candidate;
                }
            }

            switch (algorithm)
            {
                case "JDDS":
                    sentence.PreciseResult = new Result(new KeyValuePair<Labelset, double>(new Labelset(Variable.LabelArray, bestResult), bestResultValue));
                    Function.WriteBinaryResultOfASentence(sentence.ID, sentence.PreciseResult, sentence.Character.ID, sentence.Speech, resultFile);
                    break;
                case "TDDS":
                    sentence.TreeForAllResult = new Result(new KeyValuePair<Labelset, double>(new Labelset(Variable.LabelArray, bestResult), bestResultValue));
                    Function.WriteBinaryResultOfASentence(sentence.ID, sentence.TreeForAllResult, sentence.Character.ID, sentence.Speech, resultFile);
                    break;
                case "DTDDS":
                    sentence.TreeForSenResult = new Result(new KeyValuePair<Labelset, double>(new Labelset(Variable.LabelArray, bestResult), bestResultValue));
                    Function.WriteBinaryResultOfASentence(sentence.ID, sentence.TreeForSenResult, sentence.Character.ID, sentence.Speech, resultFile);
                    break;
            }

            // Terminate the sentence's row.
            resultFile.WriteLine();
        }
    }
}
// Obtains the result for one emotion (label) and folds it into the IDS results of the
// given annotation group.
//   sij   - values for the labelsets of the single label being processed.
//   group - index into sentence.AnnotaitonGroups.
// Numeric result: for every labelset in which a label is set to true, the label's
// numeric value becomes that labelset's sij value.
// Binary result: the labels of sij.CalculateJointBestLabelset(sentence) are copied
// into IDSResult and IDSResult.Probability is multiplied by the labelset's value.
static private void ObtainLabelResult(Sij sij, int group)
{
    foreach (Sentence sentence in sij.Value.Keys)
    {
        // Numeric result.
        var distribution = sij.Value[sentence];
        foreach (Labelset candidate in distribution.Keys)
        {
            // In practice there is only one Label per labelset here.
            foreach (Label label in candidate.Labels.Keys)
            {
                if (!candidate.Labels[label])
                {
                    continue;
                }
                sentence.AnnotaitonGroups[group].IDSNumResult.Labels[label] = distribution[candidate];
            }
        }

        // Binary result.
        KeyValuePair<Labelset, double> best = sij.CalculateJointBestLabelset(sentence);
        Labelset bestLabelset = best.Key;
        foreach (Label label in bestLabelset.Labels.Keys)
        {
            sentence.AnnotaitonGroups[group].IDSResult.Labels[label] = bestLabelset.Labels[label];
        }
        sentence.AnnotaitonGroups[group].IDSResult.Probability *= best.Value;
    }
}
// Computes Mce: for every character in ConsistencyVariable.Characters, the share of
// strong versus weak affects. The sij value of every labelset of every sentence of the
// character is weighted by labelset.NumberOfTrueStrongAffects and
// labelset.NumberOfTrueWeakAffects; the two sums are then divided by their total.
// When the total is 0 both shares are 0.5.
//   sij  - per-sentence, per-labelset values.
//   time - passed to the Mce constructor unchanged.
static private Mce CalculateMce(Sij sij, int time)
{
    Mce mce = new Mce(time);

    foreach (Character character in ConsistencyVariable.Characters)
    {
        double strongSum = 0;
        double weakSum = 0;

        foreach (Sentence sentence in character.Sentences)
        {
            var distribution = sij.Value[sentence];
            foreach (Labelset labelset in distribution.Keys)
            {
                strongSum += distribution[labelset] * labelset.NumberOfTrueStrongAffects;
                weakSum += distribution[labelset] * labelset.NumberOfTrueWeakAffects;
            }
        }

        double total = strongSum + weakSum;
        double strongShare;
        double weakShare;
        if (total == 0)
        {
            // No evidence either way: split evenly.
            strongShare = 0.5;
            weakShare = 0.5;
        }
        else
        {
            strongShare = strongSum / total;
            weakShare = weakSum / total;
        }

        IDictionary<Will, double> shares = new Dictionary<Will, double>();
        shares.Add(Will.strong, strongShare);
        shares.Add(Will.weak, weakShare);
        mce.Value.Add(character, shares);
    }

    return mce;
}
// Static constructor: gives the training Sij and Pj their initial instances, both
// constructed with the argument 0.
static SPDSVariable()
{
    SPDSVariable.TrainingSij = new Sij(0);
    SPDSVariable.TrainingPj = new Pj(0);
}
// Builds, per pair of consecutive speakers, a frequency table over ALL labelset pairs
// (not only the best ones).
//   sij - per-sentence, per-labelset values.
// Beginning: the first sentence is paired with a placeholder Character("##") /
// Labelset(true); each labelset j2 contributes sij value * j2.NumberOfTypes.
// Middle: for every two consecutive sentences, every combination (j1, j2) contributes
// smallerValue(sij[i-1][j1], sij[i][j2]).
// Finally every entry is divided by the NumberOfTypes of the pair's first labelset.
// Fix: the final loop rewrote values while enumerating the live Keys collection of the
// same dictionary, which throws InvalidOperationException on .NET Framework; the keys
// are now snapshotted with ToArray() first.
static public IDictionary<Tuple<Character, Character>, IDictionary<Tuple<Labelset, Labelset>, double>> AllLabelsetPairFrequencyForCharacter(Sij sij)
{
    IDictionary<Tuple<Character, Character>, IDictionary<Tuple<Labelset, Labelset>, double>> labelPairFrequencyForCharacter = new Dictionary<Tuple<Character, Character>, IDictionary<Tuple<Labelset, Labelset>, double>>();

    // Beginning
    Tuple<Character, Character> characterPair = Tuple.Create(new Character("##"), Variable.Sentences.First().Character);
    labelPairFrequencyForCharacter.Add(characterPair, new Dictionary<Tuple<Labelset, Labelset>, double>());
    foreach (Labelset j2 in sij.Value[Variable.Sentences[0]].Keys)
    {
        Tuple<Labelset, Labelset> LabelsetPair = Tuple.Create(new Labelset(true), j2);
        if (labelPairFrequencyForCharacter[characterPair].ContainsKey(LabelsetPair)) // character
        {
            labelPairFrequencyForCharacter[characterPair][LabelsetPair] += sij.Value[Variable.Sentences[0]][j2] * j2.NumberOfTypes;
        }
        else
        {
            labelPairFrequencyForCharacter[characterPair].Add(LabelsetPair, sij.Value[Variable.Sentences[0]][j2] * j2.NumberOfTypes);
        }
    }

    // Middle (looking back from the current sentence)
    for (int i = 1; i < Variable.Sentences.Count; ++i)
    {
        // Character pair; counted whether or not the speaker changes.
        characterPair = Tuple.Create(Variable.Sentences[i - 1].Character, Variable.Sentences[i].Character);
        foreach (Labelset j1 in sij.Value[Variable.Sentences[i - 1]].Keys)
        {
            foreach (Labelset j2 in sij.Value[Variable.Sentences[i]].Keys)
            {
                Tuple<Labelset, Labelset> Labelset = Tuple.Create(j1, j2);
                double value = smallerValue(sij.Value[Variable.Sentences[i - 1]][j1], sij.Value[Variable.Sentences[i]][j2]);
                if (labelPairFrequencyForCharacter.ContainsKey(characterPair))
                {
                    if (labelPairFrequencyForCharacter[characterPair].ContainsKey(Labelset))
                    {
                        labelPairFrequencyForCharacter[characterPair][Labelset] += value;
                    }
                    else
                    {
                        labelPairFrequencyForCharacter[characterPair].Add(Labelset, value);
                    }
                }
                else
                {
                    labelPairFrequencyForCharacter.Add(characterPair, new Dictionary<Tuple<Labelset, Labelset>, double>());
                    labelPairFrequencyForCharacter[characterPair].Add(Labelset, value);
                }
            }
        }
    }

    // Counts: everything was over-counted, so divide it out
    // (i.e. the whole story is only counted once).
    foreach (Tuple<Character, Character> cp in labelPairFrequencyForCharacter.Keys)
    {
        // Snapshot the inner keys: the values are rewritten inside the loop.
        foreach (Tuple<Labelset, Labelset> sp in labelPairFrequencyForCharacter[cp].Keys.ToArray())
        {
            labelPairFrequencyForCharacter[cp][sp] /= sp.Item1.NumberOfTypes;
        }
    }

    return labelPairFrequencyForCharacter;
}
// Builds, per pair of consecutive speakers and per label, a frequency table of
// single-label labelset pairs taken from the best labelsets of consecutive sentences.
//   addOne - true: each observed pair contributes 1; false: it contributes the best
//            labelset's value (first sentence) or smallerValue(former, later).
//   labels - labels for which an independent table is kept.
//   sij    - sij.SortLabelsets(sentence)[0] is taken as the sentence's best labelset.
// The first sentence is paired with a placeholder Character("##") / Labelset(true).
// Improvement: the per-label dictionaries of a new character pair were allocated twice
// (added, then immediately overwritten); they are now created once, and each
// dictionary is looked up once per use.
static public IDictionary<Tuple<Character, Character>, IDictionary<Label, IDictionary<Tuple<Labelset, Labelset>, double>>> RenewIndependentLabelPairFreuquencyForCharacter(bool addOne, Label[] labels, Sij sij)
{
    // Initialization
    IDictionary<Tuple<Character, Character>, IDictionary<Label, IDictionary<Tuple<Labelset, Labelset>, double>>> independentLabelPairFreuquencyForCharacter = new Dictionary<Tuple<Character, Character>, IDictionary<Label, IDictionary<Tuple<Labelset, Labelset>, double>>>();
    Tuple<Character, Character> characterPair = new Tuple<Character, Character>(new Character("##"), Variable.Sentences.First().Character);
    independentLabelPairFreuquencyForCharacter.Add(characterPair, new Dictionary<Label, IDictionary<Tuple<Labelset, Labelset>, double>>());
    foreach (Label label in labels)
    {
        independentLabelPairFreuquencyForCharacter[characterPair].Add(label, new Dictionary<Tuple<Labelset, Labelset>, double>());
    }

    IDictionary<Label, IDictionary<Tuple<Labelset, Labelset>, double>> perLabel;
    IDictionary<Tuple<Labelset, Labelset>, double> frequency;
    double current;

    // Beginning: placeholder -> first sentence.
    KeyValuePair<Labelset, double> laterBestLabelset = sij.SortLabelsets(Variable.Sentences[0])[0];
    double addend = addOne ? 1 : laterBestLabelset.Value;
    foreach (Label label in labels)
    {
        Tuple<Labelset, Labelset> Labelset = Tuple.Create(new Labelset(true), laterBestLabelset.Key.ToSingleLabelAnnotation(label));
        frequency = independentLabelPairFreuquencyForCharacter[characterPair][label];
        if (frequency.TryGetValue(Labelset, out current))
        {
            frequency[Labelset] = current + addend;
        }
        else
        {
            frequency.Add(Labelset, addend);
        }
    }

    // Middle: every pair of consecutive sentences.
    KeyValuePair<Labelset, double> formerBestLabelset;
    for (int i = 1; i < Variable.Sentences.Count; ++i)
    {
        // Character pair; counted whether or not the speaker changes.
        characterPair = Tuple.Create(Variable.Sentences[i - 1].Character, Variable.Sentences[i].Character);
        formerBestLabelset = sij.SortLabelsets(Variable.Sentences[i - 1])[0];
        laterBestLabelset = sij.SortLabelsets(Variable.Sentences[i])[0];
        addend = addOne ? 1 : smallerValue(formerBestLabelset.Value, laterBestLabelset.Value);

        foreach (Label label in labels)
        {
            Tuple<Labelset, Labelset> Labelset = Tuple.Create(formerBestLabelset.Key.ToSingleLabelAnnotation(label), laterBestLabelset.Key.ToSingleLabelAnnotation(label));

            // First time this character pair is seen: create one empty table per label.
            if (!independentLabelPairFreuquencyForCharacter.TryGetValue(characterPair, out perLabel))
            {
                perLabel = new Dictionary<Label, IDictionary<Tuple<Labelset, Labelset>, double>>();
                independentLabelPairFreuquencyForCharacter.Add(characterPair, perLabel);
                foreach (Label l in labels)
                {
                    perLabel.Add(l, new Dictionary<Tuple<Labelset, Labelset>, double>());
                }
            }

            frequency = perLabel[label];
            if (frequency.TryGetValue(Labelset, out current))
            {
                frequency[Labelset] = current + addend;
            }
            else
            {
                frequency.Add(Labelset, addend);
            }
        }
    }

    return independentLabelPairFreuquencyForCharacter;
}
// Builds a fresh frequency table of labelset pairs from the best labelsets of
// consecutive sentences.
//   addOne - true: each observed pair contributes 1; false: it contributes the best
//            labelset's value (first sentence) or smallerValue(former, later).
//   sij    - sij.SortLabelsets(sentence)[0] is taken as the sentence's best labelset.
// The first sentence is paired with a placeholder Labelset(true).
static public IDictionary<Tuple<Labelset, Labelset>, double> RenewLabelsetPairFrequencyForSentence(bool addOne, Sij sij)
{
    IDictionary<Tuple<Labelset, Labelset>, double> frequencies = new Dictionary<Tuple<Labelset, Labelset>, double>();

    // Beginning: placeholder -> first sentence.
    KeyValuePair<Labelset, double> later = sij.SortLabelsets(Variable.Sentences[0])[0];
    double increment = addOne ? 1 : later.Value;
    frequencies.Add(Tuple.Create(new Labelset(true), later.Key), increment);

    // Middle: every pair of consecutive sentences.
    for (int index = 1; index < Variable.Sentences.Count; ++index)
    {
        KeyValuePair<Labelset, double> former = sij.SortLabelsets(Variable.Sentences[index - 1])[0];
        later = sij.SortLabelsets(Variable.Sentences[index])[0];
        Tuple<Labelset, Labelset> pair = Tuple.Create(former.Key, later.Key);

        if (addOne)
        {
            increment = 1;
        }
        else
        {
            increment = smallerValue(former.Value, later.Value);
        }

        double existing;
        if (frequencies.TryGetValue(pair, out existing))
        {
            frequencies[pair] = existing + increment;
        }
        else
        {
            frequencies.Add(pair, increment);
        }
    }

    return frequencies;
}
// Builds the initial Sij (time 1) for one annotator group from a Bayesian network
// over the labels.
//   groupIndex            - annotator group whose annotations are counted.
//   threshold             - forwarded to NDDSFunction.BuildBN.
//   independentEstimation - forwarded to NDDSFunction.BuildBN.
// Steps:
//   1. Build the BN and collect, for every label, its parent labels from BN.AdjMatrix.
//   2. For every sentence, label and parent configuration, estimate the value of
//      "label false" / "label true" from the group's annotation counts, optionally
//      smoothed according to NDDSVariable.SmoothBN.
//   3. For every labelset over Variable.LabelArray multiply the per-label estimates;
//      non-zero products are stored and normalised per sentence.
// Fix: the smoothed "label true" estimate for labels WITHOUT parents was
// nTrue + (nTrue + s0) / (N + s1); it is now (nTrue + s0) / (N + s1), matching the
// "label false" estimate and the with-parents branch.
// NOTE(review): with parents, the counts are still divided by the total number of
// annotations per sentence rather than by the number matching the parent
// configuration — left unchanged; confirm that this is intended.
static public Sij Initialize(int groupIndex, double threshold, IndependenceEstimation independentEstimation)
{
    #region Initialization
    Sij sij = new Sij(1);
    #endregion

    Label[] labelArray = GroupFunction.DescendLabelsByNumber(groupIndex);
    Graph BN = NDDSFunction.BuildBN(groupIndex, labelArray, independentEstimation, threshold);

    #region Derive the parent nodes (conditions) of every emotion (event) from the BN
    IDictionary<Label, IList<Label>> LabelsAndPas = new Dictionary<Label, IList<Label>>();
    foreach (Label label in labelArray)
    {
        LabelsAndPas.Add(label, new List<Label>());
    }
    foreach (KeyValuePair<LabelPair, bool> hasRelationship in BN.AdjMatrix)
    {
        if (hasRelationship.Value)
        {
            LabelsAndPas[hasRelationship.Key.Second].Add(hasRelationship.Key.First);
        }
    }
    #endregion

    #region Parameters needed to compute the joint probability
    IDictionary<Sentence, IDictionary<LabelAndWitness, double>> Probability = new Dictionary<Sentence, IDictionary<LabelAndWitness, double>>();
    foreach (Sentence sentence in Variable.Sentences)
    {
        Probability.Add(sentence, new Dictionary<LabelAndWitness, double>());
    }
    IDictionary<Smoothing, double[]> smoothingNumber = Function.SmoothingNumber(2);
    foreach (KeyValuePair<Label, IList<Label>> labelAndPas in LabelsAndPas)
    {
        if (labelAndPas.Value.Count == 0)
        {
            // Label without parents: count over all annotations of the sentence.
            foreach (Sentence sentence in Variable.Sentences)
            {
                double numberOfLabelTrue = 0;
                double numberOfLabelFalse = 0;
                foreach (Annotation annotation in sentence.AnnotaitonGroups[groupIndex].AnnotatorAnnotationDic.Values)
                {
                    if (annotation.Labels[labelAndPas.Key])
                    {
                        ++numberOfLabelTrue;
                    }
                    else
                    {
                        ++numberOfLabelFalse;
                    }
                }
                if (NDDSVariable.SmoothBN != Smoothing.None)
                {
                    Probability[sentence].Add(new LabelAndWitness(new Labelset(labelAndPas.Key, false), new Labelset()), (numberOfLabelFalse + smoothingNumber[NDDSVariable.SmoothBN][0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothingNumber[NDDSVariable.SmoothBN][1]));
                    Probability[sentence].Add(new LabelAndWitness(new Labelset(labelAndPas.Key, true), new Labelset()), (numberOfLabelTrue + smoothingNumber[NDDSVariable.SmoothBN][0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothingNumber[NDDSVariable.SmoothBN][1]));
                }
                else
                {
                    Probability[sentence].Add(new LabelAndWitness(new Labelset(labelAndPas.Key, false), new Labelset()), numberOfLabelFalse / Variable.NumberOfAnnotationsPerSentenceAfterGrouping);
                    Probability[sentence].Add(new LabelAndWitness(new Labelset(labelAndPas.Key, true), new Labelset()), numberOfLabelTrue / Variable.NumberOfAnnotationsPerSentenceAfterGrouping);
                }
            }
        }
        else
        {
            // Label with parents: one estimate per configuration of the parent labels.
            for (int i = 0; i < Math.Pow(2, labelAndPas.Value.Count); ++i)
            {
                Labelset Labelset = new Labelset(labelAndPas.Value, i);
                foreach (Sentence sentence in Variable.Sentences)
                {
                    double numberOfLabelTrue = 0;
                    double numberOfLabelFalse = 0;
                    foreach (Annotation annotation in sentence.AnnotaitonGroups[groupIndex].AnnotatorAnnotationDic.Values)
                    {
                        // Only annotations that agree with this parent configuration count.
                        if (annotation.IsAccordingToLabelset(Labelset))
                        {
                            if (annotation.Labels[labelAndPas.Key])
                            {
                                ++numberOfLabelTrue;
                            }
                            else
                            {
                                ++numberOfLabelFalse;
                            }
                        }
                    }
                    if (NDDSVariable.SmoothBN != Smoothing.None)
                    {
                        Probability[sentence].Add(new LabelAndWitness(new Labelset(labelAndPas.Key, false), Labelset), (numberOfLabelFalse + smoothingNumber[NDDSVariable.SmoothBN][0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothingNumber[NDDSVariable.SmoothBN][1]));
                        Probability[sentence].Add(new LabelAndWitness(new Labelset(labelAndPas.Key, true), Labelset), (numberOfLabelTrue + smoothingNumber[NDDSVariable.SmoothBN][0]) / (Variable.NumberOfAnnotationsPerSentenceAfterGrouping + smoothingNumber[NDDSVariable.SmoothBN][1]));
                    }
                    else
                    {
                        Probability[sentence].Add(new LabelAndWitness(new Labelset(labelAndPas.Key, false), Labelset), numberOfLabelFalse / Variable.NumberOfAnnotationsPerSentenceAfterGrouping);
                        Probability[sentence].Add(new LabelAndWitness(new Labelset(labelAndPas.Key, true), Labelset), numberOfLabelTrue / Variable.NumberOfAnnotationsPerSentenceAfterGrouping);
                    }
                }
            }
        }
    }
    #endregion

    #region Compute Sij
    IDictionary<Sentence, double> denominator = new Dictionary<Sentence, double>(); // normalisation term
    foreach (Sentence sentence in Variable.Sentences)
    {
        sij.Value.Add(sentence, new Dictionary<Labelset, double>());
        denominator.Add(sentence, 0);
        for (int l = 0; l < Math.Pow(2, Variable.LabelArray.Length); ++l)
        {
            Labelset Labelset = new Labelset(Variable.LabelArray, l);
            double value = 1;
            foreach (Label label in labelArray)
            {
                Labelset singleLabelAnnotation = new Labelset(label, Labelset.Labels[label]);
                Labelset subLabelset = new Labelset(LabelsAndPas[label], Labelset);
                value *= Probability[sentence][new LabelAndWitness(singleLabelAnnotation, subLabelset)];
            }
            // Labelsets with a zero product are not stored at all.
            if (value != 0)
            {
                sij.Value[sentence].Add(Labelset, value);
                denominator[sentence] += value;
            }
        }
    }
    #endregion

    #region Normalisation
    foreach (Sentence sentence in Variable.Sentences.ToArray())
    {
        foreach (Labelset labelset in sij.Value[sentence].Keys.ToArray())
        {
            sij.Value[sentence][labelset] /= denominator[sentence];
        }
    }
    #endregion

    return sij;
}
// Builds a fresh frequency table, keyed by pairs of consecutive speakers, of labelset
// pairs taken from the best labelsets of consecutive sentences.
//   addOne - true: each observed pair contributes 1; false: it contributes the best
//            labelset's value (first sentence) or smallerValue(former, later).
//   sij    - sij.SortLabelsets(sentence)[0] is taken as the sentence's best labelset.
// The first sentence is paired with a placeholder Character("##") / Labelset(true).
static public IDictionary<Tuple<Character, Character>, IDictionary<Tuple<Labelset, Labelset>, double>> RenewLabelsetPairFrequencyForCharacter(bool addOne, Sij sij)
{
    IDictionary<Tuple<Character, Character>, IDictionary<Tuple<Labelset, Labelset>, double>> table = new Dictionary<Tuple<Character, Character>, IDictionary<Tuple<Labelset, Labelset>, double>>();

    // Beginning: placeholder -> first sentence.
    Tuple<Character, Character> speakers = Tuple.Create(new Character("##"), Variable.Sentences.First().Character);
    table.Add(speakers, new Dictionary<Tuple<Labelset, Labelset>, double>());
    KeyValuePair<Labelset, double> later = sij.SortLabelsets(Variable.Sentences[0])[0];
    double increment = addOne ? 1 : later.Value;
    table[speakers].Add(Tuple.Create(new Labelset(true), later.Key), increment);

    // Middle: every pair of consecutive sentences.
    for (int index = 1; index < Variable.Sentences.Count; ++index)
    {
        // Speaker pair; counted whether or not the speaker changes.
        speakers = Tuple.Create(Variable.Sentences[index - 1].Character, Variable.Sentences[index].Character);
        KeyValuePair<Labelset, double> former = sij.SortLabelsets(Variable.Sentences[index - 1])[0];
        later = sij.SortLabelsets(Variable.Sentences[index])[0];
        Tuple<Labelset, Labelset> pair = Tuple.Create(former.Key, later.Key);

        if (addOne)
        {
            increment = 1;
        }
        else
        {
            increment = smallerValue(former.Value, later.Value);
        }

        // Fetch the speaker pair's table, creating it on first sight.
        IDictionary<Tuple<Labelset, Labelset>, double> frequencies;
        if (!table.TryGetValue(speakers, out frequencies))
        {
            frequencies = new Dictionary<Tuple<Labelset, Labelset>, double>();
            table.Add(speakers, frequencies);
        }

        double existing;
        if (frequencies.TryGetValue(pair, out existing))
        {
            frequencies[pair] = existing + increment;
        }
        else
        {
            frequencies.Add(pair, increment);
        }
    }

    return table;
}
/// <summary>
/// Performs one update of the per-sentence labelset distribution <paramref name="sij"/> and of the
/// accumulated negative log-likelihood <paramref name="pdata"/>, treating the strong-will and
/// weak-will cases separately and recombining them at the end.
/// </summary>
/// <param name="sij">
/// In: current distribution (read only when <paramref name="pje"/> is null).
/// Out: a new <c>Sij</c> whose time is the old time incremented by one.
/// </param>
/// <param name="pakjl">Per annotator: true labelset -> annotated labelset -> factor multiplied into the numerator.</param>
/// <param name="bekef">Per annotator: (Will, Will) -> base that is raised to the matching <paramref name="okxc"/> exponent.</param>
/// <param name="pje">
/// Optional labelset -> Will -> value table. When null the table is derived from
/// <paramref name="sij"/> using <c>Labelset.HowStrong</c> / <c>Labelset.HowWeak</c>.
/// This table is never modified by this method.
/// </param>
/// <param name="mce">Character -> Will -> weight.</param>
/// <param name="okxc">Annotator -> character -> Will -> exponent applied to the <paramref name="bekef"/> bases.</param>
/// <param name="pdata">
/// In: previous value or null. Out: a new <c>Pdata</c> (time incremented, previous value carried over)
/// to which <c>-log(strong denominator + weak denominator)</c> of every sentence has been added.
/// </param>
/// <param name="pdatas">Not used by this method.</param>
/// <param name="groupIndex">Index into <c>sentence.AnnotaitonGroups</c>.</param>
/// <returns>Always false: the finished flag is never set inside this method.</returns>
static private bool CalculatePdataAndSij(ref Sij sij, PAkjl pakjl, BEkef bekef, Pje pje, Mce mce, IDictionary<Annotator, IDictionary<Character, IDictionary<Will, double>>> okxc, ref Pdata pdata, IList<double> pdatas, int groupIndex)
{
    bool isFinished = false;

    // Numerator of sij, per sentence / labelset / will.
    IDictionary<Sentence, IDictionary<Labelset, IDictionary<Will, double>>> numerator =
        new Dictionary<Sentence, IDictionary<Labelset, IDictionary<Will, double>>>();
    // Denominator of sij (P(data on i)), per sentence / will.
    IDictionary<Sentence, IDictionary<Will, double>> denominator =
        new Dictionary<Sentence, IDictionary<Will, double>>();

    // ---- Numerators ----
    foreach (Sentence sentence in Variable.Sentences)
    {
        numerator.Add(sentence, new Dictionary<Labelset, IDictionary<Will, double>>());

        #region Joint probability P(t,e)
        IDictionary<Labelset, IDictionary<Will, double>> jointje;
        if (pje == null) // PorS == PorSForJointje.S
        {
            // Posterior P(t|e), derived from the current sij.
            jointje = new Dictionary<Labelset, IDictionary<Will, double>>();
            IDictionary<Will, double> willDenominator = new Dictionary<Will, double>();
            willDenominator.Add(Will.strong, 0);
            willDenominator.Add(Will.weak, 0);
            foreach (Labelset labelset in sij.Value[sentence].Keys)
            {
                double valueOfStrong = sij.Value[sentence][labelset] * labelset.HowStrong;
                double valueOfWeak = sij.Value[sentence][labelset] * labelset.HowWeak;
                if (jointje.ContainsKey(labelset))
                {
                    jointje[labelset][Will.strong] += valueOfStrong;
                    jointje[labelset][Will.weak] += valueOfWeak;
                }
                else
                {
                    jointje.Add(labelset, new Dictionary<Will, double>());
                    jointje[labelset].Add(Will.strong, valueOfStrong);
                    jointje[labelset].Add(Will.weak, valueOfWeak);
                }
                willDenominator[Will.strong] += valueOfStrong;
                willDenominator[Will.weak] += valueOfWeak;
            }

            // p(t|e): normalise each will that has non-zero mass.
            foreach (Labelset labelset in jointje.Keys.ToArray())
            {
                if (willDenominator[Will.strong] != 0)
                {
                    jointje[labelset][Will.strong] /= willDenominator[Will.strong];
                }
                if (willDenominator[Will.weak] != 0)
                {
                    jointje[labelset][Will.weak] /= willDenominator[Will.weak];
                }
            }

            // p(t|e) * p(e): skipped as soon as either will has zero mass.
            if (willDenominator[Will.strong] != 0 && willDenominator[Will.weak] != 0)
            {
                foreach (Labelset labelset in jointje.Keys.ToArray())
                {
                    jointje[labelset][Will.strong] *= mce.Value[sentence.Character][Will.strong];
                    jointje[labelset][Will.weak] *= mce.Value[sentence.Character][Will.weak];
                }
            }
        }
        else // PorS == PorSForJointje.P
        {
            // Work on a per-sentence deep copy. Scaling pje.Value itself would leave the
            // mce factor of this sentence in the shared table, so the factors would
            // compound for every following sentence (and for later calls).
            IDictionary<Labelset, IDictionary<Will, double>> sharedJointje = pje.Value;
            jointje = new Dictionary<Labelset, IDictionary<Will, double>>();
            double valueOfStrong = 0;
            double valueOfWeak = 0;
            foreach (KeyValuePair<Labelset, IDictionary<Will, double>> entry in sharedJointje)
            {
                jointje.Add(entry.Key, new Dictionary<Will, double>(entry.Value));
                valueOfStrong += entry.Value[Will.strong];
                valueOfWeak += entry.Value[Will.weak];
            }

            if (valueOfStrong != 0 && valueOfWeak != 0)
            {
                foreach (Labelset labelset in jointje.Keys.ToArray())
                {
                    jointje[labelset][Will.strong] *= mce.Value[sentence.Character][Will.strong];
                    jointje[labelset][Will.weak] *= mce.Value[sentence.Character][Will.weak];
                }
            }
        }
        #endregion

        foreach (Labelset labelsetj in jointje.Keys) // j: true label
        {
            #region P({n}|t,e)
            double valueOfNumeratorForStrong = 1;
            double valueOfNumeratorForWeak = 1;
            foreach (Annotator annotator in sentence.AnnotaitonGroups[groupIndex].AnnotatorAnnotationDic.Keys)
            {
                Labelset labelsetl = sentence.AnnotaitonGroups[groupIndex].AnnotatorAnnotationDic[annotator].ToLabelset(Variable.LabelArray);
                if (pakjl.Value[annotator].ContainsKey(labelsetj))
                {
                    if (pakjl.Value[annotator][labelsetj].ContainsKey(labelsetl))
                    {
                        valueOfNumeratorForStrong *= pakjl.Value[annotator][labelsetj][labelsetl];
                        valueOfNumeratorForWeak *= pakjl.Value[annotator][labelsetj][labelsetl];
                    }
                    else
                    {
                        // No entry for this (true, annotated) pair: the whole product is zero.
                        valueOfNumeratorForStrong = 0;
                        valueOfNumeratorForWeak = 0;
                        break;
                    }
                }
                else
                {
                    // NOTE(review): unlike the branch above, this keeps the partial product
                    // instead of zeroing it before leaving the loop; confirm this is intended.
                    break;
                }

                // Beta part (added for the personality model).
                valueOfNumeratorForStrong *=
                    Math.Pow(bekef.Value[annotator][Tuple.Create(Will.strong, Will.strong)], okxc[annotator][sentence.Character][Will.strong])
                    * Math.Pow(bekef.Value[annotator][Tuple.Create(Will.strong, Will.weak)], okxc[annotator][sentence.Character][Will.weak]);
                valueOfNumeratorForWeak *=
                    Math.Pow(bekef.Value[annotator][Tuple.Create(Will.weak, Will.strong)], okxc[annotator][sentence.Character][Will.strong])
                    * Math.Pow(bekef.Value[annotator][Tuple.Create(Will.weak, Will.weak)], okxc[annotator][sentence.Character][Will.weak]);
            }
            #endregion

            // Multiply by (P(t|e) * P(e)).
            valueOfNumeratorForStrong *= jointje[labelsetj][Will.strong];
            valueOfNumeratorForWeak *= jointje[labelsetj][Will.weak];

            // Labelsets whose numerator is zero for both wills are not stored.
            if (valueOfNumeratorForStrong != 0 || valueOfNumeratorForWeak != 0)
            {
                numerator[sentence].Add(labelsetj, new Dictionary<Will, double>());
                numerator[sentence][labelsetj].Add(Will.strong, valueOfNumeratorForStrong);
                numerator[sentence][labelsetj].Add(Will.weak, valueOfNumeratorForWeak);
            }
        }

        denominator.Add(sentence, new Dictionary<Will, double>());
        denominator[sentence].Add(Will.strong, 0);
        denominator[sentence].Add(Will.weak, 0);
        // It is a sum, so only the labelsets present in the numerator need to be visited.
        foreach (Labelset Labelsetq in numerator[sentence].Keys)
        {
            denominator[sentence][Will.strong] += numerator[sentence][Labelsetq][Will.strong];
            denominator[sentence][Will.weak] += numerator[sentence][Labelsetq][Will.weak];
        }
    }

    // ---- Pdata and Sij ----
    // NOTE(review): the previous pdata.Value is carried into the new Pdata, so the value
    // keeps accumulating across calls; confirm this is intended.
    pdata = pdata != null ? new Pdata(++pdata.Time, pdata.Value) : new Pdata(1, 0);
    sij = new Sij(++sij.Time);
    foreach (Sentence sentence in Variable.Sentences)
    {
        sij.Value.Add(sentence, new Dictionary<Labelset, double>());
        // Sum of the values produced when only one will has mass; used to renormalise them.
        double nocompletValue = 0;
        foreach (Labelset labelset in numerator[sentence].Keys)
        {
            if (denominator[sentence][Will.strong] == 0)
            {
                sij.Value[sentence][labelset] = numerator[sentence][labelset][Will.weak] / denominator[sentence][Will.weak] * mce.Value[sentence.Character][Will.weak];
                nocompletValue += sij.Value[sentence][labelset];
            }
            else if (denominator[sentence][Will.weak] == 0)
            {
                sij.Value[sentence][labelset] = numerator[sentence][labelset][Will.strong] / denominator[sentence][Will.strong] * mce.Value[sentence.Character][Will.strong];
                nocompletValue += sij.Value[sentence][labelset];
            }
            else
            {
                // Law of total probability over both wills.
                sij.Value[sentence][labelset] =
                    numerator[sentence][labelset][Will.strong] / denominator[sentence][Will.strong] * mce.Value[sentence.Character][Will.strong]
                    + numerator[sentence][labelset][Will.weak] / denominator[sentence][Will.weak] * mce.Value[sentence.Character][Will.weak];
            }
        }

        if (nocompletValue != 0)
        {
            foreach (Labelset labelset in numerator[sentence].Keys)
            {
                sij.Value[sentence][labelset] /= nocompletValue;
            }
        }

        pdata.Value += -Math.Log(denominator[sentence][Will.strong] + denominator[sentence][Will.weak]);
    }

    return isFinished;
}
/// <summary>
/// Stores, for every sentence in <paramref name="sij"/>, a per-label numeric result in the
/// result slot that belongs to <paramref name="algorithm"/>.
/// </summary>
/// <param name="sij">Sentence -> labelset -> value table to aggregate.</param>
/// <param name="algorithm">
/// One of "JDDS", "DTDDS", "TDDS" or "SDDS". Any other value is ignored and nothing is written.
/// </param>
/// <param name="group">Index into <c>sentence.AnnotaitonGroups</c>.</param>
static public void ObtainNumericResult(Sij sij, string algorithm, int group)
{
    switch (algorithm)
    {
        case "JDDS":
            foreach (Sentence sentence in sij.Value.Keys)
            {
                sentence.AnnotaitonGroups[group].JDDSNumResult = BuildMarginalNumericResult(sij, sentence);
            }
            break;

        case "DTDDS":
            foreach (Sentence sentence in sij.Value.Keys)
            {
                sentence.AnnotaitonGroups[group].DTDDSNumResult = BuildMarginalNumericResult(sij, sentence);
            }
            break;

        case "TDDS":
            foreach (Sentence sentence in sij.Value.Keys)
            {
                sentence.AnnotaitonGroups[group].TDDSNumResult = BuildMarginalNumericResult(sij, sentence);
            }
            break;

        case "SDDS":
            foreach (Sentence sentence in sij.Value.Keys)
            {
                sentence.AnnotaitonGroups[group].SDDSNumResult = BuildMarginalNumericResult(sij, sentence);
            }
            break;
    }
}

/// <summary>
/// Builds a fresh <c>NumericResult</c> for one sentence: for every label, adds up the sij
/// values of all labelsets in which that label is set.
/// </summary>
static private NumericResult BuildMarginalNumericResult(Sij sij, Sentence sentence)
{
    NumericResult result = new NumericResult();
    foreach (KeyValuePair<Labelset, double> entry in sij.Value[sentence])
    {
        foreach (Label label in Variable.LabelArray)
        {
            if (entry.Key.Labels[label])
            {
                result.Labels[label] += entry.Value;
            }
        }
    }
    return result;
}
/// <summary>
/// Tallies, independently for each label, the transitions between the single-label projections
/// of the top-ranked labelsets of consecutive sentences.
/// </summary>
/// <param name="addOne">
/// When true every transition contributes 1; otherwise it contributes <c>smallerValue</c> of the
/// two top-ranked values (for the opening entry, the value of the first sentence's top-ranked labelset).
/// </param>
/// <param name="labels">Labels for which a separate transition table is built.</param>
/// <param name="sij">Source of the per-sentence ranking, read through <c>SortLabelsets(sentence)[0]</c>.</param>
/// <returns>Label -> (former single-label labelset, later single-label labelset) -> accumulated weight.</returns>
static public IDictionary<Label, IDictionary<Tuple<Labelset, Labelset>, double>> RenewIndependentLabelsetPairFrequencyForSentence(bool addOne, Label[] labels, Sij sij)
{
    // One empty transition table per label.
    IDictionary<Label, IDictionary<Tuple<Labelset, Labelset>, double>> tables =
        new Dictionary<Label, IDictionary<Tuple<Labelset, Labelset>, double>>();
    foreach (Label label in labels)
    {
        tables.Add(label, new Dictionary<Tuple<Labelset, Labelset>, double>());
    }

    double soFar;

    // Opening: a Labelset(true) placeholder precedes the first sentence.
    KeyValuePair<Labelset, double> currentBest = sij.SortLabelsets(Variable.Sentences[0])[0];
    double weight = addOne ? 1 : currentBest.Value;
    foreach (Label label in labels)
    {
        Tuple<Labelset, Labelset> transition =
            new Tuple<Labelset, Labelset>(new Labelset(true), currentBest.Key.ToSingleLabelAnnotation(label));
        IDictionary<Tuple<Labelset, Labelset>, double> table = tables[label];
        if (table.TryGetValue(transition, out soFar))
        {
            table[transition] = soFar + weight;
        }
        else
        {
            table.Add(transition, weight);
        }
    }

    // Middle: every adjacent sentence pair.
    KeyValuePair<Labelset, double> previousBest;
    for (int index = 1; index < Variable.Sentences.Count; ++index)
    {
        previousBest = sij.SortLabelsets(Variable.Sentences[index - 1])[0];
        currentBest = sij.SortLabelsets(Variable.Sentences[index])[0];
        weight = addOne ? 1 : smallerValue(previousBest.Value, currentBest.Value);

        foreach (Label label in labels)
        {
            Tuple<Labelset, Labelset> transition = Tuple.Create(
                previousBest.Key.ToSingleLabelAnnotation(label),
                currentBest.Key.ToSingleLabelAnnotation(label));
            IDictionary<Tuple<Labelset, Labelset>, double> table = tables[label];
            if (table.TryGetValue(transition, out soFar))
            {
                table[transition] = soFar + weight;
            }
            else
            {
                table.Add(transition, weight);
            }
        }
    }

    return tables;
}
/// <summary>
/// Builds the initial <c>Sij</c> (time 1) from raw annotation counts: for each sentence and label
/// the number of annotations with the label set / not set is divided by
/// <c>Variable.NumberOfAnnotationsPerSentenceAfterGrouping</c>, and every labelset receives the
/// product of the matching per-label shares. Labelsets whose product is zero are left out.
/// </summary>
/// <param name="groupIndex">Index into <c>sentence.AnnotaitonGroups</c>.</param>
/// <returns>The newly created <c>Sij</c>.</returns>
static private Sij Initialize(int groupIndex)
{
    Sij sij = new Sij(1);

    // Template with one zero entry per label; copied for every sentence.
    IDictionary<Label, double> zeroPerLabel = new Dictionary<Label, double>();
    foreach (Label label in Variable.LabelArray)
    {
        zeroPerLabel.Add(label, 0);
    }

    // Labelsets are enumerated by index 0 .. 2^(number of labels) - 1.
    double labelsetCount = Math.Pow(2, Variable.LabelArray.Length);

    foreach (Sentence sentence in Variable.Sentences)
    {
        IDictionary<Label, double> shareTrue = new Dictionary<Label, double>(zeroPerLabel);
        IDictionary<Label, double> shareFalse = new Dictionary<Label, double>(zeroPerLabel);

        // Count, per label, how many annotations set it and how many do not.
        foreach (Annotation annotation in sentence.AnnotaitonGroups[groupIndex].AnnotatorAnnotationDic.Values)
        {
            foreach (Label label in Variable.LabelArray)
            {
                if (annotation.Labels[label])
                {
                    shareTrue[label] += 1;
                }
                else
                {
                    shareFalse[label] += 1;
                }
            }
        }

        // Turn the counts into shares.
        foreach (Label label in Variable.LabelArray)
        {
            shareTrue[label] /= Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
            shareFalse[label] /= Variable.NumberOfAnnotationsPerSentenceAfterGrouping;
        }

        // Value of a labelset = product of the per-label shares that match it.
        sij.Value.Add(sentence, new Dictionary<Labelset, double>());
        for (int code = 0; code < labelsetCount; ++code)
        {
            Labelset candidate = new Labelset(Variable.LabelArray, code);
            double product = 1;
            foreach (Label label in candidate.Labels.Keys)
            {
                product *= candidate.Labels[label] ? shareTrue[label] : shareFalse[label];
            }

            if (product != 0)
            {
                sij.Value[sentence].Add(candidate, product);
            }
        }
    }

    return sij;
}
/// <summary>
/// Folds all labelsets of all sentences into a single start-to-end transition table
/// (marked by the original author as too slow and abandoned).
/// </summary>
/// <param name="sij">Sentence -> labelset -> value table.</param>
/// <returns>
/// (former labelset, later labelset) -> accumulated weight, each entry finally divided by the
/// former labelset's <c>NumberOfTypes</c>.
/// </returns>
static public IDictionary<Tuple<Labelset, Labelset>, double> AllLabelsetPairFrequencyForSentence(Sij sij)
{
    IDictionary<Tuple<Labelset, Labelset>, double> pairWeights = new Dictionary<Tuple<Labelset, Labelset>, double>();
    Tuple<Labelset, Labelset> pair;
    double soFar;

    // Opening: a Labelset(true) placeholder precedes every labelset of the first sentence.
    // NOTE(review): the weight is scaled by the later labelset's NumberOfTypes here, while the
    // final pass divides by the former one's (the placeholder); confirm this is intended.
    Sentence opening = Variable.Sentences[0];
    foreach (Labelset later in sij.Value[opening].Keys)
    {
        pair = Tuple.Create(new Labelset(true), later);
        double weight = sij.Value[opening][later] * later.NumberOfTypes;
        if (pairWeights.TryGetValue(pair, out soFar))
        {
            pairWeights[pair] = soFar + weight;
        }
        else
        {
            pairWeights.Add(pair, weight);
        }
    }

    // Middle: from each sentence, look back at its predecessor.
    for (int index = 1; index < Variable.Sentences.Count; ++index)
    {
        Sentence previous = Variable.Sentences[index - 1];
        Sentence current = Variable.Sentences[index];
        foreach (Labelset earlier in sij.Value[previous].Keys)
        {
            foreach (Labelset later in sij.Value[current].Keys)
            {
                pair = Tuple.Create(earlier, later);
                double weight = smallerValue(sij.Value[previous][earlier], sij.Value[current][later]);
                if (pairWeights.TryGetValue(pair, out soFar))
                {
                    pairWeights[pair] = soFar + weight;
                }
                else
                {
                    pairWeights.Add(pair, weight);
                }
            }
        }
    }

    // The sums over-count, so divide each one back out.
    foreach (Tuple<Labelset, Labelset> key in pairWeights.Keys.ToArray())
    {
        pairWeights[key] /= key.Item1.NumberOfTypes;
    }

    return pairWeights;
}