/// <summary>
/// Calculates the association coefficient of an adjacent key pair using mutual information.
/// </summary>
/// <typeparam name="T">Type of the keys stored in the bond collection.</typeparam>
/// <param name="keyHead">First (leading) key of the pair.</param>
/// <param name="keyTail">Second (trailing) key of the pair.</param>
/// <param name="objKeyBondColl">Bond collection supplying per-key items, link collections and parameters.</param>
/// <returns>
/// <c>0</c> when either key is missing from the collection, when the head's link collection
/// does not contain the tail, or when any of the counts falls below the corresponding
/// <c>Parameter.Threshold</c>; otherwise the ratio P(tail | head) / P(tail).
/// </returns>
public static double CalcBondRelateValueByPMI<T>(T keyHead, T keyTail, KeyBondColl<T, T> objKeyBondColl)
{
    // No smoothing is applied here: unknown keys and unlinked pairs score 0 outright.
    bool keysKnown = objKeyBondColl.Contains(keyHead) && objKeyBondColl.Contains(keyTail);
    if (!keysKnown || !objKeyBondColl[keyHead].LinkColl.Contains(keyTail))
    {
        return 0;
    }

    // Individual counts: each key's ValidCount weighted by its CalcRemeberValue factor.
    // The head count is only used for the threshold test below, not in the final ratio.
    double headCount = objKeyBondColl[keyHead].KeyItem.ValidCount * CalcRemeberValue<T, T>(keyHead, objKeyBondColl);
    double tailCount = objKeyBondColl[keyTail].KeyItem.ValidCount * CalcRemeberValue<T, T>(keyTail, objKeyBondColl);
    if (headCount < objKeyBondColl.Parameter.Threshold)
    {
        return 0;
    }
    if (tailCount < objKeyBondColl.Parameter.Threshold)
    {
        return 0;
    }

    // Overall total for the single-key probability.
    // An earlier revision derived this as 1.0 / (1 - MemoryDAL.CalcRemeberValue(1, objKeyBondColl.Parameter)).
    double totalCount = objKeyBondColl.Parameter.TotalValidCount;

    // Co-occurrence statistics of the pair, taken from the head's link collection.
    KeyItemColl<T> headLinks = objKeyBondColl[keyHead].LinkColl;
    KeyItemMDL<T> pairItem = headLinks[keyTail];
    double pairCount = pairItem.ValidCount * KeyItemHelper.CalcRemeberValue(pairItem, headLinks);
    double pairTotal = headLinks.Parameter.TotalValidCount;
    if (pairTotal < headLinks.Parameter.Threshold)
    {
        return 0;
    }
    if (pairCount < headLinks.Parameter.Threshold)
    {
        return 0;
    }

    // P(AB) = P(B|A) * P(A), hence P(AB) / (P(A) * P(B)) = P(B|A) / P(B).
    double tailGivenHead = pairCount / pairTotal;
    double tailProbability = tailCount / totalCount;
    return tailGivenHead / tailProbability;
}
/// <summary>
/// Calculates the association coefficient of an adjacent key pair as P(tail | head) / P(tail),
/// adding 1 to every count so that missing keys or missing links still produce a value
/// instead of short-circuiting to 0.
/// </summary>
/// <typeparam name="T">Type of the keys stored in the bond collection.</typeparam>
/// <param name="keyHead">First (leading) key of the pair.</param>
/// <param name="keyTail">Second (trailing) key of the pair.</param>
/// <param name="objKeyBondColl">Bond collection supplying per-key items, link collections and parameters.</param>
/// <returns>The ratio (pair count / pair total) / (tail count / total count), computed from the add-one counts.</returns>
public static double CalcBondRelateValueWithLaplace<T>(T keyHead, T keyTail, KeyBondColl<T, T> objKeyBondColl)
{
    // Individual counts; every count starts from the add-one floor of 1.
    // NOTE(review): headCount does not take part in the returned ratio; it is kept so
    // that the same lookups and helper calls are performed as before.
    double headCount = 1;
    if (objKeyBondColl.Contains(keyHead))
    {
        headCount = 1 + objKeyBondColl[keyHead].KeyItem.ValidCount * CalcRemeberValue<T, T>(keyHead, objKeyBondColl);
    }

    double tailCount = 1;
    if (objKeyBondColl.Contains(keyTail))
    {
        tailCount = 1 + objKeyBondColl[keyTail].KeyItem.ValidCount * CalcRemeberValue<T, T>(keyTail, objKeyBondColl);
    }

    // Overall total derived from the collection's parameters via MemoryDAL.CalcRemeberValue.
    var rememberValue = MemoryDAL.CalcRemeberValue(1, objKeyBondColl.Parameter);
    double totalCount = 1 + 1.0 / (1 - rememberValue);

    // Co-occurrence statistics; fall back to empty placeholders when the head or the link is unknown.
    KeyItemColl<T> headLinks;
    if (objKeyBondColl.Contains(keyHead))
    {
        headLinks = objKeyBondColl[keyHead].LinkColl;
    }
    else
    {
        headLinks = new KeyItemColl<T>();
    }

    KeyItemMDL<T> pairItem;
    if (headLinks.Contains(keyTail))
    {
        pairItem = headLinks[keyTail];
    }
    else
    {
        pairItem = new KeyItemMDL<T>();
    }

    double pairCount = 1 + pairItem.ValidCount * KeyItemHelper.CalcRemeberValue(pairItem, headLinks);

    // The pair total is based on the head item's TotalCount (not on the link collection's parameters).
    double pairTotal = 1;
    if (objKeyBondColl.Contains(keyHead))
    {
        pairTotal = 1 + objKeyBondColl[keyHead].KeyItem.TotalCount;
    }

    // P(AB) = P(B|A) * P(A), hence P(AB) / (P(A) * P(B)) = P(B|A) / P(B).
    double tailGivenHead = pairCount / pairTotal;
    double tailProbability = tailCount / totalCount;
    return tailGivenHead / tailProbability;
}
/// <summary>
/// Calculates the association of an adjacent key pair using average information entropy.
/// </summary>
/// <typeparam name="T">Type of the keys stored in the bond collection.</typeparam>
/// <param name="keyHead">First (leading) key of the pair.</param>
/// <param name="keyTail">Second (trailing) key of the pair.</param>
/// <param name="objKeyBondColl">Bond collection supplying per-key items, link collections and parameters.</param>
/// <returns>
/// <c>0</c> when either key is missing from the collection, when the head's link collection
/// does not contain the tail, or when any of the counts falls below the corresponding
/// <c>Parameter.Threshold</c>; otherwise
/// (n * s / t) * (ln t - ln s) - (ln n + 0.5772156649 - 1), where n is the number of entries in
/// the head's link collection, s the weighted pair count and t the link collection's total valid count.
/// </returns>
public static double CalcBondRelateValueByAverageEntropy<T>(T keyHead, T keyTail, KeyBondColl<T, T> objKeyBondColl)
{
    // Euler–Mascheroni constant (truncated), used in the baseline term below.
    const double EulerConstant = 0.5772156649;

    // Unknown keys and unlinked pairs score 0 outright.
    bool keysKnown = objKeyBondColl.Contains(keyHead) && objKeyBondColl.Contains(keyTail);
    if (!keysKnown || !objKeyBondColl[keyHead].LinkColl.Contains(keyTail))
    {
        return 0;
    }

    // Individual counts: each key's ValidCount weighted by its CalcRemeberValue factor.
    // They only gate the calculation; neither appears in the returned value.
    double headCount = objKeyBondColl[keyHead].KeyItem.ValidCount * CalcRemeberValue<T, T>(keyHead, objKeyBondColl);
    double tailCount = objKeyBondColl[keyTail].KeyItem.ValidCount * CalcRemeberValue<T, T>(keyTail, objKeyBondColl);
    if (headCount < objKeyBondColl.Parameter.Threshold)
    {
        return 0;
    }
    if (tailCount < objKeyBondColl.Parameter.Threshold)
    {
        return 0;
    }

    // Co-occurrence statistics of the pair, taken from the head's link collection.
    KeyItemColl<T> headLinks = objKeyBondColl[keyHead].LinkColl;
    KeyItemMDL<T> pairItem = headLinks[keyTail];
    double pairCount = pairItem.ValidCount * KeyItemHelper.CalcRemeberValue(pairItem, headLinks);
    double pairTotal = headLinks.Parameter.TotalValidCount;
    if (pairTotal < headLinks.Parameter.Threshold)
    {
        return 0;
    }
    if (pairCount < headLinks.Parameter.Threshold)
    {
        return 0;
    }

    // Number of entries linked from the head key.
    double linkCount = objKeyBondColl[keyHead].LinkColl.Count;

    // Baseline term: ln(n) + Euler constant - 1.
    // NOTE(review): presumably the expected (average) entropy for n link entries — confirm the derivation.
    double averageEntropy = Math.Log(linkCount) + EulerConstant - 1;

    // Pair term: (n * s / t) * (ln t - ln s).
    double weight = linkCount * pairCount / pairTotal;
    double logGap = Math.Log(pairTotal) - Math.Log(pairCount);
    double keywordEntropy = weight * logGap;

    return keywordEntropy - averageEntropy;
}