Пример #1
0
        /// <summary>
        /// Computes an association coefficient for the pair (keyHead, keyTail) in the style of
        /// pointwise mutual information: P(tail | head) / P(tail).
        /// </summary>
        /// <typeparam name="T">Key type used by the bond collection.</typeparam>
        /// <param name="keyHead">First key of the adjacent pair.</param>
        /// <param name="keyTail">Second key of the adjacent pair.</param>
        /// <param name="objKeyBondColl">Collection that is looked up by key to obtain the key's item statistics and its link collection.</param>
        /// <returns>
        /// The ratio (shared count / shared total) / (tail count / overall total), or 0 when either key is
        /// absent, the head has no link to the tail, or a count is below the applicable threshold.
        /// </returns>
        public static double CalcBondRelateValueByPMI <T>(T keyHead, T keyTail, KeyBondColl <T, T> objKeyBondColl)
        {
            // Nothing to relate unless both keys are known and the head is linked to the tail.
            // The checks short-circuit left to right, so the indexer is only used for a known head.
            if (!objKeyBondColl.Contains(keyHead) ||
                !objKeyBondColl.Contains(keyTail) ||
                !objKeyBondColl[keyHead].LinkColl.Contains(keyTail))
            {
                return 0;
            }

            // Frequency of each key on its own: its valid count scaled by CalcRemeberValue.
            double headCount = objKeyBondColl[keyHead].KeyItem.ValidCount * CalcRemeberValue <T, T>(keyHead, objKeyBondColl);
            double tailCount = objKeyBondColl[keyTail].KeyItem.ValidCount * CalcRemeberValue <T, T>(keyTail, objKeyBondColl);

            bool keyTooRare = headCount < objKeyBondColl.Parameter.Threshold ||
                              tailCount < objKeyBondColl.Parameter.Threshold;
            if (keyTooRare)
            {
                return 0;
            }

            // The overall total is read from the collection's parameters.
            double overallTotal = objKeyBondColl.Parameter.TotalValidCount;

            // Co-occurrence frequency of the pair, taken from the head's link collection.
            KeyItemColl <T> headLinks = objKeyBondColl[keyHead].LinkColl;
            KeyItemMDL <T>  pairItem  = headLinks[keyTail];

            double sharedCount = pairItem.ValidCount * KeyItemHelper.CalcRemeberValue(pairItem, headLinks);
            double sharedTotal = headLinks.Parameter.TotalValidCount;

            bool pairTooRare = sharedTotal < headLinks.Parameter.Threshold ||
                               sharedCount < headLinks.Parameter.Threshold;
            if (pairTooRare)
            {
                return 0;
            }

            // P(AB) = P(B|A) * P(A), hence P(AB) / (P(A) * P(B)) = P(B|A) / P(B).
            double tailGivenHead = sharedCount / sharedTotal;
            double tailAlone     = tailCount / overallTotal;

            return tailGivenHead / tailAlone;
        }
Пример #2
0
        /// <summary>
        /// Computes the ratio P(tail | head) / P(tail) with 1 added to every count, so that
        /// missing keys or links fall back to default counts instead of ending the calculation early.
        /// </summary>
        /// <typeparam name="T">Key type used by the bond collection.</typeparam>
        /// <param name="keyHead">First key of the adjacent pair.</param>
        /// <param name="keyTail">Second key of the adjacent pair.</param>
        /// <param name="objKeyBondColl">Collection that is looked up by key to obtain the key's item statistics and its link collection.</param>
        /// <returns>The ratio (shared count / shared total) / (tail count / overall total), each count offset by 1.</returns>
        public static double CalcBondRelateValueWithLaplace <T>(T keyHead, T keyTail, KeyBondColl <T, T> objKeyBondColl)
        {
            // Single-key frequencies: 1 for an unknown key, otherwise 1 + valid count scaled by CalcRemeberValue.
            // The head frequency does not enter the final ratio; it is still evaluated exactly as before.
            double headCount = !objKeyBondColl.Contains(keyHead)
                ? 1
                : 1 + objKeyBondColl[keyHead].KeyItem.ValidCount * CalcRemeberValue <T, T>(keyHead, objKeyBondColl);

            double tailCount;
            if (objKeyBondColl.Contains(keyTail))
            {
                tailCount = 1 + objKeyBondColl[keyTail].KeyItem.ValidCount * CalcRemeberValue <T, T>(keyTail, objKeyBondColl);
            }
            else
            {
                tailCount = 1;
            }

            // Overall total: 1 + 1 / (1 - CalcRemeberValue(1, Parameter)).
            double complement   = 1 - MemoryDAL.CalcRemeberValue(1, objKeyBondColl.Parameter);
            double overallTotal = 1 + 1.0 / complement;

            // Co-occurrence frequency: use the head's links when the head is known, else an empty collection.
            KeyItemColl <T> headLinks;
            if (objKeyBondColl.Contains(keyHead))
            {
                headLinks = objKeyBondColl[keyHead].LinkColl;
            }
            else
            {
                headLinks = new KeyItemColl <T>();
            }

            // A missing link is represented by a freshly constructed item.
            KeyItemMDL <T> pairItem;
            if (headLinks.Contains(keyTail))
            {
                pairItem = headLinks[keyTail];
            }
            else
            {
                pairItem = new KeyItemMDL <T>();
            }

            double sharedCount = 1 + pairItem.ValidCount * KeyItemHelper.CalcRemeberValue(pairItem, headLinks);

            double sharedTotal;
            if (objKeyBondColl.Contains(keyHead))
            {
                sharedTotal = 1 + objKeyBondColl[keyHead].KeyItem.TotalCount;
            }
            else
            {
                sharedTotal = 1;
            }

            // P(AB) = P(B|A) * P(A), hence P(AB) / (P(A) * P(B)) = P(B|A) / P(B).
            double tailGivenHead = sharedCount / sharedTotal;
            double tailAlone     = tailCount / overallTotal;

            return tailGivenHead / tailAlone;
        }
Пример #3
0
        /// <summary>
        /// Computes an association value for the pair (keyHead, keyTail) as the difference between an
        /// entropy term for the pair and an average-entropy term derived from the head's link count.
        /// </summary>
        /// <typeparam name="T">Key type used by the bond collection.</typeparam>
        /// <param name="keyHead">First key of the adjacent pair.</param>
        /// <param name="keyTail">Second key of the adjacent pair.</param>
        /// <param name="objKeyBondColl">Collection that is looked up by key to obtain the key's item statistics and its link collection.</param>
        /// <returns>
        /// The pair entropy term minus (ln(link count) + Euler constant - 1), or 0 when either key is
        /// absent, the head has no link to the tail, or a count is below the applicable threshold.
        /// </returns>
        public static double CalcBondRelateValueByAverageEntropy <T>(T keyHead, T keyTail, KeyBondColl <T, T> objKeyBondColl)
        {
            // Nothing to relate unless both keys are known and the head is linked to the tail.
            // The checks short-circuit left to right, so the indexer is only used for a known head.
            if (!objKeyBondColl.Contains(keyHead) ||
                !objKeyBondColl.Contains(keyTail) ||
                !objKeyBondColl[keyHead].LinkColl.Contains(keyTail))
            {
                return 0;
            }

            // Frequency of each key on its own: its valid count scaled by CalcRemeberValue.
            double headCount = objKeyBondColl[keyHead].KeyItem.ValidCount * CalcRemeberValue <T, T>(keyHead, objKeyBondColl);
            double tailCount = objKeyBondColl[keyTail].KeyItem.ValidCount * CalcRemeberValue <T, T>(keyTail, objKeyBondColl);

            bool keyTooRare = headCount < objKeyBondColl.Parameter.Threshold ||
                              tailCount < objKeyBondColl.Parameter.Threshold;
            if (keyTooRare)
            {
                return 0;
            }

            // Co-occurrence frequency of the pair, taken from the head's link collection.
            KeyItemColl <T> headLinks = objKeyBondColl[keyHead].LinkColl;
            KeyItemMDL <T>  pairItem  = headLinks[keyTail];

            double sharedCount = pairItem.ValidCount * KeyItemHelper.CalcRemeberValue(pairItem, headLinks);
            double sharedTotal = headLinks.Parameter.TotalValidCount;

            bool pairTooRare = sharedTotal < headLinks.Parameter.Threshold ||
                               sharedCount < headLinks.Parameter.Threshold;
            if (pairTooRare)
            {
                return 0;
            }

            // Euler-Mascheroni constant, used in the average-entropy term below.
            const double EulerConstant = 0.5772156649;

            // Number of distinct links recorded for the head key.
            double linkCount = objKeyBondColl[keyHead].LinkColl.Count;

            // Average-entropy term: ln(n) + gamma - 1.
            double averageEntropy = Math.Log(linkCount) + EulerConstant - 1;

            // Pair term: n * (shared / total) * (ln(total) - ln(shared)).
            double scaledShare = linkCount * sharedCount / sharedTotal;
            double logGap      = Math.Log(sharedTotal) - Math.Log(sharedCount);
            double pairEntropy = scaledShare * logGap;

            return pairEntropy - averageEntropy;
        }