/// <summary>
/// Builds a pruned collection containing only the items of <paramref name="objKeyItemColl"/>
/// whose decayed valid count is still above <c>Parameter.Threshold</c>, rebasing every kept
/// item to offset zero.
/// </summary>
/// <typeparam name="T">Key type of the collection.</typeparam>
/// <param name="objKeyItemColl">Collection to prune. Kept items are modified in place.</param>
/// <returns>
/// A new collection holding the surviving items, with <c>TotalOffset</c> reset to 0,
/// <c>TotalValidCount</c> set to the sum of the surviving decayed counts and <c>Entropy</c>
/// recomputed from them.
/// </returns>
/// <remarks>
/// The surviving <see cref="KeyItemMDL{T}"/> instances are the same objects that live in the
/// input collection; their <c>ValidCount</c> and <c>UpdateOffset</c> are overwritten.
/// The result is assigned the input's <c>Parameter</c>. NOTE(review): if that is a reference
/// type, both collections share it and the resets performed here are visible through the
/// input as well - confirm against the declaration of <c>KeyItemColl</c>.
/// </remarks>
public static KeyItemColl<T> ClearKeyItemColl<T>(KeyItemColl<T> objKeyItemColl)
{
    // Denominator for the per-item probabilities, captured before any parameter is reset.
    // The +1 presumably keeps it non-zero for an empty total - TODO confirm.
    double dTotalVaildCount = objKeyItemColl.Parameter.TotalValidCount + 1;

    KeyItemColl<T> objResultColl = new KeyItemColl<T>();
    objResultColl.Parameter = objKeyItemColl.Parameter;
    objResultColl.Parameter.Entropy = 0;

    double dClearValidCount = 0;
    foreach (KeyItemMDL<T> mdl in objKeyItemColl)
    {
        // Valid count after applying the decay for the distance since the item's last update.
        double dRemeberValue = mdl.ValidCount * MemoryDAL.CalcRemeberValue(
            objKeyItemColl.Parameter.TotalOffset - mdl.UpdateOffset,
            objKeyItemColl.Parameter);

        if (dRemeberValue <= objKeyItemColl.Parameter.Threshold)
        {
            // Decayed to or below the threshold: the item is not carried over.
            continue;
        }

        // Bake the decay into the stored count and rebase the item to offset 0,
        // matching the TotalOffset reset applied to the result below.
        mdl.ValidCount = dRemeberValue;
        mdl.UpdateOffset = 0;

        if (objResultColl.Contains(mdl.Key))
        {
            continue;
        }

        dClearValidCount += mdl.ValidCount;
        double dProb = mdl.ValidCount / dTotalVaildCount;

        // Accumulate on the result's parameter - the one that was reset to 0 above - so the
        // entropy always lands on the returned collection.
        objResultColl.Parameter.Entropy += -dProb * Math.Log(dProb);
        objResultColl.Add(mdl);
    }

    objResultColl.Parameter.TotalOffset = 0;
    objResultColl.Parameter.TotalValidCount = dClearValidCount;
    return objResultColl;
}
/// <summary>
/// Returns the decay factor for the item stored under <paramref name="key"/>, computed by
/// <c>MemoryDAL.CalcRemeberValue</c> from the distance between the collection's
/// <c>TotalOffset</c> and the item's <c>UpdateOffset</c>.
/// </summary>
/// <typeparam name="T">Key type of the collection.</typeparam>
/// <param name="key">Key to look up.</param>
/// <param name="objMemoryItemColl">Collection that may contain the key.</param>
/// <returns>The decay factor, or 0 when the key is not in the collection.</returns>
public static double CalcRemeberValue<T>(T key, KeyItemColl<T> objMemoryItemColl)
{
    if (!objMemoryItemColl.Contains(key))
    {
        return 0;
    }

    // Distance travelled since this item was last updated.
    var elapsed = objMemoryItemColl.Parameter.TotalOffset - objMemoryItemColl[key].UpdateOffset;
    return MemoryDAL.CalcRemeberValue(elapsed, objMemoryItemColl.Parameter);
}
/// <summary>
/// Returns the decayed valid count of the item stored under <paramref name="key"/>:
/// its <c>ValidCount</c> multiplied by its current decay factor.
/// </summary>
/// <typeparam name="T">Key type of the collection.</typeparam>
/// <param name="key">Key to look up.</param>
/// <param name="objKeyItemColl">Collection that may contain the key.</param>
/// <returns>The decayed valid count, or 0 when the key is not in the collection.</returns>
public static double CalcValidCount<T>(T key, KeyItemColl<T> objKeyItemColl)
{
    if (objKeyItemColl.Contains(key))
    {
        KeyItemMDL<T> found = objKeyItemColl[key];
        return found.ValidCount * CalcRemeberValue(found, objKeyItemColl);
    }

    // Unknown keys carry no weight.
    return 0;
}
/// <summary>
/// Renders a text report of the items in <paramref name="objKeyItemColl"/>, ordered by their
/// decayed valid count.
/// </summary>
/// <param name="objKeyItemColl">Collection to report on.</param>
/// <param name="nTopCount">Maximum number of item lines to emit.</param>
/// <param name="bIsOnlyWord">
/// When true, only keys longer than one character that do not consist entirely of digits,
/// punctuation, <c>\p{C}</c> characters and ASCII letters are included.
/// </param>
/// <param name="bIsOrderbyDesc">True to sort by decayed valid count descending, false for ascending.</param>
/// <param name="bIsShowTitle">True to emit the column-title line first.</param>
/// <param name="splitChar">Substring replaced by "×" in each printed key.</param>
/// <param name="spaceChar">Substring replaced by "$" in each printed key.</param>
/// <returns>
/// The report text: an optional title line (columns: term, decayed frequency, total
/// frequency, term weight, maturity), a summary line with the number of filtered items, the
/// rounded sum of their decayed counts and the collection entropy, then one line per item.
/// </returns>
/// <remarks>
/// The count and sum on the summary line cover every item that passes the key filter,
/// including items below <c>Parameter.Threshold</c>; the threshold is applied only to the
/// per-item lines. The last column (maturity) is always emitted empty.
/// </remarks>
public static string ShowKeyItemColl(KeyItemColl<string> objKeyItemColl, int nTopCount, bool bIsOnlyWord = true, bool bIsOrderbyDesc = true, bool bIsShowTitle = true, string splitChar = "\t", string spaceChar = "\r")
{
    StringBuilder sb = new StringBuilder();
    if (bIsShowTitle)
    {
        sb.AppendLine(String.Format("[{0}]{1}|{2}|{3}|{4}", "词项", "遗忘词频", "总词频", "词权重", "成熟度"));
    }

    // Filter the keys and compute each item's decayed valid count exactly once.
    var scored = objKeyItemColl
        .Where(x => !bIsOnlyWord || (x.Key.Length > 1 && !Regex.IsMatch(x.Key, @"^[\d\p{P}\p{C}a-zA-Z]+$")))
        .Select(x => new { Item = x, RememberValue = x.ValidCount * CalcRemeberValue(x, objKeyItemColl) });

    // Materialise the sorted result: it is consumed three times below (count, sum, listing),
    // and a deferred query would redo the filter, the decay calculation and the sort each time.
    var ranked = (bIsOrderbyDesc
        ? scored.OrderByDescending(e => e.RememberValue)
        : scored.OrderBy(e => e.RememberValue)).ToList();

    sb.AppendLine(String.Format("=============={0}/{1}|{2}=============",
        ranked.Count,
        Math.Round(ranked.Sum(e => e.RememberValue)),
        Math.Round(objKeyItemColl.Parameter.Entropy, 4)));

    // Reference total used by the weight column, derived from the decay factor at offset 1.
    double dTotalVaildCount = 1.0 / (1.0 - MemoryDAL.CalcRemeberValue(1, objKeyItemColl.Parameter));

    int nRecordCount = 0;
    foreach (var entry in ranked)
    {
        double dRemeberValue = entry.RememberValue;
        if (dRemeberValue < objKeyItemColl.Parameter.Threshold)
        {
            // Below the threshold: not listed and not counted towards nTopCount.
            continue;
        }
        if (nRecordCount >= nTopCount)
        {
            break;
        }

        KeyItemMDL<string> mdl = entry.Item;
        sb.AppendLine(String.Format("[{0}]{1}|{2}|{3}|{4}",
            mdl.Key.Replace(splitChar, "×").Replace(spaceChar, "$"),
            Math.Round(dRemeberValue, 4),
            Math.Round(mdl.TotalCount),
            Math.Round(dRemeberValue * Math.Log(dTotalVaildCount / dRemeberValue), 4),
            ""));
        nRecordCount += 1;
    }

    return sb.ToString();
}
/// <summary>
/// Records one observation of <paramref name="key"/>: creates the item if it is missing,
/// decays and increments its counts, then advances the collection's running totals.
/// </summary>
/// <typeparam name="T">Key type of the collection.</typeparam>
/// <param name="key">Key being observed.</param>
/// <param name="objKeyItemColl">Collection to update in place.</param>
/// <param name="objWeightMDL">Supplies the <c>Weight</c> added to the counts and the <c>Offset</c> by which the collection advances.</param>
/// <returns>The item stored under <paramref name="key"/> after the update.</returns>
public static KeyItemMDL<T> UpdateKeyItemColl<T>(T key, KeyItemColl<T> objKeyItemColl, OffsetWeightMDL objWeightMDL)
{
    bool alreadyKnown = objKeyItemColl.Contains(key);
    if (!alreadyKnown)
    {
        // A new item starts at the current offset, so no decay applies to it below.
        objKeyItemColl.Add(new KeyItemMDL<T>
        {
            Key = key,
            UpdateOffset = objKeyItemColl.Parameter.TotalOffset
        });
    }

    KeyItemMDL<T> entry = objKeyItemColl[key];

    // Decay the stored valid count for the distance since the last update, then add the new weight.
    var validDecay = MemoryDAL.CalcRemeberValue(
        objKeyItemColl.Parameter.TotalOffset - entry.UpdateOffset,
        objKeyItemColl.Parameter);
    entry.ValidCount = objWeightMDL.Weight + entry.ValidCount * validDecay;

    // NOTE(review): this decay is computed from objWeightMDL.Weight rather than from an
    // offset like the other two - confirm that this is intended.
    var totalDecay = MemoryDAL.CalcRemeberValue(objWeightMDL.Weight, objKeyItemColl.Parameter);
    entry.TotalCount = objWeightMDL.Weight + entry.TotalCount * totalDecay;

    entry.UpdateOffset = objKeyItemColl.Parameter.TotalOffset;

    // Decay the collection-wide total by the step about to be taken, then take the step.
    var collectionDecay = MemoryDAL.CalcRemeberValue(objWeightMDL.Offset, objKeyItemColl.Parameter);
    objKeyItemColl.Parameter.TotalValidCount = objWeightMDL.Weight + objKeyItemColl.Parameter.TotalValidCount * collectionDecay;
    objKeyItemColl.Parameter.TotalOffset += objWeightMDL.Offset;

    return entry;
}
/// <summary>
/// Returns the decayed valid count of <paramref name="mdl"/>: its <c>ValidCount</c>
/// multiplied by its current decay factor within <paramref name="objKeyItemColl"/>.
/// </summary>
/// <typeparam name="T">Key type of the collection.</typeparam>
/// <param name="mdl">Item whose count is evaluated.</param>
/// <param name="objKeyItemColl">Collection supplying the current offset and decay parameters.</param>
/// <returns>The decayed valid count.</returns>
public static double CalcValidCount<T>(KeyItemMDL<T> mdl, KeyItemColl<T> objKeyItemColl)
{
    var retention = CalcRemeberValue(mdl, objKeyItemColl);
    return mdl.ValidCount * retention;
}
/// <summary>
/// Returns the decay factor for <paramref name="mdl"/>, computed by
/// <c>MemoryDAL.CalcRemeberValue</c> from the distance between the collection's
/// <c>TotalOffset</c> and the item's <c>UpdateOffset</c>.
/// </summary>
/// <typeparam name="T">Key type of the collection.</typeparam>
/// <param name="mdl">Item whose decay factor is evaluated.</param>
/// <param name="objMemoryBondColl">Collection supplying the current offset and decay parameters.</param>
/// <returns>The decay factor reported by <c>MemoryDAL.CalcRemeberValue</c>.</returns>
public static double CalcRemeberValue<T>(KeyItemMDL<T> mdl, KeyItemColl<T> objMemoryBondColl)
{
    // Distance travelled since this item was last updated.
    var elapsed = objMemoryBondColl.Parameter.TotalOffset - mdl.UpdateOffset;
    return MemoryDAL.CalcRemeberValue(elapsed, objMemoryBondColl.Parameter);
}