// Reads chat records from the text file `fileName` and appends them to `data`.
//
// Each record occupies two consecutive lines: a date written as MM/dd/yyyy
// (parsed with the invariant culture) followed by the record text.
//
// A trailing date line that has no text line after it (a truncated file) is
// ignored rather than stored as a record with null text, because the matching
// code in this file calls string methods on the text of every stored record.
//
// Throws FormatException when a date line does not match MM/dd/yyyy, plus
// whatever StreamReader throws when the file cannot be opened.
public static void LoadChatDataList(List<ChatData> data, string fileName)
{
    using (StreamReader stream = new StreamReader(fileName))
    {
        string time;

        // ReadLine returns null at end of stream, so it doubles as the loop condition.
        while ((time = stream.ReadLine()) != null)
        {
            string text = stream.ReadLine();
            if (text == null)
            {
                // Incomplete final record: date present, text missing.
                break;
            }

            DateTime stamp = DateTime.ParseExact(time, "MM/dd/yyyy", CultureInfo.InvariantCulture);
            data.Add(new ChatData(text, stamp));
        }
    }
}
// Appends `x` to the memory list unless the closest existing entry is already
// too similar to it.
//
// Returns true when a new entry (stamped with the current local time) was
// added, false when the similarity between `x` and its closest match exceeds
// nearEPS. When no closest match exists the comparison is made against the
// empty string.
public bool AddMemory(string x)
{
    ChatData closest = Chatbot.SingletonClassify(memory, x, gram);

    string closestText = "";
    if (closest != null)
    {
        closestText = closest.text;
    }

    // The memory already holds something similar, so refuse the duplicate.
    bool alreadyRemembered = Chatbot.Similarity(closestText, x, gram) > nearEPS;
    if (alreadyRemembered)
    {
        return false;
    }

    memory.Add(new ChatData(x, DateTime.Now));
    return true;
}
// Returns the entry of `list` whose question text overlaps most with `input`,
// or null when no entry scores above zero.
//
// The score of a candidate text is the sum, over every feature of `input`
// that the candidate also has, of the product of the two feature values
// (features come from GetFeature(text, gram)). Ties keep the earlier entry.
// Entries whose text contains <DEL> are never returned.
//
// is2PreKnowledge == false: every entry is a candidate. If the text contains
//   <Q> it is replaced by its <Q> token; if the resulting text contains both
//   <Q1> and <Q2> it is replaced by the concatenation of those two tokens.
// is2PreKnowledge == true: only entries containing both <Q1> and <Q2> are
//   candidates (single-<Q> entries score zero). Each premise is scored on its
//   own and the entry's score is the better of the two, which lets the caller
//   find the two-premise entry that has one premise matching `input`.
public static ChatData SingletonClassify(List<ChatData> list, string input, int gram, bool is2PreKnowledge = false)
{
    ChatData argmax = null;
    double maxLikelihood = 0;
    FeatureList inputDict = GetFeature(input, gram);

    for (int r = 0; r < list.Count; r++)
    {
        string text = list[r].text;
        if (text.Contains("<DEL>"))
        {
            // Deleted entries must not be matched in either mode.
            continue;
        }

        double likelihood = 0;

        if (!is2PreKnowledge)
        {
            string content = text;
            if (content.Contains("<Q>"))
            {
                content = GetToken(content, "<Q>");
            }
            if (content.Contains("<Q1>") && content.Contains("<Q2>"))
            {
                content = GetToken(content, "<Q1>") + GetToken(content, "<Q2>");
            }

            likelihood = FeatureOverlapScore(inputDict, GetFeature(content, gram));
        }
        else if (text.Contains("<Q1>") && text.Contains("<Q2>"))
        {
            // Both tokens are extracted from the untouched entry text; taking
            // <Q2> from the already-extracted <Q1> token would never find it.
            double firstPremise = FeatureOverlapScore(inputDict, GetFeature(GetToken(text, "<Q1>"), gram));
            double secondPremise = FeatureOverlapScore(inputDict, GetFeature(GetToken(text, "<Q2>"), gram));
            likelihood = Math.Max(firstPremise, secondPremise);
        }

        if (likelihood > maxLikelihood)
        {
            argmax = list[r];
            maxLikelihood = likelihood;
        }
    }

    return argmax;
}

// Sums candidate-value * input-value over the features present in both lists.
private static double FeatureOverlapScore(FeatureList input, FeatureList candidate)
{
    double score = 0;
    foreach (var pair in input.feature)
    {
        if (candidate.feature.ContainsKey(pair.Key))
        {
            score += candidate.feature[pair.Key] * pair.Value;
        }
    }
    return score;
}
// Dispatches one line of user input to the matching bot command and returns
// the reply text.
//
// Recognised prefixes, checked in this order: <HELP>, <TEA>, <READ>, <DEL>,
// <CHAT>, <CLEAR>, <MEM>, <ADDMEM>. Anything else (including null, or <TEA>
// with nothing after it) yields "¿".
//
// Prefixes are matched ordinally so that the number of characters stripped
// from the front of `input` always equals the length of the matched tag.
public string BotInput(string input)
{
    if (input == null)
    {
        return "¿";
    }

    if (input.StartsWith("<HELP>", StringComparison.Ordinal))
    {
        return HELP;
    }

    if (input.StartsWith("<TEA>", StringComparison.Ordinal))
    {
        string content = input.Substring("<TEA>".Length).Trim();
        if (content == "")
        {
            return "¿";
        }
        return Teach(content);
    }

    if (input.StartsWith("<READ>", StringComparison.Ordinal))
    {
        // Builder instead of repeated string concatenation inside the loops.
        System.Text.StringBuilder report = new System.Text.StringBuilder();
        report.Append("Ground Truth:" + Environment.NewLine);
        foreach (var i in LM.groundTruth)
        {
            report.Append(i.time + "\t" + i.text + Environment.NewLine);
        }
        report.Append("Knowledge:" + Environment.NewLine);
        foreach (var i in LM.knowledge)
        {
            report.Append(i.time + "\t" + i.text + Environment.NewLine);
        }
        return report.ToString();
    }

    if (input.StartsWith("<DEL>", StringComparison.Ordinal))
    {
        return HandleDeleteCommand(input.Substring("<DEL>".Length).Trim());
    }

    if (input.StartsWith("<CHAT>", StringComparison.Ordinal))
    {
        return Chat(input.Substring("<CHAT>".Length).Trim());
    }

    if (input.StartsWith("<CLEAR>", StringComparison.Ordinal))
    {
        SM.memory.Clear();
        return "短时记忆清空完成" + Environment.NewLine;
    }

    if (input.StartsWith("<MEM>", StringComparison.Ordinal))
    {
        return SM.outputMemory();
    }

    if (input.StartsWith("<ADDMEM>", StringComparison.Ordinal))
    {
        string content = input.Substring("<ADDMEM>".Length).Trim();
        if (SM.AddMemory(content))
        {
            return "已加入缓存。加入的内容为:" + Environment.NewLine + content + Environment.NewLine + "<NEXT>";
        }
        return "内容为:" + content + Environment.NewLine + "的指令已被写入缓存,请勿重复添加。";
    }

    return "¿";
}

// Two-step deletion of the stored entry closest to `content`.
//
// The target is the best match from LM.groundTruth or LM.knowledge, whichever
// is more similar to `content`. The first call only arms the deletion and
// echoes the target; a second call with the same `content` marks the target
// by prefixing its text with <DEL> and saves both lists. A second call with a
// different `content` cancels.
private string HandleDeleteCommand(string content)
{
    ChatData cor = SingletonClassify(LM.groundTruth, content, gram);
    ChatData corKL = SingletonClassify(LM.knowledge, content, gram);

    // -114514 is the "no candidate" sentinel, below any real similarity.
    double simCOR = (cor == null ? -114514 : Similarity(cor.text, content, gram));
    double simCORKL = (corKL == null ? -114514 : Similarity(corKL.text, content, gram));
    if (simCORKL > simCOR)
    {
        cor = corKL;
    }

    if (cor == null)
    {
        // Nothing matched, e.g. the knowledge base is empty.
        return "没能找到要删除的知识" + Environment.NewLine;
    }

    if (!DELCorrectness)
    {
        // First request: remember it and ask for confirmation.
        DELCorrectness = true;
        DELCorrectItem = content;
        return "请再次输入相同的指令以确认你要删除的知识。" + Environment.NewLine
             + "要取消删除,输入<DEL>+任意不同指令。" + Environment.NewLine
             + "要删除的知识是:" + cor.text + Environment.NewLine;
    }

    // Second request: either confirm or cancel, and disarm in both cases.
    DELCorrectness = false;
    bool confirmed = (DELCorrectItem == content);
    DELCorrectItem = "";

    if (!confirmed)
    {
        return "已取消删除。";
    }

    cor.text = "<DEL>" + cor.text;
    SerializeSavor.SaveChatDataList(LM.groundTruth, "LM.txt");
    SerializeSavor.SaveChatDataList(LM.knowledge, "KL.txt");
    return "删除完成。" + Environment.NewLine + "删除的知识是:" + cor.text + Environment.NewLine;
}
// Parses a teaching command and stores the new entry, returning the reply text.
//
// Accepted shapes of `content`, checked in this order:
//   <Q>question<A>answer           -> stored in LM.knowledge  (KL.txt)
//   <G>fact                        -> stored in LM.groundTruth (LM.txt)
//   <Q1>p1<Q2>p2<A>answer          -> stored in LM.knowledge  (KL.txt)
// Example: <TEA><Q>some question<A>some answer
// Anything else yields the format-error reply.
//
// An entry that is too similar to an existing one is only stored when
// `content` also contains <EXACT>; an entry judged identical is never stored.
private string Teach(string content)
{
    bool exact = content.Contains("<EXACT>");
    bool storedViaExact;
    string rejection;

    if (content.Contains("<Q>") && content.Contains("<A>"))
    {
        string question = GetToken(content, "<Q>");
        string answer = GetToken(content, "<A>");
        ChatData x = new ChatData("<Q>" + question + "<A>" + answer, DateTime.Now);

        rejection = StoreUnlessKnown(LM.knowledge, "KL.txt", x, exact, out storedViaExact);
        if (rejection != null)
        {
            return rejection;
        }

        string learned = "Q: " + question + Environment.NewLine + "A:" + answer + Environment.NewLine + "学习完成";
        // Existing behavior kept as-is: only the <EXACT> path ends with a newline.
        return storedViaExact ? learned + Environment.NewLine : learned;
    }

    if (content.Contains("<G>"))
    {
        string fact = GetToken(content, "<G>");
        ChatData x = new ChatData(fact, DateTime.Now);

        rejection = StoreUnlessKnown(LM.groundTruth, "LM.txt", x, exact, out storedViaExact);
        if (rejection != null)
        {
            return rejection;
        }

        return fact + Environment.NewLine + "学习完成";
    }

    if (content.Contains("<A>") && content.Contains("<Q1>") && content.Contains("<Q2>"))
    {
        string premise1 = GetToken(content, "<Q1>");
        string premise2 = GetToken(content, "<Q2>");
        string answer = GetToken(content, "<A>");
        ChatData x = new ChatData("<Q1>" + premise1 + "<Q2>" + premise2 + "<A>" + answer, DateTime.Now);

        rejection = StoreUnlessKnown(LM.knowledge, "KL.txt", x, exact, out storedViaExact);
        if (rejection != null)
        {
            return rejection;
        }

        // "Q2: " now carries the same separator as "Q1: " (it used to print "Q2" glued to the text).
        return "Q1: " + premise1 + Environment.NewLine
             + "Q2: " + premise2 + Environment.NewLine
             + "A:" + answer + Environment.NewLine
             + "学习完成";
    }

    return "格式错误。";
}

// Adds `item` to `store` and saves the list to `fileName`, unless a too-similar
// entry already exists.
//
// Returns null when the item was stored, otherwise the rejection message.
// `storedViaExact` is true only when the item was similar to an existing entry
// (similarity not below nearEPS) and was stored because `exact` was set.
// A similarity above INF - 1 is treated as "identical" and is always rejected.
private string StoreUnlessKnown(List<ChatData> store, string fileName, ChatData item, bool exact, out bool storedViaExact)
{
    storedViaExact = false;

    ChatData nearest = SingletonClassify(store, item.text, gram);
    if (nearest != null)
    {
        // Computed once and reused for both thresholds.
        double sim = Similarity(item.text, nearest.text, gram);
        if (!(sim < nearEPS))
        {
            if (sim > INF - 1)
            {
                return "知识库中已经有相同的知识!";
            }
            if (!exact)
            {
                return "知识库中已有相似知识。请在<TEA>后追加<EXACT>指令。" + Environment.NewLine
                     + "相似知识为:" + nearest.text + Environment.NewLine;
            }
            storedViaExact = true;
        }
    }

    store.Add(item);
    SerializeSavor.SaveChatDataList(store, fileName);
    return null;
}
// Answers a chat query from the knowledge base and returns the reply text.
//
// Two outcomes are possible:
//  * Premise path - taken when `content` has no <NEXT> tag and either nothing
//    in LM.knowledge matches or one premise of a two-premise entry matches
//    better than the best whole entry. The aligned premise is pushed into
//    short-term memory (reply ends with <NEXT>), or, when the premise match is
//    below nearEPS, a "not found" reply with teaching templates is returned.
//  * Answer path - otherwise. <NEXT> is stripped, then the query alone and the
//    query spliced (both orders) with every short-term memory entry and every
//    non-deleted ground-truth entry are matched against LM.knowledge; the best
//    match by SimilarityByClass provides the answer.
//
// Example query: "<CHAT>d is parallel to e".
public string Chat(string content)
{
    ChatData cor = SingletonClassify(LM.knowledge, content, gram);
    ChatData cor2 = SingletonClassify(LM.knowledge, content, gram, true);

    // -114514 is the "no candidate" sentinel, below any real similarity.
    // simA is guarded like sim1/sim2: cor is null when nothing matches.
    double simA = (cor == null ? -114514 : Similarity(cor.text, content, gram));
    double sim1 = (cor2 == null ? -114514 : Similarity(GetToken(cor2.text, "<Q1>"), content, gram));
    double sim2 = (cor2 == null ? -114514 : Similarity(GetToken(cor2.text, "<Q2>"), content, gram));

    bool hasNextTag = GetToken(content, "<NEXT>").CompareTo("<NOTFOUND>") != 0;

    // No matching question/answer pair in the knowledge base: try a single premise.
    if (!hasNextTag && (cor == null || simA < Math.Max(sim1, sim2)))
    {
        string nearestKL = "";
        double premiseSim = sim1;
        if (cor2 != null)
        {
            // sim1/sim2 were measured on cor2, so the premise text comes from cor2 too.
            if (sim2 > sim1)
            {
                nearestKL = GetToken(cor2.text, "<Q2>");
                premiseSim = sim2;
            }
            else
            {
                nearestKL = GetToken(cor2.text, "<Q1>");
            }
        }

        if (premiseSim < nearEPS)
        {
            return NoReplyFound(content);
        }

        string aligned = StringAlignment(nearestKL, nearestKL, content);
        // NOTE(review): presumably StringAlignment wraps its result in one leading
        // and one trailing character that are dropped here - confirm.
        aligned = aligned.Substring(1, aligned.Length - 2);

        if (SM.AddMemory(aligned))
        {
            return "已加入缓存。加入的内容为:" + Environment.NewLine + aligned + Environment.NewLine + "<NEXT>";
        }
        return "内容为:" + aligned + Environment.NewLine + "的指令已被写入缓存,请勿重复添加。";
    }

    content = content.Replace("<NEXT>", "");

    if (cor == null)
    {
        // <NEXT> forced the answer path but the knowledge base has no match at all.
        return NoReplyFound(content);
    }

    string reply = GetToken(cor.text, "<A>");
    double maxSim = SimilarityByClass(cor.text, content, gram);
    ChatData ansData = cor;
    string toAlign = content;

    // Copy the short-term memory so that appending the ground truth does not modify it.
    List<ChatData> findList = SM.memory.ConvertAll(chatdata => new ChatData(chatdata.text, chatdata.time));
    findList.AddRange(LM.groundTruth);

    foreach (var i in findList)
    {
        if (i.text.Contains("<DEL>"))
        {
            continue;
        }

        // Try the query before and after the context entry.
        string[] splices = { content + i.text, i.text + content };
        foreach (string splicedStr in splices)
        {
            ChatData cur = SingletonClassify(LM.knowledge, splicedStr, gram);
            if (cur == null)
            {
                continue;
            }

            double sim = SimilarityByClass(cur.text, splicedStr, gram);
            if (maxSim < sim)
            {
                maxSim = sim;
                toAlign = splicedStr;
                reply = GetToken(cur.text, "<A>");
                ansData = cur;
            }
        }
    }

    if (GetToken(ansData.text, "<Q>") != "<NOTFOUND>")
    {
        reply = StringAlignment(GetToken(ansData.text, "<Q>"), GetToken(ansData.text, "<A>"), toAlign);
    }
    else if (GetToken(ansData.text, "<Q1>") != "<NOTFOUND>" && GetToken(ansData.text, "<Q2>") != "<NOTFOUND>")
    {
        reply = StringAlignment(GetToken(ansData.text, "<Q1>") + GetToken(ansData.text, "<Q2>"), GetToken(ansData.text, "<A>"), toAlign);
    }

    // First and last character of the reply are dropped (see the note above).
    reply = "回答:" + reply.Substring(1, reply.Length - 2);
    reply += Environment.NewLine + "原句为:" + ansData.text;
    reply += Environment.NewLine + "合并句为:" + toAlign;
    reply += Environment.NewLine + "概率为" + maxSim;
    return reply;
}

// Builds the "no suitable reply" message, including ready-to-edit <TEA>
// templates for teaching the bot an answer to `content`.
private static string NoReplyFound(string content)
{
    return "未能找到适合的回复。使用下列语句为bot追加语料。" + Environment.NewLine
         + "<TEA><Q>" + content + "<A>" + Environment.NewLine
         + "<TEA><Q1><Q2>" + content + "<A>" + Environment.NewLine;
}