/// <summary>
/// Reads chat records from a text file and appends them to <paramref name="data"/>.
/// </summary>
/// <remarks>
/// The file is consumed as pairs of lines: first a date in "MM/dd/yyyy" form (parsed with the
/// invariant culture), then the record text. A trailing date line with no text line after it
/// is ignored, so that no record with a null text ever enters the list.
/// </remarks>
/// <param name="data">List that receives the loaded records; existing entries are kept.</param>
/// <param name="fileName">Path of the file to read.</param>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="FormatException">A date line is not in "MM/dd/yyyy" form.</exception>
public static void LoadChatDataList(List <ChatData> data, string fileName)
 {
     if (data == null)
     {
         throw new ArgumentNullException("data");
     }

     using (StreamReader stream = new StreamReader(fileName))
     {
         string time;
         // ReadLine returns null once the end of the stream is reached.
         while ((time = stream.ReadLine()) != null)
         {
             string text = stream.ReadLine();
             if (text == null)
             {
                 // Truncated final record (date without text): drop it instead of
                 // storing a ChatData whose text is null.
                 break;
             }

             DateTime stamp = DateTime.ParseExact(time, "MM/dd/yyyy", CultureInfo.InvariantCulture);
             data.Add(new ChatData(text, stamp));
         }
     }
 }
示例#2
0
        /// <summary>
        /// Stores <paramref name="x"/> in the memory list unless a sufficiently similar entry exists.
        /// </summary>
        /// <param name="x">Text to remember.</param>
        /// <returns>
        /// true when a new entry stamped with the current time was appended; false when the
        /// closest existing entry has a similarity to <paramref name="x"/> above <c>nearEPS</c>.
        /// </returns>
        public bool AddMemory(string x)
        {
            // Closest existing entry, if any; an empty string stands in when nothing matches.
            ChatData closest     = Chatbot.SingletonClassify(memory, x, gram);
            string   closestText = "";
            if (closest != null)
            {
                closestText = closest.text;
            }

            bool alreadyRemembered = Chatbot.Similarity(closestText, x, gram) > nearEPS;
            if (!alreadyRemembered)
            {
                memory.Add(new ChatData(x, DateTime.Now));
            }
            return !alreadyRemembered;
        }
示例#3
0
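        // is2PreKnowledge: when true, only two-premise knowledge (<Q1>/<Q2>) is considered, to work out which of the two premises the input matches.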
        /// <summary>
        /// Returns the entry of <paramref name="list"/> whose features overlap most with those of
        /// <paramref name="input"/>.
        /// </summary>
        /// <param name="list">Candidate entries. Entries whose text contains "&lt;DEL&gt;" are ignored.</param>
        /// <param name="input">Text to match against the candidates.</param>
        /// <param name="gram">Forwarded to <c>GetFeature</c> for the input and for every candidate.</param>
        /// <param name="is2PreKnowledge">
        /// false: every entry is scored; a "&lt;Q&gt;" entry by its question token, a
        /// "&lt;Q1&gt;"/"&lt;Q2&gt;" entry by both premise tokens concatenated, anything else by its full text.
        /// true: only entries containing both "&lt;Q1&gt;" and "&lt;Q2&gt;" are scored, each premise on
        /// its own, and the entry competes with the better of the two scores.
        /// </param>
        /// <returns>The best-scoring entry, or null when no entry scores above zero.</returns>
        public static ChatData SingletonClassify(List <ChatData> list, string input, int gram, bool is2PreKnowledge = false)
        {
            ChatData    argmax        = null;
            double      maxLikelihood = 0;
            FeatureList inputDict     = GetFeature(input, gram);

            for (int R = 0; R < list.Count; R++)
            {
                string raw = list[R].text;

                // Deleted knowledge must never be returned, in either mode.
                if (raw.Contains("<DEL>"))
                {
                    continue;
                }

                double likelihood = 0;
                if (!is2PreKnowledge)
                {
                    string content = raw;
                    if (content.Contains("<Q>"))
                    {
                        content = GetToken(content, "<Q>");
                    }
                    if (content.Contains("<Q1>") && content.Contains("<Q2>"))
                    {
                        content = GetToken(content, "<Q1>") + GetToken(content, "<Q2>");
                    }
                    likelihood = FeatureOverlap(inputDict, content, gram);
                }
                else if (raw.Contains("<Q1>") && raw.Contains("<Q2>"))
                {
                    // Score each premise separately. Both tokens are taken from the untouched
                    // entry text; extracting <Q2> from the already extracted <Q1> token would
                    // never see the second premise.
                    double firstPremise = FeatureOverlap(inputDict, GetToken(raw, "<Q1>"), gram);
                    if (firstPremise > maxLikelihood)
                    {
                        argmax        = list[R];
                        maxLikelihood = firstPremise;
                    }
                    likelihood = FeatureOverlap(inputDict, GetToken(raw, "<Q2>"), gram);
                }
                // else: a plain <Q> entry in two-premise mode keeps a score of 0.

                if (likelihood > maxLikelihood)
                {
                    argmax        = list[R];
                    maxLikelihood = likelihood;
                }
            }
            return argmax;
        }

        /// <summary>
        /// Sums, over every feature of <paramref name="inputDict"/> that also occurs in the features
        /// of <paramref name="content"/>, the product of the two feature values.
        /// </summary>
        private static double FeatureOverlap(FeatureList inputDict, string content, int gram)
        {
            FeatureList LMDict     = GetFeature(content, gram);
            double      likelihood = 0;

            foreach (var i in inputDict.feature)
            {
                if (LMDict.feature.ContainsKey(i.Key))
                {
                    likelihood += LMDict.feature[i.Key] * i.Value;
                }
            }
            return likelihood;
        }
示例#4
0
 /// <summary>
 /// Dispatches one line of user input to the command named by its leading tag.
 /// </summary>
 /// <remarks>
 /// Recognised tags, checked in this order: &lt;HELP&gt;, &lt;TEA&gt;, &lt;READ&gt;, &lt;DEL&gt;, &lt;CHAT&gt;,
 /// &lt;CLEAR&gt;, &lt;MEM&gt;, &lt;ADDMEM&gt;. Tags are matched ordinally so that the text after the tag
 /// is always cut at exactly the tag's length.
 /// &lt;DEL&gt; is a two-step command: the first call records the request and asks for
 /// confirmation, a second call with the same argument marks the entry as deleted and saves
 /// both lists, and a second call with a different argument cancels.
 /// </remarks>
 /// <param name="input">Raw input line, starting with a command tag.</param>
 /// <returns>The reply text; "¿" for an unknown command or an empty &lt;TEA&gt; argument.</returns>
 public string BotInput(string input)
 {
     if (input.StartsWith("<HELP>", StringComparison.Ordinal))
     {
         return HELP;
     }
     if (input.StartsWith("<TEA>", StringComparison.Ordinal))
     {
         string content = CommandArgument(input, "<TEA>");
         if (content == "")
         {
             return "¿";
         }
         return Teach(content);
     }
     if (input.StartsWith("<READ>", StringComparison.Ordinal))
     {
         // One builder instead of repeated string concatenation inside the loops.
         System.Text.StringBuilder toOutput = new System.Text.StringBuilder();
         toOutput.AppendLine("Ground Truth:");
         foreach (var i in LM.groundTruth)
         {
             toOutput.AppendLine(i.time + "\t" + i.text);
         }
         toOutput.AppendLine("Knowledge:");
         foreach (var i in LM.knowledge)
         {
             toOutput.AppendLine(i.time + "\t" + i.text);
         }
         return toOutput.ToString();
     }
     if (input.StartsWith("<DEL>", StringComparison.Ordinal))
     {
         string   content  = CommandArgument(input, "<DEL>");
         ChatData cor      = SingletonClassify(LM.groundTruth, content, gram);
         ChatData corKL    = SingletonClassify(LM.knowledge, content, gram);
         // -114514 is the "no candidate" sentinel, so any real similarity wins the comparison.
         double   simCOR   = (cor == null ? -114514 : Similarity(cor.text, content, gram));
         double   simCORKL = (corKL == null ? -114514 : Similarity(corKL.text, content, gram));
         if (simCORKL > simCOR)
         {
             cor = corKL;
         }

         if (cor == null)
         {
             // Guards against empty knowledge bases.
             return "没能找到要删除的知识" + Environment.NewLine;
         }

         if (!DELCorrectness)
         {
             // First request: remember it and ask the user to repeat the command.
             DELCorrectness = true;
             DELCorrectItem = content;
             return "请再次输入相同的指令以确认你要删除的知识。" + Environment.NewLine
                    + "要取消删除,输入<DEL>+任意不同指令。" + Environment.NewLine
                    + "要删除的知识是:" + cor.text + Environment.NewLine;
         }

         // Second request: confirm only when the argument is repeated unchanged.
         DELCorrectness = false;
         bool confirmed = (DELCorrectItem == content);
         DELCorrectItem = "";
         if (!confirmed)
         {
             return "已取消删除。";
         }

         // Deletion is a soft delete: the entry is tagged and both lists are persisted.
         cor.text = "<DEL>" + cor.text;
         SerializeSavor.SaveChatDataList(LM.groundTruth, "LM.txt");
         SerializeSavor.SaveChatDataList(LM.knowledge, "KL.txt");
         return "删除完成。" + Environment.NewLine + "删除的知识是:" + cor.text + Environment.NewLine;
     }
     if (input.StartsWith("<CHAT>", StringComparison.Ordinal))
     {
         return Chat(CommandArgument(input, "<CHAT>"));
     }
     if (input.StartsWith("<CLEAR>", StringComparison.Ordinal))
     {
         SM.memory.Clear();
         return "短时记忆清空完成" + Environment.NewLine;
     }
     if (input.StartsWith("<MEM>", StringComparison.Ordinal))
     {
         return SM.outputMemory();
     }
     if (input.StartsWith("<ADDMEM>", StringComparison.Ordinal))
     {
         string content = CommandArgument(input, "<ADDMEM>");
         if (SM.AddMemory(content))
         {
             return "已加入缓存。加入的内容为:" + Environment.NewLine + content + Environment.NewLine + "<NEXT>";
         }
         return "内容为:" + content + Environment.NewLine + "的指令已被写入缓存,请勿重复添加。";
     }
     return "¿";
 }

 /// <summary>
 /// Returns the text following the leading <paramref name="command"/> tag, trimmed of
 /// surrounding white space. The caller has already verified that the input starts with the tag.
 /// </summary>
 private static string CommandArgument(string input, string command)
 {
     return input.Substring(command.Length).Trim();
 }
示例#5
0
 /// <summary>
 /// Handles the argument of a teach command and stores the taught item when it is new enough.
 /// </summary>
 /// <remarks>
 /// Three forms are recognised, checked in this order:
 /// "&lt;Q&gt;…&lt;A&gt;…" (question/answer, stored in <c>LM.knowledge</c>, saved to "KL.txt"),
 /// "&lt;G&gt;…" (ground truth, stored in <c>LM.groundTruth</c>, saved to "LM.txt"), and
 /// "&lt;Q1&gt;…&lt;Q2&gt;…&lt;A&gt;…" (two premises and an answer, stored in <c>LM.knowledge</c>).
 /// An item similar to an existing one is stored only when the argument also contains
 /// "&lt;EXACT&gt;"; an item whose similarity exceeds <c>INF - 1</c> is never stored.
 /// </remarks>
 /// <param name="content">Teach argument, e.g. <c>&lt;Q&gt;question&lt;A&gt;answer</c>.</param>
 /// <returns>A confirmation, a rejection explaining why nothing was stored, or a format error.</returns>
 private string Teach(string content)
 {
     bool exact = content.Contains("<EXACT>");
     bool forced;
     string rejection;

     if (content.Contains("<Q>") && content.Contains("<A>"))
     {
         string   Q = GetToken(content, "<Q>");
         string   A = GetToken(content, "<A>");
         ChatData x = new ChatData("<Q>" + Q + "<A>" + A, DateTime.Now);

         rejection = TryStoreTaughtItem(LM.knowledge, "KL.txt", x, exact, out forced);
         if (rejection != null)
         {
             return rejection;
         }
         string learned = "Q: " + Q + Environment.NewLine + "A:" + A + Environment.NewLine + "学习完成";
         // The forced (<EXACT>) confirmation of this form has always ended with a newline; kept as is.
         return forced ? learned + Environment.NewLine : learned;
     }
     else if (content.Contains("<G>"))
     {
         string   G = GetToken(content, "<G>");
         ChatData x = new ChatData(G, DateTime.Now);

         rejection = TryStoreTaughtItem(LM.groundTruth, "LM.txt", x, exact, out forced);
         if (rejection != null)
         {
             return rejection;
         }
         return G + Environment.NewLine + "学习完成";
     }
     else if (content.Contains("<A>") && content.Contains("<Q1>") && content.Contains("<Q2>"))
     {
         string   Q1 = GetToken(content, "<Q1>");
         string   Q2 = GetToken(content, "<Q2>");
         string   A  = GetToken(content, "<A>");
         ChatData x  = new ChatData("<Q1>" + Q1 + "<Q2>" + Q2 + "<A>" + A, DateTime.Now);

         rejection = TryStoreTaughtItem(LM.knowledge, "KL.txt", x, exact, out forced);
         if (rejection != null)
         {
             return rejection;
         }
         // The second premise is labelled "Q2: " to match the "Q1: " line.
         return "Q1: " + Q1 + Environment.NewLine
                + "Q2: " + Q2 + Environment.NewLine
                + "A:" + A + Environment.NewLine + "学习完成";
     }
     else
     {
         return "格式错误。";
     }
 }

 /// <summary>
 /// Adds <paramref name="x"/> to <paramref name="store"/> and saves the store to
 /// <paramref name="fileName"/>, unless an existing entry is too similar.
 /// </summary>
 /// <param name="store">List searched for a near-duplicate and extended on success.</param>
 /// <param name="fileName">File the store is saved to after the item was added.</param>
 /// <param name="x">Item to store.</param>
 /// <param name="exact">Whether the user asked to store the item despite a similar entry.</param>
 /// <param name="forced">Set to true when the item was stored only because <paramref name="exact"/> was set.</param>
 /// <returns>null when the item was stored; otherwise the rejection message for the user.</returns>
 private string TryStoreTaughtItem(List <ChatData> store, string fileName, ChatData x, bool exact, out bool forced)
 {
     forced = false;

     ChatData nearest = SingletonClassify(store, x.text, gram);
     bool     novel   = (nearest == null);
     double   sim     = 0;
     if (!novel)
     {
         // Computed once and reused for both the "new enough" and the "identical" check.
         sim   = Similarity(x.text, nearest.text, gram);
         novel = sim < nearEPS;
     }

     if (!novel)
     {
         if (sim > INF - 1)
         {
             return "知识库中已经有相同的知识!";
         }
         if (!exact)
         {
             return "知识库中已有相似知识。请在<TEA>后追加<EXACT>指令。" + Environment.NewLine
                    + "相似知识为:" + nearest.text + Environment.NewLine;
         }
         forced = true;
     }

     store.Add(x);
     SerializeSavor.SaveChatDataList(store, fileName);
     return null;
 }
示例#6
0
        /// <summary>
        /// Produces the bot's reply to <paramref name="content"/>.
        /// </summary>
        /// <remarks>
        /// Two candidates are looked up in <c>LM.knowledge</c>: the best match overall and the best
        /// match among two-premise entries (one premise scored at a time).
        /// When the input has no "&lt;NEXT&gt;" token and a single premise matches better than the
        /// overall candidate (or there is no overall candidate), the input is treated as one
        /// premise: it is aligned against the matching premise and added to short-term memory.
        /// Otherwise the input, alone and spliced before/after every short-term memory and
        /// ground-truth entry, is matched against the knowledge base and the best answer is
        /// aligned and returned together with diagnostic lines.
        /// </remarks>
        /// <param name="content">User text, optionally containing a "&lt;NEXT&gt;" token.</param>
        /// <returns>The reply text, or a hint on how to teach the bot when nothing matches.</returns>
        public string Chat(string content)
        {
            // Sentinel similarity for "no candidate"; real similarities are expected to be larger.
            const double NoMatch = -114514;

            ChatData cor  = SingletonClassify(LM.knowledge, content, gram);
            ChatData cor2 = SingletonClassify(LM.knowledge, content, gram, true);
            // cor is null when nothing in the knowledge base overlaps the input, so it must be
            // checked before its text is read.
            double   simA = (cor == null ? NoMatch : Similarity(cor.text, content, gram));
            double   sim1 = (cor2 == null ? NoMatch : Similarity(GetToken(cor2.text, "<Q1>"), content, gram));
            double   sim2 = (cor2 == null ? NoMatch : Similarity(GetToken(cor2.text, "<Q2>"), content, gram));

            if ((cor == null ||
                 simA < Math.Max(sim1, sim2)) && GetToken(content, "<NEXT>").CompareTo("<NOTFOUND>") == 0)           // no suitable Q/A entry in the knowledge base
            {
                string nearestKL = "";
                if (cor2 != null)
                {
                    // Pick the premise of the two-premise candidate that matches best; sim1 and
                    // sim2 were both computed from cor2, so the premise text must come from cor2 too.
                    if (sim2 > sim1)
                    {
                        nearestKL = GetToken(cor2.text, "<Q2>");
                        sim1      = sim2;
                    }
                    else
                    {
                        nearestKL = GetToken(cor2.text, "<Q1>");
                    }
                }

                if (cor2 == null || sim1 < nearEPS)
                {
                    return NoReplyHint(content);
                }

                string premise = StringAlignment(nearestKL, nearestKL, content);
                // NOTE(review): assumes StringAlignment wraps its result in one delimiter
                // character on each side - confirm against its implementation.
                premise = premise.Substring(1, premise.Length - 2);
                if (SM.AddMemory(premise))
                {
                    return "已加入缓存。加入的内容为:" + Environment.NewLine + premise + Environment.NewLine + "<NEXT>";
                }
                return "内容为:" + premise + Environment.NewLine + "的指令已被写入缓存,请勿重复添加。";
            }

            content = content.Replace("<NEXT>", "");
            if (cor == null)
            {
                // Reached when the input carries <NEXT> but nothing in the knowledge base matches.
                return NoReplyHint(content);
            }

            double   maxSim  = SimilarityByClass(cor.text, content, gram);
            ChatData ansData = cor;
            string   toAlign = content;

            // Search copies of the short-term memory entries plus all ground-truth entries.
            List <ChatData> findList = SM.memory.ConvertAll(chatdata => new ChatData(chatdata.text, chatdata.time));
            findList.AddRange(LM.groundTruth);

            foreach (var i in findList)
            {
                if (i.text.Contains("<DEL>"))
                {
                    continue;
                }

                // Try the fact after and before the input, in that order.
                string[] spliced = { content + i.text, i.text + content };
                foreach (string splicedStr in spliced)
                {
                    ChatData cur = SingletonClassify(LM.knowledge, splicedStr, gram);
                    if (cur == null)
                    {
                        continue;
                    }
                    double sim = SimilarityByClass(cur.text, splicedStr, gram);
                    if (maxSim < sim)
                    {
                        maxSim  = sim;
                        toAlign = splicedStr;
                        ansData = cur;
                    }
                }
            }

            string answer   = GetToken(ansData.text, "<A>");
            string question = GetToken(ansData.text, "<Q>");
            string reply    = answer;
            if (question != "<NOTFOUND>")
            {
                reply = StringAlignment(question, answer, toAlign);
            }
            else
            {
                string first  = GetToken(ansData.text, "<Q1>");
                string second = GetToken(ansData.text, "<Q2>");
                if (first != "<NOTFOUND>" && second != "<NOTFOUND>")
                {
                    reply = StringAlignment(first + second, answer, toAlign);
                }
            }

            // NOTE(review): the first and last character are stripped unconditionally, as before;
            // presumably they are delimiters added by StringAlignment - confirm.
            reply  = "回答:" + reply.Substring(1, reply.Length - 2);
            reply += Environment.NewLine + "原句为:" + ansData.text;
            reply += Environment.NewLine + "合并句为:" + toAlign;
            reply += Environment.NewLine + "概率为" + maxSim;
            return reply;
        }

        /// <summary>
        /// Builds the message shown when no reply fits, with teach-command templates
        /// pre-filled with <paramref name="content"/>.
        /// </summary>
        private static string NoReplyHint(string content)
        {
            return "未能找到适合的回复。使用下列语句为bot追加语料。" + Environment.NewLine
                   + "<TEA><Q>" + content + "<A>" + Environment.NewLine
                   + "<TEA><Q1><Q2>" + content + "<A>" + Environment.NewLine;
        }