/// <summary>
/// Returns the smallest tree level at which the sub-expression <paramref name="node"/>
/// occurs inside the given expression. The expression may be a query or a result; both are
/// handled identically, and a sub-expression that occurs several times is reported at its
/// smallest level.
/// </summary>
/// <param name="node">Sub-expression to look for; matching is done on its <c>str</c> text.</param>
/// <param name="QueryOrResultLaTeX">LaTeX source of the expression that is searched.</param>
/// <returns>
/// The smallest <c>BTLevel</c> among the matching sub-expressions, or the sentinel 1000
/// when no sub-expression of the expression has the same text.
/// </returns>
public double ZiShiBTlevel(AAAAData node, String QueryOrResultLaTeX)
{
    // Sentinel that survives the loop only when nothing matched; callers compare against it.
    double shallowest = 1000;

    // Test4 lists the sub-expressions of the expression together with their tree levels.
    List<AAAAData> parsed = Test4(QueryOrResultLaTeX);
    foreach (AAAAData candidate in parsed)
    {
        if (!node.str.Equals(candidate.str))
        {
            continue;
        }

        // Same text found: remember it only if it sits at a smaller level than any earlier hit.
        if (candidate.BTLevel < shallowest)
        {
            shallowest = candidate.BTLevel;
        }
    }

    return shallowest;
}
/// <summary>
/// Scratch experiment, not used by the ranking code: stores an <c>AAAAData</c> key in a
/// dictionary, appends "2" to the stored list when the key equals a second instance
/// (same text, different level), and prints every stored string.
/// </summary>
public void practise()
{
    AAAAData storedKey = new AAAAData { str = "张三", BTLevel = 1 };
    AAAAData probe = new AAAAData { str = "张三", BTLevel = 2 };

    Dictionary<AAAAData, List<String>> table = new Dictionary<AAAAData, List<String>>();
    List<String> firstValues = new List<String> { "1" };
    table.Add(storedKey, firstValues);

    // Only the list held by the entry is modified, never the dictionary itself,
    // so appending while enumerating is safe.
    foreach (KeyValuePair<AAAAData, List<String>> entry in table)
    {
        if (entry.Key.Equals(probe))
        {
            entry.Value.Add("2");
        }
    }

    // Dump every value of every entry.
    foreach (KeyValuePair<AAAAData, List<String>> entry in table)
    {
        List<String> values = entry.Value;
        for (int i = 0; i < values.Count; i++)
        {
            Console.WriteLine("===========================" + values[i]);
        }
    }
}
/// <summary>
/// Value equality: two <c>AAAAData</c> instances are equal when both their
/// <c>BTLevel</c> and their <c>str</c> are equal.
/// </summary>
/// <param name="obj">Object to compare with this instance; may be null or of any type.</param>
/// <returns>
/// true when <paramref name="obj"/> is an <c>AAAAData</c> with the same <c>BTLevel</c> and
/// <c>str</c>; false for null, for any other type, and for differing values.
/// </returns>
public override bool Equals(object obj)
{
    // Equals must never throw: callers such as Dictionary or List.Contains may pass any
    // object, so a foreign type simply means "not equal". ReferenceEquals keeps the null
    // test independent of any == operator the type might define.
    AAAAData other = obj as AAAAData;
    if (ReferenceEquals(other, null))
    {
        return false;
    }

    // NOTE(review): no GetHashCode override is visible in this part of the file. Confirm one
    // exists and is derived from the same two members, otherwise hash-based collections
    // will not agree with this method.
    return other.BTLevel == this.BTLevel && other.str == this.str;
}
/// <summary>
/// Scores every result expression retrieved for <paramref name="queryLaTeX"/> against the
/// query, sorts the results by descending similarity, removes repeated expressions and
/// prints the ranking followed by the elapsed time.
/// </summary>
/// <remarks>
/// For each sub-expression of the query, the level at which it occurs in the query is
/// compared with the level at which it occurs in the result. A sub-expression missing from
/// the result is scored as the negated level it has in the query. One extra term compares
/// the <c>MaxBTLevel</c> values of both expressions. Every gap is divided by 27 and squared;
/// the similarity is <c>1 - sqrt(mean of the squared gaps)</c>.
/// </remarks>
/// <param name="queryLaTeX">LaTeX source of the query expression.</param>
public void sort(String queryLaTeX)
{
    // Constants of the scoring formula, taken over unchanged from the original expression.
    const double LevelOffset = 14;
    const double LevelNormalizer = 27;
    // Level reported for a sub-expression that does not occur in the searched expression.
    const double NotFoundLevel = 1000;

    // The measurement covers scoring, sorting and printing.
    Stopwatch stopwatch = Stopwatch.StartNew();

    // Step 1: all sub-expressions of the query.
    List<AAAAData> queryLaTeXList = Test4(queryLaTeX);

    // Step 2: all candidate result expressions (plain strings).
    List<String> resultLaTeXList = getResultLaTeXList(queryLaTeX);

    // Step 3: the query-side levels are the same for every result, so compute them once.
    // The already parsed list is reused instead of re-parsing the query per sub-expression.
    List<double> queryScore = new List<double>(queryLaTeXList.Count);
    foreach (AAAAData querySub in queryLaTeXList)
    {
        queryScore.Add(ShallowestLevelIn(queryLaTeXList, querySub, NotFoundLevel));
    }

    // The query depth and its printable score string do not depend on the result either.
    var queryDepth = MaxBTLevel(queryLaTeX);
    StringBuilder queryText = new StringBuilder();
    foreach (double score in queryScore)
    {
        queryText.Append(score).Append('#');
    }
    queryText.Append(queryDepth);
    String queryStr = queryText.ToString();

    List<AAAAData> FinalList = new List<AAAAData>(resultLaTeXList.Count);
    foreach (String resultLaTeX in resultLaTeXList)
    {
        // Lower-case and parse each result exactly once.
        String loweredResult = resultLaTeX.ToLower();
        List<AAAAData> resultSubs = Test4(loweredResult);
        var resultDepth = MaxBTLevel(loweredResult);

        // Result-side level of every query sub-expression.
        List<double> resultScore = new List<double>(queryLaTeXList.Count);
        foreach (AAAAData querySub in queryLaTeXList)
        {
            double level = ShallowestLevelIn(resultSubs, querySub, NotFoundLevel);
            // Missing sub-expression: penalise with the negated level it has in the query.
            resultScore.Add(level == NotFoundLevel ? -querySub.BTLevel : level);
        }

        // Sum of squared, normalised level gaps.
        double sum = 0;
        for (int k = 0; k < queryScore.Count; k++)
        {
            double gap = Math.Abs((LevelOffset - queryScore[k]) - (LevelOffset - resultScore[k])) / LevelNormalizer;
            sum += gap * gap;
        }

        // Extra term: gap between the MaxBTLevel values of the two expressions.
        // NOTE(review): MaxBTLevel is declared outside this view. If it returns an integer
        // type, dividing by the integer literal 27 truncated this term to 0 for every gap
        // below 27. Dividing by a double keeps the fraction, and gives the same value as
        // before if MaxBTLevel already returns double.
        double depthGap = Math.Abs(queryDepth - resultDepth) / LevelNormalizer;
        sum += depthGap * depthGap;

        // Printable result-side scores, in the same "#"-separated layout as the query side.
        StringBuilder resultText = new StringBuilder();
        foreach (double score in resultScore)
        {
            resultText.Append(score).Append('#');
        }
        resultText.Append(resultDepth);

        // Mean over the sub-expression terms plus the one depth term, hence "+ 1".
        // NOTE(review): the original author flagged this divisor as uncertain.
        sum = sum / (queryScore.Count + 1);

        AAAAData data = new AAAAData();
        data.BTLevel = 1 - Math.Sqrt(sum);
        data.str = resultLaTeX;
        data.queryStr = queryStr;
        data.resultStr = resultText.ToString();
        FinalList.Add(data);
    }

    // Descending exchange sort. Kept hand-written on purpose: replacing it with a library
    // sort could change the relative order of results that have equal scores.
    for (int i = 0; i < FinalList.Count - 1; i++)
    {
        for (int j = i + 1; j < FinalList.Count; j++)
        {
            if (FinalList[j].BTLevel > FinalList[i].BTLevel)
            {
                AAAAData swapped = FinalList[j];
                FinalList[j] = FinalList[i];
                FinalList[i] = swapped;
            }
        }
    }

    // Keep only the first (best ranked) occurrence of each expression text.
    HashSet<String> seenExpressions = new HashSet<String>();
    List<AAAAData> quchong = new List<AAAAData>();
    foreach (AAAAData ranked in FinalList)
    {
        if (seenExpressions.Add(ranked.str))
        {
            quchong.Add(ranked);
        }
    }

    int counttt = 1;
    foreach (AAAAData it in quchong)
    {
        Console.WriteLine("==========================================================第" + counttt + "个====================================================");
        Console.WriteLine("结果表达式:" + it.str + "\t" + "查询表达式打分:" + it.queryStr + "\t" + "结果表达式打分:" + it.resultStr + "\t" + "相似度得分为:" + it.BTLevel);
        Console.WriteLine("");
        Console.WriteLine("");
        counttt++;
    }

    stopwatch.Stop();
    TimeSpan timespan = stopwatch.Elapsed;
    double seconds = timespan.TotalSeconds;
    double milliseconds = timespan.TotalMilliseconds;
    string timePast = "耗时" + seconds + "秒,即" + milliseconds + "毫秒!";
    Console.WriteLine("========================================第二步:排序时间为:" + timePast);
}

/// <summary>
/// Returns the smallest <c>BTLevel</c> among the entries of <paramref name="subExpressions"/>
/// whose text equals that of <paramref name="target"/>, or <paramref name="notFound"/> when
/// no entry below that value matches.
/// </summary>
private static double ShallowestLevelIn(List<AAAAData> subExpressions, AAAAData target, double notFound)
{
    double shallowest = notFound;
    foreach (AAAAData candidate in subExpressions)
    {
        if (target.str.Equals(candidate.str) && candidate.BTLevel < shallowest)
        {
            shallowest = candidate.BTLevel;
        }
    }
    return shallowest;
}
////我把第一篇论文的实验数据的子式写入数据库吧 //public void Test3() //{ // // LaTeX,子式节点 // Dictionary<AAAAData, List<String>> dic = new Dictionary<AAAAData, List<String>>(); // StreamReader sr = new StreamReader("C:\\Users\\dell\\Desktop\\暑假\\实验数据\\1.txt", Encoding.Default); // String read = sr.ReadLine(); // //第一步:开始读取每一个数学表达式了啊 // while (read != null) // { // //第二步:直接构成倒排索引吧,省的以后要是有重复的数学表达式啥的 // //对于输入进来的每一个数学公式,把它解析为各个子式 // List<AAAAData> li = new List<AAAAData>(); // li = Test4(read); // int biaozhi = 0;//每进来一个子式,就记作一个标志 // //对于每一个子式节点,我需要插入字典,作为倒排索引 // foreach (var it in li) // { // //if (dic.Count == 0) // //{ // // List<String> list = new List<string>(); // // list.Add(read); // // dic.Add(it,list); // // Console.WriteLine("第一个if"); // //} // //else // //{ // // //我去遍历字典的键值对象 // // foreach (var zidian in dic) // // { // // //如果字典里面的倒排索引有某一个子式了 // // if (zidian.Key.Equals(it) && (biaozhi == 0)) // // { // // Console.WriteLine("第二个if"); // // dic[zidian.Key].Add(read); // // biaozhi++; // // } // // else if(!zidian.Key.Equals(it)) // // { // // List<String> list = new List<string>(); // // list.Add(read); // // dic.Add(it, list); // // } // // } // //} // //========================================================== // if (dic.Count == 0) // { // List<String> list = new List<string>(); // list.Add(read); // dic.Add(it, list); // continue; // } // int a = 0;//这是个判断标志,下面开始遍历,如果找到,那么a=1,没找到a=0 // AAAAData tempKey = null;//定义临时键存储文件名字 // foreach (var item in dic) // { // if (item.Key.Equals(it)) // { // a = 1; // tempKey = item.Key;//其实不用定义,因为如果找到,indexes.FileName与item.Key相等,下面那个“[]中括号”找谁不都一样么,多此一举 // break;//一旦找到说明找到了,那么就可以终止了,此时等于一加个break,否则又出bug了 // } // } // if (a == 1)//说明找到了 // { // dic[tempKey].Add(read); // } // else if (a == 0)//说明没找到 // { // List<String> list = new List<string>(); // list.Add(read); // dic.Add(it, list); // } // //========================================================== // }//循环里面 // read = sr.ReadLine(); // } // ////这里把数据倒排插入数据库 // //bool panduan; // 
//string sqlstr = "insert into Test1(子式,子式所在高度,数学表达式) values ('" + node + "','" + filename + "','" + path + "')";
// //panduan = DBhelper.InsertUpdateDal(sqlstr);
// //if (panduan == true)
// //{
// //    Console.Write("操作成功");
// //}
// //else
// //{
// //    Console.Write("操作失败");
// //}
//}

/// <summary>
/// Parses a LaTeX expression into a binary tree and lists its sub-expressions.
/// The tree is walked breadth-first; every node that has a left child contributes one
/// entry holding the node's <c>BTreeLevel</c> and the text produced by <c>Test5</c>.
/// </summary>
/// <param name="LaTeX">LaTeX source of the expression to decompose.</param>
/// <returns>The collected entries in breadth-first order.</returns>
public List<AAAAData> Test4(String LaTeX)
{
    List<AAAAData> subExpressions = new List<AAAAData>();

    BinaryTreeNode treeBuilder = new BinaryTreeNode();
    FinalNode1 root = treeBuilder.getNode(LaTeX);

    // Nodes are numbered heap-style during the walk: root 1, left child 2n, right child 2n + 1.
    root.xuhao = 1;

    Queue<FinalNode1> pending = new Queue<FinalNode1>();
    pending.Enqueue(root);

    while (pending.Count > 0)
    {
        FinalNode1 current = pending.Dequeue();

        // A node counts as a sub-expression as soon as it has a left child. Per the original
        // author's note, requiring both children would miss square roots, whose operand is
        // stored as the left child only.
        if (current.left != null)
        {
            AAAAData entry = new AAAAData();
            entry.BTLevel = current.BTreeLevel;
            entry.str = Test5(current);
            subExpressions.Add(entry);
        }

        if (current.left != null)
        {
            current.left.xuhao = current.xuhao * 2;
            pending.Enqueue(current.left);
        }

        if (current.right != null)
        {
            current.right.xuhao = current.xuhao * 2 + 1;
            pending.Enqueue(current.right);
        }
    }

    return subExpressions;
}
/// <summary>
/// Reads formulas line by line from a fixed data file, gives each formula a score in which
/// a higher value is printed as more similar, and prints the formulas ordered by descending
/// score.
/// </summary>
/// <remarks>
/// For every sub-expression of a formula, the smallest tree level of each group returned by
/// <c>BTLevelScore</c> is collected, followed by the negated largest <c>BTreeLevel</c> among
/// the nodes returned by <c>AdjacentNodeList</c>. The score is
/// <c>1 - mean(|19 - (20 - value)| / 19)</c> over these values.
/// </remarks>
/// <param name="queryLaTeX">
/// NOTE(review): not read anywhere in this method; each formula is only scored against
/// itself. Confirm whether the query was meant to take part in the computation.
/// </param>
public void Test2(String queryLaTeX)
{
    List<AAAAData> Final = new List<AAAAData>();

    // The reader is disposed when the block ends, including when an exception is thrown.
    using (StreamReader sr = new StreamReader("C:\\Users\\dell\\Desktop\\暑假\\实验数据\\1.txt", Encoding.Default))
    {
        String read;
        while ((read = sr.ReadLine()) != null)
        {
            // Split the formula into sub-expressions (one node list per sub-expression).
            ChildrenBTree children = new ChildrenBTree();
            Dictionary<int, List<FinalNode1>> aaa = children.childrenBTree(read);

            // The values are kept as integers. The original joined them with '#' into a
            // string that also contained the formula and parsed them back, which threw as
            // soon as the formula itself contained '#'.
            List<int> levels = new List<int>();
            bool skipFormula = false;

            foreach (var subExpression in aaa)
            {
                // Text of this sub-expression: the symbols of its nodes, concatenated.
                StringBuilder text = new StringBuilder();
                foreach (var symbolNode in subExpression.Value)
                {
                    text.Append(symbolNode.zifu);
                }

                AAAA A = new AAAA();
                Dictionary<int, List<FinalNode1>> dics = A.BTLevelScore(text.ToString(), read);
                if (dics == null || dics.Count == 0)
                {
                    // The original advanced the reader here and then continued the inner loop,
                    // which mixed the remaining sub-expressions of this formula with the next
                    // line (or with null at end of file). Presumably the intent was to skip
                    // the formula, so leave the inner loop and let the outer loop read on.
                    skipFormula = true;
                    break;
                }

                // A sub-expression may occur several times; keep the smallest level per group.
                foreach (var group in dics)
                {
                    int min = 10000;
                    foreach (var occurrence in group.Value)
                    {
                        if (occurrence.BTreeLevel < min)
                        {
                            min = occurrence.BTreeLevel;
                        }
                    }
                    levels.Add(min);
                }
            }

            if (skipFormula)
            {
                continue;
            }

            // Negated largest level of the formula: the deeper the tree, the more negative.
            int deepest = 0;
            AdjacentNode aa = new AdjacentNode();
            foreach (var treeNode in aa.AdjacentNodeList(read))
            {
                if (treeNode.BTreeLevel > deepest)
                {
                    deepest = treeNode.BTreeLevel;
                }
            }
            levels.Add(-deepest);

            // Mean normalised distance; levels always holds at least the depth entry.
            double sum = 0;
            foreach (int level in levels)
            {
                sum = sum + Math.Abs(19 - (20 - (double)level)) / 19;
            }
            sum = sum / levels.Count;

            AAAAData d = new AAAAData();
            d.BTLevel = 1 - sum;
            d.str = read;
            Final.Add(d);
        }
    }

    // Descending exchange sort, kept hand-written so that the relative order of formulas
    // with equal scores stays exactly as before.
    for (int i = 0; i < Final.Count - 1; i++)
    {
        for (int j = i + 1; j < Final.Count; j++)
        {
            if (Final[j].BTLevel > Final[i].BTLevel)
            {
                AAAAData swapped = Final[j];
                Final[j] = Final[i];
                Final[i] = swapped;
            }
        }
    }

    foreach (var it in Final)
    {
        Console.WriteLine("公式为:" + it.str + "\t" + "相似度为:" + it.BTLevel);
    }
}