/// <summary>
/// Experimental driver. Reads every formula line of the data file, prints each
/// sub-expression of each line, and files the line under the text of that
/// sub-expression. It then looks up the lines filed under the query's key
/// (the text of all of the query's sub-expression nodes concatenated) and
/// re-parses each of them.
/// </summary>
/// <param name="queryLaTeX">LaTeX source of the query formula.</param>
public void Test1(String queryLaTeX)
{
    // Sub-expression text -> every formula line that produced it.
    Dictionary<String, List<String>> dicnew = new Dictionary<String, List<String>>();

    // The using block releases the file handle even when parsing throws.
    using (StreamReader sr = new StreamReader("C:\\Users\\dell\\Desktop\\暑假\\实验数据\\1.txt", Encoding.Default))
    {
        String read;
        while ((read = sr.ReadLine()) != null)
        {
            ChildrenBTree t = new ChildrenBTree();
            Dictionary<int, List<FinalNode1>> dic = t.childrenBTree(read);
            foreach (var it in dic)
            {
                Console.WriteLine(it.Key);

                // Start from an empty builder so that a sub-expression without nodes
                // yields "" rather than a null dictionary key.
                StringBuilder text = new StringBuilder();
                foreach (var itt in it.Value)
                {
                    text.Append(itt.zifu);
                }
                String str = text.ToString();
                Console.WriteLine("结果:" + str);

                List<String> bucket;
                if (!dicnew.TryGetValue(str, out bucket))
                {
                    bucket = new List<String>();
                    dicnew.Add(str, bucket);
                }
                bucket.Add(read);
            }
        }
    }

    // Build the query key: the node text of all of its sub-expressions, concatenated.
    ChildrenBTree queryziFu = new ChildrenBTree();
    Dictionary<int, List<FinalNode1>> querydic = queryziFu.childrenBTree(queryLaTeX);
    StringBuilder queryText = new StringBuilder();
    foreach (var it in querydic)
    {
        foreach (var itt in it.Value)
        {
            queryText.Append(itt.zifu);
        }
    }
    String strs = queryText.ToString();

    // Formulas filed under the query key. A query that matches nothing is a normal
    // outcome, so it must not surface as a KeyNotFoundException.
    List<String> candidates;
    if (!dicnew.TryGetValue(strs, out candidates))
    {
        return;
    }

    foreach (String candidate in candidates)
    {
        // Process each matching formula.
        ChildrenBTree t = new ChildrenBTree();
        Dictionary<int, List<FinalNode1>> dicc = t.childrenBTree(candidate);
        StringBuilder running = new StringBuilder();
        foreach (var itt in dicc)
        {
            foreach (var ittt in itt.Value)
            {
                running.Append(ittt.zifu);
            }
            if (strs.Equals(running.ToString()))
            {
                // NOTE(review): the original takes no action on a match; this is
                // left as the hook where result handling was meant to go.
            }
        }
    }
}
/// <summary>
/// Experimental driver. Scores every formula line of the data file and prints the
/// formulas in descending score order.
/// For each line, every sub-expression is passed to <c>AAAA.BTLevelScore</c> together
/// with the line; the smallest <c>BTreeLevel</c> of each returned group is collected.
/// The negated largest <c>BTreeLevel</c> reported by <c>AdjacentNode.AdjacentNodeList</c>
/// for the line is added as one more value. The score is
/// <c>1 - mean(|v - 1| / 19)</c> over the collected values.
/// </summary>
/// <param name="queryLaTeX">
/// LaTeX source of the query formula.
/// NOTE(review): this parameter is never read; the sub-expressions are taken from
/// each data line itself. Confirm whether the query was meant to be used here.
/// </param>
public void Test2(String queryLaTeX)
{
    List<AAAAData> Final = new List<AAAAData>();

    // The using block releases the file handle even when scoring throws.
    using (StreamReader sr = new StreamReader("C:\\Users\\dell\\Desktop\\暑假\\实验数据\\1.txt", Encoding.Default))
    {
        String read;
        while ((read = sr.ReadLine()) != null)
        {
            // Split the formula into its sub-expressions.
            ChildrenBTree children = new ChildrenBTree();
            Dictionary<int, List<FinalNode1>> subExpressions = children.childrenBTree(read);

            // The levels are kept as integers. The original joined them into a
            // '#'-separated string together with the formula and split it again,
            // which broke whenever the formula itself contained '#'.
            List<int> levels = new List<int>();
            bool skipFormula = false;

            foreach (var subExpression in subExpressions)
            {
                String str = "";
                foreach (var node in subExpression.Value)
                {
                    str = str + node.zifu;
                }

                AAAA A = new AAAA();
                Dictionary<int, List<FinalNode1>> dics = A.BTLevelScore(str, read);
                if (dics == null || dics.Count == 0)
                {
                    // The original advanced the reader here and issued `continue`,
                    // which only continued this inner loop: the remaining
                    // sub-expressions were then scored against the NEXT line (or
                    // null at end of file). Skip the whole formula instead.
                    skipFormula = true;
                    break;
                }

                // For a sub-expression occurring several times in the formula (a+b
                // inside a+b+c+a+b), record the smallest tree level of each group.
                foreach (var it in dics)
                {
                    int min = 10000;
                    foreach (var itt in it.Value)
                    {
                        if (itt.BTreeLevel < min)
                        {
                            min = itt.BTreeLevel;
                        }
                    }
                    levels.Add(min);
                }
            }

            if (skipFormula)
            {
                continue;
            }

            // Penalty term: the taller the formula's tree, the more negative the value.
            int tallest = 0;
            AdjacentNode aa = new AdjacentNode();
            List<FinalNode1> lists = aa.AdjacentNodeList(read);
            foreach (var it in lists)
            {
                if (it.BTreeLevel > tallest)
                {
                    tallest = it.BTreeLevel;
                }
            }
            levels.Add(-tallest);

            // Mean of |v - 1| / 19. The sum does not depend on order, so the sort
            // the original performed on an otherwise unused list is not needed.
            double sum = 0;
            foreach (int level in levels)
            {
                sum = sum + Math.Abs(19 - (20 - (double)level)) / (19);
            }
            sum = sum / levels.Count;

            AAAAData d = new AAAAData();
            d.BTLevel = 1 - sum;
            d.str = read;
            Final.Add(d);
        }
    }

    // Descending exchange sort, kept as in the original so that formulas with equal
    // scores are printed in the same relative order as before.
    for (int i = 0; i < Final.Count - 1; i++)
    {
        for (int j = i + 1; j < Final.Count; j++)
        {
            if (Final[j].BTLevel > Final[i].BTLevel)
            {
                AAAAData swap = Final[j];
                Final[j] = Final[i];
                Final[i] = swap;
            }
        }
    }

    foreach (var it in Final)
    {
        Console.WriteLine("公式为:" + it.str + "\t" + "相似度为:" + it.BTLevel);
    }
}
/// <summary>
/// Loads the '#'-separated index file (sub-expression # sub-expression structure #
/// formula) and returns the formulas whose indexed sub-expression, or indexed
/// sub-expression structure, equals one derived from the query.
/// </summary>
/// <param name="queryLaTeX">LaTeX source of the query formula.</param>
/// <returns>
/// Matching formulas: first all sub-expression matches, then all structure matches,
/// each in file order. A formula appears once per match, so duplicates are possible.
/// </returns>
public List<String> Read(String queryLaTeX)
{
    // The text file stands in for a database table: every entry carries a
    // sub-expression, its structure and the formula it came from.
    List<AAIndex> list = new List<AAIndex>();

    // The using block releases the file handle even when a line fails to load.
    using (StreamReader sr = new StreamReader("C:\\Users\\Administrator\\Desktop\\第二篇论文实验读写数据库\\2.txt", Encoding.Default))
    {
        String read;
        while ((read = sr.ReadLine()) != null)
        {
            String[] re = read.Split('#');
            if (re.Length < 3)
            {
                // Blank or truncated line: the original indexed re[1]/re[2]
                // unconditionally and crashed with IndexOutOfRangeException.
                continue;
            }

            AAIndex index = new AAIndex();
            index.zishi = re[0];
            index.zishiStructure = re[1];
            index.LaTeX = re[2];
            list.Add(index);
        }
    }

    // Group once instead of rescanning the whole list for every sub-expression.
    // ToLookup keeps the elements of each group in source (file) order, so the
    // result order is the same as with the original linear scans.
    var formulasByZishi = list.ToLookup(p => p.zishi, p => p.LaTeX);
    var formulasByStructure = list.ToLookup(p => p.zishiStructure, p => p.LaTeX);

    ChildrenBTree childrenBTree = new ChildrenBTree();
    Dictionary<int, List<FinalNode1>> children = childrenBTree.childrenBTree(queryLaTeX);

    // Collects the formulas found by both passes.
    List<String> resultList = new List<String>();

    // Pass 1: match on the full text of each sub-expression of the query.
    foreach (var it in children)
    {
        String zishi = "";
        foreach (var itt in it.Value)
        {
            zishi = zishi + itt.zifu;
        }
        resultList.AddRange(formulasByZishi[zishi]);
    }

    // Pass 2: match on the structure of each sub-expression, i.e. its text without
    // the nodes that isSpecialYunSuanShu accepts.
    foreach (var it in children)
    {
        String zishiStructure = "";
        foreach (var itt in it.Value)
        {
            if (!isSpecialYunSuanShu(itt.zifu))
            {
                zishiStructure = zishiStructure + itt.zifu;
            }
        }

        if (zishiStructure.Equals(""))
        {
            continue;
        }
        resultList.AddRange(formulasByStructure[zishiStructure]);
    }

    return resultList;
}
}
/// <summary>
/// Formula coverage: the number of distinct sub-expression strings shared by the
/// query and the result, divided by the total number of sub-expressions of the query.
/// </summary>
/// <param name="resultLaTeX">LaTeX source of the result formula.</param>
/// <param name="queryLaTeX">LaTeX source of the query formula.</param>
/// <returns>
/// The coverage ratio, or 0 when the query yields no sub-expressions (the original
/// evaluated 0/0 and returned NaN in that case).
/// </returns>
public double similarChildrenCounts(String resultLaTeX, String queryLaTeX)
{
    // Each dictionary entry holds the nodes of one sub-expression; for a+b+c one
    // entry holds the nodes a, +, b. The node characters are joined into one string
    // per sub-expression so that the two formulas can be compared as string sets.
    ChildrenBTree childrenBTree = new ChildrenBTree();
    Dictionary<int, List<FinalNode1>> queryLaTeXchildren = childrenBTree.childrenBTree(queryLaTeX);
    Dictionary<int, List<FinalNode1>> resultLaTeXchildren = childrenBTree.childrenBTree(resultLaTeX);

    // Sub-expression strings of the query. An entry with a single node (or none) is
    // recorded as "" and therefore still counts toward the denominator.
    List<String> queryChildren = new List<String>();
    foreach (var it in queryLaTeXchildren)
    {
        String temp = "";
        if (it.Value.Count > 1)
        {
            foreach (var itt in it.Value)
            {
                temp = temp + itt.zifu;
            }
        }
        queryChildren.Add(temp);
    }

    // Sub-expression strings of the result, built the same way.
    List<String> resultChildren = new List<String>();
    foreach (var it in resultLaTeXchildren)
    {
        String temp = "";
        if (it.Value.Count > 1)
        {
            foreach (var itt in it.Value)
            {
                temp = temp + itt.zifu;
            }
        }
        resultChildren.Add(temp);
    }

    // Without any query sub-expression there is nothing to cover; return 0 instead
    // of dividing zero by zero.
    if (queryChildren.Count == 0)
    {
        return 0;
    }

    // Intersect yields each shared string once, however often it occurs on either side.
    List<String> jiaoji = queryChildren.Intersect(resultChildren).ToList();
    double score = Convert.ToDouble(jiaoji.Count) / Convert.ToDouble(queryChildren.Count);
    return score;
}
// Third measure: the number of sub-expressions common to the result and the query,
// as a share of the query's total sub-expressions, used as formula coverage.
/// <summary>
/// Builds the sub-expression index from the formula data file and writes it to a
/// text file, one record per sub-expression in the form
/// <c>sub-expression#structure#formula#xuhao</c>. An empty structure is written as
/// a single space.
/// Per the original author's note, the text file stands in for a SQL Server table
/// that is to be populated later.
/// </summary>
public void Write()
{
    List<AAIndex> indexList = new List<AAIndex>();
    DuiShu duishu = new DuiShu();
    FuShu fuShu = new FuShu();

    // Step 1: read every formula and extract its sub-expressions and their structures.
    // The using block releases the input file even when parsing throws.
    using (StreamReader sr = new StreamReader("C:\\Users\\Administrator\\Desktop\\最终版组合测试添加数据用来修改的\\最终版组合测试添加数据用来修改的\\14最终版组合测试模糊匹配有点毛病啊,已解决14\\第二个实验测试数据\\1.txt", Encoding.Default))
    {
        String raw;
        while ((raw = sr.ReadLine()) != null)
        {
            // Normalise only real lines. The original passed the first ReadLine()
            // result to duiShu unchecked, so an empty file handed it null.
            String read = fuShu.fuShu(duishu.duiShu(raw));
            if (read == null)
            {
                // As in the original loop condition, a null normalised line ends the read.
                break;
            }

            ChildrenBTree childrenBTree = new ChildrenBTree();
            Dictionary<int, List<FinalNode1>> children = childrenBTree.childrenBTree(read);

            foreach (var it in children)
            {
                // Full text of the sub-expression.
                String zishi = "";
                // Same text without the nodes that isSpecialYunSuanShu accepts.
                String zishiStructure = "";
                foreach (var itt in it.Value)
                {
                    zishi = zishi + itt.zifu;
                    if (!isSpecialYunSuanShu(itt.zifu))
                    {
                        zishiStructure = zishiStructure + itt.zifu;
                    }
                }

                AAIndex index = new AAIndex();
                index.zishi = zishi;
                index.zishiStructure = zishiStructure;
                index.LaTeX = read;
                indexList.Add(index);
            }
        }
    }

    // Step 2: write the records. The writer is opened only after parsing succeeded,
    // so a failure above no longer leaves a truncated output file, and the using
    // block flushes and closes it.
    // NOTE(review): this writer uses StreamWriter's default encoding while the index
    // is read back elsewhere with Encoding.Default - confirm that the two agree.
    using (StreamWriter sw = new StreamWriter("C:\\Users\\Administrator\\Desktop\\第二篇论文实验读写数据库\\2.txt"))
    {
        foreach (var it in indexList)
        {
            // A single space keeps the structure field non-empty in the output.
            String structure = it.zishiStructure.Equals("") ? " " : it.zishiStructure;
            sw.WriteLine(it.zishi + "#" + structure + "#" + it.LaTeX + "#" + it.xuhao);
        }
    }
}
/// <summary>
/// Computes a tf*idf weight for every distinct keyword of the query formula.
/// Keywords are the node strings accepted by <c>isOperator</c> plus the text of each
/// sub-expression (per the original author's notes, bracket groups are not yet
/// counted as keywords). tf is the keyword's share of all keyword occurrences in the
/// query; idf is <c>log10(N / (1 + n))</c>, where N is the number of formula lines in
/// the data file and n is the number of those lines that contain the keyword text.
/// </summary>
/// <param name="queryLaTeX">LaTeX source of the query formula.</param>
/// <returns>One weight per distinct keyword, in order of first occurrence.</returns>
public List<double> tfidf(String queryLaTeX)
{
    // Adjacent-node list and sub-expressions of the query. The original also filled
    // a second, never-read list by calling AdjacentNodeList(queryLaTeX) again; that
    // dead call is dropped here.
    AABTreeStructure acquireBtreeStructure = new AABTreeStructure();
    List<FinalNode1> queryLaTeXAdjacentNodeList = acquireBtreeStructure.AdjacentNodeList(queryLaTeX);
    ChildrenBTree childrenBTree = new ChildrenBTree();
    Dictionary<int, List<FinalNode1>> children = childrenBTree.childrenBTree(queryLaTeX);

    // All keyword occurrences of the query, repeats included.
    List<String> queryLaTeXKeyWords = new List<String>();

    // Operators first.
    foreach (var it in queryLaTeXAdjacentNodeList)
    {
        if (isOperator(it.zifu))
        {
            queryLaTeXKeyWords.Add(it.zifu);
        }
    }

    // Then each sub-expression: its nodes are joined into a single keyword string.
    foreach (var it in children)
    {
        String temp = "";
        foreach (var itt in it.Value)
        {
            temp = temp + itt.zifu;
        }
        queryLaTeXKeyWords.Add(temp);
    }

    // Occurrence count per distinct keyword.
    Dictionary<String, int> KeyWordsCount = new Dictionary<String, int>();
    foreach (String keyword in queryLaTeXKeyWords)
    {
        int seen;
        if (KeyWordsCount.TryGetValue(keyword, out seen))
        {
            KeyWordsCount[keyword] = seen + 1;
        }
        else
        {
            KeyWordsCount.Add(keyword, 1);
        }
    }

    // Temporary experiment corpus: one formula per line of the text file (meant to
    // be replaced by a database read later). Only the line text is used below, so
    // the per-line node string the original built and never read is not computed.
    // Lines are kept in a list: the original keyed a dictionary by the line and
    // threw as soon as the file contained the same formula twice.
    List<String> corpus = new List<String>();
    using (StreamReader sr = new StreamReader("E:\\我要用的东西\\1我的论文2第二篇论文\\实验\\最终版组合测试添加数据用来修改的\\14最终版组合测试模糊匹配有点毛病啊,已解决14\\第二个实验测试数据\\1.txt", Encoding.Default))
    {
        String read;
        while ((read = sr.ReadLine()) != null)
        {
            corpus.Add(read);
        }
    }

    // Weight of every keyword: tf * idf.
    int allKeyWordsCount = queryLaTeXKeyWords.Count; // total keyword occurrences in the query
    double LaTeXCount = corpus.Count;                // number of formulas in the corpus
    List<double> queryLaTeXtfIdfs = new List<double>();
    foreach (var it in KeyWordsCount)
    {
        double tf = Convert.ToDouble(it.Value) / Convert.ToDouble(allKeyWordsCount);

        // NOTE(review): the original author left open whether these counts should
        // range over the whole data set or only over the retrieved results.
        int tempCount = 0; // formulas whose text contains the keyword
        foreach (String formula in corpus)
        {
            if (formula.Contains(it.Key))
            {
                tempCount++;
            }
        }

        double ContainKeyWordLaTeXCount = tempCount;
        double idf = Math.Log10(LaTeXCount / (1 + ContainKeyWordLaTeXCount));
        queryLaTeXtfIdfs.Add(tf * idf);
    }

    return queryLaTeXtfIdfs;
}