/// <summary>
/// Calculates the cosine and BM fields.
/// For every document returned by the reader this records the number of
/// distinct parsed terms (<c>docslength</c>), the value of <c>p.maxtf</c>
/// after parsing (<c>Docsmaxtf</c>), and the square root of the sum of
/// squared term weights (<c>DocWeight</c>), where a term weight is
/// <c>(tf / maxtf) * idf</c>. Finally it sets <c>avgDL</c> to the mean of
/// all values stored in <c>docslength</c>.
/// </summary>
/// <remarks>
/// <c>DocWeight.Add</c> / <c>Docsmaxtf.Add</c> throw
/// <see cref="ArgumentException"/> when a (trimmed) document number is
/// already present, exactly as before.
/// </remarks>
public void calculateWeight()
{
    for (int i = 0; i < r.files.Count; i++)
    {
        Debug.WriteLine(i);

        // Run on all files again.
        Dictionary<string, string> d = r.ProccessDocs(r.files[i]);
        foreach (KeyValuePair<string, string> doc in d)
        {
            // Trim once; every per-document dictionary is keyed by this value.
            string docId = doc.Key.Trim(' ');

            // Parse DOC.
            Dictionary<string, termInfo> docdic = p.Parse(doc.Value);

            // Length dictionary.
            docslength[docId] = docdic.Count;
            // Weight dictionary (filled in after the term loop below).
            DocWeight.Add(docId, 0);
            // Max tf dictionary. p.maxtf is read right after Parse;
            // presumably it holds this document's maximum term frequency.
            Docsmaxtf.Add(docId, p.maxtf);

            // Loop-invariant for the term loop, so convert it only once.
            double maxTf = (double)p.maxtf;

            // For cosine: accumulate locally instead of reading and
            // writing the dictionary entry for every term.
            double sumOfSquares = 0;
            foreach (KeyValuePair<string, termInfo> term in docdic)
            {
                // Only terms that exist in the index dictionary contribute.
                if (ind.dic.ContainsKey(term.Key))
                {
                    double weight = ((double)term.Value.tf / maxTf) * (double)ind.dic[term.Key].idf;
                    sumOfSquares += weight * weight;
                }
            }

            DocWeight[docId] = Math.Sqrt(sumOfSquares);
        }
    }

    // For the BM formula: average document length.
    // Summed into a local so a previous avgDL value is not folded into the
    // new average, and guarded so an empty collection yields 0 instead of NaN.
    double totalLength = 0;
    foreach (var length in docslength.Values)
    {
        totalLength += length;
    }

    avgDL = docslength.Count > 0 ? totalLength / (double)docslength.Count : 0;
}
/// <summary>
/// Runs one full indexing pass: removes the previous <c>AfterPost</c> and
/// <c>tempPost</c> directories under <c>pathclose</c>, creates the reader,
/// parser and indexer, parses every document of every file into temporary
/// postings, writes <c>docsMap.txt</c> under <c>pathopen</c>, merges the
/// postings, writes the text dictionary, and finally shows a summary
/// message box.
/// </summary>
private void StartEngine()
{
    // Delete existing output directories from a previous run.
    string afterPostDir = pathclose + @"\AfterPost";
    if (Directory.Exists(afterPostDir))
    {
        Directory.Delete(afterPostDir, true);
    }

    string tempPostDir = pathclose + @"\tempPost";
    if (Directory.Exists(tempPostDir))
    {
        Directory.Delete(tempPostDir, true);
    }

    // NOTE: deletion of pathclose + @"\CacheDic" is disabled; that directory is left untouched.

    // Initializing reader, parser, indexer.
    r = new ReadFile(pathopen);
    p = new Parser(pathopen + @"\stop_words.txt", isStem);
    ind = new Indexer(pathclose, isStem);

    // For program timing. A Stopwatch is used instead of DateTime.Now so the
    // measurement is not affected by wall-clock adjustments during the run.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

    for (int i = 0; i < r.files.Count; i++)
    {
        Debug.WriteLine(i);

        // Returns dictionary<DocNO, TEXT>.
        d = r.ProccessDocs(r.files[i]);
        foreach (string s in d.Keys)
        {
            // Parse DOC.
            Dictionary<string, termInfo> docdic = p.Parse(d[s]);

            // Make temp Post file.
            ind.tempPosting(docdic, s.Trim(' '), p.maxterm, p.maxtf);
        }
    }

    // 'using' guarantees the file handle is released even if the write throws.
    using (StreamWriter sw = new StreamWriter(pathopen + @"\docsMap.txt"))
    {
        sw.WriteLine(r.sb.ToString());
    }

    ind.tempPost();     // Write the last dictionary
    p.clearSteam();     // cleans the stemmers dictionary
    ind.mergefile();    // merge and split
    ind.writeTextDic(); // WriteDic For show

    timer.Stop();
    TimeSpan ts3 = timer.Elapsed;

    // The requested popup of this run.
    System.Windows.Forms.MessageBox.Show("Number Of Docs Indexed : " + ind.TotalDoc + "\nTime Of Running : " + ts3.TotalSeconds + "\nIndex Size[bytes] : " + ind.PostSize + "\nCache Size[bytes] : " + ind.cacheSize, "Done!", MessageBoxButtons.OK, MessageBoxIcon.Information);
}