コード例 #1
0
 /// <summary>
 /// Computes the per-document statistics used by the cosine and BM ranking formulas.
 /// </summary>
 /// <remarks>
 /// Re-reads every file in <c>r.files</c>, parses each document and stores, keyed by the
 /// document number with surrounding spaces trimmed:
 /// the entry count of the parsed term dictionary (<c>docslength</c>),
 /// the value of <c>p.maxtf</c> after parsing (<c>Docsmaxtf</c>), and
 /// the square root of the summed squares of (tf / maxtf) * idf over the terms that are
 /// also present in <c>ind.dic</c> (<c>DocWeight</c>).
 /// Afterwards <c>avgDL</c> is set to the mean of all values in <c>docslength</c>
 /// (0 when no document has been recorded).
 /// </remarks>
 public void calculateWeight()
 {
     for (int fileIndex = 0; fileIndex < r.files.Count; fileIndex++)
     {
         Debug.WriteLine(fileIndex);
         // Run over all files again; the result maps document number -> document text.
         Dictionary<string, string> docs = r.ProccessDocs(r.files[fileIndex]);
         foreach (KeyValuePair<string, string> doc in docs)
         {
             // Trim once; the same key is used for all three per-document dictionaries.
             string docKey = doc.Key.Trim(' ');

             // Parse the document; p.maxtf is read right after Parse, before the next
             // document is parsed.
             Dictionary<string, termInfo> docTerms = p.Parse(doc.Value);
             var maxTf = p.maxtf;

             // The indexer is used for every dictionary so that a repeated document number
             // overwrites all three entries consistently instead of throwing half-way.
             docslength[docKey] = docTerms.Count;
             Docsmaxtf[docKey] = maxTf;

             // For cosine: accumulate the squared weights of the terms known to the index.
             double sumOfSquares = 0;
             foreach (KeyValuePair<string, termInfo> term in docTerms)
             {
                 // Terms that are missing from the index dictionary have no idf; skip them.
                 if (ind.dic.ContainsKey(term.Key))
                 {
                     double weight = ((double)term.Value.tf / (double)maxTf) * (double)ind.dic[term.Key].idf;
                     sumOfSquares += weight * weight;
                 }
             }
             DocWeight[docKey] = Math.Sqrt(sumOfSquares);
         }
     }

     // For the BM formula: average document length. The sum is built in a local so a value
     // already stored in avgDL cannot leak into the new average.
     double totalLength = 0;
     foreach (var length in docslength.Values)
     {
         totalLength += length;
     }
     // Guard the empty case; 0 / 0.0 would otherwise store NaN.
     avgDL = docslength.Count > 0 ? totalLength / (double)docslength.Count : 0;
 }
コード例 #2
0
ファイル: MainWindow.xaml.cs プロジェクト: rbh3/corpus
        /// <summary>
        /// Runs a full indexing pass and reports the result in a message box.
        /// </summary>
        /// <remarks>
        /// Steps, in order: delete the <c>AfterPost</c> and <c>tempPost</c> directories under
        /// <c>pathclose</c> if they exist; create the reader, parser and indexer; parse every
        /// document of every file in <c>r.files</c> and hand it to <c>ind.tempPosting</c>;
        /// write <c>r.sb</c> to <c>docsMap.txt</c> under <c>pathopen</c>; finish the index via
        /// <c>ind.tempPost</c>, <c>ind.mergefile</c> and <c>ind.writeTextDic</c>; show a popup
        /// with document count, elapsed seconds, index size and cache size.
        /// </remarks>
        private void StartEngine()
        {
            // Delete directories that already exist from a previous run.
            // NOTE: deletion of pathclose + @"\CacheDic" is disabled (it was commented out).
            string[] staleDirectories = { pathclose + @"\AfterPost", pathclose + @"\tempPost" };
            foreach (string directory in staleDirectories)
            {
                if (Directory.Exists(directory))
                {
                    Directory.Delete(directory, true);
                }
            }

            // Initialize reader, parser and indexer.
            r   = new ReadFile(pathopen);
            p   = new Parser(pathopen + @"\stop_words.txt", isStem);
            ind = new Indexer(pathclose, isStem);

            // For program timing. Stopwatch is monotonic, so the reported duration is not
            // affected by wall-clock adjustments the way DateTime.Now differences are.
            Stopwatch timer = Stopwatch.StartNew();

            for (int fileIndex = 0; fileIndex < r.files.Count; fileIndex++)
            {
                Debug.WriteLine(fileIndex);
                // ProccessDocs returns a dictionary of <DocNO, TEXT>.
                d = r.ProccessDocs(r.files[fileIndex]);

                foreach (string docNo in d.Keys)
                {
                    // Parse the document.
                    Dictionary<string, termInfo> docTerms = p.Parse(d[docNo]);
                    // Add the document to the temporary posting data.
                    ind.tempPosting(docTerms, docNo.Trim(' '), p.maxterm, p.maxtf);
                }
            }

            // 'using' closes the file even if writing throws.
            using (StreamWriter docsMapWriter = new StreamWriter(pathopen + @"\docsMap.txt"))
            {
                docsMapWriter.WriteLine(r.sb.ToString());
            }

            ind.tempPost();     // Write the last dictionary
            p.clearSteam();     // Cleans the stemmer's dictionary
            ind.mergefile();    // Merge and split
            ind.writeTextDic(); // Write the dictionary for display

            timer.Stop();
            TimeSpan elapsed = timer.Elapsed;

            // The requested popup of this run.
            System.Windows.Forms.MessageBox.Show("Number Of Docs Indexed : " + ind.TotalDoc + "\nTime Of Running : " + elapsed.TotalSeconds + "\nIndex Size[bytes] : " + ind.PostSize + "\nCache Size[bytes] : " + ind.cacheSize, "Done!", MessageBoxButtons.OK, MessageBoxIcon.Information);
        }