/// <summary>
/// Feeds email files to the TFIDF algorithm: reads each file, splits its text on
/// single spaces and registers the resulting words as one document.
/// </summary>
/// <remarks>
/// Best-effort: the first failure (unreadable file, null argument, ...) stops the
/// loop without throwing. Documents added before the failure stay registered.
/// </remarks>
/// <param name="files">All the email files.</param>
/// <param name="limit">Maximum number of emails to read; clamped to the number of files available.</param>
/// <param name="SPAM">Whether the files are spam (true) or ham (false).</param>
/// <param name="obj">The TFIDF object that receives the documents.</param>
private void Loop(string[] files, decimal limit, bool SPAM, TFIDF obj)
{
    try
    {
        // Never index past the end of the array when the requested limit
        // is larger than the number of files actually supplied.
        int count = Math.Min((int)limit, files.Length);

        for (int i = 0; i < count; i++)
        {
            string allFile;

            // Dispose the reader right after reading so the file handle is released.
            using (StreamReader myFile = new StreamReader(files[i]))
            {
                allFile = myFile.ReadToEnd();
            }

            string[] words = allFile.Split(' ');
            obj.addDocument(words, SPAM); // add the document and its words
        }
    }
    catch (Exception ex)
    {
        // Keep the original "never throw" contract, but make the failure visible
        // instead of swallowing it silently.
        System.Diagnostics.Trace.WriteLine("TFIDF Loop stopped early: " + ex);
    }
}
/// <summary>
/// Runs the requested algorithm over the ham and spam samples and calculates the
/// statistics shown for it (elapsed time, false negatives/positives, spam recall,
/// spam precision and accuracy).
/// </summary>
/// <param name="hams">List of ham files.</param>
/// <param name="spams">List of spam files.</param>
/// <param name="algorithm">The algorithm to evaluate.</param>
/// <returns>The populated statistics set.</returns>
staticSet calculateStatics(string[] hams, string[] spams, AlgorithmBox algorithm)
{
    staticSet myStatics = new staticSet(0);
    int hamScores = 0;
    int spamScores = 0;

    myStatics.totalham = (float)hamNumber.Value;
    myStatics.totalspam = (float)spamNumber.Value;

    progressBar.Value = 0;
    progressBar.Maximum = (int)(myStatics.totalham + myStatics.totalspam);

    // Stopwatch is unaffected by wall-clock adjustments, unlike DateTime.Now subtraction.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

    Algorithm myAlgo = null;
    switch (algorithm)
    {
        case AlgorithmBox.LCS:
            myAlgo = new LCSWord();
            break;
        case AlgorithmBox.Levenshtein:
            myAlgo = new Levenshtien();
            break;
        case AlgorithmBox.Jaro:
            myAlgo = new Jaro();
            break;
        case AlgorithmBox.JaroWinker:
            myAlgo = new JaroWinker();
            break;
        case AlgorithmBox.BiGram:
            myAlgo = new BGram();
            break;
        default:
            break;
    }

    // Decide on the 'algorithm' parameter (the same value the switch above used),
    // so the runner chosen here always matches the algorithm that was requested.
    if (algorithm == AlgorithmBox.TFIDF)
    {
        TFIDF myTFI = new TFIDF();
        myTFI.Terms = phrasePower;

        Loop(hams, hamNumber.Value, false, myTFI);
        Loop(spams, spamNumber.Value, true, myTFI);

        // the TFIDF runner
        myTFI.run();

        // thresholdValue is divided by 100 to turn it into a fraction.
        float threshold = ((float)thresholdValue.Value) / 100F;
        myTFI.claculate(ref hamScores, ref spamScores, threshold);
    }
    else
    {
        // other runners
        hamScores = Loop(hams, hamNumber.Value, myAlgo);
        spamScores = Loop(spams, spamNumber.Value, myAlgo);
    }

    // a simple timing mechanism: seconds spent running the algorithm
    timer.Stop();
    myStatics.time = (float)timer.Elapsed.TotalSeconds;

    // The formulas below treat hamScores / spamScores as the number of correctly
    // classified ham / spam samples.
    var missedSpam = myStatics.totalspam - spamScores;
    var missedHam = myStatics.totalham - hamScores;

    if (myStatics.totalspam > 0)
    {
        // false negatives
        myStatics.fnegatives = missedSpam / myStatics.totalspam;

        // spam recall
        myStatics.spamrecall = spamScores / myStatics.totalspam;

        // spam precision: skipped when nothing was flagged as spam, because the
        // division would otherwise be 0/0 and produce NaN.
        var flaggedAsSpam = spamScores + missedHam;
        if (flaggedAsSpam > 0)
        {
            myStatics.spamper = spamScores / flaggedAsSpam;
        }
    }

    if (myStatics.totalham > 0)
    {
        // false positives
        myStatics.fpositives = missedHam / myStatics.totalham;
    }

    if (myStatics.totalham > 0 || myStatics.totalspam > 0)
    {
        // accuracy
        myStatics.accuracy = 1 - ((missedSpam + missedHam) / (myStatics.totalham + myStatics.totalspam));
    }

    return myStatics;
}