/// <summary>
/// Decides whether a message is SPAM by scoring its tokens against a spam
/// word list and a ham word list, skipping any token found in the ignore list.
/// </summary>
/// <remarks>
/// Per-token evidence is accumulated as sums of logarithms rather than as a
/// running product, so long messages cannot underflow the score to zero.
/// A message with no scorable tokens, or whose score falls below the SPAM
/// threshold (0.55), is reported as not SPAM.
/// </remarks>
/// <param name="subject">The message subject.</param>
/// <param name="body">The mail message body.</param>
/// <param name="spamWordsFilename">The Spam Word List File.</param>
/// <param name="hamWordsFilename">The Ham Word List File.</param>
/// <param name="ignoreWordsFilename">The Ignore Word List File.</param>
/// <returns>True if the message is classified as SPAM; otherwise false.</returns>
public static bool AnalyzeMessage(string subject, string body, string spamWordsFilename, string hamWordsFilename, string ignoreWordsFilename)
{
    // Load Spam Word List File
    Hashtable SpamTab = new Hashtable();
    Tokenizer.LoadFromFile(spamWordsFilename, ref SpamTab);

    // Load Ham Word List File
    Hashtable HamTab = new Hashtable();
    Tokenizer.LoadFromFile(hamWordsFilename, ref HamTab);

    // Load Ignore Word List File
    Hashtable IgnoreTab = new Hashtable();
    Tokenizer.LoadFromFile(ignoreWordsFilename, ref IgnoreTab);

    // Parse Message Into Tokens (subject and body are scored together)
    string[] msgTokens = Tokenizer.Parse(string.Format("{0} {1}", subject, body));

    // Smoothing parameters: each token's probability is pulled toward the
    // neutral value `x` with strength `s`, so rarely seen tokens carry less weight.
    const double s = 1.0;
    const double x = 0.5;

    // Score >= SpamThreshold means SPAM. Everything below it (including the
    // former "undetermined" band between 0.45 and 0.55) is treated as not SPAM.
    const double SpamThreshold = .55;

    double logSpamScore = 0.0;  // sum of log(f(w))
    double logHamScore = 0.0;   // sum of log(1 - f(w))
    bool anyTokenScored = false;

    foreach (string t in msgTokens)
    {
        if (IgnoreTab.Contains(t))
        {
            continue;
        }

        // NOTE(review): the cast assumes the tables store boxed float counts,
        // as the original code did - confirm against Tokenizer.LoadFromFile.
        float SpamCount = SpamTab.ContainsKey(t) ? (float)SpamTab[t] : 0f;
        float HamCount = HamTab.ContainsKey(t) ? (float)HamTab[t] : 0f;

        if (SpamCount == 0 && HamCount == 0)
        {
            // Token unknown to both lists: it carries no evidence.
            continue;
        }

        // Relative frequency of the token in each list. An empty list yields 0
        // instead of 0/0 = NaN, which would otherwise poison the whole score.
        double bw = SpamTab.Count > 0 ? SpamCount / (double)SpamTab.Count : 0.0;
        double gw = HamTab.Count > 0 ? HamCount / (double)HamTab.Count : 0.0;

        double total = bw + gw;
        if (!(total > 0.0))
        {
            // Only reachable with non-positive or NaN counts in the word lists
            // (assumed not to occur in well-formed files); skip the token.
            continue;
        }

        double n = (double)SpamCount + HamCount;

        // f(w) = (s*x + n*p(w)) / (s + n) with p(w) = bw / (bw + gw).
        // The complement is computed directly (not as 1 - f(w)) so it cannot
        // round to zero when n is very large.
        double spamDegree = ((s * x) + (n * (bw / total))) / (s + n);
        double hamDegree = ((s * (1.0 - x)) + (n * (gw / total))) / (s + n);

        logSpamScore += System.Math.Log(spamDegree);
        logHamScore += System.Math.Log(hamDegree);
        anyTokenScored = true;
    }

    if (!anyTokenScored)
    {
        // No evidence either way - by default no SPAM.
        return false;
    }

    // prediction = I / (I + invI) rewritten as 1 / (1 + invI / I), where
    // invI / I = exp(logHamScore - logSpamScore). Exp saturating to 0 or
    // +Infinity yields 1 or 0 respectively, both well-defined.
    double prediction = 1.0 / (1.0 + System.Math.Exp(logHamScore - logSpamScore));

    // Self-training (Tokenizer.TeachListFile on the ham/spam files) was
    // commented out in the original implementation and remains disabled.
    return prediction >= SpamThreshold;
}