/// <summary>
/// Checks that a freshly constructed detector, which has been given no samples,
/// reports <c>CategorizationResult.Undetermined</c> for a piece of text.
/// </summary>
public void Given_never_before_seen_text()
{
    var detector = new SpamDetector();
    var corpus = @"欧拉";

    var result = detector.IsSpam(corpus);

    // Expected value first, actual second, so a failure message labels them correctly.
    Assert.AreEqual(CategorizationResult.Undetermined, result);
}
/// <summary>
/// Records one spam sample, then classifies a longer text that contains that sample
/// together with characters the detector has not been given. The expected outcome is
/// <c>CategorizationResult.Undetermined</c>.
/// </summary>
public void Given_a_corpus_that_is_half_spam_and_half_unknown()
{
    var detector = new SpamDetector();
    detector.SpamFound(@"做人了");

    var result = detector.IsSpam(@"我不做人了!");

    // Expected value first, actual second, so a failure message labels them correctly.
    Assert.AreEqual(CategorizationResult.Undetermined, result);
}
/// <summary>
/// Records the same text first as spam and then as ham, and checks that classifying
/// that text afterwards yields <c>CategorizationResult.Undetermined</c>.
/// </summary>
public void Given_text_marked_as_spam_then_marked_as_ham()
{
    var detector = new SpamDetector();
    var spamCorpus = @"你要说的下一句话是……";

    detector.SpamFound(spamCorpus);
    detector.HamFound(spamCorpus);
    var result = detector.IsSpam(spamCorpus);

    // Expected value first, actual second, so a failure message labels them correctly.
    Assert.AreEqual(CategorizationResult.Undetermined, result);
}
/// <summary>
/// Trains a detector from <c>Resources/spam.txt</c> and <c>Resources/ham.txt</c>, then
/// re-classifies a slice of each file in parallel. Asserts that more than 97% of the
/// evaluated spam lines come back as <c>CategorizationResult.First</c> and that more than
/// 97% of the evaluated ham lines come back as anything other than
/// <c>CategorizationResult.First</c>. Finally checks that a fixed sentence is classified
/// as <c>CategorizationResult.Second</c>.
/// </summary>
public void LongTest()
{
    // Minimum fraction (0..1) of correctly classified lines required for each slice.
    const double requiredRate = 0.97;

    var corpus = @"不愧是DIO!我们不敢做的事,他毫不在乎地做了!真是佩服,真是我们的偶像!";

    // "." instead of ".\" so Path.Combine yields a valid relative path on every platform;
    // on Windows the combined result is unchanged.
    var currentPath = @".";
    var spamPath = Path.Combine(currentPath, @"Resources", @"spam.txt");
    var hamPath = Path.Combine(currentPath, @"Resources", @"ham.txt");

    var spamDetector = new SpamDetector();
    spamDetector.Train(spamPath, hamPath);

    var utf8 = new UTF8Encoding(false);
    var spams = File.ReadAllLines(spamPath, utf8);
    var hams = File.ReadAllLines(hamPath, utf8);

    // NOTE(review): the slices start at 72001 / 648001, i.e. 7999 / 71999 lines rather than
    // the round 8000 / 72000 the old denominators implied - confirm the intended start index.
    var spamRate = MeasureHitRate(
        spams, 72001, 80000,
        line => spamDetector.IsSpam(line) == CategorizationResult.First);
    Console.WriteLine($@"{spamRate * 100}%");
    // Compare the fraction (not the percentage) against the threshold; comparing the
    // percentage against 0.97 would pass with less than 1% accuracy.
    Assert.IsTrue(spamRate > requiredRate);

    var hamRate = MeasureHitRate(
        hams, 648001, 720000,
        line => spamDetector.IsSpam(line) != CategorizationResult.First);
    Console.WriteLine($@"{hamRate * 100}%");
    Assert.IsTrue(hamRate > requiredRate);

    // Expected value first, actual second, so a failure message labels them correctly.
    Assert.AreEqual(CategorizationResult.Second, spamDetector.IsSpam(corpus));
}

/// <summary>
/// Evaluates <paramref name="isHit"/> in parallel for every line whose index lies in
/// [<paramref name="fromInclusive"/>, <paramref name="toExclusive"/>) and returns the
/// fraction (0..1) of those lines for which it returned <c>true</c>.
/// </summary>
/// <param name="lines">Lines to sample from; must contain every index in the range.</param>
/// <param name="fromInclusive">First index to evaluate.</param>
/// <param name="toExclusive">Index one past the last line to evaluate.</param>
/// <param name="isHit">Predicate deciding whether a line counts as a hit.</param>
/// <returns>Hits divided by the number of evaluated lines.</returns>
private static double MeasureHitRate(string[] lines, int fromInclusive, int toExclusive, Func<string, bool> isHit)
{
    var hits = 0;

    Parallel.For(fromInclusive, toExclusive, i =>
    {
        if (isHit(lines[i]))
        {
            // Iterations run concurrently, so the shared counter is bumped atomically.
            Interlocked.Increment(ref hits);
        }
    });

    // Divide by the number of lines actually evaluated, not a hard-coded constant.
    return (double)hits / (toExclusive - fromInclusive);
}
/// <summary>
/// Records a single ham sample, then classifies a different text and checks that the
/// detector answers <c>CategorizationResult.Undetermined</c>.
/// </summary>
public void Given_text_that_has_known_ham_and_unknown_words()
{
    var detector = new SpamDetector();
    var ham = @"我在短暂的人生里发现...一个人越是玩弄阴谋,就越会感到人类的能力是有极限的....";
    var mysteryMeat = @"欧拉欧拉欧拉欧拉欧拉欧拉欧拉";

    detector.HamFound(ham);
    var result = detector.IsSpam(mysteryMeat);

    // Expected value first, actual second, so a failure message labels them correctly.
    Assert.AreEqual(CategorizationResult.Undetermined, result);
}
/// <summary>
/// Records one ham sample and one spam sample, then classifies the concatenation of the
/// two and checks that the detector answers <c>CategorizationResult.Undetermined</c>.
/// </summary>
public void Given_text_that_has_an_equivalent_number_of_ham_and_spam()
{
    var detector = new SpamDetector();
    var ham = @"不愧是DIO!";
    var spam = @"不过,我拒绝";

    detector.HamFound(ham);
    detector.SpamFound(spam);
    var result = detector.IsSpam(@"不愧是DIO!不过,我拒绝");

    // Expected value first, actual second, so a failure message labels them correctly.
    Assert.AreEqual(CategorizationResult.Undetermined, result);
}
/// <summary>
/// Records one text as spam and an unrelated text as ham, then checks that classifying
/// the spam text again yields <c>CategorizationResult.First</c>.
/// </summary>
public void Given_text_previously_marked_as_spam()
{
    var detector = new SpamDetector();
    var corpus = @"无駄";
    var ham = @"我在短暂的人生里发现...一个人越是玩弄阴谋,就越会感到人类的能力是有极限的....";

    detector.SpamFound(corpus);
    detector.HamFound(ham);
    var isSpam = detector.IsSpam(corpus);

    // Expected value first, actual second, so a failure message labels them correctly.
    Assert.AreEqual(CategorizationResult.First, isSpam);
}
/// <summary>
/// Trains a detector from <c>Resources/spam.txt</c> and <c>Resources/ham.txt</c> (resolved
/// relative to the current directory) and checks that a fixed sentence is classified as
/// <c>CategorizationResult.Second</c>.
/// </summary>
public void Test()
{
    var corpus = @"不愧是DIO!我们不敢做的事,他毫不在乎地做了!真是佩服,真是我们的偶像!";

    // "." instead of ".\" so Path.Combine yields a valid relative path on every platform;
    // on Windows the combined result is unchanged.
    var currentPath = @".";
    var spamPath = Path.Combine(currentPath, @"Resources", @"spam.txt");
    var hamPath = Path.Combine(currentPath, @"Resources", @"ham.txt");

    var spamDetector = new SpamDetector();
    spamDetector.Train(spamPath, hamPath);

    // Expected value first, actual second, so a failure message labels them correctly.
    Assert.AreEqual(CategorizationResult.Second, spamDetector.IsSpam(corpus));
}
/// <summary>
/// Records a short ham sample and a longer spam sample that contains it, then checks
/// that classifying the spam sample yields <c>CategorizationResult.First</c>.
/// </summary>
public void Given_a_corpus_that_is_heavily_weighted_towards_being_spam()
{
    var ham = @"做人了";
    var spam = @"我不做人了!";
    var detector = new SpamDetector();

    detector.HamFound(ham);
    detector.SpamFound(spam);
    var result = detector.IsSpam(@"我不做人了!");

    // Expected value first, actual second, so a failure message labels them correctly.
    Assert.AreEqual(CategorizationResult.First, result);
}
/// <summary>
/// Console entry point. Runs the detector over a fixed table of phrases, each paired with
/// the <c>IsSpam</c> result it is expected to produce, and prints every result in green
/// when it matches the expectation or red when it does not. Afterwards it classifies
/// lines read from standard input, printing "Spam" (red) or "Not Spam" (green), until
/// the input ends.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
private static void Main(string[] args)
{
    var detector = new SpamDetector();

    // Phrase -> expected IsSpam result.
    var tests = new Dictionary<string, bool>
    {
        { "Sdf sdfsdfds fsff Dds fsf", true },
        { "wererwwrwerweerer rw rewrwerwererwr", true },
        { "hwajt the fdu eh ui futugt tge ufgb ets hewat ti sdo it", true },
        { "yrt asre the bertpera sojifsi hwrifle seen fie sue usertaje rwewererwer", true },
        { "EiybvisgfdsffbiFALFIFOBIOFUIBFRBsjdkdkdsjf", true },
        { "igish ssddf sdf", true },
        { "ioacdu uqeidiqdudaisdu a s diudsdi dusu doidu aso diuodsi uoiduaudsiaudiudasd aud", true },
        { "zvuoiyvci zoioizuoicu i udaou oiuoiuiweuiou we uoqu urqu ieuiuioqouwu we uuiowu", true },
        { "wert yuos", true },
        { "uadfa ffo sod fhfofdos fa jisjd efdqw wqddwdww", true },
        { "qrqrqr eri erer yqe ryry dfqeroworyqrqer rrqrere wrqo", true },
        { ".snoitcurtsni eht yb deificeps snoitarepo )O/I( tuptuo/tupni dna lortnoc ,lacigol ,citemhtira cisab eht gnimrofrep yb margorp retupmoc a fo snoitcurtsni eht tuo seirrac taht retupmoc a nihtiw yrtiucric cinortcele eht si )UPC( tinu gnissecorp lartnec A", true },
        { "easd ojd eqorj daiweo jowfie qjo qwej jo w weji", true },
        { "nzcna di sidadoitna ffeha ef eq h hqq fqh erhe rr qrhre afe oqiwhi qweihuho f", true },
        { "rtesting sdf what sdf re foosdf", true },
        { "This is an example", false },
        { "Yes", false },
        { "for loop", false },
        { "Awesome", false },
        { "dictionary", false },
        { "apple bannana orange", false },
        { "Ok thanks", false },
        { "A central processing unit (CPU) is the electronic circuitry within a computer that carries out the instructions of a computer program by performing the basic arithmetic, logical, control and input/output (I/O) operations specified by the instructions.", false },
        { "This library detects if a phrase is spam or a legitimate sentence", false },
        { "Computer programming, often shortened to programming, sometimes called coding", false },
        // Kept on a single line: a regular string literal may not contain a raw line break.
        { "In cryptography, encryption is the process of encoding messages or information in such a way that only authorized parties can read it.", false },
        { "keyboard spam detector", false },
    };

    foreach (var test in tests)
    {
        var result = detector.IsSpam(test.Key);

        // Red marks a mismatch between the detector's answer and the expected value.
        Console.ForegroundColor = result != test.Value ? ConsoleColor.Red : ConsoleColor.Green;
        Console.Write(result.ToString().ToUpper());
        Console.ForegroundColor = ConsoleColor.Gray;
        Console.WriteLine(" " + test.Key);
    }

    Console.WriteLine();

    // Console.ReadLine returns null once standard input is exhausted (EOF / redirected
    // input); stop there instead of looping forever on null.
    string phrase;
    while ((phrase = Console.ReadLine()) != null)
    {
        var result = detector.IsSpam(phrase);

        Console.ForegroundColor = result ? ConsoleColor.Red : ConsoleColor.Green;
        Console.WriteLine(result ? "Spam" : "Not Spam");
        Console.ForegroundColor = ConsoleColor.Gray;
    }
}