public void Given_never_before_seen_text()
        {
            // No SpamFound/HamFound/Train call is made before classifying, so the
            // detector is queried about text it was never shown; the test expects
            // the verdict to be Undetermined.
            var detector = new SpamDetector();
            var corpus   = @"欧拉";

            var verdict = detector.IsSpam(corpus);

            // Expected value comes first so a failure reports expected/actual correctly.
            Assert.AreEqual(CategorizationResult.Undetermined, verdict);
        }
        /// <summary>
        /// Marks a short phrase as spam, then classifies a longer text that contains
        /// that phrase plus characters the detector was never given. The test expects
        /// <c>CategorizationResult.Undetermined</c>.
        /// </summary>
        public void Given_a_corpus_that_is_half_spam_and_half_unknown()
        {
            var detector = new SpamDetector();

            detector.SpamFound(@"做人了");

            var result = detector.IsSpam(@"我不做人了!");

            // Expected value comes first so a failure reports expected/actual correctly.
            Assert.AreEqual(CategorizationResult.Undetermined, result);
        }
        /// <summary>
        /// Reports the same text first as spam and then as ham, and expects the
        /// detector to answer <c>CategorizationResult.Undetermined</c> for that text.
        /// </summary>
        public void Given_text_marked_as_spam_then_marked_as_ham()
        {
            var detector   = new SpamDetector();
            var spamCorpus = @"你要说的下一句话是……";

            detector.SpamFound(spamCorpus);
            detector.HamFound(spamCorpus);

            var verdict = detector.IsSpam(spamCorpus);

            // Expected value comes first so a failure reports expected/actual correctly.
            Assert.AreEqual(CategorizationResult.Undetermined, verdict);
        }
Beispiel #4
0
        /// <summary>
        /// Trains a detector from the spam and ham resource files, scores a slice of
        /// each file against the verdict the test expects for it, and finally
        /// classifies a fixed sample sentence.
        /// </summary>
        /// <remarks>
        /// Spam lines count as a hit when the verdict is <c>CategorizationResult.First</c>;
        /// ham lines count as a hit when it is anything else. Both hit rates must
        /// exceed 97 percent.
        /// </remarks>
        public void LongTest()
        {
            // Scored line ranges: start is inclusive, end is exclusive (Parallel.For semantics).
            const int spamFrom = 72001;
            const int spamTo   = 80000;
            const int hamFrom  = 648001;
            const int hamTo    = 720000;

            // Minimum hit rate in percent. The rates below are scaled to 0..100, so the
            // threshold has to be on the same scale (a threshold of 0.97 would accept ~1%).
            const double minPercent = 97d;

            var corpus = @"不愧是DIO!我们不敢做的事,他毫不在乎地做了!真是佩服,真是我们的偶像!";

            // "." instead of ".\" so Path.Combine yields a valid path on every platform.
            var currentPath = @".";
            var spamPath    = Path.Combine(currentPath, @"Resources", @"spam.txt");
            var hamPath     = Path.Combine(currentPath, @"Resources", @"ham.txt");

            var spamDetector = new SpamDetector();
            spamDetector.Train(spamPath, hamPath);

            var utf8  = new UTF8Encoding(false);
            var spams = File.ReadAllLines(spamPath, utf8);
            var hams  = File.ReadAllLines(hamPath, utf8);

            // Count spam lines that receive the verdict expected for spam.
            var spamHits = 0;
            Parallel.For(spamFrom, spamTo, i =>
            {
                if (spamDetector.IsSpam(spams[i]) == CategorizationResult.First)
                {
                    Interlocked.Increment(ref spamHits);
                }
            });

            // Divide by the number of lines actually visited, not a rounded constant.
            var spamPercent = spamHits * 100d / (spamTo - spamFrom);
            Console.WriteLine($@"{spamPercent}%");
            Assert.IsTrue(spamPercent > minPercent);

            // Count ham lines that do NOT receive the spam verdict.
            var hamHits = 0;
            Parallel.For(hamFrom, hamTo, i =>
            {
                if (spamDetector.IsSpam(hams[i]) != CategorizationResult.First)
                {
                    Interlocked.Increment(ref hamHits);
                }
            });

            var hamPercent = hamHits * 100d / (hamTo - hamFrom);
            Console.WriteLine($@"{hamPercent}%");
            Assert.IsTrue(hamPercent > minPercent);

            // Expected value comes first so a failure reports expected/actual correctly.
            Assert.AreEqual(CategorizationResult.Second, spamDetector.IsSpam(corpus));
        }
        /// <summary>
        /// Reports one sentence as ham, then classifies a different text that was
        /// never reported to the detector. The test expects
        /// <c>CategorizationResult.Undetermined</c>.
        /// </summary>
        public void Given_text_that_has_known_ham_and_unknown_words()
        {
            var detector    = new SpamDetector();
            var ham         = @"我在短暂的人生里发现...一个人越是玩弄阴谋,就越会感到人类的能力是有极限的....";
            var mysteryMeat = @"欧拉欧拉欧拉欧拉欧拉欧拉欧拉";

            detector.HamFound(ham);

            var result = detector.IsSpam(mysteryMeat);

            // Expected value comes first so a failure reports expected/actual correctly.
            Assert.AreEqual(CategorizationResult.Undetermined, result);
        }
        /// <summary>
        /// Reports one phrase as ham and another as spam, then classifies the two
        /// phrases concatenated. The test expects
        /// <c>CategorizationResult.Undetermined</c>.
        /// </summary>
        public void Given_text_that_has_an_equivalent_number_of_ham_and_spam()
        {
            var detector = new SpamDetector();
            var ham      = @"不愧是DIO!";
            var spam     = @"不过,我拒绝";

            detector.HamFound(ham);
            detector.SpamFound(spam);

            var result = detector.IsSpam(@"不愧是DIO!不过,我拒绝");

            // Expected value comes first so a failure reports expected/actual correctly.
            Assert.AreEqual(CategorizationResult.Undetermined, result);
        }
        /// <summary>
        /// Reports one text as spam and an unrelated sentence as ham, then classifies
        /// the spam text again. The test expects <c>CategorizationResult.First</c>.
        /// </summary>
        public void Given_text_previously_marked_as_spam()
        {
            var detector = new SpamDetector();
            var corpus   = @"无駄";
            var ham      = @"我在短暂的人生里发现...一个人越是玩弄阴谋,就越会感到人类的能力是有极限的....";

            detector.SpamFound(corpus);
            detector.HamFound(ham);

            var isSpam = detector.IsSpam(corpus);

            // Expected value comes first so a failure reports expected/actual correctly.
            Assert.AreEqual(CategorizationResult.First, isSpam);
        }
Beispiel #8
0
        /// <summary>
        /// Trains a detector from the spam and ham resource files and expects a fixed
        /// sample sentence to be classified as <c>CategorizationResult.Second</c>.
        /// </summary>
        public void Test()
        {
            var corpus = @"不愧是DIO!我们不敢做的事,他毫不在乎地做了!真是佩服,真是我们的偶像!";

            // "." instead of ".\" so Path.Combine yields a valid path on every platform.
            var currentPath = @".";
            var spamPath    = Path.Combine(currentPath, @"Resources", @"spam.txt");
            var hamPath     = Path.Combine(currentPath, @"Resources", @"ham.txt");

            var spamDetector = new SpamDetector();

            spamDetector.Train(spamPath, hamPath);

            // Expected value comes first so a failure reports expected/actual correctly.
            Assert.AreEqual(CategorizationResult.Second, spamDetector.IsSpam(corpus));
        }
        /// <summary>
        /// Reports a short phrase as ham and a longer text containing it as spam,
        /// then classifies the longer text. The test expects
        /// <c>CategorizationResult.First</c>.
        /// </summary>
        public void Given_a_corpus_that_is_heavily_weighted_towards_being_spam()
        {
            var ham  = @"做人了";
            var spam = @"我不做人了!";

            var detector = new SpamDetector();

            detector.HamFound(ham);
            detector.SpamFound(spam);

            var result = detector.IsSpam(@"我不做人了!");

            // Expected value comes first so a failure reports expected/actual correctly.
            Assert.AreEqual(CategorizationResult.First, result);
        }
Beispiel #10
0
        /// <summary>
        /// Console demo. First runs a fixed table of phrases, each paired with the
        /// verdict it is expected to get, through the detector and prints each verdict
        /// (green when it matches the table, red when it does not). Then classifies
        /// every line read from standard input until the input ends.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        private static void Main(string[] args)
        {
            var detector = new SpamDetector();

            // Phrase -> expected IsSpam result.
            var tests = new Dictionary<string, bool>
            {
                { "Sdf sdfsdfds fsff Dds fsf", true },
                { "wererwwrwerweerer rw rewrwerwererwr", true },
                { "hwajt the fdu eh ui futugt tge ufgb ets hewat ti sdo it", true },
                { "yrt asre the bertpera sojifsi hwrifle seen fie sue usertaje rwewererwer", true },
                { "EiybvisgfdsffbiFALFIFOBIOFUIBFRBsjdkdkdsjf", true },
                { "igish ssddf sdf", true },
                { "ioacdu uqeidiqdudaisdu a s diudsdi dusu doidu aso diuodsi uoiduaudsiaudiudasd aud", true },
                { "zvuoiyvci zoioizuoicu i udaou oiuoiuiweuiou we uoqu urqu ieuiuioqouwu we uuiowu", true },
                { "wert yuos", true },
                { "uadfa ffo sod fhfofdos fa jisjd efdqw wqddwdww", true },
                { "qrqrqr eri erer yqe ryry dfqeroworyqrqer rrqrere wrqo", true },
                {
                    ".snoitcurtsni eht yb deificeps snoitarepo )O/I( tuptuo/tupni dna lortnoc ,lacigol ,citemhtira cisab eht gnimrofrep yb margorp retupmoc a fo snoitcurtsni eht tuo seirrac taht retupmoc a nihtiw yrtiucric cinortcele eht si )UPC( tinu gnissecorp lartnec A",
                    true
                },
                { "easd ojd eqorj daiweo jowfie qjo qwej jo w weji", true },
                { "nzcna di sidadoitna ffeha ef eq h hqq fqh erhe rr qrhre afe oqiwhi qweihuho f", true },
                { "rtesting sdf what sdf re foosdf", true },
                { "This is an example", false },
                { "Yes", false },
                { "for loop", false },
                { "Awesome", false },
                { "dictionary", false },
                { "apple bannana orange", false },
                { "Ok thanks", false },
                {
                    "A central processing unit (CPU) is the electronic circuitry within a computer that carries out the instructions of a computer program by performing the basic arithmetic, logical, control and input/output (I/O) operations specified by the instructions.",
                    false
                },
                { "This library detects if a phrase is spam or a legitimate sentence", false },
                { "Computer programming, often shortened to programming, sometimes called coding", false },
                {
                    "In cryptography, encryption is the process of encoding messages or information in such a way that only authorized parties can read it.",
                    false
                },
                { "keyboard spam detector", false },
            };

            foreach (var test in tests)
            {
                var result = detector.IsSpam(test.Key);

                // Green when the verdict matches the table, red otherwise.
                Console.ForegroundColor = result == test.Value ? ConsoleColor.Green : ConsoleColor.Red;
                Console.Write(result.ToString().ToUpperInvariant());
                Console.ForegroundColor = ConsoleColor.Gray;
                Console.WriteLine(" " + test.Key);
            }
            Console.WriteLine();

            // Console.ReadLine returns null once standard input is closed or a
            // redirected input is exhausted; stop there instead of looping on null.
            string phrase;
            while ((phrase = Console.ReadLine()) != null)
            {
                var result = detector.IsSpam(phrase);

                // Interactive mode: red marks spam, green marks non-spam.
                Console.ForegroundColor = result ? ConsoleColor.Red : ConsoleColor.Green;
                Console.WriteLine(result ? "Spam" : "Not Spam");
                Console.ForegroundColor = ConsoleColor.Gray;
            }
        }