An implementation of IClassifier based on Bayes' algorithm.
Inheritance: AbstractClassifier, ITrainableClassifier
 /// <summary>
 /// Checks that <c>IsCaseSensitive</c> reads false on a freshly constructed
 /// classifier and reads true after being assigned true.
 /// </summary>
 public void TestCaseSensitive()
 {
     BayesianClassifier bayes = new BayesianClassifier();

     // Expected initial state: case-insensitive.
     Assert.IsFalse(bayes.IsCaseSensitive);

     bayes.IsCaseSensitive = true;

     Assert.IsTrue(bayes.IsCaseSensitive);
 }
        /// <summary>
        /// Recomputes <c>_probability</c> from <c>_matchingCount</c> and
        /// <c>_nonMatchingCount</c>.
        /// </summary>
        private void CalculateProbability()
        {
            // Start from neutral; this is also the final value when both counts are zero.
            _probability = IClassifierConstants.NEUTRAL_PROBABILITY;

            if (_matchingCount != 0)
            {
                // Fraction of matching observations, passed through NormalizeSignificance.
                var matchRatio = _matchingCount / (double)(_matchingCount + _nonMatchingCount);
                _probability = BayesianClassifier.NormalizeSignificance(matchRatio);
                return;
            }

            if (_nonMatchingCount != 0)
            {
                // Only non-matching observations have been recorded.
                _probability = IClassifierConstants.LOWER_BOUND;
            }
        }
        /// <summary>
        /// Classifies a fixed sentence twice: first against an empty
        /// <c>SimpleWordsDataSource</c>, where the neutral probability is expected,
        /// then after seeding per-word probabilities, where roughly 0.96 is expected.
        /// </summary>
        public void TestClassify()
        {
            SimpleWordsDataSource dataSource = new SimpleWordsDataSource();
            BayesianClassifier bayes = new BayesianClassifier(dataSource);

            string[] words = { "This", "is", "a", "sentence", "about", "java" };

            // Nothing has been stored in the data source yet.
            Assert.AreEqual(
                IClassifierConstants.NEUTRAL_PROBABILITY,
                bayes.Classify(ICategorizedClassifierConstants.DEFAULT_CATEGORY, words),
                0d);

            // Seed one probability per word, in sentence order.
            WordProbability[] seeded =
            {
                new WordProbability("This", .5d),
                new WordProbability("is", .5d),
                new WordProbability("a", .5d),
                new WordProbability("sentence", .2d),
                new WordProbability("about", .5d),
                new WordProbability("java", .99d)
            };
            foreach (WordProbability entry in seeded)
            {
                dataSource.SetWordProbability(entry);
            }

            Assert.AreEqual(
                .96d,
                bayes.Classify(ICategorizedClassifierConstants.DEFAULT_CATEGORY, words),
                .009d);
        }
        /// <summary>
        /// Compares <c>CalculateOverallProbability</c> for three words that all carry
        /// probability p = 0.3 against the value p^3 / (p^3 + (1 - p)^3), within 0.0001.
        /// </summary>
        public void TestCalculateOverallProbability()
        {
            double p = 0.3d;
            double tolerance = 0.0001d;

            WordProbability[] words =
            {
                new WordProbability("myWord1", p),
                new WordProbability("myWord2", p),
                new WordProbability("myWord3", p)
            };

            // Product of the word probabilities and product of their complements.
            double productOfP = (p * p * p);
            double productOfComplements = (1 - p) * (1 - p) * (1 - p);
            double expected = productOfP / (productOfP + productOfComplements);

            BayesianClassifier bayes = new BayesianClassifier();
            double actual = bayes.CalculateOverallProbability(words);

            Assert.AreEqual(expected, actual, tolerance);
        }
        /// <summary>
        /// Feeds three equally weighted words (probability 0.3 each) to
        /// <c>CalculateOverallProbability</c> and expects xy / (xy + z), where xy is
        /// the product of the probabilities and z the product of their complements.
        /// </summary>
        public void TestCalculateOverallProbability()
        {
            var wordProb = 0.3d;
            var first = new WordProbability("myWord1", wordProb);
            var second = new WordProbability("myWord2", wordProb);
            var third = new WordProbability("myWord3", wordProb);
            var allWords = new WordProbability[] { first, second, third };

            var allowedError = 0.0001d;

            var matchProduct = (wordProb * wordProb * wordProb);
            var missProduct = (1 - wordProb) * (1 - wordProb) * (1 - wordProb);
            var expected = matchProduct / (matchProduct + missProduct);

            var bayes = new BayesianClassifier();

            Assert.AreEqual(expected, bayes.CalculateOverallProbability(allWords), allowedError);
        }
Example #6
0
        /// <summary>
        /// Derives a probability from <c>_matchingCount</c> and
        /// <c>_nonMatchingCount</c> and stores it in <c>_probability</c>.
        /// </summary>
        private void CalculateProbability()
        {
            double computed;

            if (_matchingCount != 0)
            {
                // Share of matching observations among all observations.
                double share = (double)_matchingCount / (double)(_matchingCount + _nonMatchingCount);
                computed = BayesianClassifier.NormalizeSignificance(share);
            }
            else
            {
                // No matches: neutral when nothing was observed at all, lower bound otherwise.
                computed = _nonMatchingCount == 0
                    ? IClassifierConstants.NEUTRAL_PROBABILITY
                    : IClassifierConstants.LOWER_BOUND;
            }

            _probability = computed;
        }
        /// <summary>
        /// Teaches a new classifier five tokenized passages for the default category
        /// (three as matches, two as non-matches) and then checks that
        /// <c>IsMatch</c> reports each passage the way it was taught.
        /// </summary>
        public void TestTeaching()
        {
            var bayes = new BayesianClassifier();
            var category = ICategorizedClassifierConstants.DEFAULT_CATEGORY;

            // Taught as a match.
            var menuTagText = new[]
            {
                "The", "menu", "tag", "library", "manages", "the",
                "complex", "process", "of", "creating", "menus", "in",
                "JavaScript", "The", "menu", "tag", "itself", "is",
                "an", "abstract", "class", "that", "extends", "the",
                "TagSupport", "class", "and", "overrides", "the",
                "doStartTag", "and", "doEndTag", "methods.", "The",
                "getMenu", "method,", "which", "is", "a", "template",
                "method", "and", "should", "be", "overridden", "in",
                "the", "subclasses,", "provides", "JavaScript", "to",
                "add", "menu", "items", "in", "the", "menu",
                "structure", "created", "in", "the", "doStartTag",
                "method", "Subclasses", "of", "the", "menu", "tag",
                "override", "the", "getMenu", "method,", "which",
                "uses", "menu", "builders", "to", "render", "menu",
                "data", "from", "the", "data", "source"
            };

            // Taught as a non-match.
            var physicsDemoText = new[]
            {
                "I", "witness", "a", "more", "subtle",
                "demonstration", "of", "real", "time", "physics",
                "simulation", "at", "the", "tiny", "Palo", "Alto",
                "office", "of", "Havok", "a", "competing", "physics",
                "engine", "shop", "On", "the", "screen", "a",
                "computer", "generated", "sailboat", "floats", "in",
                "a", "stone", "lined", "pool", "of", "water", "The",
                "company's", "genial", "Irish", "born", "cofounder",
                "Hugh", "Reynolds", "shows", "me", "how", "to",
                "push", "the", "boat", "with", "a", "mouse", "When",
                "I", "nudge", "it", "air", "fills", "the", "sail",
                "causing", "the", "ship", "to", "tilt", "leeward",
                "Ripples", "in", "the", "water", "deflect", "off",
                "the", "stones", "intersecting", "with", "one",
                "another", "I", "urge", "the", "boat", "onward",
                "and", "it", "glides", "effortlessly", "into", "the",
                "wall", "Reynolds", "tosses", "in", "a", "handful",
                "of", "virtual", "coins", "they", "spin", "through",
                "the", "air,", "splash", "into", "the", "water,",
                "and", "sink"
            };

            // Taught as a match.
            var nioText = new[]
            {
                "The", "New", "Input", "Output", "NIO", "libraries",
                "introduced", "in", "Java", "2", "Platform",
                "Standard", "Edition", "J2SE", "1.4", "address",
                "this", "problem", "NIO", "uses", "a", "buffer",
                "oriented", "model", "That", "is", "NIO", "deals",
                "with", "data", "primarily", "in", "large", "blocks",
                "This", "eliminates", "the", "overhead", "caused",
                "by", "the", "stream", "model", "and", "even", "makes",
                "use", "of", "OS", "level", "facilities", "where",
                "possible", "to", "maximize", "throughput"
            };

            // Taught as a non-match.
            var sarsText = new[]
            {
                "As", "governments", "scramble", "to", "contain",
                "SARS", "the", "World", "Health", "Organisation",
                "said", "it", "was", "extending", "the", "scope", "of",
                "its", "April", "2", "travel", "alert", "to",
                "include", "Beijing", "and", "the", "northern",
                "Chinese", "province", "of", "Shanxi", "together",
                "with", "Toronto", "the", "epicentre", "of", "the",
                "SARS", "outbreak", "in", "Canada"
            };

            // Taught as a match.
            var xpText = new[]
            {
                "That", "was", "our", "worst", "problem", "I",
                "tried", "to", "see", "it", "the", "XP", "way", "Well",
                "what", "we", "can", "do", "is", "implement",
                "something", "I", "can't", "give", "any", "guarantees",
                "as", "to", "how", "much", "of", "it", "will", "be",
                "implemented", "in", "a", "month", "I", "won't",
                "even", "hazard", "a", "guess", "as", "to", "how",
                "long", "it", "would", "take", "to", "implement", "as",
                "a", "whole", "I", "can't", "draw", "UML", "diagrams",
                "for", "it", "or", "write", "technical", "specs",
                "that", "would", "take", "time", "from", "coding",
                "it", "which", "we", "can't", "afford", "Oh", "and",
                "I", "have", "two", "kids", "I", "can't", "do", "much",
                "OverTime", "But", "I", "should", "be", "able", "to",
                "do", "something", "simple", "that", "will", "have",
                "very", "few", "bugs", "and", "show", "a", "working",
                "program", "early", "and", "often"
            };

            // Training alternates match / non-match in this exact order.
            bayes.TeachMatch(category, menuTagText);
            bayes.TeachNonMatch(category, physicsDemoText);
            bayes.TeachMatch(category, nioText);
            bayes.TeachNonMatch(category, sarsText);
            bayes.TeachMatch(category, xpText);

            // Each passage should be reported the same way it was taught.
            Assert.IsTrue(bayes.IsMatch(category, menuTagText));
            Assert.IsTrue(!bayes.IsMatch(category, physicsDemoText));
            Assert.IsTrue(bayes.IsMatch(category, nioText));
            Assert.IsTrue(!bayes.IsMatch(category, sarsText));
            Assert.IsTrue(bayes.IsMatch(category, xpText));
        }
 /// <summary>
 /// Checks that the stop word provider handed to the three-argument constructor
 /// is the one exposed through <c>StopWordProvider</c>.
 /// </summary>
 public void TestGetStopWordProvider()
 {
     SimpleWordsDataSource dataSource = new SimpleWordsDataSource();
     ITokenizer wordTokenizer = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WORD_BREAKS);
     IStopWordProvider expectedProvider = new DefaultStopWordProvider();

     BayesianClassifier bayes = new BayesianClassifier(dataSource, wordTokenizer, expectedProvider);

     Assert.AreEqual(expectedProvider, bayes.StopWordProvider);
 }
        /// <summary>
        /// Exercises <c>TransformWord</c>: a null word must raise an exception, a word
        /// is expected lower-cased while the classifier is case-insensitive, and
        /// unchanged once <c>IsCaseSensitive</c> is set to true.
        /// </summary>
        public void TestTransformWord()
        {
            var classifier = new BayesianClassifier();
            Assert.IsFalse(classifier.IsCaseSensitive);

            // Record whether the call threw and assert afterwards. Calling Assert.Fail
            // inside the try block is ineffective: the failure is itself signalled by
            // an exception, which the bare catch would swallow, so the check could
            // never fail.
            string word = null;
            var threwOnNull = false;
            try
            {
                classifier.TransformWord(word);
            }
            catch
            {
                // Any exception type is accepted, as in the original test.
                threwOnNull = true;
            }
            Assert.IsTrue(threwOnNull, "No exception thrown when null passed.");

            word = "myWord";
            Assert.AreEqual(word.ToLower(), classifier.TransformWord(word));

            classifier.IsCaseSensitive = true;
            Assert.AreEqual(word, classifier.TransformWord(word));
        }
 /// <summary>
 /// Checks that the data source passed to the constructor is the one exposed
 /// through <c>WordsDataSource</c>.
 /// </summary>
 public void TestGetWordsDataSource()
 {
     SimpleWordsDataSource expectedSource = new SimpleWordsDataSource();

     BayesianClassifier bayes = new BayesianClassifier(expectedSource);

     Assert.AreEqual(expectedSource, bayes.WordsDataSource);
 }
 /// <summary>
 /// Checks that the tokenizer passed to the two-argument constructor is the one
 /// exposed through <c>Tokenizer</c>.
 /// </summary>
 public void TestGetTokenizer()
 {
     var dataSource = new SimpleWordsDataSource();
     ITokenizer expectedTokenizer = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WORD_BREAKS);

     var bayes = new BayesianClassifier(dataSource, expectedTokenizer);

     Assert.AreEqual(expectedTokenizer, bayes.Tokenizer);
 }