/// <summary>
/// Checks that a classifier constructed with a words data source reports
/// that data source through its <c>WordsDataSource</c> property.
/// </summary>
public void TestGetWordsDataSource()
        {
            var source  = new SimpleWordsDataSource();
            var subject = new BayesianClassifier(source);

            // The property must hand back what was passed to the constructor.
            Assert.AreEqual(source, subject.WordsDataSource);
        }
        /// <summary>
        /// Checks that <c>IsCaseSensitive</c> is false on a default-constructed
        /// classifier and reads back as true once it has been set to true.
        /// </summary>
        public void TestCaseSensitive()
        {
            var subject = new BayesianClassifier();

            // Initial state.
            Assert.IsFalse(subject.IsCaseSensitive);

            // State after switching the flag on.
            subject.IsCaseSensitive = true;
            Assert.IsTrue(subject.IsCaseSensitive);
        }
        /// <summary>
        /// Checks that a classifier constructed with a tokenizer reports that
        /// tokenizer through its <c>Tokenizer</c> property.
        /// </summary>
        public void TestGetTokenizer()
        {
            var source = new SimpleWordsDataSource();

            // Kept typed as the interface so the same constructor overload is selected.
            ITokenizer expected = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WORD_BREAKS);

            var subject = new BayesianClassifier(source, expected);

            Assert.AreEqual(expected, subject.Tokenizer);
        }
Пример #4
0
        /// <summary>
        /// Checks that a classifier constructed with a stop-word provider reports
        /// that provider through its <c>StopWordProvider</c> property.
        /// </summary>
        public void TestGetStopWordProvider()
        {
            SimpleWordsDataSource source = new SimpleWordsDataSource();
            ITokenizer wordBreaker = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WORD_BREAKS);
            IStopWordProvider expected = new DefaultStopWordProvider();

            BayesianClassifier subject = new BayesianClassifier(source, wordBreaker, expected);

            Assert.AreEqual(expected, subject.StopWordProvider);
        }
Пример #5
0
        /// <summary>
        /// Stores the supplied stop-word filter and data extractor, and builds a
        /// Bayesian classifier over an ODBC-backed words data source.
        /// </summary>
        /// <param name="stopWordFilter">Value assigned to <c>StopWordFilter</c>.</param>
        /// <param name="dataExtractor">Value assigned to <c>DataExtractor</c>.</param>
        public AdminApprovalHistory(IStopWordFilter stopWordFilter, IDataExtractor dataExtractor)
        {
            // The connection string comes from the "ConnectionString" application setting.
            string connectionString = ConfigurationManager.AppSettings["ConnectionString"];

            IDbConnectionManager connections = new OdbcConnectionManager(connectionString);
            IWordsDataSource     words       = new OdbcWordsDataSource(connections);

            Classifier     = new BayesianClassifier(words);
            StopWordFilter = stopWordFilter;
            DataExtractor  = dataExtractor;
        }
Пример #6
0
        /// <summary>
        /// Assigns probabilities to four integer features, classifies a sample that
        /// contains all four, and expects exactly one result whose probability is
        /// within .009 of .96.
        /// </summary>
        public void ClassifierTest()
        {
            var subject = new BayesianClassifier<int>();

            subject.SetFeatureProbability(10, .50d);
            subject.SetFeatureProbability(20, .2d);
            subject.SetFeatureProbability(30, .50d);
            subject.SetFeatureProbability(69, .99d);

            // Sample made of every feature configured above.
            int[] sample = { 10, 20, 30, 69 };
            var results = subject.Classify(sample);

            Assert.NotNull(results);
            Assert.AreEqual(1, results.Count);

            var only = results.ElementAt(0);
            Assert.AreEqual(.96d, only.Probability, .009d);
        }
        /// <summary>
        /// Checks that classifying a sentence against an empty data source yields
        /// the neutral probability, and that after per-word probabilities are set
        /// the result is within .009 of .96.
        /// </summary>
        public void TestClassify()
        {
            var source  = new SimpleWordsDataSource();
            var subject = new BayesianClassifier(source);

            string[] sentence = { "This", "is", "a", "sentence", "about", "java" };

            // Nothing has been stored yet, so the exact neutral value is expected (zero tolerance).
            Assert.AreEqual(IClassifierConstants.NEUTRAL_PROBABILITY, subject.Classify(ICategorizedClassifierConstants.DEFAULT_CATEGORY, sentence), 0d);

            // One probability per word of the sentence, in the same order.
            double[] probabilities = { .5d, .5d, .5d, .2d, .5d, .99d };
            for (int i = 0; i < sentence.Length; i++)
            {
                source.SetWordProbability(new WordProbability(sentence[i], probabilities[i]));
            }

            Assert.AreEqual(.96d, subject.Classify(ICategorizedClassifierConstants.DEFAULT_CATEGORY, sentence), .009d);
        }
        /// <summary>
        /// Compares <c>CalculateOverallProbability</c> for three words sharing one
        /// probability p against p^3 / (p^3 + (1 - p)^3), within a 0.0001 margin.
        /// </summary>
        public void TestCalculateOverallProbability()
        {
            var p = 0.3d;

            var words = new[]
            {
                new WordProbability("myWord1", p),
                new WordProbability("myWord2", p),
                new WordProbability("myWord3", p)
            };

            // Expected value computed by hand from the same probability.
            var productOfP          = (p * p * p);
            var productOfComplement = (1 - p) * (1 - p) * (1 - p);
            var expected            = productOfP / (productOfP + productOfComplement);

            var tolerance = 0.0001d;
            var subject   = new BayesianClassifier();

            Assert.AreEqual(expected, subject.CalculateOverallProbability(words), tolerance);
        }
        /// <summary>
        /// Configures four integer feature probabilities, classifies a sample that
        /// uses all of them, and expects a single result with probability within
        /// .009 of .96.
        /// </summary>
        public void ClassifierTest()
        {
            BayesianClassifier<int> model = new BayesianClassifier<int>();

            model.SetFeatureProbability(10, .50d);
            model.SetFeatureProbability(20, .2d);
            model.SetFeatureProbability(30, .50d);
            model.SetFeatureProbability(69, .99d);

            int[] features = new[] { 10, 20, 30, 69 };
            var outcome = model.Classify(features);

            // Exactly one classification result is expected.
            Assert.NotNull(outcome);
            Assert.AreEqual(1, outcome.Count);

            var first = outcome.ElementAt(0);
            Assert.AreEqual(.96d, first.Probability, .009d);
        }
Пример #10
0
        /// <summary>
        /// Checks <c>CalculateOverallProbability</c> for three words with the same
        /// probability p against p^3 / (p^3 + (1 - p)^3), allowing a 0.0001 margin.
        /// </summary>
        public void TestCalculateOverallProbability()
        {
            double p = 0.3d;

            WordProbability first  = new WordProbability("myWord1", p);
            WordProbability second = new WordProbability("myWord2", p);
            WordProbability third  = new WordProbability("myWord3", p);
            WordProbability[] words = new WordProbability[] { first, second, third };

            double tolerance = 0.0001d;

            // Hand-computed expectation.
            double numerator  = (p * p * p);
            double complement = (1 - p) * (1 - p) * (1 - p);
            double expected   = numerator / (numerator + complement);

            BayesianClassifier subject = new BayesianClassifier();

            Assert.AreEqual(expected, subject.CalculateOverallProbability(words), tolerance);
        }
Пример #11
0
        /// <summary>
        /// Teaches a string classifier digit features for class "num" and letter
        /// features for class "chr", then checks that the best result for "1" is
        /// "num" and the best result for "c" is "chr".
        /// </summary>
        public void TeachTest()
        {
            var subject = new BayesianClassifier<string>(0);

            // "num": digits match, letters do not.
            subject.TeachMatch("num", "1", "2", "3", "1");
            subject.TeachMatch("num", "1", "2", "3");
            subject.TeachNonMatch("num", "a", "b", "c");

            // "chr": letters match, digits do not.
            subject.TeachMatch("chr", "a", "b", "c");
            subject.TeachNonMatch("chr", "1", "2", "3");

            var bestForDigit = subject.GetBestResult("1");
            Assert.NotNull(bestForDigit);
            Assert.AreEqual("num", bestForDigit.Class.Name);

            var bestForLetter = subject.GetBestResult("c");
            Assert.NotNull(bestForLetter);
            Assert.AreEqual("chr", bestForLetter.Class.Name);
        }
        /// <summary>
        /// Checks <c>TransformWord</c>: a null word must raise an exception, a word
        /// is lower-cased while the classifier is case-insensitive, and it is
        /// returned unchanged once <c>IsCaseSensitive</c> is set to true.
        /// </summary>
        public void TestTransformWord()
        {
            BayesianClassifier classifier = new BayesianClassifier();

            Assert.IsFalse(classifier.IsCaseSensitive);

            string word = null;

            // Record the outcome in a flag and assert afterwards. Calling Assert.Fail
            // inside the try would raise an exception that the bare catch swallows,
            // so the check could never fail through that path.
            bool threw = false;
            try
            {
                classifier.TransformWord(word);
            }
            catch
            {
                threw = true;
            }

            Assert.IsTrue(threw, "No exception thrown when null passed.");

            word = "myWord";
            Assert.AreEqual(word.ToLower(), classifier.TransformWord(word));

            classifier.IsCaseSensitive = true;
            Assert.AreEqual(word, classifier.TransformWord(word));
        }
Пример #13
0
        /// <summary>
        /// Trains a string classifier with digits as matches for "num" and letters
        /// as matches for "chr" (and the reverse as non-matches), then checks the
        /// class name of the best result for "1" and for "c".
        /// </summary>
        public void TeachTest()
        {
            BayesianClassifier<string> model = new BayesianClassifier<string>(0);

            model.TeachMatch("num", "1", "2", "3", "1");
            model.TeachMatch("num", "1", "2", "3");
            model.TeachNonMatch("num", "a", "b", "c");

            model.TeachMatch("chr", "a", "b", "c");
            model.TeachNonMatch("chr", "1", "2", "3");

            // A digit should be attributed to "num".
            var digitResult = model.GetBestResult("1");
            Assert.NotNull(digitResult);
            Assert.AreEqual("num", digitResult.Class.Name);

            // A letter should be attributed to "chr".
            var letterResult = model.GetBestResult("c");
            Assert.NotNull(letterResult);
            Assert.AreEqual("chr", letterResult.Class.Name);
        }
Пример #14
0
        /// <summary>
        /// Stores the topic number and main directory, derives the topic file and
        /// directory names from the topic number, and creates a Bayesian classifier
        /// over a new <c>SimpleWordsDataSource</c>.
        /// </summary>
        /// <param name="topicN">Topic number; used to build "topic{N}.txt" and the directory name "{N}".</param>
        /// <param name="mainDirectory">Value stored in the <c>mainDirectory</c> member.</param>
        public Classifier(int topicN, string mainDirectory)
        {
            this.topicN = topicN;
            this.mainDirectory = mainDirectory;

            // Both derived names embed the topic number as text.
            string topicText = topicN.ToString();
            TopicFileName = "topic" + topicText + ".txt";
            TopicDir = topicText;

            IWordsDataSource words = new SimpleWordsDataSource();
            classifier = new BayesianClassifier(words);
        }
Пример #15
0
        /// <summary>
        /// Teaches a default classifier three matching and two non-matching word
        /// lists for the default category, then checks that <c>IsMatch</c> reports
        /// each list the way it was taught.
        /// </summary>
        public void TestTeaching()
        {
            var subject  = new BayesianClassifier();
            var category = ICategorizedClassifierConstants.DEFAULT_CATEGORY;

            // Taught as a match.
            string[] menuTagText =
            {
                "The", "menu", "tag", "library", "manages", "the",
                "complex", "process", "of", "creating", "menus", "in",
                "JavaScript", "The", "menu", "tag", "itself", "is",
                "an", "abstract", "class", "that", "extends", "the",
                "TagSupport", "class", "and", "overrides", "the",
                "doStartTag", "and", "doEndTag", "methods.", "The",
                "getMenu", "method,", "which", "is", "a", "template",
                "method", "and", "should", "be", "overridden", "in",
                "the", "subclasses,", "provides", "JavaScript", "to",
                "add", "menu", "items", "in", "the", "menu",
                "structure", "created", "in", "the", "doStartTag",
                "method", "Subclasses", "of", "the", "menu", "tag",
                "override", "the", "getMenu", "method,", "which",
                "uses", "menu", "builders", "to", "render", "menu",
                "data", "from", "the", "data", "source"
            };

            // Taught as a non-match.
            string[] physicsDemoText =
            {
                "I", "witness", "a", "more", "subtle",
                "demonstration", "of", "real", "time", "physics",
                "simulation", "at", "the", "tiny", "Palo", "Alto",
                "office", "of", "Havok", "a", "competing", "physics",
                "engine", "shop", "On", "the", "screen", "a",
                "computer", "generated", "sailboat", "floats", "in",
                "a", "stone", "lined", "pool", "of", "water", "The",
                "company's", "genial", "Irish", "born", "cofounder",
                "Hugh", "Reynolds", "shows", "me", "how", "to",
                "push", "the", "boat", "with", "a", "mouse", "When",
                "I", "nudge", "it", "air", "fills", "the", "sail",
                "causing", "the", "ship", "to", "tilt", "leeward",
                "Ripples", "in", "the", "water", "deflect", "off",
                "the", "stones", "intersecting", "with", "one",
                "another", "I", "urge", "the", "boat", "onward",
                "and", "it", "glides", "effortlessly", "into", "the",
                "wall", "Reynolds", "tosses", "in", "a", "handful",
                "of", "virtual", "coins", "they", "spin", "through",
                "the", "air,", "splash", "into", "the", "water,",
                "and", "sink"
            };

            // Taught as a match.
            string[] nioText =
            {
                "The", "New", "Input", "Output", "NIO", "libraries",
                "introduced", "in", "Java", "2", "Platform",
                "Standard", "Edition", "J2SE", "1.4", "address",
                "this", "problem", "NIO", "uses", "a", "buffer",
                "oriented", "model", "That", "is", "NIO", "deals",
                "with", "data", "primarily", "in", "large", "blocks",
                "This", "eliminates", "the", "overhead", "caused",
                "by", "the", "stream", "model", "and", "even", "makes",
                "use", "of", "OS", "level", "facilities", "where",
                "possible", "to", "maximize", "throughput"
            };

            // Taught as a non-match.
            string[] sarsText =
            {
                "As", "governments", "scramble", "to", "contain",
                "SARS", "the", "World", "Health", "Organisation",
                "said", "it", "was", "extending", "the", "scope", "of",
                "its", "April", "2", "travel", "alert", "to",
                "include", "Beijing", "and", "the", "northern",
                "Chinese", "province", "of", "Shanxi", "together",
                "with", "Toronto", "the", "epicentre", "of", "the",
                "SARS", "outbreak", "in", "Canada"
            };

            // Taught as a match.
            string[] xpText =
            {
                "That", "was", "our", "worst", "problem", "I",
                "tried", "to", "see", "it", "the", "XP", "way", "Well",
                "what", "we", "can", "do", "is", "implement",
                "something", "I", "can't", "give", "any", "guarantees",
                "as", "to", "how", "much", "of", "it", "will", "be",
                "implemented", "in", "a", "month", "I", "won't",
                "even", "hazard", "a", "guess", "as", "to", "how",
                "long", "it", "would", "take", "to", "implement", "as",
                "a", "whole", "I", "can't", "draw", "UML", "diagrams",
                "for", "it", "or", "write", "technical", "specs",
                "that", "would", "take", "time", "from", "coding",
                "it", "which", "we", "can't", "afford", "Oh", "and",
                "I", "have", "two", "kids", "I", "can't", "do", "much",
                "OverTime", "But", "I", "should", "be", "able", "to",
                "do", "something", "simple", "that", "will", "have",
                "very", "few", "bugs", "and", "show", "a", "working",
                "program", "early", "and", "often"
            };

            // Training alternates match / non-match in the original order.
            subject.TeachMatch(category, menuTagText);
            subject.TeachNonMatch(category, physicsDemoText);
            subject.TeachMatch(category, nioText);
            subject.TeachNonMatch(category, sarsText);
            subject.TeachMatch(category, xpText);

            // Each list must be classified the way it was taught.
            Assert.IsTrue(subject.IsMatch(category, menuTagText));
            Assert.IsFalse(subject.IsMatch(category, physicsDemoText));
            Assert.IsTrue(subject.IsMatch(category, nioText));
            Assert.IsFalse(subject.IsMatch(category, sarsText));
            Assert.IsTrue(subject.IsMatch(category, xpText));
        }