/// <summary>
/// Checks that the words data source handed to the constructor is the one
/// exposed through <c>WordsDataSource</c>.
/// </summary>
public void TestGetWordsDataSource()
{
    var dataSource = new SimpleWordsDataSource();

    var sut = new BayesianClassifier(dataSource);

    Assert.AreEqual(dataSource, sut.WordsDataSource);
}
/// <summary>
/// Checks that <c>IsCaseSensitive</c> is false on a freshly constructed
/// classifier and reads back true after being set to true.
/// </summary>
public void TestCaseSensitive()
{
    var sut = new BayesianClassifier();

    // Value right after construction.
    Assert.IsFalse(sut.IsCaseSensitive);

    // Value after explicitly switching the flag on.
    sut.IsCaseSensitive = true;
    Assert.IsTrue(sut.IsCaseSensitive);
}
/// <summary>
/// Checks that the tokenizer handed to the constructor is the one exposed
/// through <c>Tokenizer</c>.
/// </summary>
public void TestGetTokenizer()
{
    var dataSource = new SimpleWordsDataSource();
    // Kept typed as the interface so the same constructor overload is selected.
    ITokenizer wordBreakTokenizer = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WORD_BREAKS);

    var sut = new BayesianClassifier(dataSource, wordBreakTokenizer);

    Assert.AreEqual(wordBreakTokenizer, sut.Tokenizer);
}
/// <summary>
/// Checks that the stop word provider handed to the three-argument
/// constructor is the one exposed through <c>StopWordProvider</c>.
/// </summary>
public void TestGetStopWordProvider()
{
    SimpleWordsDataSource dataSource = new SimpleWordsDataSource();
    ITokenizer wordBreakTokenizer = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WORD_BREAKS);
    IStopWordProvider provider = new DefaultStopWordProvider();

    BayesianClassifier sut = new BayesianClassifier(dataSource, wordBreakTokenizer, provider);

    Assert.AreEqual(provider, sut.StopWordProvider);
}
/// <summary>
/// Creates the instance: builds a <c>BayesianClassifier</c> over an
/// ODBC-backed words data source and stores the supplied collaborators.
/// </summary>
/// <param name="stopWordFilter">Assigned to <c>StopWordFilter</c>.</param>
/// <param name="dataExtractor">Assigned to <c>DataExtractor</c>.</param>
public AdminApprovalHistory(IStopWordFilter stopWordFilter, IDataExtractor dataExtractor)
{
    // The connection string is read from the "ConnectionString" app setting.
    string connectionString = ConfigurationManager.AppSettings["ConnectionString"];

    IDbConnectionManager connections = new OdbcConnectionManager(connectionString);
    IWordsDataSource words = new OdbcWordsDataSource(connections);
    Classifier = new BayesianClassifier(words);

    StopWordFilter = stopWordFilter;
    DataExtractor = dataExtractor;
}
/// <summary>
/// Assigns probabilities to four integer features, classifies all four
/// together, and expects exactly one result with a probability of about 0.96.
/// </summary>
public void ClassifierTest()
{
    var sut = new BayesianClassifier<int>();
    sut.SetFeatureProbability(10, .50d);
    sut.SetFeatureProbability(20, .2d);
    sut.SetFeatureProbability(30, .50d);
    sut.SetFeatureProbability(69, .99d);

    var results = sut.Classify(new[] { 10, 20, 30, 69 });

    Assert.NotNull(results);
    Assert.AreEqual(1, results.Count);

    var only = results.ElementAt(0);
    // Compared with a tolerance of 0.009 rather than exactly.
    Assert.AreEqual(.96d, only.Probability, .009d);
}
/// <summary>
/// Classifies a six-word sentence twice against the default category: first
/// with an empty data source (expects the neutral probability exactly), then
/// after a probability is stored for each word (expects about 0.96).
/// </summary>
public void TestClassify()
{
    var dataSource = new SimpleWordsDataSource();
    var sut = new BayesianClassifier(dataSource);
    string[] words = { "This", "is", "a", "sentence", "about", "java" };

    // No probabilities stored yet: the result must equal the neutral constant (delta 0).
    Assert.AreEqual(
        IClassifierConstants.NEUTRAL_PROBABILITY,
        sut.Classify(ICategorizedClassifierConstants.DEFAULT_CATEGORY, words),
        0d);

    // Store one probability per word of the sentence.
    dataSource.SetWordProbability(new WordProbability("This", .5d));
    dataSource.SetWordProbability(new WordProbability("is", .5d));
    dataSource.SetWordProbability(new WordProbability("a", .5d));
    dataSource.SetWordProbability(new WordProbability("sentence", .2d));
    dataSource.SetWordProbability(new WordProbability("about", .5d));
    dataSource.SetWordProbability(new WordProbability("java", .99d));

    // Same sentence again, now compared with a tolerance of 0.009.
    Assert.AreEqual(
        .96d,
        sut.Classify(ICategorizedClassifierConstants.DEFAULT_CATEGORY, words),
        .009d);
}
/// <summary>
/// Checks <c>CalculateOverallProbability</c> for three words that all carry
/// probability 0.3 against the value p^3 / (p^3 + (1 - p)^3), within 0.0001.
/// </summary>
public void TestCalculateOverallProbability()
{
    const double p = 0.3d;
    const double tolerance = 0.0001d;

    WordProbability[] inputs =
    {
        new WordProbability("myWord1", p),
        new WordProbability("myWord2", p),
        new WordProbability("myWord3", p)
    };

    // Expected value computed by hand from the three equal probabilities.
    double product = (p * p * p);
    double complementProduct = (1 - p) * (1 - p) * (1 - p);
    double expected = product / (product + complementProduct);

    var sut = new BayesianClassifier();

    Assert.AreEqual(expected, sut.CalculateOverallProbability(inputs), tolerance);
}
/// <summary>
/// Sets probabilities for the features 10, 20, 30 and 69, classifies them as
/// one input, and expects a single result whose probability is roughly 0.96.
/// </summary>
public void ClassifierTest()
{
    BayesianClassifier<int> intClassifier = new BayesianClassifier<int>();

    intClassifier.SetFeatureProbability(10, .50d);
    intClassifier.SetFeatureProbability(20, .2d);
    intClassifier.SetFeatureProbability(30, .50d);
    intClassifier.SetFeatureProbability(69, .99d);

    var outcome = intClassifier.Classify(new[] { 10, 20, 30, 69 });
    Assert.NotNull(outcome);
    Assert.AreEqual(1, outcome.Count);

    // Inspect the single returned entry; tolerance is 0.009.
    var first = outcome.ElementAt(0);
    Assert.AreEqual(.96d, first.Probability, .009d);
}
/// <summary>
/// Feeds three word probabilities of 0.3 to <c>CalculateOverallProbability</c>
/// and compares the result with p^3 / (p^3 + (1 - p)^3), within 0.0001.
/// </summary>
public void TestCalculateOverallProbability()
{
    double wordChance = 0.3d;
    double allowedError = 0.0001d;

    WordProbability first = new WordProbability("myWord1", wordChance);
    WordProbability second = new WordProbability("myWord2", wordChance);
    WordProbability third = new WordProbability("myWord3", wordChance);
    WordProbability[] all = { first, second, third };

    // Hand-computed expectation for three identical probabilities.
    double hits = (wordChance * wordChance * wordChance);
    double misses = (1 - wordChance) * (1 - wordChance) * (1 - wordChance);
    double expected = hits / (hits + misses);

    BayesianClassifier sut = new BayesianClassifier();
    Assert.AreEqual(expected, sut.CalculateOverallProbability(all), allowedError);
}
/// <summary>
/// Teaches a string classifier matches and non-matches for the classes
/// "num" and "chr", then expects "1" to resolve to "num" and "c" to "chr".
/// </summary>
public void TeachTest()
{
    var sut = new BayesianClassifier<string>(0);

    // Digits are taught as matches for "num" and non-matches for "chr";
    // letters the other way around.
    sut.TeachMatch("num", "1", "2", "3", "1");
    sut.TeachMatch("num", "1", "2", "3");
    sut.TeachNonMatch("num", "a", "b", "c");
    sut.TeachMatch("chr", "a", "b", "c");
    sut.TeachNonMatch("chr", "1", "2", "3");

    var digitResult = sut.GetBestResult("1");
    Assert.NotNull(digitResult);
    Assert.AreEqual("num", digitResult.Class.Name);

    var letterResult = sut.GetBestResult("c");
    Assert.NotNull(letterResult);
    Assert.AreEqual("chr", letterResult.Class.Name);
}
/// <summary>
/// Checks <c>TransformWord</c>: a null word must cause an exception, a word is
/// lower-cased while the classifier is case-insensitive, and it is returned
/// unchanged once <c>IsCaseSensitive</c> is set to true.
/// </summary>
public void TestTransformWord()
{
    BayesianClassifier classifier = new BayesianClassifier();
    Assert.IsFalse(classifier.IsCaseSensitive);

    // Record whether the call threw and assert afterwards. Calling Assert.Fail
    // inside the try would be pointless here: the bare catch would swallow the
    // assertion failure as well, so the test could never fail on this path.
    string word = null;
    bool threw = false;
    try
    {
        classifier.TransformWord(word);
    }
    catch
    {
        // Any exception type is accepted, as in the original test.
        threw = true;
    }
    Assert.IsTrue(threw, "No exception thrown when null passed.");

    word = "myWord";
    Assert.AreEqual(word.ToLower(), classifier.TransformWord(word));

    classifier.IsCaseSensitive = true;
    Assert.AreEqual(word, classifier.TransformWord(word));
}
/// <summary>
/// Trains the classes "num" and "chr" with opposing match / non-match
/// samples and verifies the best result for "1" is "num" and for "c" is "chr".
/// </summary>
public void TeachTest()
{
    BayesianClassifier<string> trained = new BayesianClassifier<string>(0);

    // Training for the "num" class.
    trained.TeachMatch("num", "1", "2", "3", "1");
    trained.TeachMatch("num", "1", "2", "3");
    trained.TeachNonMatch("num", "a", "b", "c");

    // Training for the "chr" class.
    trained.TeachMatch("chr", "a", "b", "c");
    trained.TeachNonMatch("chr", "1", "2", "3");

    var best = trained.GetBestResult("1");
    Assert.NotNull(best);
    Assert.AreEqual("num", best.Class.Name);

    best = trained.GetBestResult("c");
    Assert.NotNull(best);
    Assert.AreEqual("chr", best.Class.Name);
}
/// <summary>
/// Creates a classifier wrapper for one topic: stores the topic number and
/// main directory, derives the topic file and directory names from the
/// number, and builds a <c>BayesianClassifier</c> over an in-memory
/// <c>SimpleWordsDataSource</c>.
/// </summary>
/// <param name="topicN">Topic number; used to form "topic{N}.txt" and the directory name.</param>
/// <param name="mainDirectory">Stored as given.</param>
public Classifier(int topicN, string mainDirectory)
{
    this.topicN = topicN;
    this.mainDirectory = mainDirectory;

    // Both derived names are built by string concatenation from the topic number.
    TopicFileName = "topic" + topicN + ".txt";
    TopicDir = "" + topicN;

    IWordsDataSource emptySource = new SimpleWordsDataSource();
    classifier = new BayesianClassifier(emptySource);
}
/// <summary>
/// Teaches the classifier five tokenized passages for the default category
/// (passages 1, 3 and 5 as matches, 2 and 4 as non-matches) and then checks
/// that <c>IsMatch</c> reports each passage the way it was taught.
/// </summary>
public void TestTeaching()
{
    var sut = new BayesianClassifier();

    // Passage 1 (taught as match).
    string[] menuTagWords =
    {
        "The", "menu", "tag", "library", "manages", "the", "complex", "process", "of", "creating",
        "menus", "in", "JavaScript", "The", "menu", "tag", "itself", "is", "an", "abstract",
        "class", "that", "extends", "the", "TagSupport", "class", "and", "overrides", "the", "doStartTag",
        "and", "doEndTag", "methods.", "The", "getMenu", "method,", "which", "is", "a", "template",
        "method", "and", "should", "be", "overridden", "in", "the", "subclasses,", "provides", "JavaScript",
        "to", "add", "menu", "items", "in", "the", "menu", "structure", "created", "in",
        "the", "doStartTag", "method", "Subclasses", "of", "the", "menu", "tag", "override", "the",
        "getMenu", "method,", "which", "uses", "menu", "builders", "to", "render", "menu", "data",
        "from", "the", "data", "source"
    };

    // Passage 2 (taught as non-match).
    string[] physicsDemoWords =
    {
        "I", "witness", "a", "more", "subtle", "demonstration", "of", "real", "time", "physics",
        "simulation", "at", "the", "tiny", "Palo", "Alto", "office", "of", "Havok", "a",
        "competing", "physics", "engine", "shop", "On", "the", "screen", "a", "computer", "generated",
        "sailboat", "floats", "in", "a", "stone", "lined", "pool", "of", "water", "The",
        "company's", "genial", "Irish", "born", "cofounder", "Hugh", "Reynolds", "shows", "me", "how",
        "to", "push", "the", "boat", "with", "a", "mouse", "When", "I", "nudge",
        "it", "air", "fills", "the", "sail", "causing", "the", "ship", "to", "tilt",
        "leeward", "Ripples", "in", "the", "water", "deflect", "off", "the", "stones", "intersecting",
        "with", "one", "another", "I", "urge", "the", "boat", "onward", "and", "it",
        "glides", "effortlessly", "into", "the", "wall", "Reynolds", "tosses", "in", "a", "handful",
        "of", "virtual", "coins", "they", "spin", "through", "the", "air,", "splash", "into",
        "the", "water,", "and", "sink"
    };

    // Passage 3 (taught as match).
    string[] nioWords =
    {
        "The", "New", "Input", "Output", "NIO", "libraries", "introduced", "in", "Java", "2",
        "Platform", "Standard", "Edition", "J2SE", "1.4", "address", "this", "problem", "NIO", "uses",
        "a", "buffer", "oriented", "model", "That", "is", "NIO", "deals", "with", "data",
        "primarily", "in", "large", "blocks", "This", "eliminates", "the", "overhead", "caused", "by",
        "the", "stream", "model", "and", "even", "makes", "use", "of", "OS", "level",
        "facilities", "where", "possible", "to", "maximize", "throughput"
    };

    // Passage 4 (taught as non-match).
    string[] sarsWords =
    {
        "As", "governments", "scramble", "to", "contain", "SARS", "the", "World", "Health", "Organisation",
        "said", "it", "was", "extending", "the", "scope", "of", "its", "April", "2",
        "travel", "alert", "to", "include", "Beijing", "and", "the", "northern", "Chinese", "province",
        "of", "Shanxi", "together", "with", "Toronto", "the", "epicentre", "of", "the", "SARS",
        "outbreak", "in", "Canada"
    };

    // Passage 5 (taught as match).
    string[] xpWords =
    {
        "That", "was", "our", "worst", "problem", "I", "tried", "to", "see", "it",
        "the", "XP", "way", "Well", "what", "we", "can", "do", "is", "implement",
        "something", "I", "can't", "give", "any", "guarantees", "as", "to", "how", "much",
        "of", "it", "will", "be", "implemented", "in", "a", "month", "I", "won't",
        "even", "hazard", "a", "guess", "as", "to", "how", "long", "it", "would",
        "take", "to", "implement", "as", "a", "whole", "I", "can't", "draw", "UML",
        "diagrams", "for", "it", "or", "write", "technical", "specs", "that", "would", "take",
        "time", "from", "coding", "it", "which", "we", "can't", "afford", "Oh", "and",
        "I", "have", "two", "kids", "I", "can't", "do", "much", "OverTime", "But",
        "I", "should", "be", "able", "to", "do", "something", "simple", "that", "will",
        "have", "very", "few", "bugs", "and", "show", "a", "working", "program", "early",
        "and", "often"
    };

    // Training phase: alternate match / non-match in the original order.
    sut.TeachMatch(ICategorizedClassifierConstants.DEFAULT_CATEGORY, menuTagWords);
    sut.TeachNonMatch(ICategorizedClassifierConstants.DEFAULT_CATEGORY, physicsDemoWords);
    sut.TeachMatch(ICategorizedClassifierConstants.DEFAULT_CATEGORY, nioWords);
    sut.TeachNonMatch(ICategorizedClassifierConstants.DEFAULT_CATEGORY, sarsWords);
    sut.TeachMatch(ICategorizedClassifierConstants.DEFAULT_CATEGORY, xpWords);

    // Verification phase: every passage must be classified as it was taught.
    Assert.IsTrue(sut.IsMatch(ICategorizedClassifierConstants.DEFAULT_CATEGORY, menuTagWords));
    Assert.IsFalse(sut.IsMatch(ICategorizedClassifierConstants.DEFAULT_CATEGORY, physicsDemoWords));
    Assert.IsTrue(sut.IsMatch(ICategorizedClassifierConstants.DEFAULT_CATEGORY, nioWords));
    Assert.IsFalse(sut.IsMatch(ICategorizedClassifierConstants.DEFAULT_CATEGORY, sarsWords));
    Assert.IsTrue(sut.IsMatch(ICategorizedClassifierConstants.DEFAULT_CATEGORY, xpWords));
}