/// <summary>
/// Trains the algorithm with priors switched off and checks the class/score
/// pairs that <c>PredictTokens</c> returns for the first two test documents.
/// </summary>
public void TWCNaiveBayesianAlgorithm_PredictTokens_NoPriors()
{
    // arrange
    var trainingSample = getSample();
    var firstClass = trainingSample.CachedClasses.ElementAt(0);
    var secondClass = trainingSample.CachedClasses.ElementAt(1);
    var preprocessor = getDefaultPreprocessor();
    var algorithm = new TWCNaiveBayesianAlgorithm
    {
        TFWeightingScheme = Registry.TFWeightingScheme.LogNormalization,
        IDFWeightingScheme = Registry.IDFWeightingScheme.Standart,
        UsePriors = false, // the setting under test: priors are disabled
        Preprocessor = preprocessor
    };

    // act
    algorithm.Train(trainingSample);
    var firstRanking = algorithm.PredictTokens(testDocs()[0], 2);
    var secondRanking = algorithm.PredictTokens(testDocs()[1], 2);

    // assert: first document -> first class ahead of second class
    Assert.AreEqual(2, firstRanking.Length);
    Assert.AreEqual(firstClass, firstRanking[0].Class);
    Assert.AreEqual(0.2830035D, firstRanking[0].Score, EPS);
    Assert.AreEqual(secondClass, firstRanking[1].Class);
    Assert.AreEqual(0.2188154D, firstRanking[1].Score, EPS);

    // assert: second document -> second class ahead of first class
    Assert.AreEqual(secondClass, secondRanking[0].Class);
    Assert.AreEqual(1.0367927D, secondRanking[0].Score, EPS);
    Assert.AreEqual(firstClass, secondRanking[1].Class);
    Assert.AreEqual(0.9308995D, secondRanking[1].Score, EPS);
}
/// <summary>
/// Trains the algorithm with priors enabled and checks the class/score
/// pairs that <c>PredictTokens</c> returns for the first two test documents.
/// </summary>
public void TWCNaiveBayesianAlgorithm_PredictTokens()
{
    // arrange
    var trainingSample = getSample();
    var firstClass = trainingSample.CachedClasses.ElementAt(0);
    var secondClass = trainingSample.CachedClasses.ElementAt(1);
    var preprocessor = getDefaultPreprocessor();
    var algorithm = new TWCNaiveBayesianAlgorithm
    {
        TFWeightingScheme = Registry.TFWeightingScheme.LogNormalization,
        IDFWeightingScheme = Registry.IDFWeightingScheme.Standart,
        UsePriors = true,
        Preprocessor = preprocessor
    };

    // act
    algorithm.Train(trainingSample);
    var firstRanking = algorithm.PredictTokens(testDocs()[0], 2);
    var secondRanking = algorithm.PredictTokens(testDocs()[1], 2);

    // assert: first document -> first class ahead of second class
    Assert.AreEqual(2, firstRanking.Length);
    Assert.AreEqual(firstClass, firstRanking[0].Class);
    Assert.AreEqual(-0.1224616D, firstRanking[0].Score, EPS);
    Assert.AreEqual(secondClass, firstRanking[1].Class);
    Assert.AreEqual(-0.8797969D, firstRanking[1].Score, EPS);

    // assert: second document -> with priors the first class still leads
    Assert.AreEqual(firstClass, secondRanking[0].Class);
    Assert.AreEqual(0.5254344D, secondRanking[0].Score, EPS);
    Assert.AreEqual(secondClass, secondRanking[1].Class);
    Assert.AreEqual(-0.0618195D, secondRanking[1].Score, EPS);
}
/// <summary>
/// Trains the algorithm with priors switched off and checks which class
/// <c>Predict</c> selects for each of the first three test documents.
/// </summary>
public void TWCNaiveBayesianAlgorithm_Predict_NoPriors()
{
    // arrange
    var trainingSample = getSample();
    var firstClass = trainingSample.CachedClasses.ElementAt(0);
    var secondClass = trainingSample.CachedClasses.ElementAt(1);
    var preprocessor = getDefaultPreprocessor();
    var algorithm = new TWCNaiveBayesianAlgorithm
    {
        TFWeightingScheme = Registry.TFWeightingScheme.LogNormalization,
        IDFWeightingScheme = Registry.IDFWeightingScheme.Standart,
        UsePriors = false, // the setting under test: priors are disabled
        Preprocessor = preprocessor
    };

    // act
    algorithm.Train(trainingSample);
    var predictedForDoc0 = algorithm.Predict(testDocs()[0]);
    var predictedForDoc1 = algorithm.Predict(testDocs()[1]);
    var predictedForDoc2 = algorithm.Predict(testDocs()[2]);

    // assert
    Assert.AreEqual(firstClass, predictedForDoc0);
    Assert.AreEqual(secondClass, predictedForDoc1);
    Assert.AreEqual(secondClass, predictedForDoc2);
}
/// <summary>
/// Builds a <c>TWCNaiveBayesianAlgorithm</c> whose preprocessor is composed of the
/// English simple tokenizer, stopword list, simple normalizer and Porter stemmer.
/// </summary>
/// <returns>The configured algorithm, typed as <c>TextAlgorithmBase</c>.</returns>
public static TextAlgorithmBase Create_TWCAlgorithm()
{
    // Components are created in the same order the preprocessor constructor receives them.
    var tokenizer = new EnglishSimpleTokenizer();
    var stopwords = new EnglishStopwords();
    var normalizer = new EnglishSimpleNormalizer();
    var stemmer = new EnglishPorterStemmer();
    var preprocessor = new TextPreprocessor(tokenizer, stopwords, normalizer, stemmer);

    var algorithm = new TWCNaiveBayesianAlgorithm
    {
        Preprocessor = preprocessor
    };

    return algorithm;
}
/// <summary>
/// Trains the algorithm and checks the 8-element feature vectors that
/// <c>ExtractFeatureVector</c> produces for the first two test documents.
/// </summary>
public void TWCNaiveBayesianAlgorithm_ExtractFeatureVector()
{
    // arrange
    var trainingSample = getSample();
    var firstClass = trainingSample.CachedClasses.ElementAt(0);
    var secondClass = trainingSample.CachedClasses.ElementAt(1);
    var preprocessor = getDefaultPreprocessor();
    var algorithm = new TWCNaiveBayesianAlgorithm
    {
        TFWeightingScheme = Registry.TFWeightingScheme.LogNormalization,
        IDFWeightingScheme = Registry.IDFWeightingScheme.Standart,
        Preprocessor = preprocessor
    };
    bool isEmpty; // required by the out parameter; its value is not asserted here

    // Expected element values, index by index, for each document's vector.
    int[] expectedForDoc0 = { 1, 0, 1, 0, 0, 0, 0, 0 };
    int[] expectedForDoc1 = { 1, 2, 1, 0, 2, 0, 1, 1 };

    // act
    algorithm.Train(trainingSample);
    var vectorForDoc0 = algorithm.ExtractFeatureVector(testDocs()[0], out isEmpty);
    var vectorForDoc1 = algorithm.ExtractFeatureVector(testDocs()[1], out isEmpty);

    // assert
    Assert.AreEqual(8, vectorForDoc0.Length);
    for (int i = 0; i < expectedForDoc0.Length; i++)
    {
        Assert.AreEqual(expectedForDoc0[i], vectorForDoc0[i]);
    }

    Assert.AreEqual(8, vectorForDoc1.Length);
    for (int i = 0; i < expectedForDoc1.Length; i++)
    {
        Assert.AreEqual(expectedForDoc1[i], vectorForDoc1[i]);
    }
}
/// <summary>
/// Trains the algorithm on the sample fixture and checks the state it exposes
/// afterwards: training sample, preprocessor, class histogram, data counts,
/// vocabulary, prior probabilities and per-class weights.
/// </summary>
public void TWCNaiveBayesianAlgorithm_Train()
{
    // arrange
    var trainingSample = getSample();
    var firstClass = trainingSample.CachedClasses.ElementAt(0);
    var secondClass = trainingSample.CachedClasses.ElementAt(1);
    var preprocessor = getDefaultPreprocessor();
    var algorithm = new TWCNaiveBayesianAlgorithm
    {
        TFWeightingScheme = Registry.TFWeightingScheme.LogNormalization,
        IDFWeightingScheme = Registry.IDFWeightingScheme.Standart,
        Preprocessor = preprocessor
    };

    // Expected vocabulary entries, in index order.
    string[] expectedVocabulary =
    {
        "cat", "like", "icecream", "at", "dog", "meet", "world", "seven"
    };

    // Expected weights: one row per class, one column per vocabulary index.
    double[][] expectedWeights =
    {
        new[]
        {
            0.1415017531D, 0.1169948657D, 0.1415017531D, 0.1415017531D,
            0.0969081526D, 0.1415017531D, 0.1123506098D, 0.1077393596D
        },
        new[]
        {
            0.0968271218, 0.1314521840, 0.1219882919, 0.1250806983,
            0.1396735112, 0.1092522527, 0.1190328859, 0.1566930542
        }
    };

    // act
    algorithm.Train(trainingSample);

    // assert: references handed to the algorithm are exposed unchanged
    Assert.AreEqual(trainingSample, algorithm.TrainingSample);
    Assert.AreEqual(preprocessor, algorithm.Preprocessor);

    // assert: class histogram and overall sizes
    Assert.AreEqual(2, algorithm.ClassHist.Length);
    Assert.AreEqual(4, algorithm.ClassHist[0]);
    Assert.AreEqual(2, algorithm.ClassHist[1]);
    Assert.AreEqual(6, algorithm.DataCount);
    Assert.AreEqual(8, algorithm.DataDim);

    // assert: vocabulary
    Assert.AreEqual(8, algorithm.Vocabulary.Count);
    for (int i = 0; i < expectedVocabulary.Length; i++)
    {
        Assert.AreEqual(expectedVocabulary[i], algorithm.Vocabulary[i]);
    }

    // assert: priors are the logs of the class frequencies (4 of 6 and 2 of 6)
    Assert.AreEqual(2, algorithm.PriorProbs.Length);
    Assert.AreEqual(Math.Log(4 / 6.0D), algorithm.PriorProbs[0], EPS);
    Assert.AreEqual(Math.Log(2 / 6.0D), algorithm.PriorProbs[1], EPS);

    // assert: weights
    Assert.AreEqual(2, algorithm.Weights.Length);
    Assert.AreEqual(8, algorithm.Weights[0].Length);
    Assert.AreEqual(8, algorithm.Weights[1].Length);
    for (int classIndex = 0; classIndex < expectedWeights.Length; classIndex++)
    {
        for (int termIndex = 0; termIndex < expectedWeights[classIndex].Length; termIndex++)
        {
            Assert.AreEqual(
                expectedWeights[classIndex][termIndex],
                algorithm.Weights[classIndex][termIndex],
                EPS);
        }
    }
}