예제 #1
0
        // Trains the classifier with UsePriors switched off, asks PredictTokens for two
        // results for each of the first two test documents, and checks the returned
        // class order and scores against precomputed reference values.
        public void TWCNaiveBayesianAlgorithm_PredictTokens_NoPriors()
        {
            // arrange
            var sample = getSample();
            var CLS1   = sample.CachedClasses.ElementAt(0);
            var CLS2   = sample.CachedClasses.ElementAt(1);
            var prep   = getDefaultPreprocessor();
            var alg    = new TWCNaiveBayesianAlgorithm()
            {
                TFWeightingScheme  = Registry.TFWeightingScheme.LogNormalization,
                IDFWeightingScheme = Registry.IDFWeightingScheme.Standart,
                UsePriors          = false, // the point of this test: priors are disabled
                Preprocessor       = prep
            };

            // act
            alg.Train(sample);
            var result1 = alg.PredictTokens(testDocs()[0], 2);
            var result2 = alg.PredictTokens(testDocs()[1], 2);

            // assert
            Assert.AreEqual(2, result1.Length);
            Assert.AreEqual(CLS1, result1[0].Class);
            Assert.AreEqual(0.2830035D, result1[0].Score, EPS);
            Assert.AreEqual(CLS2, result1[1].Class);
            Assert.AreEqual(0.2188154D, result1[1].Score, EPS);

            // Check the size of the second result as well, so that a wrong result count
            // is reported as a clear assertion failure rather than an index error.
            Assert.AreEqual(2, result2.Length);
            Assert.AreEqual(CLS2, result2[0].Class);
            Assert.AreEqual(1.0367927D, result2[0].Score, EPS);
            Assert.AreEqual(CLS1, result2[1].Class);
            Assert.AreEqual(0.9308995D, result2[1].Score, EPS);
        }
예제 #2
0
        // Trains the classifier with UsePriors switched on, asks PredictTokens for two
        // results for each of the first two test documents, and checks the returned
        // class order and scores against precomputed reference values.
        public void TWCNaiveBayesianAlgorithm_PredictTokens()
        {
            // arrange
            var sample = getSample();
            var CLS1   = sample.CachedClasses.ElementAt(0);
            var CLS2   = sample.CachedClasses.ElementAt(1);
            var prep   = getDefaultPreprocessor();
            var alg    = new TWCNaiveBayesianAlgorithm()
            {
                TFWeightingScheme  = Registry.TFWeightingScheme.LogNormalization,
                IDFWeightingScheme = Registry.IDFWeightingScheme.Standart,
                UsePriors          = true,
                Preprocessor       = prep
            };

            // act
            alg.Train(sample);
            var result1 = alg.PredictTokens(testDocs()[0], 2);
            var result2 = alg.PredictTokens(testDocs()[1], 2);

            // assert
            Assert.AreEqual(2, result1.Length);
            Assert.AreEqual(CLS1, result1[0].Class);
            Assert.AreEqual(-0.1224616D, result1[0].Score, EPS);
            Assert.AreEqual(CLS2, result1[1].Class);
            Assert.AreEqual(-0.8797969D, result1[1].Score, EPS);

            // Check the size of the second result as well, so that a wrong result count
            // is reported as a clear assertion failure rather than an index error.
            Assert.AreEqual(2, result2.Length);
            Assert.AreEqual(CLS1, result2[0].Class);
            Assert.AreEqual(0.5254344D, result2[0].Score, EPS);
            Assert.AreEqual(CLS2, result2[1].Class);
            Assert.AreEqual(-0.0618195D, result2[1].Score, EPS);
        }
예제 #3
0
        // With priors turned off, Predict must return the expected class for each of
        // the first three test documents.
        public void TWCNaiveBayesianAlgorithm_Predict_NoPriors()
        {
            // arrange: training data, the two reference classes and a configured classifier
            var trainingSample = getSample();
            var firstClass     = trainingSample.CachedClasses.ElementAt(0);
            var secondClass    = trainingSample.CachedClasses.ElementAt(1);
            var preprocessor   = getDefaultPreprocessor();

            var classifier = new TWCNaiveBayesianAlgorithm();
            classifier.TFWeightingScheme  = Registry.TFWeightingScheme.LogNormalization;
            classifier.IDFWeightingScheme = Registry.IDFWeightingScheme.Standart;
            classifier.UsePriors          = false; // the variant exercised by this test
            classifier.Preprocessor       = preprocessor;

            // act: train once, then classify each document
            classifier.Train(trainingSample);
            var predictedForDoc0 = classifier.Predict(testDocs()[0]);
            var predictedForDoc1 = classifier.Predict(testDocs()[1]);
            var predictedForDoc2 = classifier.Predict(testDocs()[2]);

            // assert
            Assert.AreEqual(firstClass,  predictedForDoc0);
            Assert.AreEqual(secondClass, predictedForDoc1);
            Assert.AreEqual(secondClass, predictedForDoc2);
        }
예제 #4
0
        /// <summary>
        /// Creates a <see cref="TWCNaiveBayesianAlgorithm"/> whose preprocessor is assembled
        /// from the English simple tokenizer, stopwords, simple normalizer and Porter stemmer.
        /// No other algorithm property is set, and the algorithm is not trained here.
        /// </summary>
        /// <returns>The new algorithm instance, typed as <see cref="TextAlgorithmBase"/>.</returns>
        public static TextAlgorithmBase Create_TWCAlgorithm()
        {
            // Build the preprocessing pipeline parts in the order the constructor takes them.
            var tokenizer    = new EnglishSimpleTokenizer();
            var stopwords    = new EnglishStopwords();
            var normalizer   = new EnglishSimpleNormalizer();
            var stemmer      = new EnglishPorterStemmer();
            var preprocessor = new TextPreprocessor(tokenizer, stopwords, normalizer, stemmer);

            return new TWCNaiveBayesianAlgorithm { Preprocessor = preprocessor };
        }
예제 #5
0
        // Trains the classifier and checks the feature vectors that ExtractFeatureVector
        // produces for the first two test documents: eight components each, compared
        // element by element against reference values.
        public void TWCNaiveBayesianAlgorithm_ExtractFeatureVector()
        {
            // arrange
            var sample = getSample();
            var prep   = getDefaultPreprocessor();
            var alg    = new TWCNaiveBayesianAlgorithm()
            {
                TFWeightingScheme  = Registry.TFWeightingScheme.LogNormalization,
                IDFWeightingScheme = Registry.IDFWeightingScheme.Standart,
                Preprocessor       = prep
            };
            bool isEmpty; // required by the out parameter; its value is not asserted here

            // act
            alg.Train(sample);
            var result1 = alg.ExtractFeatureVector(testDocs()[0], out isEmpty);
            var result2 = alg.ExtractFeatureVector(testDocs()[1], out isEmpty);

            // assert: first document
            Assert.AreEqual(8, result1.Length);
            Assert.AreEqual(1, result1[0]);
            Assert.AreEqual(0, result1[1]);
            Assert.AreEqual(1, result1[2]);
            Assert.AreEqual(0, result1[3]);
            Assert.AreEqual(0, result1[4]);
            Assert.AreEqual(0, result1[5]);
            Assert.AreEqual(0, result1[6]);
            Assert.AreEqual(0, result1[7]);

            // assert: second document
            Assert.AreEqual(8, result2.Length);
            Assert.AreEqual(1, result2[0]);
            Assert.AreEqual(2, result2[1]);
            Assert.AreEqual(1, result2[2]);
            Assert.AreEqual(0, result2[3]);
            Assert.AreEqual(2, result2[4]);
            Assert.AreEqual(0, result2[5]);
            Assert.AreEqual(1, result2[6]);
            Assert.AreEqual(1, result2[7]);
        }
예제 #6
0
        // Trains the classifier on the shared sample and verifies the state exposed
        // afterwards: the training sample and preprocessor references, class histogram,
        // data counts, vocabulary order, log prior probabilities and per-class weights.
        public void TWCNaiveBayesianAlgorithm_Train()
        {
            // arrange
            var sample = getSample();
            var prep   = getDefaultPreprocessor();
            var alg    = new TWCNaiveBayesianAlgorithm()
            {
                TFWeightingScheme  = Registry.TFWeightingScheme.LogNormalization,
                IDFWeightingScheme = Registry.IDFWeightingScheme.Standart,
                Preprocessor       = prep
            };

            // act
            alg.Train(sample);

            // assert: references and basic counts
            Assert.AreEqual(sample, alg.TrainingSample);
            Assert.AreEqual(prep, alg.Preprocessor);
            Assert.AreEqual(2, alg.ClassHist.Length);
            Assert.AreEqual(4, alg.ClassHist[0]);
            Assert.AreEqual(2, alg.ClassHist[1]);
            Assert.AreEqual(6, alg.DataCount);
            Assert.AreEqual(8, alg.DataDim);

            // assert: vocabulary content and order
            Assert.AreEqual(8, alg.Vocabulary.Count);
            Assert.AreEqual("cat", alg.Vocabulary[0]);
            Assert.AreEqual("like", alg.Vocabulary[1]);
            Assert.AreEqual("icecream", alg.Vocabulary[2]);
            Assert.AreEqual("at", alg.Vocabulary[3]);
            Assert.AreEqual("dog", alg.Vocabulary[4]);
            Assert.AreEqual("meet", alg.Vocabulary[5]);
            Assert.AreEqual("world", alg.Vocabulary[6]);
            Assert.AreEqual("seven", alg.Vocabulary[7]);

            // assert: priors are the natural log of each class's share of the 6 documents
            Assert.AreEqual(2, alg.PriorProbs.Length);
            Assert.AreEqual(Math.Log(4 / 6.0D), alg.PriorProbs[0], EPS);
            Assert.AreEqual(Math.Log(2 / 6.0D), alg.PriorProbs[1], EPS);

            // assert: one weight row per class, one weight per vocabulary entry
            Assert.AreEqual(2, alg.Weights.Length);
            Assert.AreEqual(8, alg.Weights[0].Length);
            Assert.AreEqual(8, alg.Weights[1].Length);
            Assert.AreEqual(0.1415017531D, alg.Weights[0][0], EPS);
            Assert.AreEqual(0.1169948657D, alg.Weights[0][1], EPS);
            Assert.AreEqual(0.1415017531D, alg.Weights[0][2], EPS);
            Assert.AreEqual(0.1415017531D, alg.Weights[0][3], EPS);
            Assert.AreEqual(0.0969081526D, alg.Weights[0][4], EPS);
            Assert.AreEqual(0.1415017531D, alg.Weights[0][5], EPS);
            Assert.AreEqual(0.1123506098D, alg.Weights[0][6], EPS);
            Assert.AreEqual(0.1077393596D, alg.Weights[0][7], EPS);

            Assert.AreEqual(0.0968271218D, alg.Weights[1][0], EPS);
            Assert.AreEqual(0.1314521840D, alg.Weights[1][1], EPS);
            Assert.AreEqual(0.1219882919D, alg.Weights[1][2], EPS);
            Assert.AreEqual(0.1250806983D, alg.Weights[1][3], EPS);
            Assert.AreEqual(0.1396735112D, alg.Weights[1][4], EPS);
            Assert.AreEqual(0.1092522527D, alg.Weights[1][5], EPS);
            Assert.AreEqual(0.1190328859D, alg.Weights[1][6], EPS);
            Assert.AreEqual(0.1566930542D, alg.Weights[1][7], EPS);
        }