Exemplo n.º 1
0
        /// <summary>
        /// Trains a <c>GeneralTextAlgorithm</c> that wraps a triangular-kernel
        /// <c>NaiveBayesianKernelAlgorithm</c> on the shared sample and checks which class
        /// is predicted for each of the first three test documents.
        /// </summary>
        public void GeneralTextAlgorithm_Predict()
        {
            // arrange
            var trainingSample = getSample();
            var firstClass     = trainingSample.CachedClasses.ElementAt(0);
            var secondClass    = trainingSample.CachedClasses.ElementAt(1);
            var preprocessor   = getDefaultPreprocessor();

            var bayes   = new NaiveBayesianKernelAlgorithm(new TriangularKernel(), 2.0D);
            var textAlg = new GeneralTextAlgorithm(bayes);
            textAlg.Preprocessor = preprocessor;

            // act
            textAlg.Train(trainingSample);
            var predicted1 = textAlg.Predict(testDocs()[0]);
            var predicted2 = textAlg.Predict(testDocs()[1]);
            var predicted3 = textAlg.Predict(testDocs()[2]);

            // assert: the first document goes to the first class, the other two to the second
            Assert.AreEqual(firstClass,  predicted1);
            Assert.AreEqual(secondClass, predicted2);
            Assert.AreEqual(secondClass, predicted3);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Trains a triangular-kernel <c>NaiveBayesianKernelAlgorithm</c> on three 2-D points
        /// (two labelled "A", one labelled "B") and checks the value returned by
        /// <c>CalculateClassScore</c> for three query points against both classes.
        /// </summary>
        public void NaiveBayesianAlgorithm_CalculateClassScore()
        {
            // arrange
            var bayes = new NaiveBayesianKernelAlgorithm(new TriangularKernel(), 2.0D);
            var trainingSample = new ClassifiedSample <double[]>
            {
                { new[] { 2.0, 1.0 }, new Class("A", 0) },
                { new[] { 0.0, 3.0 }, new Class("A", 0) },
                { new[] { 4.0, 3.0 }, new Class("B", 1) }
            };

            // act
            bayes.Train(trainingSample);
            var p1ScoreA = bayes.CalculateClassScore(new[] { 1.0, 2.0 }, new Class("A", 0));
            var p1ScoreB = bayes.CalculateClassScore(new[] { 1.0, 2.0 }, new Class("B", 1));
            var p2ScoreA = bayes.CalculateClassScore(new[] { 2.0, 2.0 }, new Class("A", 0));
            var p2ScoreB = bayes.CalculateClassScore(new[] { 2.0, 2.0 }, new Class("B", 1));
            var p3ScoreA = bayes.CalculateClassScore(new[] { 3.0, 2.0 }, new Class("A", 0));
            var p3ScoreB = bayes.CalculateClassScore(new[] { 3.0, 2.0 }, new Class("B", 1));

            // assert: finite scores are compared with tolerance EPS,
            // negative-infinity scores are compared exactly
            var logOneOver24 = Math.Log(1 / 24.0D);
            var logOneOver48 = Math.Log(1 / 48.0D);

            Assert.AreEqual(logOneOver24, p1ScoreA, EPS);
            Assert.AreEqual(double.NegativeInfinity, p1ScoreB);
            Assert.AreEqual(logOneOver24, p2ScoreA, EPS);
            Assert.AreEqual(double.NegativeInfinity, p2ScoreB);
            Assert.AreEqual(logOneOver48, p3ScoreA, EPS);
            Assert.AreEqual(logOneOver48, p3ScoreB, EPS);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Trains a <c>GeneralTextAlgorithm</c> whose naive Bayesian sub-algorithm has
        /// <c>UseKernelMinValue</c> switched on with <c>KernelMinValue = EPS_ROUGH</c>, then checks
        /// the class and score of both entries returned by <c>PredictTokens(doc, 2)</c> for the
        /// first two test documents.
        /// </summary>
        public void GeneralTextAlgorithm_PredictTokens()
        {
            // arrange
            var trainingSample = getSample();
            var firstClass     = trainingSample.CachedClasses.ElementAt(0);
            var secondClass    = trainingSample.CachedClasses.ElementAt(1);
            var preprocessor   = getDefaultPreprocessor();

            var bayes = new NaiveBayesianKernelAlgorithm(new TriangularKernel(), 0.5D);
            bayes.UseKernelMinValue = true;
            bayes.KernelMinValue    = EPS_ROUGH;

            var textAlg = new GeneralTextAlgorithm(bayes);
            textAlg.Preprocessor = preprocessor;

            // act
            textAlg.Train(trainingSample);
            var tokens1 = textAlg.PredictTokens(testDocs()[0], 2);
            var tokens2 = textAlg.PredictTokens(testDocs()[1], 2);

            // assert
            // Three of the expected scores contain EPS_ROUGH cubed; it is computed once,
            // left to right, exactly as in the inline expressions it replaces.
            var minCubed = EPS_ROUGH * EPS_ROUGH * EPS_ROUGH;

            Assert.AreEqual(2, tokens1.Length);

            Assert.AreEqual(firstClass, tokens1[0].Class);
            Assert.AreEqual(Math.Log(27.0D / 8), tokens1[0].Score, EPS);
            Assert.AreEqual(secondClass, tokens1[1].Class);
            Assert.AreEqual(Math.Log(minCubed * 8.0D / 6), tokens1[1].Score, EPS);

            Assert.AreEqual(secondClass, tokens2[0].Class);
            Assert.AreEqual(Math.Log(minCubed * 4.0 / 3), tokens2[0].Score, EPS);
            Assert.AreEqual(firstClass, tokens2[1].Class);
            Assert.AreEqual(Math.Log(minCubed / 4), tokens2[1].Score, EPS);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Trains a triangular-kernel <c>NaiveBayesianKernelAlgorithm</c> on five 2-D points
        /// (three labelled "A", two labelled "B") and checks the class predicted for three
        /// query points.
        /// </summary>
        public void NaiveBayesianAlgorithm_Predict()
        {
            // arrange
            var bayes = new NaiveBayesianKernelAlgorithm(new TriangularKernel(), 0.3D);
            var trainingSample = new ClassifiedSample <double[]>
            {
                { new[] { 0.2, 0.2 }, new Class("A", 0) },
                { new[] { 0.4, 0.6 }, new Class("A", 0) },
                { new[] { 0.6, 0.4 }, new Class("A", 0) },
                { new[] { 0.8, 0.6 }, new Class("B", 1) },
                { new[] { 0.8, 0.8 }, new Class("B", 1) }
            };

            // act
            bayes.Train(trainingSample);
            var predicted1 = bayes.Predict(new[] { 0.4, 0.4 });
            var predicted2 = bayes.Predict(new[] { 0.6, 0.6 });
            var predicted3 = bayes.Predict(new[] { 0.9, 0.7 });

            // assert: the first two query points are expected in "A", the last one in "B"
            Assert.AreEqual(new Class("A", 0), predicted1);
            Assert.AreEqual(new Class("A", 0), predicted2);
            Assert.AreEqual(new Class("B", 1), predicted3);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Console demo for the Gaussian-kernel <c>NaiveBayesianKernelAlgorithm</c>:
        /// trains on <c>Data.TrainingSample</c>, calls <c>StatUtils.OptimizeLOO</c> over the
        /// range [0.01, 5.0) with step 0.05 and reads the resulting <c>H</c>, prints margins,
        /// prints the error count on <c>Data.Data</c> for every <c>H</c> in the sweep, and
        /// finally hands the algorithm (with <c>H</c> restored to the optimum) to
        /// <c>Visualizer.Run</c>.
        /// </summary>
        private void doNaiveBayesianKernelAlgorithmTest()
        {
            var bayes = new NaiveBayesianKernelAlgorithm(new GaussianKernel());
            bayes.Train(Data.TrainingSample);

            // LOO
            double hmin = 0.01D;
            double hmax = 5.0D;
            double step = 0.05D;

            StatUtils.OptimizeLOO(bayes, hmin, hmax, step);
            var optH = bayes.H;

            Console.WriteLine("Naive Bayesian: optimal h is {0}", optH);
            Console.WriteLine();

            // Margins
            Console.WriteLine("Margins:");
            calculateMargin(bayes);
            Console.WriteLine();

            // Error distribution
            var optimalLine = string.Empty;

            Console.WriteLine("Errors:");
            var gridH = hmin;
            while (gridH < hmax)
            {
                // When the optimum falls inside [gridH, gridH + step), evaluate exactly at the
                // optimum instead of at the grid point, so the optimal row appears in the table.
                var h = (gridH <= optH && gridH + step > optH) ? optH : gridH;
                var isOptimal = h == optH;

                bayes.H = h;
                var errors     = bayes.GetErrors(Data.Data, 0, true);
                var errorCount = errors.Count();
                var total      = Data.Data.Count;
                var percent    = Math.Round(100.0F * errorCount / total, 2);
                var line       = string.Format("{0}:\t{1} of {2}\t({3}%) {4}",
                                               Math.Round(h, 2),
                                               errorCount,
                                               total,
                                               percent,
                                               isOptimal ? "<-LOO optimal" : string.Empty);
                Console.WriteLine(line);

                // remember the row for the optimum so it can be repeated in the summary
                if (isOptimal)
                {
                    optimalLine = line;
                }

                gridH += step;
            }

            Console.WriteLine();
            Console.WriteLine("-----------------------------------------");
            Console.WriteLine("Bayesian: optimal h is {0}", optH);
            Console.WriteLine(optimalLine);

            // the sweep left H at its last grid value; restore the optimum before visualizing
            bayes.H = optH;
            Visualizer.Run(bayes);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Builds a <c>GeneralTextAlgorithm</c> configured with an English text preprocessor
        /// (simple tokenizer, stopwords, simple normalizer, Porter stemmer) and a
        /// triangular-kernel <c>NaiveBayesianKernelAlgorithm</c> that has a kernel minimum
        /// value of 0.000001 enabled.
        /// </summary>
        /// <returns>The configured algorithm, typed as <c>TextAlgorithmBase</c>.</returns>
        public static TextAlgorithmBase Create_GeneralTextAlgorithm()
        {
            var preprocessor = new TextPreprocessor(new EnglishSimpleTokenizer(),
                                                    new EnglishStopwords(),
                                                    new EnglishSimpleNormalizer(),
                                                    new EnglishPorterStemmer());

            var bayes = new NaiveBayesianKernelAlgorithm(new TriangularKernel(), 0.5D);
            bayes.UseKernelMinValue = true;
            bayes.KernelMinValue    = 0.000001D;

            var textAlg = new GeneralTextAlgorithm(bayes);
            textAlg.Preprocessor = preprocessor;

            return textAlg;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Trains a <c>GeneralTextAlgorithm</c> on the shared sample and inspects the state of
        /// its naive Bayesian sub-algorithm afterwards: selected feature values and classes of
        /// the first and sixth training rows, the prior probabilities, and the
        /// dimension / count / class-histogram figures.
        /// </summary>
        public void GeneralTextAlgorithm_Train()
        {
            // arrange
            var trainingSample = getSample();
            var firstClass     = trainingSample.CachedClasses.ElementAt(0);
            var secondClass    = trainingSample.CachedClasses.ElementAt(1);
            var preprocessor   = getDefaultPreprocessor();

            var bayes   = new NaiveBayesianKernelAlgorithm(new TriangularKernel(), 2.0D);
            var textAlg = new GeneralTextAlgorithm(bayes);
            textAlg.Preprocessor = preprocessor;

            // act
            textAlg.Train(trainingSample);

            // assert
            Assert.AreEqual(bayes, textAlg.SubAlgorithm);
            var rows = bayes.TrainingSample.ToList();

            // first training row
            Assert.AreEqual(1, rows[0].Key[0]);
            Assert.AreEqual(1, rows[0].Key[1]);
            Assert.AreEqual(1, rows[0].Key[2]);
            Assert.AreEqual(0, rows[0].Key[7]);
            Assert.AreEqual(firstClass, rows[0].Value);

            // sixth training row
            Assert.AreEqual(0, rows[5].Key[3]);
            Assert.AreEqual(2, rows[5].Key[4]);
            Assert.AreEqual(0, rows[5].Key[5]);
            Assert.AreEqual(1, rows[5].Key[6]);
            Assert.AreEqual(secondClass, rows[5].Value);

            // priors are compared as logarithms of the class shares 4/6 and 2/6
            Assert.AreEqual(2, bayes.PriorProbs.Length);
            Assert.AreEqual(Math.Log(4.0D / 6), bayes.PriorProbs[0], EPS);
            Assert.AreEqual(Math.Log(2.0D / 6), bayes.PriorProbs[1], EPS);

            // sample statistics
            Assert.AreEqual(8, bayes.DataDim);
            Assert.AreEqual(6, bayes.DataCount);
            Assert.AreEqual(2, bayes.ClassHist.Length);
            Assert.AreEqual(4, bayes.ClassHist[0]);
            Assert.AreEqual(2, bayes.ClassHist[1]);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Trains a <c>GeneralTextAlgorithm</c> on the shared sample and checks, element by
        /// element, the 8-component feature vectors that <c>ExtractFeatureVector</c> produces
        /// for the first two test documents.
        /// </summary>
        /// <remarks>
        /// The <c>isEmpty</c> out value is required by the call signature but is not asserted
        /// by this test.
        /// </remarks>
        public void GeneralTextAlgorithm_ExtractFeatureVector()
        {
            // arrange
            // (the class locals used by the sibling tests were never read here and are omitted)
            var sample = getSample();
            var prep   = getDefaultPreprocessor();

            var kernel = new TriangularKernel();
            var subAlg = new NaiveBayesianKernelAlgorithm(kernel, 2.0D);
            var alg    = new GeneralTextAlgorithm(subAlg)
            {
                Preprocessor = prep
            };
            bool isEmpty;

            // act
            alg.Train(sample);
            var result1 = alg.ExtractFeatureVector(testDocs()[0], out isEmpty);
            var result2 = alg.ExtractFeatureVector(testDocs()[1], out isEmpty);

            // assert: first document
            Assert.AreEqual(8, result1.Length);
            Assert.AreEqual(1, result1[0]);
            Assert.AreEqual(0, result1[1]);
            Assert.AreEqual(1, result1[2]);
            Assert.AreEqual(0, result1[3]);
            Assert.AreEqual(0, result1[4]);
            Assert.AreEqual(0, result1[5]);
            Assert.AreEqual(0, result1[6]);
            Assert.AreEqual(0, result1[7]);

            // assert: second document
            Assert.AreEqual(8, result2.Length);
            Assert.AreEqual(1, result2[0]);
            Assert.AreEqual(2, result2[1]);
            Assert.AreEqual(1, result2[2]);
            Assert.AreEqual(0, result2[3]);
            Assert.AreEqual(2, result2[4]);
            Assert.AreEqual(0, result2[5]);
            Assert.AreEqual(1, result2[6]);
            Assert.AreEqual(1, result2[7]);
        }