/// <summary>
/// Verifies that a trained GeneralTextAlgorithm predicts the expected class
/// for each of the three test documents.
/// </summary>
public void GeneralTextAlgorithm_Predict()
{
  // arrange
  var sample = getSample();
  var CLS1 = sample.CachedClasses.ElementAt(0);
  var CLS2 = sample.CachedClasses.ElementAt(1);
  var prep = getDefaultPreprocessor();
  var kernel = new TriangularKernel();
  var subAlg = new NaiveBayesianKernelAlgorithm(kernel, 2.0D);
  var alg = new GeneralTextAlgorithm(subAlg) { Preprocessor = prep };

  // act
  alg.Train(sample);

  // Fix: fetch the document fixture once instead of re-invoking testDocs()
  // for every prediction (assumes testDocs() is deterministic, which the
  // original code already relied on by indexing successive calls).
  var docs = testDocs();
  var result1 = alg.Predict(docs[0]);
  var result2 = alg.Predict(docs[1]);
  var result3 = alg.Predict(docs[2]);

  // assert
  Assert.AreEqual(CLS1, result1);
  Assert.AreEqual(CLS2, result2);
  Assert.AreEqual(CLS2, result3);
}
/// <summary>
/// Checks per-class log-scores of a trained kernel Naive Bayes classifier
/// at three probe points: inside class A's support, on the boundary, and
/// in the overlap region of both classes.
/// </summary>
public void NaiveBayesianAlgorithm_CalculateClassScore()
{
  // arrange
  var kernel = new TriangularKernel();
  var alg = new NaiveBayesianKernelAlgorithm(kernel, 2.0D);
  var sample = new ClassifiedSample <double[]>
  {
    { new[] { 2.0, 1.0 }, new Class("A", 0) },
    { new[] { 0.0, 3.0 }, new Class("A", 0) },
    { new[] { 4.0, 3.0 }, new Class("B", 1) }
  };

  // act
  alg.Train(sample);
  var scoreA1 = alg.CalculateClassScore(new[] { 1.0, 2.0 }, new Class("A", 0));
  var scoreB1 = alg.CalculateClassScore(new[] { 1.0, 2.0 }, new Class("B", 1));
  var scoreA2 = alg.CalculateClassScore(new[] { 2.0, 2.0 }, new Class("A", 0));
  var scoreB2 = alg.CalculateClassScore(new[] { 2.0, 2.0 }, new Class("B", 1));
  var scoreA3 = alg.CalculateClassScore(new[] { 3.0, 2.0 }, new Class("A", 0));
  var scoreB3 = alg.CalculateClassScore(new[] { 3.0, 2.0 }, new Class("B", 1));

  // assert
  // Points 1 and 2 lie outside class B's kernel support -> -Infinity for B.
  Assert.AreEqual(Math.Log(1 / 24.0D), scoreA1, EPS);
  Assert.AreEqual(double.NegativeInfinity, scoreB1);
  Assert.AreEqual(Math.Log(1 / 24.0D), scoreA2, EPS);
  Assert.AreEqual(double.NegativeInfinity, scoreB2);
  // Point 3 sits in the overlap: equal scores for both classes.
  Assert.AreEqual(Math.Log(1 / 48.0D), scoreA3, EPS);
  Assert.AreEqual(Math.Log(1 / 48.0D), scoreB3, EPS);
}
/// <summary>
/// Verifies PredictTokens returns the top-2 (class, score) pairs in the
/// expected order with the expected log-scores, using a kernel floor
/// (KernelMinValue) so out-of-support features contribute EPS_ROUGH
/// instead of zero.
/// </summary>
public void GeneralTextAlgorithm_PredictTokens()
{
  // arrange
  var sample = getSample();
  var CLS1 = sample.CachedClasses.ElementAt(0);
  var CLS2 = sample.CachedClasses.ElementAt(1);
  var prep = getDefaultPreprocessor();
  var kernel = new TriangularKernel();
  var subAlg = new NaiveBayesianKernelAlgorithm(kernel, 0.5D)
  {
    UseKernelMinValue = true,
    KernelMinValue = EPS_ROUGH
  };
  var alg = new GeneralTextAlgorithm(subAlg) { Preprocessor = prep };

  // act
  alg.Train(sample);

  // Fix: fetch the document fixture once instead of calling testDocs()
  // per prediction (assumes testDocs() is deterministic, as the original
  // indexing of successive calls already presumed).
  var docs = testDocs();
  var result1 = alg.PredictTokens(docs[0], 2);
  var result2 = alg.PredictTokens(docs[1], 2);

  // assert
  Assert.AreEqual(2, result1.Length);
  Assert.AreEqual(CLS1, result1[0].Class);
  Assert.AreEqual(Math.Log(27.0D / 8), result1[0].Score, EPS);
  Assert.AreEqual(CLS2, result1[1].Class);
  Assert.AreEqual(Math.Log(EPS_ROUGH * EPS_ROUGH * EPS_ROUGH * 8.0D / 6), result1[1].Score, EPS);
  Assert.AreEqual(CLS2, result2[0].Class);
  Assert.AreEqual(Math.Log(EPS_ROUGH * EPS_ROUGH * EPS_ROUGH * 4.0 / 3), result2[0].Score, EPS);
  Assert.AreEqual(CLS1, result2[1].Class);
  Assert.AreEqual(Math.Log(EPS_ROUGH * EPS_ROUGH * EPS_ROUGH / 4), result2[1].Score, EPS);
}
/// <summary>
/// Checks end-to-end Predict on a small two-class 2D sample: two probes in
/// class A territory, one near class B's cluster.
/// </summary>
public void NaiveBayesianAlgorithm_Predict()
{
  // arrange
  var kernel = new TriangularKernel();
  var alg = new NaiveBayesianKernelAlgorithm(kernel, 0.3D);
  var sample = new ClassifiedSample <double[]>
  {
    { new[] { 0.2, 0.2 }, new Class("A", 0) },
    { new[] { 0.4, 0.6 }, new Class("A", 0) },
    { new[] { 0.6, 0.4 }, new Class("A", 0) },
    { new[] { 0.8, 0.6 }, new Class("B", 1) },
    { new[] { 0.8, 0.8 }, new Class("B", 1) }
  };

  // act
  alg.Train(sample);
  var predicted1 = alg.Predict(new[] { 0.4, 0.4 });
  var predicted2 = alg.Predict(new[] { 0.6, 0.6 });
  var predicted3 = alg.Predict(new[] { 0.9, 0.7 });

  // assert
  Assert.AreEqual(new Class("A", 0), predicted1);
  Assert.AreEqual(new Class("A", 0), predicted2);
  Assert.AreEqual(new Class("B", 1), predicted3);
}
/// <summary>
/// Demo run for the Gaussian-kernel Naive Bayes classifier: trains on the
/// shared sample, finds the LOO-optimal bandwidth, prints margins, then
/// prints the error count across the whole bandwidth grid (snapping one
/// grid point onto the optimal h so it appears in the table), and finally
/// launches the visualizer at the optimal bandwidth.
/// </summary>
private void doNaiveBayesianKernelAlgorithmTest()
{
  var alg = new NaiveBayesianKernelAlgorithm(new GaussianKernel());
  alg.Train(Data.TrainingSample);

  // LOO bandwidth optimization over [hmin, hmax] with the given step
  const double hmin = 0.01D;
  const double hmax = 5.0D;
  const double step = 0.05D;
  StatUtils.OptimizeLOO(alg, hmin, hmax, step);
  var optH = alg.H;
  Console.WriteLine("Naive Bayesian: optimal h is {0}", optH);
  Console.WriteLine();

  // Margins
  Console.WriteLine("Margins:");
  calculateMargin(alg);
  Console.WriteLine();

  // Error distribution over the bandwidth grid
  var message = string.Empty;
  Console.WriteLine("Errors:");
  for (var h1 = hmin; h1 < hmax; h1 += step)
  {
    // snap the grid point straddling optH to exactly optH,
    // so the equality check against optH below is exact
    var h = (h1 <= optH && h1 + step > optH) ? optH : h1;
    alg.H = h;
    var errorCount = alg.GetErrors(Data.Data, 0, true).Count();
    var totalCount = Data.Data.Count;
    var percent = Math.Round(100.0F * errorCount / totalCount, 2);
    var line = string.Format("{0}:\t{1} of {2}\t({3}%) {4}",
                             Math.Round(h, 2),
                             errorCount,
                             totalCount,
                             percent,
                             h == optH ? "<-LOO optimal" : string.Empty);
    Console.WriteLine(line);
    if (h == optH)
    {
      message = line;
    }
  }

  Console.WriteLine();
  Console.WriteLine("-----------------------------------------");
  Console.WriteLine("Bayesian: optimal h is {0}", optH);
  Console.WriteLine(message);

  alg.H = optH;
  Visualizer.Run(alg);
}
/// <summary>
/// Factory for a GeneralTextAlgorithm wired with the standard English
/// preprocessing pipeline (tokenize -> stopwords -> normalize -> stem)
/// and a triangular-kernel Naive Bayes sub-algorithm with a kernel floor.
/// </summary>
public static TextAlgorithmBase Create_GeneralTextAlgorithm()
{
  var preprocessor = new TextPreprocessor(new EnglishSimpleTokenizer(),
                                          new EnglishStopwords(),
                                          new EnglishSimpleNormalizer(),
                                          new EnglishPorterStemmer());

  var subAlgorithm = new NaiveBayesianKernelAlgorithm(new TriangularKernel(), 0.5D)
  {
    UseKernelMinValue = true,
    KernelMinValue = 0.000001D
  };

  return new GeneralTextAlgorithm(subAlgorithm) { Preprocessor = preprocessor };
}
/// <summary>
/// Verifies that training a GeneralTextAlgorithm produces the expected
/// feature vectors in the sub-algorithm's training sample, plus the
/// expected prior log-probabilities, dimensions, counts, and class
/// histogram.
/// </summary>
public void GeneralTextAlgorithm_Train()
{
  // arrange
  var sample = getSample();
  var CLS1 = sample.CachedClasses.ElementAt(0);
  var CLS2 = sample.CachedClasses.ElementAt(1);
  var prep = getDefaultPreprocessor();
  var kernel = new TriangularKernel();
  var subAlg = new NaiveBayesianKernelAlgorithm(kernel, 2.0D);
  var alg = new GeneralTextAlgorithm(subAlg) { Preprocessor = prep };

  // act
  alg.Train(sample);

  // assert
  Assert.AreEqual(subAlg, alg.SubAlgorithm);

  // spot-check the vectorized training sample: first and sixth documents
  var trainingSet = subAlg.TrainingSample.ToList();
  Assert.AreEqual(1, trainingSet[0].Key[0]);
  Assert.AreEqual(1, trainingSet[0].Key[1]);
  Assert.AreEqual(1, trainingSet[0].Key[2]);
  Assert.AreEqual(0, trainingSet[0].Key[7]);
  Assert.AreEqual(CLS1, trainingSet[0].Value);
  Assert.AreEqual(0, trainingSet[5].Key[3]);
  Assert.AreEqual(2, trainingSet[5].Key[4]);
  Assert.AreEqual(0, trainingSet[5].Key[5]);
  Assert.AreEqual(1, trainingSet[5].Key[6]);
  Assert.AreEqual(CLS2, trainingSet[5].Value);

  // priors: 4 of 6 docs in class 1, 2 of 6 in class 2 (log scale)
  Assert.AreEqual(2, subAlg.PriorProbs.Length);
  Assert.AreEqual(Math.Log(4.0D / 6), subAlg.PriorProbs[0], EPS);
  Assert.AreEqual(Math.Log(2.0D / 6), subAlg.PriorProbs[1], EPS);

  // dimensions and class histogram
  Assert.AreEqual(8, subAlg.DataDim);
  Assert.AreEqual(6, subAlg.DataCount);
  Assert.AreEqual(2, subAlg.ClassHist.Length);
  Assert.AreEqual(4, subAlg.ClassHist[0]);
  Assert.AreEqual(2, subAlg.ClassHist[1]);
}
/// <summary>
/// Verifies that ExtractFeatureVector turns the first two test documents
/// into the expected 8-dimensional term-count vectors after training.
/// </summary>
public void GeneralTextAlgorithm_ExtractFeatureVector()
{
  // arrange
  var sample = getSample();
  var CLS1 = sample.CachedClasses.ElementAt(0);
  var CLS2 = sample.CachedClasses.ElementAt(1);
  var prep = getDefaultPreprocessor();
  var kernel = new TriangularKernel();
  var subAlg = new NaiveBayesianKernelAlgorithm(kernel, 2.0D);
  var alg = new GeneralTextAlgorithm(subAlg) { Preprocessor = prep };
  bool isEmpty;

  // act
  alg.Train(sample);
  var vector1 = alg.ExtractFeatureVector(testDocs()[0], out isEmpty);
  var vector2 = alg.ExtractFeatureVector(testDocs()[1], out isEmpty);

  // assert
  // document 1: expected counts per vocabulary slot
  var expected1 = new[] { 1, 0, 1, 0, 0, 0, 0, 0 };
  Assert.AreEqual(expected1.Length, vector1.Length);
  for (var i = 0; i < expected1.Length; i++)
  {
    Assert.AreEqual(expected1[i], vector1[i]);
  }

  // document 2: expected counts per vocabulary slot
  var expected2 = new[] { 1, 2, 1, 0, 2, 0, 1, 1 };
  Assert.AreEqual(expected2.Length, vector2.Length);
  for (var i = 0; i < expected2.Length; i++)
  {
    Assert.AreEqual(expected2[i], vector2[i]);
  }
}