public SupportVectorMachine <Gaussian> Learn()
{
    // Trains a one-class SVM over the instance's training set and returns it.
    // Gaussian kernel with sigma = 0.1; Nu = 0.1 bounds the fraction of
    // training points allowed to fall outside the learned boundary.
    var learner = new OneclassSupportVectorLearning <Gaussian>()
    {
        Kernel = new Gaussian(0.1),
        Nu = 0.1
    };

    return learner.Learn(trainDataJagged);
}
public void learn_test()
{
    #region doc_learn
    // Ensure that results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // Generate some data to be learned
    double[][] inputs =
    {
        new double[] { +1.0312479734420776 },
        new double[] { +0.99444115161895752 },
        new double[] { +0.21835240721702576 },
        new double[] { +0.47197291254997253 },
        new double[] { +0.68701112270355225 },
        new double[] { -0.58556461334228516 },
        new double[] { -0.64154046773910522 },
        new double[] { -0.66485315561294556 },
        new double[] { +0.37940266728401184 },
        new double[] { -0.61046308279037476 }
    };

    // Create a new One-class SVM learning algorithm
    var learner = new OneclassSupportVectorLearning <Linear>()
    {
        Kernel = new Linear(), // or, for example, 'new Gaussian(0.9)'
        Nu = 0.1
    };

    // Learn a support vector machine
    var machine = learner.Learn(inputs);

    // Test the machine
    double[] scores = machine.Score(inputs);

    // Compute the log-likelihood of the answer
    double logLikelihood = new LogLikelihoodLoss().Loss(scores);
    #endregion

    // Pin the exact learned model: log-likelihood, the two support-vector
    // weights, and which input rows became the support vectors.
    Assert.AreEqual(-1.6653345369377348E-16, logLikelihood, 1e-10);
    Assert.AreEqual(2, machine.Weights.Length);
    Assert.AreEqual(0.39198910030993617, machine.Weights[0], 1e-10);
    Assert.AreEqual(0.60801089969006383, machine.Weights[1], 1e-10);
    Assert.AreEqual(inputs[0][0], machine.SupportVectors[0][0], 1e-10);
    Assert.AreEqual(inputs[7][0], machine.SupportVectors[1][0], 1e-10);
}
public void RunTest()
{
    // Tests the legacy (pre-generic) one-class SVM API and expects it to
    // find the same solution as the generic API in learn_test above.

    // Ensure that results are reproducible.
    Accord.Math.Tools.SetupGenerator(0);
    // FIX: removed unused local 'var dist = NormalDistribution.Standard;'
    // — it was never read and plays no part in the test.

    double[] x =
    {
        +1.0312479734420776, +0.99444115161895752, +0.21835240721702576,
        +0.47197291254997253, +0.68701112270355225, -0.58556461334228516,
        -0.64154046773910522, -0.66485315561294556, +0.37940266728401184,
        -0.61046308279037476
    };

    // Wrap each sample as a one-dimensional input vector.
    double[][] inputs = Jagged.ColumnVector(x);

    IKernel kernel = new Linear();
    var machine = new KernelSupportVectorMachine(kernel, inputs: 1);
    var teacher = new OneclassSupportVectorLearning(machine, inputs)
    {
        Nu = 0.1
    };

    // Run the learning algorithm
    double error = teacher.Run();

    // Same expected solution as the generic-API test: two support vectors
    // (input rows 0 and 7) with these exact weights, and zero training error.
    Assert.AreEqual(2, machine.Weights.Length);
    Assert.AreEqual(0.39198910030993617, machine.Weights[0]);
    Assert.AreEqual(0.60801089969006383, machine.Weights[1]);
    Assert.AreEqual(inputs[0][0], machine.SupportVectors[0][0]);
    Assert.AreEqual(inputs[7][0], machine.SupportVectors[1][0]);
    Assert.AreEqual(0.0, error, 1e-10);
}
private static void BuildOneClassSVM(Frame <int, string> featuresDF)
{
    // Trains a one-class SVM on normal (non-fraud) transactions only, then
    // sweeps the decision threshold over a held-out mix of fraud and normal
    // rows, printing detection/precision metrics for each threshold.

    // First 13 components explain about 50% of the variance
    int numComponents = 13;
    string[] cols = featuresDF.ColumnKeys.Where((x, i) => i < numComponents).ToArray();

    // Sample 15,000 non-fraud rows (fixed seed for reproducibility) as the
    // "normal" training set.
    var rnd = new Random(1);
    int[] trainIdx = featuresDF["is_fraud"]
        .Where(x => x.Value == 0)
        .Keys
        .OrderBy(x => rnd.Next())
        .Take(15000)
        .ToArray();
    // FIX: O(1) membership test for the exclusion filter below; the original
    // called Array.Contains (O(n)) once per candidate row.
    var trainIdxSet = new HashSet<int>(trainIdx);

    var normalDF = featuresDF.Rows[trainIdx].Columns[cols];

    double[][] normalData = BuildJaggedArray(
        normalDF.ToArray2D <double>(), normalDF.RowCount, cols.Length
    );

    var teacher = new OneclassSupportVectorLearning <Gaussian>();
    var model = teacher.Learn(normalData);

    // Test set: every fraud row plus 5,000 normal rows not used in training.
    int[] testIdx = featuresDF["is_fraud"]
        .Where(x => x.Value > 0)
        .Keys
        .Concat(
            featuresDF["is_fraud"]
                .Where(x => x.Value == 0 && !trainIdxSet.Contains(x.Key))
                .Keys
                .OrderBy(x => rnd.Next())
                .Take(5000)
                .ToArray()
        ).ToArray();

    var fraudDF = featuresDF.Rows[testIdx].Columns[cols];

    double[][] fraudData = BuildJaggedArray(
        fraudDF.ToArray2D <double>(), fraudDF.RowCount, cols.Length
    );

    int[] fraudLabels = featuresDF.Rows[testIdx].GetColumn <int>("is_fraud").ValuesAll.ToArray();

    // Sweep the decision threshold from -1.0 to 0.0 in steps of 0.1.
    for (int j = 0; j <= 10; j++)
    {
        model.Threshold = -1 + j / 10.0;

        // FIX: removed unused 'double[] probs' array — it was allocated but
        // never written or read.
        int[] detected = new int[fraudData.Length];
        for (int i = 0; i < fraudData.Length; i++)
        {
            // Decide() returns true for "normal"; anything else is flagged fraud.
            bool isNormal = model.Decide(fraudData[i]);
            detected[i] = isNormal ? 0 : 1;
        }

        Console.WriteLine("\n\n---- One-Class SVM Results ----");
        Console.WriteLine("* Threshold: {0:0.00000}", model.Threshold);

        double correctPreds = fraudLabels
            .Select((x, i) => detected[i] == 1 && x == 1 ? 1 : 0)
            .Sum();
        // NOTE(review): if no row is flagged, detected.Sum() == 0 and precision
        // becomes NaN; the printouts then show NaN, same as the original.
        double precision = correctPreds / detected.Sum();
        double overallRecall = correctPreds / fraudLabels.Sum();
        Console.WriteLine("* Overall Fraud Detection: {0:0.00}%", overallRecall * 100.0);
        Console.WriteLine("* Precision: {0:0.00}%", (precision) * 100.0);
        // NOTE(review): (1 - precision) is the false DISCOVERY rate, not the
        // classical false-alarm rate (FP / actual negatives); label kept as-is
        // to preserve output — confirm intended metric with the author.
        Console.WriteLine("* False Alarm Rate: {0:0.00}%", (1 - precision) * 100.0);
    }
}
public static DetectorParameters PerformTraining(double[][] positive_data, double[][] negative_data, DetectorParameters current_detector_params)
{
    // Trains a one-class SVM on the positive samples, then evaluates the
    // log-likelihood of every positive and negative sample against the current
    // threshold, tracking the min positive / max negative log-likelihoods.
    // Finally derives a new threshold biased toward the negative side and
    // returns the updated parameters.

    // Create a new One-class SVM learning algorithm
    var teacher = new OneclassSupportVectorLearning <Gaussian>()
    {
        UseKernelEstimation = true, // estimate the Gaussian kernel width from the data
        Nu = 0.1
    };

    // Learn a support vector machine
    var svm = teacher.Learn(positive_data);
    current_detector_params.SVM = svm;

    int trues = 0;
    int falses = 0;

    foreach (double[] d_val in positive_data)
    {
        // FIX: removed unused 'svm.Probability(d_val)' call — its result was
        // never read, so it was pure wasted computation per sample.
        var log_likelihood = svm.LogLikelihood(d_val);
        var decision = log_likelihood > current_detector_params.LogLikelihoodThreshold;
        current_detector_params.MinLogLikelihoodPositive =
            Math.Min(log_likelihood, current_detector_params.MinLogLikelihoodPositive);
        if (decision) { trues++; } else { falses++; }
    }

    System.Console.WriteLine("Positive Trues: " + trues.ToString() + " Falses: " + falses.ToString() + " (Min log-likelihood: " + current_detector_params.MinLogLikelihoodPositive.ToString() + ")");

    trues = 0;
    falses = 0;

    foreach (double[] d_val in negative_data)
    {
        // FIX: same unused Probability() call removed here.
        var log_likelihood = svm.LogLikelihood(d_val);
        var decision = log_likelihood > current_detector_params.LogLikelihoodThreshold;
        current_detector_params.MaxLogLikelihoodNegative =
            Math.Max(log_likelihood, current_detector_params.MaxLogLikelihoodNegative);
        if (decision) { trues++; } else { falses++; }
    }

    System.Console.WriteLine("Negative Trues: " + trues.ToString() + " Falses: " + falses.ToString() + " (Max log-likelihood: " + current_detector_params.MaxLogLikelihoodNegative.ToString() + ")");

    // We want the threshold to be more on the side of "not loading", as errors
    // during gameplay are worse: weighted 3.5 toward the negative maximum vs.
    // 2.5 toward the positive minimum.
    current_detector_params.LogLikelihoodThreshold =
        (3.5 * current_detector_params.MaxLogLikelihoodNegative + 2.5 * current_detector_params.MinLogLikelihoodPositive) / 6.0;
    // Perfect separation when every negative scores below every positive.
    current_detector_params.DetectsPerfectly =
        current_detector_params.MaxLogLikelihoodNegative < current_detector_params.MinLogLikelihoodPositive;
    current_detector_params.LogLikelihoodDistance =
        current_detector_params.MinLogLikelihoodPositive - current_detector_params.MaxLogLikelihoodNegative;
    return current_detector_params;
}
public void square_test()
{
    // Example from https://stackoverflow.com/questions/38642615/why-does-this-clean-data-provide-strange-svm-classification-results
    double[][] inputs = new double[49][];
    int i = 0;
    // Build a 7x7 grid of points covering the square [-0.3, 0.3]^2.
    // FIX: the original had the statement terminator misplaced —
    // 'inputs[i++] = new double[] { x, y } } } ;' — the assignment lacked its
    // semicolon (a syntax error) and a stray empty statement followed the loops.
    for (double x = -0.3; x <= 0.31; x += 0.1)
    {
        for (double y = -0.3; y <= 0.31; y += 0.1)
        {
            inputs[i++] = new double[] { x, y };
        }
    }

    // Generate inlier and outlier test points.
    double[][] outliers =
    {
        new double[] { 1E6, 1E6 },  // Very far outlier
        new double[] { 0, 1E6 },    // Very far outlier
        new double[] { 100, -100 }, // Far outlier
        new double[] { 0, -100 },   // Far outlier
        new double[] { -10, -10 },  // Still far outlier
        new double[] { 0, -10 },    // Still far outlier
        new double[] { 0.6, 0.6 },  // Close outlier
        new double[] { 0.5, 0.0 }   // Close outlier
    };

    double[][] inliers =
    {
        new double[] { 0.0, 0.0 },  // Middle of cluster
        new double[] { .15, .15 },  // Halfway to corner of cluster
        new double[] { -0.1, 0 },   // Comfortably inside cluster
        new double[] { 0.25, 0 },   // Near inside edge of cluster
        new double[] { 0.28, 0.28 } // Near inside edge of cluster
    };

    var teacher = new OneclassSupportVectorLearning <Gaussian>()
    {
        Nu = 0.05,
        Tolerance = 1e-2,
        Kernel = new Gaussian(1)
    };

    var svm = teacher.Learn(inputs);

    double[] a = svm.Score(outliers);
    double[] b = svm.Score(inliers);
    double[] c = svm.Score(inputs);

    string stra = a.ToCSharp();
    string strb = b.ToCSharp();
    string strc = c.ToCSharp();

    // Expected scores: negative for outliers, positive for inliers/grid interior.
    double[] ea =
    {
        -2.06303275869732, -2.06303275869732, -2.06303275869732,
        -2.06303275869732, -2.06303275869732, -2.06303275869732,
        -0.43909532904464, -0.0610610987108576
    };

    double[] eb =
    {
        0.176098645217194, 0.13254525498832, 0.1651082775092,
        0.115325884477755, 0.0260693377780776
    };

    double[] ec =
    {
        -1.33226762955019E-15, 0.0467039286535355, 0.0749106707071278,
        0.0838648156076344, 0.0733262829494159, 0.0435809252346452,
        -0.00457227086652812, 0.0474811783155659, 0.0955242612540407,
        0.124652669315224, 0.134085905268251, 0.123570483126211,
        0.0933911526436562, 0.0443581748966757, 0.0764893734298348,
        0.125461639137703, 0.155267627012948, 0.165108277509199,
        0.154718603422687, 0.12437945294869, 0.0749049856721229,
        0.086246524528469, 0.13571268664663, 0.165933589606133,
        0.176098645217194, 0.165933589606133, 0.135712686646629,
        0.0862465245284685, 0.0764893734298354, 0.126000305758801,
        0.156361734225727, 0.166758901703067, 0.156910757815989,
        0.127082491947813, 0.0780737611875469, 0.0474811783155671,
        0.0965860327119062, 0.126809275581279, 0.137339468025008,
        0.127891461770292, 0.0987191413222903, 0.0506041817344567,
        6.66133814775094E-16, 0.0482584279775976, 0.0780680761525424,
        0.0886282334493032, 0.0796524639102539, 0.051381431396487,
        0.00457227086652678
    };

    Assert.IsTrue(ea.IsEqual(a, 1e-5));
    Assert.IsTrue(eb.IsEqual(b, 1e-5));
    Assert.IsTrue(ec.IsEqual(c, 1e-5));

    // Decisions: all outliers rejected, all inliers accepted.
    bool[] da = svm.Decide(outliers);
    bool[] db = svm.Decide(inliers);
    Assert.IsTrue(da.All(x => x == false));
    Assert.IsTrue(db.All(x => x == true));
}
public DetectionResults Filter(DocumentClusters document)
{
    // Splits a document's text into "normal" and anomalous blocks: scores each
    // cluster with a one-class SVM, aggregates scores per sentence, then groups
    // consecutive sentences with the same verdict into ProcessingTextBlocks.
    if (document.Clusters.Length < 3)
    {
        // Too few clusters to learn a meaningful boundary; pass everything through.
        logger.Info("Not enought text clusters for clustering");
        return (new DetectionResults(document.Clusters));
    }

    double[][] observations = vectorSource.GetVectors(document.Clusters, NormalizationType.None);
    var standardizer = Standardizer.GetNumericStandardizer(observations);
    observations = standardizer.StandardizeAll(observations);
    // Shallow copy: 'data' shares the inner double[] rows with 'observations',
    // so the NaN scrub below also affects the rows seen through 'data'.
    var data = observations.ToArray();
    for (int i = 0; i < observations.Length; i++)
    {
        for (int j = 0; j < observations[i].Length; j++)
        {
            if (double.IsNaN(observations[i][j]))
            {
                // Replace NaNs left by standardization with zero so the SVM
                // receives finite inputs.
                observations[i][j] = 0;
            }
        }
    }

    var teacher = new OneclassSupportVectorLearning <Gaussian>
    {
        Kernel = Gaussian.FromGamma(1.0 / data.Length),
        Nu = 0.5,
        Shrinking = true,
        Tolerance = 0.001
    };

    var svm = teacher.Learn(data);
    double[] prediction = svm.Score(data);
    // Accumulate each cluster's score onto every sentence it contains,
    // keyed by the sentence index.
    Dictionary <int, List <double> > weights = new Dictionary <int, List <double> >();
    for (int i = 0; i < prediction.Length; i++)
    {
        foreach (var sentenceItem in document.Clusters[i].Sentences)
        {
            if (!weights.TryGetValue(sentenceItem.Index, out var classType))
            {
                classType = new List <double>();
                weights[sentenceItem.Index] = classType;
            }

            classType.Add(prediction[i]);
        }
    }

    List <ProcessingTextBlock> anomaly = new List <ProcessingTextBlock>();
    List <ProcessingTextBlock> resultData = new List <ProcessingTextBlock>();
    List <SentenceItem> sentences = new List <SentenceItem>();
    ProcessingTextBlock cluster;
    bool? lastResult = null;
    // Cut-off is the summed score at the 20th percentile; sentences whose
    // summed score is at or below it are treated as anomalous.
    var cutoffIndex = (int)(weights.Count * 0.2);
    var cutoff = weights.Select(item => item.Value.Sum()).OrderBy(item => item).Skip(cutoffIndex).First();
    var allSentences = document.Clusters.SelectMany(item => item.Sentences)
                       .Distinct()
                       .OrderBy(item => item.Index)
                       .ToArray();

    if (allSentences.Length != weights.Count)
    {
        // Every scored sentence must appear exactly once in the ordered list.
        throw new ArgumentOutOfRangeException(nameof(document), "Sentence length mismatch");
    }

    // Walk sentences in index order, flushing a block each time the
    // normal/anomalous verdict flips.
    foreach (var sentence in allSentences)
    {
        var current = weights[sentence.Index].Sum();
        var result = current > cutoff;
        if (lastResult != null && result != lastResult)
        {
            // Verdict changed — flush the accumulated run into a block.
            cluster = new ProcessingTextBlock(sentences.ToArray());
            sentences.Clear();
            if (lastResult.Value)
            {
                resultData.Add(cluster);
            }
            else
            {
                anomaly.Add(cluster);
            }
        }

        sentences.Add(sentence);
        lastResult = result;
    }

    // Flush the final run. NOTE(review): 'lastResult.Value' assumes the loop
    // above ran at least once (i.e. the document has at least one sentence) —
    // TODO confirm clusters always contain sentences, else this throws.
    cluster = new ProcessingTextBlock(sentences.ToArray());
    sentences.Clear();
    if (lastResult.Value)
    {
        resultData.Add(cluster);
    }
    else
    {
        anomaly.Add(cluster);
    }

    // NOTE(review): 'builder' collects the anomalous text but is never read
    // afterwards — looks like a leftover debugging aid; confirm before removing.
    StringBuilder builder = new StringBuilder();
    foreach (var textCluster in anomaly)
    {
        foreach (var sentenceItem in textCluster.Sentences)
        {
            builder.AppendLine(sentenceItem.Text);
        }
    }

    return (new DetectionResults(resultData.ToArray(), anomaly.ToArray()));
}