Ejemplo n.º 1
0
        public SupportVectorMachine <Gaussian> Learn()
        {
            // Configure a one-class SVM trainer with a Gaussian kernel.
            var teacher = new OneclassSupportVectorLearning <Gaussian>();
            teacher.Kernel = new Gaussian(0.1); // kernel width parameter
            teacher.Nu     = 0.1;               // upper bound on the fraction of training outliers

            // Fit the machine on the training data held by this instance
            // and hand the learned model back to the caller.
            var machine = teacher.Learn(trainDataJagged);
            return machine;
        }
Ejemplo n.º 2
0
        public void learn_test()
        {
            #region doc_learn
            // Ensure that results are reproducible
            Accord.Math.Random.Generator.Seed = 0;

            // Ten one-dimensional observations forming the single "normal" class
            double[][] inputs = new double[][]
            {
                new double[] {  +1.0312479734420776 },
                new double[] { +0.99444115161895752 },
                new double[] { +0.21835240721702576 },
                new double[] { +0.47197291254997253 },
                new double[] { +0.68701112270355225 },
                new double[] { -0.58556461334228516 },
                new double[] { -0.64154046773910522 },
                new double[] { -0.66485315561294556 },
                new double[] { +0.37940266728401184 },
                new double[] { -0.61046308279037476 }
            };

            // Configure the one-class SVM trainer
            var learner = new OneclassSupportVectorLearning <Linear>();
            learner.Kernel = new Linear(); // or, for example, 'new Gaussian(0.9)'
            learner.Nu     = 0.1;          // bound on the fraction of training outliers

            // Learn a support vector machine
            var svm = learner.Learn(inputs);

            // Score the training points with the learned machine
            double[] prediction = svm.Score(inputs);

            // Compute the log-likelihood of the answer
            double ll = new LogLikelihoodLoss().Loss(prediction);
            #endregion

            // Expected values recorded from a known-good run with seed 0.
            Assert.AreEqual(-1.6653345369377348E-16, ll, 1e-10);
            Assert.AreEqual(2, svm.Weights.Length);
            Assert.AreEqual(0.39198910030993617, svm.Weights[0], 1e-10);
            Assert.AreEqual(0.60801089969006383, svm.Weights[1], 1e-10);
            Assert.AreEqual(inputs[0][0], svm.SupportVectors[0][0], 1e-10);
            Assert.AreEqual(inputs[7][0], svm.SupportVectors[1][0], 1e-10);
        }
Ejemplo n.º 3
0
        public void RunTest()
        {
            // Fix the RNG seed so the optimization is reproducible.
            Accord.Math.Tools.SetupGenerator(0);

            // One-dimensional training samples.
            // (The original declared an unused 'NormalDistribution.Standard'
            // local here; it was never referenced and has been removed.)
            double[] x =
            {
                +1.0312479734420776,
                +0.99444115161895752,
                +0.21835240721702576,
                +0.47197291254997253,
                +0.68701112270355225,
                -0.58556461334228516,
                -0.64154046773910522,
                -0.66485315561294556,
                +0.37940266728401184,
                -0.61046308279037476
            };

            // Turn the flat vector into a jagged n-by-1 input matrix.
            double[][] inputs = Jagged.ColumnVector(x);

            IKernel kernel = new Linear();

            // Machine with a single input dimension.
            var machine = new KernelSupportVectorMachine(kernel, inputs: 1);

            // One-class learning: Nu bounds the fraction of training outliers.
            var teacher = new OneclassSupportVectorLearning(machine, inputs)
            {
                Nu = 0.1
            };

            // Run the learning algorithm
            double error = teacher.Run();

            // Expected values recorded from a known-good run with seed 0.
            Assert.AreEqual(2, machine.Weights.Length);
            Assert.AreEqual(0.39198910030993617, machine.Weights[0]);
            Assert.AreEqual(0.60801089969006383, machine.Weights[1]);
            Assert.AreEqual(inputs[0][0], machine.SupportVectors[0][0]);
            Assert.AreEqual(inputs[7][0], machine.SupportVectors[1][0]);
            Assert.AreEqual(0.0, error, 1e-10);
        }
Ejemplo n.º 4
0
        private static void BuildOneClassSVM(Frame <int, string> featuresDF)
        {
            // First 13 components explain about 50% of the variance
            int numComponents = 13;

            string[] cols = featuresDF.ColumnKeys.Where((x, i) => i < numComponents).ToArray();

            var rnd = new Random(1);

            // Train only on non-fraud rows: the one-class SVM models the
            // "normal" region and everything outside it is flagged as fraud.
            int[] trainIdx = featuresDF["is_fraud"]
                             .Where(x => x.Value == 0)
                             .Keys
                             .OrderBy(x => rnd.Next()) // shuffle before sampling
                             .Take(15000)
                             .ToArray();

            // O(1) membership checks when carving out the test split below;
            // the original used trainIdx.Contains(...), an O(n) scan per row.
            var trainIdxSet = new HashSet <int>(trainIdx);

            var normalDF = featuresDF.Rows[
                trainIdx
                           ].Columns[cols];

            double[][] normalData = BuildJaggedArray(
                normalDF.ToArray2D <double>(), normalDF.RowCount, cols.Length
                );

            var teacher = new OneclassSupportVectorLearning <Gaussian>();
            var model   = teacher.Learn(normalData);

            // Test set: all fraud rows plus 5000 normal rows not used in training.
            int[] testIdx = featuresDF["is_fraud"]
                            .Where(x => x.Value > 0)
                            .Keys
                            .Concat(
                featuresDF["is_fraud"]
                .Where(x => x.Value == 0 && !trainIdxSet.Contains(x.Key))
                .Keys
                .OrderBy(x => rnd.Next())
                .Take(5000)
                .ToArray()
                ).ToArray();

            var fraudDF = featuresDF.Rows[
                testIdx
                          ].Columns[cols];

            double[][] fraudData = BuildJaggedArray(
                fraudDF.ToArray2D <double>(), fraudDF.RowCount, cols.Length
                );

            int[] fraudLabels = featuresDF.Rows[
                testIdx
                                ].GetColumn <int>("is_fraud").ValuesAll.ToArray();

            // Sweep the decision threshold from -1.0 to 0.0 in steps of 0.1
            // and report detection metrics at each setting.
            for (int j = 0; j <= 10; j++)
            {
                model.Threshold = -1 + j / 10.0;

                // (The original also allocated an unused 'probs' array here.)
                int[] detected = new int[fraudData.Length];
                for (int i = 0; i < fraudData.Length; i++)
                {
                    // Decide() returns true for "normal"; invert to get a fraud flag.
                    bool isNormal = model.Decide(fraudData[i]);
                    detected[i] = isNormal ? 0 : 1;
                }

                Console.WriteLine("\n\n---- One-Class SVM Results ----");
                Console.WriteLine("* Threshold: {0:0.00000}", model.Threshold);
                // True positives: rows flagged as fraud that really are fraud.
                double correctPreds = fraudLabels
                                      .Select((x, i) => detected[i] == 1 && x == 1 ? 1 : 0)
                                      .Sum();
                double precision     = correctPreds / detected.Sum();
                double overallRecall = correctPreds / fraudLabels.Sum();
                Console.WriteLine("* Overall Fraud Detection: {0:0.00}%", overallRecall * 100.0);
                Console.WriteLine("* Precision: {0:0.00}%", (precision) * 100.0);
                // NOTE(review): "(1 - precision)" is the false DISCOVERY rate,
                // not a true false-alarm rate (FP / (FP + TN)); the label is kept
                // as-is to preserve the program's output.
                Console.WriteLine("* False Alarm Rate: {0:0.00}%", (1 - precision) * 100.0);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Trains a one-class SVM on the positive samples, evaluates both sample
        /// sets against the current log-likelihood threshold, then derives a new
        /// threshold from the observed extremes and returns the updated parameters.
        /// </summary>
        /// <param name="positive_data">Feature vectors of the target class (used for training).</param>
        /// <param name="negative_data">Feature vectors outside the target class (evaluation only).</param>
        /// <param name="current_detector_params">Parameter bag that is mutated and returned.</param>
        /// <returns>The same <paramref name="current_detector_params"/> instance, updated in place.</returns>
        public static DetectorParameters PerformTraining(double[][] positive_data, double[][] negative_data, DetectorParameters current_detector_params)
        {
            // Create a new One-class SVM learning algorithm; the Gaussian kernel
            // width is estimated from the data, and Nu bounds the fraction of
            // training points allowed to fall outside the learned region.
            var teacher = new OneclassSupportVectorLearning <Gaussian>()
            {
                UseKernelEstimation = true,
                Nu = 0.1
            };


            // Learn a support vector machine from the positive samples only.
            var svm = teacher.Learn(positive_data);

            current_detector_params.SVM = svm;

            int trues  = 0;
            int falses = 0;

            // Evaluate the positives and track the lowest log-likelihood seen.
            // (The original also computed svm.Probability(d_val) into an unused
            // local in both loops; that dead call has been removed.)
            foreach (double[] d_val in positive_data)
            {
                var log_likelihood = svm.LogLikelihood(d_val);
                var decision       = log_likelihood > current_detector_params.LogLikelihoodThreshold;

                current_detector_params.MinLogLikelihoodPositive = Math.Min(log_likelihood, current_detector_params.MinLogLikelihoodPositive);

                if (decision)
                {
                    trues++;
                }
                else
                {
                    falses++;
                }
            }

            System.Console.WriteLine("Positive Trues: " + trues.ToString() + " Falses: " + falses.ToString() + " (Min log-likelihood: " + current_detector_params.MinLogLikelihoodPositive.ToString() + ")");


            trues  = 0;
            falses = 0;


            // Evaluate the negatives and track the highest log-likelihood seen.
            foreach (double[] d_val in negative_data)
            {
                var log_likelihood = svm.LogLikelihood(d_val);
                var decision       = log_likelihood > current_detector_params.LogLikelihoodThreshold;

                current_detector_params.MaxLogLikelihoodNegative = Math.Max(log_likelihood, current_detector_params.MaxLogLikelihoodNegative);

                if (decision)
                {
                    trues++;
                }
                else
                {
                    falses++;
                }
            }


            System.Console.WriteLine("Negative Trues: " + trues.ToString() + " Falses: " + falses.ToString() + " (Max log-likelihood: " + current_detector_params.MaxLogLikelihoodNegative.ToString() + ")");

            // We want the threshold to be more on the side of "not loading", as errors
            // during gameplay are worse — hence the asymmetric 3.5/2.5 weighting that
            // pulls the threshold toward the negative-class maximum.
            current_detector_params.LogLikelihoodThreshold = (3.5 * current_detector_params.MaxLogLikelihoodNegative + 2.5 * current_detector_params.MinLogLikelihoodPositive) / 6.0;
            // Perfect separation: every negative scores below every positive.
            current_detector_params.DetectsPerfectly       = current_detector_params.MaxLogLikelihoodNegative < current_detector_params.MinLogLikelihoodPositive;
            current_detector_params.LogLikelihoodDistance  = current_detector_params.MinLogLikelihoodPositive - current_detector_params.MaxLogLikelihoodNegative;

            return(current_detector_params);
        }
Ejemplo n.º 6
0
        public void square_test()
        {
            // Example from https://stackoverflow.com/questions/38642615/why-does-this-clean-data-provide-strange-svm-classification-results

            // A 7x7 grid of points in [-0.3, 0.3]^2 forms the "normal" class.
            double[][] inputs = new double[49][];
            int        i      = 0;

            for (double x = -0.3; x <= 0.31; x += 0.1)
            {
                for (double y = -0.3; y <= 0.31; y += 0.1)
                {
                    // Fixed: in the original, this statement's terminating
                    // semicolon had been displaced outside the loop braces,
                    // which does not compile.
                    inputs[i++] = new double[] { x, y };
                }
            }

            // Generate inlier and outlier test points.
            double[][] outliers =
            {
                new double[] { 1E6,  1E6 }, // Very far outlier
                new double[] {   0,  1E6 }, // Very far outlier
                new double[] { 100, -100 }, // Far outlier
                new double[] {   0, -100 }, // Far outlier
                new double[] { -10,  -10 }, // Still far outlier
                new double[] {   0,  -10 }, // Still far outlier
                new double[] { 0.6,  0.6 }, // Close outlier
                new double[] { 0.5,  0.0 }  // Close outlier
            };

            double[][] inliers =
            {
                new double[] {  0.0, 0.0 },  // Middle of cluster
                new double[] {  .15, .15 },  // Halfway to corner of cluster
                new double[] { -0.1,   0 },  // Comfortably inside cluster
                new double[] { 0.25,   0 },  // Near inside edge of cluster
                new double[] { 0.28,0.28 }   // Near inside edge of cluster
            };


            // One-class SVM with a Gaussian kernel; Nu bounds the fraction of
            // training points allowed to fall outside the learned region.
            var teacher = new OneclassSupportVectorLearning <Gaussian>()
            {
                Nu        = 0.05,
                Tolerance = 1e-2,
                Kernel    = new Gaussian(1)
            };

            var svm = teacher.Learn(inputs);

            // Score all three point sets (positive = inside the learned region).
            double[] a = svm.Score(outliers);
            double[] b = svm.Score(inliers);
            double[] c = svm.Score(inputs);

            string stra = a.ToCSharp();
            string strb = b.ToCSharp();
            string strc = c.ToCSharp();

            // Expected scores recorded from a known-good run.
            double[] ea =
            {
                -2.06303275869732, -2.06303275869732,
                -2.06303275869732, -2.06303275869732,
                -2.06303275869732, -2.06303275869732,
                -0.43909532904464, -0.0610610987108576
            };

            double[] eb =
            {
                0.176098645217194,  0.13254525498832,
                0.1651082775092,   0.115325884477755,
                0.0260693377780776
            };

            double[] ec =
            {
                -1.33226762955019E-15, 0.0467039286535355,   0.0749106707071278, 0.0838648156076344,
                0.0733262829494159,    0.0435809252346452, -0.00457227086652812, 0.0474811783155659,
                0.0955242612540407,     0.124652669315224,    0.134085905268251,  0.123570483126211,
                0.0933911526436562,    0.0443581748966757,   0.0764893734298348,  0.125461639137703,
                0.155267627012948,      0.165108277509199,    0.154718603422687,   0.12437945294869,
                0.0749049856721229,     0.086246524528469,     0.13571268664663,  0.165933589606133,
                0.176098645217194,      0.165933589606133,    0.135712686646629, 0.0862465245284685,
                0.0764893734298354,     0.126000305758801,    0.156361734225727,  0.166758901703067,
                0.156910757815989,      0.127082491947813,   0.0780737611875469, 0.0474811783155671,
                0.0965860327119062,     0.126809275581279,    0.137339468025008,  0.127891461770292,
                0.0987191413222903,    0.0506041817344567, 6.66133814775094E-16, 0.0482584279775976,
                0.0780680761525424,    0.0886282334493032,   0.0796524639102539,  0.051381431396487,
                0.00457227086652678
            };

            Assert.IsTrue(ea.IsEqual(a, 1e-5));
            Assert.IsTrue(eb.IsEqual(b, 1e-5));
            Assert.IsTrue(ec.IsEqual(c, 1e-5));

            // Decide() should reject every outlier and accept every inlier.
            bool[] da = svm.Decide(outliers);
            bool[] db = svm.Decide(inliers);

            Assert.IsTrue(da.All(x => x == false));
            Assert.IsTrue(db.All(x => x == true));
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Splits a document's text clusters into "normal" and "anomalous" blocks by
        /// scoring sentence clusters with a one-class SVM and cutting sentence runs
        /// wherever the per-sentence score crosses a 20th-percentile cutoff.
        /// </summary>
        /// <param name="document">Clustered document to filter.</param>
        /// <returns>Detection results holding the kept blocks and the anomalies.</returns>
        /// <exception cref="ArgumentOutOfRangeException">If the distinct sentence count does not match the weight map.</exception>
        public DetectionResults Filter(DocumentClusters document)
        {
            // Too little data to separate normal from anomalous clusters.
            if (document.Clusters.Length < 3)
            {
                logger.Info("Not enought text clusters for clustering");
                return(new DetectionResults(document.Clusters));
            }

            // Vectorize each cluster, then standardize the feature columns.
            double[][] observations = vectorSource.GetVectors(document.Clusters, NormalizationType.None);
            var        standardizer = Standardizer.GetNumericStandardizer(observations);

            observations = standardizer.StandardizeAll(observations);
            // NOTE(review): ToArray() copies only the outer array — 'data' shares the
            // inner rows with 'observations', so the NaN scrub below affects both.
            var data = observations.ToArray();

            // Replace NaNs produced by standardization (e.g. zero-variance columns) with 0.
            for (int i = 0; i < observations.Length; i++)
            {
                for (int j = 0; j < observations[i].Length; j++)
                {
                    if (double.IsNaN(observations[i][j]))
                    {
                        observations[i][j] = 0;
                    }
                }
            }

            // One-class SVM over the cluster vectors; gamma shrinks as the
            // number of clusters grows, and Nu = 0.5 tolerates many outliers.
            var teacher = new OneclassSupportVectorLearning <Gaussian>
            {
                Kernel    = Gaussian.FromGamma(1.0 / data.Length),
                Nu        = 0.5,
                Shrinking = true,
                Tolerance = 0.001
            };

            var svm = teacher.Learn(data);

            double[] prediction = svm.Score(data);

            // Accumulate each sentence's scores across every cluster it appears in,
            // keyed by sentence index.
            Dictionary <int, List <double> > weights = new Dictionary <int, List <double> >();

            for (int i = 0; i < prediction.Length; i++)
            {
                foreach (var sentenceItem in document.Clusters[i].Sentences)
                {
                    if (!weights.TryGetValue(sentenceItem.Index, out var classType))
                    {
                        classType = new List <double>();
                        weights[sentenceItem.Index] = classType;
                    }

                    classType.Add(prediction[i]);
                }
            }

            List <ProcessingTextBlock> anomaly    = new List <ProcessingTextBlock>();
            List <ProcessingTextBlock> resultData = new List <ProcessingTextBlock>();
            List <SentenceItem>        sentences  = new List <SentenceItem>();
            ProcessingTextBlock        cluster;
            // Tracks the verdict of the previous sentence so consecutive sentences
            // with the same verdict are grouped into one block.
            bool?lastResult   = null;
            // Cutoff = the score at the 20th percentile of summed sentence weights;
            // sentences at or below it are treated as anomalous.
            var  cutoffIndex  = (int)(weights.Count * 0.2);
            var  cutoff       = weights.Select(item => item.Value.Sum()).OrderBy(item => item).Skip(cutoffIndex).First();
            var  allSentences = document.Clusters.SelectMany(item => item.Sentences)
                                .Distinct()
                                .OrderBy(item => item.Index)
                                .ToArray();

            // Sanity check: every distinct sentence must have been scored.
            if (allSentences.Length != weights.Count)
            {
                throw new ArgumentOutOfRangeException(nameof(document), "Sentence length mismatch");
            }

            // Walk the sentences in order, closing the current block whenever the
            // normal/anomalous verdict flips.
            foreach (var sentence in allSentences)
            {
                var current = weights[sentence.Index].Sum();
                var result  = current > cutoff;
                if (lastResult != null &&
                    result != lastResult)
                {
                    cluster = new ProcessingTextBlock(sentences.ToArray());
                    sentences.Clear();
                    if (lastResult.Value)
                    {
                        resultData.Add(cluster);
                    }
                    else
                    {
                        anomaly.Add(cluster);
                    }
                }

                sentences.Add(sentence);
                lastResult = result;
            }

            // Flush the final run of sentences. lastResult is non-null here because
            // the early return above guarantees at least three clusters of sentences.
            cluster = new ProcessingTextBlock(sentences.ToArray());
            sentences.Clear();
            if (lastResult.Value)
            {
                resultData.Add(cluster);
            }
            else
            {
                anomaly.Add(cluster);
            }

            // Collect the anomalous text (built for logging/inspection; the
            // builder itself is not returned).
            StringBuilder builder = new StringBuilder();

            foreach (var textCluster in anomaly)
            {
                foreach (var sentenceItem in textCluster.Sentences)
                {
                    builder.AppendLine(sentenceItem.Text);
                }
            }

            return(new DetectionResults(resultData.ToArray(), anomaly.ToArray()));
        }