コード例 #1
0
        public void WriteBoolTest_compressed()
        {
            // Round-trips a small dense data set through a GZip-compressed sparse
            // file, re-saves the loaded data uncompressed, and compares the resulting
            // text against the expected sparse representation.
            double[][] samples = new[]
            {
                new double[] { 1, 2, 0, 3, 0 },
                new double[] { 6, 0, 4, 2, 0 },
                new double[] { 0, 0, 0, 0, 0 },
            };

            bool[] outputs = { false, true, false };

            SparseFormat.Save(samples, outputs, "test.txt.gz", compression: SerializerCompression.GZip);

            Sparse<double>[] newSamples;
            bool[] newOutput;
            SparseFormat.Load("test.txt.gz", out newSamples, out newOutput, compression: SerializerCompression.GZip);

            SparseFormat.Save(newSamples, newOutput, "test2.txt");

            string actual = File.ReadAllText("test2.txt");
            string expected = @"-1 1:1 2:2 4:3
1 1:6 3:4 4:2
-1 
";

            // The verbatim literal inherits whatever line endings this source file was
            // checked out with, which need not match the endings written to disk.
            // Compare both sides with LF-only line endings so the test checks content,
            // not the platform's newline convention.
            expected = expected.Replace("\r\n", "\n");
            actual = actual.Replace("\r\n", "\n");

            Assert.AreEqual(expected, actual);
        }
コード例 #2
0
        public void WriteBoolTest_compressed()
        {
            // Saves a dense data set to a GZip-compressed sparse file, loads it back,
            // writes the loaded data uncompressed and checks the produced text.
            bool[] labels = { false, true, false };

            double[][] dense =
            {
                new double[] { 1, 2, 0, 3, 0 },
                new double[] { 6, 0, 4, 2, 0 },
                new double[] { 0, 0, 0, 0, 0 },
            };

            // Expected text, with any CRLF pairs in the literal replaced by the
            // current platform's newline sequence.
            string expected = @"-1 1:1 2:2 4:3
1 1:6 3:4 4:2
-1 
".Replace("\r\n", Environment.NewLine);

            // Compressed round trip.
            SparseFormat.Save(dense, labels, test_txt_gz, compression: SerializerCompression.GZip);

            Sparse<double>[] loadedSamples;
            bool[] loadedLabels;
            SparseFormat.Load(test_txt_gz, out loadedSamples, out loadedLabels, compression: SerializerCompression.GZip);

            // NOTE(review): the data is written to <TestDirectory>/test2.txt but read
            // back through test2_txt, which is defined outside this method - confirm
            // that both refer to the same file.
            string resavedPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "test2.txt");
            SparseFormat.Save(loadedSamples, loadedLabels, resavedPath);

            string actual = File.ReadAllText(test2_txt);

            Assert.AreEqual(expected, actual);
        }
コード例 #3
0
        public void WriteBoolTest()
        {
            // Saves a small dense data set with boolean labels in sparse text format
            // and compares the file contents against the expected representation.
            double[][] samples = new[]
            {
                new double[] { 1, 2, 0, 3, 0 },
                new double[] { 6, 0, 4, 2, 0 },
                new double[] { 0, 0, 0, 0, 0 },
            };

            bool[] outputs = { false, true, false };

            SparseFormat.Save(samples, outputs, "test.txt");

            string actual = File.ReadAllText("test.txt");
            string expected = @"-1 1:1 2:2 4:3
1 1:6 3:4 4:2
-1 
";

            // The verbatim literal inherits whatever line endings this source file was
            // checked out with, which need not match the endings written to disk.
            // Compare both sides with LF-only line endings so the test checks content,
            // not the platform's newline convention.
            expected = expected.Replace("\r\n", "\n");
            actual = actual.Replace("\r\n", "\n");

            Assert.AreEqual(expected, actual);
        }
コード例 #4
0
        public void WriteBoolTest()
        {
            // Writes a dense data set with boolean labels in sparse text format to
            // test_txt and verifies the text that ends up in that file.
            bool[] labels = { false, true, false };

            double[][] dense =
            {
                new double[] { 1, 2, 0, 3, 0 },
                new double[] { 6, 0, 4, 2, 0 },
                new double[] { 0, 0, 0, 0, 0 },
            };

            // Expected text, with any CRLF pairs in the literal replaced by the
            // current platform's newline sequence.
            string expected = @"-1 1:1 2:2 4:3
1 1:6 3:4 4:2
-1 
".Replace("\r\n", Environment.NewLine);

            SparseFormat.Save(dense, labels, test_txt);

            string actual = File.ReadAllText(test_txt);

            Assert.AreEqual(expected, actual);
        }
コード例 #5
0
ファイル: Program.cs プロジェクト: xiubjarne/framework
        /// <summary>
        ///   Trains a linear sparse SVM with averaged stochastic gradient descent on
        ///   the RCV1-v2 data set ("CCAT" as the positive class), then prints timing
        ///   and accuracy figures for the training and testing sets. Pre-processed
        ///   data is cached on disk as GZip-compressed sparse files and reused when
        ///   present.
        /// </summary>
        private static void TestLinearASGD()
        {
            // http://leon.bottou.org/projects/sgd

            string codebookPath = "codebook.bin";
            string x_train_fn   = "x_train.txt.gz";
            string x_test_fn    = "x_test.txt.gz";

            Sparse<double>[] xTrain = null, xTest = null;
            bool[] yTrain = null, yTest = null;

            // Check if we have the precomputed dataset on disk. Both cache files
            // must be checked: regenerate whichever one is missing.
            if (!File.Exists(x_train_fn) || !File.Exists(x_test_fn))
            {
                Console.WriteLine("Downloading dataset");
                RCV1v2 rcv1v2 = new RCV1v2(@"C:\Temp\");

                // Note: Leon Bottou's SGD inverts training and
                // testing when benchmarking in this dataset
                var trainWords = rcv1v2.Testing.Item1;
                var testWords  = rcv1v2.Training.Item1;

                string positiveClass = "CCAT";
                yTrain = rcv1v2.Testing.Item2.Apply(x => x.Contains(positiveClass));
                yTest  = rcv1v2.Training.Item2.Apply(x => x.Contains(positiveClass));

                TFIDF tfidf;
                if (!File.Exists(codebookPath))
                {
                    Console.WriteLine("Learning TD-IDF");
                    // Create a TF-IDF considering only words that
                    // exist in both the training and testing sets
                    tfidf = new TFIDF(testWords)
                    {
                        Tf  = TermFrequency.Log,
                        Idf = InverseDocumentFrequency.Default,
                    };

                    // Learn the training set
                    tfidf.Learn(trainWords);

                    Console.WriteLine("Saving codebook");
                    tfidf.Save(codebookPath);
                }
                else
                {
                    Console.WriteLine("Loading codebook");
                    Serializer.Load(codebookPath, out tfidf);
                }

                if (!File.Exists(x_train_fn))
                {
                    // Transform and normalize training set
                    Console.WriteLine("Pre-processing training set");
                    xTrain = tfidf.Transform(trainWords, out xTrain);

                    Console.WriteLine("Post-processing training set");
                    xTrain = xTrain.Divide(Norm.Euclidean(xTrain, dimension: 1), result: xTrain);

                    Console.WriteLine("Saving training set to disk");
                    SparseFormat.Save(xTrain, yTrain, x_train_fn, compression: SerializerCompression.GZip);
                }

                if (!File.Exists(x_test_fn))
                {
                    // Transform and normalize testing set
                    Console.WriteLine("Pre-processing testing set");
                    xTest = tfidf.Transform(testWords, out xTest);

                    Console.WriteLine("Post-processing testing set");
                    xTest = xTest.Divide(Norm.Euclidean(xTest, dimension: 1), result: xTest);

                    Console.WriteLine("Saving testing set to disk");
                    SparseFormat.Save(xTest, yTest, x_test_fn, compression: SerializerCompression.GZip);
                }
            }

            // Any split that was not generated above already exists on disk, so load
            // it from there. This covers both the fully cached case and the case
            // where only one of the two files had to be regenerated.
            if (xTrain == null || xTest == null)
            {
                Console.WriteLine("Loading dataset from disk");
                if (xTrain == null || yTrain == null)
                {
                    SparseFormat.Load(x_train_fn, out xTrain, out yTrain, compression: SerializerCompression.GZip);
                }
                if (xTest == null || yTest == null)
                {
                    SparseFormat.Load(x_test_fn, out xTest, out yTest, compression: SerializerCompression.GZip);
                }
            }

            int positiveTrain = yTrain.Count(x => x);
            int positiveTest  = yTest.Count(x => x);
            int negativeTrain = yTrain.Length - positiveTrain;
            int negativeTest  = yTest.Length - positiveTest;

            Console.WriteLine("Training samples: {0} [{1}+, {2}-]", positiveTrain + negativeTrain, positiveTrain, negativeTrain);
            // This line reports the testing set (total, positives, negatives).
            Console.WriteLine("Testing samples: {0} [{1}+, {2}-]", positiveTest + negativeTest, positiveTest, negativeTest);

            // Create and learn a linear sparse binary support vector machine
            var learn = new AveragedStochasticGradientDescent<Linear, Sparse<double>>()
            {
                MaxIterations = 5,
                Tolerance     = 0,
            };

            Console.WriteLine("Learning training set");
            Stopwatch sw = Stopwatch.StartNew();
            var svm = learn.Learn(xTrain, yTrain);

            Console.WriteLine(sw.Elapsed);


            Console.WriteLine("Predicting training set");
            sw = Stopwatch.StartNew();
            bool[] trainPred = svm.Decide(xTrain);
            Console.WriteLine(sw.Elapsed);

            var train = new ConfusionMatrix(trainPred, yTrain);

            Console.WriteLine("Train acc: " + train.Accuracy);


            Console.WriteLine("Predicting testing set");
            sw = Stopwatch.StartNew();
            bool[] testPred = svm.Decide(xTest);
            Console.WriteLine(sw.Elapsed);

            var test = new ConfusionMatrix(testPred, yTest);

            Console.WriteLine("Test acc: " + test.Accuracy);
        }