Esempio n. 1
0
        /// <summary>
        ///   Runs t-SNE over nine hand-written three-dimensional observations,
        ///   asking for a two-dimensional embedding, and flattens the result
        ///   into a single vector.
        /// </summary>
        void tsnetest()
        {
            // Seed Accord's random generator with 0 before running.
            Accord.Math.Random.Generator.Seed = 0;

            // Input samples: one three-component observation per row.
            double[][] samples =
            {
                new double[] { -5, -2, -1 },
                new double[] { -5, -5, -6 },
                new double[] {  2,  1,  1 },
                new double[] {  1,  1,  2 },
                new double[] {  1,  2,  2 },
                new double[] {  3,  1,  2 },
                new double[] { 11,  5,  4 },
                new double[] { 15,  5,  6 },
                new double[] { 10,  5,  6 },
            };

            // Configure the algorithm: two output dimensions, perplexity 1.5.
            TSNE reducer = new TSNE();
            reducer.NumberOfOutputs = 2;
            reducer.Perplexity = 1.5;

            // Project the samples into the reduced space.
            double[][] embedding = reducer.Transform(samples);

            // Flatten the jagged embedding into one vector.
            double[] flattened = embedding.Reshape();
        }
Esempio n. 2
0
        /// <summary>
        ///   Compares the row/column/value arrays produced by
        ///   <c>TSNE.computeGaussianPerplexity</c> with those produced by
        ///   <c>TSNEWrapper.computeGaussianPerplexity</c> for the
        ///   <c>yinyang.Submatrix(null, 0, 1)</c> data.
        /// </summary>
        public void computeGaussianPerplexity2_larger()
        {
            var samples = yinyang.Submatrix(null, 0, 1).ToJagged();

            double perplexity = 0.5;
            int count = samples.Rows();
            int dims = samples.Columns();
            int neighbours = (int)(3 * perplexity);

            double[,] dense = samples.ToMatrix();
            double[][] jagged = dense.ToJagged();

            // Reference run: the buffers are allocated with 200 entries and
            // trimmed to the compared length further down.
            uint[] referenceRows = Vector.Zeros<uint>(200);
            uint[] referenceCols = Vector.Zeros<uint>(200);
            double[] referenceVals = Vector.Zeros<double>(200);
            TSNEWrapper.computeGaussianPerplexity(dense, count, dims, referenceRows, referenceCols, referenceVals, perplexity, neighbours);

            // Run under test: the arrays are handed over by reference as null.
            int[] rows = null;
            int[] cols = null;
            double[] vals = null;
            TSNE.computeGaussianPerplexity(jagged, count, dims, ref rows, ref cols, ref vals, perplexity, neighbours);

            // Keep only the leading part of each reference buffer.
            uint[] trimmedRows = referenceRows.First(101);
            uint[] trimmedCols = referenceCols.First(100);
            double[] trimmedVals = referenceVals.First(100);

            Assert.IsTrue(rows.IsEqual(trimmedRows));
            Assert.IsTrue(cols.IsEqual(trimmedCols));
            Assert.IsTrue(vals.IsEqual(trimmedVals, 1e-4));
        }
Esempio n. 3
0
        /// <summary>
        ///   Loads the MNIST training images from the test resources, projects
        ///   them with a previously serialized PCA model and runs t-SNE with
        ///   its default settings on the projection.
        /// </summary>
        /// <remarks>
        ///   The test ends in an unconditional <c>Assert.Fail()</c>: nothing is
        ///   asserted about the resulting embedding, so it cannot pass as written.
        /// </remarks>
        public void ConstructorTest()
        {
            Accord.Math.Random.Generator.Seed = 0;

            string mnistPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Resources", "mnist", "train-images-idx3-ubyte.gz");

            IdxReader idxReader = new IdxReader(mnistPath);

            double[][] X = idxReader.ReadToEndAsVectors<double>();

            // Expected value goes first so that failure messages read correctly.
            Assert.AreEqual(60000, X.Length);
            Assert.AreEqual(784, X[59999].Length);

            // Perform the initial dimensionality reduction using PCA
            //var pca = new PrincipalComponentAnalysis(numberOfOutputs: 2);
            //pca.Learn(X);
            //pca.Save(@"pca_v3_1.bin");

            string pcaPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Resources", "mnist", "pca_mnist_v3_1.bin");

            var pca = Serializer.Load<PrincipalComponentAnalysis>(pcaPath);

            X = pca.Transform(X);

            TSNE tSNE = new TSNE();
            var Y = tSNE.Transform(X);

            // Placeholder: no assertions on Y have been written yet.
            Assert.Fail();
        }
Esempio n. 4
0
        /// <summary>
        ///   Checks that <c>TSNE.computeGaussianPerplexity</c> fills the same
        ///   N-by-N matrix as <c>TSNEWrapper.computeGaussianPerplexity</c> for six
        ///   hand-written three-component points.
        /// </summary>
        public void computeGaussianPerplexity_1()
        {
            double[][] samples =
            {
                new double[] { 2, 3, 2 },
                new double[] { 5, 4, 5 },
                new double[] { 9, 6, 4 },
                new double[] { 4, 7, 5 },
                new double[] { 8, 1, 1 },
                new double[] { 1, 2, 4 },
            };

            int count = samples.Length;
            int dims = 3;
            double perplexity = 0.5;

            // Reference result, written into a rectangular matrix.
            double[,] reference = new double[count, count];
            TSNEWrapper.computeGaussianPerplexity(samples.ToMatrix(), count, dims, reference, perplexity);

            // Result under test, written into a jagged matrix.
            double[][] result = Jagged.Zeros(count, count);
            TSNE.computeGaussianPerplexity(samples, count, dims, ref result, perplexity);

            bool matches = result.IsEqual(reference, rtol: 1e-5);
            Assert.IsTrue(matches);
        }
Esempio n. 5
0
        /// <summary>
        ///   Reads the MNIST training images from the embedded resources,
        ///   projects them with a previously serialized PCA model and runs
        ///   t-SNE with its default settings on the projection.
        /// </summary>
        /// <remarks>
        ///   The test ends in an unconditional <c>Assert.Fail()</c>: nothing is
        ///   asserted about the resulting embedding, so it cannot pass as written.
        /// </remarks>
        public void ConstructorTest()
        {
            Accord.Math.Random.Generator.Seed = 0;

            IdxReader idxReader = new IdxReader(Resources.train_images_idx3_ubyte);

            double[][] X = idxReader.ReadToEndAsVectors<double>();

            // Expected value goes first so that failure messages read correctly.
            Assert.AreEqual(60000, X.Length);
            Assert.AreEqual(784, X[59999].Length);

            // Perform the initial dimensionality reduction using PCA
            //var pca = new PrincipalComponentAnalysis(numberOfOutputs: 2);
            //pca.Learn(X);
            //pca.Save(@"pca_v3_1.bin");

            var pca = Serializer.Load<PrincipalComponentAnalysis>(Properties.Resources.pca_mnist_v3_1);

            X = pca.Transform(X);

            TSNE tSNE = new TSNE();
            var Y = tSNE.Transform(X);

            // Placeholder: no assertions on Y have been written yet.
            Assert.Fail();
        }
Esempio n. 6
0
        /// <summary>
        ///   Checks that <c>TSNE.computeSquaredEuclideanDistance</c> and
        ///   <c>TSNEWrapper.computeSquaredEuclideanDistance</c> agree on the
        ///   <c>yinyang.Submatrix(null, 0, 1)</c> data.
        /// </summary>
        public void compute_squared_distance_larger()
        {
            var samples = yinyang.Submatrix(null, 0, 1).ToJagged();
            var dense = samples.ToMatrix();

            int count = dense.Rows();
            int dims = dense.Columns();

            // Reference distances go into a rectangular matrix.
            double[,] reference = new double[count, count];
            TSNEWrapper.computeSquaredEuclideanDistance(dense, reference);

            // Distances under test go into a jagged matrix.
            double[][] result = Jagged.Zeros(count, count);
            TSNE.computeSquaredEuclideanDistance(samples, count, dims, result);

            bool matches = result.IsEqual(reference);
            Assert.IsTrue(matches);
        }
Esempio n. 7
0
        /// <summary>
        ///   Feeds identical random inputs to <c>TSNEWrapper.computeGradient</c>
        ///   and <c>TSNE.computeGradient</c> and checks that the gradient and the
        ///   row/column/value arrays match afterwards.
        /// </summary>
        public void computeGradient_1()
        {
            Accord.Math.Random.Generator.Seed = 0;

            double perplexity = 0.5;
            double theta = 0.5;
            int count = 100;
            int neighbours = (int)(3 * perplexity);
            int dims = 3;

            // Row/column/value arrays shared (as copies) by both runs.
            uint[] rowTemplate = Vector.Create(count + 1, new uint[] { 0, 1, 2, 3, 4, 5, 6 });
            uint[] colTemplate = Vector.Create(count * neighbours, new uint[] { 5, 3, 1, 1, 2, 1 });
            double[] valTemplate = Vector.Create(count * neighbours, new double[]
            {
                0.83901046609114708,
                0.39701047304189827,
                0.19501046869768451,
                0.59401047304189827,
                0.49301046869768484,
                0.59901046869768451,
            });

            // Random inputs; P is drawn before Y, as in the original ordering.
            double[,] denseP = Matrix.Random(count, count, new NormalDistribution());
            double[][] jaggedP = denseP.ToJagged();

            double[,] denseY = Matrix.Random(count, dims, new NormalDistribution());
            double[][] jaggedY = denseY.ToJagged();

            // Reference run on the rectangular inputs.
            uint[] referenceRows = Vector.Create(rowTemplate);
            uint[] referenceCols = Vector.Create(colTemplate);
            double[] referenceVals = Vector.Create(valTemplate);
            double[,] referenceGradient = Matrix.Zeros(count, dims);
            TSNEWrapper.computeGradient(denseP, referenceRows, referenceCols, referenceVals, denseY, count, dims, referenceGradient, theta);

            // Run under test on the jagged inputs.
            int[] rows = rowTemplate.To<int[]>();
            int[] cols = colTemplate.To<int[]>();
            double[] vals = (double[])valTemplate.Clone();
            double[][] gradient = Jagged.Zeros(count, dims);
            TSNE.computeGradient(jaggedP, rows, cols, vals, jaggedY, count, dims, gradient, theta);

            Assert.IsTrue(gradient.IsEqual(referenceGradient));
            Assert.IsTrue(rows.IsEqual(referenceRows));
            Assert.IsTrue(cols.IsEqual(referenceCols));
            Assert.IsTrue(vals.IsEqual(referenceVals, 1e-4));
        }
Esempio n. 8
0
        /// <summary>
        ///   Checks that <c>TSNE.evaluateError</c> and
        ///   <c>TSNEWrapper.evaluateError</c> return the same value for the same
        ///   random 6-by-6 matrix P and 6-by-2 matrix Y.
        /// </summary>
        public void evaluateError_2()
        {
            int count = 6;
            int dims = 2;

            // P is drawn before Y; each is mirrored into a jagged copy.
            double[,] denseP = Matrix.Random(count, count, new NormalDistribution());
            double[][] jaggedP = denseP.ToJagged();

            double[,] denseY = Matrix.Random(count, dims, new NormalDistribution());
            double[][] jaggedY = denseY.ToJagged();

            double reference = TSNEWrapper.evaluateError(denseP, denseY, count, dims);
            double result = TSNE.evaluateError(jaggedP, jaggedY, count, dims);

            Assert.AreEqual(reference, result);
        }
Esempio n. 9
0
        /// <summary>
        ///   Checks that <c>TSNE.computeGaussianPerplexity</c> fills the same
        ///   N-by-N matrix as <c>TSNEWrapper.computeGaussianPerplexity</c> for the
        ///   <c>yinyang.Submatrix(null, 0, 1)</c> data.
        /// </summary>
        public void computeGaussianPerplexity_larger()
        {
            var samples = yinyang.Submatrix(null, 0, 1).ToJagged();

            int count = samples.Rows();
            int dims = samples.Columns();
            double perplexity = 0.5;

            // Reference result, written into a rectangular matrix.
            double[,] reference = new double[count, count];
            TSNEWrapper.computeGaussianPerplexity(samples.ToMatrix(), count, dims, reference, perplexity);

            // Result under test, written into a jagged matrix.
            double[][] result = Jagged.Zeros(count, count);
            TSNE.computeGaussianPerplexity(samples, count, dims, ref result, perplexity);

            bool matches = result.IsEqual(reference, rtol: 1e-5);
            Assert.IsTrue(matches);
        }
Esempio n. 10
0
        /// <summary>
        ///   Runs t-SNE on a copy of <paramref name="data"/> and returns the
        ///   reduced representation.
        /// </summary>
        /// <param name="data">Input rows; the array is copied before being transformed.</param>
        /// <param name="outputDimension">Value assigned to <c>NumberOfOutputs</c>.</param>
        /// <param name="perplexity">Value assigned to <c>Perplexity</c>.</param>
        /// <returns>The array returned by <c>TSNE.Transform</c>.</returns>
        double[][] runTSNE(double[][] data, int outputDimension, double perplexity)
        {
            // Take the input width from the data instead of assuming nine
            // columns; keep 9 as the fallback when no first row is available.
            int inputDimension = 9;
            if (data != null && data.Length > 0 && data[0] != null)
            {
                inputDimension = data[0].Length;
            }

            // Create a new t-SNE algorithm
            TSNE tSNE = new TSNE()
            {
                NumberOfInputs  = inputDimension,
                NumberOfOutputs = outputDimension,
                Perplexity      = perplexity
            };

            double[][] copiedData = data.Copy();

            // Transform to a reduced dimensionality space
            double[][] output = tSNE.Transform(copiedData);

            return output;
        }
Esempio n. 11
0
        /// <summary>
        ///   Checks that the row/column/value overloads of
        ///   <c>TSNE.evaluateError</c> and <c>TSNEWrapper.evaluateError</c> return
        ///   the same value and leave matching arrays behind.
        /// </summary>
        public void evaluateError_1()
        {
            double perplexity = 0.5;
            double theta = 0.5;
            int count = 6;
            int neighbours = (int)(3 * perplexity);
            int dims = 2;

            // Row/column/value arrays shared (as copies) by both runs.
            uint[] rowTemplate = Vector.Create(count + 1, new uint[] { 0, 1, 2, 3, 4, 5, 6 });
            uint[] colTemplate = Vector.Create(count * neighbours, new uint[] { 5, 3, 1, 1, 2, 1 });
            double[] valTemplate = Vector.Create(count * neighbours, new double[]
            {
                0.99901046609114708,
                0.99901047304189827,
                0.99901046869768451,
                0.99901047304189827,
                0.99901046869768484,
                0.99901046869768451,
            });

            double[,] denseY = Matrix.Random(count, dims, new NormalDistribution());
            double[][] jaggedY = denseY.ToJagged();

            // Reference run.
            uint[] referenceRows = Vector.Create(rowTemplate);
            uint[] referenceCols = Vector.Create(colTemplate);
            double[] referenceVals = Vector.Create(valTemplate);
            double reference = TSNEWrapper.evaluateError(referenceRows, referenceCols, referenceVals, denseY, count, dims, theta);

            // Run under test.
            int[] rows = rowTemplate.To<int[]>();
            int[] cols = colTemplate.To<int[]>();
            double[] vals = (double[])valTemplate.Clone();
            double result = TSNE.evaluateError(rows, cols, vals, jaggedY, count, dims, theta);

            Assert.AreEqual(reference, result);
            Assert.IsTrue(cols.IsEqual(referenceCols));
            Assert.IsTrue(rows.IsEqual(referenceRows));
            Assert.IsTrue(vals.IsEqual(referenceVals));
        }
Esempio n. 12
0
        /// <summary>
        ///   Reduces nine three-component observations to one dimension with
        ///   t-SNE (seed 0, perplexity 1.5) and compares the flattened output
        ///   with recorded values.
        /// </summary>
        public void learn_test()
        {
            #region doc_learn
            Accord.Math.Random.Generator.Seed = 0;

            // The observations to be embedded, one per row
            double[][] observations =
            {
                new double[] { -5, -2, -1 },
                new double[] { -5, -5, -6 },
                new double[] {  2,  1,  1 },
                new double[] {  1,  1,  2 },
                new double[] {  1,  2,  2 },
                new double[] {  3,  1,  2 },
                new double[] { 11,  5,  4 },
                new double[] { 15,  5,  6 },
                new double[] { 10,  5,  6 },
            };

            // Set up t-SNE with a single output dimension
            TSNE tSNE = new TSNE();
            tSNE.NumberOfOutputs = 1;
            tSNE.Perplexity = 1.5;

            // Project the observations into the reduced space
            double[][] output = tSNE.Transform(observations);

            // Flatten the result into a plain vector
            double[] y = output.Reshape();
            #endregion

            // Textual form of the result; not used by the assertion below.
            string generated = y.ToCSharp();

            double[] expected =
            {
                327.15556116089,
                144.502680170483,
                -21.5116375004548,
                253.712522074559,
                214.067349874275,
                24.8621254326599,
                -299.97879062709,
                -260.342898777221,
                -382.466911808102
            };

            bool matches = y.IsEqual(expected, rtol: 1e-5);
            Assert.IsTrue(matches);
        }
Esempio n. 13
0
        /// <summary>
        ///   Compares the row/column/value arrays produced by
        ///   <c>TSNE.computeGaussianPerplexity</c> with those produced by
        ///   <c>TSNEWrapper.computeGaussianPerplexity</c> for six hand-written
        ///   three-component points.
        /// </summary>
        public void computeGaussianPerplexity_2()
        {
            double[][] samples =
            {
                new double[] { 2, 3,   20 },
                new double[] { 5, 4,    5 },
                new double[] { 9, 6,  500 },
                new double[] { 4, 7, -100 },
                new double[] { 8, 1,   67 },
                new double[] { 1, 2, -888 },
            };

            double perplexity = 0.5;
            int count = samples.Length;
            int dims = 3;
            int neighbours = (int)(3 * perplexity);

            double[,] dense = samples.ToMatrix();
            double[][] jagged = dense.ToJagged();

            // Reference run: the buffers are allocated with 100 entries and
            // trimmed to the compared length further down.
            uint[] referenceRows = Vector.Zeros<uint>(100);
            uint[] referenceCols = Vector.Zeros<uint>(100);
            double[] referenceVals = Vector.Zeros<double>(100);
            TSNEWrapper.computeGaussianPerplexity(dense, count, dims, referenceRows, referenceCols, referenceVals, perplexity, neighbours);

            // Run under test: the arrays are handed over by reference as null.
            int[] rows = null;
            int[] cols = null;
            double[] vals = null;
            TSNE.computeGaussianPerplexity(jagged, count, dims, ref rows, ref cols, ref vals, perplexity, neighbours);

            // Keep only the leading part of each reference buffer.
            uint[] trimmedRows = referenceRows.First(7);
            uint[] trimmedCols = referenceCols.First(6);
            double[] trimmedVals = referenceVals.First(6);

            Assert.IsTrue(rows.IsEqual(trimmedRows));
            Assert.IsTrue(cols.IsEqual(trimmedCols));
            Assert.IsTrue(vals.IsEqual(trimmedVals, 1e-4));
        }
    /// <summary>
    ///   Trains word embeddings on one blog-corpus file, writes them to disk,
    ///   reads them back, reduces them with t-SNE, clusters them with DBSCAN
    ///   and writes a report, all under the results directory.
    /// </summary>
    public void Go()
    {
        // The tick count of the current time tags this run's output files.
        var runId = DateTime.Now.Ticks;

        // First corpus file whose Length lies between 1e5 and 2e5.
        var corpusPath = TrainingDataManager
            .GetBlogAuthorshipCorpusFiles()
            .First(f => f.Length >= 1e5 && f.Length <= 2e5)
            .FullName;

        string resultsPath = $@"{Directory.GetCurrentDirectory()}/{ResultsDirectory}";
        string embeddingsPath = $@"{resultsPath}/wordEmbeddings-{runId}.csv";
        string reportPath = $@"{resultsPath}/report-{runId}.csv";

        Directory.CreateDirectory(resultsPath);

        // Train and persist the embeddings.
        var trainer = new Word2VecTrainer();
        trainer.Setup(corpusPath, minWordOccurrences: 3);
        trainer.TrainModel();
        trainer.WriteWordEmbeddings(embeddingsPath);

        // Read the embeddings back from the file that was just written.
        using var embeddingsStream = new FileStream(embeddingsPath, FileMode.OpenOrCreate, FileAccess.Read);
        using var embeddingsReader = new StreamReader(embeddingsStream, Encoding.UTF8);

        var embeddings = new List<WordEmbedding>();
        embeddings.PopulateWordEmbeddingsFromStream(embeddingsReader);

        // Reduce the embeddings, then cluster them.
        var reducer = new TSNE(2, distanceFunctionType: DistanceFunctionType.Cosine);
        reducer.ReduceDimensions(embeddings);

        var clusterMap = DBSCAN.GetLabelClusterMap(
            embeddings,
            epsilon: 0.1,
            minimumSamples: 3,
            distanceFunctionType: DistanceFunctionType.Cosine,
            concurrentThreads: 4);

        var reportWriter = new ReportWriter(reportPath);
        reportWriter.Write2DWordEmbeddingsAndClusterIndexesForExcel(embeddings, clusterMap);
    }
Esempio n. 15
0
        /// <summary>
        ///   Runs <c>TSNEWrapper.run</c> and <c>TSNE.run</c> from the same
        ///   randomly initialised one-column embedding of the
        ///   <c>yinyang.Submatrix(null, 0, 1)</c> data and checks that the
        ///   flattened results are equal.
        /// </summary>
        public void FromDataTest2()
        {
            Accord.Math.Random.Generator.Seed = 0;

            double perplexity = 20;
            double theta = 0.5;

            int count = yinyang.GetLength(0);
            var initial = Matrix.Random(count, 1, new NormalDistribution(0, 0.001));

            // Both runs start from copies of the same initial embedding; the
            // jagged copy is taken before the reference run writes into its own.
            var denseInput = yinyang.Submatrix(null, 0, 1);
            var denseEmbedding = (double[,])initial.Clone();
            var jaggedInput = denseInput.ToJagged();
            var jaggedEmbedding = denseEmbedding.ToJagged();

            // Reference run on the rectangular arrays.
            TSNEWrapper.run(denseInput, denseEmbedding, perplexity, theta);
            var reference = denseEmbedding.Flatten();

            // Run under test on the jagged arrays.
            TSNE.run(jaggedInput, jaggedEmbedding, perplexity, theta, true);
            var result = jaggedEmbedding.Flatten();

            Assert.IsTrue(result.IsEqual(reference));
        }
Esempio n. 16
0
        /// <summary>
        ///   Reads the MNIST training images from the embedded resources,
        ///   projects them with a previously serialized PCA model and runs
        ///   t-SNE with its default settings on the projection.
        /// </summary>
        /// <remarks>
        ///   The test ends in an unconditional <c>Assert.Fail()</c>: nothing is
        ///   asserted about the resulting embedding, so it cannot pass as written.
        /// </remarks>
        public void ConstructorTest()
        {
            Accord.Math.Random.Generator.Seed = 0;

            IdxReader idxReader = new IdxReader(Resources.train_images_idx3_ubyte);
            double[][] X = idxReader.ReadToEndAsVectors<double>();

            // Expected value goes first so that failure messages read correctly.
            Assert.AreEqual(60000, X.Length);
            Assert.AreEqual(784, X[59999].Length);

            // Perform the initial dimensionality reduction using PCA
            //var pca = new PrincipalComponentAnalysis(numberOfOutputs: 2);
            //pca.Learn(X);
            //pca.Save(@"pca_v3_1.bin");

            var pca = Serializer.Load<PrincipalComponentAnalysis>(Properties.Resources.pca_mnist_v3_1);

            X = pca.Transform(X);

            TSNE tSNE = new TSNE();
            var Y = tSNE.Transform(X);

            // Placeholder: no assertions on Y have been written yet.
            Assert.Fail();
        }
Esempio n. 17
0
        /// <summary>
        ///   Checks that <c>TSNE.computeSquaredEuclideanDistance</c> and
        ///   <c>TSNEWrapper.computeSquaredEuclideanDistance</c> agree on six
        ///   hand-written three-component points.
        /// </summary>
        public void compute_squared_distance_1()
        {
            double[][] samples =
            {
                new double[] { 2, 3, 2 },
                new double[] { 5, 4, 5 },
                new double[] { 9, 6, 4 },
                new double[] { 4, 7, 5 },
                new double[] { 8, 1, 1 },
                new double[] { 1, 2, 4 },
            };

            var dense = samples.ToMatrix();
            int count = dense.Rows();
            int dims = dense.Columns();

            // Reference distances go into a rectangular matrix.
            double[,] reference = new double[count, count];
            TSNEWrapper.computeSquaredEuclideanDistance(dense, reference);

            // Distances under test go into a jagged matrix.
            double[][] result = Jagged.Zeros(count, count);
            TSNE.computeSquaredEuclideanDistance(samples, count, dims, result);

            bool matches = result.IsEqual(reference);
            Assert.IsTrue(matches);
        }
Esempio n. 18
0
        /// <summary>
        ///   Checks that <c>TSNE.symmetrizeMatrix</c> produces the same
        ///   row/column/value arrays as <c>TSNEWrapper.symmetrizeMatrix</c> when
        ///   both start from the same six-row input.
        /// </summary>
        public void symmetrizeMatrix_1()
        {
            double perplexity = 0.5;
            int count = 6;
            int neighbours = (int)(3 * perplexity);

            // Row/column/value arrays shared (as copies) by both runs.
            uint[] rowTemplate = Vector.Create(count + 1, new uint[] { 0, 1, 2, 3, 4, 5, 6 });
            uint[] colTemplate = Vector.Create(count * neighbours, new uint[] { 5, 3, 1, 1, 2, 1 });
            double[] valTemplate = Vector.Create(count * neighbours, new double[]
            {
                0.99901046609114708,
                0.99901047304189827,
                0.99901046869768451,
                0.99901047304189827,
                0.99901046869768484,
                0.99901046869768451,
            });

            // Reference run: the buffers are created with 100 entries and
            // trimmed to the compared length further down.
            uint[] referenceRows = Vector.Create(100, rowTemplate);
            uint[] referenceCols = Vector.Create(100, colTemplate);
            double[] referenceVals = Vector.Create(100, valTemplate);
            TSNEWrapper.symmetrizeMatrix(referenceRows, referenceCols, referenceVals, count);

            // Run under test: the arrays are handed over by reference.
            int[] rows = rowTemplate.To<int[]>();
            int[] cols = colTemplate.To<int[]>();
            double[] vals = (double[])valTemplate.Clone();
            TSNE.symmetrizeMatrix(ref rows, ref cols, ref vals, count);

            // Keep only the leading part of each reference buffer.
            uint[] trimmedRows = referenceRows.First(7);
            uint[] trimmedCols = referenceCols.First(10);
            double[] trimmedVals = referenceVals.First(10);

            Assert.IsTrue(cols.IsEqual(trimmedCols));
            Assert.IsTrue(rows.IsEqual(trimmedRows));
            Assert.IsTrue(vals.IsEqual(trimmedVals));
        }
Esempio n. 19
0
        /// <summary>
        ///   Reads the data matrix and the labels from two text files, runs t-SNE
        ///   on the data, appends the labels to the embedding and writes the
        ///   result to <c>result.txt</c> as tab-separated rows.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // The input files hold machine-written numbers, so they are parsed
            // with the invariant culture rather than the user's regional settings.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Values on a data line are separated by exactly three spaces.
            string[] separator = new string[] { "   " };

            Stopwatch stopWatch = new Stopwatch();

            stopWatch.Start();

            String[] lines_X = File.ReadAllLines(@"C:\Users\v-jiehu\source\repos\T-SNE\T-SNE\data\mnist2500_X.txt");
            String[] lines_Y = File.ReadAllLines(@"C:\Users\v-jiehu\source\repos\T-SNE\T-SNE\data\mnist2500_labels.txt");

            if (lines_X.Length == 0)
            {
                Console.WriteLine("The data file contains no rows; nothing to do.");
                return;
            }

            // Tokenise every line once. The matrix width is taken from these
            // tokens so that it agrees with what is actually parsed below
            // (splitting on single whitespace would count the empty entries
            // between the three-space separators as extra columns).
            string[][] tokens = new string[lines_X.Length][];
            int columns = 0;
            for (int i = 0; i < lines_X.Length; i++)
            {
                tokens[i] = lines_X[i].Trim().Split(separator, StringSplitOptions.None);
                if (tokens[i].Length > columns)
                {
                    columns = tokens[i].Length;
                }
            }

            var m = Matrix.Create(lines_X.Length, columns, 0.0);

            for (int r = 0; r < tokens.Length; r++)
            {
                for (int c = 0; c < tokens[r].Length; c++)
                {
                    string w = tokens[r][c];
                    try
                    {
                        m[r, c] = Convert.ToDouble(w, invariant);
                    }
                    catch (FormatException)
                    {
                        // Best effort: report the bad cell and leave it at 0.0.
                        Console.WriteLine("Unable to convert '{0}' to a Double. - Data", w);
                        Console.WriteLine(r);
                        Console.WriteLine(c);
                    }
                    catch (OverflowException)
                    {
                        Console.WriteLine("'{0}' is outside the range of a Double.", w);
                    }
                }
            }

            // One label per line.
            var labels = Vector.Create(lines_Y.Length, 0.0);

            for (int i = 0; i < lines_Y.Length; i++)
            {
                string w = lines_Y[i];
                try
                {
                    labels[i] = Convert.ToDouble(w, invariant);
                }
                catch (FormatException)
                {
                    Console.WriteLine("Unable to convert '{0}' to a Double. - Label", w);
                }
                catch (OverflowException)
                {
                    Console.WriteLine("'{0}' is outside the range of a Double.", w);
                }
            }

            stopWatch.Stop();
            // Get the elapsed time as a TimeSpan value.
            TimeSpan ts = stopWatch.Elapsed;

            // TotalSeconds covers the whole interval; Seconds would only be the
            // 0-59 seconds component of it.
            Console.WriteLine("Reading files takes: " + ((long)ts.TotalSeconds).ToString() + " seconds");

            // T-SNE starts
            stopWatch.Restart();
            var Y = new TSNE(m, 2, 2, 30.0)._TSNE();

            stopWatch.Stop();
            ts = stopWatch.Elapsed;
            string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}",
                                               ts.Hours, ts.Minutes, ts.Seconds,
                                               ts.Milliseconds / 10);

            Console.WriteLine("RunTime " + elapsedTime);

            // add label to Y
            Y = Y.Concatenate(labels);

            // write result to target path
            string targetPath = @"C:\Users\v-jiehu\source\repos\T-SNE\T-SNE\data";
            string fileName   = "result.txt";

            string destFile = System.IO.Path.Combine(targetPath, fileName);

            // CreateDirectory does nothing when the directory already exists,
            // so no existence check is needed.
            System.IO.Directory.CreateDirectory(targetPath);

            if (System.IO.File.Exists(destFile))
            {
                // Use a try block to catch IOExceptions, to
                // handle the case of the file already being
                // opened by another process.
                try
                {
                    System.IO.File.Delete(destFile);
                }
                catch (System.IO.IOException e)
                {
                    Console.WriteLine(e.Message);
                }
            }

            using (System.IO.StreamWriter file =
                       new System.IO.StreamWriter(destFile))
            {
                for (var row = 0; row < Y.GetLength(0); row++)
                {
                    var string_line = string.Join("\t", Y.GetRow(row));
                    file.WriteLine(string_line);
                }
            }
        }
Esempio n. 20
0
    /// <summary>
    ///   Start is called before the first frame update. Loads the word
    ///   embeddings from <c>inputfile</c>, reduces them to three dimensions
    ///   with t-SNE, stores the reduced vectors on each embedding and then
    ///   sets up the initial target, zoom and game objects.
    /// </summary>
    /// <remarks>
    ///   The first line of the file is read as "count dimensionality"; each
    ///   following line is read as a word followed by its space-separated
    ///   vector components.
    /// </remarks>
    void Start()
    {
        const int outputDimensions = 3;

        // The file holds machine-written numbers, so it is parsed with the
        // invariant culture rather than the user's regional settings.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        counter = 0;

        preview = true;

        // Overwritten below once the entry count is known.
        number_of_neighbours = k;

        double[][] data;

        // The using block closes the reader even if parsing throws.
        using (System.IO.StreamReader file = new System.IO.StreamReader(inputfile))
        {
            string line = file.ReadLine();
            if (line == null)
            {
                throw new System.IO.EndOfStreamException("The embeddings file is empty; expected a header line.");
            }

            string[] subStrings = line.Split(' ');
            count          = System.Convert.ToInt32(subStrings[0], invariant);
            dimensionality = System.Convert.ToInt32(subStrings[1], invariant);

            //count = 10000;
            number_of_neighbours = count - 1;

            embeddings = new WordEmbedding[count];
            data       = new double[count][];

            for (int j = 0; j < count; j++)
            {
                line = file.ReadLine();
                if (line == null)
                {
                    throw new System.IO.EndOfStreamException(
                        "The embeddings file ended after " + j + " entries; the header announced " + count + ".");
                }

                subStrings = line.Split(' ');
                string currentword = subStrings[0];

                // Components follow the word, hence the +1 offset.
                double[] currentVectors = new double[dimensionality];
                for (int i = 0; i < dimensionality; i++)
                {
                    currentVectors[i] = System.Convert.ToDouble(subStrings[i + 1], invariant);
                }

                data[counter]       = currentVectors;
                embeddings[counter] = new WordEmbedding(currentword, currentVectors);

                counter++;
            }
        }

        TSNE tSNE = new TSNE()
        {
            NumberOfOutputs = outputDimensions,
            Perplexity      = 5
        };

        double[][] finalData = tSNE.Transform(data);
        counter = 0;

        // Copy the reduced coordinates onto each embedding.
        for (int j = 0; j < count; j++)
        {
            WordEmbedding Target = embeddings[j];

            double[] pcavectors = new double[outputDimensions];
            for (int axis = 0; axis < outputDimensions; axis++)
            {
                pcavectors[axis] = finalData[j][axis];
            }

            Target.SetPCAVectors(pcavectors);
            embeddings[j] = Target;
        }

        // Set intial sample word
        SetTarget(embeddings[0]);
        adjustZoomFromNeighbours();
        CreateGameObjects();
    }