/// <summary>
/// Runs t-SNE over nine fixed 3-dimensional observations, requesting a
/// 2-dimensional output, and flattens the result. Nothing is asserted or returned.
/// </summary>
void tsnetest()
{
    // Seed Accord's shared random generator with a fixed value.
    Accord.Math.Random.Generator.Seed = 0;

    // Nine hand-written 3-D sample points.
    double[][] samples = new double[][]
    {
        new double[] { -5, -2, -1 },
        new double[] { -5, -5, -6 },
        new double[] {  2,  1,  1 },
        new double[] {  1,  1,  2 },
        new double[] {  1,  2,  2 },
        new double[] {  3,  1,  2 },
        new double[] { 11,  5,  4 },
        new double[] { 15,  5,  6 },
        new double[] { 10,  5,  6 },
    };

    // Configure the reducer: two output dimensions, perplexity 1.5.
    var reducer = new TSNE();
    reducer.NumberOfOutputs = 2;
    reducer.Perplexity = 1.5;

    // Project the samples and flatten the jagged result into one vector.
    double[][] embedded = reducer.Transform(samples);
    double[] flattened = embedded.Reshape();
}
/// <summary>
/// Compares the sparse (row/column/value) output of
/// <c>TSNE.computeGaussianPerplexity</c> with the output of
/// <c>TSNEWrapper.computeGaussianPerplexity</c> on the sub-matrix of
/// <c>yinyang</c> selected by <c>Submatrix(null, 0, 1)</c>.
/// </summary>
public void computeGaussianPerplexity2_larger()
{
    double[][] samples = yinyang.Submatrix(null, 0, 1).ToJagged();

    double perplexity = 0.5;
    int neighbours = (int)(3 * perplexity);
    int rowCount = samples.Rows();
    int columnCount = samples.Columns();

    // Dense copy for the wrapper, jagged copy for the managed code.
    double[,] dense = samples.ToMatrix();
    double[][] jagged = dense.ToJagged();

    // The wrapper writes into caller-allocated buffers of 200 entries each.
    uint[] wrapperRows = Vector.Zeros<uint>(200);
    uint[] wrapperCols = Vector.Zeros<uint>(200);
    double[] wrapperVals = Vector.Zeros<double>(200);
    TSNEWrapper.computeGaussianPerplexity(dense, rowCount, columnCount,
        wrapperRows, wrapperCols, wrapperVals, perplexity, neighbours);

    // The managed version receives null arrays by reference.
    int[] managedRows = null;
    int[] managedCols = null;
    double[] managedVals = null;
    TSNE.computeGaussianPerplexity(jagged, rowCount, columnCount,
        ref managedRows, ref managedCols, ref managedVals, perplexity, neighbours);

    // Only the leading 101 / 100 / 100 entries of the wrapper buffers are compared.
    wrapperRows = wrapperRows.First(101);
    wrapperCols = wrapperCols.First(100);
    wrapperVals = wrapperVals.First(100);

    Assert.IsTrue(managedRows.IsEqual(wrapperRows));
    Assert.IsTrue(managedCols.IsEqual(wrapperCols));
    Assert.IsTrue(managedVals.IsEqual(wrapperVals, 1e-4));
}
/// <summary>
/// Loads the MNIST training images from the test resources directory, projects
/// them with a previously serialized PCA, and runs a default-configured
/// <c>TSNE</c> over the projection.
/// </summary>
public void ConstructorTest()
{
    Accord.Math.Random.Generator.Seed = 0;

    string mnistPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Resources", "mnist", "train-images-idx3-ubyte.gz");

    // Dispose the reader as soon as the data is in memory so the underlying
    // file handle is released even if a later step throws.
    double[][] X;
    using (IdxReader idxReader = new IdxReader(mnistPath))
    {
        X = idxReader.ReadToEndAsVectors<double>();
    }

    // Expected value goes first so failure messages read correctly.
    Assert.AreEqual(60000, X.Length);
    Assert.AreEqual(784, X[59999].Length);

    // Perform the initial dimensionality reduction using PCA
    //var pca = new PrincipalComponentAnalysis(numberOfOutputs: 2);
    //pca.Learn(X);
    //pca.Save(@"pca_v3_1.bin");

    string pcaPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Resources", "mnist", "pca_mnist_v3_1.bin");
    var pca = Serializer.Load<PrincipalComponentAnalysis>(pcaPath);

    X = pca.Transform(X);

    TSNE tSNE = new TSNE();
    var Y = tSNE.Transform(X);

    // NOTE(review): the test fails unconditionally once the transform finishes;
    // this looks like a placeholder for real assertions on Y — confirm.
    Assert.Fail();
}
/// <summary>
/// Checks that the dense output of <c>TSNE.computeGaussianPerplexity</c>
/// matches the output of <c>TSNEWrapper.computeGaussianPerplexity</c> for six
/// hand-written 3-D points (relative tolerance 1e-5).
/// </summary>
public void computeGaussianPerplexity_1()
{
    double[][] samples = new double[][]
    {
        new double[] { 2, 3, 2 },
        new double[] { 5, 4, 5 },
        new double[] { 9, 6, 4 },
        new double[] { 4, 7, 5 },
        new double[] { 8, 1, 1 },
        new double[] { 1, 2, 4 },
    };

    int dimensions = 3;
    int sampleCount = samples.Length;
    double perplexity = 0.5;

    // The wrapper works on a rectangular matrix and fills a preallocated N x N result.
    double[,] dense = samples.ToMatrix();
    double[,] wrapperResult = new double[sampleCount, sampleCount];
    TSNEWrapper.computeGaussianPerplexity(dense, sampleCount, dimensions, wrapperResult, perplexity);

    // The managed version works on jagged arrays.
    double[][] managedResult = Jagged.Zeros(sampleCount, sampleCount);
    TSNE.computeGaussianPerplexity(samples, sampleCount, dimensions, ref managedResult, perplexity);

    Assert.IsTrue(managedResult.IsEqual(wrapperResult, rtol: 1e-5));
}
/// <summary>
/// Loads the MNIST training images from the embedded resources, projects them
/// with a previously serialized PCA, and runs a default-configured
/// <c>TSNE</c> over the projection.
/// </summary>
public void ConstructorTest()
{
    Accord.Math.Random.Generator.Seed = 0;

    // Dispose the reader as soon as the data is in memory so its underlying
    // stream is released even if a later step throws.
    double[][] X;
    using (IdxReader idxReader = new IdxReader(Resources.train_images_idx3_ubyte))
    {
        X = idxReader.ReadToEndAsVectors<double>();
    }

    // Expected value goes first so failure messages read correctly.
    Assert.AreEqual(60000, X.Length);
    Assert.AreEqual(784, X[59999].Length);

    // Perform the initial dimensionality reduction using PCA
    //var pca = new PrincipalComponentAnalysis(numberOfOutputs: 2);
    //pca.Learn(X);
    //pca.Save(@"pca_v3_1.bin");

    var pca = Serializer.Load<PrincipalComponentAnalysis>(Properties.Resources.pca_mnist_v3_1);

    X = pca.Transform(X);

    TSNE tSNE = new TSNE();
    var Y = tSNE.Transform(X);

    // NOTE(review): the test fails unconditionally once the transform finishes;
    // this looks like a placeholder for real assertions on Y — confirm.
    Assert.Fail();
}
/// <summary>
/// Checks that <c>TSNE.computeSquaredEuclideanDistance</c> produces the same
/// N x N matrix as <c>TSNEWrapper.computeSquaredEuclideanDistance</c> on the
/// sub-matrix of <c>yinyang</c> selected by <c>Submatrix(null, 0, 1)</c>.
/// </summary>
public void compute_squared_distance_larger()
{
    double[][] samples = yinyang.Submatrix(null, 0, 1).ToJagged();
    double[,] dense = samples.ToMatrix();

    int rowCount = dense.Rows();
    int columnCount = dense.Columns();

    // Wrapper result, written into a preallocated rectangular matrix.
    var wrapperDistances = new double[rowCount, rowCount];
    TSNEWrapper.computeSquaredEuclideanDistance(dense, wrapperDistances);

    // Managed result, written into a preallocated jagged matrix.
    double[][] managedDistances = Jagged.Zeros(rowCount, rowCount);
    TSNE.computeSquaredEuclideanDistance(samples, rowCount, columnCount, managedDistances);

    Assert.IsTrue(managedDistances.IsEqual(wrapperDistances));
}
/// <summary>
/// Feeds identical sparse (row/column/value) inputs and random dense inputs to
/// <c>TSNEWrapper.computeGradient</c> and <c>TSNE.computeGradient</c>, then
/// checks that the gradients agree and that the sparse arrays still match
/// after both calls.
/// </summary>
public void computeGradient_1()
{
    Accord.Math.Random.Generator.Seed = 0;

    double theta = 0.5;
    double perplexity = 0.5;
    int sampleCount = 100;
    int dimensions = 3;
    int neighbours = (int)(3 * perplexity);

    // Sparse inputs: the given leading values copied into arrays of the requested size.
    uint[] rowTemplate = Vector.Create(sampleCount + 1, new uint[] { 0, 1, 2, 3, 4, 5, 6 });
    uint[] colTemplate = Vector.Create(sampleCount * neighbours, new uint[] { 5, 3, 1, 1, 2, 1 });
    double[] valTemplate = Vector.Create(sampleCount * neighbours, new double[]
    {
        0.83901046609114708,
        0.39701047304189827,
        0.19501046869768451,
        0.59401047304189827,
        0.49301046869768484,
        0.59901046869768451,
    });

    // Random dense inputs; P is drawn before Y, and each gets a jagged copy
    // immediately so both implementations see the same values.
    double[,] denseP = Matrix.Random(sampleCount, sampleCount, new NormalDistribution());
    double[][] jaggedP = denseP.ToJagged();
    double[,] denseY = Matrix.Random(sampleCount, dimensions, new NormalDistribution());
    double[][] jaggedY = denseY.ToJagged();

    // Wrapper side.
    uint[] wrapperRows = Vector.Create(rowTemplate);
    uint[] wrapperCols = Vector.Create(colTemplate);
    double[] wrapperVals = Vector.Create(valTemplate);
    double[,] wrapperGradient = Matrix.Zeros(sampleCount, dimensions);
    TSNEWrapper.computeGradient(denseP, wrapperRows, wrapperCols, wrapperVals,
        denseY, sampleCount, dimensions, wrapperGradient, theta);

    // Managed side, using int indices converted from the same templates.
    int[] managedRows = rowTemplate.To<int[]>();
    int[] managedCols = colTemplate.To<int[]>();
    double[] managedVals = (double[])valTemplate.Clone();
    double[][] managedGradient = Jagged.Zeros(sampleCount, dimensions);
    TSNE.computeGradient(jaggedP, managedRows, managedCols, managedVals,
        jaggedY, sampleCount, dimensions, managedGradient, theta);

    Assert.IsTrue(managedGradient.IsEqual(wrapperGradient));
    Assert.IsTrue(managedRows.IsEqual(wrapperRows));
    Assert.IsTrue(managedCols.IsEqual(wrapperCols));
    Assert.IsTrue(managedVals.IsEqual(wrapperVals, 1e-4));
}
/// <summary>
/// Checks that the dense overload of <c>TSNE.evaluateError</c> returns exactly
/// the same value as <c>TSNEWrapper.evaluateError</c> for random 6 x 6 and
/// 6 x 2 inputs.
/// </summary>
public void evaluateError_2()
{
    int sampleCount = 6;
    int dimensions = 2;

    // P is drawn before Y; each gets a jagged copy for the managed call.
    double[,] denseP = Matrix.Random(6, 6, new NormalDistribution());
    double[][] jaggedP = denseP.ToJagged();

    double[,] denseY = Matrix.Random(6, 2, new NormalDistribution());
    double[][] jaggedY = denseY.ToJagged();

    double wrapperError = TSNEWrapper.evaluateError(denseP, denseY, sampleCount, dimensions);
    double managedError = TSNE.evaluateError(jaggedP, jaggedY, sampleCount, dimensions);

    Assert.AreEqual(wrapperError, managedError);
}
/// <summary>
/// Checks that the dense output of <c>TSNE.computeGaussianPerplexity</c>
/// matches <c>TSNEWrapper.computeGaussianPerplexity</c> (relative tolerance
/// 1e-5) on the sub-matrix of <c>yinyang</c> selected by
/// <c>Submatrix(null, 0, 1)</c>.
/// </summary>
public void computeGaussianPerplexity_larger()
{
    double[][] samples = yinyang.Submatrix(null, 0, 1).ToJagged();

    int rowCount = samples.Rows();
    int columnCount = samples.Columns();
    double perplexity = 0.5;

    // Wrapper: rectangular input, preallocated rectangular output.
    double[,] dense = samples.ToMatrix();
    var wrapperResult = new double[rowCount, rowCount];
    TSNEWrapper.computeGaussianPerplexity(dense, rowCount, columnCount, wrapperResult, perplexity);

    // Managed: jagged input, jagged output passed by reference.
    double[][] managedResult = Jagged.Zeros(rowCount, rowCount);
    TSNE.computeGaussianPerplexity(samples, rowCount, columnCount, ref managedResult, perplexity);

    Assert.IsTrue(managedResult.IsEqual(wrapperResult, rtol: 1e-5));
}
// TSNE
/// <summary>
/// Runs t-SNE on a copy of <paramref name="data"/> and returns the embedding.
/// </summary>
/// <param name="data">Input observations; the array is copied before being transformed.</param>
/// <param name="outputDimension">Value assigned to <c>NumberOfOutputs</c>.</param>
/// <param name="perplexity">Value assigned to <c>Perplexity</c>.</param>
/// <returns>The jagged array returned by <c>TSNE.Transform</c>.</returns>
double[][] runTSNE(double[][] data, int outputDimension, double perplexity)
{
    // NOTE(review): NumberOfInputs is fixed at 9 regardless of the width of
    // `data` — presumably the callers always pass 9-dimensional rows; confirm.
    var reducer = new TSNE();
    reducer.NumberOfInputs = 9;
    reducer.NumberOfOutputs = outputDimension;
    reducer.Perplexity = perplexity;

    // Transform a copy rather than the caller's array.
    double[][] workingCopy = data.Copy();

    // Flattening the result (output.Reshape()) is probably not needed in our case.
    return reducer.Transform(workingCopy);
}
/// <summary>
/// Checks that the sparse overload of <c>TSNE.evaluateError</c> returns exactly
/// the same value as <c>TSNEWrapper.evaluateError</c>, and that the sparse
/// row/column/value arrays still match after both calls.
/// </summary>
public void evaluateError_1()
{
    double theta = 0.5;
    double perplexity = 0.5;
    int sampleCount = 6;
    int dimensions = 2;
    int neighbours = (int)(3 * perplexity);

    // Sparse inputs shared by both implementations.
    uint[] rowTemplate = Vector.Create(sampleCount + 1, new uint[] { 0, 1, 2, 3, 4, 5, 6 });
    uint[] colTemplate = Vector.Create(sampleCount * neighbours, new uint[] { 5, 3, 1, 1, 2, 1 });
    double[] valTemplate = Vector.Create(sampleCount * neighbours, new double[]
    {
        0.99901046609114708,
        0.99901047304189827,
        0.99901046869768451,
        0.99901047304189827,
        0.99901046869768484,
        0.99901046869768451,
    });

    // Random embedding, kept in both rectangular and jagged form.
    double[,] denseY = Matrix.Random(6, 2, new NormalDistribution());
    double[][] jaggedY = denseY.ToJagged();

    // Wrapper side.
    uint[] wrapperRows = Vector.Create(rowTemplate);
    uint[] wrapperCols = Vector.Create(colTemplate);
    double[] wrapperVals = Vector.Create(valTemplate);
    double wrapperError = TSNEWrapper.evaluateError(wrapperRows, wrapperCols, wrapperVals,
        denseY, sampleCount, dimensions, theta);

    // Managed side, using int indices converted from the same templates.
    int[] managedRows = rowTemplate.To<int[]>();
    int[] managedCols = colTemplate.To<int[]>();
    double[] managedVals = (double[])valTemplate.Clone();
    double managedError = TSNE.evaluateError(managedRows, managedCols, managedVals,
        jaggedY, sampleCount, dimensions, theta);

    Assert.AreEqual(wrapperError, managedError);
    Assert.IsTrue(managedCols.IsEqual(wrapperCols));
    Assert.IsTrue(managedRows.IsEqual(wrapperRows));
    Assert.IsTrue(managedVals.IsEqual(wrapperVals));
}
/// <summary>
/// Documentation sample for <c>TSNE</c> (the <c>doc_learn</c> region) followed
/// by a regression check of the 1-D embedding against stored values
/// (relative tolerance 1e-5).
/// </summary>
public void learn_test()
{
    #region doc_learn
    Accord.Math.Random.Generator.Seed = 0;

    // Nine sample observations, three features each
    double[][] observations = new double[][]
    {
        new double[] { -5, -2, -1 },
        new double[] { -5, -5, -6 },
        new double[] {  2,  1,  1 },
        new double[] {  1,  1,  2 },
        new double[] {  1,  2,  2 },
        new double[] {  3,  1,  2 },
        new double[] { 11,  5,  4 },
        new double[] { 15,  5,  6 },
        new double[] { 10,  5,  6 },
    };

    // Set up t-SNE to produce a single output dimension
    TSNE tSNE = new TSNE()
    {
        NumberOfOutputs = 1,
        Perplexity = 1.5
    };

    // Project the observations into the reduced space
    double[][] output = tSNE.Transform(observations);

    // Flatten the one-column result into a plain vector
    double[] y = output.Reshape();
    #endregion

    // C# literal form of the result; not used by the assertion below.
    string str = y.ToCSharp();

    double[] storedEmbedding = new double[]
    {
        327.15556116089,
        144.502680170483,
        -21.5116375004548,
        253.712522074559,
        214.067349874275,
        24.8621254326599,
        -299.97879062709,
        -260.342898777221,
        -382.466911808102
    };

    Assert.IsTrue(y.IsEqual(storedEmbedding, rtol: 1e-5));
}
/// <summary>
/// Compares the sparse (row/column/value) output of
/// <c>TSNE.computeGaussianPerplexity</c> with the output of
/// <c>TSNEWrapper.computeGaussianPerplexity</c> for six hand-written 3-D points.
/// </summary>
public void computeGaussianPerplexity_2()
{
    double[][] samples = new double[][]
    {
        new double[] { 2, 3, 20 },
        new double[] { 5, 4, 5 },
        new double[] { 9, 6, 500 },
        new double[] { 4, 7, -100 },
        new double[] { 8, 1, 67 },
        new double[] { 1, 2, -888 },
    };

    double perplexity = 0.5;
    int neighbours = (int)(3 * perplexity);
    int sampleCount = samples.Length;
    int dimensions = 3;

    // Dense copy for the wrapper, jagged copy for the managed code.
    double[,] dense = samples.ToMatrix();
    double[][] jagged = dense.ToJagged();

    // The wrapper writes into caller-allocated buffers of 100 entries each.
    uint[] wrapperRows = Vector.Zeros<uint>(100);
    uint[] wrapperCols = Vector.Zeros<uint>(100);
    double[] wrapperVals = Vector.Zeros<double>(100);
    TSNEWrapper.computeGaussianPerplexity(dense, sampleCount, dimensions,
        wrapperRows, wrapperCols, wrapperVals, perplexity, neighbours);

    // The managed version receives null arrays by reference.
    int[] managedRows = null;
    int[] managedCols = null;
    double[] managedVals = null;
    TSNE.computeGaussianPerplexity(jagged, sampleCount, dimensions,
        ref managedRows, ref managedCols, ref managedVals, perplexity, neighbours);

    // Only the leading 7 / 6 / 6 entries of the wrapper buffers are compared.
    wrapperRows = wrapperRows.First(7);
    wrapperCols = wrapperCols.First(6);
    wrapperVals = wrapperVals.First(6);

    Assert.IsTrue(managedRows.IsEqual(wrapperRows));
    Assert.IsTrue(managedCols.IsEqual(wrapperCols));
    Assert.IsTrue(managedVals.IsEqual(wrapperVals, 1e-4));
}
/// <summary>
/// End-to-end run: trains word2vec on one corpus file, writes the embeddings
/// to a CSV, reads them back, reduces them to two dimensions with t-SNE,
/// clusters them with DBSCAN and writes a report CSV.
/// </summary>
/// <remarks>
/// Output files are placed under <c>{current directory}/{ResultsDirectory}</c>
/// and are suffixed with the current tick count.
/// </remarks>
public void Go()
{
    var id = DateTime.Now.Ticks;

    // First corpus file whose size is between 1e5 and 2e5 (inclusive);
    // First() throws if no file qualifies.
    var inputFileLoc = TrainingDataManager.GetBlogAuthorshipCorpusFiles()
        .First(f => f.Length >= 1e5 && f.Length <= 2e5)
        .FullName;

    // Build the results directory path once and derive both file paths from it.
    var outputDirectory = $@"{Directory.GetCurrentDirectory()}/{ResultsDirectory}";
    var embeddingsFileLoc = $@"{outputDirectory}/wordEmbeddings-{id}.csv";
    var reportFileLoc = $@"{outputDirectory}/report-{id}.csv";
    Directory.CreateDirectory(outputDirectory);

    var word2Vec = new Word2VecTrainer();
    word2Vec.Setup(inputFileLoc, minWordOccurrences: 3);
    word2Vec.TrainModel();
    word2Vec.WriteWordEmbeddings(embeddingsFileLoc);

    // Open (not OpenOrCreate): the file was just written above, so a missing
    // file is an error and must not be replaced by a silently created empty one.
    using var fileStream = new FileStream(embeddingsFileLoc, FileMode.Open, FileAccess.Read);
    using var reader = new StreamReader(fileStream, Encoding.UTF8);

    var wordEmbeddings = new List<WordEmbedding>();
    wordEmbeddings.PopulateWordEmbeddingsFromStream(reader);

    var tsne = new TSNE(2, distanceFunctionType: DistanceFunctionType.Cosine);
    tsne.ReduceDimensions(wordEmbeddings);

    var labelClusterIndexMap = DBSCAN.GetLabelClusterMap(
        wordEmbeddings,
        epsilon: 0.1,
        minimumSamples: 3,
        distanceFunctionType: DistanceFunctionType.Cosine,
        concurrentThreads: 4);

    var reportHandler = new ReportWriter(reportFileLoc);
    reportHandler.Write2DWordEmbeddingsAndClusterIndexesForExcel(wordEmbeddings, labelClusterIndexMap);
}
/// <summary>
/// Runs <c>TSNEWrapper.run</c> and <c>TSNE.run</c> from the same random
/// one-column initialization on the sub-matrix of <c>yinyang</c> selected by
/// <c>Submatrix(null, 0, 1)</c>, and checks that the flattened embeddings are equal.
/// </summary>
public void FromDataTest2()
{
    Accord.Math.Random.Generator.Seed = 0;

    double theta = 0.5;
    double perplexity = 20;

    // One-column starting embedding drawn from NormalDistribution(0, 0.001).
    int sampleCount = yinyang.GetLength(0);
    var initial = Matrix.Random(sampleCount, 1, new NormalDistribution(0, 0.001));

    // Rectangular inputs for the wrapper ...
    var denseX = yinyang.Submatrix(null, 0, 1);
    var denseY = (double[,])initial.Clone();

    // ... and jagged copies, taken before either run, for the managed code.
    var jaggedX = denseX.ToJagged();
    var jaggedY = denseY.ToJagged();

    TSNEWrapper.run(denseX, denseY, perplexity, theta);
    var wrapperEmbedding = denseY.Flatten();

    TSNE.run(jaggedX, jaggedY, perplexity, theta, true);
    var managedEmbedding = jaggedY.Flatten();

    Assert.IsTrue(managedEmbedding.IsEqual(wrapperEmbedding));
}
/// <summary>
/// Loads the MNIST training images from the embedded resources, projects them
/// with a previously serialized PCA, and runs a default-configured
/// <c>TSNE</c> over the projection.
/// </summary>
public void ConstructorTest()
{
    Accord.Math.Random.Generator.Seed = 0;

    // Dispose the reader as soon as the data is in memory so its underlying
    // stream is released even if a later step throws.
    double[][] X;
    using (IdxReader idxReader = new IdxReader(Resources.train_images_idx3_ubyte))
    {
        X = idxReader.ReadToEndAsVectors<double>();
    }

    // Expected value goes first so failure messages read correctly.
    Assert.AreEqual(60000, X.Length);
    Assert.AreEqual(784, X[59999].Length);

    // Perform the initial dimensionality reduction using PCA
    //var pca = new PrincipalComponentAnalysis(numberOfOutputs: 2);
    //pca.Learn(X);
    //pca.Save(@"pca_v3_1.bin");

    var pca = Serializer.Load<PrincipalComponentAnalysis>(Properties.Resources.pca_mnist_v3_1);

    X = pca.Transform(X);

    TSNE tSNE = new TSNE();
    var Y = tSNE.Transform(X);

    // NOTE(review): the test fails unconditionally once the transform finishes;
    // this looks like a placeholder for real assertions on Y — confirm.
    Assert.Fail();
}
/// <summary>
/// Checks that <c>TSNE.computeSquaredEuclideanDistance</c> produces the same
/// N x N matrix as <c>TSNEWrapper.computeSquaredEuclideanDistance</c> for six
/// hand-written 3-D points.
/// </summary>
public void compute_squared_distance_1()
{
    double[][] samples = new double[][]
    {
        new double[] { 2, 3, 2 },
        new double[] { 5, 4, 5 },
        new double[] { 9, 6, 4 },
        new double[] { 4, 7, 5 },
        new double[] { 8, 1, 1 },
        new double[] { 1, 2, 4 },
    };

    double[,] dense = samples.ToMatrix();
    int rowCount = dense.Rows();
    int columnCount = dense.Columns();

    // Wrapper result, written into a preallocated rectangular matrix.
    var wrapperDistances = new double[rowCount, rowCount];
    TSNEWrapper.computeSquaredEuclideanDistance(dense, wrapperDistances);

    // Managed result, written into a preallocated jagged matrix.
    double[][] managedDistances = Jagged.Zeros(rowCount, rowCount);
    TSNE.computeSquaredEuclideanDistance(samples, rowCount, columnCount, managedDistances);

    Assert.IsTrue(managedDistances.IsEqual(wrapperDistances));
}
/// <summary>
/// Checks that <c>TSNE.symmetrizeMatrix</c> yields the same sparse
/// row/column/value arrays as <c>TSNEWrapper.symmetrizeMatrix</c> for a
/// six-row input.
/// </summary>
public void symmetrizeMatrix_1()
{
    double perplexity = 0.5;
    int n = 6;
    int neighbours = (int)(3 * perplexity);

    // Sparse input shared by both implementations.
    uint[] rowTemplate = Vector.Create(n + 1, new uint[] { 0, 1, 2, 3, 4, 5, 6 });
    uint[] colTemplate = Vector.Create(n * neighbours, new uint[] { 5, 3, 1, 1, 2, 1 });
    double[] valTemplate = Vector.Create(n * neighbours, new double[]
    {
        0.99901046609114708,
        0.99901047304189827,
        0.99901046869768451,
        0.99901047304189827,
        0.99901046869768484,
        0.99901046869768451,
    });

    // The wrapper works in place on buffers of 100 entries.
    uint[] wrapperRows = Vector.Create(100, rowTemplate);
    uint[] wrapperCols = Vector.Create(100, colTemplate);
    double[] wrapperVals = Vector.Create(100, valTemplate);
    TSNEWrapper.symmetrizeMatrix(wrapperRows, wrapperCols, wrapperVals, n);

    // The managed version receives its arrays by reference.
    int[] managedRows = rowTemplate.To<int[]>();
    int[] managedCols = colTemplate.To<int[]>();
    double[] managedVals = (double[])valTemplate.Clone();
    TSNE.symmetrizeMatrix(ref managedRows, ref managedCols, ref managedVals, n);

    // Only the leading 7 / 10 / 10 entries of the wrapper buffers are compared.
    wrapperRows = wrapperRows.First(7);
    wrapperCols = wrapperCols.First(10);
    wrapperVals = wrapperVals.First(10);

    Assert.IsTrue(managedCols.IsEqual(wrapperCols));
    Assert.IsTrue(managedRows.IsEqual(wrapperRows));
    Assert.IsTrue(managedVals.IsEqual(wrapperVals));
}
/// <summary>
/// Reads the data matrix and label files, runs t-SNE on the matrix, appends
/// the labels to the embedding and writes the result as tab-separated text.
/// Timing for the read phase and the t-SNE phase is printed to the console.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Stopwatch stopWatch = new Stopwatch();
    stopWatch.Start();

    String[] lines_X = File.ReadAllLines(@"C:\Users\v-jiehu\source\repos\T-SNE\T-SNE\data\mnist2500_X.txt");
    String[] lines_Y = File.ReadAllLines(@"C:\Users\v-jiehu\source\repos\T-SNE\T-SNE\data\mnist2500_labels.txt");

    // The data files are machine-generated, so numbers are parsed with the
    // invariant culture; otherwise a machine whose locale uses ',' as the
    // decimal separator would misread every value.
    var numberFormat = System.Globalization.CultureInfo.InvariantCulture;

    // One matrix row per input line; the column count comes from the first line.
    var m = Matrix.Create(lines_X.GetLength(0), lines_X[0].Split().GetLength(0), 0.0);
    var r = 0;
    var c = 0;
    foreach (var line in lines_X)
    {
        foreach (var w in line.Trim().Split(new string[] { " " }, StringSplitOptions.None))
        {
            try
            {
                m[r, c] = Convert.ToDouble(w, numberFormat);
            }
            catch (FormatException)
            {
                // Report the bad token and its position; the cell keeps its initial 0.0.
                Console.WriteLine("Unable to convert '{0}' to a Double. - Data", w);
                Console.WriteLine(r);
                Console.WriteLine(c);
            }
            catch (OverflowException)
            {
                Console.WriteLine("'{0}' is outside the range of a Double.", w);
            }
            c++;
        }
        r++;
        c = 0;
    }

    // One label per line of the label file.
    var labels = Vector.Create(lines_Y.GetLength(0), 0.0);
    c = 0;
    foreach (var w in lines_Y)
    {
        try
        {
            labels[c] = Convert.ToDouble(w, numberFormat);
        }
        catch (FormatException)
        {
            Console.WriteLine("Unable to convert '{0}' to a Double. - Label", w);
        }
        catch (OverflowException)
        {
            Console.WriteLine("'{0}' is outside the range of a Double.", w);
        }
        c++;
    }

    stopWatch.Stop();

    // TotalSeconds (truncated) rather than Seconds: Seconds is only the 0-59
    // component and under-reports any read that takes a minute or longer.
    TimeSpan ts = stopWatch.Elapsed;
    Console.WriteLine("Reading files takes: " + ((int)ts.TotalSeconds).ToString() + " seconds");

    // T-SNE starts
    stopWatch.Restart();
    var Y = new TSNE(m, 2, 2, 30.0)._TSNE();
    stopWatch.Stop();

    ts = stopWatch.Elapsed;
    string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}",
        ts.Hours, ts.Minutes, ts.Seconds, ts.Milliseconds / 10);
    Console.WriteLine("RunTime " + elapsedTime);

    // add label to Y
    Y = Y.Concatenate(labels);

    // write result to target path
    string targetPath = @"C:\Users\v-jiehu\source\repos\T-SNE\T-SNE\data";
    string fileName = "result.txt";
    string destFile = System.IO.Path.Combine(targetPath, fileName);

    if (System.IO.File.Exists(destFile))
    {
        // Use a try block to catch IOExceptions, to handle the case of the
        // file already being opened by another process.
        try
        {
            System.IO.File.Delete(destFile);
        }
        catch (System.IO.IOException e)
        {
            Console.WriteLine(e.Message);
        }
    }

    // CreateDirectory does nothing when the directory already exists, so no
    // existence check is needed (the old File.Exists check on a directory
    // path was always false anyway).
    System.IO.Directory.CreateDirectory(targetPath);

    using (System.IO.StreamWriter file = new System.IO.StreamWriter(destFile))
    {
        for (var row = 0; row < Y.GetLength(0); row++)
        {
            var string_line = string.Join("\t", Y.GetRow(row));
            file.WriteLine(string_line);
        }
    }
}
// Start is called before the first frame update
//
// Reads the embedding file named by `inputfile`, reduces every vector to three
// dimensions with t-SNE, stores the reduced coordinates on each WordEmbedding,
// then selects the first word and builds the scene objects.
//
// File layout as read below: the first line holds "<count> <dimensionality>";
// each following line holds a word followed by `dimensionality` numbers, all
// separated by single spaces.
void Start()
{
    // Number of t-SNE output dimensions; also the length of the vector
    // handed to SetPCAVectors.
    const int outputDimensions = 3;

    // Embedding files are machine-generated, so numbers are parsed with the
    // invariant culture instead of the culture of the machine running the game.
    var numberFormat = System.Globalization.CultureInfo.InvariantCulture;

    counter = 0;
    preview = true;
    number_of_neighbours = k;

    double[][] data;

    // `using` guarantees the file is closed even when a line fails to parse.
    using (System.IO.StreamReader file = new System.IO.StreamReader(inputfile))
    {
        // Header line: entry count and vector dimensionality.
        string line = file.ReadLine();
        string[] subStrings = line.Split(' ');
        count = System.Convert.ToInt32(subStrings[0], numberFormat);
        dimensionality = System.Convert.ToInt32(subStrings[1], numberFormat);
        //count = 10000;

        // Replaces the value taken from `k` above once the count is known.
        number_of_neighbours = count - 1;
        embeddings = new WordEmbedding[count];
        data = new double[count][];

        for (int j = 0; j < count; j++)
        {
            line = file.ReadLine();
            subStrings = line.Split(' ');
            string currentword = subStrings[0];

            // Token 0 is the word; the vector components start at token 1.
            double[] currentVectors = new double[dimensionality];
            for (int i = 0; i < dimensionality; i++)
            {
                currentVectors[i] = System.Convert.ToDouble(subStrings[i + 1], numberFormat);
            }

            data[counter] = currentVectors;
            embeddings[counter] = new WordEmbedding(currentword, currentVectors);
            counter++;
        }
    }

    TSNE tSNE = new TSNE()
    {
        NumberOfOutputs = outputDimensions,
        Perplexity = 5
    };
    double[][] finalData = tSNE.Transform(data);

    counter = 0;

    // Copy each reduced vector onto its embedding.
    for (int j = 0; j < count; j++)
    {
        WordEmbedding Target = embeddings[j];
        double[] pcavectors = new double[outputDimensions];
        for (int axis = 0; axis < outputDimensions; axis++)
        {
            pcavectors[axis] = finalData[j][axis];
        }
        Target.SetPCAVectors(pcavectors);
        embeddings[j] = Target;
    }

    // Set initial sample word
    SetTarget(embeddings[0]);
    adjustZoomFromNeighbours();
    CreateGameObjects();
}