/// <summary>
///   Builds a tree from one-dimensional data twice (first from the ordered
///   values 1..7, then from a shuffled range) and checks each time that every
///   value from 1 to 7 appears as the position of some node.
/// </summary>
public void FromDataTest()
{
    // Reset the generator seed before anything random is drawn.
    Accord.Math.Random.Generator.Seed = 0;

    // First pass: values supplied in ascending order.
    double[] values = { 1, 2, 3, 4, 5, 6, 7, };
    var tree = VPTree.FromData(values);
    List<VPTreeNode<double>> treeNodes = tree.ToList();

    foreach (int expected in Enumerable.Range(1, 7))
    {
        bool present = treeNodes.Select(node => node.Position).Contains(expected);
        Assert.IsTrue(present);
    }

    // Second pass: same check against a shuffled range.
    values = Vector.Shuffled(Vector.Range(1.0, 8.0));
    tree = VPTree.FromData(values);
    treeNodes = tree.ToList();

    foreach (int expected in Enumerable.Range(1, 7))
    {
        bool present = treeNodes.Select(node => node.Position).Contains(expected);
        Assert.IsTrue(present);
    }
}
/// <summary>
///   Extracts the words from <c>data</c> through the parser, loads them into a
///   new <c>VPTree&lt;Word&gt;</c> using the analyzer, and stores a
///   <c>StaticWordDictionary</c> over that tree in <c>dictionary</c>.
/// </summary>
public void StructureData()
{
    // FindWords also reports whitespace information; it is not used here.
    int[] whitespaces;
    var foundWords = parser.FindWords(this.data, out whitespaces);

    var wordIndex = new VPTree<Word>();
    wordIndex.Create(foundWords, this.analyzer);

    this.dictionary = new StaticWordDictionary(wordIndex, this.analyzer);
}
/// <summary>
///   Builds a tree over six two-dimensional points using the Manhattan distance,
///   then verifies node dimensionality, that every input point is present, the
///   three nearest neighbours of (5, 3), and the expected layout of the top of
///   the tree.
/// </summary>
public void FromDataTest2()
{
    // Reset the generator seed; the structural expectations below are fixed values.
    Accord.Math.Random.Generator.Seed = 0;

    double[][] points =
    {
        new double[] { 2, 3 },
        new double[] { 5, 4 },
        new double[] { 9, 6 },
        new double[] { 4, 7 },
        new double[] { 8, 1 },
        new double[] { 7, 2 },
    };

    var tree = VPTree.FromData(points, new Manhattan());

    // Every node must hold a two-dimensional position.
    foreach (var node in tree)
    {
        Assert.AreEqual(2, node.Position.Length);
    }

    // Every input point must be found among the node positions.
    List<VPTreeNode<double[]>> treeNodes = tree.ToList();
    foreach (var point in points)
    {
        bool present = treeNodes
            .Select(node => node.Position)
            .Contains(point, new ArrayComparer<double>());
        Assert.IsTrue(present);
    }

    // Nearest-neighbour query.
    var query = new double[] { 5, 3 };
    var neighbors = tree.Nearest(query, neighbors: 3);

    Assert.IsFalse(tree.Root.IsLeaf);
    Assert.AreEqual(3, neighbors.Count);

    string[] expectedNeighbors = { "[(5,4), 4]", "[(2,3), 0]", "[(7,2), 0]" };
    for (int i = 0; i < expectedNeighbors.Length; i++)
    {
        Assert.AreEqual(expectedNeighbors[i], neighbors[i].Node.ToString());
    }

    // Shape of the upper levels of the tree.
    Assert.AreEqual(8, tree.Root.Position[0]);
    Assert.AreEqual(1, tree.Root.Position[1]);

    Assert.AreEqual("[(5,4), 4]", tree.Root.Left.ToString());
    Assert.AreEqual("[(4,7), 6]", tree.Root.Right.ToString());

    Assert.AreEqual(4, tree.Root.Right.Position[0]);
    Assert.AreEqual(7, tree.Root.Right.Position[1]);

    Assert.AreEqual(9, tree.Root.Right.Right.Position[0]);
    Assert.AreEqual(6, tree.Root.Right.Right.Position[1]);
}
/// <summary>
///   Creates a dictionary that keeps the given word tree and node analyzer.
/// </summary>
/// <param name="tree">Tree stored in <c>wordTree</c>.</param>
/// <param name="analyzer">Analyzer stored in <c>analyzer</c>.</param>
public StaticWordDictionary(VPTree<Word> tree, INodeAnalyzer<Word> analyzer)
{
    this.analyzer = analyzer;
    wordTree = tree;
}
/// <summary>
///   Computes input similarities with a fixed perplexity, using a VP-tree to
///   restrict every point's row to its <paramref name="K"/> nearest neighbours.
/// </summary>
/// <remarks>
///   The three output arrays are newly allocated here and handed back through
///   the <c>ref</c> parameters. They use a compressed-sparse-row layout: row
///   <c>n</c> occupies the entries <c>row_P[n] .. row_P[n + 1] - 1</c> of
///   <c>col_P</c> (neighbour indices) and <c>val_P</c> (row-normalized values),
///   and every row has exactly <paramref name="K"/> entries.
/// </remarks>
/// <param name="X">The data points; <c>X[n]</c> is used as the query for row <c>n</c>.</param>
/// <param name="N">Number of points; determines the size of the output arrays.</param>
/// <param name="D">Not used by this method.</param>
/// <param name="_row_P">Receives the <c>N + 1</c> row offsets.</param>
/// <param name="_col_P">Receives the <c>N * K</c> neighbour indices.</param>
/// <param name="_val_P">Receives the <c>N * K</c> row-normalized similarity values.</param>
/// <param name="perplexity">Target perplexity; the search matches the row entropy to its logarithm.</param>
/// <param name="K">Number of neighbours stored per point.</param>
/// <exception cref="ArgumentOutOfRangeException">
///   <paramref name="perplexity"/> is greater than <paramref name="K"/>.
/// </exception>
internal static void computeGaussianPerplexity(double[][] X, int N, int D, ref int[] _row_P, ref int[] _col_P, ref double[] _val_P, double perplexity, int K)
{
    if (perplexity > K)
    {
        throw new ArgumentOutOfRangeException("perplexity",
            String.Format("Perplexity should be lower than K ({0}).", K));
    }

    // Allocate the output arrays
    _row_P = new int[N + 1];
    _col_P = new int[N * K];
    _val_P = new double[N * K];
    int[] row_P = _row_P;
    int[] col_P = _col_P;
    double[] val_P = _val_P;

    // Scratch buffer for the row being processed. Only K entries are ever
    // read or written, so it is sized to K (this also keeps the final
    // normalization at O(K) per point).
    double[] cur_P = new double[K];

    // Every row holds exactly K entries, so the offsets are multiples of K
    row_P[0] = 0;
    for (int n = 0; n < N; n++)
    {
        row_P[n + 1] = row_P[n] + K;
    }

    // Build the VP-tree on the data set, tagging each point with its index
    var idx = Vector.Range(0, N);
    var tree = VPTree.FromData(X, idx, inPlace: false);

    // Loop over all points to find nearest neighbors
    var results = new List<NodeDistance<VPTreeNode<double[], int>>>();

    Debug.Write("Building tree...");

    // NOTE(review): the loop is bounded by X.Length while the buffers are sized
    // from N; callers presumably pass N == X.Length - confirm.
    for (int n = 0; n < X.Length; n++)
    {
        if (n % 10000 == 0)
        {
            Debug.Write(String.Format(" - point {0} of {1}", n, N));
        }

        // Find nearest neighbors. K + 1 results are requested and the first one
        // is skipped below - presumably it is the query point itself; confirm.
        results.Clear();
        tree.Nearest(X[n], K + 1, results);

        // Initialize some variables for binary search
        // NOTE(review): min_beta starts at -DBL_MIN while the "still unbounded"
        // checks below compare against +/-DBL_MAX; DBL_MIN and DBL_MAX are
        // declared outside this block - confirm the intended sentinel values.
        bool found = false;
        double beta = 1.0;
        double min_beta = -DBL_MIN;
        double max_beta = DBL_MAX;
        double tol = 1e-5;

        // Iterate until we found a good perplexity (at most 200 attempts)
        int iter = 0;
        double sum_P = 0;
        while (!found && iter < 200)
        {
            // Compute Gaussian kernel row
            for (int m = 0; m < K; m++)
            {
                double d = results[m + 1].Distance;
                cur_P[m] = System.Math.Exp(-beta * d * d);
            }

            // Compute entropy of current row; sum_P starts at DBL_MIN rather than zero
            sum_P = DBL_MIN;
            for (int m = 0; m < K; m++)
            {
                sum_P += cur_P[m];
            }

            double H = 0.0;
            for (int m = 0; m < K; m++)
            {
                double d = results[m + 1].Distance;
                H += beta * (d * d * cur_P[m]);
            }

            H = (H / sum_P) + System.Math.Log(sum_P);

            // Evaluate whether the entropy is within the tolerance level
            double Hdiff = H - System.Math.Log(perplexity);
            if (Hdiff < tol && -Hdiff < tol)
            {
                found = true;
            }
            else
            {
                if (Hdiff > 0)
                {
                    // Entropy too high: raise beta. Double it while no upper
                    // bound is known, otherwise bisect towards max_beta.
                    min_beta = beta;
                    if (max_beta == DBL_MAX || max_beta == -DBL_MAX)
                    {
                        beta *= 2.0;
                    }
                    else
                    {
                        beta = (beta + max_beta) / 2.0;
                    }
                }
                else
                {
                    // Entropy too low: lower beta. Halve it while no lower
                    // bound is known, otherwise bisect towards min_beta.
                    max_beta = beta;
                    if (min_beta == -DBL_MAX || min_beta == DBL_MAX)
                    {
                        beta /= 2.0;
                    }
                    else
                    {
                        beta = (beta + min_beta) / 2.0;
                    }
                }
            }

            // Update iteration counter
            iter++;
        }

        // Row-normalize current row of P and store in matrix
        cur_P.Divide(sum_P, result: cur_P);
        for (int m = 0; m < K; m++)
        {
            int j = results[m + 1].Node.Value; // holds the index
            col_P[row_P[n] + m] = j;
            val_P[row_P[n] + m] = cur_P[m];
        }
    }
}
/// <summary>
///   Documentation sample plus regression test for building a tree over
///   two-dimensional points with the Manhattan distance: checks the three
///   nearest neighbours of (5, 3), node dimensionality, presence of every
///   input point, and the layout of the top of the tree.
/// </summary>
public void FromDataTest2()
{
    Accord.Math.Random.Generator.Seed = 0;

    // The code inside the region is kept as written; it reads as a usage
    // sample (presumably referenced by documentation tooling - confirm).
    #region doc_create
    // Let's say we would like to create a VP tree
    // from a set of multidimensional data points:
    double[][] points =
    {
        new double[] { 2, 3 },
        new double[] { 5, 4 },
        new double[] { 9, 6 },
        new double[] { 4, 7 },
        new double[] { 8, 1 },
        new double[] { 7, 2 },
    };

    // We will create it using the Manhattan distance:
    var tree = VPTree.FromData(points, new Manhattan());

    // Now, we can query whether a point belongs to the tree
    double[] query = new double[] { 5, 3 };

    // Find the top-3 closest points within the tree:
    var neighbors = tree.Nearest(query, neighbors: 3);

    // Results will be:
    NodeDistance<VPTreeNode<double[]>>[] result = neighbors.ToArray();

    double d1 = result[0].Distance; // 1
    double[] p1 = result[0].Node.Position; // { 5, 4 }

    double d2 = result[1].Distance; // 3
    double[] p2 = result[1].Node.Position; // { 2, 3 }

    double d3 = result[2].Distance; // 3
    double[] p3 = result[2].Node.Position; // { 7, 2 }

    // We can also navigate the tree using:
    foreach (VPTreeNode<double[]> n in tree)
    {
        // We can extract information from its nodes:
        double[] location = n.Position; // should always have length 2
        double threshold = n.Threshold; // the node's threshold radius
        bool isLeaf = n.IsLeaf; // whether the node is a leaf or not
    }
    #endregion

    // Distances of the three neighbours.
    Assert.AreEqual(1, d1);
    Assert.AreEqual(3, d2);
    Assert.AreEqual(3, d3);

    // Positions of the three neighbours.
    Assert.AreEqual(new[] { 5.0, 4.0 }, p1);
    Assert.AreEqual(new[] { 2.0, 3.0 }, p2);
    Assert.AreEqual(new[] { 7.0, 2.0 }, p3);

    // Every node must hold a two-dimensional position.
    foreach (VPTreeNode<double[]> node in tree)
    {
        Assert.AreEqual(2, node.Position.Length);
    }

    // Every input point must be found among the node positions.
    List<VPTreeNode<double[]>> treeNodes = tree.ToList();
    foreach (var point in points)
    {
        bool present = treeNodes
            .Select(node => node.Position)
            .Contains(point, new ArrayComparer<double>());
        Assert.IsTrue(present);
    }

    Assert.IsFalse(tree.Root.IsLeaf);
    Assert.AreEqual(3, neighbors.Count);

    string[] expectedNeighbors = { "[(5,4), 4]", "[(2,3), 0]", "[(7,2), 0]" };
    for (int i = 0; i < expectedNeighbors.Length; i++)
    {
        Assert.AreEqual(expectedNeighbors[i], neighbors[i].Node.ToString());
    }

    // Shape of the upper levels of the tree.
    Assert.AreEqual(8, tree.Root.Position[0]);
    Assert.AreEqual(1, tree.Root.Position[1]);

    Assert.AreEqual("[(5,4), 4]", tree.Root.Left.ToString());
    Assert.AreEqual("[(4,7), 6]", tree.Root.Right.ToString());

    Assert.AreEqual(4, tree.Root.Right.Position[0]);
    Assert.AreEqual(7, tree.Root.Right.Position[1]);

    Assert.AreEqual(9, tree.Root.Right.Right.Position[0]);
    Assert.AreEqual(6, tree.Root.Right.Right.Position[1]);
}