Exemple #1
0
        /// <summary>
        ///   Checks that a VP-tree built from the scalar values 1..7 exposes every
        ///   one of those values as a node position, both for an ordered input and
        ///   for a shuffled one.
        /// </summary>
        public void FromDataTest()
        {
            // Seed the shared Accord generator with a constant before anything
            // random happens (presumably so the shuffle below is reproducible).
            Accord.Math.Random.Generator.Seed = 0;

            // Pass 1: values handed over in ascending order.
            double[] ordered = { 1, 2, 3, 4, 5, 6, 7 };

            var orderedTree = VPTree.FromData(ordered);
            List<VPTreeNode<double>> orderedNodes = orderedTree.ToList();
            var orderedPositions = orderedNodes.Select(node => node.Position).ToList();

            foreach (int expected in Enumerable.Range(1, 7))
            {
                Assert.IsTrue(orderedPositions.Contains(expected));
            }

            // Pass 2: a shuffled range; the assertions below require it to
            // contain the values 1..7 as well.
            double[] shuffled = Vector.Shuffled(Vector.Range(1.0, 8.0));

            var shuffledTree = VPTree.FromData(shuffled);
            List<VPTreeNode<double>> shuffledNodes = shuffledTree.ToList();
            var shuffledPositions = shuffledNodes.Select(node => node.Position).ToList();

            foreach (int expected in Enumerable.Range(1, 7))
            {
                Assert.IsTrue(shuffledPositions.Contains(expected));
            }
        }
Exemple #2
0
        /// <summary>
        ///   Extracts the words from <c>this.data</c>, loads them into a new
        ///   <c>VPTree&lt;Word&gt;</c> together with the analyzer, and stores a
        ///   <c>StaticWordDictionary</c> wrapping that tree in <c>this.dictionary</c>.
        /// </summary>
        public void StructureData()
        {
            // Only the returned word collection is used; the parser's second
            // (out) result is not needed here and is discarded.
            var words = parser.FindWords(this.data, out int[] _);

            var tree = new VPTree<Word>();
            tree.Create(words, this.analyzer);

            this.dictionary = new StaticWordDictionary(tree, this.analyzer);
        }
Exemple #3
0
        /// <summary>
        ///   Builds a VP-tree over six 2-D points using the Manhattan distance and
        ///   verifies node contents, a 3-nearest-neighbor query around (5, 3), and
        ///   the expected layout of the upper part of the tree.
        /// </summary>
        public void FromDataTest2()
        {
            // Constant seed set before the tree is built.
            Accord.Math.Random.Generator.Seed = 0;

            double[][] points = new double[][]
            {
                new double[] { 2, 3 },
                new double[] { 5, 4 },
                new double[] { 9, 6 },
                new double[] { 4, 7 },
                new double[] { 8, 1 },
                new double[] { 7, 2 },
            };

            var tree = VPTree.FromData(points, new Manhattan());

            // Every node reached by enumerating the tree holds a 2-D position.
            foreach (var node in tree)
            {
                double[] position = node.Position;
                Assert.AreEqual(2, position.Length);
            }

            // Each input point must appear (by element-wise comparison) among the nodes.
            List<VPTreeNode<double[]>> nodes = tree.ToList();
            var comparer = new ArrayComparer<double>();

            for (int i = 0; i < points.Length; i++)
            {
                bool present = nodes.Select(x => x.Position).Contains(points[i], comparer);
                Assert.IsTrue(present);
            }

            // Query the three nearest neighbors of (5, 3).
            double[] query = { 5, 3 };
            var neighbors = tree.Nearest(query, neighbors: 3);

            var root = tree.Root;
            Assert.IsFalse(root.IsLeaf);

            Assert.AreEqual(3, neighbors.Count);
            Assert.AreEqual("[(5,4), 4]", neighbors[0].Node.ToString());
            Assert.AreEqual("[(2,3), 0]", neighbors[1].Node.ToString());
            Assert.AreEqual("[(7,2), 0]", neighbors[2].Node.ToString());

            // Expected structure: root (8, 1), left child (5, 4),
            // right child (4, 7) whose own right child is (9, 6).
            Assert.AreEqual(8, root.Position[0]);
            Assert.AreEqual(1, root.Position[1]);

            var right = root.Right;
            Assert.AreEqual("[(5,4), 4]", root.Left.ToString());
            Assert.AreEqual("[(4,7), 6]", right.ToString());

            Assert.AreEqual(4, right.Position[0]);
            Assert.AreEqual(7, right.Position[1]);

            Assert.AreEqual(9, right.Right.Position[0]);
            Assert.AreEqual(6, right.Right.Position[1]);
        }
Exemple #4
0
 /// <summary>
 ///   Creates a dictionary backed by an already-built word tree.
 /// </summary>
 /// <param name="tree">The tree stored as the dictionary's word tree.</param>
 /// <param name="analyzer">The analyzer stored alongside the tree.</param>
 public StaticWordDictionary(VPTree<Word> tree, INodeAnalyzer<Word> analyzer)
 {
     // Both arguments are stored as given; no copy or validation is performed.
     this.analyzer = analyzer;
     wordTree = tree;
 }
Exemple #5
0
        /// <summary>
        ///   Computes input similarities with a fixed perplexity, restricted to the
        ///   <paramref name="K"/> nearest neighbors of every point. Neighbors are
        ///   looked up through a VP-tree built over <paramref name="X"/>.
        /// </summary>
        /// <remarks>
        ///   The output uses a compressed-row layout with exactly K entries per row:
        ///   row <c>n</c> occupies positions <c>_row_P[n]</c> .. <c>_row_P[n + 1] - 1</c>
        ///   of <paramref name="_col_P"/> (neighbor indices) and
        ///   <paramref name="_val_P"/> (row-normalized kernel values).
        ///   All three output arrays are allocated here; whatever the caller passed
        ///   in through the <c>ref</c> parameters is replaced.
        /// </remarks>
        /// <param name="X">The data points, one inner array per point.</param>
        /// <param name="N">
        ///   Number of points; used to size the outputs. NOTE(review): the main loop
        ///   iterates over <c>X.Length</c>, so this presumably must equal
        ///   <c>X.Length</c> - confirm against callers.
        /// </param>
        /// <param name="D">Not used by this method.</param>
        /// <param name="_row_P">Receives the row offsets (length N + 1).</param>
        /// <param name="_col_P">Receives the neighbor indices (length N * K).</param>
        /// <param name="_val_P">Receives the normalized similarities (length N * K).</param>
        /// <param name="perplexity">Target perplexity; must not exceed <paramref name="K"/>.</param>
        /// <param name="K">Number of neighbors stored per point.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        ///   <paramref name="perplexity"/> is greater than <paramref name="K"/>.
        /// </exception>
        internal static void computeGaussianPerplexity(double[][] X, int N, int D, ref int[] _row_P, ref int[] _col_P, ref double[] _val_P, double perplexity, int K)
        {
            if (perplexity > K)
            {
                throw new ArgumentOutOfRangeException("perplexity", String.Format("Perplexity should be lower than K ({0}).", K));
            }

            // Allocate the output arrays
            _row_P = new int[N + 1];
            _col_P = new int[N * K];
            _val_P = new double[N * K];

            int[]    row_P = _row_P;
            int[]    col_P = _col_P;
            double[] val_P = _val_P;

            // Scratch row for the kernel values. Only K entries are ever read or
            // written, so the buffer holds exactly K values; this keeps the
            // per-point normalization at O(K) instead of O(N).
            double[] cur_P = new double[K];

            // Every row holds exactly K entries, so the offsets are multiples of K
            row_P[0] = 0;
            for (int n = 0; n < N; n++)
            {
                row_P[n + 1] = row_P[n] + K;
            }

            // Build a VP-tree on the data set; each node carries its point index as value
            var idx  = Vector.Range(0, N);
            var tree = VPTree.FromData(X, idx, inPlace: false);

            // Loop over all points to find nearest neighbors
            var results = new List<NodeDistance<VPTreeNode<double[], int>>>();

            Debug.Write("Building tree...");
            for (int n = 0; n < X.Length; n++)
            {
                if (n % 10000 == 0)
                {
                    Debug.Write(String.Format(" - point {0} of {1}", n, N));
                }

                // Find nearest neighbors. K + 1 are requested and results[0] is
                // skipped everywhere below (presumably it is the query point itself).
                results.Clear();
                tree.Nearest(X[n], K + 1, results);

                // Initialize some variables for binary search over beta.
                // NOTE(review): min_beta starts at -DBL_MIN while the "unbounded"
                // checks below compare against +/-DBL_MAX; both constants are declared
                // outside this method - confirm the intended initial value.
                bool   found    = false;
                double beta     = 1.0;
                double min_beta = -DBL_MIN;
                double max_beta = DBL_MAX;
                double tol      = 1e-5;

                // Iterate until we found a good perplexity (at most 200 steps)
                int    iter  = 0;
                double sum_P = 0;
                while (!found && iter < 200)
                {
                    // Compute Gaussian kernel row: exp(-beta * d^2) per neighbor
                    for (int m = 0; m < K; m++)
                    {
                        double d = results[m + 1].Distance;
                        cur_P[m] = System.Math.Exp(-beta * d * d);
                    }

                    // Sum of the kernel row, started from DBL_MIN rather than zero
                    sum_P = DBL_MIN;
                    for (int m = 0; m < K; m++)
                    {
                        sum_P += cur_P[m];
                    }

                    // Entropy of the current row
                    double H = 0.0;
                    for (int m = 0; m < K; m++)
                    {
                        double d = results[m + 1].Distance;
                        H += beta * (d * d * cur_P[m]);
                    }
                    H = (H / sum_P) + System.Math.Log(sum_P);

                    // Evaluate whether the entropy is within the tolerance level
                    double Hdiff = H - System.Math.Log(perplexity);

                    if (System.Math.Abs(Hdiff) < tol)
                    {
                        found = true;
                    }
                    else if (Hdiff > 0)
                    {
                        // Entropy too high: raise beta (double it while there is
                        // no upper bound yet, otherwise bisect towards max_beta)
                        min_beta = beta;
                        if (max_beta == DBL_MAX || max_beta == -DBL_MAX)
                        {
                            beta *= 2.0;
                        }
                        else
                        {
                            beta = (beta + max_beta) / 2.0;
                        }
                    }
                    else
                    {
                        // Entropy too low: lower beta (halve it while there is
                        // no lower bound yet, otherwise bisect towards min_beta)
                        max_beta = beta;
                        if (min_beta == -DBL_MAX || min_beta == DBL_MAX)
                        {
                            beta /= 2.0;
                        }
                        else
                        {
                            beta = (beta + min_beta) / 2.0;
                        }
                    }

                    // Update iteration counter
                    iter++;
                }

                // Row-normalize current row of P and store in matrix
                cur_P.Divide(sum_P, result: cur_P);

                for (int m = 0; m < K; m++)
                {
                    int j = results[m + 1].Node.Value; // holds the index
                    col_P[row_P[n] + m] = j;
                    val_P[row_P[n] + m] = cur_P[m];
                }
            }
        }
Exemple #6
0
        /// <summary>
        ///   Documentation sample plus regression checks for a VP-tree built over
        ///   six 2-D points with the Manhattan distance: a 3-nearest-neighbor query
        ///   around (5, 3), node contents, and the layout of the top of the tree.
        /// </summary>
        public void FromDataTest2()
        {
            Accord.Math.Random.Generator.Seed = 0;

            #region doc_create
            // Suppose we have a set of multidimensional points
            // and want to index them with a VP tree:
            double[][] points =
            {
                new double[] { 2, 3 },
                new double[] { 5, 4 },
                new double[] { 9, 6 },
                new double[] { 4, 7 },
                new double[] { 8, 1 },
                new double[] { 7, 2 },
            };

            // The tree is built using the Manhattan distance:
            var tree = VPTree.FromData(points, new Manhattan());

            // This is the point whose nearest neighbors we want:
            double[] query = { 5, 3 };

            // Ask the tree for the 3 points closest to the query:
            var neighbors = tree.Nearest(query, neighbors: 3);

            // The answer should be:
            NodeDistance<VPTreeNode<double[]>>[] result = neighbors.ToArray();

            double[] p1 = result[0].Node.Position; // { 5, 4 }
            double   d1 = result[0].Distance;      // 1

            double[] p2 = result[1].Node.Position; // { 2, 3 }
            double   d2 = result[1].Distance;      // 3

            double[] p3 = result[2].Node.Position; // { 7, 2 }
            double   d3 = result[2].Distance;      // 3


            // The tree itself can be enumerated node by node:
            foreach (VPTreeNode<double[]> n in tree)
            {
                // Each node exposes the following information:
                double[] location  = n.Position;  // expected to have length 2
                double   threshold = n.Threshold; // the node's threshold radius
                bool     isLeaf    = n.IsLeaf;    // whether the node is a leaf or not
            }
            #endregion

            // Distances and positions of the three neighbors.
            Assert.AreEqual(1, d1);
            Assert.AreEqual(3, d2);
            Assert.AreEqual(3, d3);

            Assert.AreEqual(new[] { 5.0, 4.0 }, p1);
            Assert.AreEqual(new[] { 2.0, 3.0 }, p2);
            Assert.AreEqual(new[] { 7.0, 2.0 }, p3);

            // Every enumerated node holds a 2-D position.
            foreach (VPTreeNode<double[]> node in tree)
            {
                double[] position = node.Position;
                Assert.AreEqual(2, position.Length);
            }

            // Each input point must appear (by element-wise comparison) among the nodes.
            List<VPTreeNode<double[]>> nodes = tree.ToList();
            var comparer = new ArrayComparer<double>();

            for (int i = 0; i < points.Length; i++)
            {
                bool present = nodes.Select(x => x.Position).Contains(points[i], comparer);
                Assert.IsTrue(present);
            }

            var root = tree.Root;
            Assert.IsFalse(root.IsLeaf);

            Assert.AreEqual(3, neighbors.Count);
            Assert.AreEqual("[(5,4), 4]", neighbors[0].Node.ToString());
            Assert.AreEqual("[(2,3), 0]", neighbors[1].Node.ToString());
            Assert.AreEqual("[(7,2), 0]", neighbors[2].Node.ToString());

            // Expected structure: root (8, 1), left child (5, 4),
            // right child (4, 7) whose own right child is (9, 6).
            Assert.AreEqual(8, root.Position[0]);
            Assert.AreEqual(1, root.Position[1]);

            var right = root.Right;
            Assert.AreEqual("[(5,4), 4]", root.Left.ToString());
            Assert.AreEqual("[(4,7), 6]", right.ToString());

            Assert.AreEqual(4, right.Position[0]);
            Assert.AreEqual(7, right.Position[1]);

            Assert.AreEqual(9, right.Right.Position[0]);
            Assert.AreEqual(6, right.Right.Position[1]);
        }