示例#1
0
 /// <summary>
 /// Creates a cluster node and stores every argument in the member of the same name.
 /// </summary>
 /// <param name="vector">Values assigned to <c>Vector</c>.</param>
 /// <param name="id">Identifier assigned to <c>Id</c>.</param>
 /// <param name="distance">Value assigned to <c>Distance</c>.</param>
 /// <param name="left">Cluster assigned to <c>Left</c>.</param>
 /// <param name="right">Cluster assigned to <c>Right</c>.</param>
 public BiCluster(IEnumerable <long> vector, int id, double distance, BiCluster left, BiCluster right)
 {
     // Payload and identity of this node.
     this.Vector = vector;
     this.Id = id;
     this.Distance = distance;

     // Child links.
     this.Left = left;
     this.Right = right;
 }
示例#2
0
 /// <summary>
 /// Creates a cluster node and stores every argument in the member of the same name.
 /// </summary>
 /// <param name="vector">Values assigned to <c>Vector</c>.</param>
 /// <param name="id">Identifier assigned to <c>Id</c>.</param>
 /// <param name="distance">Value assigned to <c>Distance</c>.</param>
 /// <param name="left">Cluster assigned to <c>Left</c>.</param>
 /// <param name="right">Cluster assigned to <c>Right</c>.</param>
 public BiCluster(IEnumerable<long> vector, int id, double distance, BiCluster left, BiCluster right)
 {
     // Payload and identity of this node.
     this.Vector = vector;
     this.Id = id;
     this.Distance = distance;

     // Child links.
     this.Left = left;
     this.Right = right;
 }
示例#3
0
        /// <summary>
        /// Builds a binary merge tree over the supplied word vectors: starting with one
        /// cluster per word, the pair of clusters with the smallest <c>Pearson</c> value is
        /// merged repeatedly until a single cluster remains.
        /// </summary>
        /// <param name="words">
        /// Word vectors to cluster. Each one becomes an initial cluster whose id is its
        /// zero-based position in the sequence.
        /// </param>
        /// <returns>The last remaining cluster, i.e. the root of the merge tree.</returns>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// Thrown when <paramref name="words"/> is empty, because there is no cluster to return.
        /// </exception>
        public static BiCluster HCluster(IEnumerable<WordVector> words)
        {
            var wordsList = words.ToArray();

            // Cache of already computed pair values, keyed by the two cluster ids.
            var distances = new Dictionary<Pair, double>();

            // Clusters created by merging get negative ids: -1, -2, ...
            int currentClustId = -1;

            List<BiCluster> clust = new List<BiCluster>(wordsList.Length);

            for (int i = 0; i < wordsList.Length; i++)
            {
                clust.Add(new BiCluster(wordsList[i].Entities.Values, i, 0.0));
            }

            while (clust.Count > 1)
            {
                // Start from the first pair; any strictly smaller value replaces it.
                Pair lowestpair = new Pair(0, 1);
                var closest = Pearson(clust[0].Vector, clust[1].Vector);

                // Loop through every pair looking for the smallest distance.
                for (int i = 0; i < clust.Count; i++)
                {
                    for (int j = i + 1; j < clust.Count; j++)
                    {
                        // One key and one lookup per pair instead of ContainsKey + Add + indexer.
                        Pair key = new Pair(clust[i].Id, clust[j].Id);
                        double d;

                        if (!distances.TryGetValue(key, out d))
                        {
                            d = Pearson(clust[i].Vector, clust[j].Vector);
                            distances.Add(key, d);
                        }

                        if (d < closest)
                        {
                            closest = d;
                            lowestpair = new Pair(i, j);
                        }
                    }
                }

                // Resolve both clusters BEFORE touching the list: removing the first one
                // shifts every later element down by one, so indexing with the old
                // position afterwards would pick the wrong cluster (or run off the end).
                BiCluster leftCluster = clust[lowestpair.Left];
                BiCluster rightCluster = clust[lowestpair.Right];

                // Materialize each vector once rather than once per element.
                long[] leftVectorArray = leftCluster.Vector.ToArray();
                long[] rightVectorArray = rightCluster.Vector.ToArray();

                // Element-wise average of the two clusters (integer division).
                // NOTE(review): assumes both vectors have the same length - confirm.
                List<long> mergevec = new List<long>(leftVectorArray.Length);
                for (int i = 0; i < leftVectorArray.Length; i++)
                {
                    mergevec.Add((leftVectorArray[i] + rightVectorArray[i]) / 2);
                }

                // Create the new cluster with the merged pair as its children.
                BiCluster newCluster = new BiCluster(mergevec, currentClustId, closest, leftCluster, rightCluster);

                // Cluster ids that weren't in the original set are negative.
                currentClustId--;

                clust.Remove(leftCluster);
                clust.Remove(rightCluster);
                clust.Add(newCluster);
            }

            return clust[0];
        }
示例#4
0
        /// <summary>
        /// Builds a binary merge tree over the supplied word vectors: starting with one
        /// cluster per word, the pair of clusters with the smallest <c>Pearson</c> value is
        /// merged repeatedly until a single cluster remains.
        /// </summary>
        /// <param name="words">
        /// Word vectors to cluster. Each one becomes an initial cluster whose id is its
        /// zero-based position in the sequence.
        /// </param>
        /// <returns>The last remaining cluster, i.e. the root of the merge tree.</returns>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// Thrown when <paramref name="words"/> is empty, because there is no cluster to return.
        /// </exception>
        public static BiCluster HCluster(IEnumerable <WordVector> words)
        {
            var wordsList = words.ToArray();

            // Cache of already computed pair values, keyed by the two cluster ids.
            var distances = new Dictionary <Pair, double>();

            // Clusters created by merging get negative ids: -1, -2, ...
            int currentClustId = -1;

            List <BiCluster> clust = new List <BiCluster>(wordsList.Length);

            for (int i = 0; i < wordsList.Length; i++)
            {
                clust.Add(new BiCluster(wordsList[i].Entities.Values, i, 0.0));
            }

            while (clust.Count > 1)
            {
                // Start from the first pair; any strictly smaller value replaces it.
                Pair lowestpair = new Pair(0, 1);
                var  closest    = Pearson(clust[0].Vector, clust[1].Vector);

                // Loop through every pair looking for the smallest distance.
                for (int i = 0; i < clust.Count; i++)
                {
                    for (int j = i + 1; j < clust.Count; j++)
                    {
                        // One key and one lookup per pair instead of ContainsKey + Add + indexer.
                        Pair   key = new Pair(clust[i].Id, clust[j].Id);
                        double d;

                        if (!distances.TryGetValue(key, out d))
                        {
                            d = Pearson(clust[i].Vector, clust[j].Vector);
                            distances.Add(key, d);
                        }

                        if (d < closest)
                        {
                            closest    = d;
                            lowestpair = new Pair(i, j);
                        }
                    }
                }

                // Resolve both clusters BEFORE touching the list: removing the first one
                // shifts every later element down by one, so indexing with the old
                // position afterwards would pick the wrong cluster (or run off the end).
                BiCluster leftCluster  = clust[lowestpair.Left];
                BiCluster rightCluster = clust[lowestpair.Right];

                // Materialize each vector once rather than once per element.
                long[] leftVectorArray  = leftCluster.Vector.ToArray();
                long[] rightVectorArray = rightCluster.Vector.ToArray();

                // Element-wise average of the two clusters (integer division).
                // NOTE(review): assumes both vectors have the same length - confirm.
                List <long> mergevec = new List <long>(leftVectorArray.Length);
                for (int i = 0; i < leftVectorArray.Length; i++)
                {
                    mergevec.Add((leftVectorArray[i] + rightVectorArray[i]) / 2);
                }

                // Create the new cluster with the merged pair as its children.
                BiCluster newCluster = new BiCluster(mergevec, currentClustId, closest, leftCluster, rightCluster);

                // Cluster ids that weren't in the original set are negative.
                currentClustId--;

                clust.Remove(leftCluster);
                clust.Remove(rightCluster);
                clust.Add(newCluster);
            }
            return(clust[0]);
        }