/// <summary>
/// Creates a cluster node from its vector, identifier, distance and child nodes.
/// </summary>
/// <param name="vector">Values stored in <c>Vector</c>.</param>
/// <param name="id">Value stored in <c>Id</c>.</param>
/// <param name="distance">Value stored in <c>Distance</c>.</param>
/// <param name="left">Node stored in <c>Left</c>.</param>
/// <param name="right">Node stored in <c>Right</c>.</param>
public BiCluster(
    IEnumerable<long> vector,
    int id,
    double distance,
    BiCluster left,
    BiCluster right)
{
    // Each argument is stored as-is on the member of the same name.
    this.Vector = vector;
    this.Id = id;
    this.Distance = distance;
    this.Left = left;
    this.Right = right;
}
/// <summary>
/// Initializes a cluster node, copying every argument onto the member of the same name.
/// </summary>
/// <param name="vector">Values assigned to <c>Vector</c>.</param>
/// <param name="id">Identifier assigned to <c>Id</c>.</param>
/// <param name="distance">Value assigned to <c>Distance</c>.</param>
/// <param name="left">Node assigned to <c>Left</c>.</param>
/// <param name="right">Node assigned to <c>Right</c>.</param>
public BiCluster(
    IEnumerable<long> vector,
    int id,
    double distance,
    BiCluster left,
    BiCluster right)
{
    // No validation or copying happens here; references are kept as passed in.
    this.Vector = vector;
    this.Id = id;
    this.Distance = distance;
    this.Left = left;
    this.Right = right;
}
/// <summary>
/// Builds a binary cluster tree from the given word vectors by repeatedly merging
/// the pair of clusters with the smallest <c>Pearson</c> value until one cluster remains.
/// </summary>
/// <param name="words">
/// Word vectors to cluster; each one becomes a leaf whose id is its position in the sequence.
/// </param>
/// <returns>The single remaining cluster, i.e. the root of the merge tree.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown by the final list access when <paramref name="words"/> contains no elements.
/// </exception>
public static BiCluster HCluster(IEnumerable<WordVector> words)
{
    var wordsList = words.ToArray();

    // Cache of already computed pair values, keyed by cluster ids (not list positions).
    var distances = new Dictionary<Pair, double>();

    // Cluster ids that weren't in the original set are negative.
    int currentClustId = -1;

    List<BiCluster> clust = new List<BiCluster>(wordsList.Length);
    for (int i = 0; i < wordsList.Length; i++)
    {
        clust.Add(new BiCluster(wordsList[i].Entities.Values, i, 0.0));
    }

    while (clust.Count > 1)
    {
        // Start from the first two clusters; lowestpair holds list positions.
        Pair lowestpair = new Pair(0, 1);
        var closest = Pearson(clust[0].Vector, clust[1].Vector);

        // Loop through every pair looking for the smallest distance.
        for (int i = 0; i < clust.Count; i++)
        {
            for (int j = i + 1; j < clust.Count; j++)
            {
                Pair key = new Pair(clust[i].Id, clust[j].Id);

                double d;
                if (!distances.TryGetValue(key, out d))
                {
                    // Not cached yet: calculate the pearson value and remember it.
                    d = Pearson(clust[i].Vector, clust[j].Vector);
                    distances.Add(key, d);
                }

                if (d < closest)
                {
                    closest = d;
                    lowestpair = new Pair(i, j);
                }
            }
        }

        // Capture both clusters before the list is modified so the references
        // stay valid regardless of index shifts.
        BiCluster leftCluster = clust[lowestpair.Left];
        BiCluster rightCluster = clust[lowestpair.Right];

        // Materialize each vector once instead of once per component.
        long[] leftVectorArray = leftCluster.Vector.ToArray();
        long[] rightVectorArray = rightCluster.Vector.ToArray();

        // Component count is taken from the first cluster; all vectors are
        // presumed to have the same length.
        int dimension = clust[0].Vector.Count();

        // Calculate the average of the two clusters (integer division truncates).
        List<long> mergevec = new List<long>(dimension);
        for (int i = 0; i < dimension; i++)
        {
            mergevec.Add((leftVectorArray[i] + rightVectorArray[i]) / 2);
        }

        // Create the new cluster.
        BiCluster newCluster = new BiCluster(mergevec, currentClustId, closest, leftCluster, rightCluster);
        currentClustId--;

        // Left < Right always holds (j starts at i + 1), so removing the higher
        // position first keeps the lower position valid.
        clust.RemoveAt(lowestpair.Right);
        clust.RemoveAt(lowestpair.Left);
        clust.Add(newCluster);
    }

    return clust[0];
}
/// <summary>
/// Builds a binary cluster tree from the given word vectors by repeatedly merging
/// the pair of clusters with the smallest <c>Pearson</c> value until one cluster remains.
/// </summary>
/// <param name="words">
/// Word vectors to cluster; each one becomes a leaf whose id is its position in the sequence.
/// </param>
/// <returns>The single remaining cluster, i.e. the root of the merge tree.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown by the final list access when <paramref name="words"/> contains no elements.
/// </exception>
public static BiCluster HCluster(IEnumerable<WordVector> words)
{
    var wordsList = words.ToArray();

    // Cache of already computed pair values, keyed by cluster ids (not list positions).
    var distances = new Dictionary<Pair, double>();

    // Cluster ids that weren't in the original set are negative.
    int currentClustId = -1;

    List<BiCluster> clust = new List<BiCluster>(wordsList.Length);
    for (int i = 0; i < wordsList.Length; i++)
    {
        clust.Add(new BiCluster(wordsList[i].Entities.Values, i, 0.0));
    }

    while (clust.Count > 1)
    {
        // Start from the first two clusters; lowestpair holds list positions.
        Pair lowestpair = new Pair(0, 1);
        var closest = Pearson(clust[0].Vector, clust[1].Vector);

        // Loop through every pair looking for the smallest distance.
        for (int i = 0; i < clust.Count; i++)
        {
            for (int j = i + 1; j < clust.Count; j++)
            {
                Pair key = new Pair(clust[i].Id, clust[j].Id);

                double d;
                if (!distances.TryGetValue(key, out d))
                {
                    // Not cached yet: calculate the pearson value and remember it.
                    d = Pearson(clust[i].Vector, clust[j].Vector);
                    distances.Add(key, d);
                }

                if (d < closest)
                {
                    closest = d;
                    lowestpair = new Pair(i, j);
                }
            }
        }

        // Capture both clusters before the list is modified so the references
        // stay valid regardless of index shifts.
        BiCluster leftCluster = clust[lowestpair.Left];
        BiCluster rightCluster = clust[lowestpair.Right];

        // Materialize each vector once instead of once per component.
        long[] leftVectorArray = leftCluster.Vector.ToArray();
        long[] rightVectorArray = rightCluster.Vector.ToArray();

        // Component count is taken from the first cluster; all vectors are
        // presumed to have the same length.
        int dimension = clust[0].Vector.Count();

        // Calculate the average of the two clusters (integer division truncates).
        List<long> mergevec = new List<long>(dimension);
        for (int i = 0; i < dimension; i++)
        {
            mergevec.Add((leftVectorArray[i] + rightVectorArray[i]) / 2);
        }

        // Create the new cluster.
        BiCluster newCluster = new BiCluster(mergevec, currentClustId, closest, leftCluster, rightCluster);
        currentClustId--;

        // Left < Right always holds (j starts at i + 1), so removing the higher
        // position first keeps the lower position valid.
        clust.RemoveAt(lowestpair.Right);
        clust.RemoveAt(lowestpair.Left);
        clust.Add(newCluster);
    }

    return clust[0];
}