/**
 * Scores two clusters for intra-word clustering. Every node of one cluster is paired with
 * every node of the other; the clusters are only compatible when all pairs share the same
 * word spelling and no pair is in an ancestral relationship.
 *
 * @param cluster1 the first cluster
 * @param cluster2 the second cluster
 * @return the highest pair score found among pairs with a positive overlap, or
 *         Double.NegativeInfinity if no pair overlaps or the clusters must never be merged
 */
protected double intraClusterDistance(Cluster cluster1, Cluster cluster2)
{
    LogMath logMath = LogMath.getLogMath();
    double best = Double.NegativeInfinity;

    foreach (Node first in cluster1.getElements())
    {
        foreach (Node second in cluster2.getElements())
        {
            String firstSpelling = first.getWord().getSpelling();
            String secondSpelling = second.getWord().getSpelling();

            // A single mismatching or related pair vetoes the whole cluster pair.
            // The relationship check is only reached when the spellings agree.
            if (!firstSpelling.Equals(secondSpelling) || first.hasAncestralRelationship(second))
            {
                return Double.NegativeInfinity;
            }

            double rawOverlap = getOverlap(first, second);
            if (!(rawOverlap > 0.0))
            {
                // Pairs without a positive overlap do not contribute to the score.
                continue;
            }

            // NOTE(review): the overlap is passed through logToLinear before the posteriors
            // are added; if getPosterior() is in the log domain, linearToLog would be the
            // expected conversion - confirm intent before changing.
            double candidate = logMath.logToLinear((float)rawOverlap);
            candidate += first.getPosterior() + second.getPosterior();

            if (candidate > best)
            {
                best = candidate;
            }
        }
    }

    return best;
}
/**
 * Rescales component scores stream by stream. The array is treated as numStreams consecutive
 * groups of (componentScores.Length / numStreams) entries; within each group the maximum is
 * subtracted from every entry and the difference is passed through logMath.logToLinear.
 *
 * The input array is modified in place and the same array instance is returned. Entries past
 * the last complete group (when the length is not a multiple of numStreams) are left untouched.
 *
 * @param componentScores the scores to rescale; overwritten by this call
 * @param numStreams the number of equally sized groups the array is divided into
 * @return componentScores, now holding the rescaled values
 */
private float[] computePosterios(float[] componentScores, int numStreams)
{
    // Deliberately an alias, not a copy: results are written back into the caller's array.
    float[] result = componentScores;
    int streamSize = componentScores.Length / numStreams;

    for (int stream = 0, begin = 0; stream < numStreams; stream++, begin += streamSize)
    {
        int end = begin + streamSize;

        // Find the largest score of this stream.
        float peak = result[begin];
        for (int k = begin + 1; k < end; k++)
        {
            peak = result[k] > peak ? result[k] : peak;
        }

        // Shift by the peak before converting, so the largest entry maps to logToLinear(0).
        for (int k = begin; k < end; k++)
        {
            result[k] = (float)logMath.logToLinear(result[k] - peak);
        }
    }

    return result;
}
/**
 * Calculate the similarity between two clusters as the average, over all pairs of distinct
 * word spellings (one from each cluster), of the phonetic similarity of the pair weighted by
 * the sub-cluster probability of each word. The result is in the log domain; a larger value
 * means the clusters are more similar.
 *
 * @param c1 the first cluster
 * @param c2 the second cluster
 * @return the inter cluster similarity, or Double.NegativeInfinity if these clusters should
 *         never be clustered together
 */
protected double interClusterDistance(Cluster c1, Cluster c2)
{
    if (areClustersInRelation(c1, c2))
    {
        return Double.NegativeInfinity;
    }

    LogMath logMath = LogMath.getLogMath();
    float totalSim = LogMath.LOG_ZERO;   // log-domain running sum over all word pairs
    float wordPairCount = 0.0f;          // linear count of the pairs that were summed
    HashSet<String> wordsSeen1 = new HashSet<String>();

    foreach (Node node1 in c1.getElements())
    {
        // Each distinct spelling of c1 is scored once, using the first node that carries it.
        String word1 = node1.getWord().getSpelling();
        if (wordsSeen1.Contains(word1))
        {
            continue;
        }
        wordsSeen1.Add(word1);

        HashSet<String> wordsSeen2 = new HashSet<String>();
        foreach (Node node2 in c2.getElements())
        {
            String word2 = node2.getWord().getSpelling();
            if (wordsSeen2.Contains(word2))
            {
                continue;
            }
            wordsSeen2.Add(word2);

            // Convert the linear similarity to the log domain, then add the two word
            // probabilities to it.
            float sim = (float)computePhoneticSimilarity(node1, node2);
            sim = logMath.linearToLog(sim);
            sim += (float)wordSubClusterProbability(c1, word1);
            sim += (float)wordSubClusterProbability(c2, word2);

            totalSim = logMath.addAsLinear(totalSim, sim);
            wordPairCount++;
        }
    }

    if (wordPairCount == 0)
    {
        // No word pairs were scored, so there is nothing to average: return the
        // untouched accumulator (LOG_ZERO).
        return totalSim;
    }

    // Averaging in the log domain: log(sum / count) = log(sum) - log(count). The count is a
    // linear value, so it has to be converted with linearToLog (the previous code used
    // logToLinear, which does not divide the sum by the number of pairs).
    return totalSim - logMath.linearToLog(wordPairCount);
}