Пример #1
0
        /**
         * Calculate the distance between two clusters, forcing them to have the same words in them, and to not be related
         * to each other.
         *
         * Every pair (node1, node2) drawn from the two clusters is examined. The result is the largest value of
         * log(overlap) + posterior(node1) + posterior(node2) over all pairs whose overlap is positive.
         * NOTE(review): the node posteriors are assumed to be log-domain values, which is why the linear overlap is
         * converted with linearToLog before they are added - confirm against Node.getPosterior().
         *
         * @param cluster1 the first cluster
         * @param cluster2 the second cluster
         * @return The intra-cluster distance, or Double.NegativeInfinity if the clusters should never be clustered
         *         together (a pair with different spellings, a pair in an ancestral relationship, or no pair with a
         *         positive overlap).
         */
        protected double intraClusterDistance(Cluster cluster1, Cluster cluster2)
        {
            LogMath logMath = LogMath.getLogMath();
            double  maxSim  = Double.NegativeInfinity;

            foreach (Node node1 in cluster1.getElements())
            {
                foreach (Node node2 in cluster2.getElements())
                {
                    // Clusters holding different words must never be merged by this measure.
                    if (!node1.getWord().getSpelling().Equals(
                            node2.getWord().getSpelling()))
                    {
                        return(Double.NegativeInfinity);
                    }

                    // Nodes related through ancestry must not end up in the same cluster.
                    if (node1.hasAncestralRelationship(node2))
                    {
                        return(Double.NegativeInfinity);
                    }

                    double overlap = getOverlap(node1, node2);
                    if (overlap > 0.0)
                    {
                        // The overlap is a linear quantity; move it into the log domain before combining it with
                        // the posteriors. (The previous code called logToLinear here, i.e. the inverse conversion.)
                        // The float cast keeps the call valid whether linearToLog takes a float or a double.
                        double similarity = logMath.linearToLog((float)overlap);
                        similarity += node1.getPosterior() + node2.getPosterior();
                        if (similarity > maxSim)
                        {
                            maxSim = similarity;
                        }
                    }
                }
            }
            return(maxSim);
        }
Пример #2
0
        /**
         * Rescales component scores stream by stream, relative to the largest score of each stream.
         *
         * The array is treated as numStreams consecutive segments of componentScores.Length / numStreams entries each.
         * Within a segment the maximum is found, subtracted from every entry, and the difference is passed through
         * logMath.logToLinear. Entries beyond the last full segment (when the length is not a multiple of numStreams)
         * are left untouched.
         *
         * The input array is modified in place; the returned reference is the same array that was passed in.
         *
         * @param componentScores the scores to rescale (overwritten)
         * @param numStreams the number of equally sized segments the array is split into
         * @return componentScores, after rescaling
         */
        private float[] computePosterios(float[] componentScores, int numStreams)
        {
            // No copy is made: the result aliases the caller's array.
            float[] result    = componentScores;
            int     streamLen = componentScores.Length / numStreams;

            for (int stream = 0, begin = 0; stream < numStreams; stream++, begin += streamLen)
            {
                int end = begin + streamLen;

                // Largest score of the current segment.
                float peak = result[begin];
                for (int k = begin + 1; k < end; k++)
                {
                    float candidate = result[k];
                    if (candidate > peak)
                    {
                        peak = candidate;
                    }
                }

                // Shift by the peak, then convert each entry.
                for (int k = begin; k < end; k++)
                {
                    float shifted = result[k] - peak;
                    result[k] = (float)logMath.logToLinear(shifted);
                }
            }

            return(result);
        }
Пример #3
0
        /**
         * Calculate the distance between two clusters
         *
         * For every pair of distinct spellings (one from each cluster, using the first node seen for each spelling)
         * the phonetic similarity is converted to the log domain and combined with the word sub-cluster probabilities
         * of both words. The pair scores are summed with addAsLinear and the sum is divided by the number of pairs,
         * which in the log domain means subtracting linearToLog(pairCount).
         *
         * @param c1 the first cluster
         * @param c2 the second cluster
         * @return the inter cluster similarity, or Double.NegativeInfinity if these clusters should never be clustered
         *         together. If no word pair exists (an empty cluster), LogMath.LOG_ZERO is returned.
         */
        protected double interClusterDistance(Cluster c1, Cluster c2)
        {
            if (areClustersInRelation(c1, c2))
            {
                return(Double.NegativeInfinity);
            }
            float            totalSim      = LogMath.LOG_ZERO;
            int              wordPairCount = 0;
            HashSet <String> wordsSeen1    = new HashSet <String>();
            LogMath          logMath       = LogMath.getLogMath();

            foreach (Node node1 in c1.getElements())
            {
                String word1 = node1.getWord().getSpelling();
                // HashSet.Add returns false when the spelling was already present: skip repeated words.
                if (!wordsSeen1.Add(word1))
                {
                    continue;
                }
                HashSet <String> wordsSeen2 = new HashSet <String>();
                foreach (Node node2 in c2.getElements())
                {
                    String word2 = node2.getWord().getSpelling();
                    if (!wordsSeen2.Add(word2))
                    {
                        continue;
                    }
                    float sim = (float)computePhoneticSimilarity(node1, node2);
                    sim      = logMath.linearToLog(sim);
                    sim     += (float)wordSubClusterProbability(c1, word1);
                    sim     += (float)wordSubClusterProbability(c2, word2);
                    totalSim = logMath.addAsLinear(totalSim, sim);
                    wordPairCount++;
                }
            }

            // Nothing to average: totalSim is still LOG_ZERO.
            if (wordPairCount == 0)
            {
                return(totalSim);
            }

            // Average in the log domain: log(sum / n) = log(sum) - log(n). The count is a linear value, so it has to
            // go through linearToLog (the previous code used logToLinear, the inverse conversion). The subtraction is
            // done in double so that a totalSim near LOG_ZERO cannot overflow float to negative infinity.
            return((double)totalSim - logMath.linearToLog((float)wordPairCount));
        }