예제 #1
0
        /// <summary>
        /// Calculates the topic distinctiveness (TD) of every good topic and returns the average TD over all topics.
        /// TD of a topic is 1 minus the cosine similarity between the topic's word vector (topicWords[i])
        /// and the global word distribution (wordTF / totalTF).
        /// Bad topics are skipped, so they contribute 0 to the all-topic average.
        /// </summary>
        /// <remarks>
        /// Side effects: stores each good topic's TD in topicMetrics[i, MetricType.Distinctiveness] and
        /// overwrites avgTopicDistinctiveness (average over all topics) and avgGoodTopicDistinctiveness
        /// (average over good topics only). Both averages are 0 when their divisor would be 0.
        /// </remarks>
        /// <returns>average TD over all topics</returns>
        public double ComputeAvgTopicDistinctiveness()
        {
            var globalDist = this.wordTF.Select(x => (float)x / (float)this.totalTF).ToArray();

            DenseVector globalDistVector = new DenseVector(globalDist);

            // The field doubles as the running sum below; reset it so that calling this
            // method more than once does not accumulate on top of the previous result.
            this.avgTopicDistinctiveness = 0;

            for (int i = 0; i < this.NumTopics; i++)
            {
                if (this.topicsInfo[i].IsBadTopic)
                {
                    // Bad topics add nothing to the sum, i.e. their TD counts as 0.
                    continue;
                }

                DenseVector topicDistVector = new DenseVector(this.topicWords[i]);

                var td = 1 - VectorBase.CosineSimilarity(topicDistVector, globalDistVector);

                this.topicMetrics[i, (int)MetricType.Distinctiveness] = td;
                this.avgTopicDistinctiveness += td;
            }

            var tdSum          = this.avgTopicDistinctiveness;
            var goodTopicCount = this.topicsInfo.Count(x => !x.IsBadTopic);

            // Guard both divisions: with no good topics (or no topics at all) the sum is 0
            // and dividing would yield NaN instead of the documented 0.
            this.avgGoodTopicDistinctiveness = (goodTopicCount > 0) ? tdSum / goodTopicCount : 0;
            this.avgTopicDistinctiveness     = (this.NumTopics > 0) ? tdSum / this.NumTopics : 0;

            return(this.avgTopicDistinctiveness);
        }
예제 #2
0
        /// <summary>
        /// Given a group of N vectors, compute pairwise comparisons and return the results in a jagged,
        /// upper-triangular matrix. The comparison operator can be Distance or Similarity.
        /// </summary>
        /// <param name="func">The comparison function to apply between pairs of vectors:
        /// VectorFunction.Distance selects Distance; any other value selects VectorBase.CosineSimilarity.</param>
        /// <param name="n">Limit comparisons to the top n vectors in the vector group.  If less than 1, compute comparisons for all vectors.
        /// Values larger than the number of vectors being compared are clamped to that number.</param>
        /// <param name="maxCountItemsToCompare">For each of the top n vectors, compare against the top maxCountItemsToCompare vectors.
        /// If less than 1 (or not smaller than RowCount), compare the top n vs. all vectors.
        /// If both n and maxCountItemsToCompare are less than 1, all pairs among the RowCount vectors are compared.
        /// </param>
        /// <returns>A jagged array of doubles, or null when RowCount is not positive.
        /// Row i has (vectorCount - (i + 1)) cells; cell [i][x] holds the comparison between vector i and vector (i + 1 + x).
        /// Only the upper triangle is stored: there are no cells for the diagonal or the lower triangle.
        /// </returns>
        public double[][] GenerateSimDistMatrix(VectorFunction func, int n, int maxCountItemsToCompare)
        {
            if (this.RowCount <= 0)
            {
                return(null);
            }

            // Are we being asked to compare top n vectors vs. only maxCountItemsToCompare vectors?
            int vectorCount = ((maxCountItemsToCompare > 0) && (maxCountItemsToCompare < this.RowCount)) ? maxCountItemsToCompare : this.RowCount;

            // For vector i we need to compute and store only (vectorCount - (i + 1)) similarity or distance scores.
            // If asked to compare only the top n vectors against the rest we only need n rows.
            // Otherwise (n <= 0) we compare all vs. all, which needs (vectorCount - 1) rows.
            // n is clamped to vectorCount: a larger n would request a negative row length below and throw.
            int matrixHeight;

            if (n > 0)
            {
                matrixHeight = (n < vectorCount) ? n : vectorCount;
            }
            else
            {
                matrixHeight = vectorCount - 1;
            }

            var funcMatrix = new double[matrixHeight][];

            for (int i = 0; i < matrixHeight; i++)
            {
                funcMatrix[i] = new double[vectorCount - (i + 1)];
            }

            if (this.Verbose)
            {
                StatusMessage.Write(string.Format("Comparing top {0} vs. {1} other items...", matrixHeight, vectorCount - 1));
            }

            // The comparison kind does not change between cells, so decide once.
            bool useDistance = (func == VectorFunction.Distance);

            for (int y = 0; y < matrixHeight; y++)
            {
                var rowVector = this[y];
                var row       = funcMatrix[y];

                for (var x = 0; x < row.Length; x++)
                {
                    // Cell x of row y pairs vector y with vector (y + 1 + x).
                    var otherVector = this[x + (y + 1)];

                    row[x] = useDistance
                        ? Distance(rowVector, otherVector)
                        : VectorBase.CosineSimilarity(rowVector, otherVector);
                }
            }

            return(funcMatrix);
        }