/// <summary>
        /// Computes cosine similarities between two TermDocumentMatrices.
        /// Cosine similarity is defined as (dot product) / (length * length)
        /// </summary>
        /// <param name="m1">Binary document matrix</param>
        /// <param name="m2">tf-idf weighted document matrix</param>
        /// <returns>Similarity matrix</returns>
        public static TLSimilarityMatrix ComputeCosine(TermDocumentMatrix m1, TermDocumentMatrix m2)
        {
            TLSimilarityMatrix        sims     = new TLSimilarityMatrix();
            List <TermDocumentMatrix> matrices = TermDocumentMatrix.Equalize(m1, m2);

            for (int i = 0; i < m1.NumDocs; i++)
            {
                TLLinksList links = new TLLinksList();
                for (int j = 0; j < m2.NumDocs; j++)
                {
                    double lengthProduct = ComputeLength(matrices[0].GetDocument(i)) * ComputeLength(matrices[1].GetDocument(j));
                    if (lengthProduct == 0.0)
                    {
                        links.Add(new TLSingleLink(m1.GetDocumentName(i), m2.GetDocumentName(j), 0.0));
                    }
                    else
                    {
                        links.Add(new TLSingleLink(m1.GetDocumentName(i), m2.GetDocumentName(j), ComputeDotProduct(matrices[0].GetDocument(i), matrices[1].GetDocument(j)) / lengthProduct));
                    }
                }
                links.Sort();
                foreach (TLSingleLink link in links)
                {
                    sims.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
                }
            }
            return(sims);
        }
Beispiel #2
0
        /// <summary>
        /// Computes Jensen-Shannon divergence on two TermDocumentMatrices
        /// </summary>
        /// <param name="source">Source artifacts collection</param>
        /// <param name="target">Target artifacts collection</param>
        /// <returns>Similarity matrix</returns>
        public static TLSimilarityMatrix Compute(TermDocumentMatrix source, TermDocumentMatrix target)
        {
            List <TermDocumentMatrix> matrices = TermDocumentMatrix.Equalize(source, target);
            TLSimilarityMatrix        sims     = new TLSimilarityMatrix();

            for (int i = 0; i < matrices[0].NumDocs; i++)
            {
                TLLinksList list = new TLLinksList();
                for (int j = 0; j < matrices[1].NumDocs; j++)
                {
                    list.Add(new TLSingleLink(matrices[0].GetDocumentName(i), matrices[1].GetDocumentName(j),
                                              DocumentSimilarity(matrices[0].GetDocument(i), matrices[1].GetDocument(j))));
                }
                list.Sort();
                foreach (TLSingleLink link in list)
                {
                    sims.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
                }
            }
            return(sims);
        }