Example #1
0
        /// <summary>
        /// Computes Jensen-Shannon divergence on two TermDocumentMatrices
        /// </summary>
        /// <param name="source">Source artifacts collection</param>
        /// <param name="target">Target artifacts collection</param>
        /// <returns>Similarity matrix</returns>
        public static TLSimilarityMatrix Compute(TermDocumentMatrix source, TermDocumentMatrix target)
        {
            List <TermDocumentMatrix> matrices = TermDocumentMatrix.Equalize(source, target);
            TLSimilarityMatrix        sims     = new TLSimilarityMatrix();

            for (int i = 0; i < matrices[0].NumDocs; i++)
            {
                TLLinksList list = new TLLinksList();
                for (int j = 0; j < matrices[1].NumDocs; j++)
                {
                    list.Add(new TLSingleLink(matrices[0].GetDocumentName(i), matrices[1].GetDocumentName(j),
                                              DocumentSimilarity(matrices[0].GetDocument(i), matrices[1].GetDocument(j))));
                }
                list.Sort();
                foreach (TLSingleLink link in list)
                {
                    sims.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
                }
            }
            return(sims);
        }
Example #2
0
        /// <summary>
        /// Computes cosine similarities between a set of boolean document vectors and a tfidf weighted corpus
        /// </summary>
        /// <param name="ids">Boolean document vectors</param>
        /// <param name="tfidf">tf-idf weighted document vectors</param>
        /// <returns>Similarity matrix</returns>
        private static TLSimilarityMatrix ComputeSimilarities(TermDocumentMatrix ids, TermDocumentMatrix tfidf)
        {
            TLSimilarityMatrix        sims     = new TLSimilarityMatrix();
            List <TermDocumentMatrix> matrices = TermDocumentMatrix.Equalize(ids, tfidf);

            for (int i = 0; i < ids.NumDocs; i++)
            {
                TLLinksList links = new TLLinksList();
                for (int j = 0; j < tfidf.NumDocs; j++)
                {
                    double product  = 0.0;
                    double asquared = 0.0;
                    double bsquared = 0.0;
                    for (int k = 0; k < matrices[0].NumTerms; k++)
                    {
                        double a = matrices[0][i, k];
                        double b = matrices[1][j, k];
                        product  += (a * b);
                        asquared += Math.Pow(a, 2);
                        bsquared += Math.Pow(b, 2);
                    }
                    double cross = Math.Sqrt(asquared) * Math.Sqrt(bsquared);
                    if (cross == 0.0)
                    {
                        links.Add(new TLSingleLink(ids.GetDocumentName(i), tfidf.GetDocumentName(j), 0.0));
                    }
                    else
                    {
                        links.Add(new TLSingleLink(ids.GetDocumentName(i), tfidf.GetDocumentName(j), product / cross));
                    }
                }
                links.Sort();
                foreach (TLSingleLink link in links)
                {
                    sims.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
                }
            }
            return(sims);
        }