/// <summary>
/// Computes Jensen-Shannon divergence between the documents of two TermDocumentMatrices.
/// </summary>
/// <param name="source">Source artifacts collection</param>
/// <param name="target">Target artifacts collection</param>
/// <returns>Similarity matrix with one link per source/target document pair</returns>
public static TLSimilarityMatrix Compute(TermDocumentMatrix source, TermDocumentMatrix target)
{
    // Both inputs go through Equalize; only the returned pair is used from here on.
    List<TermDocumentMatrix> equalized = TermDocumentMatrix.Equalize(source, target);
    TermDocumentMatrix sourceMatrix = equalized[0];
    TermDocumentMatrix targetMatrix = equalized[1];
    TLSimilarityMatrix result = new TLSimilarityMatrix();

    for (int sourceIndex = 0; sourceIndex < sourceMatrix.NumDocs; sourceIndex++)
    {
        // Links for the current source document are collected and sorted
        // before being appended to the result.
        TLLinksList rankedLinks = new TLLinksList();

        for (int targetIndex = 0; targetIndex < targetMatrix.NumDocs; targetIndex++)
        {
            var sourceName = sourceMatrix.GetDocumentName(sourceIndex);
            var targetName = targetMatrix.GetDocumentName(targetIndex);
            var score = DocumentSimilarity(
                sourceMatrix.GetDocument(sourceIndex),
                targetMatrix.GetDocument(targetIndex));

            rankedLinks.Add(new TLSingleLink(sourceName, targetName, score));
        }

        rankedLinks.Sort();

        foreach (TLSingleLink rankedLink in rankedLinks)
        {
            result.AddLink(rankedLink.SourceArtifactId, rankedLink.TargetArtifactId, rankedLink.Score);
        }
    }

    return result;
}
/// <summary>
/// Computes cosine similarities between a set of boolean document vectors and a tfidf weighted corpus
/// </summary>
/// <remarks>
/// Both matrices are passed through TermDocumentMatrix.Equalize before their rows are compared
/// term by term. A pair in which either vector has zero length is given a score of 0.0
/// instead of dividing by zero.
/// </remarks>
/// <param name="ids">Boolean document vectors</param>
/// <param name="tfidf">tf-idf weighted document vectors</param>
/// <returns>Similarity matrix</returns>
private static TLSimilarityMatrix ComputeSimilarities(TermDocumentMatrix ids, TermDocumentMatrix tfidf)
{
    TLSimilarityMatrix sims = new TLSimilarityMatrix();
    List<TermDocumentMatrix> matrices = TermDocumentMatrix.Equalize(ids, tfidf);
    TermDocumentMatrix sourceMatrix = matrices[0];
    TermDocumentMatrix targetMatrix = matrices[1];
    int numTerms = sourceMatrix.NumTerms;
    int numTargets = tfidf.NumDocs;

    // The length of a target vector does not depend on the source document,
    // so compute each one once instead of once per (source, target) pair.
    double[] targetNorms = new double[numTargets];
    for (int j = 0; j < numTargets; j++)
    {
        double bsquared = 0.0;
        for (int k = 0; k < numTerms; k++)
        {
            bsquared += Math.Pow(targetMatrix[j, k], 2);
        }
        targetNorms[j] = Math.Sqrt(bsquared);
    }

    for (int i = 0; i < ids.NumDocs; i++)
    {
        // Likewise, the source vector length is fixed for the whole inner loop.
        double asquared = 0.0;
        for (int k = 0; k < numTerms; k++)
        {
            asquared += Math.Pow(sourceMatrix[i, k], 2);
        }
        double sourceNorm = Math.Sqrt(asquared);

        TLLinksList links = new TLLinksList();
        for (int j = 0; j < numTargets; j++)
        {
            double product = 0.0;
            for (int k = 0; k < numTerms; k++)
            {
                product += (sourceMatrix[i, k] * targetMatrix[j, k]);
            }

            double cross = sourceNorm * targetNorms[j];
            // Exact comparison is intentional: it only guards the division below.
            double score = (cross == 0.0) ? 0.0 : product / cross;
            links.Add(new TLSingleLink(ids.GetDocumentName(i), tfidf.GetDocumentName(j), score));
        }

        // Links for one source document are sorted before being added to the result.
        links.Sort();
        foreach (TLSingleLink link in links)
        {
            sims.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
        }
    }

    return sims;
}