/// <summary>
        /// Requests the top 4 links of the <c>sims</c> fixture from
        /// <c>TLSimilarityMatrixUtil.GetTopNLinks</c> and checks the count and every
        /// returned link, position by position.
        /// </summary>
        public void GetTopNLinks()
        {
            TLLinksList actual = TLSimilarityMatrixUtil.GetTopNLinks(sims, 4);

            #if Verbose
            Console.WriteLine("TLSimilarityMatrixUtilTest.GetTopNLinks()");
            foreach (TLSingleLink printed in actual)
            {
                Console.WriteLine("{0}\t{1}\t{2}", printed.SourceArtifactId, printed.TargetArtifactId, printed.Score);
            }
            #endif
            Assert.AreEqual(4, actual.Count);

            // Every expected link has source "A"; targets and scores are paired by position.
            string[] targets = { "B*", "E", "F", "C*" };
            double[] scores = { 10, 9, 8, 7 };
            TLLinksList expected = new TLLinksList();
            for (int k = 0; k < targets.Length; k++)
            {
                expected.Add(new TLSingleLink("A", targets[k], scores[k]));
            }

            int position = 0;
            foreach (TLSingleLink wanted in expected)
            {
                Assert.AreEqual(wanted, actual[position]);
                position++;
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Computes cosine similarities between two TermDocumentMatrices.
        /// Cosine similarity is defined as (dot product) / (length * length).
        /// A pair whose length product is exactly zero is scored 0.0 rather than divided.
        /// </summary>
        /// <param name="m1">Binary document matrix</param>
        /// <param name="m2">tf-idf weighted document matrix</param>
        /// <returns>Similarity matrix holding one link per (m1 document, m2 document) pair</returns>
        public static TLSimilarityMatrix ComputeCosine(TermDocumentMatrix m1, TermDocumentMatrix m2)
        {
            TLSimilarityMatrix sims = new TLSimilarityMatrix();
            // Vectors are read from the equalized matrices; names come from the originals.
            List<TermDocumentMatrix> matrices = TermDocumentMatrix.Equalize(m1, m2);

            for (int i = 0; i < m1.NumDocs; i++)
            {
                // The source vector's length does not depend on j, so it is computed once
                // per source document instead of once per (i, j) pair.
                double sourceLength = ComputeLength(matrices[0].GetDocument(i));

                TLLinksList links = new TLLinksList();
                for (int j = 0; j < m2.NumDocs; j++)
                {
                    double lengthProduct = sourceLength * ComputeLength(matrices[1].GetDocument(j));

                    // Exact comparison on purpose: this only guards the division below.
                    double score = 0.0;
                    if (lengthProduct != 0.0)
                    {
                        score = ComputeDotProduct(matrices[0].GetDocument(i), matrices[1].GetDocument(j)) / lengthProduct;
                    }
                    links.Add(new TLSingleLink(m1.GetDocumentName(i), m2.GetDocumentName(j), score));
                }

                // Links of one source document are sorted before being added to the result.
                links.Sort();
                foreach (TLSingleLink link in links)
                {
                    sims.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
                }
            }
            return sims;
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Computes cosine similarities between two TermDocumentMatrices.
 /// Cosine similarity is defined as (dot product) / (length * length).
 /// A pair whose length product is exactly zero is scored 0.0 rather than divided.
 /// </summary>
 /// <param name="m1">Binary document matrix</param>
 /// <param name="m2">tf-idf weighted document matrix</param>
 /// <returns>Similarity matrix holding one link per (m1 document, m2 document) pair</returns>
 public static TLSimilarityMatrix ComputeCosine(TermDocumentMatrix m1, TermDocumentMatrix m2)
 {
     TLSimilarityMatrix sims = new TLSimilarityMatrix();
     // Vectors are read from the equalized matrices; names come from the originals.
     List<TermDocumentMatrix> matrices = TermDocumentMatrix.Equalize(m1, m2);
     for (int i = 0; i < m1.NumDocs; i++)
     {
         // The source vector's length does not depend on j, so it is computed once
         // per source document instead of once per (i, j) pair.
         double sourceLength = ComputeLength(matrices[0].GetDocument(i));
         TLLinksList links = new TLLinksList();
         for (int j = 0; j < m2.NumDocs; j++)
         {
             double lengthProduct = sourceLength * ComputeLength(matrices[1].GetDocument(j));
             // Exact comparison on purpose: this only guards the division below.
             double score = 0.0;
             if (lengthProduct != 0.0)
             {
                 score = ComputeDotProduct(matrices[0].GetDocument(i), matrices[1].GetDocument(j)) / lengthProduct;
             }
             links.Add(new TLSingleLink(m1.GetDocumentName(i), m2.GetDocumentName(j), score));
         }
         // Links of one source document are sorted before being added to the result.
         links.Sort();
         foreach (TLSingleLink link in links)
         {
             sims.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
         }
     }
     return sims;
 }
        /// <summary>
        /// Requests the links at recall level 1.0 for the <c>sims</c> / <c>oracle</c>
        /// fixtures and checks the count and every returned link, position by position.
        /// </summary>
        public void GetLinksAtRecall100()
        {
            TLLinksList actual = TLSimilarityMatrixUtil.GetLinksAtRecall(sims, oracle, 1.0);

            #if Verbose
            Console.WriteLine("TLSimilarityMatrixUtilTest.GetLinksAtRecall100()");
            foreach (TLSingleLink printed in actual)
            {
                Console.WriteLine("{0}\t{1}\t{2}", printed.SourceArtifactId, printed.TargetArtifactId, printed.Score);
            }
            #endif
            Assert.AreEqual(9, actual.Count);

            // Every expected link has source "A"; targets and scores are paired by position.
            string[] targets = { "B*", "E", "F", "C*", "G", "H", "I", "J", "D*" };
            double[] scores = { 10, 9, 8, 7, 6, 5, 4, 3, 2 };
            TLLinksList expected = new TLLinksList();
            for (int k = 0; k < targets.Length; k++)
            {
                expected.Add(new TLSingleLink("A", targets[k], scores[k]));
            }

            int position = 0;
            foreach (TLSingleLink wanted in expected)
            {
                Assert.AreEqual(wanted, actual[position]);
                position++;
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Returns links for the desired recall level.
        /// The candidate links are sorted with <c>TLLinksList.Sort()</c> and collected from
        /// the front until the number of collected links that the answer matrix reports via
        /// <c>IsLinkAboveThreshold</c> reaches <c>answerMatrix.Count * level</c>, or the
        /// candidates run out.
        /// </summary>
        /// <param name="matrix">Candidate matrix</param>
        /// <param name="answerMatrix">Answer matrix</param>
        /// <param name="level">Desired recall level, greater than 0 and at most 1</param>
        /// <returns>List of links at desired recall</returns>
        /// <exception cref="DevelopmentKitException">If <paramref name="level"/> is not in (0, 1].</exception>
        public static TLLinksList GetLinksAtRecall(TLSimilarityMatrix matrix, TLSimilarityMatrix answerMatrix, double level)
        {
            if (level <= 0.0 || level > 1.0)
            {
                throw new DevelopmentKitException("Recall level must be between 0 and 1.");
            }
            double totalCorrect = answerMatrix.Count * level;
            TLLinksList links = matrix.AllLinks;
            links.Sort();

            TLLinksList newLinks = new TLLinksList();
            int numCorrect = 0;

            // Walk the sorted list by index. Repeatedly removing element 0 shifts the whole
            // list on every step (quadratic) and empties the list obtained from the matrix.
            for (int i = 0; i < links.Count && numCorrect < totalCorrect; i++)
            {
                TLSingleLink link = links[i];
                if (answerMatrix.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
                {
                    numCorrect++;
                }
                // Incorrect links are kept too: the result is the ranked prefix up to the cut-off.
                newLinks.Add(link);
            }
            return newLinks;
        }
        /// <summary>
        /// Sets the threshold of the <c>sims</c> fixture to 4, requests the links above the
        /// matrix's own threshold, sorts them, and checks the count and every link in order.
        /// </summary>
        public void GetLinksAboveThresholdDefault()
        {
            sims.Threshold = 4;
            TLLinksList actual = TLSimilarityMatrixUtil.GetLinksAboveThreshold(sims);

            actual.Sort();
            #if Verbose
            Console.WriteLine("TLSimilarityMatrixUtilTest.GetLinksAboveThresholdDefault()");
            foreach (TLSingleLink printed in actual)
            {
                Console.WriteLine("{0}\t{1}\t{2}", printed.SourceArtifactId, printed.TargetArtifactId, printed.Score);
            }
            #endif
            Assert.AreEqual(6, actual.Count);

            // Every expected link has source "A"; targets and scores are paired by position.
            string[] targets = { "B*", "E", "F", "C*", "G", "H" };
            double[] scores = { 10, 9, 8, 7, 6, 5 };
            TLLinksList expected = new TLLinksList();
            for (int k = 0; k < targets.Length; k++)
            {
                expected.Add(new TLSingleLink("A", targets[k], scores[k]));
            }

            int position = 0;
            foreach (TLSingleLink wanted in expected)
            {
                Assert.AreEqual(wanted, actual[position]);
                position++;
            }
        }
 /// <summary>
 /// Sets the threshold of the <c>sims</c> fixture to 4, requests the links above the
 /// matrix's own threshold, sorts them, and checks the count and every link in order.
 /// </summary>
 public void GetLinksAboveThresholdDefault()
 {
     sims.Threshold = 4;
     TLLinksList actual = TLSimilarityMatrixUtil.GetLinksAboveThreshold(sims);
     actual.Sort();
     #if Verbose
     Console.WriteLine("TLSimilarityMatrixUtilTest.GetLinksAboveThresholdDefault()");
     foreach (TLSingleLink printed in actual)
     {
         Console.WriteLine("{0}\t{1}\t{2}", printed.SourceArtifactId, printed.TargetArtifactId, printed.Score);
     }
     #endif
     Assert.AreEqual(6, actual.Count);

     // Every expected link has source "A"; targets and scores are paired by position.
     string[] targets = { "B*", "E", "F", "C*", "G", "H" };
     double[] scores = { 10, 9, 8, 7, 6, 5 };
     TLLinksList expected = new TLLinksList();
     for (int k = 0; k < targets.Length; k++)
     {
         expected.Add(new TLSingleLink("A", targets[k], scores[k]));
     }

     int position = 0;
     foreach (TLSingleLink wanted in expected)
     {
         Assert.AreEqual(wanted, actual[position]);
         position++;
     }
 }
Ejemplo n.º 8
0
        /// <summary>
        /// Collects every link of the matrix whose score is strictly greater than the
        /// given threshold.
        /// </summary>
        /// <param name="matrix">Matrix whose links are filtered</param>
        /// <param name="threshold">Score threshold (exclusive)</param>
        /// <returns>List of links scoring above the threshold, in the order the matrix yields them</returns>
        public static TLLinksList GetLinksAboveThreshold(TLSimilarityMatrix matrix, double threshold)
        {
            TLLinksList selected = new TLLinksList();

            foreach (TLSingleLink candidate in matrix.AllLinks)
            {
                // Skip anything that is not strictly above the threshold.
                if (!(candidate.Score > threshold))
                {
                    continue;
                }
                selected.Add(candidate);
            }
            return selected;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Computes the cosine similarity between each boolean document vector and each
        /// tf-idf weighted document vector, term by term over the equalized matrices.
        /// A pair whose norm product is exactly zero is scored 0.0.
        /// </summary>
        /// <param name="ids">Boolean document vectors</param>
        /// <param name="tfidf">tf-idf weighted document vectors</param>
        /// <returns>Similarity matrix with one link per (ids document, tfidf document) pair</returns>
        private static TLSimilarityMatrix ComputeSimilarities(TermDocumentMatrix ids, TermDocumentMatrix tfidf)
        {
            TLSimilarityMatrix result = new TLSimilarityMatrix();
            List<TermDocumentMatrix> equalized = TermDocumentMatrix.Equalize(ids, tfidf);

            for (int row = 0; row < ids.NumDocs; row++)
            {
                TLLinksList rowLinks = new TLLinksList();
                for (int col = 0; col < tfidf.NumDocs; col++)
                {
                    // Accumulate the dot product and both squared norms in one pass over the terms.
                    double dot = 0.0;
                    double leftSquares = 0.0;
                    double rightSquares = 0.0;
                    for (int term = 0; term < equalized[0].NumTerms; term++)
                    {
                        double left = equalized[0][row, term];
                        double right = equalized[1][col, term];
                        dot += (left * right);
                        leftSquares += Math.Pow(left, 2);
                        rightSquares += Math.Pow(right, 2);
                    }

                    double normProduct = Math.Sqrt(leftSquares) * Math.Sqrt(rightSquares);
                    // Exact-zero check guards the division only.
                    double score = (normProduct == 0.0) ? 0.0 : dot / normProduct;
                    rowLinks.Add(new TLSingleLink(ids.GetDocumentName(row), tfidf.GetDocumentName(col), score));
                }

                // Each row is sorted before its links go into the result.
                rowLinks.Sort();
                foreach (TLSingleLink sorted in rowLinks)
                {
                    result.AddLink(sorted.SourceArtifactId, sorted.TargetArtifactId, sorted.Score);
                }
            }
            return result;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Builds a new similarity matrix in which, for every link (source, target) of
        /// <paramref name="matrix"/> and every artifact that <paramref name="relationships"/>
        /// reports as related to that target, the score of (source, related artifact) is
        /// increased by itself times a boost factor (the computed delta when UseDelta is
        /// defined, otherwise 0.1).
        /// </summary>
        /// <param name="matrix">Matrix whose scores are adjusted</param>
        /// <param name="relationships">Matrix queried for the artifacts related to each target</param>
        /// <returns>New matrix holding the adjusted scores</returns>
        public static TLSimilarityMatrix Compute(TLSimilarityMatrix matrix, TLSimilarityMatrix relationships)
        {
            // Lookup table: source id -> (target id -> score)
            Dictionary<string, Dictionary<string, double>> scoresBySource = new Dictionary<string, Dictionary<string, double>>();

            foreach (TLSingleLink original in matrix.AllLinks)
            {
                Dictionary<string, double> targets;
                if (!scoresBySource.TryGetValue(original.SourceArtifactId, out targets))
                {
                    targets = new Dictionary<string, double>();
                    scoresBySource.Add(original.SourceArtifactId, targets);
                }
                targets.Add(original.TargetArtifactId, original.Score);
            }
#if UseDelta
            // boost factor computed from the matrix
            double boost = SharedUtils.ComputeDelta(matrix);
#else
            // fixed boost factor
            double boost = 0.1;
#endif
            // visit every (source, target) pair
            TLLinksList ordered = matrix.AllLinks;
            ordered.Sort();
            foreach (TLSingleLink current in ordered)
            {
                // every artifact related to the current target gets its (source, related) score boosted
                foreach (string relatedArtifact in relationships.GetSetOfTargetArtifactIdsAboveThresholdForSourceArtifact(current.TargetArtifactId))
                {
                    // the indexer read fails if (source, related) is not a pair of the input matrix
                    Dictionary<string, double> scores = scoresBySource[current.SourceArtifactId];
                    scores[relatedArtifact] = scores[relatedArtifact] + scores[relatedArtifact] * boost;
                }
            }
            // flatten the lookup table back into links
            TLLinksList boosted = new TLLinksList();
            foreach (KeyValuePair<string, Dictionary<string, double>> sourceEntry in scoresBySource)
            {
                foreach (KeyValuePair<string, double> targetEntry in sourceEntry.Value)
                {
                    boosted.Add(new TLSingleLink(sourceEntry.Key, targetEntry.Key, targetEntry.Value));
                }
            }
            boosted.Sort();
            TLSimilarityMatrix result = new TLSimilarityMatrix();
            foreach (TLSingleLink adjusted in boosted)
            {
                result.AddLink(adjusted.SourceArtifactId, adjusted.TargetArtifactId, adjusted.Score);
            }
            return result;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Builds a new similarity matrix in which, for every link (source, target) of
        /// <paramref name="matrix"/> and every artifact that <paramref name="relationships"/>
        /// reports as related to that target, the score of (source, related artifact) is
        /// increased by itself times a boost factor (the computed delta when UseDelta is
        /// defined, otherwise 0.1).
        /// </summary>
        /// <param name="matrix">Matrix whose scores are adjusted</param>
        /// <param name="relationships">Matrix queried for the artifacts related to each target</param>
        /// <returns>New matrix holding the adjusted scores</returns>
        public static TLSimilarityMatrix Compute(TLSimilarityMatrix matrix, TLSimilarityMatrix relationships)
        {
            // Lookup table: source id -> (target id -> score)
            Dictionary<string, Dictionary<string, double>> scoresBySource = new Dictionary<string, Dictionary<string, double>>();
            foreach (TLSingleLink original in matrix.AllLinks)
            {
                Dictionary<string, double> targets;
                if (!scoresBySource.TryGetValue(original.SourceArtifactId, out targets))
                {
                    targets = new Dictionary<string, double>();
                    scoresBySource.Add(original.SourceArtifactId, targets);
                }
                targets.Add(original.TargetArtifactId, original.Score);
            }
#if UseDelta
            // boost factor computed from the matrix
            double boost = SharedUtils.ComputeDelta(matrix);
#else
            // fixed boost factor
            double boost = 0.1;
#endif
            // visit every (source, target) pair
            TLLinksList ordered = matrix.AllLinks;
            ordered.Sort();
            foreach (TLSingleLink current in ordered)
            {
                // every artifact related to the current target gets its (source, related) score boosted
                foreach (string relatedArtifact in relationships.GetSetOfTargetArtifactIdsAboveThresholdForSourceArtifact(current.TargetArtifactId))
                {
                    // the indexer read fails if (source, related) is not a pair of the input matrix
                    Dictionary<string, double> scores = scoresBySource[current.SourceArtifactId];
                    scores[relatedArtifact] = scores[relatedArtifact] + scores[relatedArtifact] * boost;
                }
            }
            // flatten the lookup table back into links
            TLLinksList boosted = new TLLinksList();
            foreach (KeyValuePair<string, Dictionary<string, double>> sourceEntry in scoresBySource)
            {
                foreach (KeyValuePair<string, double> targetEntry in sourceEntry.Value)
                {
                    boosted.Add(new TLSingleLink(sourceEntry.Key, targetEntry.Key, targetEntry.Value));
                }
            }
            boosted.Sort();
            TLSimilarityMatrix result = new TLSimilarityMatrix();
            foreach (TLSingleLink adjusted in boosted)
            {
                result.AddLink(adjusted.SourceArtifactId, adjusted.TargetArtifactId, adjusted.Score);
            }
            return result;
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Removes a percentage of links from the bottom of the list.
        /// The input list is sorted in place with <c>TLLinksList.Sort()</c>; the first
        /// <c>floor(Count * (1 - percent))</c> links are copied into a new list.
        /// </summary>
        /// <param name="links">Ranklist (sorted in place by this call)</param>
        /// <param name="percent">Fraction to remove, strictly between 0 and 1</param>
        /// <returns>Trimmed ranklist made of copies of the kept links</returns>
        /// <exception cref="DevelopmentKitException">If <paramref name="percent"/> is not strictly between 0 and 1.</exception>
        public static TLLinksList RemoveBottomPercentage(TLLinksList links, double percent)
        {
            if (percent <= 0.0 || percent >= 1.0)
            {
                throw new DevelopmentKitException("Percentage level must be between 0 and 1.");
            }
            TLLinksList remaining = new TLLinksList();

            links.Sort();

            // Number of links to keep. The previous code subtracted 1 here and also used an
            // exclusive loop bound, so it dropped one link more than requested
            // (10 links at 20% kept 7 instead of 8).
            int keepCount = Convert.ToInt32(Math.Floor(links.Count * (1 - percent)));

            for (int i = 0; i < keepCount; i++)
            {
                TLSingleLink link = links[i];
                remaining.Add(new TLSingleLink(link.SourceArtifactId, link.TargetArtifactId, link.Score));
            }
            return remaining;
        }
Ejemplo n.º 13
0
 /// <summary>
 /// Returns links for the desired recall level.
 /// The candidate links are sorted with <c>TLLinksList.Sort()</c> and collected from the
 /// front until the number of collected links that the oracle reports via
 /// <c>IsLinkAboveThreshold</c> reaches <c>oracle.Count * RecallLevelUtil.RecallValue(level)</c>,
 /// or the candidates run out.
 /// </summary>
 /// <param name="sims">Candidate matrix</param>
 /// <param name="oracle">Answer matrix</param>
 /// <param name="level">Desired recall level</param>
 /// <returns>List of links at desired recall</returns>
 public static TLLinksList GetLinksAtRecall(TLSimilarityMatrix sims, TLSimilarityMatrix oracle, RecallLevel level)
 {
     double totalCorrect = oracle.Count * RecallLevelUtil.RecallValue(level);
     int numCorrect = 0;
     TLLinksList list = new TLLinksList();
     TLLinksList links = sims.AllLinks;
     links.Sort();
     // Walk the sorted list by index. Repeatedly removing element 0 shifts the whole
     // list on every step (quadratic) and empties the list obtained from the matrix.
     for (int i = 0; i < links.Count && numCorrect < totalCorrect; i++)
     {
         TLSingleLink link = links[i];
         if (oracle.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
         {
             numCorrect++;
         }
         // Incorrect links are kept too: the result is the ranked prefix up to the cut-off.
         list.Add(link);
     }
     return list;
 }
Ejemplo n.º 14
0
        /// <summary>
        /// Returns links for the desired recall level.
        /// The candidate links are sorted with <c>TLLinksList.Sort()</c> and collected from the
        /// front until the number of collected links that the oracle reports via
        /// <c>IsLinkAboveThreshold</c> reaches <c>oracle.Count * RecallLevelUtil.RecallValue(level)</c>,
        /// or the candidates run out.
        /// </summary>
        /// <param name="sims">Candidate matrix</param>
        /// <param name="oracle">Answer matrix</param>
        /// <param name="level">Desired recall level</param>
        /// <returns>List of links at desired recall</returns>
        public static TLLinksList GetLinksAtRecall(TLSimilarityMatrix sims, TLSimilarityMatrix oracle, RecallLevel level)
        {
            double totalCorrect = oracle.Count * RecallLevelUtil.RecallValue(level);
            int numCorrect = 0;
            TLLinksList list = new TLLinksList();
            TLLinksList links = sims.AllLinks;

            links.Sort();

            // Walk the sorted list by index. Repeatedly removing element 0 shifts the whole
            // list on every step (quadratic) and empties the list obtained from the matrix.
            for (int i = 0; i < links.Count && numCorrect < totalCorrect; i++)
            {
                TLSingleLink link = links[i];
                if (oracle.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
                {
                    numCorrect++;
                }
                // Incorrect links are kept too: the result is the ranked prefix up to the cut-off.
                list.Add(link);
            }
            return list;
        }
Ejemplo n.º 15
0
 /// <summary>
 /// Scores every (source document, target document) pair of the equalized matrices with
 /// <c>DocumentSimilarity</c> (Jensen-Shannon divergence, per the original description).
 /// </summary>
 /// <param name="source">Source artifacts collection</param>
 /// <param name="target">Target artifacts collection</param>
 /// <returns>Similarity matrix with one link per document pair</returns>
 public static TLSimilarityMatrix Compute(TermDocumentMatrix source, TermDocumentMatrix target)
 {
     List<TermDocumentMatrix> equalized = TermDocumentMatrix.Equalize(source, target);
     TLSimilarityMatrix result = new TLSimilarityMatrix();
     int sourceIndex = 0;
     while (sourceIndex < equalized[0].NumDocs)
     {
         // Score this source document against every target document.
         TLLinksList rowLinks = new TLLinksList();
         int targetIndex = 0;
         while (targetIndex < equalized[1].NumDocs)
         {
             rowLinks.Add(new TLSingleLink(
                 equalized[0].GetDocumentName(sourceIndex),
                 equalized[1].GetDocumentName(targetIndex),
                 DocumentSimilarity(equalized[0].GetDocument(sourceIndex), equalized[1].GetDocument(targetIndex))));
             targetIndex++;
         }
         // Each row is sorted before its links go into the result.
         rowLinks.Sort();
         foreach (TLSingleLink sorted in rowLinks)
         {
             result.AddLink(sorted.SourceArtifactId, sorted.TargetArtifactId, sorted.Score);
         }
         sourceIndex++;
     }
     return result;
 }
Ejemplo n.º 16
0
        /// <summary>
        /// Scores every (source document, target document) pair of the equalized matrices with
        /// <c>DocumentSimilarity</c> (Jensen-Shannon divergence, per the original description).
        /// </summary>
        /// <param name="source">Source artifacts collection</param>
        /// <param name="target">Target artifacts collection</param>
        /// <returns>Similarity matrix with one link per document pair</returns>
        public static TLSimilarityMatrix Compute(TermDocumentMatrix source, TermDocumentMatrix target)
        {
            List<TermDocumentMatrix> equalized = TermDocumentMatrix.Equalize(source, target);
            TLSimilarityMatrix result = new TLSimilarityMatrix();

            int sourceIndex = 0;
            while (sourceIndex < equalized[0].NumDocs)
            {
                // Score this source document against every target document.
                TLLinksList rowLinks = new TLLinksList();
                int targetIndex = 0;
                while (targetIndex < equalized[1].NumDocs)
                {
                    rowLinks.Add(new TLSingleLink(
                        equalized[0].GetDocumentName(sourceIndex),
                        equalized[1].GetDocumentName(targetIndex),
                        DocumentSimilarity(equalized[0].GetDocument(sourceIndex), equalized[1].GetDocument(targetIndex))));
                    targetIndex++;
                }
                // Each row is sorted before its links go into the result.
                rowLinks.Sort();
                foreach (TLSingleLink sorted in rowLinks)
                {
                    result.AddLink(sorted.SourceArtifactId, sorted.TargetArtifactId, sorted.Score);
                }
                sourceIndex++;
            }
            return result;
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Returns the first <paramref name="topN"/> links of the matrix after sorting them
        /// with <c>TLLinksList.Sort()</c>.
        /// </summary>
        /// <param name="matrix">Matrix to take links from</param>
        /// <param name="topN">Number of links to return; at least 1 and at most the number of links in the matrix</param>
        /// <returns>List of top N links</returns>
        /// <exception cref="DevelopmentKitException">If the matrix has fewer than <paramref name="topN"/> links, or <paramref name="topN"/> is less than 1.</exception>
        public static TLLinksList GetTopNLinks(TLSimilarityMatrix matrix, int topN)
        {
            TLLinksList ranked = matrix.AllLinks;
            int available = ranked.Count;

            // Same check order as before: the size check comes first, then the lower bound.
            if (available < topN)
            {
                throw new DevelopmentKitException("Matrix only has " + available + " links (" + topN + " requested).");
            }
            if (topN < 1)
            {
                throw new DevelopmentKitException("topN must be greater than 0.");
            }

            ranked.Sort();

            TLLinksList top = new TLLinksList();
            int taken = 0;
            while (taken < topN)
            {
                top.Add(ranked[taken]);
                taken++;
            }
            return top;
        }
Ejemplo n.º 18
0
 /// <summary>
 /// Computes the cosine similarity between each boolean document vector and each
 /// tf-idf weighted document vector, term by term over the equalized matrices.
 /// A pair whose norm product is exactly zero is scored 0.0.
 /// </summary>
 /// <param name="ids">Boolean document vectors</param>
 /// <param name="tfidf">tf-idf weighted document vectors</param>
 /// <returns>Similarity matrix with one link per (ids document, tfidf document) pair</returns>
 private static TLSimilarityMatrix ComputeSimilarities(TermDocumentMatrix ids, TermDocumentMatrix tfidf)
 {
     TLSimilarityMatrix result = new TLSimilarityMatrix();
     List<TermDocumentMatrix> equalized = TermDocumentMatrix.Equalize(ids, tfidf);
     for (int row = 0; row < ids.NumDocs; row++)
     {
         TLLinksList rowLinks = new TLLinksList();
         for (int col = 0; col < tfidf.NumDocs; col++)
         {
             // Accumulate the dot product and both squared norms in one pass over the terms.
             double dot = 0.0;
             double leftSquares = 0.0;
             double rightSquares = 0.0;
             for (int term = 0; term < equalized[0].NumTerms; term++)
             {
                 double left = equalized[0][row, term];
                 double right = equalized[1][col, term];
                 dot += (left * right);
                 leftSquares += Math.Pow(left, 2);
                 rightSquares += Math.Pow(right, 2);
             }
             double normProduct = Math.Sqrt(leftSquares) * Math.Sqrt(rightSquares);
             // Exact-zero check guards the division only.
             double score = (normProduct == 0.0) ? 0.0 : dot / normProduct;
             rowLinks.Add(new TLSingleLink(ids.GetDocumentName(row), tfidf.GetDocumentName(col), score));
         }
         // Each row is sorted before its links go into the result.
         rowLinks.Sort();
         foreach (TLSingleLink sorted in rowLinks)
         {
             result.AddLink(sorted.SourceArtifactId, sorted.TargetArtifactId, sorted.Score);
         }
     }
     return result;
 }
 /// <summary>
 /// Requests the links at recall level 1.0 for the <c>sims</c> / <c>oracle</c>
 /// fixtures and checks the count and every returned link, position by position.
 /// </summary>
 public void GetLinksAtRecall100()
 {
     TLLinksList actual = TLSimilarityMatrixUtil.GetLinksAtRecall(sims, oracle, 1.0);
     #if Verbose
     Console.WriteLine("TLSimilarityMatrixUtilTest.GetLinksAtRecall100()");
     foreach (TLSingleLink printed in actual)
     {
         Console.WriteLine("{0}\t{1}\t{2}", printed.SourceArtifactId, printed.TargetArtifactId, printed.Score);
     }
     #endif
     Assert.AreEqual(9, actual.Count);

     // Every expected link has source "A"; targets and scores are paired by position.
     string[] targets = { "B*", "E", "F", "C*", "G", "H", "I", "J", "D*" };
     double[] scores = { 10, 9, 8, 7, 6, 5, 4, 3, 2 };
     TLLinksList expected = new TLLinksList();
     for (int k = 0; k < targets.Length; k++)
     {
         expected.Add(new TLSingleLink("A", targets[k], scores[k]));
     }

     int position = 0;
     foreach (TLSingleLink wanted in expected)
     {
         Assert.AreEqual(wanted, actual[position]);
         position++;
     }
 }
        /// <summary>
        /// Returns the links of one source artifact whose score is strictly greater than
        /// <c>Threshold</c>. The result is stored in <c>CacheOfLinksPerSourceArtifacts</c>
        /// on first request and served from that cache afterwards.
        /// </summary>
        /// <param name="sourceArtifactId">Id of the source artifact whose links are requested</param>
        /// <returns>
        /// List of links from the given source artifact scoring above the threshold;
        /// an empty list when the source artifact has no entry in the matrix or no link qualifies.
        /// </returns>
        public TLLinksList GetLinksAboveThresholdForSourceArtifact(string sourceArtifactId)
        {
            // Fast path: already computed for this source artifact.
            TLLinksList cached;
            if (CacheOfLinksPerSourceArtifacts.TryGetValue(sourceArtifactId, out cached))
            {
                return cached;
            }

            TLLinksList qualifying = new TLLinksList();

            Dictionary<string, double> scoresByTarget;
            if (m_matrix.TryGetValue(sourceArtifactId, out scoresByTarget))
            {
                foreach (KeyValuePair<string, double> entry in scoresByTarget)
                {
                    if (entry.Value > Threshold)
                    {
                        qualifying.Add(new TLSingleLink(sourceArtifactId, entry.Key, entry.Value));
                    }
                }
            }

            // Cache the result even when it is empty, so unknown ids are not recomputed.
            CacheOfLinksPerSourceArtifacts.Add(sourceArtifactId, qualifying);
            return qualifying;
        }
Ejemplo n.º 21
0
 /// <summary>
 /// Removes a percentage of links from the top of the list.
 /// The input list is sorted in place with <c>TLLinksList.Sort()</c>; the first
 /// <c>ceil(Count * percent)</c> links are skipped and the rest are copied into a new list.
 /// </summary>
 /// <param name="links">Ranklist (sorted in place by this call)</param>
 /// <param name="percent">Fraction to remove, strictly between 0 and 1</param>
 /// <returns>Trimmed ranklist made of copies of the kept links; empty when the input is empty</returns>
 /// <exception cref="DevelopmentKitException">If <paramref name="percent"/> is not strictly between 0 and 1.</exception>
 public static TLLinksList RemoveTopPercentage(TLLinksList links, double percent)
 {
     if (percent <= 0.0 || percent >= 1.0)
     {
         throw new DevelopmentKitException("Percentage level must be between 0 and 1.");
     }
     TLLinksList remaining = new TLLinksList();
     links.Sort();
     // Index of the first link to keep, i.e. the number of links removed. The previous
     // code subtracted 1 here, which removed one link fewer than requested (10 links at
     // 20% kept 9 instead of 8) and produced index -1 for an empty list.
     int startIndex = Convert.ToInt32(Math.Ceiling(links.Count * percent));
     for (int i = startIndex; i < links.Count; i++)
     {
         TLSingleLink link = links[i];
         remaining.Add(new TLSingleLink(link.SourceArtifactId, link.TargetArtifactId, link.Score));
     }
     return remaining;
 }
Ejemplo n.º 22
0
 /// <summary>
 /// Returns the first <paramref name="topN"/> links of the matrix after sorting them
 /// with <c>TLLinksList.Sort()</c>.
 /// </summary>
 /// <param name="matrix">Matrix to take links from</param>
 /// <param name="topN">Number of links to return; at least 1 and at most the number of links in the matrix</param>
 /// <returns>List of top N links</returns>
 /// <exception cref="DevelopmentKitException">If the matrix has fewer than <paramref name="topN"/> links, or <paramref name="topN"/> is less than 1.</exception>
 public static TLLinksList GetTopNLinks(TLSimilarityMatrix matrix, int topN)
 {
     TLLinksList ranked = matrix.AllLinks;
     int available = ranked.Count;
     // Same check order as before: the size check comes first, then the lower bound.
     if (available < topN)
     {
         throw new DevelopmentKitException("Matrix only has " + available + " links (" + topN + " requested).");
     }
     if (topN < 1)
     {
         throw new DevelopmentKitException("topN must be greater than 0.");
     }
     ranked.Sort();
     TLLinksList top = new TLLinksList();
     int taken = 0;
     while (taken < topN)
     {
         top.Add(ranked[taken]);
         taken++;
     }
     return top;
 }
Ejemplo n.º 23
0
 /// <summary>
 /// Returns links for the desired recall level.
 /// The candidate links are sorted with <c>TLLinksList.Sort()</c> and collected from
 /// the front until the number of collected links that the answer matrix reports via
 /// <c>IsLinkAboveThreshold</c> reaches <c>answerMatrix.Count * level</c>, or the
 /// candidates run out.
 /// </summary>
 /// <param name="matrix">Candidate matrix</param>
 /// <param name="answerMatrix">Answer matrix</param>
 /// <param name="level">Desired recall level, greater than 0 and at most 1</param>
 /// <returns>List of links at desired recall</returns>
 /// <exception cref="DevelopmentKitException">If <paramref name="level"/> is not in (0, 1].</exception>
 public static TLLinksList GetLinksAtRecall(TLSimilarityMatrix matrix, TLSimilarityMatrix answerMatrix, double level)
 {
     if (level <= 0.0 || level > 1.0)
     {
         throw new DevelopmentKitException("Recall level must be between 0 and 1.");
     }
     double totalCorrect = answerMatrix.Count * level;
     int numCorrect = 0;
     TLLinksList links = matrix.AllLinks;
     links.Sort();
     TLLinksList newLinks = new TLLinksList();
     // Walk the sorted list by index. Repeatedly removing element 0 shifts the whole
     // list on every step (quadratic) and empties the list obtained from the matrix.
     for (int i = 0; i < links.Count && numCorrect < totalCorrect; i++)
     {
         TLSingleLink link = links[i];
         if (answerMatrix.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
         {
             numCorrect++;
         }
         // Incorrect links are kept too: the result is the ranked prefix up to the cut-off.
         newLinks.Add(link);
     }
     return newLinks;
 }
Ejemplo n.º 24
0
 /// <summary>
 /// Collects every link of the matrix whose score is strictly greater than the
 /// given threshold.
 /// </summary>
 /// <param name="matrix">Matrix whose links are filtered</param>
 /// <param name="threshold">Score threshold (exclusive)</param>
 /// <returns>List of links scoring above the threshold, in the order the matrix yields them</returns>
 public static TLLinksList GetLinksAboveThreshold(TLSimilarityMatrix matrix, double threshold)
 {
     TLLinksList selected = new TLLinksList();
     foreach (TLSingleLink candidate in matrix.AllLinks)
     {
         // Skip anything that is not strictly above the threshold.
         if (!(candidate.Score > threshold))
         {
             continue;
         }
         selected.Add(candidate);
     }
     return selected;
 }
 /// <summary>
 /// Requests the top 4 links of the <c>sims</c> fixture from
 /// <c>TLSimilarityMatrixUtil.GetTopNLinks</c> and checks the count and every
 /// returned link, position by position.
 /// </summary>
 public void GetTopNLinks()
 {
     TLLinksList actual = TLSimilarityMatrixUtil.GetTopNLinks(sims, 4);
     #if Verbose
     Console.WriteLine("TLSimilarityMatrixUtilTest.GetTopNLinks()");
     foreach (TLSingleLink printed in actual)
     {
         Console.WriteLine("{0}\t{1}\t{2}", printed.SourceArtifactId, printed.TargetArtifactId, printed.Score);
     }
     #endif
     Assert.AreEqual(4, actual.Count);

     // Every expected link has source "A"; targets and scores are paired by position.
     string[] targets = { "B*", "E", "F", "C*" };
     double[] scores = { 10, 9, 8, 7 };
     TLLinksList expected = new TLLinksList();
     for (int k = 0; k < targets.Length; k++)
     {
         expected.Add(new TLSingleLink("A", targets[k], scores[k]));
     }

     int position = 0;
     foreach (TLSingleLink wanted in expected)
     {
         Assert.AreEqual(wanted, actual[position]);
         position++;
     }
 }