public void GetLinksAtRecall100()
        {
            // Act: ask for the links needed to reach a recall level of 1.0, using the
            // fixture matrices `sims` and `oracle` that are defined outside this method.
            TLLinksList actual = TLSimilarityMatrixUtil.GetLinksAtRecall(sims, oracle, 1.0);

            #if Verbose
            Console.WriteLine("TLSimilarityMatrixUtilTest.GetLinksAtRecall100()");
            foreach (TLSingleLink link in actual)
            {
                Console.WriteLine("{0}\t{1}\t{2}",
                                  link.SourceArtifactId,
                                  link.TargetArtifactId,
                                  link.Score
                                  );
            }
            #endif

            // Expected ranklist, in the order the utility is expected to return it.
            TLLinksList expected = new TLLinksList();
            expected.Add(new TLSingleLink("A", "B*", 10));
            expected.Add(new TLSingleLink("A", "E", 9));
            expected.Add(new TLSingleLink("A", "F", 8));
            expected.Add(new TLSingleLink("A", "C*", 7));
            expected.Add(new TLSingleLink("A", "G", 6));
            expected.Add(new TLSingleLink("A", "H", 5));
            expected.Add(new TLSingleLink("A", "I", 4));
            expected.Add(new TLSingleLink("A", "J", 3));
            expected.Add(new TLSingleLink("A", "D*", 2));

            // Assert: the size is checked first so the element-wise comparison cannot run off the end.
            Assert.AreEqual(9, actual.Count);
            for (int position = 0; position < expected.Count; position++)
            {
                Assert.AreEqual(expected[position], actual[position]);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Returns the top-ranked candidate links needed to reach the desired recall level.
        /// </summary>
        /// <remarks>
        /// Candidates are visited in the order produced by <c>TLLinksList.Sort()</c>. Every visited link is
        /// returned, whether or not the answer matrix confirms it. Visiting stops as soon as the number of
        /// confirmed links reaches <c>answerMatrix.Count * level</c>, or when the candidates run out.
        /// </remarks>
        /// <param name="matrix">Candidate matrix</param>
        /// <param name="answerMatrix">Answer matrix</param>
        /// <param name="level">Desired recall level, greater than 0 and at most 1</param>
        /// <returns>List of links at desired recall</returns>
        /// <exception cref="DevelopmentKitException">
        /// Thrown when <paramref name="level"/> is NaN or outside the range (0, 1].
        /// </exception>
        public static TLLinksList GetLinksAtRecall(TLSimilarityMatrix matrix, TLSimilarityMatrix answerMatrix, double level)
        {
            // NaN fails both range comparisons, so it has to be rejected explicitly;
            // otherwise it would silently produce an empty result.
            if (double.IsNaN(level) || level <= 0.0 || level > 1.0)
            {
                throw new DevelopmentKitException("Recall level must be between 0 and 1.");
            }

            double      totalCorrect = answerMatrix.Count * level;
            int         numCorrect   = 0;
            TLLinksList links        = matrix.AllLinks;
            TLLinksList newLinks     = new TLLinksList();

            links.Sort();

            // Walk the ranked list by index instead of repeatedly removing the head element,
            // which costs a shift of the remaining elements on every step for array-backed lists.
            // NOTE(review): the consumed links are therefore no longer removed from the list returned
            // by AllLinks; this assumes AllLinks hands out a list the caller may keep intact - confirm.
            for (int i = 0; i < links.Count && numCorrect < totalCorrect; i++)
            {
                TLSingleLink link = links[i];
                if (answerMatrix.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
                {
                    numCorrect++;
                }
                newLinks.Add(link);
            }
            return newLinks;
        }
        public void GetLinksAboveThresholdProvided()
        {
            // Act: filter the fixture matrix `sims` (defined outside this method) with an explicit
            // threshold of 4, then sort so the result can be compared position by position.
            TLLinksList actual = TLSimilarityMatrixUtil.GetLinksAboveThreshold(sims, 4);
            actual.Sort();

            #if Verbose
            Console.WriteLine("TLSimilarityMatrixUtilTest.GetLinksAboveThresholdProvided()");
            foreach (TLSingleLink link in actual)
            {
                Console.WriteLine("{0}\t{1}\t{2}",
                                  link.SourceArtifactId,
                                  link.TargetArtifactId,
                                  link.Score
                                  );
            }
            #endif

            // Expected links, highest score first.
            TLLinksList expected = new TLLinksList();
            expected.Add(new TLSingleLink("A", "B*", 10));
            expected.Add(new TLSingleLink("A", "E", 9));
            expected.Add(new TLSingleLink("A", "F", 8));
            expected.Add(new TLSingleLink("A", "C*", 7));
            expected.Add(new TLSingleLink("A", "G", 6));
            expected.Add(new TLSingleLink("A", "H", 5));

            // Assert: size first, then each position.
            Assert.AreEqual(6, actual.Count);
            for (int position = 0; position < expected.Count; position++)
            {
                Assert.AreEqual(expected[position], actual[position]);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Computes per-source precision, recall and average precision for the links of
        /// <paramref name="sims"/> retrieved at the given recall level, plus their mean average precision.
        /// </summary>
        /// <remarks>
        /// The ratios are plain floating-point divisions: a source with no retrieved links, or with no
        /// oracle links above the oracle's threshold, yields NaN rather than an exception.
        /// </remarks>
        /// <param name="sims">Candidate similarity matrix</param>
        /// <param name="oracle">Answer matrix</param>
        /// <param name="recall">Recall level at which the candidate links are cut off</param>
        /// <returns>Per-source metric pairs and a single "#TOTAL" mean-average-precision entry</returns>
        public static DataSetPairs Compute(TLSimilarityMatrix sims, TLSimilarityMatrix oracle, RecallLevel recall)
        {
            TLSimilarityMatrix candidates = Similarities.CreateMatrix(MetricsUtil.GetLinksAtRecall(sims, oracle, recall));

            // Lowest possible threshold so that no retained link is filtered out below.
            candidates.Threshold = double.MinValue;
            DataSetPairs result = new DataSetPairs();

            foreach (string sourceId in oracle.SourceArtifactsIds)
            {
                TLLinksList ranked = candidates.GetLinksAboveThresholdForSourceArtifact(sourceId);
                ranked.Sort();
                int    relevantTotal = oracle.GetLinksAboveThresholdForSourceArtifact(sourceId).Count;
                int    hits          = 0;
                double precisionSum  = 0.0;

                // rank is the 1-based position in the sorted ranklist.
                for (int rank = 1; rank <= ranked.Count; rank++)
                {
                    TLSingleLink candidate = ranked[rank - 1];
                    if (!oracle.IsLinkAboveThreshold(candidate.SourceArtifactId, candidate.TargetArtifactId))
                    {
                        continue;
                    }
                    hits++;
                    precisionSum += hits / (double)rank;
                }

                double precision        = hits / Convert.ToDouble(ranked.Count);
                double recallValue      = Convert.ToDouble(hits) / relevantTotal;
                double averagePrecision = precisionSum / relevantTotal;

                result.PrecisionData.Add(new KeyValuePair <string, double>(sourceId, precision));
                result.RecallData.Add(new KeyValuePair <string, double>(sourceId, recallValue));
                result.AveragePrecisionData.Add(new KeyValuePair <string, double>(sourceId, averagePrecision));
            }

            double meanAveragePrecision = DataSetPairsCollection.CalculateAverage(result.AveragePrecisionData);
            result.MeanAveragePrecisionData.Add(new KeyValuePair <string, double>("#TOTAL", meanAveragePrecision));
            return result;
        }
 public void GetLinksAboveThresholdDefault()
 {
     // Arrange: set the threshold on the fixture matrix itself (`sims` is defined outside this method).
     sims.Threshold = 4;

     // Act: call the overload without an explicit threshold, then sort for positional comparison.
     TLLinksList actual = TLSimilarityMatrixUtil.GetLinksAboveThreshold(sims);
     actual.Sort();

     #if Verbose
     Console.WriteLine("TLSimilarityMatrixUtilTest.GetLinksAboveThresholdDefault()");
     foreach (TLSingleLink link in actual)
     {
         Console.WriteLine("{0}\t{1}\t{2}",
             link.SourceArtifactId,
             link.TargetArtifactId,
             link.Score
         );
     }
     #endif

     // Expected links, highest score first.
     TLLinksList expected = new TLLinksList();
     expected.Add(new TLSingleLink("A", "B*", 10));
     expected.Add(new TLSingleLink("A", "E", 9));
     expected.Add(new TLSingleLink("A", "F", 8));
     expected.Add(new TLSingleLink("A", "C*", 7));
     expected.Add(new TLSingleLink("A", "G", 6));
     expected.Add(new TLSingleLink("A", "H", 5));

     // Assert: size first, then each position.
     Assert.AreEqual(6, actual.Count);
     for (int position = 0; position < expected.Count; position++)
     {
         Assert.AreEqual(expected[position], actual[position]);
     }
 }
Ejemplo n.º 6
0
 /// <summary>
 /// Computes cosine similarities between two TermDocumentMatrices.
 /// Cosine similarity is defined as (dot product) / (length * length)
 /// </summary>
 /// <remarks>
 /// Both matrices are first passed through <c>TermDocumentMatrix.Equalize</c>. A pair whose length
 /// product is exactly zero gets a score of 0.0 instead of a division by zero. Links are added to the
 /// result one source document at a time, in the order produced by <c>TLLinksList.Sort()</c>.
 /// </remarks>
 /// <param name="m1">Binary document matrix</param>
 /// <param name="m2">tf-idf weighted document matrix</param>
 /// <returns>Similarity matrix</returns>
 public static TLSimilarityMatrix ComputeCosine(TermDocumentMatrix m1, TermDocumentMatrix m2)
 {
     TLSimilarityMatrix sims = new TLSimilarityMatrix();
     List<TermDocumentMatrix> matrices = TermDocumentMatrix.Equalize(m1, m2);
     TermDocumentMatrix sources = matrices[0];
     TermDocumentMatrix targets = matrices[1];

     // A target document's length does not depend on the source document, so compute each one
     // once up front instead of once per (source, target) pair.
     // NOTE(review): assumes ComputeLength is a pure function of the document - confirm.
     double[] targetLengths = new double[m2.NumDocs];
     for (int j = 0; j < targetLengths.Length; j++)
     {
         targetLengths[j] = ComputeLength(targets.GetDocument(j));
     }

     for (int i = 0; i < m1.NumDocs; i++)
     {
         // Likewise, the source document's name and length are invariant across the inner loop.
         string sourceName = m1.GetDocumentName(i);
         double sourceLength = ComputeLength(sources.GetDocument(i));

         TLLinksList links = new TLLinksList();
         for (int j = 0; j < m2.NumDocs; j++)
         {
             double lengthProduct = sourceLength * targetLengths[j];
             if (lengthProduct == 0.0)
             {
                 // At least one zero-length document: define the similarity as 0.
                 links.Add(new TLSingleLink(sourceName, m2.GetDocumentName(j), 0.0));
             }
             else
             {
                 double dotProduct = ComputeDotProduct(sources.GetDocument(i), targets.GetDocument(j));
                 links.Add(new TLSingleLink(sourceName, m2.GetDocumentName(j), dotProduct / lengthProduct));
             }
         }
         links.Sort();
         foreach (TLSingleLink link in links)
         {
             sims.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
         }
     }
     return sims;
 }
Ejemplo n.º 7
0
 /// <summary>
 /// Builds a similarity matrix containing every link of the given list.
 /// </summary>
 /// <param name="list">Links to copy into the matrix</param>
 /// <returns>New similarity matrix populated through <c>AddLink</c>, in list order</returns>
 public static TLSimilarityMatrix CreateMatrix(TLLinksList list)
 {
     TLSimilarityMatrix result = new TLSimilarityMatrix();
     for (int i = 0; i < list.Count; i++)
     {
         TLSingleLink entry = list[i];
         result.AddLink(entry.SourceArtifactId, entry.TargetArtifactId, entry.Score);
     }
     return result;
 }
Ejemplo n.º 8
0
        /// <summary>
        /// Refreshes the description box, the target-artifact list and the satisfaction radio buttons
        /// when the selection in the source-artifact list changes.
        /// </summary>
        /// <param name="sender">Event sender (unused)</param>
        /// <param name="e">Event data; the text of <c>e.Item</c> is used as the source artifact id</param>
        private void lsv_originalSourceArtifacts_ItemSelectionChanged(object sender, ListViewItemSelectionChangedEventArgs e)
        {
            // Nothing to display while no source artifact is selected.
            if (lsv_originalSourceArtifacts.SelectedItems.Count <= 0)
            {
                return;
            }

            string sourceId = e.Item.Text;
            rtb_sourceArtifactsDescrpition.Text = originalSourceArtifacts[sourceId].Text;

            // Rebuild the target list from the links returned for this source.
            lsv_originalTargetArtifacts.Items.Clear();
            TLLinksList targetLinks = extendedSimilarityMatrix.GetLinksAboveThresholdForSourceArtifact(sourceId);

            lsv_originalTargetArtifacts.BeginUpdate();
            ListViewItem[] rows = new ListViewItem[targetLinks.Count];
            for (int i = 0; i < targetLinks.Count; i++)
            {
                // One row per link: target id in the first column, score with five decimals in the second.
                ListViewItem row = new ListViewItem(targetLinks[i].TargetArtifactId);
                rows[i] = row;
                row.SubItems.Add(targetLinks[i].Score.ToString("F5"));
            }
            lsv_originalTargetArtifacts.Items.AddRange(rows);
            lsv_originalTargetArtifacts.EndUpdate();

            // Pre-select the first target, when there is one.
            if (lsv_originalTargetArtifacts.Items.Count > 0)
            {
                lsv_originalTargetArtifacts.Items[0].Selected = true;
            }

            // Mirror the stored satisfaction decision for this source onto the radio buttons.
            SimilarityMatrixUserFeedback.sourceSatisfactionState decision = extendedSimilarityMatrix.getSourceSatisfactionDecision(sourceId);

            if (decision == SimilarityMatrixUserFeedback.sourceSatisfactionState.notSatisfied)
            {
                rdb_satisfactionUnsatisfied.Checked = true;
            }
            else if (decision == SimilarityMatrixUserFeedback.sourceSatisfactionState.satisfied)
            {
                rdb_satisfactionSatisfied.Checked = true;
            }
            else if (decision == SimilarityMatrixUserFeedback.sourceSatisfactionState.undecided)
            {
                rdb_satisfactionUndecided.Checked = true;
            }
            else if (decision == SimilarityMatrixUserFeedback.sourceSatisfactionState.notSet)
            {
                // No decision recorded: clear all three radios (same order as a chained assignment).
                rdb_satisfactionUndecided.Checked = false;
                rdb_satisfactionSatisfied.Checked = false;
                rdb_satisfactionUnsatisfied.Checked = false;
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Collects the distinct target artifact ids that occur in a ranklist.
        /// </summary>
        /// <param name="links">Ranklist to scan</param>
        /// <returns>Set of target artifact ids</returns>
        public static ISet <string> GetSetOfTargetArtifacts(TLLinksList links)
        {
            HashSet <string> targetIds = new HashSet <string>();

            for (int i = 0; i < links.Count; i++)
            {
                // HashSet.Add ignores ids that are already present.
                targetIds.Add(links[i].TargetArtifactId);
            }
            return targetIds;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Computes the arithmetic mean of the similarity scores in a links list.
        /// </summary>
        /// <param name="list">Links list</param>
        /// <returns>
        /// Average similarity score; NaN for an empty list, because the sum is divided by a count of zero.
        /// </returns>
        public static double AverageSimilarity(TLLinksList list)
        {
            double total = 0;

            for (int i = 0; i < list.Count; i++)
            {
                total += list[i].Score;
            }

            double mean = total / list.Count;
            return mean;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Selects the links of a matrix whose score is strictly greater than a threshold.
        /// </summary>
        /// <param name="matrix">Matrix</param>
        /// <param name="threshold">Score threshold (exclusive)</param>
        /// <returns>List of links above threshold, in the order they appear in <c>matrix.AllLinks</c></returns>
        public static TLLinksList GetLinksAboveThreshold(TLSimilarityMatrix matrix, double threshold)
        {
            TLLinksList selected = new TLLinksList();

            foreach (TLSingleLink candidate in matrix.AllLinks)
            {
                // Links scoring exactly the threshold (or lower) are left out.
                if (!(candidate.Score > threshold))
                {
                    continue;
                }
                selected.Add(candidate);
            }
            return selected;
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Produces a new similarity matrix in which scores are boosted using artifact relationships.
        /// </summary>
        /// <remarks>
        /// For every link (source, target) of <paramref name="matrix"/> and every artifact that
        /// <paramref name="relationships"/> reports as related to that target, the score of
        /// (source, related artifact) is increased by a fraction of its current value: the computed delta
        /// when <c>UseDelta</c> is defined, 10% otherwise. The lookup uses the dictionary indexer, so a
        /// related artifact that has no link from that source raises <c>KeyNotFoundException</c>.
        /// </remarks>
        /// <param name="matrix">Candidate similarity matrix</param>
        /// <param name="relationships">Matrix describing which artifacts are related to each target</param>
        /// <returns>New matrix holding the adjusted scores</returns>
        public static TLSimilarityMatrix Compute(TLSimilarityMatrix matrix, TLSimilarityMatrix relationships)
        {
            // Index the scores as scoresBySource[sourceID][targetID] for direct lookup.
            Dictionary<string, Dictionary<string, double>> scoresBySource = new Dictionary<string, Dictionary<string, double>>();
            foreach (TLSingleLink original in matrix.AllLinks)
            {
                Dictionary<string, double> row;
                if (!scoresBySource.TryGetValue(original.SourceArtifactId, out row))
                {
                    row = new Dictionary<string, double>();
                    scoresBySource.Add(original.SourceArtifactId, row);
                }
                row.Add(original.TargetArtifactId, original.Score);
            }
#if UseDelta
            // Boost factor derived from the matrix itself.
            double delta = SharedUtils.ComputeDelta(matrix);
#endif
            // Visit every (source, target) pair in sorted order.
            TLLinksList ranked = matrix.AllLinks;
            ranked.Sort();
            foreach (TLSingleLink current in ranked)
            {
                // Boost (current source, related artifact) for each artifact related to the current target.
                foreach (string related in relationships.GetSetOfTargetArtifactIdsAboveThresholdForSourceArtifact(current.TargetArtifactId))
                {
                    Dictionary<string, double> targets = scoresBySource[current.SourceArtifactId];
#if UseDelta
                    targets[related] += targets[related] * delta;
#else
                    targets[related] += targets[related] * 0.1;
#endif
                }
            }
            // Flatten the adjusted scores back into a sorted list, then into a matrix.
            TLLinksList boosted = new TLLinksList();
            foreach (KeyValuePair<string, Dictionary<string, double>> sourceEntry in scoresBySource)
            {
                foreach (KeyValuePair<string, double> targetEntry in sourceEntry.Value)
                {
                    boosted.Add(new TLSingleLink(sourceEntry.Key, targetEntry.Key, targetEntry.Value));
                }
            }
            boosted.Sort();
            TLSimilarityMatrix result = new TLSimilarityMatrix();
            foreach (TLSingleLink adjusted in boosted)
            {
                result.AddLink(adjusted.SourceArtifactId, adjusted.TargetArtifactId, adjusted.Score);
            }
            return result;
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Re-ranks the links of <paramref name="sims"/> by repeatedly taking the top-ranked link,
        /// checking it against <paramref name="feedback"/>, and boosting related links that are still pending.
        /// </summary>
        /// <remarks>
        /// The loop ends when the links run out or when the number of confirmed links reaches
        /// <c>feedback.Count</c>. Links still pending at that point are not added to the returned matrix.
        /// </remarks>
        /// <param name="sims">Candidate similarity matrix</param>
        /// <param name="relationships">Matrix queried to decide whether two target artifacts are related</param>
        /// <param name="feedback">Matrix queried to decide whether a candidate link is confirmed</param>
        /// <returns>New matrix containing the links that were processed, with the scores they had when taken</returns>
        public static TLSimilarityMatrix Compute(TLSimilarityMatrix sims, TLSimilarityMatrix relationships, TLSimilarityMatrix feedback)
        {
            // new matrix
            TLSimilarityMatrix newMatrix = new TLSimilarityMatrix();

#if UseDelta
            // compute delta
            double delta = SharedUtils.ComputeDelta(sims);
#endif
            // make sure the entire list is sorted
            TLLinksList links = sims.AllLinks;
            links.Sort();
            // end condition: number of links confirmed by the feedback so far
            int correct = 0;
            // iterate over each source-target pair
            while (links.Count > 0 && correct < feedback.Count)
            {
                // get link at top of list
                TLSingleLink link = links[0];
                // check feedback
                if (feedback.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
                {
                    correct++;
                    // update related links: starts at 1 to skip the link being processed
                    for (int i = 1; i < links.Count; i++)
                    {
                        // only pending links with the same source whose target is related to the confirmed target
                        if (link.SourceArtifactId.Equals(links[i].SourceArtifactId) &&
                            relationships.IsLinkAboveThreshold(link.TargetArtifactId, links[i].TargetArtifactId))
                        {
#if UseDelta
                            // raise the score by delta times its current value
                            links[i].Score += links[i].Score * delta;
#else
                            // raise the score by 10% of its current value
                            links[i].Score += links[i].Score * 0.1;
#endif
                        }
                    }
                }
                // record the processed link in the result, then remove it from the pending list
                newMatrix.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
                links.RemoveAt(0);
                // reorder links, since scores may have changed above
                links.Sort();
            }
            return(newMatrix);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Computes the precision-recall curve of the similarity matrix against the answer matrix.
        /// </summary>
        /// <remarks>
        /// Sets the oracle threshold to 0, sorts all candidate links, and stores one precision and one
        /// recall entry per rank in <c>Results</c>. The keys come from the precision and recall format
        /// strings applied to the 1-based rank.
        /// </remarks>
        protected override void ComputeImplementation()
        {
            _oracle.Threshold = 0;

            TLLinksList ranked = _matrix.AllLinks;
            ranked.Sort();

            Results = new SerializableDictionary <string, double>();
            int hits = 0;
            for (int index = 0; index < ranked.Count; index++)
            {
                TLSingleLink candidate = ranked[index];
                int rank = index + 1;
                if (_oracle.IsLinkAboveThreshold(candidate.SourceArtifactId, candidate.TargetArtifactId))
                {
                    hits++;
                }

                // Precision: hits among the links read so far. Recall: hits over the oracle's link count.
                double precision = hits / (double)rank;
                Results.Add(String.Format(_precisionFormat, rank), precision);
                double recall = hits / (double)_oracle.Count;
                Results.Add(String.Format(_recallFormat, rank), recall);
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Removes a percentage of links from the bottom of the list.
        /// </summary>
        /// <remarks>
        /// The input list is sorted in place. The first <c>floor(links.Count * (1 - percent))</c> links of
        /// the sorted list are copied into the result as new <c>TLSingleLink</c> instances.
        /// </remarks>
        /// <param name="links">Ranklist</param>
        /// <param name="percent">Percentage to remove, strictly between 0 and 1</param>
        /// <returns>Trimmed ranklist</returns>
        /// <exception cref="DevelopmentKitException">
        /// Thrown when <paramref name="percent"/> is not strictly between 0 and 1.
        /// </exception>
        public static TLLinksList RemoveBottomPercentage(TLLinksList links, double percent)
        {
            if (percent <= 0.0 || percent >= 1.0)
            {
                throw new DevelopmentKitException("Percentage level must be between 0 and 1.");
            }
            TLLinksList remaining = new TLLinksList();

            links.Sort();

            // Number of links to keep. The previous code subtracted 1 to get the index of the last kept
            // link but then used that index as an exclusive loop bound, dropping one link too many.
            int keepCount = Convert.ToInt32(Math.Floor(links.Count * (1 - percent)));

            for (int i = 0; i < keepCount; i++)
            {
                TLSingleLink link = links[i];
                remaining.Add(new TLSingleLink(link.SourceArtifactId, link.TargetArtifactId, link.Score));
            }
            return remaining;
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Constructs the feedback matrix from a TLSimilarityMatrix and initialises every source
        /// artifact, and every link returned for it, to the "notSet" state.
        /// </summary>
        /// <param name="matrixIn">Similarity matrix passed to the base constructor</param>
        public SimilarityMatrixUserFeedback(TLSimilarityMatrix matrixIn)
            : base(matrixIn)
        {
            foreach (String sourceId in this.SourceArtifactsIds)
            {
                // No satisfaction decision has been made for this source yet.
                sourceAnswers.Add(sourceId, sourceSatisfactionState.notSet);

                // One undecided entry per target linked to this source.
                Dictionary <string, linkStates> targetStates = new Dictionary <string, linkStates>();
                TLLinksList sourceLinks = GetLinksAboveThresholdForSourceArtifact(sourceId);
                for (int i = 0; i < sourceLinks.Count; i++)
                {
                    targetStates.Add(sourceLinks[i].TargetArtifactId, linkStates.notSet);
                }

                linkDecisions.Add(sourceId, targetStates);
            }
        }
Ejemplo n.º 17
0
 /// <summary>
 /// Returns the top-ranked links of <paramref name="sims"/> needed to reach the given recall level.
 /// </summary>
 /// <remarks>
 /// Links are visited in the order produced by <c>TLLinksList.Sort()</c> and every visited link is
 /// returned. Visiting stops once the number of links confirmed by <paramref name="oracle"/> reaches
 /// <c>oracle.Count * RecallLevelUtil.RecallValue(level)</c>, or when the links run out.
 /// </remarks>
 /// <param name="sims">Candidate similarity matrix</param>
 /// <param name="oracle">Answer matrix</param>
 /// <param name="level">Desired recall level</param>
 /// <returns>List of links at the desired recall</returns>
 public static TLLinksList GetLinksAtRecall(TLSimilarityMatrix sims, TLSimilarityMatrix oracle, RecallLevel level)
 {
     double totalCorrect = oracle.Count * RecallLevelUtil.RecallValue(level);
     int numCorrect = 0;
     TLLinksList list = new TLLinksList();
     TLLinksList links = sims.AllLinks;
     links.Sort();

     // Walk the ranked list by index instead of repeatedly removing the head element,
     // which costs a shift of the remaining elements on every step for array-backed lists.
     // NOTE(review): the consumed links are therefore no longer removed from the list returned
     // by AllLinks; this assumes AllLinks hands out a list the caller may keep intact - confirm.
     for (int i = 0; i < links.Count && numCorrect < totalCorrect; i++)
     {
         TLSingleLink link = links[i];
         if (oracle.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
         {
             numCorrect++;
         }
         list.Add(link);
     }
     return list;
 }
Ejemplo n.º 18
0
        /// <summary>
        /// Extracts links containing the given artifact IDs from a links list.
        /// </summary>
        /// <remarks>
        /// A link is kept when its source id or its target id is among <paramref name="artifactIDs"/>.
        /// When <paramref name="ignoreParameters"/> is set, everything from the first '(' onwards is
        /// dropped from an id before the comparison, unless the '(' is the first character. The kept
        /// links always retain their full, unmodified ids. The result is collected through a
        /// <c>TLSimilarityMatrix</c> and returned as its <c>AllLinks</c>.
        /// </remarks>
        /// <param name="original">Original links list</param>
        /// <param name="artifactIDs">List of artifact IDs</param>
        /// <param name="ignoreParameters">Flag to ignore parameter overloads and compare only method names.</param>
        /// <returns>Extracted links</returns>
        public static TLLinksList ExtractLinks(TLLinksList original, IEnumerable <string> artifactIDs, bool ignoreParameters)
        {
            TLSimilarityMatrix matrix = new TLSimilarityMatrix();

            // Build the membership lookup once: a linear Contains over the sequence for every link is
            // quadratic and re-enumerates lazy sequences. A caller-supplied set is used as is, so its
            // own comparer keeps applying.
            ISet <string> wantedIds = artifactIDs as ISet <string> ?? new HashSet <string>(artifactIDs);

            foreach (TLSingleLink link in original)
            {
                // Position of the parameter list, or -1 when parameters must not be stripped.
                int sourceParen = ignoreParameters ? link.SourceArtifactId.IndexOf('(') : -1;
                int targetParen = ignoreParameters ? link.TargetArtifactId.IndexOf('(') : -1;

                string sourceID = sourceParen > 0
                    ? link.SourceArtifactId.Substring(0, sourceParen)
                    : link.SourceArtifactId;
                string targetID = targetParen > 0
                    ? link.TargetArtifactId.Substring(0, targetParen)
                    : link.TargetArtifactId;

                if (wantedIds.Contains(sourceID) || wantedIds.Contains(targetID))
                {
                    matrix.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
                }
            }
            return matrix.AllLinks;
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Writes the rank position and score of every oracle-confirmed link of a model's similarity
        /// matrix to a text file under the "CheckLinkOrder" output directory.
        /// </summary>
        /// <param name="info">Run information providing the output and results directories</param>
        /// <param name="dataset">Dataset whose cleaned name is used in the file paths</param>
        /// <param name="oracle">Answer matrix used to decide which links are written</param>
        /// <param name="model">Model name, used for both the input ".sims" file and the output file</param>
        private static void WriteSims(ref Info info, CSMR13DataSet dataset, TLSimilarityMatrix oracle, string model)
        {
            // The using block guarantees the file is flushed and closed even if importing the
            // similarities or writing a line throws.
            using (TextWriter output = File.CreateText(info.OutputDirectory + @"\CheckLinkOrder\" + SharedUtils.CleanFileName(dataset.Name) + "." + model + ".txt"))
            {
                TLSimilarityMatrix sims    = Similarities.Import(info.ResultsDirectory.FullName + @"\" + SharedUtils.CleanFileName(dataset.Name) + @"\sims\" + model + ".sims");
                TLLinksList        simList = sims.AllLinks;

                simList.Sort();

                // pos is the 1-based rank of the link in the sorted list; it advances for every link,
                // not only for the ones that are written.
                int pos = 1;
                foreach (TLSingleLink link in simList)
                {
                    if (oracle.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
                    {
                        output.WriteLine("[{0}]\t{1}\t{2}\t{3}", pos, link.SourceArtifactId, link.TargetArtifactId, link.Score);
                    }
                    pos++;
                }
                output.Flush();
            }
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Returns the top-ranked links of <paramref name="sims"/> needed to reach the given recall level.
        /// </summary>
        /// <remarks>
        /// Links are visited in the order produced by <c>TLLinksList.Sort()</c> and every visited link is
        /// returned. Visiting stops once the number of links confirmed by <paramref name="oracle"/> reaches
        /// <c>oracle.Count * RecallLevelUtil.RecallValue(level)</c>, or when the links run out.
        /// </remarks>
        /// <param name="sims">Candidate similarity matrix</param>
        /// <param name="oracle">Answer matrix</param>
        /// <param name="level">Desired recall level</param>
        /// <returns>List of links at the desired recall</returns>
        public static TLLinksList GetLinksAtRecall(TLSimilarityMatrix sims, TLSimilarityMatrix oracle, RecallLevel level)
        {
            double      totalCorrect = oracle.Count * RecallLevelUtil.RecallValue(level);
            int         numCorrect   = 0;
            TLLinksList list         = new TLLinksList();
            TLLinksList links        = sims.AllLinks;

            links.Sort();

            // Walk the ranked list by index instead of repeatedly removing the head element,
            // which costs a shift of the remaining elements on every step for array-backed lists.
            // NOTE(review): the consumed links are therefore no longer removed from the list returned
            // by AllLinks; this assumes AllLinks hands out a list the caller may keep intact - confirm.
            for (int i = 0; i < links.Count && numCorrect < totalCorrect; i++)
            {
                TLSingleLink link = links[i];
                if (oracle.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
                {
                    numCorrect++;
                }
                list.Add(link);
            }
            return list;
        }
Ejemplo n.º 21
0
 /// <summary>
 /// Computes a similarity matrix between two TermDocumentMatrices using the Jensen-Shannon
 /// based <c>DocumentSimilarity</c> measure.
 /// </summary>
 /// <remarks>
 /// The matrices are first passed through <c>TermDocumentMatrix.Equalize</c>. Links are added to the
 /// result one source document at a time, in the order produced by <c>TLLinksList.Sort()</c>.
 /// </remarks>
 /// <param name="source">Source artifacts collection</param>
 /// <param name="target">Target artifacts collection</param>
 /// <returns>Similarity matrix</returns>
 public static TLSimilarityMatrix Compute(TermDocumentMatrix source, TermDocumentMatrix target)
 {
     List<TermDocumentMatrix> equalized = TermDocumentMatrix.Equalize(source, target);
     TermDocumentMatrix sourceDocs = equalized[0];
     TermDocumentMatrix targetDocs = equalized[1];
     TLSimilarityMatrix result = new TLSimilarityMatrix();

     for (int s = 0; s < sourceDocs.NumDocs; s++)
     {
         // Score this source document against every target document.
         TLLinksList row = new TLLinksList();
         for (int t = 0; t < targetDocs.NumDocs; t++)
         {
             TLSingleLink scored = new TLSingleLink(
                 sourceDocs.GetDocumentName(s),
                 targetDocs.GetDocumentName(t),
                 DocumentSimilarity(sourceDocs.GetDocument(s), targetDocs.GetDocument(t)));
             row.Add(scored);
         }

         row.Sort();
         foreach (TLSingleLink ranked in row)
         {
             result.AddLink(ranked.SourceArtifactId, ranked.TargetArtifactId, ranked.Score);
         }
     }
     return result;
 }
Ejemplo n.º 22
0
        /// <summary>
        /// Computes the recall of each source artifact of the answer matrix: the number of retrieved
        /// links confirmed by the oracle divided by the oracle's link count for that source.
        /// </summary>
        /// <remarks>
        /// Sets the oracle threshold to 0 before counting and stores the per-source values in <c>Results</c>.
        /// </remarks>
        protected override void ComputeImplementation()
        {
            _oracle.Threshold = 0;
            SerializableDictionary <string, double> recallBySource = new SerializableDictionary <string, double>();

            foreach (string sourceId in _oracle.SourceArtifactsIds)
            {
                // Count the retrieved links for this source that the oracle confirms.
                int hits = 0;
                TLLinksList retrieved = _matrix.GetLinksAboveThresholdForSourceArtifact(sourceId);
                for (int i = 0; i < retrieved.Count; i++)
                {
                    TLSingleLink candidate = retrieved[i];
                    if (_oracle.IsLinkAboveThreshold(candidate.SourceArtifactId, candidate.TargetArtifactId))
                    {
                        hits++;
                    }
                }

                double relevant = (double)_oracle.GetCountOfLinksAboveThresholdForSourceArtifact(sourceId);
                recallBySource.Add(sourceId, hits / relevant);
            }
            Results = recallBySource;
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Called from MetricComputation. Computes the average precision of the whole similarity matrix
        /// against the answer matrix and stores it in <c>Results</c> under "AveragePrecision".
        /// </summary>
        /// <remarks>
        /// The precision at each rank where an oracle-confirmed link occurs is summed, and the sum is
        /// divided by the number of links in the oracle.
        /// </remarks>
        protected override void ComputeImplementation()
        {
            Results = new SerializableDictionary <string, double>();

            TLLinksList ranked = _matrix.AllLinks;
            ranked.Sort();

            double precisionSum = 0.0;
            int    hits         = 0;
            for (int index = 0; index < ranked.Count; index++)
            {
                TLSingleLink candidate = ranked[index];
                if (!_oracle.IsLinkAboveThreshold(candidate.SourceArtifactId, candidate.TargetArtifactId))
                {
                    continue;
                }
                hits++;
                // index + 1 is the 1-based rank, i.e. the number of links read so far.
                precisionSum += hits / (double)(index + 1);
            }
            Results.Add("AveragePrecision", precisionSum / _oracle.AllLinks.Count);
        }
Ejemplo n.º 24
0
 /// <summary>
 /// Finds the first link in a list that has the same source and target artifact ids as a given link.
 /// </summary>
 /// <param name="list">List to search</param>
 /// <param name="source">Link whose source and target ids are looked for</param>
 /// <returns>
 /// The index and the matching link from the list; when nothing matches, an index of -1 paired with
 /// a new link carrying the requested ids and a score of -1.
 /// </returns>
 private static KeyValuePair<int, TLSingleLink> FindLink(TLLinksList list, TLSingleLink source)
 {
     for (int position = 0; position < list.Count; position++)
     {
         TLSingleLink candidate = list[position];
         bool sameSource = candidate.SourceArtifactId == source.SourceArtifactId;
         if (sameSource && candidate.TargetArtifactId == source.TargetArtifactId)
         {
             return new KeyValuePair<int, TLSingleLink>(position, list[position]);
         }
     }

     // No match: return the "not found" placeholder.
     TLSingleLink placeholder = new TLSingleLink(source.SourceArtifactId, source.TargetArtifactId, -1);
     return new KeyValuePair<int, TLSingleLink>(-1, placeholder);
 }
Ejemplo n.º 25
0
        /// <summary>
        /// Computes a similarity matrix between two TermDocumentMatrices using the Jensen-Shannon
        /// based <c>DocumentSimilarity</c> measure.
        /// </summary>
        /// <remarks>
        /// The matrices are first passed through <c>TermDocumentMatrix.Equalize</c>. Links are added to
        /// the result one source document at a time, in the order produced by <c>TLLinksList.Sort()</c>.
        /// </remarks>
        /// <param name="source">Source artifacts collection</param>
        /// <param name="target">Target artifacts collection</param>
        /// <returns>Similarity matrix</returns>
        public static TLSimilarityMatrix Compute(TermDocumentMatrix source, TermDocumentMatrix target)
        {
            List <TermDocumentMatrix> equalized  = TermDocumentMatrix.Equalize(source, target);
            TermDocumentMatrix        sourceDocs = equalized[0];
            TermDocumentMatrix        targetDocs = equalized[1];
            TLSimilarityMatrix        result     = new TLSimilarityMatrix();

            for (int s = 0; s < sourceDocs.NumDocs; s++)
            {
                // Score this source document against every target document.
                TLLinksList row = new TLLinksList();
                for (int t = 0; t < targetDocs.NumDocs; t++)
                {
                    TLSingleLink scored = new TLSingleLink(
                        sourceDocs.GetDocumentName(s),
                        targetDocs.GetDocumentName(t),
                        DocumentSimilarity(sourceDocs.GetDocument(s), targetDocs.GetDocument(t)));
                    row.Add(scored);
                }

                row.Sort();
                foreach (TLSingleLink ranked in row)
                {
                    result.AddLink(ranked.SourceArtifactId, ranked.TargetArtifactId, ranked.Score);
                }
            }
            return result;
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Returns the first N links of a matrix after sorting all of its links.
        /// </summary>
        /// <param name="matrix">Matrix</param>
        /// <param name="topN">Number of links to return; at least 1 and at most the number of links in the matrix</param>
        /// <returns>List of top N links</returns>
        /// <exception cref="DevelopmentKitException">
        /// Thrown when the matrix holds fewer than <paramref name="topN"/> links, or when
        /// <paramref name="topN"/> is less than 1.
        /// </exception>
        public static TLLinksList GetTopNLinks(TLSimilarityMatrix matrix, int topN)
        {
            bool notEnoughLinks = matrix.AllLinks.Count < topN;
            if (notEnoughLinks)
            {
                throw new DevelopmentKitException("Matrix only has " + matrix.AllLinks.Count + " links (" + topN + " requested).");
            }
            if (topN < 1)
            {
                throw new DevelopmentKitException("topN must be greater than 0.");
            }

            TLLinksList ranked = matrix.AllLinks;
            ranked.Sort();

            // Copy the leading topN entries of the sorted list.
            TLLinksList top = new TLLinksList();
            int taken = 0;
            while (taken < topN)
            {
                top.Add(ranked[taken]);
                taken++;
            }
            return top;
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Sums the precision values observed at every rank where a relevant link occurs in the result list.
        /// </summary>
        /// <remarks>
        /// The result list is sorted in place. The returned value is the raw sum: it is not divided by
        /// the number of relevant links here.
        /// </remarks>
        /// <param name="sourceArtifactId">Source artifact id (not used by the computation)</param>
        /// <param name="resultList">Links retrieved for the source artifact</param>
        /// <param name="answerMatrix">Answer matrix used to decide whether a link is relevant</param>
        /// <returns>Sum of the precision values at the ranks of the relevant links</returns>
        private static double Calculate(string sourceArtifactId, TLLinksList resultList, TLSimilarityMatrix answerMatrix)
        {
            resultList.Sort();

            Double precisionSum = 0.0;
            int relevantSeen = 0;

            for (int index = 0; index < resultList.Count; index++)
            {
                TLSingleLink candidate = resultList[index];

                // Only relevant links contribute to the sum.
                if (!answerMatrix.IsLinkAboveThreshold(candidate.SourceArtifactId, candidate.TargetArtifactId))
                {
                    continue;
                }

                relevantSeen++;
                // index + 1 is the number of documents read so far.
                precisionSum += (double)relevantSeen / (index + 1);
            }

            return precisionSum;
        }
 public void GetLinksAtRecall100()
 {
     // Act: ask for the links needed to reach a recall level of 1.0, using the
     // fixture matrices `sims` and `oracle` that are defined outside this method.
     TLLinksList actual = TLSimilarityMatrixUtil.GetLinksAtRecall(sims, oracle, 1.0);

     #if Verbose
     Console.WriteLine("TLSimilarityMatrixUtilTest.GetLinksAtRecall100()");
     foreach (TLSingleLink link in actual)
     {
         Console.WriteLine("{0}\t{1}\t{2}",
             link.SourceArtifactId,
             link.TargetArtifactId,
             link.Score
         );
     }
     #endif

     // Expected ranklist, in the order the utility is expected to return it.
     TLLinksList expected = new TLLinksList();
     expected.Add(new TLSingleLink("A", "B*", 10));
     expected.Add(new TLSingleLink("A", "E",  9));
     expected.Add(new TLSingleLink("A", "F",  8));
     expected.Add(new TLSingleLink("A", "C*", 7));
     expected.Add(new TLSingleLink("A", "G",  6));
     expected.Add(new TLSingleLink("A", "H",  5));
     expected.Add(new TLSingleLink("A", "I",  4));
     expected.Add(new TLSingleLink("A", "J",  3));
     expected.Add(new TLSingleLink("A", "D*", 2));

     // Assert: the size is checked first so the element-wise comparison cannot run off the end.
     Assert.AreEqual(9, actual.Count);
     for (int position = 0; position < expected.Count; position++)
     {
         Assert.AreEqual(expected[position], actual[position]);
     }
 }
Ejemplo n.º 29
0
        /// <summary>
        /// Computes the (population) standard deviation of similarity scores for a links list
        /// </summary>
        /// <remarks>
        /// Uses the two-pass form: the mean is computed first, then the squared deviations from it are
        /// averaged. The one-pass form <c>E[x^2] - mean^2</c> can come out slightly negative through
        /// rounding when the scores are nearly equal, which makes <c>Math.Sqrt</c> return NaN.
        /// </remarks>
        /// <param name="list">Links list</param>
        /// <returns>Standard deviation; NaN for an empty list</returns>
        public static double SimilarityStandardDeviation(TLLinksList list)
        {
            double average = AverageSimilarity(list);
            double sumOfSquaredDeviations = 0;

            foreach (TLSingleLink link in list)
            {
                double deviation = link.Score - average;
                sumOfSquaredDeviations += deviation * deviation;
            }

            // A sum of squares is never negative, so the square root is well defined
            // (an empty list still yields 0 / 0 = NaN).
            return Math.Sqrt(sumOfSquaredDeviations / list.Count);
        }
Ejemplo n.º 30
0
 /// <summary>
 /// Removes a percentage of links from the top of the list.
 /// </summary>
 /// <param name="links">Ranklist; it is sorted in place by this call</param>
 /// <param name="percent">Fraction to remove; values &lt;= 0 or &gt;= 1 are rejected</param>
 /// <returns>New ranklist holding copies of the links that were kept (empty when the input is empty)</returns>
 /// <exception cref="DevelopmentKitException">Thrown when <paramref name="percent"/> is &lt;= 0 or &gt;= 1.</exception>
 public static TLLinksList RemoveTopPercentage(TLLinksList links, double percent)
 {
     if (percent <= 0.0 || percent >= 1.0)
     {
         throw new DevelopmentKitException("Percentage level must be between 0 and 1.");
     }
     TLLinksList remaining = new TLLinksList();
     links.Sort();
     // NOTE(review): the "- 1" keeps one more link than Ceiling(count * percent) would
     // suggest (e.g. 10 links at 0.2 drops only 1). Left unchanged - confirm intent.
     int startIndex = Convert.ToInt32(Math.Ceiling(links.Count * percent)) - 1;
     // An empty list yields startIndex == -1, which previously indexed links[-1]
     // (since -1 < 0 satisfies the loop condition) and threw. Clamp to a valid index.
     startIndex = Math.Max(0, startIndex);
     for (int i = startIndex; i < links.Count; i++)
     {
         TLSingleLink link = links[i];
         remaining.Add(new TLSingleLink(link.SourceArtifactId, link.TargetArtifactId, link.Score));
     }
     return remaining;
 }
Ejemplo n.º 31
0
 /// <summary>
 /// Picks the first N links of a matrix's link list after sorting it.
 /// </summary>
 /// <param name="matrix">Matrix whose links are ranked</param>
 /// <param name="topN">How many links to return; must be at least 1 and no more than the number of links</param>
 /// <returns>List containing the first <paramref name="topN"/> links of the sorted list</returns>
 /// <exception cref="DevelopmentKitException">Thrown when the matrix has fewer than <paramref name="topN"/> links, or when <paramref name="topN"/> is below 1.</exception>
 public static TLLinksList GetTopNLinks(TLSimilarityMatrix matrix, int topN)
 {
     if (topN > matrix.AllLinks.Count)
     {
         throw new DevelopmentKitException("Matrix only has " + matrix.AllLinks.Count + " links (" + topN + " requested).");
     }
     if (topN <= 0)
     {
         throw new DevelopmentKitException("topN must be greater than 0.");
     }

     TLLinksList ranked = matrix.AllLinks;
     ranked.Sort();

     TLLinksList best = new TLLinksList();
     int taken = 0;
     while (taken < topN)
     {
         best.Add(ranked[taken]);
         taken++;
     }
     return best;
 }
Ejemplo n.º 32
0
 /// <summary>
 /// Collects the distinct target artifact ids that occur in a ranklist.
 /// </summary>
 /// <param name="links">Ranklist to scan</param>
 /// <returns>Set with one entry per distinct target artifact id</returns>
 public static ISet<string> GetSetOfTargetArtifacts(TLLinksList links)
 {
     HashSet<string> targetIds = new HashSet<string>();
     foreach (TLSingleLink entry in links)
     {
         // HashSet.Add ignores ids that are already present.
         string targetId = entry.TargetArtifactId;
         targetIds.Add(targetId);
     }
     return targetIds;
 }
Ejemplo n.º 33
0
 /// <summary>
 /// Selects the links of a matrix whose score is strictly greater than a threshold.
 /// </summary>
 /// <param name="matrix">Matrix whose links are filtered</param>
 /// <param name="threshold">Exclusive lower bound on the score</param>
 /// <returns>List of the links scoring above <paramref name="threshold"/></returns>
 public static TLLinksList GetLinksAboveThreshold(TLSimilarityMatrix matrix, double threshold)
 {
     TLLinksList selected = new TLLinksList();
     foreach (TLSingleLink candidate in matrix.AllLinks)
     {
         if (!(candidate.Score > threshold))
         {
             continue;
         }
         selected.Add(candidate);
     }
     return selected;
 }
Ejemplo n.º 34
0
 /// <summary>
 /// Extracts the links whose source or target artifact id is among the given ids.
 /// </summary>
 /// <param name="original">Links to filter</param>
 /// <param name="artifactIDs">Artifact ids to look for; enumerated at most once</param>
 /// <param name="ignoreParameters">
 /// When true, everything from the first '(' onward is dropped from each link's ids
 /// before matching (only if '(' is not the first character), so that overloads
 /// are compared by method name only.
 /// </param>
 /// <returns>Links of a new matrix populated with the matching links (original, untrimmed ids and scores)</returns>
 public static TLLinksList ExtractLinks(TLLinksList original, IEnumerable<string> artifactIDs, bool ignoreParameters)
 {
     TLSimilarityMatrix matrix = new TLSimilarityMatrix();

     // Built on first use so a lazy or expensive sequence is walked once rather than
     // up to twice per link. An existing collection is used as-is so that its own
     // Contains semantics (e.g. a custom comparer) stay in effect.
     ICollection<string> lookup = null;

     foreach (TLSingleLink link in original)
     {
         if (lookup == null)
         {
             lookup = artifactIDs as ICollection<string> ?? new HashSet<string>(artifactIDs);
         }

         string sourceID = link.SourceArtifactId;
         string targetID = link.TargetArtifactId;
         if (ignoreParameters)
         {
             int sourceParen = sourceID.IndexOf('(');
             if (sourceParen > 0)
             {
                 sourceID = sourceID.Substring(0, sourceParen);
             }
             int targetParen = targetID.IndexOf('(');
             if (targetParen > 0)
             {
                 targetID = targetID.Substring(0, targetParen);
             }
         }

         if (lookup.Contains(sourceID) || lookup.Contains(targetID))
         {
             matrix.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
         }
     }
     return matrix.AllLinks;
 }
 /// <summary>
 /// Verifies that requesting the top 4 links returns the four expected links in rank order.
 /// </summary>
 public void GetTopNLinks()
 {
     TLLinksList actual = TLSimilarityMatrixUtil.GetTopNLinks(sims, 4);
     #if Verbose
     Console.WriteLine("TLSimilarityMatrixUtilTest.GetTopNLinks()");
     foreach (TLSingleLink printed in actual)
     {
         Console.WriteLine("{0}\t{1}\t{2}", printed.SourceArtifactId, printed.TargetArtifactId, printed.Score);
     }
     #endif
     Assert.AreEqual(4, actual.Count);

     // Expected ranking, highest score first.
     TLLinksList expected = new TLLinksList
     {
         new TLSingleLink("A", "B*", 10),
         new TLSingleLink("A", "E", 9),
         new TLSingleLink("A", "F", 8),
         new TLSingleLink("A", "C*", 7)
     };

     int rank = 0;
     while (rank < expected.Count)
     {
         Assert.AreEqual(expected[rank], actual[rank]);
         rank++;
     }
 }
        /// <summary>
        /// Gets the links of a source artifact whose score is strictly greater than Threshold.
        /// The result is stored in CacheOfLinksPerSourceArtifacts and reused on later calls.
        /// </summary>
        /// <param name="sourceArtifactId">Id of the source artifact whose links are requested</param>
        /// <returns>
        /// List of links from the given source artifact scoring above Threshold;
        /// an empty list when the matrix has no entry for that source artifact.
        /// </returns>
        public TLLinksList GetLinksAboveThresholdForSourceArtifact(string sourceArtifactId)
        {
            TLLinksList cached;
            if (CacheOfLinksPerSourceArtifacts.TryGetValue(sourceArtifactId, out cached))
            {
                return cached;
            }

            // Cache miss: build the list from the matrix row, if there is one.
            TLLinksList computed = new TLLinksList();

            Dictionary<string, double> row;
            if (m_matrix.TryGetValue(sourceArtifactId, out row))
            {
                foreach (KeyValuePair<string, double> entry in row)
                {
                    if (entry.Value > Threshold)
                    {
                        computed.Add(new TLSingleLink(sourceArtifactId, entry.Key, entry.Value));
                    }
                }
            }

            // Empty results are cached too.
            CacheOfLinksPerSourceArtifacts.Add(sourceArtifactId, computed);
            return computed;
        }
Ejemplo n.º 37
0
 /// <summary>
 /// Computes the cosine similarity of every document in <paramref name="ids"/> against
 /// every document in <paramref name="tfidf"/>, after equalizing the two matrices.
 /// </summary>
 /// <param name="ids">Boolean document vectors</param>
 /// <param name="tfidf">tf-idf weighted document vectors</param>
 /// <returns>Similarity matrix; a pair whose norm product is zero gets a score of 0</returns>
 private static TLSimilarityMatrix ComputeSimilarities(TermDocumentMatrix ids, TermDocumentMatrix tfidf)
 {
     TLSimilarityMatrix result = new TLSimilarityMatrix();
     List<TermDocumentMatrix> equalized = TermDocumentMatrix.Equalize(ids, tfidf);
     TermDocumentMatrix left = equalized[0];
     TermDocumentMatrix right = equalized[1];

     for (int row = 0; row < ids.NumDocs; row++)
     {
         TLLinksList rowLinks = new TLLinksList();
         for (int col = 0; col < tfidf.NumDocs; col++)
         {
             double dot = 0.0;
             double leftSquares = 0.0;
             double rightSquares = 0.0;
             for (int term = 0; term < left.NumTerms; term++)
             {
                 double a = left[row, term];
                 double b = right[col, term];
                 dot += (a * b);
                 leftSquares += Math.Pow(a, 2);
                 rightSquares += Math.Pow(b, 2);
             }

             // Product of the two vector norms; zero means at least one vector is all zeros.
             double normProduct = Math.Sqrt(leftSquares) * Math.Sqrt(rightSquares);
             double score = (normProduct == 0.0) ? 0.0 : dot / normProduct;
             rowLinks.Add(new TLSingleLink(ids.GetDocumentName(row), tfidf.GetDocumentName(col), score));
         }

         // Links are added to the matrix in sorted order for each source document.
         rowLinks.Sort();
         foreach (TLSingleLink ranked in rowLinks)
         {
             result.AddLink(ranked.SourceArtifactId, ranked.TargetArtifactId, ranked.Score);
         }
     }
     return result;
 }
Ejemplo n.º 38
0
 /// <summary>
 /// Returns links for the desired recall level: the sorted candidate links are taken
 /// from the top until enough of them are confirmed by the answer matrix.
 /// </summary>
 /// <param name="matrix">Candidate matrix</param>
 /// <param name="answerMatrix">Answer matrix; its Count scaled by <paramref name="level"/> is the number of correct links to reach</param>
 /// <param name="level">Desired recall level; values &lt;= 0 or &gt; 1 are rejected</param>
 /// <returns>
 /// List of links at desired recall, in sorted order. If the candidates run out
 /// before the target is reached, all candidate links are returned.
 /// </returns>
 /// <exception cref="DevelopmentKitException">Thrown when <paramref name="level"/> is &lt;= 0 or &gt; 1.</exception>
 public static TLLinksList GetLinksAtRecall(TLSimilarityMatrix matrix, TLSimilarityMatrix answerMatrix, double level)
 {
     if (level <= 0.0 || level > 1.0)
     {
         throw new DevelopmentKitException("Recall level must be between 0 and 1.");
     }
     double totalCorrect = answerMatrix.Count * level;
     int numCorrect = 0;
     TLLinksList links = matrix.AllLinks;
     links.Sort();
     TLLinksList newLinks = new TLLinksList();
     // Walk the sorted list by index instead of repeatedly removing element 0, which
     // shifted the remaining elements on every iteration (quadratic overall).
     // NOTE(review): the list obtained from AllLinks is therefore no longer emptied
     // as a side effect - presumably nothing relies on that; confirm.
     for (int i = 0; i < links.Count && numCorrect < totalCorrect; i++)
     {
         TLSingleLink link = links[i];
         if (answerMatrix.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
         {
             numCorrect++;
         }
         newLinks.Add(link);
     }
     return newLinks;
 }