/// <summary>
/// Builds per-source-artifact precision, recall and average precision for the links returned by
/// MetricsUtil.GetLinksAtRecall, and stores the average of the average-precision entries
/// under the key "#TOTAL".
/// </summary>
/// <param name="sims">Candidate similarity matrix handed to MetricsUtil.GetLinksAtRecall.</param>
/// <param name="oracle">Answer matrix; its source artifact ids drive the iteration.</param>
/// <param name="recall">Recall level handed to MetricsUtil.GetLinksAtRecall.</param>
/// <returns>A DataSetPairs with one entry per oracle source artifact in each per-source list.</returns>
public static DataSetPairs Compute(TLSimilarityMatrix sims, TLSimilarityMatrix oracle, RecallLevel recall)
{
    TLSimilarityMatrix linksAtRecall = Similarities.CreateMatrix(MetricsUtil.GetLinksAtRecall(sims, oracle, recall));
    // Threshold is dropped to double.MinValue before any "above threshold" query is made
    // (presumably so that no link is filtered out by score - confirm against TLSimilarityMatrix).
    linksAtRecall.Threshold = double.MinValue;

    DataSetPairs result = new DataSetPairs();
    foreach (string sourceId in oracle.SourceArtifactsIds)
    {
        TLLinksList ranked = linksAtRecall.GetLinksAboveThresholdForSourceArtifact(sourceId);
        ranked.Sort();
        int relevantTotal = oracle.GetLinksAboveThresholdForSourceArtifact(sourceId).Count;

        int hits = 0;
        double precisionSum = 0.0;
        for (int position = 1; position <= ranked.Count; position++)
        {
            TLSingleLink candidate = ranked[position - 1];
            if (!oracle.IsLinkAboveThreshold(candidate.SourceArtifactId, candidate.TargetArtifactId))
            {
                continue;
            }

            // Precision at the 1-based position of each oracle link is accumulated.
            hits++;
            precisionSum += hits / (double)position;
        }

        // NOTE(review): both divisors below can be zero, which yields NaN/Infinity entries.
        result.PrecisionData.Add(new KeyValuePair<string, double>(sourceId, hits / (double)ranked.Count));
        result.RecallData.Add(new KeyValuePair<string, double>(sourceId, (double)hits / relevantTotal));
        result.AveragePrecisionData.Add(new KeyValuePair<string, double>(sourceId, precisionSum / relevantTotal));
    }

    double overall = DataSetPairsCollection.CalculateAverage(result.AveragePrecisionData);
    result.MeanAveragePrecisionData.Add(new KeyValuePair<string, double>("#TOTAL", overall));
    return result;
}
/// <summary>
/// Computes the delta value for an individual artifact
/// </summary>
/// <remarks>
/// Delta is half the spread between the largest and smallest link score of the source artifact.
/// Values below 0.05 are replaced by delta^4 / 4. The matrix threshold is overwritten with
/// double.MinValue as a side effect.
/// </remarks>
/// <param name="matrix">Similarities</param>
/// <param name="source">Source artifact id</param>
/// <returns>delta</returns>
public static double ComputeForSourceArtifact(TLSimilarityMatrix matrix, string source)
{
    matrix.Threshold = double.MinValue;
    TLLinksList links = matrix.GetLinksAboveThresholdForSourceArtifact(source);

    double lowest = Double.MaxValue;
    double highest = Double.MinValue;
    for (int i = 0; i < links.Count; i++)
    {
        double score = links[i].Score;
        if (score < lowest)
        {
            lowest = score;
        }
        if (score > highest)
        {
            highest = score;
        }
    }

    double halfSpread = (highest - lowest) / 2.0;
    // according to R scripts
    return halfSpread < 0.05 ? Math.Pow(halfSpread, 4) / 4 : halfSpread;
}
/// <summary>
/// Writes a header row followed by one worksheet row per link of the similarity matrix:
/// source id, target id, score, and "1"/"0" depending on whether the answer matrix reports
/// the link as above its threshold.
/// </summary>
/// <param name="similarityMatrix">Matrix whose links are written, grouped by source artifact.</param>
/// <param name="answerMatrix">Matrix consulted for the "Is correct" column.</param>
/// <param name="xlWorkSheet">Worksheet that receives the cells, starting at row 1.</param>
private static void ReadSimilarityMatrixToExcelWorksheet(TLSimilarityMatrix similarityMatrix, TLSimilarityMatrix answerMatrix, Excel.Worksheet xlWorkSheet)
{
    //header
    xlWorkSheet.Cells[1, 1] = "Source Artifact Id";
    xlWorkSheet.Cells[1, 2] = "Target Artifact Id";
    xlWorkSheet.Cells[1, 3] = "Probability";
    xlWorkSheet.Cells[1, 4] = "Is correct";

    int nextRow = 2;
    foreach (string sourceId in similarityMatrix.SourceArtifactsIds)
    {
        TLLinksList links = similarityMatrix.GetLinksAboveThresholdForSourceArtifact(sourceId);
        links.Sort();

        for (int i = 0; i < links.Count; i++, nextRow++)
        {
            TLSingleLink link = links[i];
            xlWorkSheet.Cells[nextRow, 1] = link.SourceArtifactId;
            xlWorkSheet.Cells[nextRow, 2] = link.TargetArtifactId;
            xlWorkSheet.Cells[nextRow, 3] = link.Score;

            string correctFlag;
            if (answerMatrix.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
            {
                correctFlag = "1";
            }
            else
            {
                correctFlag = "0";
            }
            xlWorkSheet.Cells[nextRow, 4] = correctFlag;
        }
    }
}
/// <summary>
/// Writes the links of the similarity matrix as comma-separated lines
/// (source id, target id, score) preceded by a header line.
/// </summary>
/// <remarks>
/// Values are formatted with the invariant culture. Formatting the score with the writer's
/// culture would emit a decimal comma in many locales and add a spurious column to the line.
/// Ids are written verbatim; ids containing commas are not escaped.
/// </remarks>
/// <param name="similarityMatrix">Matrix whose links are written, grouped by source artifact.</param>
/// <param name="writeFile">Destination writer; it is neither flushed nor closed here.</param>
private static void ReadSimilarityMatrixToFile(TLSimilarityMatrix similarityMatrix, System.IO.TextWriter writeFile)
{
    //header
    writeFile.WriteLine("Source Artifact Id,Target Artifact Id,Probability");

    foreach (string sourceArtifact in similarityMatrix.SourceArtifactsIds)
    {
        var traceLinks = similarityMatrix.GetLinksAboveThresholdForSourceArtifact(sourceArtifact);
        traceLinks.Sort();

        foreach (TLSingleLink link in traceLinks)
        {
            // Culture-independent formatting keeps "." as the decimal separator of the score.
            writeFile.WriteLine(string.Format(
                System.Globalization.CultureInfo.InvariantCulture,
                "{0},{1},{2}",
                link.SourceArtifactId,
                link.TargetArtifactId,
                link.Score));
        }
    }
}
/// <summary>
/// Computes the effectiveness all measure of the given similarity matrix using the answer matrix provided.
/// </summary>
/// <remarks>
/// For every oracle source artifact the matrix links are sorted, and each link that the oracle
/// reports as above threshold is stored in Results under the key "source_target" with its
/// zero-based position in the sorted list as the value. The oracle threshold is set to 0 first.
/// </remarks>
protected override void ComputeImplementation()
{
    _oracle.Threshold = 0;
    Results = new SerializableDictionary<string, double>();

    foreach (string query in _oracle.SourceArtifactsIds)
    {
        TLLinksList ranked = _matrix.GetLinksAboveThresholdForSourceArtifact(query);
        ranked.Sort();

        int position = 0;
        foreach (TLSingleLink candidate in ranked)
        {
            if (_oracle.IsLinkAboveThreshold(query, candidate.TargetArtifactId))
            {
                string key = String.Format("{0}_{1}", query, candidate.TargetArtifactId);
                Results.Add(key, position);
            }
            position++;
        }
    }
}
/// <summary>
/// Sums the value returned by Calculate for every source artifact of the answer matrix and
/// divides the sum by answerMatrix.Count.
/// </summary>
/// <param name="resultSimilarityMatrix">Matrix whose above-threshold links are passed to Calculate.</param>
/// <param name="answerMatrix">Answer matrix; supplies the source ids and the divisor.</param>
/// <returns>The sum divided by answerMatrix.Count, or 0.0 when that count is not positive.</returns>
public static double Compute(TLSimilarityMatrix resultSimilarityMatrix, TLSimilarityMatrix answerMatrix)
{
    double accumulated = 0.0;
    int trueLinkCount = answerMatrix.Count;

    foreach (string sourceId in answerMatrix.SourceArtifactsIds)
    {
        accumulated += Calculate(
            sourceId,
            resultSimilarityMatrix.GetLinksAboveThresholdForSourceArtifact(sourceId),
            answerMatrix);
    }

    // Guard against dividing by zero when the answer matrix is empty.
    return trueLinkCount > 0 ? accumulated / trueLinkCount : 0.0;
}
/// <summary>
/// Computes the recall of each source artifact in the similarity matrix using the answer matrix provided.
/// </summary>
/// <remarks>
/// Recall is the number of matrix links the oracle reports as above threshold, divided by the
/// oracle's count of above-threshold links for that source. The oracle threshold is set to 0 first.
/// Results is assigned only after all source artifacts have been processed.
/// </remarks>
protected override void ComputeImplementation()
{
    SerializableDictionary<string, double> recallBySource = new SerializableDictionary<string, double>();
    _oracle.Threshold = 0;

    foreach (string sourceId in _oracle.SourceArtifactsIds)
    {
        TLLinksList retrieved = _matrix.GetLinksAboveThresholdForSourceArtifact(sourceId);

        int hits = 0;
        for (int i = 0; i < retrieved.Count; i++)
        {
            TLSingleLink candidate = retrieved[i];
            if (_oracle.IsLinkAboveThreshold(candidate.SourceArtifactId, candidate.TargetArtifactId))
            {
                hits++;
            }
        }

        int relevant = _oracle.GetCountOfLinksAboveThresholdForSourceArtifact(sourceId);
        recallBySource.Add(sourceId, hits / (double)relevant);
    }

    Results = recallBySource;
}
/// <summary>
/// Computes recall for every source artifact of the dataset that has at least one answer-set
/// link, using the links of <paramref name="resultMatrix"/> retrieved at m_threshold.
/// </summary>
/// <remarks>
/// Side effect: the threshold of <paramref name="resultMatrix"/> is set to m_threshold for the
/// computation and left at 0.0 afterwards (not restored to its previous value).
/// </remarks>
/// <param name="resultMatrix">Matrix with the retrieved links.</param>
/// <param name="dataset">Dataset providing the answer set and the source artifacts.</param>
/// <returns>Recall keyed by source artifact id; sources without answer-set links are omitted.</returns>
public SortedDictionary<string, double> Calculate(TLSimilarityMatrix resultMatrix, TLDataset dataset)
{
    var answerSet = dataset.AnswerSet;
    var sourceArtifacts = dataset.SourceArtifacts;
    SortedDictionary<string, double> recallBySource = new SortedDictionary<string, double>();

    resultMatrix.Threshold = m_threshold;

    foreach (TLArtifact sourceArtifact in sourceArtifacts.Values)
    {
        int relevantCount = answerSet.GetCountOfLinksAboveThresholdForSourceArtifact(sourceArtifact.Id);
        if (relevantCount <= 0)
        {
            // Nothing to recall for this source artifact.
            continue;
        }

        TLLinksList retrieved = resultMatrix.GetLinksAboveThresholdForSourceArtifact(sourceArtifact.Id);
        retrieved.Sort();

        int hits = 0;
        for (int i = 0; i < retrieved.Count; i++)
        {
            TLSingleLink candidate = retrieved[i];
            //check if this is relevant link
            if (answerSet.IsLinkAboveThreshold(candidate.SourceArtifactId, candidate.TargetArtifactId))
            {
                hits++;
            }
        }

        recallBySource.Add(sourceArtifact.Id, (double)hits / relevantCount);
    }

    resultMatrix.Threshold = 0.0;
    return recallBySource;
}
/// <summary>
/// Called from MetricComputation
/// </summary>
/// <remarks>
/// For every oracle source artifact the matrix links are sorted; at each link the oracle reports
/// as above threshold, (oracle links seen so far) / (1-based position) is accumulated. The sum
/// divided by the oracle's above-threshold link count for that source is stored in Results.
/// </remarks>
protected override void ComputeImplementation()
{
    Results = new SerializableDictionary<string, double>();

    foreach (string sourceID in _oracle.SourceArtifactsIds)
    {
        TLLinksList ranked = _matrix.GetLinksAboveThresholdForSourceArtifact(sourceID);
        ranked.Sort();

        double precisionSum = 0.0;
        int hits = 0;
        for (int position = 1; position <= ranked.Count; position++)
        {
            if (!_oracle.IsLinkAboveThreshold(sourceID, ranked[position - 1].TargetArtifactId))
            {
                continue;
            }

            hits++;
            precisionSum += hits / (double)position;
        }

        int relevant = _oracle.GetCountOfLinksAboveThresholdForSourceArtifact(sourceID);
        Results.Add(sourceID, precisionSum / relevant);
    }
}
/// <summary>
/// Writes, for every query, the position FindLink reports for each gold link to three files:
/// all gold links to <paramref name="allPath"/>, the first gold link of each query to
/// <paramref name="bestPath"/>, and the found positions alone to <paramref name="allPath"/> + ".csv".
/// </summary>
/// <remarks>
/// The writers are wrapped in <c>using</c> so the files are flushed and closed even when an
/// exception is thrown while exporting; previously they leaked in that case.
/// NOTE(review): the "best" file receives the first link of the gold list, not the link with
/// the lowest position - confirm this is intended.
/// </remarks>
/// <param name="queries">Query artifacts; their keys are used as source artifact ids.</param>
/// <param name="sims">Similarity matrix whose sorted links are searched.</param>
/// <param name="gold">Gold-set matrix providing the links to look up.</param>
/// <param name="allPath">Output path for all gold links (existing file is overwritten).</param>
/// <param name="bestPath">Output path for the first gold link per query (existing file is overwritten).</param>
public static void Export(TLArtifactsCollection queries, TLSimilarityMatrix sims, TLSimilarityMatrix gold, String allPath, String bestPath)
{
    List<int> rawList = new List<int>();

    using (TextWriter all = new StreamWriter(allPath, false))
    using (TextWriter best = new StreamWriter(bestPath, false))
    using (TextWriter raw = new StreamWriter(allPath + ".csv", false))
    {
        foreach (String feature in queries.Keys)
        {
            TLLinksList simList = sims.GetLinksAboveThresholdForSourceArtifact(feature);
            TLLinksList goldList = gold.GetLinksAboveThresholdForSourceArtifact(feature);
            simList.Sort();

            all.WriteLine(feature);
            best.WriteLine(feature);

            bool first = true;
            foreach (TLSingleLink link in goldList)
            {
                KeyValuePair<int, TLSingleLink> recovered = FindLink(simList, link);
                string line = recovered.Value.TargetArtifactId + "\t" + recovered.Key;

                if (first)
                {
                    best.WriteLine(line);
                    first = false;
                }
                all.WriteLine(line);

                // A key of -1 is kept out of the raw list (presumably FindLink's "not found" marker).
                if (recovered.Key != -1)
                {
                    rawList.Add(recovered.Key);
                }
            }
        }

        raw.WriteLine(String.Join("\n", rawList));
    }
}
/// <summary>
/// Sums the value returned by Calculate for every source artifact and divides the sum by
/// answerMatrix.Count.
/// </summary>
/// <param name="sourceArtifacts">Source artifacts to iterate over.</param>
/// <param name="resultSimilarityMatrix">Matrix whose above-threshold links are passed to Calculate.</param>
/// <param name="answerMatrix">Answer matrix; also supplies the divisor.</param>
/// <returns>The sum divided by answerMatrix.Count, or 0.0 when that count is not positive.</returns>
/// <exception cref="ComponentException">Thrown when any of the three arguments is null.</exception>
private static double ComputeMeanAveragePrecision(TLArtifactsCollection sourceArtifacts, TLSimilarityMatrix resultSimilarityMatrix, TLSimilarityMatrix answerMatrix)
{
    if (sourceArtifacts == null)
    {
        throw new ComponentException("Received null sourceArtifacts");
    }
    if (resultSimilarityMatrix == null)
    {
        throw new ComponentException("Received null similarityMatrix");
    }
    if (answerMatrix == null)
    {
        throw new ComponentException("Received null answerMatrix");
    }

    double accumulated = 0.0;
    int trueLinkCount = answerMatrix.Count;

    foreach (TLArtifact artifact in sourceArtifacts.Values)
    {
        accumulated += Calculate(
            artifact.Id,
            resultSimilarityMatrix.GetLinksAboveThresholdForSourceArtifact(artifact.Id),
            answerMatrix);
    }

    if (trueLinkCount > 0)
    {
        return accumulated / trueLinkCount;
    }
    return 0.0;
}
/// <summary>
/// Computes a tie-aware average precision for every source artifact of the dataset that has
/// at least one link in the answer set.
/// </summary>
/// <remarks>
/// Consecutive links with the same score form one tie group whose rank is the mean of the
/// 1-based positions of its members. When a group containing at least one answer-set link ends,
/// (answer-set links seen so far) / (mean rank of the group) is added to a running sum; the sum
/// is finally divided by the number of answer-set links of the source artifact.
/// The mean rank is always computed in floating point. Previously the last group used integer
/// division, which truncated e.g. a mean rank of 2.5 to 2 and inflated the result.
/// </remarks>
/// <param name="resultMatrix">Matrix with the retrieved links.</param>
/// <param name="dataset">Dataset providing the answer set and the source artifacts.</param>
/// <returns>Average precision keyed by source artifact id; sources without answer-set links are omitted.</returns>
public SortedDictionary<string, double> Calculate(TLSimilarityMatrix resultMatrix, TLDataset dataset)
{
    var answerSet = dataset.AnswerSet;
    var sourceArtifacts = dataset.SourceArtifacts;
    SortedDictionary<string, double> metricValues = new SortedDictionary<string, double>();

    foreach (TLArtifact sourceArtifact in sourceArtifacts.Values)
    {
        int numberOfRelevant = answerSet.GetCountOfLinksAboveThresholdForSourceArtifact(sourceArtifact.Id);

        //do calculation only if there are relevant links
        if (numberOfRelevant <= 0)
        {
            continue;
        }

        TLLinksList resultsListForArtifact = resultMatrix.GetLinksAboveThresholdForSourceArtifact(sourceArtifact.Id);
        resultsListForArtifact.Sort();

        int numRetrieved = 0;
        int numCorrectlyRetrieved = 0;
        double sumPrecision = 0.0;
        int numSameRankPosition = 1;
        int sumSameRankPosition = 0;
        bool hasCorrectlyRetrieved = false;
        double lastSimilarityScore = 0.0;

        foreach (TLSingleLink link in resultsListForArtifact)
        {
            numRetrieved++;

            // The first link always opens a group; relying on a sentinel score would
            // misfire when the first real score happens to equal the sentinel.
            bool startsNewGroup = numRetrieved == 1 || link.Score != lastSimilarityScore;
            if (startsNewGroup)
            {
                if (hasCorrectlyRetrieved)
                {
                    sumPrecision += PrecisionAtMeanRank(numCorrectlyRetrieved, sumSameRankPosition, numSameRankPosition);
                }
                numSameRankPosition = 1;
                sumSameRankPosition = numRetrieved;
                hasCorrectlyRetrieved = false;
            }
            else
            {
                numSameRankPosition++;
                sumSameRankPosition += numRetrieved;
            }

            if (answerSet.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
            {
                numCorrectlyRetrieved++;
                hasCorrectlyRetrieved = true;
            }
            lastSimilarityScore = link.Score;
        }

        // Close the last tie group, which the loop never gets to flush.
        if (hasCorrectlyRetrieved)
        {
            sumPrecision += PrecisionAtMeanRank(numCorrectlyRetrieved, sumSameRankPosition, numSameRankPosition);
        }

        metricValues.Add(sourceArtifact.Id, sumPrecision / numberOfRelevant);
    }

    return metricValues;
}

/// <summary>
/// Returns correctCount divided by the mean rank (positionSum / groupSize) of a tie group,
/// computed entirely in floating point.
/// </summary>
private static double PrecisionAtMeanRank(int correctCount, int positionSum, int groupSize)
{
    double meanRank = (double)positionSum / groupSize;
    return correctCount / meanRank;
}
/// <summary>
/// Collects, for every query in <paramref name="qmap"/>, the zero-based position of each gold-set
/// link in the sorted similarity list, grouped overall and by query type (bugs, features, patches),
/// once for all hits and once for the first hit of each query only.
/// </summary>
/// <remarks>
/// Side effect: the threshold of <paramref name="sims"/> is set to Double.MinValue.
/// </remarks>
/// <param name="sims">Similarity matrix to evaluate.</param>
/// <param name="goldset">Gold-set matrix used to recognise correct links.</param>
/// <param name="qmap">Maps the query id to its type string as returned by Trace.GetFeatureSetType.</param>
/// <param name="ModelName">Prefix of the two metric names (" all" and " best" are appended).</param>
/// <returns>A DatasetResults with an empty name holding the "all" and the "best" effectiveness metric.</returns>
public static DatasetResults Calculate(ref TLSimilarityMatrix sims, ref TLSimilarityMatrix goldset, Dictionary<int, string> qmap, string ModelName)
{
    TLKeyValuePairsList overallAll = new TLKeyValuePairsList();
    TLKeyValuePairsList overallBest = new TLKeyValuePairsList();
    TLKeyValuePairsList bugsAll = new TLKeyValuePairsList();
    TLKeyValuePairsList bugsBest = new TLKeyValuePairsList();
    TLKeyValuePairsList featuresAll = new TLKeyValuePairsList();
    TLKeyValuePairsList featuresBest = new TLKeyValuePairsList();
    TLKeyValuePairsList patchesAll = new TLKeyValuePairsList();
    TLKeyValuePairsList patchesBest = new TLKeyValuePairsList();

    sims.Threshold = Double.MinValue;

    foreach (KeyValuePair<int, string> query in qmap)
    {
        TLLinksList ranked = sims.GetLinksAboveThresholdForSourceArtifact(query.Key.ToString());
        ranked.Sort();

        bool firstHitRecorded = false;
        int position = -1;
        foreach (TLSingleLink candidate in ranked)
        {
            position++;
            if (!goldset.IsLinkAboveThreshold(candidate.SourceArtifactId, candidate.TargetArtifactId))
            {
                continue;
            }

            KeyValuePair<string, double> hit = new KeyValuePair<string, double>(
                candidate.SourceArtifactId + "_" + candidate.TargetArtifactId, position);

            overallAll.Add(hit);
            bool isFirstHit = !firstHitRecorded;
            if (isFirstHit)
            {
                overallBest.Add(hit);
                firstHitRecorded = true;
            }

            // Pick the per-type lists; a type string matching none of the three gets no extra entry.
            TLKeyValuePairsList typedAll = null;
            TLKeyValuePairsList typedBest = null;
            if (query.Value == Trace.GetFeatureSetType(FeatureSet.Bugs))
            {
                typedAll = bugsAll;
                typedBest = bugsBest;
            }
            else if (query.Value == Trace.GetFeatureSetType(FeatureSet.Features))
            {
                typedAll = featuresAll;
                typedBest = featuresBest;
            }
            else if (query.Value == Trace.GetFeatureSetType(FeatureSet.Patch))
            {
                typedAll = patchesAll;
                typedBest = patchesBest;
            }

            if (typedAll != null)
            {
                if (isFirstHit)
                {
                    typedBest.Add(hit);
                }
                typedAll.Add(hit);
            }
        }
    }

    List<SummaryData> allSummaries = new List<SummaryData>
    {
        CreateSummaryData(overallAll, "All (all)"),
        CreateSummaryData(bugsAll, "Bugs (all)"),
        CreateSummaryData(featuresAll, "Features (all)"),
        CreateSummaryData(patchesAll, "Patches (all)")
    };

    List<SummaryData> bestSummaries = new List<SummaryData>
    {
        CreateSummaryData(overallBest, "All (best)"),
        CreateSummaryData(bugsBest, "Bugs (best)"),
        CreateSummaryData(featuresBest, "Features (best)"),
        CreateSummaryData(patchesBest, "Patches (best)")
    };

    List<Metric> metrics = new List<Metric>
    {
        new EffectivenessMetric(allSummaries, 0.0, "none", ModelName + " all"),
        new EffectivenessMetric(bestSummaries, 0.0, "none", ModelName + " best")
    };

    return new DatasetResults("", metrics);
}
/// <summary>
/// Writes the position FindLink reports for every gold-set link of every query to eight files
/// named <paramref name="dir"/> + <paramref name="prefix"/> + suffix: ".all", ".bugs", ".features"
/// and ".patch", each with ".allmeasures" (every gold link) and ".bestmeasures" (the gold link
/// with the lowest position per query).
/// </summary>
/// <remarks>
/// The writers are wrapped in <c>using</c> so all files are flushed and closed even when an
/// exception is thrown while exporting; previously they leaked in that case.
/// When no gold link of a query is found, the "best" line is "null" followed by Int32.MaxValue.
/// Side effect: the threshold of <paramref name="sims"/> is set to Double.MinValue.
/// </remarks>
/// <param name="sims">Similarity matrix whose sorted links are searched.</param>
/// <param name="goldset">Gold-set matrix providing the links to look up.</param>
/// <param name="qmap">Maps the query id to its type string as returned by Trace.GetFeatureSetType.</param>
/// <param name="dir">Directory part of the output paths; concatenated with prefix as-is (no separator is inserted).</param>
/// <param name="prefix">File name prefix of the output files.</param>
public static void Export(ref TLSimilarityMatrix sims, ref TLSimilarityMatrix goldset, Dictionary<int, string> qmap, string dir, string prefix)
{
    string basePath = dir + prefix;

    using (TextWriter allall = new StreamWriter(basePath + ".all.allmeasures", false))
    using (TextWriter allbest = new StreamWriter(basePath + ".all.bestmeasures", false))
    using (TextWriter bugall = new StreamWriter(basePath + ".bugs.allmeasures", false))
    using (TextWriter bugbest = new StreamWriter(basePath + ".bugs.bestmeasures", false))
    using (TextWriter featall = new StreamWriter(basePath + ".features.allmeasures", false))
    using (TextWriter featbest = new StreamWriter(basePath + ".features.bestmeasures", false))
    using (TextWriter patchall = new StreamWriter(basePath + ".patch.allmeasures", false))
    using (TextWriter patchbest = new StreamWriter(basePath + ".patch.bestmeasures", false))
    {
        sims.Threshold = Double.MinValue;

        foreach (KeyValuePair<int, string> qmapKVP in qmap)
        {
            string queryId = qmapKVP.Key.ToString();
            TLLinksList simList = sims.GetLinksAboveThresholdForSourceArtifact(queryId);
            TLLinksList goldList = goldset.GetLinksAboveThresholdForSourceArtifact(queryId);
            simList.Sort();

            // Resolve the per-type writers once per query; both stay null for an unknown type.
            TextWriter typedAll = null;
            TextWriter typedBest = null;
            if (qmapKVP.Value == Trace.GetFeatureSetType(FeatureSet.Bugs))
            {
                typedAll = bugall;
                typedBest = bugbest;
            }
            else if (qmapKVP.Value == Trace.GetFeatureSetType(FeatureSet.Features))
            {
                typedAll = featall;
                typedBest = featbest;
            }
            else if (qmapKVP.Value == Trace.GetFeatureSetType(FeatureSet.Patch))
            {
                typedAll = patchall;
                typedBest = patchbest;
            }

            allall.WriteLine(queryId);
            allbest.WriteLine(queryId);
            if (typedAll != null)
            {
                typedAll.WriteLine(queryId);
                typedBest.WriteLine(queryId);
            }

            // Placeholder that any found link (position < Int32.MaxValue) replaces.
            KeyValuePair<int, TLSingleLink> best = new KeyValuePair<int, TLSingleLink>(Int32.MaxValue, new TLSingleLink("null", "null", 0));

            foreach (TLSingleLink link in goldList)
            {
                KeyValuePair<int, TLSingleLink> recovered = FindLink(simList, link);
                // A key of -1 never becomes "best" (presumably FindLink's "not found" marker).
                if (recovered.Key != -1 && recovered.Key < best.Key)
                {
                    best = recovered;
                }

                string line = recovered.Value.TargetArtifactId + "\t" + recovered.Key;
                allall.WriteLine(line);
                if (typedAll != null)
                {
                    typedAll.WriteLine(line);
                }
            }

            string bestLine = best.Value.TargetArtifactId + "\t" + best.Key;
            allbest.WriteLine(bestLine);
            if (typedBest != null)
            {
                typedBest.WriteLine(bestLine);
            }
        }
    }
}
/// <summary>
/// Computes one precision value per source artifact: the number of answer-set links found,
/// divided by the number of result links examined up to the point where all answer-set links
/// of that source have been seen.
/// </summary>
/// <remarks>
/// Side effect: the threshold of <paramref name="resultMatrix"/> is set to 0.0.
/// A source artifact with no result links and no answer-set links gets no entry.
/// </remarks>
/// <param name="resultMatrix">Matrix with the retrieved links.</param>
/// <param name="dataset">Dataset providing the answer set, source artifacts and target artifacts.</param>
/// <returns>Precision keyed by source artifact id.</returns>
public SortedDictionary<string, double> Calculate(TLSimilarityMatrix resultMatrix, TLDataset dataset)
{
    var answerSet = dataset.AnswerSet;
    var sourceArtifacts = dataset.SourceArtifacts;
    SortedDictionary<string, double> precisionBySource = new SortedDictionary<string, double>();

    foreach (TLArtifact sourceArtifact in sourceArtifacts.Values)
    {
        int relevantTotal = answerSet.GetCountOfLinksAboveThresholdForSourceArtifact(sourceArtifact.Id);

        resultMatrix.Threshold = 0.0;
        TLLinksList ranked = resultMatrix.GetLinksAboveThresholdForSourceArtifact(sourceArtifact.Id);
        ranked.Sort();

        int hits = 0;
        int examined = 0;
        double cutoffScore = 0;
        bool allRelevantFound = false;

        foreach (TLSingleLink link in ranked)
        {
            // NOTE(review): the link that triggers the break below has already been counted
            // here - confirm that including it in the denominator is intended.
            examined++;

            if (allRelevantFound)
            {
                // All correct links were found: keep going only while the score equals the
                // score recorded for the last correct link.
                // NOTE(review): both scores are read from the answer set, not from the
                // result link's own Score - confirm.
                double score = answerSet.GetScoreForLink(link.SourceArtifactId, link.TargetArtifactId);
                if (!score.Equals(cutoffScore))
                {
                    break;
                }
                continue;
            }

            //check if this is relevant link
            if (!answerSet.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
            {
                continue;
            }

            hits++;
            if (hits == relevantTotal)
            {
                allRelevantFound = true;
                cutoffScore = answerSet.GetScoreForLink(link.SourceArtifactId, link.TargetArtifactId);
            }
        }

        if (hits != relevantTotal)
        {
            // The results list was exhausted before every correct link was seen, so some correct
            // links were not retrieved (probability zero). Precision is then taken over the count
            // of all target documents, because all zero-probability documents would have to be
            // retrieved as well.
            precisionBySource.Add(sourceArtifact.Id, (double)relevantTotal / dataset.TargetArtifacts.Count);
        }
        else if (examined > 0)
        {
            precisionBySource.Add(sourceArtifact.Id, (double)hits / examined);
        }
    }

    return precisionBySource;
}