/// <summary>
/// Reads <paramref name="input"/> with <c>RanklistImporter.Import</c> and writes the
/// resulting matrix to <paramref name="output"/> with <c>Similarities.Export</c>.
/// </summary>
/// <param name="info">Not read or modified by this method.</param>
/// <param name="input">Path handed to the ranklist importer.</param>
/// <param name="output">Path handed to the similarities exporter.</param>
/// <returns>The matrix returned by the importer.</returns>
private static TLSimilarityMatrix ConvertRocco(ref Info info, string input, string output)
{
    TLSimilarityMatrix imported = RanklistImporter.Import(input);

    Similarities.Export(imported, output);

    return imported;
}
/// <summary>
/// Computes precision, recall and average precision per source artifact for the
/// links returned by <c>MetricsUtil.GetLinksAtRecall</c>, and adds a single
/// "#TOTAL" entry holding the average of the per-artifact average precisions.
/// </summary>
/// <param name="sims">Candidate similarities.</param>
/// <param name="oracle">Answer matrix; its source artifact ids drive the iteration.</param>
/// <param name="recall">Recall level forwarded to <c>MetricsUtil.GetLinksAtRecall</c>.</param>
/// <returns>The populated <c>DataSetPairs</c>.</returns>
/// <remarks>
/// NOTE(review): when a source artifact has no links (or no oracle links) the
/// divisions below produce non-finite values - confirm callers expect that.
/// </remarks>
public static DataSetPairs Compute(TLSimilarityMatrix sims, TLSimilarityMatrix oracle, RecallLevel recall)
{
    TLSimilarityMatrix retrieved = Similarities.CreateMatrix(MetricsUtil.GetLinksAtRecall(sims, oracle, recall));
    // Lowest possible threshold so every retrieved link counts as "above threshold".
    retrieved.Threshold = double.MinValue;

    DataSetPairs result = new DataSetPairs();

    foreach (string sourceArtifact in oracle.SourceArtifactsIds)
    {
        TLLinksList ranked = retrieved.GetLinksAboveThresholdForSourceArtifact(sourceArtifact);
        ranked.Sort();

        int relevantTotal = oracle.GetLinksAboveThresholdForSourceArtifact(sourceArtifact).Count;
        int hits = 0;
        int seen = 0;
        double precisionSum = 0.0;

        foreach (TLSingleLink candidate in ranked)
        {
            seen++;
            if (!oracle.IsLinkAboveThreshold(candidate.SourceArtifactId, candidate.TargetArtifactId))
            {
                continue;
            }

            hits++;
            // Precision at the rank of each correct link.
            precisionSum += hits / (double)seen;
        }

        double precisionValue = hits / Convert.ToDouble(ranked.Count);
        double recallValue = (double)hits / relevantTotal;
        double averagePrecisionValue = precisionSum / relevantTotal;

        result.PrecisionData.Add(new KeyValuePair<string, double>(sourceArtifact, precisionValue));
        result.RecallData.Add(new KeyValuePair<string, double>(sourceArtifact, recallValue));
        result.AveragePrecisionData.Add(new KeyValuePair<string, double>(sourceArtifact, averagePrecisionValue));
    }

    double meanAveragePrecision = DataSetPairsCollection.CalculateAverage(result.AveragePrecisionData);
    result.MeanAveragePrecisionData.Add(new KeyValuePair<string, double>("#TOTAL", meanAveragePrecision));

    return result;
}
/// <summary>
/// Writes every dataset stored under "ListOfDatasets" to the results directory:
/// an <c>info.txt</c> description, one metrics file per group of ten metrics
/// (labelled 10..100), and one <c>.sims</c> file per similarity matrix.
/// </summary>
public override void Compute()
{
    List<CSMR13DataSet> lds = (List<CSMR13DataSet>)Workspace.Load("ListOfDatasets");
    foreach (CSMR13DataSet ds in lds)
    {
        DirectoryInfo directory = Directory.CreateDirectory(_config.ResultsDirectory.Absolute + @"\" + CleanFileName(ds.Name));
        DirectoryInfo metricsDir = Directory.CreateDirectory(directory.FullName + @"\metrics");

        // using guarantees the writer is flushed and closed even if writing throws.
        using (TextWriter infoFile = File.CreateText(directory.FullName + @"\info.txt"))
        {
            infoFile.Write(ds.ToOutputString());
        }

        // A new metrics file is opened whenever j == 10 and closed when j == 100.
        // The finally block closes the last file when the number of metrics is
        // not a multiple of ten, or when an exception interrupts the loop.
        TextWriter dataFile = null;
        try
        {
            for (int i = 0, j = 10; i < ds.Metrics.Count; i++, j += 10)
            {
                if (j == 10)
                {
                    dataFile = File.CreateText(metricsDir.FullName + @"\" + CleanFileName(ds.Metrics[i].Name.Replace(" @R10", "").Replace(' ', '_')));
                }

                dataFile.WriteLine("{0} {1}", j, ds.Metrics[i].PrecisionData[0].Value);

                if (j == 100)
                {
                    dataFile.Dispose();
                    dataFile = null;
                    // Reset so the loop increment brings j back to 10 for the next group.
                    j = 0;
                }
            }
        }
        finally
        {
            if (dataFile != null)
            {
                dataFile.Dispose();
            }
        }

        DirectoryInfo simsDir = Directory.CreateDirectory(directory.FullName + @"\sims");
        foreach (TLSimilarityMatrix matrix in ds.Similarities)
        {
            Similarities.Export(matrix, simsDir.FullName + @"\" + matrix.Name + ".sims");
        }
    }
}
/// <summary>
/// Loads "DocumentMap" from the workspace, imports similarities from the configured
/// directory using that map, and stores the result as "Similarities".
/// </summary>
public override void Compute()
{
    List<string> documentMap = (List<string>)Workspace.Load("DocumentMap");

    TLSimilarityMatrix imported = Similarities.Import(_config.Directory.Absolute, documentMap);

    Workspace.Store("Similarities", imported);
}
/// <summary>
/// Imports the precomputed RTM similarities of the currently selected dataset and
/// stores them in the workspace under "RTM".
/// </summary>
public override void Compute()
{
    // The workspace value is decremented before being used as a list index.
    int datasetIndex = (int)Workspace.Load("CurrentDataset") - 1;
    List<ICSM11DataSet> datasets = (List<ICSM11DataSet>)Workspace.Load("ListOfDatasets");

    ICSM11DataSet selected = datasets[datasetIndex];

    Workspace.Store("RTM", Similarities.Import(selected.PrecomputedRTMSimilarities));
}
/// <summary>
/// Builds view models for the dumps that <c>AboveThresholdSimilarities()</c> reports
/// for <paramref name="id"/>, ordered by <c>DumpInfo.Created</c> descending.
/// </summary>
/// <param name="id">Identifier of the dump whose similarities are queried.</param>
/// <param name="includeSimilarities">
/// When true the similarity service is passed on to <c>ToDumpViewModel</c>;
/// otherwise <c>null</c> is passed instead.
/// </param>
/// <returns>The view models, newest first.</returns>
public async Task<IOrderedEnumerable<DumpViewModel>> SearchDuplicates(DumpIdentifier id, bool includeSimilarities = true)
{
    var rawSimilarities = await similarityService.GetSimilarities(id);
    var similarIds = new Similarities(rawSimilarities)
        .AboveThresholdSimilarities()
        .Select(entry => entry.Key);

    var serviceForViewModels = includeSimilarities ? similarityService : null;
    var pendingViewModels = similarIds.Select(
        dumpId => ToDumpViewModel(dumpId, dumpRepo, bundleRepo, serviceForViewModels));

    var viewModels = await Task.WhenAll(pendingViewModels);

    return viewModels.OrderByDescending(viewModel => viewModel.DumpInfo.Created);
}
/// <summary>
/// For every file in <paramref name="tracedir"/>, derives a feature id from the file
/// name, looks up its trace and forwards both to <c>RemoveNonExecutedMethods</c>.
/// </summary>
/// <param name="sims">Matrix passed by reference to <c>RemoveNonExecutedMethods</c>.</param>
/// <param name="newsims">Second matrix passed by reference to <c>RemoveNonExecutedMethods</c>.</param>
/// <param name="tracedir">Directory whose files are enumerated.</param>
public static void ExtractFeature(ref TLSimilarityMatrix sims, ref TLSimilarityMatrix newsims, string tracedir)
{
    string[] traceFiles = Directory.GetFiles(tracedir);

    foreach (string traceFile in traceFiles)
    {
        string featureId = Similarities.ExtractFeatureID(traceFile);
        Dictionary<string, int> executed = Lookup(featureId, tracedir);

        RemoveNonExecutedMethods(ref sims, ref newsims, featureId, executed);
    }
}
/// <summary>
/// Produces a new matrix in which every score is replaced by
/// (score - mean) / standard deviation of the input matrix.
/// </summary>
/// <param name="matrix">Similarity matrix to normalize; it is not modified.</param>
/// <returns>A new matrix containing the rescaled links.</returns>
/// <remarks>
/// NOTE(review): if the standard deviation is zero the division yields
/// non-finite scores - confirm whether callers rely on that.
/// </remarks>
public static TLSimilarityMatrix Normalize(TLSimilarityMatrix matrix)
{
    double mean = Similarities.AverageSimilarity(matrix);
    double stdDev = Similarities.SimilarityStandardDeviation(matrix);

    TLSimilarityMatrix normalized = new TLSimilarityMatrix();
    foreach (TLSingleLink link in matrix.AllLinks)
    {
        double rescaled = (link.Score - mean) / stdDev;
        normalized.AddLink(link.SourceArtifactId, link.TargetArtifactId, rescaled);
    }

    return normalized;
}
/// <summary>
/// Runs the LSA R script (3 dimensions) on the bundled source/target matrices and
/// checks every resulting link score against the stored reference file.
/// </summary>
public void ComputeLSA()
{
    TLArtifactsCollection source = TermDocumentMatrix.Load(@"../../Data/LSA/source.txt").ToTLArtifactsCollection();
    TLArtifactsCollection target = TermDocumentMatrix.Load(@"../../Data/LSA/target.txt").ToTLArtifactsCollection();

    REngine engine = new REngine(Settings.Default.RScriptEXE);
    LSAScript script = new LSAScript(source, target, new LSAConfig { Dimensions = 3 });
    TLSimilarityMatrix computed = (TLSimilarityMatrix)engine.Execute(script);

    TLSimilarityMatrix reference = Similarities.Import(@"../../Data/LSA/correct.txt");

    foreach (TLSingleLink link in computed.AllLinks)
    {
        double expectedScore = reference.GetScoreForLink(link.SourceArtifactId, link.TargetArtifactId);
        Assert.AreEqual(expectedScore, link.Score, Settings.Default.DoublePrecision);
    }
}
/// <summary>
/// Recomputes UD-CSTI on the EasyClinic UC-CC VSM similarities, exports the
/// resulting matrix and writes one precision value per metrics entry
/// (labelled 10, 20, ...) to a <c>.metrics</c> file.
/// </summary>
/// <param name="info">Supplies <c>ResultsDirectory</c>; not modified here.</param>
public static void Run(ref Info info)
{
    // Common prefix of every input and output file used below.
    string prefix = info.ResultsDirectory + @"\CSMRTools\RoccoResults\EasyClinic UC-CC";

    TLSimilarityMatrix vsm = Similarities.Import(prefix + ".VSM.sims");
    TLSimilarityMatrix oracle = Oracle.Import(prefix + ".oracle");
    TLSimilarityMatrix usage = Oracle.Import(prefix + ".relationships");

    TLSimilarityMatrix udcsti = UDCSTI.Compute(vsm, usage, oracle);
    Similarities.Export(udcsti, prefix + ".VSM_UDCSTI.MYsims");

    DataSetPairsCollection metrics = OverallMetricsComputation.ComputeAll(udcsti, oracle);

    // using guarantees the file is flushed and closed even if a write throws.
    using (TextWriter dataFile = File.CreateText(prefix + ".VSM_UDCSTI.MYsims.metrics"))
    {
        for (int i = 0, j = 10; i < metrics.Count; i++, j += 10)
        {
            dataFile.WriteLine("{0} {1}", j, metrics[i].PrecisionData[0].Value);
        }
    }
}
/// <summary>
/// Writes the position, ids and score of every link in the model's sorted
/// similarity list that the oracle reports as above threshold.
/// </summary>
/// <param name="info">Supplies the output and results directories; not modified here.</param>
/// <param name="dataset">Dataset whose cleaned name selects the input and output files.</param>
/// <param name="oracle">Answer matrix used to decide which links are written.</param>
/// <param name="model">Model name used in both file names.</param>
private static void WriteSims(ref Info info, CSMR13DataSet dataset, TLSimilarityMatrix oracle, string model)
{
    string datasetName = SharedUtils.CleanFileName(dataset.Name);

    // using guarantees the writer is flushed and closed even when the import
    // or the loop below throws.
    using (TextWriter output = File.CreateText(info.OutputDirectory + @"\CheckLinkOrder\" + datasetName + "." + model + ".txt"))
    {
        TLSimilarityMatrix sims = Similarities.Import(info.ResultsDirectory.FullName + @"\" + datasetName + @"\sims\" + model + ".sims");
        TLLinksList simList = sims.AllLinks;
        simList.Sort();

        // pos is the 1-based position in the sorted list; it advances for every
        // link, not only for the ones that are written.
        int pos = 1;
        foreach (TLSingleLink link in simList)
        {
            if (oracle.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
            {
                output.WriteLine("[{0}]\t{1}\t{2}\t{3}", pos, link.SourceArtifactId, link.TargetArtifactId, link.Score);
            }
            pos++;
        }
    }
}
/// <summary>
/// Computes VSM similarities for the simple corpus and compares them, link by
/// link after sorting, with the stored reference output.
/// </summary>
public void ComputeTest()
{
    string data = @"../../Data/SimpleCorpus.";
    TLArtifactsCollection source = Artifacts.Import(data + "input.source.txt");
    TLArtifactsCollection target = Artifacts.Import(data + "input.target.txt");

    TLSimilarityMatrix testsims = VSM.Compute(source, target);
    TLSimilarityMatrix realsims = Similarities.Import(data + "output.VSM.txt");

    // The reference data is the expected value, so it goes first in each
    // assertion; this keeps "expected"/"actual" correct in failure messages.
    Assert.AreEqual(realsims.Count, testsims.Count);

    TLLinksList testlinks = testsims.AllLinks;
    TLLinksList reallinks = realsims.AllLinks;
    testlinks.Sort();
    reallinks.Sort();

    for (int i = 0; i < reallinks.Count; i++)
    {
        Assert.AreEqual(reallinks[i].SourceArtifactId, testlinks[i].SourceArtifactId);
        Assert.AreEqual(reallinks[i].TargetArtifactId, testlinks[i].TargetArtifactId);
        Assert.AreEqual(reallinks[i].Score, testlinks[i].Score, 0.000000001);
    }
}
/// <summary>
/// Computes VSM similarities with the BooleanQueriesAndTFIDFCorpus weighting for the
/// simple corpus and compares them, link by link after sorting, with the stored
/// reference output.
/// </summary>
public void BooleanQueriesAndTFIDFCorpusTest()
{
    string inputData = Settings.Default.SimpleCorpusDir;
    string outputData = Path.Combine(inputData, "VSM");
    TLArtifactsCollection source = Artifacts.ImportFile(Path.Combine(inputData, "source.txt"));
    TLArtifactsCollection target = Artifacts.ImportFile(Path.Combine(inputData, "target.txt"));

    TLSimilarityMatrix testsims = VSM.Compute(source, target, VSMWeightEnum.BooleanQueriesAndTFIDFCorpus);
    TLSimilarityMatrix realsims = Similarities.Import(Path.Combine(outputData, "output.txt"));

    // The reference data is the expected value, so it goes first in each
    // assertion; this keeps "expected"/"actual" correct in failure messages.
    Assert.AreEqual(realsims.Count, testsims.Count);

    TLLinksList testlinks = testsims.AllLinks;
    TLLinksList reallinks = realsims.AllLinks;
    testlinks.Sort();
    reallinks.Sort();

    for (int i = 0; i < reallinks.Count; i++)
    {
        Assert.AreEqual(reallinks[i].SourceArtifactId, testlinks[i].SourceArtifactId);
        Assert.AreEqual(reallinks[i].TargetArtifactId, testlinks[i].TargetArtifactId);
        Assert.AreEqual(reallinks[i].Score, testlinks[i].Score, Settings.Default.DoublePrecision);
    }
}
/// <summary>
/// Computes overall ("#TOTAL") precision, recall, average precision and mean average
/// precision for the links returned by <c>MetricsUtil.GetLinksAtRecall</c>.
/// </summary>
/// <param name="sims">Candidate similarities.</param>
/// <param name="oracle">Answer matrix.</param>
/// <param name="level">Recall level at which the links are cut.</param>
/// <param name="precision">Receives one "#TOTAL" entry: correct links / retrieved links.</param>
/// <param name="recall">
/// Receives one "#TOTAL" entry derived from the recall level and the oracle size
/// (it is not computed from the number of correct links found).
/// </param>
/// <param name="avgPrecision">Receives one "#TOTAL" entry: summed precision-at-hit / oracle size.</param>
/// <param name="meanAvgPrecision">Receives one "#TOTAL" entry from <c>MeanAveragePrecision.Compute</c>.</param>
public static void ComputeMetrics(TLSimilarityMatrix sims, TLSimilarityMatrix oracle, RecallLevel level, out TLKeyValuePairsList precision, out TLKeyValuePairsList recall, out TLKeyValuePairsList avgPrecision, out TLKeyValuePairsList meanAvgPrecision)
{
    const string totalKey = "#TOTAL";

    TLLinksList retrieved = MetricsUtil.GetLinksAtRecall(sims, oracle, level);

    int hits = 0;
    int seen = 0;
    double precisionSum = 0.0;
    foreach (TLSingleLink candidate in retrieved)
    {
        seen++;
        if (!oracle.IsLinkAboveThreshold(candidate.SourceArtifactId, candidate.TargetArtifactId))
        {
            continue;
        }

        hits++;
        // Precision at the rank of each correct link.
        precisionSum += hits / (double)seen;
    }

    // temporary
    double precisionValue = hits / Convert.ToDouble(retrieved.Count);
    double recallValue = Math.Ceiling(oracle.Count * RecallLevelUtil.RecallValue(level)) / oracle.Count;
    double averagePrecisionValue = precisionSum / oracle.Count;
    double meanAveragePrecisionValue = MeanAveragePrecision.Compute(Similarities.CreateMatrix(retrieved), oracle);

    precision = new TLKeyValuePairsList();
    precision.Add(new KeyValuePair<string, double>(totalKey, precisionValue));

    recall = new TLKeyValuePairsList();
    recall.Add(new KeyValuePair<string, double>(totalKey, recallValue));

    avgPrecision = new TLKeyValuePairsList();
    avgPrecision.Add(new KeyValuePair<string, double>(totalKey, averagePrecisionValue));

    meanAvgPrecision = new TLKeyValuePairsList();
    meanAvgPrecision.Add(new KeyValuePair<string, double>(totalKey, meanAveragePrecisionValue));
}
/// <summary>
/// Imports similarities from the configured file and stores them in the workspace
/// under "Similarities".
/// </summary>
public override void Compute()
{
    TLSimilarityMatrix imported = (TLSimilarityMatrix)Similarities.Import(_config.File.Absolute);

    Workspace.Store("Similarities", imported);
}
/// <summary>
/// Imports the script results from the output file recorded in <c>_outputFile</c>.
/// </summary>
/// <param name="result">RScriptResult object; not read by this implementation.</param>
/// <returns>The value returned by <c>Similarities.Import</c>.</returns>
public override object ImportResults(RScriptResult result)
{
    object imported = Similarities.Import(_outputFile);
    return imported;
}
// Orders instances by similarityValue, largest first: returns -1 when this
// value is greater than o's, 0 when the difference is exactly zero, and 1
// in every other case.
public int compareTo(Similarities o) {
    if (this.similarityValue - o.similarityValue > 0) {
        return -1;
    }
    if (this.similarityValue - o.similarityValue == 0) {
        return 0;
    }
    return 1;
}
/// <summary>
/// Exports the workspace's "Similarities" matrix to the configured file, logging a
/// trace message before and after the write.
/// </summary>
public override void Compute()
{
    Logger.Trace("Writing similarities to " + _config.File.Absolute);

    TLSimilarityMatrix toExport = (TLSimilarityMatrix)Workspace.Load("Similarities");
    Similarities.Export(toExport, _config.File.Absolute);

    Logger.Trace("Write complete.");
}