/// <summary>
/// Imports every dataset returned by <c>ReadDatasetsLocations()</c> (source artifacts,
/// target artifacts and answer set) and stores the resulting list under
/// "listOfDatasets" and its size under "numberOfDatasets" in the workspace.
/// </summary>
/// <exception cref="ComponentException">
/// Thrown with the message produced by <c>CoestDatasetImporterHelper.ValidatePath</c>
/// when one of the three locations of a dataset fails validation.
/// </exception>
public override void Compute()
{
    var datasetsLocations = ReadDatasetsLocations();
    TLDatasetsList listOfDatasets = new TLDatasetsList();

    foreach (DatasetLocation locations in datasetsLocations)
    {
        TLDataset dataset = new TLDataset(locations.DatasetName);
        string error;

        // Each location is validated right before it is imported, so the first
        // invalid path aborts the import with the validator's own error message.
        if (!CoestDatasetImporterHelper.ValidatePath(locations.SourceArtifactsLocation, "Source Artifacts File", out error))
        {
            throw new ComponentException(error);
        }
        dataset.SourceArtifacts = CoestDatasetImporterHelper.ImportArtifacts(locations.SourceArtifactsLocation, m_config.TrimElementValues);
        Logger.Info(String.Format("Source artifacts imported from {0}.", locations.SourceArtifactsLocation));

        // Validate the TARGET location (the previous code re-validated the source path here).
        if (!CoestDatasetImporterHelper.ValidatePath(locations.TargetArtifactsLocation, "Target Artifacts File", out error))
        {
            throw new ComponentException(error);
        }
        dataset.TargetArtifacts = CoestDatasetImporterHelper.ImportArtifacts(locations.TargetArtifactsLocation, m_config.TrimElementValues);
        Logger.Info(String.Format("Target artifacts imported from {0}.", locations.TargetArtifactsLocation));

        // Validate the ANSWER SET location (the previous code re-validated the source path
        // under the "Target Artifacts File" label here).
        if (!CoestDatasetImporterHelper.ValidatePath(locations.AnswerSetLocation, "Answer Set File", out error))
        {
            throw new ComponentException(error);
        }
        dataset.AnswerSet = CoestDatasetImporterHelper.ImportAnswerSet(
            locations.AnswerSetLocation,
            dataset.SourceArtifacts, locations.SourceArtifactsLocation,
            dataset.TargetArtifacts, locations.TargetArtifactsLocation,
            Logger, m_config.TrimElementValues);
        Logger.Info(String.Format("Answer set imported from {0}.", locations.AnswerSetLocation));

        listOfDatasets.Add(dataset);
    }

    Workspace.Store("listOfDatasets", listOfDatasets);
    Workspace.Store("numberOfDatasets", listOfDatasets.Count);
}
/// <summary>
/// Runs the configured metric computation on the result matrix of a single technique
/// and wraps the per-artifact values in a box summary holding one box plot point.
/// </summary>
/// <param name="singleTechniqueResults">Tracing results whose <c>ResultMatrix</c> is evaluated.</param>
/// <param name="dataset">Dataset passed through to the metric computation.</param>
/// <returns>A <c>BoxSummaryData</c> named after the metric, containing a single point.</returns>
public BoxSummaryData Calculate(SingleTracingResults singleTechniqueResults, TLDataset dataset)
{
    var scoresById = m_metricComputation.Calculate(singleTechniqueResults.ResultMatrix, dataset);
    double[] scores = scoresById.Values.ToArray();

    // An empty result is not an error, but it is suspicious enough to warn about
    // (only possible when a logger was supplied).
    bool nothingComputed = scores.Length == 0;
    if (nothingComputed && m_logger != null)
    {
        string warning = "Metric computation of '" + m_metricName + "' returned zero matching results for " + dataset.Name + " for one of the techniques. It may be valid results, but it may also mean that there is mismatch of ids in the answer matrix and corresponding artifacts.";
        m_logger.Warn(warning);
    }

    BoxSummaryData summary = new BoxSummaryData(m_metricName, m_metricDescription);
    BoxPlotPoint point = new BoxPlotPoint(scores);
    summary.AddPoint(point);
    return summary;
}
/// <summary>
/// Loads the source/target artifacts, the answer matrix and the similarity matrix from
/// the workspace, runs the metric computation engine on them as a single dataset named
/// "Experiment results", and stores the engine output under "results".
/// </summary>
/// <exception cref="ComponentException">Thrown when any of the four loaded inputs is null.</exception>
public override void Compute()
{
    const string experimentName = "Experiment results";

    // Fetch all inputs first; validation happens only after every load has completed.
    var loadedSources = (TLArtifactsCollection)Workspace.Load("sourceArtifacts");
    var loadedTargets = (TLArtifactsCollection)Workspace.Load("targetArtifacts");
    var oracle = (TLSimilarityMatrix)Workspace.Load("answerMatrix");
    var computedMatrix = (TLSimilarityMatrix)Workspace.Load("similarityMatrix");

    if (loadedSources == null)
        throw new ComponentException("The loaded source artifacts cannot be null!");
    if (loadedTargets == null)
        throw new ComponentException("The loaded target artifacts cannot be null!");
    if (oracle == null)
        throw new ComponentException("The loaded answer matrix cannot be null!");
    if (computedMatrix == null)
        throw new ComponentException("The loaded similarity matrix cannot be null!");

    // The engine works on collections, so the single dataset is wrapped in a list.
    var datasetList = new TLDatasetsList();
    var experimentDataset = new TLDataset(experimentName)
    {
        AnswerSet = oracle,
        SourceArtifacts = loadedSources,
        TargetArtifacts = loadedTargets
    };
    datasetList.Add(experimentDataset);

    // Likewise the single similarity matrix is wrapped in a collection.
    var matrices = new TLSimilarityMatricesCollection();
    computedMatrix.Name = experimentName;
    matrices.Add(computedMatrix);

    var metricEngine = new MetricComputationEngine(datasetList, Logger, m_config);

    // Adapt the matrix collection into a group of tracing results for the engine.
    var adaptedResults = GroupOfTracingResults<SingleTracingResults>.Adapt(matrices, experimentName);
    metricEngine.AddTracingResults(adaptedResults);

    Workspace.Store("results", metricEngine.ComputeResults());
}
/// <summary>
/// Computes recall per source artifact at the configured threshold
/// (<c>m_threshold</c>): the fraction of the artifact's answer-set links that appear
/// among the result links above the threshold.
/// </summary>
/// <param name="resultMatrix">
/// Similarity matrix to evaluate. Its <c>Threshold</c> is temporarily set to
/// <c>m_threshold</c> and is always reset to 0.0 before this method exits.
/// </param>
/// <param name="dataset">Dataset supplying the answer set and the source artifacts.</param>
/// <returns>
/// Recall keyed by source artifact id. Artifacts without any relevant link in the
/// answer set are omitted.
/// </returns>
public SortedDictionary<string, double> Calculate(TLSimilarityMatrix resultMatrix, TLDataset dataset)
{
    var answerSet = dataset.AnswerSet;
    var sourceArtifacts = dataset.SourceArtifacts;
    SortedDictionary<string, double> metricValues = new SortedDictionary<string, double>();

    resultMatrix.Threshold = m_threshold;
    try
    {
        foreach (TLArtifact sourceArtifact in sourceArtifacts.Values)
        {
            int numberOfRelevant = answerSet.GetCountOfLinksAboveThresholdForSourceArtifact(sourceArtifact.Id);
            if (numberOfRelevant <= 0)
            {
                // Recall is undefined without relevant links; the artifact is skipped.
                continue;
            }

            TLLinksList resultsListForArtifact = resultMatrix.GetLinksAboveThresholdForSourceArtifact(sourceArtifact.Id);
            // NOTE(review): the order of links does not influence the count below;
            // the sort is retained to keep the original behavior unchanged.
            resultsListForArtifact.Sort();

            int numberOfCorrectlyRetrieved = 0;
            foreach (TLSingleLink link in resultsListForArtifact)
            {
                // A retrieved link counts only when the answer set marks it as relevant.
                if (answerSet.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
                {
                    numberOfCorrectlyRetrieved++;
                }
            }

            double recall = (double)numberOfCorrectlyRetrieved / numberOfRelevant;
            metricValues.Add(sourceArtifact.Id, recall);
        }
    }
    finally
    {
        // Reset the threshold even when a lookup throws, so the caller's matrix is not
        // left filtered for whatever reads it next.
        resultMatrix.Threshold = 0.0;
    }

    return metricValues;
}
/// <summary>
/// Computes, per source artifact, the precision obtained when the sorted result list is
/// cut at the point where every answer-set link of that artifact has been retrieved.
/// Links that follow the last correct link with exactly the same score are treated as
/// retrieved too.
/// </summary>
/// <param name="resultMatrix">
/// Similarity matrix to evaluate. Its <c>Threshold</c> is set to 0.0 by this method.
/// </param>
/// <param name="dataset">
/// Dataset supplying the answer set, the source artifacts and the target artifacts count.
/// </param>
/// <returns>
/// Precision keyed by source artifact id. An artifact is omitted when all of its correct
/// links were found (including the case of zero correct links) but no link was retrieved.
/// </returns>
public SortedDictionary<string, double> Calculate(TLSimilarityMatrix resultMatrix, TLDataset dataset)
{
    var answerSet = dataset.AnswerSet;
    var sourceArtifacts = dataset.SourceArtifacts;
    SortedDictionary<string, double> metricValues = new SortedDictionary<string, double>();

    foreach (TLArtifact sourceArtifact in sourceArtifacts.Values)
    {
        int totalNumberOfCorrectLinks = answerSet.GetCountOfLinksAboveThresholdForSourceArtifact(sourceArtifact.Id);

        resultMatrix.Threshold = 0.0;
        TLLinksList resultsListForArtifact = resultMatrix.GetLinksAboveThresholdForSourceArtifact(sourceArtifact.Id);
        // presumably orders the links by score so that tied links are adjacent - verify
        // against TLLinksList.Sort
        resultsListForArtifact.Sort();

        int numberOfCorrectlyRetrieved = 0;
        int numberOfRetrieved = 0;
        double scoreOfLastCorrectLink = 0;
        bool foundLastCorrectLink = false;

        foreach (TLSingleLink link in resultsListForArtifact)
        {
            if (foundLastCorrectLink)
            {
                // All correct links were found: keep retrieving only links tied with the
                // last correct one. The tie is decided on the similarity score of the
                // ranked result, and the first non-tied link ends the scan WITHOUT being
                // counted as retrieved.
                if (link.Score != scoreOfLastCorrectLink)
                {
                    break;
                }

                numberOfRetrieved++;
                continue;
            }

            numberOfRetrieved++;

            // check if this is a relevant link
            if (answerSet.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
            {
                numberOfCorrectlyRetrieved++;
                if (numberOfCorrectlyRetrieved == totalNumberOfCorrectLinks)
                {
                    foundLastCorrectLink = true;
                    scoreOfLastCorrectLink = link.Score;
                }
            }
        }

        if (numberOfCorrectlyRetrieved != totalNumberOfCorrectLinks)
        {
            // The result list was exhausted before every correct link was seen, i.e. some
            // correct links were not retrieved at all (probability zero). Reaching them
            // would require retrieving every target document, so the total number of
            // target artifacts is used as the retrieved count.
            double precision = (double)totalNumberOfCorrectLinks / dataset.TargetArtifacts.Count;
            metricValues.Add(sourceArtifact.Id, precision);
        }
        else if (numberOfRetrieved > 0)
        {
            double precision = (double)numberOfCorrectlyRetrieved / numberOfRetrieved;
            metricValues.Add(sourceArtifact.Id, precision);
        }
    }

    return metricValues;
}
/// <summary>
/// Computes average precision per source artifact. Consecutive result links sharing the
/// same score form a tied group; a group containing at least one relevant link
/// contributes (correct links retrieved so far) / (mean rank position of the group) to
/// the precision sum, which is finally divided by the number of relevant links.
/// </summary>
/// <param name="resultMatrix">Similarity matrix whose links above its current threshold are evaluated.</param>
/// <param name="dataset">Dataset supplying the answer set and the source artifacts.</param>
/// <returns>
/// Average precision keyed by source artifact id. Artifacts without any relevant link in
/// the answer set are omitted.
/// </returns>
public SortedDictionary<string, double> Calculate(TLSimilarityMatrix resultMatrix, TLDataset dataset)
{
    var answerSet = dataset.AnswerSet;
    var sourceArtifacts = dataset.SourceArtifacts;
    SortedDictionary<string, double> metricValues = new SortedDictionary<string, double>();

    foreach (TLArtifact sourceArtifact in sourceArtifacts.Values)
    {
        int numberOfRelevant = answerSet.GetCountOfLinksAboveThresholdForSourceArtifact(sourceArtifact.Id);

        // do the calculation only if there are relevant links
        if (numberOfRelevant <= 0)
        {
            continue;
        }

        TLLinksList resultsListForArtifact = resultMatrix.GetLinksAboveThresholdForSourceArtifact(sourceArtifact.Id);
        resultsListForArtifact.Sort();

        int numRetrieved = 0;
        int numCorrectlyRetrieved = 0;
        double sumPrecision = 0;

        // State of the tied group currently being scanned.
        int numSameRankPosition = 1;
        int sumSameRankPosition = 0;
        bool hasCorrectlyRetrieved = false;
        double lastSimilarityScore = -1;

        foreach (TLSingleLink link in resultsListForArtifact)
        {
            numRetrieved++;

            if (link.Score != lastSimilarityScore)
            {
                // A new score starts a new group: flush the finished one first.
                if (hasCorrectlyRetrieved)
                {
                    double averageRankPosition = (double)sumSameRankPosition / numSameRankPosition;
                    sumPrecision += (double)numCorrectlyRetrieved / averageRankPosition;
                }

                numSameRankPosition = 1;
                sumSameRankPosition = numRetrieved;
                hasCorrectlyRetrieved = false;
            }
            else
            {
                // Same score as the previous link: extend the current tied group.
                numSameRankPosition++;
                sumSameRankPosition += numRetrieved;
            }

            if (answerSet.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
            {
                numCorrectlyRetrieved++;
                hasCorrectlyRetrieved = true;
            }

            lastSimilarityScore = link.Score;
        }

        // Flush the last group. The cast keeps the mean rank fractional, matching the
        // in-loop flush; integer division here would truncate it.
        if (hasCorrectlyRetrieved)
        {
            double averageRankPosition = (double)sumSameRankPosition / numSameRankPosition;
            sumPrecision += (double)numCorrectlyRetrieved / averageRankPosition;
        }

        double averagePrecision = sumPrecision / numberOfRelevant;
        metricValues.Add(sourceArtifact.Id, averagePrecision);
    }

    return metricValues;
}