/// <summary>
/// Computes a vector of the average weight for each term in the matrix,
/// taken over the documents identified by <paramref name="IDs"/>.
/// </summary>
/// <param name="matrix">Input matrix</param>
/// <param name="IDs">Collection of artifacts ids</param>
/// <returns>
/// Average vector with one entry per term. When <paramref name="IDs"/> is empty
/// the vector is all zeros (instead of the NaN that 0/0 would produce).
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="matrix"/> or <paramref name="IDs"/> is null.
/// </exception>
public static double[] ComputeAverageVector(TermDocumentMatrix matrix, IEnumerable<string> IDs)
{
    if (matrix == null)
    {
        throw new ArgumentNullException("matrix");
    }
    if (IDs == null)
    {
        throw new ArgumentNullException("IDs");
    }

    // Resolve every id to its document index exactly once. This enumerates IDs a
    // single time (safe for deferred sequences) and keeps the index lookup and the
    // element count out of the per-term loop.
    // NOTE(review): assumes GetDocumentIndex is a pure lookup whose result does not
    // change while this method runs - confirm against TermDocumentMatrix.
    List<int> docIndices = new List<int>();
    foreach (string docID in IDs)
    {
        docIndices.Add(matrix.GetDocumentIndex(docID));
    }

    int numTerms = matrix.NumTerms;
    double[] avg = new double[numTerms];

    // Nothing to average over: return the zero vector rather than dividing by zero.
    if (docIndices.Count == 0)
    {
        return avg;
    }

    for (int j = 0; j < numTerms; j++)
    {
        // Accumulate in the order the ids were supplied so the floating-point
        // result matches a straightforward per-document summation.
        double sum = 0.0;
        foreach (int docIndex in docIndices)
        {
            sum += matrix[docIndex, j];
        }
        avg[j] = sum / docIndices.Count;
    }
    return avg;
}
/// <summary>
/// Computes a vector of the average weight for each term in the matrix,
/// taken over the documents identified by <paramref name="IDs"/>.
/// </summary>
/// <param name="matrix">Input matrix</param>
/// <param name="IDs">Collection of artifacts ids</param>
/// <returns>
/// Average vector with one entry per term. When <paramref name="IDs"/> is empty
/// the vector is all zeros (instead of the NaN that 0/0 would produce).
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="matrix"/> or <paramref name="IDs"/> is null.
/// </exception>
public static double[] ComputeAverageVector(TermDocumentMatrix matrix, IEnumerable<string> IDs)
{
    if (matrix == null)
    {
        throw new ArgumentNullException("matrix");
    }
    if (IDs == null)
    {
        throw new ArgumentNullException("IDs");
    }

    // Resolve every id to its document index exactly once. This enumerates IDs a
    // single time (safe for deferred sequences) and keeps the index lookup and the
    // element count out of the per-term loop.
    // NOTE(review): assumes GetDocumentIndex is a pure lookup whose result does not
    // change while this method runs - confirm against TermDocumentMatrix.
    List<int> docIndices = new List<int>();
    foreach (string docID in IDs)
    {
        docIndices.Add(matrix.GetDocumentIndex(docID));
    }

    int numTerms = matrix.NumTerms;
    double[] avg = new double[numTerms];

    // Nothing to average over: return the zero vector rather than dividing by zero.
    if (docIndices.Count == 0)
    {
        return avg;
    }

    for (int j = 0; j < numTerms; j++)
    {
        // Accumulate in the order the ids were supplied so the floating-point
        // result matches a straightforward per-document summation.
        double sum = 0.0;
        foreach (int docIndex in docIndices)
        {
            sum += matrix[docIndex, j];
        }
        avg[j] = sum / docIndices.Count;
    }
    return avg;
}
/// <summary>
/// Smoothing filter from ICPC'11 paper "Improving IR-based Traceability Recovery Using Smoothing Filters".
/// Subtracts the per-term average weight (computed over the given documents) from each
/// of those documents and clamps negative results to zero.
/// </summary>
/// <param name="matrix">Term-by-document matrix. It is modified in place.</param>
/// <param name="IDs">Collection of document ids to smooth.</param>
/// <returns>Smoothed artifacts (the same <paramref name="matrix"/> instance that was passed in).</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="matrix"/> or <paramref name="IDs"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// The computed average vector does not have one entry per term of the matrix.
/// </exception>
public static TermDocumentMatrix Compute(TermDocumentMatrix matrix, IEnumerable<string> IDs)
{
    if (matrix == null)
    {
        throw new ArgumentNullException("matrix");
    }
    if (IDs == null)
    {
        throw new ArgumentNullException("IDs");
    }

    // Snapshot the ids once: the sequence is consumed both by the average
    // computation and by the subtraction loop below, and a deferred or one-shot
    // enumerable could otherwise yield a different set of documents each time.
    List<string> docIDs = new List<string>(IDs);

    double[] avg = WeightUtil.ComputeAverageVector(matrix, docIDs);
    if (avg.Length != matrix.NumTerms)
    {
        throw new ArgumentException("Average vector does not have the correct number of terms.");
    }

    foreach (string docID in docIDs)
    {
        int i = matrix.GetDocumentIndex(docID);
        for (int j = 0; j < matrix.NumTerms; j++)
        {
            matrix[i, j] -= avg[j];
            // Weights are not allowed to go negative after removing the average.
            if (matrix[i, j] < 0.0)
            {
                matrix[i, j] = 0.0;
            }
        }
    }
    return matrix;
}
/// <summary>
/// Smoothing filter from ICPC'11 paper "Improving IR-based Traceability Recovery Using Smoothing Filters".
/// Subtracts the per-term average weight (computed over the given documents) from each
/// of those documents and clamps negative results to zero.
/// </summary>
/// <param name="matrix">Term-by-document matrix. It is modified in place.</param>
/// <param name="IDs">Collection of document ids to smooth.</param>
/// <returns>Smoothed artifacts (the same <paramref name="matrix"/> instance that was passed in).</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="matrix"/> or <paramref name="IDs"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// The computed average vector does not have one entry per term of the matrix.
/// </exception>
public static TermDocumentMatrix Compute(TermDocumentMatrix matrix, IEnumerable<string> IDs)
{
    if (matrix == null)
    {
        throw new ArgumentNullException("matrix");
    }
    if (IDs == null)
    {
        throw new ArgumentNullException("IDs");
    }

    // Snapshot the ids once: the sequence is consumed both by the average
    // computation and by the subtraction loop below, and a deferred or one-shot
    // enumerable could otherwise yield a different set of documents each time.
    List<string> docIDs = new List<string>(IDs);

    double[] avg = WeightUtil.ComputeAverageVector(matrix, docIDs);
    if (avg.Length != matrix.NumTerms)
    {
        throw new ArgumentException("Average vector does not have the correct number of terms.");
    }

    foreach (string docID in docIDs)
    {
        int i = matrix.GetDocumentIndex(docID);
        for (int j = 0; j < matrix.NumTerms; j++)
        {
            matrix[i, j] -= avg[j];
            // Weights are not allowed to go negative after removing the average.
            if (matrix[i, j] < 0.0)
            {
                matrix[i, j] = 0.0;
            }
        }
    }
    return matrix;
}