Exemple #1
0
        /// <summary>
        /// Computes cosine similarities between source and target artifacts via the Vector Space Model,
        /// using the term weighting selected by <paramref name="weight"/>.
        /// </summary>
        /// <param name="source">Source artifacts</param>
        /// <param name="target">Target artifacts</param>
        /// <param name="weight">Weighting scheme</param>
        /// <returns>Similarity matrix</returns>
        /// <exception cref="NotImplementedException">
        /// Thrown when <paramref name="weight"/> is not one of the schemes handled here.
        /// </exception>
        public static TLSimilarityMatrix Compute(TLArtifactsCollection source, TLArtifactsCollection target, VSMWeightEnum weight)
        {
            if (weight == VSMWeightEnum.TFIDF)
            {
                // One matrix built over both collections, TF-IDF weighted before comparison.
                var weightedMatrix = WeightUtil.ComputeTFIDF(new TermDocumentMatrix(source, target));
                return SimilarityUtil.ComputeCosine(weightedMatrix, source.Keys, target.Keys);
            }

            if (weight == VSMWeightEnum.BooleanQueriesAndTFIDFCorpus)
            {
                // Separate matrices: binary term frequency for the source side, TF-IDF for the target side.
                var sourceMatrix = WeightUtil.ComputeBinaryTF(new TermDocumentMatrix(source));
                var targetMatrix = WeightUtil.ComputeTFIDF(new TermDocumentMatrix(target));
                return SimilarityUtil.ComputeCosine(sourceMatrix, targetMatrix);
            }

            if (weight == VSMWeightEnum.NoWeight)
            {
                // The joint matrix is compared as constructed, with no weighting step.
                var unweightedMatrix = new TermDocumentMatrix(source, target);
                return SimilarityUtil.ComputeCosine(unweightedMatrix, source.Keys, target.Keys);
            }

            throw new NotImplementedException("Unknown weighting scheme \"" + weight + "\"");
        }
        /// <summary>
        /// Smoothing filter from ICPC'11 paper "Improving IR-based Traceability Recovery Using Smoothing Filters"
        /// </summary>
        /// <remarks>
        /// Subtracts the vector returned by <c>WeightUtil.ComputeAverageVector</c> from every document
        /// in the matrix, replacing any resulting negative entry with zero. The supplied matrix is
        /// modified in place and the same instance is returned.
        /// </remarks>
        /// <param name="matrix">Term-by-document matrix</param>
        /// <returns>Smoothed artifacts</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when the average vector's length differs from the matrix's number of terms.
        /// </exception>
        public static TermDocumentMatrix Compute(TermDocumentMatrix matrix)
        {
            double[] termAverages = WeightUtil.ComputeAverageVector(matrix);

            if (termAverages.Length != matrix.NumTerms)
            {
                throw new ArgumentException("Average vector does not have the correct number of terms.");
            }

            for (int doc = 0; doc < matrix.NumDocs; doc++)
            {
                for (int term = 0; term < matrix.NumTerms; term++)
                {
                    // Remove the per-term average; entries that would drop below zero are floored at zero.
                    double shifted = matrix[doc, term] - termAverages[term];
                    matrix[doc, term] = (shifted < 0.0) ? 0.0 : shifted;
                }
            }

            return matrix;
        }
        /// <summary>
        /// Smoothing filter from ICPC'11 paper "Improving IR-based Traceability Recovery Using Smoothing Filters",
        /// applied only to the documents named in <paramref name="IDs"/>.
        /// </summary>
        /// <remarks>
        /// Subtracts the vector returned by <c>WeightUtil.ComputeAverageVector(matrix, IDs)</c> from each
        /// listed document, replacing any resulting negative entry with zero. The supplied matrix is
        /// modified in place and the same instance is returned. An id that appears more than once in
        /// <paramref name="IDs"/> is processed once per occurrence.
        /// </remarks>
        /// <param name="matrix">Term-by-document matrix</param>
        /// <param name="IDs">Collection of document ids to smooth.</param>
        /// <returns>Smoothed artifacts</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="IDs"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// Thrown when the average vector's length differs from the matrix's number of terms.
        /// </exception>
        public static TermDocumentMatrix Compute(TermDocumentMatrix matrix, IEnumerable <string> IDs)
        {
            if (IDs == null)
            {
                throw new ArgumentNullException("IDs");
            }

            // The ids are consumed twice (averaging, then smoothing). Snapshot them once so a lazily
            // evaluated or one-shot sequence yields the same documents in both passes. Collections
            // that are already materialized are used as-is to avoid a needless copy.
            IEnumerable<string> docIDs = (IDs as ICollection<string>) ?? new List<string>(IDs);

            double[] termAverages = WeightUtil.ComputeAverageVector(matrix, docIDs);

            if (termAverages.Length != matrix.NumTerms)
            {
                throw new ArgumentException("Average vector does not have the correct number of terms.");
            }

            foreach (string docID in docIDs)
            {
                int doc = matrix.GetDocumentIndex(docID);
                for (int term = 0; term < matrix.NumTerms; term++)
                {
                    // Remove the per-term average; entries that would drop below zero are floored at zero.
                    double shifted = matrix[doc, term] - termAverages[term];
                    matrix[doc, term] = (shifted < 0.0) ? 0.0 : shifted;
                }
            }

            return matrix;
        }