/// <summary>
/// Re-weights the query's term frequencies into TF-IDF values (in place), scores every
/// entry of <c>TfIdfDictionary</c> against the query via <c>Helper.GetSimilarity</c>,
/// and hands the scores to <c>WriteDocumentVectorToFileOrderedDescending</c>.
/// </summary>
/// <param name="queryTfIdfDictionary">
/// Query terms mapped to their raw frequencies. The values are overwritten with the
/// computed weights, so the caller's dictionary is modified.
/// </param>
/// <param name="appendTextToFileName">Suffix concatenated onto the "Vsm" output name.</param>
private void ExecuteSub(MyDoubleDictionary queryTfIdfDictionary, string appendTextToFileName = "")
{
    // Largest raw value in the query; every term frequency is divided by it below.
    // NOTE(review): if this is LINQ's Max over doubles, an empty query throws
    // InvalidOperationException here - confirm callers never pass one.
    double peakFrequency = queryTfIdfDictionary.Max(entry => entry.Value);

    // Walk a snapshot, because the dictionary itself is written to inside the loop.
    var termSnapshot = queryTfIdfDictionary.ToList();
    foreach (var term in termSnapshot)
    {
        // Terms with no IDF entry get a weight of zero.
        double weight = 0;
        if (IdfDictionary.ContainsKey(term.Key))
        {
            weight = (term.Value / peakFrequency) * IdfDictionary[term.Key];
        }

        queryTfIdfDictionary[term.Key] = weight;
    }

    // One similarity score per entry of TfIdfDictionary, keyed the same way.
    var scores = new MyDoubleDictionary();
    foreach (var document in TfIdfDictionary)
    {
        scores.Add(document.Key, Helper.GetSimilarity(queryTfIdfDictionary, document.Value));
    }

    // Persist the scores under "Vsm" followed by the optional suffix.
    string outputName = "Vsm" + appendTextToFileName;
    WriteDocumentVectorToFileOrderedDescending(outputName, scores);
}
/// <summary>
/// Computes the VSM scores for one query: builds the query's TF-IDF vector, scores every
/// entry of <c>TfIdfDictionary</c> against it and writes the scores to a file.
/// </summary>
/// <param name="outputFolderPath">
/// Prefix of the output path. It is concatenated directly with <c>VsmFileName</c>;
/// no separator is inserted here.
/// </param>
/// <param name="bugName">Name used only in the status messages.</param>
/// <param name="queryText">The query text, as a list of terms.</param>
public static void ComputeVsm(string outputFolderPath, string bugName, List<string> queryText)
{
    Utility.Status("Creating VSM: " + bugName);

    // Build the query dictionary by adding each term in turn.
    // NOTE(review): presumably Add(string) counts occurrences, giving raw term
    // frequencies - confirm against MyDoubleDictionary.
    MyDoubleDictionary queryTfIdfDictionary = new MyDoubleDictionary();
    foreach (string term in queryText)
    {
        queryTfIdfDictionary.Add(term);
    }

    // Largest value in the query; every term frequency is divided by it below.
    // NOTE(review): if this is LINQ's Max over doubles, an empty queryText throws
    // InvalidOperationException here - confirm callers never pass one.
    double peakFrequency = queryTfIdfDictionary.Max(entry => entry.Value);

    // Turn the frequencies into TF-IDF weights. A snapshot is iterated because the
    // dictionary itself is written to inside the loop.
    var termSnapshot = queryTfIdfDictionary.ToList();
    foreach (var entry in termSnapshot)
    {
        // Terms with no IDF entry get a weight of zero.
        double weight = 0;
        if (IdfDictionary.ContainsKey(entry.Key))
        {
            weight = (entry.Value / peakFrequency) * IdfDictionary[entry.Key];
        }

        queryTfIdfDictionary[entry.Key] = weight;
    }

    // Similarity of the query against each entry of TfIdfDictionary. The calculator
    // is created once, from the finished query vector, and reused for every entry.
    MyDoubleDictionary scores = new MyDoubleDictionary();
    CosineSimilarityCalculator calculator = new CosineSimilarityCalculator(queryTfIdfDictionary);
    foreach (var document in TfIdfDictionary)
    {
        scores.Add(document.Key, calculator.GetSimilarity(document.Value));
    }

    // Write the scores in descending order (e.g. Project\001\Results\Vsm.txt, per the
    // original note; the actual value of VsmFileName is defined elsewhere).
    string outputPath = outputFolderPath + VsmFileName;
    WriteDocumentVectorToFileOrderedDescending(outputPath, scores);

    Utility.Status("Completed VSM: " + bugName);
}