/// <summary>
/// Writes the per-topic document ranking to <c>rankResult</c>.
/// </summary>
/// <remarks>
/// Re-keys <c>docTopicDensityMap</c> (document -> topic -> density) into
/// topic -> document -> density, orders each topic's documents with
/// <c>DictionaryDecreasedSort.DecreasedByValue</c>, and appends one
/// "topic TAB file TAB density CRLF" line per document whose density is
/// greater than zero. One append is issued per topic, even when no document
/// qualifies (the appended text is then empty).
/// </remarks>
private void WriteRankingResult()
{
    // Transfer the primary key from document id to topic id.
    Dictionary<string, Dictionary<string, float>> topicDocDensity =
        new Dictionary<string, Dictionary<string, float>>();

    foreach (string doc in docTopicDensityMap.Keys)
    {
        Dictionary<string, float> topicDensity = docTopicDensityMap[doc];
        foreach (KeyValuePair<string, float> topicEntry in topicDensity)
        {
            // Single lookup: fetch the topic's bucket, creating it on first sight.
            Dictionary<string, float> docDensity;
            if (!topicDocDensity.TryGetValue(topicEntry.Key, out docDensity))
            {
                docDensity = new Dictionary<string, float>();
                topicDocDensity.Add(topicEntry.Key, docDensity);
            }

            docDensity.Add(doc, topicEntry.Value);
        }
    }

    foreach (KeyValuePair<string, Dictionary<string, float>> topicEntry in topicDocDensity)
    {
        string topicName = topicEntry.Key;

        // NOTE(review): presumably sorted by descending density, and the output
        // order relies on the returned dictionary enumerating in that order;
        // the helper is defined elsewhere -- confirm.
        Dictionary<string, float> sortedDocDensity =
            DictionaryDecreasedSort.DecreasedByValue(topicEntry.Value);

        // StringBuilder avoids re-copying the accumulated text on every line.
        System.Text.StringBuilder docAndDensity = new System.Text.StringBuilder();
        foreach (KeyValuePair<string, float> docEntry in sortedDocDensity)
        {
            float freq = docEntry.Value;
            if (freq > 0)
            {
                // Drop the first filePathLength characters of the document key
                // (assumes every key is at least that long -- TODO confirm).
                string fileName = docEntry.Key.Substring(filePathLength);

                // freq.ToString() keeps the current-culture formatting that the
                // original string concatenation produced.
                docAndDensity.Append(topicName)
                             .Append("\t")
                             .Append(fileName)
                             .Append("\t")
                             .Append(freq.ToString())
                             .Append("\r\n");
            }
        }

        FileOperators.FileAppend(rankResult, docAndDensity.ToString());
    }
}
/// <summary>
/// Ranks documents against every topic and appends the results to
/// <c>simStorePath</c>.
/// </summary>
/// <remarks>
/// For each entry of <c>normalizedTopicTerms</c> this method:
/// runs <c>TFIDF.calTfidf</c> for the topic's terms, reads the resulting
/// ';'-separated file back, computes <c>VSM.calSimilarity</c> between each
/// document row and the topic's weight vector, keeps rows whose similarity is
/// greater than zero, sorts them with
/// <c>DictionaryDecreasedSort.DecreasedByValue</c>, and appends
/// "topic TAB file TAB similarity CRLF" lines to <c>simStorePath</c>.
/// Blank lines in the tf-idf file are ignored.
/// </remarks>
public void executeRank()
{
    getAllTopicTerms(); // get topic and the related terms, and do the normalization
    int txtDirLength = docsPath.Length;

    foreach (KeyValuePair<string, Dictionary<string, float>> entry in normalizedTopicTerms)
    {
        string topicName = entry.Key; // just the topic ID
        Dictionary<string, float> termAndValues = entry.Value;

        // Keys and Values of the same unmodified dictionary enumerate in matching
        // order, so terms[i] corresponds to queryVector[i].
        List<string> terms = new List<string>(termAndValues.Keys);
        List<float> queryVector = new List<float>(termAndValues.Values);

        // For each document, generate the tf-idf according to the key terms of the topic.
        tfidfStore = docsPath + "-ifidf\\" + topicName + ".csv";
        TFIDF tfidf = new TFIDF(terms, this.docsPath, tfidfStore);
        tfidf.calTfidf();

        string[] tfidfLines = FileOperators.ReadFileLines(tfidfStore);
        VSM vsm = new VSM();
        Dictionary<string, double> docAndRelevance = new Dictionary<string, double>();

        // Row 0 is skipped (presumably a header row -- confirm against TFIDF's writer).
        for (int i = 1; i < tfidfLines.Length; i++)
        {
            string curLine = tfidfLines[i];

            // A blank line (e.g. from a trailing newline) has no ';' and would make
            // Substring(0, -1) throw; skip it instead of aborting the whole run.
            if (curLine == null || curLine.Trim().Length == 0)
            {
                continue;
            }

            // First field is the file name; the remaining fields are the weights.
            int firstSeparator = curLine.IndexOf(';');
            string fileName = curLine.Substring(0, firstSeparator);
            string[] valueTerms = curLine.Substring(firstSeparator + 1).Split(';');

            List<float> docVector = new List<float>(valueTerms.Length);
            foreach (string valueTerm in valueTerms)
            {
                // NOTE(review): parses with the current culture, matching the original;
                // verify this agrees with how TFIDF writes the numbers.
                docVector.Add(float.Parse(valueTerm));
            }

            double sim = vsm.calSimilarity(docVector, queryVector);
            if (sim > 0)
            {
                docAndRelevance.Add(fileName, sim); // similarity between doc and topic
            }
        }

        // Execute decreasing sort on docAndRelevance.
        Dictionary<string, double> sortedByRelevance =
            DictionaryDecreasedSort.DecreasedByValue(docAndRelevance);

        // StringBuilder avoids re-copying the accumulated text on every line.
        System.Text.StringBuilder simContent = new System.Text.StringBuilder();
        foreach (KeyValuePair<string, double> ranked in sortedByRelevance)
        {
            // Drop the first docsPath.Length characters of the stored file name
            // (assumes names in the tf-idf file start with docsPath -- TODO confirm).
            string fileName = ranked.Key.Substring(txtDirLength);

            // ToString() keeps the current-culture formatting of the original concatenation.
            simContent.Append(topicName)
                      .Append("\t")
                      .Append(fileName)
                      .Append("\t")
                      .Append(ranked.Value.ToString())
                      .Append("\r\n");
        }

        FileOperators.FileAppend(simStorePath, simContent.ToString()); // simStorePath should contain relativePath
    }

    Console.WriteLine("DONE!!");
}