/// <summary>
/// Re-keys <c>docTopicDensityMap</c> (document -> topic -> density) into
/// topic -> document -> density and, for every topic, appends one line per
/// document whose density is greater than zero to <c>rankResult</c>.
/// Each line has the form <c>topic \t file \t density \r\n</c>.
/// </summary>
/// <remarks>
/// The per-topic documents are passed through
/// <c>DictionaryDecreasedSort.DecreasedByValue</c> and written in the
/// enumeration order of the dictionary it returns (presumably descending by
/// density, going by the helper's name - confirm against its implementation).
/// The file name written is the document key with its first
/// <c>filePathLength</c> characters removed; a key shorter than that makes
/// <c>Substring</c> throw <see cref="System.ArgumentOutOfRangeException"/>.
/// </remarks>
private void WriteRankingResult()
        {
            Dictionary<string, Dictionary<string, float>> topicDocDensity = new Dictionary<string, Dictionary<string, float>>();

            // Invert the mapping: the primary key moves from document id to topic id.
            foreach (string doc in docTopicDensityMap.Keys)
            {
                Dictionary<string, float> topicDensity = docTopicDensityMap[doc];
                foreach (KeyValuePair<string, float> topicEntry in topicDensity)
                {
                    // Single lookup: fetch the per-topic bucket, creating it on first use.
                    Dictionary<string, float> docDensity;
                    if (!topicDocDensity.TryGetValue(topicEntry.Key, out docDensity))
                    {
                        docDensity = new Dictionary<string, float>();
                        topicDocDensity.Add(topicEntry.Key, docDensity);
                    }
                    docDensity.Add(doc, topicEntry.Value);
                }
            }

            foreach (KeyValuePair<string, Dictionary<string, float>> topicEntry in topicDocDensity)
            {
                string topicName = topicEntry.Key;
                Dictionary<string, float> sortedDocDensity = DictionaryDecreasedSort.DecreasedByValue(topicEntry.Value);

                // StringBuilder keeps the accumulation linear; repeated string
                // concatenation re-copied the whole buffer for every document.
                // Fully qualified so no extra using directive is required.
                System.Text.StringBuilder docAndDensity = new System.Text.StringBuilder();
                foreach (KeyValuePair<string, float> docEntry in sortedDocDensity)
                {
                    float density = docEntry.Value;
                    if (density > 0)
                    {
                        // Strip the leading directory part; only done for rows that are written.
                        string fileName = docEntry.Key.Substring(filePathLength);
                        docAndDensity.Append(topicName).Append('\t')
                                     .Append(fileName).Append('\t')
                                     .Append(density).Append("\r\n");
                    }
                }

                // Called once per topic even when nothing qualified, as before.
                FileOperators.FileAppend(rankResult, docAndDensity.ToString());
            }
        }
Exemple #2
0
        /// <summary>
        /// For every topic in <c>normalizedTopicTerms</c>, computes a TF-IDF file for
        /// the topic's terms, scores each document row of that file against the
        /// topic's term-weight vector with <c>VSM.calSimilarity</c>, and appends the
        /// documents with a similarity greater than zero to <c>simStorePath</c> as
        /// <c>topic \t file \t similarity \r\n</c> lines.
        /// </summary>
        /// <remarks>
        /// Side effects visible here: calls <c>getAllTopicTerms()</c>, overwrites the
        /// <c>tfidfStore</c> field for each topic, runs <c>TFIDF.calTfidf()</c>, appends
        /// to <c>simStorePath</c>, and prints "DONE!!" to the console.
        /// Rows are split on ';' and parsed with <see cref="float.Parse(string)"/>,
        /// i.e. using the current culture - presumably matching how the TF-IDF file
        /// is written; confirm against <c>TFIDF</c>.
        /// Blank rows are skipped. A non-blank row without ';' still throws from
        /// <c>Substring</c>, and a duplicate document name still throws from
        /// <c>Dictionary.Add</c>.
        /// </remarks>
        public void executeRank()
        {
            getAllTopicTerms(); // get topics and their related terms (normalisation happens there, per the original note)

            int txtDirLength = docsPath.Length;

            foreach (KeyValuePair<string, Dictionary<string, float>> entry in normalizedTopicTerms)
            {
                string topicName = entry.Key; // just the topic ID
                Dictionary<string, float> termAndValues = entry.Value;

                // Keys and Values of the same unmodified dictionary enumerate in matching order,
                // so terms[i] corresponds to queryVector[i].
                List<string> terms       = new List<string>(termAndValues.Keys);
                List<float>  queryVector = new List<float>(termAndValues.Values);

                // For each document, generate the TF-IDF values for this topic's key terms.
                tfidfStore = docsPath + "-ifidf\\" + topicName + ".csv";
                TFIDF tfidf = new TFIDF(terms, this.docsPath, tfidfStore);
                tfidf.calTfidf();

                string[] tfidfLines = FileOperators.ReadFileLines(tfidfStore);

                VSM vsm = new VSM();
                Dictionary<string, double> docAndRelevance = new Dictionary<string, double>();

                // Row 0 is skipped - presumably a header row; confirm against the TFIDF writer.
                for (int i = 1; i < tfidfLines.Length; i++)
                {
                    string curLine = tfidfLines[i];

                    // A blank row (e.g. a trailing empty line) has no ';' and would make
                    // Substring(0, -1) throw; there is nothing to score, so skip it.
                    if (curLine == null || curLine.Trim().Length == 0)
                    {
                        continue;
                    }

                    int    firstSeparator = curLine.IndexOf(';');
                    string fileName       = curLine.Substring(0, firstSeparator);
                    string[] valueTerms   = curLine.Substring(firstSeparator + 1).Split(';');

                    List<float> docVector = new List<float>(valueTerms.Length);
                    foreach (string valueTerm in valueTerms)
                    {
                        docVector.Add(float.Parse(valueTerm));
                    }

                    double sim = vsm.calSimilarity(docVector, queryVector);
                    if (sim > 0)
                    {
                        docAndRelevance.Add(fileName, sim); // similarity between this document and the topic
                    }
                }

                // Sort via the project helper, then emit rows in the order it returns.
                Dictionary<string, double> sortedByRelevance = DictionaryDecreasedSort.DecreasedByValue(docAndRelevance);

                // StringBuilder keeps the accumulation linear in the number of rows.
                // Fully qualified so no extra using directive is required.
                System.Text.StringBuilder simContent = new System.Text.StringBuilder();
                foreach (KeyValuePair<string, double> ranked in sortedByRelevance)
                {
                    // Drop the leading docsPath-length prefix from the stored document name.
                    string fileName = ranked.Key.Substring(txtDirLength);
                    simContent.Append(topicName).Append('\t')
                              .Append(fileName).Append('\t')
                              .Append(ranked.Value).Append("\r\n");
                }

                FileOperators.FileAppend(simStorePath, simContent.ToString()); // simStorePath should contain relativePath
            }
            Console.WriteLine("DONE!!");
        }