Пример #1
0
        /// <summary>
        /// Counts how often each entity key occurs across the questions in a tab-separated
        /// input file and writes the counts to <paramref name="outputFilePath"/>.
        /// </summary>
        /// <remarks>
        /// For every input line the text before the first tab is passed to
        /// <c>hashClient.Cover</c>; each returned item's <c>Key</c> has its count in
        /// <c>entityFrequencyDict</c> incremented. All dictionary entries are then appended to
        /// <c>entityFrequencyList</c>, sorted with <c>DinoComparerStringInt</c> (the ordering is
        /// defined by that comparer) and written as <c>key&lt;TAB&gt;value</c> lines.
        /// NOTE(review): <c>entityFrequencyDict</c> and <c>entityFrequencyList</c> are declared
        /// outside this method and are not cleared here, so a second call on the same instance
        /// would append the entries again - confirm this is intended.
        /// </remarks>
        /// <param name="inpuFilePath">Path of the file to read; only the first tab-separated column of each line is used.</param>
        /// <param name="outputFilePath">Path of the file to write; one <c>key&lt;TAB&gt;value</c> line per list entry.</param>
        public void Process(string inpuFilePath, string outputFilePath)
        {
            // 'using' releases the file handle even when Cover or the dictionary update throws.
            using (StreamReader reader = new StreamReader(inpuFilePath))
            {
                string currentLine;
                while ((currentLine = reader.ReadLine()) != null)
                {
                    // Split always yields at least one element, so index 0 is safe.
                    string question = currentLine.Split('\t')[0];
                    var entities = hashClient.Cover(question);
                    foreach (var item in entities)
                    {
                        if (!entityFrequencyDict.ContainsKey(item.Key))
                        {
                            entityFrequencyDict.Add(item.Key, 0);
                        }

                        entityFrequencyDict[item.Key]++;
                    }
                }
            }

            foreach (var item in entityFrequencyDict)
            {
                entityFrequencyList.Add(item);
            }

            DinoComparerStringInt dc = new DinoComparerStringInt();
            entityFrequencyList.Sort(dc);

            // The output file is created only after reading and sorting have succeeded.
            using (StreamWriter writer = new StreamWriter(outputFilePath))
            {
                foreach (var item in entityFrequencyList)
                {
                    writer.WriteLine(item.Key + "\t" + item.Value);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Counts how often each space-separated token occurs in the questions of a
        /// tab-separated input file and writes the counts to <paramref name="outputFilePath"/>.
        /// </summary>
        /// <remarks>
        /// Each line is lower-cased, the text before the first tab is taken as the question and
        /// split on single spaces. Consecutive spaces therefore produce empty-string tokens,
        /// which are counted like any other token. Counts are accumulated in
        /// <c>tokenToFrequency</c>, which is declared outside this method and is not cleared
        /// here. The entries are sorted with <c>DinoComparerStringInt</c> (the ordering is
        /// defined by that comparer) and written as <c>token&lt;TAB&gt;count</c> lines.
        /// NOTE(review): <c>ToLower()</c> uses the current culture; consider
        /// <c>ToLowerInvariant()</c> if the output must not depend on the machine's locale.
        /// </remarks>
        /// <param name="inputFilePath">Path of the file to read; only the first tab-separated column of each line is used.</param>
        /// <param name="outputFilePath">Path of the file to write; one <c>token&lt;TAB&gt;count</c> line per distinct token.</param>
        public void ComputeQuestionToken(string inputFilePath, string outputFilePath)
        {
            // 'using' releases the file handle even if an exception is thrown while counting.
            using (StreamReader reader = new StreamReader(inputFilePath))
            {
                string currentLine;
                while ((currentLine = reader.ReadLine()) != null)
                {
                    currentLine = currentLine.ToLower();
                    string question = currentLine.Split('\t')[0];
                    string[] tokens = question.Split(' ');
                    foreach (string token in tokens)
                    {
                        // TryGetValue leaves count at 0 for an unseen token, so a single
                        // lookup plus one indexer set replaces ContainsKey + Add + indexer.
                        int count;
                        tokenToFrequency.TryGetValue(token, out count);
                        tokenToFrequency[token] = count + 1;
                    }
                }
            }

            List<KeyValuePair<string, int>> tokenFrequencyList = new List<KeyValuePair<string, int>>();
            foreach (var item in tokenToFrequency)
            {
                tokenFrequencyList.Add(item);
            }

            DinoComparerStringInt dc = new DinoComparerStringInt();
            tokenFrequencyList.Sort(dc);

            // The output file is created only after reading and sorting have succeeded.
            using (StreamWriter writer = new StreamWriter(outputFilePath))
            {
                foreach (var item in tokenFrequencyList)
                {
                    writer.WriteLine(item.Key + "\t" + item.Value);
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Reads <c>key&lt;TAB&gt;count</c> lines from <paramref name="inputFile"/>, sorts them with
        /// <c>DinoComparerStringInt</c> (the ordering is defined by that comparer) and writes at
        /// most the first 50000 sorted entries to <paramref name="outputFile"/>.
        /// </summary>
        /// <remarks>
        /// If the input holds fewer than 50000 entries, all of them are written; previously the
        /// fixed-length loop indexed past the end of the list and threw
        /// <see cref="ArgumentOutOfRangeException"/>. The output file is opened only after the
        /// input has been read and sorted, so a malformed input no longer leaves a truncated
        /// output file behind.
        /// </remarks>
        /// <param name="inputFile">Path of the file to read; each line must contain at least two tab-separated columns, the second being an integer.</param>
        /// <param name="outputFile">Path of the file to write; one <c>key&lt;TAB&gt;count</c> line per retained entry.</param>
        /// <exception cref="IndexOutOfRangeException">A line has no tab-separated second column.</exception>
        /// <exception cref="FormatException">The second column of a line is not a valid integer.</exception>
        public void Process(string inputFile, string outputFile)
        {
            // Upper bound on the number of entries written to the output.
            const int maxEntries = 50000;

            List<KeyValuePair<string, int>> entityFrequency = new List<KeyValuePair<string, int>>();

            // 'using' releases the file handle even if a line fails to parse.
            using (StreamReader reader = new StreamReader(inputFile))
            {
                string currentLine;
                while ((currentLine = reader.ReadLine()) != null)
                {
                    string[] tokens = currentLine.Split('\t');
                    entityFrequency.Add(new KeyValuePair<string, int>(tokens[0], Convert.ToInt32(tokens[1])));
                }
            }

            DinoComparerStringInt dc = new DinoComparerStringInt();
            entityFrequency.Sort(dc);

            // Clamp to the number of entries actually read so short inputs do not overrun the list.
            int limit = Math.Min(maxEntries, entityFrequency.Count);

            using (StreamWriter writer = new StreamWriter(outputFile))
            {
                for (int i = 0; i < limit; i++)
                {
                    writer.WriteLine(entityFrequency[i].Key + "\t" + entityFrequency[i].Value);
                }
            }
        }