/// <summary>
/// Reads a tab-separated file line by line, takes the first column of each line as a question,
/// asks <c>hashClient.Cover</c> for the entities of that question, and accumulates one count per
/// returned item in <c>entityFrequencyDict</c>. The accumulated counts are then appended to
/// <c>entityFrequencyList</c>, sorted with <c>DinoComparerStringInt</c>, and written out.
/// </summary>
/// <param name="inpuFilePath">Path of the input file; only the text before the first tab of each line is used.</param>
/// <param name="outputFilePath">Path of the output file; each line is an entity key, a tab, and its count.</param>
public void Process(string inpuFilePath, string outputFilePath)
{
    // 'using' guarantees the reader is released even if Cover() or the dictionary update throws.
    using (StreamReader reader = new StreamReader(inpuFilePath))
    {
        string currentLine;
        while ((currentLine = reader.ReadLine()) != null)
        {
            string question = currentLine.Split('\t')[0];
            var entities = hashClient.Cover(question);
            foreach (var item in entities)
            {
                if (!entityFrequencyDict.ContainsKey(item.Key))
                {
                    entityFrequencyDict.Add(item.Key, 0);
                }

                entityFrequencyDict[item.Key]++;
            }
        }
    }

    // NOTE(review): entityFrequencyDict and entityFrequencyList are declared outside this method and
    // are not cleared here, so a second call would presumably append to earlier results - confirm intent.
    foreach (var item in entityFrequencyDict)
    {
        entityFrequencyList.Add(item);
    }

    // The resulting order is whatever DinoComparerStringInt defines (declared elsewhere).
    DinoComparerStringInt dc = new DinoComparerStringInt();
    entityFrequencyList.Sort(dc);

    // The writer is opened only after counting and sorting succeeded, and is flushed/closed on every path.
    using (StreamWriter writer = new StreamWriter(outputFilePath))
    {
        foreach (var item in entityFrequencyList)
        {
            writer.WriteLine(item.Key + "\t" + item.Value);
        }
    }
}
/// <summary>
/// Reads a tab-separated file line by line, lower-cases each line, splits the first column on
/// single spaces, and accumulates a count per token in <c>tokenToFrequency</c>. The counts are
/// then sorted with <c>DinoComparerStringInt</c> and written out.
/// </summary>
/// <param name="inputFilePath">Path of the input file; only the text before the first tab of each line is used.</param>
/// <param name="outputFilePath">Path of the output file; each line is a token, a tab, and its count.</param>
public void ComputeQuestionToken(string inputFilePath, string outputFilePath)
{
    // 'using' guarantees the reader is released even if reading or counting throws.
    using (StreamReader reader = new StreamReader(inputFilePath))
    {
        string currentLine;
        while ((currentLine = reader.ReadLine()) != null)
        {
            currentLine = currentLine.ToLower();
            string question = currentLine.Split('\t')[0];

            // Splitting on a single space means consecutive spaces yield empty tokens,
            // which are counted like any other token (same as before).
            string[] tokens = question.Split(' ');
            foreach (string token in tokens)
            {
                // TryGetValue leaves 'count' at 0 for an unseen token, so one lookup plus one
                // store replaces the former ContainsKey / Add / indexer sequence.
                int count;
                tokenToFrequency.TryGetValue(token, out count);
                tokenToFrequency[token] = count + 1;
            }
        }
    }

    // NOTE(review): tokenToFrequency is declared outside this method and is not cleared here,
    // so counts presumably accumulate across calls - confirm intent.
    List<KeyValuePair<string, int>> tokenFrequencyList = new List<KeyValuePair<string, int>>();
    foreach (var item in tokenToFrequency)
    {
        tokenFrequencyList.Add(item);
    }

    // The resulting order is whatever DinoComparerStringInt defines (declared elsewhere).
    DinoComparerStringInt dc = new DinoComparerStringInt();
    tokenFrequencyList.Sort(dc);

    // The writer is flushed and closed on every path, including when a write fails.
    using (StreamWriter writer = new StreamWriter(outputFilePath))
    {
        foreach (var item in tokenFrequencyList)
        {
            writer.WriteLine(item.Key + "\t" + item.Value);
        }
    }
}
/// <summary>
/// Loads "key, tab, integer" rows from a file, sorts them with <c>DinoComparerStringInt</c>,
/// and writes the first 50000 rows of the sorted list (or all rows when there are fewer).
/// </summary>
/// <param name="inputFile">Path of the input file; column 0 is the key, column 1 is parsed with <c>Convert.ToInt32</c>.</param>
/// <param name="outputFile">Path of the output file; each line is a key, a tab, and its value.</param>
public void Process(string inputFile, string outputFile)
{
    // Upper bound on the number of rows written.
    const int maxEntries = 50000;

    List<KeyValuePair<string, int>> entityFrequency = new List<KeyValuePair<string, int>>();

    // 'using' guarantees the reader is released even if a row fails to parse.
    using (StreamReader reader = new StreamReader(inputFile))
    {
        string currentLine;
        while ((currentLine = reader.ReadLine()) != null)
        {
            string[] tokens = currentLine.Split('\t');
            entityFrequency.Add(new KeyValuePair<string, int>(tokens[0], Convert.ToInt32(tokens[1])));
        }
    }

    // The resulting order is whatever DinoComparerStringInt defines (declared elsewhere).
    DinoComparerStringInt dc = new DinoComparerStringInt();
    entityFrequency.Sort(dc);

    // Clamp to the number of rows actually read: indexing up to a fixed 50000 threw
    // ArgumentOutOfRangeException for any smaller input.
    int limit = Math.Min(maxEntries, entityFrequency.Count);

    // The output file is opened only after the input was read and sorted, so a bad input
    // no longer leaves a truncated output file behind.
    using (StreamWriter writer = new StreamWriter(outputFile))
    {
        for (int i = 0; i < limit; i++)
        {
            writer.WriteLine(entityFrequency[i].Key + "\t" + entityFrequency[i].Value);
        }
    }
}