/// <summary>
/// Runs the preprocessing pipeline end to end:
/// populates the stop list, rebuilds <c>GlobalData.WordDictionary</c> from every file under
/// <c>train</c>, loads the class labels from <c>class_name.txt</c>, writes one
/// "class Vs All" model input per class into <c>ModelInputs</c>, and finally converts every
/// file under <c>dev</c> (labelled through <c>dev_label.txt</c>) into <c>TestInput/test.txt</c>.
/// </summary>
/// <remarks>
/// All paths are relative to the current working directory. Results are accumulated in the
/// <c>outStringsDict</c>, <c>classLabelNames</c> and <c>testStrings</c> members, which are not
/// cleared here; NOTE(review): calling this twice on the same instance presumably fails on
/// duplicate dictionary keys - confirm against the field declarations.
/// </remarks>
/// <exception cref="KeyNotFoundException">
/// A file under <c>dev</c> has no entry in <c>dev_label.txt</c>.
/// </exception>
public void StartPreprocessing()
{
    // Read and populate the stop list.
    ReadStopList rsl = new ReadStopList();
    rsl.PopulateStopList();
    // Report completion only once the stop list has actually been populated.
    Console.WriteLine("Preprocessing of the stop file done!");

    // Initialise the word dictionary before it is populated in the later methods.
    GlobalData.WordDictionary = new Dictionary<string, int>();
    ReadEachData rd = new ReadEachData();
    Console.WriteLine("Building dictionaries from entire document collection..");
    string[] filePaths = Directory.GetFiles("train", "*.*", SearchOption.AllDirectories);
    foreach (string eachFile in filePaths)
    {
        outStringsDict.Add(eachFile, rd.ReadFileNConstructDictionaries(eachFile));
    }
    Console.WriteLine("Preprocessing of the train files done!");

    // Each line of class_name.txt holds two space-separated tokens; the second token
    // becomes the key and the first the value.
    string[] allClassNames = File.ReadAllLines("class_name.txt");
    foreach (string eachclass in allClassNames)
    {
        // A blank line (typically a trailing newline at the end of the file) carries
        // no label and would otherwise fail on ecl[1].
        if (eachclass.Trim().Length == 0)
        {
            continue;
        }

        string[] ecl = eachclass.Split(' ');
        classLabelNames.Add(ecl[1], ecl[0]);
    }
    Console.WriteLine("Processing of the class label files done!");

    List<string> allKeys = classLabelNames.Keys.ToList();
    Directory.CreateDirectory("ModelInputs");

    // Forms the model input for each class Vs the rest of the classes.
    for (int i = 0; i < allKeys.Count; i++)
    {
        Console.WriteLine("Creating model for " + allKeys[i] + " Vs All");
        // Path.Combine uses the platform separator, so the directories resolve on
        // non-Windows systems too (a literal '\' is just a file-name character there).
        string posDir = Path.Combine("train", allKeys[i]);
        string negDir = "train";
        WriteToModelInput(posDir, negDir, Path.Combine("ModelInputs", allKeys[i] + "_VS_All"));
    }

    Console.WriteLine("Preprocessing of the test files started!");

    // Store all dev labels: first token of each line -> second token.
    Dictionary<string, string> devlabelDict = new Dictionary<string, string>();
    string[] allDevLabels = File.ReadAllLines("dev_label.txt");
    foreach (string eachLine in allDevLabels)
    {
        string[] parts = eachLine.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries);
        // Skip blank lines; they produce no tokens at all.
        if (parts.Length == 0)
        {
            continue;
        }

        devlabelDict.Add(parts[0], parts[1]);
    }

    string[] testfilePaths = Directory.GetFiles("dev", "*.*", SearchOption.AllDirectories);
    ReadEachTestInput rtd = new ReadEachTestInput();
    foreach (string eachFile in testfilePaths)
    {
        // Labels are looked up by bare file name, not by full path.
        string fileName = Path.GetFileName(eachFile);
        string label;
        if (!devlabelDict.TryGetValue(fileName, out label))
        {
            // Same exception type as the plain indexer, but naming the offending file.
            throw new KeyNotFoundException("No label found in dev_label.txt for test file '" + fileName + "'.");
        }

        // The label must be set before the file is read.
        rtd.classlabel = label;
        testStrings.Add(rtd.ReadFileNConstructDictionaries(eachFile));
    }

    Directory.CreateDirectory("TestInput");
    WriteToInput(Path.Combine("TestInput", "test.txt"), testStrings);
    Console.WriteLine("Preprocessing of the test files done!");
}
/// <summary>
/// Prepares all training and test inputs. In order, this method:
/// (1) populates the stop list, (2) resets <c>GlobalData.WordDictionary</c> and processes every
/// file under <c>train</c>, (3) reads the class labels from <c>class_name.txt</c>, (4) calls
/// <c>WriteToModelInput</c> once per class to produce a "class Vs All" file in
/// <c>ModelInputs</c>, and (5) processes every file under <c>dev</c> with its label from
/// <c>dev_label.txt</c> and writes the result to <c>TestInput/test.txt</c>.
/// </summary>
/// <remarks>
/// Paths are resolved against the current working directory. The method adds to the
/// <c>outStringsDict</c>, <c>classLabelNames</c> and <c>testStrings</c> members without
/// clearing them first.
/// </remarks>
/// <exception cref="KeyNotFoundException">
/// Thrown when a file found under <c>dev</c> is not listed in <c>dev_label.txt</c>.
/// </exception>
public void StartPreprocessing()
{
    // (1) Stop list. The completion message is printed after the work, not before it.
    ReadStopList stopListReader = new ReadStopList();
    stopListReader.PopulateStopList();
    Console.WriteLine("Preprocessing of the stop file done!");

    // (2) Training documents. The word dictionary is re-created here, before the
    // per-file reads that follow.
    GlobalData.WordDictionary = new Dictionary<string, int>();
    ReadEachData trainReader = new ReadEachData();
    Console.WriteLine("Building dictionaries from entire document collection..");
    foreach (string trainFile in Directory.GetFiles("train", "*.*", SearchOption.AllDirectories))
    {
        outStringsDict.Add(trainFile, trainReader.ReadFileNConstructDictionaries(trainFile));
    }
    Console.WriteLine("Preprocessing of the train files done!");

    // (3) Class labels: token[1] of each line is the key, token[0] the value.
    foreach (string classLine in File.ReadAllLines("class_name.txt"))
    {
        // Ignore empty or whitespace-only lines instead of crashing on tokens[1].
        if (classLine.Trim().Length == 0)
        {
            continue;
        }

        string[] tokens = classLine.Split(' ');
        classLabelNames.Add(tokens[1], tokens[0]);
    }
    Console.WriteLine("Processing of the class label files done!");

    // (4) One model input per class, with that class's directory as the positive set
    // and the whole "train" tree passed as the negative directory.
    List<string> allKeys = classLabelNames.Keys.ToList();
    Directory.CreateDirectory("ModelInputs");
    foreach (string classKey in allKeys)
    {
        Console.WriteLine("Creating model for " + classKey + " Vs All");
        // Path.Combine instead of a hard-coded '\' keeps the paths valid on every platform.
        string posDir = Path.Combine("train", classKey);
        string negDir = "train";
        string outputPath = Path.Combine("ModelInputs", classKey + "_VS_All");
        WriteToModelInput(posDir, negDir, outputPath);
    }

    // (5) Test documents.
    Console.WriteLine("Preprocessing of the test files started!");

    // Map each dev file name (first token) to its label (second token).
    Dictionary<string, string> devlabelDict = new Dictionary<string, string>();
    foreach (string labelLine in File.ReadAllLines("dev_label.txt"))
    {
        string[] parts = labelLine.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries);
        if (parts.Length == 0)
        {
            // Blank line: nothing to record.
            continue;
        }

        devlabelDict.Add(parts[0], parts[1]);
    }

    ReadEachTestInput testReader = new ReadEachTestInput();
    foreach (string testFile in Directory.GetFiles("dev", "*.*", SearchOption.AllDirectories))
    {
        // The label table is keyed by file name only, so strip the directory part.
        string testFileName = Path.GetFileName(testFile);
        string classLabel;
        if (!devlabelDict.TryGetValue(testFileName, out classLabel))
        {
            // Keep the exception type of the dictionary indexer, but say which file is unlabelled.
            throw new KeyNotFoundException("No label found in dev_label.txt for test file '" + testFileName + "'.");
        }

        // Assign the label before reading the file.
        testReader.classlabel = classLabel;
        testStrings.Add(testReader.ReadFileNConstructDictionaries(testFile));
    }

    Directory.CreateDirectory("TestInput");
    WriteToInput(Path.Combine("TestInput", "test.txt"), testStrings);
    Console.WriteLine("Preprocessing of the test files done!");
}