/// <summary>
/// Compares the number of fields in a split row against
/// <c>sharedState.Arguments.Columns</c> and, on a mismatch, writes a
/// diagnostic line (expected count, found count, raw row) to the error file.
/// </summary>
/// <param name="splitLine">The fields of the row after splitting.</param>
/// <param name="line">The raw row text; echoed in the error message.</param>
/// <param name="sharedState">Holds the parsed arguments, including the expected column count.</param>
/// <param name="errorFile">Writer that receives the error message; it is locked while the message is written.</param>
/// <returns>
/// <c>true</c> when the column count does NOT match (an error was logged);
/// <c>false</c> when the row has the expected number of columns.
/// </returns>
private static bool ValidateColumns(string[] splitLine, string line, SharedStateObject sharedState, StreamWriter errorFile)
{
    // Array.Length gives the same value as LINQ Count() without the extension-method call.
    var found = splitLine.Length;

    // Equals(...) is kept from the original comparison: the declared type of
    // Columns is not visible here, so the comparison semantics are left untouched.
    if (found.Equals(sharedState.Arguments.Columns))
    {
        return false;
    }

    lock (errorFile)
    {
        errorFile.WriteLine("Invalid number of columns expecting '" + sharedState.Arguments.Columns.ToString() + "' found '" + found.ToString() + "' in data \n\t" + line);
    }

    return true;
}
/// <summary>
/// Reads each file in <paramref name="fileList"/> line by line, splits every
/// line on '|', validates it, and for lines without errors writes the first
/// field, a tab, and the line's hash to <paramref name="outputFile"/>.
/// Progress and per-file timing are printed to the console.
/// </summary>
/// <param name="sharedState">Holds the parsed arguments (expected column count, whether a hash is included).</param>
/// <param name="fileList">Paths of the files to process.</param>
/// <param name="outputFile">Writer that receives the "ID, tab, hash" lines; it is locked around each write.</param>
/// <param name="errorFile">Writer handed to the validation helpers for error messages.</param>
internal static void ProcessFiles(SharedStateObject sharedState, List<string> fileList, StreamWriter outputFile, StreamWriter errorFile)
{
    foreach (var file in fileList)
    {
        Console.WriteLine("Processing File: " + file);

        var stopwatch = new Stopwatch();
        stopwatch.Start();

        using (FileStream fs = File.Open(file, FileMode.Open, FileAccess.Read))
        using (BufferedStream bs = new BufferedStream(fs))
        using (StreamReader sr = new StreamReader(bs))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                // Split the row into its fields.
                var splitLine = line.Split('|');

                // ValidateColumns returns true when the column count is wrong.
                bool errorOnLine = ValidateColumns(splitLine, line, sharedState, errorFile);

                // The ID is assumed to be the first field. Split always yields
                // at least one element, so index 0 is safe even for an empty line.
                var ID = splitLine[0];

                // Concatenate all fields and hash the result.
                // NOTE(review): this includes the last field, which is assumed to be the
                // embedded hash when HashIncluded is set - confirm ValidateHash expects that.
                var hash = HashLine(string.Join("", splitLine));

                if (sharedState.Arguments.HashIncluded)
                {
                    // Compare the generated hash with the included one (assumed to be last).
                    // The result is combined with the column check instead of replacing it,
                    // so a row with a bad column count can no longer be reported as valid.
                    bool hashError = ValidateHash(splitLine, line, hash, errorFile);
                    errorOnLine = errorOnLine || hashError;
                }

                // Output the ID and hash only when no validation failed.
                if (!errorOnLine)
                {
                    lock (outputFile)
                    {
                        outputFile.WriteLine(ID + "\t" + hash);
                    }
                }
            }
        }

        stopwatch.Stop();

        // The file name is passed as a format argument so braces in a path cannot
        // break the format string. TotalSeconds (truncated to whole seconds) is used
        // because TimeSpan.Seconds is only the 0-59 component of the elapsed time.
        Console.WriteLine("Done Processing File: {0} in {1} seconds.", file, (long)stopwatch.Elapsed.TotalSeconds);
    }
}
/// <summary>
/// Entry point. Parses the command-line arguments, lists the files in the
/// configured directory, hands the file list to <c>SplitList</c> with the
/// thread count, and runs <c>FileProcessor.ProcessFiles</c> on one thread per
/// resulting sub-list. All threads share the "error.txt" and "output.txt"
/// writers (opened in append mode). Prints the total elapsed time and waits
/// for a key press before exiting.
/// </summary>
/// <param name="args">Command-line arguments, passed to <c>ArgumentParser.ParseArguments</c>.</param>
static void Main(string[] args)
{
    var stopwatch = new Stopwatch();
    var parameters = ArgumentParser.ParseArguments(args);
    const int numOfThreads = 4;
    stopwatch.Start();

    var fileList = Directory.GetFiles(parameters.DirectoryPath).ToList();

    // Split the file list into pieces; each piece is processed on its own thread.
    var listToProcess = fileList.SplitList(numOfThreads);
    var sharedStateObj = new SharedStateObject() { Arguments = parameters };
    var threadList = new List<Thread>();

    // using blocks guarantee both writers are flushed and closed even when
    // something throws before the workers finish.
    using (var errorFile = new StreamWriter("error.txt", true))
    using (var outputFile = new StreamWriter("output.txt", true))
    {
        foreach (var item in listToProcess)
        {
            // Copy the loop variable so each closure captures its own sub-list
            // regardless of the compiler's foreach capture semantics.
            var files = item;
            threadList.Add(new Thread(() => FileProcessor.ProcessFiles(sharedStateObj, files, outputFile, errorFile)));
        }

        foreach (var thread in threadList)
        {
            thread.Start();
        }

        // Join blocks until each worker has finished; this replaces the
        // 5-second polling loop, which always waited at least 5 seconds.
        foreach (var thread in threadList)
        {
            thread.Join();
        }
    }

    stopwatch.Stop();

    // TotalSeconds (truncated) rather than Seconds, which wraps at 60.
    Console.WriteLine("Hashing finished in {0} seconds.", (long)stopwatch.Elapsed.TotalSeconds);
    Console.Read();
}