/// <summary>
/// Program entry point: parses the command-line switches, runs the word
/// extraction over the configured document directory, reports timing and the
/// output location on the console, then waits for a key press before exiting.
/// </summary>
/// <param name="args">Raw command-line arguments, handed to <c>ConsoleSwitches</c>.</param>
static void Main(string[] args)
{
    var switches = new ConsoleSwitches(args);

    // UTC timestamp taken up front so the total duration can be reported at the end.
    var startedAt = DateTime.UtcNow;
    Console.WriteLine("Started At UTC: {0}", startedAt);
    Console.WriteLine("Reading files from {0}", switches.DocumentDir.FullName);

    //ExecuteResultImport();
    ExecuteWordExtraction(switches);

    var elapsed = DateTime.UtcNow - startedAt;
    Console.WriteLine("Completed. DURATION: {0}", elapsed);
    Console.WriteLine("Results saved on: {0}", switches.OutputFile);

    // Keep the console window open until the user acknowledges the result.
    Console.WriteLine("Press any key to end");
    Console.ReadKey();
}
/// <summary>
/// Collects statistics for every file matching <c>*.doc*</c> under
/// <c>arguments.DocumentDir</c> (searched recursively) and writes the collected
/// statistics to <c>arguments.OutputFile</c> as JSON.
/// </summary>
/// <remarks>
/// Files are processed in batches of at most <c>arguments.ThreadCount</c>
/// tasks; each batch is started together and awaited before the next batch
/// is queued. A thread count below 1 is treated as 1. The whole operation
/// runs inside <c>DoExecute</c>, which is defined elsewhere.
/// </remarks>
/// <param name="arguments">Parsed command-line switches supplying the document directory, thread count and output file.</param>
private static void ExecuteWordExtraction(ConsoleSwitches arguments)
{
    DoExecute(() =>
    {
        // Names starting with "~" are skipped (presumably Word's temporary owner/lock files - confirm).
        // Ordinal comparison keeps the check independent of the current culture.
        var files = arguments.DocumentDir
            .GetFiles("*.doc*", SearchOption.AllDirectories)
            .Where(i => !i.Name.StartsWith("~", StringComparison.Ordinal))
            .ToList();

        // With a thread count of 0 the batching below would never create a task
        // and every file would be skipped silently; a negative value would make the
        // List<Task> capacity constructor throw. Always process at least one file per batch.
        var batchSize = Math.Max(1, arguments.ThreadCount);

        // BlockingCollection is IDisposable; scope it so it is released once the results are written.
        using (var stats = new BlockingCollection<DocumentData>())
        {
            var tasks = new List<Task>(batchSize);

            for (int f = 0; f < files.Count; f++)
            {
                // Declared inside the loop so each task's closure captures its own file.
                var file = files[f];

                // The loop index is already the file's position; no list scan is needed.
                Console.WriteLine(
                    "File: {0} {1} of {2}",
                    file.Name.PadRight(100),
                    (f + 1).ToString().PadLeft(4),
                    files.Count.ToString().PadLeft(4));

                tasks.Add(new Task(() =>
                {
                    // NOTE(review): the document is constructed outside DoExecute, so an
                    // exception thrown by the constructor faults the task and surfaces from
                    // Task.WaitAll below - confirm this is intended.
                    var doc = new WordDocument(file.FullName);
                    DoExecute(() =>
                    {
                        stats.Add(doc.GetStats());
                    });
                }));

                var isLastFile = f == files.Count - 1;
                if (tasks.Count >= batchSize || isLastFile)
                {
                    // Run the current batch to completion before queuing more work.
                    tasks.ForEach(t => t.Start());
                    Task.WaitAll(tasks.ToArray());
                    tasks.Clear();
                }
            }

            Console.WriteLine();
            Console.WriteLine();
            File.WriteAllText(arguments.OutputFile, JsonConvert.SerializeObject(stats));
        }
    });
}