// Parses a single on-disk email file into a MimeMessage, presumably to be
// converted into an ElasticEmailDocument further down (an elasticDocument
// local is declared but the rest of the method is not visible here).
// On parse failure, logs the error and returns default; the comment below
// says the caller decides what happens to failed files later.
//
// NOTE(review): this definition is cut off mid-method in this chunk — the
// catch block, the using block, and the method itself are never closed in
// the visible text. Only comments/formatting were touched; code left as-is.
async Task<ElasticEmailDocument> ParseMimeMessage(FileInfo input)
{
    var mimeMsg = default(MimeMessage);
    var elasticDocument = default(ElasticEmailDocument);

    // First parse the file to an understandable object
    using (var iStream = input.Open(FileMode.Open))
    {
        try
        {
            // Parser options come from plugin configuration, so MIME parsing
            // behaviour can be tuned without rebuilding.
            var parserOptions = PluginConfig.GetConfig<ParserOptions>("mime_parse_options");
            mimeMsg = await MimeMessage.LoadAsync(parserOptions, iStream);
        }
        catch (Exception ex)
        {
            // Parsing failed: log enough detail to diagnose, then give up on
            // this one file only.
            Error("An error occurred parsing file {0}!", input.FullName);
            Trace("Source: {0}", ex.Source);
            Trace("Stack trace: {0}", ex.StackTrace);
            Error("Cannot continue parsing this email!");
            return (default); // Return the default value; we will decide what happens to these later
// Main processing loop: until ContinueProcessing is cleared, it
//   1. waits for the configured search interval,
//   2. builds and shuffles the list of email files to parse,
//   3. trims the list to the configured maximum,
//   4. splits the list across the available worker threads, marking each
//      file read-only so the next file-list build ignores it.
//
// NOTE(review): kept `async void` so the method remains usable as a
// thread/timer callback (the `object state` signature suggests that is how
// it is invoked — confirm against the caller). Exceptions escaping an
// `async void` method are unobservable and crash the process, so the whole
// loop is guarded by try/catch below.
async void ProcessorLooper(object state)
{
    // Batches at or below this size are handled by a single thread;
    // splitting them up is not worth the per-thread overhead.
    const int minNumberForThreading = 100;

    Info("Preparing looper...");

    // The state object is expected to carry everything needed to parse,
    // index, and archive emails. (Currently unread — kept for callers.)
    dynamic stateObj = state;

    ContinueProcessing = true; // Set to true to start processing

    try
    {
        while (ContinueProcessing)
        {
            // First things first: sleep. This gives the user some wiggle-room.
            // FIX: Task.Delay instead of Thread.Sleep — this is an async
            // method, and Thread.Sleep would block a pool thread for the
            // entire interval.
            await Task.Delay(PluginConfig.GetConfig<TimeSpan>("search_interval"));

            // The flag may have been cleared while we were waiting.
            if (!ContinueProcessing)
            {
                break;
            }

            // Check whether the max number of threads has been reached.
            // Without multi-threading the loop cannot overlap a previous run,
            // so no check is needed in that case.
            var useMultiThreading = PluginConfig.GetBool("use_multithreading");
            var maxCores = PluginConfig.GetInt("core_use");

            // FIX: ">=" instead of "==" so an over-subscribed task list
            // (e.g. after core_use was lowered at runtime) still trips
            // the guard instead of slipping past it.
            if (useMultiThreading && ProcessingTasks.Count >= maxCores)
            {
                Warn("Already utilizing max. number of threads! Execution will continue once a thread becomes available.");
                break;
            }

            // Build the file list
            Info("Building file list... please wait...");
            var fileList = (await BuildFileListAsync(
                PluginConfig.GetConfig<List<string>>("search_directories").ToArray())).ToList();

            // Figure out how many cores/threads to use.
            // FIX: clamp to at least 1 — a misconfigured core_use of 0 (or a
            // negative value) previously caused a divide-by-zero further down.
            var logicalCpus = Environment.ProcessorCount;
            var utilizableCores = Math.Max(1, Math.Min(maxCores, logicalCpus));
            var maxEmails = PluginConfig.GetInt("max_emails");

            Info($"Total available logical CPUs: { logicalCpus }");
            Info($"Utilizable CPU cores: { utilizableCores }");
            Info($"Maximum amount of emails to parse: { maxEmails }");

            // Shuffle the file list so each domain has an equal chance to be
            // processed; some emails may not make the cut below, so let's try
            // to make it equal.
            fileList.Shuffle();

            // Trim to the configured maximum number of emails to parse.
            if (fileList.Count > maxEmails)
            {
                fileList = fileList.GetRange(0, maxEmails);
            }

            Info("Preparing to parse {0} emails; please wait...", fileList.Count);

            { // Scoped so the per-iteration locals don't outlive the run.
                // Only spin up as many workers as there are free cores.
                var maxI = utilizableCores - ProcessingTasks.Count;
                for (var i = 0; i < maxI; i++)
                {
                    // Offset each thread from the others to avoid severe
                    // performance impairment from simultaneous start-up.
                    // FIX: Task.Delay instead of Thread.Sleep (async method).
                    await Task.Delay(TimeSpan.FromSeconds(15));

                    var start = default(int);
                    var files = default(List<FileInfo>);
                    var threadId = default(string);

                    if (fileList.Count <= minNumberForThreading)
                    {
                        // Small batch: no point in snipping this up into
                        // multiple threads — hand the whole list to one
                        // worker and stop scheduling.
                        // (Checked first now; the original only reached this
                        // branch after the last-chunk test.)
                        threadId = Extensions.GenerateUniqueId();
                        files = fileList;

                        // Set all files to r/o so they'll be ignored by the
                        // file list builder during the next run and we won't
                        // have any files magically disappearing.
                        files.ForEach(x => x.IsReadOnly = true);
                        // TODO: Call thread
                        break; // Break the loop.
                    }
                    else if (i == maxI - 1)
                    {
                        // Last worker takes the remainder of the list.
                        // FIX: the original compared i against logicalCpus - 1;
                        // whenever core_use < ProcessorCount (or tasks were
                        // already running) that never matched, and the trailing
                        // remainder of the file list was silently dropped.
                        start = i * (fileList.Count / utilizableCores);
                        files = fileList.GetRange(start, fileList.Count - start);

                        threadId = Extensions.GenerateUniqueId();

                        // Set all files to r/o (see note above).
                        files.ForEach(x => x.IsReadOnly = true);
                        // TODO: Call thread
                    }
                    else
                    {
                        // Standard mode: each worker gets an equal chunk of
                        //   N(files) / N(utilizable cores)
                        // e.g. 5000 files / 8 cores = 625 emails per thread.
                        files = fileList.GetRange(
                            i * (fileList.Count / utilizableCores),
                            fileList.Count / utilizableCores);

                        // Set all files to r/o (see note above).
                        files.ForEach(x => x.IsReadOnly = true);
                        // TODO: Call thread
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        // FIX: an unhandled exception escaping an async void method tears
        // down the process; log it and let the looper die gracefully instead.
        Warn($"Processor loop terminated unexpectedly: { ex }");
    }
}