/// <summary>
/// Advances <c>_currentFolder</c> to whatever <c>SMBDal.GetNextFolderForScanning</c> returns
/// for the current folder and refreshes <c>_files</c> to match.
/// When no further folder is returned, <c>_files</c> is reset to <c>null</c>.
/// </summary>
static void getNextFolder()
{
    _currentFolder = SMBDal.GetNextFolderForScanning(_currentFolder);

    // Nothing left to scan: clear the file list and stop.
    if (_currentFolder == null)
    {
        _files = null;
        return;
    }

    // The folder record exposes its location under the "path" key.
    string folderPath = _currentFolder["path"].ToString();
    _files = GetFolderFiles(folderPath);
}
/// <summary>
/// Collects the immediate sub-directories of <paramref name="dir"/> and passes their full
/// paths to <c>SMBDal.BulkAddFolders</c> in a single batch.
/// </summary>
/// <param name="dir">Path of the folder whose sub-directories are collected.</param>
/// <returns>
/// Always <c>true</c>. Any exception raised while enumerating or storing is logged
/// and swallowed, so callers never observe a failure from this method.
/// </returns>
private async Task<bool> ProcessDir(string dir)
{
    Log.Trace("Started processing folder: " + dir + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
    try
    {
        // Directory.EnumerateDirectories is lazy: every Count()/foreach over it walks the
        // file system again. Materialize it once so the directory is read a single time and
        // the counts written to the log match the set of folders that was actually processed.
        List<string> directories = Directory.EnumerateDirectories(dir).ToList();

        // Store all the sub dirs in the data store with flag - need to traverse
        if (directories.Count > 0)
        {
            Log.Trace("Sent to BulkAdd " + directories.Count + " records" + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);

            List<string> lst = new List<string>(directories.Count);
            foreach (string d in directories)
            {
                try
                {
                    DirectoryInfo di = new DirectoryInfo(d);
                    lst.Add(di.FullName);
                    Log.Trace("Added folder: " + di.FullName + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
                }
                catch (Exception ex)
                {
                    // Best effort: a folder that cannot be resolved is skipped, not fatal.
                    Log.Info(ex, "User doesn't have permissions to folder: " + d);
                }
            }

            await SMBDal.BulkAddFolders(lst);
            Log.Trace("Line after BulkAdd of " + directories.Count + " records" + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
        }
    }
    catch (Exception ex)
    {
        Log.Error(ex, "Failed to process folder: " + dir);
    }

    return true;
}
/// <summary>
/// Scans a single file: extracts its text with <c>FileUtils.Parse</c>, runs <c>NER.Parse</c>
/// on that text and, when identifiers are found, collects metadata and stores the result
/// through <c>SMBDal.AddDataObjectForMatching</c>.
/// </summary>
/// <param name="repo">Repository the file belongs to; its <c>id</c> is copied into the result.</param>
/// <param name="item">Item whose <c>DataObjectIdentifier</c> is the path of the file to scan.</param>
/// <returns>
/// The stored <c>ScanResult</c> when identifiers were found; otherwise <c>null</c>
/// (null item, file too big, no text, no identifiers, or a logged failure).
/// </returns>
public async Task<ScanResult> ScanNext(Repository repo, IteratorItem item)
{
    // Guard before the first dereference: the original logged item.DataObjectIdentifier
    // ahead of its own null check.
    if (item == null)
    {
        return null;
    }

    // Captured up front because 'item' is reassigned further down; the log statements
    // outside the happy path must not depend on the reassigned value.
    string dataObjectId = item.DataObjectIdentifier;

    ScanResult retVal = null;
    string extention = "";
    Log.Trace("Started processing: " + dataObjectId + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);

    try
    {
        Dictionary<string, List<string>> identifiers = null;
        FileInfo file = new FileInfo(dataObjectId);
        extention = file.Extension;

        // Don't scan files that are too big.
        // The long literal forces 64-bit arithmetic, so the product cannot wrap around
        // if _maxFileSize is a 32-bit integer (its declaration is outside this block).
        if (file.Length > _maxFileSize * 1000000L)
        {
            Log.Warn("File too big:" + file.FullName + ", Size:" + file.Length);
            Counter.Add("file_too_big", 1);
        }
        else
        {
            // Get text of the file
            string txt = await FileUtils.Parse(file);
            if (txt != null && txt.Length > 0)
            {
                // Do NER
                identifiers = await NER.Parse(txt, file.FullName);
                if (identifiers != null && identifiers.Count > 0)
                {
                    Counter.Add("found_pi", 1);

                    // Get metadata
                    item = await CollectFileMetadata(item, file);
                    retVal = new ScanResult()
                    {
                        DataObjectIdentifier = item.DataObjectIdentifier,
                        Identifiers = identifiers,
                        Metadata = item.Metadata,
                        RepositoryId = repo.id
                    };

                    // Store results
                    await SMBDal.AddDataObjectForMatching(retVal);
                }
                else
                {
                    Counter.Add("didnot_find_pi", 1);
                }
            }

            Log.Info("Processed file:" + file.FullName + ", Identifiers: " + (identifiers == null ? "0" : identifiers.Count.ToString()));
        }
    }
    catch (Exception ex)
    {
        Log.Error(ex, "Failed to process file: " + dataObjectId);
        return null;
    }

    // retVal stays null when the file was skipped or held no identifiers. The original
    // dereferenced it unconditionally here, which threw and skipped the extension counter.
    string finishedId = retVal != null ? retVal.DataObjectIdentifier : dataObjectId;
    Log.Trace("Finished processing: " + finishedId + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
    Counter.Add(extention, 1);
    return retVal;
}
/// <summary>
/// Drives the folder traversal starting at <paramref name="root"/>. The root's immediate
/// sub-directories are stored synchronously via <c>SMBDal.BulkAddFolders</c>; every folder
/// subsequently returned by <c>SMBIterator.GetNextFolderForTraverse</c> is handed to
/// <c>ProcessDir</c> on a background task, with at most <paramref name="maxTasks"/>
/// such tasks admitted at a time by a semaphore.
/// </summary>
/// <param name="root">Path of the folder the traversal starts from.</param>
/// <param name="maxTasks">Initial count of the semaphore that throttles background tasks.</param>
/// <returns>Always <c>true</c> once no further folder is returned.</returns>
private async Task<bool> TraverseTree(string root, int maxTasks)
{
    string currentDir = root;
    using (var throttler = new SemaphoreSlim(maxTasks))
    {
        var postTaskTasks = new List<PFTask>();
        while (currentDir != null)
        {
            if (currentDir == root)
            {
                // First time get the directories synchronously to populate the DB with directories to run on
                IEnumerable<string> directories = Directory.EnumerateDirectories(currentDir);
                List<string> lst = new List<string>();
                foreach (string dir in directories)
                {
                    try
                    {
                        DirectoryInfo di = new DirectoryInfo(dir);
                        lst.Add(di.FullName);
                        Log.Trace("Added folder: " + di.FullName + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a folder that cannot be resolved is skipped, not fatal.
                        Log.Info(ex, "User doesn't have permissions to folder: " + dir);
                    }
                }

                await SMBDal.BulkAddFolders(lst);
            }
            else
            {
                Log.Trace("Before wait: " + currentDir + ", wait: " + throttler.CurrentCount + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
                await throttler.WaitAsync();
                Log.Trace("After wait: " + currentDir + ", wait: " + throttler.CurrentCount + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);

                // Copy into a fresh local: currentDir is reassigned below, and the lambda
                // must keep seeing the folder it was started for.
                string value = currentDir;
                PFTask t = new PFTask();
                // The continuation calls release(throttler) once ProcessDir has finished.
                t.Task = Task.Run(() => ProcessDir(value), t.CancellationToken).ContinueWith(tsk => release(throttler));
                postTaskTasks.Add(t);
                Log.Trace("After add task: " + currentDir + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
            }

            // Fetch the next directory to traverse and do it again
            currentDir = await SMBIterator.GetNextFolderForTraverse();
            Log.Trace("GetNextFolderForTraverse returned: " + currentDir + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);

            // Clean the completed tasks from the list. Task.IsCompleted is already true for
            // canceled and faulted tasks, so one check covers all three final states, and
            // RemoveAll does it in a single pass without a shared scratch list.
            postTaskTasks.RemoveAll(pending => pending.Task.IsCompleted);

            if (currentDir == null)
            {
                // Running tasks may still add folders, so wait for them and ask once more.
                // Awaited instead of Task.WaitAll so the calling thread is not blocked
                // inside an async method.
                Log.Trace("WaitAll" + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
                await Task.WhenAll(postTaskTasks.Select(pending => pending.Task).ToArray());
                currentDir = await SMBIterator.GetNextFolderForTraverse();
                Log.Trace("Got directory after wait all: " + currentDir + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
            }
        }
    }

    return true;
}
/// <summary>
/// Asks <c>SMBDal.GetNextFolderForTraverse</c> for the next folder and returns its answer unchanged.
/// </summary>
/// <returns>The value produced by the data-access layer call.</returns>
public static async Task<string> GetNextFolderForTraverse()
{
    string nextFolder = await SMBDal.GetNextFolderForTraverse();
    return nextFolder;
}