Пример #1
0
 /// <summary>
 /// Moves <c>_currentFolder</c> on to the folder returned by
 /// <c>SMBDal.GetNextFolderForScanning</c> and refreshes <c>_files</c> accordingly.
 /// </summary>
 /// <remarks>
 /// When no folder is returned, <c>_files</c> is reset to <c>null</c>.
 /// </remarks>
 static void getNextFolder()
 {
     _currentFolder = SMBDal.GetNextFolderForScanning(_currentFolder);

     // No further folder: clear the file list and stop here.
     if (_currentFolder == null)
     {
         _files = null;
         return;
     }

     // The folder record exposes its location under the "path" key.
     var folderPath = _currentFolder["path"].ToString();
     _files = GetFolderFiles(folderPath);
 }
Пример #2
0
        /// <summary>
        /// Collects the immediate sub-directories of <paramref name="dir"/> and hands their
        /// full paths to <c>SMBDal.BulkAddFolders</c>.
        /// </summary>
        /// <param name="dir">Path of the directory whose sub-directories are collected.</param>
        /// <returns>
        /// Always <c>true</c>; any exception is logged through <c>Log.Error</c> instead of
        /// being propagated to the caller.
        /// </returns>
        private async Task <bool> ProcessDir(string dir)
        {
            Log.Trace("Started processing folder: " + dir + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);

            try
            {
                // Directory.EnumerateDirectories is lazy: every Count()/foreach over it walks the
                // file system again. Snapshot it once so the directory is read a single time and
                // the logged counts match what is actually iterated.
                List <string> directories = Directory.EnumerateDirectories(dir).ToList();

                // Store all the sub dirs in the data store (original note: flagged as "need to traverse")
                if (directories.Count > 0)
                {
                    Log.Trace("Sent to BulkAdd " + directories.Count + " records" + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);

                    List <string> lst = new List <string>(directories.Count);

                    foreach (string d in directories)
                    {
                        try
                        {
                            DirectoryInfo di = new DirectoryInfo(d);
                            lst.Add(di.FullName);
                            Log.Trace("Added folder: " + di.FullName + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
                        }
                        catch (Exception ex)
                        {
                            // Best effort: a folder that cannot be resolved is skipped, not fatal.
                            Log.Info(ex, "User doesn't have permissions to folder: " + d);
                        }
                    }

                    await SMBDal.BulkAddFolders(lst);

                    Log.Trace("Line after BulkAdd of " + directories.Count + " records" + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
                }
            }
            catch (Exception ex)
            {
                Log.Error(ex, "Failed to process folder: " + dir);
            }
            return(true);
        }
Пример #3
0
        /// <summary>
        /// Scans the file referenced by <paramref name="item"/>: extracts its text, runs NER on it and,
        /// when identifiers are found, stores a <c>ScanResult</c> via
        /// <c>SMBDal.AddDataObjectForMatching</c>.
        /// </summary>
        /// <param name="repo">Repository whose <c>id</c> is recorded on the result.</param>
        /// <param name="item">Item whose <c>DataObjectIdentifier</c> is used as the file path.</param>
        /// <returns>
        /// The stored <c>ScanResult</c> when identifiers were found; otherwise <c>null</c>
        /// (null <paramref name="item"/>, file too big, no text, no identifiers, or a logged failure).
        /// </returns>
        public async Task <ScanResult> ScanNext(Repository repo, IteratorItem item)
        {
            // Guard first: everything below (including the trace line) dereferences item.
            if (item == null)
            {
                return(null);
            }

            // Captured up front so logging never depends on item (which is reassigned below)
            // or on retVal (which stays null when nothing was found).
            var identifier = item.DataObjectIdentifier;

            ScanResult retVal    = null;
            string     extention = "";

            Log.Trace("Started processing: " + identifier + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);

            try
            {
                Dictionary <string, List <string> > identifiers = null;
                FileInfo file = new FileInfo(item.DataObjectIdentifier);
                extention = file.Extension;

                // Don't scan files that are too big.
                // The long literal keeps the product from overflowing if _maxFileSize is a 32-bit int.
                if (file.Length > _maxFileSize * 1000000L)
                {
                    Log.Warn("File too big:" + file.FullName + ", Size:" + file.Length);
                    Counter.Add("file_too_big", 1);
                }
                else
                {
                    // Get text of the file
                    string txt = await FileUtils.Parse(file);

                    if (!string.IsNullOrEmpty(txt))
                    {
                        // Do NER
                        identifiers = await NER.Parse(txt, file.FullName);

                        if (identifiers != null && identifiers.Count > 0)
                        {
                            Counter.Add("found_pi", 1);

                            // Get metadata
                            item = await CollectFileMetadata(item, file);

                            retVal = new ScanResult()
                            {
                                DataObjectIdentifier = item.DataObjectIdentifier, Identifiers = identifiers, Metadata = item.Metadata, RepositoryId = repo.id
                            };

                            // Store results
                            await SMBDal.AddDataObjectForMatching(retVal);
                        }
                        else
                        {
                            Counter.Add("didnot_find_pi", 1);
                        }
                    }
                    Log.Info("Processed file:" + file.FullName + ", Identifiers: " + (identifiers == null ? "0" : identifiers.Count.ToString()));
                }
            }
            catch (Exception ex)
            {
                Log.Error(ex, "Failed to process file: " + identifier);
                return(null);
            }

            // retVal is null whenever no identifiers were stored, so log the captured identifier
            // instead of dereferencing retVal.
            Log.Trace("Finished processing: " + identifier + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
            Counter.Add(extention, 1);

            return(retVal);
        }
Пример #4
0
        /// <summary>
        /// Traverses the folder tree starting at <paramref name="root"/>. The root's sub-directories are
        /// registered synchronously through <c>SMBDal.BulkAddFolders</c>; every subsequent folder returned
        /// by <c>SMBIterator.GetNextFolderForTraverse</c> is processed by <c>ProcessDir</c> on a
        /// background task, with at most <paramref name="maxTasks"/> semaphore slots handed out at once.
        /// </summary>
        /// <param name="root">Directory at which traversal starts.</param>
        /// <param name="maxTasks">Initial count of the throttling semaphore.</param>
        /// <returns>Always <c>true</c> once no further folder is returned.</returns>
        private async Task <bool> TraverseTree(string root, int maxTasks)
        {
            string currentDir = root;

            using (var throttler = new SemaphoreSlim(maxTasks))
            {
                var postTaskTasks = new List <PFTask>();

                while (currentDir != null)
                {
                    if (currentDir == root)
                    {
                        // First time, get the directories synchronously to populate the DB with directories to run on
                        IEnumerable <string> directories = Directory.EnumerateDirectories(currentDir);
                        List <string>        lst         = new List <string>();

                        foreach (string dir in directories)
                        {
                            try
                            {
                                DirectoryInfo di = new DirectoryInfo(dir);
                                lst.Add(di.FullName);
                                Log.Trace("Added folder: " + di.FullName + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
                            }
                            catch (Exception ex)
                            {
                                Log.Info(ex, "User doesn't have permissions to folder: " + dir);
                            }
                        }
                        await SMBDal.BulkAddFolders(lst);
                    }
                    else
                    {
                        Log.Trace("Before wait: " + currentDir + ", wait: " + throttler.CurrentCount + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
                        await throttler.WaitAsync();

                        Log.Trace("After wait: " + currentDir + ", wait: " + throttler.CurrentCount + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);

                        // Copy into a per-iteration local: the lambda must not observe later
                        // reassignments of currentDir.
                        string value = currentDir;
                        PFTask t     = new PFTask();
                        // NOTE(review): release(throttler) presumably frees the slot taken by WaitAsync above - confirm.
                        t.Task = Task.Run(() => ProcessDir(value), t.CancellationToken).ContinueWith(tsk => release(throttler));
                        postTaskTasks.Add(t);
                        Log.Trace("After add task: " + currentDir + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
                    }

                    // Fetch the next directory to traverse and do it again
                    currentDir = await SMBIterator.GetNextFolderForTraverse();

                    Log.Trace("GetNextFolderForTraverse returned: " + currentDir + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);

                    // Clean the completed tasks from the list.
                    // Task.IsCompleted is already true for canceled and faulted tasks.
                    foreach (PFTask t in postTaskTasks)
                    {
                        if (t.Task.IsCompleted)
                        {
                            _tasksToRemove.Add(t);
                        }
                    }
                    foreach (PFTask t in _tasksToRemove)
                    {
                        postTaskTasks.Remove(t);
                    }
                    _tasksToRemove = new List <PFTask>();

                    if (currentDir == null)
                    {
                        // Nothing queued right now, but in-flight tasks may still add folders:
                        // wait for them without blocking the thread, then ask once more.
                        // Unlike Task.WaitAll, awaiting rethrows the first exception rather than
                        // an AggregateException.
                        Log.Trace("WaitAll" + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
                        await Task.WhenAll(postTaskTasks.Select(pending => pending.Task).ToArray());
                        currentDir = await SMBIterator.GetNextFolderForTraverse();

                        Log.Trace("Got directory after wait all: " + currentDir + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
                    }
                }
            }

            return(true);
        }
Пример #5
0
 /// <summary>
 /// Asynchronously returns the value produced by <c>SMBDal.GetNextFolderForTraverse</c>.
 /// </summary>
 /// <returns>The result of the underlying data-layer call, passed through unchanged.</returns>
 public static async Task <string> GetNextFolderForTraverse()
 {
     // Pure pass-through to the data layer.
     var nextFolder = await SMBDal.GetNextFolderForTraverse();
     return nextFolder;
 }