예제 #1
0
        /// <summary>
        /// Adds the file's creation/access/write timestamps (UTC), its size and its access rules
        /// to the metadata list of <paramref name="item"/>.
        /// </summary>
        /// <remarks>
        /// Best effort: any exception is logged and the item is returned with whatever metadata was
        /// added before the failure. All work here is synchronous, so the method returns an
        /// already-completed task instead of being declared <c>async</c> without an <c>await</c>.
        /// </remarks>
        /// <param name="item">Item whose <c>Metadata</c> collection receives the new entries.</param>
        /// <param name="info">File to read the metadata from.</param>
        /// <returns>A completed task wrapping the same <paramref name="item"/> instance.</returns>
        private Task <IteratorItem> CollectFileMetadata(IteratorItem item, FileInfo info)
        {
            try
            {
                // NOTE(review): parameterless ToString() formats with the current culture, so the stored
                // values depend on the machine's locale - confirm no consumer parses them.
                item.Metadata.Add(new MetadataItem("Created_At", info.CreationTimeUtc.ToString()));
                item.Metadata.Add(new MetadataItem("Last_Accessed_At", info.LastAccessTimeUtc.ToString()));
                item.Metadata.Add(new MetadataItem("Updated_At", info.LastWriteTimeUtc.ToString()));
                item.Metadata.Add(new MetadataItem("Size", info.Length.ToString()));

                // One "Access_Rights" entry per access rule, identities resolved as NT account names.
                FileSecurity fs = info.GetAccessControl(AccessControlSections.Access);

                foreach (FileSystemAccessRule ar in fs.GetAccessRules(true, true, typeof(System.Security.Principal.NTAccount)))
                {
                    item.Metadata.Add(new MetadataItem("Access_Rights", ar.IdentityReference.Value, ar.FileSystemRights.ToString()));
                }
            }
            catch (Exception ex)
            {
                // Do not dereference a null FileInfo while reporting the failure.
                Log.Error(ex, "Failed to collect metadata: " + (info != null ? info.FullName : "<unknown>"));
            }

            return(Task.FromResult(item));
        }
예제 #2
0
        /// <summary>
        /// Scans a single file: extracts its text with <c>FileUtils.Parse</c>, runs <c>NER.Parse</c> over it
        /// and, when identifiers are found, collects the file metadata and stores the result through
        /// <c>SMBDal.AddDataObjectForMatching</c>.
        /// </summary>
        /// <param name="repo">Repository being scanned; its <c>id</c> is copied into the result.</param>
        /// <param name="item">Item whose <c>DataObjectIdentifier</c> is used as the path of the file to scan.</param>
        /// <returns>
        /// The stored result when identifiers were found; <c>null</c> when <paramref name="item"/> is null,
        /// the file exceeds the size limit, no text or identifiers were found, or processing failed
        /// (failures are logged, not rethrown).
        /// </returns>
        public async Task <ScanResult> ScanNext(Repository repo, IteratorItem item)
        {
            // Guard before the first dereference: the opening trace line reads item.DataObjectIdentifier.
            if (item == null)
            {
                return(null);
            }

            ScanResult retVal    = null;
            string     extention = "";

            Log.Trace("Started processing: " + item.DataObjectIdentifier + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);

            try
            {
                Dictionary <string, List <string> > identifiers = null;
                FileInfo file = new FileInfo(item.DataObjectIdentifier);
                extention = file.Extension;

                // Don't scan files that are too big.
                // The long literal keeps the multiplication from overflowing if _maxFileSize is a 32-bit int.
                if (file.Length > _maxFileSize * 1000000L)
                {
                    Log.Warn("File too big:" + file.FullName + ", Size:" + file.Length);
                    Counter.Add("file_too_big", 1);
                }
                else
                {
                    // Get text of the file
                    string txt = await FileUtils.Parse(file);

                    if (txt != null && txt.Length > 0)
                    {
                        // Do NER
                        identifiers = await NER.Parse(txt, file.FullName);

                        if (identifiers != null && identifiers.Count > 0)
                        {
                            Counter.Add("found_pi", 1);

                            // Get metadata
                            item = await CollectFileMetadata(item, file);

                            retVal = new ScanResult()
                            {
                                DataObjectIdentifier = item.DataObjectIdentifier, Identifiers = identifiers, Metadata = item.Metadata, RepositoryId = repo.id
                            };

                            // Store results
                            await SMBDal.AddDataObjectForMatching(retVal);
                        }
                        else
                        {
                            Counter.Add("didnot_find_pi", 1);
                        }
                    }
                    Log.Info("Processed file:" + file.FullName + ", Identifiers: " + (identifiers == null ? "0" : identifiers.Count.ToString()));
                }
            }
            catch (Exception ex)
            {
                Log.Error(ex, "Failed to process file: " + item.DataObjectIdentifier);
                return(null);
            }

            // retVal stays null whenever nothing was found or the file was skipped, so the identifier
            // is taken from the item (the value retVal.DataObjectIdentifier is copied from).
            Log.Trace("Finished processing: " + item.DataObjectIdentifier + ", Thread Id: " + Thread.CurrentThread.ManagedThreadId);
            Counter.Add(extention, 1);

            return(retVal);
        }
예제 #3
0
        /// <summary>
        /// Fetches the next item from <c>SMBIterator.NextItemAsync</c> and scans it with the
        /// two-argument <c>ScanNext</c> overload.
        /// </summary>
        /// <param name="repo">Repository passed through to the two-argument overload.</param>
        /// <returns>Whatever the two-argument overload returns for the fetched item.</returns>
        public async Task <ScanResult> ScanNext(Repository repo)
        {
            // The iterator is awaited first; its result is then handed straight to the scanner.
            return(await ScanNext(repo, await SMBIterator.NextItemAsync()));
        }
예제 #4
0
        /// <summary>
        /// Scans every item returned by <c>SMBIterator.NextItem</c> for the given repository. Each item is
        /// handed to <c>ScanNext</c> on a background task; the number of tasks in flight is throttled by a
        /// semaphore initialised with <c>_maxFilesScanCocur</c>.
        /// </summary>
        /// <param name="repo">Repository to scan; passed to <c>PrepCredentials</c> and to every <c>ScanNext</c> call.</param>
        /// <returns>Always <c>true</c>, once the iterator is exhausted and the final wait has returned.</returns>
        public async Task <bool> Scan(Repository repo)
        {
            bool scanDone      = false;
            var  postTaskTasks = new List <PFTask>();

            PrepCredentials(repo);
            using (var throttler = new SemaphoreSlim(_maxFilesScanCocur))
            {
                // Scan files
                while (!scanDone)
                {
                    // _taskTimeout is passed to WaitAsync as a millisecond timeout.
                    bool semValue = await throttler.WaitAsync(_taskTimeout);

                    if (!semValue)
                    {
                        // No slot became free in time: request cancellation of every task that has been
                        // running longer than the timeout. Elapsed time is now - start, measured in total
                        // milliseconds to match the unit used for WaitAsync above.
                        // NOTE(review): assumes PFTask.StartTime is a UTC timestamp - confirm.
                        // NOTE(review): ScanNext takes no CancellationToken, so a task that is already
                        // running is not interrupted by this - confirm whether that is acceptable.
                        DateTime now = DateTime.UtcNow;
                        foreach (PFTask t in postTaskTasks)
                        {
                            if ((now - t.StartTime).TotalMilliseconds > _taskTimeout)
                            {
                                t.TokenSource.Cancel();
                            }
                        }

                        // NOTE(review): execution still falls through and starts another task below even
                        // though no slot was acquired, and that task's continuation calls release(throttler)
                        // - confirm this is intended.
                    }

                    // Get next item for iteration. This loop is the only caller in this method, so no
                    // lock is needed (the previous lock object was a local and could not be shared).
                    IteratorItem item = SMBIterator.NextItem();

                    if (item != null)
                    {
                        // Process the next item; the continuation runs release(throttler) once the scan ends.
                        PFTask tsk = new PFTask();
                        tsk.Task = Task.Run <ScanResult>(() => ScanNext(repo, item), tsk.CancellationToken).ContinueWith(t => release(throttler));
                        postTaskTasks.Add(tsk);
                    }
                    else
                    {
                        scanDone = true;
                    }

                    // Clean the completed tasks from the wait list
                    // (IsCompleted is also true for canceled and faulted tasks).
                    foreach (PFTask t in postTaskTasks)
                    {
                        if (t.Task.IsCompleted)
                        {
                            _tasksToRemove.Add(t);
                        }
                    }
                    foreach (PFTask t in _tasksToRemove)
                    {
                        postTaskTasks.Remove(t);
                        t.Task.Dispose();
                    }
                    _tasksToRemove = new List <PFTask>();

                    if (scanDone)
                    {
                        // Wait (without blocking a thread) up to 30 seconds for the remaining tasks.
                        Task allDone  = Task.WhenAll(postTaskTasks.Select(i => i.Task).ToArray());
                        Task finished = await Task.WhenAny(allDone, Task.Delay(30000));

                        if (finished == allDone)
                        {
                            // Already completed, so this does not block; it only rethrows task failures
                            // as an AggregateException, exactly as Task.WaitAll did.
                            allDone.Wait();
                        }
                    }
                }
            }
            Counter.PrintAll();
            return(true);
        }