Exemple #1
0
        /// <summary>
        /// Continues a Dropbox folder listing from <paramref name="cursor"/> and hands the returned
        /// page to <c>EnumerateFolderItems</c>.
        /// </summary>
        /// <param name="options">Crawl options forwarded to the enumeration.</param>
        /// <param name="cursor">Cursor to continue from; the method does nothing when it is null or empty.</param>
        /// <param name="client">Dropbox client used to continue the listing.</param>
        /// <param name="jobData">Job data forwarded to the enumeration and used to obtain the last-crawl timestamp.</param>
        /// <param name="list">Collection forwarded to the enumeration.</param>
        private void GetFolderItemsFromCursor(CrawlOptions options, string cursor, IDropBoxClient client, DropBoxCrawlJobData jobData, IList <object> list)
        {
            if (_state.CancellationTokenSource.IsCancellationRequested)
            {
                return;
            }

            if (string.IsNullOrEmpty(cursor))
            {
                return;
            }

            var dateTime = GetModifiedLastCrawlFinishTime(jobData);

            try
            {
                // Continue from the cursor that was validated above rather than looking it up
                // again in jobData, so the guard and the request always refer to the same value.
                var items = client.ListFolderContinueAsync(cursor).Result;

                EnumerateFolderItems(options, client, jobData, items, dateTime, list);
            }
            catch (OperationCanceledException)
            {
                // Cancellation is not treated as a failure; nothing is logged or counted.
            }
            catch (Exception exception)
            {
                _log.Error(() => "Could not fetch data from path in Dropbox", exception);
                _state.Status.Statistics.Tasks.IncrementTaskFailureCount();
            }
        }
Exemple #2
0
        /// <summary>
        /// Requests the latest recursive listing cursor from Dropbox and, when one is returned,
        /// stores it under the "Files" key in both the job data and the crawl result.
        /// </summary>
        /// <param name="options">Crawl options (not used by this method).</param>
        /// <param name="client">Dropbox client used to request the cursor.</param>
        /// <param name="jobData">Job data whose cursor map is updated.</param>
        private void SetCursor(CrawlOptions options, IDropBoxClient client, DropBoxCrawlJobData jobData)
        {
            if (!_state.CancellationTokenSource.IsCancellationRequested)
            {
                try
                {
                    var latest = client.ListFolderGetLatestCursorAsync(string.Empty, recursive: true, includeMediaInfo: false).Result;

                    if (latest == null)
                    {
                        return;
                    }

                    jobData.LastestCursors["Files"] = latest.Cursor;
                    _state.Result.LastestCursors["Files"] = latest.Cursor;
                }
                catch (OperationCanceledException)
                {
                    // Cancellation is ignored on purpose.
                }
                catch (Exception failure)
                {
                    _log.Error(() => "Could not fetch data from Dropbox", failure);
                    _state.Status.Statistics.Tasks.IncrementTaskFailureCount();
                }
            }
        }
Exemple #3
0
        /// <summary>
        /// Lazily yields the items found in a directory: the directory's owner and the directory
        /// itself, then the owner and item for every file directly inside it. When
        /// <paramref name="options"/> is <c>CrawlOptions.Recursive</c>, the same sequence is
        /// yielded for each subdirectory that passes <c>FilterFileSystemInfos</c>.
        /// </summary>
        /// <param name="info">Directory to crawl.</param>
        /// <param name="options">Crawl options; only <c>CrawlOptions.Recursive</c> triggers descent into subdirectories.</param>
        /// <param name="filesystemcrawlJobData">Job data passed to every created item and to the subdirectory filter.</param>
        /// <returns>A deferred sequence of owners and file-system items.</returns>
        protected IEnumerable <object> CrawlDirectory(DirectoryInfo info, CrawlOptions options, FileSystemCrawlJobData filesystemcrawlJobData)
        {
            var directory = new FileSystemItem <DirectoryInfo>(info, filesystemcrawlJobData);

            yield return directory.Owner;

            yield return directory;

            foreach (var fileInfo in info.GetFiles())
            {
                var file = new FileSystemItem <FileInfo>(fileInfo, filesystemcrawlJobData);

                yield return file.Owner;

                yield return file;
            }

            if (options == CrawlOptions.Recursive)
            {
                // Ordering by a fresh Guid visits the subdirectories in random order.
                foreach (var subDirectory in FilterFileSystemInfos(info.GetDirectories().OrderBy(d => Guid.NewGuid()), filesystemcrawlJobData))
                {
                    // This method is an iterator: calling it only creates the sequence. The result
                    // must be enumerated and re-yielded, otherwise the subdirectory is never crawled.
                    foreach (var item in CrawlDirectory(subDirectory, options, filesystemcrawlJobData))
                    {
                        yield return item;
                    }
                }
            }
        }
Exemple #4
0
        /// <summary>
        /// Entry point for collecting Dropbox files and folders. When a previous crawl has finished
        /// and a non-empty "Files" cursor is stored, the listing continues from that cursor;
        /// otherwise the configured folder entry points (or "/" when none are configured, or when
        /// "/" or the empty string is among them) are listed from scratch. Afterwards the latest
        /// cursor is stored via <c>SetCursor</c>.
        /// </summary>
        /// <param name="options">Crawl options forwarded to the listing helpers.</param>
        /// <param name="client">Dropbox client forwarded to the listing helpers.</param>
        /// <param name="jobData">Job data holding the last crawl time, stored cursors and configured folders.</param>
        /// <param name="list">Collection forwarded to the listing helpers.</param>
        private void GetFolderItems(CrawlOptions options, IDropBoxClient client, DropBoxCrawlJobData jobData, IList <object> list)
        {
            try
            {
                // Files & Folders
                var canResumeFromCursor = jobData.LastCrawlFinishTime > DateTimeOffset.MinValue
                                          && jobData.LastestCursors != null
                                          && jobData.LastestCursors.ContainsKey("Files")
                                          && !string.IsNullOrEmpty(jobData.LastestCursors["Files"]);

                if (canResumeFromCursor)
                {
                    GetFolderItemsFromCursor(options, jobData.LastestCursors["Files"], client, jobData, list);
                }
                else
                {
                    var entryPoints = (jobData.Folders?.Select(sp => sp.EntryPoint) ?? new string[] { }).ToHashSet();

                    var crawlFromRoot = !entryPoints.Any() || entryPoints.Contains("/") || entryPoints.Contains(string.Empty);

                    IEnumerable<string> startPaths = crawlFromRoot ? (IEnumerable<string>)new[] { "/" } : entryPoints;

                    // Every start path gets its own, initially empty, visited-folder set.
                    foreach (var startPath in startPaths)
                    {
                        GetFolderItems(options, client, jobData, startPath, new HashSet <string>(), list);
                    }
                }

                // Cursors
                SetCursor(options, client, jobData);
            }
            catch (OperationCanceledException)
            {
                // Cancellation is swallowed deliberately.
            }
            catch (Exception ex)
            {
                _log.Fatal(() => GetType().Name + " Failed: " + ex.Message, ex);
                _state.Status.Statistics.Tasks.IncrementTaskFailureCount();
                _state.Result.Exceptions.Add(ex);
            }
        }
Exemple #5
0
        /// <summary>
        /// Lists the Dropbox folder at <paramref name="path"/> and passes the first page of results
        /// to <c>EnumerateFolderItems</c> with folder iteration enabled. A path already present in
        /// <paramref name="visitedFolders"/> is skipped.
        /// </summary>
        /// <param name="options">Crawl options forwarded to the enumeration.</param>
        /// <param name="client">Dropbox client used for the listing.</param>
        /// <param name="jobData">Job data forwarded to the enumeration and used to obtain the last-crawl timestamp.</param>
        /// <param name="path">Folder path; it is normalized with <c>NormalizePath</c> before use.</param>
        /// <param name="visitedFolders">Set of normalized paths already listed; may be null, in which case no tracking is done.</param>
        /// <param name="list">Collection forwarded to the enumeration.</param>
        private void GetFolderItems(CrawlOptions options, IDropBoxClient client, DropBoxCrawlJobData jobData, string path, HashSet <string> visitedFolders, IList <object> list)
        {
            if (_state.CancellationTokenSource.IsCancellationRequested)
            {
                return;
            }

            path = NormalizePath(path);

            // HashSet.Add returns false when the path is already recorded, so one call both
            // checks for and records the visit.
            if (visitedFolders != null && !visitedFolders.Add(path))
            {
                return;
            }

            var lastCrawlTime = GetModifiedLastCrawlFinishTime(jobData);

            try
            {
                var firstPage = client.ListFolderAsync(path: path, limit: DropBoxConstants.FetchLimit, includeDeleted: false).Result;

                EnumerateFolderItems(options, client, jobData, firstPage, lastCrawlTime, list, iterateFolders: true, visitedFolders: visitedFolders);
            }
            catch (OperationCanceledException)
            {
                // Cancellation is ignored on purpose.
            }
            catch (Exception failure)
            {
                _log.Error(() => "Could not fetch data from path in Dropbox", failure);
                _state.Status.Statistics.Tasks.IncrementTaskFailureCount();
            }
        }
Exemple #6
0
 /// <summary>
 /// Creates the crawler, passing the crawl options to the base class and keeping the
 /// geography service for later use.
 /// </summary>
 /// <param name="crawlOptions">Options forwarded to the base constructor.</param>
 /// <param name="geographyService">Service stored in the <c>_geographyService</c> field.</param>
 public GeographyCrawler(CrawlOptions crawlOptions, IGeographyService geographyService)
     : base(crawlOptions)
 {
     this._geographyService = geographyService;
 }
Exemple #7
0
 /// <summary>
 /// Creates the crawler, passing the crawl options to the base class and keeping the
 /// finance service for later use.
 /// </summary>
 /// <param name="crawlOptions">Options forwarded to the base constructor.</param>
 /// <param name="financeService">Service stored in the <c>_financeService</c> field.</param>
 public GeographyCrawler(CrawlOptions crawlOptions, FinanceService financeService)
     : base(crawlOptions)
 {
     this._financeService = financeService;
 }
Exemple #8
0
        /// <summary>
        /// Walks a Dropbox folder listing page by page. For every page the file entries are
        /// processed in parallel and the result of <c>GetFileAsync</c> is added to
        /// <paramref name="list"/>; folder entries are then handled sequentially: each accepted
        /// folder is added to <paramref name="list"/> and, when <paramref name="iterateFolders"/>
        /// is true, listed in turn through <c>GetFolderItems</c>.
        /// </summary>
        /// <param name="options">Crawl options forwarded when descending into folders.</param>
        /// <param name="client">Dropbox client used to fetch files and further pages.</param>
        /// <param name="jobData">Job data; its configured folder entry points restrict which folders are accepted.</param>
        /// <param name="items">First page of the listing to process.</param>
        /// <param name="dateTime">Timestamp forwarded to <c>GetFileAsync</c>.</param>
        /// <param name="list">Collection that receives the file results and accepted folders.</param>
        /// <param name="iterateFolders">Whether accepted folders are listed recursively.</param>
        /// <param name="visitedFolders">Visited-folder set forwarded to <c>GetFolderItems</c>; may be null.</param>
        private void EnumerateFolderItems(CrawlOptions options, IDropBoxClient client, DropBoxCrawlJobData jobData, ListFolderResult items, DateTimeOffset dateTime, IList <object> list, bool iterateFolders = true, HashSet <string> visitedFolders = null)
        {
            if (_state.CancellationTokenSource.IsCancellationRequested)
            {
                return;
            }

            try
            {
                var ids = (jobData.Folders?.Select(sp => sp.EntryPoint) ?? new string[] { }).ToList();

                // The parallel settings do not change between pages, so build them once.
                var concurrencyLevel = ConfigurationManager.AppSettings.GetValue("Providers.Dropbox.CrawlConcurrencyLevel", Environment.ProcessorCount);

                var parallelOptions = new ParallelOptions
                {
                    CancellationToken      = _state.CancellationTokenSource.Token,
                    MaxDegreeOfParallelism = concurrencyLevel,
                    TaskScheduler          = _state.TaskScheduler
                };

                // IList<object> gives no thread-safety guarantee (List<T>.Add is not safe for
                // concurrent writers), so the adds made by the parallel loop are serialized.
                var listGate = new object();

                do
                {
                    var files   = items.Entries.Where(i => i != null && i.IsFile).Select(i => i.AsFile);
                    var folders = items.Entries.Where(i => i != null && i.IsFolder).Select(i => i.AsFolder);

                    Parallel.ForEach(files, parallelOptions, file =>
                    {
                        if (_state.CancellationTokenSource.IsCancellationRequested)
                        {
                            return;
                        }

                        // Do the per-file work outside the lock; only the Add itself is serialized.
                        var fileItem = GetFileAsync(file, client, dateTime);

                        lock (listGate)
                        {
                            list.Add(fileItem);
                        }
                    });

                    foreach (var folder in folders)
                    {
                        if (_state.CancellationTokenSource.IsCancellationRequested)
                        {
                            break;
                        }

                        // With no configured entry points every folder is accepted; otherwise only
                        // folders whose lower-cased path matches an entry point.
                        if (!ids.Any() || ids.Contains(folder.PathLower))
                        {
                            list.Add(folder);

                            if (iterateFolders)
                            {
                                GetFolderItems(options, client, jobData, folder.PathLower, visitedFolders, list);
                            }
                        }
                    }

                    if (items.HasMore)
                    {
                        items = client.ListFolderContinueAsync(items.Cursor).Result;
                    }
                    else
                    {
                        break;
                    }
                }while (items != null && !_state.CancellationTokenSource.IsCancellationRequested);
            }
            catch (OperationCanceledException)
            {
                // Cancellation is not treated as a failure; nothing is logged or counted.
            }
            catch (Exception exception)
            {
                _log.Error(() => "Could not enumerate folder items in Dropbox", exception);
                _state.Status.Statistics.Tasks.IncrementTaskFailureCount();
            }
        }