/// <summary>
/// Continues a previous Dropbox folder listing from <paramref name="cursor"/> and hands the
/// returned page to <c>EnumerateFolderItems</c>, which appends to <paramref name="list"/>.
/// </summary>
/// <param name="options">Crawl options, forwarded unchanged to <c>EnumerateFolderItems</c>.</param>
/// <param name="cursor">Cursor to continue from; nothing is done when it is null or empty.</param>
/// <param name="client">Dropbox client used to issue the continue request.</param>
/// <param name="jobData">Job data; used to compute the modified-since time and forwarded on.</param>
/// <param name="list">Collection that receives the enumerated items.</param>
/// <remarks>
/// Failures other than <see cref="OperationCanceledException"/> are logged and counted as a
/// task failure; they are not rethrown.
/// </remarks>
private void GetFolderItemsFromCursor(CrawlOptions options, string cursor, IDropBoxClient client, DropBoxCrawlJobData jobData, IList<object> list)
{
    if (_state.CancellationTokenSource.IsCancellationRequested)
    {
        return;
    }

    if (string.IsNullOrEmpty(cursor))
    {
        return;
    }

    var dateTime = GetModifiedLastCrawlFinishTime(jobData);

    try
    {
        // Use the cursor that was validated above. Previously the parameter was ignored and
        // jobData.LastestCursors["Files"] was re-read, so the guard and the request could
        // disagree (and a missing key would throw despite a valid cursor being passed).
        var items = client.ListFolderContinueAsync(cursor).Result;
        EnumerateFolderItems(options, client, jobData, items, dateTime, list);
    }
    catch (OperationCanceledException)
    {
        // Cancellation is not treated as a failure.
    }
    catch (Exception exception)
    {
        _log.Error(() => "Could not fetch data from path in Dropbox", exception);
        _state.Status.Statistics.Tasks.IncrementTaskFailureCount();
    }
}
/// <summary>
/// Asks Dropbox for the latest cursor of the empty path (recursive, without media info) and,
/// when one is returned, stores it under the "Files" key on both <paramref name="jobData"/>
/// and <c>_state.Result</c>.
/// </summary>
/// <param name="options">Crawl options; not used by this method.</param>
/// <param name="client">Dropbox client used to request the cursor.</param>
/// <param name="jobData">Job data whose "Files" cursor entry is updated.</param>
/// <remarks>
/// Does nothing when cancellation has been requested. Failures other than
/// <see cref="OperationCanceledException"/> are logged and counted as a task failure.
/// </remarks>
private void SetCursor(CrawlOptions options, IDropBoxClient client, DropBoxCrawlJobData jobData)
{
    if (_state.CancellationTokenSource.IsCancellationRequested)
    {
        return;
    }

    try
    {
        var latest = client.ListFolderGetLatestCursorAsync(string.Empty, recursive: true, includeMediaInfo: false).Result;
        if (latest == null)
        {
            return;
        }

        // Job data first, then the crawl result - same order as before.
        jobData.LastestCursors["Files"] = latest.Cursor;
        _state.Result.LastestCursors["Files"] = latest.Cursor;
    }
    catch (OperationCanceledException)
    {
        // Cancellation is not treated as a failure.
    }
    catch (Exception exception)
    {
        _log.Error(() => "Could not fetch data from Dropbox", exception);
        _state.Status.Statistics.Tasks.IncrementTaskFailureCount();
    }
}
/// <summary>
/// Lazily yields the crawl items for <paramref name="info"/>: the directory's owner and the
/// directory itself, then the owner and item for each file directly inside it, and - when
/// <paramref name="options"/> is <c>CrawlOptions.Recursive</c> - the same sequence for every
/// sub-directory accepted by <c>FilterFileSystemInfos</c>.
/// </summary>
/// <param name="info">Directory to crawl.</param>
/// <param name="options">Crawl options; only <c>CrawlOptions.Recursive</c> triggers descent.</param>
/// <param name="filesystemcrawlJobData">Job data passed to every created item and to the filter.</param>
/// <returns>A deferred sequence; file-system access happens as the sequence is enumerated.</returns>
protected IEnumerable<object> CrawlDirectory(DirectoryInfo info, CrawlOptions options, FileSystemCrawlJobData filesystemcrawlJobData)
{
    var directory = new FileSystemItem<DirectoryInfo>(info, filesystemcrawlJobData);
    yield return directory.Owner;
    yield return directory;

    foreach (var fileInfo in info.GetFiles())
    {
        var file = new FileSystemItem<FileInfo>(fileInfo, filesystemcrawlJobData);
        yield return file.Owner;
        yield return file;
    }

    if (options == CrawlOptions.Recursive)
    {
        // Ordering by a fresh Guid visits the sub-directories in random order.
        var subDirectories = FilterFileSystemInfos(info.GetDirectories().OrderBy(d => Guid.NewGuid()), filesystemcrawlJobData);

        foreach (var subDirectory in subDirectories)
        {
            // An iterator method does no work until it is enumerated. The previous code called
            // CrawlDirectory and discarded the result, so sub-directories were never crawled.
            // Re-yield the nested sequence so the recursion actually takes effect.
            foreach (var item in CrawlDirectory(subDirectory, options, filesystemcrawlJobData))
            {
                yield return item;
            }
        }
    }
}
/// <summary>
/// Entry point for collecting Dropbox files and folders into <paramref name="list"/>.
/// When a previous crawl has finished and a non-empty "Files" cursor is stored on the job,
/// the listing continues from that cursor; otherwise the configured folder entry points
/// (or the root "/") are listed from scratch. Afterwards the latest cursor is stored via
/// <c>SetCursor</c>.
/// </summary>
/// <param name="options">Crawl options, forwarded to the helpers.</param>
/// <param name="client">Dropbox client used for all requests.</param>
/// <param name="jobData">Job data holding the last crawl time, cursors and configured folders.</param>
/// <param name="list">Collection that receives the enumerated items.</param>
/// <remarks>
/// <see cref="OperationCanceledException"/> is swallowed. Any other exception is logged as fatal
/// and counted as a task failure; it is not added to <c>_state.Result.Exceptions</c>.
/// </remarks>
private void GetFolderItems(CrawlOptions options, IDropBoxClient client, DropBoxCrawlJobData jobData, IList<object> list)
{
    try
    {
        // Files & Folders
        var canResumeFromCursor =
            jobData.LastCrawlFinishTime > DateTimeOffset.MinValue
            && jobData.LastestCursors != null
            && jobData.LastestCursors.ContainsKey("Files")
            && !string.IsNullOrEmpty(jobData.LastestCursors["Files"]);

        if (canResumeFromCursor)
        {
            GetFolderItemsFromCursor(options, jobData.LastestCursors["Files"], client, jobData, list);
        }
        else
        {
            var entryPoints = (jobData.Folders?.Select(folder => folder.EntryPoint) ?? new string[] { }).ToHashSet();

            // No configured folders, or the root among them, means a single crawl from "/".
            var crawlFromRoot = !entryPoints.Any() || entryPoints.Contains("/") || entryPoints.Contains(string.Empty);
            var startPaths = crawlFromRoot ? new[] { "/" } : (IEnumerable<string>)entryPoints;

            foreach (var startPath in startPaths)
            {
                // Each start path gets its own visited-folder set.
                GetFolderItems(options, client, jobData, startPath, new HashSet<string>(), list);
            }
        }

        // Cursors
        SetCursor(options, client, jobData);
    }
    catch (OperationCanceledException)
    {
        // Swallow
    }
    catch (Exception ex)
    {
        _log.Fatal(() => GetType().Name + " Failed: " + ex.Message, ex);
        _state.Status.Statistics.Tasks.IncrementTaskFailureCount();
    }
}
/// <summary>
/// Lists the Dropbox folder at <paramref name="path"/> (after normalising it) and hands the
/// first page to <c>EnumerateFolderItems</c> with folder iteration enabled.
/// </summary>
/// <param name="options">Crawl options, forwarded unchanged.</param>
/// <param name="client">Dropbox client used to list the folder.</param>
/// <param name="jobData">Job data; used to compute the modified-since time and forwarded on.</param>
/// <param name="path">Folder path to list; passed through <c>NormalizePath</c> first.</param>
/// <param name="visitedFolders">
/// Optional set of already-listed paths. When supplied, a path already in the set is skipped
/// and a new path is recorded before listing.
/// </param>
/// <param name="list">Collection that receives the enumerated items.</param>
/// <remarks>
/// Failures other than <see cref="OperationCanceledException"/> are logged and counted as a
/// task failure; they are not rethrown.
/// </remarks>
private void GetFolderItems(CrawlOptions options, IDropBoxClient client, DropBoxCrawlJobData jobData, string path, HashSet<string> visitedFolders, IList<object> list)
{
    if (_state.CancellationTokenSource.IsCancellationRequested)
    {
        return;
    }

    path = NormalizePath(path);

    // HashSet.Add returns false when the path is already present, so one call both
    // detects a repeat visit and records a first visit.
    if (visitedFolders != null && !visitedFolders.Add(path))
    {
        return;
    }

    var dateTime = GetModifiedLastCrawlFinishTime(jobData);

    try
    {
        var listing = client.ListFolderAsync(path: path, limit: DropBoxConstants.FetchLimit, includeDeleted: false).Result;
        EnumerateFolderItems(options, client, jobData, listing, dateTime, list, iterateFolders: true, visitedFolders: visitedFolders);
    }
    catch (OperationCanceledException)
    {
        // Cancellation is not treated as a failure.
    }
    catch (Exception exception)
    {
        _log.Error(() => "Could not fetch data from path in Dropbox", exception);
        _state.Status.Statistics.Tasks.IncrementTaskFailureCount();
    }
}
/// <summary>
/// Creates a crawler that uses the supplied geography service.
/// </summary>
/// <param name="crawlOptions">Crawl options, passed to the base constructor.</param>
/// <param name="geographyService">Service stored in <c>_geographyService</c>.</param>
public GeographyCrawler(CrawlOptions crawlOptions, IGeographyService geographyService)
    : base(crawlOptions)
{
    this._geographyService = geographyService;
}
/// <summary>
/// Creates a crawler that uses the supplied finance service.
/// </summary>
/// <param name="crawlOptions">Crawl options, passed to the base constructor.</param>
/// <param name="financeService">Service stored in <c>_financeService</c>.</param>
public GeographyCrawler(CrawlOptions crawlOptions, FinanceService financeService)
    : base(crawlOptions)
{
    this._financeService = financeService;
}
/// <summary>
/// Walks a Dropbox folder listing page by page. For every page, each file entry is passed to
/// <c>GetFileAsync</c> in parallel and the result is appended to <paramref name="list"/>;
/// each folder entry that matches the configured entry points (or every folder when none are
/// configured) is appended and, when <paramref name="iterateFolders"/> is true, listed in turn.
/// </summary>
/// <param name="options">Crawl options, forwarded to nested folder listings.</param>
/// <param name="client">Dropbox client used to fetch further pages and files.</param>
/// <param name="jobData">Job data providing the configured folder entry points.</param>
/// <param name="items">First page of the listing to process.</param>
/// <param name="dateTime">Time value forwarded to <c>GetFileAsync</c>.</param>
/// <param name="list">Collection that receives the enumerated items.</param>
/// <param name="iterateFolders">Whether matching folders are listed recursively.</param>
/// <param name="visitedFolders">Optional visited-path set forwarded to nested listings.</param>
/// <remarks>
/// Failures other than <see cref="OperationCanceledException"/> are logged and counted as a
/// task failure; they are not rethrown.
/// </remarks>
private void EnumerateFolderItems(CrawlOptions options, IDropBoxClient client, DropBoxCrawlJobData jobData, ListFolderResult items, DateTimeOffset dateTime, IList<object> list, bool iterateFolders = true, HashSet<string> visitedFolders = null)
{
    if (_state.CancellationTokenSource.IsCancellationRequested)
    {
        return;
    }

    try
    {
        var ids = (jobData.Folders?.Select(sp => sp.EntryPoint) ?? new string[] { }).ToList();

        // The parallel settings do not change between pages, so build them once.
        var concurrencyLevel = ConfigurationManager.AppSettings.GetValue("Providers.Dropbox.CrawlConcurrencyLevel", Environment.ProcessorCount);
        var parallelOptions = new ParallelOptions
        {
            CancellationToken = _state.CancellationTokenSource.Token,
            MaxDegreeOfParallelism = concurrencyLevel,
            TaskScheduler = _state.TaskScheduler
        };

        // IList<object> gives no thread-safety guarantee (List<T>.Add is unsafe for concurrent
        // writers), yet the parallel loop below appends from several threads. Serialise only
        // the append; the per-file work still runs in parallel.
        var listGate = new object();

        do
        {
            var files = items.Entries.Where(i => i != null && i.IsFile).Select(i => i.AsFile);
            var folders = items.Entries.Where(i => i != null && i.IsFolder).Select(i => i.AsFolder);

            Parallel.ForEach(files, parallelOptions, file =>
            {
                if (_state.CancellationTokenSource.IsCancellationRequested)
                {
                    return;
                }

                var fileItem = GetFileAsync(file, client, dateTime);
                lock (listGate)
                {
                    list.Add(fileItem);
                }
            });

            // Folders are handled sequentially, after the parallel loop has completed.
            foreach (var folder in folders)
            {
                if (_state.CancellationTokenSource.IsCancellationRequested)
                {
                    break;
                }

                if (!ids.Any() || ids.Contains(folder.PathLower))
                {
                    list.Add(folder);

                    if (iterateFolders)
                    {
                        GetFolderItems(options, client, jobData, folder.PathLower, visitedFolders, list);
                    }
                }
            }

            if (items.HasMore)
            {
                items = client.ListFolderContinueAsync(items.Cursor).Result;
            }
            else
            {
                break;
            }
        }
        while (items != null && !_state.CancellationTokenSource.IsCancellationRequested);
    }
    catch (OperationCanceledException)
    {
        // Cancellation is not treated as a failure.
    }
    catch (Exception exception)
    {
        _log.Error(() => "Could not enumerate folder items in Dropbox", exception);
        _state.Status.Statistics.Tasks.IncrementTaskFailureCount();
    }
}