/// <summary>
/// Runs the result-returning <c>ParallelAsync.ForEachAsync</c> overload over <c>loops</c> items whose
/// body waits <c>delay</c> milliseconds, then checks that every item was visited and that the
/// whole run took less time than executing all the delays one after another.
/// </summary>
/// <param name="maxDop">Maximum degree of parallelism; when <c>null</c>, null options are passed.</param>
public static void ParallelAsync_ForEach_Func_Delay(int? maxDop)
{
    ParallelOptions options = null;
    if (maxDop.HasValue)
    {
        options = new ParallelOptions { MaxDegreeOfParallelism = maxDop.Value };
    }

    var timer = new Stopwatch();
    timer.Start();

    var invocations = 0;
    Func<int, Task<KeyValuePair<int, int>>> body = async n =>
    {
        Interlocked.Increment(ref invocations);
        await Task.Delay(delay);
        return new KeyValuePair<int, int>(n, n * 2);
    };

    // Block until the whole batch has completed; the returned collection itself is not inspected.
    var source = Enumerable.Range(0, loops);
    var result = ParallelAsync.ForEachAsync(source, options, body).Result;
    timer.Stop();

    Assert.Equal(loops, invocations);

    // Environmental factors mean we can't assert a lower boundary
    Assert.True(timer.ElapsedMilliseconds < delay * loops);
}
/// <summary>
/// Reads a batch of objects concurrently and returns the ones that were found.
/// </summary>
/// <param name="objectIds">Identifiers to read; <c>null</c> yields an empty dictionary.</param>
/// <param name="cancellationToken">Token passed to the parallel options and to every individual read.</param>
/// <returns>
/// A dictionary keyed by object id (compared with <c>Sha1Comparer.Default</c>) that contains an entry
/// only for ids whose <c>ReadObjectAsync</c> result has a value.
/// </returns>
public virtual async ValueTask<IReadOnlyDictionary<Sha1, ReadOnlyMemory<byte>>> ReadObjectBatchAsync(IEnumerable<Sha1> objectIds, CancellationToken cancellationToken)
{
    if (objectIds == null)
    {
        return ReadOnlyDictionary.Empty<Sha1, ReadOnlyMemory<byte>>();
    }

    var options = new ParallelOptions
    {
        MaxDegreeOfParallelism = MaxDop,
        CancellationToken = cancellationToken
    };

    // The body runs concurrently, so results are gathered in a concurrent dictionary.
    var found = new ConcurrentDictionary<Sha1, ReadOnlyMemory<byte>>(Sha1Comparer.Default);

    await ParallelAsync.ForEachAsync(objectIds, options, async objectId =>
    {
        var content = await ReadObjectAsync(objectId, cancellationToken).ConfigureAwait(false);
        if (!content.HasValue)
        {
            // Nothing was read for this id: leave it out of the result.
            return;
        }

        found[objectId] = content.Value;
    }).ConfigureAwait(false);

    return found;
}
/// <summary>
/// Collects the packages to check from every source, processes them through
/// <c>ParallelAsync.Repeat</c>, and then marks the packages as checked on every source.
/// </summary>
/// <param name="sources">Sources that supply packages and are notified once processing is done.</param>
/// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
/// <returns><c>true</c> when at least one package was fetched from the sources; otherwise <c>false</c>.</returns>
private async Task<bool> CheckPackages(
    IReadOnlyCollection<IPackageStatusOutdatedCheckSource> sources,
    CancellationToken cancellationToken)
{
    Logger.LogInformation("Fetching packages to check status of.");

    var pending = new List<PackageStatusOutdatedCheck>();
    await _monitoringCursor.LoadAsync(cancellationToken);

    foreach (var source in sources)
    {
        // Start slightly before the cursor (by ReprocessRange) and take at most Top packages per source.
        var fetched = await source.GetPackagesToCheckAsync(
            _monitoringCursor.Value - ReprocessRange,
            Top,
            cancellationToken);
        pending.AddRange(fetched);
    }

    // Workers share this bag; the list is kept for the count and the return value.
    var workQueue = new ConcurrentBag<PackageStatusOutdatedCheck>(pending);
    Logger.LogInformation("Found {PackagesToCheckCount} packages to check status of.", pending.Count);

    await ParallelAsync.Repeat(() => ProcessPackagesAsync(workQueue, cancellationToken));
    Logger.LogInformation("Finished checking status of packages.");

    foreach (var source in sources)
    {
        await source.MarkPackagesCheckedAsync(cancellationToken);
    }

    return pending.Count > 0;
}
/// <summary>
/// Exercises the argument handling of the action-based <c>ParallelAsync.ForEachAsync</c> overload:
/// a null body must throw, a null or empty source must invoke nothing, and a populated source
/// must invoke the body once per element.
/// </summary>
/// <param name="maxDop">Maximum degree of parallelism; when <c>null</c>, null options are passed.</param>
public static void ParallelAsync_ForEach_Action_Default_Arguments(int? maxDop)
{
    var data = new int[] { 0, 1, 2 };
    var options = maxDop.HasValue
        ? new ParallelOptions { MaxDegreeOfParallelism = maxDop.Value }
        : null;

    // Null body.
    // Assert.ThrowsAsync only reports a failure through the task it returns, so that task has to be
    // observed; discarding it would let the test pass even when no exception is thrown.
    Func<int, Task> action = null;
    Assert.ThrowsAsync<ArgumentNullException>(() => ParallelAsync.ForEachAsync(data, options, action))
        .GetAwaiter()
        .GetResult();

    var actual = 0;
    action = n =>
    {
        Interlocked.Increment(ref actual);
        return Task.CompletedTask;
    };

    // Null source
    actual = 0;
    ParallelAsync.ForEachAsync(null, options, action).Wait();
    Assert.Equal(0, actual);

    // Empty source
    actual = 0;
    ParallelAsync.ForEachAsync(Array.Empty<int>(), options, action).Wait();
    Assert.Equal(0, actual);

    // Populated source (options are null only when maxDop is null)
    actual = 0;
    ParallelAsync.ForEachAsync(data, options, action).Wait();
    Assert.Equal(data.Length, actual);
}
/// <summary>
/// Exercises the argument handling of the result-returning <c>ParallelAsync.ForEachAsync</c> overload:
/// a null body must throw, and a null or empty source must invoke nothing and yield an empty result.
/// </summary>
/// <param name="maxDop">Maximum degree of parallelism; when <c>null</c>, null options are passed.</param>
public static void ParallelAsync_ForEach_Func_Default_Arguments(int? maxDop)
{
    var data = new int[] { 0, 1, 2 };
    var options = maxDop.HasValue
        ? new ParallelOptions { MaxDegreeOfParallelism = maxDop.Value }
        : null;

    // Null body.
    // Assert.ThrowsAsync only reports a failure through the task it returns, so that task has to be
    // observed; discarding it would let the test pass even when no exception is thrown.
    Func<int, Task<KeyValuePair<int, int>>> func = null;
    Assert.ThrowsAsync<ArgumentNullException>(async () => await ParallelAsync.ForEachAsync(data, options, func))
        .GetAwaiter()
        .GetResult();

    var actual = 0;
    func = n =>
    {
        Interlocked.Increment(ref actual);
        return Task.FromResult(new KeyValuePair<int, int>(n, n));
    };

    // Null source
    actual = 0;
    var result = ParallelAsync.ForEachAsync(null, options, func).Result;
    Assert.Equal(0, actual);
    Assert.Empty(result);

    // Empty source
    actual = 0;
    result = ParallelAsync.ForEachAsync(Array.Empty<int>(), options, func).Result;
    Assert.Equal(0, actual);
    Assert.Empty(result);
}
/// <summary>
/// Shows the console menu, runs the selected download mode, logs its result, and shows the menu
/// again until the user enters anything other than "1" to "4".
/// </summary>
/// <remarks>
/// The asynchronous modes are waited on before the menu is shown again. They were previously started
/// through async lambdas converted to <see cref="Action"/> (that is, <c>async void</c>), so the menu
/// was redrawn while work was still running and any exception could not be observed by the caller.
/// Blocking here assumes a console host without a synchronization context.
/// </remarks>
public static void Menu()
{
    // A loop instead of self-recursion keeps the stack flat however many times the menu is shown.
    while (true)
    {
        // One token source per round, released when the round ends.
        using var ct = new CancellationTokenSource();

        Console.WriteLine("Sync, Async and Parallel Console Test");
        Console.WriteLine();
        Console.WriteLine("Menu");
        Console.WriteLine("1 - Sync");
        Console.WriteLine("2 - Async");
        Console.WriteLine("3 - ParallelSync");
        Console.WriteLine("4 - ParallelAsync");
        Console.WriteLine("Any Other - Exit");

        var key = Console.ReadLine();

        Action method = key switch
        {
            "1" => () => Logging(Sync.RunDownloadSync()),
            "2" => () => Logging(Async.RunDownloadAsync(ct.Token).GetAwaiter().GetResult()),
            "3" => () => Logging(ParallelSync.RunDownloadParallelSync()),
            "4" => () => Logging(ParallelAsync.RunDownloadParallelASync(ct.Token).GetAwaiter().GetResult()),
            _ => null
        };

        if (method == null)
        {
            return;
        }

        method.Invoke();
    }
}
/// <summary>
/// Checks that a value handed to <c>controller.ProvideResult</c> inside the loop body comes back
/// as the result of <c>ParallelAsync.ForEachAsync</c>.
/// </summary>
public async Task SetResult_ReturnsResult()
{
    var source = new int[] { 0 };

    var outcome = await ParallelAsync.ForEachAsync<int, int>(
        source,
        async (item, controller) =>
        {
            // The single element (0) is reported as the loop result.
            controller.ProvideResult(item);
        }).ConfigureAwait(false);

    outcome.Should().Be(0);
}
/// <summary>
/// Calls <c>GetDirectory</c> for every entry of the word list through
/// <c>ParallelAsync.ForeachAsync</c>, passing <c>ExecutionOptions.threadCount</c> as its second argument.
/// </summary>
/// <param name="wordlist">Entries to pass to <c>GetDirectory</c>.</param>
public static async Task RunNormal(string[] wordlist)
{
    await ParallelAsync.ForeachAsync(
        wordlist,
        ExecutionOptions.threadCount,
        async directory =>
        {
            await GetDirectory(directory);
        });
}
/// <summary>
/// Calls <c>GetDirectory</c> for every entry of the word list and then, in order, for that entry
/// suffixed with "." and each item of <c>ExecutionOptions.extensions</c>.
/// </summary>
/// <param name="wordlist">Entries to pass to <c>GetDirectory</c>.</param>
public static async Task RunExt(string[] wordlist)
{
    await ParallelAsync.ForeachAsync(
        wordlist,
        ExecutionOptions.threadCount,
        async directory =>
        {
            // Bare entry first, then one request per configured extension.
            await GetDirectory(directory);

            foreach (var extension in ExecutionOptions.extensions)
            {
                await GetDirectory(directory + "." + extension);
            }
        });
}
/// <summary>
/// Click handler: starts a test run labelled "PARALLEL ASYNC", awaits
/// <c>ParallelAsync.RunDownloadParallelASync2</c> with a progress reporter wired to
/// <c>ReportProgress</c>, stores the outcome in <c>results</c>, and ends the test run.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private async void btnParallelAsync2_Click(object sender, RoutedEventArgs e)
{
    var reporter = new Progress<ProgressReportModel>();
    reporter.ProgressChanged += ReportProgress;

    InitTest("PARALLEL ASYNC");

    var download = ParallelAsync.RunDownloadParallelASync2(reporter, cts.Token);
    results = await download;

    EndTest();
}
/// <summary>
/// Downloads every entry of <c>m_images</c> with up to eight concurrent downloads while a background
/// task refreshes <c>SpeedOrFileSize</c> roughly every 500 ms.
/// </summary>
/// <param name="hc">HTTP client handed to each <c>DownloadImage</c> call.</param>
/// <returns>
/// <c>false</c> when cancellation was requested through the download callback and
/// <c>IgnoreErrorMissingPage</c> is not set. Otherwise <c>true</c> when <c>IgnoreErrorMissingPage</c>
/// is set or every image ended up with a non-null <c>Extension</c>.
/// </returns>
private bool Download(HttpClientEx hc)
{
    const int MaxParallelDownloads = 8;

    using (var stopSlim = new ManualResetEventSlim(false))
    {
        this.m_downloaded = 0;

        // Background reporter: publishes a smoothed speed figure until stopSlim is set.
        var updateTask = Task.Factory.StartNew(() =>
        {
            var startTime = DateTime.Now;
            double befSpeed = 0;

            while (!stopSlim.IsSet)
            {
                // NOTE(review): the bytes counted since the previous tick are divided by the time elapsed
                // since the start, not since the previous tick - confirm this is the intended formula.
                befSpeed = (befSpeed + Interlocked.Exchange(ref this.m_downloaded, 0) / (DateTime.Now - startTime).TotalSeconds) / 2;

                if (double.IsNaN(befSpeed))
                {
                    befSpeed = 0;
                }

                this.SpeedOrFileSize = Utility.ToEICFormat(befSpeed, "/s");

                Thread.Sleep(500);
            }
        });

        try
        {
            using (var cts = new CancellationTokenSource())
            {
                ParallelAsync.ForEachAsync(
                    this.m_images,
                    async e => await this.DownloadImage(e, hc, cts.Token, cts.Cancel),
                    MaxParallelDownloads).GetAwaiter().GetResult();

                if (!this.IgnoreErrorMissingPage && cts.IsCancellationRequested)
                {
                    return false;
                }
            }
        }
        finally
        {
            // Always stop the reporter, including when a download throws. Otherwise its loop would keep
            // running after this method has exited and the event has been disposed.
            stopSlim.Set();
            updateTask.Wait();
        }
    }

    this.SpeedOrFileSize = null;

    // Every image must have been downloaded.
    return this.IgnoreErrorMissingPage || this.m_images.All(e => e.Extension != null);
}
/// <summary>
/// Compares the last indexed download counts with the latest ones (after applying download overrides),
/// pushes the differences to Azure Search with concurrent workers, and then stores the latest
/// download data as the new indexed baseline.
/// </summary>
/// <returns><c>false</c> when no download count changed; <c>true</c> after changes were pushed and stored.</returns>
private async Task<bool> PushIndexChangesAsync()
{
    // The "old" data in this case is the download count data that was last indexed by this job (or
    // initialized by Db2AzureSearch).
    _logger.LogInformation("Fetching old download count data from blob storage.");
    var indexed = await _downloadDataClient.ReadLatestIndexedAsync(
        AccessConditionWrapper.GenerateEmptyCondition(),
        _stringCache);

    // The "new" data in this case is from the statistics pipeline.
    _logger.LogInformation("Fetching new download count data from blob storage.");
    var latest = await _auxiliaryFileClient.LoadDownloadDataAsync();

    _logger.LogInformation("Removing invalid IDs and versions from the old data.");
    CleanDownloadData(indexed.Data);

    _logger.LogInformation("Removing invalid IDs and versions from the new data.");
    CleanDownloadData(latest);

    // Fetch the download overrides from the auxiliary file. Note that the overridden downloads are kept
    // separate from downloads data as the original data will be persisted to auxiliary data, whereas the
    // overridden data will be persisted to Azure Search.
    _logger.LogInformation("Overriding download count data.");
    var overrides = await _auxiliaryFileClient.LoadDownloadOverridesAsync();
    var latestWithOverrides = latest.ApplyDownloadOverrides(overrides, _logger);

    _logger.LogInformation("Detecting download count changes.");
    var changes = _downloadSetComparer.Compare(indexed.Data, latestWithOverrides);

    // The workers share this bag of changed package IDs.
    var changedIds = new ConcurrentBag<string>(changes.Keys);
    _logger.LogInformation("{Count} package IDs have download count changes.", changedIds.Count);

    var hasChanges = changes.Any();
    if (!hasChanges)
    {
        return false;
    }

    _logger.LogInformation(
        "Starting {Count} workers pushing download count changes to Azure Search.",
        _options.Value.MaxConcurrentBatches);
    await ParallelAsync.Repeat(
        () => WorkAsync(changedIds, changes),
        _options.Value.MaxConcurrentBatches);
    _logger.LogInformation("All of the download count changes have been pushed to Azure Search.");

    // Note that the data stored here is the latest data without the overrides applied.
    _logger.LogInformation("Uploading the new download count data to blob storage.");
    await _downloadDataClient.ReplaceLatestIndexedAsync(latest, indexed.Metadata.GetIfMatchCondition());

    return true;
}
/// <summary>
/// Checks that the result-returning <c>ParallelAsync.ForAsync</c> overload produces, in order, the
/// doubled value of each element of a three-item array.
/// </summary>
/// <param name="maxDop">Maximum degree of parallelism; when <c>null</c>, null options are passed.</param>
public static void ParallelAsync_For_Func(int? maxDop)
{
    int[] data = { 0, 1, 2 };

    ParallelOptions options = null;
    if (maxDop.HasValue)
    {
        options = new ParallelOptions { MaxDegreeOfParallelism = maxDop.Value };
    }

    Func<int, Task<int>> doubler = index => Task.FromResult(data[index] * 2);

    var pending = ParallelAsync.ForAsync(0, data.Length, options, doubler);

    Assert.Collection(
        pending.Result,
        entry => Assert.Equal(0, entry.Value),
        entry => Assert.Equal(2, entry.Value),
        entry => Assert.Equal(4, entry.Value));
}
/// <summary>
/// Checks that the result-returning <c>ParallelAsync.ForEachAsync</c> overload produces, in order,
/// entries whose values are the doubled elements of a three-item array.
/// </summary>
/// <param name="maxDop">Maximum degree of parallelism; when <c>null</c>, null options are passed.</param>
public static void ParallelAsync_ForEach_Func(int? maxDop)
{
    int[] data = { 0, 1, 2 };

    ParallelOptions options = null;
    if (maxDop.HasValue)
    {
        options = new ParallelOptions { MaxDegreeOfParallelism = maxDop.Value };
    }

    Func<int, Task<KeyValuePair<int, int>>> doubler =
        item => Task.FromResult(new KeyValuePair<int, int>(item, item * 2));

    var pending = ParallelAsync.ForEachAsync(data, options, doubler);

    Assert.Collection(
        pending.Result,
        entry => Assert.Equal(0, entry.Value),
        entry => Assert.Equal(2, entry.Value),
        entry => Assert.Equal(4, entry.Value));
}
/// <summary>
/// Compares the last indexed owner data with the owners currently in the database, pushes the
/// differences to Azure Search with concurrent workers, and stores both the change history and the
/// new owner data. The outcome and elapsed time are always reported to telemetry.
/// </summary>
public async Task ExecuteAsync()
{
    var timer = Stopwatch.StartNew();

    // Stays Failure unless one of the paths below completes.
    var outcome = JobOutcome.Failure;

    try
    {
        _logger.LogInformation("Fetching old owner data from blob storage.");
        var indexed = await _ownerDataClient.ReadLatestIndexedAsync();

        _logger.LogInformation("Fetching new owner data from the database.");
        var current = await _databaseFetcher.GetPackageIdToOwnersAsync();

        _logger.LogInformation("Detecting owner changes.");
        var changes = _ownerSetComparer.CompareOwners(indexed.Result, current);

        // The workers share this bag of (package ID, owners) pairs.
        var workItems = changes.Select(pair => new IdAndValue<string[]>(pair.Key, pair.Value));
        var changesBag = new ConcurrentBag<IdAndValue<string[]>>(workItems);
        _logger.LogInformation("{Count} package IDs have owner changes.", changesBag.Count);

        var hasChanges = changes.Any();
        if (!hasChanges)
        {
            outcome = JobOutcome.NoOp;
            return;
        }

        _logger.LogInformation(
            "Starting {Count} workers pushing owners changes to Azure Search.",
            _options.Value.MaxConcurrentBatches);
        await ParallelAsync.Repeat(
            () => WorkAndRetryAsync(changesBag),
            _options.Value.MaxConcurrentBatches);
        _logger.LogInformation("All of the owner changes have been pushed to Azure Search.");

        // Persist in storage the list of all package IDs that have owner changes. This allows debugging and future
        // analytics on frequency of ownership changes.
        _logger.LogInformation("Uploading the package IDs that have owner changes to blob storage.");
        var changedIds = changes.Keys.ToList();
        await _ownerDataClient.UploadChangeHistoryAsync(changedIds);

        _logger.LogInformation("Uploading the new owner data to blob storage.");
        await _ownerDataClient.ReplaceLatestIndexedAsync(current, indexed.AccessCondition);

        outcome = JobOutcome.Success;
    }
    finally
    {
        timer.Stop();
        _telemetryService.TrackUpdateOwnersCompleted(outcome, timer.Elapsed);
    }
}
/// <summary>
/// Checks that the action-based <c>ParallelAsync.ForEachAsync</c> overload visits every element:
/// each body call doubles the array slot addressed by the element it receives.
/// </summary>
/// <param name="maxDop">Maximum degree of parallelism; when <c>null</c>, null options are passed.</param>
public static void ParallelAsync_ForEach_Action(int? maxDop)
{
    int[] data = { 0, 1, 2 };

    ParallelOptions options = null;
    if (maxDop.HasValue)
    {
        options = new ParallelOptions { MaxDegreeOfParallelism = maxDop.Value };
    }

    // The element values (0, 1, 2) are also used as indexes into the array.
    Func<int, Task> doubleInPlace = item =>
    {
        data[item] = item * 2;
        return Task.CompletedTask;
    };

    ParallelAsync.ForEachAsync(data, options, doubleInPlace).Wait();

    Assert.Collection(
        data,
        value => Assert.Equal(0, value),
        value => Assert.Equal(2, value),
        value => Assert.Equal(4, value));
}
/// <summary>
/// Starts <c>_workerCount</c> workers through <c>ParallelAsync.Repeat</c> and waits until either all of
/// them finish or the loop duration plus <c>MaxShutdownTime</c> elapses, whichever comes first.
/// </summary>
/// <param name="cancellationToken">Token from the job runner; handed to workers for message processing.</param>
protected override async Task RunInternalAsync(CancellationToken cancellationToken)
{
    // We should stop processing messages if the job runner cancels us.
    var messageToken = cancellationToken;

    // We should stop dequeuing more messages if too much time elapses.
    Logger.LogInformation("Processing messages for {Duration} before restarting the job loop.", _queueLoopDuration);

    using (var loopSource = new CancellationTokenSource(_queueLoopDuration))
    using (var timeoutSource = new CancellationTokenSource())
    {
        var loopToken = loopSource.Token;

        // Each worker receives its own sequential id.
        var workerId = 0;
        var workers = ParallelAsync.Repeat(
            () => ProcessPackagesAsync(
                Interlocked.Increment(ref workerId),
                loopToken,
                messageToken),
            _workerCount);

        // Wait for a specific amount of time past the loop duration. If a worker task is hanging for whatever
        // reason we don't want the shutdown to be blocked indefinitely.
        //
        // Imagine one worker is stuck and all of the rest of the workers have successfully stopped consuming
        // messages. This would mean that this process is stuck in a seemingly "healthy" state (no exceptions,
        // the process is still alive) but it will never terminate and no queue messages will be processed. By
        // design all jobs must be resilient to unexpected termination (machine shutdown, etc) so not waiting
        // for a slow worker task to gracefully finish is acceptable.
        var shutdownDeadline = Task.Delay(_queueLoopDuration.Add(MaxShutdownTime), timeoutSource.Token);

        var winner = await Task.WhenAny(workers, shutdownDeadline);

        if (winner != shutdownDeadline)
        {
            // The workers finished first, so the deadline timer is no longer needed.
            // NOTE(review): the workers task itself is never awaited here, so a fault in it is not rethrown.
            timeoutSource.Cancel();
            Logger.LogInformation("All workers gracefully shut down.");
        }
        else
        {
            Logger.LogWarning("Not all workers shut down gracefully after {Duration}.", MaxShutdownTime);
        }
    }
}
/// <summary>
/// Exercises the argument handling of the result-returning <c>ParallelAsync.ForAsync</c> overload:
/// a null body and an inverted range must throw, and empty ranges must invoke nothing and yield an
/// empty result.
/// </summary>
/// <param name="maxDop">Maximum degree of parallelism; when <c>null</c>, null options are passed.</param>
public static void ParallelAsync_For_Func_Default_Arguments(int? maxDop)
{
    var data = new int[] { 0, 1, 2 };
    var options = maxDop.HasValue
        ? new ParallelOptions { MaxDegreeOfParallelism = maxDop.Value }
        : null;

    // Null body.
    // Assert.ThrowsAsync only reports a failure through the task it returns, so that task has to be
    // observed; discarding it would let the test pass even when no exception is thrown.
    Func<int, Task<KeyValuePair<int, int>>> func = null;
    Assert.ThrowsAsync<ArgumentNullException>(() => ParallelAsync.ForAsync(0, data.Length, options, func))
        .GetAwaiter()
        .GetResult();

    var actual = 0;
    func = n =>
    {
        Interlocked.Increment(ref actual);
        return Task.FromResult(new KeyValuePair<int, int>(n, n * 2));
    };

    // Bad range.
    // NOTE(review): this previously expected ArgumentNullException, but because the assertion was never
    // observed it was never checked. ArgumentOutOfRangeException matches what the action-based ForAsync
    // test expects for the same range - confirm against the ForAsync implementation.
    Assert.ThrowsAsync<ArgumentOutOfRangeException>(() => ParallelAsync.ForAsync(0, -1, options, func))
        .GetAwaiter()
        .GetResult();

    // Empty source (-1)
    actual = 0;
    var result = ParallelAsync.ForAsync(-1, -1, options, func).Result;
    Assert.Equal(0, actual);
    Assert.Empty(result);

    // Empty source (0)
    actual = 0;
    result = ParallelAsync.ForAsync(0, 0, options, func).Result;
    Assert.Equal(0, actual);
    Assert.Empty(result);

    // Empty source (100)
    actual = 0;
    result = ParallelAsync.ForAsync(100, 100, options, func).Result;
    Assert.Equal(0, actual);
    Assert.Empty(result);
}
/// <summary>
/// Trains and stores one sentence detector per language and per casing mode (original, upper, lower)
/// from the <c>*-train.conllu</c> files found under <paramref name="udSource"/>.
/// </summary>
/// <param name="udSource">Root directory that is searched recursively for training files.</param>
/// <remarks>
/// The method returns only after every model has been trained and stored. The task returned by
/// <c>ParallelAsync.ForEachAsync</c> was previously dropped, so the method could return while training
/// was still running and failures inside the loop body were never observed.
/// </remarks>
public static void Train(string udSource)
{
    var trainFiles = Directory.GetFiles(udSource, "*-train.conllu", SearchOption.AllDirectories);

    // The language code is the part of the file name before the first '-' or '_'.
    var trainFilesPerLanguage = trainFiles
        .GroupBy(f => Path.GetFileNameWithoutExtension(f).Replace("_", "-").Split(new char[] { '-' }).First())
        .ToDictionary(g => g.Key, g => g.ToList());

    var languages = trainFilesPerLanguage.Keys.ToList();

    Console.WriteLine($"Found these languages for training: {string.Join(", ", languages)}");

    // Need to fix the storage model first - maybe join all in one model.
    foreach (var forceCase in new EnumCase[] { EnumCase.Original, EnumCase.ForceUpper, EnumCase.ForceLower })
    {
        // Block until this casing mode is finished so the method does not return early.
        // Blocking assumes the caller has no synchronization context (for example a console host).
        ParallelAsync.ForEachAsync(languages, new ParallelOptions(), async lang =>
        {
            Language language;
            try
            {
                language = Languages.CodeToEnum(lang);
            }
            catch
            {
                // Codes that cannot be mapped are reported and skipped rather than failing the whole run.
                Console.WriteLine($"Unknown language {lang}");
                return;
            }

            var modelTag = forceCase == EnumCase.ForceUpper
                ? "upper"
                : (forceCase == EnumCase.ForceLower ? "lower" : "");
            var sentenceDetector = new SentenceDetector(language, 0, modelTag);

            var trainDocuments = ReadCorpus(trainFilesPerLanguage[lang], ConvertCase: forceCase, sentenceDetector: sentenceDetector);

            // TODO: Implement evaluation against the matching "*-dev.conllu" files.

            Console.WriteLine($"Now training {lang} in mode {forceCase} using files {string.Join(", ", trainFilesPerLanguage[lang])}");
            sentenceDetector.Train(trainDocuments);
            await sentenceDetector.StoreAsync();
        }).GetAwaiter().GetResult();
    }
}
/// <summary>
/// Writes a batch of objects concurrently, one <c>WriteObjectAsync</c> call per item.
/// </summary>
/// <param name="items">Object id and content pairs to write; <c>null</c> or empty is a no-op.</param>
/// <param name="forceOverwrite">Forwarded unchanged to every <c>WriteObjectAsync</c> call.</param>
/// <param name="cancellationToken">Token passed to the parallel options and to every individual write.</param>
public virtual async Task WriteObjectBatchAsync(IEnumerable<KeyValuePair<Sha1, ArraySegment<byte>>> items, bool forceOverwrite, CancellationToken cancellationToken)
{
    if (items == null)
    {
        return;
    }

    // Enumerate the source exactly once. Checking Any() and then iterating would run a lazy sequence
    // twice. Sources that already are collections are used as they are.
    var batch = items as ICollection<KeyValuePair<Sha1, ArraySegment<byte>>> ?? items.ToList();
    if (batch.Count == 0)
    {
        return;
    }

    var parallelOptions = new ParallelOptions
    {
        MaxDegreeOfParallelism = MaxDop,
        CancellationToken = cancellationToken
    };

    // Enumerate batches
    await ParallelAsync.ForEachAsync(batch, parallelOptions, async item =>
    {
        // Execute batch
        await WriteObjectAsync(item.Key, item.Value, forceOverwrite, cancellationToken).ConfigureAwait(false);
    }).ConfigureAwait(false);
}
/// <summary>
/// Reads a batch of objects concurrently and returns one entry per requested id.
/// </summary>
/// <param name="objectIds">Identifiers to read; <c>null</c> yields an empty dictionary.</param>
/// <param name="parallelOptions">
/// Options forwarded to <c>ParallelAsync.ForEachAsync</c>. May be <c>null</c>, in which case the
/// individual reads receive the default (non-cancelable) token.
/// </param>
/// <returns>The value produced by <c>ParallelAsync.ForEachAsync</c> for the id and content pairs.</returns>
public override ValueTask<IReadOnlyDictionary<Sha1, ReadOnlyMemory<byte>>> ReadObjectBatchAsync(IEnumerable<Sha1> objectIds, ParallelOptions parallelOptions)
{
    if (objectIds == null)
    {
        return new ValueTask<IReadOnlyDictionary<Sha1, ReadOnlyMemory<byte>>>(ReadOnlyDictionary.Empty<Sha1, ReadOnlyMemory<byte>>());
    }

    // Resolve the token once, up front. Dereferencing parallelOptions inside the body would throw
    // a NullReferenceException for callers that pass null options.
    var cancellationToken = (parallelOptions?.CancellationToken).GetValueOrDefault();

    // Execute batches
    var task = ParallelAsync.ForEachAsync(objectIds, parallelOptions, async n =>
    {
        // Execute batch
        var buffer = await ReadObjectAsync(n, cancellationToken).ConfigureAwait(false);

        // Transform batch result
        return new KeyValuePair<Sha1, ReadOnlyMemory<byte>>(n, buffer);
    });

    return task;
}
/// <summary>
/// Checks that calling <c>controller.Stop()</c> from the first item's body prevents the second item
/// from running when 1 is passed as the last <c>ForEachAsync</c> argument.
/// </summary>
public async Task Stop_Stops()
{
    var firstRan = false;
    var secondRan = false;

    var steps = new Action[]
    {
        () => firstRan = true,
        () => secondRan = true
    };

    var outcome = await ParallelAsync.ForEachAsync<Action, int>(
        steps,
        async (step, controller) =>
        {
            step();
            controller.ProvideResult(0);
            controller.Stop();
        },
        1).ConfigureAwait(false);

    outcome.Should().Be(0);
    firstRan.Should().BeTrue();
    secondRan.Should().BeFalse();
}
/// <summary>
/// Exercises the argument handling of the action-based <c>ParallelAsync.ForAsync</c> overload:
/// a null body and an inverted range must throw, and empty ranges must invoke nothing.
/// </summary>
/// <param name="maxDop">Maximum degree of parallelism; when <c>null</c>, null options are passed.</param>
public static void ParallelAsync_For_Action_Default_Arguments(int? maxDop)
{
    var data = new int[] { 0, 1, 2 };
    var options = maxDop.HasValue
        ? new ParallelOptions { MaxDegreeOfParallelism = maxDop.Value }
        : null;

    // Null body.
    // Assert.ThrowsAsync only reports a failure through the task it returns, so that task has to be
    // observed; discarding it would let the test pass even when no exception is thrown.
    Func<int, Task> action = null;
    Assert.ThrowsAsync<ArgumentNullException>(() => ParallelAsync.ForAsync(0, data.Length, options, action))
        .GetAwaiter()
        .GetResult();

    var actual = 0;
    action = n =>
    {
        Interlocked.Increment(ref actual);
        return Task.CompletedTask;
    };

    // Bad range
    Assert.ThrowsAsync<ArgumentOutOfRangeException>(() => ParallelAsync.ForAsync(0, -1, options, action))
        .GetAwaiter()
        .GetResult();

    // Empty source (-1)
    actual = 0;
    ParallelAsync.ForAsync(-1, -1, options, action).Wait();
    Assert.Equal(0, actual);

    // Empty source (0)
    actual = 0;
    ParallelAsync.ForAsync(0, 0, options, action).Wait();
    Assert.Equal(0, actual);

    // Empty source (100)
    actual = 0;
    ParallelAsync.ForAsync(100, 100, options, action).Wait();
    Assert.Equal(0, actual);
}
/// <summary>
/// Runs the action-based <c>ParallelAsync.ForAsync</c> overload over <c>loops</c> indexes whose body
/// waits <c>delay</c> milliseconds, then checks that every index was visited and that the whole run
/// took less time than executing all the delays one after another.
/// </summary>
/// <param name="maxDop">Maximum degree of parallelism; when <c>null</c>, null options are passed.</param>
public static void ParallelAsync_For_Action_Delay(int? maxDop)
{
    ParallelOptions options = null;
    if (maxDop.HasValue)
    {
        options = new ParallelOptions { MaxDegreeOfParallelism = maxDop.Value };
    }

    var timer = new Stopwatch();
    timer.Start();

    var invocations = 0;
    Func<int, Task> body = async index =>
    {
        Interlocked.Increment(ref invocations);
        await Task.Delay(delay);
    };

    ParallelAsync.ForAsync(0, loops, options, body).Wait();
    timer.Stop();

    Assert.Equal(loops, invocations);

    // Environmental factors mean we can't assert a lower boundary
    Assert.True(timer.ElapsedMilliseconds < delay * loops);
}
/// <summary>
/// Compares the last indexed download counts with the latest ones, applies popularity transfers to
/// the detected changes, pushes the result to Azure Search with concurrent workers, and then stores
/// the latest download data and popularity transfers as the new indexed baseline.
/// </summary>
/// <returns><c>false</c> when there is nothing to update; <c>true</c> after changes were pushed and stored.</returns>
private async Task<bool> PushIndexChangesAsync()
{
    // The "old" data in this case is the download count data that was last indexed by this job (or
    // initialized by Db2AzureSearch).
    _logger.LogInformation("Fetching old download count data from blob storage.");
    var indexedDownloads = await _downloadDataClient.ReadLatestIndexedAsync(
        AccessConditionWrapper.GenerateEmptyCondition(),
        _stringCache);

    // The "new" data in this case is from the statistics pipeline.
    _logger.LogInformation("Fetching new download count data from blob storage.");
    var latestDownloads = await _auxiliaryFileClient.LoadDownloadDataAsync();

    _logger.LogInformation("Removing invalid IDs and versions from the old downloads data.");
    CleanDownloadData(indexedDownloads.Data);

    _logger.LogInformation("Removing invalid IDs and versions from the new downloads data.");
    CleanDownloadData(latestDownloads);

    _logger.LogInformation("Detecting download count changes.");
    var changes = _downloadSetComparer.Compare(indexedDownloads.Data, latestDownloads);
    _logger.LogInformation("{Count} package IDs have download count changes.", changes.Count);

    // The "old" data is the popularity transfers data that was last indexed by this job (or
    // initialized by Db2AzureSearch).
    _logger.LogInformation("Fetching old popularity transfer data from blob storage.");
    var indexedTransfers = await _popularityTransferDataClient.ReadLatestIndexedAsync(
        AccessConditionWrapper.GenerateEmptyCondition(),
        _stringCache);

    // The "new" data is the latest popularity transfers data from the database.
    _logger.LogInformation("Fetching new popularity transfer data from database.");
    var latestTransfers = await GetPopularityTransfersAsync();

    _logger.LogInformation("Applying download transfers to download changes.");
    ApplyDownloadTransfers(
        latestDownloads,
        indexedTransfers.Data,
        latestTransfers,
        changes);

    // The workers share this bag of package IDs, built after the transfers have been applied.
    var pendingIds = new ConcurrentBag<string>(changes.Keys);
    _logger.LogInformation("{Count} package IDs need to be updated.", pendingIds.Count);

    var hasChanges = changes.Any();
    if (!hasChanges)
    {
        return false;
    }

    _logger.LogInformation(
        "Starting {Count} workers pushing download count changes to Azure Search.",
        _options.Value.MaxConcurrentBatches);
    await ParallelAsync.Repeat(
        () => WorkAndRetryAsync(pendingIds, changes),
        _options.Value.MaxConcurrentBatches);
    _logger.LogInformation("All of the download count changes have been pushed to Azure Search.");

    _logger.LogInformation("Uploading the new download count data to blob storage.");
    await _downloadDataClient.ReplaceLatestIndexedAsync(
        latestDownloads,
        indexedDownloads.Metadata.GetIfMatchCondition());

    _logger.LogInformation("Uploading the new popularity transfer data to blob storage.");
    await _popularityTransferDataClient.ReplaceLatestIndexedAsync(
        latestTransfers,
        indexedTransfers.Metadata.GetIfMatchCondition());

    return true;
}
/// <summary>
/// Background-worker handler that reads the selected fields of the open Parquet file into a
/// <c>ParquetReadResult</c>, either with one reader (default engine) or with several readers that
/// each load a group of fields and are merged afterwards.
/// </summary>
/// <param name="sender">The <see cref="BackgroundWorker"/> running this handler; polled for cancellation.</param>
/// <param name="e">Receives the merged result, or is flagged as cancelled.</param>
private void ReadDataBackgroundWorker_DoWork(object sender, DoWorkEventArgs e)
{
    //Parquet.NET doesn't have any async methods or readers that allow sequential records reading so we need to use the ThreadPool to support cancellation.
    Task readTask = null;
    var partialResults = new ConcurrentDictionary<int, ParquetReadResult>();
    var cancellation = new System.Threading.CancellationTokenSource();

    if (AppSettings.ReadingEngine != ParquetEngine.Default)
    {
        // Split the selected fields into numbered groups; the number is used later to restore the order.
        int groupIndex = 0;
        var fieldGroups = new List<(int, List<string>)>();
        foreach (List<string> fields in UtilityMethods.Split(this.SelectedFields, (int)(this.selectedFields.Count / Environment.ProcessorCount)))
        {
            fieldGroups.Add((groupIndex++, fields));
        }

        readTask = ParallelAsync.ForeachAsync(fieldGroups, Environment.ProcessorCount, async fieldGroup =>
        {
            await Task.Run(() =>
            {
                // Every group opens its own stream and reader over the same file.
                using (Stream parquetStream = new FileStream(this.OpenFilePath, FileMode.Open, FileAccess.Read))
                using (var parquetReader = new ParquetReader(parquetStream, new ParquetOptions() { TreatByteArrayAsString = true }))
                {
                    DataTable table = UtilityMethods.ParquetReaderToDataTable(parquetReader, fieldGroup.Item2, this.CurrentOffset, this.CurrentMaxRowCount, cancellation.Token);
                    partialResults.TryAdd(fieldGroup.Item1, new ParquetReadResult(table, parquetReader.ThriftMetadata.Num_rows));
                }
            });
        });
    }
    else
    {
        readTask = Task.Run(() =>
        {
            using (var parquetReader = ParquetReader.OpenFromFile(this.OpenFilePath, new ParquetOptions() { TreatByteArrayAsString = true }))
            {
                DataTable table = UtilityMethods.ParquetReaderToDataTable(parquetReader, this.SelectedFields, this.CurrentOffset, this.CurrentMaxRowCount, cancellation.Token);
                partialResults.TryAdd(1, new ParquetReadResult(table, parquetReader.ThriftMetadata.Num_rows));
            }
        });
    }

    var worker = (BackgroundWorker)sender;

    // Wake up at least once a second to check whether cancellation was requested.
    while (!readTask.IsCompleted && !worker.CancellationPending)
    {
        readTask.Wait(1000);
    }

    if (worker.CancellationPending)
    {
        cancellation.Cancel();
        e.Cancel = true;
    }

    if (!readTask.IsCompleted)
    {
        return;
    }

    if (partialResults.Count > 0)
    {
        var orderedTables = partialResults
            .OrderBy(f => f.Key)
            .Select(f => f.Value.Result)
            .AsEnumerable();
        DataTable mergedDataTables = UtilityMethods.MergeTables(orderedTables);

        e.Result = new ParquetReadResult(mergedDataTables, partialResults.First().Value.TotalNumberOfRecordsInFile);
    }
    else
    {
        //The code should never reach here
        e.Result = new ParquetReadResult(new DataTable(), 0);
    }
}
/// <summary>
/// Runs <c>ProcessPackagesAsync</c> through <c>ParallelAsync.Repeat</c> and completes when that call completes.
/// </summary>
/// <param name="cancellationToken">Token forwarded to every <c>ProcessPackagesAsync</c> invocation.</param>
protected override async Task RunInternalAsync(CancellationToken cancellationToken)
    => await ParallelAsync.Repeat(() => ProcessPackagesAsync(cancellationToken));