Example #1
 public static async Task RunNormal(string[] wordlist)
 {
     // Earlier sequential version, kept for reference:
     //for (int i = 0; i < wordlist.Length; i++)
     //{
     //    await GetDirectory(url, wordlist[i]);
     //}

     // Query each wordlist entry concurrently, bounded by ExecutionOptions.threadCount.
     await ParallelAsync.ForeachAsync(wordlist, ExecutionOptions.threadCount, async directory =>
     {
         await GetDirectory(directory);
     });
 }
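The ParallelAsync.ForeachAsync helper itself is not shown in these examples. For orientation, below is a minimal sketch of how such a helper is commonly implemented (an assumption, not the library's actual code; the names ParallelAsyncSketch and degreeOfParallelism are illustrative). It splits the input into partitions and drains each partition with its own worker task, so at most degreeOfParallelism bodies run concurrently:

 // Minimal sketch of a ForeachAsync helper (assumption: not the library's actual code).
 using System;
 using System.Collections.Concurrent;
 using System.Collections.Generic;
 using System.Linq;
 using System.Threading.Tasks;

 public static class ParallelAsyncSketch
 {
     public static Task ForeachAsync<T>(IEnumerable<T> source, int degreeOfParallelism, Func<T, Task> body)
     {
         // Task.WhenAll completes once every partition has been fully drained.
         return Task.WhenAll(
             Partitioner.Create(source)
                        .GetPartitions(degreeOfParallelism)
                        .Select(partition => Task.Run(async () =>
                        {
                            using (partition)
                            {
                                while (partition.MoveNext())
                                {
                                    await body(partition.Current);
                                }
                            }
                        })));
     }
 }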
Example #2
 public static async Task RunExt(string[] wordlist)
 {
     await ParallelAsync.ForeachAsync(wordlist, ExecutionOptions.threadCount, async directory =>
     {
         // Check the bare directory name first, then each configured file extension.
         await GetDirectory(directory);
         for (int j = 0; j < ExecutionOptions.extensions.Length; j++)
         {
             await GetDirectory(directory + "." + ExecutionOptions.extensions[j]);
         }
     });
 }
Example #3
        private void ReadDataBackgroundWorker_DoWork(object sender, DoWorkEventArgs e)
        {
            // Parquet.NET doesn't have any async methods or readers that allow sequential record reading, so the ThreadPool is used here to support cancellation.
            Task task              = null;
            var  results           = new ConcurrentDictionary<int, ParquetReadResult>();
            var  cancellationToken = new System.Threading.CancellationTokenSource();

            if (AppSettings.ReadingEngine == ParquetEngine.Default)
            {
                task = Task.Run(() =>
                {
                    using (var parquetReader = ParquetReader.OpenFromFile(this.OpenFilePath, new ParquetOptions()
                    {
                        TreatByteArrayAsString = true
                    }))
                    {
                        DataTable result = UtilityMethods.ParquetReaderToDataTable(parquetReader, this.SelectedFields, this.CurrentOffset, this.CurrentMaxRowCount, cancellationToken.Token);
                        results.TryAdd(1, new ParquetReadResult(result, parquetReader.ThriftMetadata.Num_rows));
                    }
                });
            }
            else
            {
                int i           = 0;
                var fieldGroups = new List<(int, List<string>)>();
                foreach (List<string> fields in UtilityMethods.Split(this.SelectedFields, (int)(this.SelectedFields.Count / Environment.ProcessorCount)))
                {
                    fieldGroups.Add((i++, fields));
                }

                task = ParallelAsync.ForeachAsync(fieldGroups, Environment.ProcessorCount,
                                                  async fieldGroup =>
                {
                    await Task.Run(() =>
                    {
                        using (Stream parquetStream = new FileStream(this.OpenFilePath, FileMode.Open, FileAccess.Read))
                            using (var parquetReader = new ParquetReader(parquetStream, new ParquetOptions()
                            {
                                TreatByteArrayAsString = true
                            }))
                            {
                                DataTable result = UtilityMethods.ParquetReaderToDataTable(parquetReader, fieldGroup.Item2, this.CurrentOffset, this.CurrentMaxRowCount, cancellationToken.Token);
                                results.TryAdd(fieldGroup.Item1, new ParquetReadResult(result, parquetReader.ThriftMetadata.Num_rows));
                            }
                    });
                });
            }

            while (!task.IsCompleted && !((BackgroundWorker)sender).CancellationPending)
            {
                task.Wait(1000);
            }

            if (((BackgroundWorker)sender).CancellationPending)
            {
                cancellationToken.Cancel();
                e.Cancel = true;
            }

            if (task.IsCompleted)
            {
                if (results.Count > 0)
                {
                    DataTable         mergedDataTables = UtilityMethods.MergeTables(results.OrderBy(f => f.Key).Select(f => f.Value.Result).AsEnumerable());
                    ParquetReadResult finalResult      = new ParquetReadResult(mergedDataTables, results.First().Value.TotalNumberOfRecordsInFile);
                    e.Result = finalResult;
                }
                else
                {
                    //The code should never reach here
                    e.Result = new ParquetReadResult(new DataTable(), 0);
                }
            }
        }
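For reference, here is the cancellation plumbing from Example #3, isolated from the Parquet-specific reading (a sketch: ReadFormSketch and DoSlowWork are hypothetical placeholders; only the BackgroundWorker / CancellationTokenSource polling pattern mirrors the code above):

// Condensed sketch of the polling-plus-cancellation pattern from Example #3
// (assumption: ReadFormSketch and DoSlowWork are placeholders, not part of the code above).
using System.ComponentModel;
using System.Threading;
using System.Threading.Tasks;

public class ReadFormSketch
{
    // Stand-in for the long-running work; it must observe the token itself.
    private void DoSlowWork(CancellationToken token)
    {
        for (int i = 0; i < 100; i++)
        {
            token.ThrowIfCancellationRequested();
            Thread.Sleep(100);
        }
    }

    private void Worker_DoWork(object sender, DoWorkEventArgs e)
    {
        var worker = (BackgroundWorker)sender;
        var cts    = new CancellationTokenSource();
        Task task  = Task.Run(() => DoSlowWork(cts.Token));

        // Wait in one-second slices so a pending cancellation request is noticed quickly.
        while (!task.IsCompleted && !worker.CancellationPending)
        {
            task.Wait(1000);
        }

        if (worker.CancellationPending)
        {
            cts.Cancel();   // cooperative: the work only stops because DoSlowWork checks the token
            e.Cancel = true;
        }
    }
}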