/// <summary>
        /// Writes every row of an exported batch piece to the current CSV file.
        /// </summary>
        /// <remarks>
        /// One CSV record is written per table row. Fields are emitted in the order of the
        /// exporter's column list; a column that does not exist in this piece's table is
        /// written as an empty field so that all records have the same layout.
        /// The piece is loaded before writing and is always unloaded again, even when
        /// writing fails part-way through.
        /// </remarks>
        /// <param name="piece">The batch piece whose table rows are appended to the CSV file.</param>
        public void FlushPiece(BatchExportPiece piece)
        {
            piece.Load();

            try
            {
                var table = piece.Table;

                foreach (DataRow row in table.Rows)
                {
                    foreach (var columnName in columns)
                    {
                        // A piece only has the columns that occurred in its own documents.
                        csvWriter.WriteField(table.Columns.Contains(columnName) ? row[columnName] : string.Empty);
                    }
                    csvWriter.NextRecord();

                    // TODO: progress report
                }
            }
            finally
            {
                // Release the loaded data even if a write threw.
                // NOTE(review): the 'false' argument presumably skips re-saving the piece - confirm.
                piece.Unload(false);
            }
        }
        // Example #2
        /// <summary>
        /// Performs a batch export and returns a result assembled from several smaller batch pieces.
        /// </summary>
        /// <remarks>
        /// The first piece is filled from the initial search request, every further piece from a
        /// scroll request, until the fetch context reports that scrolling has finished. After each
        /// piece its column names are merged into the context's column list, progress is reported
        /// and the piece is unloaded. The scroll request is always cleared at the end; if anything
        /// fails (including clearing the scroll), the partially built export is disposed before
        /// the exception is rethrown.
        /// </remarks>
        /// <param name="token">Cancellation token; checked before every scroll request and for every hit.</param>
        /// <param name="luceneQuery">Lucene query assigned to the fetch context. Defaults to "*".</param>
        /// <param name="progressCallback">
        /// Optional callback that receives (documents processed, total documents) after each batch.
        /// It is invoked on <c>sourceScheduler</c>.
        /// </param>
        /// <returns>The export result holding all batch pieces.</returns>
        /// <exception cref="System.OperationCanceledException">Cancellation was requested through <paramref name="token"/>.</exception>
        public async Task<BatchExportResult> BatchExport(CancellationToken token, string luceneQuery = "*", DataSetExporterReportProgressCallback progressCallback = null)
        {
            return await Task.Run(async () =>
            {
                var context = new DataFetchContext();
                context.LuceneQuery = luceneQuery;
                context.MaxBatchSize = MaxBatchSize;
                context.Scroll = true;

                var export = new BatchExportResult(context);
                var firstBatch = new BatchExportPiece();
                export.Pieces.Add(firstBatch);

                int docsTotal, docsProcessed;

                try
                {
                    // The inner try/finally sits inside the outer try/catch so that a failure
                    // while clearing the scroll also disposes the export instead of leaking it.
                    try
                    {
                        // fill data from first batch
                        foreach (var hit in base.elasticsearchGetSearch(context))
                        {
                            token.ThrowIfCancellationRequested();
                            flattenDocumentToRow(firstBatch.Table, hit);
                        }

                        mergeColumnsIntoContext(context, firstBatch);

                        unchecked
                        {
                            docsTotal = (int)context.TotalHits;
                            docsProcessed = firstBatch.Table.Rows.Count;
                        }
                        await reportExportProgress(progressCallback, docsProcessed, docsTotal);

                        // flush to disk and allow GC to collect memory
                        firstBatch.Unload();

                        // now get all the other batches
                        while (context.Scroll) // elasticsearchGetScroll will set Scroll to false at the end
                        {
                            // Do not start another scroll request once cancellation was requested.
                            token.ThrowIfCancellationRequested();

                            var batchPiece = new BatchExportPiece();
                            export.Pieces.Add(batchPiece);
                            foreach (var hit in base.elasticsearchGetScroll(context))
                            {
                                token.ThrowIfCancellationRequested();
                                flattenDocumentToRow(batchPiece.Table, hit);
                            }

                            mergeColumnsIntoContext(context, batchPiece);

                            unchecked
                            {
                                docsProcessed += batchPiece.Table.Rows.Count;
                            }
                            await reportExportProgress(progressCallback, docsProcessed, docsTotal);

                            batchPiece.Unload();
                        }
                    }
                    finally
                    {
                        await clearScrollRequest(context);
                    }
                }
                catch
                {
                    export.Dispose();
                    throw;
                }

                return export;
            });
        }

        /// <summary>
        /// Adds every column name of the piece's table to the context's column list unless it is already present.
        /// </summary>
        /// <param name="context">Fetch context whose column list is extended.</param>
        /// <param name="piece">Batch piece whose table columns are inspected.</param>
        private void mergeColumnsIntoContext(DataFetchContext context, BatchExportPiece piece)
        {
            foreach (DataColumn col in piece.Table.Columns)
            {
                if (!context.Columns.Contains(col.ColumnName))
                {
                    context.Columns.Add(col.ColumnName);
                }
            }
        }

        /// <summary>
        /// Invokes the progress callback on <c>sourceScheduler</c> and waits for it to complete.
        /// Does nothing when no callback was supplied.
        /// </summary>
        /// <param name="progressCallback">Callback to invoke; may be null.</param>
        /// <param name="docsProcessed">Number of documents processed so far.</param>
        /// <param name="docsTotal">Total number of documents to export.</param>
        private async Task reportExportProgress(DataSetExporterReportProgressCallback progressCallback, int docsProcessed, int docsTotal)
        {
            if (progressCallback == null)
            {
                return;
            }

            // CancellationToken.None: a progress notification that was started is always delivered.
            await Task.Factory.StartNew(() => { progressCallback(docsProcessed, docsTotal); }, CancellationToken.None, TaskCreationOptions.None, sourceScheduler);
        }