/// <summary>
/// Disposes every exported piece and releases references so the result cannot be reused.
/// Safe to call multiple times; only the first call has any effect.
/// </summary>
public void Dispose()
{
    if (disposed)
    {
        return;
    }
    // Release each piece's underlying resources before dropping the collection.
    foreach (var piece in Pieces)
    {
        piece.Dispose();
    }
    Pieces.Clear();
    Context = null;
    Pieces = null;
    disposed = true;
}
/// <summary>
/// Clears an open ElasticSearch scroll cursor and resets the scroll state on the context.
/// No-op when the context holds no scroll token (nothing to clear).
/// </summary>
/// <param name="context">Fetch context carrying the scroll token to release</param>
/// <exception cref="Exception">Rethrows the client's original exception when the clear request fails</exception>
protected Task clearScrollRequest(DataFetchContext context)
{
    // Guard: no scroll was opened (or it was already cleared) — sending a
    // clear-scroll with a null id would fail server-side for no reason.
    if (string.IsNullOrEmpty(context.ScrollToken))
    {
        return Task.CompletedTask;
    }
    var client = base.getClient();
    var clearRequest = client.ClearScroll<dynamic>(new { scroll_id = new string[] { context.ScrollToken } });
    if (!clearRequest.Success)
    {
        throw clearRequest.OriginalException;
    }
    context.Scroll = false;
    context.ScrollToken = null;
    // The ES client call above is synchronous; the method was previously marked
    // `async` with no awaits (compiler warning CS1998), so return a completed task.
    return Task.CompletedTask;
}
/// <summary>
/// Continues an ElasticSearch scroll, yielding the hits of the next batch.
/// Updates <c>context.TimeTaken</c> and sets <c>context.Scroll</c> to false
/// once a batch comes back empty, which terminates the caller's scroll loop.
/// </summary>
/// <param name="context">Fetch context carrying the scroll token from the initial search</param>
/// <returns>Lazily-yielded dynamic hit documents from the scroll response</returns>
/// <exception cref="Exception">Rethrows the client's original exception on a failed response</exception>
protected IEnumerable <dynamic> elasticsearchGetScroll(DataFetchContext context) {
    var client = getClient();
    // "1m" keeps the scroll cursor alive for one minute between batches.
    var response = client.Scroll <dynamic>(new { scroll = "1m", scroll_id = context.ScrollToken });
    if (response.Success) {
        context.TimeTaken = TimeSpan.FromMilliseconds(Convert.ToInt32(response.Body.took));
        // An empty hits array means the scroll is exhausted — signal the caller to stop.
        context.Scroll = response.Body.hits.hits.Count > 0;
        foreach (var hit in response.Body.hits.hits) {
            yield return(hit);
        }
    } else {
        // NOTE: rethrowing a stored exception loses its original stack trace.
        throw response.OriginalException;
    }
}
/// <summary>
/// Fetches a dataset from ElasticSearch, flattening documents into a DataTable.
/// </summary>
/// <param name="token">Cancellation token to cancel request and processing</param>
/// <param name="luceneQuery">Specifies Lucene query to search documents; defaults to match-all</param>
/// <returns>A <see cref="DataFetchResult"/> holding the flattened table and its fetch context</returns>
public async Task <DataFetchResult> FetchDataSet(CancellationToken token, string luceneQuery = "*")
{
    // Pass the token to Task.Run so a pre-cancelled request never schedules work.
    return await Task.Run(() =>
    {
        var context = new DataFetchContext();
        context.LuceneQuery = luceneQuery;
        context.MaxBatchSize = MaxSampleSize;
        context.Scroll = false;
        var table = new DataTable();
        try
        {
            foreach (var hit in elasticsearchGetSearch(context))
            {
                // Check between documents so cancellation is responsive during large fetches.
                token.ThrowIfCancellationRequested();
                flattenDocumentToRow(table, hit);
            }
        }
        catch
        {
            // Do not leak the partially-filled table when the fetch fails or is cancelled.
            table.Dispose();
            throw;
        }
        // Record the flattened column names on the context for downstream consumers.
        foreach (DataColumn col in table.Columns)
        {
            if (!context.Columns.Contains(col.ColumnName))
            {
                context.Columns.Add(col.ColumnName);
            }
        }
        return new DataFetchResult(table, context);
    }, token);
}
/// <summary>
/// Executes the initial ElasticSearch search for the context's Lucene query,
/// yielding the hits of the first batch. Populates <c>context.TotalHits</c> and
/// <c>context.TimeTaken</c>; when scrolling is requested, captures the scroll
/// token for subsequent <c>elasticsearchGetScroll</c> calls.
/// </summary>
/// <param name="context">Fetch context supplying the query, batch size, and scroll flag</param>
/// <returns>Lazily-yielded dynamic hit documents from the search response</returns>
/// <exception cref="Exception">Rethrows the client's original exception on a failed response</exception>
protected IEnumerable <dynamic> elasticsearchGetSearch(DataFetchContext context) {
    var client = getClient();
    var response = client.SearchGet <dynamic>(IndexPattern, x => getSearchParameters(x, context.LuceneQuery, context.MaxBatchSize, context.Scroll));
    if (response.Success) {
        context.TotalHits = Convert.ToInt64(response.Body.hits.total);
        context.TimeTaken = TimeSpan.FromMilliseconds(Convert.ToInt32(response.Body.took));
        if (context.Scroll) {
            // Keep the cursor id so the caller can page through the remaining batches.
            context.ScrollToken = response.Body._scroll_id;
        }
        foreach (var hit in response.Body.hits.hits) {
            yield return(hit);
        }
    } else {
        // NOTE: rethrowing a stored exception loses its original stack trace.
        throw response.OriginalException;
    }
}
/// <summary>
/// Performs a batch export, returning an assembly of several smaller batch pieces.
/// Each piece is flushed to disk (<c>Unload</c>) as soon as it is filled so memory
/// stays bounded regardless of the total result size.
/// </summary>
/// <param name="token">Cancellation token to cancel the request and processing</param>
/// <param name="luceneQuery">Lucene query used to search documents; defaults to match-all</param>
/// <param name="progressCallback">Optional callback, invoked on the source scheduler after each batch with (documents processed, documents total)</param>
/// <returns>A <see cref="BatchExportResult"/> owning all exported pieces</returns>
public async Task <BatchExportResult> BatchExport(CancellationToken token, string luceneQuery = "*", DataSetExporterReportProgressCallback progressCallback = null)
{
    return await Task.Run(async () =>
    {
        var context = new DataFetchContext();
        context.LuceneQuery = luceneQuery;
        context.MaxBatchSize = MaxBatchSize;
        context.Scroll = true;
        var export = new BatchExportResult(context);
        var firstBatch = new BatchExportPiece();
        export.Pieces.Add(firstBatch);
        int docsTotal, docsProcessed;
        try
        {
            // Fill data from the first (search) batch.
            foreach (var hit in base.elasticsearchGetSearch(context))
            {
                token.ThrowIfCancellationRequested();
                flattenDocumentToRow(firstBatch.Table, hit);
            }
            // Record any new column names on the context.
            foreach (DataColumn col in firstBatch.Table.Columns)
            {
                if (!context.Columns.Contains(col.ColumnName))
                {
                    context.Columns.Add(col.ColumnName);
                }
            }
            // TotalHits is long; the progress callback takes ints, so truncate deliberately.
            unchecked
            {
                docsTotal = (int)context.TotalHits;
                docsProcessed = firstBatch.Table.Rows.Count;
            }
            if (progressCallback != null)
            {
                // Marshal progress reporting back onto the caller's scheduler.
                await Task.Factory.StartNew(() => { progressCallback(docsProcessed, docsTotal); }, CancellationToken.None, TaskCreationOptions.None, sourceScheduler);
            }
            // Flush to disk and allow GC to collect memory.
            firstBatch.Unload();
            // Fetch the remaining batches; elasticsearchGetScroll sets Scroll to false at the end.
            while (context.Scroll)
            {
                var batchPiece = new BatchExportPiece();
                export.Pieces.Add(batchPiece);
                foreach (var hit in base.elasticsearchGetScroll(context))
                {
                    token.ThrowIfCancellationRequested();
                    flattenDocumentToRow(batchPiece.Table, hit);
                }
                foreach (DataColumn col in batchPiece.Table.Columns)
                {
                    if (!context.Columns.Contains(col.ColumnName))
                    {
                        context.Columns.Add(col.ColumnName);
                    }
                }
                unchecked
                {
                    docsProcessed += batchPiece.Table.Rows.Count;
                }
                if (progressCallback != null)
                {
                    await Task.Factory.StartNew(() => { progressCallback(docsProcessed, docsTotal); }, CancellationToken.None, TaskCreationOptions.None, sourceScheduler);
                }
                batchPiece.Unload();
            }
        }
        catch
        {
            // Dispose all collected pieces before surfacing the failure.
            export.Dispose();
            throw;
        }
        finally
        {
            // Only clear the scroll when one was actually opened; previously this
            // ran unconditionally, and an exception thrown here (e.g. after the
            // initial search failed before a token existed) masked the original error.
            if (context.ScrollToken != null)
            {
                await clearScrollRequest(context);
            }
        }
        return export;
    });
}
/// <summary>
/// Creates an empty batch export result bound to the given fetch context.
/// </summary>
/// <param name="context">Fetch context describing the export that produced this result</param>
public BatchExportResult(DataFetchContext context)
{
    Pieces = new List <BatchExportPiece>();
    Context = context;
}
/// <summary>
/// Creates a fetch result pairing the flattened data table with its fetch context.
/// </summary>
/// <param name="source">Flattened table of documents returned by the fetch</param>
/// <param name="context">Fetch context describing the query that produced the table</param>
public DataFetchResult(DataTable source, DataFetchContext context)
{
    Context = context;
    Result = source;
}