/// <summary>
/// Searches Elasticsearch for the entity's documents and yields one row per hit.
/// </summary>
/// <remarks>
/// For a page request the query is written with the computed offset and the entity's page size.
/// Otherwise a preliminary query is issued to read <c>hits.total</c>, and the real query is
/// written with that total capped at 10000. When <c>from + size</c> exceeds 10000 the search is
/// opened with a one-minute scroll and the remaining pages are pulled with the scroll API.
/// Facet maps are filled from the aggregation buckets of the first search response.
/// The scroll loop stops as soon as a scroll page is empty, so a result set that turns out to be
/// smaller than the previously reported total cannot make the loop spin forever.
/// </remarks>
/// <returns>A lazily evaluated sequence of rows; nothing is yielded when the search fails.</returns>
public IEnumerable<IRow> Read() {
    // Largest window requested from a plain search; anything beyond it goes through a scroll.
    // TODO: move 10000 to configurable limit
    const int maxResultWindow = 10000;

    ElasticsearchResponse<DynamicResponse> response;
    ElasticsearchDynamicValue hits;
    // NOTE(review): from starts at 1, so a non-page request whose total is exactly 10000
    // still takes the scroll path below - confirm that is intended.
    var from = 1;
    var size = 10;
    string body;

    if (_context.Entity.IsPageRequest()) {
        from = (_context.Entity.Page * _context.Entity.PageSize) - _context.Entity.PageSize;
        body = WriteQuery(_fields, _readFrom, _context, from, _context.Entity.PageSize);
    } else {
        // Preliminary query (presumably from = 0, size = 0) used only to learn the total hit count.
        body = WriteQuery(_fields, _readFrom, _context, 0, 0);
        response = _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);
        if (response.Success) {
            hits = response.Body["hits"] as ElasticsearchDynamicValue;
            if (hits != null && hits.HasValue) {
                var properties = hits.Value as IDictionary<string, object>;
                if (properties != null && properties.ContainsKey("total")) {
                    size = Convert.ToInt32(properties["total"]);
                    body = WriteQuery(_fields, _readFrom, _context, 0, Math.Min(size, maxResultWindow));
                }
            }
        }
    }

    _context.Debug(() => body);
    _context.Entity.Query = body;

    response = from + size > maxResultWindow
        ? _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body, p => p.Scroll(TimeSpan.FromMinutes(1.0)))
        : _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);

    if (!response.Success) {
        LogError(response);
        yield break;
    }

    _context.Entity.Hits = Convert.ToInt32((response.Body["hits"]["total"] as ElasticsearchDynamicValue).Value);
    hits = response.Body["hits"]["hits"] as ElasticsearchDynamicValue;

    if (hits == null || !hits.HasValue) {
        _context.Warn("No hits from elasticsearch");
        yield break;
    }

    var docs = hits.Value as IList<object>;

    if (docs == null) {
        _context.Error("No documents returned from elasticsearch!");
        yield break;
    }

    // if any of the fields do not exist, yield break (only the first document is inspected)
    if (docs.Count > 0) {
        var doc = docs.First() as IDictionary<string, object>;
        var source = doc?["_source"] as IDictionary<string, object>;
        if (source == null) {
            _context.Error("Missing _source from elasticsearch response!");
            yield break;
        }

        for (var i = 0; i < _fields.Length; i++) {
            if (source.ContainsKey(_fieldNames[i])) {
                continue;
            }
            _context.Error($"Field {_fieldNames[i]} does not exist!");
            yield break;
        }
    }

    // Rows from the first search response.
    var count = 0;
    foreach (var d in docs) {
        var doc = (IDictionary<string, object>)d;
        var row = _rowFactory.Create();
        var source = (IDictionary<string, object>)doc["_source"];
        for (var i = 0; i < _fields.Length; i++) {
            row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
        }
        _context.Increment();
        yield return row;
    }
    count += docs.Count;

    // get this from first search response (maybe), unless you have to aggregate it from all...
    foreach (var filter in _context.Entity.Filter.Where(f => f.Type == "facet" && !string.IsNullOrEmpty(f.Map))) {
        var map = _context.Process.Maps.First(m => m.Name == filter.Map);
        var buckets = response.Body["aggregations"][filter.Key]["buckets"] as ElasticsearchDynamicValue;
        if (buckets == null || !buckets.HasValue) {
            continue;
        }

        var items = buckets.Value as IEnumerable<object>;
        if (items == null) {
            continue;
        }

        foreach (var item in items.OfType<IDictionary<string, object>>()) {
            map.Items.Add(new MapItem { From = $"{item["key"]} ({item["doc_count"]})", To = item["key"] });
        }
    }

    // No scroll id in the response: nothing further to fetch.
    if (!response.Body.ContainsKey("_scroll_id")) {
        yield break;
    }

    // Everything already arrived in the first response; just release the scroll.
    if (size == count) {
        _client.ClearScroll<DynamicResponse>(new PostData<object>(new { scroll_id = response.Body["_scroll_id"].Value }));
        yield break;
    }

    // Collect every scroll id that was used so they can all be cleared at the end.
    var scrolls = new HashSet<string>();

    do {
        var scrollId = response.Body["_scroll_id"].Value;
        scrolls.Add(scrollId);
        response = _client.Scroll<DynamicResponse>(new PostData<object>(new { scroll = "1m", scroll_id = scrollId }));
        if (response.Success) {
            docs = (IList<object>)response.Body["hits"]["hits"].Value;

            // An empty page means the scroll is exhausted. Without this exit, count would
            // never reach size and the same request would be repeated indefinitely.
            if (docs.Count == 0) {
                _context.Warn("Elasticsearch scroll returned no documents before the expected total was reached.");
                break;
            }

            foreach (var d in docs) {
                var doc = (IDictionary<string, object>)d;
                var row = _rowFactory.Create();
                var source = (IDictionary<string, object>)doc["_source"];
                for (var i = 0; i < _fields.Length; i++) {
                    row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
                }
                _context.Increment();
                yield return row;
            }
            count += docs.Count;
        } else {
            LogError(response);
        }
    } while (response.Success && count < size);

    _client.ClearScroll<DynamicResponse>(new PostData<object>(new { scroll_id = scrolls.ToArray() }));
}
/// <summary>
/// Reads the output rows whose batch id is greater than one less than the smallest batch id
/// among the parent's entities.
/// </summary>
/// <remarks>
/// Nothing is read when the parent's entities report no inserts, updates or deletes.
/// For SQL CE the statement is rebuilt from the entity sources and the star FROM clauses;
/// for every other provider the star object is queried directly (with a NOLOCK hint on SQL Server).
/// The connection, command and data reader are all disposed when enumeration finishes or is abandoned.
/// </remarks>
/// <returns>A lazily evaluated sequence of rows created from the data reader.</returns>
public IEnumerable<IRow> Read() {
    // No changes recorded on any entity: nothing to read.
    if (_parent.Entities.Sum(e => e.Inserts + e.Updates + e.Deletes) == 0) {
        yield break;
    }

    var batches = _parent.Entities.Select(e => e.BatchId).ToArray();
    var minBatchId = batches.Min();
    var maxBatchId = batches.Max();
    _output.Info("Batch Range: {0} to {1}.", minBatchId, maxBatchId);

    // The query uses a strict "greater than", so step back one to include the smallest batch.
    var threshold = minBatchId - 1;

    string sql;
    if (_cf.AdoProvider == AdoProvider.SqlCe) {
        // because SqlCe doesn't support views, re-construct the parent view's definition
        var ctx = new PipelineContext(_output.Logger, _parent);
        var master = _parent.Entities.First(e => e.IsMaster);
        var builder = new StringBuilder();

        // Each source is expected to look like "left.right"; both parts are enclosed separately.
        var columns = _output.Entity.Fields
            .Where(f => f.Output)
            .Select(f => f.Source.Split('.'))
            .Select(parts => _cf.Enclose(parts[0]) + "." + _cf.Enclose(parts[1]));

        builder.AppendLine($"SELECT {string.Join(",", columns)}");
        foreach (var from in ctx.SqlStarFroms(_cf)) {
            builder.AppendLine(@from);
        }
        builder.AppendLine($"WHERE {_cf.Enclose(Utility.GetExcelName(master.Index))}.{_cf.Enclose(master.TflBatchId().FieldName())} > @Threshold;");
        sql = builder.ToString();
    } else {
        var columns = string.Join(",", _output.Entity.Fields.Where(f => f.Output).Select(f => _cf.Enclose(f.Alias)));
        var star = _cf.Enclose(_output.Process.Star);
        var hint = _cf.AdoProvider == AdoProvider.SqlServer ? "WITH (NOLOCK)" : string.Empty;
        var batchId = _cf.Enclose(Constants.TflBatchId);
        sql = $@" SELECT {columns} FROM {star} {hint} WHERE {batchId} > @Threshold;";
    }

    _output.Debug(() => sql);

    using (var cn = _cf.GetConnection()) {
        cn.Open();

        using (var cmd = cn.CreateCommand()) {
            cmd.CommandTimeout = 0;
            cmd.CommandType = CommandType.Text;
            cmd.CommandText = sql;

            var min = cmd.CreateParameter();
            min.ParameterName = "@Threshold";
            min.Value = threshold;
            min.Direction = ParameterDirection.Input;
            min.DbType = DbType.Int32;
            cmd.Parameters.Add(min);

            var rowCount = 0;
            using (var reader = cmd.ExecuteReader(CommandBehavior.SequentialAccess)) {
                var fieldArray = _output.Entity.Fields.ToArray();
                while (reader.Read()) {
                    rowCount++;
                    _output.Increment();
                    yield return _rowCreator.Create(reader, fieldArray);
                }
            }

            _output.Info("{0} from {1}", rowCount, _output.Connection.Name);
        }
    }
}
/// <summary>
/// Runs the entity's query against Elasticsearch and yields one row per returned document,
/// then fills the facet maps from the response's aggregation buckets.
/// </summary>
/// <remarks>
/// A page request is written with the computed offset and the entity's page size. Any other
/// request first issues a preliminary query to read <c>hits.total</c> and then rewrites the
/// query using that total plus one. A failed search logs the response's debug information
/// and yields nothing.
/// </remarks>
/// <returns>A lazily evaluated sequence of rows.</returns>
public IEnumerable<IRow> Read() {
    ElasticsearchResponse<DynamicResponse> response;
    string body;

    if (!_context.Entity.IsPageRequest()) {
        // Preliminary query; only its reported total is used.
        body = WriteQuery(_fields, _readFrom, _context, 0, 0);
        response = _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);
        if (response.Success) {
            var summary = response.Body["hits"] as ElasticsearchDynamicValue;
            if (summary != null && summary.HasValue) {
                var properties = summary.Value as IDictionary<string, object>;
                if (properties != null && properties.ContainsKey("total")) {
                    var size = Convert.ToInt32(properties["total"]) + 1;
                    body = WriteQuery(_fields, _readFrom, _context, 0, size);
                }
            }
        }
    } else {
        var offset = (_context.Entity.Page * _context.Entity.PageSize) - _context.Entity.PageSize;
        body = WriteQuery(_fields, _readFrom, _context, offset, _context.Entity.PageSize);
    }

    _context.Debug(() => body);
    _context.Entity.Query = body;

    response = _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);

    // Bail out early on failure so the happy path below stays flat.
    if (!response.Success) {
        _context.Error(response.DebugInformation);
        yield break;
    }

    _context.Entity.Hits = Convert.ToInt32((response.Body["hits"]["total"] as ElasticsearchDynamicValue).Value);

    var hits = response.Body["hits"]["hits"] as ElasticsearchDynamicValue;
    var documents = hits != null && hits.HasValue
        ? (hits.Value as IEnumerable<object>)
        : null;

    if (documents != null) {
        foreach (var document in documents.OfType<IDictionary<string, object>>()) {
            var row = _rowFactory.Create();

            // A document without a usable _source still produces an (unpopulated) row.
            var source = document.ContainsKey("_source")
                ? (document["_source"] as IDictionary<string, object>)
                : null;

            if (source != null) {
                for (var index = 0; index < _fields.Length; index++) {
                    var field = _fields[index];
                    row[field] = field.Convert(source[_fieldNames[index]]);
                }
            }

            _context.Increment();
            yield return row;
        }
    }

    // Turn each facet filter's aggregation buckets into map items.
    var facets = _context.Entity.Filter.Where(f => f.Type == "facet" && !string.IsNullOrEmpty(f.Map));
    foreach (var facet in facets) {
        var map = _context.Process.Maps.First(m => m.Name == facet.Map);
        var buckets = response.Body["aggregations"][facet.Key]["buckets"] as ElasticsearchDynamicValue;
        if (buckets == null || !buckets.HasValue) {
            continue;
        }

        var entries = buckets.Value as IEnumerable<object>;
        if (entries == null) {
            continue;
        }

        foreach (var bucket in entries.OfType<IDictionary<string, object>>()) {
            var key = bucket["key"];
            var label = $"{key} ({bucket["doc_count"]})";
            map.Items.Add(new MapItem { From = label, To = key }.WithDefaults());
        }
    }
}