/// <summary>
/// Reads the entity's documents from Elasticsearch and lazily yields one row per hit.
/// </summary>
/// <remarks>
/// For a page request a single search is issued for the requested page. Otherwise a
/// zero-size search is issued first to learn how many documents match, and the real search
/// asks for at most 10000 of them; when more than 10000 are expected the search is opened
/// with a one minute scroll and the remaining pages are pulled with scroll requests.
/// After the first page has been yielded, every "facet" filter that names a map gets that
/// map's items filled from the aggregation buckets of the first response.
/// An unsuccessful response is logged and ends the enumeration. Scroll ids seen along the
/// way are cleared when the enumeration ends, including when the caller stops early.
/// </remarks>
/// <returns>A deferred sequence of rows, one per document returned by Elasticsearch.</returns>
public IEnumerable<IRow> Read() {
    ElasticsearchResponse<DynamicResponse> response;
    ElasticsearchDynamicValue hits;

    // Must start at 0: a full (non-page) read never reassigns it, and from + size should only
    // exceed 10000 when more than 10000 documents actually match.
    var from = 0;
    var size = 10;
    string body;

    if (_context.Entity.IsPageRequest()) {
        from = (_context.Entity.Page * _context.Entity.PageSize) - _context.Entity.PageSize;
        body = WriteQuery(_fields, _readFrom, _context, from, _context.Entity.PageSize);
    } else {
        // Ask for zero documents first, only to learn the total number of matches.
        body = WriteQuery(_fields, _readFrom, _context, 0, 0);
        response = _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);
        if (response.Success) {
            hits = response.Body["hits"] as ElasticsearchDynamicValue;
            if (hits != null && hits.HasValue) {
                var properties = hits.Value as IDictionary<string, object>;
                object matching;
                if (properties != null && properties.TryGetValue("total", out matching)) {
                    size = Convert.ToInt32(matching);
                    body = WriteQuery(_fields, _readFrom, _context, 0, size > 10000 ? 10000 : size);
                }
            }
        }
    }

    _context.Debug(() => body);
    _context.Entity.Query = body;

    // move 10000 to configurable limit
    response = from + size > 10000
        ? _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body, p => p.Scroll(TimeSpan.FromMinutes(1.0)))
        : _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);

    if (!response.Success) {
        LogError(response);
        yield break;
    }

    // Turns one raw hit into a populated row; shared by the first page and the scroll pages.
    Func<object, IRow> toRow = hit => {
        var document = (IDictionary<string, object>)hit;
        var row = _rowFactory.Create();
        var values = (IDictionary<string, object>)document["_source"];
        for (var f = 0; f < _fields.Length; f++) {
            row[_fields[f]] = _fields[f].Convert(values[_fieldNames[f]]);
        }
        _context.Increment();
        return row;
    };

    // Every scroll id seen so far; cleared in the finally block however the enumeration ends.
    var scrolls = new HashSet<string>();

    try {
        if (response.Body.ContainsKey("_scroll_id")) {
            var firstScrollId = response.Body["_scroll_id"].Value as string;
            if (firstScrollId != null) {
                scrolls.Add(firstScrollId);
            }
        }

        // A missing total is reported as zero hits instead of failing on a null reference.
        var total = response.Body["hits"]["total"] as ElasticsearchDynamicValue;
        _context.Entity.Hits = total == null ? 0 : Convert.ToInt32(total.Value);

        hits = response.Body["hits"]["hits"] as ElasticsearchDynamicValue;
        if (hits == null || !hits.HasValue) {
            _context.Warn("No hits from elasticsearch");
            yield break;
        }

        var docs = hits.Value as IList<object>;
        if (docs == null) {
            _context.Error("No documents returned from elasticsearch!");
            yield break;
        }

        // if any of the fields do not exist, yield break
        if (docs.Count > 0) {
            var doc = docs.First() as IDictionary<string, object>;
            var source = doc?["_source"] as IDictionary<string, object>;
            if (source == null) {
                _context.Error("Missing _source from elasticsearch response!");
                yield break;
            }

            for (var i = 0; i < _fields.Length; i++) {
                if (source.ContainsKey(_fieldNames[i])) {
                    continue;
                }
                _context.Error($"Field {_fieldNames[i]} does not exist!");
                yield break;
            }
        }

        var count = 0;
        foreach (var d in docs) {
            yield return toRow(d);
        }
        count += docs.Count;

        // get this from first search response (maybe), unless you have to aggregate it from all...
        foreach (var filter in _context.Entity.Filter.Where(f => f.Type == "facet" && !string.IsNullOrEmpty(f.Map))) {
            var map = _context.Process.Maps.First(m => m.Name == filter.Map);
            var buckets = response.Body["aggregations"][filter.Key]["buckets"] as ElasticsearchDynamicValue;
            if (buckets == null || !buckets.HasValue) {
                continue;
            }

            var items = buckets.Value as IEnumerable<object>;
            if (items == null) {
                continue;
            }

            foreach (var item in items.OfType<IDictionary<string, object>>()) {
                map.Items.Add(new MapItem { From = $"{item["key"]} ({item["doc_count"]})", To = item["key"] });
            }
        }

        // Without a scroll id there is nothing more to fetch.
        if (!response.Body.ContainsKey("_scroll_id")) {
            yield break;
        }

        // The first page already delivered everything that was expected.
        if (size == count) {
            yield break;
        }

        do {
            var scrollId = response.Body["_scroll_id"].Value;
            scrolls.Add(scrollId);

            response = _client.Scroll<DynamicResponse>(new PostData<object>(new { scroll = "1m", scroll_id = scrollId }));
            if (!response.Success) {
                LogError(response);
                break;
            }

            docs = (IList<object>)response.Body["hits"]["hits"].Value;

            // An empty page means count can no longer advance; asking again with the same
            // scroll id would repeat forever, so stop here.
            if (docs.Count == 0) {
                _context.Warn($"Elasticsearch scroll returned no documents after {count} of {size} expected.");
                break;
            }

            foreach (var d in docs) {
                yield return toRow(d);
            }
            count += docs.Count;
        } while (count < size);
    } finally {
        // Runs on normal completion, on yield break, and when the caller disposes the
        // enumerator early, so the scroll context is always released.
        if (scrolls.Count > 0) {
            _client.ClearScroll<DynamicResponse>(new PostData<object>(new { scroll_id = scrolls.ToArray() }));
        }
    }
}
/// <summary>
/// Reads the entity's documents from Elasticsearch and lazily yields one row per hit.
/// </summary>
/// <remarks>
/// A page request issues a single search for the requested page. Any other request first
/// issues a zero-size search to learn how many documents match, then runs the real search
/// with the connection's scroll setting and keeps pulling scroll pages until the expected
/// number of documents has been read.
/// The total is read from <c>hits.total.value</c> when the configured version's major number
/// is 7 or higher and from <c>hits.total</c> otherwise; a failure to read it is logged, not thrown.
/// After the first page has been yielded, every "facet" filter that names a map gets that
/// map's items filled from the aggregation buckets of the first response.
/// An unsuccessful response is logged and ends the enumeration. Scroll ids seen along the
/// way are cleared when the enumeration ends, including when the caller stops early.
/// </remarks>
/// <returns>A deferred sequence of rows, one per document returned by Elasticsearch.</returns>
public IEnumerable<IRow> Read() {
    ElasticsearchResponse<DynamicResponse> response;
    ElasticsearchDynamicValue hits;
    var from = 0;
    var size = 10;
    string body;
    bool warned = false;

    // Anything that is not a page request is read in full through the scroll API.
    var scroll = !_context.Entity.IsPageRequest();

    if (!scroll) {
        from = (_context.Entity.Page * _context.Entity.Size) - _context.Entity.Size;
        body = WriteQuery(_fields, _readFrom, _context, scroll: false, from: from, size: _context.Entity.Size);
    } else {
        // Ask for zero documents first, only to learn the total number of matches.
        body = WriteQuery(_fields, _readFrom, _context, scroll: false, from: 0, size: 0);
        response = _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);
        if (response.Success) {
            hits = response.Body["hits"] as ElasticsearchDynamicValue;
            if (hits != null && hits.HasValue) {
                var total = hits["total"];
                try {
                    if (_version.Major >= 7) {
                        // version 7 changed total to an object with "value" and "relation" properties
                        size = Convert.ToInt32(total["value"].Value);
                    } else {
                        size = Convert.ToInt32(total.Value);
                    }
                } catch (Exception ex) {
                    // Remember the warning so the same problem is not reported again below.
                    warned = true;
                    _context.Debug(() => total);
                    _context.Warn($"Could not get total number of matching documents from the elasticsearch response. Are you sure you using version {_version}?");
                    _context.Error(ex, ex.Message);
                }
                body = WriteQuery(_fields, _readFrom, _context, scroll: true, from: 0, size: size > ElasticsearchDefaultSizeLimit ? DefaultSize : size);
            }
        }
    }

    _context.Debug(() => body);
    _context.Entity.Query = body;

    response = scroll
        ? _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body, p => p.AddQueryString("scroll", _context.Connection.Scroll))
        : _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);

    if (!response.Success) {
        LogError(response);
        yield break;
    }

    // Turns one raw hit into a populated row; shared by the first page and the scroll pages.
    Func<object, IRow> toRow = hit => {
        var document = (IDictionary<string, object>)hit;
        var row = _rowFactory.Create();
        var values = (IDictionary<string, object>)document["_source"];
        for (var f = 0; f < _fields.Length; f++) {
            row[_fields[f]] = _fields[f].Convert(values[_fieldNames[f]]);
        }
        return row;
    };

    // Every scroll id seen so far; cleared in the finally block however the enumeration ends.
    var scrolls = new HashSet<string>();

    try {
        if (response.Body.ContainsKey("_scroll_id")) {
            var firstScrollId = response.Body["_scroll_id"].Value as string;
            if (firstScrollId != null) {
                scrolls.Add(firstScrollId);
            }
        }

        try {
            if (_version.Major >= 7) {
                // version 7 changed total to an object with "value" and "relation" properties
                _context.Entity.Hits = Convert.ToInt32(response.Body["hits"]["total"]["value"].Value);
            } else {
                _context.Entity.Hits = Convert.ToInt32(response.Body["hits"]["total"].Value);
            }
        } catch (Exception ex) {
            if (!warned) {
                _context.Debug(() => response.Body["hits"]);
                _context.Warn($"Could not get total number of matching documents from the elasticsearch response. Are you sure you using version {_version}?");
                _context.Error(ex.Message);
            }
        }

        hits = response.Body["hits"]["hits"] as ElasticsearchDynamicValue;
        if (hits == null || !hits.HasValue) {
            _context.Warn("No hits from elasticsearch");
            yield break;
        }

        var docs = hits.Value as IList<object>;
        if (docs == null) {
            _context.Error("No documents returned from elasticsearch!");
            yield break;
        }

        // if any of the fields do not exist, yield break
        if (docs.Count > 0) {
            var doc = docs.First() as IDictionary<string, object>;
            var source = doc?["_source"] as IDictionary<string, object>;
            if (source == null) {
                _context.Error("Missing _source from elasticsearch response!");
                yield break;
            }

            for (var i = 0; i < _fields.Length; i++) {
                if (source.ContainsKey(_fieldNames[i])) {
                    continue;
                }
                _context.Error($"Field {_fieldNames[i]} does not exist!");
                yield break;
            }
        }

        var count = 0;
        foreach (var d in docs) {
            yield return toRow(d);
        }
        count += docs.Count;

        // get this from first search response (maybe), unless you have to aggregate it from all...
        foreach (var filter in _context.Entity.Filter.Where(f => f.Type == "facet" && !string.IsNullOrEmpty(f.Map))) {
            var map = _context.Process.Maps.First(m => m.Name == filter.Map);
            var buckets = response.Body["aggregations"][filter.Key]["buckets"] as ElasticsearchDynamicValue;
            if (buckets == null || !buckets.HasValue) {
                continue;
            }

            var items = buckets.Value as IEnumerable<object>;
            if (items == null) {
                continue;
            }

            foreach (var item in items.OfType<IDictionary<string, object>>()) {
                map.Items.Add(new MapItem { From = $"{item["key"]} ({item["doc_count"]})", To = item["key"] });
            }
        }

        // Without a scroll id there is nothing more to fetch.
        if (!response.Body.ContainsKey("_scroll_id")) {
            yield break;
        }

        // The first page already delivered everything that was expected.
        if (size == count) {
            yield break;
        }

        do {
            var scrollId = response.Body["_scroll_id"].Value;
            scrolls.Add(scrollId);

            response = _client.Scroll<DynamicResponse>(new PostData<object>(new { scroll = _context.Connection.Scroll, scroll_id = scrollId }));
            if (!response.Success) {
                LogError(response);
                break;
            }

            docs = (IList<object>)response.Body["hits"]["hits"].Value;

            // An empty page means count can no longer advance; asking again with the same
            // scroll id would repeat forever, so stop here.
            if (docs.Count == 0) {
                _context.Warn($"Elasticsearch scroll returned no documents after {count} of {size} expected.");
                break;
            }

            foreach (var d in docs) {
                yield return toRow(d);
            }
            count += docs.Count;
        } while (count < size);
    } finally {
        // Runs on normal completion, on yield break, and when the caller disposes the
        // enumerator early, so the scroll context is always released.
        if (scrolls.Count > 0) {
            _client.ClearScroll<DynamicResponse>(new PostData<object>(new { scroll_id = scrolls.ToArray() }));
        }
    }
}