コード例 #1
0
ファイル: ElasticReader.cs プロジェクト: zzms/Transformalize
        /// <summary>
        /// Reads the entity's documents from Elasticsearch and lazily yields one row per hit.
        /// </summary>
        /// <remarks>
        /// For a page request a single query is written for the requested page. Otherwise a
        /// zero-size search is issued first to read <c>hits.total</c>, and the real query is
        /// written with a size of at most 10000. When <c>from + size</c> exceeds 10000 the search
        /// is started with a one minute scroll and the remaining pages are pulled with the scroll
        /// API until the expected number of documents has been read, a scroll request fails, or
        /// a scroll page comes back empty.
        /// As side effects this sets <c>_context.Entity.Query</c> and <c>_context.Entity.Hits</c>,
        /// calls <c>_context.Increment()</c> per row, and adds map items for facet filters from
        /// the first response's aggregations.
        /// </remarks>
        /// <returns>
        /// The rows read. The sequence ends early (after logging) when the search fails, when no
        /// hits or documents are returned, or when the first document lacks an expected field.
        /// </returns>
        public IEnumerable<IRow> Read()
        {
            ElasticsearchResponse<DynamicResponse> response;
            ElasticsearchDynamicValue hits;
            var from = 1;
            var size = 10;
            string body;

            if (_context.Entity.IsPageRequest())
            {
                // NOTE(review): size keeps its default of 10 on this path (it is not set to the
                // page size), so the 10000 check below uses 10 - confirm this is intended.
                from = (_context.Entity.Page * _context.Entity.PageSize) - _context.Entity.PageSize;
                body = WriteQuery(_fields, _readFrom, _context, from, _context.Entity.PageSize);
            }
            else
            {
                // Zero-size query: only used to learn the total number of matching documents.
                body = WriteQuery(_fields, _readFrom, _context, 0, 0);
                response = _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);
                if (response.Success)
                {
                    hits = response.Body["hits"] as ElasticsearchDynamicValue;
                    if (hits != null && hits.HasValue)
                    {
                        var properties = hits.Value as IDictionary<string, object>;
                        if (properties != null && properties.ContainsKey("total"))
                        {
                            size = Convert.ToInt32(properties["total"]);
                            body = WriteQuery(_fields, _readFrom, _context, 0, size > 10000 ? 10000 : size);
                        }
                    }
                }
            }

            _context.Debug(() => body);
            _context.Entity.Query = body;

            // move 10000 to configurable limit
            response = from + size > 10000
                ? _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body, p => p.Scroll(TimeSpan.FromMinutes(1.0)))
                : _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);

            if (!response.Success)
            {
                LogError(response);
                yield break;
            }

            _context.Entity.Hits = Convert.ToInt32((response.Body["hits"]["total"] as ElasticsearchDynamicValue).Value);
            hits = response.Body["hits"]["hits"] as ElasticsearchDynamicValue;

            if (hits == null || !hits.HasValue)
            {
                _context.Warn("No hits from elasticsearch");
                yield break;
            }

            var docs = hits.Value as IList<object>;

            if (docs == null)
            {
                _context.Error("No documents returned from elasticsearch!");
                yield break;
            }

            // if any of the fields do not exist, yield break
            // (only the first document is inspected)
            if (docs.Count > 0)
            {
                var doc = docs.First() as IDictionary<string, object>;
                var source = doc?["_source"] as IDictionary<string, object>;
                if (source == null)
                {
                    _context.Error("Missing _source from elasticsearch response!");
                    yield break;
                }

                for (var i = 0; i < _fields.Length; i++)
                {
                    if (source.ContainsKey(_fieldNames[i]))
                    {
                        continue;
                    }

                    _context.Error($"Field {_fieldNames[i]} does not exist!");
                    yield break;
                }
            }

            // Number of documents yielded so far; compared against size to decide when scrolling is done.
            var count = 0;

            foreach (var d in docs)
            {
                var doc = (IDictionary<string, object>)d;
                var row = _rowFactory.Create();
                var source = (IDictionary<string, object>)doc["_source"];
                for (var i = 0; i < _fields.Length; i++)
                {
                    row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
                }
                _context.Increment();
                yield return row;
            }
            count += docs.Count;

            // get this from first search response (maybe), unless you have to aggregate it from all...
            foreach (var filter in _context.Entity.Filter.Where(f => f.Type == "facet" && !string.IsNullOrEmpty(f.Map)))
            {
                var map = _context.Process.Maps.First(m => m.Name == filter.Map);
                var buckets = response.Body["aggregations"][filter.Key]["buckets"] as ElasticsearchDynamicValue;
                if (buckets == null || !buckets.HasValue)
                {
                    continue;
                }

                var items = buckets.Value as IEnumerable<object>;

                if (items == null)
                {
                    continue;
                }

                foreach (var item in items.OfType<IDictionary<string, object>>())
                {
                    map.Items.Add(new MapItem {
                        From = $"{item["key"]} ({item["doc_count"]})", To = item["key"]
                    });
                }
            }

            // No scroll id in the response: this was a plain (non-scroll) search, nothing more to read.
            if (!response.Body.ContainsKey("_scroll_id"))
            {
                yield break;
            }

            // Everything arrived in the first page: release the scroll and stop.
            if (size == count)
            {
                _client.ClearScroll<DynamicResponse>(new PostData<object>(new { scroll_id = response.Body["_scroll_id"].Value }));
                yield break;
            }

            // Scroll ids used for scroll requests; all of them are cleared when scrolling ends.
            var scrolls = new HashSet<string>();

            try
            {
                do
                {
                    var scrollId = response.Body["_scroll_id"].Value;
                    scrolls.Add(scrollId);
                    response = _client.Scroll<DynamicResponse>(new PostData<object>(new { scroll = "1m", scroll_id = scrollId }));
                    if (!response.Success)
                    {
                        LogError(response);
                        break;
                    }

                    docs = (IList<object>)response.Body["hits"]["hits"].Value;

                    // An empty (or missing) page means the scroll is exhausted. Without this check the
                    // loop would spin forever whenever fewer documents are returned than the total
                    // counted up front, because count could then never reach size.
                    if (docs == null || docs.Count == 0)
                    {
                        break;
                    }

                    foreach (var d in docs)
                    {
                        var doc = (IDictionary<string, object>)d;
                        var row = _rowFactory.Create();
                        var source = (IDictionary<string, object>)doc["_source"];
                        for (var i = 0; i < _fields.Length; i++)
                        {
                            row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
                        }
                        _context.Increment();
                        yield return row;
                    }
                    count += docs.Count;
                } while (count < size);
            }
            finally
            {
                // Runs on normal completion, on an exception, and when the caller disposes the
                // enumerator early, so the scroll contexts are released in all of those cases.
                if (scrolls.Count > 0)
                {
                    _client.ClearScroll<DynamicResponse>(new PostData<object>(new { scroll_id = scrolls.ToArray() }));
                }
            }
        }
        /// <summary>
        /// Reads the entity's documents from Elasticsearch and lazily yields one row per hit.
        /// </summary>
        /// <remarks>
        /// A page request issues a single, non-scrolling search for the requested page. Any other
        /// request first issues a zero-size search to read the total number of matching documents
        /// (read from <c>hits.total.value</c> when <c>_version.Major</c> is 7 or higher, otherwise
        /// from <c>hits.total</c>), then starts a scrolling search using
        /// <c>_context.Connection.Scroll</c> and pulls further pages until the expected number of
        /// documents has been read, a scroll request fails, or a scroll page comes back empty.
        /// As side effects this sets <c>_context.Entity.Query</c> and <c>_context.Entity.Hits</c>,
        /// and adds map items for facet filters from the first response's aggregations.
        /// </remarks>
        /// <returns>
        /// The rows read. The sequence ends early (after logging) when the search fails, when no
        /// hits or documents are returned, or when the first document lacks an expected field.
        /// </returns>
        public IEnumerable<IRow> Read()
        {
            ElasticsearchResponse<DynamicResponse> response;
            ElasticsearchDynamicValue hits;

            var from = 0;
            var size = 10;
            string body;
            // Set when the total could not be read from the counting query, so the same
            // warning is not repeated for the main query below.
            bool warned = false;

            var scroll = !_context.Entity.IsPageRequest();

            if (!scroll)
            {
                from = (_context.Entity.Page * _context.Entity.Size) - _context.Entity.Size;
                body = WriteQuery(_fields, _readFrom, _context, scroll: false, from: from, size: _context.Entity.Size);
            }
            else
            {
                // Zero-size query: only used to learn the total number of matching documents.
                body = WriteQuery(_fields, _readFrom, _context, scroll: false, from: 0, size: 0);
                response = _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);
                if (response.Success)
                {
                    hits = response.Body["hits"] as ElasticsearchDynamicValue;
                    if (hits != null && hits.HasValue)
                    {
                        var total = hits["total"];

                        try
                        {
                            if (_version.Major >= 7) // version 7 changed total to an object with "value" and "relation" properties
                            {
                                size = Convert.ToInt32(total["value"].Value);
                            }
                            else
                            {
                                size = Convert.ToInt32(total.Value);
                            }
                        }
                        catch (Exception ex)
                        {
                            warned = true;
                            _context.Debug(() => total);
                            _context.Warn($"Could not get total number of matching documents from the elasticsearch response.  Are you sure you using version {_version}?");
                            _context.Error(ex, ex.Message);
                        }
                        body = WriteQuery(_fields, _readFrom, _context, scroll: true, from: 0, size: size > ElasticsearchDefaultSizeLimit ? DefaultSize : size);
                    }
                }
            }

            _context.Debug(() => body);
            _context.Entity.Query = body;

            response = scroll
                ? _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body, p => p.AddQueryString("scroll", _context.Connection.Scroll))
                : _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);

            if (!response.Success)
            {
                LogError(response);
                yield break;
            }

            try
            {
                if (_version.Major >= 7) // version 7 changed total to an object with "value" and "relation" properties
                {
                    _context.Entity.Hits = Convert.ToInt32(response.Body["hits"]["total"]["value"].Value);
                }
                else
                {
                    _context.Entity.Hits = Convert.ToInt32(response.Body["hits"]["total"].Value);
                }
            }
            catch (Exception ex)
            {
                if (!warned)
                {
                    _context.Debug(() => response.Body["hits"]);
                    _context.Warn($"Could not get total number of matching documents from the elasticsearch response.  Are you sure you using version {_version}?");
                    _context.Error(ex.Message);
                }
            }

            hits = response.Body["hits"]["hits"] as ElasticsearchDynamicValue;

            if (hits == null || !hits.HasValue)
            {
                _context.Warn("No hits from elasticsearch");
                yield break;
            }

            var docs = hits.Value as IList<object>;

            if (docs == null)
            {
                _context.Error("No documents returned from elasticsearch!");
                yield break;
            }

            // if any of the fields do not exist, yield break
            // (only the first document is inspected)
            if (docs.Count > 0)
            {
                var doc = docs.First() as IDictionary<string, object>;
                var source = doc?["_source"] as IDictionary<string, object>;
                if (source == null)
                {
                    _context.Error("Missing _source from elasticsearch response!");
                    yield break;
                }

                for (var i = 0; i < _fields.Length; i++)
                {
                    if (source.ContainsKey(_fieldNames[i]))
                    {
                        continue;
                    }

                    _context.Error($"Field {_fieldNames[i]} does not exist!");
                    yield break;
                }
            }

            // Number of documents yielded so far; compared against size to decide when scrolling is done.
            var count = 0;

            foreach (var d in docs)
            {
                var doc = (IDictionary<string, object>)d;
                var row = _rowFactory.Create();
                var source = (IDictionary<string, object>)doc["_source"];
                for (var i = 0; i < _fields.Length; i++)
                {
                    row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
                }
                yield return row;
            }
            count += docs.Count;

            // get this from first search response (maybe), unless you have to aggregate it from all...
            foreach (var filter in _context.Entity.Filter.Where(f => f.Type == "facet" && !string.IsNullOrEmpty(f.Map)))
            {
                var map = _context.Process.Maps.First(m => m.Name == filter.Map);
                var buckets = response.Body["aggregations"][filter.Key]["buckets"] as ElasticsearchDynamicValue;
                if (buckets == null || !buckets.HasValue)
                {
                    continue;
                }

                var items = buckets.Value as IEnumerable<object>;

                if (items == null)
                {
                    continue;
                }

                foreach (var item in items.OfType<IDictionary<string, object>>())
                {
                    map.Items.Add(new MapItem {
                        From = $"{item["key"]} ({item["doc_count"]})", To = item["key"]
                    });
                }
            }

            // No scroll id in the response: this was a plain (non-scroll) search, nothing more to read.
            if (!response.Body.ContainsKey("_scroll_id"))
            {
                yield break;
            }

            // Everything arrived in the first page: release the scroll and stop.
            if (size == count)
            {
                _client.ClearScroll<DynamicResponse>(new PostData<object>(new { scroll_id = response.Body["_scroll_id"].Value }));
                yield break;
            }

            // Scroll ids used for scroll requests; all of them are cleared when scrolling ends.
            var scrolls = new HashSet<string>();

            try
            {
                do
                {
                    var scrollId = response.Body["_scroll_id"].Value;
                    scrolls.Add(scrollId);
                    response = _client.Scroll<DynamicResponse>(new PostData<object>(new { scroll = _context.Connection.Scroll, scroll_id = scrollId }));
                    if (!response.Success)
                    {
                        LogError(response);
                        break;
                    }

                    docs = (IList<object>)response.Body["hits"]["hits"].Value;

                    // An empty (or missing) page means the scroll is exhausted. Without this check the
                    // loop would spin forever whenever fewer documents are returned than the total
                    // read up front, because count could then never reach size.
                    if (docs == null || docs.Count == 0)
                    {
                        break;
                    }

                    foreach (var d in docs)
                    {
                        var doc = (IDictionary<string, object>)d;
                        var row = _rowFactory.Create();
                        var source = (IDictionary<string, object>)doc["_source"];
                        for (var i = 0; i < _fields.Length; i++)
                        {
                            row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
                        }
                        yield return row;
                    }
                    count += docs.Count;
                } while (count < size);
            }
            finally
            {
                // Runs on normal completion, on an exception, and when the caller disposes the
                // enumerator early, so the scroll contexts are released in all of those cases.
                if (scrolls.Count > 0)
                {
                    _client.ClearScroll<DynamicResponse>(new PostData<object>(new { scroll_id = scrolls.ToArray() }));
                }
            }
        }