/// <summary>
/// Creates a reader that pulls the context's entity from Elasticsearch.
/// </summary>
/// <param name="context">Connection context; its entity's ReadSize is replaced with DefaultSize when it is 0.</param>
/// <param name="fields">Fields to read; one document key is derived for each field.</param>
/// <param name="client">Low-level Elasticsearch client, stored for later use.</param>
/// <param name="rowFactory">Row factory, stored for later use.</param>
/// <param name="readFrom">
/// When <c>ReadFrom.Input</c>, entity and field names are used as-is; otherwise the lower-cased aliases are used.
/// </param>
public ElasticReader(
            IConnectionContext context,
            Field[] fields,
            IElasticLowLevelClient client,
            IRowFactory rowFactory,
            ReadFrom readFrom
            )
        {
            _context    = context;
            _fields     = fields;
            _client     = client;
            _rowFactory = rowFactory;
            _readFrom   = readFrom;

            // Decide from the constructor argument. The field names used to be computed from the
            // _readFrom field before it was assigned, so they always followed the enum's default value.
            var useNames = readFrom == ReadFrom.Input;
            _fieldNames = fields.Select(f => useNames ? f.Name : f.Alias.ToLower()).ToArray();
            _typeName   = useNames ? context.Entity.Name : context.Entity.Alias.ToLower();

            // A read size of 0 means "not configured"; fall back to the default.
            _context.Entity.ReadSize = _context.Entity.ReadSize == 0 ? DefaultSize : _context.Entity.ReadSize;

            if (_context.Entity.ReadSize > ElasticsearchDefaultSizeLimit)
            {
                _context.Warn("Elasticsearch's default size limit is 10000.  {0} may be too high.", _context.Entity.ReadSize);
            }

            _version = ElasticVersionParser.ParseVersion(_context);
        }
Beispiel #2
0
 /// <summary>
 /// Lazily turns an Elasticsearch mapping's properties into fields, one per property.
 /// </summary>
 /// <param name="name">Type name; only used in log messages.</param>
 /// <param name="properties">Property name to attribute dictionary; may be null.</param>
 /// <returns>
 /// One field per property. The field's type is taken from the "type" attribute when present
 /// ("integer" becomes "int"); otherwise the type is left unset and a warning is logged.
 /// When <paramref name="properties"/> is null an error is logged and nothing is yielded.
 /// </returns>
 private IEnumerable <Field> PropertiesToFields(string name, IDictionary <string, object> properties)
 {
     if (properties == null)
     {
         _input.Error("Could not find fields for index {0} type {1}.", _index, name);
         yield break;
     }

     foreach (var property in properties)
     {
         var result = new Field {
             Name = property.Key
         };

         object declaredType;
         var    settings = property.Value as IDictionary <string, object>;

         if (settings == null || !settings.TryGetValue("type", out declaredType))
         {
             _input.Warn("Could not find type for index {0} type {1} field {2}. Default is string.", _index, name, property.Key);
         }
         else
         {
             result.Type = declaredType.ToString();

             // Elasticsearch calls it "integer"; the field type name used here is "int".
             if (result.Type == "integer")
             {
                 result.Type = "int";
             }
         }

         yield return(result);
     }
 }
Beispiel #3
0
        /// <summary>
        /// Resolves the Elasticsearch version configured on the context's connection.
        /// </summary>
        /// <param name="context">Context whose connection carries the version text; the text is overwritten with "5.0.0" when defaulted.</param>
        /// <returns>
        /// The parsed version, or 5.0.0.0 when the configured text cannot be parsed.
        /// A warning is logged whenever the default is applied.
        /// </returns>
        public static Version ParseVersion(IConnectionContext context)
        {
            var connection = context.Connection;

            // Not configured at all: substitute the default text, then parse it like any other value.
            var notConfigured = connection.Version == Constants.DefaultSetting || connection.Version == string.Empty;
            if (notConfigured)
            {
                context.Warn("Defaulting to Elasticsearch version 5.0.0");
                connection.Version = "5.0.0";
            }

            Version result;
            if (!Version.TryParse(connection.Version, out result))
            {
                // Configured, but not in a form Version.TryParse accepts.
                context.Warn($"Unable to parse Elasticsearch version {connection.Version}.");
                connection.Version = "5.0.0";
                result = new Version(5, 0, 0, 0);
            }

            return(result);
        }
        /// <summary>
        /// Looks up the rows matching the keys of <paramref name="input"/> and returns them
        /// indexed by the position of the input row they belong to.
        /// </summary>
        /// <remarks>
        /// Inside one transaction: creates the temporary keys table, inserts every input key tagged
        /// with a running "TflIndex", runs the query, then drops the table and commits.
        /// Any exception is logged, the transaction is rolled back, and whatever was collected so far is returned.
        /// </remarks>
        /// <param name="input">Rows supplying the key values; enumerated once.</param>
        /// <returns>The batch, keyed by the integer read from the column after the last field.</returns>
        public Batch Read(IEnumerable <IRow> input)
        {
            var batch = new Batch();

            using (var cn = _cf.GetConnection()) {
                cn.Open();
                _context.Debug(() => "begin transaction");

                // The transaction is disposable; release it deterministically on every path.
                using (var trans = cn.BeginTransaction()) {
                    try {
                        var createSql = SqlCreateKeysTable(_tempTable);
                        cn.Execute(createSql, null, trans);

                        // Tag each key with its position so results can be matched back to input rows.
                        var index = 0;
                        var keys  = new List <ExpandoObject>();
                        foreach (var row in input)
                        {
                            var obj = row.ToExpandoObject(_keys);
                            ((IDictionary <string, object>)obj)["TflIndex"] = index;
                            keys.Add(obj);
                            ++index;
                        }

                        var insertSql = SqlInsertTemplate(_context, _tempTable, _keys);
                        cn.Execute(insertSql, keys, trans, 0, System.Data.CommandType.Text);

                        // NOTE(review): presumably SqlQuery() selects TflIndex right after the fields - confirm.
                        var indexOrdinal = _fields.Length;

                        using (var reader = cn.ExecuteReader(SqlQuery(), null, trans, 0, System.Data.CommandType.Text)) {
                            while (reader.Read())
                            {
                                batch[reader.GetInt32(indexOrdinal)] = _rowCreator.Create(reader, _fields);
                            }
                        }

                        var sqlDrop = SqlDrop(_tempTable);
                        cn.Execute(sqlDrop, null, trans);

                        _context.Debug(() => "commit transaction");
                        trans.Commit();
                    } catch (Exception ex) {
                        _context.Error(ex.Message);
                        _context.Warn("rollback transaction");
                        trans.Rollback();
                    }
                }
            }
            return(batch);
        }
Beispiel #5
0
        /// <summary>
        /// Builds a row from the reader's current record.
        /// </summary>
        /// <remarks>
        /// On the first call the reader's column types are compared with the types expected for each
        /// field and the result is cached in <c>_errors</c>; mismatches are logged as warnings unless the
        /// field is a "char" or the provider is "sqlite". Mismatched columns are converted through the
        /// field on every call; matching columns are copied as-is. DBNull columns are skipped.
        /// </remarks>
        /// <param name="reader">Reader positioned on the record to copy.</param>
        /// <param name="fields">Fields, in the same order as the reader's columns.</param>
        /// <returns>The populated row.</returns>
        public IRow Create(IDataReader reader, Field[] fields)
        {
            // Only the columns present on both sides are copied.
            var limit = Math.Min(reader.FieldCount, fields.Length);
            var row   = _rowFactory.Create();

            if (_errors == null)
            {
                _errors = new bool[fields.Length];

                for (var ordinal = 0; ordinal < limit; ordinal++)
                {
                    var field    = fields[ordinal];
                    var mismatch = reader.GetFieldType(ordinal) != _typeMap[field.Type];
                    _errors[ordinal] = mismatch;

                    if (mismatch && field.Type != "char" && _context.Connection.Provider != "sqlite")
                    {
                        _context.Warn("Type mismatch for {0}. Expected {1}, but read {2}.", field.Name, field.Type, reader.GetFieldType(ordinal));
                    }
                }
            }

            for (var ordinal = 0; ordinal < limit; ordinal++)
            {
                if (!reader.IsDBNull(ordinal))
                {
                    var    field = fields[ordinal];
                    object value = reader.GetValue(ordinal);
                    row[field] = _errors[ordinal] ? field.Convert(value) : value;
                }
            }

            return(row);
        }
        /// <summary>
        /// Looks up the rows matching the keys of <paramref name="input"/>.
        /// </summary>
        /// <remarks>
        /// Inside one transaction: creates the temporary keys table, inserts the input keys, runs the
        /// query, then drops the table and commits. Any exception is logged, the transaction is rolled
        /// back, and whatever was collected so far is returned.
        /// </remarks>
        /// <param name="input">Rows supplying the key values.</param>
        /// <returns>The rows produced by the query, fully materialized.</returns>
        public IEnumerable <IRow> Read(IEnumerable <IRow> input)
        {
            var results = new List <IRow>();

            using (var cn = _cf.GetConnection()) {
                cn.Open();
                _context.Debug(() => "begin transaction");

                // The transaction is disposable; release it deterministically on every path.
                using (var trans = cn.BeginTransaction()) {
                    try {
                        var createSql = SqlCreateKeysTable(_tempTable);
                        cn.Execute(createSql, null, trans);

                        var keys      = input.Select(r => r.ToExpandoObject(_keys));
                        var insertSql = SqlInsertTemplate(_context, _tempTable, _keys);
                        cn.Execute(insertSql, keys, trans, 0, System.Data.CommandType.Text);

                        using (var reader = cn.ExecuteReader(SqlQuery(), null, trans, 0, System.Data.CommandType.Text)) {
                            while (reader.Read())
                            {
                                var row = _rowCreator.Create(reader, _fields);
                                results.Add(row);
                            }
                        }

                        var sqlDrop = SqlDrop(_tempTable, _cf);
                        cn.Execute(sqlDrop, null, trans);

                        _context.Debug(() => "commit transaction");
                        trans.Commit();
                    } catch (Exception ex) {
                        _context.Error(ex.Message);
                        _context.Warn("rollback transaction");
                        trans.Rollback();
                    }
                }
            }
            return(results);
        }
        /// <summary>
        /// Inspects the file and produces a serialized process configuration describing it.
        /// </summary>
        /// <remarks>
        /// Reads the leading lines of the file, picks a delimiter, takes the first line as candidate
        /// column names (blank headers are replaced with Excel-style names), and builds a read-only
        /// "FileInspector" process with one entity and one max-length field per column.
        /// Warnings and errors raised while loading that process are forwarded to the context.
        /// </remarks>
        /// <returns>The serialized process.</returns>
        public string Create()
        {
            // Strip only the trailing extension. String.Replace with the extension throws when the file
            // has no extension (empty search text) and also removes the text from the middle of the name.
            // NOTE(review): assumes _fileInfo is a System.IO.FileInfo - confirm.
            var identifier = Utility.Identifier(System.IO.Path.GetFileNameWithoutExtension(_fileInfo.Name));
            var quoted     = string.Equals(_fileInfo.Extension, ".csv", StringComparison.OrdinalIgnoreCase);

            var lines      = new FileLineReader(_fileInfo, _lines).Read().ToArray();

            // Use the configured candidate delimiters, or fall back to the single configured one (comma when blank).
            var delimiters = _context.Connection.Delimiters.Any() ?
                             _context.Connection.Delimiters :
                             new List <Delimiter> {
                new Delimiter {
                    Character = (_context.Connection.Delimiter.Length == 0 ? ',' : _context.Connection.Delimiter[0]),
                    Name      = "Delimiter"
                }
            };
            var delimiter = Utility.FindDelimiter(lines, delimiters, quoted);

            var values = lines.First()
                         .SplitLine(delimiter, quoted)
                         .Select(c => c.Trim('"'))
                         .Select(c => c.Trim())
                         .ToArray();

            // substitute blank headers with excel column names (useful when some of the column headers are blank)
            for (var i = 0; i < values.Length; i++)
            {
                if (values[i] == string.Empty)
                {
                    values[i] = Utility.GetExcelName(i);
                }
            }

            // When the first line does not look like a header, generate names and start reading at line 1.
            var hasColumnNames = ColumnNames.AreValid(_context, values);
            var fieldNames     = hasColumnNames ? values : ColumnNames.Generate(values.Length).ToArray();

            var connection = new Connection {
                Name      = "input",
                Provider  = "file",
                File      = _fileInfo.FullName,
                Delimiter = delimiter == default(char) ? "," : delimiter.ToString(),
                Start     = hasColumnNames ? 2 : 1,
                Types     = _context.Connection.Types
            };

            var process = new Process {
                Name        = "FileInspector",
                ReadOnly    = true,
                Connections = new List <Connection> {
                    connection
                }
            };

            process.Entities.Add(new Entity {
                Name  = identifier,
                Input = "input",
                PrependProcessNameToOutputName = false,
                Sample = Convert.ToInt32(_context.Connection.Sample)
            });

            foreach (var name in fieldNames)
            {
                process.Entities[0].Fields.Add(new Field {
                    Name   = name,
                    // Reserved names get the entity identifier prepended so the alias stays usable.
                    Alias  = Constants.InvalidFieldNames.Contains(name) ? identifier + name : name,
                    Length = "max"
                });
            }

            process.Load();

            foreach (var warning in process.Warnings())
            {
                _context.Warn(warning);
            }

            foreach (var error in process.Errors())
            {
                _context.Error(error);
            }

            return(process.Serialize());
        }
        /// <summary>
        /// Builds a row from the reader's current record, converting columns whose type
        /// differs from the type expected for the field.
        /// </summary>
        /// <remarks>
        /// The first call (while <c>_fieldCount</c> is 0) compares the reader's column types with the
        /// expected types and caches, per column, whether it mismatches and which conversion to apply.
        /// A mismatched field whose first transform is "convert" is passed through unchanged; other
        /// mismatches use the field's own conversion and log a warning.
        /// Every call then copies the non-null columns into the row. A <see cref="FormatException"/>
        /// raised by a conversion is logged as an error and that column is left unset, on every row.
        /// </remarks>
        /// <param name="reader">Reader positioned on the record to copy.</param>
        /// <param name="fields">Fields, in the same order as the reader's columns.</param>
        /// <returns>The populated row.</returns>
        public IRow Create(IDataReader reader, Field[] fields)
        {
            var row = _rowFactory.Create();

            if (_fieldCount == 0)
            {
                // Only the columns present on both sides are copied.
                _fieldCount  = Math.Min(reader.FieldCount, fields.Length);
                _conversions = new List <Func <object, object> >(_fieldCount);
                for (var i = 0; i < _fieldCount; i++)
                {
                    _conversions.Add(null);
                }
                _errors = new bool[fields.Length];
                for (var i = 0; i < _fieldCount; i++)
                {
                    var inputType = reader.GetFieldType(i);
                    _errors[i] = inputType != _typeMap[fields[i].Type];

                    if (_errors[i])
                    {
                        if (fields[i].Transforms.Any() && fields[i].Transforms.First().Method == "convert")
                        {
                            _conversions[i] = o => o;  // the user has set a conversion
                        }
                        else
                        {
                            _conversions[i] = fields[i].Convert;
                            _context.Warn("Type mismatch for {0}. Expected {1}, but read {2}.  Change type or add conversion.", fields[i].Name, fields[i].Type, inputType);
                        }
                    }
                    else
                    {
                        _conversions[i] = o => o;
                    }
                }
            }

            // One copy loop for first and later rows, so conversion failures are handled the same way.
            for (var i = 0; i < _fieldCount; i++)
            {
                if (reader.IsDBNull(i))
                {
                    continue;
                }

                var value = reader.GetValue(i);

                // Matching types, and fields typed "object", take the raw value.
                if (!_errors[i] || fields[i].Type == "object")
                {
                    row[fields[i]] = value;
                    continue;
                }

                try {
                    row[fields[i]] = _conversions[i](value);
                } catch (FormatException) {
                    _context.Error($"Could not convert value {value} in field {fields[i].Alias} to {fields[i].Type}");
                }
            }

            return(row);
        }
Beispiel #9
0
        /// <summary>
        /// Reads the entity's documents from Elasticsearch and yields one row per document.
        /// </summary>
        /// <remarks>
        /// Page requests issue a single search built from the entity's page and page size.
        /// Otherwise a size-0 search is issued first to learn the total, and the real search is sized
        /// to that total (capped at 10000). When the response carries a "_scroll_id", the remaining
        /// documents are fetched with scroll requests until the total has been read, a request fails,
        /// or a scroll page comes back empty. Facet filters that name a map get that map filled from
        /// the aggregation buckets of the first response.
        /// This is an iterator: nothing is sent until the result is enumerated.
        /// </remarks>
        /// <returns>Rows whose fields are converted from each document's "_source".</returns>
        public IEnumerable <IRow> Read()
        {
            ElasticsearchResponse <DynamicResponse> response;
            ElasticsearchDynamicValue hits;
            // NOTE(review): from stays 1 for non-page requests, so scrolling starts at size == 10000 - confirm intended.
            var    from = 1;
            var    size = 10;
            string body;

            if (_context.Entity.IsPageRequest())
            {
                from = (_context.Entity.Page * _context.Entity.PageSize) - _context.Entity.PageSize;
                body = WriteQuery(_fields, _readFrom, _context, from, _context.Entity.PageSize);
            }
            else
            {
                // Ask for zero documents first, just to learn how many match.
                body     = WriteQuery(_fields, _readFrom, _context, 0, 0);
                response = _client.Search <DynamicResponse>(_context.Connection.Index, _typeName, body);
                if (response.Success)
                {
                    hits = response.Body["hits"] as ElasticsearchDynamicValue;
                    if (hits != null && hits.HasValue)
                    {
                        var properties = hits.Value as IDictionary <string, object>;
                        if (properties != null && properties.ContainsKey("total"))
                        {
                            size = Convert.ToInt32(properties["total"]);
                            body = WriteQuery(_fields, _readFrom, _context, 0, size > 10000 ? 10000 : size);
                        }
                    }
                }
            }

            _context.Debug(() => body);
            _context.Entity.Query = body;

            // move 10000 to configurable limit
            response = from + size > 10000
                ? _client.Search <DynamicResponse>(_context.Connection.Index, _typeName, body, p => p.Scroll(TimeSpan.FromMinutes(1.0)))
                : _client.Search <DynamicResponse>(_context.Connection.Index, _typeName, body);

            if (!response.Success)
            {
                LogError(response);
                yield break;
            }

            _context.Entity.Hits = Convert.ToInt32((response.Body["hits"]["total"] as ElasticsearchDynamicValue).Value);
            hits = response.Body["hits"]["hits"] as ElasticsearchDynamicValue;


            if (hits == null || !hits.HasValue)
            {
                _context.Warn("No hits from elasticsearch");
                yield break;
            }

            var docs = hits.Value as IList <object>;

            if (docs == null)
            {
                _context.Error("No documents returned from elasticsearch!");
                yield break;
            }

            // if any of the fields do not exist, yield break
            if (docs.Count > 0)
            {
                var doc    = docs.First() as IDictionary <string, object>;
                var source = doc?["_source"] as IDictionary <string, object>;
                if (source == null)
                {
                    _context.Error("Missing _source from elasticsearch response!");
                    yield break;
                }

                for (var i = 0; i < _fields.Length; i++)
                {
                    if (source.ContainsKey(_fieldNames[i]))
                    {
                        continue;
                    }

                    _context.Error($"Field {_fieldNames[i]} does not exist!");
                    yield break;
                }
            }

            var count = 0;

            foreach (var d in docs)
            {
                var doc    = (IDictionary <string, object>)d;
                var row    = _rowFactory.Create();
                var source = (IDictionary <string, object>)doc["_source"];
                for (var i = 0; i < _fields.Length; i++)
                {
                    row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
                }
                _context.Increment();
                yield return(row);
            }
            count += docs.Count;

            // get this from first search response (maybe), unless you have to aggregate it from all...
            foreach (var filter in _context.Entity.Filter.Where(f => f.Type == "facet" && !string.IsNullOrEmpty(f.Map)))
            {
                var map     = _context.Process.Maps.First(m => m.Name == filter.Map);
                var buckets = response.Body["aggregations"][filter.Key]["buckets"] as ElasticsearchDynamicValue;
                if (buckets == null || !buckets.HasValue)
                {
                    continue;
                }

                var items = buckets.Value as IEnumerable <object>;

                if (items == null)
                {
                    continue;
                }

                foreach (var item in items.OfType <IDictionary <string, object> >())
                {
                    map.Items.Add(new MapItem {
                        From = $"{item["key"]} ({item["doc_count"]})", To = item["key"]
                    });
                }
            }

            // No scroll id means this was a plain search; everything available has been yielded.
            if (!response.Body.ContainsKey("_scroll_id"))
            {
                yield break;
            }

            if (size == count)
            {
                _client.ClearScroll <DynamicResponse>(new PostData <object>(new { scroll_id = response.Body["_scroll_id"].Value }));
                yield break;
            }

            // Remember every scroll id handed out so they can all be cleared at the end.
            var scrolls = new HashSet <string>();

            do
            {
                var scrollId = response.Body["_scroll_id"].Value;
                scrolls.Add(scrollId);
                response = _client.Scroll <DynamicResponse>(new PostData <object>(new { scroll = "1m", scroll_id = scrollId }));
                if (response.Success)
                {
                    docs = (IList <object>)response.Body["hits"]["hits"].Value;

                    // An empty page means the scroll is exhausted. Without this check the loop would never
                    // end when fewer documents come back than the total counted earlier.
                    if (docs == null || docs.Count == 0)
                    {
                        break;
                    }

                    foreach (var d in docs)
                    {
                        var doc    = (IDictionary <string, object>)d;
                        var row    = _rowFactory.Create();
                        var source = (IDictionary <string, object>)doc["_source"];
                        for (var i = 0; i < _fields.Length; i++)
                        {
                            row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
                        }
                        _context.Increment();
                        yield return(row);
                    }
                    count += docs.Count;
                }
                else
                {
                    LogError(response);
                }
            } while (response.Success && count < size);

            _client.ClearScroll <DynamicResponse>(new PostData <object>(new { scroll_id = scrolls.ToArray() }));
        }
        /// <summary>
        /// Reads the entity's documents from Elasticsearch and yields one row per document.
        /// </summary>
        /// <remarks>
        /// Page requests issue a single search built from the entity's page and size.
        /// Otherwise a size-0 search is issued first to learn the total (read from "total" before
        /// version 7 and from "total.value" from version 7 on), and a scrolling search is started;
        /// remaining documents are fetched with scroll requests until the total has been read,
        /// a request fails, or a scroll page comes back empty. Facet filters that name a map get that
        /// map filled from the aggregation buckets of the first response.
        /// This is an iterator: nothing is sent until the result is enumerated.
        /// </remarks>
        /// <returns>Rows whose fields are converted from each document's "_source".</returns>
        public IEnumerable <IRow> Read()
        {
            ElasticsearchResponse <DynamicResponse> response;
            ElasticsearchDynamicValue hits;

            var    from = 0;
            var    size = 10;
            string body;
            bool   warned = false;  // avoids logging the "could not get total" warning twice

            var scroll = !_context.Entity.IsPageRequest();

            if (!scroll)
            {
                from = (_context.Entity.Page * _context.Entity.Size) - _context.Entity.Size;
                body = WriteQuery(_fields, _readFrom, _context, scroll: false, from: from, size: _context.Entity.Size);
            }
            else
            {
                // Ask for zero documents first, just to learn how many match.
                body     = WriteQuery(_fields, _readFrom, _context, scroll: false, from: 0, size: 0);
                response = _client.Search <DynamicResponse>(_context.Connection.Index, _typeName, body);
                if (response.Success)
                {
                    hits = response.Body["hits"] as ElasticsearchDynamicValue;
                    if (hits != null && hits.HasValue)
                    {
                        var total = hits["total"];

                        try {
                            if (_version.Major >= 7) // version 7 changed total to an object with "value" and "relation" properties
                            {
                                size = Convert.ToInt32(total["value"].Value);
                            }
                            else
                            {
                                size = Convert.ToInt32(total.Value);
                            }
                        } catch (Exception ex) {
                            warned = true;
                            _context.Debug(() => total);
                            _context.Warn($"Could not get total number of matching documents from the elasticsearch response.  Are you sure you using version {_version}?");
                            _context.Error(ex, ex.Message);
                        }
                        body = WriteQuery(_fields, _readFrom, _context, scroll: true, from: 0, size: size > ElasticsearchDefaultSizeLimit ? DefaultSize : size);
                    }
                }
            }

            _context.Debug(() => body);
            _context.Entity.Query = body;

            response = scroll
            ? _client.Search <DynamicResponse>(_context.Connection.Index, _typeName, body, p => p.AddQueryString("scroll", _context.Connection.Scroll))
            : _client.Search <DynamicResponse>(_context.Connection.Index, _typeName, body);

            if (!response.Success)
            {
                LogError(response);
                yield break;
            }

            try {
                if (_version.Major >= 7) // version 7 changed total to an object with "value" and "relation" properties
                {
                    _context.Entity.Hits = Convert.ToInt32(response.Body["hits"]["total"]["value"].Value);
                }
                else
                {
                    _context.Entity.Hits = Convert.ToInt32(response.Body["hits"]["total"].Value);
                }
            } catch (Exception ex) {
                if (!warned)
                {
                    _context.Debug(() => response.Body["hits"]);
                    _context.Warn($"Could not get total number of matching documents from the elasticsearch response.  Are you sure you using version {_version}?");
                    _context.Error(ex.Message);
                }
            }

            hits = response.Body["hits"]["hits"] as ElasticsearchDynamicValue;

            if (hits == null || !hits.HasValue)
            {
                _context.Warn("No hits from elasticsearch");
                yield break;
            }

            var docs = hits.Value as IList <object>;

            if (docs == null)
            {
                _context.Error("No documents returned from elasticsearch!");
                yield break;
            }

            // if any of the fields do not exist, yield break
            if (docs.Count > 0)
            {
                var doc    = docs.First() as IDictionary <string, object>;
                var source = doc?["_source"] as IDictionary <string, object>;
                if (source == null)
                {
                    _context.Error("Missing _source from elasticsearch response!");
                    yield break;
                }

                for (var i = 0; i < _fields.Length; i++)
                {
                    if (source.ContainsKey(_fieldNames[i]))
                    {
                        continue;
                    }

                    _context.Error($"Field {_fieldNames[i]} does not exist!");
                    yield break;
                }
            }

            var count = 0;

            foreach (var d in docs)
            {
                var doc    = (IDictionary <string, object>)d;
                var row    = _rowFactory.Create();
                var source = (IDictionary <string, object>)doc["_source"];
                for (var i = 0; i < _fields.Length; i++)
                {
                    row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
                }
                yield return(row);
            }
            count += docs.Count;

            // get this from first search response (maybe), unless you have to aggregate it from all...
            foreach (var filter in _context.Entity.Filter.Where(f => f.Type == "facet" && !string.IsNullOrEmpty(f.Map)))
            {
                var map     = _context.Process.Maps.First(m => m.Name == filter.Map);
                var buckets = response.Body["aggregations"][filter.Key]["buckets"] as ElasticsearchDynamicValue;
                if (buckets == null || !buckets.HasValue)
                {
                    continue;
                }

                var items = buckets.Value as IEnumerable <object>;

                if (items == null)
                {
                    continue;
                }

                foreach (var item in items.OfType <IDictionary <string, object> >())
                {
                    map.Items.Add(new MapItem {
                        From = $"{item["key"]} ({item["doc_count"]})", To = item["key"]
                    });
                }
            }

            // No scroll id means this was a plain search; everything available has been yielded.
            if (!response.Body.ContainsKey("_scroll_id"))
            {
                yield break;
            }

            if (size == count)
            {
                _client.ClearScroll <DynamicResponse>(new PostData <object>(new { scroll_id = response.Body["_scroll_id"].Value }));
                yield break;
            }

            // Remember every scroll id handed out so they can all be cleared at the end.
            var scrolls = new HashSet <string>();

            do
            {
                var scrollId = response.Body["_scroll_id"].Value;
                scrolls.Add(scrollId);
                response = _client.Scroll <DynamicResponse>(new PostData <object>(new { scroll = _context.Connection.Scroll, scroll_id = scrollId }));
                if (response.Success)
                {
                    docs = (IList <object>)response.Body["hits"]["hits"].Value;

                    // An empty page means the scroll is exhausted. Without this check the loop would never
                    // end when fewer documents come back than the expected size (for example when the
                    // total could not be determined, or documents were removed after counting).
                    if (docs == null || docs.Count == 0)
                    {
                        break;
                    }

                    foreach (var d in docs)
                    {
                        var doc    = (IDictionary <string, object>)d;
                        var row    = _rowFactory.Create();
                        var source = (IDictionary <string, object>)doc["_source"];
                        for (var i = 0; i < _fields.Length; i++)
                        {
                            row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
                        }
                        yield return(row);
                    }
                    count += docs.Count;
                }
                else
                {
                    LogError(response);
                }
            } while (response.Success && count < size);

            _client.ClearScroll <DynamicResponse>(new PostData <object>(new { scroll_id = scrolls.ToArray() }));
        }