/// <summary>
/// Creates a reader for an elasticsearch index. Input reads address fields by
/// name and the entity by name; output reads use (lower-cased) aliases.
/// </summary>
public ElasticReader(IConnectionContext context, Field[] fields, IElasticLowLevelClient client, IRowFactory rowFactory, ReadFrom readFrom) {
   _context = context;
   _fields = fields;
   _client = client;
   _rowFactory = rowFactory;
   _readFrom = readFrom;

   // BUG FIX: the original evaluated this projection (ToArray forces it immediately)
   // against the _readFrom *field* before it was assigned, so it always saw the enum's
   // default value. Use the constructor parameter instead.
   _fieldNames = fields.Select(f => readFrom == ReadFrom.Input ? f.Name : f.Alias.ToLower()).ToArray();

   _typeName = readFrom == ReadFrom.Input ? context.Entity.Name : context.Entity.Alias.ToLower();

   // fall back to the default page size when none is configured
   _context.Entity.ReadSize = _context.Entity.ReadSize == 0 ? DefaultSize : _context.Entity.ReadSize;

   if (_context.Entity.ReadSize > ElasticsearchDefaultSizeLimit) {
      _context.Warn("Elasticsearch's default size limit is 10000. {0} may be too high.", _context.Entity.ReadSize);
   }

   _version = ElasticVersionParser.ParseVersion(_context);
}
/// <summary>
/// Projects an elasticsearch "properties" mapping into Field definitions.
/// Maps the elastic "integer" type to "int"; fields without a discoverable
/// type are left at the default (string) with a warning.
/// </summary>
private IEnumerable <Field> PropertiesToFields(string name, IDictionary <string, object> properties) {
   if (properties == null) {
      _input.Error("Could not find fields for index {0} type {1}.", _index, name);
      yield break;
   }
   foreach (var property in properties) {
      var result = new Field { Name = property.Key };
      var attributes = property.Value as IDictionary <string, object>;
      if (attributes != null && attributes.ContainsKey("type")) {
         var type = attributes["type"].ToString();
         // elasticsearch calls it "integer"; transformalize calls it "int"
         result.Type = type == "integer" ? "int" : type;
      } else {
         _input.Warn("Could not find type for index {0} type {1} field {2}. Default is string.", _index, name, property.Key);
      }
      yield return result;
   }
}
/// <summary>
/// Resolves the elasticsearch version from the connection configuration,
/// defaulting (with a warning) to 5.0.0 when it is missing or unparsable.
/// Also normalizes the connection's Version string to the value used.
/// </summary>
public static Version ParseVersion(IConnectionContext context) {
   // FIX: treat null the same as empty/default — the original only compared
   // against string.Empty and would pass a null version on to TryParse.
   if (context.Connection.Version == Constants.DefaultSetting || string.IsNullOrEmpty(context.Connection.Version)) {
      context.Warn("Defaulting to Elasticsearch version 5.0.0");
      context.Connection.Version = "5.0.0";
   }
   if (Version.TryParse(context.Connection.Version, out var parsed)) {
      return parsed;
   }
   context.Warn($"Unable to parse Elasticsearch version {context.Connection.Version}.");
   context.Connection.Version = "5.0.0";
   // FIX: return a Version consistent with the "5.0.0" string stored above;
   // the original returned 5.0.0.0, which does not equal Version.Parse("5.0.0").
   return new Version(5, 0, 0);
}
/// <summary>
/// Resolves a batch of incoming key rows against the database: bulk-inserts the
/// keys (tagged with their ordinal) into a temp table, joins it via SqlQuery(),
/// and returns a Batch indexed by the original row position. The whole exchange
/// runs in one transaction; any failure is logged and rolled back.
/// </summary>
public Batch Read(IEnumerable <IRow> input) {
   var batch = new Batch();
   using (var cn = _cf.GetConnection()) {
      cn.Open();
      _context.Debug(() => "begin transaction");
      // FIX: dispose the transaction; the original leaked it on both the
      // commit and the rollback path.
      using (var trans = cn.BeginTransaction()) {
         try {
            var createSql = SqlCreateKeysTable(_tempTable);
            cn.Execute(createSql, null, trans);

            // tag each key row with its ordinal position (TflIndex) so the
            // results can be placed back into the batch by index
            var index = 0;
            var keys = new List <ExpandoObject>();
            foreach (var row in input) {
               var obj = row.ToExpandoObject(_keys);
               ((IDictionary <string, object>)obj)["TflIndex"] = index;
               keys.Add(obj);
               ++index;
            }

            var insertSql = SqlInsertTemplate(_context, _tempTable, _keys);
            cn.Execute(insertSql, keys, trans, 0, System.Data.CommandType.Text);

            // the TflIndex column sits just past the entity fields in the result set
            var i = _fields.Length;
            using (var reader = cn.ExecuteReader(SqlQuery(), null, trans, 0, System.Data.CommandType.Text)) {
               while (reader.Read()) {
                  batch[reader.GetInt32(i)] = _rowCreator.Create(reader, _fields);
               }
            }

            var sqlDrop = SqlDrop(_tempTable);
            cn.Execute(sqlDrop, null, trans);
            _context.Debug(() => "commit transaction");
            trans.Commit();
         } catch (Exception ex) {
            _context.Error(ex.Message);
            _context.Warn("rollback transaction");
            trans.Rollback();
         }
      }
   }
   return batch;
}
/// <summary>
/// Materializes the current data-reader record into a row. On the first call
/// it caches, per column, whether the reader's type disagrees with the field's
/// configured type (warning on mismatches, except for "char" fields and the
/// sqlite provider); mismatched columns are converted, others copied as-is.
/// DBNull columns are skipped, leaving the row's default value in place.
/// </summary>
public IRow Create(IDataReader reader, Field[] fields) {
   var count = Math.Min(reader.FieldCount, fields.Length);
   var row = _rowFactory.Create();

   // first call only: detect type mismatches and remember them
   if (_errors == null) {
      _errors = new bool[fields.Length];
      for (var index = 0; index < count; index++) {
         var mismatch = reader.GetFieldType(index) != _typeMap[fields[index].Type];
         _errors[index] = mismatch;
         // char fields and the sqlite provider mismatch routinely; stay quiet for those
         if (mismatch && fields[index].Type != "char" && _context.Connection.Provider != "sqlite") {
            _context.Warn("Type mismatch for {0}. Expected {1}, but read {2}.", fields[index].Name, fields[index].Type, reader.GetFieldType(index));
         }
      }
   }

   for (var index = 0; index < count; index++) {
      if (reader.IsDBNull(index)) {
         continue;
      }
      var value = reader.GetValue(index);
      row[fields[index]] = _errors[index] ? fields[index].Convert(value) : value;
   }

   return row;
}
/// <summary>
/// Resolves incoming key rows against the database: bulk-inserts the keys into
/// a temp table, joins it via SqlQuery(), and returns the matched rows. The
/// whole exchange runs in one transaction; failures are logged and rolled back
/// (yielding whatever was read before the failure, typically nothing).
/// </summary>
public IEnumerable <IRow> Read(IEnumerable <IRow> input) {
   var results = new List <IRow>();
   using (var cn = _cf.GetConnection()) {
      cn.Open();
      _context.Debug(() => "begin transaction");
      // FIX: dispose the transaction; the original leaked it on both the
      // commit and the rollback path (consistent with the Batch reader).
      using (var trans = cn.BeginTransaction()) {
         try {
            var createSql = SqlCreateKeysTable(_tempTable);
            cn.Execute(createSql, null, trans);

            // deferred projection is fine here: Execute enumerates it once
            var keys = input.Select(r => r.ToExpandoObject(_keys));
            var insertSql = SqlInsertTemplate(_context, _tempTable, _keys);
            cn.Execute(insertSql, keys, trans, 0, System.Data.CommandType.Text);

            using (var reader = cn.ExecuteReader(SqlQuery(), null, trans, 0, System.Data.CommandType.Text)) {
               while (reader.Read()) {
                  results.Add(_rowCreator.Create(reader, _fields));
               }
            }

            var sqlDrop = SqlDrop(_tempTable, _cf);
            cn.Execute(sqlDrop, null, trans);
            _context.Debug(() => "commit transaction");
            trans.Commit();
         } catch (Exception ex) {
            _context.Error(ex.Message);
            _context.Warn("rollback transaction");
            trans.Rollback();
         }
      }
   }
   return results;
}
// Inspects the configured file and returns a serialized Process (arrangement)
// describing it: one "input" file connection plus one entity with a field per
// detected column.
public string Create() {
   // entity identifier derived from the file name without its extension
   var identifier = Utility.Identifier(_fileInfo.Name.Replace(_fileInfo.Extension, string.Empty));

   // only .csv files are treated as quote-delimited
   var quoted = _fileInfo.Extension.ToLower() == ".csv";

   var lines = new FileLineReader(_fileInfo, _lines).Read().ToArray();

   // use the configured delimiter candidates, or fall back to the single
   // configured delimiter character (comma when none is configured)
   var delimiters = _context.Connection.Delimiters.Any() ? _context.Connection.Delimiters : new List <Delimiter> { new Delimiter { Character = (_context.Connection.Delimiter.Length == 0 ? ',' : _context.Connection.Delimiter[0]), Name = "Delimiter" } };
   var delimiter = Utility.FindDelimiter(lines, delimiters, quoted);

   // split the first line into candidate headers, stripping quotes then whitespace
   var values = lines.First()
      .SplitLine(delimiter, quoted)
      .Select(c => c.Trim('"'))
      .Select(c => c.Trim())
      .ToArray();

   // substitute blank headers with excel column names (useful when some of the column headers are blank)
   for (var i = 0; i < values.Length; i++) {
      if (values[i] == string.Empty) {
         values[i] = Utility.GetExcelName(i);
      }
   }

   // if the first line doesn't hold valid column names, generate names instead
   var hasColumnNames = ColumnNames.AreValid(_context, values);
   var fieldNames = hasColumnNames ? values : ColumnNames.Generate(values.Length).ToArray();

   // data starts on line 2 when line 1 held the column names
   var connection = new Connection { Name = "input", Provider = "file", File = _fileInfo.FullName, Delimiter = delimiter == default(char) ? "," : delimiter.ToString(), Start = hasColumnNames ? 2 : 1, Types = _context.Connection.Types };
   var process = new Process { Name = "FileInspector", ReadOnly = true, Connections = new List <Connection> { connection } };
   process.Entities.Add(new Entity { Name = identifier, Input = "input", PrependProcessNameToOutputName = false, Sample = Convert.ToInt32(_context.Connection.Sample) });

   // invalid field names get the identifier prefixed so the alias stays legal
   foreach (var name in fieldNames) {
      process.Entities[0].Fields.Add(new Field { Name = name, Alias = Constants.InvalidFieldNames.Contains(name) ? identifier + name : name, Length = "max" });
   }

   // Load() validates the arrangement; relay its warnings/errors to the context
   process.Load();
   foreach (var warning in process.Warnings()) {
      _context.Warn(warning);
   }
   foreach (var error in process.Errors()) {
      _context.Error(error);
   }
   return (process.Serialize());
}
/// <summary>
/// Materializes the current data-reader record into a row. On the first call
/// it caches the usable field count and a per-column conversion: the identity
/// when types match (or the user supplied a "convert" transform), otherwise
/// the field's Convert (with a warning). DBNull columns are skipped.
/// </summary>
public IRow Create(IDataReader reader, Field[] fields) {
   var row = _rowFactory.Create();

   // first call only: cache field count, conversions, and mismatch flags
   if (_fieldCount == 0) {
      _fieldCount = Math.Min(reader.FieldCount, fields.Length);
      _conversions = new List <Func <object, object> >(_fieldCount);
      for (var i = 0; i < _fieldCount; i++) {
         _conversions.Add(null);
      }
      _errors = new bool[fields.Length];
      for (var i = 0; i < _fieldCount; i++) {
         var inputType = reader.GetFieldType(i);
         _errors[i] = inputType != _typeMap[fields[i].Type];
         if (_errors[i]) {
            if (fields[i].Transforms.Any() && fields[i].Transforms.First().Method == "convert") {
               _conversions[i] = o => o; // the user has set a conversion
            } else {
               _conversions[i] = fields[i].Convert;
               _context.Warn("Type mismatch for {0}. Expected {1}, but read {2}. Change type or add conversion.", fields[i].Name, fields[i].Type, inputType);
            }
         } else {
            _conversions[i] = o => o;
         }
      }
   }

   // FIX: the original duplicated this loop in both the first-call branch and
   // the else branch, and only the first-call copy guarded the conversion with
   // try/catch — later calls let FormatException escape. One loop, guarded
   // consistently, now serves every call.
   for (var i = 0; i < _fieldCount; i++) {
      if (reader.IsDBNull(i)) {
         continue;
      }
      var value = reader.GetValue(i);
      if (_errors[i]) {
         try {
            row[fields[i]] = fields[i].Type == "object" ? value : _conversions[i](value);
         } catch (FormatException) {
            _context.Error($"Could not convert value {value} in field {fields[i].Alias} to {fields[i].Type}");
         }
      } else {
         row[fields[i]] = value;
      }
   }

   return row;
}
// Streams rows out of an elasticsearch index. A page request issues a single
// from/size search; a full read first probes for the total hit count, then
// searches (switching to the scroll API when from + size would exceed
// elasticsearch's 10000-result window). Facet aggregations are folded into
// their configured maps as "key (doc_count)" items.
public IEnumerable <IRow> Read() {
   ElasticsearchResponse <DynamicResponse> response;
   ElasticsearchDynamicValue hits;
   var from = 1; // NOTE(review): starts at 1, not 0; only feeds the from + size > 10000 check below — confirm intent
   var size = 10;
   string body;

   if (_context.Entity.IsPageRequest()) {
      // translate page/page-size into a zero-based starting offset
      from = (_context.Entity.Page * _context.Entity.PageSize) - _context.Entity.PageSize;
      body = WriteQuery(_fields, _readFrom, _context, from, _context.Entity.PageSize);
   } else {
      // probe with an empty query to learn the total number of matching documents
      body = WriteQuery(_fields, _readFrom, _context, 0, 0);
      response = _client.Search <DynamicResponse>(_context.Connection.Index, _typeName, body);
      if (response.Success) {
         hits = response.Body["hits"] as ElasticsearchDynamicValue;
         if (hits != null && hits.HasValue) {
            var properties = hits.Value as IDictionary <string, object>;
            if (properties != null && properties.ContainsKey("total")) {
               // ask for everything, capped at the 10000-result window
               size = Convert.ToInt32(properties["total"]);
               body = WriteQuery(_fields, _readFrom, _context, 0, size > 10000 ? 10000 : size);
            }
         }
      }
   }

   _context.Debug(() => body);
   _context.Entity.Query = body;

   // move 10000 to configurable limit
   response = from + size > 10000 ?
      _client.Search <DynamicResponse>(_context.Connection.Index, _typeName, body, p => p.Scroll(TimeSpan.FromMinutes(1.0))) :
      _client.Search <DynamicResponse>(_context.Connection.Index, _typeName, body);

   if (!response.Success) {
      LogError(response);
      yield break;
   }

   _context.Entity.Hits = Convert.ToInt32((response.Body["hits"]["total"] as ElasticsearchDynamicValue).Value);

   hits = response.Body["hits"]["hits"] as ElasticsearchDynamicValue;
   if (hits == null || !hits.HasValue) {
      _context.Warn("No hits from elasticsearch");
      yield break;
   }

   var docs = hits.Value as IList <object>;
   if (docs == null) {
      _context.Error("No documents returned from elasticsearch!");
      yield break;
   }

   // if any of the fields do not exist, yield break
   if (docs.Count > 0) {
      var doc = docs.First() as IDictionary <string, object>;
      var source = doc?["_source"] as IDictionary <string, object>;
      if (source == null) {
         _context.Error("Missing _source from elasticsearch response!");
         yield break;
      }
      for (var i = 0; i < _fields.Length; i++) {
         if (source.ContainsKey(_fieldNames[i])) {
            continue;
         }
         _context.Error($"Field {_fieldNames[i]} does not exist!");
         yield break;
      }
   }

   // yield the first batch, converting each _source value to its field's type
   var count = 0;
   foreach (var d in docs) {
      var doc = (IDictionary <string, object>)d;
      var row = _rowFactory.Create();
      var source = (IDictionary <string, object>)doc["_source"];
      for (var i = 0; i < _fields.Length; i++) {
         row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
      }
      _context.Increment();
      yield return (row);
   }
   count += docs.Count;

   // get this from first search response (maybe), unless you have to aggregate it from all...
   // fold facet bucket counts into their configured maps as "key (doc_count)"
   foreach (var filter in _context.Entity.Filter.Where(f => f.Type == "facet" && !string.IsNullOrEmpty(f.Map))) {
      var map = _context.Process.Maps.First(m => m.Name == filter.Map);
      var buckets = response.Body["aggregations"][filter.Key]["buckets"] as ElasticsearchDynamicValue;
      if (buckets == null || !buckets.HasValue) {
         continue;
      }
      var items = buckets.Value as IEnumerable <object>;
      if (items == null) {
         continue;
      }
      foreach (var item in items.OfType <IDictionary <string, object> >()) {
         map.Items.Add(new MapItem { From = $"{item["key"]} ({item["doc_count"]})", To = item["key"] });
      }
   }

   // no scroll id means the scroll API wasn't engaged; nothing more to fetch
   if (!response.Body.ContainsKey("_scroll_id")) {
      yield break;
   }

   // everything arrived in the first response; release the scroll context
   if (size == count) {
      _client.ClearScroll <DynamicResponse>(new PostData <object>(new { scroll_id = response.Body["_scroll_id"].Value }));
      yield break;
   }

   // keep scrolling (1 minute keep-alive) until all matching documents are yielded
   var scrolls = new HashSet <string>();
   do {
      var scrollId = response.Body["_scroll_id"].Value;
      scrolls.Add(scrollId);
      response = _client.Scroll <DynamicResponse>(new PostData <object>(new { scroll = "1m", scroll_id = scrollId }));
      if (response.Success) {
         docs = (IList <object>)response.Body["hits"]["hits"].Value;
         foreach (var d in docs) {
            var doc = (IDictionary <string, object>)d;
            var row = _rowFactory.Create();
            var source = (IDictionary <string, object>)doc["_source"];
            for (var i = 0; i < _fields.Length; i++) {
               row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
            }
            _context.Increment();
            yield return (row);
         }
         count += docs.Count;
      } else {
         LogError(response);
      }
   } while (response.Success && count < size);

   // release every scroll context used during this read
   _client.ClearScroll <DynamicResponse>(new PostData <object>(new { scroll_id = scrolls.ToArray() }));
}
// Streams rows out of an elasticsearch index (version-aware: handles the v7+
// change where hits.total became an object with "value"/"relation"). A page
// request issues a single from/size search; a full read probes for the total
// hit count, then uses the scroll API with the connection's keep-alive.
public IEnumerable <IRow> Read() {
   ElasticsearchResponse <DynamicResponse> response;
   ElasticsearchDynamicValue hits;
   var from = 0;
   var size = 10;
   string body;
   bool warned = false;

   // scroll (stream everything) unless a specific page was requested
   var scroll = !_context.Entity.IsPageRequest();
   if (!scroll) {
      // translate page/size into a zero-based starting offset
      from = (_context.Entity.Page * _context.Entity.Size) - _context.Entity.Size;
      body = WriteQuery(_fields, _readFrom, _context, scroll: false, from: from, size: _context.Entity.Size);
   } else {
      // probe with an empty query to learn the total number of matching documents
      body = WriteQuery(_fields, _readFrom, _context, scroll: false, from: 0, size: 0);
      response = _client.Search <DynamicResponse>(_context.Connection.Index, _typeName, body);
      if (response.Success) {
         hits = response.Body["hits"] as ElasticsearchDynamicValue;
         if (hits != null && hits.HasValue) {
            var total = hits["total"];
            try {
               if (_version.Major >= 7) // version 7 changed total to an object with "value" and "relation" properties
               {
                  size = Convert.ToInt32(total["value"].Value);
               } else {
                  size = Convert.ToInt32(total.Value);
               }
            } catch (Exception ex) {
               warned = true;
               _context.Debug(() => total);
               _context.Warn($"Could not get total number of matching documents from the elasticsearch response. Are you sure you using version {_version}?");
               _context.Error(ex, ex.Message);
            }
            // cap the scrolled request at the default window when the total exceeds it
            body = WriteQuery(_fields, _readFrom, _context, scroll: true, from: 0, size: size > ElasticsearchDefaultSizeLimit ? DefaultSize : size);
         }
      }
   }

   _context.Debug(() => body);
   _context.Entity.Query = body;

   response = scroll ?
      _client.Search <DynamicResponse>(_context.Connection.Index, _typeName, body, p => p.AddQueryString("scroll", _context.Connection.Scroll)) :
      _client.Search <DynamicResponse>(_context.Connection.Index, _typeName, body);

   if (!response.Success) {
      LogError(response);
      yield break;
   }

   // record total hits on the entity (v7+ nests the count under "value")
   try {
      if (_version.Major >= 7) // version 7 changed total to an object with "value" and "relation" properties
      {
         _context.Entity.Hits = Convert.ToInt32(response.Body["hits"]["total"]["value"].Value);
      } else {
         _context.Entity.Hits = Convert.ToInt32(response.Body["hits"]["total"].Value);
      }
   } catch (Exception ex) {
      // only report once; the probe above may already have warned
      if (!warned) {
         _context.Debug(() => response.Body["hits"]);
         _context.Warn($"Could not get total number of matching documents from the elasticsearch response. Are you sure you using version {_version}?");
         _context.Error(ex.Message);
      }
   }

   hits = response.Body["hits"]["hits"] as ElasticsearchDynamicValue;
   if (hits == null || !hits.HasValue) {
      _context.Warn("No hits from elasticsearch");
      yield break;
   }

   var docs = hits.Value as IList <object>;
   if (docs == null) {
      _context.Error("No documents returned from elasticsearch!");
      yield break;
   }

   // if any of the fields do not exist, yield break
   if (docs.Count > 0) {
      var doc = docs.First() as IDictionary <string, object>;
      var source = doc?["_source"] as IDictionary <string, object>;
      if (source == null) {
         _context.Error("Missing _source from elasticsearch response!");
         yield break;
      }
      for (var i = 0; i < _fields.Length; i++) {
         if (source.ContainsKey(_fieldNames[i])) {
            continue;
         }
         _context.Error($"Field {_fieldNames[i]} does not exist!");
         yield break;
      }
   }

   // yield the first batch, converting each _source value to its field's type
   var count = 0;
   foreach (var d in docs) {
      var doc = (IDictionary <string, object>)d;
      var row = _rowFactory.Create();
      var source = (IDictionary <string, object>)doc["_source"];
      for (var i = 0; i < _fields.Length; i++) {
         row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
      }
      yield return (row);
   }
   count += docs.Count;

   // get this from first search response (maybe), unless you have to aggregate it from all...
   // fold facet bucket counts into their configured maps as "key (doc_count)"
   foreach (var filter in _context.Entity.Filter.Where(f => f.Type == "facet" && !string.IsNullOrEmpty(f.Map))) {
      var map = _context.Process.Maps.First(m => m.Name == filter.Map);
      var buckets = response.Body["aggregations"][filter.Key]["buckets"] as ElasticsearchDynamicValue;
      if (buckets == null || !buckets.HasValue) {
         continue;
      }
      var items = buckets.Value as IEnumerable <object>;
      if (items == null) {
         continue;
      }
      foreach (var item in items.OfType <IDictionary <string, object> >()) {
         map.Items.Add(new MapItem { From = $"{item["key"]} ({item["doc_count"]})", To = item["key"] });
      }
   }

   // no scroll id means the scroll API wasn't engaged; nothing more to fetch
   if (!response.Body.ContainsKey("_scroll_id")) {
      yield break;
   }

   // everything arrived in the first response; release the scroll context
   if (size == count) {
      _client.ClearScroll <DynamicResponse>(new PostData <object>(new { scroll_id = response.Body["_scroll_id"].Value }));
      yield break;
   }

   // keep scrolling with the configured keep-alive until all documents are yielded
   var scrolls = new HashSet <string>();
   do {
      var scrollId = response.Body["_scroll_id"].Value;
      scrolls.Add(scrollId);
      response = _client.Scroll <DynamicResponse>(new PostData <object>(new { scroll = _context.Connection.Scroll, scroll_id = scrollId }));
      if (response.Success) {
         docs = (IList <object>)response.Body["hits"]["hits"].Value;
         foreach (var d in docs) {
            var doc = (IDictionary <string, object>)d;
            var row = _rowFactory.Create();
            var source = (IDictionary <string, object>)doc["_source"];
            for (var i = 0; i < _fields.Length; i++) {
               row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
            }
            yield return (row);
         }
         count += docs.Count;
      } else {
         LogError(response);
      }
   } while (response.Success && count < size);

   // release every scroll context used during this read
   _client.ClearScroll <DynamicResponse>(new PostData <object>(new { scroll_id = scrolls.ToArray() }));
}