public IEnumerable<IRow> Read() {
   var encoding = Encoding.GetEncoding(_context.Connection.Encoding);
   var lineNo = 0;

   if (System.IO.Path.GetExtension(_context.Connection.File) == ".xml") {
      var row = _rowFactory.Create();
      row[_field] = System.IO.File.ReadAllText(_context.Connection.File, encoding);
      yield return row;
   } else {
      foreach (var line in System.IO.File.ReadLines(_context.Connection.File, encoding)) {
         ++lineNo;
         if (lineNo < _context.Connection.Start) {
            continue;
         }
         var row = _rowFactory.Create();
         row[_field] = line;
         yield return row;
      }
   }
}
public IRow Create(IDataReader reader, Field[] fields) {
   var fieldCount = Math.Min(reader.FieldCount, fields.Length);
   var row = _rowFactory.Create();
   for (var i = 0; i < fieldCount; i++) {
      var field = fields[i];
      if (field.Type == "string") {
         if (reader.GetFieldType(i) == typeof(string)) {
            row[field] = reader.IsDBNull(i) ? null : reader.GetString(i);
         } else {
            TypeMismatch(field, reader, i);
            var value = reader.GetValue(i);
            row[field] = value == DBNull.Value ? null : value;
         }
      } else {
         var value = reader.GetValue(i);
         row[field] = value == DBNull.Value ? null : value;
      }
   }
   return row;
}
public IEnumerable<IRow> Read() {
   var fields = _input.Entity.Fields.Where(f => f.Input).ToArray();
   var rows = new List<IRow>();
   foreach (var row in _input.Entity.Rows) {
      var typed = _rowFactory.Create();
      foreach (var field in fields) {
         if (row.Map.ContainsKey(field.Name)) {
            typed[field] = field.Convert(row[field.Name]);
         } else if (_missing.Add(field.Name)) {
            _input.Warn($"An internal row in {_input.Entity.Alias} is missing the field {field.Name}.");
         }
      }
      _input.Increment();
      rows.Add(typed);
   }
   _input.Entity.Hits = rows.Count;
   return rows;
}
private IEnumerable<IRow> PreRead() {
   var rows = new List<IRow>();
   foreach (var row in _input.Entity.Rows) {
      var stringRow = _rowFactory.Create();
      foreach (var field in _fields) {
         if (row.Map.ContainsKey(field.Name)) {
            stringRow[field] = row[field.Name];
         } else if (_missing.Add(field.Name)) {
            _input.Warn($"An internal row in {_input.Entity.Alias} is missing the field {field.Name}.");
         }
      }
      rows.Add(stringRow);
   }
   _input.Entity.Hits = rows.Count;
   return rows;
}
public IEnumerable<IRow> Read() {
   if (!_run) { yield break; }

   do {
      // pass the request so the NextToken set below is honored when paging
      var response = _client.DescribeLogGroupsAsync(_request).Result;
      foreach (var logGroup in response.LogGroups) {
         var row = _rowFactory.Create();
         foreach (var field in _context.InputFields) {
            switch (field.Name.ToLower()) {
               case "arn": row[field] = logGroup.Arn ?? string.Empty; break;
               case "creationtime": row[field] = logGroup.CreationTime; break;
               case "kmskeyid": row[field] = logGroup.KmsKeyId ?? string.Empty; break;
               case "loggroupname": row[field] = logGroup.LogGroupName ?? string.Empty; break;
               case "metricfiltercount": row[field] = logGroup.MetricFilterCount; break;
               case "retentionindays": row[field] = logGroup.RetentionInDays.GetValueOrDefault(); break;
               case "storedbytes": row[field] = logGroup.StoredBytes; break;
               default: break;
            }
         }
         yield return row;
      }
      _request.NextToken = response.NextToken;
   } while (!string.IsNullOrEmpty(_request.NextToken));
}
public IEnumerable<IRow> Read() {
   if (_inputField == null) {
      _input.Error("You must have one input field for console provider input.");
      yield break;
   }

   // Start the child process.
   using (var p = new Process {
      StartInfo = {
         UseShellExecute = false,
         RedirectStandardOutput = true,
         FileName = _input.Connection.Command,
         Arguments = _input.Connection.Arguments
      }
   }) {
      if (_input.Connection.Folder != string.Empty) {
         p.StartInfo.WorkingDirectory = _input.Connection.Folder;
      }

      // Redirect the output stream of the child process.
      p.Start();

      // Do not wait for the child process to exit before
      // reading to the end of its redirected stream.
      // Read the output stream first and then wait.
      var output = p.StandardOutput.ReadToEnd();

      var lineNumber = 1;
      foreach (var line in new LineReader(output).Read()) {
         if (line == string.Empty || lineNumber < _input.Connection.Start) {
            lineNumber++;
            continue;
         }
         if (_input.Connection.End > 0 && lineNumber > _input.Connection.End) {
            yield break;
         }
         var row = _rowFactory.Create();
         row[_inputField] = line;
         lineNumber++;
         yield return row;
      }

      p.WaitForExit();
   }
}
public IEnumerable<IRow> Read() {
   var types = Constants.TypeDefaults();
   var row = _rowFactory.Create();
   foreach (var field in _context.Entity.GetAllFields()) {
      // use the type's default when no default is configured; convert once
      // (the original converted an explicit default twice)
      row[field] = field.Convert(field.Default == Constants.DefaultSetting ? types[field.Type] : field.Default);
   }
   yield return row;
}
public IEnumerable <IRow> Read() { _context.Debug(() => "Reading file stream."); var start = _context.Connection.Start; var end = 0; if (_context.Entity.IsPageRequest()) { start += (_context.Entity.Page * _context.Entity.Size) - _context.Entity.Size; end = start + _context.Entity.Size; } var current = _context.Connection.Start; var engine = FileHelpersEngineFactory.Create(_context); using (engine.BeginReadStream(new StreamReader(_stream))) { foreach (var record in engine) { if (end == 0 || current.Between(start, end)) { var values = engine.LastRecordValues; var row = _rowFactory.Create(); for (var i = 0; i < _context.InputFields.Length; i++) { var field = _context.InputFields[i]; if (field.Type == "string") { row[field] = values[i] as string; } else { row[field] = field.Convert(values[i]); } } yield return(row); } ++current; if (current == end) { break; } } } if (engine.ErrorManager.HasErrors) { foreach (var error in engine.ErrorManager.Errors) { _context.Error(error.ExceptionInfo.Message); } } }
public IEnumerable<IRow> Read() {
   var searchOption = (SearchOption)Enum.Parse(typeof(SearchOption), _input.Connection.SearchOption, true);

   _input.Info($"Searching folder: {_input.Connection.Folder}");
   var files = new DirectoryInfo(_input.Connection.Folder).GetFiles(_input.Connection.SearchPattern, searchOption);
   _input.Info($"Found {files.Length} files.");

   var names = _input.InputFields.Select(f => f.Name.ToLower()).ToArray();

   foreach (var file in files) {
      var row = _rowFactory.Create();
      for (var i = 0; i < _input.InputFields.Length; i++) {
         var field = _input.InputFields[i];
         switch (names[i]) {
            case "creationtimeutc": row[field] = file.CreationTimeUtc; break;
            case "directoryname": row[field] = file.DirectoryName; break;
            case "extension": row[field] = file.Extension; break;
            case "fullname": row[field] = file.FullName; break;
            case "lastwritetimeutc": row[field] = file.LastWriteTimeUtc; break;
            case "length": row[field] = file.Length; break;
            case "name": row[field] = file.Name; break;
         }
      }
      yield return row;
   }
}
public IEnumerable<IRow> Read() {
   var row = _rowFactory.Create();

   var lookup = new Dictionary<string, Parameter>();
   foreach (var parameter in _input.Process.Parameters) {
      lookup[parameter.Name] = parameter;
   }

   foreach (var field in _fields) {
      var fallBack = field.DefaultValue();
      if (lookup.ContainsKey(field.Name)) {
         var p = lookup[field.Name];
         if (_parameters.ContainsKey(field.Name)) {
            row[field] = _parameters[field.Name] == null ? fallBack : Convert(p, _parameters[field.Name]);
            // since the parameter may be transformed and placed in value, discard the original
            _parameters.Remove(field.Name);
         } else {
            row[field] = p.Value == null ? fallBack : Convert(p, p.Value);
         }
      } else {
         row[field] = fallBack;
      }
   }

   _input.Entity.Hits = 1;
   yield return row;
}
public IEnumerable<IRow> Read() {
   if (!_run) { yield break; }

   do {
      var response = _client.ListInstancesAsync(_request).Result;
      foreach (var inst in response.InstanceSummaryList) {
         var row = _rowFactory.Create();
         foreach (var field in _context.InputFields) {
            switch (field.Name.ToLower()) {
               case "arn": row[field] = inst.Arn ?? string.Empty; break;
               case "createdtime": row[field] = inst.CreatedTime; break;
               case "id": row[field] = inst.Id; break;
               case "identitymanagementtype": row[field] = inst.IdentityManagementType.Value ?? string.Empty; break;
               case "inboundcallsenabled": row[field] = inst.InboundCallsEnabled; break;
               case "instancealias": row[field] = inst.InstanceAlias ?? string.Empty; break;
               case "instancestatus": row[field] = inst.InstanceStatus.Value ?? string.Empty; break;
               case "outboundcallsenabled": row[field] = inst.OutboundCallsEnabled; break;
               case "servicerole": row[field] = inst.ServiceRole ?? string.Empty; break;
               default: break;
            }
         }
         yield return row;
      }
      _request.NextToken = response.NextToken;
   } while (!string.IsNullOrEmpty(_request.NextToken));
}
public IEnumerable<IRow> Read() {
   var row = _rowFactory.Create();
   try {
      row[_field] = _client.DownloadString(_context.Connection.Url);
   } catch (Exception ex) {
      _context.Error(ex.Message);
      _context.Debug(() => ex.StackTrace);
      yield break;
   }
   yield return row;
}
public IEnumerable<IRow> Read() {
   ErrorMode errorMode;
   Enum.TryParse(_context.Connection.ErrorMode, true, out errorMode);

   var engine = new FileHelperAsyncEngine(_builder.CreateRecordClass());
   engine.ErrorManager.ErrorMode = errorMode;
   engine.ErrorManager.ErrorLimit = _context.Connection.ErrorLimit;

   _context.Debug(() => $"Reading {_fileInfo.Name}.");

   using (engine.BeginReadFile(_fileInfo.FullName)) {
      foreach (var record in engine) {
         var values = engine.LastRecordValues;
         var row = _rowFactory.Create();
         for (var i = 0; i < _context.InputFields.Length; i++) {
            var field = _context.InputFields[i];
            if (field.Type == "string") {
               row[field] = values[i] as string;
            } else {
               row[field] = field.Convert(values[i]);
            }
         }
         if (_rowCondition.Eval(row)) {
            yield return row;
         }
      }

      if (engine.ErrorManager.HasErrors) {
         foreach (var error in engine.ErrorManager.Errors) {
            _context.Error(error.ExceptionInfo.Message);
         }
      }
   }
}
public IRow Create(IDataReader reader, Field[] fields) {
   var fieldCount = Math.Min(reader.FieldCount, fields.Length);
   var row = _rowFactory.Create();

   if (_errors == null) { // check types on the first record only
      _errors = new bool[fields.Length];
      for (var i = 0; i < fieldCount; i++) {
         _errors[i] = reader.GetFieldType(i) != _typeMap[fields[i].Type];
         if (!_errors[i]) {
            continue;
         }
         if (fields[i].Type != "char" && _context.Connection.Provider != "sqlite") {
            _context.Warn("Type mismatch for {0}. Expected {1}, but read {2}.", fields[i].Name, fields[i].Type, reader.GetFieldType(i));
         }
      }
   }

   for (var i = 0; i < fieldCount; i++) {
      if (reader.IsDBNull(i)) {
         continue;
      }
      row[fields[i]] = _errors[i] ? fields[i].Convert(reader.GetValue(i)) : reader.GetValue(i);
   }

   return row;
}
public IEnumerable<IRow> Read() {
   if (!_run) { yield break; }

   do {
      var response = _client.ListUsersAsync(_request).Result;
      foreach (var inst in response.UserSummaryList) {
         var row = _rowFactory.Create();
         foreach (var field in _context.InputFields) {
            switch (field.Name.ToLower()) {
               case "arn": row[field] = inst.Arn ?? string.Empty; break;
               case "id": row[field] = inst.Id ?? string.Empty; break;
               case "username": row[field] = inst.Username ?? string.Empty; break;
               default: break;
            }
         }
         yield return row;
      }
      _request.NextToken = response.NextToken;
   } while (!string.IsNullOrEmpty(_request.NextToken));
}
public IEnumerable<IRow> Read() {
   if (_inputField == null) {
      _input.Error("You must have one input field for console provider input.");
      yield break;
   }

   if (!System.Console.IsInputRedirected) {
      yield break;
   }

   string line;
   var lineNumber = 1;
   while ((line = System.Console.In.ReadLine()) != null) {
      if (line == string.Empty || lineNumber < _input.Connection.Start) {
         lineNumber++;
         continue;
      }
      if (_input.Connection.End > 0 && lineNumber > _input.Connection.End) {
         yield break;
      }
      var row = _rowFactory.Create();
      row[_inputField] = line;
      lineNumber++;
      yield return row;
   }
}
public IEnumerable<IRow> Read() {
   ElasticsearchResponse<DynamicResponse> response;
   ElasticsearchDynamicValue hits;
   var from = 1;
   var size = 10;
   string body;

   if (_context.Entity.IsPageRequest()) {
      from = (_context.Entity.Page * _context.Entity.PageSize) - _context.Entity.PageSize;
      body = WriteQuery(_fields, _readFrom, _context, from, _context.Entity.PageSize);
   } else {
      body = WriteQuery(_fields, _readFrom, _context, 0, 0);
      response = _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);
      if (response.Success) {
         hits = response.Body["hits"] as ElasticsearchDynamicValue;
         if (hits != null && hits.HasValue) {
            var properties = hits.Value as IDictionary<string, object>;
            if (properties != null && properties.ContainsKey("total")) {
               size = Convert.ToInt32(properties["total"]);
               body = WriteQuery(_fields, _readFrom, _context, 0, size > 10000 ? 10000 : size);
            }
         }
      }
   }

   _context.Debug(() => body);
   _context.Entity.Query = body;

   // move 10000 to a configurable limit
   response = from + size > 10000
      ? _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body, p => p.Scroll(TimeSpan.FromMinutes(1.0)))
      : _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);

   if (!response.Success) {
      LogError(response);
      yield break;
   }

   _context.Entity.Hits = Convert.ToInt32((response.Body["hits"]["total"] as ElasticsearchDynamicValue).Value);

   hits = response.Body["hits"]["hits"] as ElasticsearchDynamicValue;
   if (hits == null || !hits.HasValue) {
      _context.Warn("No hits from elasticsearch");
      yield break;
   }

   var docs = hits.Value as IList<object>;
   if (docs == null) {
      _context.Error("No documents returned from elasticsearch!");
      yield break;
   }

   // if any of the fields do not exist, yield break
   if (docs.Count > 0) {
      var doc = docs.First() as IDictionary<string, object>;
      var source = doc?["_source"] as IDictionary<string, object>;
      if (source == null) {
         _context.Error("Missing _source from elasticsearch response!");
         yield break;
      }
      for (var i = 0; i < _fields.Length; i++) {
         if (source.ContainsKey(_fieldNames[i])) {
            continue;
         }
         _context.Error($"Field {_fieldNames[i]} does not exist!");
         yield break;
      }
   }

   var count = 0;
   foreach (var d in docs) {
      var doc = (IDictionary<string, object>)d;
      var row = _rowFactory.Create();
      var source = (IDictionary<string, object>)doc["_source"];
      for (var i = 0; i < _fields.Length; i++) {
         row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
      }
      _context.Increment();
      yield return row;
   }
   count += docs.Count;

   // get this from the first search response (maybe), unless you have to aggregate it from all...
   foreach (var filter in _context.Entity.Filter.Where(f => f.Type == "facet" && !string.IsNullOrEmpty(f.Map))) {
      var map = _context.Process.Maps.First(m => m.Name == filter.Map);
      var buckets = response.Body["aggregations"][filter.Key]["buckets"] as ElasticsearchDynamicValue;
      if (buckets == null || !buckets.HasValue) {
         continue;
      }
      var items = buckets.Value as IEnumerable<object>;
      if (items == null) {
         continue;
      }
      foreach (var item in items.OfType<IDictionary<string, object>>()) {
         map.Items.Add(new MapItem { From = $"{item["key"]} ({item["doc_count"]})", To = item["key"] });
      }
   }

   if (!response.Body.ContainsKey("_scroll_id")) {
      yield break;
   }

   if (size == count) {
      _client.ClearScroll<DynamicResponse>(new PostData<object>(new { scroll_id = response.Body["_scroll_id"].Value }));
      yield break;
   }

   var scrolls = new HashSet<string>();
   do {
      var scrollId = response.Body["_scroll_id"].Value;
      scrolls.Add(scrollId);
      response = _client.Scroll<DynamicResponse>(new PostData<object>(new { scroll = "1m", scroll_id = scrollId }));
      if (response.Success) {
         docs = (IList<object>)response.Body["hits"]["hits"].Value;
         foreach (var d in docs) {
            var doc = (IDictionary<string, object>)d;
            var row = _rowFactory.Create();
            var source = (IDictionary<string, object>)doc["_source"];
            for (var i = 0; i < _fields.Length; i++) {
               row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
            }
            _context.Increment();
            yield return row;
         }
         count += docs.Count;
      } else {
         LogError(response);
      }
   } while (response.Success && count < size);

   _client.ClearScroll<DynamicResponse>(new PostData<object>(new { scroll_id = scrolls.ToArray() }));
}
public IEnumerable <IRow> Read() { _context.Debug(() => $"Reading {_fileInfo.Name}."); var start = _context.Connection.Start; var end = 0; if (_context.Entity.IsPageRequest()) { start += (_context.Entity.Page * _context.Entity.Size) - _context.Entity.Size; end = start + _context.Entity.Size; } var current = _context.Connection.Start; var engine = FileHelpersEngineFactory.Create(_context); IDisposable reader; try { reader = engine.BeginReadFile(_fileInfo.FullName); } catch (Exception ex) { _context.Error(ex.Message); yield break; } using (reader) { foreach (var record in engine) { if (end == 0 || current.Between(start, end)) { var values = engine.LastRecordValues; var row = _rowFactory.Create(); for (var i = 0; i < _context.InputFields.Length; i++) { var field = _context.InputFields[i]; row[field] = values[i]; } if (_fileField != null) { row[_fileField] = _context.Connection.File; } yield return(row); } ++current; if (current == end) { break; } } } if (engine.ErrorManager.HasErrors) { foreach (var error in engine.ErrorManager.Errors) { _context.Error(error.ExceptionInfo.Message); _context.Error($"Error processing line {error.LineNumber} in {_context.Connection.File}."); _context.Warn(error.RecordString.Replace("{", "{{").Replace("}", "}}")); } } }
public IEnumerable<IRow> Read() {
   ElasticsearchResponse<DynamicResponse> response;
   string body;

   if (_context.Entity.IsPageRequest()) {
      var from = (_context.Entity.Page * _context.Entity.PageSize) - _context.Entity.PageSize;
      body = WriteQuery(_fields, _readFrom, _context, from, _context.Entity.PageSize);
   } else {
      body = WriteQuery(_fields, _readFrom, _context, 0, 0);
      response = _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);
      if (response.Success) {
         var hits = response.Body["hits"] as ElasticsearchDynamicValue;
         if (hits != null && hits.HasValue) {
            var properties = hits.Value as IDictionary<string, object>;
            if (properties != null && properties.ContainsKey("total")) {
               var size = Convert.ToInt32(properties["total"]) + 1;
               body = WriteQuery(_fields, _readFrom, _context, 0, size);
            }
         }
      }
   }

   _context.Debug(() => body);
   _context.Entity.Query = body;

   response = _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);
   if (response.Success) {
      _context.Entity.Hits = Convert.ToInt32((response.Body["hits"]["total"] as ElasticsearchDynamicValue).Value);

      var hits = response.Body["hits"]["hits"] as ElasticsearchDynamicValue;
      if (hits != null && hits.HasValue) {
         var docs = hits.Value as IEnumerable<object>;
         if (docs != null) {
            foreach (var doc in docs.OfType<IDictionary<string, object>>()) {
               var row = _rowFactory.Create();
               if (doc != null && doc.ContainsKey("_source")) {
                  var source = doc["_source"] as IDictionary<string, object>;
                  if (source != null) {
                     for (var i = 0; i < _fields.Length; i++) {
                        var field = _fields[i];
                        row[field] = field.Convert(source[_fieldNames[i]]);
                     }
                  }
               }
               _context.Increment();
               yield return row;
            }
         }
      }

      foreach (var filter in _context.Entity.Filter.Where(f => f.Type == "facet" && !string.IsNullOrEmpty(f.Map))) {
         var map = _context.Process.Maps.First(m => m.Name == filter.Map);
         var buckets = response.Body["aggregations"][filter.Key]["buckets"] as ElasticsearchDynamicValue;
         if (buckets == null || !buckets.HasValue) {
            continue;
         }
         var items = buckets.Value as IEnumerable<object>;
         if (items == null) {
            continue;
         }
         foreach (var item in items.OfType<IDictionary<string, object>>()) {
            map.Items.Add(new MapItem { From = $"{item["key"]} ({item["doc_count"]})", To = item["key"] }.WithDefaults());
         }
      }
   } else {
      _context.Error(response.DebugInformation);
   }
}
public IEnumerable<IRow> Read() {
   AbstractSolrQuery query = SolrQuery.All;
   var filterQueries = new Collection<ISolrQuery>();
   var facetQueries = new Collection<ISolrFacetQuery>();

   if (_context.Entity.Filter.Any()) {
      var queries = new Collection<ISolrQuery>();

      foreach (var filter in _context.Entity.Filter.Where(f => f.Type == "search" && f.Value != "*")) {
         if (filter.Field == string.Empty) {
            queries.Add(new SolrQuery(filter.Expression));
         } else {
            foreach (var term in Terms(filter.Value)) {
               queries.Add(new SolrQueryByField(filter.Field, term) { Quoted = false });
            }
         }
      }

      query = queries.Any() ? new SolrMultipleCriteriaQuery(queries, "AND") : SolrQuery.All;

      foreach (var filter in _context.Entity.Filter.Where(f => f.Type == "filter")) {
         if (filter.Field == string.Empty) {
            filterQueries.Add(new SolrQuery(filter.Expression));
         } else if (filter.Value != "*") {
            // note: field-specific "filter" terms are AND-ed into the main query collection
            // here rather than added to filterQueries
            foreach (var term in Terms(filter.Value)) {
               queries.Add(new SolrQueryByField(filter.Field, term) { Quoted = false });
            }
         }
      }

      foreach (var filter in _context.Entity.Filter.Where(f => f.Type == "facet")) {
         facetQueries.Add(new SolrFacetFieldQuery(filter.Field) { MinCount = filter.Min, Limit = filter.Size });
         if (filter.Value != "*") {
            if (filter.Value.IndexOf(',') > 0) {
               filterQueries.Add(new SolrQueryInList(filter.Field, filter.Value.Split(new[] { ',' })));
            } else {
               filterQueries.Add(new SolrQueryByField(filter.Field, filter.Value));
            }
         }
      }
   }

   int rows;
   StartOrCursor startOrCursor;
   if (_context.Entity.IsPageRequest()) {
      rows = _context.Entity.PageSize;
      startOrCursor = new StartOrCursor.Start((_context.Entity.Page * _context.Entity.PageSize) - _context.Entity.PageSize);
   } else {
      rows = _context.Entity.ReadSize > 0
         ? _context.Entity.ReadSize
         : _solr.Query(query, new QueryOptions { StartOrCursor = new StartOrCursor.Start(0), Rows = 0 }).NumFound;
      startOrCursor = _context.Entity.ReadSize == 0 ? (StartOrCursor)new StartOrCursor.Start(0) : StartOrCursor.Cursor.Start;
   }

   var sortOrder = new Collection<SortOrder>();
   foreach (var orderBy in _context.Entity.Order) {
      Field field;
      if (_context.Entity.TryGetField(orderBy.Field, out field)) {
         var name = field.SortField.ToLower();
         sortOrder.Add(new SortOrder(name, orderBy.Sort == "asc" ? SolrNet.Order.ASC : SolrNet.Order.DESC));
      }
   }
   sortOrder.Add(new SortOrder("score", SolrNet.Order.DESC));

   var result = _solr.Query(
      query,
      new QueryOptions {
         StartOrCursor = startOrCursor,
         Rows = rows,
         Fields = _fieldNames,
         OrderBy = sortOrder,
         FilterQueries = filterQueries,
         Facet = new FacetParameters { Queries = facetQueries, Sort = false }
      }
   );

   foreach (var filter in _context.Entity.Filter.Where(f => f.Type == "facet")) {
      if (result.FacetFields.ContainsKey(filter.Field)) {
         var facet = result.FacetFields[filter.Field];
         var map = _context.Process.Maps.First(m => m.Name == filter.Map);
         foreach (var f in facet) {
            map.Items.Add(new MapItem { From = $"{f.Key} ({f.Value})", To = f.Key });
         }
      }
   }

   if (result.NumFound <= 0) {
      yield break;
   }

   _context.Entity.Hits = result.NumFound;

   foreach (var row in result.Select(x => DocToRow(_rowFactory.Create(), _fields, x))) {
      _context.Increment();
      ++_localCount;
      yield return row;
   }

   // using cursor, solr 4.7+ (un-tested)
   while (result.NextCursorMark != null) {
      result = _solr.Query(
         query,
         new QueryOptions {
            StartOrCursor = result.NextCursorMark,
            Rows = _context.Entity.ReadSize,
            Fields = _fieldNames,
            OrderBy = sortOrder,
            FilterQueries = filterQueries,
            Facet = new FacetParameters { Queries = facetQueries, Sort = false }
         }
      );
      foreach (var row in result.Select(r => DocToRow(_rowFactory.Create(), _fields, r))) {
         _context.Increment();
         ++_localCount;
         yield return row;
      }
   }

   // traditional paging
   if (_context.Entity.ReadSize == 0 || _localCount >= result.NumFound) {
      yield break;
   }

   var pages = result.NumFound / _context.Entity.ReadSize;
   for (var page = 1; page <= pages; page++) {
      result = _solr.Query(
         query,
         new QueryOptions {
            StartOrCursor = new StartOrCursor.Start(page * _context.Entity.ReadSize),
            Rows = _context.Entity.ReadSize,
            Fields = _fieldNames,
            OrderBy = sortOrder,
            FilterQueries = filterQueries,
            Facet = new FacetParameters { Queries = facetQueries, Sort = false }
         }
      );
      foreach (var row in result.Select(r => DocToRow(_rowFactory.Create(), _fields, r))) {
         ++_localCount;
         _context.Increment();
         yield return row;
      }
   }
}
private IEnumerable<IRow> Flatten(string key, object obj, LinkedList<IRow> results = null) {
   IField field = _fields.ContainsKey(key) ? _fields[key] : null;

   // in the beginning, create an empty result
   if (results == null) {
      results = new LinkedList<IRow>();
   }

   if (obj == null) {
      return results;
   }

   var dict = obj as IDictionary<string, object>;
   if (dict == null) {
      var list = obj as IList<object>;
      if (list != null) {
         foreach (var item in list) {
            results.AddLast(_rowFactory.Create());
            Flatten(key, item, results);
         }
      }
   } else if (dict.Count == 1 && dict.ContainsKey("value")) {
      if (field == null) {
         _missing.Add(key);
      } else {
         // set a field value
         var value = dict["value"];
         if (results.Last.Value[field] == null) {
            results.Last.Value[field] = value;
         } else {
            // this must be a total
            var total = _rowFactory.Create();
            total[field] = value;
            results.AddLast(total);
         }
      }
   } else if (dict.ContainsKey("buckets")) {
      Flatten(key, dict["buckets"], results);
   } else if (dict.ContainsKey("key") && dict.ContainsKey("doc_count")) {
      foreach (var pair in dict) {
         if (pair.Key == "key") {
            if (field == null) {
               _missing.Add(key);
            } else {
               // set a key field value
               results.Last()[field] = pair.Value;
            }
         } else if (pair.Key != "doc_count") {
            Flatten(pair.Key, pair.Value, results);
         }
      }
   } else {
      foreach (var pair in dict) {
         Flatten(pair.Key, pair.Value, results);
      }
   }

   if (!_missing.Any()) {
      return results;
   }

   foreach (var missing in _missing) {
      _context.Warn($"The query returns field {missing}, but you do not have that field defined in {_context.Entity.Alias}.");
   }

   return results;
}
public override IEnumerable<IRow> Transform(IEnumerable<IRow> rows) {
   foreach (var row in rows) {
      var outerRow = row;

      var innerRow = _rowFactory.Create();
      foreach (var field in _fields) {
         innerRow[field] = field.Default == Constants.DefaultSetting ? _typeDefaults[field.Type] : field.Convert(field.Default);
      }

      var innerRows = new List<IRow>();
      string startKey = null;

      var xml = row[_input] as string;
      if (!string.IsNullOrEmpty(xml)) {
         xml = xml.Trim();
         using (var reader = XmlReader.Create(new StringReader(xml), Settings)) {

            if (_findRoot) {
               do {
                  reader.Read();
               } while (reader.Name != _root);
            } else {
               reader.Read();
            }

            do {
               if (_nameMap.ContainsKey(reader.Name)) {
                  // must while here because reader.Read*Xml advances the reader
                  while (_nameMap.ContainsKey(reader.Name) && reader.IsStartElement()) {
                     InnerRow(ref startKey, reader.Name, ref innerRow, ref outerRow, ref innerRows);
                     var field = _nameMap[reader.Name];
                     var value = field.ReadInnerXml ? reader.ReadInnerXml() : reader.ReadOuterXml();
                     if (value != string.Empty) {
                        innerRow[field] = field.Convert(value);
                     }
                  }
               } else if (_searchAttributes && reader.HasAttributes) {
                  for (var i = 0; i < reader.AttributeCount; i++) {
                     reader.MoveToNextAttribute();
                     if (!_nameMap.ContainsKey(reader.Name)) {
                        continue;
                     }
                     InnerRow(ref startKey, reader.Name, ref innerRow, ref outerRow, ref innerRows);
                     var field = _nameMap[reader.Name];
                     if (!string.IsNullOrEmpty(reader.Value)) {
                        innerRow[field] = field.Convert(reader.Value);
                     }
                  }
               }

               if (_findRoot && !reader.IsStartElement() && reader.Name == _root) {
                  break;
               }
            } while (reader.Read());
         }
      }

      AddInnerRow(ref innerRow, ref outerRow, ref innerRows);
      foreach (var r in innerRows) {
         yield return r;
      }
   }
}
public IEnumerable<IRow> Read() {
   IRow row = null;
   Field field = null;
   var types = Constants.TypeSystem();

   var current = 0;
   var start = 0;
   var end = 0;
   if (_context.Entity.IsPageRequest()) {
      start += (_context.Entity.Page * _context.Entity.Size) - _context.Entity.Size;
      end = start + _context.Entity.Size;
   }

   var textReader = new StreamReader(_stream);
   var reader = new JsonTextReader(textReader);
   var textWriter = new StringWriter();
   var jsonWriter = new JsonTextWriter(textWriter);

   while (reader.Read()) {
      switch (reader.TokenType) {
         case JsonToken.StartObject:
            if (reader.Depth == 1) {
               row = _rowFactory.Create();
            }
            if (reader.Depth > 1) {
               jsonWriter.WriteStartObject();
            }
            break;
         case JsonToken.EndObject:
            if (reader.Depth == 1) {
               if (end == 0 || current.Between(start, end)) {
                  yield return row;
               }
               ++current;
               if (current == end) {
                  yield break;
               }
            }
            if (reader.Depth > 1 && field != null) {
               jsonWriter.WriteEndObject();
               jsonWriter.Flush();
               row[field] = textWriter.ToString();
               field = null;
               textWriter = new StringWriter();
               jsonWriter = new JsonTextWriter(textWriter);
            }
            break;
         case JsonToken.StartArray:
            if (reader.Depth > 0) {
               jsonWriter.WriteStartArray();
            }
            break;
         case JsonToken.EndArray:
            if (reader.Depth > 0) {
               jsonWriter.WriteEndArray();
            }
            break;
         case JsonToken.PropertyName:
            var name = (string)reader.Value;
            if (reader.Depth == 2 && _fieldLookup.ContainsKey(name)) {
               field = _fieldLookup[name];
            }
            if (reader.Depth > 2) {
               jsonWriter.WritePropertyName(name);
            }
            break;
         default:
            if (reader.Depth == 2 && reader.Value != null && field != null) {
               if (types[field.Type] == reader.ValueType) {
                  row[field] = reader.Value;
               } else {
                  row[field] = field.Convert(reader.Value);
               }
               field = null;
            }
            if (reader.Depth > 2 && reader.Value != null) {
               jsonWriter.WriteValue(reader.Value);
            }
            break;
      }
   }

   _context.Entity.Hits = current;
}
public IEnumerable<IRow> Read() {
   // TODO: fix major duplication here
   using (var fileStream = File.Open(_fileInfo.FullName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) {
      var isBinary = _fileInfo.Extension.ToLower() == ".xls";
      using (var reader = isBinary ? ExcelReaderFactory.CreateBinaryReader(fileStream) : ExcelReaderFactory.CreateOpenXmlReader(fileStream)) {
         var index = 0;
         if (reader == null) { yield break; }

         var start = _context.Connection.Start;
         var end = 0;
         if (_context.Entity.IsPageRequest()) {
            start += (_context.Entity.Page * _context.Entity.PageSize) - _context.Entity.PageSize;
            end = start + _context.Entity.PageSize;
         }

         for (var i = 0; i < start; i++) {
            reader.Read();
            ++index;
         }

         var readerHasData = false;
         var emptyDetector = new StringBuilder();

         if (reader.Read()) {
            ++index;
            readerHasData = true;
            if (end > 0 && end <= index) {
               _context.Entity.Hits++;
            } else {
               emptyDetector.Clear();
               var row = _rowFactory.Create();
               for (var i = 0; i < _context.InputFields.Length; i++) {
                  var field = _context.InputFields[i];
                  var expected = Constants.TypeSystem()[field.Type];
                  var actual = reader.IsDBNull(i) ? null : reader.GetValue(i);
                  if (_context.Entity.DataTypeWarnings && actual != null && expected != actual.GetType()) {
                     _context.Warn($"The {field.Alias} field in {_context.Entity.Alias} expects a {expected}, but is reading a ({actual.GetType().Name}){actual}.");
                  }
                  if (field.Type == "string") {
                     row[field] = reader.IsDBNull(i) ? null : reader.GetValue(i).ToString();
                  } else {
                     row[field] = reader.IsDBNull(i) ? null : field.Convert(reader.GetValue(i));
                  }
                  emptyDetector.Append(row[field]);
               }
               emptyDetector.Trim(" ");
               if (!emptyDetector.ToString().Equals(string.Empty)) {
                  _context.Entity.Hits++;
                  yield return row;
               }
            }
         }

         if (readerHasData) {
            while (reader.Read()) {
               ++index;
               if (end > 0 && end <= index) {
                  _context.Entity.Hits++;
               } else {
                  emptyDetector.Clear();
                  var row = _rowFactory.Create();
                  for (var i = 0; i < _context.InputFields.Length; i++) {
                     var field = _context.InputFields[i];
                     if (field.Type == "string") {
                        row[field] = reader.IsDBNull(i) ? null : reader.GetString(i);
                     } else {
                        row[field] = reader.IsDBNull(i) ? null : field.Convert(reader.GetValue(i));
                     }
                     emptyDetector.Append(row[field]);
                  }
                  emptyDetector.Trim(" ");
                  if (!emptyDetector.ToString().Equals(string.Empty)) {
                     _context.Entity.Hits++;
                     yield return row;
                  }
               }
            }
         } else {
            // try read from data set
            var dr = reader.AsDataSet().CreateDataReader();
            for (var i = 0; i < start; i++) {
               dr.Read();
               ++index;
            }
            // check type on first read
            if (dr.Read()) {
               ++index;
               // guard end == 0 (not a page request) the same way the branches above do
               if (end > 0 && end <= index) {
                  _context.Entity.Hits++;
               } else {
                  emptyDetector.Clear();
                  var row = _rowFactory.Create();
                  for (var i = 0; i < _context.InputFields.Length; i++) {
                     var field = _context.InputFields[i];
                     var expected = Constants.TypeSystem()[field.Type];
                     var actual = dr.GetValue(i).GetType();
                     if (expected != actual) {
                        _context.Warn($"The {field.Alias} field in {_context.Entity.Alias} expects a {expected}, but is reading a {actual}.");
                     }
                     if (field.Type == "string") {
                        row[field] = dr.GetValue(i) as string;
                     } else {
                        row[field] = field.Convert(dr.GetValue(i));
                     }
                     emptyDetector.Append(row[field]);
                  }
                  emptyDetector.Trim(" ");
                  if (!emptyDetector.ToString().Equals(string.Empty)) {
                     _context.Entity.Hits++;
                     yield return row;
                  }
               }
            }
            while (dr.Read()) {
               emptyDetector.Clear();
               var row = _rowFactory.Create();
               for (var i = 0; i < _context.InputFields.Length; i++) {
                  var field = _context.InputFields[i];
                  if (field.Type == "string") {
                     row[field] = dr.GetString(i);
                  } else {
                     row[field] = field.Convert(dr.GetValue(i));
                  }
                  emptyDetector.Append(row[field]);
               }
               emptyDetector.Trim(" ");
               if (!emptyDetector.ToString().Equals(string.Empty)) {
                  _context.Entity.Hits++;
                  yield return row;
               }
            }
         }
      }
   }
}
public IEnumerable<IRow> Read() {
   var stream = _client.OpenRead(_context.Connection.Url);
   if (stream == null) {
      _context.Error("Could not open {0}.", _context.Connection.Url);
      yield break;
   }

   var start = _context.Connection.Start;
   var end = _context.Connection.End;
   var isPageRequest = _context.Entity.IsPageRequest();
   if (isPageRequest) {
      start += (_context.Entity.Page * _context.Entity.PageSize) - _context.Entity.PageSize;
      end = start + _context.Entity.PageSize;
   }

   using (var reader = new StreamReader(stream)) {
      string line;
      _context.Entity.Hits = 1;

      if (start > 1) {
         for (var i = 1; i < start; i++) {
            reader.ReadLine();
            _context.Entity.Hits++;
         }
      }

      while ((line = reader.ReadLine()) != null) {
         if (end > 0 && _context.Entity.Hits >= end) {
            if (isPageRequest) {
               _context.Entity.Hits++;
               continue;
            }
            yield break;
         }
         _context.Entity.Hits++;

         var tokens = _regex.Split(line.Trim('"'));
         if (tokens.Length > 0) {
            var row = _rowFactory.Create();
            for (var i = 0; i < _context.InputFields.Length && i < tokens.Length; i++) {
               var field = _context.InputFields[i];
               row[field] = field.Convert(tokens[i]);
            }
            yield return row;
         }
      }

      if (isPageRequest && start > 1) {
         _context.Entity.Hits -= (start - 1);
      }
   }
}
private IEnumerable<IRow> PreRead() {
   _context.Debug(() => "Reading file stream.");

   var ignoreFirstLines = _context.Connection.Start > 1 ? _context.Connection.Start - 1 : _context.Connection.Start;
   var start = _context.Connection.Start;
   var end = 0;
   if (_context.Entity.IsPageRequest()) {
      start += (_context.Entity.Page * _context.Entity.Size) - _context.Entity.Size;
      end = start + _context.Entity.Size;
   }

   var current = _context.Connection.Start;

   var configuration = new CsvConfiguration(CultureInfo.InvariantCulture) {
      IgnoreBlankLines = true,
      Delimiter = string.IsNullOrEmpty(_context.Connection.Delimiter) ? "," : _context.Connection.Delimiter,
      Encoding = Encoding.GetEncoding(_context.Connection.Encoding)
   };

   if (_context.Connection.ErrorMode.Equals("IgnoreAndContinue", System.StringComparison.OrdinalIgnoreCase)) {
      configuration.BadDataFound = null; // skip the record
   }

   if (_context.Connection.TextQualifier != string.Empty) {
      configuration.Escape = _context.Connection.TextQualifier[0];
      configuration.Quote = _context.Connection.TextQualifier[0];
   }

   using (var csv = new CsvReader(_streamReader, configuration)) {
      while (csv.Read()) {
         if (csv.Parser.RawRow <= ignoreFirstLines) {
            continue;
         }
         if (end == 0 || current.Between(start, end)) {
            var row = _rowFactory.Create();
            for (var i = 0; i < _context.InputFields.Length; i++) {
               var data = csv.GetField(i);
               var field = _context.InputFields[i];
               row[field] = data;
            }
            yield return row;
            ++_context.Entity.Hits;
         }
         ++current;
         if (current == end) {
            break;
         }
      }
   }

   _streamReader.Close();
}
public IEnumerable<IRow> Read() {
   var encoding = Encoding.GetEncoding(_context.Connection.Encoding);
   var lineNo = 0;

   if (_fileInfo.Extension == ".xml") {
      var row = _rowFactory.Create();
      row[_field] = System.IO.File.ReadAllText(_fileInfo.FullName, encoding);
      yield return row;
   } else if (_context.Connection.LinePattern != string.Empty) {
      var regex = new Regex(_context.Connection.LinePattern, RegexOptions.Compiled);
      var prevLine = string.Empty;

      foreach (var line in ReadLines(_fileInfo.FullName, encoding)) {
         ++lineNo;
         if (_linesToKeep.Contains(lineNo)) {
            _context.Connection.Lines[lineNo] = line;
         }
         if (lineNo < _context.Connection.Start) {
            continue;
         }

         if (regex.IsMatch(line)) { // current line passes
            if (regex.IsMatch(prevLine)) { // previous line passes
               var row = _rowFactory.Create();
               row[_field] = string.Copy(prevLine);
               prevLine = line;
               yield return row;
            } else { // previous line fails
               prevLine = line;
            }
         } else { // current line fails
            var combined = prevLine + " " + line;
            if (regex.IsMatch(prevLine)) {
               if (regex.IsMatch(combined)) {
                  // if combined they still pass, combine and continue
                  prevLine = combined;
               } else {
                  // if combined they fail, let the valid previous line through and put
                  // line in prevLine in hopes subsequent lines will make it pass
                  var row = _rowFactory.Create();
                  row[_field] = string.Copy(prevLine);
                  prevLine = line;
                  yield return row;
               }
            } else {
               prevLine = combined;
            }
         }
      }

      if (regex.IsMatch(prevLine)) {
         var row = _rowFactory.Create();
         row[_field] = prevLine;
         yield return row;
      }
   } else {
      foreach (var line in ReadLines(_fileInfo.FullName, encoding)) {
         ++lineNo;
         if (_linesToKeep.Contains(lineNo)) {
            _context.Connection.Lines[lineNo] = line;
         }
         if (lineNo < _context.Connection.Start) {
            continue;
         }
         var row = _rowFactory.Create();
         row[_field] = line;
         yield return row;
      }
   }
}
public IEnumerable<IRow> Read() {
   ElasticsearchResponse<DynamicResponse> response;
   ElasticsearchDynamicValue hits;
   var from = 0;
   var size = 10;
   string body;
   bool warned = false;
   var scroll = !_context.Entity.IsPageRequest();

   if (!scroll) {
      from = (_context.Entity.Page * _context.Entity.Size) - _context.Entity.Size;
      body = WriteQuery(_fields, _readFrom, _context, scroll: false, from: from, size: _context.Entity.Size);
   } else {
      body = WriteQuery(_fields, _readFrom, _context, scroll: false, from: 0, size: 0);
      response = _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);
      if (response.Success) {
         hits = response.Body["hits"] as ElasticsearchDynamicValue;
         if (hits != null && hits.HasValue) {
            var total = hits["total"];
            try {
               if (_version.Major >= 7) {
                  // version 7 changed total to an object with "value" and "relation" properties
                  size = Convert.ToInt32(total["value"].Value);
               } else {
                  size = Convert.ToInt32(total.Value);
               }
            } catch (Exception ex) {
               warned = true;
               _context.Debug(() => total);
               _context.Warn($"Could not get the total number of matching documents from the elasticsearch response. Are you sure you're using version {_version}?");
               _context.Error(ex, ex.Message);
            }
            body = WriteQuery(_fields, _readFrom, _context, scroll: true, from: 0, size: size > ElasticsearchDefaultSizeLimit ? DefaultSize : size);
         }
      }
   }

   _context.Debug(() => body);
   _context.Entity.Query = body;

   response = scroll
      ? _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body, p => p.AddQueryString("scroll", _context.Connection.Scroll))
      : _client.Search<DynamicResponse>(_context.Connection.Index, _typeName, body);

   if (!response.Success) {
      LogError(response);
      yield break;
   }

   try {
      if (_version.Major >= 7) {
         // version 7 changed total to an object with "value" and "relation" properties
         _context.Entity.Hits = Convert.ToInt32(response.Body["hits"]["total"]["value"].Value);
      } else {
         _context.Entity.Hits = Convert.ToInt32(response.Body["hits"]["total"].Value);
      }
   } catch (Exception ex) {
      if (!warned) {
         _context.Debug(() => response.Body["hits"]);
         _context.Warn($"Could not get the total number of matching documents from the elasticsearch response. Are you sure you're using version {_version}?");
         _context.Error(ex.Message);
      }
   }

   hits = response.Body["hits"]["hits"] as ElasticsearchDynamicValue;
   if (hits == null || !hits.HasValue) {
      _context.Warn("No hits from elasticsearch");
      yield break;
   }

   var docs = hits.Value as IList<object>;
   if (docs == null) {
      _context.Error("No documents returned from elasticsearch!");
      yield break;
   }

   // if any of the fields do not exist, yield break
   if (docs.Count > 0) {
      var doc = docs.First() as IDictionary<string, object>;
      var source = doc?["_source"] as IDictionary<string, object>;
      if (source == null) {
         _context.Error("Missing _source from elasticsearch response!");
         yield break;
      }
      for (var i = 0; i < _fields.Length; i++) {
         if (source.ContainsKey(_fieldNames[i])) {
            continue;
         }
         _context.Error($"Field {_fieldNames[i]} does not exist!");
         yield break;
      }
   }

   var count = 0;
   foreach (var d in docs) {
      var doc = (IDictionary<string, object>)d;
      var row = _rowFactory.Create();
      var source = (IDictionary<string, object>)doc["_source"];
      for (var i = 0; i < _fields.Length; i++) {
         row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
      }
      yield return row;
   }
   count += docs.Count;

   // get this from the first search response (maybe), unless you have to aggregate it from all...
   foreach (var filter in _context.Entity.Filter.Where(f => f.Type == "facet" && !string.IsNullOrEmpty(f.Map))) {
      var map = _context.Process.Maps.First(m => m.Name == filter.Map);
      var buckets = response.Body["aggregations"][filter.Key]["buckets"] as ElasticsearchDynamicValue;
      if (buckets == null || !buckets.HasValue) {
         continue;
      }
      var items = buckets.Value as IEnumerable<object>;
      if (items == null) {
         continue;
      }
      foreach (var item in items.OfType<IDictionary<string, object>>()) {
         map.Items.Add(new MapItem { From = $"{item["key"]} ({item["doc_count"]})", To = item["key"] });
      }
   }

   if (!response.Body.ContainsKey("_scroll_id")) {
      yield break;
   }

   if (size == count) {
      _client.ClearScroll<DynamicResponse>(new PostData<object>(new { scroll_id = response.Body["_scroll_id"].Value }));
      yield break;
   }

   var scrolls = new HashSet<string>();
   do {
      var scrollId = response.Body["_scroll_id"].Value;
      scrolls.Add(scrollId);
      response = _client.Scroll<DynamicResponse>(new PostData<object>(new { scroll = _context.Connection.Scroll, scroll_id = scrollId }));
      if (response.Success) {
         docs = (IList<object>)response.Body["hits"]["hits"].Value;
         foreach (var d in docs) {
            var doc = (IDictionary<string, object>)d;
            var row = _rowFactory.Create();
            var source = (IDictionary<string, object>)doc["_source"];
            for (var i = 0; i < _fields.Length; i++) {
               row[_fields[i]] = _fields[i].Convert(source[_fieldNames[i]]);
            }
            yield return row;
         }
         count += docs.Count;
      } else {
         LogError(response);
      }
   } while (response.Success && count < size);

   _client.ClearScroll<DynamicResponse>(new PostData<object>(new { scroll_id = scrolls.ToArray() }));
}
public IRow Create(IDataReader reader, Field[] fields) {
   var row = _rowFactory.Create();

   if (_fieldCount == 0) {
      // first record: cache the field count, detect type mismatches, and choose a conversion per field
      _fieldCount = Math.Min(reader.FieldCount, fields.Length);
      _conversions = new List<Func<object, object>>(_fieldCount);
      for (var i = 0; i < _fieldCount; i++) {
         _conversions.Add(null);
      }
      _errors = new bool[fields.Length];
      for (var i = 0; i < _fieldCount; i++) {
         var inputType = reader.GetFieldType(i);
         _errors[i] = inputType != _typeMap[fields[i].Type];
         if (_errors[i]) {
            if (fields[i].Transforms.Any() && fields[i].Transforms.First().Method == "convert") {
               _conversions[i] = o => o; // the user has set a conversion
            } else {
               _conversions[i] = fields[i].Convert;
               _context.Warn("Type mismatch for {0}. Expected {1}, but read {2}. Change type or add conversion.", fields[i].Name, fields[i].Type, inputType);
            }
         } else {
            _conversions[i] = o => o;
         }
      }
      for (var i = 0; i < _fieldCount; i++) {
         if (reader.IsDBNull(i)) {
            continue;
         }
         if (_errors[i]) {
            var value = reader.GetValue(i);
            try {
               row[fields[i]] = fields[i].Type == "object" ? value : _conversions[i](value);
            } catch (FormatException) {
               _context.Error($"Could not convert value {value} in field {fields[i].Alias} to {fields[i].Type}");
            }
         } else {
            row[fields[i]] = reader.GetValue(i);
         }
      }
   } else {
      for (var i = 0; i < _fieldCount; i++) {
         if (reader.IsDBNull(i)) {
            continue;
         }
         if (_errors[i]) {
            row[fields[i]] = fields[i].Type == "object" ? reader.GetValue(i) : _conversions[i](reader.GetValue(i));
         } else {
            row[fields[i]] = reader.GetValue(i);
         }
      }
   }

   return row;
}
public IEnumerable<IRow> Read() {
   var searchOption = (SearchOption)Enum.Parse(typeof(SearchOption), _input.Connection.SearchOption, true);

   _input.Info($"Searching folder: {_input.Connection.Folder}");
   var files = new DirectoryInfo(_input.Connection.Folder).GetFiles(_input.Connection.SearchPattern, searchOption);
   _input.Info($"Found {files.Length} files.");

   var names = _input.InputFields.Select(f => f.Name.ToLower()).ToArray();

   foreach (var file in files) {
      var row = _rowFactory.Create();
      for (var i = 0; i < _input.InputFields.Length; i++) {
         var field = _input.InputFields[i];
         switch (names[i]) {
            case "creation":
            case "created":
            case "creationtime":
            case "creationtimeutc":
               row[field] = file.CreationTimeUtc;
               break;
            case "folder":
            case "foldername":
            case "directory":
            case "directoryname":
               row[field] = file.DirectoryName ?? string.Empty;
               break;
            case "ext":
            case "extension":
               row[field] = file.Extension;
               break;
            case "fullname":
               row[field] = file.FullName;
               break;
            case "lastwrite":
            case "updated":
            case "lastwritetime":
            case "lastwritetimeutc":
               row[field] = file.LastWriteTimeUtc;
               break;
            case "lastaccess":
            case "accessed":
            case "lastaccesstime":
            case "lastaccesstimeutc":
               row[field] = file.LastAccessTimeUtc;
               break;
            case "size":
            case "bytes":
            case "length":
               if (field.Type != "long" && field.Type != "int64") {
                  _input.Warn($"The {names[i]} input is type long, but the field is set up as {field.Type}.");
               }
               row[field] = file.Length;
               break;
            case "name":
               row[field] = file.Name;
               break;
            case "readonly":
            case "isreadonly":
               row[field] = file.IsReadOnly;
               break;
         }
      }
      yield return row;
   }
}
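All of the examples above depend on an injected `IRowFactory` that creates `IRow` instances indexed by field. The exact contract lives in the project's own source and is not shown here; the following is only a minimal sketch of what such a factory and row might look like, assuming a dictionary-backed row keyed by the field's alias. The `DictionaryRow` and `DictionaryRowFactory` names are illustrative, not the project's actual implementation.

using System.Collections.Generic;

// Minimal sketch (assumed, for illustration only) of the row/factory contract
// the examples above rely on: a factory that creates rows, and a row whose
// values are read and written by field.
public interface ISketchRow {
   object this[IField field] { get; set; }
}

public interface ISketchRowFactory {
   ISketchRow Create();
}

// A hypothetical dictionary-backed row keyed by the field's alias.
public class DictionaryRow : ISketchRow {
   private readonly Dictionary<string, object> _storage = new Dictionary<string, object>();

   public object this[IField field] {
      get { return _storage.TryGetValue(field.Alias, out var value) ? value : null; }
      set { _storage[field.Alias] = value; }
   }
}

public class DictionaryRowFactory : ISketchRowFactory {
   public ISketchRow Create() {
      return new DictionaryRow();
   }
}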