/// <summary>
/// Runs the inner field filter service over <paramref name="targetRecords" /> and then, for every field that
/// declares a non-empty type, converts the field's value to that type and stores it in the field's typed value.
/// Records whose conversion fails are skipped when the configured exception handler says not to rethrow.
/// </summary>
/// <param name="targetRecords">Target records to filter</param>
/// <param name="recordConfiguration">Configuration handed through to the inner field filter service</param>
/// <returns>Lazily produced enumerable of the records whose typed fields were all converted</returns>
public IEnumerable<TargetRecord> Filter(IEnumerable<TargetRecord> targetRecords, RecordConfiguration recordConfiguration)
{
    // 1-based position of the record within the inner filter's output; only used for the error message
    var position = 0;

    foreach (var candidate in _innerFieldFilterService.Filter(targetRecords, recordConfiguration))
    {
        position++;

        if (TryAssignTypedValues(candidate, position))
        {
            yield return candidate;
        }
    }

    // Converts every typed field of the record; returns false when the conversion failed and the
    // exception handler decided the failure should not be rethrown.
    bool TryAssignTypedValues(TargetRecord record, int number)
    {
        try
        {
            var typedFields = record.Fields.Where(field => !field.Type.IsNullOrEmpty());

            foreach (var field in typedFields)
            {
                var systemType = field.Type.GetSystemType();
                field.TypedValue = field.Value.ConvertToType(systemType);
            }

            return true;
        }
        catch (Exception x) when (!TextWranglerConfig.OnException(x, $"Could not convert target record [{number}] fields to types specified"))
        {
            // The handler returned false (do not rethrow): report failure so the caller skips this record
            return false;
        }
    }
}
/// <summary> /// Takes raw source records (likely via a <see cref="IRecordReader" />) serially and builds <see cref="TargetRecord" />s based on the /// <see cref="RecordConfiguration" /> and <see cref="IFieldFilterService" /> /// </summary> /// <param name="sourceRecords">Raw field name/value map of source records</param> /// <param name="recordConfiguration">Configuration (<see cref="RecordConfiguration" />) for use with generating the resulting emumerable of <see cref="TargetRecord" /></param> /// <returns>Enumerable of <see cref="TargetRecord" /></returns> public IEnumerable <TargetRecord> Build(IEnumerable <IReadOnlyDictionary <string, string> > sourceRecords, RecordConfiguration recordConfiguration) { var recordNumber = 1; foreach (var sourceRecord in sourceRecords) { TargetRecord targetRecord = null; try { targetRecord = new TargetRecord(recordConfiguration.Fields .Select(fc => new TargetField(fc.Name, fc.Sources? .Select(sourceFieldConfig => { if (!sourceRecord.TryGetValue(sourceFieldConfig.Name, out var sourceFieldValue)) { throw new TextWranglerRecordFieldConfigInvalidException(recordConfiguration.RecordTypeName, fc.Name, $"Source field [{sourceFieldConfig.Name}] does not exist in source record"); } var filteredSourceValue = _fieldFilterService.Filter(sourceFieldValue, sourceFieldConfig.Filters); return(Name: sourceFieldConfig.Name, Value: filteredSourceValue); })) { Value = fc.Format, Type = fc.Type })); }
/// <summary>
/// Runs the whole wrangling pipeline for the configured record type: reads source records, builds target
/// records, formats them, filters them and writes them, logging a start message and a summary when done.
/// </summary>
/// <param name="limit">Value passed to the record reader's <c>GetRecords</c>; defaults to <see cref="int.MaxValue" /></param>
public void Wrangle(int limit = int.MaxValue)
{
    var stopWatch = Stopwatch.StartNew();

    _logger.LogInformation($"Starting Wrangling for recordType [{_recordConfiguration.RecordTypeName}]");

    // Simple lazy pipeline
    // Get source records, build target records, format the built targets, filter the final result, write the records...
    // The trailing Count() enumerates the final sequence and yields the number of records that came out of the writer.
    var countProcessed = _recordReader.GetRecords(limit)
        .Then(source => _recordBuilder.Build(source, _recordConfiguration))
        .Then(built => _fieldFormatter.Format(built, _recordConfiguration))
        .Then(format => _fieldFilterService.Filter(format, _recordConfiguration))
        .Then(filter => _recordWriter.Write(filter, _recordConfiguration))
        .Count();

    stopWatch.Stop();

    // A TimeSpan "mm" specifier only shows the minutes component (0-59), silently dropping whole hours.
    // Total minutes keep the same "mm:ss" text for runs under an hour and stay correct for longer runs.
    var elapsed = stopWatch.Elapsed;
    var elapsedText = $"{(int)elapsed.TotalMinutes:00}:{elapsed.Seconds:00}";

    _logger.LogInformation($"Finished Wrangling, [{countProcessed}] target records successfully written, [{_recordReader.CountRead}] source records read, in [{elapsedText}]");
}