/// <summary>
/// Passes the records through the inner field filter service, then converts the <c>Value</c> of every
/// field that declares a type into its <c>TypedValue</c>. Only records whose typed fields all converted
/// without error are yielded.
/// </summary>
/// <param name="targetRecords">Records handed to the inner field filter service.</param>
/// <param name="recordConfiguration">Configuration forwarded unchanged to the inner field filter service.</param>
/// <returns>
/// The records that converted successfully. If a conversion throws and
/// <c>TextWranglerConfig.OnException</c> returns <c>false</c>, the record is skipped; if it returns
/// <c>true</c>, the exception propagates to the caller.
/// </returns>
public IEnumerable <TargetRecord> Filter(IEnumerable <TargetRecord> targetRecords, RecordConfiguration recordConfiguration)
        {
            var recordNumber = 0;

            foreach (var candidate in _innerFieldFilterService.Filter(targetRecords, recordConfiguration))
            {
                // 1-based position of the record, used only in the failure message below
                recordNumber++;

                bool converted;

                // C# does not allow yield inside a try block that has a catch clause,
                // so the outcome is recorded here and acted on after the try/catch.
                try
                {
                    var typedFields = candidate.Fields.Where(field => !field.Type.IsNullOrEmpty());

                    foreach (var field in typedFields)
                    {
                        field.TypedValue = field.Value.ConvertToType(field.Type.GetSystemType());
                    }

                    converted = true;
                }
                catch (Exception x) when(!TextWranglerConfig.OnException(x, $"Could not convert target record [{recordNumber}] fields to types specified"))
                {
                    // OnException handler says not to rethrow, so this record is dropped and processing continues
                    converted = false;
                }

                if (!converted)
                {
                    continue;
                }

                yield return candidate;
            }
        }
Example #2
0
        /// <summary>
        /// Takes raw source records (likely via an <see cref="IRecordReader" />) serially and builds <see cref="TargetRecord" />s based on the
        /// <see cref="RecordConfiguration" /> and <see cref="IFieldFilterService" />
        /// </summary>
        /// <param name="sourceRecords">Raw field name/value map of source records</param>
        /// <param name="recordConfiguration">Configuration (<see cref="RecordConfiguration" />) for use with generating the resulting enumerable of <see cref="TargetRecord" /></param>
        /// <returns>Enumerable of <see cref="TargetRecord" /></returns>
        public IEnumerable <TargetRecord> Build(IEnumerable <IReadOnlyDictionary <string, string> > sourceRecords,
                                                RecordConfiguration recordConfiguration)
        {
            var recordNumber = 1;

            foreach (var sourceRecord in sourceRecords)
            {
                TargetRecord targetRecord = null;

                try
                {
                    targetRecord = new TargetRecord(recordConfiguration.Fields
                                                    .Select(fc => new TargetField(fc.Name,
                                                                                  fc.Sources?
                                                                                  .Select(sourceFieldConfig =>
                    {
                        if (!sourceRecord.TryGetValue(sourceFieldConfig.Name, out var sourceFieldValue))
                        {
                            throw new TextWranglerRecordFieldConfigInvalidException(recordConfiguration.RecordTypeName,
                                                                                    fc.Name,
                                                                                    $"Source field [{sourceFieldConfig.Name}] does not exist in source record");
                        }

                        var filteredSourceValue = _fieldFilterService.Filter(sourceFieldValue, sourceFieldConfig.Filters);

                        return(Name: sourceFieldConfig.Name, Value: filteredSourceValue);
                    }))
                    {
                        Value = fc.Format,
                        Type  = fc.Type
                    }));
                }
Example #3
0
        /// <summary>
        /// Runs the read -> build -> format -> filter -> write pipeline for the configured record type,
        /// then logs how many target records were written, how many source records were read, and the elapsed time.
        /// </summary>
        /// <param name="limit">Value passed to the record reader's <c>GetRecords</c>; defaults to <c>int.MaxValue</c>.</param>
        public void Wrangle(int limit = int.MaxValue)
        {
            var stopWatch = Stopwatch.StartNew();

            _logger.LogInformation($"Starting Wrangling for recordType [{_recordConfiguration.RecordTypeName}]");

            // Simple lazy pipeline
            // Get source records, build target records, format the built targets, filter the final result, write the records...
            // The trailing Count() is what enumerates the pipeline (assuming each stage is lazy, as noted above).
            var countProcessed = _recordReader.GetRecords(limit)
                                 .Then(source => _recordBuilder.Build(source, _recordConfiguration))
                                 .Then(built => _fieldFormatter.Format(built, _recordConfiguration))
                                 .Then(format => _fieldFilterService.Filter(format, _recordConfiguration))
                                 .Then(filter => _recordWriter.Write(filter, _recordConfiguration))
                                 .Count();

            stopWatch.Stop();

            // Elapsed is formatted with hours: "mm" alone is only the 0-59 minutes component of the TimeSpan,
            // so a run longer than an hour would otherwise be reported without its hours.
            _logger.LogInformation($"Finished Wrangling, [{countProcessed}] target records successfully written, [{_recordReader.CountRead}] source records read, in [{stopWatch.Elapsed:hh\\:mm\\:ss}]");
        }