internal DataProcessor(Func <Task <string> > intakeSupplierAsync, Action <string> outputConsumer, Action <int> progressHandler)
        {
            var config = new OrchestratorConfig()
            {
                ReportProgress         = true,
                ProgressInterval       = 10,
                ProgressChangedHandler = (s, e) => { if (e.Phase == Phase.Intake)
                                                     {
                                                         progressHandler(e.RecCnt);
                                                     }
                },
                PhaseFinishedHandler = (s, e) => { if (e.Phase == Phase.Intake)
                                                   {
                                                       progressHandler(e.RecCnt);
                                                   }
                },
                InputDataKind               = KindOfTextData.Delimited,
                InputFields                 = "PlaneDescription,IataCode,IcaoCode",
                AsyncIntake                 = true,
                TransformerType             = TransformerType.Universal,
                UniversalTransformer        = FilterAndReorganizeFields,
                AllowTransformToAlterFields = true,
                OutputDataKind              = KindOfTextData.Flat,
                OutputFields                = "IataCode|4,Hyphen|2,PlaneDescription|70",
                ExcludeExtraneousFields     = true
            };

            config.SetAsyncIntakeSupplier(intakeSupplierAsync);
            config.SetOutputConsumer(outputConsumer);

            Orchestrator = OrchestratorCreator.GetEtlOrchestrator(config);
        }
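A minimal usage sketch for the constructor above (the data and delegates are hypothetical; it assumes DataProcessor exposes the Orchestrator it assigns, and that ExecuteAsync returns a ProcessResult as in the tests further down):

    // Hypothetical intake: comma-delimited lines matching InputFields "PlaneDescription,IataCode,IcaoCode".
    var pending = new Queue <string>(new[] { "Boeing 747-400,744,B744", "Airbus A320,320,A320", "Embraer 175,E75,E170" });

    Func <Task <string> > intakeAsync = () => Task.FromResult(pending.Count > 0 ? pending.Dequeue() : null); // null signals end of data, as in the supplier tests below
    Action <string> outputConsumer    = line => Console.WriteLine(line ?? "<end of output>");
    Action <int> progressHandler      = recCnt => Console.Write($"\r{recCnt} records read...");

    var processor = new DataProcessor(intakeAsync, outputConsumer, progressHandler);
    var result    = processor.Orchestrator.ExecuteAsync().Result; // ProcessResult, as in the tests below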
Example #2
        public void Initialize()
        {
            _config = new OrchestratorConfig();

            _inLines        = _intakeLines().Select(l => l.ToExternalTuple()).ToList();
            _resultingLines = new List <ExternalLine>();
        }
Example #3
        public EtlOrchestrator_tests_ArbitraryOutput()
        {
            _config = new OrchestratorConfig
            {
                InputDataKind = KindOfTextData.Keyword
            };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            _config.RetainQuotes              = false;
            _config.InputKeyPrefix            = "@p";
            _config.ExcludeItemsMissingPrefix = false;
            _config.ActionOnDuplicateKey      = ActionOnDuplicateKey.IgnoreItem;
            _config.TypeDefiner              = key => key == "NUM" ? new ItemDef(ItemType.Int, null) : new ItemDef(ItemType.String, null); //NUM Int, everything else String
            _config.ClusterMarker            = (rec, prevRec, recCnt) => { return(true); };                                                //single record clusters
            _config.AllowOnTheFlyInputFields = true;
            _config.TransformerType          = TransformerType.ClusterFilter;
            _config.ClusterFilterPredicate   = c => true; // no transformations, data passed as is
            _config.OutputDataKind           = KindOfTextData.Arbitrary;
            _config.ArbitraryOutputDefs      = new string[] {
                "Record type is {RECTYPE},",
                " name is {NAME}",
                " and number is {NUM}.",
                " Void item here."
            };
            _config.SetOutputConsumer(l => { if (l != null)
                                             {
                                                 _resultingLines.Add(l);
                                             }
                                      });

            _resultingLines = new List <string>();
        }
        public void Process_IntakeFileAndSupplierPresent_SupplierWins()
        {
            //arrange
            int inCnt = 0;

            var config = new OrchestratorConfig();

            config.InputFileNames = "C:\\non-existing-file.abc";
            //config.IntakeTextSupplier = () => inCnt++ < 5 ? $"Line #{inCnt}" : null;
            config.TextIntakeSupplier = () =>
            {
                return(inCnt++ < 5 ? $"Line #{inCnt}" : null);
            };

            var orchestrator = new EtlOrchestrator(config);

            //act
            var result = orchestrator.ExecuteAsync().Result;

            //assert
            result.CompletionStatus.Should().Be(CompletionStatus.IntakeDepleted); //note that InputFileNames would've caused InitializationError
            result.RowsRead.Should().Be(5);
            result.ClustersRead.Should().Be(5);
            result.RowsWritten.Should().Be(5);
            result.ClustersWritten.Should().Be(5);
            inCnt.Should().Be(6);
        }
Example #5
        public ClusterMarker_tests()
        {
            _config = new OrchestratorConfig
            {
                InputDataKind = KindOfTextData.Keyword
            };
            var sn = 0;                                                                                                           //closure to facilitate SourceNo calculation (note that GetStringTupleSupplier calls the sourceNoEval function (provided as parameter below) exactly once per iteration)

            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).GetStringTupleSupplier(() => sn++ < 3 ? 1 : 2)); //first 3 - source 1, rest - source 2
            _config.RetainQuotes   = false;
            _config.InputKeyPrefix = "@p";
            // The DeferTransformation.Indefinitely setting below prevents Data Conveyer from linking clusteringBlock to transformingBlock.
            // Without it, transformingBlock could randomly "steal" clusters from _resultsExtractor, which would cause random test failures.
            _config.DeferTransformation      = DeferTransformation.Indefinitely;
            _config.AllowOnTheFlyInputFields = true;
            _config.OutputConsumer           = (t, gc) => { }; //throwaway consumer

            //prepare extraction of the results from the pipeline
            _resultingClusters = new ConcurrentQueue <KeyValCluster>();
            _resultsExtractor  = new ActionBlock <KeyValCluster>(c => _resultingClusters.Enqueue(c));

            // AsserterOutput tuple: Item1=Ext, Item2=Header, Item3=Formatter, Item4=ExcFormatter
            string ShowRec(ICluster c, int idx) => idx >= c.Count ? string.Empty : c[idx]?["NUM"] + "{" + c[idx].Count + "}"; // e.g. 223{4} means record with 4 items and item NUM = 223

            AsserterOutput asserterOutputToCsv = (".csv",
                                                  "ClstrNo,NoOfRecs,Rec1,Rec2,Rec3,Rec4,Rec5,Rec6,Rec7,Rec8,Rec9,Rec10",
                                                  c => $"{ c.ClstrNo },{ c.Count },{ ShowRec(c,0) },{ ShowRec(c,1) },{ ShowRec(c,2) },{ ShowRec(c,3) },{ ShowRec(c,4) },{ ShowRec(c,5) },{ ShowRec(c,6) },{ ShowRec(c,7) },{ ShowRec(c,8) },{ ShowRec(c,9) }",
                                                  ex => ex.ToString().Split("\r\n").Select(l => "\"" + l.Replace('"', '\'') + "\"")
                                                  );

            _traceableAsserter = new TraceableAsserter <KeyValCluster>("ClusterTestFailures\\", asserterOutputToCsv);
        }
Example #6
        public void Initialize()
        {
            _config = new OrchestratorConfig();
            _config.AllowTransformToAlterFields = true;
            _config.PropertyBinEntities         = PropertyBinAttachedTo.Clusters;

            // simple type definitions, everything string, except for fields starting with I_ (int)
            Func <string, ItemType> fldTypeFunc = key => key.StartsWith("I_") ? ItemType.Int : ItemType.String;
            var initFldTypes = new ConcurrentDictionary <string, ItemType>();
            Func <string, string> fldFormatFunc = key => string.Empty;
            var initFldFormats = new ConcurrentDictionary <string, string>();

            _typeDefs = new TypeDefinitions(fldTypeFunc, initFldTypes, fldFormatFunc, initFldFormats);

            var items1 = new IItem[] { KeyValItem.CreateItem("IDCD_ID", "71941", _typeDefs),
                                       KeyValItem.CreateItem("blah", "blahblah", _typeDefs),
                                       KeyValItem.CreateItem("I_num", 243, _typeDefs) };

            var items2 = new IItem[] { KeyValItem.CreateItem("I_#", 15, _typeDefs),
                                       KeyValItem.CreateItem("Fld1", "data1", _typeDefs) };

            var recs = new KeyValRecord[] { new KeyValRecord(items1, 16, 1, 0, null, null, null, _typeDefs, _config, null, ActionOnDuplicateKey.IgnoreItem),
                                            new KeyValRecord(items2, 17, 1, 0, null, null, null, _typeDefs, _config, null, ActionOnDuplicateKey.IgnoreItem) };

            _cluster = new KeyValCluster(recs, 9, 16, 1, null, new Dictionary <string, object>(), _typeDefs, _config, null); //clstr# 9 starting at rec# 16; AllowTransformToAlterFields is true
        }
Example #7
        internal readonly Func <Phase, int, PhaseStatus> _processingStatusSupplier; // internal to allow ReadOnlyCluster ctor (& record specific transform providers) use the same processingStatusSupplier

        //Note that KeyValCluster is constructed eagerly (recList will be consumed upon object creation)

        internal KeyValCluster(IEnumerable <IRecord> recList,
                               int clstrNo,
                               int startRecNo,
                               int startSourceNo,
                               IGlobalCache globalCache,
                               IDictionary <string, object> propertyBin,
                               TypeDefinitions typeDefinitions,
                               OrchestratorConfig config,
                               Func <Phase, int, PhaseStatus> processingStatusSupplier)
        {
            this.ClstrNo       = clstrNo;
            this.StartRecNo    = startRecNo;
            this.StartSourceNo = startSourceNo;
            //Note that StartSourceNo should match the 1st record, but to facilitate cloning, etc. it was decided to expose it
            // separately as ctor parm as opposed to reading it like this:  this.StartSourceNo = recList.Any() ? recList.First().SourceNo : 0;
            this._recordColl               = new RecordCollection(recList);
            this.GlobalCache               = globalCache;
            this._typeDefinitions          = typeDefinitions;
            this._config                   = config;
            this._processingStatusSupplier = processingStatusSupplier;

            //Make sure all records have the ClstrNo matching the cluster they belong to:
            this._recordColl.ForEach(r => (r as KeyValRecord)?.SetClstrNo(clstrNo));

            PropertyBin = (_config.PropertyBinEntities & PropertyBinAttachedTo.Clusters) == PropertyBinAttachedTo.Clusters
               ? propertyBin ?? new Dictionary <string, object>() //"reuse" PB in case of cloning, creation of ReadOnlyCluster wrapper, etc.
               : null;                                            //null if Clusters flag not set in PropertyBinEntities
        } //ctor
        internal FileProcessor(string inFile, string outLocation)
        {
            var config = new OrchestratorConfig()
            {
                ReportProgress         = true,
                ProgressInterval       = 1000,
                ProgressChangedHandler = (s, e) => { if (e.Phase == Phase.Intake)
                                                     {
                                                         Console.Write($"\rProcessed {e.RecCnt:N0} records so far...");
                                                     }
                },
                PhaseFinishedHandler = (s, e) => { if (e.Phase == Phase.Intake)
                                                   {
                                                       Console.WriteLine($"\rProcessed {e.RecCnt:N0} records. Done!   ");
                                                   }
                },
                InputDataKind           = KindOfTextData.Delimited,
                HeadersInFirstInputRow  = true,
                InputFileName           = inFile,
                TransformerType         = TransformerType.RecordFilter,
                RecordFilterPredicate   = r => (string)r["NPPES Provider State"] == "NJ" && ((string)r["Specialty Description"]).ToLower() == "dentist",
                OutputDataKind          = KindOfTextData.Delimited,
                HeadersInFirstOutputRow = true,
                OutputFileName          = outLocation + Path.DirectorySeparatorChar + Path.GetFileNameWithoutExtension(inFile) + "_NJ_dentists.csv"
            };

            Orchestrator = OrchestratorCreator.GetEtlOrchestrator(config);
        }
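A hedged invocation sketch for the ctor above (the input file path and output folder are made up; it assumes the Orchestrator property is accessible to the caller):

    // Hypothetical run: filter a delimited prescriber file down to NJ dentists.
    var processor = new FileProcessor(@"C:\Data\Prescribers.csv", @"C:\Data\Out");
    var result    = processor.Orchestrator.ExecuteAsync().Result;
    Console.WriteLine($"Read {result.RowsRead:N0} rows, wrote {result.RowsWritten:N0} NJ dentist rows.");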
        internal FileProcessor(string inFile, string outLocation)
        {
            var outFileBody = outLocation + Path.DirectorySeparatorChar + Path.GetFileNameWithoutExtension(inFile);
            var outFileExt  = Path.GetExtension(inFile);
            var config      = new OrchestratorConfig()
            {
                GlobalCacheElements        = new string[] { "LowCnt|0", "HighCnt|0", "AllCnt|0", "IsaElems", "GsElems" },
                DefaultX12SegmentDelimiter = "~\r\n",
                InputDataKind           = KindOfTextData.X12,
                InputFileName           = inFile,
                ClusterMarker           = SegmentStartsCluster,
                MarkerStartsCluster     = true, //predicate (marker) matches the first record in cluster
                PrependHeadCluster      = true, // to contain ISA/GS segments for _high file
                AppendFootCluster       = true, // to contain IEA/GE segments for _high file
                RecordInitiator         = StoreIsaAndGsSegments,
                PropertyBinEntities     = PropertyBinAttachedTo.Clusters,
                DeferTransformation     = DeferTransformation.UntilRecordInitiation,
                ConcurrencyLevel        = 4,
                TransformerType         = TransformerType.Clusterbound,
                ClusterboundTransformer = ProcessX12Transaction,
                RouterType      = RouterType.PerCluster,
                ClusterRouter   = SendToLowOrHigh,
                OutputDataKind  = KindOfTextData.X12,
                OutputFileNames = outFileBody + "_low" + outFileExt + "|" + outFileBody + "_high" + outFileExt //1st: less than $1,000; 2nd: $1,000 or more
            };

            Orchestrator = OrchestratorCreator.GetEtlOrchestrator(config);
        }
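SegmentStartsCluster, StoreIsaAndGsSegments, ProcessX12Transaction and SendToLowOrHigh above are user-supplied methods not shown in this listing. As a hedged sketch only, a cluster marker that starts a new cluster at every ST segment (assuming the same (record, previous record, record count) delegate shape used by the ClusterMarker lambdas elsewhere in this listing, and the Segment/Elem001/... field naming of X12 intake noted further down) might look like this:

    // Hypothetical sketch - not the actual SegmentStartsCluster implementation.
    // On X12 intake each record's "Segment" field holds the segment ID (ISA, GS, ST, SE, ...),
    // so starting a cluster at every ST groups one transaction set per cluster.
    private bool SegmentStartsCluster(IRecord rec, IRecord prevRec, int recCnt)
    {
        return (string)rec["Segment"] == "ST";
    }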
Example #10
        public void Initialize()
        {
            _config = new OrchestratorConfig(new MockLogger(LogEntrySeverity.Debug)); //log all levels

            // simple type definitions, everything string
            _typeDefs = new TypeDefinitions(k => ItemType.String, new ConcurrentDictionary <string, ItemType>(), k => string.Empty, new ConcurrentDictionary <string, string>());
        }
Example #11
        protected OutputProvider(OrchestratorConfig config, IGlobalCache globalCache)
        {
            _config      = config;
            _globalCache = globalCache;

            if (config.OutputFields == null)
            {
                _fieldsToUse = null; //this will be overwritten based on actual set of fields used (SetFieldsToUse method)
            }
            else //Output fields specified in config, they will drive output
            {                                                //this list of field names will not be overwritten
                _fieldsToUse = config.OutputFields.ListOfSingleElements(0)?.ToList();
                Debug.Assert(_fieldsToUse.IsNonEmptyList()); //if specified, the list must be complete
                //TODO: Error message instead of the above Assert (part of "config scrubber")
            }

            _outputToWriter = _config.AsyncOutput
               ? _config.AsyncOutputConsumer == OrchestratorConfig.DefaultAsyncOutputConsumer && (_config.OutputWriters != null || _config.OutputFileNames != null)
               : _config.OutputConsumer == OrchestratorConfig.DefaultOutputConsumer && (_config.OutputWriters != null || _config.OutputFileNames != null);

            _initErrorOccurred = new Lazy <bool>(() => !InitOutput());

            _targets      = new ConcurrentDictionary <int, LineCounts>();
            _lastTargetNo = 1;

            _atOutputStart = new SingleUseBool();
        } //ctor
Example #12
        /// <summary>
        /// Factory method that returns a concrete instance of the derived class
        /// </summary>
        /// <param name="config"></param>
        /// <param name="globalCache"></param>
        /// <param name="typeDefinitions"></param>
        /// <param name="x12DelimitersForOutput"></param>
        /// <returns></returns>
        internal static IntakeProvider CreateProvider(OrchestratorConfig config, IGlobalCache globalCache, TypeDefinitions typeDefinitions, X12Delimiters x12DelimitersForOutput)
        {
            switch (config.InputDataKind)
            {
            case KindOfTextData.Raw:
                return(new RawIntakeProvider(config, globalCache, typeDefinitions));

            case KindOfTextData.Keyword:
                return(new KwIntakeProvider(config, globalCache, typeDefinitions));

            case KindOfTextData.Delimited:
                return(new DelimitedIntakeProvider(config, globalCache, typeDefinitions));

            case KindOfTextData.Flat:
                return(new FlatIntakeProvider(config, globalCache, typeDefinitions));

            case KindOfTextData.Arbitrary:
                return(new ArbitraryIntakeProvider(config, globalCache, typeDefinitions));

            case KindOfTextData.X12:
                return(new X12IntakeProvider(config, globalCache, typeDefinitions, x12DelimitersForOutput));

            case KindOfTextData.XML:
            case KindOfTextData.JSON:
            case KindOfTextData.UnboundJSON:
                return(new XrecordIntakeProvider(config, globalCache, typeDefinitions));

            default:
                //TODO: Message - fatal error, undetermined type of intake data
                return(null);
            }
        }
        private async Task <(ProcessResult result, string output)> ProcessX12Async(TextReader reader)
        {
            var retVal = new StringBuilder();

            var config = new OrchestratorConfig()
            {
                InputDataKind               = KindOfTextData.X12,
                AsyncIntake                 = true,
                IntakeReader                = () => reader,
                ClusterMarker               = SegmentStartsCluster,
                MarkerStartsCluster         = true, //predicate (marker) matches the first record in cluster
                TransformerType             = TransformerType.Universal,
                AllowTransformToAlterFields = true,
                UniversalTransformer        = ExtractNeededElements,
                OutputDataKind              = KindOfTextData.JSON,
                XmlJsonOutputSettings       = "RecordNode|,IndentChars| ",
                OutputWriter                = () => new StringWriter(retVal)
            };

            ProcessResult result;

            using (var orchtr = OrchestratorCreator.GetEtlOrchestrator(config))
            {
                result = await orchtr.ExecuteAsync();
            }

            return(result, retVal.ToString());
        }
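A hedged usage sketch for the method above, called from an async context (the X12 payload is a made-up fragment; SegmentStartsCluster and ExtractNeededElements are the user methods referenced in the config and are not shown here):

    // Hypothetical call: translate an in-memory X12 snippet to JSON.
    var x12Data = "ST*835*0001~\r\nBPR*I*1500*C~\r\nSE*3*0001~\r\n"; // made-up fragment
    var (result, jsonOutput) = await ProcessX12Async(new StringReader(x12Data));
    Console.WriteLine($"{result.RowsRead} segments read; status {result.CompletionStatus}");
    Console.WriteLine(jsonOutput);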
Example #14
        /// <summary>
        /// Factory method that returns a concrete instance of the derived class
        /// </summary>
        /// <param name="config"></param>
        /// <param name="globalCache"></param>
        /// <param name="x12Delimiters"></param>
        /// <returns></returns>
        internal static OutputProvider CreateProvider(OrchestratorConfig config, IGlobalCache globalCache, X12Delimiters x12Delimiters)
        {
            switch (config.OutputDataKind)
            {
            case KindOfTextData.Raw:
                return(new RawOutputProvider(config, globalCache));

            case KindOfTextData.Keyword:
                return(new KwOutputProvider(config, globalCache));

            case KindOfTextData.Delimited:
                return(new DelimitedOutputProvider(config, globalCache));

            case KindOfTextData.Flat:
                return(new FlatOutputProvider(config, globalCache));

            case KindOfTextData.Arbitrary:
                return(new ArbitraryOutputProvider(config, globalCache));

            case KindOfTextData.X12:
                return(new X12OutputProvider(config, globalCache, x12Delimiters));

            case KindOfTextData.XML:
                var allItemTypes = ItemType.Void | ItemType.Bool | ItemType.DateTime | ItemType.Decimal | ItemType.Int | ItemType.String;
                return(new XrecordOutputProvider(allItemTypes, config, globalCache));

            case KindOfTextData.JSON:
            case KindOfTextData.UnboundJSON:
                return(new XrecordOutputProvider(ItemType.DateTime, config, globalCache));

            default:
                //TODO: Message - fatal error, undetermined type of output data
                return(null);
            }
        }
        private readonly List <string> _resultingLines; //container of the test results

        public EtlOrchestrator_tests_ProcessResult()
        {
            _fatalLogMsgs = new List <Tuple <LogEntrySeverity, string, LogEntry> >();
            var mockFatalLogger = new Mock <ILogger>(); //records Fatal messages to _fatalLogMsgs (plus title box)
            mockFatalLogger.Setup(l => l.LoggingThreshold).Returns(LogEntrySeverity.Fatal);
            mockFatalLogger.Setup(l => l.Log(It.IsAny <LogEntry>()))
            .Callback((LogEntry e) => { if (e.Severity <= LogEntrySeverity.Fatal)
                                        {
                                            _fatalLogMsgs.Add(Tuple.Create(e.Severity, e.MessageOnDemand(), e));
                                        }
                      });

            _config = new OrchestratorConfig(mockFatalLogger.Object)
            {
                InputDataKind = KindOfTextData.Keyword
            };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            _config.RetainQuotes              = false;
            _config.InputKeyPrefix            = "@p";
            _config.ExcludeItemsMissingPrefix = false;
            _config.ActionOnDuplicateKey      = ActionOnDuplicateKey.IgnoreItem;
            _config.TypeDefiner              = key => key == "NUM" ? new ItemDef(ItemType.Int, null) : new ItemDef(ItemType.String, null); //NUM Int, everything else String
            _config.ClusterMarker            = (rec, prevRec, recCnt) => { return((string)rec["RECTYPE"] == "XYZ"); };                     //records having @pRECTYPE=XYZ denote start of the cluster
            _config.MarkerStartsCluster      = true;                                                                                       //predicate matches the first record in cluster
            _config.AllowOnTheFlyInputFields = true;
            _config.TransformerType          = TransformerType.ClusterFilter;
            _config.OutputConsumer           = (t, gc) => _resultingLines.Add(t?.Item1.Text); // place the lines on the list to be tested/asserted


            //prepare extraction of the results from the pipeline
            _resultingLines = new List <string>();
        }
        public void Process_OutputFileAndConsumerPresent_ConsumerWins()
        {
            //arrange
            int inCnt    = 0;
            var outLines = new List <Tuple <ExternalLine, int> >();

            var config = new OrchestratorConfig();

            config.InputFileNames = "C:\\non-existing-file.abc";
            config.IntakeSupplier = gc => inCnt++ < 3 ? $"Line #{inCnt}".ToExternalTuple() : null;
            config.OutputConsumer = (tpl, gc) => outLines.Add(tpl);

            var orchestrator = new EtlOrchestrator(config);

            //act
            var result = orchestrator.ExecuteAsync().Result;

            //assert
            result.CompletionStatus.Should().Be(CompletionStatus.IntakeDepleted); //note that either InputFileNames or OutputFileNames would've caused InitializationError
            result.RowsRead.Should().Be(3);
            result.ClustersRead.Should().Be(3);
            result.RowsWritten.Should().Be(3);
            result.ClustersWritten.Should().Be(3);
            inCnt.Should().Be(4);
            outLines.Count.Should().Be(4); //incl. EOD, i.e. null
            outLines[0].Item1.Text.Should().Be("Line #1");
            outLines[0].Item2.Should().Be(1);
            outLines[2].Item1.Text.Should().Be("Line #3");
            outLines[2].Item2.Should().Be(1);
            outLines[3].Should().BeNull();
        }
        public void Initialize()
        {
            _config = new OrchestratorConfig();

            //prepare extraction of the results from the pipeline
            _resultingLines = new List <string>();
            _inLines        = _intakeLines().Select(l => l.ToExternalTuple()).ToList();
        }
        private readonly ActionBlock <KeyValCluster> _resultsDiscarder; //block to intercept output from the clustering block

        public EtlOrchestrator_tests_FldNames()
        {
            _config = new OrchestratorConfig
            {
                DeferTransformation = DeferTransformation.Indefinitely //to prevent linking clusteringBlock to transformingBlock (which could steal clusters from results extractor)
            };

            _resultsDiscarder = new ActionBlock <KeyValCluster>(c => { }); // any output produced by the clustering block is irrelevant for these tests (and hence discarded)
        }
Example #19
        public void Initialize()
        {
            _config = new OrchestratorConfig(new MockLogger(LogEntrySeverity.Warning));

            //prepare extraction of the results from the pipeline
            _resultingClusters = new List <KeyValCluster>();
            _resultsExtractor  = new ActionBlock <KeyValCluster>(c => _resultingClusters.Add(c));
            _inLines           = _intakeLines().Select(l => l.ToExternalTuple()).ToList();
        }
Example #20
 /// <summary>
 /// Ctor intended to be called by the CreateProvider method of the base class
 /// </summary>
 /// <param name="typesToConvertToString">Flags set for item types to be converted to string.</param>
 /// <param name="config"></param>
 /// <param name="globalCache"></param>
 internal XrecordOutputProvider(ItemType typesToConvertToString, OrchestratorConfig config, IGlobalCache globalCache) : base(config, globalCache)
 {
     _typesToConvertToString = typesToConvertToString;
     //For XML, all types are converted to string - this way the format (if set in ItemDef) is applied on output.
     //For JSON, all types except for DateTime are NOT converted to string; this is because JSON natively represents data type (however, it doesn't support dates).
     //Note that XrecordOutputProvider could be split into 2 separate output providers (XmlOutputProvider and JsonOutputProvider), in which case
     //no typesToConvertToString would be needed (and it would directly fit into the strategy pattern).
     //However, a single XrecordOutputProvider class allows for "symmetry" with Intake, where there is a single XrecordIntakeProvider.
     //TODO: Consider splitting XrecordOutputProvider into XmlOutputProvider and JsonOutputProvider if performance slowdown is suspected.
 }
Example #21
 /// <summary>
 /// Ctor intended to be called by the CreateProvider method of the base class
 /// </summary>
 /// <param name="config"></param>
 /// <param name="globalCache"></param>
 /// <param name="typeDefinitions"></param>
 /// <param name="x12DelimitersForOuput"></param>
 internal X12IntakeProvider(OrchestratorConfig config, IGlobalCache globalCache, TypeDefinitions typeDefinitions, X12Delimiters x12DelimitersForOuput)
     : base(config, globalCache, typeDefinitions, sNo => sNo == 0 ? "Segment" : string.Format("Elem{0:000}", sNo)) //in case of X12, fields are named: Segment, Elem001, Elem002,...
 {
     _x12DelimitersForOutput = x12DelimitersForOuput;
     _x12FieldDelimiter      = config.DefaultX12FieldDelimiter;
     if (_x12FieldDelimiter == default(char))
     {
         _x12FieldDelimiter = '*';
     }
 }
Example #22
        public void Initialize()
        {
            _config = new OrchestratorConfig();

            _xmlOutput1    = new StringBuilder();
            _outputWriter1 = new StringWriter(_xmlOutput1);
            _xmlOutput2    = new StringBuilder();
            _outputWriter2 = new StringWriter(_xmlOutput2);

            _inLines = _intakeLines().Select(l => l.ToExternalTuple()).ToList();
        }
Example #23
        /// <summary>
        /// Helper method that creates an orchestrator which intercepts output from a given block (e.g. ClusteringBlock or HoldingBlock)
        /// and sends it directly to the results extractor.
        /// Call to this method should be the last step of the arrange part for each test.
        /// </summary>
        /// <typeparam name="T">The output type of the intercepted block, e.g. KeyValCluster.</typeparam>
        /// <param name="config">Orchestrator configuration to crate the orchestrator for.</param>
        /// <param name="nameOfBlockToIntercept">Name of the last block of the pipeline as it appears in EtlOrchestrator class, e.g "_clusteringBlock" or "_holdingBlock".
        /// This block must not be linked to other blocks.</param>
        /// <param name="resultsExtractor">Action block that will be receiving clusters instead of the rest of the pipeline.</param>
        /// <returns>The orchestrator just created.</returns>
        internal static EtlOrchestrator GetTestOrchestrator <T>(OrchestratorConfig config, string nameOfBlockToIntercept, ActionBlock <T> resultsExtractor)
        {
            var orchestrator     = new EtlOrchestrator(config);
            var orchestratorPA   = new PrivateAccessor(orchestrator);
            var blockToIntercept = (ISourceBlock <T>)orchestratorPA.GetField(nameOfBlockToIntercept);

            blockToIntercept.LinkTo(resultsExtractor, new DataflowLinkOptions {
                PropagateCompletion = true
            });
            return(orchestrator);
        }
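A hedged arrange/act sketch following the pattern of the cluster tests in this listing (it assumes a test class with a _config field like the ones above):

    // Hypothetical test snippet: intercept clusters leaving the clustering block.
    var resultingClusters = new ConcurrentQueue <KeyValCluster>();
    var resultsExtractor  = new ActionBlock <KeyValCluster>(c => resultingClusters.Enqueue(c));

    var orchestrator = GetTestOrchestrator(_config, "_clusteringBlock", resultsExtractor);
    var result       = orchestrator.ExecuteAsync().Result; // clusters land in resultingClusters instead of the rest of the pipeline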
        private readonly ActionBlock <KeyValCluster> _resultsExtractor;      //block to load results to container

        public EtlOrchestrator_tests_DelimitedIntake()
        {
            _config = new OrchestratorConfig
            {
                InputDataKind       = KindOfTextData.Delimited,
                DeferTransformation = DeferTransformation.Indefinitely //to prevent linking clusteringBlock to transformingBlock (which could steal clusters from results extractor)
            };

            _resultingClusters = new ConcurrentQueue <KeyValCluster>();
            _resultsExtractor  = new ActionBlock <KeyValCluster>(c => _resultingClusters.Enqueue(c));
        }
Example #25
        public void Initialize()
        {
            _config = new OrchestratorConfig();

            //prepare extraction of the results from the pipeline
            _resultingLines = new List <string>();

            var sn = 0; //to assign sourceNo in a round-robin fashion: 1,2,3,1,2,3,1,2

            _inLines = _intakeLines().Select(l => l.ToExternalTuple(sn++ % 3 + 1)).ToList();
        }
        private readonly ConcurrentQueue <IReadOnlyDictionary <string, object> > _traceBinHistory; //will contain results to verify

        public EtlOrchestrator_tests_TraceBin()
        {
            _config = new OrchestratorConfig
            {
                InputDataKind = KindOfTextData.X12
            };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            //no type definitions (everything string)

            _traceBinHistory = new ConcurrentQueue <IReadOnlyDictionary <string, object> >();
        }
Example #27
        private readonly ActionBlock <KeyValCluster> _resultsExtractor;      //block to load results to container

        public EtlOrchestrator_tests_JsonIntake()
        {
            _config = new OrchestratorConfig
            {
                DeferTransformation = DeferTransformation.Indefinitely //to prevent linking clusteringBlock to transformingBlock (which could steal clusters from results extractor)
            };

            _intakeReader = new StringReader(_intake);

            _resultingClusters = new ConcurrentQueue <KeyValCluster>();
            _resultsExtractor  = new ActionBlock <KeyValCluster>(c => _resultingClusters.Enqueue(c));
        }
Example #28
        private readonly List <int> _fieldWidths; //fixed field widths

        /// <summary>
        /// Ctor intended to be called by the CreateProvider method of the base class
        /// </summary>
        /// <param name="config"></param>
        /// <param name="globalCache"></param>
        /// <param name="typeDefinitions"></param>
        internal FlatIntakeProvider(OrchestratorConfig config, IGlobalCache globalCache, TypeDefinitions typeDefinitions) : base(config, globalCache, typeDefinitions)
        {
            //In case field widths only are configured (no field names) and no names in the header row, allow assignment of default names:
            if (FieldsNamesFromConfig.IsEmptyList() && !_config.HeadersInFirstInputRow)
            {
                IncludeFieldsEnMasse(FieldsNamesFromConfig);
            }

            _fieldWidths = config.InputFields.ListOfSingleElements(1)
                           .ToListOfInts((config.DefaultInputFieldWidth == 0) ? 10 : config.DefaultInputFieldWidth)      //in case of undefined default width, 10 will be used
                           .ToList();
        }
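A hedged config sketch (field names and widths are hypothetical) illustrating the "name|width" convention this ctor parses, where ListOfSingleElements(1) extracts the widths and DefaultInputFieldWidth (or 10) fills in for any field that omits one:

    // Hypothetical flat-file intake configuration.
    var cfg = new OrchestratorConfig
    {
        InputDataKind          = KindOfTextData.Flat,
        HeadersInFirstInputRow = false,
        InputFields            = "IataCode|4,Hyphen|2,PlaneDescription|70", // widths 4, 2 and 70
        DefaultInputFieldWidth = 12                                         // would be used if a width were omitted above
    };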
Example #29
        private readonly ActionBlock <KeyValCluster> _resultsExtractor;      //block to load results to container

        public SimpleIntakeSupplier_tests()
        {
            _config = new OrchestratorConfig
            {
                InputDataKind = KindOfTextData.Raw
            };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            _config.DeferTransformation = DeferTransformation.Indefinitely; //to prevent linking clusteringBlock to transformingBlock (which could steal clusters from results extractor)

            _resultsExtractor  = new ActionBlock <KeyValCluster>(c => _resultingClusters.Enqueue(c));
            _resultingClusters = new ConcurrentQueue <KeyValCluster>();
        }
Example #30
        public void Initialize()
        {
            _config = new OrchestratorConfig();
            _config.AllowTransformToAlterFields = true;

            // simple type definitions, everything string, except for fields starting with I_ (int)
            Func <string, ItemType> fldTypeFunc = key => key.StartsWith("I_") ? ItemType.Int : ItemType.String;
            var initFldTypes = new ConcurrentDictionary <string, ItemType>();
            Func <string, string> fldFormatFunc = key => string.Empty;
            var initFldFormats = new ConcurrentDictionary <string, string>();

            _typeDefs = new TypeDefinitions(fldTypeFunc, initFldTypes, fldFormatFunc, initFldFormats);
        }