/// <summary>
/// Assembles the orchestrator configuration (delimited intake, universal transformer,
/// flat output) and stores the orchestrator created from it in <c>Orchestrator</c>.
/// </summary>
/// <param name="intakeSupplierAsync">Asynchronous supplier of input lines; registered through <c>SetAsyncIntakeSupplier</c>.</param>
/// <param name="outputConsumer">Receiver of output lines; registered through <c>SetOutputConsumer</c>.</param>
/// <param name="progressHandler">Invoked with the event's <c>RecCnt</c> whenever a progress-changed or phase-finished event is raised for the intake phase.</param>
internal DataProcessor(Func <Task <string> > intakeSupplierAsync, Action <string> outputConsumer, Action <int> progressHandler)
        {
            var settings = new OrchestratorConfig()
            {
                // --- progress reporting ---
                ReportProgress   = true,
                ProgressInterval = 10,
                ProgressChangedHandler = (sender, args) =>
                {
                    // Only intake progress is forwarded; events of other phases are ignored.
                    if (args.Phase != Phase.Intake)
                    {
                        return;
                    }

                    progressHandler(args.RecCnt);
                },
                PhaseFinishedHandler = (sender, args) =>
                {
                    // Forward the record count once more when the intake phase finishes.
                    if (args.Phase != Phase.Intake)
                    {
                        return;
                    }

                    progressHandler(args.RecCnt);
                },

                // --- intake ---
                InputDataKind = KindOfTextData.Delimited,
                InputFields   = "PlaneDescription,IataCode,IcaoCode",
                AsyncIntake   = true,

                // --- transformation ---
                TransformerType             = TransformerType.Universal,
                UniversalTransformer        = FilterAndReorganizeFields,
                AllowTransformToAlterFields = true,

                // --- output (the "|n" suffixes are presumably field widths - confirm against the library docs) ---
                OutputDataKind          = KindOfTextData.Flat,
                OutputFields            = "IataCode|4,Hyphen|2,PlaneDescription|70",
                ExcludeExtraneousFields = true
            };

            // Wire the caller-provided endpoints into the configuration.
            settings.SetAsyncIntakeSupplier(intakeSupplierAsync);
            settings.SetOutputConsumer(outputConsumer);

            Orchestrator = OrchestratorCreator.GetEtlOrchestrator(settings);
        }
예제 #2
0
        /// <summary>
        /// Fixture setup: keyword input with the "@p" key prefix, single-record clusters,
        /// a pass-through cluster filter and arbitrary (template-based) output.
        /// Non-null output lines are collected in <c>_resultingLines</c>.
        /// </summary>
        public EtlOrchestrator_tests_ArbitraryOutput()
        {
            _config = new OrchestratorConfig();
            _config.InputDataKind = KindOfTextData.Keyword;
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);

            // Intake parsing rules
            _config.RetainQuotes              = false;
            _config.InputKeyPrefix            = "@p";
            _config.ExcludeItemsMissingPrefix = false;
            _config.ActionOnDuplicateKey      = ActionOnDuplicateKey.IgnoreItem;

            // NUM is typed as Int, every other key as String
            _config.TypeDefiner = key => new ItemDef(key == "NUM" ? ItemType.Int : ItemType.String, null);

            // Every record forms its own cluster
            _config.ClusterMarker            = (rec, prevRec, recCnt) => true;
            _config.AllowOnTheFlyInputFields = true;

            // No transformation: the filter accepts every cluster, so data passes through as is
            _config.TransformerType        = TransformerType.ClusterFilter;
            _config.ClusterFilterPredicate = cluster => true;

            // Output templates; the last element contains no token
            _config.OutputDataKind      = KindOfTextData.Arbitrary;
            _config.ArbitraryOutputDefs = new[]
            {
                "Record type is {RECTYPE},",
                " name is {NAME}",
                " and number is {NUM}.",
                " Void item here."
            };

            // Null lines are skipped; the lambda reads _resultingLines only when invoked,
            // so assigning the list afterwards (below) is fine.
            _config.SetOutputConsumer(line =>
            {
                if (line == null)
                {
                    return;
                }

                _resultingLines.Add(line);
            });

            _resultingLines = new List<string>();
        }
예제 #3
0
        /// <summary>
        /// Runs the whole pipeline with Raw input and Raw output, no transformation and
        /// SourceToTarget routing, then checks the processing counts and that the eight
        /// input lines arrive in order, followed by a null end-of-data mark.
        /// </summary>
        public void processEntirePipeline_SimpleConsumer_TargetAlways1()
        {
            //arrange
            var expectedLines = new[]
            {
                "Line 01", "Line 02", "Line 03", "Line 04",
                "Line 05", "Line 06", "Line 07", "Line 08"
            };

            _config.InputDataKind  = KindOfTextData.Raw;
            _config.IntakeSupplier = _inLine; //has SourceNo assigned in a round-robin fashion
            //default (no) transformation
            _config.RouterType     = RouterType.SourceToTarget;
            _config.OutputDataKind = KindOfTextData.Raw;
            _config.SetOutputConsumer(line => _resultingLines.Add(line));
            var sut = new EtlOrchestrator(_config);

            //act
            var outcome = sut.ExecuteAsync().Result;

            //assert
            outcome.CompletionStatus.Should().Be(CompletionStatus.IntakeDepleted);
            outcome.RowsRead.Should().Be(8);
            outcome.ClustersRead.Should().Be(8);
            outcome.RowsWritten.Should().Be(8);
            outcome.ClustersWritten.Should().Be(8);

            _resultingLines.Count.Should().Be(9); //8 data lines + end-of-data mark
            for (var idx = 0; idx < expectedLines.Length; idx++)
            {
                _resultingLines[idx].Should().Be(expectedLines[idx]);
            }
            _resultingLines[8].Should().BeNull(); //end-of-data mark
        }
 /// <summary>
 /// Fixture setup: flat input with headers in the first row, three declared input
 /// fields, a pass-through cluster filter and an output consumer that discards every line.
 /// </summary>
 public EtlOrchestrator_tests_FieldsToUse()
 {
     _config = new OrchestratorConfig();
     _config.InputDataKind = KindOfTextData.Flat;
     _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);

     // Intake layout
     _config.HeadersInFirstInputRow = true;
     _config.InputFields            = "InFld1|4,InFld2|4,InFld3|12";

     // No transformation: the filter accepts every cluster
     _config.TransformerType        = TransformerType.ClusterFilter;
     _config.ClusterFilterPredicate = cluster => true;

     // Throwaway consumer - these tests do not evaluate output.
     // The parameter type is spelled out, as in the original, to pick the string-based consumer.
     _config.SetOutputConsumer((string line) => { });
 }
예제 #5
0
        /// <summary>
        /// Executes the entire pipeline with an identity cluster transformer and keyword
        /// output, then verifies the counts and the lines that reach the output consumer.
        /// </summary>
        public void ProcessPipeline_PassUnchangedClustersEntirePipeline_SameDataAsInput()
        {
            //Per the original author's note, this mirrors the preceding test, except that the
            // entire pipeline is executed and the output at the end of the pipeline is verified.

            //arrange
            var sink = new ConcurrentQueue<string>(); //filled by the output consumer registered below

            _config.ClusterboundTransformer = cluster => cluster; //identity: the same cluster goes to output
            //Restore the normal setting so the entire pipeline is processed; nothing goes to
            // _resultingClusters, the final output is sent to the sink instead.
            _config.DeferOutput = DeferOutput.Auto;
            _config.SetOutputConsumer(line => sink.Enqueue(line));
            _config.OutputDataKind  = KindOfTextData.Keyword;
            _config.OutputKeyPrefix = "@p"; //so that output can be matched to input

            var sut = new EtlOrchestrator(_config);

            //act
            var outcome = sut.ExecuteAsync().Result;

            //assert
            outcome.RowsRead.Should().Be(10);
            outcome.ClustersRead.Should().Be(2);
            outcome.RowsWritten.Should().Be(10);
            outcome.ClustersWritten.Should().Be(2);

            _resultingClusters.Should().BeEmpty(); //nothing was redirected to _resultingClusters in this test

            var emitted = sink.ToArray(); //snapshot of the queue, in enqueue order

            emitted.Should().HaveCount(11); //incl. EOD mark
            emitted[0].Should().Be("@pRECTYPE=XYZ,@pNAME=Mary,@pNUM=123");
            emitted[1].Should().Be("@pRECTYPE=ABCD,@pABCD_ID=XYZ00883,@pNAME=Mary,@pNUM=223");
            emitted[8].Should().Be("@pRECTYPE=ABCD,@pABCD_ID=XYZ00883,@pNAME=Cindy,@pNUM=923");
            emitted[9].Should().Be("@pEOF"); //prefix added here as well
            emitted[10].Should().BeNull();   //EOD mark
        }
예제 #6
0
        // Tuples received by the output consumer that the fixture constructor registers;
        // the constructor also creates this queue.
        ConcurrentQueue <Tuple <ExternalLine, int> > _resultingLines; //Item1=ExternalLine, Item2=targetNo

        /// <summary>
        /// Fixture setup: keyword input with the "@p" key prefix, source numbers assigned
        /// round-robin (1,2,3,1,2,3,...), clusters that start at records with RECTYPE "XYZ",
        /// and an output consumer that enqueues each received tuple into <c>_resultingLines</c>.
        /// </summary>
        public EtlOrchestrator_tests_Router()
        {
            _config = new OrchestratorConfig
            {
                InputDataKind = KindOfTextData.Keyword
            };

            //Captured counter that drives the SourceNo calculation. Per the original note,
            // GetStringTupleSupplier calls the supplied function exactly once per iteration.
            var callSeq = 0;

            //sourceNo cycles 1,2,3,1,2,3,...
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).GetStringTupleSupplier(() => (callSeq++ % 3) + 1));
            _config.InputKeyPrefix = "@p";

            //no type definitions (everything string)

            //a record with @pRECTYPE=XYZ denotes the start of a cluster
            _config.ClusterMarker            = (rec, prevRec, recCnt) => (string)rec["RECTYPE"] == "XYZ";
            _config.MarkerStartsCluster      = true; //the predicate matches the first record in a cluster
            _config.AllowOnTheFlyInputFields = true;

            //Item1=ExternalLine/Xrecord, Item2=targetNo; the queue is read only when the consumer runs
            _config.SetOutputConsumer(tuple => _resultingLines.Enqueue(tuple));

            _resultingLines = new ConcurrentQueue<Tuple<ExternalLine, int>>();
        }
        /// <summary>
        /// Fixture setup: X12 input and X12 output, single-record clusters and a
        /// pass-through cluster filter. Non-null output lines are collected in
        /// <c>_resultingLines</c>.
        /// </summary>
        public EtlOrchestrator_tests_X12Output()
        {
            _config = new OrchestratorConfig();
            _config.InputDataKind = KindOfTextData.X12;
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);

            //every record forms its own cluster
            _config.ClusterMarker = (rec, prevRec, recCnt) => true;

            //Original author's note: lets fields show trailing spaces (should matter for KW, but not X12)
            _config.AllowOnTheFlyInputFields = true;

            //no transformation: the filter accepts every cluster, data passes through as is
            _config.TransformerType        = TransformerType.ClusterFilter;
            _config.ClusterFilterPredicate = cluster => true;

            _config.OutputDataKind = KindOfTextData.X12;

            //Null lines are skipped; the lambda reads _resultingLines only when invoked,
            // so assigning the list afterwards (below) is fine.
            _config.SetOutputConsumer(line =>
            {
                if (line == null)
                {
                    return;
                }

                _resultingLines.Add(line);
            });

            _resultingLines = new List<string>();
        }
        /// <summary>
        /// Fixture setup: keyword input and keyword output with the "@p" input key prefix,
        /// clusters that start at records with RECTYPE "XYZ", and a pass-through cluster
        /// filter. Every output line (nulls included) is added to <c>_resultingLines</c>.
        /// </summary>
        public EtlOrchestrator_tests_KwOutput()
        {
            _config = new OrchestratorConfig();
            _config.InputDataKind = KindOfTextData.Keyword;
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);

            //intake parsing rules
            _config.RetainQuotes              = false;
            _config.InputKeyPrefix            = "@p";
            _config.ExcludeItemsMissingPrefix = false;
            _config.ActionOnDuplicateKey      = ActionOnDuplicateKey.IgnoreItem;

            //NUM is typed as Int, every other key as String
            _config.TypeDefiner = key => new ItemDef(key == "NUM" ? ItemType.Int : ItemType.String, null);

            //a record with @pRECTYPE=XYZ denotes the start of a cluster
            _config.ClusterMarker            = (rec, prevRec, recCnt) => (string)rec["RECTYPE"] == "XYZ";
            _config.MarkerStartsCluster      = true; //the predicate matches the first record in a cluster
            _config.AllowOnTheFlyInputFields = true;

            //no transformation: the filter accepts every cluster, data passes through as is
            _config.TransformerType        = TransformerType.ClusterFilter;
            _config.ClusterFilterPredicate = cluster => true;

            _config.OutputDataKind = KindOfTextData.Keyword;

            //unlike a null-filtering consumer, this one stores every line it receives
            _config.SetOutputConsumer(line => _resultingLines.Add(line));

            _resultingLines = new List<string>();
        }
예제 #9
0
        /// <summary>
        /// Runs the pipeline with keyword input and XML output, converting every item
        /// sequence handed to the output consumer into an external line, then verifies the
        /// counts and the key/value items of selected resulting records.
        /// </summary>
        public void produceExtLineOfTypeXrecord_SimpleSettings_CorrectData()
        {
            //arrange
            _config.InputDataKind             = KindOfTextData.Keyword;
            _config.IntakeSupplier            = _inLine;
            _config.RetainQuotes              = false;
            _config.InputKeyPrefix            = "@p";
            _config.ExcludeItemsMissingPrefix = false;
            _config.ActionOnDuplicateKey      = ActionOnDuplicateKey.IgnoreItem;
            _config.MarkerStartsCluster       = true; //predicate matches the first record in cluster
            _config.AllowOnTheFlyInputFields  = true;
            _config.TransformerType           = TransformerType.ClusterFilter;
            _config.ClusterFilterPredicate    = cluster => true; //no transformations, data passed as is
            _config.OutputDataKind            = KindOfTextData.XML;

            //the lambda argument type must be spelled out to disambiguate the consumer overload
            _config.SetOutputConsumer((IEnumerable<Tuple<string, object>> items) => _resultingLines.Add(items.ToExternalLine()));

            var sut = new EtlOrchestrator(_config);

            //Checks one resulting record: its runtime type, its item count and then each
            // item's key (Item1) and value (Item2), in positional order.
            Action<int, string[], string[]> verifyRecord = (lineIndex, expectedKeys, expectedValues) =>
            {
                var record = _resultingLines[lineIndex];

                record.Should().BeOfType<Xrecord>();
                record.Items.Count.Should().Be(expectedKeys.Length);
                for (var pos = 0; pos < expectedKeys.Length; pos++)
                {
                    var item = record.Items[pos];

                    item.Item1.Should().Be(expectedKeys[pos]);
                    item.Item2.Should().Be(expectedValues[pos]);
                }
            };

            //act
            var outcome = sut.ExecuteAsync().Result;

            //assert
            outcome.RowsRead.Should().Be(10);
            outcome.ClustersRead.Should().Be(10);
            outcome.ClustersWritten.Should().Be(10);
            outcome.RowsWritten.Should().Be(10);

            _resultingLines.Count.Should().Be(11); //10 records + EOD mark

            verifyRecord(0,
                         new[] { "RECTYPE", "NAME", "NUM" },
                         new[] { "XYZ", "Mary", "123" });

            verifyRecord(2,
                         new[] { "RECTYPE", "ABCD_ID", "NAME", "NUM" },
                         new[] { "ABCD", "XYZ00883", "Susan", "323" });

            verifyRecord(3,
                         new[] { "RECTYPE", "ABCD_ID", "NAME", "NUM" },
                         new[] { "ABCD", "XYZ00883", "Mary,Ann", "423" }); //commas OK

            verifyRecord(6,
                         new[] { "RECTYPE", "ABCD_ID", "NAME", "NUM" },
                         new[] { "ABCD", "XYZ00883", "Joan", "723" });

            //this record lists its items in a different order and has an empty ABCD_ID
            verifyRecord(8,
                         new[] { "ABCD_ID", "NAME", "NUM", "RECTYPE" },
                         new[] { string.Empty, "Cindy", "923", "ABCD" });

            //EOF record: a single item whose value is null
            var eofRecord = _resultingLines[9];

            eofRecord.Should().BeOfType<Xrecord>();
            eofRecord.Items.Count.Should().Be(1);
            var eofItem = eofRecord.Items[0];

            eofItem.Item1.Should().Be("EOF");
            //Null even for XML: the EOF field exists, as it was parsed on intake as a string
            // with a null value (IOW, it is not a void item).
            eofItem.Item2.Should().BeNull();

            _resultingLines[10].Should().BeNull(); //EOD mark
        }