Beispiel #1
0
        public void ProcessPipeline_PassNothingEntirePipeline_ClustersFilteredOut()
        {
            // Runs the complete pipeline while the ClusterboundTransformer returns null for every cluster,
            // and checks what reaches the output consumer in that case.

            //arrange
            var consumedLines = new ConcurrentQueue<string>(); // every line handed to the output consumer lands here

            _config.ClusterboundTransformer = cluster => null; // transformer passes nothing on
            _config.DeferOutput = DeferOutput.Auto;            // normal setting: the whole pipeline runs, so _resultingClusters receives nothing
            _config.SetOutputConsumer(line => consumedLines.Enqueue(line));

            var sut = new EtlOrchestrator(_config);

            //act
            var stats = sut.ExecuteAsync().Result;

            //assert
            stats.RowsRead.Should().Be(10);
            stats.ClustersRead.Should().Be(2);
            stats.RowsWritten.Should().Be(0);
            stats.ClustersWritten.Should().Be(0);

            // no clusters were redirected to _resultingClusters in this test
            _resultingClusters.Should().BeEmpty();

            var received = consumedLines.ToList();

            received.Should().HaveCount(1); // only the end-of-data mark arrived
            received[0].Should().BeNull();  // the end-of-data mark is a null line
        }
        public void ProcessPipeline_OutputFieldsWithExclude_CorrectData()
        {
            // Limits output to an explicit field list with ExcludeExtraneousFields switched on.
            // DUMMY1 and DUMMY2 are in the list but appear in none of the expected lines below.

            //arrange
            _config.OutputFields = "NAME, DUMMY1, RECTYPE, DUMMY2, NUM";
            _config.ExcludeExtraneousFields = true;

            var sut = new EtlOrchestrator(_config);

            //act
            var stats = sut.ExecuteAsync().Result;

            //assert
            stats.RowsRead.Should().Be(10);
            stats.ClustersRead.Should().Be(2);
            stats.ClustersWritten.Should().Be(2);
            stats.RowsWritten.Should().Be(10);

            // 10 data lines followed by the end-of-data mark (null)
            _resultingLines.Should().HaveCount(11);

            _resultingLines[0].Should().Be("NAME=Mary,RECTYPE=XYZ,NUM=123");
            _resultingLines[2].Should().Be("NAME=Susan,RECTYPE=ABCD,NUM=323");
            _resultingLines[3].Should().Be("NAME=\"Mary,Ann\",RECTYPE=ABCD,NUM=423");
            _resultingLines[6].Should().Be("NAME=Joan,RECTYPE=ABCD,NUM=723");
            _resultingLines[8].Should().Be("NAME=Cindy,RECTYPE=ABCD,NUM=923");

            // this record held none of the requested output fields, so everything was excluded
            _resultingLines[9].Should().Be(string.Empty);
            _resultingLines[10].Should().BeNull();
        }
Beispiel #3
0
        public void ProcessPipeline_ArbitraryOutputEscapedBracesAndBackslashes_CorrectData()
        {
            // Arbitrary output definitions mixing {FIELD} tokens with backslash-prefixed braces.

            //arrange
            // In the C# literals below every \\ pair stands for one backslash character.
            _config.ArbitraryOutputDefs = new[]
            {
                "\\\\{Record type is {RECTYPE},",
                " name \\\\{is\\\\} {NAME}.",
                " Void \\\\{item} here."
            };

            var sut = new EtlOrchestrator(_config);

            //act
            var stats = sut.ExecuteAsync().Result;

            //assert
            stats.RowsRead.Should().Be(5);
            stats.ClustersRead.Should().Be(5);
            stats.ClustersWritten.Should().Be(5);
            stats.RowsWritten.Should().Be(5);

            _resultingLines.Should().HaveCount(5);

            _resultingLines[0].Should().Be("\\{Record type is XYZ, name \\{is\\} Mary. Void \\{item} here.");
            _resultingLines[2].Should().Be("\\{Record type is ABCD, name \\{is\\} Susan   . Void \\{item} here.");
            _resultingLines[4].Should().Be("\\{Record type is , name \\{is\\} . Void \\{item} here.");
        }
        public void ProcessPipeline_OutputKeyPrefixAlsoFormat_CorrectData()
        {
            // Combines an output key prefix with a numeric format supplied through the TypeDefiner.

            //arrange
            _config.TypeDefiner = key =>
            {
                if (key == "NUM")
                {
                    return new ItemDef(ItemType.Int, "0000"); // NUM: Int, formatted as 4 digits
                }

                return new ItemDef(ItemType.String, null);    // every other field: String
            };
            _config.ClusterMarker = (rec, prevRec, recCnt) => true; // each record is its own cluster
            _config.OutputKeyPrefix = "#!#";

            var sut = new EtlOrchestrator(_config);

            //act
            var stats = sut.ExecuteAsync().Result;

            //assert
            stats.RowsRead.Should().Be(10);
            stats.ClustersRead.Should().Be(10);
            stats.ClustersWritten.Should().Be(10);
            stats.RowsWritten.Should().Be(10);

            // 10 data lines followed by the end-of-data mark (null)
            _resultingLines.Should().HaveCount(11);

            _resultingLines[0].Should().Be("#!#RECTYPE=XYZ,#!#NAME=Mary,#!#NUM=0123");
            _resultingLines[2].Should().Be("#!#RECTYPE=ABCD,#!#ABCD_ID=XYZ00883,#!#NAME=Susan,#!#NUM=0323");
            _resultingLines[3].Should().Be("#!#RECTYPE=ABCD,#!#ABCD_ID=XYZ00883,#!#NAME=\"Mary,Ann\",#!#NUM=0423");
            _resultingLines[7].Should().Be("#!#RECTYPE=ABCD,#!#ABCD_ID=XYZ00883,#!#NAME=Jane,#!#NUM=0823");
            _resultingLines[8].Should().Be("#!#ABCD_ID=XYZ00883,#!#NAME=Cindy,#!#NUM=0923,#!#RECTYPE=ABCD");
            _resultingLines[9].Should().Be("#!#EOF");
            _resultingLines[10].Should().BeNull();
        }
        public void ProcessPipeline_QuotationModeOnlyIfNeeded_CorrectData()
        {
            // With QuotationMode.OnlyIfNeeded, the only quoted value asserted below is the one containing a comma.

            //arrange
            _config.ClusterMarker = (rec, prevRec, recCnt) => true; // each record is its own cluster
            _config.QuotationMode = QuotationMode.OnlyIfNeeded;

            var sut = new EtlOrchestrator(_config);

            //act
            var stats = sut.ExecuteAsync().Result;

            //assert
            stats.RowsRead.Should().Be(10);
            stats.ClustersRead.Should().Be(10);
            stats.ClustersWritten.Should().Be(10);
            stats.RowsWritten.Should().Be(10);

            // 10 data lines followed by the end-of-data mark (null)
            _resultingLines.Should().HaveCount(11);

            _resultingLines[0].Should().Be("RECTYPE=XYZ,NAME=Mary,NUM=123");
            _resultingLines[2].Should().Be("RECTYPE=ABCD,ABCD_ID=XYZ00883,NAME=Susan,NUM=323");
            _resultingLines[3].Should().Be("RECTYPE=ABCD,ABCD_ID=XYZ00883,NAME=\"Mary,Ann\",NUM=423");
            _resultingLines[6].Should().Be("RECTYPE=ABCD,ABCD_ID=XYZ00883,NAME=Joan,NUM=723");
            _resultingLines[8].Should().Be("ABCD_ID=XYZ00883,NAME=Cindy,NUM=923,RECTYPE=ABCD");
            _resultingLines[9].Should().Be("EOF");
            _resultingLines[10].Should().BeNull();
        }
Beispiel #6
0
        public void processPipeline_Baseline_NoErrorsAndDataCorrect()
        {
            // Baseline scenario: no exception is thrown anywhere in the pipeline.
            // Transformed clusters are captured by linking _resultsExtractor to the transforming block.

            //arrange
            _config.InputDataKind = KindOfTextData.Delimited;
            _config.IntakeSupplier = _inLine;
            _config.ExplicitTypeDefinitions = "AGE|I,DOB|D|M/d/yyyy";
            _config.HeadersInFirstInputRow = true;
            _config.RetainQuotes = true;
            _config.ClusterMarker = (rec, prevRec, recCnt) => true; // each record is its own cluster
            _config.MarkerStartsCluster = true;                     // predicate matches the first record in cluster
            _config.TransformerType = TransformerType.Recordbound;
            _config.RecordboundTransformer = r =>
            {
                var referenceDate = new DateTime(2017, 1, 1);
                dynamic input = r;
                dynamic output = r.GetEmptyClone();

                output.NAME = input.FIRST_NAME + " " + input.LAST_NAME;
                output.DOB = input.DOB;

                // age in whole years, approximated by the mean length of a year in days
                var daysLived = (referenceDate - input.DOB).TotalDays;
                output.AGE = (int)(daysLived / 365.2422);

                return output;
            };
            _config.AllowTransformToAlterFields = true; // without it, no new fields would be allowed
            _config.OutputConsumer = (t, gc) => { };    // throwaway consumer

            var sut = new EtlOrchestrator(_config);
            var sutAccessor = new PrivateObject(sut);
            var transformer = (TransformManyBlock<KeyValCluster, KeyValCluster>)sutAccessor.GetField("_transformingBlock");
            var linkOptions = new DataflowLinkOptions { PropagateCompletion = true };

            transformer.LinkTo(_resultsExtractor, linkOptions);

            //act
            var outcome = sut.ExecuteAsync().Result;

            _resultsExtractor.Completion.Wait();

            //assert
            outcome.CompletionStatus.Should().Be(CompletionStatus.IntakeDepleted);
            outcome.RowsRead.Should().Be(6);
            outcome.ClustersRead.Should().Be(5);
            outcome.RowsWritten.Should().Be(0); // no output invoked during the test
            outcome.ClustersWritten.Should().Be(0);

            _resultingClusters.Count.Should().Be(5);
            _resultingClusters[0].Count.Should().Be(1);
            _resultingClusters[1].Count.Should().Be(1);
            _resultingClusters[4].Count.Should().Be(1);

            IRecord firstRecord = _resultingClusters[0][0];
            IItem ageItem = firstRecord.GetItem("AGE");

            ageItem.Value.Should().Be(50);
            IItem dobItem = firstRecord.GetItem("DOB");

            dobItem.Value.Should().Be(new DateTime(1966, 1, 3));
            dobItem.StringValue.Should().Be("1/3/1966");
        }
Beispiel #7
0
        public void CreateTypeDefinitions_SimpleValues_CorrectData()
        {
            // Invokes the private CreateTypeDefinitions method with both explicit definitions and a
            // fallback type definer, then inspects type, format and parser per field.

            //arrange
            // fld1 and fld3 are DateTime, fld2 is bool; any other field falls back to Int with format "000"
            const string explicitDefs = "fld1|D,fld2|B,fld3|D|mm/dd/yy";
            Func<string, ItemDef> typeDefiner = fieldName => new ItemDef(ItemType.Int, "000");
            var sut = new EtlOrchestrator(new OrchestratorConfig());
            var sutAccessor = new PrivateObject(sut);
            var invokeArgs = new object[] { explicitDefs, typeDefiner };

            //act
            var typeDefs = (TypeDefinitions)sutAccessor.Invoke("CreateTypeDefinitions", invokeArgs);

            //assert
            // fld1: DateTime without a format
            typeDefs.GetFldType("fld1").Should().Be(ItemType.DateTime);
            typeDefs.GetFldFormat("fld1").Should().Be(string.Empty); // a missing format is reported as empty string
            typeDefs.GetFldParser("fld1").Should().BeOfType<Func<string, object>>();
            typeDefs.GetFldParser("fld1")("invaliddate").Should().BeOfType<DateTime>();
            typeDefs.GetFldParser("fld1")("invaliddate").Should().Be(default(DateTime));

            // fld2: bool without a format
            typeDefs.GetFldType("fld2").Should().Be(ItemType.Bool);
            typeDefs.GetFldFormat("fld2").Should().Be(string.Empty);
            typeDefs.GetFldParser("fld2").Should().BeOfType<Func<string, object>>();
            typeDefs.GetFldParser("fld2")("TRUE").Should().BeOfType<bool>();
            typeDefs.GetFldParser("fld2")("TRUE").Should().Be(true);

            // fld3: DateTime with an explicit format
            typeDefs.GetFldType("fld3").Should().Be(ItemType.DateTime);
            typeDefs.GetFldFormat("fld3").Should().Be("mm/dd/yy");

            // fields absent from the explicit definitions come from the type definer
            typeDefs.GetFldType("new1").Should().Be(ItemType.Int);
            typeDefs.GetFldFormat("new2").Should().Be("000");
            typeDefs.GetFldParser("new3")(" 0014 ").Should().Be(14);
        }
        public void ProcessPipeline_X12OutputX12InputCustomFieldDelimiter_CorrectData()
        {
            // With DefaultX12FieldDelimiter set to '^', every expected segment below is delimited by '^'.

            //arrange
            _config.DefaultX12FieldDelimiter = '^';

            var sut = new EtlOrchestrator(_config);

            //act
            var stats = sut.ExecuteAsync().Result;

            //assert
            stats.RowsRead.Should().Be(21);
            stats.ClustersRead.Should().Be(21);
            stats.ClustersWritten.Should().Be(21);
            stats.RowsWritten.Should().Be(21);

            _resultingLines.Should().HaveCount(21);

            // envelope and header segments
            _resultingLines[0].Should().Be("ISA^00^          ^00^          ^01^054318936      ^01^123456789      ^020801^0900^U^00501^00000012 ^0^T^~");
            _resultingLines[1].Should().Be("GS^PO^4405197800^999999999^20101127^1719^1421^X^004010VICS");
            _resultingLines[2].Should().Be("ST^834^0001");
            _resultingLines[3].Should().Be("BGN^00^1234^20001227^0838^PT^^^2");
            _resultingLines[4].Should().Be("N1^P5^^FI^954529603");

            // a sample of later segments, ending with the last one
            _resultingLines[11].Should().Be("N3^123 MAIN STREET");
            _resultingLines[15].Should().Be("DTP^348^D8^20001220");
            _resultingLines[16].Should().Be("LX^1");
            _resultingLines[20].Should().Be("IEA^1^455321165");
        }
        public void Json_RoundTrip_CorrectData(string testCase)
        {
            // Reads JSON text and writes it back out with the same settings; the output must equal the input.

            //arrange
            var testData = _testDataRepo[testCase];
            var sourceJson = testData.Item1;
            var jsonSettings = testData.Item2;
            var sourceReader = new StringReader(sourceJson);

            // intake side
            _config.IntakeReader = () => sourceReader;
            _config.InputDataKind = KindOfTextData.JSON;
            _config.AllowOnTheFlyInputFields = true;
            _config.XmlJsonIntakeSettings = jsonSettings;

            // pass-through transformation: the filter accepts every cluster, data is not altered
            _config.TransformerType = TransformerType.ClusterFilter;
            _config.ClusterFilterPredicate = cluster => true;

            // output side uses the same settings as intake
            _config.OutputDataKind = KindOfTextData.JSON;
            _config.XmlJsonOutputSettings = jsonSettings;
            _config.OutputWriter = () => _outputWriter;

            var sut = new EtlOrchestrator(_config);

            //act
            var stats = sut.ExecuteAsync().Result;

            //assert
            _outputData.ToString().Should().Be(sourceJson);
        }
Beispiel #10
0
        public void processRoundRobinSources_PerRecordRouter_CorrectTargetNos()
        {
            // Per-record routing: the target number is computed from each record's NUM field.

            //arrange
            _config.ExplicitTypeDefinitions = "NUM|I"; // NUM field is int
            _config.RouterType = RouterType.PerRecord;

            // cluster 1 (first 5 records): NUM - 23 -> 100, 200, ..; other records (last 4): NUM + 27 -> 650, 750, ..
            _config.RecordRouter = (r, c) => (int)(r["NUM"]) + (c.ClstrNo == 1 ? -23 : 27);

            var sut = new EtlOrchestrator(_config);

            //act
            var stats = sut.ExecuteAsync().Result;

            //assert
            stats.RowsRead.Should().Be(9);
            stats.ClustersRead.Should().Be(2);
            stats.RowsWritten.Should().Be(9);
            stats.ClustersWritten.Should().Be(2);

            // 9 data lines followed by the end-of-data mark (null)
            _resultingLines.Should().HaveCount(10);
            _resultingLines.Skip(9).First().Should().BeNull();

            var targetNumbers = _resultingLines.Take(9).Select(line => line.Item2).ToList();

            targetNumbers.Should().HaveCount(9);

            var expectedTargets = new[] { 100, 200, 300, 400, 500, 650, 750, 850, 950 };

            for (var i = 0; i < expectedTargets.Length; i++)
            {
                targetNumbers[i].Should().Be(expectedTargets[i]);
            }
        }
        public void ProcessPipeline_X12OutputX12Input_CorrectData()
        {
            // No field delimiter is set in config here, so output reuses the delimiter
            // found in the X12 intake ('+' in every expected segment below).

            //arrange
            var sut = new EtlOrchestrator(_config);

            //act
            var stats = sut.ExecuteAsync().Result;

            //assert
            stats.RowsRead.Should().Be(21);
            stats.ClustersRead.Should().Be(21);
            stats.ClustersWritten.Should().Be(21);
            stats.RowsWritten.Should().Be(21);

            _resultingLines.Should().HaveCount(21);

            // envelope and header segments
            _resultingLines[0].Should().Be("ISA+00+          +00+          +01+054318936      +01+123456789      +020801+0900+U+00501+00000012 +0+T+~");
            _resultingLines[1].Should().Be("GS+PO+4405197800+999999999+20101127+1719+1421+X+004010VICS");
            _resultingLines[2].Should().Be("ST+834+0001");
            _resultingLines[3].Should().Be("BGN+00+1234+20001227+0838+PT+++2");
            _resultingLines[4].Should().Be("N1+P5++FI+954529603");

            // a sample of later segments, ending with the last one
            _resultingLines[11].Should().Be("N3+123 MAIN STREET");
            _resultingLines[15].Should().Be("DTP+348+D8+20001220");
            _resultingLines[16].Should().Be("LX+1");
            _resultingLines[20].Should().Be("IEA+1+455321165");
        }
Beispiel #12
0
        public void processRoundRobinSources_PerClusterRouter_CorrectTargetNos()
        {
            // Per-cluster routing: all records of a cluster share the target chosen for that cluster.

            //arrange
            _config.RouterType = RouterType.PerCluster;

            // target = cluster number + 10: the 1st cluster (5 recs) goes to 11, the 2nd (4 recs) to 12
            _config.ClusterRouter = cluster => cluster.ClstrNo + 10;

            var sut = new EtlOrchestrator(_config);

            //act
            var stats = sut.ExecuteAsync().Result;

            //assert
            stats.RowsRead.Should().Be(9);
            stats.ClustersRead.Should().Be(2);
            stats.RowsWritten.Should().Be(9);
            stats.ClustersWritten.Should().Be(2);

            // 9 data lines followed by the end-of-data mark (null)
            _resultingLines.Should().HaveCount(10);
            _resultingLines.Skip(9).First().Should().BeNull();

            var targetNumbers = _resultingLines.Take(9).Select(line => line.Item2).ToList();

            targetNumbers.Should().HaveCount(9);

            var firstClusterTargets = targetNumbers.Take(5);
            var secondClusterTargets = targetNumbers.Skip(5);

            firstClusterTargets.Should().OnlyContain(tn => tn == 11);
            secondClusterTargets.Should().OnlyContain(tn => tn == 12);
        }
Beispiel #13
0
        public void processRoundRobinSources_SourceToTargetRouter_TargetNoSameAsSourceNo()
        {
            // SourceToTarget routing: the expected target numbers repeat as 1, 2, 3 across the output lines.

            //arrange
            _config.RouterType = RouterType.SourceToTarget;

            var sut = new EtlOrchestrator(_config);

            //act
            var stats = sut.ExecuteAsync().Result;

            //assert
            stats.RowsRead.Should().Be(9);
            stats.ClustersRead.Should().Be(2);
            stats.RowsWritten.Should().Be(9);
            stats.ClustersWritten.Should().Be(2);

            // 9 data lines followed by the end-of-data mark (null)
            _resultingLines.Should().HaveCount(10);
            _resultingLines.Skip(9).First().Should().BeNull();

            var targetNumbers = _resultingLines.Take(9).Select(line => line.Item2).ToList();

            targetNumbers.Should().HaveCount(9);

            // split the lines by position modulo 3; each group must hold a single target number
            var everyThirdFrom0 = targetNumbers.Where((target, index) => index % 3 == 0);
            var everyThirdFrom1 = targetNumbers.Where((target, index) => index % 3 == 1);
            var everyThirdFrom2 = targetNumbers.Where((target, index) => index % 3 == 2);

            everyThirdFrom0.Should().OnlyContain(tn => tn == 1);
            everyThirdFrom1.Should().OnlyContain(tn => tn == 2);
            everyThirdFrom2.Should().OnlyContain(tn => tn == 3);
        }
Beispiel #14
0
        public void processRoundRobinSources_DefaultRouter_SameAsSingleTarget()
        {
            // No router is defined in config, so the default router (SingleTarget) applies:
            // every record is routed to target 1.

            //arrange
            _config.OutputDataKind = KindOfTextData.XML; //if Xrecord type, then ClstrNo is populated in ExternalLine (unrelated to routing, but can't hurt)

            var orchestrator = new EtlOrchestrator(_config);

            //act
            var counts = orchestrator.ExecuteAsync().Result;

            //assert
            counts.RowsRead.Should().Be(9);
            counts.ClustersRead.Should().Be(2);
            counts.RowsWritten.Should().Be(9);
            counts.ClustersWritten.Should().Be(2);

            _resultingLines.Should().HaveCount(10); //incl. EOD mark (null)
            _resultingLines.Skip(9).First().Should().BeNull();

            var targetNumbers = _resultingLines.Take(9).Select(t => t.Item2).ToList();

            targetNumbers.Should().HaveCount(9);
            targetNumbers.Should().OnlyContain(tn => tn == 1); //all targetNo=1

            //Asserts below are unrelated to routing, but it can't hurt to verify that the 1st 5 records were for clstr 1 and the remaining ones for clstr 2.
            //The query is materialized once (ToList) because it is consumed by two separate assertions; a deferred
            //query would walk _resultingLines again for each of them.
            var clusterNumbers = _resultingLines.Take(9).Select(t => t.Item1.ClstrNo).ToList();

            clusterNumbers.Take(5).Should().OnlyContain(cn => cn == 1);
            clusterNumbers.Skip(5).Take(4).Should().OnlyContain(cn => cn == 2);
        }
        public void CreatePipeline_BadOutputFile_InitializationError()
        {
            // An unusable output file name must stop the run at initialization and log a fatal entry.

            //arrange
            // Reset the consumer that was set in the ctor, so that OutputFileNames takes over.
            _config.OutputConsumer = OrchestratorConfig.DefaultOutputConsumer;
            _config.OutputFileNames = @"\\BadUNCpath";

            var sut = new EtlOrchestrator(_config);

            //act
            var outcome = sut.ExecuteAsync().Result;

            //assert
            outcome.CompletionStatus.Should().Be(CompletionStatus.InitializationError);
            outcome.RowsRead.Should().Be(0);
            outcome.ClustersRead.Should().Be(0);
            outcome.RowsWritten.Should().Be(0);
            outcome.ClustersWritten.Should().Be(0);

            // the pipeline never started, so no lines were produced
            _resultingLines.Count.Should().Be(0);

            // Intake initialized fine but output did not: the log title box comes first, then the fatal entry.
            _fatalLogMsgs.Count.Should().Be(2);
            _fatalLogMsgs[0].Item1.Should().Be(LogEntrySeverity.None);
            var fatalEntry = _fatalLogMsgs[1];

            fatalEntry.Item1.Should().Be(LogEntrySeverity.Fatal);

            // message text originates in the OutputProvider class; note the escaped backslashes
            fatalEntry.Item2.Contains("Attempt to create output file(s) '\\\\BadUNCpath' failed").Should().BeTrue();
            fatalEntry.Item2.Contains("Exception of type System.IO.IOException occurred: The specified path is invalid. : '\\\\BadUNCpath'").Should().BeTrue();
            fatalEntry.Item3.Exception.Should().BeOfType<IOException>();
        }
        public void processJsonOutput_NoNodesDefinedAtAll_MultipleJsonContents()
        {
            // JSON output where the node settings are given under misspelled names.

            //arrange
            _config.InputDataKind = KindOfTextData.Keyword;
            _config.IntakeSupplier = _inLine;
            _config.RetainQuotes = false;
            _config.InputKeyPrefix = "@p";
            _config.ExcludeItemsMissingPrefix = false;
            _config.ActionOnDuplicateKey = ActionOnDuplicateKey.IgnoreItem;

            // No types are defined in this test, hence all fields (incl. NUM) are of String type.
            _config.ClusterMarker = (rec, prevRec, recCnt) => true; // single record clusters
            _config.AllowOnTheFlyInputFields = true;
            _config.TransformerType = TransformerType.ClusterFilter;
            _config.ClusterFilterPredicate = cluster => true;       // no transformations, data passed as is
            _config.OutputDataKind = KindOfTextData.JSON;

            // Note the misspelled setting names (names are case-sensitive); IndentChars requests pretty-print.
            _config.XmlJsonOutputSettings = "CollectionNODE|Root/Members,RecordNODE|Member,IndentChars|  ";
            _config.OutputWriter = () => _outputWriter1;

            var sut = new EtlOrchestrator(_config);

            //act
            var stats = sut.ExecuteAsync().Result;

            //assert
            stats.RowsRead.Should().Be(5);
            stats.ClustersRead.Should().Be(5);
            stats.ClustersWritten.Should().Be(5);
            stats.RowsWritten.Should().Be(5);

            // Special case where all 3 nodes are missing or bad: the output consists of
            // multiple JSON objects (technically not a valid JSON).
            const string expectedJson = @"{
  ""RECTYPE"": ""XYZ"",
  ""NAME"": ""Mary"",
  ""NUM"": ""123"",
  ""DOB"": ""6/5/88""
}
{
  ""RECTYPE"": ""ABCD"",
  ""ABCD_ID"": ""XYZ00883"",
  ""NAME"": ""Mary"",
  ""NUM"": ""223""
}
{
  ""RECTYPE"": ""ABCD"",
  ""ABCD_ID"": ""XYZ00883"",
  ""NAME"": ""Susan   "",
  ""NUM"": ""323""
}
{
  ""RECTYPE"": ""ABCD"",
  ""ABCD_ID"": ""XYZ00883"",
  ""NAME"": ""Mary"",
  ""NUM"": ""423""
}
{
  ""EOF"": null
}";

            _jsonOutput1.ToString().Should().Be(expectedJson);
        }
Beispiel #17
0
        public void RecordInitiator_RecordInitiatorThrows_TraceBinSettingFault()
        {
            // The RecordInitiator throws on the record with RecNo 3. The run is expected to end as Failed
            // and the ErrorOccurredHandler to receive the origin, context and exception of the fault.

            //arrange
            _config.InputDataKind           = KindOfTextData.Delimited;
            _config.IntakeSupplier          = _inLine;
            _config.ExplicitTypeDefinitions = "AGE|I,DOB|D|M/d/yyyy";
            _config.HeadersInFirstInputRow  = true;
            _config.RetainQuotes            = true;
            _config.RecordInitiator         = (rec, tb) =>
            {
                if (rec.RecNo == 3)
                {
                    throw new MissingMemberException("Missing Member");
                }

                return true;
            };
            _config.ClusterMarker               = (rec, prevRec, recCnt) => { return(true); }; //each record is its own cluster
            _config.MarkerStartsCluster         = true;                                        //predicate matches the first record in cluster
            _config.AllowTransformToAlterFields = true;                                        // otherwise, no new fields would've been allowed
            _config.OutputConsumer              = (t, gc) => { };                              //throwaway consumer

            //Captured by the error handler below: Item1=origin, Item2=context, Item3=exception
            Tuple<string, string, Exception> errorDetails = null;

            _config.ErrorOccurredHandler = (s, e) => { errorDetails = Tuple.Create(e.Origin, e.Context, e.Exception); };

            var orchestrator = new EtlOrchestrator(_config);

            //act
            var results = orchestrator.ExecuteAsync().Result;

            //assert
            results.CompletionStatus.Should().Be(CompletionStatus.Failed);

            //Fail with a clear message (rather than a NullReferenceException below) if the handler was never invoked
            errorDetails.Should().NotBeNull("the ErrorOccurredHandler is expected to be invoked when the RecordInitiator throws");

            errorDetails.Item1.Should().Be("initiating record");
            errorDetails.Item2.Should().Be(" at line #4"); //note that line #1 is a header row (and the exception is thrown on 3rd data row)
            errorDetails.Item3.Should().BeOfType(typeof(MissingMemberException));
            errorDetails.Item3.Message.Should().Be("Missing Member");
        }
        public void GlobalCache_IncrementValue_CorrectAggregateCounter(int iterationNumber, int totalRepeats)
        {
            // Every configurable pipeline hook adds a distinct amount to a single global cache counter;
            // the final counter value must equal the sum of all increments.

            // the parameters are otherwise unused; reading them clears the xUnit1026 warning
            var unused = iterationNumber;

            unused = totalRepeats;

            //arrange
            _config.GlobalCacheElements = new[] { "counter|0" };
            _config.IntakeInitializer = gc => { gc.IncrementValue("counter"); return null; };    // 0 + 1 = 1
            _config.OutputInitializer = gc => { gc.IncrementValue("counter", 2); return null; }; // 1 + 2 = 3
            _config.InputDataKind = KindOfTextData.Delimited;
            _config.HeadersInFirstInputRow = true;
            var lineSupplier = new IntakeSupplierProvider(_intakeLines()).StringSupplier;

            // 3 + 5*3 = 18 (called 5 times: 4 lines incl. header + 1 null (EOD))
            _config.IntakeSupplier = gc =>
            {
                gc.IncrementValue("counter", 3);
                return lineSupplier()?.ToExternalTuple();
            };

            // 18 + 4 = 22
            _config.IntakeDisposer = gc => { gc.IncrementValue("counter", 4); };

            // 22 + 3*5 = 37
            _config.RecordInitiator = (r, tb) =>
            {
                r.GlobalCache.IncrementValue("counter", 5);
                return true;
            };

            // 37 + 3*6 = 55 (note that a single cluster is created)
            _config.ClusterMarker = (r, pr, n) =>
            {
                r.GlobalCache.IncrementValue("counter", 6);
                return false;
            };

            // 55 + 7 = 62
            _config.TransformerType = TransformerType.Clusterbound;
            _config.ClusterboundTransformer = c =>
            {
                c.GlobalCache.IncrementValue("counter", 7);
                return c;
            };

            // 62 + 8 = 70
            _config.RouterType = RouterType.PerCluster;
            _config.ClusterRouter = c =>
            {
                c.GlobalCache.IncrementValue("counter", 8);
                return 1;
            };

            // 70 + 4*9 = 106 (throwaway consumer; executed 4 times: 3 lines without header + 1 null (EOD))
            _config.OutputConsumer = (tpl, gc) => { gc.IncrementValue("counter", 9); };

            // 106 + 10 = 116
            _config.OutputDisposer = gc => { gc.IncrementValue("counter", 10); };

            var sut = new EtlOrchestrator(_config);

            //act
            var outcome = sut.ExecuteAsync().Result;

            //assert
            outcome.CompletionStatus.Should().Be(CompletionStatus.IntakeDepleted);
            outcome.GlobalCache["counter"].Should().BeOfType<int>();
            outcome.GlobalCache["counter"].Should().Be(116); // total of all increments
        }
Beispiel #19
0
        public void processXmlOutput_NoRecordNode_DefaultName()
        {
            // XML output where the node settings are given under misspelled names.

            //arrange
            _config.InputDataKind = KindOfTextData.Keyword;
            _config.IntakeSupplier = _inLine;
            _config.RetainQuotes = false;
            _config.InputKeyPrefix = "@p";
            _config.ExcludeItemsMissingPrefix = false;
            _config.ActionOnDuplicateKey = ActionOnDuplicateKey.IgnoreItem;

            // NUM is Int, everything else String
            _config.TypeDefiner = key => key == "NUM"
                ? new ItemDef(ItemType.Int, null)
                : new ItemDef(ItemType.String, null);
            _config.ClusterMarker = (rec, prevRec, recCnt) => true; // single record clusters
            _config.AllowOnTheFlyInputFields = true;
            _config.TransformerType = TransformerType.ClusterFilter;
            _config.ClusterFilterPredicate = cluster => true;       // no transformations, data passed as is
            _config.OutputDataKind = KindOfTextData.XML;

            // Note the misspelled setting names (names are case-sensitive); IndentChars requests pretty-print.
            _config.XmlJsonOutputSettings = "CollectionNODE|Root/Members,RecordNODE|Member,IndentChars|  ";
            _config.OutputWriter = () => _outputWriter1;

            var sut = new EtlOrchestrator(_config);

            //act
            var stats = sut.ExecuteAsync().Result;

            //assert
            stats.RowsRead.Should().Be(5);
            stats.ClustersRead.Should().Be(5);
            stats.ClustersWritten.Should().Be(5);
            stats.RowsWritten.Should().Be(5);

            // The missing (bad) CollectionNode results in an XML fragment (no root);
            // the missing (bad) RecordNode is substituted by the default value "__record__".
            const string expectedXml = @"<__record__>
  <RECTYPE>XYZ</RECTYPE>
  <NAME>Mary</NAME>
  <NUM>123</NUM>
  <DOB>6/5/88</DOB>
</__record__>
<__record__>
  <RECTYPE>ABCD</RECTYPE>
  <ABCD_ID>XYZ00883</ABCD_ID>
  <NAME>Mary</NAME>
  <NUM>223</NUM>
</__record__>
<__record__>
  <RECTYPE>ABCD</RECTYPE>
  <ABCD_ID>XYZ00883</ABCD_ID>
  <NAME>Susan   </NAME>
  <NUM>323</NUM>
</__record__>
<__record__>
  <RECTYPE>ABCD</RECTYPE>
  <ABCD_ID>XYZ00883</ABCD_ID>
  <NAME>Mary</NAME>
  <NUM>423</NUM>
</__record__>
<__record__>
  <EOF />
</__record__>";

            _xmlOutput1.ToString().Should().Be(expectedXml);
        }
        public void Process_IntakeFileAndSupplierPresent_SupplierWins()
        {
            //Both an input file name and a text supplier function are configured here.
            // The test expects the supplier to be the one that feeds the intake.

            //arrange
            int supplierCalls = 0; //incremented on every call to the supplier function

            var config = new OrchestratorConfig
            {
                InputFileNames     = "C:\\non-existing-file.abc",
                TextIntakeSupplier = () => supplierCalls++ < 5 ? $"Line #{supplierCalls}" : null //5 lines, then null
            };

            var sut = new EtlOrchestrator(config);

            //act
            var outcome = sut.ExecuteAsync().Result;

            //assert
            //note that InputFileNames (if used) would've caused InitializationError
            outcome.CompletionStatus.Should().Be(CompletionStatus.IntakeDepleted);

            outcome.RowsRead.Should().Be(5);
            outcome.ClustersRead.Should().Be(5);
            outcome.RowsWritten.Should().Be(5);
            outcome.ClustersWritten.Should().Be(5);

            supplierCalls.Should().Be(6); //5 calls that returned a line + 1 call that returned null
        }
        public void ProcessPipeline_MultilineLeaderContents_CorrectData()
        {
            //The leader text contains a line break; the test expects it to show up as 2 separate
            // output lines placed in front of the data lines.

            //arrange
            _config.LeaderContents = "Leader One\r\nLeaderTwo";

            var sut = new EtlOrchestrator(_config);

            //act
            var result = sut.ExecuteAsync().Result;

            //assert
            result.RowsRead.Should().Be(10);
            result.ClustersRead.Should().Be(2);
            result.ClustersWritten.Should().Be(2);
            result.RowsWritten.Should().Be(12); //10 input rows + 2 leader lines

            //12 written lines followed by EOD mark (null)
            _resultingLines.Should().HaveCount(13);

            //leader lines come first
            _resultingLines[0].Should().Be("Leader One");
            _resultingLines[1].Should().Be("LeaderTwo");

            //data lines: same as input, except for stripped InputKeyPrefix and insignificant whitespace
            _resultingLines[2].Should().Be("RECTYPE=XYZ,NAME=Mary,NUM=123");
            _resultingLines[4].Should().Be("RECTYPE=ABCD,ABCD_ID=XYZ00883,NAME=Susan,NUM=323");
            _resultingLines[5].Should().Be("RECTYPE=ABCD,ABCD_ID=XYZ00883,NAME=\"Mary,Ann\",NUM=423");
            _resultingLines[8].Should().Be("RECTYPE=ABCD,ABCD_ID=XYZ00883,NAME=Joan,NUM=723");
            _resultingLines[10].Should().Be("ABCD_ID=XYZ00883,NAME=Cindy,NUM=923,RECTYPE=ABCD");
            _resultingLines[11].Should().Be("EOF");

            //EOD mark
            _resultingLines[12].Should().BeNull();
        }
        public void Process_OutputFileAndConsumererPresent_ConsumerWins()
        {
            //File names as well as delegates are configured on both ends of the pipeline (intake and output).
            // The test expects the delegates (IntakeSupplier and OutputConsumer) to be used, so that
            // neither of the unusable file names is ever accessed.

            //arrange
            int inCnt    = 0;                                        //number of calls made to the intake supplier
            var outLines = new List <Tuple <ExternalLine, int> >(); //tuples received by the output consumer

            var config = new OrchestratorConfig();

            config.InputFileNames  = "C:\\non-existing-file.abc";
            config.OutputFileNames = @"\\BadUNCpath"; //unusable output path; must be ignored in presence of OutputConsumer
            config.IntakeSupplier  = gc => inCnt++ < 3 ? $"Line #{inCnt}".ToExternalTuple() : null; //3 lines, then null
            config.OutputConsumer  = (tpl, gc) => outLines.Add(tpl);

            var orchestrator = new EtlOrchestrator(config);

            //act
            var result = orchestrator.ExecuteAsync().Result;

            //assert
            result.CompletionStatus.Should().Be(CompletionStatus.IntakeDepleted); //note that either InputFileNames or OutputFileNames would've caused InitializationError
            result.RowsRead.Should().Be(3);
            result.ClustersRead.Should().Be(3);
            result.RowsWritten.Should().Be(3);
            result.ClustersWritten.Should().Be(3);
            inCnt.Should().Be(4);          //3 calls that returned a line + 1 call that returned null
            outLines.Count.Should().Be(4); //incl. EOD, i.e. null
            outLines[0].Item1.Text.Should().Be("Line #1");
            outLines[0].Item2.Should().Be(1);
            outLines[2].Item1.Text.Should().Be("Line #3");
            outLines[2].Item2.Should().Be(1);
            outLines[3].Should().BeNull(); //EOD mark
        }
        public void ProcessPipeline_QuotationModeAlways_CorrectData()
        {
            //With QuotationMode.Always, every value on output is expected to be surrounded with quotes.

            //arrange
            _config.ClusterMarker = (current, previous, count) => true; //each record is its own cluster
            _config.QuotationMode = QuotationMode.Always;

            var sut = new EtlOrchestrator(_config);

            //act
            var result = sut.ExecuteAsync().Result;

            //assert
            result.RowsRead.Should().Be(10);
            result.ClustersRead.Should().Be(10);
            result.ClustersWritten.Should().Be(10);
            result.RowsWritten.Should().Be(10);

            //10 written lines followed by EOD mark (null)
            _resultingLines.Should().HaveCount(11);

            _resultingLines[0].Should().Be("RECTYPE=\"XYZ\",NAME=\"Mary\",NUM=\"123\"");
            _resultingLines[2].Should().Be("RECTYPE=\"ABCD\",ABCD_ID=\"XYZ00883\",NAME=\"Susan\",NUM=\"323\"");
            _resultingLines[3].Should().Be("RECTYPE=\"ABCD\",ABCD_ID=\"XYZ00883\",NAME=\"Mary,Ann\",NUM=\"423\"");
            _resultingLines[6].Should().Be("RECTYPE=\"ABCD\",ABCD_ID=\"XYZ00883\",NAME=\"Joan\",NUM=\"723\"");
            _resultingLines[8].Should().Be("ABCD_ID=\"XYZ00883\",NAME=\"Cindy\",NUM=\"923\",RECTYPE=\"ABCD\"");
            _resultingLines[9].Should().Be("EOF");

            //EOD mark
            _resultingLines[10].Should().BeNull();
        }
        public void ProcessPipeline_SimpleProcessCancel_CanceledAndIncompleteOutput(int iterationNumber, int totalRepeats)
        {
            //Both parameters are read only to clear the xUnit1026 warning; their values play no role in the test.
            var unused = iterationNumber;

            unused = totalRepeats;

            //This test is not exact: it attempts to cancel the pipeline while the transformer (ClusterFilterPredicate) is running.
            //On MsTests, this test occasionally (rarely) failed (Expected Canceled, but found IntakeDepleted).

            //arrange
            _config.ClusterFilterPredicate = cluster =>
            {
                Thread.Sleep(5); //wait 5ms to ease out cancellation
                return true;     //accept every cluster
            };
            _config.ConcurrencyLevel = 1; //timings (delays in ClusterFilterTransformer) need to cumulate for successful test

            var sut = new EtlOrchestrator(_config);

            //act
            var execution = sut.ExecuteAsync();

            Thread.Sleep(10);
            sut.CancelExecution();
            var outcome = execution.Result;

            //assert
            outcome.CompletionStatus.Should().Be(CompletionStatus.Canceled);

            //note there is no guarantee that the Intake phase will complete
            outcome.RowsRead.Should().BeLessOrEqualTo(10);
            outcome.ClustersRead.Should().BeLessOrEqualTo(2);

            _resultingLines.Count.Should().BeLessThan(11);
        }
        public void ProcessPipeline_OutputFields_CorrectData()
        {
            //The expected output lines contain exactly the fields listed in OutputFields, in the listed order;
            // fields that have no value in a given record (e.g. DUMMY1) are expected to appear as a key alone.

            //arrange
            _config.OutputFields = "NAME, DUMMY1, RECTYPE, DUMMY2, NUM";

            var sut = new EtlOrchestrator(_config);

            //act
            var result = sut.ExecuteAsync().Result;

            //assert
            result.RowsRead.Should().Be(10);
            result.ClustersRead.Should().Be(2);
            result.ClustersWritten.Should().Be(2);
            result.RowsWritten.Should().Be(10);

            //10 written lines followed by EOD mark (null)
            _resultingLines.Should().HaveCount(11);

            _resultingLines[0].Should().Be("NAME=Mary,DUMMY1,RECTYPE=XYZ,DUMMY2,NUM=123");
            _resultingLines[2].Should().Be("NAME=Susan,DUMMY1,RECTYPE=ABCD,DUMMY2,NUM=323");
            _resultingLines[3].Should().Be("NAME=\"Mary,Ann\",DUMMY1,RECTYPE=ABCD,DUMMY2,NUM=423");
            _resultingLines[6].Should().Be("NAME=Joan,DUMMY1,RECTYPE=ABCD,DUMMY2,NUM=723");
            _resultingLines[8].Should().Be("NAME=Cindy,DUMMY1,RECTYPE=ABCD,DUMMY2,NUM=923");
            _resultingLines[9].Should().Be("NAME,DUMMY1,RECTYPE,DUMMY2,NUM");

            //EOD mark
            _resultingLines[10].Should().BeNull();
        }
        public void ProcessPipeline_SimpleProcessTimeout_TimedOutAndIncompleteOutput(int iterationNumber, int totalRepeats)
        {
            //Both parameters are read only to clear the xUnit1026 warning; their values play no role in the test.
            var unused = iterationNumber;

            unused = totalRepeats;

            //This test is not exact: it compares the amount of TimeLimit against the delay in the transformer, i.e. ClusterFilterPredicate.
            //In xUnit, assuming transformer delay of 6ms, it consistently passes with TimeLimit values of 15ms or less.
            //On MsTests, this test occasionally failed (Expected TimedOut, but found IntakeDepleted.) even with timeout of 10ms.

            //arrange
            _config.TimeLimit              = TimeSpan.FromMilliseconds(10); //short enough, so that timeout occurs
            _config.ClusterFilterPredicate = cluster =>
            {
                Thread.Sleep(6); //wait 6ms to ease out cancellation
                return true;     //accept every cluster
            };
            _config.ConcurrencyLevel = 1; //timings (delays in ClusterFilterPredicate) need to cumulate for successful test

            var sut = new EtlOrchestrator(_config);

            //act
            var outcome = sut.ExecuteAsync().Result;

            //assert
            outcome.CompletionStatus.Should().Be(CompletionStatus.TimedOut);

            //note there is no guarantee that the Intake phase will complete
            outcome.RowsRead.Should().BeLessOrEqualTo(10);
            outcome.ClustersRead.Should().BeLessOrEqualTo(2);

            _resultingLines.Count.Should().BeLessThan(11);
        }
Beispiel #27
0
        public void ProcessPipeline_ArbitraryOutputTrickyConfig_CorrectData()
        {
            //Each of the output definitions below (except for the first one) contains more than one token.
            // As per expected lines, only the first token of a definition gets substituted; the remaining ones stay as is.

            //arrange
            _config.ArbitraryOutputDefs = new[]
            {
                "These fields have multiple tokens each, but only the first one gets substituted:",
                " rectype, name, num and eof: {RECTYPE}, {NAME}, {NUM} and {EOF};",
                " num and name stay unchanged when dummy is first{DUMMY}: {NUM} and {NAME};",
                " eof is absent from all rows, but last: {EOF} and {NAME}." //but even in last row EOF value is empty
            };

            var sut = new EtlOrchestrator(_config);

            //act
            var result = sut.ExecuteAsync().Result;

            //assert
            result.RowsRead.Should().Be(5);
            result.ClustersRead.Should().Be(5);
            result.ClustersWritten.Should().Be(5);
            result.RowsWritten.Should().Be(5);

            _resultingLines.Should().HaveCount(5);

            //Expected lines differ only in the value substituted for the {RECTYPE} token.
            const string beforeRecType = "These fields have multiple tokens each, but only the first one gets substituted: rectype, name, num and eof: ";
            const string afterRecType  = ", {NAME}, {NUM} and {EOF}; num and name stay unchanged when dummy is first: {NUM} and {NAME}; eof is absent from all rows, but last:  and {NAME}.";

            _resultingLines[0].Should().Be(beforeRecType + "XYZ" + afterRecType);
            _resultingLines[2].Should().Be(beforeRecType + "ABCD" + afterRecType);
            _resultingLines[4].Should().Be(beforeRecType + afterRecType); //last row, empty value in place of {RECTYPE}
        }
        public void CreatePipeline_MissingInputFile_InitializationErrorNoOutputInitAttempted()
        {
            //Both input and output point to unusable locations. The test expects only the input failure
            // to be reported, i.e. a single fatal log entry.

            //arrange
            _config.IntakeSupplier  = OrchestratorConfig.DefaultIntakeSupplier; //to reset supplier set in ctor (and thus allow InputFileNames to take over)
            _config.OutputConsumer  = OrchestratorConfig.DefaultOutputConsumer; //to reset consumer set in ctor (and thus allow OutputFileNames to take over)
            _config.InputFileNames  = "C:/non-existing_file.abc";
            _config.OutputFileNames = @"\\BadUNCpath";
            //Note that EagerInitialization is false by default!

            var sut = new EtlOrchestrator(_config);

            //act
            var outcome = sut.ExecuteAsync().Result;

            //assert
            outcome.CompletionStatus.Should().Be(CompletionStatus.InitializationError);
            outcome.RowsRead.Should().Be(0);
            outcome.ClustersRead.Should().Be(0);
            outcome.RowsWritten.Should().Be(0);
            outcome.ClustersWritten.Should().Be(0);

            //nothing, the pipeline hasn't even started
            _resultingLines.Count.Should().Be(0);

            //input file failed; hence output initialization wasn't attempted (even though it would've also failed)
            _fatalLogMsgs.Count.Should().Be(2);

            //1st entry is log title box
            _fatalLogMsgs[0].Item1.Should().Be(LogEntrySeverity.None);

            //2nd entry is the fatal error caused by the input file
            var fatalEntry = _fatalLogMsgs[1];

            fatalEntry.Item1.Should().Be(LogEntrySeverity.Fatal);
            fatalEntry.Item2.Contains("Attempt to access input file(s) 'C:/non-existing_file.abc' failed").Should().BeTrue(); //text from IntakeProvider class
            fatalEntry.Item2.Contains("System.IO.FileNotFoundException").Should().BeTrue();
            fatalEntry.Item3.Exception.Should().BeOfType <FileNotFoundException>();
        }
Beispiel #29
0
        public void ProcessPipeline_ArbitraryOutputNullDefs_NullDefsIgnored()
        {
            //Two of the output definitions below are null; as per expected lines, they contribute nothing to output.

            //arrange
            string[] outputDefs =
            {
                "Record type is {RECTYPE},",
                null,
                " nothing here: <<{BADKEY}>>.",
                " Void item here.",
                null
            };

            _config.ArbitraryOutputDefs = outputDefs;

            var sut = new EtlOrchestrator(_config);

            //act
            var result = sut.ExecuteAsync().Result;

            //assert
            result.RowsRead.Should().Be(5);
            result.ClustersRead.Should().Be(5);
            result.ClustersWritten.Should().Be(5);
            result.RowsWritten.Should().Be(5);

            _resultingLines.Should().HaveCount(5);

            _resultingLines[0].Should().Be("Record type is XYZ, nothing here: <<>>. Void item here.");
            _resultingLines[2].Should().Be("Record type is ABCD, nothing here: <<>>. Void item here.");

            //EOF, tokens are empty
            _resultingLines[4].Should().Be("Record type is , nothing here: <<>>. Void item here.");
        }
Beispiel #30
0
        public void ProcessPipeline_RawOutputFieldsSpecified_OnlySelectedItemsMerged()
        {
            //Only 3 fields are listed in OutputFields; each expected line consists of the values
            // of these fields (where present) merged together with no separators.

            //arrange
            _config.InputDataKind = KindOfTextData.Keyword;

            var supplierProvider = new IntakeSupplierProvider(_intakeLinesMF());

            _config.SetIntakeSupplier(supplierProvider.StringSupplier);

            _config.RetainQuotes              = false;
            _config.InputKeyPrefix            = "@p";
            _config.ExcludeItemsMissingPrefix = false;
            _config.ActionOnDuplicateKey      = ActionOnDuplicateKey.IgnoreItem;
            _config.TypeDefiner               = key => new ItemDef(key == "NUM" ? ItemType.Int : ItemType.String, null); //NUM Int, everything else String
            _config.OutputFields              = "RECTYPE,ABCD_ID,NAME";

            var sut = new EtlOrchestrator(_config);

            //act
            var result = sut.ExecuteAsync().Result;

            //assert
            result.RowsRead.Should().Be(5);
            result.ClustersRead.Should().Be(5);
            result.ClustersWritten.Should().Be(5);
            result.RowsWritten.Should().Be(5);

            _resultingLines.Should().HaveCount(5);

            _resultingLines[0].Should().Be("XYZMary");
            _resultingLines[1].Should().Be("ABCDXYZ00883   Mary");
            _resultingLines[2].Should().Be("ABCDXYZ00883Susan   ");
            _resultingLines[3].Should().Be("ABCDXYZ00883   Mary");

            //EOF, no value for any of the specified keys
            _resultingLines[4].Should().Be(string.Empty);
        }