Beispiel #1
0
        /// <summary>
        /// Runs the pipeline over the keyword lines supplied by <c>_intakeLinesMF()</c> and checks
        /// the processing counts as well as the five lines collected in <c>_resultingLines</c>.
        /// </summary>
        public void ProcessPipeline_RawOutputMultiItems_ItemsMerged()
        {
            //arrange
            _config.InputDataKind = KindOfTextData.Keyword;
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLinesMF()).StringSupplier);
            _config.RetainQuotes = false;
            _config.InputKeyPrefix = "@p";
            _config.ExcludeItemsMissingPrefix = false;
            _config.ActionOnDuplicateKey = ActionOnDuplicateKey.IgnoreItem;
            //NUM is typed as Int, every other key as String
            _config.TypeDefiner = key => new ItemDef(key == "NUM" ? ItemType.Int : ItemType.String, null);

            var sut = new EtlOrchestrator(_config);

            //act
            var tally = sut.ExecuteAsync().Result;

            //assert
            tally.RowsRead.Should().Be(5);
            tally.ClustersRead.Should().Be(5);
            tally.ClustersWritten.Should().Be(5);
            tally.RowsWritten.Should().Be(5);

            _resultingLines.Should().HaveCount(5);

            var expectedLines = new[]
            {
                "XYZMary123",
                "ABCDXYZ00883   Mary223",
                "ABCDXYZ00883Susan   323",
                "ABCDXYZ00883   Mary423  ", //trailing spaces respected
                string.Empty                //EOF, no value
            };

            for (var i = 0; i < expectedLines.Length; i++)
            {
                _resultingLines[i].Should().Be(expectedLines[i]);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Prepares the configuration used by the arbitrary-output tests: keyword intake,
        /// single-record clusters, a pass-through cluster filter and arbitrary output definitions.
        /// Non-null output lines are appended to <c>_resultingLines</c>.
        /// </summary>
        public EtlOrchestrator_tests_ArbitraryOutput()
        {
            _config = new OrchestratorConfig { InputDataKind = KindOfTextData.Keyword };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            _config.RetainQuotes = false;
            _config.InputKeyPrefix = "@p";
            _config.ExcludeItemsMissingPrefix = false;
            _config.ActionOnDuplicateKey = ActionOnDuplicateKey.IgnoreItem;
            //NUM is typed as Int, every other key as String
            _config.TypeDefiner = key => new ItemDef(key == "NUM" ? ItemType.Int : ItemType.String, null);
            //every record forms its own cluster
            _config.ClusterMarker = (rec, prevRec, recCnt) => true;
            _config.AllowOnTheFlyInputFields = true;
            _config.TransformerType = TransformerType.ClusterFilter;
            //no transformations, data passed as is
            _config.ClusterFilterPredicate = cluster => true;
            _config.OutputDataKind = KindOfTextData.Arbitrary;
            _config.ArbitraryOutputDefs = new[]
            {
                "Record type is {RECTYPE},",
                " name is {NAME}",
                " and number is {NUM}.",
                " Void item here."
            };
            //collect only the non-null lines
            _config.SetOutputConsumer(line =>
            {
                if (line == null)
                {
                    return;
                }

                _resultingLines.Add(line);
            });

            _resultingLines = new List<string>();
        }
        /// <summary>
        /// Prepares the fixture for the cluster-marker tests: a keyword-intake configuration whose
        /// first three lines are attributed to source 1 and the remaining ones to source 2, a queue
        /// (fed by <c>_resultsExtractor</c>) that receives the resulting clusters, and a traceable
        /// asserter that formats clusters and exceptions as CSV rows.
        /// </summary>
        public ClusterMarker_tests()
        {
            _config = new OrchestratorConfig
            {
                InputDataKind = KindOfTextData.Keyword
            };
            var sn = 0;                                                                                                           //closure to facilitate SourceNo calculation (note that GetStringTupleSupplier calls the sourceNoEval function (provided as parameter below) exactly once per iteration)

            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).GetStringTupleSupplier(() => sn++ < 3 ? 1 : 2)); //first 3 - source 1, rest - source 2
            _config.RetainQuotes   = false;
            _config.InputKeyPrefix = "@p";
            // The DeferTransformation.Indefinitely setting below prevents Data Conveyer from linking clusteringBlock to transformingBlock.
            // Without it, transformingBlock could randomly "steal" clusters from _resultsExtractor, which would cause random test failures.
            _config.DeferTransformation      = DeferTransformation.Indefinitely;
            _config.AllowOnTheFlyInputFields = true;
            _config.OutputConsumer           = (t, gc) => { }; //throwaway consumer

            //prepare extraction of the results from the pipeline
            _resultingClusters = new ConcurrentQueue <KeyValCluster>();
            _resultsExtractor  = new ActionBlock <KeyValCluster>(c => _resultingClusters.Enqueue(c));

            // AsserterOutput tuple: Item1=Ext, Item2=Header, Item3=Formatter, Item4=ExcFormatter
            // ShowRec renders a record as e.g. 223{4} (item NUM = 223, 4 items); indexes past the end give an empty string.
            // Both record accesses are null-conditional, so a null record renders as "{}" instead of throwing.
            string ShowRec(ICluster c, int idx) => idx >= c.Count ? string.Empty : c[idx]?["NUM"] + "{" + c[idx]?.Count + "}";

            AsserterOutput asserterOutputToCsv = (".csv",
                                                  "ClstrNo,NoOfRecs,Rec1,Rec2,Rec3,Rec4,Rec5,Rec6,Rec7,Rec8,Rec9,Rec10",
                                                  c => $"{ c.ClstrNo },{ c.Count },{ ShowRec(c,0) },{ ShowRec(c,1) },{ ShowRec(c,2) },{ ShowRec(c,3) },{ ShowRec(c,4) },{ ShowRec(c,5) },{ ShowRec(c,6) },{ ShowRec(c,7) },{ ShowRec(c,8) },{ ShowRec(c,9) }",
                                                  // Split on both "\r\n" and "\n" so that every line of the exception text becomes
                                                  // its own quoted row no matter which line ending the platform produced.
                                                  ex => ex.ToString()
                                                          .Split(new[] { "\r\n", "\n" }, System.StringSplitOptions.None)
                                                          .Select(l => "\"" + l.Replace('"', '\'') + "\"")
                                                  );

            _traceableAsserter = new TraceableAsserter <KeyValCluster>("ClusterTestFailures\\", asserterOutputToCsv);
        }
        private readonly List<string> _resultingLines; //receives the text of every line sent to the output consumer

        /// <summary>
        /// Prepares the configuration for the process-result tests. A mocked logger with a Fatal
        /// threshold records Fatal entries in <c>_fatalLogMsgs</c>; clusters start at records whose
        /// RECTYPE is XYZ, and the output consumer stores the text of each output line.
        /// </summary>
        public EtlOrchestrator_tests_ProcessResult()
        {
            _fatalLogMsgs = new List<Tuple<LogEntrySeverity, string, LogEntry>>();

            //records Fatal messages to _fatalLogMsgs (plus title box)
            var mockFatalLogger = new Mock<ILogger>();
            mockFatalLogger.Setup(l => l.LoggingThreshold).Returns(LogEntrySeverity.Fatal);
            mockFatalLogger
                .Setup(l => l.Log(It.IsAny<LogEntry>()))
                .Callback((LogEntry entry) =>
                {
                    if (entry.Severity <= LogEntrySeverity.Fatal)
                    {
                        var logged = Tuple.Create(entry.Severity, entry.MessageOnDemand(), entry);
                        _fatalLogMsgs.Add(logged);
                    }
                });

            _config = new OrchestratorConfig(mockFatalLogger.Object) { InputDataKind = KindOfTextData.Keyword };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            _config.RetainQuotes = false;
            _config.InputKeyPrefix = "@p";
            _config.ExcludeItemsMissingPrefix = false;
            _config.ActionOnDuplicateKey = ActionOnDuplicateKey.IgnoreItem;
            //NUM is typed as Int, every other key as String
            _config.TypeDefiner = key => new ItemDef(key == "NUM" ? ItemType.Int : ItemType.String, null);
            //records having @pRECTYPE=XYZ denote start of the cluster
            _config.ClusterMarker = (rec, prevRec, recCnt) => (string)rec["RECTYPE"] == "XYZ";
            //predicate matches the first record in cluster
            _config.MarkerStartsCluster = true;
            _config.AllowOnTheFlyInputFields = true;
            _config.TransformerType = TransformerType.ClusterFilter;
            //place the lines on the list to be tested/asserted
            _config.OutputConsumer = (t, gc) => _resultingLines.Add(t?.Item1.Text);

            //prepare extraction of the results from the pipeline
            _resultingLines = new List<string>();
        }
        /// <summary>
        /// Runs intake with headers taken from the first input row and verifies that four
        /// single-record clusters reach the extractor, that twelve fields are in use, and that
        /// selected values of the first and last record match the expected data.
        /// </summary>
        public void ProcessPipeline_SimpleConfig_CorrectData()
        {
            //arrange
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            _config.HeadersInFirstInputRow = true;

            var sut = TestUtilities.GetTestOrchestrator(_config, "_clusteringBlock", _resultsExtractor);

            //act
            _ = sut.ExecuteAsync();
            _resultsExtractor.Completion.Wait();

            //assert
            var clusters = _resultingClusters.ToList();

            clusters.Should().HaveCount(4); //4 single-record clusters

            //the intake provider is a private field of the orchestrator
            var intakeProvider = (IntakeProvider)new PrivateAccessor(sut).GetField("_intakeProvider");
            intakeProvider.FieldsInUse.Should().HaveCount(12);

            var firstRec = clusters[0][0];

            firstRec["ID"].Should().Be("1");
            firstRec["Country"].Should().Be("Papua New Guinea");
            firstRec["IATA/FAA"].Should().Be("GKA");
            firstRec["Latitude"].Should().Be("-6.081689");
            firstRec["Offset from UTC"].Should().Be("10");
            firstRec["Timezone"].Should().Be("Pacific/Port_Moresby");

            var lastRec = clusters[3][0];

            lastRec[0].Should().Be("4");
            lastRec[1].Should().Be("Nadzab");
            lastRec["ICAO"].Should().Be("AYNZ");
            lastRec[11].Should().Be("Pacific/Port_Moresby");
            lastRec[12].Should().BeNull();
        }
        /// <summary>
        /// Assigns source numbers to intake lines in a round-robin fashion (1,2,3,1,2,3,...) and
        /// verifies the record numbers and source numbers of the two resulting clusters
        /// (5 records followed by 4 records).
        /// </summary>
        public void ProcessIntake_RoundRobin_CorrectSourceNumbers()
        {
            //arrange
            //closure to facilitate SourceNo calculation (note that GetStringTupleSupplier calls the
            //sourceNoEval function (provided as parameter below) exactly once per iteration)
            var sn = 0;

            //assign sourceNo in a round-robin fashion: 1,2,3,1,2,3,1,2,3
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).GetStringTupleSupplier(() => sn++ % 3 + 1));

            var sut = TestUtilities.GetTestOrchestrator(_config, "_clusteringBlock", _resultsExtractor);

            //act
            _ = sut.ExecuteAsync();
            _resultsExtractor.Completion.Wait();

            //assert
            var clusters = _resultingClusters.ToList();

            clusters.Should().HaveCount(2);

            //expected source numbers of the records, one row per cluster
            var expectedSourceNos = new[]
            {
                new[] { 1, 2, 3, 1, 2 },
                new[] { 3, 1, 2, 3 }
            };

            var expectedRecNo = 1; //record numbers run consecutively across both clusters
            for (var clstrIdx = 0; clstrIdx < expectedSourceNos.Length; clstrIdx++)
            {
                var sourceNos = expectedSourceNos[clstrIdx];

                clusters[clstrIdx].Count.Should().Be(sourceNos.Length);
                clusters[clstrIdx].StartRecNo.Should().Be(expectedRecNo);
                clusters[clstrIdx].StartSourceNo.Should().Be(sourceNos[0]);

                for (var recIdx = 0; recIdx < sourceNos.Length; recIdx++)
                {
                    clusters[clstrIdx][recIdx].RecNo.Should().Be(expectedRecNo);
                    clusters[clstrIdx][recIdx].SourceNo.Should().Be(sourceNos[recIdx]);
                    expectedRecNo++;
                }
            }
        }
        /// <summary>
        /// Runs intake with three explicitly defined input fields (Year, Month, Data), value
        /// trimming on and quote retention off, then verifies the keys and values of the first
        /// and last of the ten resulting single-record clusters.
        /// </summary>
        public void ProcessPipeline_TrimValues_CorrectData()
        {
            //arrange
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            _config.HeadersInFirstInputRow = false;
            _config.RetainQuotes = false;
            _config.TrimInputValues = true;
            _config.InputFields = "Year|4,Month|4,Data|12";

            var sut = TestUtilities.GetTestOrchestrator(_config, "_clusteringBlock", _resultsExtractor);

            //act
            _ = sut.ExecuteAsync();
            _resultsExtractor.Completion.Wait();

            //assert
            var clusters = _resultingClusters.ToList();

            clusters.Should().HaveCount(10);

            //the intake provider is a private field of the orchestrator
            var intakeProvider = (IntakeProvider)new PrivateAccessor(sut).GetField("_intakeProvider");
            intakeProvider.FieldsInUse.Count.Should().Be(3);

            var firstRec = clusters[0][0];

            firstRec.Keys[0].Should().Be("Year");
            firstRec[0].Should().Be("1966");
            firstRec["Year"].Should().Be("1966");
            firstRec.Keys[1].Should().Be("Month");
            firstRec[1].Should().Be("11");
            firstRec["Month"].Should().Be("11");
            firstRec.GetItem(2).Key.Should().Be("Data");
            firstRec[2].Should().Be("34943905");
            firstRec["Data"].Should().Be("34943905");

            var lastRec = clusters[9][0];

            lastRec.Keys[0].Should().Be("Year");
            lastRec[0].Should().Be("1967");
            lastRec["Year"].Should().Be("1967");
            lastRec.GetItem(1).Key.Should().Be("Month");
            lastRec[1].Should().Be("08");
            lastRec["Month"].Should().Be("08");
            lastRec.GetItem(2).Key.Should().Be("Data");
            lastRec[2].Should().Be("5308387");
            lastRec["Data"].Should().Be("5308387");

            //quotes are stripped
            clusters[6][0]["Data"].Should().Be("279438");
        }
        private readonly ConcurrentQueue<IReadOnlyDictionary<string, object>> _traceBinHistory; //will contain results to verify

        /// <summary>
        /// Prepares an X12-intake configuration fed from <c>_intakeLines()</c> and an empty
        /// <c>_traceBinHistory</c> queue.
        /// </summary>
        public EtlOrchestrator_tests_TraceBin()
        {
            _config = new OrchestratorConfig { InputDataKind = KindOfTextData.X12 };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            //no type definitions (everything string)

            _traceBinHistory = new ConcurrentQueue<IReadOnlyDictionary<string, object>>();
        }
Beispiel #9
0
        private readonly ActionBlock<KeyValCluster> _resultsExtractor; //block that loads resulting clusters into the container

        /// <summary>
        /// Prepares a raw-intake configuration fed from <c>_intakeLines()</c>, plus the extractor
        /// block that enqueues every received cluster into <c>_resultingClusters</c>.
        /// </summary>
        public SimpleIntakeSupplier_tests()
        {
            _config = new OrchestratorConfig { InputDataKind = KindOfTextData.Raw };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            //to prevent linking clusteringBlock to transformingBlock (which could steal clusters from results extractor)
            _config.DeferTransformation = DeferTransformation.Indefinitely;

            _resultsExtractor = new ActionBlock<KeyValCluster>(cluster => _resultingClusters.Enqueue(cluster));
            _resultingClusters = new ConcurrentQueue<KeyValCluster>();
        }
        private readonly ActionBlock<KeyValRecord> _resultsExtractor; //block that loads resulting records into the container

        /// <summary>
        /// Prepares a delimited-intake configuration with a throwaway text output consumer, plus
        /// the extractor block that adds every received record to <c>_resultingRecords</c>.
        /// </summary>
        public EtlOrchestrator_tests_GlobalCache()
        {
            _config = new OrchestratorConfig { InputDataKind = KindOfTextData.Delimited };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            //throwaway consumer
            _config.TextOutputConsumer = line => { };

            //prepare extraction of the results from the pipeline
            _resultingRecords = new List<KeyValRecord>();
            _resultsExtractor = new ActionBlock<KeyValRecord>(record => _resultingRecords.Add(record));
        }
 /// <summary>
 /// Prepares a flat-intake configuration with headers in the first row, three explicitly
 /// defined input fields, a pass-through cluster filter and an output consumer that
 /// discards every line.
 /// </summary>
 public EtlOrchestrator_tests_FieldsToUse()
 {
     _config = new OrchestratorConfig { InputDataKind = KindOfTextData.Flat };
     _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
     _config.HeadersInFirstInputRow = true;
     _config.InputFields = "InFld1|4,InFld2|4,InFld3|12";
     _config.TransformerType = TransformerType.ClusterFilter;
     // no transformations, data passed as is
     _config.ClusterFilterPredicate = cluster => true;
     // throwaway consumer, these tests do not evaluate output
     _config.SetOutputConsumer((string line) => { });
 }
        private readonly ActionBlock<KeyValCluster> _resultsExtractor; //block that loads resulting clusters into the container

        /// <summary>
        /// Prepares a keyword-intake configuration (quotes not retained, on-the-fly input fields
        /// allowed), plus the extractor block that enqueues every received cluster into
        /// <c>_resultingClusters</c>.
        /// </summary>
        public EtlOrchestrator_tests_KwIntake()
        {
            _config = new OrchestratorConfig { InputDataKind = KindOfTextData.Keyword };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            _config.RetainQuotes = false;
            _config.AllowOnTheFlyInputFields = true;
            //to prevent linking clusteringBlock to transformingBlock (which could steal clusters from results extractor)
            _config.DeferTransformation = DeferTransformation.Indefinitely;

            _resultingClusters = new ConcurrentQueue<KeyValCluster>();
            _resultsExtractor = new ActionBlock<KeyValCluster>(cluster => _resultingClusters.Enqueue(cluster));
        }
Beispiel #13
0
        /// <summary>
        /// Prepares a delimited-intake configuration (headers in the first row, concurrency level 3,
        /// cluster-bound transformer) together with the snapshot dictionary <c>_pbSnapshots</c> and
        /// the <c>_locker</c> object.
        /// </summary>
        public EtlOrchestrator_tests_PropertyBin()
        {
            _config = new OrchestratorConfig { InputDataKind = KindOfTextData.Delimited };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            _config.HeadersInFirstInputRow = true;
            _config.ConcurrencyLevel = 3;
            _config.TransformerType = TransformerType.Clusterbound;

            _pbSnapshots = new Dictionary<string, IDictionary<string, object>>();
            _locker = new object();
        }
        /// <summary>
        /// Feeds keyword lines from <c>_intakeLinesKW()</c> through a cluster-bound transformer that
        /// appends an NM1 segment to clusters whose first record has RECTYPE XYZ, then verifies the
        /// processing counts and the six lines collected in <c>_resultingLines</c>.
        /// </summary>
        public void ProcessPipeline_X12OutputKwInputSegmentAdded_CorrectData()
        {
            //arrange
            _config.InputDataKind = KindOfTextData.Keyword;
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLinesKW()).StringSupplier);
            _config.InputKeyPrefix = "@p";
            _config.TransformerType = TransformerType.Clusterbound;
            _config.ClusterboundTransformer = clstr =>
            {
                if ((string)clstr[0]["RECTYPE"] != "XYZ")
                {
                    return clstr; //nothing to add
                }

                // add NM1 segment after XYZ, i.e. first rec
                clstr.AddRecord(clstr[0].CreateEmptyX12Segment("NM1", 4));
                var addedSegment = clstr[clstr.Count - 1];
                addedSegment[3] = "Smith";
                addedSegment["Elem004"] = "Lucie";
                return clstr;
            };

            var sut = new EtlOrchestrator(_config);

            //act
            var tally = sut.ExecuteAsync().Result;

            //assert
            tally.RowsRead.Should().Be(5);
            tally.ClustersRead.Should().Be(5);
            tally.ClustersWritten.Should().Be(5);
            tally.RowsWritten.Should().Be(6); // 5 + 1 (NM1 added to 1st cluster)

            _resultingLines.Count.Should().Be(6);

            var expectedLines = new[]
            {
                "XYZ*Mary*123",
                "NM1***Smith*Lucie",
                "ABCD*XYZ00883*Mary*223",
                "ABCD*XYZ00883*Susan   *323",
                "ABCD*XYZ00883*Mary*423",
                "GE"
            };

            for (var i = 0; i < expectedLines.Length; i++)
            {
                _resultingLines[i].Should().Be(expectedLines[i]);
            }
        }
Beispiel #15
0
        /// <summary>
        /// Runs intake using the <c>ExternalTupleSupplier</c>, expects eight single-record clusters
        /// and hands all their records to <c>ValidateResultingRecords</c>.
        /// </summary>
        public void ProcessIntake_SimpleSupplier_SourceNoAlways1()
        {
            //arrange
            var supplierProvider = new IntakeSupplierProvider(_intakeLines());
            _config.SetIntakeSupplier(supplierProvider.ExternalTupleSupplier);

            var sut = TestUtilities.GetTestOrchestrator(_config, "_clusteringBlock", _resultsExtractor);

            //act
            _ = sut.ExecuteAsync();
            _resultsExtractor.Completion.Wait();

            //assert
            _resultingClusters.Should().HaveCount(8); // 8 single record clusters

            var allRecords = _resultingClusters.SelectMany(cluster => cluster.Records).ToList();
            ValidateResultingRecords(allRecords);
        }
Beispiel #16
0
        private readonly ActionBlock<KeyValCluster> _resultsExtractor; //block that loads resulting clusters into the container

        /// <summary>
        /// Prepares an arbitrary-intake configuration with three input definitions (Segment, ISA06,
        /// N301) and single-record clusters, plus the extractor block that enqueues every received
        /// cluster into <c>_resultingClusters</c>.
        /// </summary>
        public EtlOrchestrator_tests_ArbitraryIntake()
        {
            _config = new OrchestratorConfig { InputDataKind = KindOfTextData.Arbitrary };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            //no type definitions (everything string)
            //each record is its own cluster
            _config.ClusterMarker = (rec, prevRec, recCnt) => true;
            //predicate matches the first record in cluster
            _config.MarkerStartsCluster = true;
            _config.ArbitraryInputDefs = new[]
            {
                "Segment ^[^*]*",
                @"ISA06 (?<=^ISA\*([^*]*\*){5})([^*]*)",
                @"N301 (?<=^N3\*)([^*]*)"
            };
            //to prevent linking clusteringBlock to transformingBlock (which could steal clusters from results extractor)
            _config.DeferTransformation = DeferTransformation.Indefinitely;

            _resultingClusters = new ConcurrentQueue<KeyValCluster>();
            _resultsExtractor = new ActionBlock<KeyValCluster>(cluster => _resultingClusters.Enqueue(cluster));
        }
Beispiel #17
0
        private ConcurrentQueue<Tuple<ExternalLine, int>> _resultingLines; //Item2=targetNo

        /// <summary>
        /// Prepares a keyword-intake configuration whose lines get source numbers in a round-robin
        /// fashion (1,2,3,...), with clusters starting at records whose RECTYPE is XYZ. Every output
        /// tuple is enqueued into <c>_resultingLines</c>.
        /// </summary>
        public EtlOrchestrator_tests_Router()
        {
            _config = new OrchestratorConfig { InputDataKind = KindOfTextData.Keyword };

            //closure to facilitate SourceNo calculation (note that GetStringTupleSupplier calls the
            //sourceNoEval function (provided as parameter below) exactly once per iteration)
            var sn = 0;

            //assign sourceNo in a round-robin fashion: 1,2,3,1,2,3,1,2,3
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).GetStringTupleSupplier(() => sn++ % 3 + 1));
            _config.InputKeyPrefix = "@p";
            //no type definitions (everything string)
            //records having @pRECTYPE=XYZ denote start of the cluster
            _config.ClusterMarker = (rec, prevRec, recCnt) => (string)rec["RECTYPE"] == "XYZ";
            //predicate matches the first record in cluster
            _config.MarkerStartsCluster = true;
            _config.AllowOnTheFlyInputFields = true;
            //Item1=ExternalLine/Xrecord, Item2=targetNo
            _config.SetOutputConsumer(tpl => _resultingLines.Enqueue(tpl));

            _resultingLines = new ConcurrentQueue<Tuple<ExternalLine, int>>();
        }
        private readonly ActionBlock<KeyValCluster> _resultsExtractor; //block that loads resulting clusters into the container

        /// <summary>
        /// Prepares a keyword-intake configuration with single-record clusters, plus the extractor
        /// block that enqueues every received cluster into <c>_resultingClusters</c>.
        /// </summary>
        public EtlOrchestrator_tests_FieldsInUse()
        {
            _config = new OrchestratorConfig { InputDataKind = KindOfTextData.Keyword };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            _config.RetainQuotes = false;
            _config.InputKeyPrefix = "@p";
            //no type definitions (everything string)
            //single record cluster
            _config.ClusterMarker = (rec, prevRec, recCnt) => true;
            //predicate matches the first record in cluster
            _config.MarkerStartsCluster = true;
            //NOTE(review): the text "_config.AllowOnTheFlyInputFields = true;" used to trail the comment above,
            //so that assignment is NOT executed here - confirm whether it was meant to be active
            //to prevent linking clusteringBlock to transformingBlock (which could steal clusters from results extractor)
            _config.DeferTransformation = DeferTransformation.Indefinitely;

            _resultingClusters = new ConcurrentQueue<KeyValCluster>();
            _resultsExtractor = new ActionBlock<KeyValCluster>(cluster => _resultingClusters.Enqueue(cluster));
        }
        /// <summary>
        /// Prepares an X12-in / X12-out configuration with single-record clusters and a
        /// pass-through cluster filter. Non-null output lines are appended to
        /// <c>_resultingLines</c>.
        /// </summary>
        public EtlOrchestrator_tests_X12Output()
        {
            _config = new OrchestratorConfig { InputDataKind = KindOfTextData.X12 };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            //single record clusters
            _config.ClusterMarker = (rec, prevRec, recCnt) => true;
            // to allow fields to show trailing spaces (should matter for KW, but not X12)
            _config.AllowOnTheFlyInputFields = true;
            _config.TransformerType = TransformerType.ClusterFilter;
            // no transformations, data passed as is
            _config.ClusterFilterPredicate = cluster => true;
            _config.OutputDataKind = KindOfTextData.X12;
            //collect only the non-null lines
            _config.SetOutputConsumer(line =>
            {
                if (line == null)
                {
                    return;
                }

                _resultingLines.Add(line);
            });

            _resultingLines = new List<string>();
        }
        private readonly ActionBlock<KeyValCluster> _resultsExtractor; //block that loads resulting clusters into the container

        /// <summary>
        /// Prepares a keyword-intake configuration in which clusters start at records whose RECTYPE
        /// is XYZ, plus the extractor block that enqueues every received cluster into
        /// <c>_resultingClusters</c>.
        /// </summary>
        public EtlOrchestrator_tests_HeadAndFootClusters()
        {
            _config = new OrchestratorConfig { InputDataKind = KindOfTextData.Keyword };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            _config.RetainQuotes = false;
            _config.InputKeyPrefix = "@p";
            _config.ExcludeItemsMissingPrefix = false;
            _config.ActionOnDuplicateKey = ActionOnDuplicateKey.IgnoreItem;
            //no type definitions (everything string)
            //records having @pRECTYPE=XYZ denote start of the cluster
            _config.ClusterMarker = (rec, prevRec, recCnt) => (string)rec["RECTYPE"] == "XYZ";
            //predicate matches the first record in cluster
            _config.MarkerStartsCluster = true;
            _config.AllowOnTheFlyInputFields = true;
            //to prevent linking clusteringBlock to transformingBlock (which could steal clusters from results extractor)
            _config.DeferTransformation = DeferTransformation.Indefinitely;

            _resultingClusters = new ConcurrentQueue<KeyValCluster>();
            _resultsExtractor = new ActionBlock<KeyValCluster>(cluster => _resultingClusters.Enqueue(cluster));
        }
        private readonly ActionBlock<KeyValCluster> _resultsExtractor; //block that loads resulting clusters into the container

        /// <summary>
        /// Prepares a keyword-intake configuration for the cluster-filter transformer tests
        /// (clusters start at records whose RECTYPE is XYZ, output deferred indefinitely), plus the
        /// extractor block that enqueues every received cluster into <c>_resultingClusters</c>.
        /// </summary>
        public EtlOrchestrator_tests_ClusterFilterTransformer()
        {
            _config = new OrchestratorConfig { InputDataKind = KindOfTextData.Keyword };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            _config.RetainQuotes = false;
            _config.InputKeyPrefix = "@p";
            _config.ExcludeItemsMissingPrefix = false;
            _config.ActionOnDuplicateKey = ActionOnDuplicateKey.IgnoreItem;
            //NUM is typed as Int, every other key as String
            _config.TypeDefiner = key => new ItemDef(key == "NUM" ? ItemType.Int : ItemType.String, null);
            //records having @pRECTYPE=XYZ denote start of the cluster
            _config.ClusterMarker = (rec, prevRec, recCnt) => (string)rec["RECTYPE"] == "XYZ";
            //predicate matches the first record in cluster
            _config.MarkerStartsCluster = true;
            _config.AllowOnTheFlyInputFields = true;
            _config.TransformerType = TransformerType.ClusterFilter;
            //so that Output won't steal resulting clusters
            _config.DeferOutput = DeferOutput.Indefinitely;

            _resultingClusters = new ConcurrentQueue<KeyValCluster>();
            _resultsExtractor = new ActionBlock<KeyValCluster>(cluster => _resultingClusters.Enqueue(cluster));
        }
        /// <summary>
        /// Prepares a keyword-in / keyword-out configuration with a pass-through cluster filter;
        /// clusters start at records whose RECTYPE is XYZ and every output line is appended to
        /// <c>_resultingLines</c>.
        /// </summary>
        public EtlOrchestrator_tests_KwOutput()
        {
            _config = new OrchestratorConfig { InputDataKind = KindOfTextData.Keyword };
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
            _config.RetainQuotes = false;
            _config.InputKeyPrefix = "@p";
            _config.ExcludeItemsMissingPrefix = false;
            _config.ActionOnDuplicateKey = ActionOnDuplicateKey.IgnoreItem;
            //NUM is typed as Int, every other key as String
            _config.TypeDefiner = key => new ItemDef(key == "NUM" ? ItemType.Int : ItemType.String, null);
            //records having @pRECTYPE=XYZ denote start of the cluster
            _config.ClusterMarker = (rec, prevRec, recCnt) => (string)rec["RECTYPE"] == "XYZ";
            //predicate matches the first record in cluster
            _config.MarkerStartsCluster = true;
            _config.AllowOnTheFlyInputFields = true;
            _config.TransformerType = TransformerType.ClusterFilter;
            // no transformations, data passed as is
            _config.ClusterFilterPredicate = cluster => true;
            _config.OutputDataKind = KindOfTextData.Keyword;
            _config.SetOutputConsumer(line => _resultingLines.Add(line));

            _resultingLines = new List<string>();
        }
        /// <summary>
        /// Runs delimited intake over <c>_csvIntakeLines()</c> with headers in the first row and
        /// on-the-fly fields allowed, then verifies the twelve field names reported by the
        /// orchestrator's intake provider.
        /// </summary>
        public void ProcessPipelineCsvIntake_TrickyHeaderRow_CorrectFldNames()
        {
            //arrange
            _config.InputDataKind = KindOfTextData.Delimited;
            _config.SetIntakeSupplier(new IntakeSupplierProvider(_csvIntakeLines()).StringSupplier);
            _config.HeadersInFirstInputRow = true;
            _config.AllowOnTheFlyInputFields = true;

            var sut = TestUtilities.GetTestOrchestrator(_config, "_clusteringBlock", _resultsDiscarder);

            //act
            _ = sut.ExecuteAsync();
            _resultsDiscarder.Completion.Wait();

            //assert
            //the intake provider is a private field of the orchestrator
            var intakeProvider = (IntakeProvider)new PrivateAccessor(sut).GetField("_intakeProvider");
            var fldNames = intakeProvider.FieldsInUse;

            var expectedNames = new[]
            {
                "Fld001", "Col002", "Fld003", "Fld007", "Fld006", "Fld008",
                "Fld009", "Fld010", "Fld011", "Fld012", "Fld013", "Fld014"
            };

            fldNames.Count.Should().Be(12);
            for (var i = 0; i < expectedNames.Length; i++)
            {
                fldNames[i].Should().Be(expectedNames[i]);
            }
        }
        /// <summary>
        /// Replaces commas with pipes in the intake lines, configures '|' as the input field
        /// separator and verifies the two resulting clusters (5 and 7 records) together with
        /// selected items of their records.
        /// </summary>
        public void ProcessPipeline_SimpleConfigPipeDelimited_CorrectData()
        {
            //arrange
            var pipeDelimitedLines = _intakeLines().Select(l => l.Replace(',', '|'));
            _config.SetIntakeSupplier(new IntakeSupplierProvider(pipeDelimitedLines).StringSupplier);
            _config.InputFieldSeparator = '|';
            _config.RetainQuotes = false;
            _config.InputKeyPrefix = "@p";
            _config.ExcludeItemsMissingPrefix = false;
            _config.ActionOnDuplicateKey = ActionOnDuplicateKey.IgnoreItem;
            //records having @pRECTYPE=XYZ denote start of the cluster
            _config.ClusterMarker = (rec, prevRec, recCnt) => (string)rec["RECTYPE"] == "XYZ";
            //predicate matches the first record in cluster
            _config.MarkerStartsCluster = true;

            var sut = TestUtilities.GetTestOrchestrator(_config, "_clusteringBlock", _resultsExtractor);

            //act
            _ = sut.ExecuteAsync();
            _resultsExtractor.Completion.Wait();

            //assert
            var clusters = _resultingClusters.ToList();

            clusters.Should().HaveCount(2);
            clusters[0].Count.Should().Be(5);
            clusters[1].Count.Should().Be(7);

            //first cluster, first record
            var rec = clusters[0][0];

            rec.Count.Should().Be(4);
            rec["RECTYPE"].Should().Be("XYZ");
            rec["BadKey"].Should().BeNull();
            rec["ABCD_ID"].Should().Be("XYZ00883");
            rec["NAME"].Should().Be("Mary| Ann");
            rec["@NUM"].Should().Be("123"); //note the prefix mismatch

            //first cluster, second record
            rec = clusters[0][1];
            rec.Count.Should().Be(4);
            rec["RECTYPE"].Should().Be("ABCD");
            rec["ABCD_ID"].Should().Be("XYZ00883");
            rec["NAME"].Should().Be("Mary| Ann");

            //second cluster, first record
            rec = clusters[1][0];
            rec["RECTYPE"].Should().Be("XYZ");
            rec["ABCD_ID"].Should().Be(" XYZ00883"); //unquoted with leading whitespace
            rec["@NUM"].Should().Be("123");

            //second cluster, second record
            rec = clusters[1][1];
            rec.Count.Should().Be(4);
            rec["RECTYPE"].Should().Be("ABCD");
            rec["ABCD_ID"].Should().Be(" XYZ00883");
            rec["NAME"].Should().Be("Mary| Ann");

            //second cluster, sixth record
            rec = clusters[1][5];
            rec.Count.Should().Be(1);
            rec["RECTYPE"].Should().BeNull();
            rec["EOF"].Should().Be(string.Empty); //note the prefix mismatch

            //second cluster, seventh (last) record
            rec = clusters[1][6];
            rec.Count.Should().Be(1);
            rec["RECTYPE"].Should().BeNull();
            rec["EOF"].Should().BeNull(); //note the prefix mismatch
        }