/// <summary>
/// Executes a Data Conveyer process configured for X12 intake from <paramref name="reader"/> and JSON output
/// that is accumulated in an in-memory buffer.
/// </summary>
/// <param name="reader">Reader handed to the orchestrator as its intake reader.</param>
/// <returns>
/// The result returned by the orchestrator's execution, paired with the text gathered from the output writer.
/// </returns>
private async Task<(ProcessResult result, string output)> ProcessX12Async(TextReader reader)
{
    var jsonBuffer = new StringBuilder();

    var cfg = new OrchestratorConfig
    {
        InputDataKind = KindOfTextData.X12,
        AsyncIntake = true,
        IntakeReader = () => reader,
        ClusterMarker = SegmentStartsCluster,
        MarkerStartsCluster = true,  // predicate (marker) matches the first record in cluster
        TransformerType = TransformerType.Universal,
        AllowTransformToAlterFields = true,
        UniversalTransformer = ExtractNeededElements,
        OutputDataKind = KindOfTextData.JSON,
        XmlJsonOutputSettings = "RecordNode|,IndentChars| ",
        OutputWriter = () => new StringWriter(jsonBuffer)
    };

    ProcessResult outcome;
    using (var orchestrator = OrchestratorCreator.GetEtlOrchestrator(cfg))
    {
        outcome = await orchestrator.ExecuteAsync();
    }

    // The buffer is read only after the orchestrator has been disposed.
    var json = jsonBuffer.ToString();
    return (outcome, json);
}
/// <summary>
/// Configures an X12-to-X12 process that reads <paramref name="inFile"/> and routes each cluster to one of
/// two output files ("_low" or "_high") created in <paramref name="outLocation"/>.
/// </summary>
/// <param name="inFile">Input X12 file; its name and extension are reused for the output file names.</param>
/// <param name="outLocation">Folder in which both output files are placed.</param>
internal FileProcessor(string inFile, string outLocation)
{
    var extension = Path.GetExtension(inFile);
    var outputStem = $"{outLocation}{Path.DirectorySeparatorChar}{Path.GetFileNameWithoutExtension(inFile)}";

    // 1st: less than $1,000; 2nd: $1,000 or more
    var lowAndHighFiles = $"{outputStem}_low{extension}|{outputStem}_high{extension}";

    var cfg = new OrchestratorConfig
    {
        GlobalCacheElements = new string[] { "LowCnt|0", "HighCnt|0", "AllCnt|0", "IsaElems", "GsElems" },
        DefaultX12SegmentDelimiter = "~\r\n",
        InputDataKind = KindOfTextData.X12,
        InputFileName = inFile,
        ClusterMarker = SegmentStartsCluster,
        MarkerStartsCluster = true,  // predicate (marker) matches the first record in cluster
        PrependHeadCluster = true,   // to contain ISA/GS segments for _high file
        AppendFootCluster = true,    // to contain IEA/GE segments for _high file
        RecordInitiator = StoreIsaAndGsSegments,
        PropertyBinEntities = PropertyBinAttachedTo.Clusters,
        DeferTransformation = DeferTransformation.UntilRecordInitiation,
        ConcurrencyLevel = 4,
        TransformerType = TransformerType.Clusterbound,
        ClusterboundTransformer = ProcessX12Transaction,
        RouterType = RouterType.PerCluster,
        ClusterRouter = SendToLowOrHigh,
        OutputDataKind = KindOfTextData.X12,
        OutputFileNames = lowAndHighFiles
    };

    Orchestrator = OrchestratorCreator.GetEtlOrchestrator(cfg);
}
// Verifies that int, decimal, date and string settings in GlobalCacheElements end up in the global cache
// as values of the matching .NET types.
public void GlobalCacheElements_VariousDataTypes_CorrectData()
{
    //arrange
    _cfg.GlobalCacheElements = new string[]
    {
        "IntElem|0",
        "DecElem|0.",
        "DateElem|1/1/2011",
        "StrElem|abc"
    };
    object cached;

    //act
    var orchestrator = (EtlOrchestrator)OrchestratorCreator.GetEtlOrchestrator(_cfg);
    var accessor = new PrivateObject(orchestrator);
    var cache = (IGlobalCache)accessor.GetField("_globalCache");

    //assert
    cache.Count.Should().Be(4);

    // element that was never defined
    cache.TryGet("Elem1", out cached).Should().BeFalse();

    cache.TryGet("IntElem", out cached).Should().BeTrue();
    cached.Should().BeOfType<int>();
    cached.Should().Be(0);

    cache.TryGet("DecElem", out cached).Should().BeTrue();
    cached.Should().BeOfType<decimal>();
    cached.Should().Be(0m);

    cache.TryGet("DateElem", out cached).Should().BeTrue();
    cached.Should().BeOfType<DateTime>();
    cached.Should().Be(new DateTime(2011, 1, 1));

    cache.TryGet("StrElem", out cached).Should().BeTrue();
    cached.Should().BeOfType<string>();
    cached.Should().Be("abc");
}
/// <summary>
/// Configures a delimited-to-delimited process that reads <paramref name="inFile"/> (headers in first row),
/// filters records by state and specialty, and writes the matches to a "_NJ_dentists.csv" file
/// in <paramref name="outLocation"/>. Intake progress is reported on the console.
/// </summary>
/// <param name="inFile">Input delimited file; its name (without extension) prefixes the output file name.</param>
/// <param name="outLocation">Folder in which the output file is placed.</param>
internal FileProcessor(string inFile, string outLocation)
{
    var config = new OrchestratorConfig()
    {
        ReportProgress = true,
        ProgressInterval = 1000,
        ProgressChangedHandler = (s, e) =>
        {
            if (e.Phase == Phase.Intake)
            {
                // "\r" returns the cursor to the start of the line, so the counter is overwritten in place
                Console.Write($"\rProcessed {e.RecCnt:N0} records so far...");
            }
        },
        PhaseFinishedHandler = (s, e) =>
        {
            if (e.Phase == Phase.Intake)
            {
                Console.WriteLine($"\rProcessed {e.RecCnt:N0} records. Done! ");
            }
        },
        InputDataKind = KindOfTextData.Delimited,
        HeadersInFirstInputRow = true,
        InputFileName = inFile,
        TransformerType = TransformerType.RecordFilter,
        // Predicate is true for records with state "NJ" (exact match) and specialty "dentist" in any letter case.
        // The specialty is compared with OrdinalIgnoreCase rather than ToLower(): the result does not depend
        // on the current culture (e.g. Turkish casing of 'I') and a null field value yields false
        // instead of a NullReferenceException.
        RecordFilterPredicate = r => (string)r["NPPES Provider State"] == "NJ"
                                  && string.Equals((string)r["Specialty Description"], "dentist", StringComparison.OrdinalIgnoreCase),
        OutputDataKind = KindOfTextData.Delimited,
        HeadersInFirstOutputRow = true,
        OutputFileName = outLocation + Path.DirectorySeparatorChar + Path.GetFileNameWithoutExtension(inFile) + "_NJ_dentists.csv"
    };

    Orchestrator = OrchestratorCreator.GetEtlOrchestrator(config);
}
/// <summary>
/// Configures a delimited-to-flat process whose intake comes from an asynchronous supplier function and whose
/// output is passed to a consumer action; the intake record count is forwarded to <paramref name="progressHandler"/>.
/// </summary>
/// <param name="intakeSupplierAsync">Function registered with the configuration as the asynchronous intake supplier.</param>
/// <param name="outputConsumer">Action registered with the configuration as the output consumer.</param>
/// <param name="progressHandler">Called with the record count on intake progress changes and when intake finishes.</param>
internal DataProcessor(Func<Task<string>> intakeSupplierAsync, Action<string> outputConsumer, Action<int> progressHandler)
{
    var settings = new OrchestratorConfig
    {
        ReportProgress = true,
        ProgressInterval = 10,
        ProgressChangedHandler = (sender, args) =>
        {
            // only the intake phase is reported
            if (args.Phase == Phase.Intake)
            {
                progressHandler(args.RecCnt);
            }
        },
        PhaseFinishedHandler = (sender, args) =>
        {
            // report the count once more when the intake phase finishes
            if (args.Phase == Phase.Intake)
            {
                progressHandler(args.RecCnt);
            }
        },
        InputDataKind = KindOfTextData.Delimited,
        InputFields = "PlaneDescription,IataCode,IcaoCode",
        AsyncIntake = true,
        TransformerType = TransformerType.Universal,
        UniversalTransformer = FilterAndReorganizeFields,
        AllowTransformToAlterFields = true,
        OutputDataKind = KindOfTextData.Flat,
        OutputFields = "IataCode|4,Hyphen|2,PlaneDescription|70",
        ExcludeExtraneousFields = true
    };

    settings.SetAsyncIntakeSupplier(intakeSupplierAsync);
    settings.SetOutputConsumer(outputConsumer);

    Orchestrator = OrchestratorCreator.GetEtlOrchestrator(settings);
}
// Verifies that the global cache is empty when GlobalCacheElements is not set in this test.
public void GlobalCacheElements_NoSetting_NoElements()
{
    //arrange
    object cached;

    //act
    var orchestrator = (EtlOrchestrator)OrchestratorCreator.GetEtlOrchestrator(_cfg);
    var accessor = new PrivateObject(orchestrator);
    var cache = (IGlobalCache)accessor.GetField("_globalCache");

    //assert
    cache.Count.Should().Be(0);
    cache.TryGet("Elem1", out cached).Should().BeFalse();
    cache.TryGet("Elem2", out cached).Should().BeFalse();
    cache.TryGet("Elem3", out cached).Should().BeFalse();
}
/// <summary>
/// Entry point of the SplitByTimeZone sample: restores the saved "TimeZones" configuration, adjusts it for
/// per-record routing into six delimited output files, runs the process and prints the outcome.
/// </summary>
static void Main()
{
    Console.WriteLine("Data Conveyer process is starting (SplitByTimeZone)");

    // Restore configuration named TimeZones.
    // To facilitate troubleshooting, a logger can be passed as the second argument, e.g.:
    //   LoggerCreator.CreateLogger(LoggerType.LogFile, "Split by Time Zone process", LogEntrySeverity.Information)
    // (output will go into DataConveyer.log file).
    var config = OrchestratorConfig.RestoreConfig(@"..\..\..\Common\ConfigData\TimeZones");

    if (config != null)
    {
        // TimeZones configuration restored.
        // Some properties need adjusting, as the TimeZones configuration was specific to the IncludeTimeZone
        // project (but it also included the RecordRouter that is needed here in SplitByTimeZone).
        config.TransformerType = TransformerType.Recordbound;
        config.RecordboundTransformer = r => r;  // reset transformer to its default
        config.RouterType = RouterType.PerRecord;
        config.OutputDataKind = KindOfTextData.Delimited;
        config.OutputFileNames = @"..\..\..\Common\Output\output_eastern.csv|..\..\..\Common\Output\output_central.csv|..\..\..\Common\Output\output_mountain.csv|..\..\..\Common\Output\output_pacific.csv|..\..\..\Common\Output\output_alaskan.csv|..\..\..\Common\Output\output_hawaiian.csv";

        // Execute Data Conveyer process
        ProcessResult result;
        using (var orchtr = OrchestratorCreator.GetEtlOrchestrator(config))
        {
            result = orchtr.ExecuteAsync().Result;  // sync over async
        }
        Console.WriteLine(" done!");

        // Evaluate completion status
        var summary = result.CompletionStatus == CompletionStatus.IntakeDepleted
            ? $"Successfully processed {result.RowsWritten} records"
            : $"Oops! Processing resulted in unexpected status of {result.CompletionStatus}";
        Console.WriteLine(summary);
    }
    else
    {
        Console.WriteLine("Oops! Failed to restore TimeZones config. More information in the log.");
    }

    Console.Write("Press any key to exit...");
    Console.ReadKey();
}
// Verifies that a single setting given without a value ("Elem1") produces one cache element holding null.
public void GlobalCacheElements_SingleSetting_OneElement()
{
    //arrange
    _cfg.GlobalCacheElements = new string[] { "Elem1" };
    object cached;

    //act
    var orchestrator = (EtlOrchestrator)OrchestratorCreator.GetEtlOrchestrator(_cfg);
    var accessor = new PrivateObject(orchestrator);
    var cache = (IGlobalCache)accessor.GetField("_globalCache");

    //assert
    cache.Count.Should().Be(1);

    // element that was never defined
    cache.TryGet("Elem3", out cached).Should().BeFalse();

    cache.TryGet("Elem1", out cached).Should().BeTrue();
    cached.Should().BeNull();
}
/// <summary>
/// Configures a delimited-to-flat process that reads <paramref name="inFile"/>, passes records through
/// <c>FilterAndReorganizeFields</c> and writes a ".txt" file named after the input into <paramref name="outLocation"/>.
/// </summary>
/// <param name="inFile">Input delimited file; its name (without extension) is reused for the output file.</param>
/// <param name="outLocation">Folder in which the output file is placed.</param>
internal FileProcessor(string inFile, string outLocation)
{
    var outputPath = $"{outLocation}{Path.DirectorySeparatorChar}{Path.GetFileNameWithoutExtension(inFile)}.txt";

    var settings = new OrchestratorConfig
    {
        InputDataKind = KindOfTextData.Delimited,
        InputFields = "PlaneDescription,IataCode,IcaoCode",
        InputFileName = inFile,
        TransformerType = TransformerType.Universal,
        UniversalTransformer = FilterAndReorganizeFields,
        AllowTransformToAlterFields = true,
        OutputDataKind = KindOfTextData.Flat,
        OutputFields = "IataCode|4,Hyphen|2,PlaneDescription|70",
        ExcludeExtraneousFields = true,
        OutputFileName = outputPath
    };

    Orchestrator = OrchestratorCreator.GetEtlOrchestrator(settings);
}
// Verifies that two simple settings ("Elem1|0" and "Elem2|abc") produce two cache elements with those values.
// NOTE: despite the "_NoElements" suffix in the method name, this test expects two elements in the cache.
public void GlobalCacheElements_2SimpleSettings_NoElements()
{
    //arrange
    _cfg.GlobalCacheElements = new string[] { "Elem1|0", "Elem2|abc" };
    object cached;

    //act
    var orchestrator = (EtlOrchestrator)OrchestratorCreator.GetEtlOrchestrator(_cfg);
    var accessor = new PrivateObject(orchestrator);
    var cache = (IGlobalCache)accessor.GetField("_globalCache");

    //assert
    cache.Count.Should().Be(2);

    cache.TryGet("Elem1", out cached).Should().BeTrue();
    cached.Should().Be(0);

    cache.TryGet("Elem2", out cached).Should().BeTrue();
    cached.Should().Be("abc");

    // element that was never defined
    cache.TryGet("Elem3", out cached).Should().BeFalse();
}
/// <summary>
/// Entry point of the IncludeTimeZone sample: restores the saved "TimeZones" configuration, runs the process
/// with it unchanged and prints the outcome.
/// </summary>
static void Main()
{
    Console.WriteLine("Data Conveyer process is starting (IncludeTimeZone)");

    // Restore configuration named TimeZones.
    // To facilitate troubleshooting, a logger can be passed as the second argument, e.g.:
    //   LoggerCreator.CreateLogger(LoggerType.LogFile, "Include Time Zone process", LogEntrySeverity.Information)
    // (output will go into DataConveyer.log file).
    var config = OrchestratorConfig.RestoreConfig(@"..\..\..\Common\ConfigData\TimeZones");

    if (config != null)
    {
        // TimeZones configuration restored.
        // No need to adjust any config properties, "TimeZones" config lined up all config properties
        // (note that the RecordRouter function is not used when RouterType is SingleTarget).

        // Execute Data Conveyer process
        ProcessResult result;
        using (var orchtr = OrchestratorCreator.GetEtlOrchestrator(config))
        {
            result = orchtr.ExecuteAsync().Result;  // sync over async
        }
        Console.WriteLine(" done!");

        // Evaluate completion status
        var succeeded = result.CompletionStatus == CompletionStatus.IntakeDepleted;
        Console.WriteLine(succeeded
            ? $"Successfully processed {result.RowsWritten} records"
            : $"Oops! Processing resulted in unexpected status of {result.CompletionStatus}");
    }
    else
    {
        Console.WriteLine("Oops! Failed to restore TimeZones config. More information in the log.");
    }

    Console.Write("Press any key to exit...");
    Console.ReadKey();
}
/// <summary>
/// Configures a translation between CSV and unbound JSON. The direction is chosen by the extension of
/// <paramref name="inFile"/>: ".csv" (any letter case) is translated to ".json"; any other input is treated as
/// unbound JSON and translated to ".csv". Intake progress is reported on the console.
/// </summary>
/// <param name="inFile">Input file; its name (without extension) is reused for the output file.</param>
/// <param name="outLocation">Folder in which the output file is placed.</param>
/// <exception cref="ArgumentNullException"><paramref name="inFile"/> is null.</exception>
internal FileProcessor(string inFile, string outLocation)
{
    // Fail explicitly on a missing file name (previously this surfaced as a NullReferenceException).
    if (inFile == null)
    {
        throw new ArgumentNullException(nameof(inFile));
    }

    // Ordinal, case-insensitive check: the result does not depend on the current culture's casing rules.
    var inputIsCsv = string.Equals(Path.GetExtension(inFile), ".csv", StringComparison.OrdinalIgnoreCase);
    var outFileWithoutExtension = outLocation + Path.DirectorySeparatorChar + Path.GetFileNameWithoutExtension(inFile);

    var config = new OrchestratorConfig(LoggerCreator.CreateLogger(LoggerType.LogFile, "Unbound JSON translation to/from CSV.", LogEntrySeverity.Information))
    {
        ReportProgress = true,
        ProgressInterval = 1000,
        ProgressChangedHandler = (s, e) =>
        {
            if (e.Phase == Phase.Intake)
            {
                // "\r" returns the cursor to the start of the line, so the counter is overwritten in place
                Console.Write($"\rProcessed {e.RecCnt:N0} records so far...");
            }
        },
        PhaseFinishedHandler = (s, e) =>
        {
            if (e.Phase == Phase.Intake)
            {
                Console.WriteLine($"\rProcessed {e.RecCnt:N0} records. Done! ");
            }
        },
        InputFileName = inFile
    };

    if (inputIsCsv)  // CSV to UnboundJSON
    {
        config.InputDataKind = KindOfTextData.Delimited;
        config.HeadersInFirstInputRow = true;
        config.AllowTransformToAlterFields = true;
        config.OutputDataKind = KindOfTextData.UnboundJSON;
        config.XmlJsonOutputSettings = "IndentChars| ";  // pretty print
        config.OutputFileName = outFileWithoutExtension + ".json";
    }
    else  // UnboundJSON to CSV
    {
        config.InputDataKind = KindOfTextData.UnboundJSON;
        config.AllowOnTheFlyInputFields = true;  // TODO: consider UnboundJSON ignoring this setting like X12
        config.AllowTransformToAlterFields = true;  // IMPORTANT! otherwise null items will be produced!
        config.OutputDataKind = KindOfTextData.Delimited;
        config.OutputFileName = outFileWithoutExtension + ".csv";
        config.HeadersInFirstOutputRow = true;
    }

    Orchestrator = OrchestratorCreator.GetEtlOrchestrator(config);
}
// This test verifies the OrchestratorCreator class (factory): with only dummy intake/output functions set,
// the created orchestrator is expected to hold the provider types asserted below.
public void GetEtlOrchestrator_Defaults_OrchestratorConstructed()
{
    //arrange
    var config = new OrchestratorConfig
    {
        IntakeSupplier = gc => null,      // dummy
        OutputConsumer = (t, gc) => { }   // dummy
    };

    //act
    var created = OrchestratorCreator.GetEtlOrchestrator(config);
    var accessor = new PrivateObject(created);

    //assert
    created.Should().BeOfType<EtlOrchestrator>();

    var intakeProvider = accessor.GetField("_intakeProvider");
    intakeProvider.Should().BeOfType<Mavidian.DataConveyer.Intake.RawIntakeProvider>();  // default

    var transformProvider = accessor.GetField("_transformProvider");
    transformProvider.Should().BeOfType<Mavidian.DataConveyer.Transform.RecordboundTransformProvider>();  // default

    var outputProvider = accessor.GetField("_outputProvider");
    outputProvider.Should().BeOfType<Mavidian.DataConveyer.Output.RawOutputProvider>();  // default
}
/// <summary>
/// Configures a process that reads the flat (fixed-width) file <paramref name="inFile"/>, passes records through
/// <c>FilterRecsAndExtractFields</c> and writes a ".sql" script (a CREATE TABLE leader followed by templated
/// INSERT statements) into <paramref name="outLocation"/>.
/// </summary>
/// <param name="inFile">Input flat file; its name (without extension) is reused for the output file.</param>
/// <param name="outLocation">Folder in which the output file is placed.</param>
internal FileProcessor(string inFile, string outLocation)
{
    var scriptPath = $"{outLocation}{Path.DirectorySeparatorChar}{Path.GetFileNameWithoutExtension(inFile)}.sql";

    // NOTE(review): the templates below place field values between single quotes; whether quotes embedded
    // in the data get escaped is not visible from here - confirm before running the script on untrusted data.
    var insertTemplate = new string[]
    {
        "INSERT INTO MyPeople VALUES('{Key}', ",
        "'{LName}', ",
        "'{FName}', ",
        "'{SSN}')"
    };

    var settings = new OrchestratorConfig
    {
        InputDataKind = KindOfTextData.Flat,
        InputFileName = inFile,
        HeadersInFirstInputRow = true,
        TrimInputValues = true,
        InputFields = "|8,|15,|34,|41,|12,|20,|10,|48,|64,|16,|29,|23,|5,|5,|14,|38,|36,|52,|11",
        AllowTransformToAlterFields = true,
        ConcurrencyLevel = 4,
        TransformerType = TransformerType.Universal,
        UniversalTransformer = FilterRecsAndExtractFields,
        OutputDataKind = KindOfTextData.Arbitrary,
        ArbitraryOutputDefs = insertTemplate,
        LeaderContents = "CREATE TABLE MyPeople (ID char(12), LastName char(20), FirstName char(12), SSN char(11))\r\nGO",
        OutputFileName = scriptPath
    };

    Orchestrator = OrchestratorCreator.GetEtlOrchestrator(settings);
}
// Verifies how less obvious GlobalCacheElements settings (negative numbers, alternative date format,
// quoted values, invalid date, missing or blank values) are typed and stored in the global cache.
public void GlobalCacheElements_VariousTrickyData_CorrectData()
{
    //arrange
    _cfg.GlobalCacheElements = new string[]
    {
        "IntElem|-1",
        "DecElem|-32.44",
        "DateElem|1-JAN-11",
        "StrElem1|\"0\"",
        "StrElem2|2/30/2011",
        "StrElem3|\"\"a\",\"b\" and c\"",
        "StrElem4|\"\"a\",\"b\" and c",  // ending quote is optional
        "StrElem5",
        "StrElem6|\"\"",
        "StrElem7|",
        "StrElem8|\" \"",
        "StrElem9| "
    };
    object cached;

    //act
    var orchestrator = (EtlOrchestrator)OrchestratorCreator.GetEtlOrchestrator(_cfg);
    var accessor = new PrivateObject(orchestrator);
    var cache = (IGlobalCache)accessor.GetField("_globalCache");

    //assert
    cache.Count.Should().Be(12);

    // element that was never defined
    cache.TryGet("BadElem", out cached).Should().BeFalse();

    cache.TryGet("IntElem", out cached).Should().BeTrue();
    cached.Should().BeOfType<int>();
    cached.Should().Be(-1);

    cache.TryGet("DecElem", out cached).Should().BeTrue();
    cached.Should().BeOfType<decimal>();
    cached.Should().Be(-32.44m);

    cache.TryGet("DateElem", out cached).Should().BeTrue();
    cached.Should().BeOfType<DateTime>();
    cached.Should().Be(new DateTime(2011, 1, 1));

    // quoted number is expected to stay a string
    cache.TryGet("StrElem1", out cached).Should().BeTrue();
    cached.Should().BeOfType<string>();
    cached.Should().Be("0");

    // invalid date is expected to stay a string
    cache.TryGet("StrElem2", out cached).Should().BeTrue();
    cached.Should().BeOfType<string>();
    cached.Should().Be("2/30/2011");

    cache.TryGet("StrElem3", out cached).Should().BeTrue();
    cached.Should().BeOfType<string>();
    cached.Should().Be("\"a\",\"b\" and c");

    cache.TryGet("StrElem4", out cached).Should().BeTrue();
    cached.Should().BeOfType<string>();
    cached.Should().Be("\"a\",\"b\" and c");

    // no value part at all is expected to give null
    cache.TryGet("StrElem5", out cached).Should().BeTrue();
    cached.Should().BeNull();

    cache.TryGet("StrElem6", out cached).Should().BeTrue();
    cached.Should().BeOfType<string>();
    cached.Should().Be(string.Empty);

    cache.TryGet("StrElem7", out cached).Should().BeTrue();
    cached.Should().BeOfType<string>();
    cached.Should().Be(string.Empty);

    cache.TryGet("StrElem8", out cached).Should().BeTrue();
    cached.Should().BeOfType<string>();
    cached.Should().Be(" ");

    cache.TryGet("StrElem9", out cached).Should().BeTrue();
    cached.Should().BeOfType<string>();
    cached.Should().Be(" ");
}