/// <summary>
/// Configures and creates the orchestrator that takes delimited plane records from an asynchronous supplier
/// and emits them as flat (fixed-width) lines.
/// </summary>
/// <param name="intakeSupplierAsync">Asynchronous function registered as the intake supplier.</param>
/// <param name="outputConsumer">Action registered as the output consumer.</param>
/// <param name="progressHandler">Callback that receives the record count reported for the intake phase.</param>
internal DataProcessor(Func<Task<string>> intakeSupplierAsync, Action<string> outputConsumer, Action<int> progressHandler)
{
    // Only the intake phase is relayed to the caller; notifications for other phases are ignored.
    void RelayIntakeCount(Phase phase, int recCnt)
    {
        if (phase != Phase.Intake)
        {
            return;
        }
        progressHandler(recCnt);
    }

    var config = new OrchestratorConfig
    {
        ReportProgress = true,
        ProgressInterval = 10,
        ProgressChangedHandler = (sender, args) => RelayIntakeCount(args.Phase, args.RecCnt),
        PhaseFinishedHandler = (sender, args) => RelayIntakeCount(args.Phase, args.RecCnt),
        InputDataKind = KindOfTextData.Delimited,
        InputFields = "PlaneDescription,IataCode,IcaoCode",
        AsyncIntake = true,
        TransformerType = TransformerType.Universal,
        UniversalTransformer = FilterAndReorganizeFields,
        AllowTransformToAlterFields = true,
        OutputDataKind = KindOfTextData.Flat,
        OutputFields = "IataCode|4,Hyphen|2,PlaneDescription|70",
        ExcludeExtraneousFields = true
    };

    config.SetAsyncIntakeSupplier(intakeSupplierAsync);
    config.SetOutputConsumer(outputConsumer);

    Orchestrator = OrchestratorCreator.GetEtlOrchestrator(config);
}
/// <summary>
/// Resets the fixture state: a fresh configuration, the intake lines converted to external tuples,
/// and an empty container for the resulting lines.
/// </summary>
public void Initialize()
{
    _config = new OrchestratorConfig();

    var intakeTuples = _intakeLines().Select(line => line.ToExternalTuple());
    _inLines = intakeTuples.ToList();

    _resultingLines = new List<ExternalLine>();
}
/// <summary>
/// Fixture setup: keyword intake passed through unchanged (single-record clusters, pass-all filter)
/// and written out as arbitrary text; non-null output lines are collected in <c>_resultingLines</c>.
/// </summary>
public EtlOrchestrator_tests_ArbitraryOutput()
{
    //container for the lines received by the output consumer (the consumer below only uses it once the pipeline runs)
    _resultingLines = new List<string>();

    _config = new OrchestratorConfig { InputDataKind = KindOfTextData.Keyword };
    _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
    _config.RetainQuotes = false;
    _config.InputKeyPrefix = "@p";
    _config.ExcludeItemsMissingPrefix = false;
    _config.ActionOnDuplicateKey = ActionOnDuplicateKey.IgnoreItem;

    //NUM is Int, every other key is String
    _config.TypeDefiner = key => new ItemDef(key == "NUM" ? ItemType.Int : ItemType.String, null);

    //every record forms its own cluster
    _config.ClusterMarker = (rec, prevRec, recCnt) => true;
    _config.AllowOnTheFlyInputFields = true;

    //no transformations, data passed as is
    _config.TransformerType = TransformerType.ClusterFilter;
    _config.ClusterFilterPredicate = cluster => true;

    _config.OutputDataKind = KindOfTextData.Arbitrary;
    _config.ArbitraryOutputDefs = new string[]
    {
        "Record type is {RECTYPE},",
        " name is {NAME}",
        " and number is {NUM}.",
        " Void item here."
    };

    _config.SetOutputConsumer(line =>
    {
        if (line == null)
        {
            return;  //null lines are not recorded
        }
        _resultingLines.Add(line);
    });
}
/// <summary>
/// Verifies that when both an input file name and a text intake supplier are configured,
/// the supplier is used (the non-existing file does not cause an initialization error).
/// </summary>
public void Process_IntakeFileAndSupplierPresent_SupplierWins()
{
    //arrange
    int suppliedCnt = 0;
    var config = new OrchestratorConfig
    {
        InputFileNames = "C:\\non-existing-file.abc",
        //supplies 5 lines, then null
        TextIntakeSupplier = () => suppliedCnt++ < 5 ? $"Line #{suppliedCnt}" : null
    };

    var orchestrator = new EtlOrchestrator(config);

    //act
    var result = orchestrator.ExecuteAsync().Result;

    //assert
    //note that InputFileNames would've caused InitializationError
    result.CompletionStatus.Should().Be(CompletionStatus.IntakeDepleted);
    result.RowsRead.Should().Be(5);
    result.ClustersRead.Should().Be(5);
    result.RowsWritten.Should().Be(5);
    result.ClustersWritten.Should().Be(5);
    suppliedCnt.Should().Be(6);  //5 lines plus the final call that returned null
}
/// <summary>
/// Fixture setup for cluster marker tests: keyword intake from two sources, transformation deferred indefinitely,
/// clusters captured by <c>_resultsExtractor</c>, and a traceable asserter that renders clusters as CSV rows.
/// </summary>
public ClusterMarker_tests()
{
    _config = new OrchestratorConfig { InputDataKind = KindOfTextData.Keyword };

    //Counter captured by the sourceNo evaluator below; GetStringTupleSupplier calls that evaluator exactly once per iteration.
    var suppliedSoFar = 0;
    _config.SetIntakeSupplier(
        new IntakeSupplierProvider(_intakeLines()).GetStringTupleSupplier(() => suppliedSoFar++ < 3 ? 1 : 2));  //first 3 - source 1, rest - source 2

    _config.RetainQuotes = false;
    _config.InputKeyPrefix = "@p";

    // DeferTransformation.Indefinitely prevents Data Conveyer from linking clusteringBlock to transformingBlock.
    // Without it, transformingBlock could randomly "steal" clusters from _resultsExtractor, which would cause random test failures.
    _config.DeferTransformation = DeferTransformation.Indefinitely;
    _config.AllowOnTheFlyInputFields = true;
    _config.OutputConsumer = (t, gc) => { };  //throwaway consumer

    //prepare extraction of the results from the pipeline
    _resultingClusters = new ConcurrentQueue<KeyValCluster>();
    _resultsExtractor = new ActionBlock<KeyValCluster>(cluster => _resultingClusters.Enqueue(cluster));

    // Renders the record at a given index, e.g. 223{4} means a record with 4 items and item NUM = 223;
    // indexes past the end of the cluster render as an empty string.
    string ShowRec(ICluster c, int idx)
    {
        if (idx >= c.Count)
        {
            return string.Empty;
        }
        return c[idx]?["NUM"] + "{" + c[idx].Count + "}";
    }

    // AsserterOutput tuple: Item1=Ext, Item2=Header, Item3=Formatter, Item4=ExcFormatter
    AsserterOutput asserterOutputToCsv = (
        ".csv",
        "ClstrNo,NoOfRecs,Rec1,Rec2,Rec3,Rec4,Rec5,Rec6,Rec7,Rec8,Rec9,Rec10",
        c => $"{ c.ClstrNo },{ c.Count },{ ShowRec(c,0) },{ ShowRec(c,1) },{ ShowRec(c,2) },{ ShowRec(c,3) },{ ShowRec(c,4) },{ ShowRec(c,5) },{ ShowRec(c,6) },{ ShowRec(c,7) },{ ShowRec(c,8) },{ ShowRec(c,9) }",
        ex => ex.ToString().Split("\r\n").Select(line => "\"" + line.Replace('"', '\'') + "\"")
    );

    _traceableAsserter = new TraceableAsserter<KeyValCluster>("ClusterTestFailures\\", asserterOutputToCsv);
}
/// <summary>
/// Builds the test fixture: a configuration that allows transforms to alter fields and attaches property bins to clusters,
/// simple type definitions, and a two-record cluster (<c>_cluster</c>) to run the tests against.
/// </summary>
public void Initialize()
{
    _config = new OrchestratorConfig();
    _config.AllowTransformToAlterFields = true;
    _config.PropertyBinEntities = PropertyBinAttachedTo.Clusters;

    // simple type definitions, everything string, except for fields starting with I_ (int)
    // Field keys are identifiers, so the prefix test is ordinal rather than culture-sensitive.
    Func<string, ItemType> fldTypeFunc = key => key.StartsWith("I_", StringComparison.Ordinal) ? ItemType.Int : ItemType.String;
    var initFldTypes = new ConcurrentDictionary<string, ItemType>();
    Func<string, string> fldFormatFunc = key => string.Empty;
    var initFldFormats = new ConcurrentDictionary<string, string>();
    _typeDefs = new TypeDefinitions(fldTypeFunc, initFldTypes, fldFormatFunc, initFldFormats);

    var items1 = new IItem[]
    {
        KeyValItem.CreateItem("IDCD_ID", "71941", _typeDefs),
        KeyValItem.CreateItem("blah", "blahblah", _typeDefs),
        KeyValItem.CreateItem("I_num", 243, _typeDefs)
    };
    var items2 = new IItem[]
    {
        KeyValItem.CreateItem("I_#", 15, _typeDefs),
        KeyValItem.CreateItem("Fld1", "data1", _typeDefs)
    };

    var recs = new KeyValRecord[]
    {
        new KeyValRecord(items1, 16, 1, 0, null, null, null, _typeDefs, _config, null, ActionOnDuplicateKey.IgnoreItem),
        new KeyValRecord(items2, 17, 1, 0, null, null, null, _typeDefs, _config, null, ActionOnDuplicateKey.IgnoreItem)
    };

    //clstr# 9 starting at rec# 16; AllowTransformToAlterFields is true
    _cluster = new KeyValCluster(recs, 9, 16, 1, null, new Dictionary<string, object>(), _typeDefs, _config, null);
}
// internal so that the ReadOnlyCluster ctor (& record specific transform providers) can use the same processingStatusSupplier
internal readonly Func<Phase, int, PhaseStatus> _processingStatusSupplier;

/// <summary>
/// Creates a cluster from the given records. The cluster is constructed eagerly (recList is consumed upon object creation).
/// </summary>
/// <param name="recList">Records that make up the cluster.</param>
/// <param name="clstrNo">Cluster number; it is also pushed to every KeyValRecord in the cluster.</param>
/// <param name="startRecNo">Value assigned to StartRecNo.</param>
/// <param name="startSourceNo">Value assigned to StartSourceNo.</param>
/// <param name="globalCache">Value assigned to GlobalCache.</param>
/// <param name="propertyBin">Property bin to reuse (e.g. when cloning); if null, a new empty one is created when property bins are attached to clusters.</param>
/// <param name="typeDefinitions">Type definitions stored with the cluster.</param>
/// <param name="config">Configuration; its PropertyBinEntities setting decides whether the cluster gets a property bin.</param>
/// <param name="processingStatusSupplier">Function stored in _processingStatusSupplier.</param>
internal KeyValCluster(IEnumerable<IRecord> recList,
                       int clstrNo,
                       int startRecNo,
                       int startSourceNo,
                       IGlobalCache globalCache,
                       IDictionary<string, object> propertyBin,
                       TypeDefinitions typeDefinitions,
                       OrchestratorConfig config,
                       Func<Phase, int, PhaseStatus> processingStatusSupplier)
{
    ClstrNo = clstrNo;
    StartRecNo = startRecNo;

    //StartSourceNo should match the 1st record, but to facilitate cloning, etc. it is taken as a separate ctor parm
    // instead of being read from the records, i.e.: recList.Any() ? recList.First().SourceNo : 0
    StartSourceNo = startSourceNo;

    _recordColl = new RecordCollection(recList);
    GlobalCache = globalCache;
    _typeDefinitions = typeDefinitions;
    _config = config;
    _processingStatusSupplier = processingStatusSupplier;

    //Make sure all records have the ClstrNo matching the cluster they belong to:
    _recordColl.ForEach(r => (r as KeyValRecord)?.SetClstrNo(clstrNo));

    if ((_config.PropertyBinEntities & PropertyBinAttachedTo.Clusters) == PropertyBinAttachedTo.Clusters)
    {
        //"reuse" PB in case of cloning, creation of ReadOnlyCluster wrapper, etc.
        PropertyBin = propertyBin ?? new Dictionary<string, object>();
    }
    else
    {
        //Clusters flag not set in PropertyBinEntities
        PropertyBin = null;
    }
} //ctor
/// <summary>
/// Configures and creates the orchestrator that filters a delimited file (with a header row) down to
/// records where "NPPES Provider State" is NJ and "Specialty Description" is dentist (in any letter case),
/// reporting intake progress on the console.
/// </summary>
/// <param name="inFile">Input file name; its base name is also used to build the output file name.</param>
/// <param name="outLocation">Folder where the "_NJ_dentists.csv" output file is written.</param>
internal FileProcessor(string inFile, string outLocation)
{
    var config = new OrchestratorConfig()
    {
        ReportProgress = true,
        ProgressInterval = 1000,
        ProgressChangedHandler = (s, e) => { if (e.Phase == Phase.Intake) { Console.Write($"\rProcessed {e.RecCnt:N0} records so far..."); } },
        PhaseFinishedHandler = (s, e) => { if (e.Phase == Phase.Intake) { Console.WriteLine($"\rProcessed {e.RecCnt:N0} records. Done!   "); } },
        InputDataKind = KindOfTextData.Delimited,
        HeadersInFirstInputRow = true,
        InputFileName = inFile,
        TransformerType = TransformerType.RecordFilter,
        // The specialty is compared with OrdinalIgnoreCase instead of ToLower(): the outcome no longer depends on the
        // current culture (e.g. Turkish casing of 'I'), and a record with a null specialty is filtered out instead of throwing.
        RecordFilterPredicate = r => (string)r["NPPES Provider State"] == "NJ"
                                  && string.Equals((string)r["Specialty Description"], "dentist", StringComparison.OrdinalIgnoreCase),
        OutputDataKind = KindOfTextData.Delimited,
        HeadersInFirstOutputRow = true,
        OutputFileName = outLocation + Path.DirectorySeparatorChar + Path.GetFileNameWithoutExtension(inFile) + "_NJ_dentists.csv"
    };

    Orchestrator = OrchestratorCreator.GetEtlOrchestrator(config);
}
/// <summary>
/// Configures and creates the orchestrator that reads an X12 file, processes it cluster by cluster
/// and routes every cluster to either the "_low" or the "_high" output file.
/// </summary>
/// <param name="inFile">Input X12 file name; its base name and extension are reused for both output files.</param>
/// <param name="outLocation">Folder where the two output files are written.</param>
internal FileProcessor(string inFile, string outLocation)
{
    var inFileExt = Path.GetExtension(inFile);
    var outFileStem = $"{outLocation}{Path.DirectorySeparatorChar}{Path.GetFileNameWithoutExtension(inFile)}";

    //1st target: less than $1,000; 2nd target: $1,000 or more
    var lowAndHighFiles = $"{outFileStem}_low{inFileExt}|{outFileStem}_high{inFileExt}";

    var config = new OrchestratorConfig
    {
        GlobalCacheElements = new string[] { "LowCnt|0", "HighCnt|0", "AllCnt|0", "IsaElems", "GsElems" },
        DefaultX12SegmentDelimiter = "~\r\n",
        InputDataKind = KindOfTextData.X12,
        InputFileName = inFile,
        ClusterMarker = SegmentStartsCluster,
        MarkerStartsCluster = true,  //predicate (marker) matches the first record in cluster
        PrependHeadCluster = true,   //to contain ISA/GS segments for _high file
        AppendFootCluster = true,    //to contain IEA/GE segments for _high file
        RecordInitiator = StoreIsaAndGsSegments,
        PropertyBinEntities = PropertyBinAttachedTo.Clusters,
        DeferTransformation = DeferTransformation.UntilRecordInitiation,
        ConcurrencyLevel = 4,
        TransformerType = TransformerType.Clusterbound,
        ClusterboundTransformer = ProcessX12Transaction,
        RouterType = RouterType.PerCluster,
        ClusterRouter = SendToLowOrHigh,
        OutputDataKind = KindOfTextData.X12,
        OutputFileNames = lowAndHighFiles
    };

    Orchestrator = OrchestratorCreator.GetEtlOrchestrator(config);
}
/// <summary>
/// Resets the fixture state: a configuration with a mock logger at Debug severity (logs all levels)
/// and type definitions that treat every field as a string with an empty format.
/// </summary>
public void Initialize()
{
    var logger = new MockLogger(LogEntrySeverity.Debug);  //log all levels
    _config = new OrchestratorConfig(logger);

    // simple type definitions, everything string
    var fldTypes = new ConcurrentDictionary<string, ItemType>();
    var fldFormats = new ConcurrentDictionary<string, string>();
    _typeDefs = new TypeDefinitions(key => ItemType.String, fldTypes, key => string.Empty, fldFormats);
}
/// <summary>
/// Base ctor: stores the configuration and global cache, determines the output fields (if configured),
/// decides whether output goes to writer(s)/file(s), and prepares the lazily evaluated output initialization.
/// </summary>
/// <param name="config">Orchestrator configuration.</param>
/// <param name="globalCache">Global cache stored for later use.</param>
protected OutputProvider(OrchestratorConfig config, IGlobalCache globalCache)
{
    _config = config;
    _globalCache = globalCache;

    if (config.OutputFields != null)
    {
        //Output fields specified in config, they will drive output; this list of field names will not be overwritten
        _fieldsToUse = config.OutputFields.ListOfSingleElements(0)?.ToList();
        Debug.Assert(_fieldsToUse.IsNonEmptyList());  //if specified, the list must be complete
        //TODO: Error message instead of the above Assert (part of "config scrubber")
    }
    else
    {
        //this will be overwritten based on actual set of fields used (SetFieldsToUse method)
        _fieldsToUse = null;
    }

    //Output goes to writer(s)/file(s) only if the consumer (sync or async, as applicable) was left at its default
    // and either output writers or output file names are configured.
    bool defaultConsumerInUse = _config.AsyncOutput
        ? _config.AsyncOutputConsumer == OrchestratorConfig.DefaultAsyncOutputConsumer
        : _config.OutputConsumer == OrchestratorConfig.DefaultOutputConsumer;
    _outputToWriter = defaultConsumerInUse
                   && (_config.OutputWriters != null || _config.OutputFileNames != null);

    //InitOutput is not called here; it runs on the first access to the Lazy value
    _initErrorOccurred = new Lazy<bool>(() => !InitOutput());

    _targets = new ConcurrentDictionary<int, LineCounts>();
    _lastTargetNo = 1;
    _atOutputStart = new SingleUseBool();
} //ctor
/// <summary>
/// Factory method that returns a concrete instance of the derived class, chosen by <c>config.InputDataKind</c>.
/// </summary>
/// <param name="config">Orchestrator configuration; its InputDataKind selects the provider type.</param>
/// <param name="globalCache">Global cache passed to the created provider.</param>
/// <param name="typeDefinitions">Type definitions passed to the created provider.</param>
/// <param name="x12DelimitersForOutput">X12 delimiters; only passed to the X12 intake provider.</param>
/// <returns>The intake provider for the configured kind of data, or null if the kind is not recognized.</returns>
internal static IntakeProvider CreateProvider(OrchestratorConfig config, IGlobalCache globalCache, TypeDefinitions typeDefinitions, X12Delimiters x12DelimitersForOutput)
{
    IntakeProvider provider;

    switch (config.InputDataKind)
    {
        case KindOfTextData.Raw:
            provider = new RawIntakeProvider(config, globalCache, typeDefinitions);
            break;
        case KindOfTextData.Keyword:
            provider = new KwIntakeProvider(config, globalCache, typeDefinitions);
            break;
        case KindOfTextData.Delimited:
            provider = new DelimitedIntakeProvider(config, globalCache, typeDefinitions);
            break;
        case KindOfTextData.Flat:
            provider = new FlatIntakeProvider(config, globalCache, typeDefinitions);
            break;
        case KindOfTextData.Arbitrary:
            provider = new ArbitraryIntakeProvider(config, globalCache, typeDefinitions);
            break;
        case KindOfTextData.X12:
            provider = new X12IntakeProvider(config, globalCache, typeDefinitions, x12DelimitersForOutput);
            break;
        case KindOfTextData.XML:
        case KindOfTextData.JSON:
        case KindOfTextData.UnboundJSON:
            //XML and both JSON kinds share a single provider
            provider = new XrecordIntakeProvider(config, globalCache, typeDefinitions);
            break;
        default:
            //TODO: Message - fatal error, undetermined type of intake data
            provider = null;
            break;
    }

    return provider;
}
/// <summary>
/// Runs an orchestrator that reads X12 data from the given reader and writes JSON into an in-memory buffer.
/// </summary>
/// <param name="reader">Reader supplying the X12 intake.</param>
/// <returns>The process result along with the JSON text accumulated in the buffer.</returns>
private async Task<(ProcessResult result, string output)> ProcessX12Async(TextReader reader)
{
    var jsonBuffer = new StringBuilder();

    var config = new OrchestratorConfig
    {
        InputDataKind = KindOfTextData.X12,
        AsyncIntake = true,
        IntakeReader = () => reader,
        ClusterMarker = SegmentStartsCluster,
        MarkerStartsCluster = true,  //predicate (marker) matches the first record in cluster
        TransformerType = TransformerType.Universal,
        AllowTransformToAlterFields = true,
        UniversalTransformer = ExtractNeededElements,
        OutputDataKind = KindOfTextData.JSON,
        XmlJsonOutputSettings = "RecordNode|,IndentChars| ",
        OutputWriter = () => new StringWriter(jsonBuffer)
    };

    ProcessResult outcome;
    using (var orchestrator = OrchestratorCreator.GetEtlOrchestrator(config))
    {
        outcome = await orchestrator.ExecuteAsync();
    }

    //The buffer is read only after the orchestrator has been disposed.
    var json = jsonBuffer.ToString();
    return (outcome, json);
}
/// <summary>
/// Factory method that returns a concrete instance of the derived class, chosen by <c>config.OutputDataKind</c>.
/// </summary>
/// <param name="config">Orchestrator configuration; its OutputDataKind selects the provider type.</param>
/// <param name="globalCache">Global cache passed to the created provider.</param>
/// <param name="x12Delimiters">X12 delimiters; only passed to the X12 output provider.</param>
/// <returns>The output provider for the configured kind of data, or null if the kind is not recognized.</returns>
internal static OutputProvider CreateProvider(OrchestratorConfig config, IGlobalCache globalCache, X12Delimiters x12Delimiters)
{
    OutputProvider provider;

    switch (config.OutputDataKind)
    {
        case KindOfTextData.Raw:
            provider = new RawOutputProvider(config, globalCache);
            break;
        case KindOfTextData.Keyword:
            provider = new KwOutputProvider(config, globalCache);
            break;
        case KindOfTextData.Delimited:
            provider = new DelimitedOutputProvider(config, globalCache);
            break;
        case KindOfTextData.Flat:
            provider = new FlatOutputProvider(config, globalCache);
            break;
        case KindOfTextData.Arbitrary:
            provider = new ArbitraryOutputProvider(config, globalCache);
            break;
        case KindOfTextData.X12:
            provider = new X12OutputProvider(config, globalCache, x12Delimiters);
            break;
        case KindOfTextData.XML:
            //XML: flags for all item types are passed as the types to convert to string
            provider = new XrecordOutputProvider(
                ItemType.Void | ItemType.Bool | ItemType.DateTime | ItemType.Decimal | ItemType.Int | ItemType.String,
                config,
                globalCache);
            break;
        case KindOfTextData.JSON:
        case KindOfTextData.UnboundJSON:
            //JSON: only the DateTime flag is passed as the type to convert to string
            provider = new XrecordOutputProvider(ItemType.DateTime, config, globalCache);
            break;
        default:
            //TODO: Message - fatal error, undetermined type of output data
            provider = null;
            break;
    }

    return provider;
}
//container of the test results
private readonly List<string> _resultingLines;

/// <summary>
/// Fixture setup: a mock logger that records Fatal messages, keyword intake clustered on RECTYPE=XYZ records,
/// a cluster filter transformer, and an output consumer that collects the text of the output lines.
/// </summary>
public EtlOrchestrator_tests_ProcessResult()
{
    //prepare extraction of the results from the pipeline (the consumer below only uses the list once the pipeline runs)
    _resultingLines = new List<string>();
    _fatalLogMsgs = new List<Tuple<LogEntrySeverity, string, LogEntry>>();

    //records Fatal messages to _fatalLogMsgs (plus title box)
    var fatalLoggerMock = new Mock<ILogger>();
    fatalLoggerMock.Setup(logger => logger.LoggingThreshold).Returns(LogEntrySeverity.Fatal);
    fatalLoggerMock.Setup(logger => logger.Log(It.IsAny<LogEntry>()))
                   .Callback((LogEntry entry) =>
                   {
                       if (entry.Severity > LogEntrySeverity.Fatal)
                       {
                           return;
                       }
                       _fatalLogMsgs.Add(Tuple.Create(entry.Severity, entry.MessageOnDemand(), entry));
                   });

    _config = new OrchestratorConfig(fatalLoggerMock.Object) { InputDataKind = KindOfTextData.Keyword };
    _config.SetIntakeSupplier(new IntakeSupplierProvider(_intakeLines()).StringSupplier);
    _config.RetainQuotes = false;
    _config.InputKeyPrefix = "@p";
    _config.ExcludeItemsMissingPrefix = false;
    _config.ActionOnDuplicateKey = ActionOnDuplicateKey.IgnoreItem;

    //NUM is Int, every other key is String
    _config.TypeDefiner = key => new ItemDef(key == "NUM" ? ItemType.Int : ItemType.String, null);

    //records having @pRECTYPE=XYZ denote start of the cluster
    _config.ClusterMarker = (rec, prevRec, recCnt) => (string)rec["RECTYPE"] == "XYZ";
    _config.MarkerStartsCluster = true;  //predicate matches the first record in cluster
    _config.AllowOnTheFlyInputFields = true;
    _config.TransformerType = TransformerType.ClusterFilter;

    //place the lines on the list to be tested/asserted
    _config.OutputConsumer = (tpl, gc) => _resultingLines.Add(tpl?.Item1.Text);
}
/// <summary>
/// Verifies that an explicitly assigned intake supplier and output consumer are used even though
/// a (non-existing) input file name is configured as well.
/// </summary>
public void Process_OutputFileAndConsumererPresent_ConsumerWins()
{
    //arrange
    int suppliedCnt = 0;
    var consumed = new List<Tuple<ExternalLine, int>>();
    var config = new OrchestratorConfig
    {
        InputFileNames = "C:\\non-existing-file.abc",
        //supplies 3 lines, then null
        IntakeSupplier = gc => suppliedCnt++ < 3 ? $"Line #{suppliedCnt}".ToExternalTuple() : null,
        OutputConsumer = (tpl, gc) => consumed.Add(tpl)
    };

    var orchestrator = new EtlOrchestrator(config);

    //act
    var result = orchestrator.ExecuteAsync().Result;

    //assert
    //note that either InputFileNames or OutputFileNames would've caused InitializationError
    result.CompletionStatus.Should().Be(CompletionStatus.IntakeDepleted);
    result.RowsRead.Should().Be(3);
    result.ClustersRead.Should().Be(3);
    result.RowsWritten.Should().Be(3);
    result.ClustersWritten.Should().Be(3);
    suppliedCnt.Should().Be(4);  //3 lines plus the final call that returned null

    consumed.Count.Should().Be(4);  //incl. EOD, i.e. null
    consumed[0].Item1.Text.Should().Be("Line #1");
    consumed[0].Item2.Should().Be(1);
    consumed[2].Item1.Text.Should().Be("Line #3");
    consumed[2].Item2.Should().Be(1);
    consumed[3].Should().BeNull();
}
/// <summary>
/// Resets the fixture state: a fresh configuration, an empty container for the resulting lines,
/// and the intake lines converted to external tuples.
/// </summary>
public void Initialize()
{
    _config = new OrchestratorConfig();

    //prepare extraction of the results from the pipeline
    _resultingLines = new List<string>();

    var intakeTuples = _intakeLines().Select(line => line.ToExternalTuple());
    _inLines = intakeTuples.ToList();
}
//block to intercept output from the clustering block
private readonly ActionBlock<KeyValCluster> _resultsDiscarder;

/// <summary>
/// Fixture setup: a configuration with transformation deferred indefinitely and a block that discards every cluster it receives.
/// </summary>
public EtlOrchestrator_tests_FldNames()
{
    _config = new OrchestratorConfig();

    //to prevent linking clusteringBlock to transformingBlock (which could steal clusters from results extractor)
    _config.DeferTransformation = DeferTransformation.Indefinitely;

    //any output produced by the clustering block is irrelevant for these tests (and hence discarded)
    _resultsDiscarder = new ActionBlock<KeyValCluster>(cluster => { });
}
/// <summary>
/// Resets the fixture state: a configuration with a mock logger at Warning severity, a list plus action block
/// that collect the resulting clusters, and the intake lines converted to external tuples.
/// </summary>
public void Initialize()
{
    var logger = new MockLogger(LogEntrySeverity.Warning);
    _config = new OrchestratorConfig(logger);

    //prepare extraction of the results from the pipeline
    _resultingClusters = new List<KeyValCluster>();
    _resultsExtractor = new ActionBlock<KeyValCluster>(cluster => _resultingClusters.Add(cluster));

    var intakeTuples = _intakeLines().Select(line => line.ToExternalTuple());
    _inLines = intakeTuples.ToList();
}
/// <summary>
/// Ctor intended to be called by the CreateProvider method of the base class.
/// </summary>
/// <param name="typesToConvertToString">Flags set for item types to be converted to string.</param>
/// <param name="config">Orchestrator configuration, passed to the base ctor.</param>
/// <param name="globalCache">Global cache, passed to the base ctor.</param>
internal XrecordOutputProvider(ItemType typesToConvertToString, OrchestratorConfig config, IGlobalCache globalCache)
    : base(config, globalCache)
{
    //XML:  all types are converted to string, so that the format (if set in ItemDef) is applied on output.
    //JSON: only DateTime is converted to string; JSON natively represents data types, but it doesn't support dates.
    this._typesToConvertToString = typesToConvertToString;

    //Design note: XrecordOutputProvider could be split into 2 separate output providers (XmlOutputProvider and
    // JsonOutputProvider); typesToConvertToString would then not be needed (and it would directly fit into the
    // strategy pattern). A single class, however, allows for "symmetry" with Intake, where there is a single
    // XrecordIntakeProvider.
    //TODO: Consider splitting XrecordOutputProvider into XmlOutputProvider and JsonOutputProvider if performance slowdown is suspected.
}
/// <summary>
/// Ctor intended to be called by the CreateProvider method of the base class.
/// </summary>
/// <param name="config">Orchestrator configuration; its DefaultX12FieldDelimiter is used as the field delimiter ('*' if not set).</param>
/// <param name="globalCache">Global cache, passed to the base ctor.</param>
/// <param name="typeDefinitions">Type definitions, passed to the base ctor.</param>
/// <param name="x12DelimitersForOuput">X12 delimiters object stored in _x12DelimitersForOutput.</param>
internal X12IntakeProvider(OrchestratorConfig config, IGlobalCache globalCache, TypeDefinitions typeDefinitions, X12Delimiters x12DelimitersForOuput)
    : base(config,
           globalCache,
           typeDefinitions,
           sNo => sNo == 0 ? "Segment" : $"Elem{sNo:000}")  //in case of X12, fields are named: Segment, Elem001, Elem002,...
{
    _x12DelimitersForOutput = x12DelimitersForOuput;

    //A delimiter left at default(char) in the configuration falls back to '*'.
    var configuredDelimiter = config.DefaultX12FieldDelimiter;
    _x12FieldDelimiter = configuredDelimiter == default(char) ? '*' : configuredDelimiter;
}
/// <summary>
/// Resets the fixture state: a fresh configuration, two string builders each wrapped in its own writer,
/// and the intake lines converted to external tuples.
/// </summary>
public void Initialize()
{
    _config = new OrchestratorConfig();

    //each writer appends to its own string builder
    _xmlOutput1 = new StringBuilder();
    _xmlOutput2 = new StringBuilder();
    _outputWriter1 = new StringWriter(_xmlOutput1);
    _outputWriter2 = new StringWriter(_xmlOutput2);

    var intakeTuples = _intakeLines().Select(line => line.ToExternalTuple());
    _inLines = intakeTuples.ToList();
}
/// <summary>
/// Helper method that creates an orchestrator and links one of its blocks (e.g. ClusteringBlock or HoldingBlock)
/// directly to the results extractor, so that the block's output is intercepted.
/// Call to this method should be the last step of the arrange part for each test.
/// </summary>
/// <typeparam name="T">The output type of the intercepted block, e.g. KeyValCluster.</typeparam>
/// <param name="config">Orchestrator configuration to create the orchestrator for.</param>
/// <param name="nameOfBlockToIntercept">Name of the last block of the pipeline as it appears in EtlOrchestrator class, e.g "_clusteringBlock" or "_holdingBlock".
/// This block must not be linked to other blocks.</param>
/// <param name="resultsExtractor">Action block that will be receiving clusters instead of the rest of the pipeline.</param>
/// <returns>The orchestrator just created.</returns>
internal static EtlOrchestrator GetTestOrchestrator<T>(OrchestratorConfig config, string nameOfBlockToIntercept, ActionBlock<T> resultsExtractor)
{
    var orchestrator = new EtlOrchestrator(config);

    //The block is a field of the orchestrator that is looked up by name through the private accessor.
    var interceptedBlock = (ISourceBlock<T>)new PrivateAccessor(orchestrator).GetField(nameOfBlockToIntercept);

    var linkOptions = new DataflowLinkOptions { PropagateCompletion = true };
    interceptedBlock.LinkTo(resultsExtractor, linkOptions);

    return orchestrator;
}
//block to load results to container
private readonly ActionBlock<KeyValCluster> _resultsExtractor;

/// <summary>
/// Fixture setup: delimited intake with transformation deferred indefinitely; clusters received by
/// <c>_resultsExtractor</c> are queued in <c>_resultingClusters</c>.
/// </summary>
public EtlOrchestrator_tests_DelimitedIntake()
{
    _config = new OrchestratorConfig();
    _config.InputDataKind = KindOfTextData.Delimited;

    //to prevent linking clusteringBlock to transformingBlock (which could steal clusters from results extractor)
    _config.DeferTransformation = DeferTransformation.Indefinitely;

    _resultingClusters = new ConcurrentQueue<KeyValCluster>();
    _resultsExtractor = new ActionBlock<KeyValCluster>(cluster => _resultingClusters.Enqueue(cluster));
}
/// <summary>
/// Resets the fixture state: a fresh configuration, an empty container for the resulting lines,
/// and the intake lines converted to external tuples with source numbers assigned round-robin.
/// </summary>
public void Initialize()
{
    _config = new OrchestratorConfig();

    //prepare extraction of the results from the pipeline
    _resultingLines = new List<string>();

    //sourceNo is assigned in a round-robin fashion based on the line's position: 1,2,3,1,2,3,1,2
    _inLines = _intakeLines().Select((line, idx) => line.ToExternalTuple(idx % 3 + 1))
                             .ToList();
}
//will contain results to verify
private readonly ConcurrentQueue<IReadOnlyDictionary<string, object>> _traceBinHistory;

/// <summary>
/// Fixture setup: X12 intake supplied from the test's intake lines, no type definitions (everything string),
/// and an empty queue for the trace bin history.
/// </summary>
public EtlOrchestrator_tests_TraceBin()
{
    _traceBinHistory = new ConcurrentQueue<IReadOnlyDictionary<string, object>>();

    _config = new OrchestratorConfig();
    _config.InputDataKind = KindOfTextData.X12;

    var supplierProvider = new IntakeSupplierProvider(_intakeLines());
    _config.SetIntakeSupplier(supplierProvider.StringSupplier);
    //no type definitions (everything string)
}
//block to load results to container
private readonly ActionBlock<KeyValCluster> _resultsExtractor;

/// <summary>
/// Fixture setup: transformation deferred indefinitely, a reader over the test intake text,
/// and a queue (fed by <c>_resultsExtractor</c>) that collects the resulting clusters.
/// </summary>
public EtlOrchestrator_tests_JsonIntake()
{
    _config = new OrchestratorConfig();

    //to prevent linking clusteringBlock to transformingBlock (which could steal clusters from results extractor)
    _config.DeferTransformation = DeferTransformation.Indefinitely;

    _intakeReader = new StringReader(_intake);

    _resultingClusters = new ConcurrentQueue<KeyValCluster>();
    _resultsExtractor = new ActionBlock<KeyValCluster>(cluster => _resultingClusters.Enqueue(cluster));
}
//fixed field widths
private readonly List<int> _fieldWidths;

/// <summary>
/// Ctor intended to be called by the CreateProvider method of the base class.
/// </summary>
/// <param name="config">Orchestrator configuration; supplies the input field definitions and the default field width.</param>
/// <param name="globalCache">Global cache, passed to the base ctor.</param>
/// <param name="typeDefinitions">Type definitions, passed to the base ctor.</param>
internal FlatIntakeProvider(OrchestratorConfig config, IGlobalCache globalCache, TypeDefinitions typeDefinitions)
    : base(config, globalCache, typeDefinitions)
{
    //In case field widths only are configured (no field names) and no names in the header row, allow assignment of default names:
    var noNamesAvailable = FieldsNamesFromConfig.IsEmptyList() && !_config.HeadersInFirstInputRow;
    if (noNamesAvailable)
    {
        IncludeFieldsEnMasse(FieldsNamesFromConfig);
    }

    var widthElements = config.InputFields.ListOfSingleElements(1);

    //in case of undefined default width, 10 will be used
    var fallbackWidth = config.DefaultInputFieldWidth == 0 ? 10 : config.DefaultInputFieldWidth;

    _fieldWidths = widthElements.ToListOfInts(fallbackWidth).ToList();
}
//block to load results to container
private readonly ActionBlock<KeyValCluster> _resultsExtractor;

/// <summary>
/// Fixture setup: raw intake supplied from the test's intake lines, transformation deferred indefinitely,
/// and a queue (fed by <c>_resultsExtractor</c>) that collects the resulting clusters.
/// </summary>
public SimpleIntakeSupplier_tests()
{
    _resultingClusters = new ConcurrentQueue<KeyValCluster>();
    _resultsExtractor = new ActionBlock<KeyValCluster>(cluster => _resultingClusters.Enqueue(cluster));

    _config = new OrchestratorConfig { InputDataKind = KindOfTextData.Raw };

    var supplierProvider = new IntakeSupplierProvider(_intakeLines());
    _config.SetIntakeSupplier(supplierProvider.StringSupplier);

    //to prevent linking clusteringBlock to transformingBlock (which could steal clusters from results extractor)
    _config.DeferTransformation = DeferTransformation.Indefinitely;
}
/// <summary>
/// Resets the fixture state: a configuration that allows transforms to alter fields and simple type definitions
/// (fields starting with I_ are Int, everything else is String, all formats empty).
/// </summary>
public void Initialize()
{
    _config = new OrchestratorConfig();
    _config.AllowTransformToAlterFields = true;

    // simple type definitions, everything string, except for fields starting with I_ (int)
    // Field keys are identifiers, so the prefix test is ordinal rather than culture-sensitive.
    Func<string, ItemType> fldTypeFunc = key => key.StartsWith("I_", StringComparison.Ordinal) ? ItemType.Int : ItemType.String;
    var initFldTypes = new ConcurrentDictionary<string, ItemType>();
    Func<string, string> fldFormatFunc = key => string.Empty;
    var initFldFormats = new ConcurrentDictionary<string, string>();

    _typeDefs = new TypeDefinitions(fldTypeFunc, initFldTypes, fldFormatFunc, initFldFormats);
}