public override void InitializeRelatedTables(DataProcessor processor, TableDataBuilder table)
{
    if (!InlineFields)
    {
        var tables = processor.TableMap.Tables;
        var current = tables.FirstOrDefault(t => t.Name == TableName) as TableDataBuilder;
        if (current != null)
        {
            if (!current.Schema.FieldsAreEqual(LookupTableBuilder.Schema) || current.GetType() != LookupTableBuilder.GetType())
            {
                throw new InvalidOperationException("Lookup tables with the same name must have the same schema and type to be shared");
            }
            LookupTableBuilder.Schema.ClearRelations();
            LookupTableBuilder = current;
        }
        else
        {
            //Insert dimension tables at the start, since they are referenced by other tables
            tables.Insert(0, LookupTableBuilder);
        }

        table.Schema.Associate(LookupTableBuilder.Schema,
            table.Schema.Fields.Where(f => Fields.Contains(f)).ToArray(),
            LookupTableBuilder.Schema.Keys.Select(fp => fp.Value), true);
    }

    //Initialize related tables in field mappers
    foreach (var fm in FieldMappers)
    {
        fm.InitializeRelatedTables(processor, InlineFields ? table : LookupTableBuilder);
    }

    base.InitializeRelatedTables(processor, table);
}
public void Initialize(DataProcessor processor, TableDataBuilder parentTable)
{
    foreach (var mapper in TableMappers)
    {
        mapper.Initialize(processor, parentTable);
    }
}
public override void InitializeRelatedTables(DataProcessor processor, TableDataBuilder table)
{
    foreach (var fm in FieldMappers.SelectMany(fms => fms))
    {
        fm.InitializeRelatedTables(processor, table);
    }
}
public override void Initialize(DataProcessor processor)
{
    foreach (var fm in FieldMappers.SelectMany(fm => fm))
    {
        fm.Initialize(processor);
    }

    base.Initialize(processor);
}
public override void Initialize(DataProcessor processor)
{
    //Initialize field mappers
    foreach (var fm in FieldMappers)
    {
        fm.Initialize(processor);
    }

    base.Initialize(processor);
}
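/// <summary>
/// Builds a <see cref="TableDataBuilder"/> for each table definition, links it to the parent
/// table when one is provided, ensures a default key, registers the builder with the
/// processor's table map, and recursively initializes any nested table mappers.
/// </summary>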
public virtual void Initialize(DataProcessor processor, TableDataBuilder parentTable)
{
    Builders = new List<BuilderInfo>();
    foreach (var def in TableDefinitions)
    {
        var fieldMappers = def.FieldMappers.ToArray();
        foreach (var mapper in fieldMappers)
        {
            mapper.Initialize(processor);
        }

        var builder = fieldMappers.Length == 0 ? null : new TableDataBuilder(def.Name, fieldMappers);
        var builderInfo = new BuilderInfo(builder);

        if (builder != null)
        {
            if (parentTable != null)
            {
                builder.LinkParentTable(parentTable);
            }
            builder.EnsureKey(KeyFactory.Default);
            processor.TableMap.Tables.Add(builderInfo.Builder);
        }

        foreach (var fieldMapper in fieldMappers)
        {
            fieldMapper.InitializeRelatedTables(processor, builderInfo.Builder);
        }

        if (def.TableMappers != null)
        {
            foreach (var childTable in def.TableMappers)
            {
                builderInfo.Children.Add(childTable);
                childTable.Initialize(processor, builder ?? parentTable);
            }
        }

        Builders.Add(builderInfo);
    }
}
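/// <summary>
/// Initializes the key and creates a <see cref="LabelLoader"/> for each configured label,
/// initializing its label provider, before delegating to the base implementation.
/// </summary>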
public override void Initialize(DataProcessor processor)
{
    Key.Initialize(processor);

    _labelLoaders = new List<LabelLoader>();
    if (Labels != null)
    {
        foreach (var label in Labels)
        {
            var loader = new LabelLoader(label.Value);
            loader.LabelProvider.Initialize(processor);
            _labelLoaders.Add(loader);
        }
    }

    base.Initialize(processor);
}
public void InitializeRelatedTables(DataProcessor processor, TableDataBuilder table) { }
public void Initialize(DataProcessor processor) { }
public virtual void Initialize(DataProcessor processor) { _fields = CreateFields().ToArray(); }
public void Initialize(DataProcessor processor) { Lookup = processor.FieldLookup; }
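/// <summary>
/// Runs the export job end to end: writes the specification to the job's temp directory,
/// creates one <see cref="DataProcessor"/> per processing thread consuming items from a shared
/// <see cref="BlockingCollection{T}"/>, merges the per-processor output, runs the post
/// processors, and updates the job status as it progresses. Failures are logged and move the
/// job to <see cref="JobStatus.Failed"/>; canceled or failed jobs are deleted at the end.
/// </summary>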
public void Run()
{
    if (Status != JobStatus.Pending) throw new InvalidOperationException("Job is not pending");

    try
    {
        Directory.CreateDirectory(TempDirectory);
        File.WriteAllText(Path.Combine(TempDirectory, "specification.json"), Specification.ToString());

        SetStatus(JobStatus.Preparing);

        Specification.Initialize(this);
        var source = Specification.CreateDataSource();
        PostProcessors = Specification.CreatePostProcessors().ToArray();

        var jobDirectory = TempDirectory;

        //The processors will consume data from this collection
        var items = new BlockingCollection<object>(ExecutionSettings.DataSourceBufferSize);

        //Create the processors
        var processors = Enumerable.Range(0, ExecutionSettings.ProcessingThreads).Select(i =>
        {
            IItemFieldLookup lookup = null;
            try
            {
                lookup = new ItemDatabaseFieldLookup(Database.GetDatabase(ExecutionSettings.DatabaseName),
                    Specification.DefaultLanguage, ExecutionSettings.FieldCacheSize);
            }
            catch (Exception ex)
            {
                Log.Error("Error initializing item field lookup", ex, this);
            }

            var proc = new DataProcessor(Specification.CreateRootMapper())
            {
                BatchSize = ExecutionSettings.BatchSize,
                FieldLookup = lookup
            };

            var exporter = Specification.CreateExporter(jobDirectory) as CsvExporter; //Move PartitionPrefix so this cast isn't necessary
            if (exporter == null)
            {
                exporter = new CsvExporter(jobDirectory);
            }
            exporter.PartitionPrefix = "~" + i + "_";
            exporter.KeepOutput = true; //Don't delete the job's main directory

            proc.BatchWriter = new TableDataBatchWriter(exporter)
            {
                SyncLock = this,
                MaximumSize = ExecutionSettings.SizeLimit
            };

            proc.Initialize();

            return proc;
        }).ToArray();

        var hasUpdates = false;

        //Allow post processors to validate their conditions (if any). This allows the job to fail before data is processed.
        //Allow post processors to filter the data source for updates.
        foreach (var pp in PostProcessors)
        {
            pp.Validate(processors[0].Tables, Specification);
            if (pp.UpdateDataSource(processors[0].Tables, source))
            {
                if (hasUpdates)
                {
                    throw new InvalidOperationException("Only one post processor can update the data source");
                }
                hasUpdates = true;
            }
        }

        EstimatedClientItemCount = source.Count;

        //Start the processors
        var processingThreads = processors.Select(p =>
        {
            var t = new Thread(() =>
            {
                try
                {
                    p.Process(items.GetConsumingEnumerable());
                }
                catch (Exception ex)
                {
                    LastException = ex;
                    SetStatus(JobStatus.Failed, ex.Message);
                }
            });
            t.Start();
            return t;
        }).ToArray();

        if (Status == JobStatus.Failed)
        {
            throw LastException;
        }

        SetStatus(JobStatus.Running);

        source.ItemLoaded += (sender, args) =>
        {
            if (StatusUpdateFrequency <= 0 || ItemsProcessed % StatusUpdateFrequency == 0)
            {
                OnProgress();
            }
            ItemsProcessed = args;
            RowsCreated = processors.Sum(p => p.RowsCreated);
        };

        //Add items to the collection that the processors consume
        foreach (var item in source)
        {
            if (Status != JobStatus.Running)
            {
                break;
            }
            if (processors.Any(p => p.BatchWriter.End))
            {
                break;
            }
            items.Add(item);
        }
        items.CompleteAdding();

        //Wait for processors to finish
        foreach (var p in processingThreads)
        {
            p.Join();
        }

        RowsCreated = processors.Sum(p => p.RowsCreated);

        if (Status == JobStatus.Running)
        {
            //Now we know how many items we got for sure. Update progress to 100%
            EstimatedClientItemCount = ItemsProcessed;

            SetStatus(JobStatus.Merging);
            using (var csvWriter = Specification.CreateExporter(TempDirectory))
            {
                var tables = MergedTableData.FromTableSets(processors.Select(p => p.Tables)).ToArray();

                var w = csvWriter as CsvExporter;
                if (w == null || w.KeepOutput)
                {
                    tables = csvWriter.Export(tables).ToArray();
                }

                File.WriteAllText(Path.Combine(jobDirectory, "schema.json"), tables.Select(t => t.Schema).Serialize());

                foreach (var postProcessor in PostProcessors)
                {
                    CurrentPostProcessor = postProcessor;
                    SetStatus(JobStatus.PostProcessing, postProcessor.Name);
                    postProcessor.Process(jobDirectory, tables, Specification);
                }
                CurrentPostProcessor = null;

                foreach (var proc in processors)
                {
                    SizeLimitExceeded = SizeLimitExceeded || proc.BatchWriter.End;
                    proc.BatchWriter.Dispose();
                }

                SetStatus(JobStatus.Completing);
            }

            SetStatus(JobStatus.Completed);
        }
    }
    catch (Exception ex)
    {
        Log.Error("Job failed", ex, this);
        LastException = ex;
        SetStatus(JobStatus.Failed, ex.ToString());
    }

    try
    {
        OnJobEnded();
    }
    catch (Exception ex)
    {
        Log.Error("Exception occurred after job ended", ex, this);
        LastException = ex;
    }

    EndDate = DateTime.Now;

    try
    {
        if (Status == JobStatus.Canceled || Status == JobStatus.Failed)
        {
            Delete();
        }
    }
    catch (Exception ex)
    {
        Log.Error("Exception occurred while deleting job", ex, this);
    }
}