Esempio n. 1
0
        public void Run()
        {
            if (Status != JobStatus.Pending) throw new InvalidOperationException("Job is not pending");

            try
            {
                Directory.CreateDirectory(TempDirectory);

                File.WriteAllText(Path.Combine(TempDirectory, "specification.json"), Specification.ToString());

                SetStatus(JobStatus.Preparing);

                Specification.Initialize(this);

                var source = Specification.CreateDataSource();                

                PostProcessors = Specification.CreatePostProcessors().ToArray();

                var jobDirectory = TempDirectory;

                //The processors will consume data from this collection;
                var items = new BlockingCollection<object>(ExecutionSettings.DataSourceBufferSize);

                //Create the processors
                var processors = Enumerable.Range(0, ExecutionSettings.ProcessingThreads).Select(i =>
                {
                    IItemFieldLookup lookup = null;
                    try
                    {
                        lookup = new ItemDatabaseFieldLookup(Database.GetDatabase(ExecutionSettings.DatabaseName),
                            Specification.DefaultLanguage,
                            ExecutionSettings.FieldCacheSize);
                    }
                    catch (Exception ex)
                    {
                        Log.Error("Error initailizing item field lookup", ex, this);
                    }

                    var proc = new DataProcessor(Specification.CreateRootMapper())
                    {
                        BatchSize = ExecutionSettings.BatchSize,                        
                        FieldLookup = lookup
                    };

                    var exporter =  Specification.CreateExporter(jobDirectory) as CsvExporter; //Move PartititionPrefix so this cast isn't necessary
                    if (exporter == null)
                    {
                        exporter = new CsvExporter(jobDirectory);                        
                    }
                    exporter.PartitionPrefix = "~" + i + "_";
                    exporter.KeepOutput = true; //Don't delete the job's main directory
                    proc.BatchWriter = new TableDataBatchWriter(exporter)                    
                    {
                        SyncLock = this,
                        MaximumSize = ExecutionSettings.SizeLimit
                    };

                    proc.Initialize();

                    return proc;
                }).ToArray();



                var hasUpdates = false;                
                //Allow post processors to validate their conditions (if any). This allows the job to fail before data is processed
                //Allow post processors to filter data source for updates
                foreach (var pp in PostProcessors)
                {
                    pp.Validate(processors[0].Tables, Specification);
                    if (pp.UpdateDataSource(processors[0].Tables, source))
                    {
                        if (hasUpdates)
                        {
                            throw new InvalidOperationException("Only one post processor can update the data source");
                        }
                        hasUpdates = true;
                    }
                }

                EstimatedClientItemCount = source.Count;
                
                //Start the processors
                var processingThreads = processors.Select(p =>
                {
                    var t = new Thread(() =>
                    {
                        try
                        {
                            p.Process(items.GetConsumingEnumerable());
                        }
                        catch (Exception ex)
                        {
                            LastException = ex;
                            SetStatus(JobStatus.Failed, ex.Message);
                        }
                    });
                    t.Start();
                    return t;
                }).ToArray();


                if (Status == JobStatus.Failed)
                {
                    throw LastException;
                }
                
                SetStatus(JobStatus.Running);


                source.ItemLoaded += (sender, args) =>
                {
                    if (StatusUpdateFrequency <= 0 || ItemsProcessed % StatusUpdateFrequency == 0)
                    {
                        OnProgress();
                    }
                    ItemsProcessed = args;
                    RowsCreated = processors.Sum(p => p.RowsCreated);
                };                

                //Add items to the collection that the processors consume
                foreach (var item in source)
                {
                    if (Status != JobStatus.Running)
                    {
                        break;
                    }
                    
                    if (processors.Any(p => p.BatchWriter.End))
                    {
                        break;
                    }
                    items.Add(item);
                }
                items.CompleteAdding();

                //Wait for processors to finish
                foreach (var p in processingThreads)
                {
                    p.Join();
                }

                RowsCreated = processors.Sum(p => p.RowsCreated);

                if (Status == JobStatus.Running)
                {
                    //Now we know how many items we got for sure. Update progress to 100%
                    EstimatedClientItemCount = ItemsProcessed;

                    SetStatus(JobStatus.Merging);

                    using (var csvWriter = Specification.CreateExporter(TempDirectory))
                    {

                        var tables = MergedTableData.FromTableSets(processors.Select(p => p.Tables)).ToArray();

                        
                        var w = csvWriter as CsvExporter;
                        if (w == null || w.KeepOutput)
                        {                            
                            tables = csvWriter.Export(tables).ToArray();
                        }


                        File.WriteAllText(Path.Combine(jobDirectory, "schema.json"),
                            tables.Select(t => t.Schema).Serialize());


                        
                        foreach (var postProcessor in PostProcessors)
                        {
                            CurrentPostProcessor = postProcessor;
                            SetStatus(JobStatus.PostProcessing, postProcessor.Name);

                            postProcessor.Process(jobDirectory, tables, Specification);
                        }
                        CurrentPostProcessor = null;

                        foreach (var proc in processors)
                        {                            
                            SizeLimitExceeded = SizeLimitExceeded || proc.BatchWriter.End;
                            proc.BatchWriter.Dispose();
                        }

                        SetStatus(JobStatus.Completing);
                    }
                    SetStatus(JobStatus.Completed);
                }

            }
            catch (Exception ex)
            {
                Log.Error("Job failed", ex, this);
                LastException = ex;
                SetStatus(JobStatus.Failed, ex.ToString());
            }
            
            try
            {
                OnJobEnded();
            }
            catch (Exception ex)
            {
                Log.Error("Exception occured after job ended", ex, this);
                LastException = ex;
            }
            EndDate = DateTime.Now;

            try
            {
                if (Status == JobStatus.Canceled || Status == JobStatus.Failed)
                {
                    Delete();
                }
            }
            catch (Exception ex)
            {
                Log.Error("Exception occured while deleting job", ex, this);
            }
        }