/// <summary>
/// Sets the dataset property named <paramref name="prefix"/> + <paramref name="fieldToUpdate"/>
/// to <paramref name="value"/> through reflection and then asks the shared repository to
/// persist that single field.
/// </summary>
/// <param name="prefix">First part of the property name (the callers in this file pass "Total" or "Processed").</param>
/// <param name="fieldToUpdate">Second part of the property name, e.g. "Texts".</param>
/// <param name="state">Task state carrying the dataset and the shared repository.</param>
/// <param name="value">New value assigned to the property.</param>
/// <exception cref="InvalidOperationException">
/// The dataset type has no public property with the composed name.
/// </exception>
private void UpdateField(string prefix, string fieldToUpdate, EdgarTaskState state, int value)
{
    string field = prefix + fieldToUpdate;

    // Resolve the property once; GetProperty returns null when the name does not exist,
    // so fail with a message that names the missing property instead of a bare
    // NullReferenceException.
    var property = state.Dataset.GetType().GetProperty(field);
    if (property == null)
    {
        throw new InvalidOperationException(
            "Property '" + field + "' was not found on type " + state.Dataset.GetType().FullName);
    }

    property.SetValue(state.Dataset, value);
    state.DatasetSharedRepo.UpdateEdgarDataset(state.Dataset, field);
}
/// <summary>
/// Starts the calculation, text and number parsers as three concurrent tasks for the given
/// dataset, blocks until all of them have finished and returns one task state per file.
/// </summary>
/// <param name="ds">Dataset whose files are processed.</param>
/// <param name="repo">Repository handed to every created task state.</param>
/// <param name="subs">Lookup assigned to the Submissions property of the three parsers.</param>
/// <param name="tags">Lookup assigned to the Tags property of the three parsers.</param>
/// <param name="dims">Lookup assigned to the Dimensions property of the text and number parsers.</param>
/// <returns>The states for the calc, text and num files, in that order.</returns>
private EdgarTaskState[] LoadCalTxtNum(EdgarDataset ds, IEdgarDatasetsRepository repo, ConcurrentDictionary<string, int> subs, ConcurrentDictionary<string, int> tags, ConcurrentDictionary<string, int> dims)
{
    List<EdgarTaskState> states = new List<EdgarTaskState>();
    List<Task> tasks = new List<Task>();

    // Process calc file
    EdgarTaskState stateCalc = new EdgarTaskState(EdgarDatasetCalculation.FILE_NAME, ds, repo);
    states.Add(stateCalc);
    calcParser.Submissions = subs;
    calcParser.Tags = tags;
    log.Info("Datasetid " + ds.Id.ToString() + " -- starting calcService.Process(...)");
    // false --> not in parallel, to avoid having too many threads
    tasks.Add(Task.Factory.StartNew(() =>
        calcParser.Process(stateCalc, false, EdgarDatasetCalculation.FILE_NAME, "Calculations")));

    // Process text file
    EdgarTaskState stateText = new EdgarTaskState(EdgarDatasetText.FILE_NAME, ds, repo);
    states.Add(stateText);
    textParser.Dimensions = dims;
    textParser.Submissions = subs;
    textParser.Tags = tags;
    log.Info("Datasetid " + ds.Id.ToString() + " -- starting textService.Process(...)");
    tasks.Add(Task.Factory.StartNew(() =>
    {
        textParser.Process(stateText, true, EdgarDatasetText.FILE_NAME, "Texts");

        // Retry at most MAX_TRIALS times while the state still names a file to reprocess.
        // NOTE(review): a never-used local previously extracted the last '\\'-separated
        // segment of FileNameToReprocess; the unmodified value is what has always been
        // passed to Process. Confirm whether a bare file name was intended instead.
        for (int i = 0; i < MAX_TRIALS; i++)
        {
            if (string.IsNullOrEmpty(stateText.FileNameToReprocess))
            {
                // Nothing in this task can set the name again, so further iterations are no-ops.
                break;
            }

            textParser.Process(stateText, true, stateText.FileNameToReprocess, "Texts");
        }
    }));

    // Process num file
    EdgarTaskState stateNum = new EdgarTaskState(EdgarDatasetNumber.FILE_NAME, ds, repo);
    states.Add(stateNum);
    numParser.Dimensions = dims;
    numParser.Submissions = subs;
    numParser.Tags = tags;
    log.Info("Datasetid " + ds.Id.ToString() + " -- starting numService.Process(...)");
    tasks.Add(Task.Factory.StartNew(() =>
    {
        bool parallel = ConfigurationManager.AppSettings["run_num_in_parallel"] == "true";
        numParser.Process(stateNum, parallel, EdgarDatasetNumber.FILE_NAME, "Numbers");
    }));

    Task.WaitAll(tasks.ToArray());
    return states.ToArray();
}
/// <summary>
/// Processes one file of a dataset: unless the file is already fully processed, reads all of
/// its lines from the cache folder, records the total line count (header excluded), delegates
/// the actual work to <c>ProcessFile</c> and finally stores the number of rows found in the
/// database as the processed count.
/// </summary>
/// <remarks>
/// Exceptions are not rethrown. They are logged, wrapped in an <c>EdgarDatasetException</c>
/// and reported through <c>state.Exception</c> with <c>state.ResultOk</c> set to false.
/// An empty file fails on the header access and is reported the same way.
/// </remarks>
/// <param name="state">Task state carrying the dataset and the shared repository; receives the result.</param>
/// <param name="processInParallel">Forwarded unchanged to <c>ProcessFile</c>.</param>
/// <param name="fileToProcess">File name appended to the dataset's folder inside the cache folder.</param>
/// <param name="fieldToUpdate">Suffix of the dataset's "Total…" and "Processed…" properties to update.</param>
public virtual void Process(EdgarTaskState state, bool processInParallel, string fileToProcess, string fieldToUpdate)
{
    try
    {
        Stopwatch timer = Stopwatch.StartNew();
        string logPrefix = "Datasetid " + state.Dataset.Id.ToString() + " -- " + fileToProcess + " -- ";
        Log.Info(logPrefix + "BEGIN PROCESS");

        int savedInDb;
        if (IsAlreadyProcessed(state, fieldToUpdate, out savedInDb))
        {
            state.FileNameToReprocess = null;
            Log.Info(logPrefix + "The complete file is already processed");
        }
        else
        {
            string cacheFolder = ConfigurationManager.AppSettings["cache_folder"];
            string datasetFolder = state.Dataset.RelativePath.Replace("/", "\\").Replace(".zip", "");
            string tsvFileName = datasetFolder + "\\" + fileToProcess;
            string[] allLines = File.ReadAllLines(cacheFolder + tsvFileName);
            string header = allLines[0];

            // The first line is the header, so it does not count as a data line.
            int dataLineCount = allLines.Length - 1;
            UpdateTotalField(state, fieldToUpdate, dataLineCount);

            // With nothing saved yet there is no need to look for gaps; null is passed instead.
            ConcurrentBag<int> missing = savedInDb == 0
                ? null
                : GetMissingLines(state.Dataset.Id, dataLineCount);

            ProcessFile(missing, fileToProcess, fieldToUpdate, state, allLines, header, cacheFolder, tsvFileName, processInParallel);
            Log.Info(logPrefix + "Process finished, updating dataset status");

            savedInDb = state.DatasetSharedRepo.GetCount<T>(state.Dataset.Id);
            UpdateProcessedField(state, fieldToUpdate, savedInDb);
        }

        timer.Stop();
        TimeSpan elapsed = timer.Elapsed;
        string elapsedTime = String.Format(
            "{0:00}:{1:00}:{2:00}.{3:00}",
            elapsed.Hours,
            elapsed.Minutes,
            elapsed.Seconds,
            elapsed.Milliseconds / 10);
        Log.Info(logPrefix + "END PROCESS -- time: " + elapsedTime);
        state.ResultOk = true;
    }
    catch (Exception ex)
    {
        // Report the failure through the state object instead of propagating it.
        state.ResultOk = false;
        state.Exception = new EdgarDatasetException(fileToProcess, ex);
        Log.Fatal("Datasetid " + state.Dataset.Id.ToString() + " -- " + fileToProcess + " -- Error: " + ex.Message, ex);
    }
}
/// <summary>
/// Compares the number of rows stored in the database with the dataset's "Processed…" and
/// "Total…" counters for <paramref name="fieldToUpdate"/>. When the stored row count differs
/// from the "Processed…" counter, the counter is overwritten with the row count.
/// </summary>
/// <param name="state">Task state carrying the dataset to inspect.</param>
/// <param name="fieldToUpdate">Suffix of the "Processed…" and "Total…" property names.</param>
/// <param name="savedInDb">Receives the row count reported by a freshly created repository.</param>
/// <returns>
/// True only when the row count, the previously stored processed counter and the total counter
/// are all equal and non-zero.
/// </returns>
/// <remarks>
/// NOTE(review): the comparison uses the processed value read before any synchronisation, so a
/// call that had to correct the counter always returns false. Confirm that this is intended.
/// </remarks>
private bool IsAlreadyProcessed(EdgarTaskState state, string fieldToUpdate, out int savedInDb)
{
    using (IEdgarDatasetsRepository repo = new EdgarRepository())
    {
        savedInDb = repo.GetCount<T>(state.Dataset.Id);

        Type datasetType = state.Dataset.GetType();
        int processed = (int)datasetType.GetProperty("Processed" + fieldToUpdate).GetValue(state.Dataset);

        bool countsAgree = savedInDb == processed;
        if (!countsAgree)
        {
            // Bring the stored counter in line with what the database actually contains.
            UpdateProcessedField(state, fieldToUpdate, savedInDb);
        }

        int total = (int)datasetType.GetProperty("Total" + fieldToUpdate).GetValue(state.Dataset);
        return countsAgree && processed == total && total != 0;
    }
}
/// <summary>
/// Starts the submission, tag and dimension parsers as three concurrent tasks for the given
/// dataset, blocks until all of them have finished and returns one task state per file.
/// </summary>
/// <param name="ds">Dataset whose files are processed.</param>
/// <param name="repo">Repository handed to every created task state.</param>
/// <returns>The states for the submission, tag and dimension files, in that order.</returns>
private EdgarTaskState[] LoadSubTagDim(EdgarDataset ds, IEdgarDatasetsRepository repo)
{
    EdgarTaskState stateSubs = new EdgarTaskState(EdgarDatasetSubmission.FILE_NAME, ds, repo);
    EdgarTaskState stateTag = new EdgarTaskState(EdgarDatasetTag.FILE_NAME, ds, repo);
    EdgarTaskState stateDim = new EdgarTaskState(EdgarDatasetDimension.FILE_NAME, ds, repo);
    List<EdgarTaskState> states = new List<EdgarTaskState> { stateSubs, stateTag, stateDim };

    List<Task> running = new List<Task>();

    log.Info("Datasetid " + ds.Id.ToString() + " -- starting submissionService.Process(...)");
    // false --> not in parallel, to avoid having too many threads
    Task submissionTask = Task.Factory.StartNew(() =>
        submissionParser.Process(stateSubs, false, EdgarDatasetSubmission.FILE_NAME, "Submissions"));
    running.Add(submissionTask);

    log.Info("Datasetid " + ds.Id.ToString() + " -- starting tagService.Process(...)");
    Task tagTask = Task.Factory.StartNew(() =>
    {
        // The setting is read inside the task, right before the parser runs.
        bool runTagInParallel = ConfigurationManager.AppSettings["run_tag_in_parallel"] == "true";
        tagParser.Process(stateTag, runTagInParallel, EdgarDatasetTag.FILE_NAME, "Tags");
    });
    running.Add(tagTask);

    log.Info("Datasetid " + ds.Id.ToString() + " -- starting dimensionService.Process(...)");
    Task dimensionTask = Task.Factory.StartNew(() =>
    {
        dimensionParser.Process(stateDim, false, EdgarDatasetDimension.FILE_NAME, "Dimensions");
    });
    running.Add(dimensionTask);

    Task.WaitAll(running.ToArray());
    return states.ToArray();
}
/// <summary>
/// Runs the render parser followed by the presentation parser inside a single task, blocks
/// until that task has finished and returns the two task states.
/// </summary>
/// <param name="ds">Dataset whose files are processed.</param>
/// <param name="repo">Repository handed to both created task states.</param>
/// <param name="subs">Lookup assigned to the Subs property of both parsers.</param>
/// <param name="tags">Lookup assigned to the presentation parser's Tags property.</param>
/// <param name="nums">Lookup assigned to the presentation parser's Nums property.</param>
/// <param name="texts">Lookup assigned to the presentation parser's Texts property.</param>
/// <returns>The states for the render and presentation files, in that order.</returns>
private EdgarTaskState[] LoadRenPre(EdgarDataset ds, IEdgarDatasetsRepository repo, ConcurrentDictionary<string, int> subs, ConcurrentDictionary<string, int> tags, ConcurrentDictionary<string, int> nums, ConcurrentDictionary<string, int> texts)
{
    EdgarTaskState stateRen = new EdgarTaskState(EdgarDatasetRender.FILE_NAME, ds, repo);
    EdgarTaskState statePre = new EdgarTaskState(EdgarDatasetPresentation.FILE_NAME, ds, repo);
    EdgarTaskState[] states = new EdgarTaskState[] { stateRen, statePre };

    Task renderThenPresentation = Task.Factory.StartNew(() =>
    {
        string logPrefix = "Datasetid " + ds.Id.ToString();

        // Presentations have a relationship to renders, so renders are processed first
        // and the two steps share one task instead of running concurrently.
        renderingParser.Subs = subs;
        log.Info(logPrefix + " -- starting renderingService.Process(...)");
        renderingParser.Process(stateRen, true, EdgarDatasetRender.FILE_NAME, "Renders");

        presentationParser.Subs = subs;
        presentationParser.Tags = tags;
        log.Info(logPrefix + " -- loading all rens for presentationService.Process(...)");
        presentationParser.Renders = renderingParser.GetAsConcurrent(ds.Id);
        presentationParser.Nums = nums;
        presentationParser.Texts = texts;
        log.Info(logPrefix + " -- starting presentationService.Process(...)");

        // true --> parallel execution, false --> sequential execution
        bool runPreInParallel = ConfigurationManager.AppSettings["run_pre_in_parallel"] == "true";
        presentationParser.Process(statePre, runPreInParallel, EdgarDatasetPresentation.FILE_NAME, "Presentations");
    });

    Task.WaitAll(renderThenPresentation);
    return states;
}
/// <summary>
/// Performs the parser-specific handling of a file whose lines have already been read.
/// Called by <c>Process</c> when the file is not yet fully processed.
/// </summary>
/// <param name="missing">
/// Null when <c>Process</c> found no rows saved in the database; otherwise the result of
/// <c>GetMissingLines</c>. NOTE(review): presumably the line numbers still to be stored;
/// verify against the implementations.
/// </param>
/// <param name="fileToProcess">File name that was passed to <c>Process</c>.</param>
/// <param name="fieldToUpdate">Suffix of the dataset's "Total…" / "Processed…" properties.</param>
/// <param name="state">Task state carrying the dataset and the shared repository.</param>
/// <param name="allLines">Every line of the file, header included.</param>
/// <param name="header">The first element of <paramref name="allLines"/>.</param>
/// <param name="cacheFolder">Value of the "cache_folder" application setting.</param>
/// <param name="tsvFileName">
/// Dataset relative path (with '/' replaced by '\' and ".zip" removed) followed by '\' and the
/// file name; appended to <paramref name="cacheFolder"/> it is the path the lines were read from.
/// </param>
/// <param name="processInParallel">Flag forwarded unchanged from <c>Process</c>.</param>
public abstract void ProcessFile(ConcurrentBag <int> missing, string fileToProcess, string fieldToUpdate, EdgarTaskState state, string[] allLines, string header, string cacheFolder, string tsvFileName, bool processInParallel);
/// <summary>
/// Stores <paramref name="value"/> in the dataset field whose name is "Total" followed by
/// <paramref name="fieldToUpdate"/>, by delegating to <c>UpdateField</c>.
/// </summary>
/// <param name="state">Task state forwarded to <c>UpdateField</c>.</param>
/// <param name="fieldToUpdate">Suffix of the field name, e.g. "Texts".</param>
/// <param name="value">Value to store.</param>
private void UpdateTotalField(EdgarTaskState state, string fieldToUpdate, int value)
{
    const string totalPrefix = "Total";
    UpdateField(totalPrefix, fieldToUpdate, state, value);
}
/// <summary>
/// Stores <paramref name="value"/> in the dataset field whose name is "Processed" followed by
/// <paramref name="fieldToUpdate"/>, by delegating to <c>UpdateField</c>.
/// </summary>
/// <param name="state">Task state forwarded to <c>UpdateField</c>.</param>
/// <param name="fieldToUpdate">Suffix of the field name, e.g. "Texts".</param>
/// <param name="value">Value to store.</param>
private void UpdateProcessedField(EdgarTaskState state, string fieldToUpdate, int value)
{
    const string processedPrefix = "Processed";
    UpdateField(processedPrefix, fieldToUpdate, state, value);
}