Ejemplo n.º 1
0
        /// <summary>
        /// Sets the property named <paramref name="prefix"/> + <paramref name="fieldToUpdate"/> on the
        /// dataset held by <paramref name="state"/> and then asks the state's shared repository to
        /// persist that field.
        /// </summary>
        /// <param name="prefix">First part of the property name; callers in this file pass "Total" or "Processed".</param>
        /// <param name="fieldToUpdate">Second part of the property name (for example "Texts").</param>
        /// <param name="state">Task state that supplies the dataset and the repository used to save it.</param>
        /// <param name="value">New value assigned to the property.</param>
        /// <exception cref="System.InvalidOperationException">
        /// The dataset type exposes no public property with the composed name.
        /// </exception>
        private void UpdateField(string prefix, string fieldToUpdate, EdgarTaskState state, int value)
        {
            string field    = prefix + fieldToUpdate;
            var    dataset  = state.Dataset;
            var    property = dataset.GetType().GetProperty(field); // single reflection lookup

            if (property == null)
            {
                // Report the offending name instead of failing with an anonymous NullReferenceException.
                throw new System.InvalidOperationException("Property '" + field + "' was not found on " + dataset.GetType().Name);
            }

            property.SetValue(dataset, value);
            state.DatasetSharedRepo.UpdateEdgarDataset(dataset, field);
        }
        /// <summary>
        /// Starts the calculation, text and number parsers for a dataset, each on its own task,
        /// and blocks until all three tasks have completed.
        /// </summary>
        /// <param name="ds">Dataset whose files are processed.</param>
        /// <param name="repo">Repository handed to every task state that is created.</param>
        /// <param name="subs">Lookup assigned to the Submissions property of each parser.</param>
        /// <param name="tags">Lookup assigned to the Tags property of each parser.</param>
        /// <param name="dims">Lookup assigned to the Dimensions property of the text and number parsers.</param>
        /// <returns>The task states for the calc, text and num files, in that order.</returns>
        private EdgarTaskState[] LoadCalTxtNum(EdgarDataset ds, IEdgarDatasetsRepository repo, ConcurrentDictionary <string, int> subs, ConcurrentDictionary <string, int> tags, ConcurrentDictionary <string, int> dims)
        {
            List<EdgarTaskState> states = new List<EdgarTaskState>();
            List<Task>           tasks  = new List<Task>();

            // Calc file.
            EdgarTaskState stateCalc = new EdgarTaskState(EdgarDatasetCalculation.FILE_NAME, ds, repo);

            states.Add(stateCalc);
            calcParser.Submissions = subs;
            calcParser.Tags        = tags;
            log.Info("Datasetid " + ds.Id.ToString() + " -- starting  calcService.Process(...)");
            tasks.Add(Task.Factory.StartNew(() =>
            {
                // processInParallel is false here to avoid having too many threads.
                calcParser.Process(stateCalc, false, EdgarDatasetCalculation.FILE_NAME, "Calculations");
            }));

            // Text file.
            EdgarTaskState stateText = new EdgarTaskState(EdgarDatasetText.FILE_NAME, ds, repo);

            states.Add(stateText);
            textParser.Dimensions  = dims;
            textParser.Submissions = subs;
            textParser.Tags        = tags;
            log.Info("Datasetid " + ds.Id.ToString() + " -- starting  textService.Process(...)");
            tasks.Add(Task.Factory.StartNew(() =>
            {
                textParser.Process(stateText, true, EdgarDatasetText.FILE_NAME, "Texts");

                // Retry at most MAX_TRIALS times while the parser keeps reporting a file to reprocess.
                for (int trial = 0; trial < MAX_TRIALS; trial++)
                {
                    if (string.IsNullOrEmpty(stateText.FileNameToReprocess))
                    {
                        // Nothing pending; only this task touches stateText, so later iterations would be no-ops.
                        break;
                    }

                    // NOTE(review): FileNameToReprocess is passed through unchanged. If it can hold a full
                    // path, Process may need only its last '\\' segment -- confirm against Process.
                    textParser.Process(stateText, true, stateText.FileNameToReprocess, "Texts");
                }
            }));

            // Num file.
            EdgarTaskState stateNum = new EdgarTaskState(EdgarDatasetNumber.FILE_NAME, ds, repo);

            states.Add(stateNum);
            numParser.Dimensions  = dims;
            numParser.Submissions = subs;
            numParser.Tags        = tags;
            log.Info("Datasetid " + ds.Id.ToString() + " -- starting  numService.Process(...)");
            tasks.Add(Task.Factory.StartNew(() =>
            {
                // Parallel processing of the num file is opt-in through configuration.
                bool parallel = ConfigurationManager.AppSettings["run_num_in_parallel"] == "true";
                numParser.Process(stateNum, parallel, EdgarDatasetNumber.FILE_NAME, "Numbers");
            }));

            Task.WaitAll(tasks.ToArray());
            return states.ToArray();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Processes one file of a dataset: reads every line from the cache folder, records the total
        /// line count, delegates the actual work to <c>ProcessFile</c> and finally records how many rows
        /// the repository reports as saved. Any exception is captured in <paramref name="state"/>
        /// instead of being rethrown.
        /// </summary>
        /// <param name="state">Task state carrying the dataset and repository; receives ResultOk and Exception.</param>
        /// <param name="processInParallel">Forwarded unchanged to <c>ProcessFile</c>.</param>
        /// <param name="fileToProcess">File name appended to the dataset's folder below the cache folder.</param>
        /// <param name="fieldToUpdate">Suffix of the "Total"/"Processed" dataset properties to update.</param>
        public virtual void Process(EdgarTaskState state, bool processInParallel, string fileToProcess, string fieldToUpdate)
        {
            try
            {
                Stopwatch timer = Stopwatch.StartNew();
                Log.Info("Datasetid " + state.Dataset.Id.ToString() + " -- " + fileToProcess + " -- BEGIN PROCESS");

                int  rowsInDb;
                bool alreadyDone = IsAlreadyProcessed(state, fieldToUpdate, out rowsInDb);

                if (alreadyDone)
                {
                    state.FileNameToReprocess = null;
                    Log.Info("Datasetid " + state.Dataset.Id.ToString() + " -- " + fileToProcess + " -- The complete file is already processed");
                }
                else
                {
                    string cacheFolder    = ConfigurationManager.AppSettings["cache_folder"];
                    string relativeFolder = state.Dataset.RelativePath.Replace("/", "\\").Replace(".zip", "");
                    string tsvFileName    = relativeFolder + "\\" + fileToProcess;

                    string[] allLines = File.ReadAllLines(cacheFolder + tsvFileName);
                    string   header   = allLines[0];

                    // The first line is the header, so it is not counted as data.
                    int dataLineCount = allLines.Length - 1;
                    UpdateTotalField(state, fieldToUpdate, dataLineCount);

                    // With nothing saved yet there is no need to look for missing lines.
                    ConcurrentBag<int> missing = rowsInDb == 0
                        ? null
                        : GetMissingLines(state.Dataset.Id, dataLineCount);

                    ProcessFile(missing, fileToProcess, fieldToUpdate, state, allLines, header, cacheFolder, tsvFileName, processInParallel);

                    Log.Info("Datasetid " + state.Dataset.Id.ToString() + " -- " + fileToProcess + " -- Process finished, updating dataset status");
                    rowsInDb = state.DatasetSharedRepo.GetCount<T>(state.Dataset.Id);
                    UpdateProcessedField(state, fieldToUpdate, rowsInDb);
                }

                timer.Stop();
                TimeSpan elapsed     = timer.Elapsed;
                string   elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}", elapsed.Hours, elapsed.Minutes, elapsed.Seconds, elapsed.Milliseconds / 10);
                Log.Info("Datasetid " + state.Dataset.Id.ToString() + " -- " + fileToProcess + " -- END PROCESS -- time: " + elapsedTime);
                state.ResultOk = true;
            }
            catch (Exception failure)
            {
                // Failures are reported through the state object so the caller can inspect them later.
                state.ResultOk  = false;
                state.Exception = new EdgarDatasetException(fileToProcess, failure);
                Log.Fatal("Datasetid " + state.Dataset.Id.ToString() + " -- " + fileToProcess + " -- Error: " + failure.Message, failure);
            }
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Counts the rows saved for the dataset, re-syncs the dataset's "Processed" counter when it
 /// differs from that count, and reports whether the file is completely processed.
 /// </summary>
 /// <param name="state">Task state supplying the dataset (and the repository used by the counter update).</param>
 /// <param name="fieldToUpdate">Suffix of the "Processed"/"Total" dataset properties to inspect.</param>
 /// <param name="savedInDb">Receives the row count returned by a freshly created repository.</param>
 /// <returns>
 /// True only when the saved count, the processed counter read before any re-sync, and the total
 /// are all equal and the total is not zero.
 /// </returns>
 private bool IsAlreadyProcessed(EdgarTaskState state, string fieldToUpdate, out int savedInDb)
 {
     using (IEdgarDatasetsRepository countRepo = new EdgarRepository())
     {
         savedInDb = countRepo.GetCount<T>(state.Dataset.Id);

         int  processedBefore = (int)state.Dataset.GetType().GetProperty("Processed" + fieldToUpdate).GetValue(state.Dataset);
         bool countersInSync  = savedInDb == processedBefore;

         if (!countersInSync)
         {
             UpdateProcessedField(state, fieldToUpdate, savedInDb);
         }

         // NOTE(review): when the counter had to be re-synced this returns false even if the
         // synced value equals the total -- confirm that this is intended.
         int total = (int)state.Dataset.GetType().GetProperty("Total" + fieldToUpdate).GetValue(state.Dataset);
         return countersInSync && processedBefore == total && total != 0;
     }
 }
        /// <summary>
        /// Starts the submission, tag and dimension parsers for a dataset, each on its own task,
        /// and blocks until all three tasks have completed.
        /// </summary>
        /// <param name="ds">Dataset whose files are processed.</param>
        /// <param name="repo">Repository handed to every task state that is created.</param>
        /// <returns>The task states for submissions, tags and dimensions, in that order.</returns>
        private EdgarTaskState[] LoadSubTagDim(EdgarDataset ds, IEdgarDatasetsRepository repo)
        {
            EdgarTaskState stateSubs = new EdgarTaskState(EdgarDatasetSubmission.FILE_NAME, ds, repo);
            EdgarTaskState stateTag  = new EdgarTaskState(EdgarDatasetTag.FILE_NAME, ds, repo);
            EdgarTaskState stateDim  = new EdgarTaskState(EdgarDatasetDimension.FILE_NAME, ds, repo);

            log.Info("Datasetid " + ds.Id.ToString() + " -- starting  submissionService.Process(...)");
            Task subsTask = Task.Factory.StartNew(() =>
            {
                // processInParallel is false here to avoid having too many threads.
                submissionParser.Process(stateSubs, false, EdgarDatasetSubmission.FILE_NAME, "Submissions");
            });

            log.Info("Datasetid " + ds.Id.ToString() + " -- starting  tagService.Process(...)");
            Task tagTask = Task.Factory.StartNew(() =>
            {
                // Parallel processing of the tag file is opt-in through configuration.
                bool parallel = ConfigurationManager.AppSettings["run_tag_in_parallel"] == "true";
                tagParser.Process(stateTag, parallel, EdgarDatasetTag.FILE_NAME, "Tags");
            });

            log.Info("Datasetid " + ds.Id.ToString() + " -- starting  dimensionService.Process(...)");
            Task dimTask = Task.Factory.StartNew(() =>
            {
                dimensionParser.Process(stateDim, false, EdgarDatasetDimension.FILE_NAME, "Dimensions");
            });

            Task.WaitAll(subsTask, tagTask, dimTask);
            return new EdgarTaskState[] { stateSubs, stateTag, stateDim };
        }
        /// <summary>
        /// Processes the render file and then the presentation file of a dataset, sequentially inside a
        /// single task, and blocks until that task has completed.
        /// </summary>
        /// <param name="ds">Dataset whose files are processed.</param>
        /// <param name="repo">Repository handed to both task states.</param>
        /// <param name="subs">Lookup assigned to the Subs property of both parsers.</param>
        /// <param name="tags">Lookup assigned to the presentation parser's Tags property.</param>
        /// <param name="nums">Lookup assigned to the presentation parser's Nums property.</param>
        /// <param name="texts">Lookup assigned to the presentation parser's Texts property.</param>
        /// <returns>The task states for renders and presentations, in that order.</returns>
        private EdgarTaskState[] LoadRenPre(EdgarDataset ds, IEdgarDatasetsRepository repo, ConcurrentDictionary <string, int> subs, ConcurrentDictionary <string, int> tags, ConcurrentDictionary <string, int> nums, ConcurrentDictionary <string, int> texts)
        {
            EdgarTaskState stateRen = new EdgarTaskState(EdgarDatasetRender.FILE_NAME, ds, repo);
            EdgarTaskState statePre = new EdgarTaskState(EdgarDatasetPresentation.FILE_NAME, ds, repo);

            Task renThenPre = Task.Factory.StartNew(() =>
            {
                renderingParser.Subs = subs;
                log.Info("Datasetid " + ds.Id.ToString() + " -- starting  renderingService.Process(...)");
                // Renders go first: presentations have a relationship to renders.
                renderingParser.Process(stateRen, true, EdgarDatasetRender.FILE_NAME, "Renders");

                presentationParser.Subs = subs;
                presentationParser.Tags = tags;
                log.Info("Datasetid " + ds.Id.ToString() + " -- loading all rens for presentationService.Process(...)");
                presentationParser.Renders = renderingParser.GetAsConcurrent(ds.Id);
                presentationParser.Nums    = nums;
                presentationParser.Texts   = texts;
                log.Info("Datasetid " + ds.Id.ToString() + " -- starting  presentationService.Process(...)");

                // Parallel (true) or sequential (false) execution is chosen through configuration.
                bool parallel = ConfigurationManager.AppSettings["run_pre_in_parallel"] == "true";
                presentationParser.Process(statePre, parallel, EdgarDatasetPresentation.FILE_NAME, "Presentations");
            });

            Task.WaitAll(renThenPre);
            return new EdgarTaskState[] { stateRen, statePre };
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Performs the file-specific work for one dataset file; implemented by each concrete parser.
 /// </summary>
 /// <param name="missing">
 /// Presumably the line numbers still to be loaded. The <c>Process</c> method in this file passes
 /// null when no rows are saved yet and the result of <c>GetMissingLines</c> otherwise.
 /// </param>
 /// <param name="fileToProcess">Name of the file being processed.</param>
 /// <param name="fieldToUpdate">Suffix of the dataset counter properties associated with this file.</param>
 /// <param name="state">Task state carrying the dataset and the shared repository.</param>
 /// <param name="allLines">Every line of the file, header included (<c>Process</c> passes the output of File.ReadAllLines).</param>
 /// <param name="header">The first line of <paramref name="allLines"/> as passed by <c>Process</c>.</param>
 /// <param name="cacheFolder">Value of the "cache_folder" application setting as passed by <c>Process</c>.</param>
 /// <param name="tsvFileName">Path of the file relative to <paramref name="cacheFolder"/> as built by <c>Process</c>.</param>
 /// <param name="processInParallel">Whether the caller requested parallel processing; how it is honoured is up to the implementation.</param>
 public abstract void ProcessFile(ConcurrentBag <int> missing, string fileToProcess, string fieldToUpdate, EdgarTaskState state, string[] allLines, string header, string cacheFolder, string tsvFileName, bool processInParallel);
Ejemplo n.º 8
0
 /// <summary>
 /// Stores <paramref name="value"/> in the dataset property "Total" + <paramref name="fieldToUpdate"/>
 /// by delegating to <c>UpdateField</c>.
 /// </summary>
 /// <param name="state">Task state forwarded to <c>UpdateField</c>.</param>
 /// <param name="fieldToUpdate">Suffix appended to "Total" to form the property name.</param>
 /// <param name="value">New value for the property.</param>
 private void UpdateTotalField(EdgarTaskState state, string fieldToUpdate, int value)
 {
     UpdateField(prefix: "Total", fieldToUpdate: fieldToUpdate, state: state, value: value);
 }
Ejemplo n.º 9
0
 /// <summary>
 /// Stores <paramref name="value"/> in the dataset property "Processed" + <paramref name="fieldToUpdate"/>
 /// by delegating to <c>UpdateField</c>.
 /// </summary>
 /// <param name="state">Task state forwarded to <c>UpdateField</c>.</param>
 /// <param name="fieldToUpdate">Suffix appended to "Processed" to form the property name.</param>
 /// <param name="value">New value for the property.</param>
 private void UpdateProcessedField(EdgarTaskState state, string fieldToUpdate, int value)
 {
     UpdateField(prefix: "Processed", fieldToUpdate: fieldToUpdate, state: state, value: value);
 }