Ejemplo n.º 1
0
        public void ProcessAllRecords()
        {
            string[] emailRecepients = null;

            try
            {
                if (HasRecords)
                {
                    //update job status to worker - in progress
                    TextExtractorLog.RaiseUpdate(String.Format("Updating record status to {0}. [WorkspaceArtifactId: {1}, ExtractorSetArtifactID: {2}]", Constant.ExtractorSetStatus.IN_PROGRESS_WORKER_PROCESSING, WorkspaceArtifactId, ExtractorSetArtifactId));
                    ArtifactQueries.UpdateRdoStringFieldValue(ServicesMgr, ExecutionIdentity.CurrentUser, WorkspaceArtifactId, Constant.Guids.ObjectType.ExtractorSet, Constant.Guids.Fields.ExtractorSet.Status, ExtractorSetArtifactId, Constant.ExtractorSetStatus.IN_PROGRESS_WORKER_PROCESSING);

                    var countForLogging = 1;
                    foreach (var workerQueueRecord in Records)
                    {
                        TextExtractorLog.RaiseUpdate(String.Format("Processing record - {0}. [WorkspaceArtifactId: {1}, ExtractorSetArtifactID: {2}, DocumentArtifactId: {3}]", countForLogging++, WorkspaceArtifactId, ExtractorSetArtifactId, workerQueueRecord.DocumentArtifactId));

                        //check for ExtractorSet cancellation
                        var extractorSet = ArtifactFactory.GetInstanceOfExtractorSet(ExecutionIdentity.CurrentUser, WorkspaceArtifactId, ExtractorSetArtifactId);
                        emailRecepients = extractorSet.EmailRecepients;

                        if (extractorSet.IsCancellationRequested())
                        {
                            TextExtractorLog.RaiseUpdate("Cancellation Requested.");

                            //delete all remaining records in current batch worker queue because the ExtractorSet is cancelled
                            TextExtractorLog.RaiseUpdate("Deleting all the remaining records in current worker queue batch.");
                            SqlQueryHelper.RemoveBatchFromQueue(EddsDbContext, BatchTableName);

                            return;
                        }

                        try
                        {
                            workerQueueRecord.Process();
                        }
                        catch (Exception ex)
                        {
                            var errorContext = String.Format("An error occured when processing the document. [WorkspaceArtifactId: {0}, ExtractorSetArtifactId: {1}, DocumentArtifactId: {2}]", workerQueueRecord.WorkspaceArtifactId, workerQueueRecord.ExtractorSetArtifactId, workerQueueRecord.DocumentArtifactId);

                            //log error message to ErrorLog table
                            ErrorLogModel.InsertRecord(errorContext, ex, workerQueueRecord.QueueId, workerQueueRecord.WorkspaceArtifactId);

                            //Update TextExtractorErrors field on the Document object incase of error
                            var    exceptionMessage = ExceptionMessageFormatter.GetInnerMostExceptionMessage(ex);
                            String extractorSetName = ArtifactQueries.GetExtractorSetNameForArtifactId(ServicesMgr, ExecutionIdentity, WorkspaceArtifactId, workerQueueRecord.ExtractorSetArtifactId);
                            var    fieldValue       = String.Format(Constant.ErrorMessages.DOCUMENT_ERROR_ENCOUNTERED, exceptionMessage, extractorSetName);
                            ArtifactQueries.AppendToDocumentLongTextFieldValue(ServicesMgr, ExecutionIdentity, workerQueueRecord.WorkspaceArtifactId, workerQueueRecord.DocumentArtifactId, Constant.Guids.Fields.Document.TextExtractorErrors, fieldValue);

                            //Update ExtractorSet Details field
                            ArtifactQueries.UpdateExtractorSetDetails(ServicesMgr, ExecutionIdentity, workerQueueRecord.WorkspaceArtifactId, workerQueueRecord.ExtractorSetArtifactId, Constant.ExtractorSetStatus.DetailMessages.COMPLETE_WITH_ERRORS_DETAILS);
                        }
                        finally
                        {
                            //delete current record from worker queue
                            SqlQueryHelper.RemoveRecordFromWorkerQueue(EddsDbContext, workerQueueRecord.QueueTableName, workerQueueRecord.QueueId);
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                throw new CustomExceptions.TextExtractorException("An error occured when processing records in the worker queue.", ex);
            }
            finally
            {
                VerifyAndUpdateWorkerStatusToComplete();
                if (emailRecepients != null)
                {
                    VerifyAndSendEmailForExtractorSetComplete(emailRecepients);
                }
            }
        }
        // Virtual for testing purpose
        public virtual Boolean Process(ExtractorSetDocument extractorSetDocument, ExtractorSet extractorSet)
        {
            if (extractorSetDocument == null)
            {
                throw new ArgumentNullException("extractorSetDocument");
            }
            if (extractorSetDocument.TextSource == null)
            {
                throw new CustomExceptions.TextExtractorException(Constant.ErrorMessages.EXTRACTOR_SET_SOURCE_LONG_TEXT_FIELD_IS_EMPTY);
            }

            var documentUpdatedWithExtractedText = false;

            ExtractorSetHistory extractorSetHistory = null;

            var errorContext = String.Format("An error occured when extracting text for field. [WorkspaceArtifactId: {0}, DocumentArtifactId: {1}, TextExtractorFieldArtifactId: {2}]", WorkspaceArtifactId, extractorSetDocument.ArtifactId, ArtifactId);

            try
            {
                string historyStartMarkerName = null;
                string historyStopMarkerName  = null;
                string historyMarkerType      = null;

                switch (TargetRule.MarkerEnum)
                {
                case Constant.MarkerEnum.RegEx:
                    historyStartMarkerName = RegExStartMarker.Name;
                    historyStopMarkerName  = (RegExStopMarker == null) ? null : RegExStopMarker.Name;
                    historyMarkerType      = "Regular Expression";
                    break;

                case Constant.MarkerEnum.PlainText:
                    historyStartMarkerName = StartMarker;
                    historyStopMarkerName  = StopMarker;
                    historyMarkerType      = "Plain Text";
                    break;
                }

                extractorSetHistory = new ExtractorSetHistory(ServicesMgr, ExecutionIdentity, ArtifactQueries, extractorSet.ArtifactId, extractorSetDocument.ArtifactId, DestinationField.ArtifactID, WorkspaceArtifactId, TargetName, historyStartMarkerName, historyStopMarkerName, historyMarkerType);

                if (!String.IsNullOrEmpty(this.StopMarker))
                {
                    TextExtractionUtility.StopMarker = this.StopMarker;
                }

                var extractedText = TextExtractionUtility.ExtractText(extractorSetDocument.TextSource, StartMarker, StopMarker, TargetRule);

                if (TextExtractionUtility.IsMarkerFound == false)
                {
                    //create extractor set history record
                    extractorSetHistory.CreateRecord(Constant.ExtractionSetHistoryStatus.COMPLETE_MARKER_NOT_FOUND);
                }
                else
                {
                    if (String.IsNullOrEmpty(extractedText))
                    {
                        //create extractor set history record
                        extractorSetHistory.CreateRecord(Constant.ExtractionSetHistoryStatus.COMPLETE_TEXT_NOT_FOUND);
                    }
                    else
                    {
                        //update Document field with extracted text
                        ArtifactQueries.UpdateDocumentTextFieldValue(ServicesMgr, ExecutionIdentity.CurrentUser, WorkspaceArtifactId, extractorSetDocument.ArtifactId, DestinationField.ArtifactID, extractedText);

                        //check if text is truncated
                        if (TextExtractionUtility.IsTextTruncated)
                        {
                            //Update TextExtractorDetails field on the Document object if extracted text is truncated
                            var fieldValue = String.Format(Constant.TextExtractorDetailsMessages.TRUNCATED, ArtifactQueries.GetFieldNameForArtifactId(ServicesMgr, ExecutionIdentity, WorkspaceArtifactId, DestinationField.ArtifactID));

                            ArtifactQueries.AppendToDocumentLongTextFieldValue(ServicesMgr, ExecutionIdentity, WorkspaceArtifactId, extractorSetDocument.ArtifactId, Constant.Guids.Fields.Document.TextExtractorDetails, fieldValue);
                        }

                        //create extractor set history record
                        extractorSetHistory.CreateRecord(Constant.ExtractionSetHistoryStatus.COMPLETE_TEXT_EXTRACTED);
                        documentUpdatedWithExtractedText = true;
                    }
                }
            }
            catch (Exception ex)
            {
                //create extractor set history record
                if (extractorSetHistory != null)
                {
                    extractorSetHistory.CreateRecord(Constant.ExtractionSetHistoryStatus.ERROR, ExceptionMessageFormatter.GetInnerMostExceptionMessage(ex));
                }
                else
                {
                    throw new Exception("An error occured when creating Extractor Set History record.");
                }

                //log error message to ErrorLog table
                ErrorLogModel.InsertRecord(errorContext, ex, ArtifactId, WorkspaceArtifactId);
            }

            return(documentUpdatedWithExtractedText);
        }