public WorkerQueueRecord(ISqlQueryHelper sqlSqlQueryHelper, IArtifactQueries artifactArtifactQueries, IDBContext eddsDbContext, IServicesMgr servicesMgr, ArtifactFactory artifactArtifactFactory, ExecutionIdentity executionExecutionIdentity, Int32 queueId, Int32 workspaceArtifactId, Int32 queueStatus, Int32 agentId, Int32 extractorSetArtifactId, Int32 documentArtifactId, Int32 extractorProfileArtifactId, Int32 sourceLongTextFieldArtifactId, ExtractorSetReporting textExtractorJobReporting, ExtractorSet extractorSet, ExtractorProfile extractorProfile)
        {
            SqlQueryHelper  = sqlSqlQueryHelper;
            ArtifactQueries = artifactArtifactQueries;

            QueueId                       = queueId;
            WorkspaceArtifactId           = workspaceArtifactId;
            QueueStatus                   = queueStatus;
            AgentId                       = agentId;
            ExtractorSetArtifactId        = extractorSetArtifactId;
            DocumentArtifactId            = documentArtifactId;
            ExtractorProfileArtifactId    = extractorProfileArtifactId;
            SourceLongTextFieldArtifactId = sourceLongTextFieldArtifactId;
            TextExtractorJobReporting     = textExtractorJobReporting;
            ArtifactFactory               = artifactArtifactFactory;

            QueueTableName    = Constant.Tables.WorkerQueue;
            EddsDbContext     = eddsDbContext;
            ServicesMgr       = servicesMgr;
            ExecutionIdentity = executionExecutionIdentity;

            ExtractorSet     = extractorSet;
            ExtractorProfile = extractorProfile;
        }
Exemplo n.º 2
0
        public void ProcessAllTargetTexts(ExtractorSetDocument textExtractorDocument, ExtractorSet extractorSet)
        {
            if (ExtractorTargetTexts.Count > 0)
            {
                foreach (var currentExtractorTargetText in ExtractorTargetTexts)
                {
                    var extractorTargetText = (ExtractorTargetText)currentExtractorTargetText;

                    //check for ExtractorSet cancellation
                    if (extractorSet.IsCancellationRequested())
                    {
                        return;
                    }

                    try
                    {
                        var valueUpdated = extractorTargetText.Process(textExtractorDocument, extractorSet);

                        IncrementValuesUpdated(valueUpdated);
                    }
                    catch (Exception ex)
                    {
                        var errorContext = string.Format("An error occured when processing field for ExtractorTargetText [WorkspaceArtifactId: {0}, DocumentArtifactId: {1}, ExtractorTargetText_ArtifactId: {2}, ExtractorTargetText_TargetName: {3}]", WorkspaceArtifactId, textExtractorDocument.ArtifactId, extractorTargetText.ArtifactId, extractorTargetText.TargetName);

                        //log error message to ErrorLog table
                        ErrorLogModel.InsertRecord(errorContext, ex, ArtifactId, WorkspaceArtifactId);
                    }
                }
            }
        }
        // Virtual for testing purpose
        public virtual Boolean Process(ExtractorSetDocument extractorSetDocument, ExtractorSet extractorSet)
        {
            if (extractorSetDocument == null)
            {
                throw new ArgumentNullException("extractorSetDocument");
            }
            if (extractorSetDocument.TextSource == null)
            {
                throw new CustomExceptions.TextExtractorException(Constant.ErrorMessages.EXTRACTOR_SET_SOURCE_LONG_TEXT_FIELD_IS_EMPTY);
            }

            var documentUpdatedWithExtractedText = false;

            ExtractorSetHistory extractorSetHistory = null;

            var errorContext = String.Format("An error occured when extracting text for field. [WorkspaceArtifactId: {0}, DocumentArtifactId: {1}, TextExtractorFieldArtifactId: {2}]", WorkspaceArtifactId, extractorSetDocument.ArtifactId, ArtifactId);

            try
            {
                string historyStartMarkerName = null;
                string historyStopMarkerName  = null;
                string historyMarkerType      = null;

                switch (TargetRule.MarkerEnum)
                {
                case Constant.MarkerEnum.RegEx:
                    historyStartMarkerName = RegExStartMarker.Name;
                    historyStopMarkerName  = (RegExStopMarker == null) ? null : RegExStopMarker.Name;
                    historyMarkerType      = "Regular Expression";
                    break;

                case Constant.MarkerEnum.PlainText:
                    historyStartMarkerName = StartMarker;
                    historyStopMarkerName  = StopMarker;
                    historyMarkerType      = "Plain Text";
                    break;
                }

                extractorSetHistory = new ExtractorSetHistory(ServicesMgr, ExecutionIdentity, ArtifactQueries, extractorSet.ArtifactId, extractorSetDocument.ArtifactId, DestinationField.ArtifactID, WorkspaceArtifactId, TargetName, historyStartMarkerName, historyStopMarkerName, historyMarkerType);

                if (!String.IsNullOrEmpty(this.StopMarker))
                {
                    TextExtractionUtility.StopMarker = this.StopMarker;
                }

                var extractedText = TextExtractionUtility.ExtractText(extractorSetDocument.TextSource, StartMarker, StopMarker, TargetRule);

                if (TextExtractionUtility.IsMarkerFound == false)
                {
                    //create extractor set history record
                    extractorSetHistory.CreateRecord(Constant.ExtractionSetHistoryStatus.COMPLETE_MARKER_NOT_FOUND);
                }
                else
                {
                    if (String.IsNullOrEmpty(extractedText))
                    {
                        //create extractor set history record
                        extractorSetHistory.CreateRecord(Constant.ExtractionSetHistoryStatus.COMPLETE_TEXT_NOT_FOUND);
                    }
                    else
                    {
                        //update Document field with extracted text
                        ArtifactQueries.UpdateDocumentTextFieldValue(ServicesMgr, ExecutionIdentity.CurrentUser, WorkspaceArtifactId, extractorSetDocument.ArtifactId, DestinationField.ArtifactID, extractedText);

                        //check if text is truncated
                        if (TextExtractionUtility.IsTextTruncated)
                        {
                            //Update TextExtractorDetails field on the Document object if extracted text is truncated
                            var fieldValue = String.Format(Constant.TextExtractorDetailsMessages.TRUNCATED, ArtifactQueries.GetFieldNameForArtifactId(ServicesMgr, ExecutionIdentity, WorkspaceArtifactId, DestinationField.ArtifactID));

                            ArtifactQueries.AppendToDocumentLongTextFieldValue(ServicesMgr, ExecutionIdentity, WorkspaceArtifactId, extractorSetDocument.ArtifactId, Constant.Guids.Fields.Document.TextExtractorDetails, fieldValue);
                        }

                        //create extractor set history record
                        extractorSetHistory.CreateRecord(Constant.ExtractionSetHistoryStatus.COMPLETE_TEXT_EXTRACTED);
                        documentUpdatedWithExtractedText = true;
                    }
                }
            }
            catch (Exception ex)
            {
                //create extractor set history record
                if (extractorSetHistory != null)
                {
                    extractorSetHistory.CreateRecord(Constant.ExtractionSetHistoryStatus.ERROR, ExceptionMessageFormatter.GetInnerMostExceptionMessage(ex));
                }
                else
                {
                    throw new Exception("An error occured when creating Extractor Set History record.");
                }

                //log error message to ErrorLog table
                ErrorLogModel.InsertRecord(errorContext, ex, ArtifactId, WorkspaceArtifactId);
            }

            return(documentUpdatedWithExtractedText);
        }