예제 #1
0
        /// <summary>
        /// Runs every entry of <c>ExtractorTargetTexts</c> against the supplied document, stopping
        /// early as soon as the extractor set reports that cancellation was requested.
        /// </summary>
        /// <param name="textExtractorDocument">Document handed to each target text's <c>Process</c> call.</param>
        /// <param name="extractorSet">Extractor set handed to each <c>Process</c> call and polled for cancellation.</param>
        /// <remarks>
        /// An exception raised while handling one target text is written to the error log and does
        /// not prevent the remaining target texts from being handled.
        /// </remarks>
        public void ProcessAllTargetTexts(ExtractorSetDocument textExtractorDocument, ExtractorSet extractorSet)
        {
            // Nothing to do when no target texts are configured.
            if (ExtractorTargetTexts.Count <= 0)
            {
                return;
            }

            foreach (var item in ExtractorTargetTexts)
            {
                var targetText = (ExtractorTargetText)item;

                // Bail out of the whole run once the ExtractorSet has been cancelled.
                if (extractorSet.IsCancellationRequested())
                {
                    return;
                }

                try
                {
                    // Process returns whether a value was written; feed that into the running tally.
                    IncrementValuesUpdated(targetText.Process(textExtractorDocument, extractorSet));
                }
                catch (Exception ex)
                {
                    // Record the failure in the ErrorLog table and carry on with the next target text.
                    var message = string.Format(
                        "An error occured when processing field for ExtractorTargetText [WorkspaceArtifactId: {0}, DocumentArtifactId: {1}, ExtractorTargetText_ArtifactId: {2}, ExtractorTargetText_TargetName: {3}]",
                        WorkspaceArtifactId,
                        textExtractorDocument.ArtifactId,
                        targetText.ArtifactId,
                        targetText.TargetName);

                    ErrorLogModel.InsertRecord(message, ex, ArtifactId, WorkspaceArtifactId);
                }
            }
        }
        /// <summary>
        /// Extracts text for one document. Reads the source long-text field of the document; when it
        /// is null an error is appended to the document and the extractor set details are updated,
        /// otherwise all target texts of the extractor profile are processed against the text and the
        /// number of updated values is reported.
        /// </summary>
        /// <exception cref="CustomExceptions.TextExtractorException">
        /// Wraps any exception raised while processing; the message carries the workspace and
        /// document artifact ids.
        /// </exception>
        public void Process()
        {
            var errorContext = "An error occured when extracting text for field.";

            try
            {
                errorContext += String.Format(" [WorkspaceArtifactId: {0}, DocumentArtifactId: {1}]", WorkspaceArtifactId, DocumentArtifactId);

                try
                {
                    //retrieve text source on Document object
                    String textSource = ArtifactQueries.GetDocumentTextFieldValue(ServicesMgr, ExecutionIdentity, WorkspaceArtifactId, DocumentArtifactId, SourceLongTextFieldArtifactId);

                    if (textSource == null)
                    {
                        //update TextExtractorErrors field on Document object
                        String extractorSetName = ArtifactQueries.GetExtractorSetNameForArtifactId(ServicesMgr, ExecutionIdentity.CurrentUser, WorkspaceArtifactId, ExtractorSet.ArtifactId);
                        var    fieldValue       = string.Format(Constant.ErrorMessages.DOCUMENT_ERROR_ENCOUNTERED, Constant.ErrorMessages.EXTRACTOR_SET_SOURCE_LONG_TEXT_FIELD_IS_EMPTY, extractorSetName);
                        ArtifactQueries.AppendToDocumentLongTextFieldValue(ServicesMgr, ExecutionIdentity.CurrentUser, WorkspaceArtifactId, DocumentArtifactId, Constant.Guids.Fields.Document.TextExtractorErrors, fieldValue);

                        //Update ExtractorSet Details field
                        ArtifactQueries.UpdateExtractorSetDetails(ServicesMgr, ExecutionIdentity.CurrentUser, WorkspaceArtifactId, ExtractorSet.ArtifactId, Constant.ExtractorSetStatus.DetailMessages.COMPLETE_WITH_ERRORS_DETAILS);
                    }
                    else
                    {
                        var textExtractorDocument = new ExtractorSetDocument(DocumentArtifactId, textSource).GetInstance();

                        //extract text and update fields.
                        ExtractorProfile.ProcessAllTargetTexts(textExtractorDocument, ExtractorSet);

                        //update the value for reporting
                        ExtractorSet.ExtractorSetReporting.SetNumberOfUpdatesWithValues(WorkspaceArtifactId, ExtractorSetArtifactId, ExtractorProfile.NumberOfTargetTextsWithValues);
                    }
                }
                finally
                {
                    // Reset the count because it's being done on one instance of ExtractorProfile.
                    // Done in a finally block so a failure above cannot leave a stale count behind
                    // that would be added to the next document processed with this profile.
                    ExtractorProfile.ResetNumberOfTargetTextsWithValues();
                }
            }
            catch (Exception ex)
            {
                throw new CustomExceptions.TextExtractorException(errorContext, ex);
            }
        }
        /// <summary>
        /// Extracts text for this target from the document's text source, writes it to the
        /// destination field when something was extracted, and records the outcome as an
        /// extractor set history record.
        /// </summary>
        /// <param name="extractorSetDocument">Document whose <c>TextSource</c> is searched; must not be null.</param>
        /// <param name="extractorSet">Extractor set whose artifact id is stored on the history record.</param>
        /// <returns>
        /// <c>true</c> when text was extracted and written to the destination field; <c>false</c> when
        /// the marker or the text was not found, or when an error was caught and recorded.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="extractorSetDocument"/> is null.</exception>
        /// <exception cref="CustomExceptions.TextExtractorException">The document's text source is null.</exception>
        /// <exception cref="Exception">
        /// A failure occurred before the history object could be created; the original failure is
        /// available as the inner exception.
        /// </exception>
        // Virtual for testing purpose
        public virtual Boolean Process(ExtractorSetDocument extractorSetDocument, ExtractorSet extractorSet)
        {
            if (extractorSetDocument == null)
            {
                throw new ArgumentNullException("extractorSetDocument");
            }
            if (extractorSetDocument.TextSource == null)
            {
                throw new CustomExceptions.TextExtractorException(Constant.ErrorMessages.EXTRACTOR_SET_SOURCE_LONG_TEXT_FIELD_IS_EMPTY);
            }

            var documentUpdatedWithExtractedText = false;

            ExtractorSetHistory extractorSetHistory = null;

            var errorContext = String.Format("An error occured when extracting text for field. [WorkspaceArtifactId: {0}, DocumentArtifactId: {1}, TextExtractorFieldArtifactId: {2}]", WorkspaceArtifactId, extractorSetDocument.ArtifactId, ArtifactId);

            try
            {
                // Marker names/type stored on the history record; they stay null when the
                // marker kind is neither RegEx nor PlainText.
                string historyStartMarkerName = null;
                string historyStopMarkerName  = null;
                string historyMarkerType      = null;

                switch (TargetRule.MarkerEnum)
                {
                    case Constant.MarkerEnum.RegEx:
                        historyStartMarkerName = RegExStartMarker.Name;
                        // The stop marker is optional for regular-expression targets.
                        historyStopMarkerName  = (RegExStopMarker == null) ? null : RegExStopMarker.Name;
                        historyMarkerType      = "Regular Expression";
                        break;

                    case Constant.MarkerEnum.PlainText:
                        historyStartMarkerName = StartMarker;
                        historyStopMarkerName  = StopMarker;
                        historyMarkerType      = "Plain Text";
                        break;
                }

                extractorSetHistory = new ExtractorSetHistory(ServicesMgr, ExecutionIdentity, ArtifactQueries, extractorSet.ArtifactId, extractorSetDocument.ArtifactId, DestinationField.ArtifactID, WorkspaceArtifactId, TargetName, historyStartMarkerName, historyStopMarkerName, historyMarkerType);

                if (!String.IsNullOrEmpty(this.StopMarker))
                {
                    TextExtractionUtility.StopMarker = this.StopMarker;
                }

                var extractedText = TextExtractionUtility.ExtractText(extractorSetDocument.TextSource, StartMarker, StopMarker, TargetRule);

                if (TextExtractionUtility.IsMarkerFound == false)
                {
                    //create extractor set history record
                    extractorSetHistory.CreateRecord(Constant.ExtractionSetHistoryStatus.COMPLETE_MARKER_NOT_FOUND);
                }
                else
                {
                    if (String.IsNullOrEmpty(extractedText))
                    {
                        //create extractor set history record
                        extractorSetHistory.CreateRecord(Constant.ExtractionSetHistoryStatus.COMPLETE_TEXT_NOT_FOUND);
                    }
                    else
                    {
                        //update Document field with extracted text
                        ArtifactQueries.UpdateDocumentTextFieldValue(ServicesMgr, ExecutionIdentity.CurrentUser, WorkspaceArtifactId, extractorSetDocument.ArtifactId, DestinationField.ArtifactID, extractedText);

                        //check if text is truncated
                        if (TextExtractionUtility.IsTextTruncated)
                        {
                            //Update TextExtractorDetails field on the Document object if extracted text is truncated
                            var fieldValue = String.Format(Constant.TextExtractorDetailsMessages.TRUNCATED, ArtifactQueries.GetFieldNameForArtifactId(ServicesMgr, ExecutionIdentity, WorkspaceArtifactId, DestinationField.ArtifactID));

                            ArtifactQueries.AppendToDocumentLongTextFieldValue(ServicesMgr, ExecutionIdentity, WorkspaceArtifactId, extractorSetDocument.ArtifactId, Constant.Guids.Fields.Document.TextExtractorDetails, fieldValue);
                        }

                        //create extractor set history record
                        extractorSetHistory.CreateRecord(Constant.ExtractionSetHistoryStatus.COMPLETE_TEXT_EXTRACTED);
                        documentUpdatedWithExtractedText = true;
                    }
                }
            }
            catch (Exception ex)
            {
                //create extractor set history record
                if (extractorSetHistory != null)
                {
                    extractorSetHistory.CreateRecord(Constant.ExtractionSetHistoryStatus.ERROR, ExceptionMessageFormatter.GetInnerMostExceptionMessage(ex));
                }
                else
                {
                    // The failure happened before the history object existed, so it cannot be
                    // recorded there. Keep the original exception as the inner exception so the
                    // caller still sees the root cause instead of only this generic message.
                    throw new Exception("An error occured when creating Extractor Set History record.", ex);
                }

                //log error message to ErrorLog table
                ErrorLogModel.InsertRecord(errorContext, ex, ArtifactId, WorkspaceArtifactId);
            }

            return documentUpdatedWithExtractedText;
        }