/// <summary>
/// Builds the document detail entries for a single load file record and the parser log describing it.
/// </summary>
/// <param name="correlationId">Correlation id stamped on every returned document detail and passed to the log</param>
/// <param name="recordText">Record text; tokenized into fields using the configured column delimiter and quote character</param>
/// <param name="documentCtrlNbr">Document control number; assigned to the document in append mode and passed to the log</param>
/// <param name="textFileList">Text file list, handed to document construction</param>
/// <param name="recordParserLog">Receives the parser log constructed for this record</param>
/// <returns>
/// In append mode: the native set document, followed by the image set document when image import is
/// enabled and an image set id is configured. Otherwise: a single entry wrapping the constructed document.
/// </returns>
public List<DocumentDetail> GetDocuments(string correlationId, string recordText, string documentCtrlNbr, List<string> textFileList, out JobWorkerLog<LoadFileDocumentParserLogInfo> recordParserLog)
{
    recordText.ShouldNotBe(null);

    // Assigned up front so the caller's variable is reset even if construction below throws.
    recordParserLog = null;

    var documentDetailList = new List<DocumentDetail>();
    var missingImageFiles = new List<string>();
    var missingContentFiles = new List<string>();
    var misMatchedFields = new List<string>();
    var misMatchedFieldsMessage = new List<string>();
    string missingNativeFile;
    bool isMissingContent;
    int importedImagesCount;

    // Parse record text into its fields
    var recordTokenizer = new RecordTokenizer(m_ColumnDelimiter, m_QuoteCharacter);
    var fields = recordTokenizer.ParseRecord(recordText);

    List<RVWDocumentFieldBEO> matchingKeyField = null; // For overlay

    // Get document
    RVWDocumentBEO document = ConsturctDocument(correlationId, fields, textFileList, ref matchingKeyField,
        out missingNativeFile, missingImageFiles, out isMissingContent, missingContentFiles,
        misMatchedFields, misMatchedFieldsMessage, out importedImagesCount);

    // True only when no image was imported AND no image was reported missing.
    bool imageFilesNotAssociated = !(importedImagesCount > 0 || missingImageFiles.Any());

    if (m_JobParameter.IsAppend)
    {
        // Assign DCN
        document.DocumentControlNumber = documentCtrlNbr;

        // 1) Construct native set
        var nativeDetail = new DocumentDetail
        {
            CorrelationId = correlationId,
            docType = DocumentsetType.NativeSet,
            document = GetDocumentForNativeSet(document),
            ConversationIndex = document.ConversationIndex,
            IsNewDocument = true
        };
        documentDetailList.Add(nativeDetail);

        // 2) Construct image set
        if (m_JobParameter.IsImportImages && !string.IsNullOrEmpty(m_JobParameter.ImageSetId))
        {
            var imageSetDocument = GetDocumentForImageSet(document, m_JobParameter.ImageSetId);
            imageSetDocument.IsImageFilesNotAssociated = imageFilesNotAssociated;

            var imageDetail = new DocumentDetail
            {
                CorrelationId = correlationId,
                docType = DocumentsetType.ImageSet,
                document = imageSetDocument,
                IsNewDocument = true
            };
            documentDetailList.Add(imageDetail);
        }
    }
    else
    {
        // Send original document to Search worker
        var originalDetail = new DocumentDetail
        {
            document = document,
            ConversationIndex = document.ConversationIndex
        };

        // Create a unique file name for each extracted content (text) file.
        // Ordinal, case-insensitive comparison: independent of the current culture and safe when Type is null.
        originalDetail.document.DocumentBinary.FileList.ForEach(file =>
        {
            if (string.Equals(file.Type, Constants.TEXT_FILE_TYPE, StringComparison.OrdinalIgnoreCase))
            {
                file.Path = string.Format("{0}?id={1}", file.Path, Guid.NewGuid().ToString());
            }
        });

        originalDetail.CorrelationId = correlationId;
        originalDetail.OverlayMatchingField = matchingKeyField;
        originalDetail.document.IsImageFilesNotAssociated = imageFilesNotAssociated;
        documentDetailList.Add(originalDetail);
    }

    // 3) Construct log
    var imageMappingKey = string.Empty;
    if (m_JobParameter.IsImportImages && m_JobParameter.LoadFile.ImageFile != null)
    {
        // NOTE(review): assumes ImageMatchingFieldId is a valid index into the parsed fields - confirm against caller.
        imageMappingKey = fields[m_JobParameter.LoadFile.ImageFile.ImageMatchingFieldId];
    }

    recordParserLog = ConstructLog(correlationId, true, document.DocumentId, missingNativeFile, missingImageFiles,
        isMissingContent, missingContentFiles, importedImagesCount, misMatchedFields, documentCtrlNbr,
        document.CrossReferenceFieldValue, misMatchedFieldsMessage, imageMappingKey);

    // Surface the log message on the first returned document.
    var firstDoc = documentDetailList.FirstOrDefault();
    if (firstDoc != null)
    {
        firstDoc.document.ImportMessage = recordParserLog.LogInfo.Message;
    }

    return documentDetailList;
}
/// <summary>
/// Initializes the load file parser: reads the import parameters, opens the load file with the
/// configured encoding, loads dataset, matter and search server details, and sets up the optional
/// image / content helper files.
/// </summary>
/// <remarks>
/// On any failure the error is sent to the log pipe, traced, reported to the director and rethrown.
/// </remarks>
protected override void BeginWork()
{
    try
    {
        base.BeginWork();

        m_Parameters = GetImportBEO(BootParameters);
        m_Parameters.ShouldNotBe(null);

        m_LoadFileUri = new Uri(m_Parameters.Locations.First());

        // Load file settings are dereferenced unconditionally here, so no later null re-checks are needed.
        var loadFile = m_Parameters.LoadFile;
        m_ColumnDelimiter = (char)loadFile.ColumnDelimiter;
        m_QuoteCharacter = (char)loadFile.QuoteCharacter;
        m_NewlineCharacter = (char)loadFile.NewlineDelimiter;
        m_RecordTokenizer = new RecordTokenizer(m_ColumnDelimiter, m_QuoteCharacter);
        m_EncodingType = Encoding.GetEncoding(loadFile.EncodingType);
        m_IsFirstLineHeader = loadFile.IsFirstLineHeader;

        var loadFilePath = HttpUtility.UrlDecode(m_LoadFileUri.OriginalString);
        ReportToDirector("LoadFileParser works on load file {0}", loadFilePath);
        m_StreamReader = new StreamReader(loadFilePath, m_EncodingType);

        // Dataset details
        m_Parameters.DatasetId.ShouldBeGreaterThan(0);
        m_Dataset = DataSetBO.GetDataSetDetailForDataSetId(m_Parameters.DatasetId);
        // Guard the dataset the same way matter and search server details are guarded below.
        m_Dataset.ShouldNotBe(null);

        var matterDetails = MatterDAO.GetMatterDetails(m_Parameters.MatterId.ToString());
        matterDetails.ShouldNotBe(null);
        m_Dataset.Matter = matterDetails;

        var searchServerDetails = ServerDAO.GetSearchServer(matterDetails.SearchServer.Id);
        searchServerDetails.ShouldNotBe(null);
        m_Dataset.Matter.SearchServer = searchServerDetails;

        m_DatasetPath = m_Dataset.CompressedFileExtractionLocation;

        // Image helper file (only when importing images through a helper file)
        var imageFile = loadFile.ImageFile;
        if (m_Parameters.IsImportImages &&
            imageFile != null &&
            imageFile.ImageExtractionOption == LoadFileImageExtractionOption.HelperFile)
        {
            var imageHelperFileName = imageFile.HelperFileName;
            ReportToDirector("LoadFileParser uses image helper file {0}", imageHelperFileName);
            _imageHelperFileParser = new HelperFileParser(this, imageHelperFileName);
        }

        var contentFile = loadFile.ContentFile;
        if (contentFile != null)
        {
            // Content (text) helper file
            if (contentFile.TextExtractionOption == LoadFileTextExtractionOption.HelperFile)
            {
                var contentHelperFileName = contentFile.HelperFileName;
                ReportToDirector("LoadFileParser uses content (text) helper file {0}", contentHelperFileName);
                TextHelperFile = new HelperFile(this, contentHelperFileName);
            }

            // Content field number inside the load file record, when one is configured
            if (contentFile.LoadFileContentField != null)
            {
                m_ContentFieldNumber = Convert.ToInt32(contentFile.LoadFileContentField);
            }
        }

        // 32 upper-case hex digits, no hyphens.
        _uniqueThreadString = Guid.NewGuid().ToString("N").ToUpperInvariant();

        SetMessageBatchSize(m_Parameters);
    }
    catch (Exception ex)
    {
        // Send log to Log Pipe
        LogMessage(false, Constants.ParserFailureMessageOnInitialize);
        ex.Trace();
        // The format string needs a placeholder, otherwise the exception detail is never rendered.
        ReportToDirector("Exception in LoadFileParser.BeginWork: {0}", ex.ToDebugString());
        throw;
    }
}
/// <summary>
/// Initializes the load file parser: reads the import parameters, opens the load file with the
/// configured encoding, loads dataset, matter and search server details, and sets up the optional
/// image / content helper files.
/// </summary>
/// <remarks>
/// On any failure the error is sent to the log pipe, traced, reported to the director and rethrown.
/// </remarks>
protected override void BeginWork()
{
    try
    {
        base.BeginWork();

        m_Parameters = GetImportBEO(BootParameters);
        m_Parameters.ShouldNotBe(null);

        m_LoadFileUri = new Uri(m_Parameters.Locations.First());

        // Load file settings are dereferenced unconditionally here, so no later null re-checks are needed.
        var loadFile = m_Parameters.LoadFile;
        m_ColumnDelimiter = (char)loadFile.ColumnDelimiter;
        m_QuoteCharacter = (char)loadFile.QuoteCharacter;
        m_NewlineCharacter = (char)loadFile.NewlineDelimiter;
        m_RecordTokenizer = new RecordTokenizer(m_ColumnDelimiter, m_QuoteCharacter);
        m_EncodingType = Encoding.GetEncoding(loadFile.EncodingType);
        m_IsFirstLineHeader = loadFile.IsFirstLineHeader;

        var loadFilePath = HttpUtility.UrlDecode(m_LoadFileUri.OriginalString);
        ReportToDirector("LoadFileParser works on load file {0}", loadFilePath);
        m_StreamReader = new StreamReader(loadFilePath, m_EncodingType);

        // Dataset details
        m_Parameters.DatasetId.ShouldBeGreaterThan(0);
        m_Dataset = DataSetBO.GetDataSetDetailForDataSetId(m_Parameters.DatasetId);
        // Guard the dataset the same way matter and search server details are guarded below.
        m_Dataset.ShouldNotBe(null);

        var matterDetails = MatterDAO.GetMatterDetails(m_Parameters.MatterId.ToString());
        matterDetails.ShouldNotBe(null);
        m_Dataset.Matter = matterDetails;

        var searchServerDetails = ServerDAO.GetSearchServer(matterDetails.SearchServer.Id);
        searchServerDetails.ShouldNotBe(null);
        m_Dataset.Matter.SearchServer = searchServerDetails;

        m_DatasetPath = m_Dataset.CompressedFileExtractionLocation;

        // Image helper file (only when importing images through a helper file)
        var imageFile = loadFile.ImageFile;
        if (m_Parameters.IsImportImages &&
            imageFile != null &&
            imageFile.ImageExtractionOption == LoadFileImageExtractionOption.HelperFile)
        {
            var imageHelperFileName = imageFile.HelperFileName;
            ReportToDirector("LoadFileParser uses image helper file {0}", imageHelperFileName);
            _imageHelperFileParser = new HelperFileParser(this, imageHelperFileName);
        }

        var contentFile = loadFile.ContentFile;
        if (contentFile != null)
        {
            // Content (text) helper file
            if (contentFile.TextExtractionOption == LoadFileTextExtractionOption.HelperFile)
            {
                var contentHelperFileName = contentFile.HelperFileName;
                ReportToDirector("LoadFileParser uses content (text) helper file {0}", contentHelperFileName);
                TextHelperFile = new HelperFile(this, contentHelperFileName);
            }

            // Content field number inside the load file record, when one is configured
            if (contentFile.LoadFileContentField != null)
            {
                m_ContentFieldNumber = Convert.ToInt32(contentFile.LoadFileContentField);
            }
        }

        // 32 upper-case hex digits, no hyphens.
        _uniqueThreadString = Guid.NewGuid().ToString("N").ToUpperInvariant();

        SetMessageBatchSize(m_Parameters);
    }
    catch (Exception ex)
    {
        // Send log to Log Pipe
        LogMessage(false, Constants.ParserFailureMessageOnInitialize);
        ex.Trace();
        // The format string needs a placeholder, otherwise the exception detail is never rendered.
        ReportToDirector("Exception in LoadFileParser.BeginWork: {0}", ex.ToDebugString());
        throw;
    }
}