/// <summary>
/// Fetches the near-duplication results for the current matter through the near-duplication
/// adapter, converts every entry to a <c>Data.NearDuplicationResultInfo</c> and hands the
/// converted entries to the data pipe in batches of <c>Constants.NearDuplicationJobBatchSize</c>.
/// </summary>
/// <returns>
/// The converted documents accumulated since the last batch was sent. The list is empty when
/// the final document completed a batch or when there were no results at all.
/// </returns>
private List<Data.NearDuplicationResultInfo> GetNearDuplicationResult()
{
    // Documents collected for the batch currently being built.
    var pendingBatch = new List<Data.NearDuplicationResultInfo>();

    // Near-duplication results for the matter of the running job.
    var sourceResults = _nearDuplicationAdapter.GetNearDuplicationResult(_jobParameter.MatterId);

    var documentsInBatch = 0;

    // ClusterSort_FamilySort; handed by reference to the master check on every iteration.
    var nearDupeFamilyId = string.Empty;

    foreach (var document in sourceResults)
    {
        documentsInBatch++;
        _processedDocumentResult++;

        var isMaster = IsMasterDocumentInNearDuplicationGroup(document, document.IsMaster, ref nearDupeFamilyId);
        var documentReferenceId = GetDocumentRefernceIdFromNearDuplicationResultDocumentId(document.DocumentId);

        pendingBatch.Add(new Data.NearDuplicationResultInfo
        {
            DocumentId = documentReferenceId,
            DocumentSort = document.DocumentSort,
            ClusterSort = document.ClusterSort,
            FamilySort = document.FamilySort,
            IsMaster = isMaster,
            Source = document.Source,
            Similarity = document.Similarity
        });

        if (documentsInBatch >= Constants.NearDuplicationJobBatchSize)
        {
            // Batch is full: send it to the data pipe, then start a new batch.
            // The same list instance is cleared and reused after the send.
            SendDataPipe(pendingBatch);
            documentsInBatch = 0;
            pendingBatch.Clear();
        }
    }

    return pendingBatch;
}
/// <summary>
/// Loads the near-duplication results of the job's matter, maps each one to a
/// <c>Data.NearDuplicationResultInfo</c> and sends the mapped documents to the data pipe
/// whenever <c>Constants.NearDuplicationJobBatchSize</c> documents have been collected.
/// </summary>
/// <returns>
/// The mapped documents collected after the last send (possibly an empty list).
/// </returns>
private List<Data.NearDuplicationResultInfo> GetNearDuplicationResult()
{
    var batch = new List<Data.NearDuplicationResultInfo>();
    var results = _nearDuplicationAdapter.GetNearDuplicationResult(_jobParameter.MatterId);

    // ClusterSort_FamilySort; passed by reference into the master-document check.
    var nearDupeFamilyId = string.Empty;

    foreach (var resultDocument in results)
    {
        _processedDocumentResult++;

        var masterInGroup = IsMasterDocumentInNearDuplicationGroup(
            resultDocument,
            resultDocument.IsMaster,
            ref nearDupeFamilyId);

        var info = new Data.NearDuplicationResultInfo
        {
            DocumentId = GetDocumentRefernceIdFromNearDuplicationResultDocumentId(resultDocument.DocumentId),
            DocumentSort = resultDocument.DocumentSort,
            ClusterSort = resultDocument.ClusterSort,
            FamilySort = resultDocument.FamilySort,
            IsMaster = masterInGroup,
            Source = resultDocument.Source,
            Similarity = resultDocument.Similarity
        };
        batch.Add(info);

        // The list holds exactly the documents gathered since the last send,
        // so its Count is the size of the batch being built.
        var batchIsFull = batch.Count >= Constants.NearDuplicationJobBatchSize;
        if (!batchIsFull)
        {
            continue;
        }

        // Send the full batch to the data pipe and reuse the list for the next one.
        SendDataPipe(batch);
        batch.Clear();
    }

    return batch;
}
/// <summary>
/// Registers the near-duplication field values of one result document through
/// <c>AddNearDuplicationFields</c>: is-master flag, sort, family id, cluster id and similarity.
/// </summary>
/// <param name="resultDocument">Near-duplication result whose values are turned into fields.</param>
private void ConstructDocumentFieldsForVault(NearDuplicationResultInfo resultDocument)
{
    var documentId = resultDocument.DocumentId;

    // Values are formatted with the invariant culture.
    var clusterId = resultDocument.ClusterSort.ToString(CultureInfo.InvariantCulture);
    var familySort = resultDocument.FamilySort.ToString(CultureInfo.InvariantCulture);
    var documentSort = resultDocument.DocumentSort.ToString(CultureInfo.InvariantCulture);

    // Family id has the form "<ClusterSort>_<FamilySort>".
    string familyId = string.Concat(clusterId, "_", familySort);

    // Master flag is written as "Y" / "N".
    string masterFlag;
    if (resultDocument.IsMaster)
    {
        masterFlag = "Y";
    }
    else
    {
        masterFlag = "N";
    }

    AddNearDuplicationFields(documentId, EVSystemFields.ND_IsMaster, masterFlag, _fieldIdNdIsMaster);
    AddNearDuplicationFields(documentId, EVSystemFields.ND_Sort, documentSort, _fieldIdNdSort);
    AddNearDuplicationFields(documentId, EVSystemFields.ND_FamilyID, familyId, _fieldIdNdFamilyId);
    AddNearDuplicationFields(documentId, EVSystemFields.ND_ClusterID, clusterId, _fieldIdNdClusterId);
    AddNearDuplicationFields(documentId, EVSystemFields.ND_Similarity, resultDocument.Similarity, _fieldIdNdSimilarity);
}