コード例 #1
0
        /// <summary>
        /// Reads the near-duplication results for the job's matter from the adapter, forwards them
        /// through <c>SendDataPipe</c> each time <c>Constants.NearDuplicationJobBatchSize</c> items
        /// have accumulated, and returns the items that were not part of a full batch.
        /// </summary>
        /// <returns>
        /// The results collected since the last batch was sent. The list is empty when there were no
        /// results or when the last result completed a full batch.
        /// </returns>
        private List <Data.NearDuplicationResultInfo> GetNearDuplicationResult()
        {
            // Accumulates the current batch; the same instance is handed to SendDataPipe and then
            // cleared, and it is also the value returned at the end.
            // NOTE(review): this relies on SendDataPipe not keeping a reference to the list - confirm.
            var pendingBatch = new List <Data.NearDuplicationResultInfo>();

            var sourceDocuments = _nearDuplicationAdapter.GetNearDuplicationResult(_jobParameter.MatterId);

            // Carried by ref across iterations into IsMasterDocumentInNearDuplicationGroup
            // (original note: ClusterSort_FamilySort).
            var familyIdTracker = string.Empty;
            var pendingCount    = 0;

            foreach (var sourceDocument in sourceDocuments)
            {
                pendingCount++;
                _processedDocumentResult++;

                var masterFlag = IsMasterDocumentInNearDuplicationGroup(sourceDocument, sourceDocument.IsMaster, ref familyIdTracker);

                pendingBatch.Add(new Data.NearDuplicationResultInfo
                {
                    DocumentId   = GetDocumentRefernceIdFromNearDuplicationResultDocumentId(sourceDocument.DocumentId),
                    DocumentSort = sourceDocument.DocumentSort,
                    ClusterSort  = sourceDocument.ClusterSort,
                    FamilySort   = sourceDocument.FamilySort,
                    IsMaster     = masterFlag,
                    Source       = sourceDocument.Source,
                    Similarity   = sourceDocument.Similarity
                });

                if (pendingCount >= Constants.NearDuplicationJobBatchSize)
                {
                    // A full batch has been collected: send it and start over.
                    SendDataPipe(pendingBatch);
                    pendingCount = 0;
                    pendingBatch.Clear();
                }
            }

            return pendingBatch;
        }
コード例 #2
0
        /// <summary>
        /// Converts the near-duplication results of the job's matter into
        /// <c>Data.NearDuplicationResultInfo</c> items, passing them to <c>SendDataPipe</c> in
        /// batches of <c>Constants.NearDuplicationJobBatchSize</c>.
        /// </summary>
        /// <returns>
        /// The trailing items that did not fill a complete batch (possibly an empty list); full
        /// batches have already been passed to <c>SendDataPipe</c> by the time this returns.
        /// </returns>
        private List<Data.NearDuplicationResultInfo> GetNearDuplicationResult()
        {
            var rawResults = _nearDuplicationAdapter.GetNearDuplicationResult(_jobParameter.MatterId);
            var batch = new List<Data.NearDuplicationResultInfo>();

            var itemsInBatch = 0;
            // State shared between iterations through the ref argument below
            // (original note: ClusterSort_FamilySort).
            var currentFamilyId = string.Empty;

            foreach (var rawResult in rawResults)
            {
                itemsInBatch++;
                _processedDocumentResult++;

                var treatAsMaster = IsMasterDocumentInNearDuplicationGroup(rawResult, rawResult.IsMaster, ref currentFamilyId);
                var resultInfo = new Data.NearDuplicationResultInfo
                {
                    DocumentId = GetDocumentRefernceIdFromNearDuplicationResultDocumentId(rawResult.DocumentId),
                    DocumentSort = rawResult.DocumentSort,
                    ClusterSort = rawResult.ClusterSort,
                    FamilySort = rawResult.FamilySort,
                    IsMaster = treatAsMaster,
                    Source = rawResult.Source,
                    Similarity = rawResult.Similarity
                };
                batch.Add(resultInfo);

                var batchIsFull = !(itemsInBatch < Constants.NearDuplicationJobBatchSize);
                if (batchIsFull)
                {
                    // Hand the full batch over, then reuse the same list for the next one.
                    SendDataPipe(batch);
                    itemsInBatch = 0;
                    batch.Clear();
                }
            }

            return batch;
        }
コード例 #3
0
 /// <summary>
 ///     Builds the near-duplication field values of a single result document and registers each
 ///     one through <c>AddNearDuplicationFields</c>: master flag, sort, family id, cluster id
 ///     and similarity, in that order.
 /// </summary>
 /// <param name="resultDocument">Near-duplication result whose values are turned into fields.</param>
 private void ConstructDocumentFieldsForVault(NearDuplicationResultInfo resultDocument)
 {
     // Master flag is written as "Y" or "N".
     string masterFlag;
     if (resultDocument.IsMaster)
     {
         masterFlag = "Y";
     }
     else
     {
         masterFlag = "N";
     }
     AddNearDuplicationFields(resultDocument.DocumentId, EVSystemFields.ND_IsMaster, masterFlag, _fieldIdNdIsMaster);

     // Sort position, formatted culture-independently.
     var sortText = resultDocument.DocumentSort.ToString(CultureInfo.InvariantCulture);
     AddNearDuplicationFields(resultDocument.DocumentId, EVSystemFields.ND_Sort, sortText, _fieldIdNdSort);

     // Family id has the form "<ClusterSort>_<FamilySort>".
     var clusterText = resultDocument.ClusterSort.ToString(CultureInfo.InvariantCulture);
     var familyText = resultDocument.FamilySort.ToString(CultureInfo.InvariantCulture);
     string familyId = string.Concat(clusterText, "_", familyText);
     AddNearDuplicationFields(resultDocument.DocumentId, EVSystemFields.ND_FamilyID, familyId, _fieldIdNdFamilyId);

     // Cluster id is the cluster sort value on its own.
     AddNearDuplicationFields(resultDocument.DocumentId, EVSystemFields.ND_ClusterID, clusterText, _fieldIdNdClusterId);

     // Similarity is passed through unchanged.
     AddNearDuplicationFields(resultDocument.DocumentId, EVSystemFields.ND_Similarity, resultDocument.Similarity,
         _fieldIdNdSimilarity);
 }