/// <summary>
/// Scores a single identifier of an existing record against the matching
/// identifier of an incoming record.
/// </summary>
/// <param name="incoming">Identifier taken from the incoming (search) record.</param>
/// <param name="existing">Identifier taken from the existing (candidate) record.</param>
/// <returns>
/// A new <c>MPIIdentifier</c> that copies the name and value of
/// <paramref name="existing"/> and carries the computed score. The score is 0
/// when <c>IsNullEmptyOrUnknown</c> reports either value as not comparable;
/// otherwise it is the non-match weight for a similarity of exactly 0, the
/// match weight for a similarity of exactly 1, and the match weight scaled by
/// the similarity for anything in between.
/// </returns>
private static MPIIdentifier GetIdentifierScore(MPIIdentifier incoming, MPIIdentifier existing)
{
    Contract.Requires(_mpiConfiguration != null && incoming != null && existing != null);

    // Both sides must carry a usable value before any comparison is attempted.
    bool comparable = !IsNullEmptyOrUnknown(incoming.Value) && !IsNullEmptyOrUnknown(existing.Value);
    if (!comparable)
    {
        // Nothing to compare: this identifier contributes nothing to the match score.
        return new MPIIdentifier
        {
            IdentifierName = existing.IdentifierName,
            Score = 0,
            Value = existing.Value,
        };
    }

    // Weights applied to this identifier.
    // TODO: look the weights up from configuration instead of hard-coding them:
    //       _mpiConfiguration.IdentifierMatchWeights.First(w => w.Key.Equals(incoming.Identifier)).Value
    var weights = new MPIIdentifierWeight
    {
        Identifier = incoming.IdentifierName,
        MatchWeight = 1,
        NonMatchWeight = -1,
    };

    // Degree of similarity between the incoming value and the existing value.
    var similarity = GetSimilarityScore(incoming.IdentifierName, incoming, existing);

    // Exactly 0 -> full non-match weight; exactly 1 -> full match weight;
    // anything else -> match weight scaled by the degree of similarity.
    double score = similarity == 0
        ? weights.NonMatchWeight
        : (similarity == 1
            ? weights.MatchWeight
            : weights.MatchWeight * similarity);

    return new MPIIdentifier
    {
        IdentifierName = existing.IdentifierName,
        Value = existing.Value,
        Score = score
    };
}
/// <summary>
/// Compares the value of an incoming identifier with the value of an existing
/// identifier, using the comparison selected by the incoming identifier's
/// <c>MatchType</c>. Documented range of the result: minimum 0, maximum 1.
/// </summary>
/// <param name="identifier">Name of the identifier being compared (not used by the comparison itself).</param>
/// <param name="incoming">Identifier from the incoming record; its <c>MatchType</c> selects the comparison.</param>
/// <param name="existing">Identifier from the existing record.</param>
/// <returns>
/// The result of the selected comparison, or 0 when either value is null or
/// the match type is not one of the handled kinds.
/// </returns>
private static double GetSimilarityScore(string identifier, MPIIdentifier incoming, MPIIdentifier existing)
{
    var searchValue = incoming.Value;
    var candidateValue = existing.Value;

    // Only compare when both sides actually hold a value; a null on either
    // side is treated as a non-match.
    if (searchValue != null && candidateValue != null)
    {
        var matchType = incoming.MatchType;

        if (matchType == MatchType.StringMatchUsingJaroDistance)
        {
            return CompareStringsUsingJaro(searchValue, candidateValue);
        }

        if (matchType == MatchType.DateMatch)
        {
            return CompareDates(searchValue, candidateValue);
        }

        if (matchType == MatchType.GenderMatch)
        {
            return CompareGenders(searchValue, candidateValue);
        }
    }

    // Null value or unhandled match type: no similarity.
    return 0;
}