/// <summary>
/// Writes the verification status, name part and address part of
/// <paramref name="result"/> to standard output, then asserts that the result
/// carries the expected status and matching scores.
/// </summary>
/// <param name="vs">Expected verification status.</param>
/// <param name="vnm">Expected name matching score; compared with the actual score cast to <c>int</c>.</param>
/// <param name="vam">Expected address matching score; compared with the actual score cast to <c>int</c>.</param>
/// <param name="result">Matching result under test.</param>
public void doAllCheck(MatchingResult.VerificationStatus vs, int vnm, int vam, MatchingResult result)
{
    // The dump runs before any assertion, so it is emitted even when a check below fails.
    var output = Console.Out;
    output.WriteLine(result.verificationStatus);
    output.WriteLine(result.namePart);
    output.WriteLine(result.addressPart);

    Assert.AreEqual(vs, result.verificationStatus);

    // Scores are compared after a cast to int, i.e. with the fractional part dropped.
    int actualNameScore = (int)result.nameMatchingResult;
    Assert.AreEqual(vnm, actualNameScore);

    int actualAddressScore = (int)result.addressMatchingResult;
    Assert.AreEqual(vam, actualAddressScore);
}
// Other methods.

/// <summary>
/// Matches the transaction address line against the name and the two addresses
/// (registration and mail) of <c>leaderEntry</c>, splits the original address line
/// into a name part and an address part, and derives a verification status.
/// </summary>
/// <remarks>
/// Resets <c>nameSet</c>, <c>registrationAddressSet</c> and <c>mailAddressSet</c>, and
/// overwrites <c>maskedAddressLine</c>, <c>transactionAddressLine</c>,
/// <c>addressMatchingKind</c> and <c>moreThanTwoWordsLeft</c>.
/// </remarks>
/// <returns>
/// A new <see cref="MatchingResult"/> with the matching scores, the verification
/// status, and the whitespace-normalised name and address parts.
/// </returns>
public MatchingResult Match()
{
    nameSet = new List<String>();
    registrationAddressSet = new AddressSet();
    mailAddressSet = new AddressSet();
    MatchingResult result = new MatchingResult();

    maskedAddressLine = transactionAddressLine.Mask();
    transactionAddressLine = transactionAddressLine.PrepareString(WORDSTOREMOVE);
    ChangeNullStringToEmpty(leaderEntry);

    // Build the set of name words from the first and last name.
    GetWordsFromField(nameSet, leaderEntry.firstName.PrepareString(WORDSTOREMOVE));
    GetWordsFromField(nameSet, leaderEntry.lastName.PrepareString(WORDSTOREMOVE));

    ArrangeAddressSet(registrationAddressSet, leaderEntry.street, leaderEntry.postCode, leaderEntry.houseNumber, leaderEntry.apartmentNumber, leaderEntry.city);
    ArrangeAddressSet(mailAddressSet, leaderEntry.streetMail, leaderEntry.postCodeMail, leaderEntry.houseNumberMail, leaderEntry.apartmentNumberMail, leaderEntry.cityMail);

    MatchName(result);

    // Both addresses are matched against their own copy of the mask as it stands
    // after name matching, so the two attempts do not see each other's mask changes.
    double registrationAddressMatchingResult;
    String registrationAddressMasked = maskedAddressLine;
    String registrationAddressLeftovers = MatchAddress(out registrationAddressMatchingResult, registrationAddressSet, ref registrationAddressMasked);

    double mailAddressMatchingResult;
    String mailAddressMasked = maskedAddressLine;
    String mailAddressLeftOvers = MatchAddress(out mailAddressMatchingResult, mailAddressSet, ref mailAddressMasked);

    // Keep the better of the two attempts; on a tie the mail address wins.
    if (registrationAddressMatchingResult > mailAddressMatchingResult)
    {
        result.addressMatchingResult = registrationAddressMatchingResult;
        addressMatchingKind = AddressMatchingKind.REGISTRATION;
        transactionAddressLine = registrationAddressLeftovers;
        maskedAddressLine = registrationAddressMasked;
    }
    else
    {
        result.addressMatchingResult = mailAddressMatchingResult;
        addressMatchingKind = AddressMatchingKind.MAIL;
        transactionAddressLine = mailAddressLeftOvers;
        maskedAddressLine = mailAddressMasked;
    }

    // True when at least two leftover words longer than one character remain.
    moreThanTwoWordsLeft = transactionAddressLine.Split(' ').Count(x => x.Length > 1) >= 2;

    SplitNameAndAddressParts(result);

    // The status check reads result.namePart, so it must run after the split
    // and before the final whitespace clean-up, as before.
    result.verificationStatus = ResolveVerificationStatus(result);

    // Final clean-up: collapse repeated whitespace and trim.
    result.namePart = result.namePart.RemoveMultipleWhiteSpaces().Trim();
    result.addressPart = result.addressPart.RemoveMultipleWhiteSpaces().Trim();

    return result;
}

/// <summary>
/// Splits <c>originalAddressLine</c> into <c>result.namePart</c> and
/// <c>result.addressPart</c>, using the last '#' and the first '%' in
/// <c>maskedAddressLine</c> as the boundaries and classifying the text between them.
/// </summary>
/// <param name="result">Result whose name and address parts are filled in.</param>
private void SplitNameAndAddressParts(MatchingResult result)
{
    // '#' is the mask character written by MatchName for matched name words.
    // NOTE(review): '%' is presumably the mask written by MatchAddress - confirm.
    int maximumNameIndex = maskedAddressLine.LastIndexOf('#');
    int minimumAddressIndex = maskedAddressLine.IndexOf('%');

    String between = string.Empty;
    if (maximumNameIndex > minimumAddressIndex)
    {
        // Original author's note: the matching score should be lowered here because
        // something is off, and the whole address should be sent.
        // NOTE(review): these empty values are overwritten right below, because
        // 'between' stays empty and therefore takes the "no alphanumerics" branch.
        // Behaviour is kept as it was; confirm the intended handling.
        result.namePart = String.Empty;
        result.addressPart = String.Empty;
    }
    else
    {
        between = originalAddressLine.Mid(maximumNameIndex + 1, minimumAddressIndex - maximumNameIndex - 1);
    }

    // Every branch below uses these two slices, so they are taken once.
    String nameHead = originalAddressLine.Left(maximumNameIndex + 1);
    String addressTail = originalAddressLine.Right(originalAddressLine.Length - minimumAddressIndex);

    // The digit class used to be spelled "/d", which matches '/' and the letter 'd'
    // instead of digits. [0-9] agrees with the IndexOfAny digit search below.
    // NOTE(review): the letter class is ASCII only; Polish diacritics are not counted as letters here.
    bool hasLetters = Regex.IsMatch(between, @"[a-zA-Z]");
    bool hasDigits = Regex.IsMatch(between, @"[0-9]");

    if (!hasLetters && !hasDigits)
    {
        // Nothing alphanumeric in the gap: it is dropped.
        result.namePart = nameHead;
        result.addressPart = addressTail;
    }
    else if (!hasLetters)
    {
        // Digits only: certainly not part of the name.
        result.namePart = nameHead;
        result.addressPart = between + addressTail;
    }
    else if (!hasDigits)
    {
        SplitLettersOnlyGap(result, between, nameHead, addressTail);
    }
    else
    {
        // Letters and digits: text before the first digit goes to the name,
        // everything from the first digit onwards goes to the address.
        int firstOccurrenceOfDigit = between.IndexOfAny(new char[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' });
        result.namePart = nameHead + between.Left(firstOccurrenceOfDigit);
        result.addressPart = between.Right(between.Length - firstOccurrenceOfDigit) + addressTail;
    }
}

/// <summary>
/// Distributes a gap that contains letters but no digits between the name part
/// and the address part.
/// </summary>
/// <param name="result">Result whose name and address parts are filled in.</param>
/// <param name="between">Text found between the name mask and the address mask.</param>
/// <param name="nameHead">Original line up to and including the last masked name character.</param>
/// <param name="addressTail">Original line from the first masked address character onwards.</param>
private void SplitLettersOnlyGap(MatchingResult result, String between, String nameHead, String addressTail)
{
    // Dot-separated tokens (e.g. "A.B"): everything up to the end of the last such
    // token goes to the name, the remainder to the address.
    MatchCollection dottedTokens = Regex.Matches(between, @"([A-Za-z0-9]+\.)+[A-Za-z0-9]+");
    if (dottedTokens.Count > 0)
    {
        Match lastToken = dottedTokens[dottedTokens.Count - 1];
        int splitAt = lastToken.Index + lastToken.Length;
        result.namePart = nameHead + between.Left(splitAt);
        result.addressPart = between.Right(between.Length - splitAt) + addressTail;
        return;
    }

    String compact = between.Replace(" ", "");

    // Starts with a hyphen: usually the second element of a hyphenated name.
    if (compact.StartsWith("-", StringComparison.Ordinal))
    {
        String[] split = between.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        if (split.Length > 0)
        {
            // First token joins the name; the remaining tokens start the address.
            result.namePart = nameHead + split[0];
            result.addressPart = String.Join(" ", split.Skip(1).ToArray()) + " " + addressTail;
        }
        else
        {
            result.namePart = nameHead + between;
            result.addressPart = addressTail;
        }
        return;
    }

    // Starts with a comma: it usually ends the name and begins the address.
    if (compact.StartsWith(",", StringComparison.Ordinal))
    {
        result.namePart = nameHead;
        result.addressPart = between + addressTail;
        return;
    }

    // If the gap contains a street-type keyword, everything to its left goes to the
    // name and everything from the keyword onwards goes to the address.
    String[] abbreviations = new String[] { "OS", "AL", "PL", "UL", "ULICA", "OSIEDLE", "ALEJA", "PLAC" };
    foreach (String abbreviation in abbreviations)
    {
        String exact = String.Empty;
        if (between.ContainsWord(abbreviation, ref exact))
        {
            // The keyword is swapped for a '|' marker and the text is cut at the marker.
            String temp = between.ReplaceWords("|", abbreviation);
            int markerIndex = temp.IndexOf('|');
            result.namePart = nameHead + " " + temp.Left(markerIndex);
            result.addressPart = exact + temp.Right(temp.Length - markerIndex - 1) + " " + addressTail;
            return;
        }
    }

    // Letters only, no leading hyphen or comma, no keyword: the gap is treated as
    // part of the name (it used to be sent to the address).
    result.namePart = nameHead + between;
    result.addressPart = addressTail;
}

/// <summary>
/// Derives the verification status from the matching scores, the leftover-word flag
/// and the words found in <c>result.namePart</c>.
/// </summary>
/// <param name="result">Result carrying the scores and the (not yet trimmed) name part.</param>
/// <returns>The verification status to store on the result.</returns>
private MatchingResult.VerificationStatus ResolveVerificationStatus(MatchingResult result)
{
    // TODO (from the original author): rework so that it agrees with what PL sent to the bank today.
    bool fullName = IsFullScore(result.nameMatchingResult);
    bool fullAddress = IsFullScore(result.addressMatchingResult);

    if (!(fullName && fullAddress))
    {
        bool bothAboveThreshold = result.nameMatchingResult > 49 && result.addressMatchingResult > 49;
        if (fullName || bothAboveThreshold)
        {
            return MatchingResult.VerificationStatus.TOFURTHERVERIFICATION;
        }
        return MatchingResult.VerificationStatus.NEGATIVE;
    }

    if (moreThanTwoWordsLeft)
    {
        return MatchingResult.VerificationStatus.TOFURTHERVERIFICATION;
    }

    // Last check. So far every word of firstName/lastName is known to occur in the
    // address line; now verify the reverse: every word of namePart must be a known
    // name word. The upper-case 'Ć' was missing from this character set.
    Regex nameWord = new Regex("[A-Za-ząęćóźżśńłĄĘĆÓŹŻŚŃŁ]+", RegexOptions.CultureInvariant);
    foreach (Match mt in nameWord.Matches(result.namePart))
    {
        if (!nameSet.Contains(mt.Value.PrepareString()))
        {
            return MatchingResult.VerificationStatus.TOFURTHERVERIFICATION;
        }
    }

    return MatchingResult.VerificationStatus.POSITIVE;
}

/// <summary>
/// Tells whether a matching score counts as a full (100) match.
/// </summary>
/// <param name="score">Matching score to test.</param>
/// <returns><c>true</c> when the score is within 1e-9 of 100.</returns>
private static bool IsFullScore(double score)
{
    // Scores are doubles that may be built by floating-point arithmetic, so an exact
    // "== 100" comparison can miss a full match by one rounding step.
    return Math.Abs(score - 100.0) < 1e-9;
}
/// <summary>
/// Scores how many of the words in <c>nameSet</c> occur as whole,
/// whitespace-delimited words (case-insensitively) in <c>transactionAddressLine</c>.
/// </summary>
/// <remarks>
/// Each word that is found is removed from <c>transactionAddressLine</c> and masked
/// with '#' in <c>maskedAddressLine</c>. The share of matched words, on a 0-100
/// scale, is added to <c>result.nameMatchingResult</c>; when <c>nameSet</c> is empty
/// the score is set to 0.
/// </remarks>
/// <param name="result">Result whose <c>nameMatchingResult</c> is updated.</param>
private void MatchName(MatchingResult result)
{
    if (nameSet.Count == 0)
    {
        result.nameMatchingResult = 0;
        return;
    }

    int matchedWords = 0;
    foreach (var word in nameSet)
    {
        // The word is data, not a pattern: escape it so that characters such as
        // '.', '(' or '+' in a name neither change the match nor throw.
        Regex wholeWord = new Regex(@"(^|[\s])" + Regex.Escape(word) + @"([\s]|$)", RegexOptions.IgnoreCase);
        if (!wholeWord.IsMatch(transactionAddressLine))
        {
            continue;
        }

        transactionAddressLine = transactionAddressLine.RemoveWords(word).RemoveMultipleWhiteSpaces();
        maskedAddressLine = MaskString(word, '#', maskedAddressLine);
        matchedWords++;
    }

    // Computed in one step from the count: adding 100.0 / n once per match drifts
    // (six matches out of six sum to 100.00000000000001), while 100.0 * n / n is
    // exactly 100.
    result.nameMatchingResult += 100.0 * matchedWords / nameSet.Count;
}