/// <summary>
/// Searches the locations stored in p_location for pairs of addresses that are possible duplicates
/// of each other and returns those pairs in <paramref name="ADuplicateAddresses"/>.
/// </summary>
/// <remarks>
/// Locations are grouped by country code and every location is compared (via PossibleMatch) with every
/// other location of the same group. Locations of a group with a usable country code are additionally
/// compared with all locations whose country code is empty or "99". Progress and an estimate of the
/// remaining time are published through TProgressTracker; the search stops as soon as the tracker
/// reports that the job was cancelled.
/// </remarks>
/// <param name="ADuplicateAddresses">Supplies the schema of the result table. On return it refers to a new
/// table with one row per possible duplicate pair; each row holds site key, location key, locality,
/// street name, address 3, city, county, postal code and country code of the first location followed by
/// the same nine values of the second location.</param>
/// <param name="AExactMatchNumber">Passed unchanged to PossibleMatch for every comparison.</param>
public static void FindAddressDuplicates(ref DataTable ADuplicateAddresses, bool AExactMatchNumber)
{
    TDBTransaction Transaction = null;
    DataTable ReturnTable = ADuplicateAddresses.Clone();
    PLocationTable Locations = new PLocationTable();

    TProgressTracker.InitProgressTracker(DomainManager.GClientID.ToString(), Catalog.GetString("Checking for duplicate addresses"));

    DBAccess.GDBAccessObj.GetNewOrExistingAutoReadTransaction(IsolationLevel.ReadCommitted, ref Transaction,
        delegate
        {
            // get all locations from database,
            // except for those without a Locality, Street Name and Address as these are too vague to make a match
            string Query = "SELECT p_location.* FROM p_location" +
                           " WHERE (p_location.p_locality_c is NOT NULL AND p_location.p_locality_c <> '')" +
                           " OR (p_location.p_street_name_c is NOT NULL AND p_location.p_street_name_c <> '')" +
                           " OR (p_location.p_address_3_c is NOT NULL AND p_location.p_address_3_c <> '')";

            DBAccess.GDBAccessObj.SelectDT(Locations, Query, Transaction);

            // a table counts as "invalid country" when its first row has an empty country code or the code "99"
            // (all rows of a table share one country code because the tables are built by grouping on it)
            Func<DataTable, bool> HasInvalidCountryCode = Table =>
            {
                string CountryCode = Table.Rows[0]["p_country_code_c"].ToString();

                return string.IsNullOrEmpty(CountryCode) || (CountryCode == "99");
            };

            // appends one result row describing the pair (First, Second)
            Action<DataRow, DataRow> AddDuplicatePair = (First, Second) =>
            {
                ReturnTable.Rows.Add(new object[] {
                        First[PLocationTable.GetSiteKeyDBName()],
                        First[PLocationTable.GetLocationKeyDBName()],
                        First[PLocationTable.GetLocalityDBName()],
                        First[PLocationTable.GetStreetNameDBName()],
                        First[PLocationTable.GetAddress3DBName()],
                        First[PLocationTable.GetCityDBName()],
                        First[PLocationTable.GetCountyDBName()],
                        First[PLocationTable.GetPostalCodeDBName()],
                        First[PLocationTable.GetCountryCodeDBName()],
                        Second[PLocationTable.GetSiteKeyDBName()],
                        Second[PLocationTable.GetLocationKeyDBName()],
                        Second[PLocationTable.GetLocalityDBName()],
                        Second[PLocationTable.GetStreetNameDBName()],
                        Second[PLocationTable.GetAddress3DBName()],
                        Second[PLocationTable.GetCityDBName()],
                        Second[PLocationTable.GetCountyDBName()],
                        Second[PLocationTable.GetPostalCodeDBName()],
                        Second[PLocationTable.GetCountryCodeDBName()]
                    });
            };

            // create a list of tables grouped by country codes
            List<DataTable> LocationDataTables = Locations.AsEnumerable()
                                                 .GroupBy(row => row[PLocationTable.GetCountryCodeDBName()])
                                                 .Select(g => g.CopyToDataTable())
                                                 .ToList();

            // create another table that contains all locations without a valid country code
            DataTable BlankCountryLocations = Locations.Clone();

            for (int i = 0; i < LocationDataTables.Count; i++)
            {
                // this helps the time left feature to be more accurate from the start
                LocationDataTables[i].DefaultView.Sort = PLocationTable.GetPostalCodeDBName() + " DESC";
                LocationDataTables[i] = LocationDataTables[i].DefaultView.ToTable();

                if (HasInvalidCountryCode(LocationDataTables[i]))
                {
                    foreach (DataRow Row in LocationDataTables[i].Rows)
                    {
                        BlankCountryLocations.Rows.Add((object[])Row.ItemArray.Clone());
                    }
                }
            }

            Int64 TotalCalculations = 0;
            Int64 CompletedCalculations = 0;

            // calculate number of calculations required for this check
            foreach (DataTable LocationCountry in LocationDataTables)
            {
                Int64 RowCount = LocationCountry.Rows.Count;

                if (RowCount > 0)
                {
                    // every row is compared with every later row of the same table
                    TotalCalculations += RowCount * (RowCount - 1) / 2;

                    // every row of a valid country is also compared with every invalid-country row,
                    // so this term has to be multiplied by the number of rows in the table
                    if (!HasInvalidCountryCode(LocationCountry))
                    {
                        TotalCalculations += RowCount * BlankCountryLocations.Rows.Count;
                    }
                }
            }

            Stopwatch time = Stopwatch.StartNew();
            bool Cancelled = false;

            // begin search for possible duplicates
            foreach (DataTable LocationCountry in LocationDataTables)
            {
                if (LocationCountry.Rows.Count <= 0)
                {
                    continue;
                }

                // the same for every row of this table, so determine it only once
                bool CompareWithBlankCountries = !HasInvalidCountryCode(LocationCountry);

                for (int i = 0; i < LocationCountry.Rows.Count; i++)
                {
                    DataRow CurrentRow = LocationCountry.Rows[i];

                    // handed to PossibleMatch by reference for all comparisons of CurrentRow
                    string AAddress = null;
                    string[] AAddressArray = null;

                    for (int j = i + 1; j < LocationCountry.Rows.Count; j++)
                    {
                        // check if two rows are a possible duplicate
                        if (PossibleMatch(CurrentRow, ref AAddress, ref AAddressArray, LocationCountry.Rows[j], AExactMatchNumber))
                        {
                            AddDuplicatePair(CurrentRow, LocationCountry.Rows[j]);
                        }

                        CompletedCalculations++;
                    }

                    if (CompareWithBlankCountries)
                    {
                        // compare with locations with invalid country codes
                        for (int j = 0; j < BlankCountryLocations.Rows.Count; j++)
                        {
                            if (PossibleMatch(CurrentRow, ref AAddress, ref AAddressArray, BlankCountryLocations.Rows[j], AExactMatchNumber))
                            {
                                AddDuplicatePair(CurrentRow, BlankCountryLocations.Rows[j]);
                            }

                            CompletedCalculations++;
                        }
                    }

                    if (TProgressTracker.GetCurrentState(DomainManager.GClientID.ToString()).CancelJob)
                    {
                        // remember the request so that the loop over the country tables stops as well
                        Cancelled = true;
                        break;
                    }

                    // guard against a division by zero when there is nothing to compare at all
                    decimal PercentageCompleted = (TotalCalculations > 0)
                                                  ? decimal.Divide(CompletedCalculations * 100, TotalCalculations)
                                                  : 0m;

                    string OutputMessage = string.Format(Catalog.GetString("Completed: {0}%"), Math.Round(PercentageCompleted, 1));

                    // only show estimated time left if at least 0.1% complete
                    // (this also keeps the estimate from dividing by a percentage of zero)
                    if (PercentageCompleted >= (decimal)0.1)
                    {
                        // estimate the remaining time
                        Int64 TimeLeft = (Int64)(time.ElapsedMilliseconds * ((100 / PercentageCompleted) - 1));

                        // divide before narrowing to int so that large estimates do not wrap around
                        int MinutesLeft = (int)(TimeLeft / 60000);

                        // only show seconds if less than 10 minutes remaining
                        if (MinutesLeft < 10)
                        {
                            int SecondsLeft = (int)(TimeLeft % 60000) / 1000;

                            OutputMessage += string.Format(Catalog.GetPluralString(" (approx. {0} minute and {1} seconds remaining)",
                                    " (approx. {0} minutes and {1} seconds remaining)", MinutesLeft, true),
                                MinutesLeft, SecondsLeft);
                        }
                        else
                        {
                            OutputMessage += string.Format(Catalog.GetString(" (approx. {0} minutes remaining)"), MinutesLeft);
                        }
                    }

                    TProgressTracker.SetCurrentState(DomainManager.GClientID.ToString(), OutputMessage, PercentageCompleted);
                }

                if (Cancelled)
                {
                    break;
                }
            }
        });

    TProgressTracker.FinishJob(DomainManager.GClientID.ToString());

    ADuplicateAddresses = ReturnTable.Copy();
}
/// <summary>
/// Searches the locations stored in p_location for pairs of addresses that are possible duplicates
/// of each other and returns those pairs in <paramref name="ADuplicateAddresses"/>.
/// </summary>
/// <remarks>
/// Locations are grouped by country code and every location is compared (via PossibleMatch) with every
/// other location of the same group. Locations of a group with a usable country code are additionally
/// compared with all locations whose country code is empty or "99". No further location is examined once
/// 500 pairs have been collected; the location being processed at that moment is still finished, so the
/// result can hold more than 500 rows. Progress and an estimate of the remaining time are published
/// through TProgressTracker; the search stops as soon as the tracker reports that the job was cancelled.
/// </remarks>
/// <param name="ADuplicateAddresses">Supplies the schema of the result table. On return it refers to a new
/// table with one row per possible duplicate pair; each row holds site key, location key, locality,
/// street name, address 3, city, county, postal code and country code of the first location followed by
/// the same nine values of the second location.</param>
/// <param name="AExactMatchNumber">Passed unchanged to PossibleMatch for every comparison.</param>
public static void FindAddressDuplicates(ref DataTable ADuplicateAddresses, bool AExactMatchNumber)
{
    // once this many pairs were found no further location is examined
    const int MaxDuplicatesBeforeStopping = 500;

    TDBTransaction Transaction = null;
    DataTable ReturnTable = ADuplicateAddresses.Clone();
    PLocationTable Locations = new PLocationTable();

    TProgressTracker.InitProgressTracker(DomainManager.GClientID.ToString(), Catalog.GetString("Checking for duplicate addresses"));

    DBAccess.GDBAccessObj.GetNewOrExistingAutoReadTransaction(IsolationLevel.ReadCommitted, ref Transaction,
        delegate
        {
            // get all locations from database,
            // except for those without a Locality, Street Name and Address as these are too vague to make a match
            string Query = "SELECT p_location.* FROM p_location" +
                           " WHERE (p_location.p_locality_c is NOT NULL AND p_location.p_locality_c <> '')" +
                           " OR (p_location.p_street_name_c is NOT NULL AND p_location.p_street_name_c <> '')" +
                           " OR (p_location.p_address_3_c is NOT NULL AND p_location.p_address_3_c <> '')";

            DBAccess.GDBAccessObj.SelectDT(Locations, Query, Transaction);

            // a table counts as "invalid country" when its first row has an empty country code or the code "99"
            // (all rows of a table share one country code because the tables are built by grouping on it)
            Func<DataTable, bool> HasInvalidCountryCode = Table =>
            {
                string CountryCode = Table.Rows[0]["p_country_code_c"].ToString();

                return string.IsNullOrEmpty(CountryCode) || (CountryCode == "99");
            };

            // appends one result row describing the pair (First, Second)
            Action<DataRow, DataRow> AddDuplicatePair = (First, Second) =>
            {
                ReturnTable.Rows.Add(new object[] {
                        First[PLocationTable.GetSiteKeyDBName()],
                        First[PLocationTable.GetLocationKeyDBName()],
                        First[PLocationTable.GetLocalityDBName()],
                        First[PLocationTable.GetStreetNameDBName()],
                        First[PLocationTable.GetAddress3DBName()],
                        First[PLocationTable.GetCityDBName()],
                        First[PLocationTable.GetCountyDBName()],
                        First[PLocationTable.GetPostalCodeDBName()],
                        First[PLocationTable.GetCountryCodeDBName()],
                        Second[PLocationTable.GetSiteKeyDBName()],
                        Second[PLocationTable.GetLocationKeyDBName()],
                        Second[PLocationTable.GetLocalityDBName()],
                        Second[PLocationTable.GetStreetNameDBName()],
                        Second[PLocationTable.GetAddress3DBName()],
                        Second[PLocationTable.GetCityDBName()],
                        Second[PLocationTable.GetCountyDBName()],
                        Second[PLocationTable.GetPostalCodeDBName()],
                        Second[PLocationTable.GetCountryCodeDBName()]
                    });
            };

            // create a list of tables grouped by country codes
            List<DataTable> LocationDataTables = Locations.AsEnumerable()
                                                 .GroupBy(row => row[PLocationTable.GetCountryCodeDBName()])
                                                 .Select(g => g.CopyToDataTable())
                                                 .ToList();

            // create another table that contains all locations without a valid country code
            DataTable BlankCountryLocations = Locations.Clone();

            for (int i = 0; i < LocationDataTables.Count; i++)
            {
                // this helps the time left feature to be more accurate from the start
                LocationDataTables[i].DefaultView.Sort = PLocationTable.GetPostalCodeDBName() + " DESC";
                LocationDataTables[i] = LocationDataTables[i].DefaultView.ToTable();

                if (HasInvalidCountryCode(LocationDataTables[i]))
                {
                    foreach (DataRow Row in LocationDataTables[i].Rows)
                    {
                        BlankCountryLocations.Rows.Add((object[])Row.ItemArray.Clone());
                    }
                }
            }

            Int64 TotalCalculations = 0;
            Int64 CompletedCalculations = 0;

            // calculate number of calculations required for this check
            foreach (DataTable LocationCountry in LocationDataTables)
            {
                Int64 RowCount = LocationCountry.Rows.Count;

                if (RowCount > 0)
                {
                    // every row is compared with every later row of the same table
                    TotalCalculations += RowCount * (RowCount - 1) / 2;

                    // every row of a valid country is also compared with every invalid-country row,
                    // so this term has to be multiplied by the number of rows in the table
                    if (!HasInvalidCountryCode(LocationCountry))
                    {
                        TotalCalculations += RowCount * BlankCountryLocations.Rows.Count;
                    }
                }
            }

            Stopwatch time = Stopwatch.StartNew();
            bool Cancelled = false;

            // begin search for possible duplicates
            foreach (DataTable LocationCountry in LocationDataTables)
            {
                if (LocationCountry.Rows.Count <= 0)
                {
                    continue;
                }

                // the same for every row of this table, so determine it only once
                bool CompareWithBlankCountries = !HasInvalidCountryCode(LocationCountry);

                // the result limit is only checked before a new row is started
                for (int i = 0; i < LocationCountry.Rows.Count && ReturnTable.Rows.Count < MaxDuplicatesBeforeStopping; i++)
                {
                    DataRow CurrentRow = LocationCountry.Rows[i];

                    // handed to PossibleMatch by reference for all comparisons of CurrentRow
                    string AAddress = null;
                    string[] AAddressArray = null;

                    for (int j = i + 1; j < LocationCountry.Rows.Count; j++)
                    {
                        // check if two rows are a possible duplicate
                        if (PossibleMatch(CurrentRow, ref AAddress, ref AAddressArray, LocationCountry.Rows[j], AExactMatchNumber))
                        {
                            AddDuplicatePair(CurrentRow, LocationCountry.Rows[j]);
                        }

                        CompletedCalculations++;
                    }

                    if (CompareWithBlankCountries)
                    {
                        // compare with locations with invalid country codes
                        for (int j = 0; j < BlankCountryLocations.Rows.Count; j++)
                        {
                            if (PossibleMatch(CurrentRow, ref AAddress, ref AAddressArray, BlankCountryLocations.Rows[j], AExactMatchNumber))
                            {
                                AddDuplicatePair(CurrentRow, BlankCountryLocations.Rows[j]);
                            }

                            CompletedCalculations++;
                        }
                    }

                    if (TProgressTracker.GetCurrentState(DomainManager.GClientID.ToString()).CancelJob)
                    {
                        // remember the request so that the loop over the country tables stops as well
                        Cancelled = true;
                        break;
                    }

                    // guard against a division by zero when there is nothing to compare at all
                    decimal PercentageCompleted = (TotalCalculations > 0)
                                                  ? decimal.Divide(CompletedCalculations * 100, TotalCalculations)
                                                  : 0m;

                    string OutputMessage = string.Format(Catalog.GetString("Completed: {0}%"), Math.Round(PercentageCompleted, 1));

                    // only show estimated time left if at least 0.1% complete
                    // (this also keeps the estimate from dividing by a percentage of zero)
                    if (PercentageCompleted >= (decimal)0.1)
                    {
                        // estimate the remaining time
                        Int64 TimeLeft = (Int64)(time.ElapsedMilliseconds * ((100 / PercentageCompleted) - 1));

                        // divide before narrowing to int so that large estimates do not wrap around
                        int MinutesLeft = (int)(TimeLeft / 60000);

                        // only show seconds if less than 10 minutes remaining
                        if (MinutesLeft < 10)
                        {
                            int SecondsLeft = (int)(TimeLeft % 60000) / 1000;

                            OutputMessage += string.Format(Catalog.GetPluralString(" (approx. {0} minute and {1} seconds remaining)",
                                    " (approx. {0} minutes and {1} seconds remaining)", MinutesLeft, true),
                                MinutesLeft, SecondsLeft);
                        }
                        else
                        {
                            OutputMessage += string.Format(Catalog.GetString(" (approx. {0} minutes remaining)"), MinutesLeft);
                        }
                    }

                    TProgressTracker.SetCurrentState(DomainManager.GClientID.ToString(), OutputMessage, PercentageCompleted);
                }

                if (Cancelled)
                {
                    break;
                }
            }
        });

    TProgressTracker.FinishJob(DomainManager.GClientID.ToString());

    ADuplicateAddresses = ReturnTable.Copy();
}