/// <summary>
/// Builds a comparison fingerprint for an API company from its name, country, website,
/// address, SME value, supply chain category codes and supply chain role codes.
/// </summary>
/// <param name="company">The API company to fingerprint.</param>
/// <returns>
/// The hash code of the concatenated field values after trimming, lower-casing
/// (invariant culture) and removing every space character.
/// </returns>
/// <remarks>
/// The result comes from <c>string.GetHashCode()</c>. On runtimes that randomise string
/// hashing per process the value is only comparable within the same run, so it should
/// not be persisted. Distinct inputs can also collide on the same 32-bit value.
/// </remarks>
private int APIGenereateHashValue(APICompany company)
{
    var companyName = company.CompanyName;
    var country = company.Country;
    // A missing website/address contributes a single space, which is stripped again below.
    var website = company.Website ?? " ";
    var address = company.Address ?? " ";
    var sme = company.SME.ToString();

    var categories = "";
    // A company without categories contributes nothing instead of crashing the hash.
    if (company.SupplyChainCategories != null)
    {
        foreach (var item in company.SupplyChainCategories)
        {
            // Only the part of the category code before the first '.' is used.
            string code = item.SupplyChainCategoryCode.ToString().Split('.')[0];

            // The super category text is stripped out of the code. string.Replace throws
            // on an empty search string, so an empty super category leaves the code as is.
            string superCategory = item.SuperCategory.ToString();
            if (!string.IsNullOrEmpty(superCategory))
            {
                code = code.Replace(superCategory, "");
            }

            categories += code;
        }
    }

    var roles = "";
    // Same tolerance for a company without roles.
    if (company.SupplyChainRoles != null)
    {
        foreach (var item in company.SupplyChainRoles)
        {
            roles += item.SupplyChainRoleCode.ToString();
        }
    }

    var totalString = companyName + country + website + address + sme + categories + roles;

    // Normalise so that casing and spacing differences do not change the fingerprint.
    var normalised = totalString.Trim().ToLowerInvariant().Replace(" ", "");

    return normalised.GetHashCode();
}
/// <summary>
/// Converts data stream companies into the API company format.
/// </summary>
/// <param name="dataStreamCompanies">Companies received from the data stream.</param>
/// <param name="SupplyChainRoles">Known supply chain roles, passed on to the role conversion.</param>
/// <param name="SuppleChainCategories">Known supply chain categories, passed on to the category conversion.</param>
/// <returns>One API company per input company, in input order.</returns>
/// <remarks>
/// The country of each input item is rewritten in place ("Netherlands" becomes
/// "The Netherlands", "UK" becomes "United Kingdom"). Address and coordinates are taken
/// from the first office only; missing or unparsable coordinates become 0/0.
/// </remarks>
private List<APICompany> ConvertToApiCompany(
    List<DataStreamCompany> dataStreamCompanies,
    List<SupplyChainRole> SupplyChainRoles,
    List<SupplyChainCategory> SuppleChainCategories)
{
    // Coordinates are machine-readable data: parse them with the invariant culture so that
    // "52.37" is never read as 5237 on machines whose culture uses ',' as decimal separator.
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;
    const System.Globalization.NumberStyles coordinateStyle = System.Globalization.NumberStyles.Float;

    var apiCompanies = new List<APICompany>(dataStreamCompanies.Count);

    foreach (var item in dataStreamCompanies)
    {
        // Special for Country: align the stream's names with the names used by the API.
        // A switch also tolerates a null country.
        switch (item.country)
        {
            case "Netherlands":
                item.country = "The Netherlands";
                break;
            case "UK":
                item.country = "United Kingdom";
                break;
        }

        var company = new APICompany();
        company.CompanyName = item.company_name;
        company.Country = item.country;
        // NOTE(review): Convert.ToDateTime parses text with the current culture - confirm the
        // format of last_updated.date is unambiguous.
        company.Created = Convert.ToDateTime(item.last_updated.date);
        company.Website = item.website;
        company.SME = item.sme_status;
        company.CompanyDirectoryEntryReffNumber = item.entry_reference_number;
        company.Status = item.status;
        company.SupplyChainCategories = ConvertSupplyChainCategories(item.supply_chain_categories, SuppleChainCategories);
        company.SupplyChainRoles = ConvertSupplyChainRoles(item.supply_chain_roles, SupplyChainRoles);

        // Defaults for companies without any office.
        company.Address = "";
        company.Latitude = 0;
        company.Longitude = 0;

        if (item.offices != null && item.offices.Any())
        {
            // Only the first office is used.
            var office = item.offices.First();
            company.Address = office.address ?? "";

            // A coordinate is only useful as a pair: keep it when both parts parse,
            // otherwise leave 0/0 instead of throwing on empty or malformed input.
            decimal latitude;
            decimal longitude;
            if (decimal.TryParse(office.lat, coordinateStyle, invariantCulture, out latitude) &&
                decimal.TryParse(office.lng, coordinateStyle, invariantCulture, out longitude))
            {
                company.Latitude = latitude;
                company.Longitude = longitude;
            }
        }

        apiCompanies.Add(company);
    }

    return apiCompanies;
}
/// <summary>
/// Creates a record pairing a data stream company with the company it was matched against.
/// </summary>
/// <param name="duplicate">The data stream company flagged as a potential duplicate.</param>
/// <param name="match">The data stream company it matched, or null when the match is an API company.</param>
/// <param name="match2">The API company it matched, or null when the match is a data stream company.</param>
public PotentialDiplicate(DataStreamCompany duplicate, DataStreamCompany match, APICompany match2)
{
    Duplicate = duplicate;
    Match = match;
    Match2 = match2;
}
/// <summary>
/// Filters data stream companies in two passes.
/// First, the stream data is compared with itself and companies that look like duplicates
/// of another stream company are set aside.
/// Second, the remaining companies are compared with the API data and set aside if they
/// already appear there.
/// Special: an API company that carries a reference key is known to originate from the
/// data stream and is never used for matching, so its stream counterpart is kept and can
/// be used for updates.
/// </summary>
/// <param name="dataStreamCompanies">All companies received from the data stream.</param>
/// <param name="apiCompanies">
/// Companies currently known to the API. An entry that is an exact hash match gets its
/// reference number assigned in place.
/// </param>
/// <returns>The stream companies that were neither exact matches nor potential duplicates.</returns>
/// <remarks>
/// Side effects: potential duplicates are appended to <c>_datastreamPotentialDuplicates</c>
/// and exact hash matches to <c>_exactHashMatch</c>.
/// </remarks>
private List<DataStreamCompany> FindPotentialDuplicates(
    List<DataStreamCompany> dataStreamCompanies,
    List<APICompany> apiCompanies)
{
    var reducedDatastreamCompanies = new List<DataStreamCompany>();
    var filteredDataStreamCompanies = new List<DataStreamCompany>();

    // Pass 1: filter potential duplicates out of the stream by comparing it with itself.
    foreach (var item in dataStreamCompanies)
    {
        // Check the item status. Only these two statuses are processed; anything else
        // (including a missing status) is skipped.
        if (!string.Equals(item.status, "Approved") && !string.Equals(item.status, "Partner Added"))
        {
            continue;
        }

        DataStreamCompany matchItem = null;
        foreach (var x in dataStreamCompanies)
        {
            // Never compare a company with itself.
            if (item.entry_reference_number.Equals(x.entry_reference_number))
            {
                continue;
            }

            // Names are only compared when the countries match.
            if (!MatchCountries(x.country).Equals(MatchCountries(item.country)))
            {
                continue;
            }

            // If this company's name is contained in the other company's name in any way,
            // it is considered a potential duplicate of that company.
            var xname = x.company_name.Trim().ToLowerInvariant();
            var itemname = item.company_name.Trim().ToLowerInvariant();
            if (xname.Contains(itemname))
            {
                matchItem = x;
                break;
            }
        }

        if (matchItem == null)
        {
            reducedDatastreamCompanies.Add(item);
        }
        else
        {
            this._datastreamPotentialDuplicates.Add(new PotentialDiplicate(item, matchItem, null));
        }
    }

    // Pass 2: filter potential duplicates out by comparing the reduced list with the API companies.
    foreach (var x in reducedDatastreamCompanies)
    {
        var xHash = StreamGenereateHashValue(x);
        var exactMatch = false;
        APICompany matchItem = null;

        foreach (var y in apiCompanies)
        {
            // API companies that already carry a reference key are not considered for matching.
            // NOTE(review): the previous code had a branch for "y's key equals x's reference
            // number" that only re-assigned an already-false flag, so it had no effect. If such
            // an x should always be kept for update regardless of other name matches, that
            // needs to be implemented explicitly - confirm the intended rule.
            if (y.CompanyDirectoryEntryReffNumber != null && !y.CompanyDirectoryEntryReffNumber.Equals(""))
            {
                continue;
            }

            // Exact match on the hash value: link the API company to the stream company
            // and drop the stream company from further processing.
            var yHash = APIGenereateHashValue(y);
            if (xHash == yHash)
            {
                y.CompanyDirectoryEntryReffNumber = x.entry_reference_number;
                this._exactHashMatch.Add(y);
                exactMatch = true;
                break;
            }

            // Names are only compared when the countries match.
            if (!MatchCountries(x.country).Equals(MatchCountries(y.Country)))
            {
                continue;
            }

            // If the API company's name is contained in the stream company's name in any way,
            // the stream company is considered a potential duplicate of it.
            var xname = x.company_name.Trim().ToLowerInvariant();
            var yname = y.CompanyName.Trim().ToLowerInvariant();
            if (xname.Contains(yname))
            {
                matchItem = y;
                break;
            }
        }

        // Exact hash matches are neither returned nor reported as potential duplicates.
        if (exactMatch)
        {
            continue;
        }

        if (matchItem == null)
        {
            filteredDataStreamCompanies.Add(x);
        }
        else
        {
            this._datastreamPotentialDuplicates.Add(new PotentialDiplicate(x, null, matchItem));
        }
    }

    return filteredDataStreamCompanies;
}
/// <summary>
/// Converts stream companies to the API company format.
/// </summary>
/// <param name="streamCompanies">Companies from the stream.</param>
/// <param name="SupplyChainroles">Known supply chain roles, passed on to the role conversion.</param>
/// <param name="SuppleChainCategories">Known supply chain categories, passed on to the category conversion.</param>
/// <returns>The formatted companies whose status is "Approved" or "Partner Added".</returns>
/// <remarks>
/// The input is first passed through <c>PrintValues</c> and only its result is converted.
/// The country of each converted input item is rewritten in place. The method writes the
/// number of converted companies to the console and then waits for a line of console input.
/// </remarks>
private List<APICompany> ConvertStreamCompanies(
    List<DataStreamCompany> streamCompanies,
    List<SupplyChainRole> SupplyChainroles,
    List<SupplyChainCategory> SuppleChainCategories
    )
{
    // Coordinates are machine-readable data: parse them with the invariant culture so that
    // "52.37" is never read as 5237 on machines whose culture uses ',' as decimal separator.
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;
    const System.Globalization.NumberStyles coordinateStyle = System.Globalization.NumberStyles.Float;

    var convertedCompanies = new List<APICompany>();
    var filteredStreamCompanies = PrintValues(streamCompanies);

    foreach (var item in filteredStreamCompanies)
    {
        // Check the item status. Anything else (including a missing status) is skipped.
        if (!string.Equals(item.status, "Approved") && !string.Equals(item.status, "Partner Added"))
        {
            continue;
        }

        // Special for Country: align the stream's names with the names used by the API.
        // A switch also tolerates a null country.
        switch (item.country)
        {
            case "Netherlands":
                item.country = "The Netherlands";
                break;
            case "UK":
                item.country = "United Kingdom";
                break;
        }

        var company = new APICompany();
        company.CompanyName = item.company_name;
        company.Country = item.country;
        company.Website = item.website;
        company.SME = item.sme_status;
        company.CompanyDirectoryEntryReffNumber = item.entry_reference_number;
        company.Status = item.status;
        company.SupplyChainCategories = ConvertSupplyChainCategories(item.supply_chain_categories, SuppleChainCategories);
        company.SupplyChainRoles = ConvertSupplyChainRoles(item.supply_chain_roles, SupplyChainroles);

        // Defaults for companies without any office.
        company.Address = "";
        company.Latitude = 0;
        company.Longitude = 0;

        if (item.offices != null && item.offices.Any())
        {
            // Only the first office is used.
            var office = item.offices.First();
            company.Address = office.address;

            // We are getting empty data; this must be taken into consideration.
            // The coordinates are kept only when both parts parse, otherwise both stay 0.
            decimal latitude;
            decimal longitude;
            if (decimal.TryParse(office.lat, coordinateStyle, invariantCulture, out latitude) &&
                decimal.TryParse(office.lng, coordinateStyle, invariantCulture, out longitude))
            {
                company.Latitude = latitude;
                company.Longitude = longitude;
            }
        }

        company.Created = DateTime.Now;
        convertedCompanies.Add(company);
    }

    Console.WriteLine(convertedCompanies.Count + " :convertedCompanies");
    // NOTE(review): this blocks until a line is entered on the console; it looks like a
    // debugging pause and will hang a non-interactive run - confirm it is still wanted.
    Console.ReadLine();

    return convertedCompanies;
}