/// <summary>
/// Converts an edit queue data record into its model representation and fills in
/// the list of differences between its "previous" POI and its "edited" POI.
/// </summary>
/// <param name="item">Edit queue record as loaded from the data layer.</param>
/// <param name="cpManager">POI manager used to compute the differences.</param>
/// <param name="loadCurrentItem">
/// When true (and the previous data deserialized to a POI), the comparison baseline is
/// re-fetched by ID through a new <c>POIManager</c> instead of using the deserialized snapshot.
/// </param>
/// <returns>The model edit queue item with <c>Differences</c> populated.</returns>
public Model.EditQueueItem GetItemWithDifferences(Core.Data.EditQueueItem item, POIManager cpManager, bool loadCurrentItem)
{
    var result = Model.Extensions.EditQueueItem.FromDataModel(item);

    // Baseline side of the comparison: the snapshot stored with the edit, optionally
    // swapped for whatever a fresh POIManager returns for the same ID.
    Model.ChargePoint baseline = DeserializePOIFromJSON(result.PreviousData);
    baseline = (loadCurrentItem && baseline != null)
        ? new POIManager().Get(baseline.ID)
        : baseline;

    // Edited side of the comparison.
    Model.ChargePoint edited = DeserializePOIFromJSON(result.EditData);

    result.Differences = cpManager.CheckDifferences(baseline, edited, useObjectCompare: true);
    return result;
}
/// <summary>
/// De-duplicates a list of POIs to be imported against the existing master list of POIs
/// (for the countries present in the import), optionally merging imported changes into
/// exact provider matches, and fills in the supplied import report.
/// </summary>
/// <param name="cpList">
/// POIs to import. This list is modified in place: detected duplicates are removed and
/// merged master items are added.
/// </param>
/// <param name="updateDuplicate">
/// When true, a master item matching the imported item's data provider and provider reference
/// is merged with the imported item (via <c>MergeItemChanges</c>) and queued as an update.
/// </param>
/// <param name="coreRefData">Reference data; used for the geolocation cache pass and to look up submission status type 1001.</param>
/// <param name="report">
/// Import report to populate: <c>Added</c>, <c>Updated</c>, <c>Duplicates</c>, <c>Delisted</c>,
/// <c>LowDataQuality</c> and (when there are no-op updates) <c>Unchanged</c>.
/// </param>
/// <param name="allowDupeWithDifferentOperator">
/// When true, an item at the same location as the previously processed item is kept if its operator differs.
/// </param>
/// <param name="fetchExistingFromAPI">When true the master list is fetched through <c>OCMClient</c>; otherwise through <c>POIManager</c>.</param>
/// <returns>
/// The remaining POIs sorted by latitude then longitude, ready to be applied as inserts/updates.
/// An empty list is returned (and the report is left untouched) when no master list could be obtained.
/// </returns>
public async Task<List<ChargePoint>> DeDuplicateList(List<ChargePoint> cpList, bool updateDuplicate, CoreReferenceData coreRefData, ImportReport report, bool allowDupeWithDifferentOperator = false, bool fetchExistingFromAPI = false)
{
    var stopWatch = new Stopwatch();
    stopWatch.Start();

    var poiManager = new POIManager();

    //get list of all current POIs (in relevant countries) including most delisted ones
    int[] countryIds = (from poi in cpList
                        where poi.AddressInfo.Country != null
                        select poi.AddressInfo.Country.ID).Distinct().ToArray();

    List<ChargePoint> masterListCollection;
    if (fetchExistingFromAPI)
    {
        // fetch from API
        masterListCollection = await new OCMClient(false).GetLocations(new SearchFilters { CountryIDs = countryIds, MaxResults = 1000000, EnableCaching = true, SubmissionStatusTypeIDs = new int[0] });
    }
    else
    {
        // use local database
        APIRequestParams filters = new APIRequestParams { CountryIDs = countryIds, MaxResults = 1000000, EnableCaching = true, SubmissionStatusTypeID = 0 };
        masterListCollection = poiManager.GetChargePoints(filters);
    }

    //if we failed to get a master list, quit with no result
    //(checked before building the index and the deep copy so no work is wasted)
    if (masterListCollection.Count == 0)
    {
        return new List<ChargePoint>();
    }

    var spec = new i4o.IndexSpecification<ChargePoint>()
        .Add(i => i.DataProviderID)
        .Add(i => i.DataProvidersReference);

    var masterList = new i4o.IndexSet<ChargePoint>(masterListCollection, spec);

    //full copy of each master list item (JSON round trip) so we have before/after for the diff step
    List<ChargePoint> masterListCopy = new List<ChargePoint>();
    foreach (var tmp in masterList)
    {
        masterListCopy.Add(JsonConvert.DeserializeObject<ChargePoint>(JsonConvert.SerializeObject(tmp)));
    }

    List<ChargePoint> duplicateList = new List<ChargePoint>();
    List<ChargePoint> updateList = new List<ChargePoint>();

    ChargePoint previousCP = null;

    //for each item to be imported, deduplicate by adding to updateList only the items which we don't already have.
    //The sorted sequence is materialised once: OrderBy is deferred and would otherwise re-sort on every enumeration.
    List<ChargePoint> importSortedByPos = cpList
        .OrderBy(c => c.AddressInfo.Latitude)
        .ThenBy(c => c.AddressInfo.Longitude)
        .ToList();

    int poiProcessed = 0;
    int totalPOI = importSortedByPos.Count;

    Stopwatch dupeIdentWatch = new Stopwatch();
    dupeIdentWatch.Start();

    foreach (var item in importSortedByPos)
    {
        var itemGeoPos = new System.Device.Location.GeoCoordinate(item.AddressInfo.Latitude, item.AddressInfo.Longitude);

        //item is duplicate if we already seem to have it based on Data Providers reference or approx position match
        //NOTE: predicate shape is kept as-is because it is handed to the i4o index set.
        var dupeList = masterList.Where(c =>
            (c.DataProviderID == item.DataProviderID && c.DataProvidersReference == item.DataProvidersReference)
            || (c.AddressInfo.Title == item.AddressInfo.Title && c.AddressInfo.AddressLine1 == item.AddressInfo.AddressLine1 && c.AddressInfo.Postcode == item.AddressInfo.Postcode)
            || (GeoManager.IsClose(c.AddressInfo.Latitude, c.AddressInfo.Longitude, item.AddressInfo.Latitude, item.AddressInfo.Longitude) && new System.Device.Location.GeoCoordinate(c.AddressInfo.Latitude, c.AddressInfo.Longitude).GetDistanceTo(itemGeoPos) < DUPLICATE_DISTANCE_METERS) //meters distance apart
        );

        if (dupeList.Any())
        {
            if (updateDuplicate)
            {
                //if updating duplicates, get exact matching duplicate based on provider reference and update/merge with this item to update status/merge properties
                var importProviderId = item.DataProvider != null ? item.DataProvider.ID : item.DataProviderID;
                var updatedItem = dupeList.FirstOrDefault(d => d.DataProviderID == importProviderId && d.DataProvidersReference == item.DataProvidersReference);

                if (updatedItem != null)
                {
                    //only merge/update from live published items (or items delisted by provider / as not public).
                    //A missing submission status is treated as "not eligible" rather than throwing,
                    //matching the null guard used by the delisting filter further down.
                    var existingStatus = updatedItem.SubmissionStatus;
                    bool eligibleForMerge = existingStatus != null
                        && (existingStatus.IsLive == (bool?)true
                            || existingStatus.ID == (int)StandardSubmissionStatusTypes.Delisted_RemovedByDataProvider
                            || existingStatus.ID == (int)StandardSubmissionStatusTypes.Delisted_NotPublicInformation);

                    if (eligibleForMerge)
                    {
                        //item is an exact match from same data provider
                        //overwrite existing with imported data (use import as master)
                        MergeItemChanges(item, updatedItem, false);
                        updateList.Add(updatedItem);
                    }
                }
                //else: duplicates are not an exact match.
                //TODO: resolve whether imported data should change duplicate (merge new properties from imported item)
            }

            //item has one or more likely duplicates, add it to list of items to remove
            duplicateList.Add(item);
        }

        //mark item as duplicate if location/title exactly matches previous entry or lat/long is within DuplicateDistance meters
        //(this check is the most expensive part of dedupe)
        if (previousCP != null
            && IsDuplicateLocation(item, previousCP, true)
            && !duplicateList.Contains(item))
        {
            if (allowDupeWithDifferentOperator && item.OperatorID != previousCP.OperatorID)
            {
                Log("Duplicated allowed due to different operator:" + item.AddressInfo.Title);
            }
            else
            {
                Log("Duplicated item removed:" + item.AddressInfo.Title);
                duplicateList.Add(item);
            }
        }

        previousCP = item;

        poiProcessed++;

        if (poiProcessed % 300 == 0)
        {
            System.Diagnostics.Debug.WriteLine("Deduplication: " + poiProcessed + " processed of " + totalPOI);
        }
    }

    dupeIdentWatch.Stop();
    Log("De-dupe pass took " + dupeIdentWatch.Elapsed.TotalSeconds + " seconds. " + (dupeIdentWatch.Elapsed.TotalMilliseconds / cpList.Count) + "ms per item.");

    //remove duplicates from list to apply
    foreach (var dupe in duplicateList)
    {
        cpList.Remove(dupe);
    }

    Log("Duplicates removed from import:" + duplicateList.Count);

    //add updated items (replace duplicates with property changes)
    foreach (var updatedItem in updateList)
    {
        if (!cpList.Contains(updatedItem))
        {
            cpList.Add(updatedItem);
        }
    }

    Log("Updated items to import:" + updateList.Count);

    //populate missing location info from geolocation cache if possible
    Stopwatch geoWatch = new Stopwatch();
    geoWatch.Start();
    PopulateLocationFromGeolocationCache(cpList, coreRefData);
    geoWatch.Stop();
    Log("Populate Country from Lat/Long took " + geoWatch.Elapsed.TotalSeconds + " seconds. " + (geoWatch.Elapsed.TotalMilliseconds / cpList.Count) + "ms per item.");

    //final pass to catch duplicates present in data source, mark additional items as Delisted Duplicate so we have a record for them
    var submissionStatusDelistedDupe = coreRefData.SubmissionStatusTypes.First(s => s.ID == 1001); //delisted duplicate
    previousCP = null;

    //sort current cp list by position again; materialised once because it is enumerated
    //several times below (including once per master list item in the delisting filter)
    List<ChargePoint> finalSortedByPos = cpList
        .OrderBy(c => c.AddressInfo.Latitude)
        .ThenBy(c => c.AddressInfo.Longitude)
        .ToList();

    //mark any duplicates in final list as delisted duplicates (submitted to api)
    foreach (var cp in finalSortedByPos)
    {
        bool isDuplicate = previousCP != null && IsDuplicateLocation(cp, previousCP, false);

        if (isDuplicate)
        {
            cp.SubmissionStatus = submissionStatusDelistedDupe;
            cp.SubmissionStatusTypeID = submissionStatusDelistedDupe.ID;

            if (previousCP.ID > 0)
            {
                if (cp.GeneralComments == null)
                {
                    cp.GeneralComments = "";
                }

                cp.GeneralComments += " [Duplicate of OCM-" + previousCP.ID + "]";
                cp.ParentChargePointID = previousCP.ID;
            }
        }
        else
        {
            //only a non-duplicate becomes the reference point for the next comparison
            previousCP = cp;
        }
    }

    report.Added = finalSortedByPos.Where(cp => cp.ID == 0).ToList();
    report.Updated = finalSortedByPos.Where(cp => cp.ID > 0).ToList();
    report.Duplicates = duplicateList; //TODO: add additional pass of duplicates from above

    //determine which POIs in our master list are no longer referenced in the import
    report.Delisted = masterList.Where(cp => cp.DataProviderID == report.ProviderDetails.DataProviderID && cp.SubmissionStatus != null && (cp.SubmissionStatus.IsLive == true || cp.SubmissionStatusTypeID == (int)StandardSubmissionStatusTypes.Imported_UnderReview)
        && !finalSortedByPos.Any(master => master.ID == cp.ID) && !report.Duplicates.Any(master => master.ID == cp.ID)
        && cp.UserComments == null && cp.MediaItems == null).ToList();

    //safety check to ensure we're not delisting items just because we have incomplete import data:
    if (cpList.Count < 50) // || (report.Delisted.Count > cpList.Count))
    {
        report.Delisted = new List<ChargePoint>();
    }

    //determine list of low quality POIs (incomplete address info etc)
    report.LowDataQuality = new List<ChargePoint>();
    report.LowDataQuality.AddRange(GetLowDataQualityPOIs(report.Added));
    report.LowDataQuality.AddRange(GetLowDataQualityPOIs(report.Updated));

    Log("Removing " + report.LowDataQuality.Count + " low quality POIs from added/updated");

    //remove references in added/updated to any low quality POIs
    foreach (var p in report.LowDataQuality)
    {
        report.Added.Remove(p);
        report.Updated.Remove(p);
    }

    //difference contexts / property names which on their own do not count as a real change
    string[] ignoredDifferenceNames =
    {
        ".MetadataValues",
        ".DateLastStatusUpdate",
        ".UUID",
        ".DataProvider.DateLastImported",
        ".IsRecentlyVerified",
        ".DateLastVerified",
        ".UserComments",
        ".MediaItems"
    };

    //remove updates which only change datelaststatusupdate (or other ignored properties)
    var updatesToIgnore = new List<ChargePoint>();

    foreach (var poi in report.Updated)
    {
        var origPOI = masterListCopy.FirstOrDefault(p => p.ID == poi.ID);
        var updatedPOI = poiManager.PreviewPopulatedPOIFromModel(poi);

        var differences = poiManager.CheckDifferences(origPOI, updatedPOI);
        differences.RemoveAll(d => ignoredDifferenceNames.Contains(d.Context));

        if (!differences.Any())
        {
            updatesToIgnore.Add(poi);
        }
        else
        {
            //differences exist according to the POI manager; double check with a full object comparison
            CompareLogic compareLogic = new CompareLogic();
            compareLogic.Config.MaxDifferences = 100;
            compareLogic.Config.IgnoreObjectTypes = false;
            compareLogic.Config.IgnoreUnknownObjectTypes = true;
            compareLogic.Config.CompareChildren = true;

            ComparisonResult result = compareLogic.Compare(origPOI, updatedPOI);

            var diffReport = new KellermanSoftware.CompareNetObjects.Reports.UserFriendlyReport();
            result.Differences.RemoveAll(d => ignoredDifferenceNames.Contains(d.PropertyName));

            System.Diagnostics.Debug.WriteLine("Difference:" + diffReport.OutputString(result.Differences));

            if (!result.Differences.Any())
            {
                updatesToIgnore.Add(poi);
            }
        }
    }

    //move no-op updates from Updated to Unchanged
    foreach (var p in updatesToIgnore)
    {
        if (report.Unchanged == null)
        {
            report.Unchanged = new List<ChargePoint>();
        }

        report.Unchanged.Add(p);
        report.Updated.Remove(p);
    }

    //TODO: if POI is a duplicate ensure imported data provider reference/URL is included as reference metadata in OCM's version of the POI

    stopWatch.Stop();
    Log("Deduplicate List took " + stopWatch.Elapsed.TotalSeconds + " seconds");

    //return final processed list ready for applying as insert/updates
    return finalSortedByPos;
}