/// <summary>
/// For the results of an import (ImportReport), submit each added, updated and delisted POI
/// via the SubmissionManager, then refresh the POI cache and update the data provider's
/// date of last import.
/// </summary>
/// <param name="poiResults">import results containing the Added/Updated/Delisted POI lists</param>
/// <param name="user">user to attribute the submissions to</param>
public void UpdateImportedPOIList(ImportReport poiResults, User user)
{
    var submissionManager = new SubmissionManager();
    int itemCount = 1;

    foreach (var newPOI in poiResults.Added)
    {
        Log("Importing New POI " + itemCount + ": " + newPOI.AddressInfo.ToString());
        submissionManager.PerformPOISubmission(newPOI, user, false);
        itemCount++;
    }

    foreach (var updatedPOI in poiResults.Updated)
    {
        Log("Importing Updated POI " + itemCount + ": " + updatedPOI.AddressInfo.ToString());
        submissionManager.PerformPOISubmission(updatedPOI, user, performCacheRefresh: false, disablePOISuperseding: true);
        itemCount++;
    }

    foreach (var delisted in poiResults.Delisted)
    {
        Log("Delisting Removed POI " + itemCount + ": " + delisted.AddressInfo.ToString());
        delisted.SubmissionStatus = null;
        delisted.SubmissionStatusTypeID = (int)StandardSubmissionStatusTypes.Delisted_RemovedByDataProvider;
        submissionManager.PerformPOISubmission(delisted, user, false);
        itemCount++;
    }

    //refresh POI cache
    var cacheTask = Task.Run(async () => { return await OCM.Core.Data.CacheManager.RefreshCachedData(); });
    cacheTask.Wait();

    //temp: get all provider references for recognised duplicates
    /*var dupeRefs = from dupes in poiResults.Duplicates
                     where !String.IsNullOrEmpty(dupes.DataProvidersReference)
                     select dupes.DataProvidersReference;
    string dupeOutput = "";
    foreach (var d in dupeRefs)
    {
        dupeOutput += ",'" + d + "'";
    }
    System.Diagnostics.Debug.WriteLine(dupeOutput);*/

    if (poiResults.ProviderDetails.DefaultDataProvider != null)
    {
        //update date of last import for this provider
        new DataProviderManager().UpdateDateLastImport(poiResults.ProviderDetails.DefaultDataProvider.ID);
    }
}
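// Illustrative usage sketch (not part of the original class): one plausible way to drive the
// import pipeline end-to-end, assuming an instance of this manager class. The provider,
// credentials and "systemUser" are supplied by the hosting application; the empty outputPath
// is a placeholder since POIModelList mode does not write export files.
public async Task RunImportExample(IImportProvider provider, APICredentials credentials, CoreReferenceData coreRefData, User systemUser)
{
    // Run the import in POIModelList mode so results are returned in the ImportReport
    // rather than exported to file or published directly via the API.
    var report = await PerformImport(ExportType.POIModelList, true, credentials, coreRefData, "", provider, cacheInputData: true);

    // Apply the resulting additions/updates/delistings and refresh the POI cache.
    UpdateImportedPOIList(report, systemUser);
}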
/**
 * Generic Import Process
 *
 * Provider Properties
 *      Import Method
 *      Import URL/Path
 *      Import Frequency
 *      IsMaster
 *
 * Fetch Latest Data
 *
 * For each item
 *      Check If Exists or Strong Duplicate, Get ID
 *      If New, Add
 *      If Exists Then
 *          Prepare update; if provider supports live status, set that
 *          What if item was updated manually on OCM?
 *          Send Update
 *      End
 * Loop
 *
 * Log Exceptions
 * Log Count of Items Added or Modified
 *
 * Way to remove items (or log items) which no longer exist in the data source?
 */
public async Task<List<ChargePoint>> DeDuplicateList(List<ChargePoint> cpList, bool updateDuplicate, CoreReferenceData coreRefData, ImportReport report)
{
    //get list of all current POIs (in relevant countries) including most delisted ones
    int[] countryIds = (from poi in cpList
                        where poi.AddressInfo.Country != null
                        select poi.AddressInfo.Country.ID).Distinct().ToArray();

    SearchFilters filters = new SearchFilters { CountryIDs = countryIds, MaxResults = 1000000, EnableCaching = false, SubmissionStatusTypeIDs = new int[1] { 0 } };
    List<ChargePoint> masterList = await new OCMClient(IsSandboxedAPIMode).GetLocations(filters); //new OCMClient().FindSimilar(null, 10000); //fetch all charge points regardless of status

    //if we failed to get a master list, quit with no result
    if (masterList.Count == 0)
    {
        return new List<ChargePoint>();
    }

    List<ChargePoint> duplicateList = new List<ChargePoint>();
    List<ChargePoint> updateList = new List<ChargePoint>();

    ChargePoint previousCP = null;

    //for each item to be imported, deduplicate by adding to updateList only the items which we don't already have
    var cpListSortedByPos = cpList.OrderBy(c => c.AddressInfo.Latitude).ThenBy(c => c.AddressInfo.Longitude);

    foreach (var item in cpListSortedByPos)
    {
        var itemGeoPos = new System.Device.Location.GeoCoordinate(item.AddressInfo.Latitude, item.AddressInfo.Longitude);

        //item is a duplicate if we already seem to have it, based on the Data Provider's reference or an approximate position match
        var dupeList = masterList.Where(c =>
            (c.DataProvider != null && c.DataProvider.ID == item.DataProvider.ID && c.DataProvidersReference == item.DataProvidersReference) ||
            new System.Device.Location.GeoCoordinate(c.AddressInfo.Latitude, c.AddressInfo.Longitude).GetDistanceTo(itemGeoPos) < DUPLICATE_DISTANCE_METERS //meters distance apart
            );

        if (dupeList.Count() > 0)
        {
            if (updateDuplicate)
            {
                //if updating duplicates, get the exact matching duplicate based on provider reference and update/merge with this item to update status/merge properties
                var updatedItem = dupeList.FirstOrDefault(d => d.DataProvider.ID == item.DataProvider.ID && d.DataProvidersReference == item.DataProvidersReference);

                if (updatedItem != null)
                {
                    //only merge/update from live published items
                    if (updatedItem.SubmissionStatus.IsLive == (bool?)true)
                    {
                        //item is an exact match from the same data provider
                        //overwrite existing with imported data (use import as master)
                        MergeItemChanges(item, updatedItem, false);
                        updateList.Add(updatedItem);
                    }
                }

                if (updatedItem == null)
                {
                    //duplicates are not an exact match
                    //TODO: resolve whether imported data should change duplicate
                    //merge new properties from imported item
                    //if (item.StatusType != null) updatedItem.StatusType = item.StatusType;
                    //updateList.Add(updatedItem);
                }
            }

            //item has one or more likely duplicates, add it to the list of items to remove
            duplicateList.Add(item);
        }

        //mark item as duplicate if location/title exactly matches the previous entry or lat/long is within DUPLICATE_DISTANCE_METERS
        if (previousCP != null)
        {
            if (IsDuplicateLocation(item, previousCP, true))
            {
                if (!duplicateList.Contains(item))
                {
                    System.Diagnostics.Debug.WriteLine("Duplicate item removed: " + item.AddressInfo.Title);
                    duplicateList.Add(item);
                }
            }
        }

        previousCP = item;
    }

    //remove duplicates from the list to apply
    foreach (var dupe in duplicateList) { cpList.Remove(dupe); }
    System.Diagnostics.Debug.WriteLine("Duplicates removed from import: " + duplicateList.Count);

    //add updated items (replace duplicates with property changes)
    foreach (var updatedItem in updateList)
    {
        if (!cpList.Contains(updatedItem))
        {
            cpList.Add(updatedItem);
        }
    }
    System.Diagnostics.Debug.WriteLine("Updated items to import: " + updateList.Count);

    //populate missing location info from geolocation cache if possible
    PopulateLocationFromGeolocationCache(cpList, coreRefData);

    //final pass to catch duplicates present in the data source; mark additional items as Delisted Duplicate so we have a record for them
    var submissionStatusDelistedDupe = coreRefData.SubmissionStatusTypes.First(s => s.ID == 1001); //delisted duplicate
    previousCP = null;

    //sort current cp list by position again
    cpListSortedByPos = cpList.OrderBy(c => c.AddressInfo.Latitude).ThenBy(c => c.AddressInfo.Longitude);

    //mark any duplicates in the final list as delisted duplicates (submitted to API)
    foreach (var cp in cpListSortedByPos)
    {
        bool isDuplicate = false;
        if (previousCP != null)
        {
            isDuplicate = IsDuplicateLocation(cp, previousCP, false);
            if (isDuplicate)
            {
                cp.SubmissionStatus = submissionStatusDelistedDupe;
                cp.SubmissionStatusTypeID = submissionStatusDelistedDupe.ID;

                if (previousCP.ID > 0)
                {
                    if (cp.GeneralComments == null)
                    {
                        cp.GeneralComments = "";
                    }
                    cp.GeneralComments += " [Duplicate of OCM-" + previousCP.ID + "]";
                    cp.ParentChargePointID = previousCP.ID;
                }
            }
        }

        if (!isDuplicate)
        {
            previousCP = cp;
        }
    }

    report.Duplicates = duplicateList;
    //TODO: add additional pass of duplicates from above

    //return final processed list ready for applying as inserts/updates
    return cpList;
}
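// A minimal sketch of the position-based duplicate test used in DeDuplicateList above, pulled
// out for clarity. GeoCoordinate.GetDistanceTo returns metres; DUPLICATE_DISTANCE_METERS is
// assumed to be the class-level threshold referenced by the method (its value is not shown in
// this listing). This helper is illustrative only and is not part of the original class.
private bool IsWithinDuplicateDistance(ChargePoint a, ChargePoint b)
{
    var posA = new System.Device.Location.GeoCoordinate(a.AddressInfo.Latitude, a.AddressInfo.Longitude);
    var posB = new System.Device.Location.GeoCoordinate(b.AddressInfo.Latitude, b.AddressInfo.Longitude);

    // two POIs closer than the threshold are treated as likely duplicates
    return posA.GetDistanceTo(posB) < DUPLICATE_DISTANCE_METERS;
}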
public async Task<ImportReport> PerformImport(ExportType exportType, bool fetchLiveData, APICredentials credentials, CoreReferenceData coreRefData, string outputPath, IImportProvider provider)
{
    var p = ((BaseImportProvider)provider);
    p.ExportType = exportType;

    ImportReport resultReport = new ImportReport();
    resultReport.ProviderDetails = p;

    try
    {
        bool loadOK = false;

        if (fetchLiveData && p.IsAutoRefreshed && !String.IsNullOrEmpty(p.AutoRefreshURL))
        {
            p.Log("Loading input data from URL..");
            loadOK = p.LoadInputFromURL(p.AutoRefreshURL);
        }
        else
        {
            if (p.IsStringData)
            {
                p.Log("Loading input data from file..");
                loadOK = p.LoadInputFromFile(p.InputPath);
            }
            else
            {
                //binary streams pass as OK by default
                loadOK = true;
            }
        }

        if (!loadOK)
        {
            //failed to load
            p.Log("Failed to load input data.");
            throw new Exception("Failed to fetch input data");
        }

        List<ChargePoint> duplicatesList = new List<ChargePoint>();

        p.Log("Processing input..");
        var list = provider.Process(coreRefData);

        int numAdded = 0;
        int numUpdated = 0;

        if (list.Count > 0)
        {
            p.Log("De-Duplicating list (" + p.ProviderName + ":: " + list.Count + " Items)..");

            //de-duplicate and clean list based on existing data
            //TODO: take original and replace in final update list, setting relevant updated properties (merge) and status
            var finalList = await DeDuplicateList(list, true, coreRefData, resultReport);
            //var finalList = list;

            if (ImportUpdatesOnly)
            {
                finalList = finalList.Where(l => l.ID > 0).ToList();
            }

            //finalList = client.GetLocations(new SearchFilters { MaxResults = 10000 });

            //export/apply updates
            if (p.ExportType == ExportType.XML)
            {
                p.Log("Exporting XML..");
                //output xml
                p.ExportXMLFile(finalList, outputPath + p.OutputNamePrefix + ".xml");
            }

            if (p.ExportType == ExportType.CSV)
            {
                p.Log("Exporting CSV..");
                //output csv
                p.ExportCSVFile(finalList, outputPath + p.OutputNamePrefix + ".csv");
            }

            if (p.ExportType == ExportType.JSON)
            {
                p.Log("Exporting JSON..");
                //output json
                p.ExportJSONFile(finalList, outputPath + p.OutputNamePrefix + ".json");
            }

            if (p.ExportType == ExportType.API && p.IsProductionReady)
            {
                //publish list of locations to OCM via API
                OCMClient ocmClient = new OCMClient(IsSandboxedAPIMode);
                p.Log("Publishing via API..");

                foreach (ChargePoint cp in finalList.Where(l => l.AddressInfo.Country != null))
                {
                    ocmClient.UpdateItem(cp, credentials);
                    if (cp.ID == 0) { numAdded++; } else { numUpdated++; }
                }
            }

            if (p.ExportType == ExportType.POIModelList)
            {
                resultReport.Added = finalList.Where(cp => cp.ID == 0).ToList();
                resultReport.Updated = finalList.Where(cp => cp.ID > 0).ToList();
            }
        }

        p.Log("Import Processed:" + provider.GetProviderName() + " Added:" + numAdded + " Updated:" + numUpdated);
    }
    catch (Exception exp)
    {
        p.Log("Import Failed:" + provider.GetProviderName() + " ::" + exp.ToString());
    }

    return resultReport;
}
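// Illustrative only: the provider surface that PerformImport above relies on, reconstructed
// purely from the calls it makes. The real IImportProvider/BaseImportProvider types are defined
// elsewhere in the project and will differ in detail and have additional members; treat this
// sketch as a reading aid, not the actual contract.
public interface IImportProviderSketch
{
    ExportType ExportType { get; set; }
    bool IsAutoRefreshed { get; }
    bool IsStringData { get; }
    bool IsProductionReady { get; }
    string AutoRefreshURL { get; }
    string InputPath { get; }
    string OutputNamePrefix { get; }
    string ProviderName { get; }

    bool LoadInputFromURL(string url);
    bool LoadInputFromFile(string path);
    List<ChargePoint> Process(CoreReferenceData coreRefData);
    void ExportXMLFile(List<ChargePoint> items, string path);
    void ExportCSVFile(List<ChargePoint> items, string path);
    void ExportJSONFile(List<ChargePoint> items, string path);
    void Log(string message);
    string GetProviderName();
}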
public async Task<ImportReport> PerformImport(ExportType exportType, bool fetchLiveData, APICredentials credentials, CoreReferenceData coreRefData, string outputPath, IImportProvider provider, bool cacheInputData, bool fetchExistingFromAPI = false)
{
    var p = ((BaseImportProvider)provider);
    p.ExportType = exportType;

    ImportReport resultReport = new ImportReport();
    resultReport.ProviderDetails = p;

    try
    {
        bool loadOK = false;

        if (p.ImportInitialisationRequired && p is IImportProviderWithInit)
        {
            ((IImportProviderWithInit)provider).InitImportProvider();
        }

        if (fetchLiveData && p.IsAutoRefreshed && !String.IsNullOrEmpty(p.AutoRefreshURL))
        {
            Log("Loading input data from URL..");
            loadOK = p.LoadInputFromURL(p.AutoRefreshURL);
        }
        else
        {
            if (p.IsStringData && !p.UseCustomReader)
            {
                Log("Loading input data from file..");
                loadOK = p.LoadInputFromFile(p.InputPath);
            }
            else
            {
                //binary streams pass as OK by default
                loadOK = true;
            }
        }

        if (!loadOK)
        {
            //failed to load
            Log("Failed to load input data.");
            throw new Exception("Failed to fetch input data");
        }
        else
        {
            if (fetchLiveData && cacheInputData)
            {
                //save input data
                p.SaveInputFile(p.InputPath);
            }
        }

        List<ChargePoint> duplicatesList = new List<ChargePoint>();

        Log("Processing input..");
        var list = provider.Process(coreRefData);

        int numAdded = 0;
        int numUpdated = 0;

        if (list.Count > 0)
        {
            if (p.MergeDuplicatePOIEquipment)
            {
                Log("Merging Equipment from Duplicate POIs");
                list = MergeDuplicatePOIEquipment(list);
            }

            if (!p.IncludeInvalidPOIs)
            {
                Log("Cleaning invalid POIs");
                var invalidPOIs = new List<ChargePoint>();
                foreach (var poi in list)
                {
                    if (!BaseImportProvider.IsPOIValidForImport(poi))
                    {
                        invalidPOIs.Add(poi);
                    }
                }
                foreach (var poi in invalidPOIs) { list.Remove(poi); }
            }

            GC.Collect();

            List<ChargePoint> finalList = new List<ChargePoint>();

            if (!p.SkipDeduplication)
            {
                Log("De-Duplicating list (" + p.ProviderName + ":: " + list.Count + " Items)..");

                //de-duplicate and clean list based on existing data
                //TODO: take original and replace in final update list, setting relevant updated properties (merge) and status
                finalList = await DeDuplicateList(list.ToList(), true, coreRefData, resultReport, p.AllowDuplicatePOIWithDifferentOperator, fetchExistingFromAPI);
                //var finalList = list;
            }
            else
            {
                //skip deduplication
                finalList = list.ToList();
            }

            if (ImportUpdatesOnly)
            {
                finalList = finalList.Where(l => l.ID > 0).ToList();
            }

            //finalList = client.GetLocations(new SearchFilters { MaxResults = 10000 });

            GC.Collect();

            //export/apply updates
            if (p.ExportType == ExportType.XML)
            {
                Log("Exporting XML..");
                //output xml
                p.ExportXMLFile(finalList, outputPath + p.OutputNamePrefix + ".xml");
            }

            if (p.ExportType == ExportType.CSV)
            {
                Log("Exporting CSV..");
                //output csv
                p.ExportCSVFile(finalList, outputPath + p.OutputNamePrefix + ".csv");
            }

            if (p.ExportType == ExportType.JSON)
            {
                Log("Exporting JSON..");
                //output json
                p.ExportJSONFile(finalList, outputPath + p.OutputNamePrefix + ".json");
            }

            if (p.ExportType == ExportType.API && p.IsProductionReady)
            {
                //publish list of locations to OCM via API
                OCMClient ocmClient = new OCMClient(IsSandboxedAPIMode);
                Log("Publishing via API..");

                foreach (ChargePoint cp in finalList.Where(l => l.AddressInfo.Country != null))
                {
                    ocmClient.UpdateItem(cp, credentials);
                    if (cp.ID == 0) { numAdded++; } else { numUpdated++; }
                }
            }

            if (p.ExportType == ExportType.POIModelList)
            {
                //result report contains POI lists
            }
        }

        Log("Import Processed:" + provider.GetProviderName() + " Added:" + numAdded + " Updated:" + numUpdated);
    }
    catch (Exception exp)
    {
        Log("Import Failed:" + provider.GetProviderName() + " ::" + exp.ToString());
    }

    resultReport.Log = "";
    resultReport.Log += p.ProcessingLog;
    resultReport.Log += ImportLog;

    return resultReport;
}
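// Illustrative only: one way to call the extended overload above so that fetched source data is
// cached locally (cacheInputData: true) and deduplication runs against the live API rather than
// the local database (fetchExistingFromAPI: true). The method and parameter values here are
// placeholders chosen for the example, not part of the original code.
public async Task<ImportReport> RunApiBackedImportExample(IImportProvider provider, APICredentials credentials, CoreReferenceData coreRefData, string outputPath)
{
    return await PerformImport(ExportType.POIModelList, true, credentials, coreRefData, outputPath,
        provider, cacheInputData: true, fetchExistingFromAPI: true);
}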
public async Task<List<ChargePoint>> DeDuplicateList(List<ChargePoint> cpList, bool updateDuplicate, CoreReferenceData coreRefData, ImportReport report, bool allowDupeWithDifferentOperator = false, bool fetchExistingFromAPI = false)
{
    var stopWatch = new Stopwatch();
    stopWatch.Start();

    var poiManager = new POIManager();

    //get list of all current POIs (in relevant countries) including most delisted ones
    int[] countryIds = (from poi in cpList
                        where poi.AddressInfo.Country != null
                        select poi.AddressInfo.Country.ID).Distinct().ToArray();

    APIRequestParams filters = new APIRequestParams { CountryIDs = countryIds, MaxResults = 1000000, EnableCaching = true, SubmissionStatusTypeID = 0 };

    List<ChargePoint> masterListCollection = new List<ChargePoint>();
    if (fetchExistingFromAPI)
    {
        // fetch from API
        masterListCollection = await new OCMClient(false).GetLocations(new SearchFilters
        {
            CountryIDs = countryIds,
            MaxResults = 1000000,
            EnableCaching = true,
            SubmissionStatusTypeIDs = new int[0]
        });
    }
    else
    {
        // use local database
        masterListCollection = poiManager.GetChargePoints(filters);
    }

    var spec = new i4o.IndexSpecification<ChargePoint>()
        .Add(i => i.DataProviderID)
        .Add(i => i.DataProvidersReference);

    var masterList = new i4o.IndexSet<ChargePoint>(masterListCollection, spec);

    List<ChargePoint> masterListCopy = new List<ChargePoint>();
    foreach (var tmp in masterList)
    {
        //full copy of master list item so we have before/after
        masterListCopy.Add(JsonConvert.DeserializeObject<ChargePoint>(JsonConvert.SerializeObject(tmp)));
    }

    //if we failed to get a master list, quit with no result
    if (masterListCollection.Count == 0)
    {
        return new List<ChargePoint>();
    }

    List<ChargePoint> duplicateList = new List<ChargePoint>();
    List<ChargePoint> updateList = new List<ChargePoint>();

    ChargePoint previousCP = null;

    //for each item to be imported, deduplicate by adding to updateList only the items which we don't already have
    var cpListSortedByPos = cpList.OrderBy(c => c.AddressInfo.Latitude).ThenBy(c => c.AddressInfo.Longitude);

    int poiProcessed = 0;
    int totalPOI = cpListSortedByPos.Count();

    Stopwatch dupeIdentWatch = new Stopwatch();
    dupeIdentWatch.Start();

    foreach (var item in cpListSortedByPos)
    {
        var itemGeoPos = new System.Device.Location.GeoCoordinate(item.AddressInfo.Latitude, item.AddressInfo.Longitude);

        //item is a duplicate if we already seem to have it, based on the Data Provider's reference or an approximate position match
        var dupeList = masterList.Where(c =>
            (// c.DataProvider != null &&
             c.DataProviderID == item.DataProviderID && c.DataProvidersReference == item.DataProvidersReference) ||
            (c.AddressInfo.Title == item.AddressInfo.Title && c.AddressInfo.AddressLine1 == item.AddressInfo.AddressLine1 && c.AddressInfo.Postcode == item.AddressInfo.Postcode) ||
            (GeoManager.IsClose(c.AddressInfo.Latitude, c.AddressInfo.Longitude, item.AddressInfo.Latitude, item.AddressInfo.Longitude)
                && new System.Device.Location.GeoCoordinate(c.AddressInfo.Latitude, c.AddressInfo.Longitude).GetDistanceTo(itemGeoPos) < DUPLICATE_DISTANCE_METERS) //meters distance apart
            );

        if (dupeList.Any())
        {
            if (updateDuplicate)
            {
                //if updating duplicates, get the exact matching duplicate based on provider reference and update/merge with this item to update status/merge properties
                var updatedItem = dupeList.FirstOrDefault(d => d.DataProviderID == (item.DataProvider != null ? item.DataProvider.ID : item.DataProviderID) && d.DataProvidersReference == item.DataProvidersReference);

                if (updatedItem != null)
                {
                    //only merge/update from live published items (or items previously delisted by the data provider / as not public)
                    if (updatedItem.SubmissionStatus.IsLive == (bool?)true
                        || updatedItem.SubmissionStatus.ID == (int)StandardSubmissionStatusTypes.Delisted_RemovedByDataProvider
                        || updatedItem.SubmissionStatus.ID == (int)StandardSubmissionStatusTypes.Delisted_NotPublicInformation)
                    {
                        //item is an exact match from the same data provider
                        //overwrite existing with imported data (use import as master)
                        //updatedItem = poiManager.PreviewPopulatedPOIFromModel(updatedItem);
                        MergeItemChanges(item, updatedItem, false);
                        updateList.Add(updatedItem);
                    }
                }

                if (updatedItem == null)
                {
                    //duplicates are not an exact match
                    //TODO: resolve whether imported data should change duplicate
                    //merge new properties from imported item
                    //if (item.StatusType != null) updatedItem.StatusType = item.StatusType;
                    //updateList.Add(updatedItem);
                }
            }

            //item has one or more likely duplicates, add it to the list of items to remove
            duplicateList.Add(item);
        }

        //mark item as duplicate if location/title exactly matches the previous entry or lat/long is within DUPLICATE_DISTANCE_METERS
        if (previousCP != null)
        {
            //this branch is the most expensive part of dedupe:
            if (IsDuplicateLocation(item, previousCP, true))
            {
                if (!duplicateList.Contains(item))
                {
                    if (allowDupeWithDifferentOperator && item.OperatorID != previousCP.OperatorID)
                    {
                        Log("Duplicate allowed due to different operator: " + item.AddressInfo.Title);
                    }
                    else
                    {
                        Log("Duplicate item removed: " + item.AddressInfo.Title);
                        duplicateList.Add(item);
                    }
                }
            }
        }

        previousCP = item;

        poiProcessed++;
        if (poiProcessed % 300 == 0)
        {
            System.Diagnostics.Debug.WriteLine("Deduplication: " + poiProcessed + " processed of " + totalPOI);
        }
    }

    dupeIdentWatch.Stop();
    Log("De-dupe pass took " + dupeIdentWatch.Elapsed.TotalSeconds + " seconds. " + (dupeIdentWatch.Elapsed.TotalMilliseconds / cpList.Count) + "ms per item.");

    //remove duplicates from the list to apply
    foreach (var dupe in duplicateList) { cpList.Remove(dupe); }
    Log("Duplicates removed from import:" + duplicateList.Count);

    //add updated items (replace duplicates with property changes)
    foreach (var updatedItem in updateList)
    {
        if (!cpList.Contains(updatedItem))
        {
            cpList.Add(updatedItem);
        }
    }
    Log("Updated items to import:" + updateList.Count);

    //populate missing location info from geolocation cache if possible
    Stopwatch geoWatch = new Stopwatch();
    geoWatch.Start();
    PopulateLocationFromGeolocationCache(cpList, coreRefData);
    geoWatch.Stop();
    Log("Populate Country from Lat/Long took " + geoWatch.Elapsed.TotalSeconds + " seconds. " + (geoWatch.Elapsed.TotalMilliseconds / cpList.Count) + "ms per item.");

    //final pass to catch duplicates present in the data source; mark additional items as Delisted Duplicate so we have a record for them
    var submissionStatusDelistedDupe = coreRefData.SubmissionStatusTypes.First(s => s.ID == 1001); //delisted duplicate
    previousCP = null;

    //sort current cp list by position again
    cpListSortedByPos = cpList.OrderBy(c => c.AddressInfo.Latitude).ThenBy(c => c.AddressInfo.Longitude);

    //mark any duplicates in the final list as delisted duplicates (submitted to API)
    foreach (var cp in cpListSortedByPos)
    {
        bool isDuplicate = false;
        if (previousCP != null)
        {
            isDuplicate = IsDuplicateLocation(cp, previousCP, false);
            if (isDuplicate)
            {
                cp.SubmissionStatus = submissionStatusDelistedDupe;
                cp.SubmissionStatusTypeID = submissionStatusDelistedDupe.ID;

                if (previousCP.ID > 0)
                {
                    if (cp.GeneralComments == null)
                    {
                        cp.GeneralComments = "";
                    }
                    cp.GeneralComments += " [Duplicate of OCM-" + previousCP.ID + "]";
                    cp.ParentChargePointID = previousCP.ID;
                }
            }
        }

        if (!isDuplicate)
        {
            previousCP = cp;
        }
    }

    report.Added = cpListSortedByPos.Where(cp => cp.ID == 0).ToList();
    report.Updated = cpListSortedByPos.Where(cp => cp.ID > 0).ToList();
    report.Duplicates = duplicateList;
    //TODO: add additional pass of duplicates from above

    //determine which POIs in our master list are no longer referenced in the import
    report.Delisted = masterList.Where(cp => cp.DataProviderID == report.ProviderDetails.DataProviderID
            && cp.SubmissionStatus != null
            && (cp.SubmissionStatus.IsLive == true || cp.SubmissionStatusTypeID == (int)StandardSubmissionStatusTypes.Imported_UnderReview)
            && !cpListSortedByPos.Any(master => master.ID == cp.ID)
            && !report.Duplicates.Any(master => master.ID == cp.ID)
            && cp.UserComments == null
            && cp.MediaItems == null).ToList();

    //safety check to ensure we're not delisting items just because we have incomplete import data:
    if (cpList.Count < 50)// || (report.Delisted.Count > cpList.Count))
    {
        report.Delisted = new List<ChargePoint>();
    }

    //determine list of low quality POIs (incomplete address info etc.)
    report.LowDataQuality = new List<ChargePoint>();
    report.LowDataQuality.AddRange(GetLowDataQualityPOIs(report.Added));
    report.LowDataQuality.AddRange(GetLowDataQualityPOIs(report.Updated));

    Log("Removing " + report.LowDataQuality.Count + " low quality POIs from added/updated");

    //remove references in added/updated to any low quality POIs
    foreach (var p in report.LowDataQuality) { report.Added.Remove(p); }
    foreach (var p in report.LowDataQuality) { report.Updated.Remove(p); }

    //remove updates which only change DateLastStatusUpdate
    var updatesToIgnore = new List<ChargePoint>();
    foreach (var poi in report.Updated)
    {
        var origPOI = masterListCopy.FirstOrDefault(p => p.ID == poi.ID);
        var updatedPOI = poiManager.PreviewPopulatedPOIFromModel(poi);
        var differences = poiManager.CheckDifferences(origPOI, updatedPOI);

        differences.RemoveAll(d => d.Context == ".MetadataValues");
        differences.RemoveAll(d => d.Context == ".DateLastStatusUpdate");
        differences.RemoveAll(d => d.Context == ".UUID");
        differences.RemoveAll(d => d.Context == ".DataProvider.DateLastImported");
        differences.RemoveAll(d => d.Context == ".IsRecentlyVerified");
        differences.RemoveAll(d => d.Context == ".DateLastVerified");
        differences.RemoveAll(d => d.Context == ".UserComments");
        differences.RemoveAll(d => d.Context == ".MediaItems");

        if (!differences.Any())
        {
            updatesToIgnore.Add(poi);
        }
        else
        {
            //differences exist
            CompareLogic compareLogic = new CompareLogic();
            compareLogic.Config.MaxDifferences = 100;
            compareLogic.Config.IgnoreObjectTypes = false;
            compareLogic.Config.IgnoreUnknownObjectTypes = true;
            compareLogic.Config.CompareChildren = true;

            ComparisonResult result = compareLogic.Compare(origPOI, updatedPOI);
            var diffReport = new KellermanSoftware.CompareNetObjects.Reports.UserFriendlyReport();

            result.Differences.RemoveAll(d => d.PropertyName == ".MetadataValues");
            result.Differences.RemoveAll(d => d.PropertyName == ".DateLastStatusUpdate");
            result.Differences.RemoveAll(d => d.PropertyName == ".UUID");
            result.Differences.RemoveAll(d => d.PropertyName == ".DataProvider.DateLastImported");
            result.Differences.RemoveAll(d => d.PropertyName == ".IsRecentlyVerified");
            result.Differences.RemoveAll(d => d.PropertyName == ".DateLastVerified");
            result.Differences.RemoveAll(d => d.PropertyName == ".UserComments");
            result.Differences.RemoveAll(d => d.PropertyName == ".MediaItems");

            System.Diagnostics.Debug.WriteLine("Difference:" + diffReport.OutputString(result.Differences));

            if (!result.Differences.Any())
            {
                updatesToIgnore.Add(poi);
            }
        }
    }

    foreach (var p in updatesToIgnore)
    {
        if (report.Unchanged == null)
        {
            report.Unchanged = new List<ChargePoint>();
        }
        report.Unchanged.Add(p);
        report.Updated.Remove(p);
    }

    //TODO: if POI is a duplicate, ensure the imported data provider reference/URL is included as reference metadata in OCM's version of the POI

    stopWatch.Stop();
    Log("Deduplicate List took " + stopWatch.Elapsed.TotalSeconds + " seconds");

    //return final processed list ready for applying as inserts/updates
    return cpListSortedByPos.ToList();
}
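// A minimal sketch of the same "ignore noisy members" comparison used at the end of
// DeDuplicateList above, expressed via CompareNetObjects' built-in ignore list instead of
// filtering result.Differences afterwards. ComparisonConfig.MembersToIgnore matches on member
// names rather than the ".PropertyName" paths filtered above, so the behaviour is only roughly
// equivalent; treat this helper as an illustrative alternative, not a drop-in replacement.
private static bool HasMeaningfulDifferences(ChargePoint origPOI, ChargePoint updatedPOI)
{
    var compareLogic = new CompareLogic();
    compareLogic.Config.MaxDifferences = 100;
    compareLogic.Config.CompareChildren = true;

    // members whose changes should not count as a "real" update
    compareLogic.Config.MembersToIgnore.AddRange(new[]
    {
        "MetadataValues", "DateLastStatusUpdate", "UUID", "DateLastImported",
        "IsRecentlyVerified", "DateLastVerified", "UserComments", "MediaItems"
    });

    ComparisonResult result = compareLogic.Compare(origPOI, updatedPOI);
    return !result.AreEqual;
}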
public async Task<ImportReport> PerformImport(ExportType exportType, bool fetchLiveData, APICredentials credentials, CoreReferenceData coreRefData, string outputPath, IImportProvider provider, bool cacheInputData)
{
    var p = ((BaseImportProvider)provider);
    p.ExportType = exportType;

    ImportReport resultReport = new ImportReport();
    resultReport.ProviderDetails = p;

    try
    {
        bool loadOK = false;

        if (p.ImportInitialisationRequired && p is IImportProviderWithInit)
        {
            ((IImportProviderWithInit)provider).InitImportProvider();
        }

        if (fetchLiveData && p.IsAutoRefreshed && !String.IsNullOrEmpty(p.AutoRefreshURL))
        {
            Log("Loading input data from URL..");
            loadOK = p.LoadInputFromURL(p.AutoRefreshURL);
        }
        else
        {
            if (p.IsStringData && !p.UseCustomReader)
            {
                Log("Loading input data from file..");
                loadOK = p.LoadInputFromFile(p.InputPath);
            }
            else
            {
                //binary streams pass as OK by default
                loadOK = true;
            }
        }

        if (!loadOK)
        {
            //failed to load
            Log("Failed to load input data.");
            throw new Exception("Failed to fetch input data");
        }
        else
        {
            if (fetchLiveData && cacheInputData)
            {
                //save input data
                p.SaveInputFile(p.InputPath);
            }
        }

        List<ChargePoint> duplicatesList = new List<ChargePoint>();

        Log("Processing input..");
        var list = provider.Process(coreRefData);

        int numAdded = 0;
        int numUpdated = 0;

        if (list.Count > 0)
        {
            if (p.MergeDuplicatePOIEquipment)
            {
                Log("Merging Equipment from Duplicate POIs");
                list = MergeDuplicatePOIEquipment(list);
            }

            if (!p.IncludeInvalidPOIs)
            {
                Log("Cleaning invalid POIs");
                var invalidPOIs = new List<ChargePoint>();
                foreach (var poi in list)
                {
                    if (!BaseImportProvider.IsPOIValidForImport(poi))
                    {
                        invalidPOIs.Add(poi);
                    }
                }
                foreach (var poi in invalidPOIs) { list.Remove(poi); }
            }

            List<ChargePoint> finalList = new List<ChargePoint>();

            if (!p.SkipDeduplication)
            {
                Log("De-Duplicating list (" + p.ProviderName + ":: " + list.Count + " Items)..");

                //de-duplicate and clean list based on existing data
                //TODO: take original and replace in final update list, setting relevant updated properties (merge) and status
                finalList = await DeDuplicateList(list.ToList(), true, coreRefData, resultReport, p.AllowDuplicatePOIWithDifferentOperator);
                //var finalList = list;
            }
            else
            {
                //skip deduplication
                finalList = list.ToList();
            }

            if (ImportUpdatesOnly)
            {
                finalList = finalList.Where(l => l.ID > 0).ToList();
            }

            //finalList = client.GetLocations(new SearchFilters { MaxResults = 10000 });

            //export/apply updates
            if (p.ExportType == ExportType.XML)
            {
                Log("Exporting XML..");
                //output xml
                p.ExportXMLFile(finalList, outputPath + p.OutputNamePrefix + ".xml");
            }

            if (p.ExportType == ExportType.CSV)
            {
                Log("Exporting CSV..");
                //output csv
                p.ExportCSVFile(finalList, outputPath + p.OutputNamePrefix + ".csv");
            }

            if (p.ExportType == ExportType.JSON)
            {
                Log("Exporting JSON..");
                //output json
                p.ExportJSONFile(finalList, outputPath + p.OutputNamePrefix + ".json");
            }

            if (p.ExportType == ExportType.API && p.IsProductionReady)
            {
                //publish list of locations to OCM via API
                OCMClient ocmClient = new OCMClient(IsSandboxedAPIMode);
                Log("Publishing via API..");

                foreach (ChargePoint cp in finalList.Where(l => l.AddressInfo.Country != null))
                {
                    ocmClient.UpdateItem(cp, credentials);
                    if (cp.ID == 0) { numAdded++; } else { numUpdated++; }
                }
            }

            if (p.ExportType == ExportType.POIModelList)
            {
                //result report contains POI lists
            }
        }

        Log("Import Processed:" + provider.GetProviderName() + " Added:" + numAdded + " Updated:" + numUpdated);
    }
    catch (Exception exp)
    {
        Log("Import Failed:" + provider.GetProviderName() + " ::" + exp.ToString());
    }

    resultReport.Log = "";
    resultReport.Log += p.ProcessingLog;
    resultReport.Log += ImportLog;

    return resultReport;
}
/**
 * Generic Import Process
 *
 * Provider Properties
 *      Import Method
 *      Import URL/Path
 *      Import Frequency
 *      IsMaster
 *
 * Fetch Latest Data
 *
 * For each item
 *      Check If Exists or Strong Duplicate, Get ID
 *      If New, Add
 *      If Exists Then
 *          Prepare update; if provider supports live status, set that
 *          What if item was updated manually on OCM?
 *          Send Update
 *      End
 * Loop
 *
 * Log Exceptions
 * Log Count of Items Added or Modified
 *
 * Way to remove items (or log items) which no longer exist in the data source?
 */
public async Task<List<ChargePoint>> DeDuplicateList(List<ChargePoint> cpList, bool updateDuplicate, CoreReferenceData coreRefData, ImportReport report, bool allowDupeWithDifferentOperator = false)
{
    var stopWatch = new Stopwatch();
    stopWatch.Start();

    //get list of all current POIs (in relevant countries) including most delisted ones
    int[] countryIds = (from poi in cpList
                        where poi.AddressInfo.Country != null
                        select poi.AddressInfo.Country.ID).Distinct().ToArray();

    APIRequestParams filters = new APIRequestParams { CountryIDs = countryIds, MaxResults = 1000000, EnableCaching = false, SubmissionStatusTypeID = 0 };

    //List<ChargePoint> masterList = await new OCMClient(IsSandboxedAPIMode).GetLocations(filters); //new OCMClient().FindSimilar(null, 10000); //fetch all charge points regardless of status
    var poiManager = new POIManager();
    List<ChargePoint> masterListCollection = poiManager.GetChargePoints(filters); //new OCMClient().FindSimilar(null, 10000); //fetch all charge points regardless of status

    var spec = new i4o.IndexSpecification<ChargePoint>()
        .Add(i => i.DataProviderID)
        .Add(i => i.DataProvidersReference);

    var masterList = new i4o.IndexSet<ChargePoint>(masterListCollection, spec);

    List<ChargePoint> masterListCopy = new List<ChargePoint>();
    foreach (var tmp in masterList)
    {
        //full copy of master list item so we have before/after
        masterListCopy.Add(JsonConvert.DeserializeObject<ChargePoint>(JsonConvert.SerializeObject(tmp)));
    }

    //if we failed to get a master list, quit with no result
    if (masterListCollection.Count == 0) return new List<ChargePoint>();

    List<ChargePoint> duplicateList = new List<ChargePoint>();
    List<ChargePoint> updateList = new List<ChargePoint>();

    ChargePoint previousCP = null;

    //for each item to be imported, deduplicate by adding to updateList only the items which we don't already have
    var cpListSortedByPos = cpList.OrderBy(c => c.AddressInfo.Latitude).ThenBy(c => c.AddressInfo.Longitude);

    int poiProcessed = 0;
    int totalPOI = cpListSortedByPos.Count();

    Stopwatch dupeIdentWatch = new Stopwatch();
    dupeIdentWatch.Start();

    foreach (var item in cpListSortedByPos)
    {
        var itemGeoPos = new System.Device.Location.GeoCoordinate(item.AddressInfo.Latitude, item.AddressInfo.Longitude);

        //item is a duplicate if we already seem to have it, based on the Data Provider's reference or an approximate position match
        var dupeList = masterList.Where(c =>
            (c.DataProvider != null && c.DataProviderID == item.DataProviderID && c.DataProvidersReference == item.DataProvidersReference) ||
            (c.AddressInfo.Title == item.AddressInfo.Title && c.AddressInfo.AddressLine1 == item.AddressInfo.AddressLine1 && c.AddressInfo.Postcode == item.AddressInfo.Postcode) ||
            (GeoManager.IsClose(c.AddressInfo.Latitude, c.AddressInfo.Longitude, item.AddressInfo.Latitude, item.AddressInfo.Longitude)
                && new System.Device.Location.GeoCoordinate(c.AddressInfo.Latitude, c.AddressInfo.Longitude).GetDistanceTo(itemGeoPos) < DUPLICATE_DISTANCE_METERS) //meters distance apart
            );

        if (dupeList.Any())
        {
            if (updateDuplicate)
            {
                //if updating duplicates, get the exact matching duplicate based on provider reference and update/merge with this item to update status/merge properties
                var updatedItem = dupeList.FirstOrDefault(d => d.DataProviderID == (item.DataProvider != null ? item.DataProvider.ID : item.DataProviderID) && d.DataProvidersReference == item.DataProvidersReference);

                if (updatedItem != null)
                {
                    //only merge/update from live published items (or items previously delisted by the data provider / as not public)
                    if (updatedItem.SubmissionStatus.IsLive == (bool?)true
                        || updatedItem.SubmissionStatus.ID == (int)StandardSubmissionStatusTypes.Delisted_RemovedByDataProvider
                        || updatedItem.SubmissionStatus.ID == (int)StandardSubmissionStatusTypes.Delisted_NotPublicInformation)
                    {
                        //item is an exact match from the same data provider
                        //overwrite existing with imported data (use import as master)
                        //updatedItem = poiManager.PreviewPopulatedPOIFromModel(updatedItem);
                        MergeItemChanges(item, updatedItem, false);
                        updateList.Add(updatedItem);
                    }
                }

                if (updatedItem == null)
                {
                    //duplicates are not an exact match
                    //TODO: resolve whether imported data should change duplicate
                    //merge new properties from imported item
                    //if (item.StatusType != null) updatedItem.StatusType = item.StatusType;
                    //updateList.Add(updatedItem);
                }
            }

            //item has one or more likely duplicates, add it to the list of items to remove
            duplicateList.Add(item);
        }

        //mark item as duplicate if location/title exactly matches the previous entry or lat/long is within DUPLICATE_DISTANCE_METERS
        if (previousCP != null)
        {
            //this branch is the most expensive part of dedupe:
            if (IsDuplicateLocation(item, previousCP, true))
            {
                if (!duplicateList.Contains(item))
                {
                    if (allowDupeWithDifferentOperator && item.OperatorID != previousCP.OperatorID)
                    {
                        Log("Duplicate allowed due to different operator: " + item.AddressInfo.Title);
                    }
                    else
                    {
                        Log("Duplicate item removed: " + item.AddressInfo.Title);
                        duplicateList.Add(item);
                    }
                }
            }
        }

        previousCP = item;

        poiProcessed++;
        if (poiProcessed % 300 == 0)
        {
            System.Diagnostics.Debug.WriteLine("Deduplication: " + poiProcessed + " processed of " + totalPOI);
        }
    }

    dupeIdentWatch.Stop();
    Log("De-dupe pass took " + dupeIdentWatch.Elapsed.TotalSeconds + " seconds. " + (dupeIdentWatch.Elapsed.TotalMilliseconds / cpList.Count) + "ms per item.");

    //remove duplicates from the list to apply
    foreach (var dupe in duplicateList) { cpList.Remove(dupe); }
    Log("Duplicates removed from import:" + duplicateList.Count);

    //add updated items (replace duplicates with property changes)
    foreach (var updatedItem in updateList)
    {
        if (!cpList.Contains(updatedItem))
        {
            cpList.Add(updatedItem);
        }
    }
    Log("Updated items to import:" + updateList.Count);

    //populate missing location info from geolocation cache if possible
    Stopwatch geoWatch = new Stopwatch();
    geoWatch.Start();
    PopulateLocationFromGeolocationCache(cpList, coreRefData);
    geoWatch.Stop();
    Log("Populate Country from Lat/Long took " + geoWatch.Elapsed.TotalSeconds + " seconds. " + (geoWatch.Elapsed.TotalMilliseconds / cpList.Count) + "ms per item.");

    //final pass to catch duplicates present in the data source; mark additional items as Delisted Duplicate so we have a record for them
    var submissionStatusDelistedDupe = coreRefData.SubmissionStatusTypes.First(s => s.ID == 1001); //delisted duplicate
    previousCP = null;

    //sort current cp list by position again
    cpListSortedByPos = cpList.OrderBy(c => c.AddressInfo.Latitude).ThenBy(c => c.AddressInfo.Longitude);

    //mark any duplicates in the final list as delisted duplicates (submitted to api)
    foreach (var cp in cpListSortedByPos)
    {
        bool isDuplicate = false;
        if (previousCP != null)
        {
            isDuplicate = IsDuplicateLocation(cp, previousCP, false);
            if (isDuplicate)
            {
                cp.SubmissionStatus = submissionStatusDelistedDupe;
                cp.SubmissionStatusTypeID = submissionStatusDelistedDupe.ID;

                if (previousCP.ID > 0)
                {
                    if (cp.GeneralComments == null) cp.GeneralComments = "";
                    cp.GeneralComments += " [Duplicate of OCM-" + previousCP.ID + "]";
                    cp.ParentChargePointID = previousCP.ID;
                }
            }
        }

        if (!isDuplicate)
        {
            previousCP = cp;
        }
    }

    report.Added = cpListSortedByPos.Where(cp => cp.ID == 0).ToList();
    report.Updated = cpListSortedByPos.Where(cp => cp.ID > 0).ToList();
    report.Duplicates = duplicateList;
    //TODO: add additional pass of duplicates from above

    //determine which POIs in our master list are no longer referenced in the import
    report.Delisted = masterList.Where(cp => cp.DataProviderID == report.ProviderDetails.DataProviderID
            && cp.SubmissionStatus != null
            && (cp.SubmissionStatus.IsLive == true || cp.SubmissionStatusTypeID == (int)StandardSubmissionStatusTypes.Imported_UnderReview)
            && !cpListSortedByPos.Any(master => master.ID == cp.ID)
            && !report.Duplicates.Any(master => master.ID == cp.ID)
            && cp.UserComments == null
            && cp.MediaItems == null).ToList();

    //safety check to ensure we're not delisting items just because we have incomplete import data:
    if (cpList.Count < 50)// || (report.Delisted.Count > cpList.Count))
    {
        report.Delisted = new List<ChargePoint>();
    }

    //determine list of low quality POIs (incomplete address info etc.)
    report.LowDataQuality = new List<ChargePoint>();
    report.LowDataQuality.AddRange(GetLowDataQualityPOIs(report.Added));
    report.LowDataQuality.AddRange(GetLowDataQualityPOIs(report.Updated));

    Log("Removing " + report.LowDataQuality.Count + " low quality POIs from added/updated");

    //remove references in added/updated to any low quality POIs
    foreach (var p in report.LowDataQuality) { report.Added.Remove(p); }
    foreach (var p in report.LowDataQuality) { report.Updated.Remove(p); }

    //remove updates which only change DateLastStatusUpdate
    var updatesToIgnore = new List<ChargePoint>();
    foreach (var poi in report.Updated)
    {
        var origPOI = masterListCopy.FirstOrDefault(p => p.ID == poi.ID);
        var updatedPOI = poiManager.PreviewPopulatedPOIFromModel(poi);
        var differences = poiManager.CheckDifferences(origPOI, updatedPOI);

        differences.RemoveAll(d => d.Context == ".MetadataValues");
        differences.RemoveAll(d => d.Context == ".DateLastStatusUpdate");
        differences.RemoveAll(d => d.Context == ".UUID");
        differences.RemoveAll(d => d.Context == ".DataProvider.DateLastImported");
        differences.RemoveAll(d => d.Context == ".IsRecentlyVerified");
        differences.RemoveAll(d => d.Context == ".DateLastVerified");
        differences.RemoveAll(d => d.Context == ".UserComments");
        differences.RemoveAll(d => d.Context == ".MediaItems");

        if (!differences.Any())
        {
            updatesToIgnore.Add(poi);
        }
        else
        {
            //differences exist
            CompareLogic compareLogic = new CompareLogic();
            compareLogic.Config.MaxDifferences = 100;
            compareLogic.Config.IgnoreObjectTypes = false;
            compareLogic.Config.IgnoreUnknownObjectTypes = true;
            compareLogic.Config.CompareChildren = true;

            ComparisonResult result = compareLogic.Compare(origPOI, updatedPOI);
            var diffReport = new KellermanSoftware.CompareNetObjects.Reports.UserFriendlyReport();

            result.Differences.RemoveAll(d => d.PropertyName == ".MetadataValues");
            result.Differences.RemoveAll(d => d.PropertyName == ".DateLastStatusUpdate");
            result.Differences.RemoveAll(d => d.PropertyName == ".UUID");
            result.Differences.RemoveAll(d => d.PropertyName == ".DataProvider.DateLastImported");
            result.Differences.RemoveAll(d => d.PropertyName == ".IsRecentlyVerified");
            result.Differences.RemoveAll(d => d.PropertyName == ".DateLastVerified");
            result.Differences.RemoveAll(d => d.PropertyName == ".UserComments");
            result.Differences.RemoveAll(d => d.PropertyName == ".MediaItems");

            System.Diagnostics.Debug.WriteLine("Difference:" + diffReport.OutputString(result.Differences));

            if (!result.Differences.Any())
            {
                updatesToIgnore.Add(poi);
            }
        }
    }

    foreach (var p in updatesToIgnore)
    {
        if (report.Unchanged == null) report.Unchanged = new List<ChargePoint>();
        report.Unchanged.Add(p);
        report.Updated.Remove(p);
    }

    //TODO: if POI is a duplicate, ensure the imported data provider reference/URL is included as reference metadata in OCM's version of the POI

    stopWatch.Stop();
    Log("Deduplicate List took " + stopWatch.Elapsed.TotalSeconds + " seconds");

    //return final processed list ready for applying as inserts/updates
    return cpListSortedByPos.ToList();
}
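// Illustrative post-run summary of the ImportReport lists populated by DeDuplicateList and
// PerformImport above. Property names are taken from their usage in this listing; the summary
// method itself is hypothetical and not part of the original class.
private void LogImportSummary(ImportReport report)
{
    Log("Added: " + (report.Added != null ? report.Added.Count : 0));
    Log("Updated: " + (report.Updated != null ? report.Updated.Count : 0));
    Log("Unchanged: " + (report.Unchanged != null ? report.Unchanged.Count : 0));
    Log("Duplicates: " + (report.Duplicates != null ? report.Duplicates.Count : 0));
    Log("Delisted: " + (report.Delisted != null ? report.Delisted.Count : 0));
    Log("Low data quality (excluded): " + (report.LowDataQuality != null ? report.LowDataQuality.Count : 0));
}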