예제 #1
0
        /// <summary>
        /// Converts a stored edit queue entry into its model representation and attaches the list of
        /// differences between the "before" POI and the proposed edit.
        /// </summary>
        /// <param name="item">Edit queue entry as stored in the data layer.</param>
        /// <param name="cpManager">POI manager used to compute the differences.</param>
        /// <param name="loadCurrentItem">
        /// When true (and the entry carries previous data), the "before" side of the comparison is
        /// re-loaded by ID through a fresh <c>POIManager</c> instead of using the stored snapshot.
        /// </param>
        /// <returns>The model edit queue item with its <c>Differences</c> populated.</returns>
        public Model.EditQueueItem GetItemWithDifferences(Core.Data.EditQueueItem item, POIManager cpManager, bool loadCurrentItem)
        {
            var queueModel = Model.Extensions.EditQueueItem.FromDataModel(item);

            // "before" side: the snapshot stored with the edit...
            Model.ChargePoint before = DeserializePOIFromJSON(queueModel.PreviousData);

            // ...optionally replaced by whatever POIManager.Get currently returns for the same ID
            bool reloadCurrent = loadCurrentItem && before != null;
            if (reloadCurrent)
            {
                var currentItemManager = new POIManager();
                before = currentItemManager.Get(before.ID);
            }

            // "after" side: the proposed edit
            Model.ChargePoint after = DeserializePOIFromJSON(queueModel.EditData);

            queueModel.Differences = cpManager.CheckDifferences(before, after, useObjectCompare: true);

            return queueModel;
        }
예제 #2
0
        /// <summary>
        /// De-duplicates an imported list of POIs against the existing master list and against itself,
        /// then fills <paramref name="report"/> with the added, updated, duplicate, delisted,
        /// low-data-quality and unchanged items.
        /// </summary>
        /// <param name="cpList">
        /// Imported POIs. This list is modified in place: items identified as duplicates are removed and
        /// merged master-list items are appended.
        /// </param>
        /// <param name="updateDuplicate">
        /// When true, a master-list item with the same data provider and provider reference as an imported
        /// item is merged with the imported item (via <c>MergeItemChanges</c>) and queued as an update.
        /// </param>
        /// <param name="coreRefData">
        /// Reference data; passed to <c>PopulateLocationFromGeolocationCache</c> and used to resolve the
        /// submission status with ID 1001 (delisted duplicate).
        /// </param>
        /// <param name="report">
        /// Import report receiving the Added, Updated, Duplicates, Delisted, LowDataQuality and Unchanged
        /// lists. Its <c>ProviderDetails.DataProviderID</c> is read when determining delisted items.
        /// </param>
        /// <param name="allowDupeWithDifferentOperator">
        /// When true, an imported item at the same location as the previous imported item is kept if its
        /// OperatorID differs.
        /// </param>
        /// <param name="fetchExistingFromAPI">
        /// When true the master list is fetched through <c>OCMClient</c>; otherwise it is read through the
        /// local <c>POIManager</c>.
        /// </param>
        /// <returns>
        /// The processed import list sorted by latitude then longitude, or an empty list when no master
        /// list could be loaded. Low-quality and unchanged items are only removed from the report lists,
        /// not from the returned list.
        /// </returns>
        public async Task <List <ChargePoint> > DeDuplicateList(List <ChargePoint> cpList, bool updateDuplicate, CoreReferenceData coreRefData, ImportReport report, bool allowDupeWithDifferentOperator = false, bool fetchExistingFromAPI = false)
        {
            var stopWatch = new Stopwatch();

            stopWatch.Start();

            var poiManager = new POIManager();

            //get list of all current POIs (in relevant countries) including most delisted ones
            int[] countryIds = (from poi in cpList
                                where poi.AddressInfo.Country != null
                                select poi.AddressInfo.Country.ID).Distinct().ToArray();

            List <ChargePoint> masterListCollection;

            if (fetchExistingFromAPI)
            {
                // fetch from API
                masterListCollection = await new OCMClient(false).GetLocations(new SearchFilters
                {
                    CountryIDs              = countryIds,
                    MaxResults              = 1000000,
                    EnableCaching           = true,
                    SubmissionStatusTypeIDs = new int[0]
                });
            }
            else
            {
                // use local database
                APIRequestParams filters = new APIRequestParams {
                    CountryIDs = countryIds, MaxResults = 1000000, EnableCaching = true, SubmissionStatusTypeID = 0
                };

                masterListCollection = poiManager.GetChargePoints(filters);
            }

            //if we failed to get a master list, quit with no result
            //(checked before indexing/copying so a null or empty result never reaches that work)
            if (masterListCollection == null || masterListCollection.Count == 0)
            {
                return(new List <ChargePoint>());
            }

            var spec = new i4o.IndexSpecification <ChargePoint>()
                       .Add(i => i.DataProviderID)
                       .Add(i => i.DataProvidersReference)
            ;

            var masterList = new i4o.IndexSet <ChargePoint>(masterListCollection, spec);

            //full copy (JSON round trip) of every master list item so we have before/after;
            //must be taken before the merge below mutates master list items
            List <ChargePoint> masterListCopy = new List <ChargePoint>();

            foreach (var tmp in masterList)
            {
                masterListCopy.Add(JsonConvert.DeserializeObject <ChargePoint>(JsonConvert.SerializeObject(tmp)));
            }

            List <ChargePoint> duplicateList = new List <ChargePoint>();
            List <ChargePoint> updateList    = new List <ChargePoint>();

            ChargePoint previousCP = null;

            //for each item to be imported, deduplicate by adding to updateList only the items which we don't already have.
            //The sort is materialised once; cpList is not modified until this pass has finished.
            var importSortedByPos = cpList.OrderBy(c => c.AddressInfo.Latitude).ThenBy(c => c.AddressInfo.Longitude).ToList();

            int poiProcessed = 0;
            int totalPOI     = importSortedByPos.Count;

            Stopwatch dupeIdentWatch = new Stopwatch();

            dupeIdentWatch.Start();

            foreach (var item in importSortedByPos)
            {
                var itemGeoPos = new System.Device.Location.GeoCoordinate(item.AddressInfo.Latitude, item.AddressInfo.Longitude);

                //item is duplicate if we already seem to have it based on Data Providers reference, exact title/address/postcode match or approx position match
                var dupeList = masterList.Where(c =>
                                                (
                                                    // c.DataProvider != null &&
                                                    c.DataProviderID == item.DataProviderID && c.DataProvidersReference == item.DataProvidersReference) ||
                                                (c.AddressInfo.Title == item.AddressInfo.Title && c.AddressInfo.AddressLine1 == item.AddressInfo.AddressLine1 && c.AddressInfo.Postcode == item.AddressInfo.Postcode) ||
                                                (GeoManager.IsClose(c.AddressInfo.Latitude, c.AddressInfo.Longitude, item.AddressInfo.Latitude, item.AddressInfo.Longitude) && new System.Device.Location.GeoCoordinate(c.AddressInfo.Latitude, c.AddressInfo.Longitude).GetDistanceTo(itemGeoPos) < DUPLICATE_DISTANCE_METERS) //meters distance apart
                                                );

                if (dupeList.Any())
                {
                    if (updateDuplicate)
                    {
                        //if updating duplicates, get exact matching duplicate based on provider reference and update/merge with this item to update status/merge properties
                        var updatedItem = dupeList.FirstOrDefault(d => d.DataProviderID == (item.DataProvider != null ? item.DataProvider.ID : item.DataProviderID) && d.DataProvidersReference == item.DataProvidersReference);
                        if (updatedItem != null)
                        {
                            //only merge/update from live published items (or items delisted by provider / as non-public).
                            //Master items may have no SubmissionStatus at all, so guard before dereferencing.
                            var existingStatus = updatedItem.SubmissionStatus;
                            if (existingStatus != null &&
                                (existingStatus.IsLive == (bool?)true ||
                                 existingStatus.ID == (int)StandardSubmissionStatusTypes.Delisted_RemovedByDataProvider ||
                                 existingStatus.ID == (int)StandardSubmissionStatusTypes.Delisted_NotPublicInformation))
                            {
                                //item is an exact match from same data provider
                                //overwrite existing with imported data (use import as master)
                                MergeItemChanges(item, updatedItem, false);

                                updateList.Add(updatedItem);
                            }
                        }
                        //else: duplicates are not an exact match
                        //TODO: resolve whether imported data should change duplicate
                    }

                    //item has one or more likely duplicates, add it to list of items to remove
                    duplicateList.Add(item);
                }

                //mark item as duplicate if location/title exactly matches previous entry or lat/long is within DuplicateDistance meters

                if (previousCP != null)
                {
                    //this branch is the most expensive part of dedupe:
                    if (IsDuplicateLocation(item, previousCP, true))
                    {
                        if (!duplicateList.Contains(item))
                        {
                            if (allowDupeWithDifferentOperator && item.OperatorID != previousCP.OperatorID)
                            {
                                Log("Duplicated allowed due to different operator:" + item.AddressInfo.Title);
                            }
                            else
                            {
                                Log("Duplicated item removed:" + item.AddressInfo.Title);
                                duplicateList.Add(item);
                            }
                        }
                    }
                }

                previousCP = item;

                poiProcessed++;

                if (poiProcessed % 300 == 0)
                {
                    System.Diagnostics.Debug.WriteLine("Deduplication: " + poiProcessed + " processed of " + totalPOI);
                }
            }

            dupeIdentWatch.Stop();
            Log("De-dupe pass took " + dupeIdentWatch.Elapsed.TotalSeconds + " seconds. " + (dupeIdentWatch.Elapsed.TotalMilliseconds / cpList.Count) + "ms per item.");

            //remove duplicates from list to apply
            foreach (var dupe in duplicateList)
            {
                cpList.Remove(dupe);
            }

            Log("Duplicates removed from import:" + duplicateList.Count);

            //add updated items (replace duplicates with property changes)

            foreach (var updatedItem in updateList)
            {
                if (!cpList.Contains(updatedItem))
                {
                    cpList.Add(updatedItem);
                }
            }

            Log("Updated items to import:" + updateList.Count);

            //populate missing location info from geolocation cache if possible
            Stopwatch geoWatch = new Stopwatch();

            geoWatch.Start();
            PopulateLocationFromGeolocationCache(cpList, coreRefData);
            geoWatch.Stop();
            Log("Populate Country from Lat/Long took " + geoWatch.Elapsed.TotalSeconds + " seconds. " + (geoWatch.Elapsed.TotalMilliseconds / cpList.Count) + "ms per item.");

            //final pass to catch duplicates present in data source, mark additional items as Delisted Duplicate so we have a record for them
            var submissionStatusDelistedDupe = coreRefData.SubmissionStatusTypes.First(s => s.ID == 1001); //delisted duplicate

            previousCP = null;

            //sort current cp list by position again. Materialised once: cpList is not modified past this
            //point, and a deferred OrderBy would otherwise be re-sorted on every enumeration below.
            //NOTE(review): assumes nothing below changes Latitude/Longitude of these items - confirm for PreviewPopulatedPOIFromModel.
            var finalList = cpList.OrderBy(c => c.AddressInfo.Latitude).ThenBy(c => c.AddressInfo.Longitude).ToList();

            //mark any duplicates in final list as delisted duplicates (submitted to api)
            foreach (var cp in finalList)
            {
                bool isDuplicate = false;
                if (previousCP != null)
                {
                    isDuplicate = IsDuplicateLocation(cp, previousCP, false);
                    if (isDuplicate)
                    {
                        cp.SubmissionStatus       = submissionStatusDelistedDupe;
                        cp.SubmissionStatusTypeID = submissionStatusDelistedDupe.ID;
                        if (previousCP.ID > 0)
                        {
                            if (cp.GeneralComments == null)
                            {
                                cp.GeneralComments = "";
                            }
                            cp.GeneralComments    += " [Duplicate of OCM-" + previousCP.ID + "]";
                            cp.ParentChargePointID = previousCP.ID;
                        }
                    }
                }

                //only a non-duplicate becomes the reference point for the next comparison
                if (!isDuplicate)
                {
                    previousCP = cp;
                }
            }

            report.Added      = finalList.Where(cp => cp.ID == 0).ToList();
            report.Updated    = finalList.Where(cp => cp.ID > 0).ToList();
            report.Duplicates = duplicateList; //TODO: add additional pass of duplicates from above

            //determine which POIs in our master list are no longer referenced in the import.
            //ID membership is pre-computed so the filter does not rescan both lists for every master item.
            var importedById  = finalList.ToLookup(p => p.ID);
            var duplicateById = report.Duplicates.ToLookup(p => p.ID);

            report.Delisted = masterList.Where(cp => cp.DataProviderID == report.ProviderDetails.DataProviderID && cp.SubmissionStatus != null && (cp.SubmissionStatus.IsLive == true || cp.SubmissionStatusTypeID == (int)StandardSubmissionStatusTypes.Imported_UnderReview) &&
                                               !importedById.Contains(cp.ID) && !duplicateById.Contains(cp.ID) &&
                                               cp.UserComments == null && cp.MediaItems == null).ToList();
            //safety check to ensure we're not delisting items just because we have incomplete import data:
            if (cpList.Count < 50)// || (report.Delisted.Count > cpList.Count))
            {
                report.Delisted = new List <ChargePoint>();
            }

            //determine list of low quality POIs (incomplete address info etc)
            report.LowDataQuality = new List <ChargePoint>();
            report.LowDataQuality.AddRange(GetLowDataQualityPOIs(report.Added));
            report.LowDataQuality.AddRange(GetLowDataQualityPOIs(report.Updated));

            Log("Removing " + report.LowDataQuality.Count + " low quality POIs from added/updated");

            //remove references in added/updated to any low quality POIs
            foreach (var p in report.LowDataQuality)
            {
                report.Added.Remove(p);
            }
            foreach (var p in report.LowDataQuality)
            {
                report.Updated.Remove(p);
            }

            //remove updates which only change properties we do not treat as a real change (e.g. datelaststatusupdate)
            string[] ignoredChanges =
            {
                ".MetadataValues",
                ".DateLastStatusUpdate",
                ".UUID",
                ".DataProvider.DateLastImported",
                ".IsRecentlyVerified",
                ".DateLastVerified",
                ".UserComments",
                ".MediaItems"
            };

            //pre-merge copies keyed by ID; a lookup tolerates repeated IDs and yields the first copy, like FirstOrDefault did
            var originalById = masterListCopy.ToLookup(p => p.ID);

            var updatesToIgnore = new List <ChargePoint>();

            foreach (var poi in report.Updated)
            {
                var origPOI     = originalById[poi.ID].FirstOrDefault();
                var updatedPOI  = poiManager.PreviewPopulatedPOIFromModel(poi);
                var differences = poiManager.CheckDifferences(origPOI, updatedPOI);
                differences.RemoveAll(d => ignoredChanges.Contains(d.Context));

                if (!differences.Any())
                {
                    updatesToIgnore.Add(poi);
                }
                else
                {
                    //differences exist: run the object comparer as a second opinion and log what it finds
                    CompareLogic compareLogic = new CompareLogic();
                    compareLogic.Config.MaxDifferences           = 100;
                    compareLogic.Config.IgnoreObjectTypes        = false;
                    compareLogic.Config.IgnoreUnknownObjectTypes = true;
                    compareLogic.Config.CompareChildren          = true;
                    ComparisonResult result = compareLogic.Compare(origPOI, updatedPOI);

                    var diffReport = new KellermanSoftware.CompareNetObjects.Reports.UserFriendlyReport();
                    result.Differences.RemoveAll(d => ignoredChanges.Contains(d.PropertyName));
                    System.Diagnostics.Debug.WriteLine("Difference:" + diffReport.OutputString(result.Differences));

                    if (!result.Differences.Any())
                    {
                        updatesToIgnore.Add(poi);
                    }
                }
            }

            foreach (var p in updatesToIgnore)
            {
                if (report.Unchanged == null)
                {
                    report.Unchanged = new List <ChargePoint>();
                }
                report.Unchanged.Add(p);
                report.Updated.Remove(p);
            }

            //TODO: if POI is a duplicate ensure imported data provider reference/URL is included as reference metadata in OCM's version of the POI

            stopWatch.Stop();
            Log("Deduplicate List took " + stopWatch.Elapsed.TotalSeconds + " seconds");

            //return final processed list ready for applying as insert/updates
            return(finalList);
        }