public void that_there_are_no_duplicates_with_distinct_venues()
        {
            // Local factory so that each venue below reads as a single row of data.
            Venue CreateVenue(string id, string address1, string venueName, string postCode)
            {
                return new Venue
                {
                    ID        = id,
                    Address1  = address1,
                    VenueName = venueName,
                    PostCode  = postCode
                };
            }

            // Arrange: three venues that share no address, name or postcode.
            var venues = new List <Venue>
            {
                CreateVenue(Guid.Empty.ToString(),    "address1", "venue1", "postcode1"),
                CreateVenue(Guid.NewGuid().ToString(), "address2", "venue2", "postcode2"),
                CreateVenue(Guid.NewGuid().ToString(), "address3", "venue3", "postcode3")
            };

            // Act: group the venues using the comparer under test.
            var groups = venues.GroupBy(venue => venue, new VenueEqualityComparer());

            // Assert: every venue ends up in a group of its own.
            groups.Should().HaveCount(3);
        }
        public void that_there_are_two_distinct_addresses()
        {
            // Local factory so that each venue below reads as a single row of data.
            Venue CreateVenue(string id, string address1, string venueName, string postCode)
            {
                return new Venue
                {
                    ID        = id,
                    Address1  = address1,
                    VenueName = venueName,
                    PostCode  = postCode
                };
            }

            // Arrange: the first venue has its own Address1; the other two are
            // identical apart from their IDs.
            var venues = new List <Venue>
            {
                CreateVenue(Guid.Empty.ToString(),    "address1", "test", "test"),
                CreateVenue(Guid.Empty.ToString(),    "test",     "test", "test"),
                CreateVenue(Guid.NewGuid().ToString(), "test",     "test", "test")
            };

            // Act: group the venues using the comparer under test.
            var groups = venues.GroupBy(venue => venue, new VenueEqualityComparer());

            // Assert: two groups are expected.
            groups.Should().HaveCount(2);
        }
        public void can_group_by_single_venue()
        {
            // Arrange: a list holding exactly one venue.
            var onlyVenue = new Venue
            {
                ID        = Guid.Empty.ToString(),
                Address1  = "address1",
                VenueName = "venue1",
                PostCode  = "postcode1"
            };
            var venues = new List <Venue> { onlyVenue };

            // Act: group the venues using the comparer under test.
            var groups = venues.GroupBy(venue => venue, new VenueEqualityComparer());

            // Assert: a single venue produces a single group.
            groups.Should().HaveCount(1);
        }
        public void that_can_groupby_address_and_ukprn()
        {
            // One comparer instance is shared by both scenarios.
            var comparer = new VenueEqualityComparer();

            // Scenario 1: identical address, name and postcode, but three different UKPRNs.
            var venuesWithDistinctUkprns = new List <Venue>
            {
                new Venue { ID = Guid.NewGuid().ToString(), Address1 = "address1", VenueName = "address1", PostCode = "postcode1", UKPRN = 1 },
                new Venue { ID = Guid.NewGuid().ToString(), Address1 = "address1", VenueName = "address1", PostCode = "postcode1", UKPRN = 2 },
                new Venue { ID = Guid.NewGuid().ToString(), Address1 = "address1", VenueName = "address1", PostCode = "postcode1", UKPRN = 3 }
            };

            // Scenario 2: identical address, name and postcode; two venues share UKPRN 1, one has UKPRN 3.
            var venuesWithRepeatedUkprn = new List <Venue>
            {
                new Venue { ID = Guid.NewGuid().ToString(), Address1 = "address1", VenueName = "address1", PostCode = "postcode1", UKPRN = 1 },
                new Venue { ID = Guid.NewGuid().ToString(), Address1 = "address1", VenueName = "address1", PostCode = "postcode1", UKPRN = 1 },
                new Venue { ID = Guid.NewGuid().ToString(), Address1 = "address1", VenueName = "address1", PostCode = "postcode1", UKPRN = 3 }
            };

            // Act: group each list using the comparer under test.
            var groupsForDistinctUkprns = venuesWithDistinctUkprns.GroupBy(venue => venue, comparer);
            var groupsForRepeatedUkprn  = venuesWithRepeatedUkprn.GroupBy(venue => venue, comparer);

            // Assert: three groups for three UKPRNs, two groups when a UKPRN repeats.
            groupsForDistinctUkprns.Should().HaveCount(3);
            groupsForRepeatedUkprn.Should().HaveCount(2);
        }
        // Example #5
        // 0
        /// <summary>
        /// For every UKPRN in the provider whitelist, groups the provider's live venues with
        /// <c>VenueEqualityComparer</c>, archives the duplicates in each group, re-points course runs
        /// and apprenticeship locations from archived venues to the surviving venue, and uploads a
        /// CSV log of every change to blob storage as "ArchivedVenues".
        /// </summary>
        /// <param name="input">Not read by this method (see the linked SDK issue).</param>
        /// <param name="configuration">Supplies "BlobStorageSettings:Container" and "CosmosDbSettings:DatabaseId".</param>
        /// <param name="cosmosDbHelper">Provides the document client used for all Cosmos DB reads and writes.</param>
        /// <param name="blobHelper">Provides the blob container holding the whitelist and receiving the CSV log.</param>
        /// <param name="loggerFactory">Creates the logger used for per-provider progress and errors.</param>
        public static async Task Run(
            string input,  // Work around https://github.com/Azure/azure-functions-vs-build-sdk/issues/168
            [Inject] IConfigurationRoot configuration,
            [Inject] ICosmosDbHelper cosmosDbHelper,
            [Inject] IBlobStorageHelper blobHelper,
            [Inject] ILoggerFactory loggerFactory)
        {
            var whitelistFileName          = "ProviderWhiteList.txt";
            var venuesCollectionId         = "venues";
            var coursesCollectionId        = "courses";
            var apprenticeshipCollectionId = "apprenticeship";
            var blobContainer  = configuration["BlobStorageSettings:Container"];
            var databaseId     = configuration["CosmosDbSettings:DatabaseId"];
            var documentClient = cosmosDbHelper.GetClient();
            var updatedBy      = "ArchiveVenues";
            // NOTE(review): the logger category is ArchiveCourses although this job stamps records
            // with "ArchiveVenues" - confirm this is intended.
            var logger         = loggerFactory.CreateLogger(typeof(ArchiveCourses));
            var whitelist      = await GetProviderWhiteList();

            var venueCollectionUri          = UriFactory.CreateDocumentCollectionUri(databaseId, venuesCollectionId);
            var coursesCollectionUri        = UriFactory.CreateDocumentCollectionUri(databaseId, coursesCollectionId);
            var apprenticeshipCollectionUri = UriFactory.CreateDocumentCollectionUri(databaseId, apprenticeshipCollectionId);
            var totalArchived = 0;
            var totalCoursesReferencingOldVenue      = 0;
            var totalApprenticeshipReferenceoldVenue = 0;

            using (var logStream = new MemoryStream())
                using (var logStreamWriter = new StreamWriter(logStream))
                    using (var logCsvWriter = new CsvWriter(logStreamWriter, CultureInfo.InvariantCulture))
                    {
                        // CSV header; every row written below follows this column order.
                        logCsvWriter.WriteField("UKPRN");
                        logCsvWriter.WriteField("Archived VenueId");
                        logCsvWriter.WriteField("Archived Venue Name");
                        logCsvWriter.WriteField("Archived Venue Address1");
                        logCsvWriter.WriteField("New VenueId");
                        logCsvWriter.WriteField("New Venue Name");
                        logCsvWriter.WriteField("New Venue Address1");
                        logCsvWriter.WriteField("Course Run Id");
                        logCsvWriter.WriteField("ApprenticeshipLocation Id");
                        logCsvWriter.WriteField("Message");
                        logCsvWriter.WriteField("Type");

                        logCsvWriter.NextRecord();

                        foreach (var ukprn in whitelist)
                        {
                            // A failure for one provider is logged and must not stop the remaining providers.
                            try
                            {
                                int totalArchivedForProvider = 0;
                                var allVenuesForProvider     = await GetVenues(ukprn);

                                var allCoursesForProvider = await GetCourses(ukprn);

                                var allApprenticeshipsForProvider = await GetApprenticeships(ukprn);

                                // Identify duplicates: each group holds the venues the comparer treats as equal.
                                var comp         = new VenueEqualityComparer();
                                var uniqueGroups = allVenuesForProvider.GroupBy(x => x, comp);

                                // Archive duplicate venues, one group at a time.
                                foreach (var item in uniqueGroups)
                                {
                                    var venuesInGroup = item.ToList();

                                    // When Tribal data was migrated, locations and venues were migrated as separate
                                    // records even when the address was the same. The code below attempts to merge the two.
                                    // Materialised once so both lookups below see the same snapshot.
                                    var migratedVenues      = venuesInGroup.Where(x => x.CreatedBy == "VenueMigrator" && x.UpdatedBy != updatedBy).ToList(); // expecting more than one here
                                    var tribalLocationVenue = migratedVenues.FirstOrDefault(x => x.LocationId != null);                                       // migrated location
                                    var tribalVenue         = migratedVenues.FirstOrDefault(x => x.VenueID != 0);                                             // migrated venue
                                    var currentVenue        = MergeVenue(tribalLocationVenue, tribalVenue, out string venueType);

                                    // If there is no current venue, then either the venues were created by a previous migration
                                    // (CreatedBy != "VenueMigrator"), in which case all of them must be archived,
                                    // OR
                                    // this job has already processed the group (UpdatedBy == "ArchiveVenues"), in which case
                                    // those records are skipped because the duplicates have already been removed.
                                    if (currentVenue == null)
                                    {
                                        foreach (var archivingVenue in venuesInGroup)
                                        {
                                            // Only archive venues that have not already been processed by this job.
                                            if (archivingVenue.UpdatedBy != updatedBy)
                                            {
                                                await ArchiveVenue(archivingVenue, ukprn);

                                                logCsvWriter.WriteField(ukprn);
                                                logCsvWriter.WriteField(archivingVenue.ID);
                                                logCsvWriter.WriteField(archivingVenue.VenueName);
                                                logCsvWriter.WriteField($"{archivingVenue.Address1},{archivingVenue.Address2}, {archivingVenue.PostCode}");
                                                logCsvWriter.WriteField("");
                                                logCsvWriter.WriteField("");
                                                logCsvWriter.WriteField("");
                                                logCsvWriter.WriteField("");
                                                logCsvWriter.WriteField(""); //ApprenticeshipLocationId
                                                logCsvWriter.WriteField($"All old Venues archived, there were {venuesInGroup.Count} duplicate Venues.");
                                                logCsvWriter.WriteField("Venue");
                                                logCsvWriter.NextRecord();

                                                // NOTE(review): venues archived in this branch are not added to
                                                // totalArchived / totalArchivedForProvider - confirm whether they should be.
                                            }
                                        }

                                        // Nothing to re-point for this group; move on to the next group of duplicates.
                                        continue;
                                    }

                                    // Every venue in the group other than the surviving one will be archived.
                                    // This already contains the migrated Tribal venue whenever it is a different
                                    // record from the surviving venue, so it must not be added a second time
                                    // (and must never be added when it IS the surviving venue).
                                    var nonCurrentVenues = venuesInGroup.Where(x => x.ID != currentVenue.ID).ToList();

                                    // If both a location venue and a venue were found, persist the surviving venue
                                    // so that it records the merge (MergeVenue copied the VenueID onto it).
                                    if (venueType == "Both")
                                    {
                                        await ReplaceMergedREcord(currentVenue);
                                    }

                                    // Courses that have course runs referencing a venue about to be archived.
                                    var courseRunsOldVenue = allCoursesForProvider.Where(p => p.CourseRuns.Any(x => nonCurrentVenues.Any(y => Guid.Parse(y.ID) == x.VenueId))).ToList();
                                    totalCoursesReferencingOldVenue += courseRunsOldVenue.Count;

                                    // Apprenticeships that have locations referencing a venue about to be archived.
                                    var apprenticeshipsOldVenue = allApprenticeshipsForProvider.Where(p => p.ApprenticeshipLocations.Any(x => nonCurrentVenues.Any(y => Guid.Parse(y.ID) == x.LocationGuidId))).ToList();
                                    totalApprenticeshipReferenceoldVenue += apprenticeshipsOldVenue.Count;

                                    Console.WriteLine($"Archiving {nonCurrentVenues.Count} - {ukprn} - {currentVenue.Address1}");

                                    // Archive each duplicate and re-point whatever still references it.
                                    foreach (var archivingVenue in nonCurrentVenues)
                                    {
                                        await ArchiveVenue(archivingVenue, ukprn);

                                        logCsvWriter.WriteField(ukprn);
                                        logCsvWriter.WriteField(archivingVenue.ID);
                                        logCsvWriter.WriteField(archivingVenue.VenueName);
                                        logCsvWriter.WriteField($"{archivingVenue.Address1},{archivingVenue.Address2}, {archivingVenue.PostCode}");
                                        logCsvWriter.WriteField(currentVenue.ID);
                                        logCsvWriter.WriteField(currentVenue.VenueName);
                                        logCsvWriter.WriteField($"{currentVenue.Address1},{currentVenue.Address2}, {currentVenue.PostCode}");
                                        logCsvWriter.WriteField("");
                                        logCsvWriter.WriteField(""); //ApprenticeshipLocationId
                                        logCsvWriter.WriteField($"There were {nonCurrentVenues.Count} duplicate Venues");
                                        logCsvWriter.WriteField("Venue");
                                        logCsvWriter.NextRecord();

                                        totalArchived++;
                                        totalArchivedForProvider++;

                                        var archivingVenueId = Guid.Parse(archivingVenue.ID);

                                        // Re-point course runs that reference the venue being archived.
                                        foreach (var course in courseRunsOldVenue)
                                        {
                                            // Only the runs pointing at THIS venue, so that each log row names the
                                            // venue the run really referenced.
                                            var courseRunsToRepoint = course.CourseRuns.Where(p => p.VenueId == archivingVenueId).ToList();

                                            // Nothing changed on this course for this venue: skip the document write.
                                            if (courseRunsToRepoint.Count == 0)
                                            {
                                                continue;
                                            }

                                            foreach (var courseRun in courseRunsToRepoint)
                                            {
                                                // Update the course run to point at the surviving venue.
                                                courseRun.VenueId   = Guid.Parse(currentVenue.ID);
                                                courseRun.UpdatedBy = updatedBy;

                                                // Log the change.
                                                logCsvWriter.WriteField(ukprn);
                                                logCsvWriter.WriteField(archivingVenue.ID);
                                                logCsvWriter.WriteField(archivingVenue.VenueName);
                                                logCsvWriter.WriteField($"{archivingVenue.Address1},{archivingVenue.Address2}, {archivingVenue.PostCode}");
                                                logCsvWriter.WriteField(currentVenue.ID);
                                                logCsvWriter.WriteField(currentVenue.VenueName);
                                                logCsvWriter.WriteField($"{currentVenue.Address1},{currentVenue.Address2}, {currentVenue.PostCode}");
                                                logCsvWriter.WriteField(courseRun.CourseInstanceId);
                                                logCsvWriter.WriteField(""); //ApprenticeshipLocationId
                                                logCsvWriter.WriteField($"There were {nonCurrentVenues.Count} duplicate Venues");
                                                logCsvWriter.WriteField("Course");
                                                logCsvWriter.NextRecord();
                                            }

                                            // Persist the course with its re-pointed runs.
                                            var coursedocumentLink = UriFactory.CreateDocumentUri(databaseId, coursesCollectionId, course.id.ToString());
                                            await documentClient.ReplaceDocumentAsync(coursedocumentLink, course, new RequestOptions()
                                            {
                                                PartitionKey = new Microsoft.Azure.Documents.PartitionKey(ukprn)
                                            });
                                        }

                                        // Re-point apprenticeship locations that reference the venue being archived.
                                        foreach (var apprenticeship in apprenticeshipsOldVenue)
                                        {
                                            // Only the locations pointing at THIS venue (same reasoning as for course runs).
                                            var locationsToRepoint = apprenticeship.ApprenticeshipLocations.Where(p => p.LocationGuidId == archivingVenueId).ToList();

                                            // Nothing changed on this apprenticeship for this venue: skip the document write.
                                            if (locationsToRepoint.Count == 0)
                                            {
                                                continue;
                                            }

                                            foreach (var location in locationsToRepoint)
                                            {
                                                // Update the apprenticeship location to point at the surviving venue.
                                                location.LocationGuidId = Guid.Parse(currentVenue.ID);
                                                location.UpdatedBy      = updatedBy;
                                                location.LocationId     = currentVenue.LocationId;

                                                // Log the change.
                                                logCsvWriter.WriteField(ukprn);
                                                logCsvWriter.WriteField(archivingVenue.ID);
                                                logCsvWriter.WriteField(archivingVenue.VenueName);
                                                logCsvWriter.WriteField($"{archivingVenue.Address1},{archivingVenue.Address2}, {archivingVenue.PostCode}");
                                                logCsvWriter.WriteField(currentVenue.ID);
                                                logCsvWriter.WriteField(currentVenue.VenueName);
                                                logCsvWriter.WriteField($"{currentVenue.Address1},{currentVenue.Address2}, {currentVenue.PostCode}");
                                                logCsvWriter.WriteField(""); //Course Instance
                                                logCsvWriter.WriteField(location.Id);
                                                logCsvWriter.WriteField($"There were {nonCurrentVenues.Count} duplicate Venues");
                                                logCsvWriter.WriteField("Apprenticeship");
                                                logCsvWriter.NextRecord();
                                            }

                                            // Persist the apprenticeship with its re-pointed locations.
                                            // NOTE(review): no PartitionKey is supplied here although GetApprenticeships
                                            // queries this collection with PartitionKey(ukprn) - confirm this is correct.
                                            var apprenticeshipDocumentLink = UriFactory.CreateDocumentUri(databaseId, apprenticeshipCollectionId, apprenticeship.id.ToString());
                                            await documentClient.ReplaceDocumentAsync(apprenticeshipDocumentLink, apprenticeship, new RequestOptions());
                                        }
                                    }
                                }
                                Console.WriteLine($"Archived {totalArchivedForProvider} Venues for {ukprn}");
                                logger.LogInformation($"Archived {totalArchivedForProvider} Venues for {ukprn}");
                            }
                            catch (Exception e)
                            {
                                // Pass the exception itself so the stack trace is kept, and say which provider failed.
                                logger.LogError(e, "Failed to archive venues for {UKPRN}: {Message}", ukprn, e.Message);
                            }
                        }

                        // Upload log CSV to blob storage
                        {
                            logStreamWriter.Flush();

                            logStream.Seek(0L, SeekOrigin.Begin);

                            var blob = blobHelper.GetBlobContainer(blobContainer).GetBlockBlobReference("ArchivedVenues");
                            await blob.UploadFromStreamAsync(logStream);
                        }
                    }


            Console.WriteLine($"Total Course runs that reference an old venue: {totalCoursesReferencingOldVenue}");
            Console.WriteLine($"Total Apparenticeships that reference an old Venue {totalApprenticeshipReferenceoldVenue}");
            Console.WriteLine($"Total Archived Venues {totalArchived}");

            // Marks the venue as archived, stamps it with this job's name and the current time, and saves it.
            // The ukprn parameter is not used by the body.
            async Task ArchiveVenue(Venue archivingVenue, int ukprn)
            {
                archivingVenue.Status      = VenueStatus.Archived;
                archivingVenue.UpdatedBy   = updatedBy;
                archivingVenue.DateUpdated = DateTime.Now;
                var documentLink = UriFactory.CreateDocumentUri(databaseId, venuesCollectionId, archivingVenue.ID.ToString());
                await documentClient.ReplaceDocumentAsync(documentLink, archivingVenue, new RequestOptions());
            }

            // Saves the surviving (merged) venue, stamped with this job's name and the current time.
            // Its status is left unchanged.
            async Task ReplaceMergedREcord(Venue mergedRecord)
            {
                mergedRecord.UpdatedBy   = updatedBy;
                mergedRecord.DateUpdated = DateTime.Now;
                var documentLink = UriFactory.CreateDocumentUri(databaseId, venuesCollectionId, mergedRecord.ID.ToString());
                await documentClient.ReplaceDocumentAsync(documentLink, mergedRecord, new RequestOptions());
            }

            // Chooses the venue that survives for a group: the location venue when present, otherwise the
            // venue, otherwise null. selectedVenue reports which inputs were non-null
            // ("Both", "Venue", "Location" or "None").
            Venue MergeVenue(Venue locationVenue, Venue venue, out string selectedVenue)
            {
                // Default to the first non-null venue; the location venue is preferred.
                var ven = locationVenue ?? venue;

                if (locationVenue != null && venue != null)
                {
                    selectedVenue = "Both";

                    // Two records exist, one carrying a venue id and one carrying a location id:
                    // merge them by copying the venue id onto the surviving location venue.
                    ven.VenueID = venue.VenueID;
                }
                else if (venue != null)
                {
                    selectedVenue = "Venue";
                }
                else if (locationVenue != null)
                {
                    selectedVenue = "Location";
                }
                else
                {
                    selectedVenue = "None";
                }

                return(ven);
            }

            // Reads every non-archived course for the provider, following continuation tokens page by page.
            async Task <List <Course> > GetCourses(int ukprn)
            {
                var courses = new List <Course>();
                string continuation = null;

                do
                {
                    var feedOptions = new FeedOptions()
                    {
                        RequestContinuation = continuation,
                        PartitionKey        = new Microsoft.Azure.Documents.PartitionKey(ukprn)
                    };

                    var queryResponse = await documentClient.CreateDocumentQuery <Course>(coursesCollectionUri, feedOptions)
                                        .Where(p => p.ProviderUKPRN == ukprn && p.CourseStatus != CourseDirectory.Models.Enums.RecordStatus.Archived)
                                        .AsDocumentQuery()
                                        .ExecuteNextAsync <Course>();

                    courses.AddRange(queryResponse);

                    continuation = queryResponse.ResponseContinuation;
                }while (continuation != null);
                return(courses);
            }

            // Reads every live venue for the provider, following continuation tokens page by page.
            async Task <List <Venue> > GetVenues(int ukprn)
            {
                var    venues       = new List <Venue>();
                string continuation = null;

                do
                {
                    var feedOptions = new FeedOptions()
                    {
                        RequestContinuation = continuation
                    };

                    var queryResponse = await documentClient.CreateDocumentQuery <Venue>(venueCollectionUri, feedOptions)
                                        .Where(p => p.UKPRN == ukprn && p.Status == VenueStatus.Live)
                                        .AsDocumentQuery()
                                        .ExecuteNextAsync <Venue>();

                    venues.AddRange(queryResponse);

                    continuation = queryResponse.ResponseContinuation;
                }while (continuation != null);
                return(venues);
            }

            // Reads the non-archived apprenticeships for the provider. If a page fails to load, paging stops
            // and whatever was read so far is returned.
            async Task <List <Apprenticeship> > GetApprenticeships(int ukprn)
            {
                var    apprenticeships = new List <Apprenticeship>();
                string continuation    = null;

                do
                {
                    var feedOptions = new FeedOptions()
                    {
                        RequestContinuation = continuation,
                        PartitionKey        = new Microsoft.Azure.Documents.PartitionKey(ukprn)
                    };

                    // try/catch required as there are Apprenticeship records that are not valid (venueId is null in cosmos).
                    try
                    {
                        var queryResponse = await documentClient.CreateDocumentQuery <Apprenticeship>(apprenticeshipCollectionUri, feedOptions)
                                            .Where(p => p.ProviderUKPRN == ukprn && p.RecordStatus != CourseDirectory.Models.Enums.RecordStatus.Archived)
                                            .AsDocumentQuery()
                                            .ExecuteNextAsync <Apprenticeship>();

                        apprenticeships.AddRange(queryResponse);
                        continuation = queryResponse.ResponseContinuation;
                    }
                    catch (Exception ex)
                    {
                        // Still best-effort, but no longer silent: record that the result may be incomplete.
                        logger.LogWarning(ex, "Stopped reading apprenticeships for {UKPRN} after {Count} records", ukprn, apprenticeships.Count);
                        continuation = null;
                    }
                }while (continuation != null);

                return(apprenticeships);
            }

            // Downloads the whitelist blob and parses it as one integer UKPRN per line; blank lines are ignored.
            async Task <ISet <int> > GetProviderWhiteList()
            {
                var blob = blobHelper.GetBlobContainer(blobContainer).GetBlockBlobReference(whitelistFileName);

                var results = new HashSet <int>();

                using (var ms = new MemoryStream())
                {
                    await blob.DownloadToStreamAsync(ms);

                    ms.Seek(0L, SeekOrigin.Begin);

                    using (var reader = new StreamReader(ms))
                    {
                        string line;

                        while ((line = reader.ReadLine()) != null)
                        {
                            // Whitespace-only lines would make int.Parse throw, so skip them as well as empty ones.
                            if (string.IsNullOrWhiteSpace(line))
                            {
                                continue;
                            }

                            results.Add(int.Parse(line, CultureInfo.InvariantCulture));
                        }
                    }
                }

                return(results);
            }
        }