Пример #1
0
        /// <summary>
        /// Reads the schools reference CSV, enriches every row that has a GIAS match
        /// (URN, school type, placeholder school name and its performance rows) and
        /// writes the rows out as JSON files named <c>schools_batch_N.json</c>,
        /// 1000 rows per file.
        /// </summary>
        /// <param name="log">Callback that receives progress messages.</param>
        /// <param name="schoolsRefCsvFilePath">Path of the schools reference CSV that is read row by row.</param>
        /// <param name="schoolsPerfCsvFilePath">Path of the schools performance CSV; its rows are grouped by their <c>DFESNumber</c> column.</param>
        /// <param name="giasCsvFilePath">Path handed to the <c>GiasLookup</c> constructor.</param>
        /// <remarks>
        /// Rows without a GIAS match are left unmodified but are still part of the
        /// serialized batch. Output files use relative paths.
        /// </remarks>
        public static void Load(Action<string> log, string schoolsRefCsvFilePath, string schoolsPerfCsvFilePath, string giasCsvFilePath)
        {
            var stopwatch = Stopwatch.StartNew();
            log($"{nameof(SchoolsLoader)} started");

            ILookup<string, object> performanceLookup;
            var decimalConverter = new NumberJsonConverter();

            using (var reader = new StreamReader(schoolsPerfCsvFilePath))
            using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
            {
                // ToLookup enumerates the records immediately, so the whole file is
                // consumed before the reader is disposed.
                var records = csv.GetRecords<dynamic>();
                performanceLookup = records.ToLookup(r => (string)r.DFESNumber);
                log($"{schoolsPerfCsvFilePath} loaded");
            }

            var giasLookup = new GiasLookup(log, giasCsvFilePath);

            // The options do not change between batches, so build them once.
            var parallelOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = MAX_PARALLELISM
            };

            using (var reader = new StreamReader(schoolsRefCsvFilePath))
            using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
            {
                var records = csv.GetRecords<dynamic>();
                log($"{schoolsRefCsvFilePath} loaded");

                var batchNumber = 1;

                foreach (IEnumerable<dynamic> batch in records.Batch(1000))
                {
                    Parallel.ForEach(batch, parallelOptions, schoolRow =>
                    {
                        var gias = giasLookup[schoolRow.DFESNumber];
                        if (gias == null)
                        {
                            // No GIAS match: the row stays as read from the CSV.
                            return;
                        }

                        schoolRow.URN        = gias.urn;
                        schoolRow.SchoolType = gias.typeofestablishmentname;
                        schoolRow.SchoolName = $"Test School {gias.urn}";

                        // An ILookup yields an empty sequence for unknown keys, so
                        // schools without performance rows get an empty projection.
                        var perf = (IEnumerable<dynamic>)performanceLookup[schoolRow.DFESNumber];
                        schoolRow.performance = perf.Select(r => new { r.Code, r.SetName, r.CodeValue });

                        System.Threading.Interlocked.Increment(ref _processedCount);
                    });

                    File.WriteAllText(
                        $"schools_batch_{batchNumber}.json",
                        JsonConvert.SerializeObject(batch, decimalConverter));

                    log($"{batchNumber} batches processed");
                    batchNumber++;
                }
            }

            stopwatch.Stop();

            // TimeSpan.Minutes is only the 0-59 component; TotalMinutes keeps the
            // report correct for runs that take an hour or longer.
            var elapsed = stopwatch.Elapsed;
            log($"{nameof(SchoolsLoader)} finished in {(int)elapsed.TotalMinutes}m {elapsed.Seconds}s");
        }
        /// <summary>
        /// Reads the pupils CSV, attaches school and performance data to each row,
        /// anonymises names, and writes the rows out as JSON files named
        /// <c>pupils_batch_N.json</c>, 1000 rows per file. Pupils listed in the
        /// amendments' new-pupil collection are written to one additional file.
        /// </summary>
        /// <param name="log">Callback that receives progress messages.</param>
        /// <param name="pupilsCsvFilePath">Path of the pupils CSV that is read row by row.</param>
        /// <param name="pupilsPerfCsvFilePath">Path of the pupils performance CSV; its rows are grouped by their <c>PortlandStudentID</c> column.</param>
        /// <param name="giasCsvFilePath">Path handed to the <c>GiasLookup</c> constructor.</param>
        /// <param name="amendmentsCsvFilePath">Optional path handed to the <c>AmendmentLookup</c> constructor; defaults to <c>null</c>.</param>
        /// <remarks>
        /// Skipped pupils are recorded for the final log message but their rows are
        /// left unmodified and still serialized with the batch. Output files use
        /// relative paths.
        /// </remarks>
        public static void Load(
            Action<string> log, string pupilsCsvFilePath, string pupilsPerfCsvFilePath, string giasCsvFilePath, string amendmentsCsvFilePath = null)
        {
            var stopwatch = Stopwatch.StartNew();
            log($"{nameof(PupilsLoader)} started");

            ILookup<string, object> performanceLookup;
            var decimalConverter = new NumberJsonConverter();

            using (var reader = new StreamReader(pupilsPerfCsvFilePath))
            using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
            {
                // ToLookup enumerates the records immediately, so the whole file is
                // consumed before the reader is disposed.
                var records = csv.GetRecords<dynamic>();
                performanceLookup = records.ToLookup(r => (string)r.PortlandStudentID);
                log($"{pupilsPerfCsvFilePath} loaded");
            }

            var giasLookup       = new GiasLookup(log, giasCsvFilePath);
            var amendmentsLookup = new AmendmentLookup(log, amendmentsCsvFilePath);
            var skippedPupils    = new List<string>();

            // List<T> is not safe for concurrent writers; every Add performed inside
            // Parallel.ForEach below is serialized through this gate.
            var skippedPupilsGate = new object();

            // The options do not change between batches, so build them once.
            var parallelOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = MAX_PARALLELISM
            };

            using (var reader = new StreamReader(pupilsCsvFilePath))
            using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
            {
                var records = csv.GetRecords<dynamic>();
                log($"{pupilsCsvFilePath} loaded");

                var batchNumber = 1;

                // Identifying columns stripped from each performance row before output.
                string[] resultsFieldsToRemove = new[]
                {
                    "PortlandStudentID",
                    "DFESNumber",
                    "CandidateNumber",
                    "SchoolCandidateNumber"
                };

                foreach (IEnumerable<dynamic> batch in records.Batch(1000))
                {
                    Parallel.ForEach(batch, parallelOptions, pupilRow =>
                    {
                        var laestab = (string)pupilRow.DFESNumber;
                        var gias    = giasLookup[laestab];
                        if (gias == null)
                        {
                            string skippedEntry = $"{pupilRow.CandidateNumber} ({pupilRow.DFESNumber})";
                            lock (skippedPupilsGate)
                            {
                                skippedPupils.Add(skippedEntry);
                            }
                            return;
                        }

                        var anonymisedName = ConvertToAlphaCharaters((string)pupilRow.CandidateNumber);
                        if (string.IsNullOrEmpty(anonymisedName))
                        {
                            string skippedEntry = $"{pupilRow.PortlandStudentID}";
                            lock (skippedPupilsGate)
                            {
                                skippedPupils.Add(skippedEntry);
                            }
                            return;
                        }

                        var pupilID      = string.Empty; // (string) pupilRow.UPN;
                        var amendmentKey = laestab + pupilID;
                        if (amendmentsLookup.ExistingPupilLookup.ContainsKey(amendmentKey))
                        {
                            // Existing pupil with an amendment: re-point the row at the
                            // amended URN and the matching establishment number.
                            var existingAmendment = amendmentsLookup.ExistingPupilLookup[amendmentKey];
                            var newURN            = (string)existingAmendment.urn;
                            var newLAEstab        = giasLookup.UrnLookup[newURN];
                            pupilRow.URN          = newURN;
                            pupilRow.DFESNumber   = newLAEstab;
                        }
                        else
                        {
                            pupilRow.URN      = gias.urn;
                            pupilRow.Surname  = $"{anonymisedName}S";
                            pupilRow.Forename = $"{anonymisedName}F";
                        }

                        // An ILookup yields an empty sequence for unknown keys, so
                        // pupils without performance rows get an empty projection.
                        var perf = (IEnumerable<dynamic>)performanceLookup[pupilRow.PortlandStudentID];

                        // The projection is lazy: fields are removed when the batch is
                        // serialized, not inside the parallel loop.
                        pupilRow.performance = perf.Select(r =>
                        {
                            var resultFields = (IDictionary<string, object>)r;

                            foreach (var fieldName in resultsFieldsToRemove)
                            {
                                resultFields.Remove(fieldName);
                            }

                            return resultFields;
                        });

                        System.Threading.Interlocked.Increment(ref _processedCount);
                    });

                    File.WriteAllText(
                        $"pupils_batch_{batchNumber}.json",
                        JsonConvert.SerializeObject(batch, decimalConverter));

                    log($"{batchNumber} batches processed");
                    batchNumber++;
                }

                // Pupils that exist only in the amendments are emitted as one extra
                // file; batchNumber is already one past the last CSV batch here.
                var addPupils = new List<dynamic>();
                foreach (var pupil in amendmentsLookup.NewPupilLookup)
                {
                    var     urn      = (string)pupil.urn;
                    dynamic addPupil = new ExpandoObject();
                    addPupil.DFESNumber      = giasLookup.UrnLookup[urn];
                    addPupil.id              = pupil.pupil_id;
                    addPupil.Forename        = pupil.forename;
                    addPupil.Surname         = pupil.surname;
                    // Any value other than "Male" is written as "F".
                    addPupil.Gender          = (string)pupil.gender == "Male" ? "M" : "F";
                    addPupil.DOB             = ConvertDate((string)pupil.date_of_birth);
                    addPupil.ENTRYDAT        = ConvertDate((string)pupil.admission_date);
                    addPupil.ActualYearGroup = pupil.year_group;
                    addPupil.performance     = new string[0];
                    addPupil.URN             = urn;
                    addPupils.Add(addPupil);
                }

                File.WriteAllText(
                    $"pupils_batch_{batchNumber}.json",
                    JsonConvert.SerializeObject(addPupils, decimalConverter));
            }

            stopwatch.Stop();

            // TimeSpan.Minutes is only the 0-59 component; TotalMinutes keeps the
            // report correct for runs that take an hour or longer.
            var elapsed = stopwatch.Elapsed;
            log($"{nameof(PupilsLoader)} finished in {(int)elapsed.TotalMinutes}m {elapsed.Seconds}s");

            // NOTE(review): the list also contains pupils skipped because their
            // anonymised name came back empty, not only those without a GIAS record.
            if (skippedPupils.Count > 0)
            {
                log($"{skippedPupils.Count} skipped pupils (no GIAS establishment record found): {string.Join(", ", skippedPupils)}");
            }
        }