/// <summary>
/// Enriches the schools reference CSV with GIAS details and performance results,
/// writing each batch of rows to a <c>schools_batch_N.json</c> file (relative path,
/// so it is resolved against the current working directory).
/// </summary>
/// <param name="log">Callback that receives progress messages.</param>
/// <param name="schoolsRefCsvFilePath">Path of the schools reference CSV; rows are read via their <c>DFESNumber</c> column.</param>
/// <param name="schoolsPerfCsvFilePath">Path of the schools performance CSV; rows are grouped by their <c>DFESNumber</c> column.</param>
/// <param name="giasCsvFilePath">Path of the GIAS CSV handed to <c>GiasLookup</c>.</param>
public static void Load(Action<string> log, string schoolsRefCsvFilePath, string schoolsPerfCsvFilePath, string giasCsvFilePath)
{
    var stopwatch = Stopwatch.StartNew();
    log($"{nameof(SchoolsLoader)} started");

    ILookup<string, object> performanceLookup;
    var decimalConverter = new NumberJsonConverter();

    using (var reader = new StreamReader(schoolsPerfCsvFilePath))
    using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
    {
        // ToLookup executes immediately, so the lookup is fully built before the reader is disposed.
        var records = csv.GetRecords<dynamic>();
        performanceLookup = records.ToLookup(r => (string)r.DFESNumber);
        log($"{schoolsPerfCsvFilePath} loaded");
    }

    var giasLookup = new GiasLookup(log, giasCsvFilePath);

    using (var reader = new StreamReader(schoolsRefCsvFilePath))
    using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
    {
        var records = csv.GetRecords<dynamic>();
        log($"{schoolsRefCsvFilePath} loaded");

        var batchNumber = 1;
        foreach (IEnumerable<dynamic> batch in records.Batch(1000))
        {
            Parallel.ForEach(batch, new ParallelOptions { MaxDegreeOfParallelism = MAX_PARALLELISM }, schoolRow =>
            {
                var gias = giasLookup[schoolRow.DFESNumber];
                if (gias == null)
                {
                    // No GIAS entry: the row is left untouched. It is still part of
                    // the batch that gets serialized below.
                    return;
                }

                schoolRow.URN = gias.urn;
                schoolRow.SchoolType = gias.typeofestablishmentname;
                // The real school name is replaced by a placeholder derived from the URN.
                schoolRow.SchoolName = $"Test School {gias.urn}";

                // Deferred projection: it is evaluated when the batch is serialized.
                var perf = (IEnumerable<dynamic>)performanceLookup[schoolRow.DFESNumber];
                schoolRow.performance = perf.Select(r => new { r.Code, r.SetName, r.CodeValue });

                System.Threading.Interlocked.Increment(ref _processedCount);
            });

            File.WriteAllText(
                "schools_batch_" + batchNumber + ".json",
                JsonConvert.SerializeObject(batch, decimalConverter));
            log($"{batchNumber} batches processed");
            batchNumber++;
        }
    }

    stopwatch.Stop();

    // TimeSpan.Minutes is only the 0-59 component; TotalMinutes keeps the
    // reported duration correct for runs lasting an hour or more.
    var elapsed = stopwatch.Elapsed;
    log($"{nameof(SchoolsLoader)} finished in {(int)elapsed.TotalMinutes}m {elapsed.Seconds}s");
}
/// <summary>
/// Enriches the pupils CSV with GIAS details, amendment data and performance results,
/// writing each batch of rows to a <c>pupils_batch_N.json</c> file, followed by one
/// extra file containing the pupils added through amendments. Files are written to
/// relative paths, so they are resolved against the current working directory.
/// </summary>
/// <param name="log">Callback that receives progress messages and the final skipped-pupils report.</param>
/// <param name="pupilsCsvFilePath">Path of the pupils CSV.</param>
/// <param name="pupilsPerfCsvFilePath">Path of the pupils performance CSV; rows are grouped by their <c>PortlandStudentID</c> column.</param>
/// <param name="giasCsvFilePath">Path of the GIAS CSV handed to <c>GiasLookup</c>.</param>
/// <param name="amendmentsCsvFilePath">Optional path of the amendments CSV handed to <c>AmendmentLookup</c>; defaults to <c>null</c>.</param>
public static void Load(
    Action<string> log,
    string pupilsCsvFilePath,
    string pupilsPerfCsvFilePath,
    string giasCsvFilePath,
    string amendmentsCsvFilePath = null)
{
    var stopwatch = Stopwatch.StartNew();
    log($"{nameof(PupilsLoader)} started");

    ILookup<string, object> performanceLookup;
    var decimalConverter = new NumberJsonConverter();

    using (var reader = new StreamReader(pupilsPerfCsvFilePath))
    using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
    {
        // ToLookup executes immediately, so the lookup is fully built before the reader is disposed.
        var records = csv.GetRecords<dynamic>();
        performanceLookup = records.ToLookup(r => (string)r.PortlandStudentID);
        log($"{pupilsPerfCsvFilePath} loaded");
    }

    var giasLookup = new GiasLookup(log, giasCsvFilePath);
    var amendmentsLookup = new AmendmentLookup(log, amendmentsCsvFilePath);

    // skippedPupils is appended to from the Parallel.ForEach workers below.
    // List<T> does not support concurrent writers, so every Add is guarded by this lock.
    var skippedPupils = new List<string>();
    var skippedPupilsLock = new object();

    using (var reader = new StreamReader(pupilsCsvFilePath))
    using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
    {
        var records = csv.GetRecords<dynamic>();
        log($"{pupilsCsvFilePath} loaded");

        var batchNumber = 1;
        string[] resultsFieldsToRemove = new[] { "PortlandStudentID", "DFESNumber", "CandidateNumber", "SchoolCandidateNumber" };

        foreach (IEnumerable<dynamic> batch in records.Batch(1000))
        {
            Parallel.ForEach(batch, new ParallelOptions { MaxDegreeOfParallelism = MAX_PARALLELISM }, pupilRow =>
            {
                var laestab = (string)pupilRow.DFESNumber;
                var gias = giasLookup[laestab];
                if (gias == null)
                {
                    string skippedNoGias = $"{pupilRow.CandidateNumber} ({pupilRow.DFESNumber})";
                    lock (skippedPupilsLock)
                    {
                        skippedPupils.Add(skippedNoGias);
                    }

                    // The row is left untouched; it is still part of the batch serialized below.
                    return;
                }

                var anonymisedName = ConvertToAlphaCharaters((string)pupilRow.CandidateNumber);
                if (string.IsNullOrEmpty(anonymisedName))
                {
                    string skippedNoName = $"{pupilRow.PortlandStudentID}";
                    lock (skippedPupilsLock)
                    {
                        skippedPupils.Add(skippedNoName);
                    }

                    return;
                }

                // NOTE(review): the pupil id is currently always empty (the UPN is not read),
                // so the amendment key is effectively the LAEstab on its own.
                var pupilID = string.Empty; // (string) pupilRow.UPN;
                var amendmentKey = laestab + pupilID;

                if (amendmentsLookup.ExistingPupilLookup.ContainsKey(amendmentKey))
                {
                    // Existing pupil with an amendment: re-point the row at the amended
                    // establishment. Surname/Forename are not overwritten on this path.
                    var existingAmendment = amendmentsLookup.ExistingPupilLookup[amendmentKey];
                    var newURN = (string)existingAmendment.urn;
                    var newLAEstab = giasLookup.UrnLookup[newURN];
                    pupilRow.URN = newURN;
                    pupilRow.DFESNumber = newLAEstab;
                }
                else
                {
                    pupilRow.URN = gias.urn;
                    pupilRow.Surname = $"{anonymisedName}S";
                    pupilRow.Forename = $"{anonymisedName}F";
                }

                // Deferred projection: it is evaluated when the batch is serialized,
                // at which point the identifying fields are stripped from each result row.
                var perf = (IEnumerable<dynamic>)performanceLookup[pupilRow.PortlandStudentID];
                pupilRow.performance = perf.Select(r =>
                {
                    var dict = (IDictionary<string, object>)r;
                    foreach (var fieldName in resultsFieldsToRemove)
                    {
                        dict.Remove(fieldName);
                    }

                    return dict;
                });

                System.Threading.Interlocked.Increment(ref _processedCount);
            });

            File.WriteAllText(
                "pupils_batch_" + batchNumber + ".json",
                JsonConvert.SerializeObject(batch, decimalConverter));
            log($"{batchNumber} batches processed");
            batchNumber++;
        }

        // Pupils that exist only in the amendments go into one extra file.
        // batchNumber was already advanced past the last regular batch, so nothing is overwritten.
        var addPupils = new List<dynamic>();
        foreach (var pupil in amendmentsLookup.NewPupilLookup)
        {
            var urn = (string)pupil.urn;

            dynamic addPupil = new ExpandoObject();
            addPupil.DFESNumber = giasLookup.UrnLookup[urn];
            addPupil.id = pupil.pupil_id;
            addPupil.Forename = pupil.forename;
            addPupil.Surname = pupil.surname;
            // Any gender value other than "Male" is written as "F".
            addPupil.Gender = (string)pupil.gender == "Male" ? "M" : "F";
            addPupil.DOB = ConvertDate((string)pupil.date_of_birth);
            addPupil.ENTRYDAT = ConvertDate((string)pupil.admission_date);
            addPupil.ActualYearGroup = pupil.year_group;
            addPupil.performance = new string[0];
            addPupil.URN = urn;

            addPupils.Add(addPupil);
        }

        File.WriteAllText(
            "pupils_batch_" + batchNumber + ".json",
            JsonConvert.SerializeObject(addPupils, decimalConverter));
    }

    stopwatch.Stop();

    // TimeSpan.Minutes is only the 0-59 component; TotalMinutes keeps the
    // reported duration correct for runs lasting an hour or more.
    var elapsed = stopwatch.Elapsed;
    log($"{nameof(PupilsLoader)} finished in {(int)elapsed.TotalMinutes}m {elapsed.Seconds}s");

    // NOTE(review): the list also contains pupils skipped because no anonymised name
    // could be produced, although the message only names the missing-GIAS reason.
    if (skippedPupils.Count > 0)
    {
        log($"{skippedPupils.Count} skipped pupils (no GIAS establishment record found): {string.Join(", ", skippedPupils)}");
    }
}