/// <summary>
/// Reads one unpivoted ("parsed") STAAR test CSV file and builds a <c>StaarTest</c> row for every
/// record whose year/subject/grade/language combination is not already present in the database,
/// registering any region, district or campus that is not yet known.
/// </summary>
/// <remarks>
/// Column layout used by this method (first row is a header and is skipped):
/// 0 campus number, 1 year, 2 region number, 3 district number, 4 district name, 5 campus name,
/// 6 grade, 7 language, 8 subject, 9 demographic, 10 category, 11 value.
/// Rows that fail to process are written to the log and skipped; processing continues.
/// NOTE(review): persistence is still switched off here — the bulk insert and both SaveChanges
/// calls are commented out / TODO, exactly as before. Until they are enabled, new campuses keep
/// whatever Id value an unsaved entity carries — confirm before relying on Campus_Id.
/// NOTE(review): fields are split on ',' only; quoted fields containing commas are not handled.
/// </remarks>
/// <param name="parsedFilePath">Path of the parsed .csv file to import.</param>
/// <param name="logPath">Path of the log file; it is created or overwritten.</param>
/// <exception cref="CustomException">
/// The file does not have a .csv extension, contains no data rows, or its first data row does not
/// have exactly 12 columns.
/// </exception>
public void PopulateDatabaseFromUnpivotedStaarTestFile(string parsedFilePath, string logPath)
{
    const int ExpectedColumnCount = 12;
    const int BatchSize = 90000;

    //Validation comes first so a rejected file never opens (and leaks) the log writer
    if (!string.Equals(Path.GetExtension(parsedFilePath), ".csv", StringComparison.OrdinalIgnoreCase))
    {
        throw new CustomException("This is not a csv file.");
    }

    //read the lines in. First row is headers, so skip it; blank lines carry no data
    var splitRows = File.ReadLines(parsedFilePath)
        .Skip(1)
        .Where(line => !string.IsNullOrWhiteSpace(line))
        .Select(line => line.Split(','))
        .ToList();

    //a parsed file has exactly 12 columns (the old check compared the ROW count against 12)
    if (splitRows.Count == 0 || splitRows[0].Length != ExpectedColumnCount)
    {
        throw new CustomException(string.Format("{0} is not in the format of a parsed file", Path.GetFileName(parsedFilePath)));
    }

    var dems = _ctx.DemographicDetails.ToList();
    var cats = _ctx.CategoryDetails.ToList();
    var subjs = _ctx.Subjects.ToList();
    var camps = _ctx.Campuses.ToList();
    var dists = _ctx.Districts.ToList();
    var regs = _ctx.Regions.ToList();
    var langs = _ctx.Languages.ToList();
    var completed = _ctx.StaarTests
        .GroupBy(s => new { s.Year, Subject = s.Subject.Name, s.Grade, s.Language.Name })
        .Select(s => new { s.Key.Year, SubjectName = s.Key.Subject, s.Key.Grade, Language = s.Key.Name })
        .ToList();

    //adjust year if it is off (two-digit years: 90-99 => 19xx, 0-89 => 20xx)
    var year = int.Parse(splitRows[0][1]);
    if (year >= 90 && year < 100)
    {
        year += 1900;
    }
    else if (year < 90)
    {
        year += 2000;
    }

    var language = langs.Single(l => l.Name == splitRows[0][7]);

    //the previous* values remember the last row's keys so lookups only run when a key changes
    string previousCampus = "", previousSubject = "", previousDemographic = "", previousCategory = "";
    long campusId = 0;
    var subject = new Subject();
    var dd = new DemographicDetail();
    var cd = new CategoryDetail();
    var campusTests = new List<StaarTest>(BatchSize);
    var rowCount = splitRows.Count;

    //the writer must stay open for the whole loop; it used to be closed after the first row
    using (var log = File.CreateText(logPath))
    {
        for (var i = 0; i < rowCount; i++)
        {
            try
            {
                var record = splitRows[i];

                if (completed.Any(c => c.SubjectName == record[8] && c.Grade == record[6] && c.Year == year && c.Language == language.Name))
                {
                    continue;
                }

                //check if we're on a new school
                if (previousCampus != record[0])
                {
                    //Find the school. Create it if it is new
                    long campusNum = Convert.ToInt32(record[0]);
                    long regionNum = Convert.ToInt32(record[2]);
                    long districtNum = Convert.ToInt32(record[3]);

                    //check region, district, and campus; new entities also go into the cached
                    //lists so the next row that references them does not create a duplicate
                    var region = regs.FirstOrDefault(r => r.Number == regionNum);
                    if (region == null)
                    {
                        region = new Region { Name = string.Format("Region {0}", regionNum), Number = regionNum };
                        regs.Add(region);
                        _ctx.Regions.Add(region);
                    }

                    var district = dists.FirstOrDefault(r => r.Number == districtNum);
                    if (district == null)
                    {
                        //link through the navigation property: an unsaved region has no usable Id yet
                        district = new District { Name = record[4], Number = districtNum, Region = region };
                        dists.Add(district);
                        _ctx.Districts.Add(district);
                    }

                    var campus = camps.FirstOrDefault(r => r.Number == campusNum);
                    if (campus == null)
                    {
                        campus = new Campus { Name = record[5], Number = campusNum, District = district };
                        camps.Add(campus);
                        _ctx.Campuses.Add(campus);
                    }

                    //TODO ctx.SaveChanges();
                    campusId = campus.Id;
                }

                if (previousSubject != record[8])
                {
                    subject = subjs.Single(s => s.Name == record[8]);
                }

                if (previousDemographic != record[9])
                {
                    dd = dems.Single(s => s.Detail == record[9]);
                }

                if (previousCategory != record[10])
                {
                    cd = cats.Single(s => s.Detail == record[10]);
                }

                campusTests.Add(new StaarTest
                {
                    Campus_Id = campusId,
                    CategoryDetail_Id = cd.Id,
                    DemographicDetail_Id = dd.Id,
                    Subject_Id = subject.Id,
                    Year = year,
                    Language_Id = language.Id,
                    //the file is comma separated, so values always use '.' as the decimal mark
                    Value = Convert.ToDecimal(record[11], System.Globalization.CultureInfo.InvariantCulture),
                    Grade = record[6]
                });

                if (campusTests.Count >= BatchSize || i == rowCount - 1)
                {
                    //ctx.BulkInsert(campusTests);
                    campusTests = new List<StaarTest>(BatchSize);
                }

                previousCampus = record[0];
                previousSubject = record[8];
                previousDemographic = record[9];
                previousCategory = record[10];
            }
            catch (Exception ex)
            {
                //best effort: record the failure and carry on with the next row
                log.WriteLine("Message - {0}\r\nStackTrace - {1}", ex.Message, ex.StackTrace);
            }
        }

        //TODO ctx.SaveChanges();
    }
}
/// <summary>
/// Imports every unpivoted ("parsed") STAAR test CSV file found directly inside a directory.
/// Rows whose year/subject/grade/language combination already exists in the database (or was
/// imported from an earlier file in the same run) are skipped; the rest are bulk inserted.
/// </summary>
/// <remarks>
/// Column layout (first row of each file is a header and is skipped):
/// 0 CAMPUS, 1 YEAR, 2 REGION, 3 DISTRICT, 4 DNAME, 5 CNAME, 6 Grade, 7 LanguageEnum,
/// 8 Subject, 9 Demographic, 10 Category, 11 Value.
/// A file that cannot be read or processed is logged and skipped; the remaining files are still
/// processed. The language and year of a file are taken from its first remaining data row.
/// NOTE(review): fields are split on ',' only; quoted fields containing commas are not handled.
/// NOTE(review): a failure part-way through a file leaves the batches already bulk inserted in
/// the database — confirm whether that is acceptable.
/// </remarks>
/// <param name="parsedFilesDirectory">Directory whose top-level *.csv files are imported.</param>
/// <param name="logPath">Path of the log file; it is created or overwritten.</param>
public void PopulateDatabaseFromUnpivotedStaarTestDirectory(string parsedFilesDirectory, string logPath)
{
    const int ExpectedColumnCount = 12;
    const int BatchSize = 90000;

    var completed = _ctx.StaarTests
        .GroupBy(s => new { s.Year, Subject = s.Subject.Name, s.Grade, s.Language.Name })
        .Select(s => new Complete { Year = s.Key.Year, SubjectName = s.Key.Subject, Grade = s.Key.Grade, Language = s.Key.Name })
        .ToList();

    //Reference data is loaded on first use and must live across files. These lists used to be
    //re-created empty inside the loop, so every file after the first found no languages at all.
    var dems = new List<DemographicDetail>();
    var cats = new List<CategoryDetail>();
    var subjs = new List<Subject>();
    var camps = new List<Campus>();
    var dists = new List<District>();
    var regs = new List<Region>();
    var availableLanguages = new List<Language>();
    var firstTime = true;

    //the previous* values remember the last row's keys so lookups only run when a key changes
    string previousSubject = "", previousDemographic = "", previousCategory = "";
    var subject = new Subject();
    var dd = new DemographicDetail();
    var cd = new CategoryDetail();

    //campus lookups keyed by the raw column text, so each campus number is parsed and searched once
    var campusByRawNumber = new Dictionary<string, Campus>();

    //two-digit years: 90-99 => 19xx, 0-89 => 20xx; anything else is taken as is
    Func<int, int> normalizeYear = y => y >= 90 && y < 100 ? y + 1900 : (y < 90 ? y + 2000 : y);

    //A row is already imported only when ALL four key fields match one completed group.
    //(The old filter dropped a row as soon as ANY single field matched ANY completed group.)
    Func<string[], bool> alreadyImported = row =>
    {
        int rowYear;
        if (!int.TryParse(row[1], out rowYear))
        {
            return false;
        }

        rowYear = normalizeYear(rowYear);
        return completed.Any(c => c.Year == rowYear && c.Grade == row[6] && c.SubjectName == row[8] && c.Language == row[7]);
    };

    using (var log = File.CreateText(logPath))
    {
        foreach (var unzippedFile in Directory.GetFiles(parsedFilesDirectory, "*.csv", SearchOption.TopDirectoryOnly))
        {
            try
            {
                //enumerate through file and grab rows we need.
                List<string[]> testsNotInDb;
                try
                {
                    testsNotInDb = File.ReadLines(unzippedFile)
                        .Skip(1) //header row
                        .Where(line => !string.IsNullOrWhiteSpace(line))
                        .Select(line => line.Split(','))
                        .Where(row => !alreadyImported(row))
                        .ToList();

                    if (testsNotInDb.Count == 0)
                    {
                        log.WriteLine("{0} has had all recordes uploaded already", unzippedFile);
                        continue;
                    }

                    if (testsNotInDb[0].Length != ExpectedColumnCount)
                    {
                        log.WriteLine("{0} is not in the format of a parsed file", unzippedFile);
                        continue;
                    }
                }
                catch (Exception ex)
                {
                    log.WriteLine("An error occured reading from {0}. The error is: {1}", unzippedFile, ex.Message);
                    continue;
                }

                //by this point we know there is work to do, so the reference data is needed
                if (firstTime)
                {
                    dems = _ctx.DemographicDetails.ToList();
                    cats = _ctx.CategoryDetails.ToList();
                    subjs = _ctx.Subjects.ToList();
                    camps = _ctx.Campuses.ToList();
                    dists = _ctx.Districts.ToList();
                    regs = _ctx.Regions.ToList();
                    availableLanguages = _ctx.Languages.ToList();
                    firstTime = false;
                }

                //get bits from file
                var sortedCount = testsNotInDb.Count;
                var fileLanguage = availableLanguages.Single(l => l.Name == testsNotInDb[0][7]);
                var fileYear = normalizeYear(int.Parse(testsNotInDb[0][1]));

                #region add all campuses first
                var campusGroups = testsNotInDb
                    .GroupBy(s => new
                    {
                        CampusNumber = Convert.ToInt32(s[0]),
                        Year = s[1],
                        RegionNumber = Convert.ToInt32(s[2]),
                        DistrictNumber = Convert.ToInt32(s[3]),
                        Dname = s[4],
                        Cname = s[5],
                    })
                    .Select(g => g.Key)
                    .ToList();

                var newRegs = new List<Region>();
                var newDist = new List<District>();
                var newCamps = new List<Campus>();

                foreach (var campusGroup in campusGroups)
                {
                    //Find the school. Create it if it is new
                    var campusNum = campusGroup.CampusNumber;
                    var regionNum = campusGroup.RegionNumber;
                    var districtNum = campusGroup.DistrictNumber;

                    //check region, district, and campus
                    var region = regs.FirstOrDefault(r => r.Number == regionNum);
                    if (region == null)
                    {
                        region = new Region { Name = string.Format("Region {0}", regionNum), Number = regionNum };
                        regs.Add(region);
                        newRegs.Add(region);
                    }

                    var district = dists.FirstOrDefault(r => r.Number == districtNum);
                    if (district == null)
                    {
                        district = new District { Name = campusGroup.Dname, Number = districtNum, Region = region };
                        dists.Add(district);
                        newDist.Add(district);
                    }

                    var campus = camps.FirstOrDefault(r => r.Number == campusNum);
                    if (campus == null)
                    {
                        campus = new Campus { Name = campusGroup.Cname, Number = campusNum, District = district };
                        camps.Add(campus);
                        newCamps.Add(campus);
                    }
                }

                _ctx.Regions.AddRange(newRegs);
                _ctx.Districts.AddRange(newDist);
                _ctx.Campuses.AddRange(newCamps);
                _ctx.SaveChanges();
                #endregion

                #region go through records
                var testsToAdd = new List<StaarTest>(BatchSize);
                for (var i = 0; i < sortedCount; i++)
                {
                    var record = testsNotInDb[i];

                    Campus recordCampus;
                    if (!campusByRawNumber.TryGetValue(record[0], out recordCampus))
                    {
                        var campusNumber = Convert.ToInt32(record[0]);
                        recordCampus = camps.First(c => c.Number == campusNumber);
                        campusByRawNumber[record[0]] = recordCampus;
                    }

                    //get new stuff
                    if (previousSubject != record[8])
                    {
                        subject = subjs.Single(s => s.Name == record[8]);
                    }

                    if (previousDemographic != record[9])
                    {
                        dd = dems.Single(s => s.Detail == record[9]);
                    }

                    if (previousCategory != record[10])
                    {
                        cd = cats.Single(s => s.Detail == record[10]);
                    }

                    testsToAdd.Add(new StaarTest
                    {
                        Campus_Id = recordCampus.Id,
                        CategoryDetail_Id = cd.Id,
                        DemographicDetail_Id = dd.Id,
                        Subject_Id = subject.Id,
                        Year = fileYear,
                        Language_Id = fileLanguage.Id,
                        //the file is comma separated, so values always use '.' as the decimal mark
                        Value = Convert.ToDecimal(record[11], System.Globalization.CultureInfo.InvariantCulture),
                        Grade = record[6]
                    });

                    //flush a full batch, and whatever is left once the last record has been added
                    if (testsToAdd.Count >= BatchSize || i == sortedCount - 1)
                    {
                        _ctx.BulkInsert(testsToAdd);
                        testsToAdd = new List<StaarTest>(BatchSize);
                    }

                    previousSubject = record[8];
                    previousDemographic = record[9];
                    previousCategory = record[10];
                }
                #endregion

                //add processed files to completed so later files in this run skip the same groups
                var groupedImportsToComplete = testsNotInDb
                    .GroupBy(s => new { Year = fileYear, SubjectName = s[8], Grade = s[6], Langauge = fileLanguage.Name })
                    .Select(g => new Complete { Year = g.Key.Year, SubjectName = g.Key.SubjectName, Grade = g.Key.Grade, Language = g.Key.Langauge })
                    .ToList();
                completed.AddRange(groupedImportsToComplete);

                log.WriteLine("{0} successfully uploaded", unzippedFile);
            }
            catch (Exception ex)
            {
                //best effort: record the failure and carry on with the next file
                log.WriteLine("{0}\r\n{1}", ex.Message, ex.StackTrace);
            }
        }
    }
}