/// <summary>
/// Consumes a line of data to populate a JobClass.
/// </summary>
/// <remarks>
/// Chunks recognized as the class code, grade, bargaining unit, or title are
/// removed from <paramref name="dataChunks"/> in place, so the list passed in
/// is modified and is the same instance that is returned.
/// </remarks>
/// <param name="dataChunks">A collection of job class data points.</param>
/// <param name="jobClass">A JobClass object to consume the line data.</param>
/// <returns>Any remaining data after processing the JobClass.</returns>
IEnumerable<string> assignClassData(IList<string> dataChunks, JobClass jobClass)
{
    // The process order here is important, since we're relying on simple(ish) Regex
    // which may overlap one another.

    // 1. code is always 4 consecutive integers
    string code = dataChunks.FirstOrDefault(c => FieldPatterns.ClassCode.IsMatch(c));
    if (!String.IsNullOrEmpty(code))
    {
        jobClass.Code = code;
        dataChunks.Remove(code);
    }

    // 2. grade is always 3 consecutive integers
    string grade = dataChunks.FirstOrDefault(c => FieldPatterns.Grade.IsMatch(c));
    if (!String.IsNullOrEmpty(grade))
    {
        jobClass.Grade = grade;
        dataChunks.Remove(grade);
    }

    // 3. bargaining unit code is always 3 consecutive capital letters
    string bu = dataChunks.FirstOrDefault(c => FieldPatterns.BargainingUnit.IsMatch(c));
    if (!String.IsNullOrEmpty(bu))
    {
        // Single lookup; fall back to a placeholder unit carrying only the code
        // when the code is not present in the known bargaining units.
        BargainingUnit knownUnit;
        jobClass.BargainingUnit = bargainingUnits.TryGetValue(bu, out knownUnit)
            ? knownUnit
            : new BargainingUnit() { Name = string.Empty, Code = bu };
        dataChunks.Remove(bu);
    }

    // 4. the remaining chunks are either part of the job title or part of a job step.
    //    Job step data is all numeric (integer or decimal numbers),
    //    so separate out the title parts and form the title.
    // NOTE(review): decimal.TryParse uses the current culture here; confirm that
    // matches how the step values are parsed elsewhere.
    decimal parsed;
    var titleChunks = dataChunks.Where(c => !decimal.TryParse(c, out parsed)).ToArray();
    string title = String.Join(" ", titleChunks).Trim();

    if (!String.IsNullOrEmpty(title))
    {
        jobClass.Title = title;

        foreach (var chunk in titleChunks)
        {
            dataChunks.Remove(chunk);
        }
    }

    // The job step chunks are all that should remain.
    return dataChunks;
}
/// <summary>
/// Process the job classes on a single page.
/// </summary>
/// <remarks>
/// Each line is split on spaces. A line starts a new job class; the lines that
/// immediately follow and begin with that class's grade are consumed as
/// additional steps of the same class (their leading grade chunk is skipped).
/// </remarks>
/// <param name="page">The textual data of a single page, broken by newline and realigned.</param>
/// <returns>A collection of JobClass objects from the page.</returns>
IEnumerable<JobClass> processClassesOnPage(IEnumerable<string> page)
{
    var separators = new[] { " " };
    var jobClasses = new List<JobClass>();

    // A queue lets us process the lines sequentially while peeking at the next one.
    var queue = new Queue<string>(page);

    while (queue.Count > 0)
    {
        var jobClass = new JobClass();
        var steps = new List<JobClassStep>();

        IEnumerable<string> dataChunks = queue.Dequeue().Split(separators, StringSplitOptions.RemoveEmptyEntries);

        // Assign the title, code, bargaining unit, grade for this class.
        dataChunks = assignClassData(dataChunks.ToList(), jobClass);

        // If there is leftover data -> step definition.
        if (dataChunks.Any())
        {
            steps.Add(assignStepData(dataChunks));
        }

        // Add each subsequent step for this class. Without a grade there is nothing
        // to match continuation lines against: a null grade would make StartsWith
        // throw, and an empty one would match every remaining line.
        string grade = jobClass.Grade;
        while (!String.IsNullOrEmpty(grade)
            && queue.Count > 0
            && queue.Peek().StartsWith(grade, StringComparison.Ordinal))
        {
            // Skip(1) drops the leading grade chunk; the rest is the step data.
            dataChunks = queue.Dequeue().Split(separators, StringSplitOptions.RemoveEmptyEntries).Skip(1);
            steps.Add(assignStepData(dataChunks));
        }

        jobClass.Steps = steps;
        jobClasses.Add(jobClass);
    }

    return jobClasses;
}