/// <summary>
/// Reads the run date of this report from the first page of data (page 2).
/// </summary>
/// <param name="reader">The reader from which the text of page 2 is taken.</param>
/// <returns>
/// The run date if the RunDate pattern matches and its first capture group
/// parses as a date, otherwise null.
/// </returns>
DateTime? readReportDate(PdfReader reader)
{
    var runDateMatch = FieldPatterns.RunDate.Match(reader.TextFromPage(2));
    if (!runDateMatch.Success)
    {
        return null;
    }

    //the first capture group holds the date text; surrounding whitespace is ignored
    var candidate = runDateMatch.Groups[1].Value.Trim();

    DateTime parsed;
    if (!DateTime.TryParse(candidate, out parsed))
    {
        return null;
    }

    //DateTime.MinValue is treated as "no date found"
    return parsed == DateTime.MinValue ? (DateTime?)null : parsed;
}
/// <summary>
/// Reads the Bargaining Unit information on the first page of the report.
/// Also replaces the bargainingUnits lookup with the units read, keyed by code.
/// </summary>
/// <param name="reader">The reader from which the text of page 1 is taken.</param>
/// <returns>A collection of BargainingUnit objects for this report, one per distinct code.</returns>
IEnumerable<BargainingUnit> readBargainingUnits(PdfReader reader)
{
    const string replace = " ";

    var units = new List<BargainingUnit>();
    //codes already added, so duplicates are rejected without rescanning units
    var seenCodes = new HashSet<string>();

    //the Fire BU is never output as a code in the BU table?
    var text = reader.TextFromPage(1).Replace("Fire ", "FIR ");

    //get all the BU chunks
    var chunks = getPageChunks(text).Where(c => FieldPatterns.BargainingUnit.IsMatch(c));

    foreach (var chunk in chunks)
    {
        //get each of the BUs in this chunk
        var matches = FieldPatterns.BargainingUnit.Matches(chunk);

        //split the chunk at the BU code points (leaving their names);
        //materialized once so indexing below does not re-run the split and regex replace
        var names = FieldPatterns.BargainingUnit.Split(chunk)
            .Where(s => !String.IsNullOrEmpty(s.Trim()))
            .Select(s => FieldPatterns.ConsecutiveSpaces.Replace(s, replace).Trim())
            .ToList();

        for (int i = 0; i < matches.Count; i++)
        {
            var code = matches[i].Value;

            //there will probably be chunks containing BU codes that were already processed
            if (seenCodes.Add(code))
            {
                //the i-th name is paired with the i-th code; throws
                //ArgumentOutOfRangeException if the chunk yields fewer names than codes
                units.Add(new BargainingUnit() { Code = code, Name = names[i] });
            }
        }
    }

    bargainingUnits = units.ToDictionary(bu => bu.Code, bu => bu);

    return units;
}
/// <summary>
/// Interprets the fiscal year from the first page of this report.
/// </summary>
/// <param name="reader">The reader from which the text of page 1 is taken.</param>
/// <returns>
/// A FiscalYear built from the first two capture groups of the FiscalYear
/// pattern, or null if the pattern does not match the page text.
/// </returns>
FiscalYear readFiscalYear(PdfReader reader)
{
    var yearMatch = FieldPatterns.FiscalYear.Match(reader.TextFromPage(1));
    if (!yearMatch.Success)
    {
        return null;
    }

    var groups = yearMatch.Groups;
    return new FiscalYear(groups[1].Value, groups[2].Value);
}
/// <summary>
/// Private implementation that acts on an open PdfReader. For every page from
/// page 2 through the last page, the page text is split into chunks and the
/// chunks are passed through fixAlignment.
/// </summary>
/// <param name="reader">The open reader whose pages are processed.</param>
/// <returns>One alignment-corrected result per page, in page order, materialized as an array.</returns>
private IEnumerable<IEnumerable<string>> getAlignmentCorrectedClassData(PdfReader reader)
{
    //page 1 is skipped, so there are NumberOfPages - 1 pages starting at page 2
    //NOTE: Enumerable.Range throws ArgumentOutOfRangeException for a negative count (NumberOfPages == 0)
    var pageNumbers = Enumerable.Range(2, reader.NumberOfPages - 1);

    var correctedPages = pageNumbers.Select(pageNumber =>
    {
        var pageText = reader.TextFromPage(pageNumber);
        var pageChunks = getPageChunks(pageText);
        return fixAlignment(pageChunks);
    });

    //force evaluation now, rather than leaving a deferred query for the caller
    return correctedPages.ToArray();
}