/// <summary>
/// Checks that <c>PdfParser.GetDateFromPdf</c> returns the date captured by the
/// <c>day</c>/<c>month</c>/<c>year</c> groups of the date pattern when the PDF text matches it.
/// </summary>
public void GetDateFromPdf_WithPdfContainingDateMatchingRegex_ReturnsDate()
{
    // Arrange
    // The pattern captures the end date of a "<day> <month> to <day> <month> <year>" range.
    const string statementText = "4 May to 28 May 2013\r\nAccount Name\r\nMr John SmithSortcode Account Number\r\n41-45-66 12345678\r\n";
    var bankInfo = new PdfInfo(
        "My Bank",
        null,
        null,
        @"\d{1,2}\s[A-Z,a-z]+?\sto\s(?<day>\d{1,2})\s(?<month>[A-Z,a-z]{3})[a-z]*\s(?<year>20\d{2})");
    var expectedDate = new DateTime(2013, 5, 28);

    // Act
    DateTime actualDate = PdfParser.GetDateFromPdf(bankInfo, statementText, null);

    // Assert
    Assert.That(actualDate, Is.EqualTo(expectedDate), "date");
}
/// <summary>
/// Determines the date for a PDF, preferring the PDF text over the file name.
/// </summary>
/// <param name="pdfInfo">Supplies the date pattern via <c>DateRegexPattern</c>.</param>
/// <param name="pdfText">Text of the PDF that is tested against the date pattern.</param>
/// <param name="fileName">Path whose file-name portion is used when the text does not match.</param>
/// <returns>
/// The result of <c>GetDateTimeFromText</c> when the pattern matches <paramref name="pdfText"/>;
/// otherwise the result of <c>GetDateFromFileName</c> for the file-name portion of
/// <paramref name="fileName"/>.
/// </returns>
internal static DateTime GetDateFromPdf(PdfInfo pdfInfo, string pdfText, string fileName)
{
    var datePattern = new Regex(pdfInfo.DateRegexPattern);

    if (!datePattern.IsMatch(pdfText))
    {
        // TODO check if this is ever used
        return GetDateFromFileName(Path.GetFileName(fileName));
    }

    return GetDateTimeFromText(pdfText, datePattern);
}
/// <summary>
/// Builds a <see cref="PdfInfo"/> for every <c>PdfInfo</c> child element of the supplied elements.
/// </summary>
/// <param name="pdfInfosElement">Elements whose <c>PdfInfo</c> children are read.</param>
/// <returns>One <see cref="PdfInfo"/> per <c>PdfInfo</c> element, in enumeration order.</returns>
/// <exception cref="System.InvalidOperationException">
/// A <c>PdfInfo</c> element lacks a <c>FilePrefix</c>, <c>Folder</c>, <c>TextRegExPattern</c>
/// or <c>DateRegExPattern</c> child element.
/// </exception>
public static List<PdfInfo> GetPdfInfos(IEnumerable<XElement> pdfInfosElement)
{
    var pdfInfos = new List<PdfInfo>();

    foreach (XElement element in pdfInfosElement.Elements("PdfInfo"))
    {
        // TODO validate by using a schema
        string filePrefix = GetRequiredChildValue(element, "FilePrefix");
        string folder = GetRequiredChildValue(element, "Folder");
        string textRegExPattern = GetRequiredChildValue(element, "TextRegExPattern");
        string dateRegExPattern = GetRequiredChildValue(element, "DateRegExPattern");

        pdfInfos.Add(new PdfInfo(filePrefix, folder, textRegExPattern, dateRegExPattern));
    }

    return pdfInfos;
}

// Returns the value of the named child element, or throws a descriptive error when it is absent.
private static string GetRequiredChildValue(XElement parent, string childName)
{
    // XContainer.Element returns null when no such child exists; dereferencing it directly
    // would surface as a NullReferenceException with no indication of what is missing.
    XElement child = parent.Element(childName);
    if (child == null)
    {
        throw new System.InvalidOperationException(
            "PdfInfo element is missing required child element '" + childName + "'.");
    }

    return child.Value;
}