Ejemplo n.º 1
0
        public void GetDateFromPdf_WithPdfContainingDateMatchingRegex_ReturnsDate()
        {
            // Arrange: statement text whose period reads "4 May to 28 May 2013", and a
            // date pattern that captures the day/month/year following the word "to".
            const string statementText =
                "4 May to 28 May 2013\r\nAccount Name\r\nMr John SmithSortcode Account Number\r\n41-45-66 12345678\r\n";
            const string datePattern =
                @"\d{1,2}\s[A-Z,a-z]+?\sto\s(?<day>\d{1,2})\s(?<month>[A-Z,a-z]{3})[a-z]*\s(?<year>20\d{2})";
            var bankInfo = new PdfInfo("My Bank", null, null, datePattern);
            var expected = new DateTime(2013, 5, 28);

            // Act: no file name is supplied, so the date has to come from the text.
            DateTime actual = PdfParser.GetDateFromPdf(bankInfo, statementText, null);

            // Assert
            Assert.That(actual, Is.EqualTo(expected), "date");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Determines the date associated with a PDF, preferring the PDF's text
        /// and falling back to the file name.
        /// </summary>
        /// <param name="pdfInfo">Supplies <c>DateRegexPattern</c>, the pattern used to look for a date in the text.</param>
        /// <param name="pdfText">The text to test against the date pattern.</param>
        /// <param name="fileName">Path or name of the file; only its file-name part is used, and only when the text does not match.</param>
        /// <returns>
        /// The result of <c>GetDateTimeFromText</c> when the pattern matches <paramref name="pdfText"/>;
        /// otherwise the result of <c>GetDateFromFileName</c>.
        /// </returns>
        internal static DateTime GetDateFromPdf(PdfInfo pdfInfo, string pdfText, string fileName)
        {
            var datePattern = new Regex(pdfInfo.DateRegexPattern);

            if (!datePattern.IsMatch(pdfText))
            {
                // TODO check if this is ever used
                return GetDateFromFileName(Path.GetFileName(fileName));
            }

            return GetDateTimeFromText(pdfText, datePattern);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds a <c>PdfInfo</c> for every <c>PdfInfo</c> child element found
        /// directly under the supplied elements.
        /// </summary>
        /// <param name="pdfInfosElement">The parent element(s) whose <c>PdfInfo</c> children are read.</param>
        /// <returns>
        /// A list with one entry per <c>PdfInfo</c> element, in the order enumerated;
        /// empty when there are none.
        /// </returns>
        /// <remarks>
        /// Each <c>PdfInfo</c> element is expected to contain <c>FilePrefix</c>, <c>Folder</c>,
        /// <c>TextRegExPattern</c> and <c>DateRegExPattern</c> children; a missing child is not
        /// handled here and surfaces as a null dereference.
        /// </remarks>
        public static List<PdfInfo> GetPdfInfos(IEnumerable<XElement> pdfInfosElement)
        {
            var result = new List<PdfInfo>();

            foreach (XElement node in pdfInfosElement.Elements("PdfInfo"))
            {
                // TODO validate by using a schema
                result.Add(new PdfInfo(
                    node.Element("FilePrefix").Value,
                    node.Element("Folder").Value,
                    node.Element("TextRegExPattern").Value,
                    node.Element("DateRegExPattern").Value));
            }

            return result;
        }