/// <summary>
/// Interprets OCR text as an integer and passes the value to <paramref name="method"/>.
/// </summary>
/// <remarks>
/// The trimmed text is offered to each function in <paramref name="tests"/>, in order. The first one
/// that returns a value greater than zero wins: that value is passed to <paramref name="method"/> and
/// processing stops. If no test matches, the trimmed text is parsed as a plain integer and, when the
/// parse succeeds, the parsed value (which may be zero or negative) is passed instead.
/// Nothing is invoked when the text is null, blank, or cannot be interpreted.
/// </remarks>
/// <param name="card">Not used by this method; kept so existing call sites remain valid.</param>
/// <param name="method">Callback that receives the interpreted value.</param>
/// <param name="text">Raw text to interpret; surrounding whitespace is ignored.</param>
/// <param name="tests">
/// Optional custom interpreters. Each receives the trimmed text and returns a value greater than
/// zero to claim it, or anything else to pass.
/// </param>
private static void UpdateCardInteger(CardItem card, Action<int> method, string text, params Func<string, int>[] tests)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        return;
    }

    string formatted = text.Trim();

    // A params array is null when a caller explicitly passes null; treat that as "no tests".
    if (tests != null)
    {
        foreach (var test in tests)
        {
            int result = test(formatted);
            if (result > 0)
            {
                method(result);
                return;
            }
        }
    }

    // The text is machine-read, not user input, so parse it culture-invariantly rather than
    // with whatever culture the current thread happens to have.
    int tmp;
    if (int.TryParse(
            formatted,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out tmp))
    {
        method(tmp);
    }
}
/// <summary>
/// Runs OCR over fixed regions of a card image and updates <paramref name="card"/> with the
/// numbers that are recognised (points, unit sizes and field allowance).
/// </summary>
/// <remarks>
/// The image is passed through <c>RescaleBitmap(..., 2000, 2800)</c> before recognition; the region
/// coordinates below are presumably expressed in that rescaled space (TODO confirm against
/// RescaleBitmap). Which regions are read depends on <c>card.Job</c> and, for "Unit" cards, on
/// whether the subtitle region contains the word "ATTACHMENT".
/// </remarks>
/// <param name="filename">Path of the card image to load.</param>
/// <param name="card">Card to update with the recognised values.</param>
/// <exception cref="NotSupportedException">
/// The card is a "Unit" and either its subtitle region or its cost region produced no text.
/// </exception>
private static void UpdateCardUsingOCR(string filename, CardItem card)
{
    using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
    // Own the bitmap loaded from disk explicitly: previously it was handed straight to
    // RescaleBitmap and never disposed, which leaks the GDI+ handle and keeps the file locked.
    using (var source = new Bitmap(filename))
    using (var bitmap = RescaleBitmap(source, 2000, 2800))
    using (var pix = PixConverter.ToPix(bitmap))
    {
        if (card.Job.Equals("Unit", StringComparison.OrdinalIgnoreCase))
        {
            string unitSubtitle = null;
            using (var subtitle = engine.Process(pix, Rect.FromCoords(405, 205, 1750, 260), PageSegMode.SingleBlock))
            {
                unitSubtitle = subtitle.GetText();
            }

            if (string.IsNullOrWhiteSpace(unitSubtitle))
            {
                throw new NotSupportedException("Unit title could not be acquired by OCR.");
            }

            if (unitSubtitle.IndexOf("ATTACHMENT", StringComparison.OrdinalIgnoreCase) > -1)
            {
                // Attachment cards: flag the card and read a single points value.
                card.UpdateAttachment();
                using (var points = engine.Process(pix, Rect.FromCoords(1640, 2690, 1705, 2745), PageSegMode.SingleBlock))
                {
                    UpdateCardInteger(card, card.UpdatePoints, points.GetText());
                }
            }
            else
            {
                string unitCosts = null;
                using (var costs = engine.Process(pix, Rect.FromCoords(1105, 2640, 1710, 2740), PageSegMode.SingleBlock))
                {
                    unitCosts = costs.GetText();
                }

                if (string.IsNullOrWhiteSpace(unitCosts))
                {
                    throw new NotSupportedException("Unit cost area could not be acquired by OCR.");
                }

                string[] lines = unitCosts.Split('\n');
                var points = new List<string>();
                var sizes = new List<string>();
                for (int i = 0; i < lines.Length; i += 1)
                {
                    if (string.IsNullOrWhiteSpace(lines[i]))
                    {
                        // Blank lines are tolerated only in the first two positions;
                        // a later blank line ends the scan.
                        if (i == 0 || i == 1)
                        {
                            continue;
                        }
                        else
                        {
                            break;
                        }
                    }

                    // we're not whitespace, process the line with a poor regex
                    var matches = _unitCostExpression.Matches(lines[i]);
                    if (matches.Count == 1)
                    {
                        // One match on the line: it is taken as a points value.
                        points.Add(matches[0].Groups[1].Value);
                    }
                    else if (matches.Count == 2)
                    {
                        // Two matches on the line: first is a size, second is a points value.
                        sizes.Add(matches[0].Groups[1].Value);
                        points.Add(matches[1].Groups[1].Value);
                    }
                }

                // One points value is a fixed cost; two are applied as a min/max pair.
                // Any other count leaves the card's points untouched.
                if (points.Count == 1)
                {
                    UpdateCardInteger(card, card.UpdatePoints, points[0]);
                }
                else if (points.Count == 2)
                {
                    UpdateCardInteger(card, card.UpdatePointsMin, points[0]);
                    UpdateCardInteger(card, card.UpdatePointsMax, points[1]);
                }

                // Sizes are only applied when exactly two were found (min and max).
                if (sizes.Count == 2)
                {
                    UpdateCardInteger(card, card.UpdateSizeMin, sizes[0]);
                    UpdateCardInteger(card, card.UpdateSizeMax, sizes[1]);
                }
            }
        }
        else
        {
            // Every non-"Unit" card: a single points value from its own region.
            using (var points = engine.Process(pix, Rect.FromCoords(1620, 2690, 1725, 2745), PageSegMode.SingleBlock))
            {
                UpdateCardInteger(card, card.UpdatePoints, points.GetText());
            }
        }

        // Field allowance is read for every card. The letter "C" maps to 1 and "U" maps to 999;
        // any other text falls through to plain integer parsing inside UpdateCardInteger.
        using (var fieldAllowance = engine.Process(pix, Rect.FromCoords(1830, 2690, 1890, 2745), PageSegMode.SingleBlock))
        {
            UpdateCardInteger(
                card,
                card.UpdateFieldAllowance,
                fieldAllowance.GetText(),
                txt => txt.Equals("C", StringComparison.OrdinalIgnoreCase) ? 1 : -1,
                txt => txt.Equals("U", StringComparison.OrdinalIgnoreCase) ? 999 : -1);
        }
    }
}
/// <summary>
/// Extracts the card images embedded in a PDF, writes them into the faction directory and
/// records the written file names on <paramref name="card"/>.
/// </summary>
/// <remarks>
/// Only image XObjects that are exactly 750 wide by 1050 high are extracted. Each is written as
/// "<paramref name="cardKey"/>-N.jpg" (N counting from 1) using the raw bytes of the PDF stream;
/// the bytes are not inspected or re-encoded, so they are presumably already JPEG data (TODO
/// confirm against the source PDFs). The first image written is additionally passed to
/// <c>UpdateCardUsingOCR</c>. <paramref name="stream"/> is disposed by this method.
/// </remarks>
/// <param name="factionDirectory">Directory that receives the extracted image files.</param>
/// <param name="stream">PDF content to read; disposed before the method completes.</param>
/// <param name="cardKey">Prefix used to build each image file name.</param>
/// <param name="card">Card that is updated via OCR and receives the list of image file names.</param>
/// <returns>A task that completes when all images are written and the card has been updated.</returns>
private static async Task ProcessCard(DirectoryInfo factionDirectory, Stream stream, string cardKey, CardItem card)
{
    var imageNames = new List<string>();

    using (stream)
    using (var document = PdfReader.Open(stream))
    {
        // NOTE(review): if either dictionary lookup yields null (key absent from the PDF) the
        // next dereference throws NullReferenceException - confirm whether that can happen
        // for the PDFs this tool processes.
        PdfDictionary resources = document.Pages.Elements.GetDictionary("/Resources");
        PdfDictionary xObjects = resources.Elements.GetDictionary("/XObject");

        foreach (var item in xObjects.Elements.Values)
        {
            var reference = item as PdfReference;
            if (reference == null)
            {
                continue;
            }

            var xObject = reference.Value as PdfDictionary;
            if (xObject == null || xObject.Elements.GetString("/Subtype") != "/Image")
            {
                continue;
            }

            // Skip anything that does not have the exact expected card dimensions.
            if (xObject.Elements.GetInteger("/Height") != 1050 || xObject.Elements.GetInteger("/Width") != 750)
            {
                continue;
            }

            string cardName = $"{cardKey}-{imageNames.Count + 1}.jpg";
            string filename = Path.Combine(factionDirectory.FullName, cardName);
            byte[] imageBytes = xObject.Stream.Value;

            // Open the file for overlapped I/O so WriteAsync/FlushAsync are genuinely
            // asynchronous instead of blocking a thread-pool thread on synchronous writes.
            using (var outfs = new FileStream(filename, FileMode.Create, FileAccess.Write, FileShare.Read, 4096, useAsync: true))
            {
                await outfs.WriteAsync(imageBytes, 0, imageBytes.Length);
                await outfs.FlushAsync();
            }

            // Only the first extracted image is used for OCR.
            if (imageNames.Count == 0)
            {
                UpdateCardUsingOCR(filename, card);
            }

            imageNames.Add(cardName);
        }
    }

    card.UpdateImageNames(imageNames);
}