/// <summary>
/// Splits an hOCR <c>title</c> attribute on ';' and applies the recognised properties:
/// <c>image</c>/<c>file</c> (page image path), <c>ppageno</c> (image frame number) and
/// <c>bbox</c> (bounding box of <paramref name="ocrclass"/>).
/// </summary>
/// <param name="title">Raw title attribute value; <c>null</c> is ignored.</param>
/// <param name="ocrclass">
/// Element that receives the bounding box. The image path is only recorded (on the current
/// page) when this element is an <c>HPage</c>.
/// </param>
private void ParseTitle(string title, HOcrClass ocrclass)
{
    if (title == null)
    {
        return;
    }

    foreach (string s in title.Split(';'))
    {
        if (s.Contains("image ") || s.Contains("file "))
        {
            string filePath = s.Replace("image ", string.Empty)
                               .Replace("file ", string.Empty)
                               .Replace('"', ' ')
                               .Trim();

            // The path is only ever stored for page elements, so skip the file-system
            // probe and the fallback resolution for everything else.
            if (ocrclass is HPage)
            {
                if (!File.Exists(filePath))
                {
                    // Fall back to a file of the same name located next to the hOCR document.
                    // Path.Combine is used instead of a textual Replace on the hOCR path:
                    // Replace would also rewrite directory segments that happen to contain
                    // the hOCR file name, and throws when the hOCR path is null or has no
                    // file-name part.
                    string directory = string.IsNullOrEmpty(_hOcrFilePath)
                        ? string.Empty
                        : (Path.GetDirectoryName(_hOcrFilePath) ?? string.Empty);
                    filePath = Path.Combine(directory, Path.GetFileName(filePath));
                }

                _currentPage.ImageFile = filePath;
            }
        }

        if (s.Contains("ppageno"))
        {
            // The value is machine-written, so parse it independently of the current culture.
            if (int.TryParse(
                    s.Replace("ppageno", ""),
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out int frame))
            {
                _currentPage.ImageFrameNumber = frame;
            }
        }

        if (!s.Contains("bbox"))
        {
            continue;
        }

        // Everything left after removing the keyword is handed to BBox for parsing.
        string coords = s.Replace("bbox", "");
        BBox box = new BBox(coords, _dpi);
        ocrclass.BBox = box;
    }
}
/// <summary>
/// Creates a new <c>BBox</c> whose Left, Top, Width and Height are the source values
/// multiplied by 72 and divided by <paramref name="dpi"/>, and whose Format is
/// <c>UnitFormat.Point</c>. The source box is not modified.
/// </summary>
/// <param name="bbox">Box to convert. Presumably expressed in pixels at <paramref name="dpi"/> — confirm against callers.</param>
/// <param name="dpi">Resolution used for the conversion; must not be zero.</param>
/// <returns>A new box tagged as <c>UnitFormat.Point</c>.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="dpi"/> is zero.</exception>
/// <exception cref="ArgumentNullException"><paramref name="bbox"/> is <c>null</c>.</exception>
public static BBox ConvertBBoxToPoints(BBox bbox, float dpi)
{
    // The DPI check stays first so a zero DPI is reported even when bbox is also null,
    // matching the original order of failures.
    if (dpi == 0)
    {
        throw new ArgumentOutOfRangeException(nameof(dpi), "DPI is zero.");
    }

    if (bbox == null)
    {
        throw new ArgumentNullException(nameof(bbox));
    }

    BBox newBbox = new BBox(dpi)
    {
        Left = bbox.Left * 72 / dpi,
        Top = bbox.Top * 72 / dpi,
        Width = bbox.Width * 72 / dpi,
        Height = bbox.Height * 72 / dpi,
        Format = UnitFormat.Point
    };

    return newBbox;
}
/// <summary>
/// Builds the words of the current line from a title containing <c>x_bboxes</c> coordinates.
/// The numbers are consumed in groups of four; each completed group becomes one
/// <c>HChar</c> whose text is the character at the matching position of the current
/// line's text. Characters are collected into <c>HWord</c> objects, a space character ends
/// the word in progress, and finished words are appended to the current line's words.
/// </summary>
/// <param name="title">Title value holding the coordinates; <c>null</c> is ignored.</param>
private void ParseCharactersForLine(string title)
{
    if (title == null)
    {
        return;
    }
    // Drop the keyword so that only whitespace-separated coordinate tokens remain.
    title = title.Replace("x_bboxes", "");
    string[] coords = title.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    string bbox = "";   // coordinate tokens accumulated for the group being read
    string word = "";   // text of the word being assembled
    HWord w = new HWord();
    int charPos = 0;    // index into the line text of the character being emitted
    for (int i = 0; i < coords.Length; i++)
    {
        // A group is flushed only when the first token of the NEXT group is reached.
        // NOTE(review): because of that, the last four tokens are accumulated in `bbox`
        // but never turned into an HChar — confirm whether dropping the final character
        // is intended.
        if (i % 4 == 0 && i != 0)
        {
            HChar c = new HChar();
            BBox b = new BBox(bbox, _dpi);
            c.BBox = b;
            // NOTE(review): throws if there are more coordinate groups than characters
            // in the line text.
            char[] chars = _currentLine.Text.ToCharArray();
            c.Text = chars[charPos].ToString();
            if (c.Text != " ")
            {
                word += c.Text;
                // The first non-space character of a word seeds the word's box:
                // height and left edge from the character, top from the line.
                if (w.BBox == null)
                {
                    w.BBox = new BBox(_dpi);
                    {
                        w.BBox.Height = c.BBox.Height;
                        w.BBox.Left = c.BBox.Left;
                        w.BBox.Top = _currentLine.BBox.Top;
                    }
                }
            }
            else
            {
                // A space ends the word in progress, provided it collected any characters.
                if (w.Characters.Count > 0)
                {
                    // Width spans from the word's left edge to the right edge of the
                    // character with the highest ListOrder.
                    HChar previouschar = w.Characters.OrderBy(x => x.ListOrder).Last();
                    w.BBox.Width = previouschar.BBox.Left + previouschar.BBox.Width - w.BBox.Left;
                    w.Text = word + " ";
                    w.BBox.Height = w.Characters.Select(x => x.BBox.Height).Max();
                    // NOTE(review): CleanText is invoked twice in a row — confirm whether
                    // the second call is needed.
                    w.CleanText();
                    w.CleanText();
                    if (w.Characters.Count > 0 && w.Text != null && w.Text.Trim() != "")
                    {
                        _currentLine.Words.Add(w);
                    }
                    w = new HWord();
                    word = string.Empty;
                }
            }
            bbox = string.Empty;
            // Characters whose left coordinate truncates to -1 are not stored
            // (presumably -1 marks "no box" — verify against the hOCR producer).
            // When the character is a space that just closed a word, `w` is already
            // the new, empty word at this point.
            if ((int)c.BBox.Left != -1)
            {
                c.ListOrder = charPos;
                w.Characters.Add(c);
            }
            charPos += 1;
        }
        bbox += coords[i] + " ";
    }
    // Emit the word still in progress when the tokens run out, if it has any content.
    if (w.Characters.Count <= 0 || word.Trim() == string.Empty)
    {
        return;
    }
    // NOTE(review): unlike the space-terminated path above, Width and Height are not
    // set here — confirm whether that is intended.
    w.Text = word;
    w.CleanText();
    _currentLine.Words.Add(w);
}