/// <summary>
/// Pairs the words of a user-supplied transcription with the letter boxes found on the
/// page and hands every matching (character, rectangle) pair to
/// <c>TeachLetters.ProcessLetter</c>.
/// </summary>
/// <remarks>
/// Collection starts at the first letter box that contains <paramref name="location"/> and
/// continues through all following letter boxes. Word boundaries in the image are decided by
/// <c>ImageLetters.IsNewWord</c>. An image word is only used when its number of letter boxes
/// equals the number of characters of the text word at the same position.
/// </remarks>
/// <param name="source">Bitmap passed through unchanged to <c>TeachLetters.ProcessLetter</c>.</param>
/// <param name="screen">Value passed through unchanged to <c>TeachLetters.ProcessLetter</c>.</param>
/// <param name="text">Transcription; split on spaces, empty entries are dropped.</param>
/// <param name="location">Point used to pick the first letter box.</param>
/// <param name="pageSections">Page layout supplying <c>AllLetters</c> and <c>DescriptiveText</c>.</param>
internal static void ProcessWords(Bitmap source, int screen, string text, Point location, PageSections pageSections)
{
    string[] textWords = text.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    // DescriptiveText can be null (pages without a description); in that case the
    // location is by definition not inside the description.
    bool inDescription = pageSections.DescriptiveText != null
        && pageSections.DescriptiveText.Bounds.Contains(location);

    // Read the letter collection once instead of on every loop iteration.
    var allLetters = pageSections.AllLetters;

    List<List<Rectangle>> imageWords = new List<List<Rectangle>>();
    List<Rectangle> letters = new List<Rectangle>();
    bool active = false;
    int collected = 0;
    for (int i = 0; i < allLetters.Count; i++)
    {
        if (allLetters[i].Contains(location))
        {
            active = true;
        }
        if (!active)
        {
            continue;
        }

        // The very first collected letter never closes a word.
        if (collected > 0 && ImageLetters.IsNewWord(allLetters, i, inDescription))
        {
            imageWords.Add(letters);
            letters = new List<Rectangle>();
        }
        letters.Add(allLetters[i]);
        collected++;
    }
    // Flush the word that was still being built when the letters ran out.
    imageWords.Add(letters);

    int pairCount = Math.Min(imageWords.Count, textWords.Length);
    for (int i = 0; i < pairCount; i++)
    {
        // Test if words in text and gaps in bitmap text match up.
        // We need this to reject any text where letters were not isolated properly.
        if (imageWords[i].Count != textWords[i].Length)
        {
            continue;
        }
        for (int j = 0; j < textWords[i].Length; j++)
        {
            TeachLetters.ProcessLetter(source, screen, textWords[i][j], imageWords[i][j]);
        }
    }
}
/// <summary>
/// Releases the previously held images, runs page preparation on <paramref name="bmp"/>
/// and rebuilds the bitmaps used for display (raw binary, raw split binary and an
/// annotated copy of the binary image).
/// </summary>
/// <param name="bmp">Input bitmap handed to <c>LibExplOCR.PrepareBitmaps</c>.</param>
private void PrepareBitmaps(Bitmap bmp)
{
    DisposeOldImages();

    Bitmap binary = null;
    Bitmap binarySplit = null;
    try
    {
        pageSections = LibExplOCR.PrepareBitmaps(bmp, out baseBmp, out binary, out binarySplit);
        pageSections = ImageLetters.RefinePartition(pageSections, binary);

        // The fields keep their own copies, so the intermediates can be released below.
        drawBmpRaw = new Bitmap(binary);
        drawBmpRawSplit = new Bitmap(binarySplit);
        drawBmp = new Bitmap(binary);
        LibExplOCR.AnnotatePageStructure(drawBmp, pageSections);
    }
    finally
    {
        // Both intermediates are only ever copied here. Previously binarySplit was never
        // disposed, and binary leaked whenever one of the steps above threw.
        // NOTE(review): assumes LibExplOCR.PrepareBitmaps does not retain binarySplit itself - confirm.
        if (binary != null)
        {
            binary.Dispose();
        }
        if (binarySplit != null)
        {
            binarySplit.Dispose();
        }
    }
}
/// <summary>
/// Paints an overlay of the detected page layout onto <paramref name="bmp"/>:
/// excluded areas, lines with their letter boxes, the descriptive text frame,
/// text lines, headlines and tables. Does nothing when <paramref name="sections"/> is null.
/// </summary>
/// <param name="bmp">Bitmap that is drawn on in place.</param>
/// <param name="sections">Page layout to visualize; may be null.</param>
internal static void AnnotatePageStructure(Bitmap bmp, PageSections sections)
{
    if (sections == null)
    {
        return;
    }

    using (Graphics canvas = Graphics.FromImage(bmp))
    using (Pen letterPen = new Pen(Color.FromArgb(120, Color.Green)))
    using (Brush excludedBrush = new HatchBrush(HatchStyle.BackwardDiagonal, Color.Purple))
    using (Brush lineBrush = new SolidBrush(Color.FromArgb(120, Color.Blue)))
    {
        // Excluded areas: purple hatching with a purple outline.
        foreach (ExcludeSection excluded in sections.Excluded)
        {
            canvas.FillRectangle(excludedBrush, excluded.Bounds);
            canvas.DrawRectangle(Pens.Purple, excluded.Bounds);
        }

        // Every line is filled with translucent blue, its letters outlined in translucent green.
        foreach (Line textLine in sections.AllLines)
        {
            canvas.FillRectangle(lineBrush, textLine.Bounds);
            foreach (Rectangle letterBox in textLine)
            {
                canvas.DrawRectangle(letterPen, letterBox);
            }
        }

        // Descriptive text frame, narrowed by 3 on the left and right.
        if (sections.DescriptiveText != null)
        {
            Rectangle descriptionFrame = Rectangle.Inflate(sections.DescriptiveText.Bounds, -3, 0);
            canvas.DrawRectangle(Pens.Orange, descriptionFrame);
        }

        foreach (TextLineSection textLineSection in sections.TextLines)
        {
            canvas.DrawRectangle(Pens.Purple, textLineSection.Line.Bounds);
        }

        foreach (HeadlineSection headline in sections.Headlines)
        {
            canvas.DrawRectangle(Pens.Khaki, headline.Line.Bounds);
        }

        foreach (TableSection table in sections.Tables)
        {
            canvas.DrawRectangle(Pens.Red, table.Bounds);
            canvas.DrawRectangle(Pens.Red, table.Gap);

            // Draw a separator above each row whose line item number exceeds the last one seen.
            // The scan starts at index 1 on purpose (marked "sic!" by the original author).
            int lastItem = 0;
            for (int row = 1; row < table.Count; row++)
            {
                int item = table.GetLineItem(row);
                if (item <= lastItem)
                {
                    continue;
                }

                int separatorY = table[row].Bounds.Top - 6;
                canvas.DrawLine(Pens.Red, table.Bounds.Left, separatorY, table.Gap.Left, separatorY);
                canvas.DrawLine(Pens.Red, table.Bounds.Right, separatorY, table.Gap.Right, separatorY);
                lastItem = item;
            }
        }

        /* Word display for debugging (kept disabled).
        int wordStart = 0;
        for (int i = 0; i < sections.AllLetters.Count; i++)
        {
            bool small = sections.DescriptiveText.Bounds.Contains(sections.AllLetters[i]);
            if (ImageLetters.IsNewWord(sections.AllLetters, i, small))
            {
                Rectangle r = sections.AllLetters[wordStart];
                for (int j = wordStart; j < i; j++)
                {
                    r = Rectangle.Union(r, sections.AllLetters[j]);
                }
                canvas.FillRectangle(Brushes.Yellow, r);
                wordStart = i;
            }
        }*/
    }
}
/// <summary>
/// Rebuilds the descriptive text and the text lines of <paramref name="pageSections"/>
/// with letter boxes that went through <c>ImageLetters.CleanupKerning</c>.
/// Tables, excluded areas and headlines are carried over unchanged.
/// </summary>
/// <param name="pageSections">Layout to refine.</param>
/// <param name="binary">Bitmap from which the letter masks are copied.</param>
/// <returns>
/// The unchanged <paramref name="pageSections"/> when it has no descriptive text,
/// otherwise a new <c>PageSections</c> instance.
/// </returns>
internal static PageSections RefinePartition(PageSections pageSections, Bitmap binary)
{
    Bytemap imageBinary = new Bytemap(binary);
    if (pageSections.DescriptiveText == null)
    {
        return pageSections;
    }

    List<Line> refinedDescription = new List<Line>();
    foreach (Line line in pageSections.DescriptiveText)
    {
        refinedDescription.Add(RefineLineKerning(imageBinary, line));
    }
    TextSection descriptiveText = new TextSection(refinedDescription);

    // Fix kerning for all text lines - hoping for terraforming and mining resources lines.
    List<TextLineSection> refinedTextLines = new List<TextLineSection>();
    foreach (TextLineSection section in pageSections.TextLines)
    {
        refinedTextLines.Add(new TextLineSection(RefineLineKerning(imageBinary, section.Line)));
    }

    return new PageSections(pageSections.Tables, descriptiveText, refinedTextLines, pageSections.Excluded, pageSections.Headlines);
}

// Builds a line with the same bounds whose letters are the CleanupKerning results of the original letters.
private static Line RefineLineKerning(Bytemap imageBinary, Line line)
{
    List<Rectangle> refinedLetters = new List<Rectangle>();
    foreach (Rectangle letter in line)
    {
        Bytemap letterMask = ImageLetters.CopyRectangle(imageBinary, letter);
        refinedLetters.AddRange(ImageLetters.CleanupKerning(letterMask, false));
    }
    return new Line(line.Bounds, refinedLetters);
}
/// <summary>
/// Reads every section of the page in the order given by <c>sections.AllSections</c>
/// and stores the resulting items in <c>currentItems</c>.
/// </summary>
/// <remarks>
/// Tables whose top edge lies above the bottom of the descriptive text are skipped.
/// When <c>StitchPrevious</c> is set, the new items are merged with the previous
/// <c>currentItems</c> before meta information is appended.
/// </remarks>
internal void ReadPage(Bytemap imageGray, Bytemap imageBinary, Bytemap imageBinarySplit, PageSections sections)
{
    qualityData.Clear();
    List<TransferItem> items = new List<TransferItem>();

    // Get rid of those pesky powerplay tables:
    // remember the bottom edge of the (last) text section so tables above it can be skipped.
    int descriptionLimit = -1;
    foreach (IPageSection section in sections.AllSections)
    {
        if (section is TextSection)
        {
            descriptionLimit = section.Bounds.Bottom;
        }
    }

    foreach (IPageSection section in sections.AllSections)
    {
        HeadlineSection headline = section as HeadlineSection;
        if (headline != null)
        {
            TransferItem headlineItem = ReadHeadline(headline, imageGray, imageBinary);
            if (headlineItem != null)
            {
                items.Add(headlineItem);
            }
        }

        TextSection description = section as TextSection;
        if (description != null)
        {
            items.Add(ReadDescription(description, imageGray, imageBinary));
        }

        TableSection table = section as TableSection;
        if (table != null)
        {
            if (descriptionLimit > section.Bounds.Top)
            {
                continue;
            }
            items.AddRange(ReadTableSection(table, sections, imageGray, imageBinary, imageBinarySplit));
        }

        TextLineSection textLine = section as TextLineSection;
        if (textLine != null)
        {
            TransferItem terraformingItem = ReadTerraformingLine(textLine, sections, imageGray, imageBinary);
            if (terraformingItem != null)
            {
                items.Add(terraformingItem);
            }

            TransferItem miningItem = ReadMiningReservesLine(textLine, sections, imageGray, imageBinary);
            if (miningItem != null)
            {
                items.Add(miningItem);
            }
        }
    }

    CustomItemProcessing(items);
    if (StitchPrevious)
    {
        items = MergeItems(currentItems, items);
    }
    AppendMetaInformation(items);
    currentItems = items.ToArray();
}
/// <summary>
/// Reads a single text line as a terraforming statement.
/// </summary>
/// <returns>
/// Null when the page has no table or the line does not end above the first table;
/// otherwise the result of <c>GuessTerraforming</c> for the recognized text.
/// </returns>
private TransferItem ReadTerraformingLine(TextLineSection tsl, PageSections sections, Bytemap imageGray, Bytemap imageBinary)
{
    // Terraforming description is above the first table.
    bool hasTable = sections.Tables.Count >= 1;
    if (!hasTable || tsl.Line.Bounds.Bottom >= sections.Tables[0].Bounds.Top)
    {
        return null;
    }

    List<Rectangle> letterBoxes = new List<Rectangle>(tsl.Line);
    string recognized = string.Empty;
    int letterCount = tsl.Line.Count;
    for (int index = 0; index < letterCount; index++)
    {
        // Insert a blank wherever the letter geometry indicates the start of a new word.
        bool startsWord = index > 0 && ImageLetters.IsNewWord(letterBoxes, index, true);
        if (startsWord)
        {
            recognized += " ";
        }
        recognized += PredictAsLetterD(imageGray, imageBinary, letterBoxes[index]);
    }

    return GuessTerraforming(recognized);
}
/// <summary>
/// Reads a single text line as a mining reserves statement.
/// </summary>
/// <remarks>
/// Mining reserves are stated between the first table and a headline OR immediately
/// after the description. "After the description" is taken to mean: the line ends above
/// the top of the first table, by less than 50.
/// </remarks>
/// <returns>
/// Null when the line is not in one of the accepted positions; otherwise the result of
/// <c>GuessMiningReserves</c> for the recognized text.
/// </returns>
private TransferItem ReadMiningReservesLine(TextLineSection tsl, PageSections sections, Bytemap imageGray, Bytemap imageBinary)
{
    if (sections.Tables.Count < 1)
    {
        return null;
    }

    bool afterTable = tsl.Line.Bounds.Top > sections.Tables[0].Bounds.Bottom;
    bool afterDescription =
        tsl.Line.Bounds.Bottom < sections.Tables[0].Bounds.Top &&
        tsl.Line.Bounds.Bottom > sections.Tables[0].Bounds.Top - 50;
    if (!(afterTable || afterDescription))
    {
        return null;
    }

    // The line needs a successor section; unless it follows the description,
    // that successor has to be a headline.
    int position = sections.AllSections.IndexOf(tsl);
    if (position < 0 || position + 1 >= sections.AllSections.Count)
    {
        return null;
    }
    if (!afterDescription && !(sections.AllSections[position + 1] is HeadlineSection))
    {
        return null;
    }

    List<Rectangle> letterBoxes = new List<Rectangle>(tsl.Line);
    string recognized = string.Empty;
    int letterCount = tsl.Line.Count;
    for (int index = 0; index < letterCount; index++)
    {
        bool startsWord = index > 0 && ImageLetters.IsNewWord(letterBoxes, index, true);
        if (startsWord)
        {
            recognized += " ";
        }
        recognized += PredictAsLetterD(imageGray, imageBinary, letterBoxes[index]);
    }

    return GuessMiningReserves(recognized);
}
/// <summary>
/// Reads all items of a table section.
/// </summary>
/// <returns>
/// An empty sequence when the table lies entirely above the descriptive text or lacks
/// left or right text; otherwise a "DELIMITER" item followed by one item per table entry.
/// </returns>
private IEnumerable<TransferItem> ReadTableSection(TableSection table, PageSections sections, Bytemap imageGray, Bytemap imageBinary, Bytemap imageBinarySplit)
{
    List<TransferItem> items = new List<TransferItem>();

    // TODO: For now, do not allow table items that are above the
    // description. This would be configurable in the future.
    bool aboveDescription =
        sections.DescriptiveText != null &&
        table.Bounds.Bottom < sections.DescriptiveText.Bounds.Top;
    if (aboveDescription)
    {
        return items;
    }

    if (!(HasLeftText(table) && HasRightText(table)))
    {
        return items;
    }

    items.Add(new TransferItem("DELIMITER"));

    // Each item covers left.Count table lines, so advance by that many lines per item.
    int lineIndex = 0;
    while (lineIndex < table.Count)
    {
        List<Line> left;
        List<Line> right;
        GetTableItem(table, lineIndex, out left, out right);
        items.Add(ReadTableItem(imageGray, imageBinary, imageBinarySplit, left, right));
        lineIndex += left.Count;
    }

    return items;
}
/// <summary>
/// Reads the page grouped by section type instead of by page order: first the
/// description, then all headlines, all tables and finally all text lines.
/// The resulting items are stored in <c>currentItems</c>.
/// </summary>
/// <remarks>
/// When <c>StitchPrevious</c> is set, the new items are merged with the previous
/// <c>currentItems</c> before meta information is appended.
/// </remarks>
internal void ReadPageClassic(Bytemap imageGray, Bytemap imageBinary, Bytemap imageBinarySplit, PageSections sections)
{
    qualityData.Clear();
    List<TransferItem> items = new List<TransferItem>();

    if (sections.DescriptiveText != null)
    {
        items.Add(ReadDescription(sections.DescriptiveText, imageGray, imageBinary));
    }

    foreach (HeadlineSection headline in sections.Headlines)
    {
        TransferItem headlineItem = ReadHeadline(headline, imageGray, imageBinary);
        if (headlineItem != null)
        {
            items.Add(headlineItem);
        }
    }

    foreach (TableSection table in sections.Tables)
    {
        items.AddRange(ReadTableSection(table, sections, imageGray, imageBinary, imageBinarySplit));
    }

    foreach (TextLineSection textLine in sections.TextLines)
    {
        TransferItem terraformingItem = ReadTerraformingLine(textLine, sections, imageGray, imageBinary);
        if (terraformingItem != null)
        {
            items.Add(terraformingItem);
        }

        TransferItem miningItem = ReadMiningReservesLine(textLine, sections, imageGray, imageBinary);
        if (miningItem != null)
        {
            items.Add(miningItem);
        }
    }

    CustomItemProcessing(items);
    if (StitchPrevious)
    {
        items = MergeItems(currentItems, items);
    }
    AppendMetaInformation(items);
    currentItems = items.ToArray();
}