public IEnumerable<RecognizedSegment> Recognize(Stream document, IProgressReporter progress) { var imageData = new ImageData(document); var boards = imageData.DefineIteratedBoards(); var heuristics = boards.Segment().ToList(); progress.Maximum = heuristics.Count * 1000; foreach (var segment in heuristics) { var whitespaceResults = trainingData.PerformWhitespaceLookup(segment, progress.ScaledChildOperation(500)).LastOrDefault(r => r.Certainty > 10); if (whitespaceResults != null && whitespaceResults.Text == "AllLabels") { var match = trainingData.PerformLookup(segment, progress.ScaledChildOperation(500)).LastOrDefault(r => r.Certainty > 10); if (match != null) yield return match; } else progress.Progress += 500; //Add the progress that would have been used by the character recognition } }
public void TestAlgorithm() { string renderText = "أدخل نص هنا لترجمة"; //string renderText = "أد"; //string renderText = "ﺀﺁﺂﺃﺄﺅﺆﺇﺈﺉﺊﺋﺌﺍﺎﺏﺐﺑﺒﺓﺔﺕﺖﺗﺘﺙﺚﺛﺜﺝﺞﺟﺠﺡﺢﺣﺤﺥﺦﺧﺨﺩﺪﺫﺬﺭﺮﺯﺰﺱﺲﺳﺴﺵﺶﺷﺸﺹﺺﺻﺼﺽﺾﺿﻀﻁﻂﻃﻄﻅﻆﻇﻈﻉﻊﻋﻌﻍﻎﻏﻐﻑﻒﻓﻔﻕﻖﻗﻘﻙﻚﻛﻜﻝﻞﻟﻠﻡﻢﻣﻤﻥﻦﻧﻨﻩﻪﻫﻬﻭﻮﻯﻰﻱﻲﻳﻴﻵﻶﻷﻸﻹﻺﻻﻼ"; //string renderText = "ﺬﻤﺈﺗﻹﻕﻐﻘﻝﺺﻔﺒﺭﻧﻂﺓﻫﻲﺸﺪﺊﻭﻜﺶﻆﻸﻒﺆﺄﻤﻴﺣﻄﻗﻖﺹﺶﺅﺆﺔﻣﻕﻨﺝﻓﻂﺭﺡﺀﻷﺣﺜﻷﻞﺶﺼﺈﻎﻍﺁﺂﻊﻓﻶﻃﺰﻊﻒﺑﺈﻙﻦﻬﺣﻃﺲﺄﺿﻠﺄﻊﺰﺆﺊﻁﻗﺨﻣﺡﺚﺱﻤﺴﺄﺑﺙﺂﻮﻖﺢﻴﻘﺐﻴﺞﺹﺬﻔﺛﺄﺷﻅﻫﻈﺳﻎﻃﻉﺵﻨﺇﻤﺱﻴﻔﻱﺲﻨﺑﺩﺯﺒﻥﺽﺍﻯﺠﺏﻨﺇﺫﻑﻈﺜﻟﻨﻶﻨﻎﻀﻒﺵﺹﻫﺚﺏﺧﺙﺟﺪﺇﻁﻂﻮﻑﺤﻝﺭﺻﻁﺮﺴﻝﺭﺽﺳﺛﺔﺔﻩﻸﻆﺙﻎﻓﻶﻖﻷﺣﻺﻇﺇﻬﺾﺀﻦﺎﻖﻈﺻﺋﻈﺭﺌﺑﺞﻕﺋﺮﻤﻱﺒﺅﺳﺮﺽﺨﻱﺛﻗﻊﻣﺊﺽﻶﺧﻄﺞﺭﻔﺤﻁﻉﻕﻝﺯﺘﺌﺼﺴﺡﻊﻈﺼﺉﺵﺁﺹﺏﺿﺾﺚﺻﻭﻭﺥﺽﻬﺓﻧﻗﺷﻚﻗﺿﺯﻅﺬﻒﻼﺥﺛﺴﻣﺶﻼﺚﻋﺳﻁﺥﺊﺎﻫﺕﺊﺆﻙﻥﺸﻯﺨﻶﻒﻚﻧﻭﻮﻹﻗ"; var output = new DrawingGroup(); var format = new BasicTextParagraphProperties("Times New Roman", 14, FlowDirection.LeftToRight); var words = BoundedWord.GetWords(renderText, Measurer.MeasureLines(renderText, 200, format, output)).ToList(); //using (var stream = output.ToBitmap().CreateStream()) { using (var stream = File.OpenRead(@"C:\Users\SSL\Temp\Arabic.png")) { //using (var targetStream = File.Create(@"C:\Users\SSL\Temp\Arabic-Console.png")) // stream.CopyTo(targetStream); stream.Position = 0; var imageData = new ImageData(stream); stream.Position = 0; //System.Drawing.Bitmap b = boards.Boards.First().Matrix.ExtractRectangularContentArea(segment.Bounds).ConvertDoubleArrayToBitmap(System.Drawing.Color.White); //whitespaceResults.Last().Log(boards.Boards.First().Matrix.ExtractRectangularContentArea(segment.Bounds).ConvertDoubleArrayToBitmap(System.Drawing.Color.White)); //characterResults.Last().Log(boards.Boards.First().Matrix.ExtractRectangularContentArea(segment.Bounds).ConvertDoubleArrayToBitmap(System.Drawing.Color.White)); //Debug.Print(characterResults.Last().Text + " " + characterResults.Last().Certainty.ToString()); var trainingData = new MutableReferenceSet(); trainingData.ReadFrom(trainingFolder); var searcher = new ReferenceSearcher(trainingData); var recognizer = new OriginalDocumentRecognizer(searcher); var reporter = new ConsoleProgressReporter(false); var results = recognizer.Recognize(stream, reporter).ToList().AsReadOnly(); var outputString = new StreamReader(OutputRenderer.PlainText.Convert(stream, results)).ReadToEnd(); Debug.Print(outputString); } }
//TODO: Test for white space first public void TrainAlgorithm() { string renderText = "أدخل نص هنا لترجمة"; //string renderText = "ﻂﺎﺉﻳﺕﺍﻈﺕﻊﺹﻫﻑﻈﻂﻟﺐﺻﺔﺛﺅﺡﺽﻛﻟﺊﺒﻖﺼﻞﺫﻳﺀﺎﻠﻜﻯﺛﺠﻔﺡﺜﺠﻹﺡﺕﺨﺜﻛﻃﺜﺗﺯﺘﺈﻞﺄﺰﺦﻨﺂﺳﺋﺳﺟﻃﻪﻃﻤﻣﻭﻱﺚﺽﻷﻦﺁﻁﺹﻊﻞﻲﻷﻶﻁﺓﻃﻥﺅﺂﻗﺿﻲﺵﻑﻪﺐﻩﻚﻛﺧﻢﺵﻵﻓﺨﻹﻯﺝﺟﻝﺿﻳﻘﺉﺌﺸﺒﻮﺋﻼﺅﺽﻢﺋﺭﻻﻢﻨﺍﻕﺔﻤﻛﺢﻹﺽﺂﺣﺙﺖﻑﻇﻣﺢﺘﻈﺙﺫﻄﺱﻺﻷﻎﻛﺢﺥﻏﻩﺥﺩﺗﺇﺐﻆﻈﻘﺘﺶﺍﻌﺶﻬﻕﻱﺫﺺﻖﻀﻰﻢﻡﺶﺣﻘﻗﺙﺖﻼﻼﻔﻫﺕﻺﻬﻼﻉﺶﺜﻖﺼﺰﻠﺞﻵﻸﺎﺢﻙﺯﻤﻋﺏﻵﺿﻄﻝﻡﺵﻬﺯﻂﺘﺶﺵﻞﺠﻮﻥﻱﻟﺄﺺﻦﺳﺲﺁﺘﺪﻜﻉﻴﺹﻍﺘﺉﻛﺜﺻﺧﺟﻥﺁﺭﺧﻷﺗﺪﻥﺗﻗﺜﺹﻥﻍﻠﺐﺥﻂﺍﻃﻅﺶﻕﺣﺸﺈﻇﺞﺬﺂﺃﺳﺽﻒﻶﺃﺌﺿﺸﺚﻓﺯﺒﺄﺬﺺﻐﺀﺴﻵﺶﻗﺭﻡﻗﺮﺨﺊﻫﻯﻌﺌﺹﺕﻔﻦﻸﺒﻓﻝﺄﻢﺉﻯﻬﺕﺤﻐﻳﻺﺵﻇﻦﺴﻉﺨﺅﻙﻷﻇﻗﺂﻐﻆﻊﻇﻲﻺﺭﻃﺾﺽﺩﻓﻰﻐﻎﺺﻱﻁﺊﻕﺓﻭﺺﺅﺢﺄﻈﺫﻠﺟﺘﺌﺆﺂﺑﺿﺤﺤﺟﺦﺣﺰﺼﻴﺞﺒﻀﻶﻗﻇﻤﺶﺼﺂﻫﻟﺌﻅﺥﻮﺱﺁﻟﺟﺥﺝﻍﻮﺾﺎﺖﻌﺡﻠﺱﻠﺱﻂﻣﺏﻁﻏﻐﺘﺷﻞﻌﺃﺓﺮﺳﻜﻼﺮﻲﺡﻍﺝﻎﺀﻂﻎﺷﻉﺱﺘﺔﻙﻍﺊﺮﻆﻞﺕﺭﻞﺭﻇﻤﻃﻍﻉﻛﺏﻗﺶﻍﻯﻫﺐﺒﺔﻱﻢﻊﻯﺗﺇﻣﺞﻉﻰﺳﺉﺀﻩﻺﻦﺁﺓﻰﺆﺵﺊﺰﻵﻰﺨﻮﻷﻈﺢﻪﺽﻴﺚﻒﻸﻈﺺﻆﺦﻠﻅﻶﻚﺖﻑﺏﻫﺹﻣﻮﻗﺜﺌﻛﻧﺲﻒﻻﻬﺕﻨﻓﺧﺅﻳﺑﻣﺗﺁﺞﻄﻉﻉﺢﻬﺐﻒﻞﺞﺘﺃﺾﻡﺱﺒﺠﻨﺻﻨﻅﻲﺻﻗﺑﻵﻯﻏﻔﺟﻀﺩﻓﺷﻄﺩﺎﻐﻦﻬﻖﻝﻰﻜﻓﺒﺪﻮﻳﻰﻓﺦﺧﺬﺸﺨﺈﻯﺸﺪﻺﺾﻺﺾﻛﻬﺬﺆﺝﺇﺄﻍﺁﻋﺑﻥﺂﻜﺃﻻﻊﺀﻘﻡﻉﺚﺁﺧﻻﺛﺡﺛﺗﺼﻯﺘﻠﺱﺁﻤﻣﺣﻑﻬﻫﺆﺤﻘﺚﻐﺦﺩﻬﺺﻜﻺﻢﺘﻲﻀﻫﺄﺇﺁﻢﻦﺵﻁﻙﻶﺄﻍﻚﺛﺘﺧﺫﻨﻳﻃﻲﺥﺌﺫﻱﻵﺫﻚﻜﻲﺲﻇﺼﺽﻵﻞﺛﻐﺊﺅﻍﻠﻻﺡﻌﻖﻋﻐﺡﺵﻎﻷﺢﺮﻋﻅﻴﻙﺌﻩﺺﻘﻢﻤﻃﺹﺐﺜﺣﺷﺋﻘﻞﻭﻳﺒﺦﻨﻜﺛﺪﻘﺸﺧﺣﺸﺥﺎﻯﺰﻦﺿﻃﺝﻈﻱﻺﺗﻚﺒﻅﺯﺭﻧﺩﻳﺘﺲﺷﺸﺞﺌﻺﻭﺿﻞﺠﺺﻁﺋﺔﺂﺩﻯﻍﻉﺕﻀﺤﺂﺔﺏﺍﻊﻓﻷﺪﻍﻻﺭﺟﻰﻓﺷﺒﻚﻂﻒﻛﻦﻑﻀﻓﻏﻺﻂﺅﺺﺚﻝﻞﻕﺍﺵﺣﺎﻼﺰﺚﻄﺴﻏﺼﺓﻨﺡﺫﻯﻝﻰﺇﺿﻢﺻﺮﻑﺹﺻﻌﺐﻲﻬﺳﺝﻌﻇﻢﺞﺓﺃﻛﺒﺸﺗﺲﺁﺃﺿﻣﻘﺖﺒﻚﺦﻖﺞﻯﻏﺇﻏﻒﻁﻠﺉﻎﻄﺛﺞﻄﺡﺼﺍﻜﻚﺕﻲﺇﻊﺈﺬﻍﻷﻔﻱﻷﻠﺒﺌﻄﺭﻔﺓﺖﻀﺮﺙﺮﺯﻅﻫﻢﻢﻷﻂﺜﺉﺳﺀﺑﻡﺗﺥﻆﻘﺁﻗﻩﻫﺏﻫﺲﺎﻗﺗﻉﺨﻳﺛﺫﻸﻃﻕﺅﺚﻆﻨﻼﺑﻔﻭﺬﺑﻸﻠﺖﺯﻉﺻﺾﻓﺭﻀﺟﻖﺸﻔﺊﻠﻶﻰﺒﺗﻑﺖﺝﺿﺟﺬﺑﺖﻗﻘﺖﺞﺾ"; var output = new DrawingGroup(); var format = new BasicTextParagraphProperties("Times New Roman", 14, FlowDirection.LeftToRight); var words = BoundedWord.GetWords(renderText, Measurer.MeasureLines(renderText, 200, format, output)).ToList(); var stream = output.ToBitmap().CreateStream(); var imageData = new ImageData(stream); imageData.SaveFile("RenderedFile1.png"); stream.Close(); Directory.CreateDirectory(trainingFolder); var boards = imageData.DefineIteratedBoards(); var trainingData = new MutableReferenceSet(); //ADD TO EXISTING TRAINING DATA LIBRARY: //trainingData.ReadFrom(trainingFolder); var characters = words.SelectMany(w => w.Characters).ToList(); for (int i = 0; i < characters.Count(); i++) { var ch1 = characters[i]; var letterHeuristics = boards.GetLetterHeuristics(ch1); if (i < characters.Count() - 1) { var ch2 = characters[i + 1]; var whitespaceHeuristics = boards.GetSpaceHeuristics(ch1, ch2); trainingData.AddHeuristics(whitespaceHeuristics); } if (letterHeuristics != null) trainingData.AddHeuristics(letterHeuristics); } ImageUtilities.Utilities.TrainingLog.ToString(); trainingData.WriteTo(trainingFolder); }