// Create an OCR job for the given image.
//
// image:    the image to recognise. A private copy is taken via Copy(), and the
//           copy is what DoOCR later reads. May be null.
// callback: optional; invoked with this instance once a result is available.
//
// A null image does not start any work: the instance is immediately marked done
// with no big box, an empty small-box array and the timestamp "[empty]", and the
// callback (if any) is invoked synchronously from this constructor.
public AsyncOCR(Image image, OCRCallback callback = null)
{
    this.callback = callback;

    // The null check has to come before Copy(); copying first would throw a
    // NullReferenceException and make the empty-result path unreachable.
    if (image == null)
    {
        this.image = null;
        _bigBox = null;
        _smallBoxes = new OCRBox[0];
        _timeStamp = "[empty]";
        isDone = true;
        this.callback?.Invoke(this);
        return;
    }

    this.image = image.Copy();

    // Run the actual OCR request in the background
    task = Task.Run(DoOCR);
}
// Group OCR boxes into phrases and lazily yield one bounding rectangle per phrase.
//
// Boxes are visited left to right. Each phrase starts from the leftmost remaining
// box and keeps absorbing the leftmost remaining box for which
// CouldBeNextRect(currentRect, candidate, charWidth) holds, growing the rectangle
// with Include() each time. The character width used for the test is taken from
// the box that started the phrase.
private static IEnumerable<Rectangle> AutoPhraseRects(IEnumerable<OCRBox> boxes)
{
    // Boxes that do not belong to any phrase yet, leftmost first
    Queue<OCRBox> pending = new Queue<OCRBox>(boxes.OrderBy(b => b.rect.Left));

    // The queue is replaced wholesale inside the loop, so it cannot be foreach'd
    while (pending.Count != 0)
    {
        // Seed a new phrase with the leftmost remaining box
        OCRBox seed = pending.Dequeue();
        int charWidth = seed.CharWidth();
        Rectangle phraseRect = seed.rect;
        List<OCRBox> members = new List<OCRBox> { seed };

        // Keep extending for as long as some pending box lines up with the phrase so far
        while (pending.Any(candidate => phraseRect.CouldBeNextRect(candidate.rect, charWidth)))
        {
            // Leftmost pending box that lines up
            OCRBox next = pending.First(candidate => phraseRect.CouldBeNextRect(candidate.rect, charWidth));
            members.Add(next);

            // Grow the phrase rectangle to cover the newly added box
            phraseRect = phraseRect.Include(next.rect);

            // A Queue cannot drop an element from its middle, so rebuild it
            // from everything that is not already part of this phrase
            pending = new Queue<OCRBox>(pending.Except(members));
        }

        yield return phraseRect;
    }
}
// Send the image to the Microsoft OCR endpoint and store the parsed result.
//
// On success this fills _bigBox, _smallBoxes and _timeStamp (time spent on the
// request, formatted hh:mm:ss.fff), sets isDone and invokes the callback.
// A successful response that contains no regions is reported as an error big box
// with no small boxes instead of failing on regions[0].
// Any exception (including a non-success HTTP status) is written to the console;
// in that case isDone stays unset and the callback is not invoked.
private async Task DoOCR()
{
    try
    {
        string identifier = Utility.RandomHex();
        DebugLog.Log("Making MS OCR request [" + identifier + "]");

        // No keyfile to check - maybe somehow validate the API key?

        // Wait for rate limiter before starting the clock
        AsyncStatic.rate.Check();
        Stopwatch sw = new Stopwatch();

        // Encode the image as PNG and pull the bytes out of the stream
        byte[] byteData;
        using (MemoryStream ms = new MemoryStream())
        {
            image.Save(ms, ImageFormat.Png);
            byteData = ms.ToArray();
        }

        JToken json;
        using (HttpClient client = new HttpClient())
        using (HttpRequestMessage request = new HttpRequestMessage())
        {
            // Build the OCR request
            request.RequestUri = new Uri(Properties.Settings.Default.microsoftOcrEndpoint + "vision/v2.1/ocr");
            request.Method = HttpMethod.Post;
            request.Headers.Add(
                "Ocp-Apim-Subscription-Key",
                Properties.Settings.Default.microsoftOcrApiKey);
            request.Content = new ByteArrayContent(byteData);
            request.Content.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream");

            // Ask for OCR
            sw.Start();
            using (HttpResponseMessage response = await client.SendAsync(request))
            {
                string body = await response.Content.ReadAsStringAsync();

                // Surface service errors with their status and body rather than
                // letting them show up later as a missing "regions" property
                if (!response.IsSuccessStatusCode)
                {
                    throw new HttpRequestException(
                        "MS OCR request failed (" + (int)response.StatusCode + " " + response.ReasonPhrase + "): " + body);
                }

                json = JToken.Parse(body);
            }
            sw.Stop();
        }

        JToken regions = json["regions"];
        if (regions == null || !regions.Any())
        {
            // Nothing was recognised
            _bigBox = OCRBox.ErrorBigBox();
            _smallBoxes = new OCRBox[0];
        }
        else
        {
            // MS data does not contain a meaningful bigbox, just fill something in here
            _bigBox = new OCRBox(
                (string)regions[0]["boundingBox"],
                "Azure doesn't provide this data.");

            // Flatten regions -> lines -> words into one box per word
            _smallBoxes = regions
                .SelectMany(region => region["lines"])
                .SelectMany(line => line["words"])
                .Select(word => new OCRBox((string)word["boundingBox"], (string)word["text"]))
                .ToArray();
        }

        _timeStamp = string.Format("{0:00}:{1:00}:{2:00}.{3:000}",
            sw.Elapsed.Hours,
            sw.Elapsed.Minutes,
            sw.Elapsed.Seconds,
            sw.Elapsed.Milliseconds);

        isDone = true;
        callback?.Invoke(this);

        DebugLog.Log("Finished MS OCR request [" + identifier + "]");
    }
    catch (Exception e)
    {
        Console.WriteLine("\n" + e.Message);
    }
}
// Guess the width of a single character in the box, based on total width and character count.
// Integer division is used, so the result is rounded down.
// A box whose text is null or empty is treated as holding one character, so the
// full box width is returned instead of throwing on the division.
public static int CharWidth(this OCRBox box) =>
    (box.text == null || box.text.Length == 0)
        ? box.rect.Width
        : box.rect.Width / box.text.Length;
// Send the image to Google's text detection and store the parsed result.
//
// On success this fills _bigBox, _smallBoxes and _timeStamp (time spent on the
// request, formatted hh:mm:ss.fff), sets isDone and invokes the callback.
// An empty response is reported as an error big box with no small boxes.
// Failures, including a missing key file, are passed to frmBabel.LogWorkerError;
// in that case isDone stays unset and the callback is not invoked.
private async Task DoOCR()
{
    try
    {
        string identifier = Utility.RandomHex();
        DebugLog.Log("Making Google OCR request [" + identifier + "]");

        if (!File.Exists(Properties.Settings.Default.googleApiKeyPath))
        {
            throw new FileNotFoundException("Keyfile not present at " + Properties.Settings.Default.googleApiKeyPath);
        }

        // Wait for rate limiter before starting the clock
        AsyncStatic.rate.Check();
        Stopwatch sw = new Stopwatch();

        // Encode the image as PNG in memory and load it back as a gimage
        GImage gimage;
        using (MemoryStream stream = new MemoryStream())
        {
            image.Save(stream, ImageFormat.Png);
            stream.Position = 0; // rewind so FromStream reads from the start
            gimage = GImage.FromStream(stream);
        }

        // Make our connection client
        ImageAnnotatorClient client = new ImageAnnotatorClientBuilder
        {
            CredentialsPath = Properties.Settings.Default.googleApiKeyPath,
        }.Build();

        // Ask for OCR
        sw.Start();
        var response = await client.DetectTextAsync(gimage);
        sw.Stop();

        if (response.Count == 0)
        {
            // We didn't get anything back
            _bigBox = OCRBox.ErrorBigBox();
            _smallBoxes = new OCRBox[] { };
        }
        else
        {
            // First result is the big box
            _bigBox = new OCRBox(response.First());

            // Following results are the small boxes
            _smallBoxes = response.Skip(1)
                .Select(ann => new OCRBox(ann))
                .ToArray();
        }

        _timeStamp = string.Format("{0:00}:{1:00}:{2:00}.{3:000}",
            sw.Elapsed.Hours,
            sw.Elapsed.Minutes,
            sw.Elapsed.Seconds,
            sw.Elapsed.Milliseconds);

        isDone = true;
        callback?.Invoke(this);

        DebugLog.Log("Finished Google OCR request [" + identifier + "]");
    }
    catch (Grpc.Core.RpcException e)
    {
        // Pull the first URL (if any) out of the error message so it can be
        // passed along separately from the message text
        Match match = Regex.Match(e.Message, @"(http\S*)", RegexOptions.IgnoreCase);
        string url = match.Success ? match.Value : "";

        frmBabel.LogWorkerError(e.Message, url);
    }
    catch (Exception e)
    {
        frmBabel.LogWorkerError(e.Message, "");
    }
}