/// <summary>
/// Runs document text detection on an in-memory image and returns the recognized text.
/// </summary>
/// <param name="data">Image bytes handed to <c>Image.FromBytes</c>.</param>
/// <returns>
/// The text of the annotation, or an empty string when the client returns no annotation.
/// </returns>
private string RecognizeFromBytesDocument(byte[] data)
{
    var visionImage = Image.FromBytes(data);
    var annotation = googleClient.DetectDocumentText(visionImage, context);

    // The client hands back null rather than an empty annotation in some cases.
    if (annotation == null)
    {
        return "";
    }

    return annotation.Text;
}
/// <summary>
/// Snippet and test for document text detection: loads the "DocumentText.png" resource,
/// prints the page/block/paragraph/word hierarchy, then asserts the words of the first
/// paragraph of each block on the first page.
/// </summary>
public void DetectDocumentText()
{
    Image image = LoadResourceImage("DocumentText.png");
    // Snippet: DetectDocumentText
    ImageAnnotatorClient client = ImageAnnotatorClient.Create();
    TextAnnotation text = client.DetectDocumentText(image);
    Console.WriteLine($"Text: {text.Text}");
    foreach (var page in text.Pages)
    {
        foreach (var block in page.Blocks)
        {
            // Each bounding box is printed as "(x, y) - (x, y) - ...".
            var blockCorners = block.BoundingBox.Vertices.Select(v => $"({v.X}, {v.Y})");
            string blockBox = string.Join(" - ", blockCorners);
            Console.WriteLine($"Block {block.BlockType} at {blockBox}");
            foreach (var paragraph in block.Paragraphs)
            {
                var paragraphCorners = paragraph.BoundingBox.Vertices.Select(v => $"({v.X}, {v.Y})");
                string paragraphBox = string.Join(" - ", paragraphCorners);
                Console.WriteLine($" Paragraph at {paragraphBox}");
                foreach (var word in paragraph.Words)
                {
                    // A word is the concatenation of its symbols' text.
                    string wordText = string.Concat(word.Symbols.Select(s => s.Text));
                    Console.WriteLine($" Word: {wordText}");
                }
            }
        }
    }
    // End snippet

    // One entry per block of the first page: the words of that block's first paragraph.
    var firstPage = text.Pages[0];
    var lines = firstPage.Blocks
        .Select(b => b.Paragraphs[0].Words.Select(w => string.Concat(w.Symbols.Select(s => s.Text))))
        .ToList();

    Assert.Equal(new[] { "Sample", "text", "line", "1", }, lines[0]);
    Assert.Equal(new[] { "Text", "near", "the", "middle", }, lines[1]);
    Assert.Equal(new[] { "Text", "near", "bottom", "right", }, lines[2]);
}
/// <summary>
/// Handles a newly created file: runs document text detection on it, translates the
/// detected text, shows both texts in the UI and, when playback is enabled, speaks the
/// translation.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data; <c>e.FullPath</c> is the image file to process.</param>
private void Watcher_Created(object sender, FileSystemEventArgs e)
{
    var image = Google.Cloud.Vision.V1.Image.FromFile(e.FullPath);
    var text = _imageAnnotator.DetectDocumentText(image);
    if (text == null)
    {
        // UI controls are updated through the dispatcher.
        Dispatcher.BeginInvoke(new Action(() =>
        {
            TextBoxDetected.Text = "Text was not detected from the image.";
        }));
        return;
    }

    var translated = _translation.TranslateText(text.Text, _language.TranslateCode);

    lock (_translateLock)
    {
        Dispatcher.BeginInvoke(new Action(() =>
        {
            TextBoxDetected.Text = text.Text;
            TextBoxTranslated.Text = translated.TranslatedText;
        }));

        if (_play)
        {
            var audio = TextToSpeech(translated.TranslatedText);

            // SoundPlayer.Play() only *starts* playback on another thread, so disposing the
            // player right after it releases the audio while it is still being played.
            // PlaySync() returns once playback has finished, which makes the disposal safe
            // and lets the surrounding lock actually serialize consecutive clips.
            using (var audioStream = new MemoryStream(audio))
            using (var player = new SoundPlayer(audioStream))
            {
                player.PlaySync();
            }
        }
    }
}
/// <summary>
/// Document text detection performs optical character recognition. This feature detects
/// dense document text in an image.
/// </summary>
/// <param name="fileStream">Stream holding the image to analyze.</param>
/// <returns>
/// One <c>ImageProcessPage</c> per page reported by the service; an empty list when the
/// service returns no text annotation.
/// </returns>
/// <exception cref="FaultException">
/// Thrown when the service reports an <c>AnnotateImageException</c>; the message is the
/// string form of the failed response.
/// </exception>
public List<ImageProcessPage> DetectDocumentText(System.IO.Stream fileStream)
{
    try
    {
        var image = Google.Cloud.Vision.V1.Image.FromStream(fileStream);
        ImageAnnotatorClient client = ImageAnnotatorClient.Create();
        TextAnnotation text = client.DetectDocumentText(image);

        var imageProcessPageCollection = new List<ImageProcessPage>();

        // No annotation means no text was found: report zero pages instead of
        // dereferencing null.
        if (text == null)
        {
            return imageProcessPageCollection;
        }

        // For each page, walk its text blocks.
        foreach (var page in text.Pages)
        {
            var imageProcessPage = new ImageProcessPage();

            // For each text block.
            foreach (var block in page.Blocks)
            {
                var imageProcessPageParagraphs = new ImageProcessPageParagraphs();

                // Accumulates every word of the block. The previous code created a new
                // builder per word and appended the LINQ iterator object itself, so Phrase
                // ended up holding a type name instead of the recognized text.
                var phrase = new StringBuilder();

                foreach (var paragraph in block.Paragraphs)
                {
                    // Collect the paragraph's bounding-box coordinates.
                    foreach (var coordenate in paragraph.BoundingBox.Vertices)
                    {
                        imageProcessPageParagraphs.Coordenates.Add(new ImageProcessCoordenate
                        {
                            X = coordenate.X, // Horizontal coordinate
                            Y = coordenate.Y  // Vertical coordinate
                        });
                    }

                    foreach (var word in paragraph.Words)
                    {
                        // A word is the concatenation of its symbols; words are separated
                        // by a single space, as the original " " suffix intended.
                        phrase.Append(string.Concat(word.Symbols.Select(x => x.Text)));
                        phrase.Append(" ");
                    }
                }

                // Leave Phrase untouched when the block had no words, as before.
                if (phrase.Length > 0)
                {
                    imageProcessPageParagraphs.Phrase = phrase.ToString();
                }

                // NOTE(review): imageProcessPageParagraphs is populated but never attached
                // to imageProcessPage anywhere in this method, so the returned pages appear
                // to carry no paragraph data. The member of ImageProcessPage that should
                // receive it is not visible here - confirm and add the assignment.
            }

            imageProcessPageCollection.Add(imageProcessPage);
        }

        return imageProcessPageCollection;
    }
    catch (AnnotateImageException e)
    {
        throw new FaultException(e.Response.ToString());
    }
}
/// <summary>
/// Runs document text detection on an image file and returns the detected text prefixed
/// with "Text: ".
/// </summary>
/// <param name="GVA_File_Path">Path of the image file to analyze.</param>
/// <param name="Credential_Path">
/// Path of the service-account JSON file. When null or blank, the legacy placeholder file
/// name is used so existing callers keep their behavior.
/// </param>
/// <returns>
/// "Text: " followed by the detected text; just "Text: " when the service returns no
/// annotation.
/// </returns>
public string doc_text_dection(string GVA_File_Path, string Credential_Path)
{
    // The parameter used to be ignored in favor of a hard-coded placeholder name.
    var credentialFile = string.IsNullOrWhiteSpace(Credential_Path)
        ? "Your_Json_File_Name.json"
        : Credential_Path;
    Environment.SetEnvironmentVariable("GOOGLE_APPLICATION_CREDENTIALS", credentialFile);

    // Load the image file into memory.
    var image = Image.FromFile(GVA_File_Path);

    // Instantiate a client and run the detection.
    ImageAnnotatorClient client = ImageAnnotatorClient.Create();
    TextAnnotation text = client.DetectDocumentText(image);

    // text is null when nothing was detected; the null-conditional keeps that case
    // from throwing and yields an empty payload after the prefix.
    return $"Text: {text?.Text}";
}
/// <summary>
/// Command handler: runs document-text, web and text detection on the file referenced by
/// <c>Image</c>, feeds the results to a <c>Detector</c>, and publishes the resulting
/// document type and data through the bound properties.
/// </summary>
/// <param name="obj">Command parameter (unused).</param>
private void ExecuteProcessCommand(object obj)
{
    var image = Google.Cloud.Vision.V1.Image.FromFile(Image);

    var documentText = _client.DetectDocumentText(image, _imageContext);
    var detection = _client.DetectWebInformation(image, _imageContext);
    var text = _client.DetectText(image, _imageContext);

    // Either lookup can come back empty (no best-guess label, no document text);
    // fall back to an empty string instead of throwing NullReferenceException.
    var bestGuess = detection?.BestGuessLabels.FirstOrDefault();
    var label = (bestGuess?.Label ?? string.Empty).ToUpper();
    var fullText = (documentText?.Text ?? string.Empty).ToUpper();

    Detector mainDetector = new Detector(fullText, label, detection, text);
    var document = mainDetector.Execute();

    DocumentType = document.Type.ToString();
    DocumentData = document.ToString();

    // nameof keeps the notifications in sync with the property names under refactoring.
    OnPropertyChanged(nameof(DocumentData));
    OnPropertyChanged(nameof(DocumentType));
}
/// <summary>
/// Runs document text detection on the image held in <paramref name="arquivo"/> and
/// counts the detected characters, ignoring spaces and line feeds.
/// </summary>
/// <param name="arquivo">In-memory stream containing the image bytes.</param>
/// <returns>
/// Number of characters in the detected text that are neither ' ' nor '\n';
/// 0 when the service returns no annotation.
/// </returns>
public int Executar(MemoryStream arquivo)
{
    var documento = arquivo.ToArray();
    Image imagem = Image.FromBytes(documento);

    ImageAnnotatorClient client = ImageAnnotatorClient.Create();
    TextAnnotation text = client.DetectDocumentText(imagem);

    // No annotation means no text was detected.
    if (text == null)
    {
        return 0;
    }

    // Equivalent to replacing '\n' with ' ', splitting on ' ' and summing the piece
    // lengths, without the intermediate string and array.
    int total = 0;
    foreach (char caractere in text.Text)
    {
        if (caractere != ' ' && caractere != '\n')
        {
            total++;
        }
    }

    return total;
}
/// <summary>
/// Sizes the canvas from the image dimensions, runs document text detection on the file,
/// records one <c>BoundingText</c> per detected paragraph and writes the result out.
/// </summary>
/// <param name="fileName">Path of the image file to process.</param>
private void OcrImageData(string fileName)
{
    // Canvas is drawn at 1.8x the source image size.
    float canvasScale = 1.8f;

    // The bitmap is only needed for its dimensions; dispose it right away so the GDI
    // handle and the file lock are released.
    using (var img = new sysDraw.Bitmap(fileName))
    {
        this.parentViewModel.CanvasWidth.Value = (int)(img.Width * canvasScale);
        this.parentViewModel.CanvasHeight.Value = (int)(img.Height * canvasScale);
    }

    ImageAnnotatorClient client = ImageAnnotatorClient.Create();
    var imageForGoogle = Image.FromFile(fileName);
    TextAnnotation response = client.DetectDocumentText(imageForGoogle);

    // response is null when no text was detected; in that case nothing is added.
    if (response != null)
    {
        foreach (var page in response.Pages)
        {
            foreach (var block in page.Blocks)
            {
                string box = string.Join(" - ", block.BoundingBox.Vertices.Select(v => $"({v.X}, {v.Y})"));
                Debug.Print($"Block {block.BlockType} at {box}");

                foreach (var paragraph in block.Paragraphs)
                {
                    box = string.Join(" - ", paragraph.BoundingBox.Vertices.Select(v => $"({v.X}, {v.Y})"));
                    Debug.Print($" Paragraph at {box}");

                    string text = "";
                    foreach (var word in paragraph.Words)
                    {
                        // Build the word once and reuse it for logging and accumulation.
                        string wordText = string.Join("", word.Symbols.Select(s => s.Text));
                        Debug.Print($" Word: {wordText}");
                        text += wordText;
                    }

                    // Vertex 0 and vertex 2 are used as opposite corners of the box.
                    int top = paragraph.BoundingBox.Vertices[0].Y;
                    int left = paragraph.BoundingBox.Vertices[0].X;
                    int height = paragraph.BoundingBox.Vertices[2].Y - top;
                    int width = paragraph.BoundingBox.Vertices[2].X - left;

                    this.boundingTextList.Add(new BoundingText(text, left, top, width, height));
                }
            }
        }
    }

    Write2FileOcrResult(this.boundingTextList, fileName);
}
/// <summary>
/// Runs document text detection on an image file, saves the full text next to it as
/// "&lt;path&gt;.txt" and prints the block/paragraph/word structure to the console.
/// </summary>
/// <param name="path">Path of the image file to analyze.</param>
static void GoogleVisionRequest(string path)
{
    // Developer-machine key file. It is only applied when it actually exists; otherwise
    // the builder is left at its defaults so the client library resolves credentials on
    // its own instead of failing on a path that exists on one machine only.
    const string localCredentialsPath = @"C:\Users\d4gei\Workspace\ScanAndHoardData\scanandhoard-11700d373751.json";

    ImageAnnotatorClientBuilder builder = new ImageAnnotatorClientBuilder();
    if (File.Exists(localCredentialsPath))
    {
        builder.CredentialsPath = localCredentialsPath;
    }

    ImageAnnotatorClient client = builder.Build();
    var image = Image.FromFile(path);
    TextAnnotation text = client.DetectDocumentText(image);

    // The annotation is null when the image contains no detectable text.
    if (text == null)
    {
        Console.WriteLine("No text detected.");
        return;
    }

    string fileName = path + ".txt";
    File.WriteAllText(fileName, text.Text);

    Console.WriteLine($"Text: {text.Text}");
    foreach (var page in text.Pages)
    {
        foreach (var block in page.Blocks)
        {
            string box = string.Join(" - ", block.BoundingBox.Vertices.Select(v => $"({v.X}, {v.Y})"));
            Console.WriteLine($"Block {block.BlockType} at {box}");

            foreach (var paragraph in block.Paragraphs)
            {
                box = string.Join(" - ", paragraph.BoundingBox.Vertices.Select(v => $"({v.X}, {v.Y})"));
                Console.WriteLine($" Paragraph at {box}");

                foreach (var word in paragraph.Words)
                {
                    Console.WriteLine($" Word: {string.Join("", word.Symbols.Select(s => s.Text))}");
                }
            }
        }
    }
}
static async Task Main(string[] args) { try { if (args.Length != 1) { Console.WriteLine("Missing image"); return; } var imageFile = args[0]; if (!File.Exists(imageFile)) { Console.WriteLine("Image file doesn't exist"); return; } await CodAuthenticatorAsync(); Console.WriteLine("Logged in"); var users = new List <UserModel>(); ImageAnnotatorClient client = ImageAnnotatorClient.Create(); var image = Image.FromFile(imageFile); TextAnnotation text = client.DetectDocumentText(image); foreach (var page in text.Pages) { foreach (var block in page.Blocks) { foreach (var paragraph in block.Paragraphs) { // Realistically, if this block is actually one of the teams, you won't have less than 2 'words' here // One word will always be the rank (or should be) and another would be the username. // Clan tags would also show up here, but they're irrelevant and will be ignored. if (paragraph.Words.Count < 2) { continue; } bool inClanTag = false; DetectedBreak lastBreakingChar = default; foreach (var word in paragraph.Words) { var fullWord = string.Join("", word.Symbols.Select(s => s.Text)).Trim(); var lastUser = users.LastOrDefault(); switch (fullWord) { case "[": if (inClanTag) { throw new Exception("Found clan tag opening bracket before a previous clan tag was closed"); } inClanTag = true; continue; case "]": if (!inClanTag) { throw new Exception("Found clan tag closing bracket while not in clan tag"); } inClanTag = false; continue; } if (inClanTag) { //Console.WriteLine($"Found {fullWord} in clan tag"); continue; } if (short.TryParse(fullWord, out var level)) { if (level < 1 || level > 155) { Console.WriteLine($"Level {level} found but not in range."); continue; } if ((lastBreakingChar != default && lastBreakingChar.Type == DetectedBreak.Types.BreakType.EolSureSpace) || lastUser == default || lastUser.IsComplete()) { users.Add(new UserModel(level, block)); Console.WriteLine($"Added new possible user with level {level} to list"); } else { Console.WriteLine($"Found possible level 
{level} but no EOL found and lastUser exists"); } } // This regex might need to be more strict. // Your Activision ID is what is displayed in-game for Modern Warfare. Display names must be between 2 and 16 characters with no special characters. Unicode characters are supported. else if (new Regex(@"^(\w{2,16})$").IsMatch(fullWord)) { if (lastUser != default && lastUser.IsComplete() && lastBreakingChar != default && lastBreakingChar.Type == DetectedBreak.Types.BreakType.Space && block == lastUser.Block) { Console.WriteLine($"Found another part of username for {lastUser.Username} -> {fullWord}"); lastUser.AppendToUsername(fullWord); } else if (lastUser == default || lastUser.IsComplete()) { Console.WriteLine($"Found matching username pattern '{fullWord}' but no rank was found before"); continue; } else { lastUser.SetUsername(fullWord); Console.WriteLine($"\tSet username to {fullWord}"); } } lastBreakingChar = word.Symbols.LastOrDefault()?.Property?.DetectedBreak; }
/// <summary>
/// Batch processing method for performing OCR on all pages of all
/// documents provided and writes the result to the database.
/// </summary>
/// <remarks>
/// Runs as a three-stage pipeline: convert each file to page images, OCR the pages of
/// each document in parallel, then save each document with its page texts. Page texts
/// are collected in completion order, not page order.
/// </remarks>
/// <param name="filePaths">Array of paths to the pdf files to be added.</param>
/// <param name="overrideExisting">Indicate whether or not to override existing
/// data for any of the documents (if there is any).</param>
/// <returns>An awaitable Task object.</returns>
public async Task WriteToDBAsync(string[] filePaths, bool overrideExisting)
{
    ValidateVision();
    var toProcess = ProcessPaths(filePaths, overrideExisting);
    if (toProcess.Length == 0)
    {
        return;
    }

    var tempPath = Path.GetTempPath();
    var pngBlock = new BufferBlock<Tuple<string, string[]>>();
    var dbBlock = new BufferBlock<Tuple<Document, List<PageText>>>();

    // Stage 1: render every document to page images.
    var post = Task.Run(() =>
    {
        try
        {
            foreach (var path in toProcess)
            {
                pngBlock.Post(new Tuple<string, string[]>(
                    Path.GetFileNameWithoutExtension(path).Trim(),
                    GetPngImage(path, tempPath)));
            }
        }
        finally
        {
            // Always signal completion, otherwise a failure here would leave the
            // downstream stages waiting forever.
            pngBlock.Complete();
        }
    });

    // Stage 2: OCR the pages of each document.
    var receiveThenPost = Task.Run(async () =>
    {
        try
        {
            // OutputAvailableAsync yields false only once the block is completed AND
            // drained. Polling Completion.IsCompleted and then calling Receive() can
            // throw when the block finishes between the check and the receive.
            while (await pngBlock.OutputAvailableAsync())
            {
                var tuple = pngBlock.Receive();
                var document = new Document { FileName = tuple.Item1.Trim() };
                var docText = new List<PageText>();
                var docTextGate = new object();

                Parallel.For(0, tuple.Item2.Length, pageNum =>
                {
                    var image = Image.FromFile(tuple.Item2[pageNum]);
                    File.Delete(tuple.Item2[pageNum]); // Cleanup
                    var response = _client.DetectDocumentText(image);
                    var pageTexts = ProcessResponse(response, document.FileName, pageNum);

                    // List<T> is not safe for concurrent writers.
                    lock (docTextGate)
                    {
                        docText.AddRange(pageTexts);
                    }
                });

                dbBlock.Post(new Tuple<Document, List<PageText>>(document, docText));
            }
        }
        finally
        {
            dbBlock.Complete();
        }
    });

    // Stage 3: persist each processed document.
    var receive = Task.Run(async () =>
    {
        while (await dbBlock.OutputAvailableAsync())
        {
            var tuple = dbBlock.Receive();
            using (var context = new DataModel(_connectionString))
            {
                context.Documents.Add(tuple.Item1);
                context.PageText.AddRange(tuple.Item2);
                context.SaveChanges();
            }
        }
    });

    await Task.WhenAll(post, receiveThenPost, receive);
}
/// <summary>
/// Runs document text detection on a bitmap and replays every detected symbol into
/// <paramref name="builder"/>, followed by the whitespace or line break the service
/// reported after that symbol.
/// </summary>
/// <param name="bitmap">Bitmap to analyze; converted via <c>ConvertBitmapToGoogleImage</c>.</param>
/// <param name="builder">Receives characters, white space and new lines in reading order.</param>
public void ProcessImage(Bitmap bitmap, IExtractionResultBuilder builder)
{
    Image googleImage = ConvertBitmapToGoogleImage(bitmap);
    ImageAnnotatorClient annotator = ImageAnnotatorClient.Create();
    TextAnnotation annotation = annotator.DetectDocumentText(googleImage);

    // Nothing to report when the service returns no annotation.
    if (annotation == null)
    {
        return;
    }

    foreach (Page page in annotation.Pages)
    {
        foreach (Block block in page.Blocks)
        {
            foreach (Paragraph paragraph in block.Paragraphs)
            {
                foreach (Word word in paragraph.Words)
                {
                    foreach (Symbol symbol in word.Symbols)
                    {
                        Character glyph = new Character
                        {
                            Text = symbol.Text,
                            Confidence = symbol.Confidence
                        };

                        // Copy the four corners of the symbol's bounding box.
                        for (int corner = 0; corner < 4; corner++)
                        {
                            glyph.Bound[corner] = new Vertices();
                            glyph.Bound[corner].X = symbol.BoundingBox.Vertices[corner].X;
                            glyph.Bound[corner].Y = symbol.BoundingBox.Vertices[corner].Y;
                        }

                        // Width comes from corners 0 -> 1, height from corners 0 -> 3.
                        Rectangle bounds = new Rectangle(
                            glyph.Bound[0].X,
                            glyph.Bound[0].Y,
                            glyph.Bound[1].X - glyph.Bound[0].X,
                            glyph.Bound[3].Y - glyph.Bound[0].Y);

                        // Confidence is passed on as a rounded percentage.
                        int confidencePercent = (int)(Math.Round(glyph.Confidence * 100));
                        builder.AddNewCharacter(glyph.Text, confidencePercent, bounds);

                        var detectedBreak = symbol.Property?.DetectedBreak;
                        if (detectedBreak == null)
                        {
                            continue;
                        }

                        switch (detectedBreak.Type)
                        {
                            case TextAnnotation.Types.DetectedBreak.Types.BreakType.EolSureSpace:
                            case TextAnnotation.Types.DetectedBreak.Types.BreakType.LineBreak:
                                builder.AddNewLine();
                                break;

                            case TextAnnotation.Types.DetectedBreak.Types.BreakType.Space:
                            case TextAnnotation.Types.DetectedBreak.Types.BreakType.SureSpace:
                            case TextAnnotation.Types.DetectedBreak.Types.BreakType.Unknown:
                                builder.AddWhiteSpace();
                                break;

                            case TextAnnotation.Types.DetectedBreak.Types.BreakType.Hyphen:
                                // Hyphen breaks emit nothing.
                                break;
                        }
                    }
                }
            }
        }
    }
}