/// <summary>
/// Coroutine that carries out the asynchronous part of the library initialization:
/// waits for the tessdata routine, validates the requested languages, initializes
/// the native engine and finally waits for the configuration file routine.
/// </summary>
/// <param name="languageId">Language ID; several ids may be joined with '+'</param>
/// <returns>IEnumerator</returns>
private IEnumerator InitializeRoutine(string languageId)
{
    // Wait until the tessdata routine has finished before touching the engine
    yield return CoroutineRunner.RunCoroutine(InitializeTessdataRoutine());

    // Warn about every requested language that is missing from the available data set
    foreach(string requestedLanguage in languageId.Split('+'))
    {
        if(m_AvailableDataSet.Contains(requestedLanguage))
        {
            continue;
        }

        Debug.LogWarning(string.Format("Tesseract language id is not available. Please check your settings for the correct data import. [{0}]", requestedLanguage));
    }

    // Nothing to initialize without a native handle
    if(m_Handle == IntPtr.Zero)
    {
        yield break;
    }

    // A result of 0 from the native library is treated as success
    if(TesseractOCRBridge.Initialize(m_Handle, GetTessdataPersistentPath(), languageId) != 0)
    {
        Debug.Log("Initialize TesseractOCR Failed!");
        yield break;
    }

    // Wait for the configuration file routine before reporting success
    yield return CoroutineRunner.RunCoroutine(InitializeConfigFileRoutine(true));
    Debug.Log("Initialize TesseractOCR Success!");
}
/// <summary>
/// Forwards the requested page segmentation mode to the native library.
/// Asserts and does nothing when no handle exists.
/// </summary>
/// <param name="mode">Segmentation Mode</param>
private void SetPageSegmentationMode(SegmentationMode mode)
{
    bool hasHandle = m_Handle != IntPtr.Zero;
    Debug.Assert(hasHandle, "Handle must not be nulled!");
    if(!hasHandle)
    {
        return;
    }

    TesseractOCRBridge.SetPageSegmentationMode(m_Handle, mode);
}
/// <summary>
/// Releases the native handle, if one exists, and resets the stored handle to zero
/// so a second call becomes a no-op.
/// </summary>
public void Dispose()
{
    if(m_Handle == IntPtr.Zero)
    {
        return;
    }

    // End the engine first, then delete the handle itself
    TesseractOCRBridge.EndTesseractHandle(m_Handle);
    TesseractOCRBridge.DeleteTesseractHandle(m_Handle);
    m_Handle = IntPtr.Zero;
}
/// <summary>
/// Asks the native library to clear the engine's persistent cache.
/// Asserts and does nothing when no handle exists.
/// </summary>
private void ClearPersistentCache()
{
    bool hasHandle = m_Handle != IntPtr.Zero;
    Debug.Assert(hasHandle, "Handle must not be nulled!");
    if(!hasHandle)
    {
        return;
    }

    TesseractOCRBridge.ClearPersistentCache(m_Handle);
}
/// <summary>
/// Entry point for initializing the Tesseract OCR library. Creates the native
/// handle on first use and then starts the initialization coroutine.
/// </summary>
/// <param name="languageId">Language ID</param>
public void Initialize(string languageId)
{
    // Reuse an existing handle; only create one when none is stored yet
    bool needsHandle = m_Handle == IntPtr.Zero;
    if(needsHandle)
    {
        m_Handle = TesseractOCRBridge.CreateTesseractHandle();
    }

    IEnumerator routine = InitializeRoutine(languageId);
    CoroutineRunner.RunCoroutine(routine);
}
/// <summary>
/// Runs recognition on the native handle. No monitor is supplied
/// (IntPtr.Zero is passed as the second argument).
/// </summary>
/// <returns>The native result when a handle exists (0 = success), otherwise -1</returns>
private int Recognize()
{
    bool hasHandle = m_Handle != IntPtr.Zero;
    Debug.Assert(hasHandle, "Handle must not be nulled!");
    if(!hasHandle)
    {
        return -1;
    }

    return TesseractOCRBridge.Recognize(m_Handle, IntPtr.Zero);
}
/// <summary>
/// Reads the current page segmentation mode from the native library.
/// </summary>
/// <returns>The mode reported by the library, or SINGLE_BLOCK when no handle exists</returns>
private SegmentationMode GetPageSegmentationMode()
{
    bool hasHandle = m_Handle != IntPtr.Zero;
    Debug.Assert(hasHandle, "Handle must not be nulled!");
    if(!hasHandle)
    {
        return SegmentationMode.SINGLE_BLOCK;
    }

    return TesseractOCRBridge.GetPageSegmentationMode(m_Handle);
}
/// <summary>
/// Reads the OCR engine mode from the native library.
/// </summary>
/// <returns>The mode reported by the library, or DEFAULT when no handle exists</returns>
private OCREngineMode GetOCREngineMode()
{
    bool hasHandle = m_Handle != IntPtr.Zero;
    Debug.Assert(hasHandle, "Handle must not be nulled!");
    if(!hasHandle)
    {
        return OCREngineMode.DEFAULT;
    }

    return TesseractOCRBridge.GetTesseractEngineMode(m_Handle);
}
/// <summary>
/// Hands an image to the native library. The buffer is flipped vertically and
/// converted back to bytes before it is passed on.
/// Asserts and does nothing when no handle exists.
/// </summary>
/// <param name="imageData">Image data buffer</param>
/// <param name="width">Image Width</param>
/// <param name="height">Image Height</param>
public void SetImage(byte[] imageData, int width, int height)
{
    bool hasHandle = m_Handle != IntPtr.Zero;
    Debug.Assert(hasHandle, "Handle must not be nulled!");
    if(!hasHandle)
    {
        return;
    }

    // Flip first, then flatten the flipped pixels into the byte layout sent to the bridge
    Color32[] flippedPixels = TesseractOCRUtility.ImageFlipVertical(imageData, width, height);
    byte[] flippedBytes = TesseractOCRUtility.Color32ToBytes(flippedPixels);

    // Last argument is the number of bytes per image row
    TesseractOCRBridge.SetImageData(m_Handle, flippedBytes, width, height, BYTES_PER_PIXEL, BYTES_PER_PIXEL * width);
}
/// <summary>
/// Queries the native library for its version string.
/// </summary>
/// <returns>TesseractOCR version, or an empty string when the library returns a null pointer</returns>
public string GetVersion()
{
    IntPtr nativeVersion = TesseractOCRBridge.GetTesseractVersion();
    bool hasVersion = nativeVersion != IntPtr.Zero;
    Debug.Assert(hasVersion, "Version must not be nulled");
    if(!hasVersion)
    {
        return "";
    }

    return Marshal.PtrToStringAnsi(nativeVersion);
}
/// <summary>
/// Passes a configuration file to the native library, using the debug variant
/// of the call when requested. Does nothing when no handle exists.
/// </summary>
/// <param name="filename">Config Filename</param>
/// <param name="isDebug">True to use the debug configuration call</param>
public void SetConfigurationFile(string filename, bool isDebug = false)
{
    if(m_Handle == IntPtr.Zero)
    {
        return;
    }

    if(isDebug)
    {
        TesseractOCRBridge.SetDebugConfigurationFile(m_Handle, filename);
    }
    else
    {
        TesseractOCRBridge.SetConfigurationFile(m_Handle, filename);
    }
}
/// <summary>
/// Reads a configuration variable of type double from the native library.
/// </summary>
/// <param name="name">Parameter Name</param>
/// <returns>
/// The value written by the native call, or 0 when no handle exists.
/// The same variable is returned whether or not the native call reports success.
/// </returns>
private double GetConfigDoubleVariable(string name)
{
    double value = 0;

    bool hasHandle = m_Handle != IntPtr.Zero;
    Debug.Assert(hasHandle, "Handle must not be nulled!");
    if(hasHandle)
    {
        // The success flag is intentionally not inspected: both outcomes return 'value'
        TesseractOCRBridge.GetVariable(m_Handle, name, out value);
    }

    return value;
}
/// <summary>
/// Reads a configuration variable of type boolean from the native library.
/// </summary>
/// <param name="name">Parameter Name</param>
/// <returns>
/// The value written by the native call, or false when no handle exists.
/// The same variable is returned whether or not the native call reports success.
/// </returns>
private bool GetConfigBoolVariable(string name)
{
    bool value = false;

    bool hasHandle = m_Handle != IntPtr.Zero;
    Debug.Assert(hasHandle, "Handle must not be nulled!");
    if(hasHandle)
    {
        // The success flag is intentionally not inspected: both outcomes return 'value'
        TesseractOCRBridge.GetVariable(m_Handle, name, out value);
    }

    return value;
}
/// <summary>
/// Writes a configuration variable through the native library, using the debug
/// variant of the call when requested. Asserts and does nothing when no handle exists.
/// </summary>
/// <param name="name">Parameter Name</param>
/// <param name="value">Parameter Value</param>
/// <param name="isDebug">True to use the debug variable call</param>
private void SetConfigVariable(string name, string value, bool isDebug = false)
{
    bool hasHandle = m_Handle != IntPtr.Zero;
    Debug.Assert(hasHandle, "Handle must not be nulled!");
    if(!hasHandle)
    {
        return;
    }

    if(isDebug)
    {
        TesseractOCRBridge.SetDebugVariable(m_Handle, name, value);
    }
    else
    {
        TesseractOCRBridge.SetVariable(m_Handle, name, value);
    }
}
/// <summary>
/// Gets a type string configuration variable
/// </summary>
/// <param name="name">Parameter Name</param>
/// <returns>
/// The variable's value, or an empty string when no handle exists or the
/// native library returns a null pointer for the requested name
/// </returns>
private string GetConfigStringVariable(string name)
{
    Debug.Assert(m_Handle != IntPtr.Zero, "Handle must not be nulled!");
    if(m_Handle == IntPtr.Zero)
    {
        return "";
    }

    IntPtr stringPtr = TesseractOCRBridge.GetVariable(m_Handle, name);

    // This assert is about the returned string, not the handle; the message
    // previously duplicated the handle message and pointed at the wrong cause.
    Debug.Assert(stringPtr != IntPtr.Zero, "Variable string must not be nulled!");
    if(stringPtr == IntPtr.Zero)
    {
        return "";
    }

    return Marshal.PtrToStringAnsi(stringPtr);
}
/// <summary>
/// Gets the text from the image in UTF-8 format
/// </summary>
/// <returns>
/// The recognized text decoded from UTF-8, or an empty string when no handle
/// exists or the native library returns a null pointer
/// </returns>
public string GetText()
{
    Debug.Assert(m_Handle != IntPtr.Zero, "Handle must not be nulled!");
    if(m_Handle == IntPtr.Zero)
    {
        return "";
    }

    IntPtr textPtr = TesseractOCRBridge.GetTextData(m_Handle);
    Debug.Assert(textPtr != IntPtr.Zero, "Text must not be nulled!");
    if(textPtr == IntPtr.Zero)
    {
        return "";
    }

    // NOTE(review): if GetTextData wraps TessBaseAPIGetUTF8Text, the native buffer is
    // owned by the caller and should be released through the bridge - confirm.

    // Decode explicitly as UTF-8. Marshal.PtrToStringAnsi uses the system ANSI code page
    // on Windows .NET runtimes, which corrupts every non-ASCII character in the OCR result.
    int byteCount = 0;
    while(Marshal.ReadByte(textPtr, byteCount) != 0)
    {
        byteCount++;
    }

    if(byteCount == 0)
    {
        return "";
    }

    byte[] utf8Bytes = new byte[byteCount];
    Marshal.Copy(textPtr, utf8Bytes, 0, byteCount);
    return System.Text.Encoding.UTF8.GetString(utf8Bytes);
}
/// <summary>
/// Gets the language id used by the library
/// </summary>
/// <returns>
/// Language ID, or an empty string when no handle exists or the native
/// library returns a null pointer
/// </returns>
public string GetLanguageId()
{
    Debug.Assert(m_Handle != IntPtr.Zero, "Handle must not be nulled!");
    if(m_Handle == IntPtr.Zero)
    {
        return "";
    }

    IntPtr languagePtr = TesseractOCRBridge.GetTesseractLanguage(m_Handle);

    // The message previously said "Datapath", copied from GetDataPath,
    // which made a failing assert point at the wrong query.
    Debug.Assert(languagePtr != IntPtr.Zero, "Language must not be nulled!");
    if(languagePtr == IntPtr.Zero)
    {
        return "";
    }

    return Marshal.PtrToStringAnsi(languagePtr);
}
/// <summary>
/// Queries the native library for the data path it is using.
/// </summary>
/// <returns>
/// Tess data directory path, or an empty string when no handle exists or the
/// native library returns a null pointer
/// </returns>
public string GetDataPath()
{
    bool hasHandle = m_Handle != IntPtr.Zero;
    Debug.Assert(hasHandle, "Handle must not be nulled!");
    if(!hasHandle)
    {
        return "";
    }

    IntPtr nativePath = TesseractOCRBridge.GetTesseractDataPath(m_Handle);
    bool hasPath = nativePath != IntPtr.Zero;
    Debug.Assert(hasPath, "Datapath must not be nulled!");
    if(!hasPath)
    {
        return "";
    }

    return Marshal.PtrToStringAnsi(nativePath);
}
/// <summary>
/// Initializes tesseract's configuration list.
/// A temporary engine instance dumps its variables to a file named "config" in the
/// current directory; that file is parsed into the default configuration list, copied
/// into the active list and deleted. If exactly one user configuration file is found
/// under Assets/StreamingAssets/tessdata/configs, its values override the matching
/// entries of the active list.
/// </summary>
private void InitializeTesseractConfiguration()
{
    // Use a throw-away engine instance just to dump the variable list
    IntPtr handle = TesseractOCRBridge.CreateTesseractHandle();
    if(handle != IntPtr.Zero)
    {
        if(TesseractOCRBridge.Initialize(handle, Application.persistentDataPath + "/tessdata", "eng") == 0)
        {
            TesseractOCRBridge.PrintVariablesToFile(handle, "config");
        }

        // Release the engine with the matching deleter (same pair Dispose uses).
        // This previously called DeleteMonitorHandle on an engine handle.
        TesseractOCRBridge.EndTesseractHandle(handle);
        TesseractOCRBridge.DeleteTesseractHandle(handle);
    }

    string defaultConfigPath = Path.Combine(Directory.GetCurrentDirectory(), "config");
    if(File.Exists(defaultConfigPath))
    {
        string[] lines = File.ReadAllLines(defaultConfigPath);
        List<TesseractConfigInfo> defaults = new List<TesseractConfigInfo>(lines.Length);
        for(int i = 0; i < lines.Length; i++)
        {
            // Expected layout per line: name <tab> value <tab> description
            string[] split = lines[i].Split('\t');
            if(split.Length < 2)
            {
                // Not a variable line (blank or malformed); skip instead of throwing
                continue;
            }

            defaults.Add(new TesseractConfigInfo()
            {
                name = split[0],
                value = split[1],
                description = split.Length > 2 ? split[2] : ""
            });
        }

        m_TesseractDefaultConfigInfo = defaults.ToArray();

        IComparer<TesseractConfigInfo> comparer = new AscendingCompare();
        Array.Sort<TesseractConfigInfo>(m_TesseractDefaultConfigInfo, comparer);

        // NOTE(review): Array.Copy copies elements shallowly. If TesseractConfigInfo is a
        // reference type, the override pass below also changes the default list - confirm.
        int defaultConfigLen = m_TesseractDefaultConfigInfo.Length;
        m_TesseractConfigInfo = new TesseractConfigInfo[defaultConfigLen];
        Array.Copy(m_TesseractDefaultConfigInfo, m_TesseractConfigInfo, defaultConfigLen);

        File.Delete(defaultConfigPath);
    }

    string userDefinedConfigPath = "Assets/StreamingAssets/tessdata/configs";

    // Overrides can only be applied on top of a loaded list
    if(m_TesseractConfigInfo != null && Directory.Exists(userDefinedConfigPath))
    {
        string[] configs = Directory.GetFiles(userDefinedConfigPath, "*.", SearchOption.TopDirectoryOnly);
        if(configs.Length == 1)
        {
            string[] lines = File.ReadAllLines(configs[0]);
            for(int i = 0; i < lines.Length; i++)
            {
                string[] split = lines[i].Split('\t');
                if(split.Length < 2)
                {
                    // No value on this line; nothing to override
                    continue;
                }

                string overrideName = split[0];
                int index = Array.FindIndex<TesseractConfigInfo>(m_TesseractConfigInfo, a => a.name == overrideName);
                if(index >= 0)
                {
                    m_TesseractConfigInfo[index].value = split[1];
                }
            }

            // NOTE(review): this re-sorts the default list, not the list modified above.
            // Kept as in the original because the comparer's sort key is not visible here - confirm.
            if(m_TesseractDefaultConfigInfo != null)
            {
                IComparer<TesseractConfigInfo> comparer = new AscendingCompare();
                Array.Sort<TesseractConfigInfo>(m_TesseractDefaultConfigInfo, comparer);
            }
        }
    }
}