/// <summary>
/// Captures the screen, uploads the capture to the OneNote page named "SandboxPage" so that
/// OneNote runs OCR on it, shows the recognized (and filtered) text in <c>textbox</c>, removes
/// the uploaded outline from the page again, then minimizes the window and calls
/// <c>ReadText</c>.
/// </summary>
/// <param name="sender">Event source; forwarded unchanged to <c>ReadText</c>.</param>
/// <param name="e">Event arguments; forwarded unchanged to <c>ReadText</c>.</param>
/// <remarks>
/// Blocks the calling thread while polling OneNote for the OCR result (up to roughly six
/// seconds). If the page cannot be found or OCR does not finish in time, a message is written
/// to <c>textbox</c> and the method returns without calling <c>ReadText</c>.
/// </remarks>
private void ImageRead(object sender, RoutedEventArgs e)
{
    const string pageToBeChange = "SandboxPage";
    const int ocrPollIntervalMs = 200;
    const int maxOcrPolls = 31;

    string strXML, notebookXml;
    Microsoft.Office.Interop.OneNote.Application app = new Microsoft.Office.Interop.OneNote.Application();

    app.GetHierarchy(null, HierarchyScope.hsPages, out notebookXml);
    XDocument doc = XDocument.Parse(notebookXml);
    XNamespace ns = doc.Root.Name.Namespace;

    // The cast to string yields null for pages without a "name" attribute instead of throwing.
    XElement pageNode = doc.Descendants(ns + "Page")
        .FirstOrDefault(n => (string)n.Attribute("name") == pageToBeChange);
    XAttribute pageIdAttribute = pageNode == null ? null : pageNode.Attribute("ID");
    if (pageIdAttribute == null)
    {
        textbox.Text = "Could not find a OneNote page named \"" + pageToBeChange + "\".";
        return;
    }
    string existingPageId = pageIdAttribute.Value;

    // Encode the capture and release the GDI+ resources right away; only the base64 payload
    // and the dimensions are needed afterwards.
    // NOTE(review): assumes ScreenCapture() returns a fresh bitmap owned by the caller - confirm.
    string fileString;
    int imageWidth, imageHeight;
    using (Bitmap bitmap = ScreenCapture())
    using (MemoryStream stream = new MemoryStream())
    {
        bitmap.Save(stream, ImageFormat.Jpeg);
        fileString = Convert.ToBase64String(stream.ToArray());
        imageWidth = bitmap.Width;
        imageHeight = bitmap.Height;
    }

    // Use the namespace OneNote reported for the hierarchy so the page XML matches the
    // schema version of the running OneNote instance.
    string strImportXML =
        "<?xml version=\"1.0\"?>" +
        "<one:Page xmlns:one=\"" + ns.NamespaceName + "\" ID=\"" + existingPageId + "\">" +
        "<one:PageSettings RTL=\"false\" color=\"automatic\">" +
        "<one:PageSize><one:Automatic/></one:PageSize>" +
        "<one:RuleLines visible=\"false\"/>" +
        "</one:PageSettings>" +
        "<one:Title style=\"font-family:Calibri;font-size:17.0pt\" lang=\"en-US\">" +
        "<one:OE alignment=\"left\">" +
        "<one:T><![CDATA[" + pageToBeChange + "]]></one:T>" +
        "</one:OE>" +
        "</one:Title>" +
        "<one:Outline>" +
        "<one:Position x=\"20\" y=\"50\"/>" +
        "<one:Size width=\"" + imageWidth + "\" height=\"" + imageHeight + "\" isSetByUser=\"true\"/>" +
        "<one:OEChildren>" +
        "<one:OE alignment=\"left\">" +
        "<one:Image><one:Data>" + fileString + "</one:Data></one:Image>" +
        "</one:OE>" +
        "</one:OEChildren>" +
        "</one:Outline>" +
        "</one:Page>";
    app.UpdatePageContent(strImportXML);

    // Give OneNote some time to OCR the image: poll the page until an OCRText node shows up.
    XElement ocrNode;
    for (int attempt = 0; ; attempt++)
    {
        app.GetPageContent(existingPageId, out strXML);
        doc = XDocument.Parse(strXML);
        ocrNode = doc.Descendants(ns + "OCRText").FirstOrDefault();
        if (ocrNode != null)
        {
            break;
        }
        if (attempt >= maxOcrPolls)
        {
            // Remove the uploaded image even on failure; otherwise it stays on the page and
            // a later run could pick up its (late) OCR result instead of the new one.
            DeleteOutlines(app, existingPageId, doc, ns);
            textbox.Text = "OneNote timed out texify-ing image! try again? maybe?...";
            return;
        }
        System.Threading.Thread.Sleep(ocrPollIntervalMs);
    }

    string readText = ocrNode.Value;
    if (savedEnd != null)
    {
        readText = savedEnd + " " + readText;
        savedEnd = null;
    }
    Filters.CombineLines(ref readText);
    // U+00A1 (inverted exclamation mark) is replaced with 'i'. The source previously
    // contained a mis-encoded two-character literal here.
    readText = readText.Replace('\u00A1', 'i');
    Filters.PsychologyFilter(ref readText);
    textbox.Text = readText;

    // Empty the page again (cleanup).
    DeleteOutlines(app, existingPageId, doc, ns);

    // Minimize, then read.
    this.WindowState = System.Windows.WindowState.Minimized;
    this.ReadText(sender, e);
}

/// <summary>
/// Deletes every Outline element of <paramref name="pageDoc"/> that carries an objectID
/// from the OneNote page <paramref name="pageId"/>.
/// </summary>
private static void DeleteOutlines(Microsoft.Office.Interop.OneNote.Application app, string pageId, XDocument pageDoc, XNamespace ns)
{
    foreach (XElement outline in pageDoc.Descendants(ns + "Outline"))
    {
        // Check the attribute itself; reading .Value on a missing attribute would throw.
        XAttribute objectId = outline.Attribute("objectID");
        if (objectId != null)
        {
            app.DeletePageContent(pageId, objectId.Value);
        }
    }
}
/// <summary>
/// Runs OneNote's OCR on a base64-encoded image: replaces the images on the first page of the
/// OneNote hierarchy with the given image, retitles that page "ocr", then polls the page for
/// OCRText nodes and returns their concatenated text.
/// </summary>
/// <param name="base64img">The image file content, base64 encoded.</param>
/// <returns>
/// The concatenated OCR text, or <c>"&lt;No OCR content on this page&gt;"</c> when no page is
/// available or no OCR text appeared before the timeout.
/// </returns>
private string GetOCRText(string base64img)
{
    const string pageToBeChange = "ocr";
    const string noOcrContent = "<No OCR content on this page>";
    const int pollIntervalMs = 100;
    const string m_xmlImageContent =
        "<one:Image><one:Size width=\"{1}\" height=\"{2}\" isSetByUser=\"true\" /><one:Data>{0}</one:Data></one:Image>";
    const string m_xmlNewOutline =
        "<?xml version=\"1.0\"?><one:Page xmlns:one=\"{2}\" ID=\"{1}\"><one:Title><one:OE><one:T><![CDATA[{3}]]></one:T></one:OE></one:Title>{0}</one:Page>";

    // Only the dimensions are needed from the decoded image, so read them and release the
    // image and its backing stream immediately. (The stream is created over the bytes and
    // read from position 0; the bytes are not written into it a second time.)
    int imageWidth, imageHeight;
    byte[] imageBytes = Convert.FromBase64String(base64img);
    using (MemoryStream stream = new MemoryStream(imageBytes))
    using (Image bitmap = Image.FromStream(stream, true))
    {
        imageWidth = bitmap.Width;
        imageHeight = bitmap.Height;
    }
    Console.WriteLine(imageWidth);
    Console.WriteLine(imageHeight);

    OneNote.Application onApplication = new OneNote.Application();

    // The target page is the first Page element in the hierarchy.
    string notebookXml;
    onApplication.GetHierarchy(null, OneNote.HierarchyScope.hsPages, out notebookXml);
    XDocument hierarchy = XDocument.Parse(notebookXml);
    XElement pageNode = hierarchy.Descendants(hierarchy.Root.Name.Namespace + "Page").FirstOrDefault();
    XAttribute pageIdAttribute = pageNode == null ? null : pageNode.Attribute("ID");
    if (pageIdAttribute == null)
    {
        // Nothing to write to, hence nothing to OCR.
        return noOcrContent;
    }
    string existingPageId = pageIdAttribute.Value;

    // Remove the images already on the page so that only the new image contributes OCR text.
    string origPageXML;
    onApplication.GetPageContent(existingPageId, out origPageXML, OneNote.PageInfo.piAll);
    XDocument origPage = XDocument.Parse(origPageXML);
    // Use the namespace OneNote actually returned rather than a hard-coded schema version.
    XNamespace pageNs = origPage.Root.Name.Namespace;
    foreach (XElement image in origPage.Descendants(pageNs + "Image"))
    {
        XAttribute objectId = image.Attribute("objectID");
        if (objectId != null)
        {
            onApplication.DeletePageContent(existingPageId, objectId.Value);
        }
    }

    string imageXmlStr = string.Format(m_xmlImageContent, base64img, imageWidth, imageHeight);
    string pageChangesXml = string.Format(m_xmlNewOutline, imageXmlStr, existingPageId, pageNs.NamespaceName, pageToBeChange);
    onApplication.UpdatePageContent(pageChangesXml, DateTime.MinValue);

    // Poll the page that was just updated until OCR text appears or the timeout elapses.
    // NOTE(review): the "+ 3" presumes currentUnixTime() returns seconds - confirm.
    long startTime = currentUnixTime();
    string strPageContent;
    while (true)
    {
        string strXML;
        onApplication.GetPageContent(existingPageId, out strXML, OneNote.PageInfo.piBinaryData);
        XDocument page = XDocument.Parse(strXML);
        strPageContent = string.Concat(
            page.Descendants(page.Root.Name.Namespace + "OCRText").Select(ocr => ocr.Value));

        if (strPageContent != "" || currentUnixTime() > startTime + 3)
        {
            break;
        }

        // Pause between polls so the wait does not spin at full speed against OneNote.
        System.Threading.Thread.Sleep(pollIntervalMs);
    }

    return strPageContent == "" ? noOcrContent : strPageContent;
}