Esempio n. 1
0
        /// <summary>
        /// Captures the screen, writes the capture as a JPEG image into the OneNote page named
        /// "SandboxPage", polls that page until OneNote has produced <c>OCRText</c> for it, shows
        /// the filtered text in <c>textbox</c>, deletes the page's outlines again, and finally
        /// minimizes the window and calls <c>ReadText</c>.
        /// </summary>
        /// <param name="sender">Event source; forwarded unchanged to <c>ReadText</c>.</param>
        /// <param name="e">Event arguments; forwarded unchanged to <c>ReadText</c>.</param>
        private void ImageRead(object sender, RoutedEventArgs e)
        {
            string strXML, notebookXml;
            string pageToBeChange = "SandboxPage";

            Microsoft.Office.Interop.OneNote.Application app = new Microsoft.Office.Interop.OneNote.Application();
            app.GetHierarchy(null, HierarchyScope.hsPages, out notebookXml);
            var doc = XDocument.Parse(notebookXml);
            var ns  = doc.Root.Name.Namespace;

            // The explicit (string) conversion yields null for a missing attribute instead of throwing,
            // so pages without a "name" attribute are simply skipped.
            var pageNode = doc.Descendants(ns + "Page")
                              .FirstOrDefault(n => (string)n.Attribute("name") == pageToBeChange);
            var existingPageId = pageNode == null ? null : (string)pageNode.Attribute("ID");
            if (string.IsNullOrEmpty(existingPageId))
            {
                // Report through the textbox, like the timeout path below, rather than crashing
                // with a NullReferenceException when the target page does not exist.
                textbox.Text = "Could not find a OneNote page named \"" + pageToBeChange + "\".";
                return;
            }

            // NOTE(review): the bitmap is not disposed here because SOURCE does not show whether
            // ScreenCapture() hands ownership to the caller - confirm and dispose if it does.
            Bitmap bitmap = ScreenCapture();

            string fileString;
            using (MemoryStream stream = new MemoryStream())
            {
                bitmap.Save(stream, ImageFormat.Jpeg);
                fileString = Convert.ToBase64String(stream.ToArray());
            }

            // Replacement page content: page settings, title, and one outline holding the image.
            string strImportXML =
                "<?xml version=\"1.0\"?>" +
                "<one:Page xmlns:one=\"http://schemas.microsoft.com/office/onenote/2013/onenote\" ID=\"" + existingPageId + "\">" +
                "    <one:PageSettings RTL=\"false\" color=\"automatic\">" +
                "        <one:PageSize>" +
                "            <one:Automatic/>" +
                "        </one:PageSize>" +
                "        <one:RuleLines visible=\"false\"/>" +
                "    </one:PageSettings>" +
                "    <one:Title style=\"font-family:Calibri;font-size:17.0pt\" lang=\"en-US\">" +
                "        <one:OE alignment=\"left\">" +
                "            <one:T>" +
                "                <![CDATA[SandboxPage]]>" +
                "            </one:T>" +
                "        </one:OE>" +
                "    </one:Title>" +
                "    <one:Outline >" +
                "        <one:Position x=\"20\" y=\"50\"/>" +
                "        <one:Size width=\"" + bitmap.Width + "\" height=\"" + bitmap.Height + "\"  isSetByUser=\"true\"/>" +
                "        <one:OEChildren>" +
                "            <one:OE alignment=\"left\">" +
                "    <one:Image> <one:Data>" + fileString + "</one:Data></one:Image>" +
                "            </one:OE>" +
                "        </one:OEChildren>" +
                "    </one:Outline>" +
                "</one:Page>";
            app.UpdatePageContent(strImportXML);

            // Give OneNote some time to OCR the image: re-read the page every 200 ms until an
            // OCRText element shows up, giving up after 31 unsuccessful reads.
            XElement ocrNode;
            int timeoutCounter = 0;
            while (true)
            {
                app.GetPageContent(existingPageId, out strXML);
                doc = XDocument.Parse(strXML);
                ocrNode = doc.Descendants(ns + "OCRText").FirstOrDefault();
                if (ocrNode != null)
                {
                    break;
                }

                timeoutCounter++;
                if (timeoutCounter > 30)
                {
                    textbox.Text = "OneNote timed out texify-ing image! try again? maybe?...";
                    return;
                }
                System.Threading.Thread.Sleep(200);
            }
            string readText = ocrNode.Value;

            // Prepend text carried over in savedEnd (a member defined outside this method), then clear it.
            if (savedEnd != null)
            {
                readText = savedEnd + " " + readText;
                savedEnd = null;
            }

            Filters.CombineLines(ref readText);
            readText = readText.Replace('¡', 'i');

            Filters.PsychologyFilter(ref readText);

            textbox.Text = readText;

            // Empty the page again (cleanup). `doc` already holds the most recently fetched content.
            foreach (var item in doc.Descendants(ns + "Outline"))
            {
                // Check the attribute itself: reading .Value first would throw before any null test.
                var objectIdAttribute = item.Attribute("objectID");
                if (objectIdAttribute != null)
                {
                    app.DeletePageContent(existingPageId, objectIdAttribute.Value);
                }
            }

            // Minimize then read
            this.WindowState = System.Windows.WindowState.Minimized;
            this.ReadText(sender, e);
        }
Esempio n. 2
0
        /// <summary>
        /// Runs a base64-encoded image through OneNote: removes existing images from the first page
        /// found in the hierarchy, replaces that page's content with the supplied image, then polls
        /// the page for <c>one:OCRText</c> elements and returns their concatenated text.
        /// </summary>
        /// <param name="base64img">Base64-encoded image data.</param>
        /// <returns>
        /// The concatenated OCR text, or the literal "&lt;No OCR content on this page&gt;" when no
        /// page exists or no OCR text appeared before the timeout.
        /// </returns>
        private string GetOCRText(string base64img)
        {
            const string noOcrContent = "<No OCR content on this page>";

            // Only the dimensions are needed, so decode the image and release it (and the stream,
            // which must outlive the Image) straight away. The stream is already built over the
            // bytes; writing them into it again is unnecessary.
            byte[] imageBytes = Convert.FromBase64String(base64img);
            int imageWidth;
            int imageHeight;
            using (MemoryStream stream = new MemoryStream(imageBytes))
            using (Image bitmap = Image.FromStream(stream, true))
            {
                imageWidth  = bitmap.Width;
                imageHeight = bitmap.Height;
            }

            Console.WriteLine(imageWidth);
            Console.WriteLine(imageHeight);

            string pageToBeChange = "ocr";

            OneNote.Application onApplication = new OneNote.Application();
            string notebookXml;

            onApplication.GetHierarchy(null, HierarchyScope.hsPages, out notebookXml);
            XDocument  doc      = XDocument.Parse(notebookXml);
            XNamespace ns       = doc.Root.Name.Namespace;
            XElement   pageNode = doc.Descendants(ns + "Page").FirstOrDefault();

            // Bail out before touching the page: without a page (or its ID) nothing below can work.
            if (pageNode == null || pageNode.Attribute("ID") == null)
            {
                return noOcrContent;
            }
            string existingPageId = pageNode.Attribute("ID").Value;

            // Use the namespace OneNote actually returned rather than a fixed schema year, so the
            // "one:" XPath queries and the generated page XML match whichever schema is in use.
            string strNamespace = ns.NamespaceName;

            string origPageXML;
            onApplication.GetPageContent(existingPageId, out origPageXML, OneNote.PageInfo.piAll);
            XmlDocument xmlDoc = new XmlDocument();
            xmlDoc.LoadXml(origPageXML);
            XmlNamespaceManager nsmgr = new XmlNamespaceManager(xmlDoc.NameTable);
            nsmgr.AddNamespace("one", strNamespace);

            // Remove images already on the page so only the new image contributes OCR text.
            foreach (XmlNode imageNode in xmlDoc.SelectNodes("//one:Image", nsmgr))
            {
                XmlAttribute objectIdAttribute = imageNode.Attributes["objectID"];
                if (objectIdAttribute != null)
                {
                    onApplication.DeletePageContent(existingPageId, objectIdAttribute.Value);
                }
            }

            string m_xmlImageContent =
                "<one:Image><one:Size width=\"{1}\" height=\"{2}\" isSetByUser=\"true\" /><one:Data>{0}</one:Data>" +
                "</one:Image>";
            string m_xmlNewOutline =
                "<?xml version=\"1.0\"?><one:Page xmlns:one=\"{2}\" ID=\"{1}\"><one:Title><one:OE><one:T><![CDATA[{3}]]></one:T></one:OE></one:Title>{0}</one:Page>";

            string imageXmlStr    = string.Format(m_xmlImageContent, base64img, imageWidth, imageHeight);
            string pageChangesXml = string.Format(m_xmlNewOutline, new object[] { imageXmlStr, existingPageId, strNamespace, pageToBeChange });
            onApplication.UpdatePageContent(pageChangesXml, DateTime.MinValue);

            // Poll the page that was just updated until OCR text shows up or the deadline passes.
            // NOTE(review): currentUnixTime() is defined elsewhere; "+ 3" is presumably seconds - confirm.
            long startTime = currentUnixTime();
            while (true)
            {
                string strXML;
                onApplication.GetPageContent(existingPageId, out strXML, OneNote.PageInfo.piBinaryData);
                xmlDoc = new XmlDocument();
                xmlDoc.LoadXml(strXML);

                // Concatenate the text of every one:OCRText element on the page.
                string strPageContent = "";
                foreach (XmlNode ocrNode in xmlDoc.GetElementsByTagName("one:OCRText"))
                {
                    strPageContent = strPageContent + ocrNode.InnerText;
                }

                if (strPageContent != "")
                {
                    return strPageContent;
                }
                if (currentUnixTime() > startTime + 3)
                {
                    return noOcrContent;
                }

                // Pause between polls instead of spinning on the COM server.
                System.Threading.Thread.Sleep(200);
            }
        }