Example #1
0
        internal void InsertImageIntoSection(OneNoteSection section, string imageFilePath, string publishUrl)
        {
            string newPageId;

            _oneNoteApp.CreateNewPage(section.Id, out newPageId, OneNote.NewPageStyle.npsDefault);

            OneNotePage newPage = GetPageById(newPageId);

            InsertImageIntoPage(newPage, imageFilePath, publishUrl, true);
        }
Example #2
0
        private void fnOCR(string v_strImgPath)
        {
            //获取图片的Base64编码
            FileInfo file = new FileInfo(v_strImgPath);

            using (MemoryStream ms = new MemoryStream())
            {
                Bitmap bp = new Bitmap(v_strImgPath);

                switch (file.Extension.ToLower())
                {
                    case ".jpg":
                        bp.Save(ms, ImageFormat.Jpeg);
                        break;
                    case ".jpeg":
                        bp.Save(ms, ImageFormat.Jpeg);
                        break;
                    case ".gif":
                        bp.Save(ms, ImageFormat.Gif);
                        break;
                    case ".bmp":
                        bp.Save(ms, ImageFormat.Bmp);
                        break;
                    case ".tiff":
                        bp.Save(ms, ImageFormat.Tiff);
                        break;
                    case ".png":
                        bp.Save(ms, ImageFormat.Png);
                        break;
                    case ".emf":
                        bp.Save(ms, ImageFormat.Emf);
                        break;
                    default:
                        this.labMsg.Content = "不支持的图片格式。";
                        return;
                }

                byte[] buffer = ms.GetBuffer();
                string _Base64 = Convert.ToBase64String(buffer);

                //向Onenote2010中插入图片
                var onenoteApp = new Microsoft.Office.Interop.OneNote.Application();


                /*string sectionID; Console.WriteLine("wang");
                onenoteApp.OpenHierarchy(AppDomain.CurrentDomain.BaseDirectory + "tmpPath/" + "newfile.one", 
                    null, out sectionID, Microsoft.Office.Interop.OneNote.CreateFileType.cftSection);
                string pageID = "{A975EE72-19C3-4C80-9C0E-EDA576DAB5C6}{1}{B0}";  // 格式 {guid}{tab}{??}
                onenoteApp.CreateNewPage(sectionID, out pageID, Microsoft.Office.Interop.OneNote.NewPageStyle.npsBlankPageNoTitle);
                */

                var existingPageId = "";
                //var pageNode;
                string notebookXml;
                if (existingPageId == "")
                {
                    onenoteApp.GetHierarchy(null, Microsoft.Office.Interop.OneNote.HierarchyScope.hsPages, out notebookXml);
                    //onenoteApp.GetHierarchy(pageID, HierarchyScope.hsPages, out notebookXml);

                    var doc = XDocument.Parse(notebookXml);
                    var ns = doc.Root.Name.Namespace;
                    var sectionNode = doc.Descendants(ns + "Section").FirstOrDefault();
                    var sectionID = sectionNode.Attribute("ID").Value;
                    onenoteApp.CreateNewPage(sectionID, out existingPageId);
                    var pageNode = doc.Descendants(ns + "Page").FirstOrDefault();
                    if (pageNode != null)
                    {
                        //Image Type 只支持这些类型:auto|png|emf|jpg
                        string ImgExtension = file.Extension.ToLower().Substring(1);
                        switch (ImgExtension)
                        {
                            case "jpg":
                                ImgExtension = "jpg";
                                break;
                            case "png":
                                ImgExtension = "png";
                                break;
                            case "emf":
                                ImgExtension = "emf";
                                break;
                            default:
                                ImgExtension = "auto";
                                break;
                        }


                        var page = new XDocument(new XElement(ns + "Page", new XAttribute("ID", existingPageId),
                                         new XElement(ns + "Outline",
                                           new XElement(ns + "OEChildren",
                                             new XElement(ns + "OE",
                                               new XElement(ns + "Image",
                                                 new XAttribute("format", ImgExtension), new XAttribute("originalPageNumber", "0"),
                                                 new XElement(ns + "Position",
                                                        new XAttribute("x", "0"), new XAttribute("y", "0"), new XAttribute("z", "0")),
                                                 new XElement(ns + "Size",
                                                        new XAttribute("width", bp.Width.ToString()), new XAttribute("height", bp.Height.ToString())),
                                                    new XElement(ns + "Data", _Base64)))))));
                        //page.Root.SetAttributeValue("ID", existingPageId);
                        onenoteApp.UpdatePageContent(page.ToString(), DateTime.MinValue);

                        //线程休眠时间,单位毫秒,若图片很大,则延长休眠时间,保证Onenote OCR完毕
                        System.Threading.Thread.Sleep(Int32.Parse(System.Configuration.ConfigurationManager.AppSettings["WaitTIme"]));

                        string pageXml;
                        onenoteApp.GetPageContent(existingPageId, out pageXml, Microsoft.Office.Interop.OneNote.PageInfo.piBinaryData);//piAll

                        //获取OCR后的内容
                        FileStream tmpXml = new FileStream(System.Configuration.ConfigurationManager.AppSettings["tmpPath"] + @"\tmp.xml", FileMode.Create, FileAccess.ReadWrite);
                        StreamWriter sw = new StreamWriter(tmpXml);
                        sw.Write(pageXml);
                        sw.Flush();
                        sw.Close();
                        tmpXml.Close();

                        FileStream tmpOnenote = new FileStream(System.Configuration.ConfigurationManager.AppSettings["tmpPath"] + @"\tmp.xml", FileMode.Open, FileAccess.ReadWrite);
                        XmlReader reader = XmlReader.Create(tmpOnenote);
                        XElement rdlc = XElement.Load(reader);

                        XmlNameTable nameTable = reader.NameTable;
                        XmlNamespaceManager mgr = new XmlNamespaceManager(nameTable);
                        mgr.AddNamespace("one", ns.ToString());

                        StringReader sr = new StringReader(pageXml);
                        XElement onenote = XElement.Load(sr);

                        var xml = from o in onenote.XPathSelectElements("//one:Image", mgr)
                                  select o.XPathSelectElement("//one:OCRText", mgr).Value;
                        this.txtOCRed.Text = (xml.First().ToString()).Replace(" ", "");

                        sr.Close();
                        reader.Close();
                        tmpOnenote.Close();
                        onenoteApp.DeleteHierarchy(existingPageId);
                        //onenoteApp.DeleteHierarchy(sectionID, DateTime.MinValue, true);  // 摧毁原始页面
                    }
                }

                /*Onenote 2010 中图片的XML格式
                   <one:Image format="" originalPageNumber="0" lastModifiedTime="" objectID="">
                        <one:Position x="" y="" z=""/>
                        <one:Size width="" height=""/>
                        <one:Data>Base64</one:Data>
                  
                        //以下标签由Onenote 2010自动生成,不要在程序中处理,目标是获取OCRText中的内容。
                        <one:OCRData lang="en-US">
                        <one:OCRText>
                            <![CDATA[   OCR后的文字   ]]>
                        </one:OCRText>
                        <one:OCRToken startPos="0" region="0" line="0" x="4.251968383789062" y="3.685039281845092" width="31.18110275268555" height="7.370078563690185"/>
                        <one:OCRToken startPos="7" region="0" line="0" x="39.40157318115234" y="3.685039281845092" width="13.32283401489258" height="8.78740119934082"/>
                        <one:OCRToken startPos="12" region="0" line="1" x="4.251968383789062" y="17.85826683044434" width="23.52755928039551" height="6.803150177001953"/>
                        <one:OCRToken startPos="18" region="0" line="1" x="32.031494140625" y="17.85826683044434" width="41.10236358642578" height="6.803150177001953"/>
                        <one:OCRToken startPos="28" region="0" line="1" x="77.66928863525391" y="17.85826683044434" width="31.46456718444824" height="6.803150177001953"/>
                        ................
                   </one:Image>      
                */


                /*ObjectID格式
                  The representation of an object to be used for identification of objects on a page. Not unique through OneNote, but unique on the page and the hierarchy. 
                   <xsd:simpleType name="ObjectID" ">
                      <xsd:restriction base="xsd:string">
                         <xsd:pattern value="\{[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}\}\{[0-9]+\}\{[A-Z][0-9]+\}" />
                      </xsd:restriction>
                   </xsd:simpleType>
                */

                
            }
        }
        /// <summary>
        /// onenote 2010,注意需要先在onenote中创建笔记本,并且将至转换为onenote2007格式
        /// 推荐使用onenote2016(个人版即可),API与2010类似,(去掉XMLSchema.xs2007参数即可)其他可参考API参数命名。
        /// 注意1:一定要将dll属性中的“嵌入互操作类型”属性关闭
        /// </summary>
        /// <param name="imgPath"></param>
        /// <returns></returns>
        public string Ocr_2010(string imgPath)
        {
            try
            {
                #region 确定section_path存在
                section_path = @"C:\Users\zhensheng\Desktop\打杂\ocr\ocr.one";
                if (string.IsNullOrEmpty(section_path))
                {
                    Console.WriteLine("请先建立笔记本");
                    File.AppendAllText(AppDomain.CurrentDomain.BaseDirectory + @"\log.txt", "需要先在onenote中创建笔记本,并且将至转换为onenote2007格式,且将.one文件得路径赋值给section_path");
                    return("");
                }
                #endregion

                #region 准备数据
                //后缀
                var imgType = Path.GetExtension(imgPath);
                imgPath = imgPath.Replace(".", "");

                var data = File.ReadAllBytes(imgPath);
                //根据大小确定重试次数
                int fileSize = Convert.ToInt32(data.Length / 1024 / 1024); // 文件大小 单位M

                string guid   = Guid.NewGuid().ToString();
                string pageID = "{" + guid + "}{1}{B0}";  // 格式 {guid}{tab}{??}

                string     pageXml;
                XNamespace ns;

                var onenoteApp = new Microsoft.Office.Interop.OneNote.Application();  //onenote提供的API
                if (onenoteApp == null)
                {
                    File.AppendAllText(AppDomain.CurrentDomain.BaseDirectory + @"\log.txt", "Microsoft.Office.Interop.OneNote.Application()创建失败");
                    return("");
                }

                //重试使用
                XmlNode xmlNode;
                int     retry = 0;

                #endregion

                do
                {
                    #region 创建页面并返回pageID
                    string sectionID;
                    onenoteApp.OpenHierarchy(section_path, null, out sectionID, CreateFileType.cftSection);
                    onenoteApp.CreateNewPage(sectionID, out pageID);
                    #endregion

                    #region 获取onenote页面xml结构格式
                    string notebookXml;
                    onenoteApp.GetHierarchy(null, HierarchyScope.hsPages, out notebookXml, XMLSchema.xs2007);
                    var doc = XDocument.Parse(notebookXml);
                    ns = doc.Root.Name.Namespace;

                    #endregion

                    #region 将图片插入页面
                    Tuple <string, int, int> imgInfo = this.GetBase64(data, imgType);
                    var page = new XDocument(new XElement(ns + "Page",
                                                          new XElement(ns + "Outline",
                                                                       new XElement(ns + "OEChildren",
                                                                                    new XElement(ns + "OE",
                                                                                                 new XElement(ns + "Image",
                                                                                                              new XAttribute("format", imgType), new XAttribute("originalPageNumber", "0"),
                                                                                                              new XElement(ns + "Position",
                                                                                                                           new XAttribute("x", "0"), new XAttribute("y", "0"), new XAttribute("z", "0")),
                                                                                                              new XElement(ns + "Size",
                                                                                                                           new XAttribute("width", imgInfo.Item2), new XAttribute("height", imgInfo.Item3)),
                                                                                                              new XElement(ns + "Data", imgInfo.Item1)))))));

                    page.Root.SetAttributeValue("ID", pageID);
                    onenoteApp.UpdatePageContent(page.ToString(), DateTime.MinValue, XMLSchema.xs2007);
                    #endregion

                    #region 通过轮询访问获取OCR识别的结果,轮询超时次数为6次
                    int count = 0;
                    do
                    {
                        System.Threading.Thread.Sleep(waitTime * (fileSize > 1 ? fileSize : 1)); // 小于1M的都默认1M
                        onenoteApp.GetPageContent(pageID, out pageXml, PageInfo.piBinaryData, XMLSchema.xs2007);
                    }while (pageXml == "" && count++ < 6);
                    #endregion

                    #region  除页面
                    onenoteApp.DeleteHierarchy(pageID, DateTime.MinValue);
                    //onenoteApp = null;
                    #endregion

                    #region 从xml中提取OCR识别后的文档信息,然后输出到string中
                    XmlDocument xmlDoc = new XmlDocument();
                    xmlDoc.LoadXml(pageXml);
                    XmlNamespaceManager nsmgr = new XmlNamespaceManager(xmlDoc.NameTable);
                    nsmgr.AddNamespace("one", ns.ToString());
                    xmlNode = xmlDoc.SelectSingleNode("//one:Image//one:OCRText", nsmgr);
                    #endregion
                }
                //如果OCR没有识别出信息,则重试三次(个人测试2010失败率为0.2~0.3)
                while (xmlNode == null && retry++ < 3);
                if (xmlNode == null)
                {
                    File.AppendAllText(AppDomain.CurrentDomain.BaseDirectory + @"\log.txt", "OCR没有识别出值");
                    return("");
                }
                var localFilePath = AppDomain.CurrentDomain.BaseDirectory + @"\" + Guid.NewGuid().ToString() + ".txt";
                File.WriteAllText(localFilePath, xmlNode.InnerText.ToString());
                Console.WriteLine(xmlNode.InnerText.ToString());

                return(localFilePath);
            }
            catch (Exception e)
            {
                File.AppendAllText(AppDomain.CurrentDomain.BaseDirectory + @"\log.txt", e.ToString());
                return("");
            }
        }