/// <summary>
/// Creates a new default-style page in the given section and inserts the image into that page.
/// </summary>
/// <param name="section">Section that receives the new page; its <c>Id</c> is handed to OneNote.</param>
/// <param name="imageFilePath">Image file path, forwarded unchanged to <c>InsertImageIntoPage</c>.</param>
/// <param name="publishUrl">Publish URL, forwarded unchanged to <c>InsertImageIntoPage</c>.</param>
internal void InsertImageIntoSection(OneNoteSection section, string imageFilePath, string publishUrl)
{
    // Ask OneNote for a fresh page; the id of the created page comes back through the out parameter.
    string createdPageId;
    _oneNoteApp.CreateNewPage(section.Id, out createdPageId, OneNote.NewPageStyle.npsDefault);

    // Resolve the id to a page object and delegate the actual insertion.
    // The trailing "true" is passed through as-is; its meaning is defined by InsertImageIntoPage.
    OneNotePage targetPage = GetPageById(createdPageId);
    InsertImageIntoPage(targetPage, imageFilePath, publishUrl, true);
}
/// <summary>
/// Runs OneNote's OCR on an image file and shows the recognised text in <c>txtOCRed</c>.
/// </summary>
/// <remarks>
/// The image is re-encoded, embedded as Base64 in a temporary page created in the first
/// section OneNote reports, and the page is read back after sleeping for the number of
/// milliseconds configured under the "WaitTIme" app setting. The temporary page is always
/// deleted again, even when a step in between fails.
/// Unsupported extensions and "nothing recognised" are reported through <c>labMsg</c>.
/// NOTE(review): the sleep blocks the calling thread; if this runs on the UI thread the
/// window will be unresponsive for that time.
/// </remarks>
/// <param name="v_strImgPath">Path of the image file to recognise.</param>
private void fnOCR(string v_strImgPath)
{
    FileInfo file = new FileInfo(v_strImgPath);

    // Invariant lower-casing: culture-sensitive ToLower() can mangle "I" (e.g. ".GIF" under tr-TR).
    string extension = file.Extension.ToLowerInvariant();

    // Pick the encoder before touching the file so unsupported types are rejected cheaply.
    ImageFormat saveFormat;
    switch (extension)
    {
        case ".jpg":
        case ".jpeg":
            saveFormat = ImageFormat.Jpeg;
            break;
        case ".gif":
            saveFormat = ImageFormat.Gif;
            break;
        case ".bmp":
            saveFormat = ImageFormat.Bmp;
            break;
        case ".tiff":
            saveFormat = ImageFormat.Tiff;
            break;
        case ".png":
            saveFormat = ImageFormat.Png;
            break;
        case ".emf":
            saveFormat = ImageFormat.Emf;
            break;
        default:
            this.labMsg.Content = "不支持的图片格式。";
            return;
    }

    // The OneNote Image "format" attribute only accepts auto|png|emf|jpg.
    string imageFormat;
    switch (extension)
    {
        case ".jpg":
            imageFormat = "jpg";
            break;
        case ".png":
            imageFormat = "png";
            break;
        case ".emf":
            imageFormat = "emf";
            break;
        default:
            imageFormat = "auto";
            break;
    }

    // Base64-encode the image. ToArray() copies only the bytes actually written;
    // GetBuffer() would also include the unused tail of the stream's internal buffer.
    string base64;
    int width;
    int height;
    using (Bitmap bitmap = new Bitmap(v_strImgPath))
    using (MemoryStream ms = new MemoryStream())
    {
        bitmap.Save(ms, saveFormat);
        base64 = Convert.ToBase64String(ms.ToArray());
        width = bitmap.Width;
        height = bitmap.Height;
    }

    var onenoteApp = new Microsoft.Office.Interop.OneNote.Application();

    // Read the hierarchy to learn the XML namespace in use and to find a section to host the page.
    string hierarchyXml;
    onenoteApp.GetHierarchy(null, Microsoft.Office.Interop.OneNote.HierarchyScope.hsPages, out hierarchyXml);
    XDocument hierarchy = XDocument.Parse(hierarchyXml);
    XNamespace ns = hierarchy.Root.Name.Namespace;

    XElement sectionNode = hierarchy.Descendants(ns + "Section").FirstOrDefault();
    string sectionId = sectionNode == null ? null : (string)sectionNode.Attribute("ID");
    if (string.IsNullOrEmpty(sectionId))
    {
        // No section available to host the temporary page.
        this.labMsg.Content = "未找到可用的 OneNote 分区。";
        return;
    }

    string pageId;
    onenoteApp.CreateNewPage(sectionId, out pageId);
    try
    {
        /* Shape of the Image element (OneNote 2010):
             <one:Image format="" originalPageNumber="0" lastModifiedTime="" objectID="">
               <one:Position x="" y="" z=""/>
               <one:Size width="" height=""/>
               <one:Data>Base64</one:Data>
               <!-- Generated by OneNote; not written by this code. OCRText is what we read back. -->
               <one:OCRData lang="en-US">
                 <one:OCRText><![CDATA[ recognised text ]]></one:OCRText>
                 <one:OCRToken startPos="0" region="0" line="0" x="" y="" width="" height=""/>
                 ...
               </one:OCRData>
             </one:Image>
           Object IDs follow the schema pattern {guid}{number}{letter+number}; they are unique
           within a page and the hierarchy, not across OneNote. */
        XDocument page = new XDocument(
            new XElement(ns + "Page",
                new XAttribute("ID", pageId),
                new XElement(ns + "Outline",
                    new XElement(ns + "OEChildren",
                        new XElement(ns + "OE",
                            new XElement(ns + "Image",
                                new XAttribute("format", imageFormat),
                                new XAttribute("originalPageNumber", "0"),
                                new XElement(ns + "Position",
                                    new XAttribute("x", "0"),
                                    new XAttribute("y", "0"),
                                    new XAttribute("z", "0")),
                                new XElement(ns + "Size",
                                    new XAttribute("width", width),
                                    new XAttribute("height", height)),
                                new XElement(ns + "Data", base64)))))));
        onenoteApp.UpdatePageContent(page.ToString(), DateTime.MinValue);

        // Give OneNote time to finish OCR (milliseconds; raise the setting for large images).
        System.Threading.Thread.Sleep(Int32.Parse(System.Configuration.ConfigurationManager.AppSettings["WaitTIme"]));

        string pageXml;
        onenoteApp.GetPageContent(pageId, out pageXml, Microsoft.Office.Interop.OneNote.PageInfo.piBinaryData);

        // Keep a copy of the returned page XML on disk, as before. It is not needed for the
        // extraction below, so the former re-open/parse of this file has been dropped.
        File.WriteAllText(System.Configuration.ConfigurationManager.AppSettings["tmpPath"] + @"\tmp.xml", pageXml);

        // First OCRText found below an Image element, or null when OneNote produced none.
        XElement ocrText = XElement.Parse(pageXml)
            .Descendants(ns + "Image")
            .Descendants(ns + "OCRText")
            .FirstOrDefault();

        if (ocrText == null)
        {
            // Nothing recognised (yet): clear stale output instead of throwing.
            this.txtOCRed.Text = string.Empty;
            this.labMsg.Content = "未识别到文字。";
        }
        else
        {
            this.txtOCRed.Text = ocrText.Value.Replace(" ", "");
        }
    }
    finally
    {
        // Always remove the temporary page so failures do not litter the section.
        onenoteApp.DeleteHierarchy(pageId);
    }
}
/// <summary>
/// Runs OneNote 2010 OCR on an image and writes the recognised text to a new .txt file.
/// </summary>
/// <remarks>
/// A notebook must first be created in OneNote and converted to the OneNote 2007 format.
/// OneNote 2016 (the personal edition is enough) is recommended; its API is similar to 2010
/// (drop the <c>XMLSchema.xs2007</c> argument), see the API parameter names for the rest.
/// Note: "Embed Interop Types" must be turned off on the interop DLL reference.
/// Each attempt creates a temporary page, inserts the image, polls for the page content and
/// deletes the page again; up to three retries follow when no OCR text is present.
/// All exceptions are logged to log.txt in the application base directory and swallowed.
/// </remarks>
/// <param name="imgPath">Path of the image file to recognise.</param>
/// <returns>
/// Path of the text file containing the OCR result, or an empty string when nothing was
/// recognised or an error occurred.
/// </returns>
public string Ocr_2010(string imgPath)
{
    string baseDir = AppDomain.CurrentDomain.BaseDirectory;
    string logPath = baseDir + @"\log.txt";

    try
    {
        #region Make sure section_path is set
        // NOTE(review): this hard-coded assignment overrides whatever section_path held and
        // makes the guard below unreachable — confirm whether it should come from configuration.
        section_path = @"C:\Users\zhensheng\Desktop\打杂\ocr\ocr.one";
        if (string.IsNullOrEmpty(section_path))
        {
            Console.WriteLine("请先建立笔记本");
            File.AppendAllText(logPath, "需要先在onenote中创建笔记本,并且将至转换为onenote2007格式,且将.one文件得路径赋值给section_path");
            return "";
        }
        #endregion

        #region Prepare data
        // Strip the dot from the EXTENSION. The previous code stripped every dot from the
        // path itself, so the file could never be found.
        string extension = (Path.GetExtension(imgPath) ?? string.Empty).TrimStart('.');

        // The OneNote Image "format" attribute only accepts auto|png|emf|jpg.
        string imageFormat;
        switch (extension.ToLowerInvariant())
        {
            case "jpg":
                imageFormat = "jpg";
                break;
            case "png":
                imageFormat = "png";
                break;
            case "emf":
                imageFormat = "emf";
                break;
            default:
                imageFormat = "auto";
                break;
        }

        byte[] data = File.ReadAllBytes(imgPath);

        // File size in whole megabytes; scales the polling delay (anything below 1 MB counts as 1).
        int fileSize = data.Length / 1024 / 1024;

        // NOTE(review): GetBase64 is defined elsewhere; it is assumed to expect the extension
        // without the leading dot — confirm against its implementation.
        Tuple<string, int, int> imgInfo = this.GetBase64(data, extension);

        var onenoteApp = new Microsoft.Office.Interop.OneNote.Application();

        // Section id and XML namespace do not change between retries, so fetch them once.
        string sectionID;
        onenoteApp.OpenHierarchy(section_path, null, out sectionID, CreateFileType.cftSection);

        string notebookXml;
        onenoteApp.GetHierarchy(null, HierarchyScope.hsPages, out notebookXml, XMLSchema.xs2007);
        XNamespace ns = XDocument.Parse(notebookXml).Root.Name.Namespace;
        #endregion

        XmlNode xmlNode = null;
        int retry = 0;
        do
        {
            // CreateNewPage supplies the page id through the out parameter.
            string pageID;
            onenoteApp.CreateNewPage(sectionID, out pageID);

            string pageXml = null;
            try
            {
                #region Insert the image into the page
                var page = new XDocument(
                    new XElement(ns + "Page",
                        new XAttribute("ID", pageID),
                        new XElement(ns + "Outline",
                            new XElement(ns + "OEChildren",
                                new XElement(ns + "OE",
                                    new XElement(ns + "Image",
                                        new XAttribute("format", imageFormat),
                                        new XAttribute("originalPageNumber", "0"),
                                        new XElement(ns + "Position",
                                            new XAttribute("x", "0"),
                                            new XAttribute("y", "0"),
                                            new XAttribute("z", "0")),
                                        new XElement(ns + "Size",
                                            new XAttribute("width", imgInfo.Item2),
                                            new XAttribute("height", imgInfo.Item3)),
                                        new XElement(ns + "Data", imgInfo.Item1)))))));
                onenoteApp.UpdatePageContent(page.ToString(), DateTime.MinValue, XMLSchema.xs2007);
                #endregion

                #region Poll for the page content (gives up after the extra 6 polls)
                int count = 0;
                do
                {
                    System.Threading.Thread.Sleep(waitTime * Math.Max(fileSize, 1));
                    onenoteApp.GetPageContent(pageID, out pageXml, PageInfo.piBinaryData, XMLSchema.xs2007);
                }
                while (string.IsNullOrEmpty(pageXml) && count++ < 6);
                #endregion
            }
            finally
            {
                // Always delete the temporary page, even when inserting or polling failed.
                onenoteApp.DeleteHierarchy(pageID, DateTime.MinValue);
            }

            #region Extract the OCR text from the page XML
            // An empty response counts as "no result" so the retry loop gets its chance,
            // instead of LoadXml throwing and aborting the whole call.
            if (!string.IsNullOrEmpty(pageXml))
            {
                XmlDocument xmlDoc = new XmlDocument();
                xmlDoc.LoadXml(pageXml);
                XmlNamespaceManager nsmgr = new XmlNamespaceManager(xmlDoc.NameTable);
                nsmgr.AddNamespace("one", ns.ToString());
                xmlNode = xmlDoc.SelectSingleNode("//one:Image//one:OCRText", nsmgr);
            }
            #endregion
        }
        // Retry up to three times when OCR yielded nothing (author's note: failure rate of
        // roughly 0.2~0.3 observed with OneNote 2010).
        while (xmlNode == null && retry++ < 3);

        if (xmlNode == null)
        {
            File.AppendAllText(logPath, "OCR没有识别出值");
            return "";
        }

        string recognisedText = xmlNode.InnerText;
        string localFilePath = baseDir + @"\" + Guid.NewGuid().ToString() + ".txt";
        File.WriteAllText(localFilePath, recognisedText);
        Console.WriteLine(recognisedText);
        return localFilePath;
    }
    catch (Exception e)
    {
        // Best-effort API: log the failure and signal it with an empty result.
        File.AppendAllText(logPath, e.ToString());
        return "";
    }
}