/// <summary>
/// Opens the OneNote section at <c>srcPath</c>, stores the name of its last listed page
/// in <c>title</c>, and publishes the section to <c>dstPath</c> in MHTML format.
/// </summary>
/// <remarks>
/// Failures while reading the hierarchy or publishing are treated as best-effort: they are
/// written to the trace listeners and the method returns without publishing. Failures while
/// creating the OneNote application or opening the section propagate to the caller.
/// </remarks>
private void oneToMht()
{
    Microsoft.Office.Interop.OneNote.Application onenoteApp = new Microsoft.Office.Interop.OneNote.Application();
    try
    {
        // Deliberately outside the best-effort handler below, so that a bad source path
        // still surfaces to the caller as an exception.
        string sectionId;
        onenoteApp.OpenHierarchy(srcPath, null, out sectionId);

        try
        {
            // Load the section's page list so the page title can be read.
            string hierarchyXml;
            onenoteApp.GetHierarchy(sectionId, HierarchyScope.hsPages, out hierarchyXml);

            XmlDocument document = new XmlDocument();
            document.LoadXml(hierarchyXml);

            // Every page overwrites the title, so the last page listed wins.
            foreach (XmlNode pageNode in document.GetElementsByTagName("one:Page"))
            {
                title = pageNode.Attributes["name"].Value;
            }

            // The page body itself is not needed for publishing, so it is not fetched.
            onenoteApp.Publish(sectionId, dstPath, Microsoft.Office.Interop.OneNote.PublishFormat.pfMHTML, "");
        }
        catch (System.Exception ex)
        {
            // Still best-effort, but leave a trace instead of failing silently.
            System.Diagnostics.Trace.TraceError("oneToMht failed: " + ex);
        }
    }
    finally
    {
        // Drop the reference to the OneNote automation object as soon as we are done with it.
        System.Runtime.InteropServices.Marshal.ReleaseComObject(onenoteApp);
    }
}
/// <summary>
/// Runs OCR on an image through the OneNote 2010 interop API and writes the recognized
/// text to a new .txt file in the application's base directory.
/// </summary>
/// <remarks>
/// A section (.one) file must be available, converted to the OneNote 2007 format; its path
/// is read from <c>section_path</c>, falling back to a built-in default when that is empty.
/// OneNote 2016 exposes a similar API (drop the <c>XMLSchema.xs2007</c> argument).
/// The interop assembly must be referenced with "Embed Interop Types" turned off.
/// Each attempt inserts the image into a scratch page, waits, reads the page back, and
/// deletes the page; up to three retries are made when no OCR text is found.
/// </remarks>
/// <param name="imgPath">Path of the image file to recognize.</param>
/// <returns>
/// The full path of the text file holding the recognized text, or an empty string when
/// nothing was recognized or an error occurred (details are appended to log.txt in the
/// application's base directory).
/// </returns>
public string Ocr_2010(string imgPath)
{
    string logPath = AppDomain.CurrentDomain.BaseDirectory + @"\log.txt";
    Microsoft.Office.Interop.OneNote.Application onenoteApp = null;

    try
    {
        // Use the configured section file; only fall back to the built-in default when
        // nothing was configured (previously the default always overwrote the field).
        if (string.IsNullOrEmpty(section_path))
        {
            section_path = @"C:\Users\zhensheng\Desktop\打杂\ocr\ocr.one";
        }

        // The extension is used without its leading dot (e.g. "png"); the path itself must
        // stay untouched so the file can actually be read.
        // NOTE(review): GetBase64 is assumed to accept the extension without the dot - confirm.
        string imgType = Path.GetExtension(imgPath).TrimStart('.');
        byte[] data = File.ReadAllBytes(imgPath);

        // Whole megabytes; used to scale the wait before reading the page back.
        int fileSize = data.Length / 1024 / 1024;

        // A failure to create the automation object throws and is logged by the handler below.
        onenoteApp = new Microsoft.Office.Interop.OneNote.Application();

        // Work that does not change between retries is done once up front.
        string sectionID;
        onenoteApp.OpenHierarchy(section_path, null, out sectionID, CreateFileType.cftSection);

        // The hierarchy is fetched only to learn the XML namespace of the 2007 schema.
        string notebookXml;
        onenoteApp.GetHierarchy(null, HierarchyScope.hsPages, out notebookXml, XMLSchema.xs2007);
        XNamespace ns = XDocument.Parse(notebookXml).Root.Name.Namespace;

        Tuple<string, int, int> imgInfo = this.GetBase64(data, imgType);

        XmlNode xmlNode = null;
        int retry = 0;
        do
        {
            // Create a scratch page for this attempt.
            string pageID;
            onenoteApp.CreateNewPage(sectionID, out pageID);
            try
            {
                // Insert the image into the scratch page.
                var page = new XDocument(
                    new XElement(ns + "Page",
                        new XElement(ns + "Outline",
                            new XElement(ns + "OEChildren",
                                new XElement(ns + "OE",
                                    new XElement(ns + "Image",
                                        new XAttribute("format", imgType),
                                        new XAttribute("originalPageNumber", "0"),
                                        new XElement(ns + "Position",
                                            new XAttribute("x", "0"),
                                            new XAttribute("y", "0"),
                                            new XAttribute("z", "0")),
                                        new XElement(ns + "Size",
                                            new XAttribute("width", imgInfo.Item2),
                                            new XAttribute("height", imgInfo.Item3)),
                                        new XElement(ns + "Data", imgInfo.Item1)))))));
                page.Root.SetAttributeValue("ID", pageID);
                onenoteApp.UpdatePageContent(page.ToString(), DateTime.MinValue, XMLSchema.xs2007);

                // Wait, then read the page back; poll at most six more times while the
                // returned XML is empty. Files under 1 MB wait as if they were 1 MB.
                string pageXml;
                int count = 0;
                do
                {
                    System.Threading.Thread.Sleep(waitTime * Math.Max(fileSize, 1));
                    onenoteApp.GetPageContent(pageID, out pageXml, PageInfo.piBinaryData, XMLSchema.xs2007);
                }
                while (pageXml == "" && count++ < 6);

                // Extract the OCR text node, if OneNote produced one.
                XmlDocument xmlDoc = new XmlDocument();
                xmlDoc.LoadXml(pageXml);
                XmlNamespaceManager nsmgr = new XmlNamespaceManager(xmlDoc.NameTable);
                nsmgr.AddNamespace("one", ns.ToString());
                xmlNode = xmlDoc.SelectSingleNode("//one:Image//one:OCRText", nsmgr);
            }
            finally
            {
                // Always remove the scratch page, even when the attempt failed, so pages do
                // not pile up in the section. A cleanup failure is logged but must not hide
                // the attempt's own outcome.
                try
                {
                    onenoteApp.DeleteHierarchy(pageID, DateTime.MinValue);
                }
                catch (Exception cleanupError)
                {
                    File.AppendAllText(logPath, cleanupError.ToString());
                }
            }
        }
        // Retry up to three times when no OCR text was found
        // (original author's note: observed failure rate on 2010 is roughly 0.2-0.3).
        while (xmlNode == null && retry++ < 3);

        if (xmlNode == null)
        {
            File.AppendAllText(logPath, "OCR没有识别出值");
            return "";
        }

        string recognizedText = xmlNode.InnerText;
        var localFilePath = AppDomain.CurrentDomain.BaseDirectory + @"\" + Guid.NewGuid().ToString() + ".txt";
        File.WriteAllText(localFilePath, recognizedText);
        Console.WriteLine(recognizedText);
        return localFilePath;
    }
    catch (Exception e)
    {
        File.AppendAllText(logPath, e.ToString());
        return "";
    }
    finally
    {
        // Drop the reference to the OneNote automation object once the call is finished.
        if (onenoteApp != null)
        {
            System.Runtime.InteropServices.Marshal.ReleaseComObject(onenoteApp);
        }
    }
}