/// <summary>
/// Console entry point. Merges the rules of a CSS style sheet into an HTML file,
/// converts the merged markup into a Word document built on the bytes of
/// <c>Resource1.template</c>, writes the result to <c>test.docx</c> and then
/// asks the operating system to open that file.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string filename = "test.docx";

    // Inline the style sheet rules into the HTML so the converter sees them
    // as "style" attributes.
    CssParser cp = new CssParser();
    cp.AddStyleSheet(@"1_files/quemain.css");
    string html = File.ReadAllText("2.html");
    string bodyhtml = CssHtmlMerger.MergeHtmlAndCss(cp, html);

    if (File.Exists(filename))
    {
        File.Delete(filename);
    }

    using (MemoryStream generatedDocument = new MemoryStream())
    {
        // Seed the stream with the template document and rewind it so the
        // package can be opened from the start of the stream.
        // (To build the document from scratch instead, use
        // WordprocessingDocument.Create(generatedDocument, WordprocessingDocumentType.Document).)
        byte[] data = Resource1.template;
        generatedDocument.Write(data, 0, data.Length);
        generatedDocument.Position = 0L;

        using (WordprocessingDocument package = WordprocessingDocument.Open(generatedDocument, true))
        {
            MainDocumentPart mainPart = package.MainDocumentPart;
            if (mainPart == null)
            {
                // The package has no main part yet: create one with an empty body.
                mainPart = package.AddMainDocumentPart();
                new Document(new Body()).Save(mainPart);
            }

            HtmlConverter converter = new HtmlConverter(mainPart);
            // A proxy can be configured through converter.WebProxy if image
            // downloads have to go through one.
            converter.ImageProcessing = ImageProcessing.AutomaticDownload;
            converter.ProvisionImage += OnProvisionImage;

            converter.ParseHtml(bodyhtml);
            mainPart.Document.Save();
        }

        // The package must be disposed before the stream content is copied out,
        // hence the write happens after the inner using block.
        File.WriteAllBytes(filename, generatedDocument.ToArray());
    }

    // Starting a document (rather than an executable) requires shell execution.
    // It is the default on .NET Framework but not on .NET Core / .NET 5+, so it
    // is requested explicitly to behave the same on both.
    System.Diagnostics.Process.Start(
        new System.Diagnostics.ProcessStartInfo(filename) { UseShellExecute = true });
}
/// <summary>
/// Inlines the rules of <paramref name="cssParser"/> into the matching elements of
/// <paramref name="htmlText"/> and rewrites <c>.MathJye</c> elements as
/// <c>\(\frac{...}{...}\)</c> expressions.
/// </summary>
/// <param name="cssParser">Parser whose <c>Styles</c> map selectors to style declarations.</param>
/// <param name="htmlText">HTML markup to process.</param>
/// <returns>
/// The processed document serialized with XML output enabled, with the leading
/// XML declaration (gb2312, iso-8859-1 or utf-8) removed.
/// </returns>
public static string MergeHtmlAndCss(CssParser cssParser, String htmlText)
{
    HtmlDocument html = new HtmlDocument();
    html.OptionOutputAsXml = true;
    html.LoadHtml(htmlText);
    HtmlNode document = html.DocumentNode;

    // Copy every style rule onto the "style" attribute of the nodes its selector matches.
    foreach (KeyValuePair<String, CssParser.StyleClass> style in cssParser.Styles)
    {
        List<HtmlNode> foundNodes = document.QuerySelectorAll(style.Key).ToList();
        foreach (HtmlNode foundNode in foundNodes)
        {
            HtmlAttribute existing = foundNode.Attributes["style"];
            string declarations = style.Value.ToString();
            if (existing == null)
            {
                foundNode.SetAttributeValue("style", declarations);
            }
            else
            {
                // Keep the element's own declarations first, then append the rule's.
                foundNode.SetAttributeValue("style", existing.Value + ";" + declarations);
            }
        }
    }

    // Materialize the matches before looping: the loop replaces nodes in the tree,
    // which must not happen while the query is still being enumerated.
    List<HtmlNode> fractionNodes = document.QuerySelectorAll(".MathJye").ToList();
    foreach (HtmlNode fractionNode in fractionNodes)
    {
        if (!fractionNode.HasChildNodes)
        {
            continue;
        }

        // Collect the text of the first child of each child of the first child node.
        // Children that have no child of their own (e.g. text nodes) are skipped
        // instead of being dereferenced.
        List<string> parts = new List<string>();
        foreach (HtmlNode row in fractionNode.FirstChild.ChildNodes)
        {
            if (row.FirstChild == null)
            {
                continue;
            }

            parts.Add(row.FirstChild.InnerText);
        }

        if (parts.Count > 1)
        {
            // Built by concatenation on purpose: in a string.Format pattern "{{0}}" is
            // an escaped literal "{0}", so the values would never be substituted.
            string math = @"\(\frac{" + parts[0] + "}{" + parts[1] + @"}\)";
            fractionNode.ParentNode.ReplaceChild(HtmlNode.CreateNode(math), fractionNode);
        }
    }

    htmlText = html.DocumentNode.OuterHtml;

    // Drop the XML declaration emitted because OptionOutputAsXml is enabled.
    htmlText = htmlText.Replace("<?xml version=\"1.0\" encoding=\"gb2312\"?>", "");
    // NOTE(review): Replace("&", "&") is a no-op as written; presumably an entity
    // such as "&amp;" was meant to be decoded here - confirm against the original intent.
    htmlText = htmlText.Replace(@"<?xml version=""1.0"" encoding=""iso-8859-1""?>", "").Replace("&", "&");
    htmlText = htmlText.Replace(@"<?xml version=""1.0"" encoding=""utf-8""?>", "").Replace("&", "&");
    return htmlText;
}