Beispiel #1
1
        /// <summary>
        /// Merges the rules of <c>1_files/quemain.css</c> into <c>2.html</c>, converts the merged
        /// HTML into a Word document that starts from the bytes of <c>Resource1.template</c>,
        /// writes the result to <c>test.docx</c> and opens that file.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            const string filename = "test.docx";

            CssParser cssParser = new CssParser();
            cssParser.AddStyleSheet(@"1_files/quemain.css");
            string html = File.ReadAllText("2.html");
            string mergedHtml = CssHtmlMerger.MergeHtmlAndCss(cssParser, html);

            #region html 2 openxml
            if (File.Exists(filename))
            {
                File.Delete(filename);
            }

            using (MemoryStream generatedDocument = new MemoryStream())
            {
                // Seed the stream with the template so the package is opened rather than created.
                // To start from an empty document instead, skip the seeding and use
                // WordprocessingDocument.Create(generatedDocument, WordprocessingDocumentType.Document).
                byte[] data = Resource1.template;
                generatedDocument.Write(data, 0, data.Length);
                generatedDocument.Position = 0L;

                using (WordprocessingDocument package = WordprocessingDocument.Open(generatedDocument, true))
                {
                    MainDocumentPart mainPart = package.MainDocumentPart;
                    if (mainPart == null)
                    {
                        // The package has no main part yet: add one with an empty body.
                        mainPart = package.AddMainDocumentPart();
                        new Document(new Body()).Save(mainPart);
                    }

                    HtmlConverter converter = new HtmlConverter(mainPart);
                    converter.ImageProcessing = ImageProcessing.AutomaticDownload;
                    converter.ProvisionImage += OnProvisionImage;

                    converter.ParseHtml(mergedHtml);
                    mainPart.Document.Save();
                }

                // Written only after the package has been closed, so the stream holds the
                // finished document.
                File.WriteAllBytes(filename, generatedDocument.ToArray());
            }

            // Opening a document (not an executable) goes through the OS shell. UseShellExecute
            // defaults to true on .NET Framework but to false on .NET Core / .NET 5+, so it is
            // set explicitly to behave the same on both.
            System.Diagnostics.Process.Start(
                new System.Diagnostics.ProcessStartInfo(filename) { UseShellExecute = true });
            #endregion
        }
Beispiel #2
0
        /// <summary>
        /// Inlines the rules of <paramref name="cssParser"/> into the matching elements of
        /// <paramref name="htmlText"/> as <c>style</c> attributes, rewrites <c>.MathJye</c>
        /// elements as <c>\(\frac{a}{b}\)</c> text, and returns the resulting markup with the
        /// XML declaration removed.
        /// </summary>
        /// <param name="cssParser">Parser whose <c>Styles</c> map selectors to style declarations.</param>
        /// <param name="htmlText">The HTML markup to process.</param>
        /// <returns>The merged markup.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="cssParser"/> or <paramref name="htmlText"/> is <c>null</c>.
        /// </exception>
        public static string MergeHtmlAndCss(CssParser cssParser, String htmlText)
        {
            if (cssParser == null)
            {
                throw new ArgumentNullException("cssParser");
            }
            if (htmlText == null)
            {
                throw new ArgumentNullException("htmlText");
            }

            HtmlDocument html = new HtmlDocument();
            html.OptionOutputAsXml = true;
            html.LoadHtml(htmlText);
            HtmlNode document = html.DocumentNode;

            foreach (KeyValuePair<String, CssParser.StyleClass> style in cssParser.Styles)
            {
                // Materialised so the attribute writes below never touch a live query.
                foreach (HtmlNode foundNode in document.QuerySelectorAll(style.Key).ToList())
                {
                    var existingStyle = foundNode.Attributes["style"];
                    string inlineStyle = existingStyle == null
                        ? style.Value.ToString()
                        : existingStyle.Value + ";" + style.Value.ToString();
                    foundNode.SetAttributeValue("style", inlineStyle);
                }
            }

            // In a composite format string "{{" and "}}" are literal braces, so "{{{0}}}"
            // renders as "{<arg0>}". The former "{{0}}" rendered the literal text "{0}" and
            // never inserted the numerator/denominator.
            const string fractionFormat = @"\(\frac{{{0}}}{{{1}}}\)";

            // Snapshot the matches first: ReplaceChild below changes the tree being queried.
            foreach (HtmlNode item in document.QuerySelectorAll(".MathJye").ToList())
            {
                if (!item.HasChildNodes)
                {
                    continue;
                }

                // Presumably the first child is a table whose first two rows hold the
                // numerator and denominator - TODO confirm against the source markup.
                HtmlNode tb = item.FirstChild;
                var parts = new List<string>();
                foreach (HtmlNode tr in tb.ChildNodes)
                {
                    // Childless nodes (e.g. whitespace text between rows) carry no cell text.
                    if (tr.FirstChild != null)
                    {
                        parts.Add(tr.FirstChild.InnerText);
                    }
                }

                if (parts.Count > 1)
                {
                    string math = string.Format(fractionFormat, parts[0], parts[1]);
                    item.ParentNode.ReplaceChild(HtmlNode.CreateNode(math), item);
                }
            }

            // Strip whichever XML declaration the serialiser emitted.
            // NOTE(review): "&amp;" is decoded in two consecutive passes (so "&amp;amp;" ends up
            // as "&"); kept as-is to preserve existing output - confirm whether one pass suffices.
            string result = html.DocumentNode.OuterHtml;
            result = result.Replace("<?xml version=\"1.0\" encoding=\"gb2312\"?>", "");
            result = result.Replace(@"<?xml version=""1.0"" encoding=""iso-8859-1""?>", "").Replace("&amp;", "&");
            result = result.Replace(@"<?xml version=""1.0"" encoding=""utf-8""?>", "").Replace("&amp;", "&");
            return result;
        }