/// <summary>
/// Converts a single Word element into its HTML counterpart by dispatching
/// to the handler for its concrete type.
/// </summary>
/// <param name="elem">The OpenXML element to convert.</param>
/// <param name="config">Conversion settings forwarded unchanged to the specific handler.</param>
/// <returns>
/// The result of <c>HandleParagraph</c> when <paramref name="elem"/> is a
/// <c>Paragraph</c>, the result of <c>HandleTable</c> when it is a <c>Table</c>,
/// and otherwise a placeholder element created with the tag name "no".
/// </returns>
public HtmlElement Handle(DocumentFormat.OpenXml.OpenXmlElement elem, ConvertConfig config)
{
    // Dispatch on the real runtime type. The previous implementation matched the
    // type *name* against "Paragraph"/"Table", which also accepted unrelated types
    // whose names merely contain those words (e.g. ParagraphProperties, TableRow)
    // and then failed with InvalidCastException on the hard cast.
    Paragraph paragraph = elem as Paragraph;
    if (paragraph != null)
    {
        return this.HandleParagraph(paragraph, config);
    }

    Table table = elem as Table;
    if (table != null)
    {
        return this.HandleTable(table, config);
    }

    // Anything else (including a null element) is not converted; callers receive
    // the "no" placeholder element instead.
    return new HtmlElement("no");
}
/// <summary>
/// Converts a Word table into an HTML <c>table</c> element.
/// </summary>
/// <param name="t">The table whose rows and cells are converted.</param>
/// <param name="conConfig">Conversion settings forwarded to the paragraph handler.</param>
/// <returns>
/// A "table" element containing one "tr" per <c>TableRow</c> and one "td" per
/// <c>TableCell</c>. Each cell contains the converted paragraphs and nested
/// tables found among its direct children, in document order.
/// </returns>
public HtmlElement HandleTable(Table t, ConvertConfig conConfig)
{
    HtmlElement table = new HtmlElement("table");

    foreach (TableRow row in t.Elements<TableRow>())
    {
        HtmlElement htmlRow = new HtmlElement("tr");

        foreach (TableCell cell in row.Elements<TableCell>())
        {
            HtmlElement htmlCell = new HtmlElement("td");

            // Walk the cell's direct children in order so that nested tables are
            // rendered too; previously only paragraphs were picked up and a table
            // placed inside a cell was silently dropped from the output.
            foreach (DocumentFormat.OpenXml.OpenXmlElement child in cell.ChildElements)
            {
                Paragraph paragraph = child as Paragraph;
                if (paragraph != null)
                {
                    htmlCell.AddChild(this.HandleParagraph(paragraph, conConfig));
                    continue;
                }

                Table nestedTable = child as Table;
                if (nestedTable != null)
                {
                    htmlCell.AddChild(this.HandleTable(nestedTable, conConfig));
                }
            }

            htmlRow.AddChild(htmlCell);
        }

        table.AddChild(htmlRow);
    }

    return table;
}
/// <summary>
/// Converts a Word paragraph into an HTML <c>p</c> element containing one
/// <c>span</c> per run.
/// </summary>
/// <param name="p">The paragraph to convert.</param>
/// <param name="conConfig">
/// Conversion settings; <c>ResourcePath</c> is prepended to the part URI of every
/// embedded image to form the <c>src</c> attribute.
/// </param>
/// <returns>
/// A "p" element carrying the paragraph's inline style and a <c>class</c> attribute
/// ("Inner_P_" + style id, or an empty string when the paragraph has no style id),
/// with one child "span" per run. Each run span contains a nested "span" per text
/// element and an "img" per inline picture with an embedded image part.
/// </returns>
public HtmlElement HandleParagraph(Paragraph p, ConvertConfig conConfig)
{
    HtmlElement pa = new HtmlElement("p");
    OpenXmlHelper helper = new OpenXmlHelper();
    StyleConfig config = new StyleConfig { Charset = "EastAsia" };

    // Paragraph-level style and class.
    var pClass = "";
    if (p.ParagraphProperties != null)
    {
        var pPr = p.ParagraphProperties;
        pa.AddStyle(helper.GetStyleFromParagraphProperties(pPr, config));
        if (pPr.ParagraphStyleId != null)
        {
            var id = pPr.ParagraphStyleId.Val;
            pClass += "Inner_P_" + id;
        }
    }
    pa.AddAttribute("class", pClass);

    // The pattern is the same for every run, so build the Regex once instead of
    // once per run. It succeeds when the text contains at least one character in
    // the range U+0000..U+00FF.
    Regex singleByteText = new Regex("[\x00-\xff]{1,}", RegexOptions.IgnoreCase | RegexOptions.Multiline);

    foreach (var r in p.Elements<Run>())
    {
        var span = new HtmlElement("span");
        var rpr = r.RunProperties;

        // Style lookup uses the "Ascii" charset when the run text matches the
        // pattern above, and "EastAsia" otherwise.
        config.Charset = "EastAsia";
        if (singleByteText.Match(r.InnerText).Success)
        {
            config.Charset = "Ascii";
        }
        span.AddStyle(helper.GetStyleFromRunProperties(rpr, config));

        if (rpr != null && rpr.RunStyle != null && rpr.RunStyle.Val != null)
        {
            var id = rpr.RunStyle.Val;
            span.AddAttribute("class", "Inner_P_" + id + " " + "Inner_R_" + id);
        }

        foreach (Text t in r.Elements<Text>())
        {
            span.AddChild(new HtmlElement("span", t.InnerText));
        }

        foreach (Drawing d in r.Elements<Drawing>())
        {
            // Only inline pictures with an embedded image part can be rendered as
            // <img>. Anchored drawings, charts/shapes (no Picture element) and
            // linked images (no Embed id) used to crash the conversion with a
            // NullReferenceException or ArgumentOutOfRangeException; skip them.
            if (d.Inline == null || d.Inline.Graphic == null || d.Inline.Graphic.GraphicData == null)
            {
                continue;
            }

            DocumentFormat.OpenXml.Drawing.GraphicData gd = d.Inline.Graphic.GraphicData;
            var pic = gd.Elements<DocumentFormat.OpenXml.Drawing.Pictures.Picture>().FirstOrDefault();
            if (pic == null || pic.BlipFill == null || pic.BlipFill.Blip == null)
            {
                continue;
            }

            var embed = pic.BlipFill.Blip.Embed;
            if (embed == null || embed.Value == null)
            {
                continue;
            }

            var imagePart = this._mainPart.GetPartById(embed.Value);
            var img = new HtmlElement("img");
            // Substring(1) drops the first character of the part URI before it is
            // appended to the resource path.
            img.AddAttribute("src", conConfig.ResourcePath + imagePart.Uri.ToString().Substring(1));
            span.AddChild(img);
        }

        pa.AddChild(span);
    }

    return pa;
}
/// <summary>
/// Converts a Word document to a standalone HTML file under "./OUT/".
/// </summary>
/// <param name="args">
/// Optional command-line arguments. When at least one is supplied, the first is
/// used as the path of the .docx file to convert; otherwise the built-in sample
/// document path is used.
/// </param>
static void Main(string[] args)
{
    const string DefaultInput = "./data/DirectX_9_3D.docx";
    const string OutputRoot = "./OUT/";

    string file = (args != null && args.Length > 0) ? args[0] : DefaultInput;

    // Output names are derived from the MD5 of the input file name.
    string fileMd5 = Utities.GetMd5(Path.GetFileName(file));
    string docRoot = Path.Combine(OutputRoot, fileMd5 + "/");
    ConvertConfig config = new ConvertConfig { ResourcePath = "./" + fileMd5 + "/" };

    // Unpack the package once; an existing directory is reused as-is.
    if (!Directory.Exists(docRoot))
    {
        Console.WriteLine("Unzip File");
        UnZip un = new UnZip();
        un.UnZipToDir(file, docRoot);
    }

    Console.WriteLine("Convert Word to Html");
    OpenXmlHelper helper = new OpenXmlHelper();
    Html.Html html = new Html.Html();

    // <head> meta elements.
    HtmlElement meta0 = new HtmlElement("meta", false);
    meta0.AddAttribute("http-equiv", "X-UA-Compatible");
    meta0.AddAttribute("content", "IE=edge,chrome=1");

    HtmlElement meta1 = new HtmlElement("meta", false);
    meta1.AddAttribute("name", "viewport");
    meta1.AddAttribute("content", "width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no");

    HtmlElement meta2 = new HtmlElement("meta", false);
    meta2.AddAttribute("name", "apple-mobile-web-app-capable");
    meta2.AddAttribute("content", "yes");

    HtmlElement meta3 = new HtmlElement("meta", false);
    meta3.AddAttribute("http-equiv", "content-type");
    meta3.AddAttribute("content", "text/html; charset=UTF-8");

    html.AddHeadElement(meta0);
    html.AddHeadElement(meta1);
    html.AddHeadElement(meta2);
    html.AddHeadElement(meta3);

    // Page-level stylesheet.
    CSS css = new CSS();
    CssStyle body = new CssStyle("body");
    body.AddStyle("background-color", "gray");
    css.AddStyle(body);
    html.AddStyle(css);

    // Container element that receives the converted document content.
    HtmlElement div = new HtmlElement("div");
    HtmlAttribute divClass = new HtmlAttribute("class", "documentbody");
    div.AddAttribute(divClass);

    CssStyle divStyle = new CssStyle();
    // CSS font-family entries must be comma-separated; without the comma the
    // whole declaration is invalid.
    divStyle.AddStyle("font-family", "'Times New Roman', 宋体");
    divStyle.AddStyle("font-size", "10.5pt");
    divStyle.AddStyle("margin", "0 auto");
    divStyle.AddStyle("width", "600px");
    divStyle.AddStyle("padding", "100px 120px");
    divStyle.AddStyle("border", "2px solid gray");
    divStyle.AddStyle("background-color", "white");
    div.AddStyle(divStyle);

    // Keep the package open until the HTML has been written, and make sure it is
    // released even when the conversion throws.
    using (WordprocessingDocument doc = WordprocessingDocument.Open(file, false))
    {
        // Prefer the regular style definitions part; fall back to the
        // styles-with-effects part when it is absent.
        StylesPart stylesPart = doc.MainDocumentPart.StyleDefinitionsPart;
        if (stylesPart == null)
        {
            stylesPart = doc.MainDocumentPart.StylesWithEffectsPart;
        }

        // A document may carry neither part; in that case no template CSS is emitted.
        if (stylesPart != null && stylesPart.Styles != null)
        {
            // Generate the CSS styles that correspond to the document's style templates.
            html.AddStyle(helper.GetStyles(stylesPart.Styles.Elements<Style>()));
        }

        // Process each top-level Word element in the document body.
        ElementHandler handler = new ElementHandler(doc);
        foreach (var element in doc.MainDocumentPart.Document.Body.ChildElements)
        {
            div.AddChild(handler.Handle(element, config));
        }

        html.AddElement(div);

        string htmlfile = OutputRoot + fileMd5 + ".html";

        // FileMode.Create truncates an existing file and creates a missing one,
        // which is what the manual Exists/Truncate/CreateNew choice did.
        using (FileStream fs = new FileStream(htmlfile, FileMode.Create, FileAccess.Write))
        using (StreamWriter sw = new StreamWriter(fs))
        {
            sw.WriteLine(html.ToString());
        }
    }
}