/// <summary>
/// Sample driver: for every .docx file two directories above the working
/// directory, writes a sibling "*out.docx" copy and runs the formatting
/// assembler over that copy.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    DirectoryInfo sampleDir = new DirectoryInfo("../../");

    // Remove the output of earlier runs first; File.Copy below does not overwrite.
    foreach (FileInfo staleOutput in sampleDir.GetFiles("*out.docx"))
    {
        staleOutput.Delete();
    }

    foreach (FileInfo input in sampleDir.GetFiles("*.docx"))
    {
        Console.WriteLine(input.Name);

        string outputName = input.Name.Replace(".docx", "out.docx");
        FileInfo output = new FileInfo("../../" + outputName);
        File.Copy(input.FullName, output.FullName);

        using (WordprocessingDocument outputDoc = WordprocessingDocument.Open(output.FullName, true))
        {
            FormattingAssemblerSettings assemblerSettings = new FormattingAssemblerSettings();
            assemblerSettings.ClearStyles = true;
            assemblerSettings.RemoveStyleNamesFromParagraphAndRunProperties = true;
            assemblerSettings.CreateHtmlConverterAnnotationAttributes = true;
            assemblerSettings.OrderElementsPerStandard = true;
            assemblerSettings.RestrictToSupportedLanguages = true;
            assemblerSettings.RestrictToSupportedNumberingFormats = true;

            FormattingAssembler.AssembleFormatting(outputDoc, assemblerSettings);
        }
    }
}
/// <summary>
/// In-memory overload: opens <paramref name="document"/> through an
/// <c>OpenXmlMemoryStreamDocument</c>, assembles formatting on the contained
/// word-processing package, and returns the modified document.
/// </summary>
/// <param name="document">The source document.</param>
/// <param name="settings">Options forwarded to the package-based overload.</param>
/// <returns>The document produced by <c>GetModifiedWmlDocument()</c>.</returns>
public static WmlDocument AssembleFormatting(WmlDocument document, FormattingAssemblerSettings settings)
{
    using (OpenXmlMemoryStreamDocument memoryDoc = new OpenXmlMemoryStreamDocument(document))
    {
        // The package is disposed before the modified document is read back,
        // exactly as a nested using block would do.
        WordprocessingDocument package = memoryDoc.GetWordprocessingDocument();
        try
        {
            AssembleFormatting(package, settings);
        }
        finally
        {
            if (package != null)
            {
                package.Dispose();
            }
        }

        WmlDocument assembled = memoryDoc.GetModifiedWmlDocument();
        return assembled;
    }
}
/// <summary>
/// Assembles formatting for every content part of <paramref name="wDoc"/>:
/// records the default paragraph/character/table style ids, annotates each
/// part (global defaults, table styles, paragraphs, runs), normalizes list
/// items, optionally clears styles, and finally normalizes and cleans each
/// part before writing it back with <c>PutXDocument()</c>.
/// </summary>
/// <param name="wDoc">The document to process; it is modified in place.</param>
/// <param name="settings">Options controlling the assembly.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="wDoc"/> or <paramref name="settings"/> is null.
/// </exception>
public static void AssembleFormatting(WordprocessingDocument wDoc, FormattingAssemblerSettings settings)
{
    if (wDoc == null)
    {
        throw new ArgumentNullException("wDoc");
    }
    if (settings == null)
    {
        throw new ArgumentNullException("settings");
    }

    FormattingAssemblerInfo fai = new FormattingAssemblerInfo();
    XDocument sXDoc = wDoc.MainDocumentPart.StyleDefinitionsPart.GetXDocument();

    // Each default style name is assigned only when a matching default style exists.
    XElement defaultParagraphStyle = FindDefaultStyleElement(sXDoc, "paragraph");
    if (defaultParagraphStyle != null)
    {
        fai.DefaultParagraphStyleName = (string)defaultParagraphStyle.Attribute(W.styleId);
    }

    XElement defaultCharacterStyle = FindDefaultStyleElement(sXDoc, "character");
    if (defaultCharacterStyle != null)
    {
        fai.DefaultCharacterStyleName = (string)defaultCharacterStyle.Attribute(W.styleId);
    }

    XElement defaultTableStyle = FindDefaultStyleElement(sXDoc, "table");
    if (defaultTableStyle != null)
    {
        fai.DefaultTableStyleName = (string)defaultTableStyle.Attribute(W.styleId);
    }

    AssembleListItemInformation(wDoc, settings.ListItemRetrieverSettings);

    // Pass 1: annotate every content part.
    foreach (var part in wDoc.ContentParts())
    {
        var pxd = part.GetXDocument();
        FixNonconformantHexValues(pxd.Root);
        AnnotateWithGlobalDefaults(wDoc, pxd.Root, settings);
        AnnotateTablesWithTableStyles(wDoc, pxd.Root);
        AnnotateParagraphs(fai, wDoc, pxd.Root, settings);
        AnnotateRuns(fai, wDoc, pxd.Root, settings);
    }

    NormalizeListItems(fai, wDoc, settings);

    if (settings.ClearStyles)
    {
        ClearStyles(wDoc);
    }

    // Pass 2: normalize the accumulated properties, clean up, and persist each part.
    foreach (var part in wDoc.ContentParts())
    {
        var pxd = part.GetXDocument();

        // Namespace declarations are dropped from descendants only; the root keeps its own.
        pxd.Root.Descendants().Attributes().Where(a => a.IsNamespaceDeclaration).Remove();
        FormattingAssembler.NormalizePropsForPart(pxd, settings);
        var newRoot = (XElement)CleanupTransform(pxd.Root);
        pxd.Root.ReplaceWith(newRoot);
        part.PutXDocument();
    }
}

// Returns the first w:style element flagged as default for the given w:type value, or null.
private static XElement FindDefaultStyleElement(XDocument stylesXDoc, string styleType)
{
    return stylesXDoc
        .Root
        .Elements(W.style)
        .FirstOrDefault(st => st.Attribute(W._default).ToBoolean() == true &&
            (string)st.Attribute(W.type) == styleType);
}
/// <summary>
/// Converts <paramref name="wordDoc"/> to an XHTML tree. The document is
/// modified along the way: revisions are accepted, markup is simplified,
/// formatting is assembled, and several annotation passes are applied
/// before the main document root is transformed.
/// </summary>
/// <param name="wordDoc">The document to convert; it is modified in place.</param>
/// <param name="htmlConverterSettings">Conversion options.</param>
/// <returns>The root element of the generated XHTML.</returns>
public static XElement ConvertToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings htmlConverterSettings)
{
    InitEntityMap();
    RevisionAccepter.AcceptRevisions(wordDoc);

    SimplifyMarkupSettings simplifySettings = new SimplifyMarkupSettings();
    simplifySettings.RemoveComments = true;
    simplifySettings.RemoveContentControls = true;
    simplifySettings.RemoveEndAndFootNotes = true;
    simplifySettings.RemoveFieldCodes = false;
    simplifySettings.RemoveLastRenderedPageBreak = true;
    simplifySettings.RemovePermissions = true;
    simplifySettings.RemoveProof = true;
    simplifySettings.RemoveRsidInfo = true;
    simplifySettings.RemoveSmartTags = true;
    simplifySettings.RemoveSoftHyphens = true;
    simplifySettings.RemoveGoBackBookmark = true;
    simplifySettings.ReplaceTabsWithSpaces = false;
    MarkupSimplifier.SimplifyMarkup(wordDoc, simplifySettings);

    ListItemRetrieverSettings listSettings = new ListItemRetrieverSettings();
    listSettings.ListItemTextImplementations = htmlConverterSettings.ListItemImplementations;

    FormattingAssemblerSettings assemblerSettings = new FormattingAssemblerSettings();
    assemblerSettings.RemoveStyleNamesFromParagraphAndRunProperties = false;
    assemblerSettings.ClearStyles = false;
    assemblerSettings.RestrictToSupportedLanguages = htmlConverterSettings.RestrictToSupportedLanguages;
    assemblerSettings.RestrictToSupportedNumberingFormats = htmlConverterSettings.RestrictToSupportedNumberingFormats;
    assemblerSettings.CreateHtmlConverterAnnotationAttributes = true;
    assemblerSettings.OrderElementsPerStandard = false;
    assemblerSettings.ListItemRetrieverSettings = listSettings;
    FormattingAssembler.AssembleFormatting(wordDoc, assemblerSettings);

    InsertAppropriateNonbreakingSpaces(wordDoc);
    CalculateSpanWidthForTabs(wordDoc);
    ReverseTableBordersForRtlTables(wordDoc);
    AdjustTableBorders(wordDoc);

    // The root is captured before the field and section annotation passes run.
    XElement mainRoot = wordDoc.MainDocumentPart.GetXDocument().Root;
    FieldRetriever.AnnotateWithFieldInfo(wordDoc.MainDocumentPart);
    AnnotateForSections(wordDoc);

    XElement html = (XElement)ConvertToHtmlTransform(wordDoc, htmlConverterSettings, mainRoot, false, 0m);
    ReifyStylesAndClasses(htmlConverterSettings, html);

    // The tree returned by ConvertToHtmlTransform contains XEntity objects
    // (the XEntity class is defined in PtOpenXmlUtil.cs). See
    // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
    // for a detailed explanation.
    //
    // Any further transformation of this tree has to be done correctly,
    // or the entities will not be serialized properly.
    return html;
}
/// <summary>
/// Adds the document-wide default paragraph and run properties to every
/// w:p and w:r under <paramref name="rootElement"/> as pt:pPr / pt:rPr child
/// elements. When <c>settings.CreateHtmlConverterAnnotationAttributes</c> is
/// set, each paragraph/run also receives a pt:StyleName attribute holding
/// its explicit style or, failing that, the default style id.
/// </summary>
/// <param name="wDoc">Document whose style definitions part supplies the defaults.</param>
/// <param name="rootElement">Root of the content part being annotated.</param>
/// <param name="settings">Assembler options.</param>
private static void AnnotateWithGlobalDefaults(WordprocessingDocument wDoc, XElement rootElement, FormattingAssemblerSettings settings)
{
    XDocument stylesDoc = wDoc.MainDocumentPart.StyleDefinitionsPart.GetXDocument();

    string fallbackParaStyle = (string)stylesDoc
        .Root
        .Elements(W.style)
        .Where(st => (string)st.Attribute(W.type) == "paragraph" &&
            st.Attribute(W._default).ToBoolean() == true)
        .Attributes(W.styleId)
        .FirstOrDefault();

    string fallbackCharStyle = (string)stylesDoc
        .Root
        .Elements(W.style)
        .Where(st => (string)st.Attribute(W.type) == "character" &&
            st.Attribute(W._default).ToBoolean() == true)
        .Attributes(W.styleId)
        .FirstOrDefault();

    XElement effectiveParaProps = null;
    XElement effectiveRunProps = null;

    XElement docDefaults = stylesDoc.Root.Element(W.docDefaults);
    if (docDefaults != null)
    {
        XElement definedParaProps =
            docDefaults.Elements(W.pPrDefault).Elements(W.pPr).FirstOrDefault()
            ?? new XElement(W.pPr, new XElement(W.rPr));

        XElement definedRunProps =
            docDefaults.Elements(W.rPrDefault).Elements(W.rPr).FirstOrDefault()
            ?? new XElement(W.rPr);

        // Fill in font and size defaults the document does not state itself.
        // When definedRunProps came from the styles document, these are added to that element.
        if (definedRunProps.Element(W.rFonts) == null)
        {
            definedRunProps.Add(
                new XElement(W.rFonts,
                    new XAttribute(W.ascii, "Times New Roman"),
                    new XAttribute(W.hAnsi, "Times New Roman"),
                    new XAttribute(W.cs, "Times New Roman")));
        }
        if (definedRunProps.Element(W.sz) == null)
        {
            definedRunProps.Add(new XElement(W.sz, new XAttribute(W.val, "20")));
        }
        if (definedRunProps.Element(W.szCs) == null)
        {
            definedRunProps.Add(new XElement(W.szCs, new XAttribute(W.val, "20")));
        }

        XElement paraLevelRunProps = MergeStyleElement(definedRunProps, definedParaProps.Element(W.rPr));
        effectiveParaProps = new XElement(definedParaProps.Name,
            definedParaProps.Attributes(),
            definedParaProps.Elements().Where(e => e.Name != W.rPr),
            paraLevelRunProps);
        effectiveRunProps = MergeStyleElement(definedParaProps.Element(W.rPr), definedRunProps);
    }

    // Hard-coded fallback, used when the styles part has no w:docDefaults.
    XElement builtInRunProps = new XElement(W.rPr,
        new XElement(W.rFonts,
            new XAttribute(W.ascii, "Times New Roman"),
            new XAttribute(W.hAnsi, "Times New Roman"),
            new XAttribute(W.cs, "Times New Roman")),
        new XElement(W.sz, new XAttribute(W.val, "20")),
        new XElement(W.szCs, new XAttribute(W.val, "20")));
    if (effectiveParaProps == null)
    {
        effectiveParaProps = new XElement(W.pPr, builtInRunProps);
    }
    if (effectiveRunProps == null)
    {
        effectiveRunProps = builtInRunProps;
    }

    // Copies renamed into the PowerTools namespace; these are what get attached.
    XElement paraAnnotation = new XElement(effectiveParaProps);
    XElement runAnnotation = new XElement(effectiveRunProps);
    paraAnnotation.Name = PtOpenXml.pPr;
    runAnnotation.Name = PtOpenXml.rPr;

    bool addStyleNames = settings.CreateHtmlConverterAnnotationAttributes;
    IEnumerable<XElement> parasAndRuns = rootElement
        .Descendants()
        .Where(d => d.Name == W.p || d.Name == W.r);

    foreach (XElement target in parasAndRuns)
    {
        bool isParagraph = target.Name == W.p;

        if (addStyleNames)
        {
            string styleName;
            if (isParagraph)
            {
                styleName = (string)target.Elements(W.pPr).Elements(W.pStyle).Attributes(W.val).FirstOrDefault()
                    ?? fallbackParaStyle;
            }
            else
            {
                styleName = (string)target.Elements(W.rPr).Elements(W.rStyle).Attributes(W.val).FirstOrDefault()
                    ?? fallbackCharStyle;
            }

            if (styleName != null)
            {
                target.Add(new XAttribute(PtOpenXml.StyleName, styleName));
            }
        }

        target.Add(isParagraph ? paraAnnotation : runAnnotation);
    }
}
/// <summary>
/// Converts the accumulated PowerTools property elements (pt:rPr, pt:pPr,
/// pt:tblPr, pt:trPr, pt:tcPr) of a part into regular w:* property elements,
/// replacing whatever property element was there, and then removes numbering
/// properties, table style references, (optionally) paragraph/run style
/// references, and all remaining elements in the PowerTools namespace.
/// </summary>
/// <param name="pxd">The part's XDocument; it is modified in place.</param>
/// <param name="settings">
/// Options. <c>CreateHtmlConverterAnnotationAttributes</c> decides whether
/// PowerTools attributes listed in <c>PtNamesToKeep</c> survive (and whether
/// the pt14 / mc namespace declarations and mc:Ignorable are set up on the
/// root); <c>RemoveStyleNamesFromParagraphAndRunProperties</c> decides whether
/// w:pStyle / w:rStyle are removed; <c>OrderElementsPerStandard</c> triggers
/// a final reordering transform of the root.
/// </param>
public static void NormalizePropsForPart(XDocument pxd, FormattingAssemblerSettings settings)
{
    if (settings.CreateHtmlConverterAnnotationAttributes)
    {
        pxd.Root.Descendants().Attributes()
            .Where(d => d.Name.Namespace == PtOpenXml.pt && !PtNamesToKeep.Contains(d.Name))
            .Remove();

        if (pxd.Root.Attribute(XNamespace.Xmlns + "pt14") == null)
        {
            pxd.Root.Add(new XAttribute(XNamespace.Xmlns + "pt14", PtOpenXml.pt.NamespaceName));
        }
        if (pxd.Root.Attribute(XNamespace.Xmlns + "mc") == null)
        {
            pxd.Root.Add(new XAttribute(XNamespace.Xmlns + "mc", MC.mc.NamespaceName));
        }

        XAttribute mci = pxd.Root.Attribute(MC.Ignorable);
        if (mci == null)
        {
            pxd.Root.Add(new XAttribute(MC.Ignorable, "pt14"));
        }
        else
        {
            // Append the prefix only when it is not already listed, so that
            // processing the same part twice does not yield "pt14 pt14".
            string[] listedPrefixes = mci.Value.Split(
                new[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            if (!listedPrefixes.Contains("pt14"))
            {
                mci.Value = mci.Value + " pt14";
            }
        }
    }
    else
    {
        pxd.Root.Descendants().Attributes().Where(d => d.Name.Namespace == PtOpenXml.pt).Remove();
    }

    bool keepStyleNames = !settings.RemoveStyleNamesFromParagraphAndRunProperties;

    // pt:rPr -> w:rPr. For a paragraph the run properties live inside w:pPr
    // (created when absent); for anything else they are a direct child.
    var runProps = pxd.Root.Descendants(PtOpenXml.rPr).ToList();
    foreach (var item in runProps)
    {
        XElement newRunProps = new XElement(W.rPr, item.Attributes(), item.Elements());
        XElement parent = item.Parent;

        XElement runPropsContainer = parent;
        if (parent.Name == W.p)
        {
            runPropsContainer = parent.Element(W.pPr);
            if (runPropsContainer == null)
            {
                runPropsContainer = new XElement(W.pPr);
                parent.Add(runPropsContainer);
            }
        }

        XElement existingRunProps = runPropsContainer.Element(W.rPr);
        if (existingRunProps != null)
        {
            if (keepStyleNames)
            {
                CopyStyleReferenceIfMissing(newRunProps, existingRunProps, W.rStyle);
            }
            existingRunProps.ReplaceWith(newRunProps);
        }
        else
        {
            runPropsContainer.Add(newRunProps);
        }
    }

    // pt:pPr -> w:pPr, with its run properties merged with the paragraph's current w:pPr/w:rPr.
    var paraProps = pxd.Root.Descendants(PtOpenXml.pPr).ToList();
    foreach (var item in paraProps)
    {
        XElement para = item.Parent;

        // Null when the paragraph has no w:pPr/w:rPr; guarded below.
        var paraRunProps = para.Elements(W.pPr).Elements(W.rPr).FirstOrDefault();
        var merged = MergeStyleElement(item.Element(W.rPr), paraRunProps);
        if (keepStyleNames)
        {
            CopyStyleReferenceIfMissing(merged, paraRunProps, W.rStyle);
        }

        XElement newParaProps = new XElement(W.pPr,
            item.Attributes(),
            item.Elements().Where(e => e.Name != W.rPr),
            merged);

        XElement existingParaProps = para.Element(W.pPr);
        if (existingParaProps != null)
        {
            if (keepStyleNames)
            {
                CopyStyleReferenceIfMissing(newParaProps, existingParaProps, W.pStyle);
            }
            existingParaProps.ReplaceWith(newParaProps);
        }
        else
        {
            para.Add(newParaProps);
        }
    }

    PromoteAccumulatedTableProps(pxd.Root, PtOpenXml.tblPr, W.tblPr);
    PromoteAccumulatedTableProps(pxd.Root, PtOpenXml.trPr, W.trPr);
    PromoteAccumulatedTableProps(pxd.Root, PtOpenXml.tcPr, W.tcPr);

    pxd.Root.Descendants(W.numPr).Remove();
    if (settings.RemoveStyleNamesFromParagraphAndRunProperties)
    {
        pxd.Root.Descendants(W.pStyle).Where(ps => ps.Parent.Name == W.pPr).Remove();
        pxd.Root.Descendants(W.rStyle).Where(ps => ps.Parent.Name == W.rPr).Remove();
    }
    pxd.Root.Descendants(W.tblStyle).Where(ps => ps.Parent.Name == W.tblPr).Remove();
    pxd.Root.Descendants().Where(d => d.Name.Namespace == PtOpenXml.pt).Remove();

    if (settings.OrderElementsPerStandard)
    {
        XElement newRoot = (XElement)TransformAndOrderElements(pxd.Root);
        pxd.Root.ReplaceWith(newRoot);
    }
}

// Adds source's style reference child (w:rStyle / w:pStyle) to target when target has none; tolerates nulls.
private static void CopyStyleReferenceIfMissing(XElement target, XElement source, XName styleReferenceName)
{
    if (target == null || source == null)
    {
        return;
    }
    if (target.Element(styleReferenceName) == null)
    {
        target.Add(source.Element(styleReferenceName));
    }
}

// Replaces (or inserts as first child) the w:* property element of each owner with a renamed copy of its pt:* element.
private static void PromoteAccumulatedTableProps(XElement root, XName accumulatedName, XName standardName)
{
    var accumulated = root.Descendants(accumulatedName).ToList();
    foreach (var item in accumulated)
    {
        XElement promoted = new XElement(item);
        promoted.Name = standardName;

        XElement owner = item.Parent;
        XElement existing = owner.Element(standardName);
        if (existing != null)
        {
            existing.ReplaceWith(promoted);
        }
        else
        {
            owner.AddFirst(promoted);
        }
    }
}
/// <summary>
/// Resolves theme font references on the w:rFonts element of
/// <paramref name="rPr"/> into concrete typeface names, then records on
/// <paramref name="paraOrRun"/> (as pt:FontName and pt:LanguageType
/// attributes) the font and language type selected for the first
/// non-weak/neutral character of its first non-empty text node.
/// </summary>
/// <param name="wDoc">Document whose theme part (if any) supplies the theme fonts.</param>
/// <param name="paraOrRun">The paragraph or run being annotated.</param>
/// <param name="pPr">Paragraph properties passed to <c>CharStyleAttributes</c>; may be null.</param>
/// <param name="rPr">Run properties holding w:rFonts. When null, or when it has no w:rFonts, nothing is done.</param>
/// <param name="settings">When <c>RestrictToSupportedLanguages</c> is set, East Asian and complex-script text is rejected.</param>
/// <exception cref="UnsupportedLanguageException">
/// The examined character maps to an East Asian or complex-script font while
/// <c>settings.RestrictToSupportedLanguages</c> is true.
/// </exception>
private static void AdjustFontAttributes(WordprocessingDocument wDoc, XElement paraOrRun, XElement pPr, XElement rPr,
    FormattingAssemblerSettings settings)
{
    // Callers pass the result of Element(W.rPr), which can be null.
    if (rPr == null)
    {
        return;
    }
    var rFonts = rPr.Element(W.rFonts);
    if (rFonts == null)
    {
        return;
    }

    // Theme fonts; every level is optional so that an incomplete theme does not throw.
    XElement majorFont = null;
    XElement minorFont = null;
    var themePart = wDoc.MainDocumentPart.ThemePart;
    if (themePart != null)
    {
        XDocument themeXDoc = themePart.GetXDocument();
        XElement fontScheme = themeXDoc.Root
            .Elements(A.themeElements)
            .Elements(A.fontScheme)
            .FirstOrDefault();
        if (fontScheme != null)
        {
            majorFont = fontScheme.Element(A.majorFont);
            minorFont = fontScheme.Element(A.minorFont);
        }
    }

    string minorLatinTypeface = ThemeTypeface(minorFont, A.latin);
    string majorLatinTypeface = ThemeTypeface(majorFont, A.latin);
    string minorCsTypeface = ThemeTypeface(minorFont, A.cs);
    string majorCsTypeface = ThemeTypeface(majorFont, A.cs);

    string ascii = ResolveThemeFont((string)rFonts.Attribute(W.asciiTheme), minorLatinTypeface, majorLatinTypeface);
    string hAnsi = ResolveThemeFont((string)rFonts.Attribute(W.hAnsiTheme), minorLatinTypeface, majorLatinTypeface);
    // NOTE(review): the East Asian theme reference is resolved to the *latin* theme typeface,
    // as this method has always done - confirm whether the theme's East Asian typeface was intended.
    string eastAsia = ResolveThemeFont((string)rFonts.Attribute(W.eastAsiaTheme), minorLatinTypeface, majorLatinTypeface);
    string cs = ResolveThemeFont((string)rFonts.Attribute(W.cstheme), minorCsTypeface, majorCsTypeface);

    if (ascii != null)
    {
        rFonts.SetAttributeValue(W.ascii, ascii);
    }
    if (hAnsi != null)
    {
        rFonts.SetAttributeValue(W.hAnsi, hAnsi);
    }
    if (eastAsia != null)
    {
        rFonts.SetAttributeValue(W.eastAsia, eastAsia);
    }
    if (cs != null)
    {
        rFonts.SetAttributeValue(W.cs, cs);
    }

    var firstTextNode = paraOrRun.Descendants(W.t).FirstOrDefault(t => t.Value.Length > 0);

    // A run with no text needs none of the remaining work.
    if (firstTextNode == null && paraOrRun.Name == W.r)
    {
        return;
    }

    // A paragraph without text is classified as if it contained a single space.
    string str = firstTextNode != null ? firstTextNode.Value : " ";

    var csa = new CharStyleAttributes(pPr, rPr);

    // The font is determined from a single character only. Technically a run can contain characters
    // from different Unicode code blocks and hence should be rendered with different fonts. Word
    // splits such runs into several runs; other producers of WordprocessingML may not, in which case
    // this routine may need to be extended to look at every character of the run.
    char charToExamine = str.FirstOrDefault(c => !WeakAndNeutralDirectionalCharacters.Contains(c));
    if (charToExamine == '\0')
    {
        // Only weak/neutral characters were found: fall back to the first character.
        charToExamine = str[0];
    }

    var ft = DetermineFontTypeFromCharacter(charToExamine, csa);
    string fontType = null;
    string languageType = null;
    switch (ft)
    {
        case FontType.Ascii:
            fontType = (string)rFonts.Attribute(W.ascii);
            languageType = "western";
            break;
        case FontType.HAnsi:
            fontType = (string)rFonts.Attribute(W.hAnsi);
            languageType = "western";
            break;
        case FontType.EastAsia:
            if (settings.RestrictToSupportedLanguages)
            {
                throw new UnsupportedLanguageException("EastAsia languages are not supported");
            }
            fontType = (string)rFonts.Attribute(W.eastAsia);
            languageType = "eastAsia";
            break;
        case FontType.CS:
            if (settings.RestrictToSupportedLanguages)
            {
                throw new UnsupportedLanguageException("Complex script (RTL) languages are not supported");
            }
            fontType = (string)rFonts.Attribute(W.cs);
            languageType = "bidi";
            break;
    }

    // SetAttributeValue adds the attribute or overwrites an existing one.
    if (fontType != null)
    {
        paraOrRun.SetAttributeValue(PtOpenXml.FontName, fontType);
    }
    if (languageType != null)
    {
        paraOrRun.SetAttributeValue(PtOpenXml.LanguageType, languageType);
    }
}

// Returns the "typeface" attribute of the given child (a:latin, a:cs, ...) of a theme font element, or null when anything is missing.
private static string ThemeTypeface(XElement themeFont, XName scriptElementName)
{
    if (themeFont == null)
    {
        return null;
    }
    XElement scriptElement = themeFont.Element(scriptElementName);
    return scriptElement == null ? null : (string)scriptElement.Attribute("typeface");
}

// Maps a theme reference starting with "minor" / "major" to the matching typeface; null when unresolved.
private static string ResolveThemeFont(string themeReference, string minorTypeface, string majorTypeface)
{
    if (themeReference == null)
    {
        return null;
    }
    if (themeReference.StartsWith("minor", StringComparison.Ordinal) && minorTypeface != null)
    {
        return minorTypeface;
    }
    if (themeReference.StartsWith("major", StringComparison.Ordinal) && majorTypeface != null)
    {
        return majorTypeface;
    }
    return null;
}
/// <summary>
/// Recursive transform that copies <paramref name="node"/> and, for every
/// paragraph that <c>ListItemRetriever.RetrieveListItem</c> yields list item
/// text for, produces a replacement paragraph in which the list item text is
/// an explicit run (followed by a suffix run for tab/space suffixes) and
/// w:numPr has been dropped from the paragraph properties.
/// </summary>
/// <param name="fai">Assembler state passed to the style roll-up.</param>
/// <param name="wDoc">The document being processed.</param>
/// <param name="node">The node to transform.</param>
/// <param name="settings">Assembler options.</param>
/// <returns>The transformed node; non-element nodes are returned unchanged.</returns>
private static object NormalizeListItemsTransform(FormattingAssemblerInfo fai, WordprocessingDocument wDoc, XNode node, FormattingAssemblerSettings settings)
{
    var element = node as XElement;
    if (element == null)
    {
        return node;
    }

    if (element.Name == W.p)
    {
        var li = ListItemRetriever.RetrieveListItem(wDoc, element, settings.ListItemRetrieverSettings);
        if (li != null)
        {
            ListItemRetriever.ListItemInfo listItemInfo = element.Annotation<ListItemRetriever.ListItemInfo>();

            // Without the annotation the numbering level cannot be looked up, so the
            // paragraph is copied like any other element instead of dereferencing null.
            if (listItemInfo != null)
            {
                return BuildListItemParagraph(fai, wDoc, element, li, listItemInfo, settings);
            }
        }
    }

    return new XElement(element.Name,
        element.Attributes(),
        element.Nodes().Select(n => NormalizeListItemsTransform(fai, wDoc, n, settings)));
}

// Builds the replacement paragraph for a list item: explicit list-item run, optional suffix run, then the transformed content.
private static XElement BuildListItemParagraph(FormattingAssemblerInfo fai, WordprocessingDocument wDoc, XElement element,
    string li, ListItemRetriever.ListItemInfo listItemInfo, FormattingAssemblerSettings settings)
{
    var newParaProps = new XElement(W.pPr,
        element.Elements(W.pPr).Elements().Where(e => e.Name != W.numPr));

    int? abstractNumId = listItemInfo.AbstractNumId;

    var paraStyleRunProps = CharStyleRollup(fai, wDoc, element);

    var paragraphStyleName = (string)element
        .Elements(W.pPr)
        .Elements(W.pStyle)
        .Attributes(W.val)
        .FirstOrDefault();
    if (paragraphStyleName == null)
    {
        // No explicit style: use the document's default paragraph style id.
        paragraphStyleName = (string)wDoc
            .MainDocumentPart
            .StyleDefinitionsPart
            .GetXDocument()
            .Root
            .Elements(W.style)
            .Where(s => (string)s.Attribute(W.type) == "paragraph" && s.Attribute(W._default).ToBoolean() == true)
            .Attributes(W.styleId)
            .FirstOrDefault();
    }

    // Put together the run properties for the list item text.
    XElement lvlStyleRpr = ParaStyleRunPropsStack(wDoc, paragraphStyleName)
        .Aggregate(new XElement(W.rPr), (r, s) => MergeStyleElement(s, r));

    var mergedRunProps = MergeStyleElement(lvlStyleRpr, paraStyleRunProps);
    var accumulatedRunProps = element.Elements(PtOpenXml.pPr).Elements(W.rPr).FirstOrDefault();
    if (accumulatedRunProps != null)
    {
        mergedRunProps = MergeStyleElement(accumulatedRunProps, mergedRunProps);
    }

    // The numbering level is looked up once and reused for the suffix and justification below
    // (assumes Lvl() is a plain lookup - TODO confirm).
    var listItemLvl = listItemInfo.Lvl(ListItemRetriever.GetParagraphLevel(element));
    var listItemLvlRunProps = listItemLvl.Elements(W.rPr).FirstOrDefault();
    XElement listItemRunProps = MergeStyleElement(listItemLvlRunProps, mergedRunProps);

    if ((string)listItemLvl.Elements(W.numFmt).Attributes(W.val).FirstOrDefault() == "bullet")
    {
        listItemRunProps.Elements(W.rtl).Remove();
    }
    else
    {
        var pPr = element.Element(PtOpenXml.pPr);
        if (pPr != null)
        {
            XElement bidiel = pPr.Element(W.bidi);
            bool bidi = bidiel != null &&
                (bidiel.Attribute(W.val) == null || bidiel.Attribute(W.val).ToBoolean() == true);
            if (bidi)
            {
                listItemRunProps = MergeStyleElement(new XElement(W.rPr, new XElement(W.rtl)), listItemRunProps);
            }
        }
    }

    var listItemRun = new XElement(W.r,
        element.Attribute(PtOpenXml.FontName),
        element.Attribute(PtOpenXml.LanguageType),
        listItemRunProps,
        new XElement(W.t,
            new XAttribute(XNamespace.Xml + "space", "preserve"),
            li));

    AdjustFontAttributes(wDoc, listItemRun, null, listItemRunProps, settings);

    // Suffix after the list item text: a tab unless w:suff says "space" or "nothing".
    XElement suffix = new XElement(W.tab);
    var su = (string)listItemLvl.Elements(W.suff).Attributes(W.val).FirstOrDefault();
    if (su == "space")
    {
        suffix = new XElement(W.t, new XAttribute(XNamespace.Xml + "space", "preserve"), " ");
    }
    else if (su == "nothing")
    {
        suffix = null;
    }

    // Right-justified list items hang by their full width, centered ones by half of it.
    var jc = (string)listItemLvl.Elements(W.lvlJc).Attributes(W.val).FirstOrDefault();
    if (jc == "right" || jc == "center")
    {
        int listItemRunLength = WordprocessingMLUtil.CalcWidthOfRunInTwips(listItemRun);
        int extraHanging = jc == "right" ? listItemRunLength : listItemRunLength / 2;
        IncreaseHangingIndent(element.Element(PtOpenXml.pPr), extraHanging);
    }

    AddTabAtLeftIndent(element.Element(PtOpenXml.pPr));

    XElement newPara = new XElement(W.p,
        element.Attribute(PtOpenXml.FontName),
        element.Attribute(PtOpenXml.LanguageType),
        new XAttribute(PtOpenXml.AbstractNumId, abstractNumId),
        newParaProps,
        listItemRun,
        suffix != null ? new XElement(W.r, listItemRunProps, suffix) : null,
        element.Elements().Where(e => e.Name != W.pPr).Select(n => NormalizeListItemsTransform(fai, wDoc, n, settings)));
    return newPara;
}

// Adds the given number of twips to w:ind/@w:hanging of the accumulated paragraph properties, creating w:ind / the attribute as needed.
private static void IncreaseHangingIndent(XElement accumulatedParaProps, int twips)
{
    if (accumulatedParaProps == null)
    {
        return;
    }

    XAttribute hangingAtt = accumulatedParaProps.Elements(W.ind).Attributes(W.hanging).FirstOrDefault();
    if (hangingAtt == null)
    {
        XElement ind = accumulatedParaProps.Element(W.ind);
        if (ind == null)
        {
            ind = new XElement(W.ind);
            accumulatedParaProps.Add(ind);
        }
        ind.Add(new XAttribute(W.hanging, twips.ToString(System.Globalization.CultureInfo.InvariantCulture)));
    }
    else
    {
        int hanging = (int)hangingAtt + twips;
        hangingAtt.Value = hanging.ToString(System.Globalization.CultureInfo.InvariantCulture);
    }
}
/// <summary>
/// Computes the effective run properties of a run (or of a paragraph's
/// paragraph-mark run properties) and stores them on the element as a
/// pt:rPr child, replacing a pt:rPr left there by an earlier pass.
/// Contributions are merged from the element's own w:rPr, the properties
/// already accumulated in pt:rPr, the character style roll-up and - inside
/// a table - the run properties of the annotated table style including its
/// conditional overrides.
/// </summary>
/// <param name="fai">Assembler state passed to the style roll-up.</param>
/// <param name="wDoc">The document being processed.</param>
/// <param name="runOrPara">A w:r or w:p element.</param>
/// <param name="settings">Assembler options, forwarded to the font adjustment.</param>
private static void AnnotateRunProperties(FormattingAssemblerInfo fai, WordprocessingDocument wDoc, XElement runOrPara, FormattingAssemblerSettings settings)
{
    XElement localRunProps = runOrPara.Name == W.p
        ? runOrPara.Elements(W.pPr).Elements(W.rPr).FirstOrDefault()
        : runOrPara.Element(W.rPr);
    if (localRunProps == null)
    {
        localRunProps = new XElement(W.rPr);
    }

    // Run properties contributed by the table style, to be merged below.
    XElement tablerPr = null;
    var blockLevelContentContainer = runOrPara
        .Ancestors()
        .FirstOrDefault(a => a.Name == W.body ||
            a.Name == W.tbl ||
            a.Name == W.txbxContent ||
            a.Name == W.ftr ||
            a.Name == W.hdr ||
            a.Name == W.footnote ||
            a.Name == W.endnote);

    // The container is null when none of the listed ancestors exists;
    // such content simply has no table style contribution.
    if (blockLevelContentContainer != null && blockLevelContentContainer.Name == W.tbl)
    {
        XElement tbl = blockLevelContentContainer;
        XElement style = tbl.Element(PtOpenXml.pt + "style");
        XElement cellCnf = runOrPara.Ancestors(W.tc).Take(1).Elements(W.tcPr).Elements(W.cnfStyle).FirstOrDefault();
        XElement rowCnf = runOrPara.Ancestors(W.tr).Take(1).Elements(W.trPr).Elements(W.cnfStyle).FirstOrDefault();

        if (style != null)
        {
            tablerPr = style.Element(W.rPr);
            if (tablerPr == null)
            {
                tablerPr = new XElement(W.rPr);
            }

            // Apply every conditional override that the cell or row switches on.
            foreach (var ot in TableStyleOverrideTypes)
            {
                XName attName = TableStyleOverrideXNameMap[ot];
                bool overrideApplies =
                    (cellCnf != null && cellCnf.Attribute(attName).ToBoolean() == true) ||
                    (rowCnf != null && rowCnf.Attribute(attName).ToBoolean() == true);
                if (!overrideApplies)
                {
                    continue;
                }

                XElement o = style
                    .Elements(W.tblStylePr)
                    .Where(tsp => (string)tsp.Attribute(W.type) == ot)
                    .FirstOrDefault();
                if (o != null)
                {
                    XElement otrPr = o.Element(W.rPr);
                    tablerPr = MergeStyleElement(otrPr, tablerPr);
                }
            }
        }
    }

    XElement rolledRunProps = CharStyleRollup(fai, wDoc, runOrPara);
    var toggledRunProps = ToggleMergeRunProps(rolledRunProps, tablerPr);

    // Already stored on the element by a previous aggregation of properties.
    var currentRunProps = runOrPara.Element(PtOpenXml.rPr);
    var mergedRunProps = MergeStyleElement(toggledRunProps, currentRunProps);
    var newMergedRunProps = MergeStyleElement(localRunProps, mergedRunProps);

    XElement pPr = null;
    if (runOrPara.Name == W.p)
    {
        pPr = runOrPara.Element(PtOpenXml.pPr);
    }

    AdjustFontAttributes(wDoc, runOrPara, pPr, newMergedRunProps, settings);

    newMergedRunProps.Name = PtOpenXml.rPr;
    if (currentRunProps != null)
    {
        currentRunProps.ReplaceWith(newMergedRunProps);
    }
    else
    {
        runOrPara.Add(newMergedRunProps);
    }
}
/// <summary>
/// Calls <c>AnnotateRunProperties</c> for every w:r and w:p descendant of
/// <paramref name="root"/>, in document order.
/// </summary>
/// <param name="fai">Assembler state.</param>
/// <param name="wDoc">The document being processed.</param>
/// <param name="root">Root element of the content part.</param>
/// <param name="settings">Assembler options.</param>
private static void AnnotateRuns(FormattingAssemblerInfo fai, WordprocessingDocument wDoc, XElement root, FormattingAssemblerSettings settings)
{
    // Descendants are walked lazily while the annotation adds children to them.
    foreach (XElement candidate in root.Descendants())
    {
        if (candidate.Name != W.r && candidate.Name != W.p)
        {
            continue;
        }

        AnnotateRunProperties(fai, wDoc, candidate, settings);
    }
}
/// <summary>
/// Replaces the root of every content part with the result of
/// <c>NormalizeListItemsTransform</c>, making sure the new root declares
/// the pt14 and mc namespace prefixes.
/// </summary>
/// <param name="fai">Assembler state.</param>
/// <param name="wDoc">The document being processed.</param>
/// <param name="settings">Assembler options.</param>
private static void NormalizeListItems(FormattingAssemblerInfo fai, WordprocessingDocument wDoc, FormattingAssemblerSettings settings)
{
    XName pt14Declaration = XNamespace.Xmlns + "pt14";
    XName mcDeclaration = XNamespace.Xmlns + "mc";

    foreach (var contentPart in wDoc.ContentParts())
    {
        XDocument partXDoc = contentPart.GetXDocument();
        XElement transformedRoot = (XElement)NormalizeListItemsTransform(fai, wDoc, partXDoc.Root, settings);

        if (transformedRoot.Attribute(pt14Declaration) == null)
        {
            transformedRoot.Add(new XAttribute(pt14Declaration, PtOpenXml.pt.NamespaceName));
        }

        if (transformedRoot.Attribute(mcDeclaration) == null)
        {
            transformedRoot.Add(new XAttribute(mcDeclaration, MC.mc.NamespaceName));
        }

        partXDoc.Root.ReplaceWith(transformedRoot);
    }
}
/// <summary>
/// Computes the effective paragraph properties of <paramref name="para"/>
/// and stores them on the paragraph as a pt:pPr child, merged over (and
/// replacing) a pt:pPr left there by an earlier pass. Contributions come
/// from the paragraph's own w:pPr, the paragraph style roll-up, the
/// annotated table style (inside a table) and, for list items, the
/// indentation of the numbering level.
/// </summary>
/// <param name="fai">Assembler state; supplies the default paragraph style name.</param>
/// <param name="wDoc">The document being processed.</param>
/// <param name="para">The w:p element to annotate.</param>
/// <param name="settings">Assembler options.</param>
/// <exception cref="UnsupportedNumberingFormatException">
/// The paragraph is a list item whose numbering format is not in
/// <c>AcceptableNumFormats</c> while
/// <c>settings.RestrictToSupportedNumberingFormats</c> is true.
/// </exception>
private static void AnnotateParagraph(FormattingAssemblerInfo fai, WordprocessingDocument wDoc, XElement para, FormattingAssemblerSettings settings)
{
    XElement localParaProps = para.Element(W.pPr);
    if (localParaProps == null)
    {
        localParaProps = new XElement(W.pPr);
    }

    // Paragraph properties contributed by the table style, to be merged below.
    XElement tablepPr = null;
    var blockLevelContentContainer = para
        .Ancestors()
        .FirstOrDefault(a => a.Name == W.body ||
            a.Name == W.tbl ||
            a.Name == W.txbxContent ||
            a.Name == W.ftr ||
            a.Name == W.hdr ||
            a.Name == W.footnote ||
            a.Name == W.endnote);

    // The container is null when none of the listed ancestors exists;
    // such a paragraph simply has no table style contribution.
    if (blockLevelContentContainer != null && blockLevelContentContainer.Name == W.tbl)
    {
        XElement tbl = blockLevelContentContainer;
        XElement style = tbl.Element(PtOpenXml.pt + "style");
        XElement cellCnf = para.Ancestors(W.tc).Take(1).Elements(W.tcPr).Elements(W.cnfStyle).FirstOrDefault();
        XElement rowCnf = para.Ancestors(W.tr).Take(1).Elements(W.trPr).Elements(W.cnfStyle).FirstOrDefault();

        if (style != null)
        {
            tablepPr = style.Element(W.pPr);
            if (tablepPr == null)
            {
                tablepPr = new XElement(W.pPr);
            }

            // Apply every conditional override that the cell or row switches on.
            foreach (var ot in TableStyleOverrideTypes)
            {
                XName attName = TableStyleOverrideXNameMap[ot];
                bool overrideApplies =
                    (cellCnf != null && cellCnf.Attribute(attName).ToBoolean() == true) ||
                    (rowCnf != null && rowCnf.Attribute(attName).ToBoolean() == true);
                if (!overrideApplies)
                {
                    continue;
                }

                XElement o = style
                    .Elements(W.tblStylePr)
                    .Where(tsp => (string)tsp.Attribute(W.type) == ot)
                    .FirstOrDefault();
                if (o != null)
                {
                    XElement otpPr = o.Element(W.pPr);
                    tablepPr = MergeStyleElement(otpPr, tablepPr);
                }
            }
        }
    }

    var stylesPart = wDoc.MainDocumentPart.StyleDefinitionsPart;
    XDocument sXDoc = null;
    if (stylesPart != null)
    {
        sXDoc = stylesPart.GetXDocument();
    }

    ListItemRetriever.ListItemInfo lif = para.Annotation<ListItemRetriever.ListItemInfo>();

    XElement rolledParaProps = ParagraphStyleRollup(para, sXDoc, fai.DefaultParagraphStyleName);

    // A paragraph whose numPr has numId=0 is NOT a numbered item, and the
    // indentation from the style hierarchy is ignored for it.
    if (lif != null && lif.IsZeroNumId)
    {
        rolledParaProps.Elements(W.ind).Remove();
    }

    XElement toggledParaProps = MergeStyleElement(rolledParaProps, tablepPr);

    // Default precedence (lowest to highest): toggledParaProps, localParaProps.
    XElement mergedParaProps = MergeStyleElement(localParaProps, toggledParaProps);

    // NOTE(review): the returned list item text is not used here; the call is kept
    // in case RetrieveListItem has side effects - TODO confirm and remove if not.
    ListItemRetriever.RetrieveListItem(wDoc, para, settings.ListItemRetrieverSettings);

    if (lif != null && lif.IsListItem)
    {
        int paragraphLevel = ListItemRetriever.GetParagraphLevel(para);
        var lvl = lif.Lvl(paragraphLevel);

        if (settings.RestrictToSupportedNumberingFormats)
        {
            string numFmtForLevel = (string)lvl.Elements(W.numFmt).Attributes(W.val).FirstOrDefault();
            if (numFmtForLevel == null)
            {
                // Custom formats are wrapped in mc:AlternateContent/mc:Choice.
                var numFmtElement = lvl
                    .Elements(MC.AlternateContent)
                    .Elements(MC.Choice)
                    .Elements(W.numFmt)
                    .FirstOrDefault();
                if (numFmtElement != null && (string)numFmtElement.Attribute(W.val) == "custom")
                {
                    numFmtForLevel = (string)numFmtElement.Attribute(W.format);
                }
            }

            bool isLgl = lvl.Elements(W.isLgl).Any();
            if (isLgl && numFmtForLevel != "decimalZero")
            {
                numFmtForLevel = "decimal";
            }

            if (!AcceptableNumFormats.Contains(numFmtForLevel))
            {
                throw new UnsupportedNumberingFormatException(numFmtForLevel + " is not a supported numbering format");
            }
        }

        var numberingParaProps = lvl
            .Elements(W.pPr)
            .FirstOrDefault();
        if (numberingParaProps == null)
        {
            numberingParaProps = new XElement(W.pPr);
        }
        else
        {
            // Only the indentation of the numbering level takes part in the merge.
            // NOTE(review): this removes the other children from the level's own
            // w:pPr element in place - confirm that this is intended.
            numberingParaProps
                .Elements()
                .Where(e => e.Name != W.ind)
                .Remove();
        }

        if (lif.FromParagraph != null)
        {
            // Precedence (lowest to highest): toggledParaProps, numberingParaProps, localParaProps.
            mergedParaProps = MergeStyleElement(numberingParaProps, toggledParaProps);
            mergedParaProps = MergeStyleElement(localParaProps, mergedParaProps);
        }
        else if (lif.FromStyle != null)
        {
            // Precedence (lowest to highest): numberingParaProps, toggledParaProps, localParaProps.
            mergedParaProps = MergeStyleElement(toggledParaProps, numberingParaProps);
            mergedParaProps = MergeStyleElement(localParaProps, mergedParaProps);
        }
    }

    // Merge over the properties accumulated so far (mergedParaProps has priority)
    // and replace the accumulated element with the result.
    XElement accumulatedParaProps = para.Element(PtOpenXml.pt + "pPr");
    XElement newAccumulatedParaProps = MergeStyleElement(mergedParaProps, accumulatedParaProps);

    AdjustFontAttributes(wDoc, para, newAccumulatedParaProps, newAccumulatedParaProps.Element(W.rPr), settings);

    newAccumulatedParaProps.Name = PtOpenXml.pt + "pPr";
    if (accumulatedParaProps != null)
    {
        accumulatedParaProps.ReplaceWith(newAccumulatedParaProps);
    }
    else
    {
        para.Add(newAccumulatedParaProps);
    }
}
/// <summary>
/// Applies <c>AnnotateParagraph</c> to every <c>w:p</c> element found beneath
/// <paramref name="root"/>, in the order that <c>Descendants</c> yields them.
/// </summary>
/// <param name="fai">Assembler state, forwarded unchanged to <c>AnnotateParagraph</c>.</param>
/// <param name="wDoc">The document being processed, forwarded unchanged.</param>
/// <param name="root">Root element whose paragraph descendants are annotated.</param>
/// <param name="settings">Assembler settings, forwarded unchanged.</param>
private static void AnnotateParagraphs(FormattingAssemblerInfo fai, WordprocessingDocument wDoc, XElement root, FormattingAssemblerSettings settings)
{
    // The descendant query is deliberately left lazy: paragraphs are pulled one at a
    // time while the previous one is being annotated, exactly as the caller expects.
    foreach (XElement paragraph in root.Descendants(W.p))
        AnnotateParagraph(fai, wDoc, paragraph, settings);
}
/// <summary>
/// Assembles formatting for <paramref name="wDoc"/> in place, passes every <c>w:r</c> element
/// of every content part to <c>AnalyzeRun</c>, and appends the resulting counters to
/// <paramref name="metrics"/>.
/// </summary>
/// <param name="wDoc">Document to analyze; it is modified by the formatting assembly step.</param>
/// <param name="metrics">List that receives one element per reported metric.</param>
/// <param name="notes">Forwarded unchanged to <c>AnalyzeRun</c>.</param>
/// <remarks>
/// The run-count element is always added. Every other counter is added only when it is
/// greater than zero, and the languages element only when at least one language was recorded.
/// </remarks>
private static void FontAndCharSetAnalysis(WordprocessingDocument wDoc, List<XElement> metrics, List<string> notes)
{
    // Assemble formatting first so the runs analyzed below carry their resolved properties.
    FormattingAssemblerSettings assemblerSettings = new FormattingAssemblerSettings
    {
        RemoveStyleNamesFromParagraphAndRunProperties = false,
        ClearStyles = true,
        RestrictToSupportedNumberingFormats = false,
        RestrictToSupportedLanguages = false,
    };
    FormattingAssembler.AssembleFormatting(wDoc, assemblerSettings);

    // Walk every run of every content part, counting as we go.
    var totals = new FormattingMetrics();
    foreach (var contentPart in wDoc.ContentParts())
    {
        var partXDoc = contentPart.GetXDocument();
        foreach (var runElement in partXDoc.Descendants(W.r))
        {
            totals.RunCount++;
            AnalyzeRun(runElement, metrics, notes, totals, contentPart.Uri.ToString());
        }
    }

    // Total number of runs: reported unconditionally.
    metrics.Add(new XElement(H.RunCount, new XAttribute(H.Val, totals.RunCount)));

    // Run-level anomalies.
    if (totals.RunWithoutRprCount > 0)
    {
        metrics.Add(new XElement(H.RunWithoutRprCount, new XAttribute(H.Val, totals.RunWithoutRprCount)));
    }
    if (totals.ZeroLengthText > 0)
    {
        metrics.Add(new XElement(H.ZeroLengthText, new XAttribute(H.Val, totals.ZeroLengthText)));
    }
    if (totals.MultiFontRun > 0)
    {
        metrics.Add(new XElement(H.MultiFontRun, new XAttribute(H.Val, totals.MultiFontRun)));
    }

    // Character counts per font slot.
    if (totals.AsciiCharCount > 0)
    {
        metrics.Add(new XElement(H.AsciiCharCount, new XAttribute(H.Val, totals.AsciiCharCount)));
    }
    if (totals.CSCharCount > 0)
    {
        metrics.Add(new XElement(H.CSCharCount, new XAttribute(H.Val, totals.CSCharCount)));
    }
    if (totals.EastAsiaCharCount > 0)
    {
        metrics.Add(new XElement(H.EastAsiaCharCount, new XAttribute(H.Val, totals.EastAsiaCharCount)));
    }
    if (totals.HAnsiCharCount > 0)
    {
        metrics.Add(new XElement(H.HAnsiCharCount, new XAttribute(H.Val, totals.HAnsiCharCount)));
    }

    // Run counts per font slot.
    if (totals.AsciiRunCount > 0)
    {
        metrics.Add(new XElement(H.AsciiRunCount, new XAttribute(H.Val, totals.AsciiRunCount)));
    }
    if (totals.CSRunCount > 0)
    {
        metrics.Add(new XElement(H.CSRunCount, new XAttribute(H.Val, totals.CSRunCount)));
    }
    if (totals.EastAsiaRunCount > 0)
    {
        metrics.Add(new XElement(H.EastAsiaRunCount, new XAttribute(H.Val, totals.EastAsiaRunCount)));
    }
    if (totals.HAnsiRunCount > 0)
    {
        metrics.Add(new XElement(H.HAnsiRunCount, new XAttribute(H.Val, totals.HAnsiRunCount)));
    }

    // Languages are reported as one comma-separated attribute value.
    if (totals.Languages.Any())
    {
        // Each entry is emitted with a trailing comma; the trailing commas are then trimmed.
        var withTrailingComma = totals.Languages.StringConcatenate(lang => lang + ",");
        var languageList = withTrailingComma.TrimEnd(',');
        var languagesAttribute = new XAttribute(H.Val, PtUtils.MakeValidXml(languageList));
        metrics.Add(new XElement(H.Languages, languagesAttribute));
    }
}
/// <summary>
/// Reads <paramref name="source"/> into memory, accepts revisions, simplifies the markup,
/// assembles formatting, and writes the resulting package bytes to <paramref name="dest"/>.
/// The source file itself is only read, never modified.
/// </summary>
/// <param name="source">The .docx file to read.</param>
/// <param name="dest">The file that receives the processed document bytes.</param>
public static void CopyFormattingAssembledDocx(FileInfo source, FileInfo dest)
{
    byte[] sourceBytes = File.ReadAllBytes(source.FullName);

    using (MemoryStream buffer = new MemoryStream())
    {
        // Copy into a default-constructed stream so the package can be opened for editing.
        buffer.Write(sourceBytes, 0, sourceBytes.Length);

        using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(buffer, true))
        {
            // Step 1: accept tracked revisions.
            RevisionAccepter.AcceptRevisions(wordDoc);

            // Step 2: simplify the markup; field codes and tabs are kept.
            SimplifyMarkupSettings markupSettings = new SimplifyMarkupSettings
            {
                RemoveComments = true,
                RemoveContentControls = true,
                RemoveEndAndFootNotes = true,
                RemoveFieldCodes = false,
                RemoveLastRenderedPageBreak = true,
                RemovePermissions = true,
                RemoveProof = true,
                RemoveRsidInfo = true,
                RemoveSmartTags = true,
                RemoveSoftHyphens = true,
                RemoveGoBackBookmark = true,
                ReplaceTabsWithSpaces = false,
            };
            MarkupSimplifier.SimplifyMarkup(wordDoc, markupSettings);

            // Step 3: assemble formatting; styles and style names are kept.
            FormattingAssemblerSettings assemblerSettings = new FormattingAssemblerSettings
            {
                RemoveStyleNamesFromParagraphAndRunProperties = false,
                ClearStyles = false,
                RestrictToSupportedLanguages = false,
                RestrictToSupportedNumberingFormats = false,
                CreateHtmlConverterAnnotationAttributes = true,
                OrderElementsPerStandard = false,
                ListItemRetrieverSettings = new ListItemRetrieverSettings()
                {
                    ListItemTextImplementations = ListItemRetrieverSettings.DefaultListItemTextImplementations,
                },
            };
            FormattingAssembler.AssembleFormatting(wordDoc, assemblerSettings);
        }

        // Snapshot the stream only after the package has been disposed above.
        byte[] processedBytes = buffer.ToArray();
        File.WriteAllBytes(dest.FullName, processedBytes);
    }
}