// Sample driver: for every .docx file found in "../../" (relative to the working directory)
// it writes a copy whose name has ".docx" replaced by "out.docx" and runs
// FormattingAssembler.AssembleFormatting over that copy.
static void Main(string[] args)
 {
     var sampleDirectory = new DirectoryInfo("../../");

     // Remove the results of earlier runs first: otherwise they would be picked up as inputs
     // below, and File.Copy (called without the overwrite flag) would fail on existing targets.
     FileInfo[] previousOutputs = sampleDirectory.GetFiles("*out.docx");
     foreach (FileInfo previousOutput in previousOutputs)
     {
         previousOutput.Delete();
     }

     FileInfo[] inputs = sampleDirectory.GetFiles("*.docx");
     foreach (FileInfo input in inputs)
     {
         Console.WriteLine(input.Name);

         string outputName = input.Name.Replace(".docx", "out.docx");
         string outputPath = new FileInfo("../../" + outputName).FullName;
         File.Copy(input.FullName, outputPath);

         // Only the copy is opened (second argument true) and processed; the input file is not modified.
         using (WordprocessingDocument wDoc = WordprocessingDocument.Open(outputPath, true))
         {
             FormattingAssemblerSettings settings = new FormattingAssemblerSettings();
             settings.ClearStyles = true;
             settings.RemoveStyleNamesFromParagraphAndRunProperties = true;
             settings.CreateHtmlConverterAnnotationAttributes = true;
             settings.OrderElementsPerStandard = true;
             settings.RestrictToSupportedLanguages = true;
             settings.RestrictToSupportedNumberingFormats = true;

             FormattingAssembler.AssembleFormatting(wDoc, settings);
         }
     }
 }
 /// <summary>
 /// In-memory overload: wraps <paramref name="document"/> in an
 /// <c>OpenXmlMemoryStreamDocument</c>, runs the <c>WordprocessingDocument</c> overload of
 /// <c>AssembleFormatting</c> on it, and returns the modified document.
 /// </summary>
 /// <param name="document">The document to process.</param>
 /// <param name="settings">Options forwarded unchanged to the package-based overload.</param>
 /// <returns>The result of <c>GetModifiedWmlDocument()</c> on the in-memory wrapper.</returns>
 public static WmlDocument AssembleFormatting(WmlDocument document, FormattingAssemblerSettings settings)
 {
     WmlDocument assembled;
     using (OpenXmlMemoryStreamDocument memoryDocument = new OpenXmlMemoryStreamDocument(document))
     {
         // The package is disposed before the modified document is read back from the wrapper.
         using (WordprocessingDocument package = memoryDocument.GetWordprocessingDocument())
         {
             AssembleFormatting(package, settings);
         }

         assembled = memoryDocument.GetModifiedWmlDocument();
     }
     return assembled;
 }
 /// <summary>
 /// Runs the formatting-assembly passes over every content part of <paramref name="wDoc"/>
 /// and writes the transformed XML back to each part.
 /// </summary>
 /// <remarks>
 /// Order of work:
 /// 1. Record the style ids of the default paragraph, character and table styles.
 /// 2. Assemble list item information using <c>settings.ListItemRetrieverSettings</c>.
 /// 3. For each content part: fix hex values, then annotate with global defaults, table styles,
 ///    paragraph properties and run properties.
 /// 4. Normalize list items and, when <c>settings.ClearStyles</c> is set, clear the styles.
 /// 5. For each content part: drop namespace declarations on descendants, normalize the
 ///    accumulated properties, apply the cleanup transform and save the part.
 /// </remarks>
 /// <param name="wDoc">Open document; its main part's style definitions part is read directly.</param>
 /// <param name="settings">Options controlling the individual passes.</param>
 public static void AssembleFormatting(WordprocessingDocument wDoc, FormattingAssemblerSettings settings)
 {
     FormattingAssemblerInfo fai = new FormattingAssemblerInfo();
     XDocument sXDoc = wDoc.MainDocumentPart.StyleDefinitionsPart.GetXDocument();

     // Lazy query shared by the three look-ups below; each look-up adds its own w:type filter.
     var defaultStyles = sXDoc
         .Root
         .Elements(W.style)
         .Where(st => st.Attribute(W._default).ToBoolean() == true);

     // Each name is only assigned when a matching default style exists.
     XElement defaultParagraphStyle = defaultStyles
         .FirstOrDefault(st => (string)st.Attribute(W.type) == "paragraph");
     if (defaultParagraphStyle != null)
         fai.DefaultParagraphStyleName = (string)defaultParagraphStyle.Attribute(W.styleId);

     XElement defaultCharacterStyle = defaultStyles
         .FirstOrDefault(st => (string)st.Attribute(W.type) == "character");
     if (defaultCharacterStyle != null)
         fai.DefaultCharacterStyleName = (string)defaultCharacterStyle.Attribute(W.styleId);

     XElement defaultTableStyle = defaultStyles
         .FirstOrDefault(st => (string)st.Attribute(W.type) == "table");
     if (defaultTableStyle != null)
         fai.DefaultTableStyleName = (string)defaultTableStyle.Attribute(W.styleId);

     // The caller-supplied retriever settings are used (no locally created defaults).
     AssembleListItemInformation(wDoc, settings.ListItemRetrieverSettings);

     foreach (var part in wDoc.ContentParts())
     {
         var pxd = part.GetXDocument();
         FixNonconformantHexValues(pxd.Root);
         AnnotateWithGlobalDefaults(wDoc, pxd.Root, settings);
         AnnotateTablesWithTableStyles(wDoc, pxd.Root);
         AnnotateParagraphs(fai, wDoc, pxd.Root, settings);
         AnnotateRuns(fai, wDoc, pxd.Root, settings);
     }

     NormalizeListItems(fai, wDoc, settings);
     if (settings.ClearStyles)
         ClearStyles(wDoc);

     foreach (var part in wDoc.ContentParts())
     {
         var pxd = part.GetXDocument();

         // Only declarations on descendants are removed; the root element keeps its own.
         pxd.Root.Descendants().Attributes().Where(a => a.IsNamespaceDeclaration).Remove();
         FormattingAssembler.NormalizePropsForPart(pxd, settings);
         var newRoot = (XElement)CleanupTransform(pxd.Root);
         pxd.Root.ReplaceWith(newRoot);
         part.PutXDocument();
     }
 }
        /// <summary>
        /// Converts <paramref name="wordDoc"/> to an XHTML tree. The document is modified in
        /// memory along the way: revisions are accepted, markup is simplified and formatting is
        /// assembled before the main document part is transformed.
        /// </summary>
        /// <param name="wordDoc">The open document to convert.</param>
        /// <param name="htmlConverterSettings">Conversion options; also supplies the language,
        /// numbering-format and list-item settings passed on to the formatting assembler.</param>
        /// <returns>The root element produced by <c>ConvertToHtmlTransform</c>, after
        /// <c>ReifyStylesAndClasses</c> has been applied to it.</returns>
        public static XElement ConvertToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings htmlConverterSettings)
        {
            InitEntityMap();
            RevisionAccepter.AcceptRevisions(wordDoc);

            // Markup simplification: field codes and tabs are kept, everything else listed is removed.
            SimplifyMarkupSettings markupCleanup = new SimplifyMarkupSettings();
            markupCleanup.RemoveComments = true;
            markupCleanup.RemoveContentControls = true;
            markupCleanup.RemoveEndAndFootNotes = true;
            markupCleanup.RemoveFieldCodes = false;
            markupCleanup.RemoveLastRenderedPageBreak = true;
            markupCleanup.RemovePermissions = true;
            markupCleanup.RemoveProof = true;
            markupCleanup.RemoveRsidInfo = true;
            markupCleanup.RemoveSmartTags = true;
            markupCleanup.RemoveSoftHyphens = true;
            markupCleanup.RemoveGoBackBookmark = true;
            markupCleanup.ReplaceTabsWithSpaces = false;
            MarkupSimplifier.SimplifyMarkup(wordDoc, markupCleanup);

            // Formatting assembly: styles and style names are kept, and the annotation
            // attributes for the HTML converter are requested.
            FormattingAssemblerSettings assemblerSettings = new FormattingAssemblerSettings();
            assemblerSettings.RemoveStyleNamesFromParagraphAndRunProperties = false;
            assemblerSettings.ClearStyles = false;
            assemblerSettings.RestrictToSupportedLanguages = htmlConverterSettings.RestrictToSupportedLanguages;
            assemblerSettings.RestrictToSupportedNumberingFormats = htmlConverterSettings.RestrictToSupportedNumberingFormats;
            assemblerSettings.CreateHtmlConverterAnnotationAttributes = true;
            assemblerSettings.OrderElementsPerStandard = false;
            ListItemRetrieverSettings listItemSettings = new ListItemRetrieverSettings();
            listItemSettings.ListItemTextImplementations = htmlConverterSettings.ListItemImplementations;
            assemblerSettings.ListItemRetrieverSettings = listItemSettings;
            FormattingAssembler.AssembleFormatting(wordDoc, assemblerSettings);

            // Layout-related fix-ups on the assembled document.
            InsertAppropriateNonbreakingSpaces(wordDoc);
            CalculateSpanWidthForTabs(wordDoc);
            ReverseTableBordersForRtlTables(wordDoc);
            AdjustTableBorders(wordDoc);

            // The root is captured before the field and section annotation passes run.
            XElement rootElement = wordDoc.MainDocumentPart.GetXDocument().Root;
            FieldRetriever.AnnotateWithFieldInfo(wordDoc.MainDocumentPart);
            AnnotateForSections(wordDoc);

            object transformed = ConvertToHtmlTransform(wordDoc, htmlConverterSettings, rootElement, false, 0m);
            XElement xhtml = (XElement)transformed;
            ReifyStylesAndClasses(htmlConverterSettings, xhtml);

            // Caution for callers: the tree produced by ConvertToHtmlTransform contains XEntity
            // objects (the XEntity class is defined in PtOpenXmlUtil.cs). Background:
            // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
            //
            // Any further transformation of the returned tree has to take this into account,
            // otherwise the entities will not be serialized properly.
            return xhtml;
        }
        /// <summary>
        /// Adds the document-wide default paragraph and run properties to every w:p and w:r
        /// element below <paramref name="rootElement"/> as pt:pPr / pt:rPr child elements.
        /// When <c>settings.CreateHtmlConverterAnnotationAttributes</c> is set, each paragraph
        /// and run is also tagged with a pt:StyleName attribute (its own style, or the
        /// default style of that kind when it has none).
        /// </summary>
        /// <param name="wDoc">Document whose style definitions part supplies w:docDefaults and the default style ids.</param>
        /// <param name="rootElement">Root of the content part being annotated; modified in place.</param>
        /// <param name="settings">Only <c>CreateHtmlConverterAnnotationAttributes</c> is read here.</param>
        private static void AnnotateWithGlobalDefaults(WordprocessingDocument wDoc, XElement rootElement, FormattingAssemblerSettings settings)
        {
            XElement globalDefaultParaProps = null;
            XElement globalDefaultParaPropsAsDefined = null;
            XElement globalDefaultRunProps = null;
            XElement globalDefaultRunPropsAsDefined = null;
            XDocument sXDoc = wDoc.MainDocumentPart.StyleDefinitionsPart.GetXDocument();
            var defaultParaStyleName = (string)sXDoc
                .Root
                .Elements(W.style)
                .Where(st => (string)st.Attribute(W.type) == "paragraph" && st.Attribute(W._default).ToBoolean() == true)
                .Attributes(W.styleId)
                .FirstOrDefault();
            var defaultCharStyleName = (string)sXDoc
                .Root
                .Elements(W.style)
                .Where(st => (string)st.Attribute(W.type) == "character" && st.Attribute(W._default).ToBoolean() == true)
                .Attributes(W.styleId)
                .FirstOrDefault();
            XElement docDefaults = sXDoc.Root.Element(W.docDefaults);
            if (docDefaults != null)
            {
                globalDefaultParaPropsAsDefined = docDefaults.Elements(W.pPrDefault).Elements(W.pPr)
                    .FirstOrDefault();
                if (globalDefaultParaPropsAsDefined == null)
                    globalDefaultParaPropsAsDefined = new XElement(W.pPr,
                        new XElement(W.rPr));
                globalDefaultRunPropsAsDefined = docDefaults.Elements(W.rPrDefault).Elements(W.rPr)
                    .FirstOrDefault();
                if (globalDefaultRunPropsAsDefined == null)
                    globalDefaultRunPropsAsDefined = new XElement(W.rPr);

                // Fill in font and size fallbacks when the defaults do not specify them.
                // Note: when the w:rPr came from w:docDefaults, these elements are added to the
                // in-memory styles XDocument itself.
                if (globalDefaultRunPropsAsDefined.Element(W.rFonts) == null)
                    globalDefaultRunPropsAsDefined.Add(
                        new XElement(W.rFonts,
                            new XAttribute(W.ascii, "Times New Roman"),
                            new XAttribute(W.hAnsi, "Times New Roman"),
                            new XAttribute(W.cs, "Times New Roman")));
                if (globalDefaultRunPropsAsDefined.Element(W.sz) == null)
                    globalDefaultRunPropsAsDefined.Add(
                        new XElement(W.sz,
                            new XAttribute(W.val, "20")));
                if (globalDefaultRunPropsAsDefined.Element(W.szCs) == null)
                    globalDefaultRunPropsAsDefined.Add(
                        new XElement(W.szCs,
                            new XAttribute(W.val, "20")));

                // The paragraph defaults and the run defaults are each merged with the other's
                // run properties; the two calls pass the arguments in opposite order.
                var runPropsForGlobalDefaultParaProps = MergeStyleElement(globalDefaultRunPropsAsDefined, globalDefaultParaPropsAsDefined.Element(W.rPr));
                globalDefaultParaProps = new XElement(globalDefaultParaPropsAsDefined.Name,
                    globalDefaultParaPropsAsDefined.Attributes(),
                    globalDefaultParaPropsAsDefined.Elements().Where(e => e.Name != W.rPr),
                    runPropsForGlobalDefaultParaProps);
                globalDefaultRunProps = MergeStyleElement(globalDefaultParaPropsAsDefined.Element(W.rPr), globalDefaultRunPropsAsDefined);
            }

            // Hard-coded fallback used when no usable defaults were produced above.
            var rPr = new XElement(W.rPr,
                        new XElement(W.rFonts,
                                new XAttribute(W.ascii, "Times New Roman"),
                                new XAttribute(W.hAnsi, "Times New Roman"),
                                new XAttribute(W.cs, "Times New Roman")),
                            new XElement(W.sz,
                                new XAttribute(W.val, "20")),
                            new XElement(W.szCs,
                                new XAttribute(W.val, "20")));

            if (globalDefaultParaProps == null)
                globalDefaultParaProps = new XElement(W.pPr, rPr);

            if (globalDefaultRunProps == null)
                globalDefaultRunProps = rPr;

            // Work on copies renamed into the pt namespace; only the outer element is renamed,
            // so a w:rPr nested in the paragraph properties keeps its w: name.
            XElement ptGlobalDefaultParaProps = new XElement(globalDefaultParaProps);
            XElement ptGlobalDefaultRunProps = new XElement(globalDefaultRunProps);
            ptGlobalDefaultParaProps.Name = PtOpenXml.pPr;
            ptGlobalDefaultRunProps.Name = PtOpenXml.rPr;

            // Materialize the targets first: the loop below adds children to the very tree that
            // Descendants() walks, and a lazy query must not be enumerated while it is mutated.
            var parasAndRuns = rootElement
                .Descendants()
                .Where(d => d.Name == W.p || d.Name == W.r)
                .ToList();

            bool annotateStyleNames = settings.CreateHtmlConverterAnnotationAttributes;
            foreach (var d in parasAndRuns)
            {
                if (d.Name == W.p)
                {
                    if (annotateStyleNames)
                    {
                        var pStyle = (string)d.Elements(W.pPr).Elements(W.pStyle).Attributes(W.val).FirstOrDefault();
                        if (pStyle == null)
                            pStyle = defaultParaStyleName;

                        // SetAttributeValue adds the attribute or overwrites an existing one, so an
                        // element that already carries pt:StyleName does not raise a duplicate error.
                        if (pStyle != null)
                            d.SetAttributeValue(PtOpenXml.StyleName, pStyle);
                    }

                    // XElement.Add attaches the element itself the first time and a copy afterwards
                    // (content that already has a parent is cloned).
                    d.Add(ptGlobalDefaultParaProps);
                }
                else
                {
                    if (annotateStyleNames)
                    {
                        var rStyle = (string)d.Elements(W.rPr).Elements(W.rStyle).Attributes(W.val).FirstOrDefault();
                        if (rStyle == null)
                            rStyle = defaultCharStyleName;
                        if (rStyle != null)
                            d.SetAttributeValue(PtOpenXml.StyleName, rStyle);
                    }
                    d.Add(ptGlobalDefaultRunProps);
                }
            }
        }
        /// <summary>
        /// Converts the accumulated pt:* property elements of a part into regular w:* property
        /// elements and removes the remaining pt:* bookkeeping from the tree.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. pt attributes on descendants are removed (all of them, or all except those in
        ///    <c>PtNamesToKeep</c> when annotation attributes are requested; in that case the
        ///    "pt14" and "mc" namespace declarations and an mc:Ignorable entry for "pt14"
        ///    are ensured on the root).
        /// 2. pt:rPr, pt:pPr, pt:tblPr, pt:trPr and pt:tcPr replace (or are added as) the
        ///    corresponding w: element of their parent.
        /// 3. All w:numPr elements are removed; w:pStyle / w:rStyle are removed when
        ///    <c>RemoveStyleNamesFromParagraphAndRunProperties</c> is set; w:tblStyle under
        ///    w:tblPr is always removed; every remaining pt element is removed.
        /// 4. When <c>OrderElementsPerStandard</c> is set, the root is replaced by the result
        ///    of <c>TransformAndOrderElements</c>.
        /// </remarks>
        /// <param name="pxd">XDocument of the part; modified in place.</param>
        /// <param name="settings">Options selecting the behaviour described above.</param>
        public static void NormalizePropsForPart(XDocument pxd, FormattingAssemblerSettings settings)
        {
            if (settings.CreateHtmlConverterAnnotationAttributes)
            {
                pxd.Root.Descendants().Attributes().Where(d => d.Name.Namespace == PtOpenXml.pt &&
                    !PtNamesToKeep.Contains(d.Name)).Remove();
                if (pxd.Root.Attribute(XNamespace.Xmlns + "pt14") == null)
                    pxd.Root.Add(new XAttribute(XNamespace.Xmlns + "pt14", PtOpenXml.pt.NamespaceName));
                if (pxd.Root.Attribute(XNamespace.Xmlns + "mc") == null)
                    pxd.Root.Add(new XAttribute(XNamespace.Xmlns + "mc", MC.mc.NamespaceName));

                XAttribute mci = pxd.Root.Attribute(MC.Ignorable);
                if (mci == null)
                {
                    pxd.Root.Add(new XAttribute(MC.Ignorable, "pt14"));
                }
                else if (!mci.Value.Split().Contains("pt14"))
                {
                    // Append only when missing so that a part which already lists the prefix
                    // does not end up with "pt14" repeated in mc:Ignorable.
                    mci.Value = mci.Value + " pt14";
                }
            }
            else
            {
                pxd.Root.Descendants().Attributes().Where(d => d.Name.Namespace == PtOpenXml.pt).Remove();
            }

            // pt:rPr -> w:rPr. Under a paragraph the run properties live inside w:pPr (created on
            // demand); under any other parent they are a direct child.
            var runProps = pxd.Root.Descendants(PtOpenXml.rPr).ToList();
            foreach (var item in runProps)
            {
                XElement newRunProps = new XElement(W.rPr,
                    item.Attributes(),
                    item.Elements());
                XElement parent = item.Parent;
                XElement container = parent;
                if (parent.Name == W.p)
                {
                    container = parent.Element(W.pPr);
                    if (container == null)
                    {
                        container = new XElement(W.pPr);
                        parent.Add(container);
                    }
                }

                XElement existingRunProps = container.Element(W.rPr);
                if (existingRunProps != null)
                {
                    // Carry over the existing w:rStyle when style names are to be kept.
                    if (!settings.RemoveStyleNamesFromParagraphAndRunProperties &&
                        newRunProps.Element(W.rStyle) == null)
                    {
                        newRunProps.Add(existingRunProps.Element(W.rStyle));
                    }
                    existingRunProps.ReplaceWith(newRunProps);
                }
                else
                    container.Add(newRunProps);
            }

            // pt:pPr -> w:pPr, with the paragraph-level run properties merged in.
            var paraProps = pxd.Root.Descendants(PtOpenXml.pPr).ToList();
            foreach (var item in paraProps)
            {
                XElement para = item.Parent;
                var paraRunProps = para.Elements(W.pPr).Elements(W.rPr).FirstOrDefault();
                var merged = MergeStyleElement(item.Element(W.rPr), paraRunProps);

                // paraRunProps is null for a paragraph without w:pPr/w:rPr; there is then no
                // w:rStyle to carry over, so the copy is skipped instead of dereferencing null.
                if (!settings.RemoveStyleNamesFromParagraphAndRunProperties &&
                    merged != null &&
                    paraRunProps != null &&
                    merged.Element(W.rStyle) == null)
                {
                    merged.Add(paraRunProps.Element(W.rStyle));
                }

                XElement newParaProps = new XElement(W.pPr,
                    item.Attributes(),
                    item.Elements().Where(e => e.Name != W.rPr),
                    merged);
                XElement existingParaProps = para.Element(W.pPr);
                if (existingParaProps != null)
                {
                    if (!settings.RemoveStyleNamesFromParagraphAndRunProperties &&
                        newParaProps.Element(W.pStyle) == null)
                    {
                        newParaProps.Add(existingParaProps.Element(W.pStyle));
                    }
                    existingParaProps.ReplaceWith(newParaProps);
                }
                else
                    para.Add(newParaProps);
            }

            // pt:tblPr / pt:trPr / pt:tcPr -> w:tblPr / w:trPr / w:tcPr. The three kinds are handled
            // identically and in this order: a renamed copy replaces the existing w: element of the
            // parent, or becomes the parent's first child when there is none.
            var tableLevelProps = new[]
            {
                new { Accumulated = PtOpenXml.tblPr, Target = W.tblPr },
                new { Accumulated = PtOpenXml.trPr, Target = W.trPr },
                new { Accumulated = PtOpenXml.tcPr, Target = W.tcPr },
            };
            foreach (var names in tableLevelProps)
            {
                var accumulatedProps = pxd.Root.Descendants(names.Accumulated).ToList();
                foreach (var item in accumulatedProps)
                {
                    XElement replacement = new XElement(item);
                    replacement.Name = names.Target;
                    XElement owner = item.Parent;
                    XElement existingProps = owner.Element(names.Target);
                    if (existingProps != null)
                        existingProps.ReplaceWith(replacement);
                    else
                        owner.AddFirst(replacement);
                }
            }

            pxd.Root.Descendants(W.numPr).Remove();
            if (settings.RemoveStyleNamesFromParagraphAndRunProperties)
            {
                pxd.Root.Descendants(W.pStyle).Where(ps => ps.Parent.Name == W.pPr).Remove();
                pxd.Root.Descendants(W.rStyle).Where(ps => ps.Parent.Name == W.rPr).Remove();
            }
            pxd.Root.Descendants(W.tblStyle).Where(ps => ps.Parent.Name == W.tblPr).Remove();

            // Everything still in the pt namespace was only bookkeeping for the steps above.
            pxd.Root.Descendants().Where(d => d.Name.Namespace == PtOpenXml.pt).Remove();
            if (settings.OrderElementsPerStandard)
            {
                XElement newRoot = (XElement)TransformAndOrderElements(pxd.Root);
                pxd.Root.ReplaceWith(newRoot);
            }
        }
        /// <summary>
        /// Resolves theme font references on the w:rFonts element of <paramref name="rPr"/> to
        /// concrete typefaces from the document theme, then records the font and language type
        /// selected for the first significant character of <paramref name="paraOrRun"/> as the
        /// pt:FontName and pt:LanguageType attributes.
        /// </summary>
        /// <param name="wDoc">Document whose theme part (if any) supplies the major/minor fonts.</param>
        /// <param name="paraOrRun">Paragraph or run to annotate; a run without text is left unannotated.</param>
        /// <param name="pPr">Paragraph properties handed to <c>CharStyleAttributes</c>; callers may pass null.</param>
        /// <param name="rPr">Run properties; when null or without w:rFonts the method does nothing.</param>
        /// <param name="settings">When <c>RestrictToSupportedLanguages</c> is set, East Asian and complex-script text is rejected.</param>
        /// <exception cref="UnsupportedLanguageException">The examined character maps to an East Asian or
        /// complex-script font while <c>RestrictToSupportedLanguages</c> is set.</exception>
        private static void AdjustFontAttributes(WordprocessingDocument wDoc, XElement paraOrRun, XElement pPr,
            XElement rPr, FormattingAssemblerSettings settings)
        {
            // Without run properties carrying w:rFonts there is nothing to resolve or annotate.
            var rFonts = rPr != null ? rPr.Element(W.rFonts) : null;
            if (rFonts == null)
            {
                return;
            }

            XDocument themeXDoc = null;
            if (wDoc.MainDocumentPart.ThemePart != null)
                themeXDoc = wDoc.MainDocumentPart.ThemePart.GetXDocument();

            // Axis-style queries tolerate missing theme elements: an incomplete theme simply
            // yields no major/minor font instead of a NullReferenceException.
            XElement majorFont = null;
            XElement minorFont = null;
            if (themeXDoc != null)
            {
                XElement fontScheme = themeXDoc.Root
                    .Elements(A.themeElements)
                    .Elements(A.fontScheme)
                    .FirstOrDefault();
                if (fontScheme != null)
                {
                    majorFont = fontScheme.Element(A.majorFont);
                    minorFont = fontScheme.Element(A.minorFont);
                }
            }

            var asciiTheme = (string)rFonts.Attribute(W.asciiTheme);
            var hAnsiTheme = (string)rFonts.Attribute(W.hAnsiTheme);
            var eastAsiaTheme = (string)rFonts.Attribute(W.eastAsiaTheme);
            var cstheme = (string)rFonts.Attribute(W.cstheme);
            string ascii = null;
            string hAnsi = null;
            string eastAsia = null;
            string cs = null;

            string minorLatinTypeface = null;
            string majorLatinTypeface = null;
            if (minorFont != null)
                minorLatinTypeface = (string)minorFont.Elements(A.latin).Attributes("typeface").FirstOrDefault();
            if (majorFont != null)
                majorLatinTypeface = (string)majorFont.Elements(A.latin).Attributes("typeface").FirstOrDefault();

            if (asciiTheme != null)
            {
                if (asciiTheme.StartsWith("minor") && minorLatinTypeface != null)
                    ascii = minorLatinTypeface;
                else if (asciiTheme.StartsWith("major") && majorLatinTypeface != null)
                    ascii = majorLatinTypeface;
            }
            if (hAnsiTheme != null)
            {
                if (hAnsiTheme.StartsWith("minor") && minorLatinTypeface != null)
                    hAnsi = minorLatinTypeface;
                else if (hAnsiTheme.StartsWith("major") && majorLatinTypeface != null)
                    hAnsi = majorLatinTypeface;
            }
            // NOTE(review): East Asian theme references are resolved to the theme's *latin*
            // typeface here (unchanged behaviour) - confirm whether a:ea should be consulted.
            if (eastAsiaTheme != null)
            {
                if (eastAsiaTheme.StartsWith("minor") && minorLatinTypeface != null)
                    eastAsia = minorLatinTypeface;
                else if (eastAsiaTheme.StartsWith("major") && majorLatinTypeface != null)
                    eastAsia = majorLatinTypeface;
            }
            if (cstheme != null)
            {
                if (cstheme.StartsWith("minor") && minorFont != null)
                    cs = (string)minorFont.Elements(A.cs).Attributes("typeface").FirstOrDefault();
                else if (cstheme.StartsWith("major") && majorFont != null)
                    cs = (string)majorFont.Elements(A.cs).Attributes("typeface").FirstOrDefault();
            }

            // Only resolved theme fonts overwrite the explicit font attributes.
            if (ascii != null)
                rFonts.SetAttributeValue(W.ascii, ascii);
            if (hAnsi != null)
                rFonts.SetAttributeValue(W.hAnsi, hAnsi);
            if (eastAsia != null)
                rFonts.SetAttributeValue(W.eastAsia, eastAsia);
            if (cs != null)
                rFonts.SetAttributeValue(W.cs, cs);

            var firstTextNode = paraOrRun.Descendants(W.t).FirstOrDefault(t => t.Value.Length > 0);

            // if there is a run with no text in it, then no need to do any of the rest of this method.
            if (firstTextNode == null && paraOrRun.Name == W.r)
                return;

            // A paragraph without text is classified as if it contained a single space.
            string str = firstTextNode != null ? firstTextNode.Value : " ";

            var csa = new CharStyleAttributes(pPr, rPr);

            // This module determines the font based on just the first character.
            // Technically, a run can contain characters from different Unicode code blocks, and hence should be rendered with different fonts.
            // However, Word breaks up runs that use more than one font into multiple runs.  Other producers of WordprocessingML may not, so in
            // that case, this routine may need to be augmented to look at all characters in a run.

            // Prefer the first character that is not in WeakAndNeutralDirectionalCharacters;
            // fall back to the very first character when there is none (FirstOrDefault gives '\0').
            var charToExamine = str.FirstOrDefault(c => ! WeakAndNeutralDirectionalCharacters.Contains(c));
            if (charToExamine == '\0')
                charToExamine = str[0];

            var ft = DetermineFontTypeFromCharacter(charToExamine, csa);
            string fontType = null;
            string languageType = null;
            switch (ft)
            {
                case FontType.Ascii:
                    fontType = (string)rFonts.Attribute(W.ascii);
                    languageType = "western";
                    break;
                case FontType.HAnsi:
                    fontType = (string)rFonts.Attribute(W.hAnsi);
                    languageType = "western";
                    break;
                case FontType.EastAsia:
                    if (settings.RestrictToSupportedLanguages)
                        throw new UnsupportedLanguageException("EastAsia languages are not supported");
                    fontType = (string)rFonts.Attribute(W.eastAsia);
                    languageType = "eastAsia";
                    break;
                case FontType.CS:
                    if (settings.RestrictToSupportedLanguages)
                        throw new UnsupportedLanguageException("Complex script (RTL) languages are not supported");
                    fontType = (string)rFonts.Attribute(W.cs);
                    languageType = "bidi";
                    break;
            }

            // SetAttributeValue adds the attribute when absent and overwrites it otherwise.
            if (fontType != null)
                paraOrRun.SetAttributeValue(PtOpenXml.FontName, fontType);
            if (languageType != null)
                paraOrRun.SetAttributeValue(PtOpenXml.LanguageType, languageType);
        }
        private static object NormalizeListItemsTransform(FormattingAssemblerInfo fai, WordprocessingDocument wDoc, XNode node, FormattingAssemblerSettings settings)
        {
            var element = node as XElement;
            if (element != null)
            {
                if (element.Name == W.p)
                {
                    var li = ListItemRetriever.RetrieveListItem(wDoc, element, settings.ListItemRetrieverSettings);
                    if (li != null)
                    {
                        ListItemRetriever.ListItemInfo listItemInfo = element.Annotation<ListItemRetriever.ListItemInfo>();

                        var newParaProps = new XElement(W.pPr,
                            element.Elements(W.pPr).Elements().Where(e => e.Name != W.numPr)
                        );

                        XElement listItemRunProps = null;
                        int? abstractNumId = null;
                        if (listItemInfo != null)
                        {
                            abstractNumId = listItemInfo.AbstractNumId;

                            var paraStyleRunProps = CharStyleRollup(fai, wDoc, element);

                            var paragraphStyleName = (string)element
                                .Elements(W.pPr)
                                .Elements(W.pStyle)
                                .Attributes(W.val)
                                .FirstOrDefault();

                            string defaultStyleName = (string)wDoc
                                    .MainDocumentPart
                                    .StyleDefinitionsPart
                                    .GetXDocument()
                                    .Root
                                    .Elements(W.style)
                                    .Where(s => (string)s.Attribute(W.type) == "paragraph" && s.Attribute(W._default).ToBoolean() == true)
                                    .Attributes(W.styleId)
                                    .FirstOrDefault();

                            if (paragraphStyleName == null)
                                paragraphStyleName = defaultStyleName;

                            XDocument stylesXDoc = wDoc
                                .MainDocumentPart
                                .StyleDefinitionsPart
                                .GetXDocument();

                            // put together run props for list item.

                            XElement lvlStyleRpr = ParaStyleRunPropsStack(wDoc, paragraphStyleName)
                                .Aggregate(new XElement(W.rPr),
                                    (r, s) =>
                                    {
                                        var newCharStyleRunProps = MergeStyleElement(s, r);
                                        return newCharStyleRunProps;
                                    });

                            var mergedRunProps = MergeStyleElement(lvlStyleRpr, paraStyleRunProps);

                            var accumulatedRunProps = element.Elements(PtOpenXml.pPr).Elements(W.rPr).FirstOrDefault();
                            if (accumulatedRunProps != null)
                                mergedRunProps = MergeStyleElement(accumulatedRunProps, mergedRunProps);

                            var listItemLvl = listItemInfo.Lvl(ListItemRetriever.GetParagraphLevel(element));
                            var listItemLvlRunProps = listItemLvl.Elements(W.rPr).FirstOrDefault();
                            listItemRunProps = MergeStyleElement(listItemLvlRunProps, mergedRunProps);

                            if ((string)listItemLvl.Elements(W.numFmt).Attributes(W.val).FirstOrDefault() == "bullet")
                            {
                                listItemRunProps.Elements(W.rtl).Remove();
                            }
                            else
                            {
                                var pPr = element.Element(PtOpenXml.pPr);
                                if (pPr != null)
                                {
                                    XElement bidiel = pPr.Element(W.bidi);
                                    bool bidi = bidiel != null && (bidiel.Attribute(W.val) == null || bidiel.Attribute(W.val).ToBoolean() == true);
                                    if (bidi)
                                    {
                                        listItemRunProps = MergeStyleElement(new XElement(W.rPr,
                                            new XElement(W.rtl)), listItemRunProps);
                                    }
                                }
                            }
                        }

                        var listItemRun = new XElement(W.r,
                            element.Attribute(PtOpenXml.FontName),
                            element.Attribute(PtOpenXml.LanguageType),
                            listItemRunProps,
                            new XElement(W.t,
                                new XAttribute(XNamespace.Xml + "space", "preserve"),
                                li));

                        AdjustFontAttributes(wDoc, listItemRun, null, listItemRunProps, settings);

                        var lvl = listItemInfo.Lvl(ListItemRetriever.GetParagraphLevel(element));
                        XElement suffix = new XElement(W.tab);
                        var su = (string)lvl.Elements(W.suff).Attributes(W.val).FirstOrDefault();
                        if (su == "space")
                            suffix = new XElement(W.t,
                                new XAttribute(XNamespace.Xml + "space", "preserve"),
                                " ");
                        else if (su == "nothing")
                            suffix = null;

                        var jc = (string)lvl.Elements(W.lvlJc).Attributes(W.val).FirstOrDefault();
                        if (jc == "right")
                        {
                            var accumulatedParaProps = element.Element(PtOpenXml.pPr);

                            var hangingAtt = accumulatedParaProps.Elements(W.ind).Attributes(W.hanging).FirstOrDefault();
                            if (hangingAtt == null)
                            {
                                var listItemRunLength = WordprocessingMLUtil.CalcWidthOfRunInTwips(listItemRun);
                                var ind = accumulatedParaProps.Element(W.ind);
                                if (ind == null)
                                {
                                    ind = new XElement(W.ind);
                                    accumulatedParaProps.Add(ind);
                                }
                                ind.Add(new XAttribute(W.hanging, listItemRunLength.ToString()));
                            }
                            else
                            {
                                var hanging = (int)hangingAtt;
                                var listItemRunLength = WordprocessingMLUtil.CalcWidthOfRunInTwips(listItemRun);
                                hanging += listItemRunLength; // should be width of list item, in twips
                                hangingAtt.Value = hanging.ToString();
                            }
                        }
                        else if (jc == "center")
                        {
                            var accumulatedParaProps = element.Element(PtOpenXml.pPr);

                            var hangingAtt = accumulatedParaProps.Elements(W.ind).Attributes(W.hanging).FirstOrDefault();
                            if (hangingAtt == null)
                            {
                                var listItemRunLength = WordprocessingMLUtil.CalcWidthOfRunInTwips(listItemRun);
                                var ind = accumulatedParaProps.Element(W.ind);
                                if (ind == null)
                                {
                                    ind = new XElement(W.ind);
                                    accumulatedParaProps.Add(ind);
                                }
                                ind.Add(new XAttribute(W.hanging, (listItemRunLength / 2).ToString()));
                            }
                            else
                            {
                                var hanging = (int)hangingAtt;
                                var listItemRunLength = WordprocessingMLUtil.CalcWidthOfRunInTwips(listItemRun);
                                hanging += (listItemRunLength / 2); // should be half of width of list item, in twips
                                hangingAtt.Value = hanging.ToString();
                            }
                        }
                        AddTabAtLeftIndent(element.Element(PtOpenXml.pPr));

                        XElement newPara = new XElement(W.p,
                            element.Attribute(PtOpenXml.FontName),
                            element.Attribute(PtOpenXml.LanguageType),
                            new XAttribute(PtOpenXml.AbstractNumId, abstractNumId),
                            newParaProps,
                            listItemRun,
                            suffix != null ?
                                new XElement(W.r,
                                    listItemRunProps,
                                    suffix) : null,
                            element.Elements().Where(e => e.Name != W.pPr).Select(n => NormalizeListItemsTransform(fai, wDoc, n, settings)));
                        return newPara;

                    }
                }

                return new XElement(element.Name,
                    element.Attributes(),
                    element.Nodes().Select(n => NormalizeListItemsTransform(fai, wDoc, n, settings)));
            }
            return node;
        }
        // Builds the merged run properties for a run (w:r) or a paragraph (w:p) and stores the result
        // on that element as a child named PtOpenXml.rPr, replacing any PtOpenXml.rPr child that is
        // already present.
        //
        // Parameters:
        //   fai       - assembler state; only passed through to CharStyleRollup.
        //   wDoc      - document being processed; passed to CharStyleRollup and AdjustFontAttributes.
        //   runOrPara - the w:r or w:p element to annotate; it is modified in place.
        //   settings  - assembler settings; only passed through to AdjustFontAttributes.
        private static void AnnotateRunProperties(FormattingAssemblerInfo fai, WordprocessingDocument wDoc, XElement runOrPara, FormattingAssemblerSettings settings)
        {
            // Directly applied run properties: w:pPr/w:rPr for a paragraph, w:rPr for anything else.
            XElement localRunProps = null;
            if (runOrPara.Name == W.p)
            {
                localRunProps = runOrPara.Elements(W.pPr).Elements(W.rPr).FirstOrDefault();
            }
            else
            {
                localRunProps = runOrPara.Element(W.rPr);
            }
            if (localRunProps == null)
            {
                // No direct formatting: merge with an empty w:rPr instead of null.
                localRunProps = new XElement(W.rPr);
            }

            // get run table props, to be merged.
            XElement tablerPr = null;
            var blockLevelContentContainer = runOrPara
                .Ancestors()
                .FirstOrDefault(a => a.Name == W.body ||
                    a.Name == W.tbl ||
                    a.Name == W.txbxContent ||
                    a.Name == W.ftr ||
                    a.Name == W.hdr ||
                    a.Name == W.footnote ||
                    a.Name == W.endnote);

            // FirstOrDefault returns null when no ancestor is one of the containers listed above.
            // Such an element is treated as "not inside a table" rather than failing with a
            // NullReferenceException.
            if (blockLevelContentContainer != null && blockLevelContentContainer.Name == W.tbl)
            {
                XElement tbl = blockLevelContentContainer;
                XElement style = tbl.Element(PtOpenXml.pt + "style");

                // Conditional-formatting flags of the nearest enclosing cell and row.
                XElement cellCnf = runOrPara.Ancestors(W.tc).Take(1).Elements(W.tcPr).Elements(W.cnfStyle).FirstOrDefault();
                XElement rowCnf = runOrPara.Ancestors(W.tr).Take(1).Elements(W.trPr).Elements(W.cnfStyle).FirstOrDefault();

                if (style != null)
                {
                    // Start from the table style's own w:rPr (or an empty one) ...
                    tablerPr = style.Element(W.rPr);
                    if (tablerPr == null)
                        tablerPr = new XElement(W.rPr);

                    // ... then merge in the w:rPr of every w:tblStylePr override whose flag is set
                    // on the cell's or the row's w:cnfStyle.
                    foreach (var ot in TableStyleOverrideTypes)
                    {
                        XName attName = TableStyleOverrideXNameMap[ot];
                        if ((cellCnf != null && cellCnf.Attribute(attName).ToBoolean() == true) ||
                            (rowCnf != null && rowCnf.Attribute(attName).ToBoolean() == true))
                        {
                            XElement o = style
                                .Elements(W.tblStylePr)
                                .FirstOrDefault(tsp => (string)tsp.Attribute(W.type) == ot);
                            if (o != null)
                            {
                                XElement otrPr = o.Element(W.rPr);
                                tablerPr = MergeStyleElement(otrPr, tablerPr);
                            }
                        }
                    }
                }
            }

            // Combine the layers: style rollup toggled against the table props, then the props
            // already accumulated on the element, then the direct formatting.
            XElement rolledRunProps = CharStyleRollup(fai, wDoc, runOrPara);
            var toggledRunProps = ToggleMergeRunProps(rolledRunProps, tablerPr);
            var currentRunProps = runOrPara.Element(PtOpenXml.rPr); // this is already stored on the run from previous aggregation of props
            var mergedRunProps = MergeStyleElement(toggledRunProps, currentRunProps);
            var newMergedRunProps = MergeStyleElement(localRunProps, mergedRunProps);

            // Only paragraphs pass their accumulated paragraph properties to AdjustFontAttributes.
            XElement pPr = null;
            if (runOrPara.Name == W.p)
                pPr = runOrPara.Element(PtOpenXml.pPr);
            AdjustFontAttributes(wDoc, runOrPara, pPr, newMergedRunProps, settings);

            // Store the result under the PowerTools name, replacing the previous aggregate if any.
            newMergedRunProps.Name = PtOpenXml.rPr;
            if (currentRunProps != null)
            {
                currentRunProps.ReplaceWith(newMergedRunProps);
            }
            else
            {
                runOrPara.Add(newMergedRunProps);
            }
        }
 // Visits every descendant of root and passes each run (w:r) and paragraph (w:p)
 // to AnnotateRunProperties. The descendants are enumerated lazily while the
 // elements are being annotated.
 private static void AnnotateRuns(FormattingAssemblerInfo fai, WordprocessingDocument wDoc, XElement root, FormattingAssemblerSettings settings)
 {
     foreach (XElement candidate in root.Descendants())
     {
         // Anything that is neither a run nor a paragraph is skipped.
         bool isRunOrParagraph = candidate.Name == W.r || candidate.Name == W.p;
         if (!isRunOrParagraph)
             continue;

         AnnotateRunProperties(fai, wDoc, candidate, settings);
     }
 }
 // Runs NormalizeListItemsTransform over the root of every content part of the document and
 // swaps the transformed root into the part's XDocument. The new root is given xmlns:pt14 and
 // xmlns:mc declarations when it does not already carry them.
 private static void NormalizeListItems(FormattingAssemblerInfo fai, WordprocessingDocument wDoc, FormattingAssemblerSettings settings)
 {
     XName pt14Declaration = XNamespace.Xmlns + "pt14";
     XName mcDeclaration = XNamespace.Xmlns + "mc";

     foreach (var part in wDoc.ContentParts())
     {
         XDocument partXDoc = part.GetXDocument();
         var transformedRoot = (XElement)NormalizeListItemsTransform(fai, wDoc, partXDoc.Root, settings);

         if (transformedRoot.Attribute(pt14Declaration) == null)
         {
             transformedRoot.Add(new XAttribute(pt14Declaration, PtOpenXml.pt.NamespaceName));
         }

         if (transformedRoot.Attribute(mcDeclaration) == null)
         {
             transformedRoot.Add(new XAttribute(mcDeclaration, MC.mc.NamespaceName));
         }

         partXDoc.Root.ReplaceWith(transformedRoot);
     }
 }
        // Computes the merged paragraph properties for one paragraph and stores them on the paragraph
        // as a child named PtOpenXml.pt + "pPr", replacing that child if it already exists.
        //
        // Layers that are combined with MergeStyleElement:
        //   - localParaProps:     the paragraph's own w:pPr (an empty w:pPr when absent)
        //   - toggledParaProps:   the ParagraphStyleRollup result merged with table-style paragraph props
        //   - numberingParaProps: for list items, the w:ind of the numbering level's w:pPr
        //   - the PtOpenXml pPr already accumulated on the paragraph, if any
        //
        // Parameters:
        //   fai      - assembler state; supplies DefaultParagraphStyleName for the style rollup.
        //   wDoc     - document being processed.
        //   para     - the w:p element to annotate; it is modified in place.
        //   settings - assembler settings (RestrictToSupportedNumberingFormats, ListItemRetrieverSettings).
        //
        // Throws UnsupportedNumberingFormatException when settings.RestrictToSupportedNumberingFormats
        // is set and the list item's numbering format is not contained in AcceptableNumFormats.
        private static void AnnotateParagraph(FormattingAssemblerInfo fai, WordprocessingDocument wDoc, XElement para, FormattingAssemblerSettings settings)
        {
            XElement localParaProps = para.Element(W.pPr);
            if (localParaProps == null)
            {
                localParaProps = new XElement(W.pPr);
            }

            // get para table props, to be merged.
            XElement tablepPr = null;

            var blockLevelContentContainer = para
                .Ancestors()
                .FirstOrDefault(a => a.Name == W.body ||
                    a.Name == W.tbl ||
                    a.Name == W.txbxContent ||
                    a.Name == W.ftr ||
                    a.Name == W.hdr ||
                    a.Name == W.footnote ||
                    a.Name == W.endnote);

            // FirstOrDefault returns null when no ancestor is one of the containers listed above.
            // Such a paragraph is treated as "not inside a table" rather than failing with a
            // NullReferenceException.
            if (blockLevelContentContainer != null && blockLevelContentContainer.Name == W.tbl)
            {
                XElement tbl = blockLevelContentContainer;
                XElement style = tbl.Element(PtOpenXml.pt + "style");

                // Conditional-formatting flags of the nearest enclosing cell and row.
                XElement cellCnf = para.Ancestors(W.tc).Take(1).Elements(W.tcPr).Elements(W.cnfStyle).FirstOrDefault();
                XElement rowCnf = para.Ancestors(W.tr).Take(1).Elements(W.trPr).Elements(W.cnfStyle).FirstOrDefault();

                if (style != null)
                {
                    // Start from the table style's own w:pPr (or an empty one), then merge in the
                    // w:pPr of every w:tblStylePr override whose flag is set on the cell's or the
                    // row's w:cnfStyle.
                    tablepPr = style.Element(W.pPr);
                    if (tablepPr == null)
                        tablepPr = new XElement(W.pPr);

                    foreach (var ot in TableStyleOverrideTypes)
                    {
                        XName attName = TableStyleOverrideXNameMap[ot];
                        if ((cellCnf != null && cellCnf.Attribute(attName).ToBoolean() == true) ||
                            (rowCnf != null && rowCnf.Attribute(attName).ToBoolean() == true))
                        {
                            XElement o = style
                                .Elements(W.tblStylePr)
                                .FirstOrDefault(tsp => (string)tsp.Attribute(W.type) == ot);
                            if (o != null)
                            {
                                XElement otpPr = o.Element(W.pPr);
                                tablepPr = MergeStyleElement(otpPr, tablepPr);
                            }
                        }
                    }
                }
            }

            // The styles part is optional; sXDoc stays null when the document has none.
            var stylesPart = wDoc.MainDocumentPart.StyleDefinitionsPart;
            XDocument sXDoc = null;
            if (stylesPart != null)
                sXDoc = stylesPart.GetXDocument();

            ListItemRetriever.ListItemInfo lif = para.Annotation<ListItemRetriever.ListItemInfo>();

            XElement rolledParaProps = ParagraphStyleRollup(para, sXDoc, fai.DefaultParagraphStyleName);

            // A zero numId drops the indentation that came from the style hierarchy.
            if (lif != null && lif.IsZeroNumId)
                rolledParaProps.Elements(W.ind).Remove();
            XElement toggledParaProps = MergeStyleElement(rolledParaProps, tablepPr);

            // Default result; also used for a list item that is neither FromParagraph nor FromStyle.
            XElement mergedParaProps = MergeStyleElement(localParaProps, toggledParaProps);

            // NOTE(review): li is not read anywhere below; the call is kept because
            // RetrieveListItem may have side effects - confirm before removing.
            string li = ListItemRetriever.RetrieveListItem(wDoc, para, settings.ListItemRetrieverSettings);
            if (lif != null && lif.IsListItem)
            {
                if (settings.RestrictToSupportedNumberingFormats)
                {
                    // Numbering format of this level: w:numFmt/@w:val, or, when that is absent, the
                    // w:format of a "custom" w:numFmt inside mc:AlternateContent/mc:Choice.
                    string numFmtForLevel = (string)lif.Lvl(ListItemRetriever.GetParagraphLevel(para)).Elements(W.numFmt).Attributes(W.val).FirstOrDefault();
                    if (numFmtForLevel == null)
                    {
                        var numFmtElement = lif.Lvl(ListItemRetriever.GetParagraphLevel(para)).Elements(MC.AlternateContent).Elements(MC.Choice).Elements(W.numFmt).FirstOrDefault();
                        if (numFmtElement != null && (string)numFmtElement.Attribute(W.val) == "custom")
                            numFmtForLevel = (string)numFmtElement.Attribute(W.format);
                    }

                    // A level carrying w:isLgl is validated as "decimal" unless it is "decimalZero".
                    bool isLgl = lif.Lvl(ListItemRetriever.GetParagraphLevel(para)).Elements(W.isLgl).Any();
                    if (isLgl && numFmtForLevel != "decimalZero")
                        numFmtForLevel = "decimal";
                    if (!AcceptableNumFormats.Contains(numFmtForLevel))
                        throw new UnsupportedNumberingFormatException(numFmtForLevel + " is not a supported numbering format");
                }

                int paragraphLevel = ListItemRetriever.GetParagraphLevel(para);
                var numberingParaProps = lif
                    .Lvl(paragraphLevel)
                    .Elements(W.pPr)
                    .FirstOrDefault();
                if (numberingParaProps == null)
                {
                    numberingParaProps = new XElement(W.pPr);
                }
                else
                {
                    // Keep only w:ind from the numbering level's paragraph properties.
                    // NOTE(review): this removes the other children from the element returned by
                    // lif.Lvl(...) itself, not from a copy - confirm that mutating it is intended.
                    numberingParaProps
                        .Elements()
                        .Where(e => e.Name != W.ind)
                        .Remove();
                }

                // have:
                // - localParaProps
                // - toggledParaProps
                // - numberingParaProps

                // if a paragraph contains a numPr with a numId=0, in other words, it is NOT a numbered item, then the indentation from the style
                // hierarchy is ignored.

                ListItemRetriever.ListItemInfo lii = para.Annotation<ListItemRetriever.ListItemInfo>();
                if (lii.FromParagraph != null)
                {
                    // order
                    // - toggledParaProps
                    // - numberingParaProps
                    // - localParaProps

                    mergedParaProps = MergeStyleElement(numberingParaProps, toggledParaProps);
                    mergedParaProps = MergeStyleElement(localParaProps, mergedParaProps);
                }
                else if (lii.FromStyle != null)
                {
                    // order
                    // - numberingParaProps
                    // - toggledParaProps
                    // - localParaProps
                    mergedParaProps = MergeStyleElement(toggledParaProps, numberingParaProps);
                    mergedParaProps = MergeStyleElement(localParaProps, mergedParaProps);
                }
            }
            else
            {
                mergedParaProps = MergeStyleElement(localParaProps, toggledParaProps);
            }

            // merge mergedParaProps with existing accumulatedParaProps, with mergedParaProps as high pri
            // replace accumulatedParaProps with newly merged

            XElement accumulatedParaProps = para.Element(PtOpenXml.pt + "pPr");
            XElement newAccumulatedParaProps = MergeStyleElement(mergedParaProps, accumulatedParaProps);

            AdjustFontAttributes(wDoc, para, newAccumulatedParaProps, newAccumulatedParaProps.Element(W.rPr), settings);
            newAccumulatedParaProps.Name = PtOpenXml.pt + "pPr";
            if (accumulatedParaProps != null)
            {
                accumulatedParaProps.ReplaceWith(newAccumulatedParaProps);
            }
            else
            {
                para.Add(newAccumulatedParaProps);
            }
        }
 // Passes every w:p descendant of root to AnnotateParagraph. The descendants are enumerated
 // lazily while the paragraphs are being annotated.
 private static void AnnotateParagraphs(FormattingAssemblerInfo fai, WordprocessingDocument wDoc, XElement root, FormattingAssemblerSettings settings)
 {
     var paragraphs = root.Descendants(W.p);
     foreach (XElement paragraph in paragraphs)
         AnnotateParagraph(fai, wDoc, paragraph, settings);
 }
        // Assembles the formatting of wDoc, analyzes every run (w:r) in every content part with
        // AnalyzeRun, and appends the collected counters to metrics.
        //
        // H.RunCount is always emitted. Every other counter is emitted only when it is greater than
        // zero, and H.Languages only when at least one language was recorded.
        // notes is only passed through to AnalyzeRun.
        private static void FontAndCharSetAnalysis(WordprocessingDocument wDoc, List<XElement> metrics, List<string> notes)
        {
            FormattingAssemblerSettings assemblerSettings = new FormattingAssemblerSettings();
            assemblerSettings.RemoveStyleNamesFromParagraphAndRunProperties = false;
            assemblerSettings.ClearStyles = true;
            assemblerSettings.RestrictToSupportedNumberingFormats = false;
            assemblerSettings.RestrictToSupportedLanguages = false;
            FormattingAssembler.AssembleFormatting(wDoc, assemblerSettings);

            FormattingMetrics totals = new FormattingMetrics();

            // Count and analyze each run of each content part.
            foreach (var contentPart in wDoc.ContentParts())
            {
                var partXDoc = contentPart.GetXDocument();
                foreach (var runElement in partXDoc.Descendants(W.r))
                {
                    totals.RunCount++;
                    AnalyzeRun(runElement, metrics, notes, totals, contentPart.Uri.ToString());
                }
            }

            // The run count is reported unconditionally.
            metrics.Add(new XElement(H.RunCount, new XAttribute(H.Val, totals.RunCount)));

            // Remaining counters are reported only when non-zero, in this fixed order.
            if (totals.RunWithoutRprCount > 0)
            {
                metrics.Add(new XElement(H.RunWithoutRprCount, new XAttribute(H.Val, totals.RunWithoutRprCount)));
            }
            if (totals.ZeroLengthText > 0)
            {
                metrics.Add(new XElement(H.ZeroLengthText, new XAttribute(H.Val, totals.ZeroLengthText)));
            }
            if (totals.MultiFontRun > 0)
            {
                metrics.Add(new XElement(H.MultiFontRun, new XAttribute(H.Val, totals.MultiFontRun)));
            }
            if (totals.AsciiCharCount > 0)
            {
                metrics.Add(new XElement(H.AsciiCharCount, new XAttribute(H.Val, totals.AsciiCharCount)));
            }
            if (totals.CSCharCount > 0)
            {
                metrics.Add(new XElement(H.CSCharCount, new XAttribute(H.Val, totals.CSCharCount)));
            }
            if (totals.EastAsiaCharCount > 0)
            {
                metrics.Add(new XElement(H.EastAsiaCharCount, new XAttribute(H.Val, totals.EastAsiaCharCount)));
            }
            if (totals.HAnsiCharCount > 0)
            {
                metrics.Add(new XElement(H.HAnsiCharCount, new XAttribute(H.Val, totals.HAnsiCharCount)));
            }
            if (totals.AsciiRunCount > 0)
            {
                metrics.Add(new XElement(H.AsciiRunCount, new XAttribute(H.Val, totals.AsciiRunCount)));
            }
            if (totals.CSRunCount > 0)
            {
                metrics.Add(new XElement(H.CSRunCount, new XAttribute(H.Val, totals.CSRunCount)));
            }
            if (totals.EastAsiaRunCount > 0)
            {
                metrics.Add(new XElement(H.EastAsiaRunCount, new XAttribute(H.Val, totals.EastAsiaRunCount)));
            }
            if (totals.HAnsiRunCount > 0)
            {
                metrics.Add(new XElement(H.HAnsiRunCount, new XAttribute(H.Val, totals.HAnsiRunCount)));
            }

            // Languages are reported as one comma-separated value with trailing commas trimmed.
            if (totals.Languages.Any())
            {
                var joinedLanguages = totals.Languages.StringConcatenate(s => s + ",");
                var languageList = joinedLanguages.TrimEnd(',');
                metrics.Add(new XElement(H.Languages, new XAttribute(H.Val, PtUtils.MakeValidXml(languageList))));
            }
        }
        // Reads the document at source, processes it in memory, and writes the result to dest.
        // Processing steps, in order: RevisionAccepter.AcceptRevisions, MarkupSimplifier.SimplifyMarkup,
        // and FormattingAssembler.AssembleFormatting, each with the settings configured below.
        // The source file is only read; dest is written with File.WriteAllBytes.
        public static void CopyFormattingAssembledDocx(FileInfo source, FileInfo dest)
        {
            byte[] sourceBytes = File.ReadAllBytes(source.FullName);
            using (var buffer = new MemoryStream())
            {
                buffer.Write(sourceBytes, 0, sourceBytes.Length);
                using (var document = WordprocessingDocument.Open(buffer, true))
                {
                    RevisionAccepter.AcceptRevisions(document);

                    var markupSettings = new SimplifyMarkupSettings();
                    markupSettings.RemoveComments = true;
                    markupSettings.RemoveContentControls = true;
                    markupSettings.RemoveEndAndFootNotes = true;
                    markupSettings.RemoveFieldCodes = false;
                    markupSettings.RemoveLastRenderedPageBreak = true;
                    markupSettings.RemovePermissions = true;
                    markupSettings.RemoveProof = true;
                    markupSettings.RemoveRsidInfo = true;
                    markupSettings.RemoveSmartTags = true;
                    markupSettings.RemoveSoftHyphens = true;
                    markupSettings.RemoveGoBackBookmark = true;
                    markupSettings.ReplaceTabsWithSpaces = false;
                    MarkupSimplifier.SimplifyMarkup(document, markupSettings);

                    var assemblerSettings = new FormattingAssemblerSettings();
                    assemblerSettings.RemoveStyleNamesFromParagraphAndRunProperties = false;
                    assemblerSettings.ClearStyles = false;
                    assemblerSettings.RestrictToSupportedLanguages = false;
                    assemblerSettings.RestrictToSupportedNumberingFormats = false;
                    assemblerSettings.CreateHtmlConverterAnnotationAttributes = true;
                    assemblerSettings.OrderElementsPerStandard = false;

                    var listItemSettings = new ListItemRetrieverSettings();
                    listItemSettings.ListItemTextImplementations = ListItemRetrieverSettings.DefaultListItemTextImplementations;
                    assemblerSettings.ListItemRetrieverSettings = listItemSettings;

                    FormattingAssembler.AssembleFormatting(document, assemblerSettings);
                }

                // The buffer is read only after the document's using block has ended.
                byte[] resultBytes = buffer.ToArray();
                File.WriteAllBytes(dest.FullName, resultBytes);
            }
        }