/// <summary>
        /// Builds the metrics XML for a WordprocessingML document.
        /// </summary>
        /// <remarks>
        /// The method works on a copy of <paramref name="wmlDoc"/> created with
        /// <c>new WmlDocument(wmlDoc, true)</c> (the flag presumably requests a format conversion;
        /// confirm against WmlDocument) and on a second copy in which tracked revisions are accepted.
        /// If opening or measuring throws an OpenXmlPowerToolsException whose text contains
        /// "Invalid Hyperlink", the package bytes are passed through UriFixer.FixInvalidUri
        /// (skipped on NET35 builds) and the measurement is retried once with the invalid-hyperlink
        /// flag set. Any other OpenXmlPowerToolsException yields a Metrics element that carries
        /// only an Error attribute. Exceptions of other types are not caught here.
        /// </remarks>
        /// <param name="wmlDoc">The document to measure; it is copied, not modified.</param>
        /// <param name="settings">Options forwarded to the metrics computation.</param>
        /// <returns>The metrics element, or an error-only Metrics element.</returns>
        public static XElement GetDocxMetrics(WmlDocument wmlDoc, MetricsGetterSettings settings)
        {
            WmlDocument converted = new WmlDocument(wmlDoc, true);
            WmlDocument noTrackedRevisions = new WmlDocument(converted);

            try
            {
                return GetDocxMetricsCore(converted, noTrackedRevisions, false, settings);
            }
            catch (OpenXmlPowerToolsException e)
            {
                if (e.ToString().Contains("Invalid Hyperlink"))
                {
                    byte[] repairedBytes;
                    using (MemoryStream ms = new MemoryStream())
                    {
                        ms.Write(converted.DocumentByteArray, 0, converted.DocumentByteArray.Length);
#if !NET35
                        UriFixer.FixInvalidUri(ms, brokenUri => FixUri(brokenUri));
#endif
                        repairedBytes = ms.ToArray();
                    }

                    // Keep the caller's file name so the FileName attribute of the result still
                    // identifies the document; the placeholder is used only when no name is known.
                    converted = new WmlDocument(converted.FileName ?? "dummy.docx", repairedBytes);
                    noTrackedRevisions = new WmlDocument(converted);
                    return GetDocxMetricsCore(converted, noTrackedRevisions, true, settings);
                }
            }

            // Reached only when an OpenXmlPowerToolsException other than the hyperlink case was caught.
            var metrics = new XElement(H.Metrics,
                new XAttribute(H.FileName, converted.FileName),
                new XAttribute(H.FileType, "WordprocessingML"),
                new XAttribute(H.Error, "Unknown error, metrics not determined"));
            return metrics;
        }

        /// <summary>
        /// Opens both document copies, accepts tracked revisions in <paramref name="noTrackedRevisions"/>
        /// when it has any, and delegates to GetWmlMetrics.
        /// </summary>
        private static XElement GetDocxMetricsCore(WmlDocument converted, WmlDocument noTrackedRevisions, bool invalidHyperlink, MetricsGetterSettings settings)
        {
            using (OpenXmlMemoryStreamDocument noTrackedStreamDoc = new OpenXmlMemoryStreamDocument(noTrackedRevisions))
            using (WordprocessingDocument noTrackedDocument = noTrackedStreamDoc.GetWordprocessingDocument())
            using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(converted))
            using (WordprocessingDocument document = streamDoc.GetWordprocessingDocument())
            {
                if (RevisionAccepter.HasTrackedRevisions(noTrackedDocument))
                    RevisionAccepter.AcceptRevisions(noTrackedDocument);
                return GetWmlMetrics(converted.FileName, invalidHyperlink, document, noTrackedDocument, settings);
            }
        }
        // Sample driver: prints the metrics XML for a fixed set of sample files, each with its
        // own settings, preceded by a banner line and the resolved full path.
        static void Main(string[] args)
        {
            // One entry per report, listed in the order they are printed.
            var reports = new[]
            {
                new
                {
                    Banner = "============== No text from content controls ==============",
                    RelativePath = "../../ContentControls.docx",
                    Settings = new MetricsGetterSettings { IncludeTextInContentControls = false }
                },
                new
                {
                    Banner = "============== With text from content controls ==============",
                    RelativePath = "../../ContentControls.docx",
                    Settings = new MetricsGetterSettings { IncludeTextInContentControls = true }
                },
                new
                {
                    Banner = "============== Tracked Revisions ==============",
                    RelativePath = "../../TrackedRevisions.docx",
                    Settings = new MetricsGetterSettings { IncludeTextInContentControls = true }
                },
                new
                {
                    Banner = "============== Style Hierarchy ==============",
                    RelativePath = "../../Styles.docx",
                    Settings = new MetricsGetterSettings { IncludeTextInContentControls = false }
                },
                new
                {
                    Banner = "============== Spreadsheet Tables ==============",
                    RelativePath = "../../Tables.xlsx",
                    Settings = new MetricsGetterSettings { IncludeTextInContentControls = false, IncludeXlsxTableCellData = true }
                }
            };

            foreach (var report in reports)
            {
                FileInfo sample = new FileInfo(report.RelativePath);
                Console.WriteLine(report.Banner);
                Console.WriteLine(sample.FullName);
                Console.WriteLine(MetricsGetter.GetMetrics(sample.FullName, report.Settings));
                Console.WriteLine();
            }
        }
 /// <summary>
 /// Loads the file at <paramref name="fileName"/> and returns its metrics XML, choosing the
 /// WordprocessingML, SpreadsheetML or PresentationML path from the file extension.
 /// </summary>
 /// <param name="fileName">Path of the document to measure.</param>
 /// <param name="settings">Options forwarded to the format-specific metrics method.</param>
 /// <returns>
 /// The metrics element, or null when the extension is not recognized by any of the
 /// Util.Is*ML checks.
 /// </returns>
 /// <exception cref="FileNotFoundException">The file does not exist.</exception>
 public static XElement GetMetrics(string fileName, MetricsGetterSettings settings)
 {
     FileInfo fi = new FileInfo(fileName);
     if (!fi.Exists)
     {
         // The (message, fileName) constructor does not treat the message as a format string,
         // so the placeholder has to be substituted before the exception is created.
         throw new FileNotFoundException(string.Format("{0} does not exist.", fi.FullName), fi.FullName);
     }
     if (Util.IsWordprocessingML(fi.Extension))
     {
         WmlDocument wmlDoc = new WmlDocument(fi.FullName, true);
         return GetDocxMetrics(wmlDoc, settings);
     }
     if (Util.IsSpreadsheetML(fi.Extension))
     {
         SmlDocument smlDoc = new SmlDocument(fi.FullName, true);
         return GetXlsxMetrics(smlDoc, settings);
     }
     if (Util.IsPresentationML(fi.Extension))
     {
         PmlDocument pmlDoc = new PmlDocument(fi.FullName, true);
         return GetPptxMetrics(pmlDoc, settings);
     }
     return null;
 }
        /// <summary>
        /// Loads the document at <paramref name="fileName"/>, obtains its metrics XML from
        /// MetricsGetter.GetDocxMetrics (with content-control text and spreadsheet cell data
        /// both switched off), and copies the individual values into a DocxMetrics object.
        /// </summary>
        /// <param name="fileName">Path of the document to measure.</param>
        /// <returns>A DocxMetrics instance populated from the metrics XML.</returns>
        public static DocxMetrics GetDocxMetrics(string fileName)
        {
            var source = new WmlDocument(fileName);
            var options = new MetricsGetterSettings
            {
                IncludeTextInContentControls = false,
                IncludeXlsxTableCellData = false
            };
            var xml = MetricsGetter.GetDocxMetrics(source, options);

            return new DocxMetrics
            {
                FileName = source.FileName,

                // XML-valued metrics
                StyleHierarchy = GetXmlDocumentForMetrics(xml, H.StyleHierarchy),
                ContentControls = GetXmlDocumentForMetrics(xml, H.Parts),

                // Integer-valued metrics
                TextBox = GetIntForMetrics(xml, H.TextBox),
                ContentControlCount = GetIntForMetrics(xml, H.ContentControl),
                ComplexField = GetIntForMetrics(xml, H.ComplexField),
                SimpleField = GetIntForMetrics(xml, H.SimpleField),
                AltChunk = GetIntForMetrics(xml, H.AltChunk),
                Table = GetIntForMetrics(xml, H.Table),
                Hyperlink = GetIntForMetrics(xml, H.Hyperlink),
                LegacyFrame = GetIntForMetrics(xml, H.LegacyFrame),
                ActiveX = GetIntForMetrics(xml, H.ActiveX),
                SubDocument = GetIntForMetrics(xml, H.SubDocument),
                ReferenceToNullImage = GetIntForMetrics(xml, H.ReferenceToNullImage),
                ElementCount = GetIntForMetrics(xml, H.ElementCount),
                AverageParagraphLength = GetIntForMetrics(xml, H.AverageParagraphLength),
                RunCount = GetIntForMetrics(xml, H.RunCount),
                ZeroLengthText = GetIntForMetrics(xml, H.ZeroLengthText),
                MultiFontRun = GetIntForMetrics(xml, H.MultiFontRun),
                AsciiCharCount = GetIntForMetrics(xml, H.AsciiCharCount),
                CSCharCount = GetIntForMetrics(xml, H.CSCharCount),
                EastAsiaCharCount = GetIntForMetrics(xml, H.EastAsiaCharCount),
                HAnsiCharCount = GetIntForMetrics(xml, H.HAnsiCharCount),
                AsciiRunCount = GetIntForMetrics(xml, H.AsciiRunCount),
                CSRunCount = GetIntForMetrics(xml, H.CSRunCount),
                EastAsiaRunCount = GetIntForMetrics(xml, H.EastAsiaRunCount),
                HAnsiRunCount = GetIntForMetrics(xml, H.HAnsiRunCount),

                // Boolean-valued metrics
                RevisionTracking = GetBoolForMetrics(xml, H.RevisionTracking),
                EmbeddedXlsx = GetBoolForMetrics(xml, H.EmbeddedXlsx),
                InvalidSaveThroughXslt = GetBoolForMetrics(xml, H.InvalidSaveThroughXslt),
                TrackRevisionsEnabled = GetBoolForMetrics(xml, H.TrackRevisionsEnabled),
                DocumentProtection = GetBoolForMetrics(xml, H.DocumentProtection),
                Valid = GetBoolForMetrics(xml, H.Valid),

                // String-valued metrics
                Languages = GetStringForMetrics(xml, H.Languages),
                NumberingFormatList = GetStringForMetrics(xml, H.NumberingFormatList)
            };
        }
        /// <summary>
        /// Recursively reduces a part's XML tree to its content-control (w:sdt) structure.
        /// </summary>
        /// <remarks>
        /// The root element becomes a ContentControls element; every w:sdt element becomes a
        /// ContentControl element carrying Type, optional Tag, optional Alias and XPath attributes;
        /// any other element is replaced by the transformed sequence of its child nodes.
        /// Non-element nodes are passed through only when
        /// <c>settings.IncludeTextInContentControls</c> is set; otherwise they are dropped.
        /// </remarks>
        /// <param name="node">The node to transform.</param>
        /// <param name="settings">Controls whether non-element nodes are kept.</param>
        /// <returns>An XElement, a sequence of transformed nodes, the node itself, or null.</returns>
        private static object GetContentControlsTransform(XNode node, MetricsGetterSettings settings)
        {
            XElement element = node as XElement;

            // Anything that is not an element is either kept verbatim or discarded.
            if (element == null)
                return settings.IncludeTextInContentControls ? node : null;

            if (element == element.Document.Root)
                return new XElement(H.ContentControls,
                    element.Nodes().Select(n => GetContentControlsTransform(n, settings)));

            // Ordinary elements contribute nothing themselves; only their descendants matter.
            if (element.Name != W.sdt)
                return element.Nodes().Select(n => GetContentControlsTransform(n, settings));

            var sdtProperties = element.Elements(W.sdtPr);

            var tag = (string)sdtProperties.Elements(W.tag).Attributes(W.val).FirstOrDefault();
            XAttribute tagAttr = tag == null ? null : new XAttribute(H.Tag, tag);

            var alias = (string)sdtProperties.Elements(W.alias).Attributes(W.val).FirstOrDefault();
            XAttribute aliasAttr = alias == null ? null : new XAttribute(H.Alias, alias);

            var xPathAttr = new XAttribute(H.XPath, element.GetXPath());

            // The markers are tested from highest to lowest precedence: an explicit richText
            // marker wins, then picture, group, ... down to text. A control with none of the
            // markers is reported as rich text as well.
            string type;
            if (sdtProperties.Elements(W.richText).Any())
                type = "RichText";
            else if (sdtProperties.Elements(W.picture).Any())
                type = "Picture";
            else if (sdtProperties.Elements(W.group).Any())
                type = "Group";
            else if (sdtProperties.Elements(W.equation).Any())
                type = "Equation";
            else if (sdtProperties.Elements(W.dropDownList).Any())
                type = "DropDownList";
            else if (sdtProperties.Elements(W.docPartObj).Any())
                type = "DocPartObj";
            else if (sdtProperties.Elements(W.docPartList).Any())
                type = "DocPartList";
            else if (sdtProperties.Elements(W.date).Any())
                type = "Date";
            else if (sdtProperties.Elements(W.comboBox).Any())
                type = "ComboBox";
            else if (sdtProperties.Elements(W.citation).Any())
                type = "Citation";
            else if (sdtProperties.Elements(W.bibliography).Any())
                type = "Bibliography";
            else if (sdtProperties.Elements(W.text).Any())
                type = "Text";
            else
                type = "RichText";

            return new XElement(H.ContentControl,
                new XAttribute(H.Type, type),
                tagAttr,
                aliasAttr,
                xPathAttr,
                element.Nodes().Select(n => GetContentControlsTransform(n, settings)));
        }
 /// <summary>
 /// Produces the Part metrics element for one part of a WordprocessingML package.
 /// </summary>
 /// <remarks>
 /// Content controls are collected only for main document, header, footer, footnotes and
 /// endnotes parts, and they are read from the part with the same URI in
 /// <paramref name="noTrackedDocument"/> rather than from <paramref name="part"/> itself.
 /// </remarks>
 /// <param name="noTrackedDocument">The copy of the document that is searched for the matching part.</param>
 /// <param name="part">The part being described.</param>
 /// <param name="settings">Options forwarded to the content-control transform.</param>
 /// <returns>The Part element, or null when it would have no child elements.</returns>
 /// <exception cref="OpenXmlPowerToolsException">No part with the same URI exists in <paramref name="noTrackedDocument"/>.</exception>
 private static XElement GetMetricsForWmlPart(WordprocessingDocument noTrackedDocument, OpenXmlPart part, MetricsGetterSettings settings)
 {
     bool isScannedPartType =
         part is MainDocumentPart ||
         part is HeaderPart ||
         part is FooterPart ||
         part is FootnotesPart ||
         part is EndnotesPart;

     XElement contentControls = null;
     if (isScannedPartType)
     {
         var counterpart = noTrackedDocument.GetAllParts().FirstOrDefault(candidate => candidate.Uri == part.Uri);
         if (counterpart == null)
             throw new OpenXmlPowerToolsException("Internal error");

         var transformed = (XElement)GetContentControlsTransform(counterpart.GetXDocument().Root, settings);
         // An empty ContentControls wrapper carries no information, so it is left out.
         contentControls = transformed.HasElements ? transformed : null;
     }

     var partMetrics = new XElement(H.Part,
         new XAttribute(H.ContentType, part.ContentType),
         new XAttribute(H.Uri, part.Uri.ToString()),
         contentControls);

     return partMetrics.HasElements ? partMetrics : null;
 }
        /// <summary>
        /// Builds the metrics XML for a PresentationML document by running the SDK validation
        /// against each supported Office version and wrapping the collected elements.
        /// </summary>
        /// <param name="pmlDoc">The presentation to measure.</param>
        /// <param name="settings">Not read by this method.</param>
        /// <returns>A Metrics element with FileName and FileType attributes and the collected elements.</returns>
        public static XElement GetPptxMetrics(PmlDocument pmlDoc, MetricsGetterSettings settings)
        {
            using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(pmlDoc))
            using (PresentationDocument pDoc = streamDoc.GetPresentationDocument())
            {
                List<XElement> metrics = new List<XElement>();

                // Every version is always validated. The boolean results were previously folded
                // into a local that nothing read, so they are simply discarded here; whatever the
                // validation calls place in the metrics list is what reaches the output.
                ValidateAgainstSpecificVersion(pDoc, metrics, DocumentFormat.OpenXml.FileFormatVersions.Office2007, H.SdkValidationError2007);
                ValidateAgainstSpecificVersion(pDoc, metrics, DocumentFormat.OpenXml.FileFormatVersions.Office2010, H.SdkValidationError2010);
#if !NET35
                ValidateAgainstSpecificVersion(pDoc, metrics, DocumentFormat.OpenXml.FileFormatVersions.Office2013, H.SdkValidationError2013);
#endif
                return new XElement(H.Metrics,
                    new XAttribute(H.FileName, pmlDoc.FileName),
                    new XAttribute(H.FileType, "PresentationML"),
                    metrics);
            }
        }
 /// <summary>
 /// Describes the tables referenced by one worksheet.
 /// </summary>
 /// <remarks>
 /// For every tablePart reference in the sheet XML the table definition part is resolved through
 /// its relationship id, and a Table element with Name, DisplayName and a Columns list is emitted.
 /// When <c>settings.IncludeXlsxTableCellData</c> is set, a TableData element holding one Row
 /// per table row and one Cell per column is appended as well.
 /// </remarks>
 /// <param name="spreadsheetDocument">The workbook, used to look tables up by display name.</param>
 /// <param name="sheetPart">The worksheet part to inspect.</param>
 /// <param name="sheetName">Name written to the Sheet element.</param>
 /// <param name="settings">Controls whether cell data is included.</param>
 /// <returns>The Sheet element, or null when the sheet references no tables.</returns>
 public static XElement GetTableInfoForSheet(SpreadsheetDocument spreadsheetDocument, WorksheetPart sheetPart, string sheetName,
     MetricsGetterSettings settings)
 {
     var sheetXml = sheetPart.GetXDocument();
     var sheetElement = new XElement(H.Sheet, new XAttribute(H.Name, sheetName));

     foreach (var tablePartRef in sheetXml.Root.Elements(S.tableParts).Elements(S.tablePart))
     {
         string relationshipId = (string)tablePartRef.Attribute(R.id);
         var definitionPart = (TableDefinitionPart)sheetPart.GetPartById(relationshipId);
         var definitionXml = definitionPart.GetXDocument();
         var displayName = (string)definitionXml.Root.Attribute("displayName");

         // Cell data is optional and is gathered before the table element is assembled.
         XElement cellData = null;
         if (settings.IncludeXlsxTableCellData)
         {
             var xlsxTable = spreadsheetDocument.Table(displayName);
             cellData = new XElement(H.TableData);
             foreach (var row in xlsxTable.TableRows())
             {
                 var rowElement = new XElement(H.Row);
                 foreach (var col in xlsxTable.TableColumns())
                 {
                     rowElement.Add(new XElement(H.Cell,
                         new XAttribute(H.Name, col.Name),
                         new XAttribute(H.Val, (string)row[col.Name])));
                 }
                 cellData.Add(rowElement);
             }
         }

         var tableElement = new XElement(H.Table,
             new XAttribute(H.Name, (string)definitionXml.Root.Attribute("name")),
             new XAttribute(H.DisplayName, displayName));

         var columnsElement = new XElement(H.Columns);
         foreach (var columnDefinition in definitionXml.Root.Element(S.tableColumns).Elements(S.tableColumn))
         {
             columnsElement.Add(new XElement(H.Column,
                 new XAttribute(H.Name, (string)columnDefinition.Attribute("name"))));
         }

         // A null cellData is ignored by Add, so the element appears only when requested.
         tableElement.Add(columnsElement, cellData);
         sheetElement.Add(tableElement);
     }

     return sheetElement.HasElements ? sheetElement : null;
 }
 /// <summary>
 /// Collects the table information of every sheet listed in the workbook part into a single
 /// Sheets element. Sheets for which GetTableInfoForSheet returns null add nothing.
 /// </summary>
 /// <param name="spreadsheet">The workbook to inspect.</param>
 /// <param name="settings">Options forwarded to GetTableInfoForSheet.</param>
 /// <returns>The Sheets element (possibly without children).</returns>
 private static XElement GetTableInfoForWorkbook(SpreadsheetDocument spreadsheet, MetricsGetterSettings settings)
 {
     var workbook = spreadsheet.WorkbookPart;
     var sheetEntries = workbook.GetXDocument().Root.Element(S.sheets).Elements(S.sheet);

     var sheetsElement = new XElement(H.Sheets);
     foreach (var sheetEntry in sheetEntries)
     {
         var relationshipId = (string)sheetEntry.Attribute(R.id);
         var name = (string)sheetEntry.Attribute("name");
         var worksheet = (WorksheetPart)workbook.GetPartById(relationshipId);

         // Add ignores null content, matching the way the XElement constructor treats it.
         sheetsElement.Add(GetTableInfoForSheet(spreadsheet, worksheet, name, settings));
     }
     return sheetsElement;
 }
        /// <summary>
        /// Builds the metrics XML for a SpreadsheetML document: the elements collected while
        /// validating against each supported Office version, followed by the workbook's table
        /// information.
        /// </summary>
        /// <param name="smlDoc">The spreadsheet to measure.</param>
        /// <param name="settings">Options forwarded to the table-information collection.</param>
        /// <returns>A Metrics element with FileName and FileType attributes and the collected content.</returns>
        public static XElement GetXlsxMetrics(SmlDocument smlDoc, MetricsGetterSettings settings)
        {
            using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(smlDoc))
            using (SpreadsheetDocument sDoc = streamDoc.GetSpreadsheetDocument())
            {
                List<XElement> metrics = new List<XElement>();

                // Every version is always validated. The boolean results were previously folded
                // into a local that nothing read, so they are simply discarded here; whatever the
                // validation calls place in the metrics list is what reaches the output.
                ValidateAgainstSpecificVersion(sDoc, metrics, DocumentFormat.OpenXml.FileFormatVersions.Office2007, H.SdkValidationError2007);
                ValidateAgainstSpecificVersion(sDoc, metrics, DocumentFormat.OpenXml.FileFormatVersions.Office2010, H.SdkValidationError2010);
#if !NET35
                ValidateAgainstSpecificVersion(sDoc, metrics, DocumentFormat.OpenXml.FileFormatVersions.Office2013, H.SdkValidationError2013);
#endif

                return new XElement(H.Metrics,
                    new XAttribute(H.FileName, smlDoc.FileName),
                    new XAttribute(H.FileType, "SpreadsheetML"),
                    metrics,
                    GetTableInfoForWorkbook(sDoc, settings));
            }
        }
 /// <summary>
 /// Assembles the Metrics element for a WordprocessingML document from the style hierarchy,
 /// the miscellaneous metrics and the per-part metrics.
 /// </summary>
 /// <param name="fileName">Value written to the FileName attribute.</param>
 /// <param name="invalidHyperlink">Forwarded to GetMiscWmlMetrics.</param>
 /// <param name="document">The opened document whose parts are enumerated.</param>
 /// <param name="noTrackedDocument">The second opened copy, forwarded to the per-part and miscellaneous metrics.</param>
 /// <param name="settings">Options forwarded to the per-part metrics.</param>
 /// <returns>The Metrics element; the Parts child is omitted when no part produced metrics.</returns>
 private static XElement GetWmlMetrics(string fileName, bool invalidHyperlink, WordprocessingDocument document, WordprocessingDocument noTrackedDocument, MetricsGetterSettings settings)
 {
     // Parts that yield null are skipped by the XElement constructor.
     var partsElement = new XElement(H.Parts,
         document.GetAllParts().Select(part => GetMetricsForWmlPart(noTrackedDocument, part, settings)));

     return new XElement(H.Metrics,
         new XAttribute(H.FileName, fileName),
         new XAttribute(H.FileType, "WordprocessingML"),
         GetStyleHierarchy(document),
         GetMiscWmlMetrics(document, noTrackedDocument, invalidHyperlink),
         partsElement.HasElements ? partsElement : null);
 }