/// <summary>
/// Builds the metrics XML for a WordprocessingML document.
/// </summary>
/// <param name="wmlDoc">Source document; it is copied before processing (the <c>true</c>
/// constructor argument presumably requests a format conversion - confirm against WmlDocument).</param>
/// <param name="settings">Options forwarded unchanged to <c>GetWmlMetrics</c>.</param>
/// <returns>
/// The element produced by <c>GetWmlMetrics</c>. If an <c>OpenXmlPowerToolsException</c> whose text
/// does not contain "Invalid Hyperlink" is caught, a Metrics element carrying an Error attribute
/// is returned instead.
/// </returns>
public static XElement GetDocxMetrics(WmlDocument wmlDoc, MetricsGetterSettings settings)
{
    WmlDocument converted = new WmlDocument(wmlDoc, true);
    // Second copy on which tracked revisions get accepted; created outside the try block
    // so a failure while copying is not swallowed by the catch below.
    WmlDocument noTrackedRevisions = new WmlDocument(converted);
    try
    {
        return GetDocxMetricsAcceptingRevisions(converted, noTrackedRevisions, false, settings);
    }
    catch (OpenXmlPowerToolsException e)
    {
        if (e.ToString().Contains("Invalid Hyperlink"))
        {
            using (MemoryStream ms = new MemoryStream())
            {
                ms.Write(converted.DocumentByteArray, 0, converted.DocumentByteArray.Length);
#if !NET35
                UriFixer.FixInvalidUri(ms, brokenUri => FixUri(brokenUri));
#endif
                // On NET35 builds the URI fixer is compiled out, so the bytes are re-wrapped unchanged.
                converted = new WmlDocument("dummy.docx", ms.ToArray());
            }

            noTrackedRevisions = new WmlDocument(converted);
            return GetDocxMetricsAcceptingRevisions(converted, noTrackedRevisions, true, settings);
        }
    }

    // Reached only when the exception above was not an invalid-hyperlink failure.
    var metrics = new XElement(H.Metrics,
        new XAttribute(H.FileName, converted.FileName),
        new XAttribute(H.FileType, "WordprocessingML"),
        new XAttribute(H.Error, "Unknown error, metrics not determined"));
    return metrics;
}

/// <summary>
/// Opens both document copies, accepts tracked revisions on the second one when present,
/// and delegates to <c>GetWmlMetrics</c>.
/// </summary>
private static XElement GetDocxMetricsAcceptingRevisions(WmlDocument converted, WmlDocument noTrackedRevisions, bool invalidHyperlink, MetricsGetterSettings settings)
{
    using (OpenXmlMemoryStreamDocument noTrackedStreamDoc = new OpenXmlMemoryStreamDocument(noTrackedRevisions))
    using (WordprocessingDocument noTrackedDocument = noTrackedStreamDoc.GetWordprocessingDocument())
    using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(converted))
    using (WordprocessingDocument document = streamDoc.GetWordprocessingDocument())
    {
        if (RevisionAccepter.HasTrackedRevisions(noTrackedDocument))
            RevisionAccepter.AcceptRevisions(noTrackedDocument);
        return GetWmlMetrics(converted.FileName, invalidHyperlink, document, noTrackedDocument, settings);
    }
}
/// <summary>
/// Sample driver: prints the metrics XML for several sample documents, each with its own settings.
/// </summary>
static void Main(string[] args)
{
    WriteMetricsReport(
        "============== No text from content controls ==============",
        "../../ContentControls.docx",
        new MetricsGetterSettings { IncludeTextInContentControls = false });

    WriteMetricsReport(
        "============== With text from content controls ==============",
        "../../ContentControls.docx",
        new MetricsGetterSettings { IncludeTextInContentControls = true });

    WriteMetricsReport(
        "============== Tracked Revisions ==============",
        "../../TrackedRevisions.docx",
        new MetricsGetterSettings { IncludeTextInContentControls = true });

    WriteMetricsReport(
        "============== Style Hierarchy ==============",
        "../../Styles.docx",
        new MetricsGetterSettings { IncludeTextInContentControls = false });

    WriteMetricsReport(
        "============== Spreadsheet Tables ==============",
        "../../Tables.xlsx",
        new MetricsGetterSettings { IncludeTextInContentControls = false, IncludeXlsxTableCellData = true });
}

/// <summary>
/// Writes a banner, the resolved full path of the file, its metrics, and a blank line to the console.
/// </summary>
private static void WriteMetricsReport(string banner, string relativePath, MetricsGetterSettings settings)
{
    FileInfo target = new FileInfo(relativePath);
    Console.WriteLine(banner);
    Console.WriteLine(target.FullName);
    Console.WriteLine(MetricsGetter.GetMetrics(target.FullName, settings));
    Console.WriteLine();
}
/// <summary>
/// Computes metrics for an Open XML file, dispatching on the file extension.
/// </summary>
/// <param name="fileName">Path of the file to inspect.</param>
/// <param name="settings">Options forwarded to the format-specific metrics method.</param>
/// <returns>
/// The metrics element from the matching format-specific method, or <c>null</c> when the
/// extension is not recognized as WordprocessingML, SpreadsheetML or PresentationML.
/// </returns>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
public static XElement GetMetrics(string fileName, MetricsGetterSettings settings)
{
    FileInfo fi = new FileInfo(fileName);
    if (!fi.Exists)
    {
        // FileNotFoundException(string, string) takes (message, fileName) and performs no
        // formatting, so the placeholder has to be expanded explicitly.
        throw new FileNotFoundException(string.Format("{0} does not exist.", fi.FullName), fi.FullName);
    }

    if (Util.IsWordprocessingML(fi.Extension))
    {
        WmlDocument wmlDoc = new WmlDocument(fi.FullName, true);
        return GetDocxMetrics(wmlDoc, settings);
    }

    if (Util.IsSpreadsheetML(fi.Extension))
    {
        SmlDocument smlDoc = new SmlDocument(fi.FullName, true);
        return GetXlsxMetrics(smlDoc, settings);
    }

    if (Util.IsPresentationML(fi.Extension))
    {
        PmlDocument pmlDoc = new PmlDocument(fi.FullName, true);
        return GetPptxMetrics(pmlDoc, settings);
    }

    return null;
}
/// <summary>
/// Loads a document from <paramref name="fileName"/>, computes its metrics XML with text in
/// content controls and spreadsheet cell data both disabled, and copies the individual
/// values into a <c>DocxMetrics</c> object.
/// </summary>
/// <param name="fileName">Path of the WordprocessingML document.</param>
/// <returns>A populated <c>DocxMetrics</c> instance.</returns>
public static DocxMetrics GetDocxMetrics(string fileName)
{
    WmlDocument source = new WmlDocument(fileName);
    MetricsGetterSettings options = new MetricsGetterSettings
    {
        IncludeTextInContentControls = false,
        IncludeXlsxTableCellData = false
    };
    var xml = MetricsGetter.GetDocxMetrics(source, options);

    return new DocxMetrics
    {
        FileName = source.FileName,

        // XML fragments
        StyleHierarchy = GetXmlDocumentForMetrics(xml, H.StyleHierarchy),
        ContentControls = GetXmlDocumentForMetrics(xml, H.Parts),

        // Integer metrics
        TextBox = GetIntForMetrics(xml, H.TextBox),
        ContentControlCount = GetIntForMetrics(xml, H.ContentControl),
        ComplexField = GetIntForMetrics(xml, H.ComplexField),
        SimpleField = GetIntForMetrics(xml, H.SimpleField),
        AltChunk = GetIntForMetrics(xml, H.AltChunk),
        Table = GetIntForMetrics(xml, H.Table),
        Hyperlink = GetIntForMetrics(xml, H.Hyperlink),
        LegacyFrame = GetIntForMetrics(xml, H.LegacyFrame),
        ActiveX = GetIntForMetrics(xml, H.ActiveX),
        SubDocument = GetIntForMetrics(xml, H.SubDocument),
        ReferenceToNullImage = GetIntForMetrics(xml, H.ReferenceToNullImage),
        ElementCount = GetIntForMetrics(xml, H.ElementCount),
        AverageParagraphLength = GetIntForMetrics(xml, H.AverageParagraphLength),
        RunCount = GetIntForMetrics(xml, H.RunCount),
        ZeroLengthText = GetIntForMetrics(xml, H.ZeroLengthText),
        MultiFontRun = GetIntForMetrics(xml, H.MultiFontRun),
        AsciiCharCount = GetIntForMetrics(xml, H.AsciiCharCount),
        CSCharCount = GetIntForMetrics(xml, H.CSCharCount),
        EastAsiaCharCount = GetIntForMetrics(xml, H.EastAsiaCharCount),
        HAnsiCharCount = GetIntForMetrics(xml, H.HAnsiCharCount),
        AsciiRunCount = GetIntForMetrics(xml, H.AsciiRunCount),
        CSRunCount = GetIntForMetrics(xml, H.CSRunCount),
        EastAsiaRunCount = GetIntForMetrics(xml, H.EastAsiaRunCount),
        HAnsiRunCount = GetIntForMetrics(xml, H.HAnsiRunCount),

        // Boolean metrics
        RevisionTracking = GetBoolForMetrics(xml, H.RevisionTracking),
        EmbeddedXlsx = GetBoolForMetrics(xml, H.EmbeddedXlsx),
        InvalidSaveThroughXslt = GetBoolForMetrics(xml, H.InvalidSaveThroughXslt),
        TrackRevisionsEnabled = GetBoolForMetrics(xml, H.TrackRevisionsEnabled),
        DocumentProtection = GetBoolForMetrics(xml, H.DocumentProtection),
        Valid = GetBoolForMetrics(xml, H.Valid),

        // String metrics
        Languages = GetStringForMetrics(xml, H.Languages),
        NumberingFormatList = GetStringForMetrics(xml, H.NumberingFormatList)
    };
}
/// <summary>
/// Recursively projects a part's XML onto a tree containing only content controls
/// (<c>w:sdt</c> elements) and, optionally, the non-element nodes found beneath them.
/// </summary>
/// <param name="node">Node to transform.</param>
/// <param name="settings">When <c>IncludeTextInContentControls</c> is set, non-element nodes are passed through.</param>
/// <returns>
/// An <c>XElement</c> for the document root or a content control, a lazily evaluated sequence
/// of transformed children for any other element, the node itself for a non-element node
/// when text is included, or <c>null</c> otherwise.
/// </returns>
private static object GetContentControlsTransform(XNode node, MetricsGetterSettings settings)
{
    XElement current = node as XElement;
    if (current == null)
    {
        // Non-element nodes are kept only on request.
        return settings.IncludeTextInContentControls ? node : null;
    }

    // Deferred: enumerated only when the consumer (an XElement constructor) walks it.
    var children = current.Nodes().Select(child => GetContentControlsTransform(child, settings));

    if (current == current.Document.Root)
    {
        return new XElement(H.ContentControls, children);
    }

    if (current.Name != W.sdt)
    {
        // Ordinary elements vanish; only their transformed descendants survive.
        return children;
    }

    var sdtPr = current.Elements(W.sdtPr);

    string tagValue = (string)sdtPr.Elements(W.tag).Attributes(W.val).FirstOrDefault();
    XAttribute tagAttr = tagValue == null ? null : new XAttribute(H.Tag, tagValue);

    string aliasValue = (string)sdtPr.Elements(W.alias).Attributes(W.val).FirstOrDefault();
    XAttribute aliasAttr = aliasValue == null ? null : new XAttribute(H.Alias, aliasValue);

    XAttribute xPathAttr = new XAttribute(H.XPath, current.GetXPath());

    // When several kind markers are present the one checked first here wins;
    // a control with no marker at all is reported as rich text.
    string type;
    if (sdtPr.Elements(W.richText).Any()) type = "RichText";
    else if (sdtPr.Elements(W.picture).Any()) type = "Picture";
    else if (sdtPr.Elements(W.group).Any()) type = "Group";
    else if (sdtPr.Elements(W.equation).Any()) type = "Equation";
    else if (sdtPr.Elements(W.dropDownList).Any()) type = "DropDownList";
    else if (sdtPr.Elements(W.docPartObj).Any()) type = "DocPartObj";
    else if (sdtPr.Elements(W.docPartList).Any()) type = "DocPartList";
    else if (sdtPr.Elements(W.date).Any()) type = "Date";
    else if (sdtPr.Elements(W.comboBox).Any()) type = "ComboBox";
    else if (sdtPr.Elements(W.citation).Any()) type = "Citation";
    else if (sdtPr.Elements(W.bibliography).Any()) type = "Bibliography";
    else if (sdtPr.Elements(W.text).Any()) type = "Text";
    else type = "RichText";

    return new XElement(H.ContentControl,
        new XAttribute(H.Type, type),
        tagAttr,
        aliasAttr,
        xPathAttr,
        children);
}
/// <summary>
/// Produces the Part metrics element for one part of a WordprocessingML package.
/// </summary>
/// <param name="noTrackedDocument">Second open copy of the document, from which the content-control tree is read.</param>
/// <param name="part">Part being described.</param>
/// <param name="settings">Options forwarded to <c>GetContentControlsTransform</c>.</param>
/// <returns>
/// A Part element holding the content-control tree, or <c>null</c> when the part has no child
/// elements to report (content controls are gathered only for main document, header, footer,
/// footnotes and endnotes parts).
/// </returns>
/// <exception cref="OpenXmlPowerToolsException">No part with the same URI exists in <paramref name="noTrackedDocument"/>.</exception>
private static XElement GetMetricsForWmlPart(WordprocessingDocument noTrackedDocument, OpenXmlPart part, MetricsGetterSettings settings)
{
    bool isStoryPart = part is MainDocumentPart
        || part is HeaderPart
        || part is FooterPart
        || part is FootnotesPart
        || part is EndnotesPart;

    XElement controlTree = null;
    if (isStoryPart)
    {
        // Locate the same part, by URI, in the second document copy.
        var counterpart = noTrackedDocument.GetAllParts().FirstOrDefault(candidate => candidate.Uri == part.Uri);
        if (counterpart == null)
        {
            throw new OpenXmlPowerToolsException("Internal error");
        }

        var partXml = counterpart.GetXDocument();
        XElement transformed = (XElement)GetContentControlsTransform(partXml.Root, settings);
        controlTree = transformed.HasElements ? transformed : null;
    }

    XElement partElement = new XElement(H.Part,
        new XAttribute(H.ContentType, part.ContentType),
        new XAttribute(H.Uri, part.Uri.ToString()),
        controlTree);

    // Attributes alone do not count as child elements, so a part without content controls yields null.
    return partElement.HasElements ? partElement : null;
}
/// <summary>
/// Builds the metrics XML for a PresentationML document by validating it against each
/// supported Office file-format version.
/// </summary>
/// <param name="pmlDoc">Presentation to inspect.</param>
/// <param name="settings">Not read by this method.</param>
/// <returns>A Metrics element with FileName and FileType attributes followed by the collected elements.</returns>
public static XElement GetPptxMetrics(PmlDocument pmlDoc, MetricsGetterSettings settings)
{
    using (OpenXmlMemoryStreamDocument package = new OpenXmlMemoryStreamDocument(pmlDoc))
    using (PresentationDocument presentation = package.GetPresentationDocument())
    {
        // Handed to each validation call and emitted as content of the result
        // (presumably the calls append their findings to it - confirm in ValidateAgainstSpecificVersion).
        List<XElement> collected = new List<XElement>();

        // The validation results are not used for anything further here.
        ValidateAgainstSpecificVersion(presentation, collected, DocumentFormat.OpenXml.FileFormatVersions.Office2007, H.SdkValidationError2007);
        ValidateAgainstSpecificVersion(presentation, collected, DocumentFormat.OpenXml.FileFormatVersions.Office2010, H.SdkValidationError2010);
#if !NET35
        ValidateAgainstSpecificVersion(presentation, collected, DocumentFormat.OpenXml.FileFormatVersions.Office2013, H.SdkValidationError2013);
#endif

        XAttribute fileNameAttr = new XAttribute(H.FileName, pmlDoc.FileName);
        XAttribute fileTypeAttr = new XAttribute(H.FileType, "PresentationML");
        return new XElement(H.Metrics, fileNameAttr, fileTypeAttr, collected);
    }
}
/// <summary>
/// Describes every table referenced by a worksheet: its name, display name, column names and,
/// when <c>settings.IncludeXlsxTableCellData</c> is set, the value of every cell.
/// </summary>
/// <param name="spreadsheetDocument">Workbook used to look tables up by display name.</param>
/// <param name="sheetPart">Worksheet part whose table parts are enumerated.</param>
/// <param name="sheetName">Name written to the Sheet element.</param>
/// <param name="settings">Controls whether cell data is included.</param>
/// <returns>A Sheet element with one Table child per table, or <c>null</c> when the sheet has no tables.</returns>
public static XElement GetTableInfoForSheet(SpreadsheetDocument spreadsheetDocument, WorksheetPart sheetPart, string sheetName, MetricsGetterSettings settings)
{
    var sheetXml = sheetPart.GetXDocument();
    XElement sheetElement = new XElement(H.Sheet, new XAttribute(H.Name, sheetName));

    foreach (XElement tablePartRef in sheetXml.Root.Elements(S.tableParts).Elements(S.tablePart))
    {
        // Follow the relationship id from the sheet to the table definition part.
        string relationshipId = (string)tablePartRef.Attribute(R.id);
        TableDefinitionPart definitionPart = (TableDefinitionPart)sheetPart.GetPartById(relationshipId);
        var definitionXml = definitionPart.GetXDocument();
        string displayName = (string)definitionXml.Root.Attribute("displayName");

        XElement cellData = null;
        if (settings.IncludeXlsxTableCellData)
        {
            var sheetTable = spreadsheetDocument.Table(displayName);
            cellData = new XElement(H.TableData);
            foreach (var row in sheetTable.TableRows())
            {
                XElement rowElement = new XElement(H.Row);
                foreach (var column in sheetTable.TableColumns())
                {
                    rowElement.Add(new XElement(H.Cell,
                        new XAttribute(H.Name, column.Name),
                        new XAttribute(H.Val, (string)row[column.Name])));
                }
                cellData.Add(rowElement);
            }
        }

        XElement tableElement = new XElement(H.Table,
            new XAttribute(H.Name, (string)definitionXml.Root.Attribute("name")),
            new XAttribute(H.DisplayName, displayName));

        XElement columnsElement = new XElement(H.Columns);
        foreach (XElement tableColumn in definitionXml.Root.Element(S.tableColumns).Elements(S.tableColumn))
        {
            columnsElement.Add(new XElement(H.Column, new XAttribute(H.Name, (string)tableColumn.Attribute("name"))));
        }
        tableElement.Add(columnsElement);

        // Adding null is a no-op, so cell data appears only when requested.
        tableElement.Add(cellData);

        sheetElement.Add(tableElement);
    }

    return sheetElement.HasElements ? sheetElement : null;
}
/// <summary>
/// Collects table information for every sheet listed in the workbook part.
/// </summary>
/// <param name="spreadsheet">Open spreadsheet document.</param>
/// <param name="settings">Options forwarded to <c>GetTableInfoForSheet</c>.</param>
/// <returns>A Sheets element containing one child per sheet that has tables (possibly none).</returns>
private static XElement GetTableInfoForWorkbook(SpreadsheetDocument spreadsheet, MetricsGetterSettings settings)
{
    var workbookPart = spreadsheet.WorkbookPart;
    var workbookXml = workbookPart.GetXDocument();

    XElement sheetsElement = new XElement(H.Sheets);
    foreach (XElement sheet in workbookXml.Root.Element(S.sheets).Elements(S.sheet))
    {
        string relationshipId = (string)sheet.Attribute(R.id);
        string sheetName = (string)sheet.Attribute("name");
        WorksheetPart worksheetPart = (WorksheetPart)workbookPart.GetPartById(relationshipId);

        // GetTableInfoForSheet returns null for sheets without tables; Add ignores null content.
        sheetsElement.Add(GetTableInfoForSheet(spreadsheet, worksheetPart, sheetName, settings));
    }

    return sheetsElement;
}
/// <summary>
/// Builds the metrics XML for a SpreadsheetML document: validation against each supported
/// Office file-format version, followed by table information for the workbook.
/// </summary>
/// <param name="smlDoc">Spreadsheet to inspect.</param>
/// <param name="settings">Options forwarded to <c>GetTableInfoForWorkbook</c>.</param>
/// <returns>A Metrics element with FileName and FileType attributes, the collected elements, and a Sheets element.</returns>
public static XElement GetXlsxMetrics(SmlDocument smlDoc, MetricsGetterSettings settings)
{
    using (OpenXmlMemoryStreamDocument package = new OpenXmlMemoryStreamDocument(smlDoc))
    using (SpreadsheetDocument workbook = package.GetSpreadsheetDocument())
    {
        // Handed to each validation call and emitted as content of the result
        // (presumably the calls append their findings to it - confirm in ValidateAgainstSpecificVersion).
        List<XElement> collected = new List<XElement>();

        // The validation results are not used for anything further here.
        ValidateAgainstSpecificVersion(workbook, collected, DocumentFormat.OpenXml.FileFormatVersions.Office2007, H.SdkValidationError2007);
        ValidateAgainstSpecificVersion(workbook, collected, DocumentFormat.OpenXml.FileFormatVersions.Office2010, H.SdkValidationError2010);
#if !NET35
        ValidateAgainstSpecificVersion(workbook, collected, DocumentFormat.OpenXml.FileFormatVersions.Office2013, H.SdkValidationError2013);
#endif

        return new XElement(H.Metrics,
            new XAttribute(H.FileName, smlDoc.FileName),
            new XAttribute(H.FileType, "SpreadsheetML"),
            collected,
            GetTableInfoForWorkbook(workbook, settings));
    }
}
/// <summary>
/// Assembles the top-level Metrics element for a WordprocessingML document from the style
/// hierarchy, the miscellaneous document metrics and the per-part metrics.
/// </summary>
/// <param name="fileName">Value of the FileName attribute.</param>
/// <param name="invalidHyperlink">Forwarded to <c>GetMiscWmlMetrics</c>.</param>
/// <param name="document">Open document the parts and styles are read from.</param>
/// <param name="noTrackedDocument">Second open copy of the document, forwarded to the helper methods.</param>
/// <param name="settings">Options forwarded to <c>GetMetricsForWmlPart</c>.</param>
/// <returns>The assembled Metrics element.</returns>
private static XElement GetWmlMetrics(string fileName, bool invalidHyperlink, WordprocessingDocument document, WordprocessingDocument noTrackedDocument, MetricsGetterSettings settings)
{
    var perPartMetrics =
        from part in document.GetAllParts()
        select GetMetricsForWmlPart(noTrackedDocument, part, settings);

    // Parts with nothing to report come back as null and are dropped by the XElement constructor.
    XElement partsElement = new XElement(H.Parts, perPartMetrics);
    if (!partsElement.HasElements)
    {
        partsElement = null;
    }

    return new XElement(H.Metrics,
        new XAttribute(H.FileName, fileName),
        new XAttribute(H.FileType, "WordprocessingML"),
        GetStyleHierarchy(document),
        GetMiscWmlMetrics(document, noTrackedDocument, invalidHyperlink),
        partsElement);
}