/// <summary>
/// Inspects a single run and folds its font-type, language and character statistics
/// into <paramref name="formattingMetrics"/>.
/// </summary>
/// <param name="run">The run element whose W.t and W.delText children supply the text to classify.</param>
/// <param name="attList">Not used by this method.</param>
/// <param name="notes">Receives an error note when the run has no W.rPr child.</param>
/// <param name="formattingMetrics">Accumulator that is updated in place.</param>
/// <param name="uri">Part URI, used only in the text of the error note.</param>
private static void AnalyzeRun(XElement run, List<XElement> attList, List<string> notes, FormattingMetrics formattingMetrics, string uri)
{
    // Concatenate the string values of all text / deleted-text children of the run.
    var text = run
        .Elements()
        .Where(child => child.Name == W.t || child.Name == W.delText)
        .Select(child => (string)child)
        .StringConcatenate();

    // Nothing to classify: count the run as zero-length and stop.
    if (text.Length == 0)
    {
        formattingMetrics.ZeroLengthText++;
        return;
    }

    var runProps = run.Element(W.rPr);
    if (runProps == null)
    {
        // Record the problem, then continue with an empty rPr so the analysis below still runs.
        formattingMetrics.RunWithoutRprCount++;
        var message = string.Format("Error in part {0}: run without rPr at {1}", uri, run.GetXPath());
        notes.Add(PtUtils.MakeValidXml(message));
        runProps = new XElement(W.rPr);
    }

    var charAttrs = new FormattingAssembler.CharStyleAttributes(null, runProps);

    // One font type per character, in text order.
    var fontTypes = text
        .Select(ch => FormattingAssembler.DetermineFontTypeFromCharacter(ch, charAttrs))
        .ToArray();
    var uniqueFontTypes = fontTypes.Distinct().ToArray();

    // Resolve a language for every font type that occurs in the run. An empty or missing
    // language is replaced by the name of the current culture.
    var runLanguages = uniqueFontTypes
        .Select(ft =>
        {
            switch (ft)
            {
                case FormattingAssembler.FontType.CS:
                    return charAttrs.BidiLang;
                case FormattingAssembler.FontType.EastAsia:
                    return charAttrs.EastAsiaLang;
                default:
                    // Ascii, HAnsi and any other font type all use the Latin language.
                    return charAttrs.LatinLang;
            }
        })
        .Select(lang => string.IsNullOrEmpty(lang) ? CultureInfo.CurrentCulture.Name : lang)
        .Distinct()
        .ToList();

    // Only rebuild the accumulated language list when this run contributes something new.
    var allLanguagesKnown = runLanguages.All(lang => formattingMetrics.Languages.Contains(lang));
    if (!allLanguagesKnown)
    {
        formattingMetrics.Languages = formattingMetrics.Languages
            .Concat(runLanguages)
            .Distinct()
            .ToList();
    }

    // A run is "multi-font" when its font types resolve to more than one distinct font.
    var fontCount = uniqueFontTypes
        .Select(ft => GetFontFromFontType(charAttrs, ft))
        .Distinct()
        .Count();

    if (fontCount > 1)
    {
        // Multi-font run: attribute each character to its own font type; no per-type run counter is bumped.
        formattingMetrics.MultiFontRun++;
        formattingMetrics.AsciiCharCount += fontTypes.Count(ft => ft == FormattingAssembler.FontType.Ascii);
        formattingMetrics.CSCharCount += fontTypes.Count(ft => ft == FormattingAssembler.FontType.CS);
        formattingMetrics.EastAsiaCharCount += fontTypes.Count(ft => ft == FormattingAssembler.FontType.EastAsia);
        formattingMetrics.HAnsiCharCount += fontTypes.Count(ft => ft == FormattingAssembler.FontType.HAnsi);
        return;
    }

    // Single-font run: the whole text is attributed to the font type of its first character.
    var leadingType = fontTypes[0];
    if (leadingType == FormattingAssembler.FontType.Ascii)
    {
        formattingMetrics.AsciiCharCount += text.Length;
        formattingMetrics.AsciiRunCount++;
    }
    else if (leadingType == FormattingAssembler.FontType.CS)
    {
        formattingMetrics.CSCharCount += text.Length;
        formattingMetrics.CSRunCount++;
    }
    else if (leadingType == FormattingAssembler.FontType.EastAsia)
    {
        formattingMetrics.EastAsiaCharCount += text.Length;
        formattingMetrics.EastAsiaRunCount++;
    }
    else if (leadingType == FormattingAssembler.FontType.HAnsi)
    {
        formattingMetrics.HAnsiCharCount += text.Length;
        formattingMetrics.HAnsiRunCount++;
    }
}
/// <summary>
/// Gathers run-level font, character-set and language statistics for every content part of
/// <paramref name="wDoc"/> and appends them to <paramref name="metrics"/> as elements carrying
/// an H.Val attribute.
/// </summary>
/// <param name="wDoc">The document to analyze. It is passed to FormattingAssembler.AssembleFormatting first
/// (NOTE(review): presumably that call rewrites the document in place - confirm).</param>
/// <param name="metrics">Receives the resulting metric elements; also forwarded to AnalyzeRun.</param>
/// <param name="notes">Forwarded to AnalyzeRun, which adds error notes to it.</param>
private static void FontAndCharSetAnalysis(WordprocessingDocument wDoc, List<XElement> metrics, List<string> notes)
{
    var assemblerSettings = new FormattingAssemblerSettings
    {
        RemoveStyleNamesFromParagraphAndRunProperties = false,
        ClearStyles = true,
        RestrictToSupportedNumberingFormats = false,
        RestrictToSupportedLanguages = false,
    };
    FormattingAssembler.AssembleFormatting(wDoc, assemblerSettings);

    // Walk every run in every content part, accumulating into a single metrics object.
    var totals = new FormattingMetrics();
    foreach (var contentPart in wDoc.ContentParts())
    {
        var partXml = contentPart.GetXDocument();
        foreach (var runElement in partXml.Descendants(W.r))
        {
            totals.RunCount++;
            AnalyzeRun(runElement, metrics, notes, totals, contentPart.Uri.ToString());
        }
    }

    // The total run count is always reported; every other counter only when it is positive.
    metrics.Add(new XElement(H.RunCount, new XAttribute(H.Val, totals.RunCount)));

    if (totals.RunWithoutRprCount > 0)
    {
        metrics.Add(new XElement(H.RunWithoutRprCount, new XAttribute(H.Val, totals.RunWithoutRprCount)));
    }
    if (totals.ZeroLengthText > 0)
    {
        metrics.Add(new XElement(H.ZeroLengthText, new XAttribute(H.Val, totals.ZeroLengthText)));
    }
    if (totals.MultiFontRun > 0)
    {
        metrics.Add(new XElement(H.MultiFontRun, new XAttribute(H.Val, totals.MultiFontRun)));
    }

    // Character counts per font type.
    if (totals.AsciiCharCount > 0)
    {
        metrics.Add(new XElement(H.AsciiCharCount, new XAttribute(H.Val, totals.AsciiCharCount)));
    }
    if (totals.CSCharCount > 0)
    {
        metrics.Add(new XElement(H.CSCharCount, new XAttribute(H.Val, totals.CSCharCount)));
    }
    if (totals.EastAsiaCharCount > 0)
    {
        metrics.Add(new XElement(H.EastAsiaCharCount, new XAttribute(H.Val, totals.EastAsiaCharCount)));
    }
    if (totals.HAnsiCharCount > 0)
    {
        metrics.Add(new XElement(H.HAnsiCharCount, new XAttribute(H.Val, totals.HAnsiCharCount)));
    }

    // Run counts per font type.
    if (totals.AsciiRunCount > 0)
    {
        metrics.Add(new XElement(H.AsciiRunCount, new XAttribute(H.Val, totals.AsciiRunCount)));
    }
    if (totals.CSRunCount > 0)
    {
        metrics.Add(new XElement(H.CSRunCount, new XAttribute(H.Val, totals.CSRunCount)));
    }
    if (totals.EastAsiaRunCount > 0)
    {
        metrics.Add(new XElement(H.EastAsiaRunCount, new XAttribute(H.Val, totals.EastAsiaRunCount)));
    }
    if (totals.HAnsiRunCount > 0)
    {
        metrics.Add(new XElement(H.HAnsiRunCount, new XAttribute(H.Val, totals.HAnsiRunCount)));
    }

    // Languages are emitted as one comma-separated value with trailing commas trimmed.
    if (totals.Languages.Any())
    {
        var withTrailingCommas = totals.Languages.StringConcatenate(s => s + ",");
        var languageList = withTrailingCommas.TrimEnd(',');
        var languageAttribute = new XAttribute(H.Val, PtUtils.MakeValidXml(languageList));
        metrics.Add(new XElement(H.Languages, languageAttribute));
    }
}