/// <summary>
/// Runs the part-level checks over every part of the document structure that is present.
/// The main part is handled by <c>AnalyzeMainPart</c>, every other part by
/// <c>AnalyzeNonMainPart</c>; <c>FindSpaces</c> is then run on each part.
/// </summary>
/// <param name="document">Document whose <c>Structure</c> parts are analyzed.</param>
public void Analyze(DqDocument document)
{
    var structure = document.Structure;
    var singleParts = new[]
    {
        structure.Toc,
        structure.Introduction,
        structure.MainPart,
        structure.Conclusion,
        structure.Bibliography,
        structure.Appendixes,
    };

    foreach (var part in singleParts.Concat(structure.Abstracts))
    {
        // Parts that were not found in the document are simply absent (null).
        if (part == null)
        {
            continue;
        }

        switch (part)
        {
            case DqMainPart mainPart:
                AnalyzeMainPart(mainPart, document);
                break;
            default:
                AnalyzeNonMainPart(part, document);
                break;
        }

        FindSpaces(part);
    }
}
/// <summary>
/// Checks the heading paragraph (<c>dqPart.Start</c>) of a non-main part: it must follow a
/// page break, be centered without a first-line indent, and the bibliography must use the
/// normative title. Only parts whose type is in the centered-parts list are checked.
/// Findings are appended to the heading paragraph's <c>Meta.Errors</c>.
/// </summary>
/// <param name="dqPart">Part whose heading is checked.</param>
/// <param name="dqDocument">Document that owns the part; used to look at the preceding paragraph.</param>
private void AnalyzeNonMainPart(DqPart dqPart, DqDocument dqDocument)
{
    var centeredParts = new[]
    {
        PartType.Bibliography,
        PartType.Abstract,
        PartType.Introduction,
        PartType.Conclusion,
        PartType.Toc
    };
    if (!centeredParts.Contains(dqPart.Type))
    {
        return;
    }

    var heading = dqPart.Start;

    // A heading that is the very first paragraph has no predecessor to inspect; it is
    // treated as already starting a page instead of indexing Paragraphs[-1].
    // The marker is a technical token, so it is compared ordinally.
    var startsOnNewPage =
        heading.Text.StartsWith("{PageBreak}", StringComparison.Ordinal)
        || heading.Index <= 0
        || dqDocument.Paragraphs[heading.Index - 1].Text.EndsWith("{PageBreak}", StringComparison.Ordinal);
    if (!startsOnNewPage)
    {
        heading.Meta.Errors.Add(new DqError("Перед разделом отсутсвует разрыв страницы."));
    }

    if (heading.Style.Aligment != DqAligment.Center)
    {
        heading.Meta.Errors.Add(new DqAlignmentError("Заголовки разделов должны быть выровнены по центру."));
    }
    else if (heading.Style.Indent != 0)
    {
        heading.Meta.Errors.Add(new DqAlignmentError("При выравнивании по центру должен отсутвовать абзацный отступ."));
    }

    if (string.Equals(heading.GetPureText(), "список использованной литературы", StringComparison.OrdinalIgnoreCase))
    {
        heading.Meta.Errors.Add(new DqError("Нормативное название части — «Список использованных источников»."));
    }
}
/// <summary>
/// Validates the format of bibliography entries. For every bibliography paragraph that
/// declares at least one <c>DqSource</c>, the sources are passed to <c>Recognize</c> and the
/// first one is checked; at most one error is added per paragraph.
/// </summary>
/// <param name="dqDocument">Document whose bibliography part is validated; nothing happens when the part is absent.</param>
public void Analyze2(DqDocument dqDocument)
{
    var bibliography = dqDocument.Structure.Bibliography;
    if (bibliography == null)
    {
        return;
    }

    foreach (var content in bibliography.Paragraphs)
    {
        var sources = content.Meta.SourceDeclarations.OfType<DqSource>().ToList();
        if (!sources.Any())
        {
            continue;
        }

        Recognize(sources);

        // Only the first declared source of the paragraph is inspected.
        var source = sources.First();

        string message = null;
        if (source.SourceType == SourceType.Unknown)
        {
            message = "Источник имеет неверный формат";
        }
        else if (source.SourceType == SourceType.WebUnknown)
        {
            message = "Web-источник имеет неверный формат";
        }
        else if (source.Formatted != source.Paragraph.Text)
        {
            message = $"Ожидаемый формат данных: {source.Formatted}";
        }

        if (message != null)
        {
            content.Meta.Errors.Add(new DqError(message));
        }
    }
}
/// <summary>
/// Collects header tokens from the numbered paragraphs of the introduction part (its first
/// paragraph is skipped) and flags them, together with the tokens returned by
/// <c>GetNonMainHeaders</c>, as headers via <c>Meta.IsHeader</c>.
/// </summary>
/// <remarks>
/// Numbered paragraphs are grouped into runs of directly adjacent paragraphs; a chapter
/// paragraph always starts a new run. A finished run is accepted only when its levels are
/// strictly increasing.
/// </remarks>
/// <param name="document">Document to scan.</param>
/// <returns>The accepted header tokens; an empty list when there is no introduction part.</returns>
public IList<Token> GetHeaders(DqDocument document)
{
    var introduction = document.Structure.Introduction;
    if (introduction == null)
    {
        return new List<Token>();
    }

    var numberedParagraphs = introduction.Paragraphs
        .Skip(1)
        .Where(IsNumberedParagraph)
        .ToList();

    var run = new List<Token>();
    var accepted = new List<Token>();

    foreach (var paragraph in numberedParagraphs)
    {
        if (run.Count == 0)
        {
            run.Add(CreateToken(paragraph));
            continue;
        }

        var continuesRun =
            paragraph.Index - run.Last().Paragraph.Index == 1
            && !IsChapter(paragraph);

        if (!continuesRun)
        {
            // The run is complete: keep it only if no level repeats or decreases.
            var hasNonIncreasingStep = Enumerable
                .Range(1, run.Count - 1)
                .Any(i => run[i - 1].Level >= run[i].Level);
            if (!hasNonIncreasingStep)
            {
                accepted.AddRange(run);
            }

            run.Clear();
        }

        run.Add(new Token(paragraph, GetLevel(paragraph)));
    }

    // NOTE(review): the trailing run is accepted only when it holds exactly one token;
    // a longer trailing run is dropped without the level check — confirm this is intended.
    if (run.Count == 1)
    {
        accepted.AddRange(run);
    }

    foreach (var token in accepted.Concat(GetNonMainHeaders(document)))
    {
        token.Paragraph.Meta.IsHeader = true;
    }

    return accepted;
}
/// <summary>
/// Registers source references such as <c>[12]</c> found in the paragraph text and, when
/// the paragraph belongs to the bibliography part and starts with a number, registers it as
/// a source declaration. Both kinds of element are appended to <c>paragraph.Meta.Structure</c>.
/// </summary>
/// <param name="paragraph">Paragraph to scan.</param>
/// <param name="dqDocument">Document that provides the bibliography part.</param>
public void Parse(DqParagraph paragraph, DqDocument dqDocument)
{
    // The leading [^]] consumes one character that is not ']', so a reference standing at
    // the very start of the text is not matched by this pattern.
    var referenceMatches = Regex.Matches(
        paragraph.Text,
        @"[^]]\[(\d+)(?:,\s*c(?:тр)\.\d+)?\]",
        RegexOptions.IgnoreCase);

    foreach (Match referenceMatch in referenceMatches)
    {
        var reference = new DqNumberedElement(paragraph, DqStructureElementType.SourceReference)
        {
            Number = referenceMatch.Groups[1].Value
        };
        paragraph.Meta.Structure.Add(reference);
    }

    var bibliographyPart = dqDocument.Structure.Bibliography;
    var isBibliographyParagraph =
        bibliographyPart != null && bibliographyPart.Paragraphs.Contains(paragraph);
    if (!isBibliographyParagraph)
    {
        return;
    }

    // A bibliography entry is recognised by a number at the start of the paragraph.
    var numberMatch = Regex.Match(paragraph.Text, @"^\s*(\d+)", RegexOptions.IgnoreCase);
    if (numberMatch.Success)
    {
        var declaration = new DqSource(paragraph, DqStructureElementType.SourceDeclaration)
        {
            Number = numberMatch.Groups[1].Value
        };
        paragraph.Meta.Structure.Add(declaration);
    }
}
/// <summary>
/// Finds the first paragraph located strictly after <paramref name="startIndex"/> whose
/// pure text equals one of the given titles (ordinal, case-insensitive).
/// </summary>
/// <param name="document">Document whose paragraphs are searched.</param>
/// <param name="titles">Candidate titles.</param>
/// <param name="startIndex">Index after which the search begins; the paragraph at this index itself is not examined.</param>
/// <returns>The index of the matching paragraph, or <c>null</c> when none matches.</returns>
private int? TryFindByTitle(DqDocument document, IReadOnlyCollection<string> titles, int startIndex)
{
    for (var i = startIndex + 1; i < document.Paragraphs.Count; ++i)
    {
        // Computed once per paragraph instead of once per candidate title.
        var pureText = document.Paragraphs[i].GetPureText();

        // A null title never counts as a match, exactly as before.
        var matchesTitle = titles.Any(
            title => title != null && string.Equals(pureText, title, StringComparison.OrdinalIgnoreCase));
        if (matchesTitle)
        {
            return i;
        }
    }

    return null;
}
/// <summary>
/// Runs <c>AnalyzeMainPartParagraph</c> on every paragraph of the main part and verifies
/// that the first paragraph of each child of the main part is preceded by a page break.
/// </summary>
/// <param name="dqMainPart">Main part to analyze.</param>
/// <param name="dqDocument">Document that owns the part; used to look at the paragraph before each chapter start.</param>
private void AnalyzeMainPart(DqMainPart dqMainPart, DqDocument dqDocument)
{
    foreach (var dqParagraph in dqMainPart.Paragraphs)
    {
        AnalyzeMainPartParagraph(dqParagraph, dqDocument);
    }

    foreach (var chapter in dqMainPart.Children)
    {
        var heading = chapter.Start;

        // A chapter that starts at the very first paragraph has no predecessor to inspect;
        // it is treated as already starting a page instead of indexing Paragraphs[-1].
        // The marker is a technical token, so it is compared ordinally.
        var startsOnNewPage =
            heading.Text.StartsWith("{PageBreak}", StringComparison.Ordinal)
            || heading.Index <= 0
            || dqDocument.Paragraphs[heading.Index - 1].Text.EndsWith("{PageBreak}", StringComparison.Ordinal);
        if (!startsOnNewPage)
        {
            heading.Meta.Errors.Add(new DqError("Перед разделом отсутсвует разрыв страницы."));
        }
    }
}
/// <summary>
/// Runs the single-paragraph <c>Analyze</c> overload on every bibliography paragraph that
/// carries at least one source declaration. The first paragraph of the part is skipped.
/// </summary>
/// <param name="dqDocument">Document whose bibliography part is analyzed; nothing happens when the part is absent.</param>
public void Analyze(DqDocument dqDocument)
{
    var bibliography = dqDocument.Structure.Bibliography;
    if (bibliography == null)
    {
        return;
    }

    // Skip(1): presumably the first paragraph is the part heading — confirm.
    foreach (var entry in bibliography.Paragraphs.Skip(1))
    {
        if (!entry.Meta.SourceDeclarations.Any())
        {
            continue;
        }

        Analyze(entry);
    }
}
/// <summary>
/// Produces a token for the first paragraph of every non-main part that is present:
/// the abstracts first, then table of contents, introduction, conclusion, bibliography
/// and appendixes. Absent parts and parts without paragraphs are skipped.
/// </summary>
/// <param name="dqDocument">Document whose structure is inspected.</param>
/// <returns>A lazily evaluated sequence of tokens created by <c>CreateToken</c>.</returns>
private IEnumerable<Token> GetNonMainHeaders(DqDocument dqDocument)
{
    var structure = dqDocument.Structure;
    var singleParts = new[]
    {
        structure.Toc,
        structure.Introduction,
        structure.Conclusion,
        structure.Bibliography,
        structure.Appendixes,
    };
    var allParts = structure.Abstracts.Concat(singleParts);

    return from part in allParts
           let firstParagraph = part?.Paragraphs.FirstOrDefault()
           where firstParagraph != null
           select CreateToken(firstParagraph);
}
/// <summary>
/// Reads a word-processing document from <paramref name="path"/> and converts it into a
/// <c>DqDocument</c>: numbering table, style table, sections and the list of non-blank
/// paragraphs. Each table in the body is represented by a placeholder paragraph with the
/// text <c>{TBL}</c>.
/// </summary>
/// <remarks>
/// The file is opened from a temporary copy (presumably so the original is never locked
/// or touched — confirm). The copy is deleted in all cases, including a failed copy.
/// </remarks>
/// <param name="path">Path of the document to parse.</param>
/// <returns>The parsed document; paragraph <c>Index</c> values equal their position in <c>Paragraphs</c>.</returns>
public DqDocument Parse(string path)
{
    // GetTempFileName already creates the file on disk, so everything after this line
    // must be covered by the finally block that removes it.
    var tempPath = Path.GetTempFileName();
    try
    {
        File.Copy(path, tempPath, overwrite: true);

        using (var doc = WordprocessingDocument.Open(tempPath, isEditable: false))
        {
            var dqDocument = new DqDocument();
            var fontScheme = _dqStyleParser.GetFontScheme(doc);
            dqDocument.NumberingTable = _dqNumberingParser.ParseNumberingTable(doc);
            dqDocument.StyleTable = _dqStyleParser.ParseStyleTable(doc);

            var body = doc.MainDocumentPart.Document.Body;
            dqDocument.Sections.AddRange(GetSections(body));

            var paragraphs = new List<DqParagraph>();
            foreach (var element in body.Elements())
            {
                if (element is Paragraph p)
                {
                    paragraphs.Add(Convert(p, dqDocument.StyleTable, fontScheme, dqDocument.NumberingTable));
                }
                else if (element is DocumentFormat.OpenXml.Wordprocessing.Table)
                {
                    paragraphs.Add(new DqParagraph("{TBL}", dqDocument.StyleTable.Paragraph.Default));
                }
            }

            // Blank paragraphs are dropped before indexes are assigned, so Index always
            // matches the position inside dqDocument.Paragraphs.
            dqDocument.Paragraphs.AddRange(paragraphs.Where(p => !string.IsNullOrWhiteSpace(p.Text)));
            for (int i = 0; i < dqDocument.Paragraphs.Count; i++)
            {
                dqDocument.Paragraphs[i].Index = i;
            }

            return dqDocument;
        }
    }
    finally
    {
        File.Delete(tempPath);
    }
}
/// <summary>
/// Rebuilds the visible numbers of numbered paragraphs. For each paragraph accepted by
/// <c>IsNumbered</c> whose outline level is below the number of levels of its numbering,
/// the counters of that numbering are updated, the number is calculated, stored in
/// <c>Number</c> and prepended to <c>Text</c> followed by a space.
/// </summary>
/// <param name="document">Document whose paragraphs are updated in place.</param>
public void RestoreNumbering(DqDocument document)
{
    var numberedParagraphs = document.Paragraphs.Where(IsNumbered).ToList();

    // Paragraphs without a numbering definition are left out of the key set: a null key
    // would make ToDictionary throw, while the loop below skips such paragraphs anyway.
    // Each numbering id gets nine zero-initialised counters
    // (presumably one per list level — TODO confirm).
    var countersById = numberedParagraphs
        .Where(p => p.Style.Numbering != null)
        .Select(p => p.Style.Numbering.Id)
        .Distinct()
        .ToDictionary(id => id, id => Enumerable.Repeat(element: 0, count: 9).ToList());

    foreach (var paragraph in numberedParagraphs)
    {
        var numbering = paragraph.Style.Numbering;
        if (numbering == null)
        {
            continue;
        }

        if (paragraph.Style.OutlineLevel < numbering.Levels.Count)
        {
            var counters = countersById[numbering.Id];
            UpdateCounters(paragraph, counters);
            paragraph.Number = CalculateNumbers(paragraph, counters);
            paragraph.Text = paragraph.Number + " " + paragraph.Text;
        }
    }
}
/// <summary>
/// Checks the formatting of one main-part paragraph — font size, heading punctuation,
/// font name, alignment/indent and line spacing — and appends every finding to
/// <c>dqParagraph.Meta.Errors</c>. Paragraphs that contain nothing but the
/// <c>{IMG}</c>, <c>{TBL}</c> and <c>{PageBreak}</c> markers are ignored.
/// </summary>
/// <param name="dqParagraph">Paragraph to check.</param>
/// <param name="dqDocument">Owning document (not used by the checks in this method).</param>
private void AnalyzeMainPartParagraph(DqParagraph dqParagraph, DqDocument dqDocument)
{
    var visibleText = dqParagraph.Text
        .Replace("{IMG}", "")
        .Replace("{TBL}", "")
        .Replace("{PageBreak}", "");
    if (string.IsNullOrWhiteSpace(visibleText))
    {
        return;
    }

    var isFigureCaption = dqParagraph.Meta.FigureDeclarations.Any();
    var isTableCaption = dqParagraph.Meta.TableDeclarations.Any();
    var isHeader = dqParagraph.Meta.IsHeader;

    // --- Font size and heading punctuation (figure caption wins over table caption, then heading) ---
    if (isFigureCaption)
    {
        if (dqParagraph.Style.FontSize > Settings.Default.ExpectedFontSize)
        {
            dqParagraph.Meta.Errors.Add(new DqFontSizeError($"Неверный размер шрифта ({dqParagraph.Style.FontSize} пт). Подписи риcунков должны использовать шрифт не больше основного ({Settings.Default.ExpectedFontSize} пт)."));
        }
    }
    else if (isTableCaption)
    {
        if (dqParagraph.Style.FontSize > Settings.Default.ExpectedFontSize)
        {
            dqParagraph.Meta.Errors.Add(new DqFontSizeError($"Неверный размер шрифта ({dqParagraph.Style.FontSize} пт). Заголовки таблиц должны использовать шрифт не больше основного ({Settings.Default.ExpectedFontSize} пт)."));
        }
    }
    else if (isHeader)
    {
        if (dqParagraph.Style.FontSize < Settings.Default.ExpectedFontSize)
        {
            dqParagraph.Meta.Errors.Add(new DqFontSizeError($"Неверный размер шрифта ({dqParagraph.Style.FontSize} пт). Заголовки должны использовать шрифт не меньше основного ({Settings.Default.ExpectedFontSize} пт)."));
        }

        if (dqParagraph.GetPureText().Trim().EndsWith(".", StringComparison.Ordinal))
        {
            dqParagraph.Meta.Errors.Add(new DqError("В конце заголовков точка не ставится."));
        }

        // The pattern takes the leading section number together with an optional trailing
        // dot ("1.2" or "1.2."). The previous pattern always ended on a digit, so the
        // trailing-dot check below could never fire.
        var number = Regex.Match(dqParagraph.GetPureText().TrimStart(), @"^\d+(?:\.\d+)*\.?");
        if (number.Success && number.Value.EndsWith(".", StringComparison.Ordinal))
        {
            dqParagraph.Meta.Errors.Add(new DqError("В конце номера (под)раздела точка не ставится."));
        }
    }

    // --- Font name ---
    if (dqParagraph.Style.FontName != Settings.Default.ExpectedFontName)
    {
        // Kept on one line: a raw line break is not allowed inside a non-verbatim string literal.
        dqParagraph.Meta.Errors.Add(new DqFontError($"Неверный шрифт ({dqParagraph.Style.FontName}). Иcпользуйте {Settings.Default.ExpectedFontName}."));
    }

    // --- Alignment and first-line indent (heading wins over figure caption, then table caption) ---
    if (isHeader)
    {
        if (dqParagraph.Style.Aligment == DqAligment.Center || dqParagraph.Style.Aligment == DqAligment.Right)
        {
            dqParagraph.Meta.Errors.Add(new DqAlignmentError("Заголовки главной части должны быть выровнены по левому краю."));
        }
        else if (dqParagraph.Style.Indent == 0)
        {
            dqParagraph.Meta.Errors.Add(new DqAlignmentError("Отсутвует абзацный отступ."));
        }
    }
    else if (isFigureCaption)
    {
        if (dqParagraph.Style.Aligment != DqAligment.Center)
        {
            dqParagraph.Meta.Errors.Add(new DqAlignmentError("Подпись рисунка должна быть выровнена по центру."));
        }
        else if (dqParagraph.Style.Indent != 0)
        {
            dqParagraph.Meta.Errors.Add(new DqAlignmentError("При выравнивании по центру должен отсутвовать абзацный отступ."));
        }
    }
    else if (isTableCaption)
    {
        if (dqParagraph.Style.Aligment == DqAligment.Center || dqParagraph.Style.Aligment == DqAligment.Right)
        {
            dqParagraph.Meta.Errors.Add(new DqAlignmentError("Заголовок таблицы должен быть выровнен по левому краю."));
        }
        else if (dqParagraph.Style.Indent != 0)
        {
            dqParagraph.Meta.Errors.Add(new DqAlignmentError("Заголовок таблицы помещают без абзацного отступа."));
        }
    }
    else
    {
        // Ordinary body text: only the first-line indent is required here.
        if (dqParagraph.Style.Indent == 0)
        {
            dqParagraph.Meta.Errors.Add(new DqAlignmentError("Отсутствует абзацный отступ."));
        }
    }

    // --- Line spacing: checked for ordinary body text only ---
    if (dqParagraph.Style.SpacingBetweenLines != Settings.Default.ExpectedSpacingBetweenLines
        && !isHeader
        && !isFigureCaption
        && !isTableCaption)
    {
        dqParagraph.Meta.Errors.Add(new DqError($"Неверный междустрочный интервал ({dqParagraph.Style.SpacingBetweenLines}). Ожидается {Settings.Default.ExpectedSpacingBetweenLines}-ый интервал."));
    }
}
/// <summary>
/// Splits the document into parts by looking for paragraphs whose pure text equals one of
/// the titles configured in <c>TitlesByPartType</c>. Each part runs from its title
/// paragraph up to the next recognised title (or the end of the document); everything
/// before the first recognised title becomes the title part.
/// </summary>
/// <remarks>
/// When no title is recognised, the whole document is returned as the main part.
/// Otherwise the main part is not assigned by this method. When several parts of the same
/// non-abstract type are found, the last one wins.
/// </remarks>
/// <param name="document">Document to split; must contain at least one paragraph.</param>
/// <returns>The structure with the recognised parts filled in.</returns>
public DqStructure PrimaryParse(DqDocument document)
{
    // Titles are matched case-insensitively by TryFindByTitle, so the lookup of the part
    // type must use the same comparison instead of a culture-sensitive ToLower().
    var partTypeByTitle = new Dictionary<string, PartType>(StringComparer.OrdinalIgnoreCase);
    foreach (var kv in TitlesByPartType)
    {
        foreach (var title in kv.Value)
        {
            partTypeByTitle[title] = kv.Key;
        }
    }

    // TryFindByTitle searches strictly after the given index, so starting from 0 means
    // the very first paragraph is never treated as a part title.
    var dqParts = new List<DqPart>();
    var searchFrom = 0;
    int? found;
    while ((found = TryFindByTitle(document, partTypeByTitle.Keys, searchFrom)) != null)
    {
        var start = document.Paragraphs[found.Value];
        dqParts.Add(new DqPart
        {
            Type = partTypeByTitle[start.GetPureText()],
            Start = start,
        });
        searchFrom = found.Value;
    }

    if (!dqParts.Any())
    {
        var mainPart = new DqMainPart();
        mainPart.Paragraphs.AddRange(document.Paragraphs);
        mainPart.Start = document.Paragraphs.First();
        return new DqStructure { MainPart = mainPart };
    }

    // Every part owns the paragraphs from its title up to the next part's title;
    // the last part runs to the end of the document.
    for (var i = 0; i < dqParts.Count; ++i)
    {
        var dqPart = dqParts[i];
        var endIndex = i + 1 < dqParts.Count
            ? dqParts[i + 1].Start.Index
            : document.Paragraphs.Count;
        dqPart.Paragraphs.AddRange(
            document.Paragraphs.GetRange(dqPart.Start.Index, endIndex - dqPart.Start.Index));
    }

    var report = new DqStructure();
    report.Title = CopyContent(
        new DqPart { Type = PartType.Title, Start = document.Paragraphs.First() },
        document,
        0,
        dqParts.First().Start.Index);

    foreach (var dqPart in dqParts)
    {
        switch (dqPart.Type)
        {
            case PartType.Abstract:
                report.Abstracts.Add(dqPart);
                break;
            case PartType.Toc:
                report.Toc = dqPart;
                break;
            case PartType.Introduction:
                report.Introduction = dqPart;
                break;
            case PartType.Conclusion:
                report.Conclusion = dqPart;
                break;
            case PartType.Bibliography:
                report.Bibliography = dqPart;
                break;
            case PartType.Annex:
                report.Appendixes = dqPart;
                break;
        }
    }

    return report;
}
/// <summary>
/// Appends the document paragraphs in the range [<paramref name="partStart"/>,
/// <paramref name="partEnd"/>) to the given part.
/// </summary>
/// <param name="dqPart">Part that receives the paragraphs.</param>
/// <param name="dqDocument">Document the paragraphs are taken from.</param>
/// <param name="partStart">Index of the first paragraph to copy.</param>
/// <param name="partEnd">Index one past the last paragraph to copy; <c>null</c> means "up to the end of the document".</param>
/// <returns>The same <paramref name="dqPart"/> instance, for chaining.</returns>
private DqPart CopyContent(DqPart dqPart, DqDocument dqDocument, int partStart, int? partEnd)
{
    // A missing end index used to throw on partEnd.Value; it now selects the rest of the document.
    var endIndex = partEnd ?? dqDocument.Paragraphs.Count;
    dqPart.Paragraphs.AddRange(dqDocument.Paragraphs.GetRange(partStart, endIndex - partStart));
    return dqPart;
}
/// <summary>
/// Runs the figure, table and source reference parsers over every paragraph and then
/// cross-checks the collected numbered elements: references without a declaration and
/// declarations without a reference are flagged through <c>IsMissing</c>; figure and table
/// declarations are additionally checked for numbering order and for being referenced
/// before they appear.
/// </summary>
/// <param name="document">Document whose paragraphs are parsed and annotated in place.</param>
public void ParseReferences(DqDocument document)
{
    var figureReferenceParser = new DqFigureReferenceParser();
    var tableReferenceParser = new DqTableReferenceParser();
    var sourceReferenceParser = new DqSourceReferenceParser();
    foreach (var paragraph in document.Paragraphs)
    {
        figureReferenceParser.Parse(paragraph);
        tableReferenceParser.Parse(paragraph);
        sourceReferenceParser.Parse(paragraph, document);
    }

    var structure = document.Paragraphs
        .SelectMany(p => p.Meta.Structure)
        .OfType<DqNumberedElement>()
        .ToList();

    var figureDeclarations = structure.Where(s => s.Type == DqStructureElementType.FigureDeclaration).ToList();
    var figureReferences = structure.Where(s => s.Type == DqStructureElementType.FigureReference).ToList();
    MarkReferencesWithoutDeclaration(figureReferences, figureDeclarations);
    ReportNumberingOrderErrors(figureDeclarations);
    CheckDeclarationsAgainstReferences(figureDeclarations, figureReferences, "Первая ссылка должна быть до рисунка.");

    var tableDeclarations = structure.Where(s => s.Type == DqStructureElementType.TableDeclaration).ToList();
    var tableReferences = structure.Where(s => s.Type == DqStructureElementType.TableReference).ToList();
    MarkReferencesWithoutDeclaration(tableReferences, tableDeclarations);
    ReportNumberingOrderErrors(tableDeclarations);
    CheckDeclarationsAgainstReferences(tableDeclarations, tableReferences, "Первая ссылка должна быть до таблицы.");

    // Sources are only matched by number: no ordering and no position checks.
    var sourceDeclarations = structure.Where(s => s.Type == DqStructureElementType.SourceDeclaration).ToList();
    var sourceReferences = structure.Where(s => s.Type == DqStructureElementType.SourceReference).ToList();
    MarkReferencesWithoutDeclaration(sourceReferences, sourceDeclarations);
    CheckDeclarationsAgainstReferences(sourceDeclarations, sourceReferences, null);
}

/// <summary>Sets <c>IsMissing</c> on every reference according to whether no declaration carries the same number.</summary>
private static void MarkReferencesWithoutDeclaration(
    IReadOnlyList<DqNumberedElement> references,
    IReadOnlyList<DqNumberedElement> declarations)
{
    foreach (var reference in references)
    {
        reference.IsMissing = declarations.All(declaration => declaration.Number != reference.Number);
    }
}

/// <summary>Adds an error to every declaration whose parsed number is not greater than the number of the declaration before it.</summary>
private static void ReportNumberingOrderErrors(IReadOnlyList<DqNumberedElement> declarations)
{
    for (var i = 1; i < declarations.Count; i++)
    {
        var previous = declarations[i - 1];
        var current = declarations[i];

        // Numbers that cannot be parsed are skipped rather than reported.
        var previousNumber = DqNumber.TryParse(previous.Number);
        var currentNumber = DqNumber.TryParse(current.Number);
        if (previousNumber != null && currentNumber != null && previousNumber.CompareTo(currentNumber) >= 0)
        {
            current.Paragraph.Meta.Errors.Add(
                new DqError($"Неправильный порядок нумерации ({current.Number} после {previous.Number})"));
        }
    }
}

/// <summary>
/// Marks declarations that are never referenced as missing. When a message is supplied,
/// also reports a first reference that does not precede its declaration.
/// </summary>
private static void CheckDeclarationsAgainstReferences(
    IReadOnlyList<DqNumberedElement> declarations,
    IReadOnlyList<DqNumberedElement> references,
    string lateReferenceMessage)
{
    foreach (var declaration in declarations)
    {
        var firstReference = references.FirstOrDefault(r => r.Number == declaration.Number);
        if (firstReference == null)
        {
            declaration.IsMissing = true;
        }
        else if (lateReferenceMessage != null
                 && firstReference.Paragraph.Index >= declaration.Paragraph.Index)
        {
            firstReference.Paragraph.Meta.Errors.Add(new DqError(lateReferenceMessage));
        }
    }
}