Exemple #1
0
        /// <summary>
        /// Runs the formatting checks over every part of the document structure that is present.
        /// </summary>
        /// <param name="document">Document whose <c>Structure</c> parts are analyzed.</param>
        public void Analyze(DqDocument document)
        {
            var structure = document.Structure;

            // The single-instance parts come first, followed by every abstract.
            var candidates = new[]
            {
                structure.Toc,
                structure.Introduction,
                structure.MainPart,
                structure.Conclusion,
                structure.Bibliography,
                structure.Appendixes,
            }.Concat(structure.Abstracts);

            foreach (var part in candidates)
            {
                // Parts that were not found in the document are represented by null.
                if (part == null)
                {
                    continue;
                }

                switch (part)
                {
                    case DqMainPart mainPart:
                        AnalyzeMainPart(mainPart, document);
                        break;

                    default:
                        AnalyzeNonMainPart(part, document);
                        break;
                }

                // The spacing check applies to every part regardless of its kind.
                FindSpaces(part);
            }
        }
Exemple #2
0
        /// <summary>
        /// Checks the heading paragraph of a non-main part: a page break before it, centered
        /// alignment without indent, and the normative bibliography title.
        /// </summary>
        /// <param name="dqPart">Part whose <c>Start</c> paragraph is checked. Parts of other types than
        /// bibliography, abstract, introduction, conclusion and table of contents are ignored.</param>
        /// <param name="dqDocument">Document used to look up the paragraph preceding the heading.</param>
        private void AnalyzeNonMainPart(DqPart dqPart, DqDocument dqDocument)
        {
            var centeredParts = new[] { PartType.Bibliography, PartType.Abstract, PartType.Introduction, PartType.Conclusion, PartType.Toc };

            if (!centeredParts.Contains(dqPart.Type))
            {
                return;
            }

            var heading = dqPart.Start;

            // The marker is a literal token, so it is compared ordinally rather than by culture rules.
            var startsWithBreak = heading.Text.StartsWith("{PageBreak}", StringComparison.Ordinal);

            // A heading that is the very first paragraph has no predecessor to inspect; indexing
            // Paragraphs[-1] would throw, and nothing precedes it on the page anyway.
            var precededByBreak = heading.Index <= 0
                || dqDocument.Paragraphs[heading.Index - 1].Text.EndsWith("{PageBreak}", StringComparison.Ordinal);

            if (!startsWithBreak && !precededByBreak)
            {
                heading.Meta.Errors.Add(new DqError("Перед разделом отсутсвует разрыв страницы."));
            }

            if (heading.Style.Aligment != DqAligment.Center)
            {
                heading.Meta.Errors.Add(new DqAlignmentError("Заголовки разделов должны быть выровнены по центру."));
            }
            else if (heading.Style.Indent != 0)
            {
                // The indent is only reported when the alignment itself is already correct.
                heading.Meta.Errors.Add(new DqAlignmentError("При выравнивании по центру должен отсутвовать абзацный отступ."));
            }

            if (string.Equals(heading.GetPureText(), "список использованной литературы", StringComparison.OrdinalIgnoreCase))
            {
                heading.Meta.Errors.Add(new DqError("Нормативное название части — «Список использованных источников»."));
            }
        }
Exemple #3
0
        /// <summary>
        /// Validates the bibliography entries: for every bibliography paragraph that declares at
        /// least one source, the sources are recognized and the first one is checked for a known
        /// type and for the expected formatting.
        /// </summary>
        /// <param name="dqDocument">Document whose bibliography part is checked; nothing happens when it has none.</param>
        public void Analyze2(DqDocument dqDocument)
        {
            var bibliography = dqDocument.Structure.Bibliography;
            if (bibliography == null)
            {
                return;
            }

            foreach (var entry in bibliography.Paragraphs)
            {
                var declared = entry.Meta.SourceDeclarations.OfType<DqSource>().ToList();
                if (declared.Count < 1)
                {
                    continue;
                }

                Recognize(declared);

                // Only the first declared source of a paragraph is validated.
                var first = declared.First();
                var type = first.SourceType;

                string problem;
                if (type == SourceType.Unknown)
                {
                    problem = "Источник имеет неверный формат";
                }
                else if (type == SourceType.WebUnknown)
                {
                    problem = "Web-источник имеет неверный формат";
                }
                else
                {
                    // A recognized source must match its canonical formatted representation exactly.
                    problem = first.Formatted != first.Paragraph.Text
                        ? $"Ожидаемый формат данных: {first.Formatted}"
                        : null;
                }

                if (problem != null)
                {
                    entry.Meta.Errors.Add(new DqError(problem));
                }
            }
        }
Exemple #4
0
        /// <summary>
        /// Collects header tokens from the numbered paragraphs of the Introduction part and flags
        /// the corresponding paragraphs (plus the first paragraph of each non-main part) as headers.
        /// </summary>
        /// <remarks>
        /// Numbered paragraphs (the first paragraph of the part is skipped) are grouped into runs of
        /// directly adjacent paragraphs; a chapter paragraph always starts a new run. When a run is
        /// closed it is accepted only if the levels of its tokens strictly increase. The run that is
        /// still open after the last paragraph is accepted only when it holds exactly one token.
        /// </remarks>
        /// <param name="document">Document to inspect.</param>
        /// <returns>The accepted header tokens; an empty list when the document has no Introduction part.</returns>
        public IList<Token> GetHeaders(DqDocument document)
        {
            var introduction = document.Structure.Introduction;
            if (introduction == null)
            {
                return new List<Token>();
            }

            var candidates = introduction.Paragraphs
                .Skip(1)
                .Where(IsNumberedParagraph)
                .ToList();

            var accepted = new List<Token>();
            var run = new List<Token>();

            foreach (var candidate in candidates)
            {
                if (run.Count == 0)
                {
                    run.Add(CreateToken(candidate));
                    continue;
                }

                var previous = run[run.Count - 1].Paragraph;
                var continuesRun = candidate.Index - previous.Index == 1 && !IsChapter(candidate);

                if (!continuesRun)
                {
                    // Close the current run: it counts only if each token is nested deeper than the one before it.
                    var hasNonIncreasingStep = Enumerable
                        .Range(1, run.Count - 1)
                        .Any(i => run[i - 1].Level >= run[i].Level);

                    if (!hasNonIncreasingStep)
                    {
                        accepted.AddRange(run);
                    }

                    run.Clear();
                }

                run.Add(new Token(candidate, GetLevel(candidate)));
            }

            // The trailing run is kept only when it consists of a single token.
            if (run.Count == 1)
            {
                accepted.AddRange(run);
            }

            foreach (var token in accepted.Concat(GetNonMainHeaders(document)))
            {
                token.Paragraph.Meta.IsHeader = true;
            }

            return accepted;
        }
        /// <summary>
        /// Registers the source references found in a paragraph and, for bibliography paragraphs,
        /// the source declaration given by the leading number.
        /// </summary>
        /// <param name="paragraph">Paragraph whose text is scanned and whose <c>Meta.Structure</c> receives the elements.</param>
        /// <param name="dqDocument">Document that provides the bibliography part.</param>
        public void Parse(DqParagraph paragraph, DqDocument dqDocument)
        {
            // A reference is a bracketed number, optionally followed by a page suffix, that is
            // preceded by any character other than ']'.
            var referenceMatches = Regex.Matches(paragraph.Text, @"[^]]\[(\d+)(?:,\s*c(?:тр)\.\d+)?\]", RegexOptions.IgnoreCase);

            foreach (Match referenceMatch in referenceMatches)
            {
                paragraph.Meta.Structure.Add(new DqNumberedElement(paragraph, DqStructureElementType.SourceReference)
                {
                    Number = referenceMatch.Groups[1].Value
                });
            }

            // Declarations are only looked for in paragraphs that belong to the bibliography part.
            var bibliography = dqDocument.Structure.Bibliography;
            var isBibliographyEntry = bibliography != null && bibliography.Paragraphs.Contains(paragraph);
            if (!isBibliographyEntry)
            {
                return;
            }

            var leadingNumber = Regex.Match(paragraph.Text, @"^\s*(\d+)", RegexOptions.IgnoreCase);
            if (leadingNumber.Success)
            {
                paragraph.Meta.Structure.Add(new DqSource(paragraph, DqStructureElementType.SourceDeclaration)
                {
                    Number = leadingNumber.Groups[1].Value
                });
            }
        }
Exemple #6
0
        /// <summary>
        /// Finds the first paragraph after <paramref name="startIndex"/> whose pure text equals one
        /// of the given titles, ignoring case.
        /// </summary>
        /// <param name="document">Document whose paragraphs are searched.</param>
        /// <param name="titles">Titles to look for.</param>
        /// <param name="startIndex">Index the search starts after; the paragraph at this index itself is not examined.</param>
        /// <returns>The index of the matching paragraph, or <c>null</c> when none matches.</returns>
        private int? TryFindByTitle(DqDocument document, IReadOnlyCollection<string> titles, int startIndex)
        {
            var index = startIndex + 1;

            while (index < document.Paragraphs.Count)
            {
                var candidate = document.Paragraphs[index];

                // A null title never counts as a match.
                var isKnownTitle = titles.Any(known =>
                    known != null && string.Equals(candidate.GetPureText(), known, StringComparison.OrdinalIgnoreCase));

                if (isKnownTitle)
                {
                    return index;
                }

                index++;
            }

            return null;
        }
Exemple #7
0
        /// <summary>
        /// Analyzes every paragraph of the main part and verifies that each chapter starts after a page break.
        /// </summary>
        /// <param name="dqMainPart">Main part whose paragraphs and child chapters are checked.</param>
        /// <param name="dqDocument">Document used to look up the paragraph preceding a chapter heading.</param>
        private void AnalyzeMainPart(DqMainPart dqMainPart, DqDocument dqDocument)
        {
            foreach (var dqParagraph in dqMainPart.Paragraphs)
            {
                AnalyzeMainPartParagraph(dqParagraph, dqDocument);
            }

            foreach (var chapter in dqMainPart.Children)
            {
                var heading = chapter.Start;

                // The marker is a literal token, so it is compared ordinally rather than by culture rules.
                if (heading.Text.StartsWith("{PageBreak}", StringComparison.Ordinal))
                {
                    continue;
                }

                // A chapter that opens the document has no preceding paragraph; indexing
                // Paragraphs[-1] would throw, and nothing precedes it on the page anyway.
                if (heading.Index <= 0)
                {
                    continue;
                }

                var previous = dqDocument.Paragraphs[heading.Index - 1];
                if (!previous.Text.EndsWith("{PageBreak}", StringComparison.Ordinal))
                {
                    heading.Meta.Errors.Add(new DqError("Перед разделом отсутсвует разрыв страницы."));
                }
            }
        }
Exemple #8
0
        /// <summary>
        /// Analyzes every bibliography paragraph that declares at least one source.
        /// </summary>
        /// <param name="dqDocument">Document whose bibliography part is checked; nothing happens when it has none.</param>
        public void Analyze(DqDocument dqDocument)
        {
            var bibliography = dqDocument.Structure.Bibliography;
            if (bibliography == null)
            {
                return;
            }

            // The first paragraph of the part is skipped.
            foreach (var entry in bibliography.Paragraphs.Skip(1))
            {
                if (entry.Meta.SourceDeclarations.Any())
                {
                    Analyze(entry);
                }
            }
        }
Exemple #9
0
        /// <summary>
        /// Produces a token for the first paragraph of every abstract and of the table of contents,
        /// introduction, conclusion, bibliography and appendix parts.
        /// </summary>
        /// <param name="dqDocument">Document whose structure is inspected.</param>
        /// <returns>A lazily evaluated sequence of tokens; missing parts and parts without paragraphs are skipped.</returns>
        private IEnumerable<Token> GetNonMainHeaders(DqDocument dqDocument)
        {
            var structure = dqDocument.Structure;

            var singleParts = new[]
            {
                structure.Toc,
                structure.Introduction,
                structure.Conclusion,
                structure.Bibliography,
                structure.Appendixes,
            };

            // Abstracts come first, then the single-instance parts in the order listed above.
            var headingParagraphs =
                from part in structure.Abstracts.Concat(singleParts)
                let firstParagraph = part?.Paragraphs.FirstOrDefault()
                where firstParagraph != null
                select firstParagraph;

            return headingParagraphs.Select(paragraph => CreateToken(paragraph));
        }
Exemple #10
0
        /// <summary>
        /// Reads a word-processing document and converts its body into a <see cref="DqDocument"/>.
        /// </summary>
        /// <remarks>
        /// The file is copied to a temporary location and opened read-only from there; the
        /// temporary copy is always deleted afterwards, including when copying or parsing fails.
        /// </remarks>
        /// <param name="path">Path of the document to parse.</param>
        /// <returns>The parsed document with numbering table, style table, sections and indexed non-blank paragraphs.</returns>
        public DqDocument Parse(string path)
        {
            // GetTempFileName already creates the file on disk, so everything after this line
            // must run under the finally block that removes it.
            var tempPath = Path.GetTempFileName();

            try
            {
                File.Copy(path, tempPath, overwrite: true);

                using (var doc = WordprocessingDocument.Open(tempPath, isEditable: false))
                {
                    var dqDocument = new DqDocument();

                    var fontScheme = _dqStyleParser.GetFontScheme(doc);
                    dqDocument.NumberingTable = _dqNumberingParser.ParseNumberingTable(doc);
                    dqDocument.StyleTable = _dqStyleParser.ParseStyleTable(doc);

                    var body = doc.MainDocumentPart.Document.Body;
                    dqDocument.Sections.AddRange(GetSections(body));

                    var paragraphs = new List<DqParagraph>();
                    foreach (var element in body.Elements())
                    {
                        if (element is Paragraph p)
                        {
                            paragraphs.Add(Convert(p, dqDocument.StyleTable, fontScheme, dqDocument.NumberingTable));
                        }
                        else if (element is DocumentFormat.OpenXml.Wordprocessing.Table)
                        {
                            // A table is represented by a single placeholder paragraph with the default style.
                            paragraphs.Add(new DqParagraph("{TBL}", dqDocument.StyleTable.Paragraph.Default));
                        }
                    }

                    // Blank paragraphs are dropped before indexing, so Index is the position in dqDocument.Paragraphs.
                    dqDocument.Paragraphs.AddRange(paragraphs.Where(paragraph => !string.IsNullOrWhiteSpace(paragraph.Text)));

                    for (var i = 0; i < dqDocument.Paragraphs.Count; i++)
                    {
                        dqDocument.Paragraphs[i].Index = i;
                    }

                    return dqDocument;
                }
            }
            finally
            {
                File.Delete(tempPath);
            }
        }
Exemple #11
0
        /// <summary>
        /// Recomputes the numbers of numbered paragraphs and prepends each number to the paragraph text.
        /// </summary>
        /// <remarks>
        /// One list of nine zero-initialized counters is kept per numbering id. Paragraphs whose
        /// outline level is not below the number of levels of their numbering are left untouched.
        /// </remarks>
        /// <param name="document">Document whose paragraphs are updated in place.</param>
        public void RestoreNumbering(DqDocument document)
        {
            var numbered = document.Paragraphs.Where(IsNumbered).ToList();

            var countersById = numbered
                .Select(paragraph => paragraph.Style.Numbering?.Id)
                .Distinct()
                .ToDictionary(id => id, id => Enumerable.Repeat(0, 9).ToList());

            foreach (var paragraph in numbered)
            {
                // The comparison is false when the paragraph has no numbering at all.
                var levelIsDefined = paragraph.Style.OutlineLevel < paragraph.Style.Numbering?.Levels.Count;
                if (!levelIsDefined)
                {
                    continue;
                }

                var counters = countersById[paragraph.Style.Numbering.Id];

                // Counters must be advanced before the number is derived from them.
                UpdateCounters(paragraph, counters);
                paragraph.Number = CalculateNumbers(paragraph, counters);

                paragraph.Text = paragraph.Number + " " + paragraph.Text;
            }
        }
Exemple #12
0
        /// <summary>
        /// Checks a single main-part paragraph for font size, font name, alignment, indent and
        /// line-spacing problems and records every finding in <c>Meta.Errors</c>.
        /// </summary>
        /// <remarks>
        /// The rules depend on the role of the paragraph. For the font-size check a figure caption
        /// takes precedence over a table caption, which takes precedence over a header; for the
        /// alignment check a header takes precedence over a figure caption, then a table caption,
        /// then ordinary text. Paragraphs that contain nothing but placeholders are skipped.
        /// </remarks>
        /// <param name="dqParagraph">Paragraph to check.</param>
        /// <param name="dqDocument">Owning document (not used by the current checks).</param>
        private void AnalyzeMainPartParagraph(DqParagraph dqParagraph, DqDocument dqDocument)
        {
            var textWithoutPlaceholders = dqParagraph.Text
                .Replace("{IMG}", "")
                .Replace("{TBL}", "")
                .Replace("{PageBreak}", "");

            if (string.IsNullOrWhiteSpace(textWithoutPlaceholders))
            {
                return;
            }

            var errors = dqParagraph.Meta.Errors;
            var isHeader = dqParagraph.Meta.IsHeader;
            var isFigureCaption = dqParagraph.Meta.FigureDeclarations.Any();
            var isTableCaption = dqParagraph.Meta.TableDeclarations.Any();

            // Font size and header punctuation.
            if (isFigureCaption)
            {
                if (dqParagraph.Style.FontSize > Settings.Default.ExpectedFontSize)
                {
                    errors.Add(new DqFontSizeError($"Неверный размер шрифта ({dqParagraph.Style.FontSize} пт). Подписи риcунков должны использовать шрифт не больше основного ({Settings.Default.ExpectedFontSize} пт)."));
                }
            }
            else if (isTableCaption)
            {
                if (dqParagraph.Style.FontSize > Settings.Default.ExpectedFontSize)
                {
                    errors.Add(new DqFontSizeError($"Неверный размер шрифта ({dqParagraph.Style.FontSize} пт). Заголовки таблиц должны использовать шрифт не больше основного ({Settings.Default.ExpectedFontSize} пт)."));
                }
            }
            else if (isHeader)
            {
                if (dqParagraph.Style.FontSize < Settings.Default.ExpectedFontSize)
                {
                    errors.Add(new DqFontSizeError($"Неверный размер шрифта ({dqParagraph.Style.FontSize} пт). Заголовки должны использовать шрифт не меньше основного ({Settings.Default.ExpectedFontSize} пт)."));
                }

                var pureText = dqParagraph.GetPureText();

                if (pureText.Trim().EndsWith(".", StringComparison.Ordinal))
                {
                    errors.Add(new DqError("В конце заголовков точка не ставится."));
                }

                // Matches a leading section number such as "1." or "1.2." whose final dot is not
                // followed by another digit. The lookahead keeps "1.2 Title" from being read as
                // "1." + "2 Title". The previous pattern always ended on a digit, so this check
                // could never fire.
                if (Regex.IsMatch(pureText.TrimStart(), @"^\d+(?:\.\d+)*\.(?!\d)"))
                {
                    errors.Add(new DqError("В конце номера (под)раздела точка не ставится."));
                }
            }

            // Font name applies to every paragraph.
            if (dqParagraph.Style.FontName != Settings.Default.ExpectedFontName)
            {
                errors.Add(new DqFontError($"Неверный шрифт ({dqParagraph.Style.FontName}). Иcпользуйте {Settings.Default.ExpectedFontName}."));
            }

            // Alignment and indent; within each branch the indent is only reported when the alignment is acceptable.
            if (isHeader)
            {
                if (dqParagraph.Style.Aligment == DqAligment.Center || dqParagraph.Style.Aligment == DqAligment.Right)
                {
                    errors.Add(new DqAlignmentError("Заголовки главной части должны быть выровнены по левому краю."));
                }
                else if (dqParagraph.Style.Indent == 0)
                {
                    errors.Add(new DqAlignmentError("Отсутвует абзацный отступ."));
                }
            }
            else if (isFigureCaption)
            {
                if (dqParagraph.Style.Aligment != DqAligment.Center)
                {
                    errors.Add(new DqAlignmentError("Подпись рисунка должна быть выровнена по центру."));
                }
                else if (dqParagraph.Style.Indent != 0)
                {
                    errors.Add(new DqAlignmentError("При выравнивании по центру должен отсутвовать абзацный отступ."));
                }
            }
            else if (isTableCaption)
            {
                if (dqParagraph.Style.Aligment == DqAligment.Center || dqParagraph.Style.Aligment == DqAligment.Right)
                {
                    errors.Add(new DqAlignmentError("Заголовок таблицы должен быть выровнен по левому краю."));
                }
                else if (dqParagraph.Style.Indent != 0)
                {
                    errors.Add(new DqAlignmentError("Заголовок таблицы помещают без абзацного отступа."));
                }
            }
            else
            {
                // Ordinary body text: only the paragraph indent is required.
                if (dqParagraph.Style.Indent == 0)
                {
                    errors.Add(new DqAlignmentError("Отсутствует абзацный отступ."));
                }
            }

            // Line spacing is only enforced for ordinary body text.
            var isBodyText = !isHeader && !isFigureCaption && !isTableCaption;
            if (isBodyText && dqParagraph.Style.SpacingBetweenLines != Settings.Default.ExpectedSpacingBetweenLines)
            {
                errors.Add(new DqError($"Неверный междустрочный интервал ({dqParagraph.Style.SpacingBetweenLines}). Ожидается {Settings.Default.ExpectedSpacingBetweenLines}-ый интервал."));
            }
        }
Exemple #13
0
        /// <summary>
        /// Splits the document into parts by locating paragraphs whose pure text is one of the
        /// known part titles.
        /// </summary>
        /// <remarks>
        /// Every found title starts a part that extends up to the next found title (the last part
        /// extends to the end of the document); the paragraphs before the first title become the
        /// title part. When no title is found, the whole document is returned as the main part.
        /// </remarks>
        /// <param name="document">Document to split.</param>
        /// <returns>The detected structure.</returns>
        /// <exception cref="InvalidOperationException">No part title is found and the document has no paragraphs.</exception>
        public DqStructure PrimaryParse(DqDocument document)
        {
            // Titles are located with a case-insensitive ordinal comparison (see TryFindByTitle),
            // so the lookup table uses the same comparison: any paragraph that was matched is
            // then guaranteed to resolve to a part type, whatever the casing of the configured
            // title or the current culture. Titles differing only in case collapse to the first one.
            var partTypeByTitle = TitlesByPartType
                .SelectMany(kv => kv.Value.Select(title => (Type: kv.Key, Title: title)))
                .GroupBy(pair => pair.Title, StringComparer.OrdinalIgnoreCase)
                .ToDictionary(group => group.Key, group => group.First().Type, StringComparer.OrdinalIgnoreCase);

            var dqParts = new List<DqPart>();
            var searchFrom = 0;

            while (true)
            {
                var nextPartStart = TryFindByTitle(document, partTypeByTitle.Keys, searchFrom);
                if (nextPartStart == null)
                {
                    break;
                }

                var start = document.Paragraphs[nextPartStart.Value];
                dqParts.Add(new DqPart
                {
                    Type = partTypeByTitle[start.GetPureText()],
                    Start = start,
                });

                searchFrom = nextPartStart.Value;
            }

            if (dqParts.Count == 0)
            {
                var mainPart = new DqMainPart();
                mainPart.Paragraphs.AddRange(document.Paragraphs);
                mainPart.Start = document.Paragraphs.First();
                return new DqStructure
                {
                    MainPart = mainPart
                };
            }

            // A part runs from its own heading up to the heading of the next part; the last part
            // runs to the end of the document.
            for (var i = 0; i < dqParts.Count; ++i)
            {
                var dqPart = dqParts[i];
                var endIndex = i + 1 < dqParts.Count
                    ? dqParts[i + 1].Start.Index
                    : document.Paragraphs.Count;

                dqPart.Paragraphs.AddRange(document.Paragraphs.GetRange(dqPart.Start.Index, endIndex - dqPart.Start.Index));
            }

            var report = new DqStructure();

            // Everything before the first detected part is treated as the title part.
            report.Title = CopyContent(new DqPart
            {
                Type = PartType.Title,
                Start = document.Paragraphs.First()
            }, document, 0, dqParts.First().Start.Index);

            foreach (var dqPart in dqParts)
            {
                switch (dqPart.Type)
                {
                    case PartType.Abstract:
                        report.Abstracts.Add(dqPart);
                        break;

                    case PartType.Toc:
                        report.Toc = dqPart;
                        break;

                    case PartType.Introduction:
                        report.Introduction = dqPart;
                        break;

                    case PartType.Conclusion:
                        report.Conclusion = dqPart;
                        break;

                    case PartType.Bibliography:
                        report.Bibliography = dqPart;
                        break;

                    case PartType.Annex:
                        report.Appendixes = dqPart;
                        break;
                }
            }

            return report;
        }
Exemple #14
0
 /// <summary>
 /// Appends the document paragraphs in the range [<paramref name="partStart"/>, <paramref name="partEnd"/>)
 /// to the given part.
 /// </summary>
 /// <param name="dqPart">Part that receives the paragraphs.</param>
 /// <param name="dqDocument">Document the paragraphs are taken from.</param>
 /// <param name="partStart">Index of the first paragraph to copy.</param>
 /// <param name="partEnd">Index just past the last paragraph to copy; must have a value.</param>
 /// <returns>The same <paramref name="dqPart"/> instance.</returns>
 private DqPart CopyContent(DqPart dqPart, DqDocument dqDocument, int partStart, int? partEnd)
 {
     var length = partEnd.Value - partStart;
     var slice = dqDocument.Paragraphs.GetRange(partStart, length);

     dqPart.Paragraphs.AddRange(slice);
     return dqPart;
 }
Exemple #15
0
        /// <summary>
        /// Parses figure, table and source references in every paragraph and cross-checks them
        /// against the corresponding declarations.
        /// </summary>
        /// <remarks>
        /// For figures and tables: references without a declaration and declarations without a
        /// reference are flagged as missing, declarations must be numbered in increasing order,
        /// and the first reference must be placed before the declaration. For sources only the
        /// missing flags are set.
        /// </remarks>
        /// <param name="document">Document whose paragraphs are parsed and annotated.</param>
        public void ParseReferences(DqDocument document)
        {
            var figureReferenceParser = new DqFigureReferenceParser();
            var tableReferenceParser = new DqTableReferenceParser();
            var sourceReferenceParser = new DqSourceReferenceParser();

            foreach (var paragraph in document.Paragraphs)
            {
                figureReferenceParser.Parse(paragraph);
                tableReferenceParser.Parse(paragraph);
                sourceReferenceParser.Parse(paragraph, document);
            }

            var structure = document.Paragraphs.SelectMany(p => p.Meta.Structure).OfType<DqNumberedElement>().ToList();

            var figureDeclarations = SelectElementsOfType(structure, DqStructureElementType.FigureDeclaration);
            var figureReferences = SelectElementsOfType(structure, DqStructureElementType.FigureReference);
            FlagReferencesWithoutDeclaration(figureReferences, figureDeclarations);
            ReportNumberingOrderErrors(figureDeclarations);
            FlagDeclarationsWithoutReference(figureDeclarations, figureReferences, "Первая ссылка должна быть до рисунка.");

            var tableDeclarations = SelectElementsOfType(structure, DqStructureElementType.TableDeclaration);
            var tableReferences = SelectElementsOfType(structure, DqStructureElementType.TableReference);
            FlagReferencesWithoutDeclaration(tableReferences, tableDeclarations);
            ReportNumberingOrderErrors(tableDeclarations);
            FlagDeclarationsWithoutReference(tableDeclarations, tableReferences, "Первая ссылка должна быть до таблицы.");

            // Sources are neither checked for numbering order nor for the position of the first reference.
            var sourceDeclarations = SelectElementsOfType(structure, DqStructureElementType.SourceDeclaration);
            var sourceReferences = SelectElementsOfType(structure, DqStructureElementType.SourceReference);
            FlagReferencesWithoutDeclaration(sourceReferences, sourceDeclarations);
            FlagDeclarationsWithoutReference(sourceDeclarations, sourceReferences, null);
        }

        /// <summary>Returns the elements of the given structure type, in document order.</summary>
        private static List<DqNumberedElement> SelectElementsOfType(List<DqNumberedElement> elements, DqStructureElementType type)
        {
            return elements.Where(element => element.Type == type).ToList();
        }

        /// <summary>Sets <c>IsMissing</c> on every reference according to whether a declaration with the same number exists.</summary>
        private static void FlagReferencesWithoutDeclaration(List<DqNumberedElement> references, List<DqNumberedElement> declarations)
        {
            foreach (var reference in references)
            {
                reference.IsMissing = declarations.All(declaration => declaration.Number != reference.Number);
            }
        }

        /// <summary>Adds an error to every declaration whose parsable number does not exceed the number of the declaration before it.</summary>
        private static void ReportNumberingOrderErrors(List<DqNumberedElement> declarations)
        {
            for (var i = 1; i < declarations.Count; i++)
            {
                var previous = declarations[i - 1];
                var current = declarations[i];

                // Numbers that cannot be parsed are not compared.
                var previousNumber = DqNumber.TryParse(previous.Number);
                var currentNumber = DqNumber.TryParse(current.Number);

                if (previousNumber != null && currentNumber != null && previousNumber.CompareTo(currentNumber) >= 0)
                {
                    current.Paragraph.Meta.Errors.Add(
                        new DqError($"Неправильный порядок нумерации ({current.Number} после {previous.Number})"));
                }
            }
        }

        /// <summary>
        /// Flags declarations that are never referenced; when <paramref name="lateReferenceMessage"/> is not null,
        /// also reports a first reference that does not precede its declaration.
        /// </summary>
        private static void FlagDeclarationsWithoutReference(List<DqNumberedElement> declarations, List<DqNumberedElement> references, string lateReferenceMessage)
        {
            foreach (var declaration in declarations)
            {
                var firstReference = references.FirstOrDefault(reference => reference.Number == declaration.Number);
                if (firstReference == null)
                {
                    declaration.IsMissing = true;
                }
                else if (lateReferenceMessage != null && firstReference.Paragraph.Index >= declaration.Paragraph.Index)
                {
                    // The error is attached to the paragraph of the reference, not of the declaration.
                    firstReference.Paragraph.Meta.Errors.Add(new DqError(lateReferenceMessage));
                }
            }
        }