/// <summary>
/// Converts an anchor node into a Word <see cref="Hyperlink"/> and appends it to the paragraph.
/// </summary>
/// <param name="node">The node to convert. Null or hidden nodes are ignored.</param>
/// <param name="paragraph">
/// The paragraph receiving the hyperlink; passed by reference to <c>CreateParagraph</c>
/// before the hyperlink is appended.
/// </param>
/// <remarks>
/// When the cleaned value of the <c>href</c> attribute is not a well-formed absolute URI the node
/// is handed to <c>ProcessNonLinkText</c> instead and no hyperlink relationship is created.
/// </remarks>
internal override void Process(DocxNode node, ref Paragraph paragraph)
{
    if (node.IsNull() || IsHidden(node))
    {
        return;
    }

    string target = CleanUrl(node.ExtractAttributeValue(href));

    if (!Uri.IsWellFormedUriString(target, UriKind.Absolute))
    {
        ProcessNonLinkText(node, ref paragraph);
        return;
    }

    var targetUri = new Uri(target);
    var relationship = context.MainDocumentPart.AddHyperlinkRelationship(targetUri, targetUri.IsAbsoluteUri);
    var hyperLink = new Hyperlink { History = true, Id = relationship.Id };

    foreach (DocxNode child in node.Children)
    {
        if (!child.IsText)
        {
            // Non-text children are re-parented onto the hyperlink and processed as text elements.
            child.Parent = hyperLink;
            node.CopyExtentedStyles(child);
            ProcessTextElement(child);
            continue;
        }

        if (IsEmptyText(child.InnerHtml))
        {
            continue;
        }

        var text = new Text
        {
            Text = ClearHtml(child.InnerHtml),
            Space = SpaceProcessingModeValues.Preserve
        };

        Run run = hyperLink.AppendChild(new Run(text));
        run.RunProperties = new RunProperties(new RunStyle { Val = "Hyperlink" });

        // The anchor node itself (not the text child) is passed to the run callback.
        RunCreated(node, run);
    }

    CreateParagraph(node, ref paragraph);
    paragraph.Append(hyperLink);
}
/// <summary>
/// Bookmarks a range that starts in a plain run and ends in a run nested inside a hyperlink,
/// then checks the resulting number of runs and bookmark markers in the paragraph.
/// </summary>
public void TestBookmarkRange_HandlesNestedRuns()
{
    // Arrange: paragraph 0 holds "This is a " in a direct run followed by
    // "test string." in a run wrapped by a hyperlink.
    var document = new OxmlDocument(this.oxml);
    document._paragraphs[0] = new Paragraph();
    Paragraph target = document._paragraphs[0];

    var plainRun = new Run();
    var linkedRun = new Run();
    var link = new Hyperlink();

    link.AppendChild(linkedRun);
    target.AppendChild(plainRun);
    target.AppendChild(link);
    plainRun.AppendChild(new Text("This is a "));
    linkedRun.AppendChild(new Text("test string."));

    // Act
    // NOTE(review): the arguments (0, 4, 15) presumably mean paragraph index, start and end
    // offsets - confirm against OxmlRange.
    document.BookmarkRange(new OxmlRange_Accessor(0, 4, 15), "testbookmark");

    // Assert
    Paragraph result = document._paragraphs[0];
    Assert.AreEqual(4, result.Descendants<Run>().Count());
    Assert.AreEqual(1, result.Descendants<BookmarkStart>().Count());
    Assert.AreEqual(1, result.Descendants<BookmarkEnd>().Count());
}
/// <summary>
/// Converts a single Markdown span into the Open XML elements that represent it.
/// </summary>
/// <param name="md">The span to convert; it is also recorded as <c>reporter.CurrentSpan</c>.</param>
/// <param name="nestedSpan">
/// <c>true</c> when the span is being converted as the content of an enclosing strong/emphasis span.
/// In that case term definitions and production references are not recognised.
/// </param>
/// <returns>
/// A lazily-evaluated sequence of elements. Problems are reported through <c>reporter</c>
/// (codes MD15-MD20 and MD28) rather than thrown.
/// </returns>
IEnumerable<OpenXmlElement> Span2Elements(MarkdownSpan md, bool nestedSpan = false)
{
    reporter.CurrentSpan = md;

    if (md.IsLiteral)
    {
        var mdl = md as MarkdownSpan.Literal;
        var unescaped = MarkdownUtilities.UnescapeLiteral(mdl);
        foreach (var r in Literal2Elements(unescaped, nestedSpan))
        {
            yield return r;
        }
    }
    else if (md.IsStrong || md.IsEmphasis)
    {
        IEnumerable<MarkdownSpan> spans = md.IsStrong
            ? (md as MarkdownSpan.Strong).body
            : (md as MarkdownSpan.Emphasis).body;

        // Workaround for https://github.com/tpetricek/FSharp.formatting/issues/389 - the markdown parser
        // turns *this_is_it* into a nested Emphasis["this", Emphasis["is"], "it"] instead of Emphasis["this_is_it"]
        // What we'll do is preprocess it into Emphasis["this_is_it"]
        if (md.IsEmphasis)
        {
            var pieces = spans.Select(span =>
            {
                var wrap = "";
                if (span.IsEmphasis)
                {
                    span = (span as MarkdownSpan.Emphasis).body.Single();
                    wrap = "_";
                }
                if (span.IsLiteral)
                {
                    return wrap + (span as MarkdownSpan.Literal).text + wrap;
                }
                reporter.Error("MD15", $"something odd inside emphasis '{span.GetType().Name}' - only allowed emphasis and literal");
                return "";
            });
            spans = new List<MarkdownSpan>
            {
                MarkdownSpan.NewLiteral(string.Join("", pieces), FSharpOption<MarkdownRange>.None)
            };
        }

        // Convention is that ***term*** is used to define a term.
        // That's parsed as Strong, which contains Emphasis, which contains one Literal
        string literal = null;
        TermRef termdef = null;
        if (!nestedSpan && md.IsStrong && spans.Count() == 1 && spans.First().IsEmphasis)
        {
            var inner = (spans.First() as MarkdownSpan.Emphasis).body;
            if (inner.Count() == 1 && inner.First().IsLiteral)
            {
                literal = (inner.First() as MarkdownSpan.Literal).text;
                termdef = new TermRef(literal, reporter.Location);
                if (context.Terms.ContainsKey(literal))
                {
                    var previous = context.Terms[literal];
                    reporter.Warning("MD16", $"Term '{literal}' defined a second time");
                    reporter.Warning("MD16b", $"Here was the previous definition of term '{literal}'", previous.Loc);
                }
                else
                {
                    context.Terms.Add(literal, termdef);
                    context.TermKeys.Clear();
                }
            }
        }

        // Convention inside our specs is that emphasis only ever contains literals,
        // either to emphasise some human-text or to refer to an ANTLR-production
        ProductionRef prodref = null;
        if (!nestedSpan && md.IsEmphasis && (spans.Count() != 1 || !spans.First().IsLiteral))
        {
            reporter.Error("MD17", "something odd inside emphasis");
        }
        if (!nestedSpan && md.IsEmphasis && spans.Count() == 1 && spans.First().IsLiteral)
        {
            literal = (spans.First() as MarkdownSpan.Literal).text;
            prodref = productions.FirstOrDefault(pr => pr.Names.Contains(literal));
            var kind = prodref != null ? ItalicUse.ItalicUseKind.Production : ItalicUse.ItalicUseKind.Italic;
            context.Italics.Add(new ItalicUse(literal, kind, reporter.Location));
        }

        if (prodref != null)
        {
            // Reference to a production: coloured, underlined link to the production's bookmark.
            var props = new RunProperties(new Color { Val = "6A5ACD" }, new Underline { Val = UnderlineValues.Single });
            var run = new Run(new Text(literal) { Space = SpaceProcessingModeValues.Preserve }) { RunProperties = props };
            var link = new Hyperlink(run) { Anchor = prodref.BookmarkName };
            yield return link;
        }
        else if (termdef != null)
        {
            // Term definition: bold-italic run wrapped in a bookmark.
            // The id is captured once so that BookmarkStart and BookmarkEnd always agree, even if
            // the shared counter is advanced while this iterator is suspended between yields.
            context.MaxBookmarkId.Value += 1;
            var bookmarkId = context.MaxBookmarkId.Value.ToString();
            yield return new BookmarkStart { Name = termdef.BookmarkName, Id = bookmarkId };
            var props = new RunProperties(new Italic(), new Bold());
            yield return new Run(new Text(literal) { Space = SpaceProcessingModeValues.Preserve }) { RunProperties = props };
            yield return new BookmarkEnd { Id = bookmarkId };
        }
        else
        {
            foreach (var e in Spans2Elements(spans, true))
            {
                var style = md.IsStrong ? new Bold() as OpenXmlElement : new Italic();
                var run = e as Run;
                if (run != null)
                {
                    run.InsertAt(new RunProperties(style), 0);
                }
                yield return e;
            }
        }
    }
    else if (md.IsInlineCode)
    {
        var mdi = md as MarkdownSpan.InlineCode;
        var code = mdi.code;
        var txt = new Text(BugWorkaroundDecode(code)) { Space = SpaceProcessingModeValues.Preserve };
        var props = new RunProperties(new RunStyle { Val = "CodeEmbedded" });
        var run = new Run(txt) { RunProperties = props };
        yield return run;
    }
    else if (md.IsLatexInlineMath)
    {
        var latex = md as MarkdownSpan.LatexInlineMath;
        var code = latex.code;

        // TODO: Make this look nice - if we actually need it. It's possible that it's only present
        // before subscripts are replaced.
        var txt = new Text(BugWorkaroundDecode(code)) { Space = SpaceProcessingModeValues.Preserve };
        var props = new RunProperties(new RunStyle { Val = "CodeEmbedded" });
        var run = new Run(txt) { RunProperties = props };
        yield return run;
    }
    else if (md.IsDirectLink || md.IsIndirectLink)
    {
        IEnumerable<MarkdownSpan> spans;
        string url = "", alt = "";
        if (md.IsDirectLink)
        {
            var mddl = md as MarkdownSpan.DirectLink;
            spans = mddl.body;
            url = mddl.link;
            alt = mddl.title.Option();
        }
        else
        {
            var mdil = md as MarkdownSpan.IndirectLink;
            var id = mdil.key;
            spans = mdil.body;

            // An undefined key leaves url empty, which falls through to the final branch below.
            if (markdownDocument.DefinedLinks.ContainsKey(id))
            {
                url = markdownDocument.DefinedLinks[id].Item1;
                alt = markdownDocument.DefinedLinks[id].Item2.Option();
            }
        }

        var anchor = "";
        if (spans.Count() == 1 && spans.First().IsLiteral)
        {
            anchor = MarkdownUtilities.UnescapeLiteral(spans.First() as MarkdownSpan.Literal);
        }
        else if (spans.Count() == 1 && spans.First().IsInlineCode)
        {
            anchor = (spans.First() as MarkdownSpan.InlineCode).code;
        }
        else
        {
            reporter.Error("MD18", $"Link anchor must be Literal or InlineCode, not '{md.GetType().Name}'");
            yield break;
        }

        if (sections.ContainsKey(url))
        {
            var section = sections[url];

            // If we're linking to something with a section number, we know what the link text should be.
            // (There are a few links that aren't to numbered sections, e.g. to "Annex C".)
            if (section.Number is object)
            {
                var expectedAnchor = "§" + section.Number;
                if (anchor != expectedAnchor)
                {
                    reporter.Warning("MD19", $"Mismatch: link anchor is '{anchor}', should be '{expectedAnchor}'");
                }
            }

            var txt = new Text(anchor) { Space = SpaceProcessingModeValues.Preserve };
            var run = new Hyperlink(new Run(txt)) { Anchor = section.BookmarkName };
            yield return run;
        }
        else if (url.StartsWith("http:", System.StringComparison.Ordinal)
              || url.StartsWith("https:", System.StringComparison.Ordinal))
        {
            // URL schemes are compared ordinally; a culture-sensitive prefix test is not appropriate here.
            var hyperlink = new Hyperlink { DocLocation = url, Tooltip = alt };
            foreach (var element in Spans2Elements(spans))
            {
                var run = element as Run;
                if (run == null)
                {
                    // NOTE(review): non-Run elements are not added to the hyperlink. This matches the
                    // previous behaviour, where a null was handed to AppendChild - confirm it is intended.
                    continue;
                }

                // Each run needs its own RunStyle instance: an Open XML element that already has a
                // parent cannot be added to a second RunProperties.
                run.InsertAt(new RunProperties(new RunStyle { Val = "Hyperlink" }), 0);
                hyperlink.AppendChild(run);
            }
            yield return hyperlink;
        }
        else
        {
            // TODO: Make this report an error unconditionally once the subscript "latex-like" Markdown is removed.
            if (url != "")
            {
                reporter.Error("MD28", $"Hyperlink url '{url}' unrecognized - not a recognized heading, and not http");
            }
        }
    }
    else if (md.IsHardLineBreak)
    {
        // I've only ever seen this arise from dodgy markdown parsing, so I'll ignore it...
    }
    else
    {
        reporter.Error("MD20", $"Unrecognized markdown element {md.GetType().Name}");
        yield return new Run(new Text($"[{md.GetType().Name}]"));
    }
}
/// <summary>
/// Converts a single Markdown span into the Open XML elements that represent it.
/// </summary>
/// <param name="md">The span to convert.</param>
/// <returns>A lazily-evaluated sequence of elements; hard line breaks produce nothing.</returns>
/// <exception cref="NotImplementedException">
/// A link whose body is not exactly one literal or one inline-code span.
/// </exception>
/// <exception cref="Exception">
/// A section link whose text differs from the section title, or a link whose URL is neither a
/// known section nor an http/https address.
/// </exception>
IEnumerable<OpenXmlElement> Span2Elements(MarkdownSpan md)
{
    if (md.IsLiteral)
    {
        var unescaped = mdunescape(md as MarkdownSpan.Literal);
        yield return new Run(new Text(unescaped) { Space = SpaceProcessingModeValues.Preserve });
        yield break;
    }

    if (md.IsStrong || md.IsEmphasis)
    {
        IEnumerable<MarkdownSpan> children;
        if (md.IsStrong)
        {
            children = (md as MarkdownSpan.Strong).Item;
        }
        else
        {
            children = (md as MarkdownSpan.Emphasis).Item;
        }

        // Workaround for https://github.com/tpetricek/FSharp.formatting/issues/389 - the markdown parser
        // turns *this_is_it* into a nested Emphasis["this", Emphasis["is"], "it"] instead of Emphasis["this_is_it"]
        // What we'll do is preprocess it into Emphasis["this", "_", "is" "_", "it"]
        if (md.IsEmphasis)
        {
            var flattened = new List<MarkdownSpan>();
            foreach (var child in children)
            {
                if (child.IsEmphasis)
                {
                    flattened.Add(MarkdownSpan.NewLiteral("_"));
                    flattened.AddRange((child as MarkdownSpan.Emphasis).Item);
                    flattened.Add(MarkdownSpan.NewLiteral("_"));
                }
                else
                {
                    flattened.Add(child);
                }
            }
            children = flattened;
        }

        foreach (var element in Spans2Elements(children))
        {
            OpenXmlElement formatting;
            if (md.IsStrong)
            {
                formatting = new Bold();
            }
            else
            {
                formatting = new Italic();
            }

            // Only runs receive the bold/italic properties; every element is passed through.
            var asRun = element as Run;
            if (asRun != null)
            {
                asRun.InsertAt(new RunProperties(formatting), 0);
            }
            yield return element;
        }
        yield break;
    }

    if (md.IsInlineCode)
    {
        var source = (md as MarkdownSpan.InlineCode).Item;
        var codeText = new Text(BugWorkaroundDecode(source)) { Space = SpaceProcessingModeValues.Preserve };
        var codeProps = new RunProperties(new RunStyle { Val = "CodeEmbedded" });
        yield return new Run(codeText) { RunProperties = codeProps };
        yield break;
    }

    if (md.IsDirectLink || md.IsIndirectLink)
    {
        IEnumerable<MarkdownSpan> body;
        string url = "";
        string alt = "";

        if (md.IsDirectLink)
        {
            var direct = md as MarkdownSpan.DirectLink;
            body = direct.Item1;
            url = direct.Item2.Item1;
            alt = direct.Item2.Item2.Option();
        }
        else
        {
            var indirect = md as MarkdownSpan.IndirectLink;
            var key = indirect.Item3;
            body = indirect.Item1;

            // An undefined key leaves url and alt as empty strings.
            if (mddoc.DefinedLinks.ContainsKey(key))
            {
                var definition = mddoc.DefinedLinks[key];
                url = definition.Item1;
                alt = definition.Item2.Option();
            }
        }

        bool singleSpan = body.Count() == 1;
        string anchor;
        if (singleSpan && body.First().IsLiteral)
        {
            anchor = mdunescape(body.First() as MarkdownSpan.Literal);
        }
        else if (singleSpan && body.First().IsInlineCode)
        {
            anchor = (body.First() as MarkdownSpan.InlineCode).Item;
        }
        else
        {
            throw new NotImplementedException("Link anchor must be Literal or InlineCode, not " + md.ToString());
        }

        if (sections.ContainsKey(url))
        {
            // Link to a known section: the visible text becomes the section number.
            var target = sections[url];
            if (anchor != target.Title)
            {
                throw new Exception($"Mismatch: link anchor is '{anchor}', should be '{target.Title}'");
            }

            var label = new Text("§" + target.Number) { Space = SpaceProcessingModeValues.Preserve };
            yield return new Hyperlink(new Run(label)) { Anchor = target.BookmarkName };
            yield break;
        }

        if (!url.StartsWith("http:") && !url.StartsWith("https:"))
        {
            throw new Exception("Absent hyperlink in " + md.ToString());
        }

        var linkStyle = new RunStyle { Val = "Hyperlink" };
        var hyperlink = new Hyperlink { DocLocation = url, Tooltip = alt };
        foreach (var element in Spans2Elements(body))
        {
            // NOTE(review): the same RunStyle instance is reused for every run, and a non-Run
            // element reaches AppendChild as null - kept exactly as before.
            var asRun = element as Run;
            if (asRun != null)
            {
                asRun.InsertAt(new RunProperties(linkStyle), 0);
            }
            hyperlink.AppendChild(asRun);
        }
        yield return hyperlink;
        yield break;
    }

    if (md.IsHardLineBreak)
    {
        // I've only ever seen this arise from dodgy markdown parsing, so I'll ignore it...
        yield break;
    }

    // Anything else is rendered as a visible placeholder naming the span type.
    yield return new Run(new Text($"[{md.GetType().Name}]"));
}
/// <summary>
/// Builds a Word <see cref="Hyperlink"/> from an anchor node and appends it to the paragraph.
/// </summary>
/// <param name="node">The node to convert. Nothing happens when it is null or hidden.</param>
/// <param name="paragraph">
/// The paragraph the hyperlink is appended to, after it has been passed by reference to
/// <c>CreateParagraph</c>.
/// </param>
/// <remarks>
/// A node whose cleaned <c>href</c> attribute value is not a well-formed absolute URI is forwarded to
/// <c>ProcessNonLinkText</c> and no hyperlink is produced.
/// </remarks>
internal override void Process(DocxNode node, ref Paragraph paragraph)
{
    if (node.IsNull() || IsHidden(node))
    {
        return;
    }

    string address = CleanUrl(node.ExtractAttributeValue(href));
    bool isLink = Uri.IsWellFormedUriString(address, UriKind.Absolute);

    if (!isLink)
    {
        ProcessNonLinkText(node, ref paragraph);
        return;
    }

    var linkUri = new Uri(address);
    var relationship = context.MainDocumentPart.AddHyperlinkRelationship(linkUri, linkUri.IsAbsoluteUri);
    var hyperLink = new Hyperlink { History = true, Id = relationship.Id };

    foreach (DocxNode child in node.Children)
    {
        if (child.IsText)
        {
            if (IsEmptyText(child.InnerHtml))
            {
                continue;
            }

            var content = new Text
            {
                Text = ClearHtml(child.InnerHtml),
                Space = SpaceProcessingModeValues.Preserve
            };

            Run run = hyperLink.AppendChild(new Run(content));
            run.RunProperties = new RunProperties(new RunStyle { Val = "Hyperlink" });

            // The callback receives the anchor node, not the text child.
            RunCreated(node, run);
        }
        else
        {
            // Element children are attached to the hyperlink and processed as text elements.
            child.Parent = hyperLink;
            node.CopyExtentedStyles(child);
            ProcessTextElement(child);
        }
    }

    CreateParagraph(node, ref paragraph);
    paragraph.Append(hyperLink);
}