/// <summary>
/// Smoke test: adds a link reference to Markdown that already contains
/// several reference-style links and checks that a result is returned.
/// </summary>
public void ManyLinksTest()
{
    // NOTE(review): the fixture is a single-line literal here while the selection
    // below targets row 3 - confirm the literal's line breaks are intact.
    string markdown = @" This is a link to my [Web site][1]. And some more text And here is some more [text][2]. Here is [another link][3]. Done. [1]: http://west-wind.com [2]: http://weblog.west-wind.com [3]: http://markdownmonster.west-wind.com";

    var selection = new SelectionRange
    {
        StartColumn = 0,
        StartRow = 3,
        EndColumn = 8,
        EndRow = 3
    };

    var result = MarkdownUtilities.AddLinkReference(markdown, selection, "https://websurge.west-wind.com");

    // Assert before dereferencing: a null result should fail as an assertion,
    // not as a NullReferenceException raised by the diagnostic output below.
    Assert.IsNotNull(result);

    Console.WriteLine(result.Markdown);
}
/// <summary>
/// Checks that parsing a Microsoft Docs folder URL yields a URL that
/// contains "index.md" (compared case-insensitively).
/// </summary>
public void ParseMicrosoftDocsUrl()
{
    const string docsUrl = "https://docs.microsoft.com/en-us/dotnet/csharp/getting-started/";

    var parsedUrl = MarkdownUtilities.ParseMarkdownUrl(docsUrl);
    bool referencesIndexDocument = parsedUrl.Contains("index.md", StringComparison.InvariantCultureIgnoreCase);

    Assert.IsTrue(referencesIndexDocument);

    // Diagnostic output only.
    Console.WriteLine(parsedUrl);
}
/// <summary>
/// Checks that parsing a GitHub repository root URL yields a URL that
/// contains "readme.md" (compared case-insensitively).
/// </summary>
public void ParseGithubUrl()
{
    const string repositoryUrl = "https://github.com/RickStrahl/MarkdownMonster";

    var parsedUrl = MarkdownUtilities.ParseMarkdownUrl(repositoryUrl);
    bool referencesReadme = parsedUrl.Contains("readme.md", StringComparison.InvariantCultureIgnoreCase);

    Assert.IsTrue(referencesReadme);

    // Diagnostic output only.
    Console.WriteLine(parsedUrl);
}
/// <summary>
/// Checks that parsing a GitHub URL that already points at a specific
/// Markdown document ("/blob/master/Todo.md") yields a raw-content URL.
/// </summary>
public void ParseGithubUrlAlreadyRaw()
{
    var url = "https://github.com/RickStrahl/MarkdownMonster/blob/master/Todo.md";

    var fixedupUrl = MarkdownUtilities.ParseMarkdownUrl(url);
    Console.WriteLine(fixedupUrl);

    // The input is a document ("blob") URL rather than a repository root, so the
    // result is expected to reference raw content - either through a "/raw/"
    // path segment or through the raw.githubusercontent.com host.
    Assert.IsTrue(fixedupUrl.Contains("/raw/") ||
                  fixedupUrl.Contains("raw.githubusercontent.com"));
}
/// <summary>
/// Retrieves Weblog Metadata and Post Data from a Jekyll post on disk
/// </summary>
/// <param name="jekyllPostFilename">Full path to a Jekyll post on disk</param>
/// <param name="post">
/// Post instance that is passed on to
/// <see cref="WeblogPostMetadata.GetPostYamlConfigFromMarkdown"/> and that
/// receives the resolved creation date and a plain-text excerpt.
/// </param>
/// <returns>
/// The metadata parsed from the file, or null when the file cannot be read,
/// is empty, or no metadata instance is returned by the parser.
/// </returns>
private WeblogPostMetadata GetPostMetaDataFromFile(string jekyllPostFilename, Post post)
{
    string content;
    try
    {
        content = File.ReadAllText(jekyllPostFilename);
    }
    catch
    {
        // Best effort: an unreadable file is reported as "no metadata".
        return null;
    }

    if (string.IsNullOrEmpty(content))
    {
        return null;
    }

    var meta = WeblogPostMetadata.GetPostYamlConfigFromMarkdown(content, post);
    if (meta == null)
    {
        return null;
    }

    string dateString = MarkdownUtilities.ExtractYamlValue(meta.YamlFrontMatter, "date");
    DateTime date;
    if (!DateTime.TryParse(dateString, out date))
    {
        // Fall back to a date prefix in the file name (presumably the Jekyll
        // "yyyy-MM-dd-title.md" naming scheme). The prefix must come from the
        // file name, not from the full path, and names shorter than 10
        // characters must not throw.
        string fileName = Path.GetFileName(jekyllPostFilename) ?? string.Empty;
        string datePrefix = fileName.Length >= 10 ? fileName.Substring(0, 10) : fileName;

        if (!DateTime.TryParse(datePrefix, out date))
        {
            date = DateTime.Now.Date;
        }
    }

    post.DateCreated = date;
    meta.PostDate = date;

    // The excerpt is the first 180 characters of the plain-text body.
    content = Markdown.ToPlainText(meta.MarkdownBody);
    post.mt_excerpt = StringUtils.TextAbstract(content, 180);

    return meta;
}
/// <summary>
/// Smoke test: adds the first link reference to Markdown that contains no
/// links yet and checks that a result is returned.
/// </summary>
public void AddFirstLinkTest()
{
    // NOTE(review): the fixture is a single-line literal here while the selection
    // below targets row 3 - confirm the literal's line breaks are intact.
    string markdown = @" This is a link to my Web site. And some more text And here is some more text. Here is link. Done. ";

    var selection = new SelectionRange
    {
        StartColumn = 0,
        StartRow = 3,
        EndColumn = 8,
        EndRow = 3
    };

    var result = MarkdownUtilities.AddLinkReference(markdown, selection, "https://websurge.west-wind.com");

    // Assert before dereferencing: a null result should fail as an assertion,
    // not as a NullReferenceException raised by the diagnostic output below.
    Assert.IsNotNull(result);

    Console.WriteLine(result.SelectionLength);
    Console.WriteLine(result.Markdown);
}
/// <summary>
/// Strips the Markdown Meta data from the message and populates
/// the post structure with the meta data values.
/// </summary>
/// <param name="markdown">The raw markdown document with YAML header (optional)</param>
/// <param name="post">Optional empty <seealso cref="Post"/> object that is filled with the meta data in.</param>
/// <returns>
/// Metadata deserialized from the YAML front matter. When the document is
/// empty, has no leading "---" line, has no extractable front matter, or the
/// YAML cannot be deserialized, a default instance is returned whose body
/// properties hold the markdown that was passed in.
/// </returns>
public static WeblogPostMetadata GetPostYamlConfigFromMarkdown(string markdown, Post post = null)
{
    var meta = new WeblogPostMetadata()
    {
        RawMarkdownBody = markdown,
        MarkdownBody = markdown,
        WeblogName = WeblogAddinConfiguration.Current.LastWeblogAccessed,
        CustomFields = new Dictionary<string, CustomField>()
    };

    if (string.IsNullOrEmpty(markdown))
    {
        return meta;
    }

    markdown = markdown.Trim();

    // Ordinal comparison: the marker is a literal character sequence, and a
    // culture-sensitive comparison may treat "\r\n" as a single unit so that
    // the "---\r" prefix is not recognized.
    if (!markdown.StartsWith("---\n", StringComparison.Ordinal) &&
        !markdown.StartsWith("---\r", StringComparison.Ordinal))
    {
        return meta;
    }

    // YAML with --- so we can replace
    string extractedYaml = MarkdownUtilities.ExtractFrontMatter(markdown, false);
    if (string.IsNullOrEmpty(extractedYaml))
    {
        return meta;
    }

    // just the YAML text
    var yaml = extractedYaml.Trim('-', ' ', '\r', '\n');
    var input = new StringReader(yaml);

    var deserializer = new DeserializerBuilder()
        .IgnoreUnmatchedProperties()
        .WithNamingConvention(new CamelCaseNamingConvention())
        .Build();

    WeblogPostMetadata yamlMeta = null;
    try
    {
        yamlMeta = deserializer.Deserialize<WeblogPostMetadata>(input);
    }
    catch
    {
        // Best effort: undeserializable front matter yields the default metadata.
        return meta;
    }

    if (yamlMeta == null)
    {
        return meta;
    }

    meta = yamlMeta;

    // Normalize AFTER switching to the deserialized instance, so the returned
    // metadata always carries a non-null CustomFields dictionary, just like
    // the early-return paths above.
    if (meta.CustomFields == null)
    {
        meta.CustomFields = new Dictionary<string, CustomField>();
    }

    meta.MarkdownBody = markdown.Replace(extractedYaml, "");
    meta.RawMarkdownBody = markdown;
    meta.YamlFrontMatter = yaml;

    if (string.IsNullOrEmpty(meta.WeblogName))
    {
        meta.WeblogName = WeblogAddinConfiguration.Current.LastWeblogAccessed;
    }

    if (post != null)
    {
        post.Title = meta.Title?.Trim();

        if (!string.IsNullOrEmpty(meta.Categories))
        {
            post.Categories = meta.Categories
                .Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries)
                .Select(category => category.Trim())
                .ToArray();
        }

        if (!string.IsNullOrEmpty(meta.Keywords))
        {
            post.Tags = meta.Keywords
                .Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries)
                .Select(tag => tag.Trim())
                .ToArray();
        }

        post.Permalink = meta.Permalink;

        // Dates before 2000 are treated as "not set" and replaced with the current time.
        post.DateCreated = meta.PostDate;
        if (post.DateCreated < new DateTime(2000, 1, 1))
        {
            post.DateCreated = DateTime.Now;
        }

        post.mt_excerpt = meta.Abstract;
        post.mt_keywords = meta.Keywords;

        post.CustomFields = meta.CustomFields.Values.ToArray();
    }

    return meta;
}
/// <summary>
/// Writes a downloaded weblog post to a new Markdown file underneath the
/// configured posts folder ("Downloaded/[weblogName]/[title]") and opens
/// that file through <c>Model.Window.OpenTab</c>.
/// </summary>
/// <param name="post">The downloaded post to store.</param>
/// <param name="weblogName">Weblog name; used as a folder name and stored in the post metadata.</param>
public void CreateDownloadedPostOnDisk(Post post, string weblogName)
{
    string safeTitle = SafeFilename(post.Title);
    string postFolder = Path.Combine(WeblogAddinConfiguration.Current.PostsFolder,
                                     "Downloaded",
                                     weblogName,
                                     safeTitle);
    if (!Directory.Exists(postFolder))
    {
        Directory.CreateDirectory(postFolder);
    }

    string outputFile = Path.Combine(postFolder, StringUtils.ToCamelCase(safeTitle) + ".md");

    string body = post.Body;
    string featuredImage = null;
    bool isMarkdown = false;

    if (post.CustomFields != null)
    {
        // An "mt_markdown" custom field replaces the post body as the source text.
        var markdownField = post.CustomFields.FirstOrDefault(custf => custf.ID == "mt_markdown");
        if (markdownField != null)
        {
            body = markdownField.Value;
            isMarkdown = true;
        }

        var thumbnailField = post.CustomFields.FirstOrDefault(custf => custf.ID == "wp_post_thumbnail");
        if (thumbnailField != null)
        {
            featuredImage = thumbnailField.Value;
        }
    }

    if (!isMarkdown)
    {
        bool hasMoreText = !string.IsNullOrEmpty(post.mt_text_more);

        // Wordpress ReadMore syntax: use the stripped body as the abstract
        // when the post has a "more" part but no excerpt.
        if (hasMoreText && string.IsNullOrEmpty(post.mt_excerpt))
        {
            post.mt_excerpt = HtmlUtils.StripHtml(post.Body);
        }

        body = MarkdownUtilities.HtmlToMarkdown(body);
        if (hasMoreText)
        {
            body = body + "\n\n<!--more-->\n\n" + MarkdownUtilities.HtmlToMarkdown(post.mt_text_more);
        }
    }

    string categories = (post.Categories != null && post.Categories.Length > 0)
        ? string.Join(",", post.Categories)
        : null;

    // Create the new post document with the metadata preset
    var metadata = new WeblogPostMetadata()
    {
        Title = post.Title,
        MarkdownBody = body,
        Categories = categories,
        Keywords = post.mt_keywords,
        Abstract = post.mt_excerpt,
        PostId = post.PostID.ToString(),
        WeblogName = weblogName,
        FeaturedImageUrl = featuredImage
    };
    string newPostMarkdown = NewWeblogPost(metadata);
    File.WriteAllText(outputFile, newPostMarkdown);

    mmApp.Configuration.LastFolder = Path.GetDirectoryName(outputFile);

    if (isMarkdown)
    {
        string postHtml = post.Body;
        string imageFolder = mmApp.Configuration.LastFolder;

        // done synchronously so images show up
        ShowStatus("Downloading post images...");
        SaveMarkdownImages(postHtml, imageFolder);
        ShowStatus("Post download complete.", 5000);
    }

    Model.Window.OpenTab(outputFile);
}
/// <summary>
/// Converts an HTML file to a Markdown file.
///
/// mm htmltomarkdown [inputfile] [outputFile] -open
/// </summary>
/// <remarks>
/// The input file, output file and the "open" flag are read from
/// <c>Arguments</c>. When the input file is missing or does not exist, or
/// when no output file is given, a file dialog asks for it; cancelling a
/// dialog ends the command without converting anything.
/// </remarks>
public void HtmlToMarkdown()
{
    Processor.ConsoleHeader();

    string inputFile = Arguments.InputFile;
    string outputFile = Arguments.OutputFile;

    if (string.IsNullOrEmpty(inputFile) || !File.Exists(inputFile))
    {
        var fd = new OpenFileDialog
        {
            DefaultExt = ".html",
            Filter = "HTML files (*.html, *.htm)|*.html;*.htm|" +
                     "All files (*.*)|*.*",
            CheckFileExists = true,
            RestoreDirectory = true,
            Title = "Open HTML File",
            InitialDirectory = Environment.CurrentDirectory
        };

        // NOTE(review): assumes the Microsoft.Win32 dialogs, whose ShowDialog()
        // returns bool? - confirm. A cancelled dialog is reported as false (not
        // null), so only an explicit true may continue.
        var res = fd.ShowDialog();
        if (res != true)
        {
            return;
        }

        inputFile = fd.FileName;
    }

    if (string.IsNullOrEmpty(outputFile))
    {
        var fd = new SaveFileDialog
        {
            DefaultExt = ".md",
            Filter = "Markdown files (*.md,*.markdown,*.mdcrypt)|*.md;*.markdown;*.mdcrypt|" +
                     "All files (*.*)|*.*",
            CheckFileExists = false,
            RestoreDirectory = true,
            Title = "Save as Markdown File",
            InitialDirectory = Path.GetDirectoryName(inputFile),
            FileName = Path.ChangeExtension(Path.GetFileName(inputFile), "md")
        };

        var res = fd.ShowDialog();
        if (res != true)
        {
            return;
        }

        outputFile = fd.FileName;
    }

    string md;
    try
    {
        var html = File.ReadAllText(inputFile);
        md = MarkdownUtilities.HtmlToMarkdown(html, true);
    }
    catch
    {
        // Covers both reading the file and converting its content.
        ColorConsole.WriteError("Failed: Couldn't read input file.");
        Processor.ConsoleFooter();
        return;
    }

    if (!string.IsNullOrEmpty(outputFile))
    {
        try
        {
            File.WriteAllText(outputFile, md);
        }
        catch
        {
            ColorConsole.WriteError("Failed: Couldn't write output file.");
            Processor.ConsoleFooter();
            return;
        }

        if (Arguments.OpenOutputFile)
        {
            // NOTE(review): the path is wrapped in single quotes; verify that the
            // launched process handles them for paths that contain spaces.
            ShellUtils.ExecuteProcess("markdownmonster.exe", $"'{outputFile}'");
        }

        ColorConsole.WriteSuccess($"Created Markdown file: {outputFile}");
        Processor.ConsoleFooter();
    }
}
/// <summary>
/// Stores a downloaded weblog post as a new Markdown file underneath the
/// configured posts folder ("Downloaded/[weblogName]/[title]"), then opens
/// the file in a tab and shows its folder in the folder browser.
/// </summary>
/// <param name="post">The downloaded post to store.</param>
/// <param name="weblogName">Weblog name; used as a folder name and stored in the post metadata.</param>
public void CreateDownloadedPostOnDisk(Post post, string weblogName)
{
    string safeTitle = FileUtils.SafeFilename(post.Title);

    string targetFolder = Path.Combine(WeblogAddinConfiguration.Current.PostsFolder,
                                       "Downloaded",
                                       weblogName,
                                       safeTitle);
    if (!Directory.Exists(targetFolder))
    {
        Directory.CreateDirectory(targetFolder);
    }

    string outputFile = Path.Combine(targetFolder, StringUtils.ToCamelCase(safeTitle) + ".md");

    string body = post.Body;
    string featuredImage = null;
    bool isMarkdown = false;

    if (post.CustomFields != null)
    {
        // An "mt_markdown" custom field replaces the post body as the source text.
        var markdownField = post.CustomFields.FirstOrDefault(custf => custf.Id == "mt_markdown");
        if (markdownField != null)
        {
            isMarkdown = true;
            body = markdownField.Value;
        }

        var thumbnailField = post.CustomFields.FirstOrDefault(custf => custf.Id == "wp_post_thumbnail");
        if (thumbnailField != null)
        {
            featuredImage = thumbnailField.Value;
        }
    }

    if (!isMarkdown)
    {
        if (string.IsNullOrEmpty(post.mt_text_more))
        {
            body = MarkdownUtilities.HtmlToMarkdown(body);
        }
        else
        {
            // Wordpress ReadMore syntax: use the stripped body as the abstract
            // when no excerpt was supplied.
            if (string.IsNullOrEmpty(post.mt_excerpt))
            {
                post.mt_excerpt = HtmlUtils.StripHtml(post.Body);
            }

            string leadMarkdown = MarkdownUtilities.HtmlToMarkdown(body);
            string moreMarker = $"{mmApp.NewLine}{mmApp.NewLine}<!--more-->{mmApp.NewLine}{mmApp.NewLine}";
            string moreMarkdown = MarkdownUtilities.HtmlToMarkdown(post.mt_text_more);

            body = leadMarkdown + moreMarker + moreMarkdown;
        }
    }

    string categories = null;
    bool hasCategories = post.Categories != null && post.Categories.Length > 0;
    if (hasCategories)
    {
        categories = string.Join(",", post.Categories);
    }

    // Build the new post document from the downloaded values
    string newPostMarkdown = NewWeblogPost(new WeblogPostMetadata()
    {
        Title = post.Title,
        MarkdownBody = body,
        Categories = categories,
        Keywords = post.mt_keywords,
        Abstract = post.mt_excerpt,
        PostId = post.PostId.ToString(),
        WeblogName = weblogName,
        FeaturedImageUrl = featuredImage,
        PostDate = post.DateCreated,
        PostStatus = post.PostStatus,
        Permalink = post.Permalink
    });

    try
    {
        File.WriteAllText(outputFile, newPostMarkdown);
    }
    catch (Exception ex)
    {
        // Without the file there is nothing to open: tell the user and stop.
        MessageBox.Show($@"Couldn't write new file at: {outputFile} {ex.Message} ",
                        "Weblog Entry File not created",
                        MessageBoxButton.OK,
                        MessageBoxImage.Warning);
        return;
    }

    mmApp.Configuration.LastFolder = Path.GetDirectoryName(outputFile);

    if (isMarkdown)
    {
        string postHtml = post.Body;
        string imageFolder = mmApp.Configuration.LastFolder;

        // done synchronously so images show up
        ShowStatus("Downloading post images...", mmApp.Configuration.StatusMessageTimeout);
        SaveMarkdownImages(postHtml, imageFolder);
        ShowStatus("Post download complete.", mmApp.Configuration.StatusMessageTimeout);
    }

    Model.Window.OpenTab(outputFile);
    Model.Window.ShowFolderBrowser(folder: Path.GetDirectoryName(outputFile));
}
/// <summary>
/// Converts a single Markdown span into the OpenXML elements that represent it.
/// </summary>
/// <param name="md">The span to convert; it is also recorded as <c>reporter.CurrentSpan</c>.</param>
/// <param name="nestedSpan">
/// True when the span is converted as the content of another span. Term
/// definitions and production/italic bookkeeping only happen for
/// non-nested spans.
/// </param>
/// <returns>
/// A lazily produced sequence of elements. Unsupported or malformed spans are
/// reported through <c>reporter</c>; depending on the case nothing or a
/// placeholder run is produced for them.
/// </returns>
IEnumerable<OpenXmlElement> Span2Elements(MarkdownSpan md, bool nestedSpan = false)
{
    reporter.CurrentSpan = md;

    if (md.IsLiteral)
    {
        var mdl = md as MarkdownSpan.Literal;
        var s = MarkdownUtilities.UnescapeLiteral(mdl);
        foreach (var r in Literal2Elements(s, nestedSpan))
        {
            yield return r;
        }
    }
    else if (md.IsStrong || md.IsEmphasis)
    {
        IEnumerable<MarkdownSpan> spans = (md.IsStrong
            ? (md as MarkdownSpan.Strong).body
            : (md as MarkdownSpan.Emphasis).body);

        // Workaround for https://github.com/tpetricek/FSharp.formatting/issues/389 - the markdown parser
        // turns *this_is_it* into a nested Emphasis["this", Emphasis["is"], "it"] instead of Emphasis["this_is_it"]
        // What we'll do is preprocess it into Emphasis["this_is_it"]
        if (md.IsEmphasis)
        {
            var spans2 = spans.Select(s =>
            {
                var _ = "";
                if (s.IsEmphasis)
                {
                    s = (s as MarkdownSpan.Emphasis).body.Single();
                    _ = "_";
                }
                if (s.IsLiteral)
                {
                    return _ + (s as MarkdownSpan.Literal).text + _;
                }
                reporter.Error("MD15", $"something odd inside emphasis '{s.GetType().Name}' - only allowed emphasis and literal");
                return "";
            });
            spans = new List<MarkdownSpan>()
            {
                MarkdownSpan.NewLiteral(string.Join("", spans2), FSharpOption<MarkdownRange>.None)
            };
        }

        // Convention is that ***term*** is used to define a term.
        // That's parsed as Strong, which contains Emphasis, which contains one Literal
        string literal = null;
        TermRef termdef = null;
        if (!nestedSpan && md.IsStrong && spans.Count() == 1 && spans.First().IsEmphasis)
        {
            var spans2 = (spans.First() as MarkdownSpan.Emphasis).body;
            if (spans2.Count() == 1 && spans2.First().IsLiteral)
            {
                literal = (spans2.First() as MarkdownSpan.Literal).text;
                termdef = new TermRef(literal, reporter.Location);
                if (context.Terms.ContainsKey(literal))
                {
                    // A redefinition is only warned about; the first definition stays registered.
                    var def = context.Terms[literal];
                    reporter.Warning("MD16", $"Term '{literal}' defined a second time");
                    reporter.Warning("MD16b", $"Here was the previous definition of term '{literal}'", def.Loc);
                }
                else
                {
                    context.Terms.Add(literal, termdef);
                    context.TermKeys.Clear();
                }
            }
        }

        // Convention inside our specs is that emphasis only ever contains literals,
        // either to emphasis some human-text or to refer to an ANTLR-production
        ProductionRef prodref = null;
        if (!nestedSpan && md.IsEmphasis && (spans.Count() != 1 || !spans.First().IsLiteral))
        {
            reporter.Error("MD17", $"something odd inside emphasis");
        }
        if (!nestedSpan && md.IsEmphasis && spans.Count() == 1 && spans.First().IsLiteral)
        {
            literal = (spans.First() as MarkdownSpan.Literal).text;
            prodref = productions.FirstOrDefault(pr => pr.Names.Contains(literal));
            context.Italics.Add(new ItalicUse(
                literal,
                prodref != null ? ItalicUse.ItalicUseKind.Production : ItalicUse.ItalicUseKind.Italic,
                reporter.Location));
        }

        if (prodref != null)
        {
            // Emphasis naming a known production: link to the production's bookmark.
            var props = new RunProperties(new Color { Val = "6A5ACD" }, new Underline { Val = UnderlineValues.Single });
            var run = new Run(new Text(literal) { Space = SpaceProcessingModeValues.Preserve }) { RunProperties = props };
            var link = new Hyperlink(run) { Anchor = prodref.BookmarkName };
            yield return link;
        }
        else if (termdef != null)
        {
            // Term definition: bold+italic run wrapped in a bookmark with a fresh id.
            context.MaxBookmarkId.Value += 1;
            yield return new BookmarkStart { Name = termdef.BookmarkName, Id = context.MaxBookmarkId.Value.ToString() };
            var props = new RunProperties(new Italic(), new Bold());
            yield return new Run(new Text(literal) { Space = SpaceProcessingModeValues.Preserve }) { RunProperties = props };
            yield return new BookmarkEnd { Id = context.MaxBookmarkId.Value.ToString() };
        }
        else
        {
            foreach (var e in Spans2Elements(spans, true))
            {
                // The style element is created per run; it becomes a child of that run's properties.
                var style = (md.IsStrong ? new Bold() as OpenXmlElement : new Italic());
                var run = e as Run;
                if (run != null)
                {
                    run.InsertAt(new RunProperties(style), 0);
                }
                yield return e;
            }
        }
    }
    else if (md.IsInlineCode)
    {
        var mdi = md as MarkdownSpan.InlineCode;
        var code = mdi.code;

        var txt = new Text(BugWorkaroundDecode(code)) { Space = SpaceProcessingModeValues.Preserve };
        var props = new RunProperties(new RunStyle { Val = "CodeEmbedded" });
        var run = new Run(txt) { RunProperties = props };
        yield return run;
    }
    else if (md.IsLatexInlineMath)
    {
        var latex = md as MarkdownSpan.LatexInlineMath;
        var code = latex.code;

        // TODO: Make this look nice - if we actually need it. It's possible that it's only present
        // before subscripts are replaced.
        var txt = new Text(BugWorkaroundDecode(code)) { Space = SpaceProcessingModeValues.Preserve };
        var props = new RunProperties(new RunStyle { Val = "CodeEmbedded" });
        var run = new Run(txt) { RunProperties = props };
        yield return run;
    }
    else if (md.IsDirectLink || md.IsIndirectLink)
    {
        IEnumerable<MarkdownSpan> spans;
        string url = "", alt = "";
        if (md.IsDirectLink)
        {
            var mddl = md as MarkdownSpan.DirectLink;
            spans = mddl.body;
            url = mddl.link;
            alt = mddl.title.Option();
        }
        else
        {
            var mdil = md as MarkdownSpan.IndirectLink;
            var id = mdil.key;
            spans = mdil.body;
            // An unknown reference key leaves url/alt empty.
            if (markdownDocument.DefinedLinks.ContainsKey(id))
            {
                url = markdownDocument.DefinedLinks[id].Item1;
                alt = markdownDocument.DefinedLinks[id].Item2.Option();
            }
        }

        var anchor = "";
        if (spans.Count() == 1 && spans.First().IsLiteral)
        {
            anchor = MarkdownUtilities.UnescapeLiteral(spans.First() as MarkdownSpan.Literal);
        }
        else if (spans.Count() == 1 && spans.First().IsInlineCode)
        {
            anchor = (spans.First() as MarkdownSpan.InlineCode).code;
        }
        else
        {
            reporter.Error("MD18", $"Link anchor must be Literal or InlineCode, not '{md.GetType().Name}'");
            yield break;
        }

        if (sections.ContainsKey(url))
        {
            var section = sections[url];

            // If we're linking to something with a section number, we know what the link text should be.
            // (There are a few links that aren't to numbered sections, e.g. to "Annex C".)
            if (section.Number is object)
            {
                var expectedAnchor = "§" + section.Number;
                if (anchor != expectedAnchor)
                {
                    reporter.Warning("MD19", $"Mismatch: link anchor is '{anchor}', should be '{expectedAnchor}'");
                }
            }

            var txt = new Text(anchor) { Space = SpaceProcessingModeValues.Preserve };
            var run = new Hyperlink(new Run(txt)) { Anchor = section.BookmarkName };
            yield return run;
        }
        else if (url.StartsWith("http:") || url.StartsWith("https:"))
        {
            var hyperlink = new Hyperlink { DocLocation = url, Tooltip = alt };
            foreach (var element in Spans2Elements(spans))
            {
                var run = element as Run;
                if (run != null)
                {
                    // A fresh RunStyle per run: an OpenXML element can only belong to
                    // one parent, so one shared instance cannot be inserted into
                    // several runs.
                    run.InsertAt(new RunProperties(new RunStyle { Val = "Hyperlink" }), 0);
                }

                // Append the element itself; appending the "as Run" result would pass
                // null for any element that is not a Run.
                hyperlink.AppendChild(element);
            }
            yield return hyperlink;
        }
        else
        {
            // TODO: Make this report an error unconditionally once the subscript "latex-like" Markdown is removed.
            if (url != "")
            {
                reporter.Error("MD28", $"Hyperlink url '{url}' unrecognized - not a recognized heading, and not http");
            }
        }
    }
    else if (md.IsHardLineBreak)
    {
        // I've only ever seen this arise from dodgy markdown parsing, so I'll ignore it...
    }
    else
    {
        reporter.Error("MD20", $"Unrecognized markdown element {md.GetType().Name}");
        yield return new Run(new Text($"[{md.GetType().Name}]"));
    }
}