예제 #1
0
        /// <summary>
        /// Converts the HTML file at <paramref name="inputDoc"/> to Markdown, post-processes the
        /// links and the doctype, and writes the result as UTF-8 into the resolved output directory.
        /// </summary>
        /// <param name="inputDoc">Path of the HTML file to convert.</param>
        /// <param name="rootOutputDirectory">Root directory handed to GetOutputDirectory to resolve the target directory.</param>
        public void ConvertDoc(string inputDoc, string rootOutputDirectory)
        {
            var targetDirectory = GetOutputDirectory(inputDoc, rootOutputDirectory);
            var targetFile      = Path.Combine(targetDirectory, GetOuputFilename(inputDoc));

            // Nothing is created here: the target directory and the input file must both exist already.
            if (!Directory.Exists(targetDirectory))
            {
                Console.WriteLine($"Output Directory Doesn't Exist: '{targetDirectory}'");
                return;
            }
            if (!File.Exists(inputDoc))
            {
                Console.WriteLine($"Input File Doesn't Exist: '{inputDoc}'");
                return;
            }

            var converted = new Html2Markdown.Converter().ConvertFile(inputDoc);

            // Link rewriting runs in a fixed order: code links first, then repository links.
            var withCodeLinks = ReplaceCodeLinks(converted);
            var withRepoLinks = ReplaceRepoLinks(withCodeLinks);

            // Remove <doctype>
            var withoutDocType = DocType.Replace(withRepoLinks, string.Empty);

            File.WriteAllText(targetFile, withoutDocType, Encoding.UTF8);
        }
예제 #2
0
        // Matches from the first "<span" to the last "</span>" on a line (greedy).
        public const string SpanOuterFind = @"(?:\<span).*(?:\<\/span>)"; // lookaround alternative: @"(?<=\<span).*(?=\<\/span\>)"

        /// <summary>
        /// Converts an HTML fragment to Markdown after removing markup the converter cannot handle.
        /// </summary>
        /// <param name="input">HTML fragment to convert.</param>
        /// <returns>The Markdown produced by Html2Markdown for the pre-cleaned input.</returns>
        public static string Markdown(string input)
        {
            // Pre-clean: paragraph and underline tags are dropped, <br> becomes a CRLF line break.
            string html = input
                          .Replace("<p>", "")
                          .Replace("</p>", "")
                          .Replace("<br>", "\r\n")
                          .Replace("<u>", "")
                          .Replace("</u>", "");

            // Matches are found on the pre-cleaned text; each one is then unwrapped in the working copy.
            foreach (Match spanMatch in Regex.Matches(html, SpanOuterFind))
            {
                string wrapped        = spanMatch.Value;
                string withoutClosers = wrapped.Replace("</span>", "");

                // Assumes the first '>' closes the opening span tag.
                int    openTagEnd = withoutClosers.IndexOf(">");
                string unwrapped  = withoutClosers.Substring(openTagEnd + 1);

                html = html.Replace(wrapped, unwrapped);
            }

            return new Html2Markdown.Converter().Convert(html);
        }
예제 #3
0
        /// <summary>
        /// Downloads the document at <paramref name="url"/>, converts it to Markdown and strips a
        /// few constructs the consumer does not support.
        /// </summary>
        /// <param name="repoId">Repository id (not used by this method).</param>
        /// <param name="url">Address of the document to download.</param>
        /// <param name="fromCache">When false, the download is forced (fetchAlways is set).</param>
        /// <returns>
        /// (true, markdown) on success; (false, placeholder text) when the result is shorter than two
        /// characters; (false, current text) when conversion throws.
        /// </returns>
        public static async Task <(bool, string)> GetReadMeMdAsync(int repoId, string url, bool fromCache = true)
        {
            var bytes = await BlobCache.LocalMachine.DownloadUrl(url, fetchAlways : !fromCache);

            string text = System.Text.Encoding.UTF8.GetString(bytes, 0, bytes.Length);

            try
            {
                // Html2Markdown needs HtmlAgilityPack.NugetPkg
                text = new Html2Markdown.Converter().Convert(text).Trim();

                // Strip constructs that are not supported downstream.
                text = text.Replace("[`", "[");
                text = text.Replace("`]", "]");
                text = text.Replace("<<", "");

                bool hasContent = text != null && text.Length >= 2;
                if (hasContent)
                {
                    return(true, text);
                }
                return(false, "> No README.MD 🤔");
            }
            catch
            {
                // Best effort: hand back whatever text we have at this point.
                return(false, text);
            }
        }
예제 #4
0
        /// <summary>
        /// Converts HTML to Markdown, first rewriting matches of _htmlCodeRegex into
        /// "language-none" code blocks, and trims surrounding spaces and line breaks.
        /// </summary>
        /// <param name="source">HTML to convert.</param>
        /// <returns>The trimmed Markdown text.</returns>
        static string ConvertHtmlToMarkdown(string source)
        {
            // Give code blocks a generic language class so fenced code can be created later.
            string tagged = _htmlCodeRegex.Replace(source, "<pre><code class=\"language-none\">$1</code></pre>");

            string markdown = new Html2Markdown.Converter().Convert(tagged);

            // Remove extra spaces and new lines at both ends.
            return markdown.Trim(' ', '\r', '\n');
        }
        /// <summary>
        /// Converts WIKI-formatted content to Markdown by going through HTML. If the WIKI-to-HTML
        /// step fails or yields nothing, the original text is fed to the HTML-to-Markdown converter
        /// directly. A log entry is recorded whenever the result differs from the input.
        /// </summary>
        /// <param name="wiki">WIKI content; null or blank content yields an empty string.</param>
        /// <param name="implementationGuideId">Implementation guide id (not used by this method).</param>
        /// <param name="templateId">Template id stored on the log entry.</param>
        /// <param name="property">Property name stored on the log entry.</param>
        /// <param name="constraintId">Optional constraint id stored on the log entry.</param>
        /// <returns>The converted Markdown, or an empty string for null/blank input.</returns>
        private string ConvertWikiToMarkdown(string wiki, int implementationGuideId, int templateId, string property, int?constraintId = null)
        {
            // Null has nothing to convert; treat it like blank content instead of failing on Trim().
            if (wiki == null)
            {
                return(string.Empty);
            }

            // Escape cardinality and urn brackets before the WIKI-to-HTML step.
            string cleanWiki = wiki
                               .Trim()
                               .Replace(" [1..0]", " [[]1..0]")
                               .Replace(" [0..1]", " [[]0..1]")
                               .Replace(" [0..*]", " [[]0..*]")
                               .Replace(" [1..1]", " [[]1..1]")
                               .Replace(" [1..*]", " [[]1..*]")
                               .Replace(" [urn:", " [[]urn:");

            if (string.IsNullOrEmpty(cleanWiki))
            {
                return(string.Empty);
            }

            var    converter = new Html2Markdown.Converter(this.markdown);
            string markdown  = wiki;

            try
            {
                var html = this.ConvertWikiToHtml(cleanWiki);
                html = this.CleanHtml(html);

                if (string.IsNullOrEmpty(html))
                {
                    // Routed to the fallback below, same as any other conversion failure.
                    throw new InvalidOperationException("Error converting WIKI content to HTML");
                }

                markdown = converter.Convert(html);
                markdown = this.CleanMarkdown(markdown);
            }
            catch (Exception ex)
            {
                // Best-effort fallback: report why the WIKI route failed, then convert the raw text.
                this.WriteVerbose("Could not convert from WIKI to HTML due to syntax error. Converting original content directly to MarkDown.");
                this.WriteVerbose(ex.Message);
                markdown = converter.Convert(wiki);
            }

            if (markdown != wiki)
            {
                this.logs.Add(new MigrateMarkdownLog()
                {
                    TemplateId   = templateId,
                    ConstraintId = constraintId,
                    Property     = property,
                    Original     = wiki,
                    New          = markdown
                });
            }

            return(markdown);
        }
예제 #6
0
        /// <summary>
        /// Converts an HTML body to Markdown. If the converter throws a
        /// <see cref="FormatException"/>, a warning is logged and the input is returned unchanged.
        /// </summary>
        /// <param name="htmlBody">HTML to convert.</param>
        /// <returns>The Markdown text, or <paramref name="htmlBody"/> when conversion failed.</returns>
        public static string ConvertHtmlToMarkdown(string htmlBody)
        {
            // Start from the fallback value; it is only replaced when conversion succeeds.
            string result = htmlBody;

            try
            {
                result = new Html2Markdown.Converter().Convert(htmlBody);
            }
            catch (FormatException conversionError)
            {
                _log.Write(LogLevel.Warn, "Failed to convert HTML to Markdown", conversionError);
            }

            return result;
        }
예제 #7
0
        /// <summary>
        /// Imports the content of a data package: storage files and bitmaps become attachments,
        /// while text, links and HTML are placed into the editor. Only the first matching format is handled.
        /// </summary>
        /// <remarks>
        /// This is an async void method, so the caller cannot await it or observe its exceptions.
        /// </remarks>
        /// <param name="data">The data package view to read from.</param>
        /// <param name="imagefilename">File name (without extension) for a bitmap saved to the temporary folder.</param>
        private async void HandleDataPackage(DataPackageView data, string imagefilename)
        {
            if (data.Contains(StandardDataFormats.StorageItems))
            {
                foreach (var item in await data.GetStorageItemsAsync())
                {
                    // Storage items are not necessarily files; only files are attached.
                    if (item is StorageFile file)
                    {
                        AddAttachement(file);
                    }
                }
            }
            else if (data.Contains(StandardDataFormats.Bitmap))
            {
                var bmpDPV = await data.GetBitmapAsync();

                var bmpSTR = await ApplicationData.Current.TemporaryFolder.CreateFileAsync(imagefilename + ".png", CreationCollisionOption.OpenIfExists);

                using (var writeStream = (await bmpSTR.OpenStreamForWriteAsync()).AsRandomAccessStream())
                using (var readStream = await bmpDPV.OpenReadAsync())
                using (var decodeStream = readStream.CloneStream())
                {
                    // The file may already exist (OpenIfExists): drop the old content so a smaller
                    // image does not leave stale bytes after the new PNG data.
                    writeStream.Size = 0;

                    BitmapDecoder decoder = await BitmapDecoder.CreateAsync(decodeStream);

                    BitmapEncoder encoder = await BitmapEncoder.CreateAsync(BitmapEncoder.PngEncoderId, writeStream);

                    encoder.SetSoftwareBitmap(await decoder.GetSoftwareBitmapAsync());
                    await encoder.FlushAsync();
                }

                // Attach only after all streams are closed so the file is complete and no longer open for writing.
                AddAttachement(bmpSTR);
            }
            else if (data.Contains(StandardDataFormats.Text))
            {
                Editor.Text = await data.GetTextAsync();
            }
            else if (data.Contains(StandardDataFormats.WebLink))
            {
                Editor.Text = (await data.GetWebLinkAsync()).ToString();
            }
            else if (data.Contains(StandardDataFormats.ApplicationLink))
            {
                Editor.Text = (await data.GetApplicationLinkAsync()).ToString();
            }
            else if (data.Contains(StandardDataFormats.Html))
            {
                var converter = new Html2Markdown.Converter();
                Editor.Text = converter.Convert(await data.GetHtmlFormatAsync());
            }
        }
        /// <summary>
        /// Converts HTML to Markdown after removing div and br tags and running CleanHtml. A log
        /// entry is recorded when the cleaned Markdown differs from the input.
        /// </summary>
        /// <param name="html">HTML to convert; null or empty is returned unchanged.</param>
        /// <param name="implementationGuideId">Implementation guide id stored on the log entry.</param>
        /// <param name="property">Property name stored on the log entry.</param>
        /// <param name="sectionId">Optional section id stored on the log entry.</param>
        /// <returns>
        /// The cleaned Markdown, or the original <paramref name="html"/> when conversion throws or
        /// produces nothing.
        /// </returns>
        public string ConvertHtmlToMarkdown(string html, int implementationGuideId, string property, int?sectionId = null)
        {
            if (string.IsNullOrEmpty(html))
            {
                return(html);
            }

            // Remove the tags in the same order every time, then apply the shared HTML clean-up.
            string stripped = html;
            foreach (string tag in new[] { "<div>", "</div>", "<br>", "<br />", "<br/>" })
            {
                stripped = stripped.Replace(tag, "");
            }
            stripped = this.CleanHtml(stripped);

            var    htmlConverter = new Html2Markdown.Converter(this.markdown);
            string converted;

            try
            {
                converted = htmlConverter.Convert(stripped);
            }
            catch (Exception ex)
            {
                this.WriteVerbose("Error converting to html: " + ex.Message);
                this.WriteVerbose(stripped);
                return(html);
            }

            string tidied = this.CleanMarkdown(converted);

            if (html != tidied)
            {
                var entry = new MigrateMarkdownLog()
                {
                    ImplementationGuideId = implementationGuideId,
                    SectionId             = sectionId,
                    Property = property,
                    Original = html,
                    New      = tidied
                };
                this.logs.Add(entry);
            }

            // An empty conversion result means the original HTML is kept.
            if (string.IsNullOrEmpty(converted))
            {
                return(html);
            }

            return(tidied);
        }
예제 #9
0
        /// <summary>
        /// Converts the HTML file at <paramref name="inputDoc"/> to Markdown, applies the general
        /// and namespace-specific JavaDoc replacers, optionally adds a YAML header, and writes the
        /// result as UTF-8.
        /// </summary>
        /// <param name="inputDoc">Path of the HTML file to convert.</param>
        /// <param name="rootOutputDirectory">Root directory handed to GetOutputDirectory to resolve the target directory.</param>
        public void ConvertDoc(string inputDoc, string rootOutputDirectory)
        {
            var destinationDir  = GetOutputDirectory(inputDoc, rootOutputDirectory);
            var destinationFile = Path.Combine(destinationDir, GetOuputFilename(inputDoc));

            // Nothing is created here: the target directory and the input file must both exist already.
            if (!Directory.Exists(destinationDir))
            {
                Console.WriteLine($"Output Directory Doesn't Exist: '{destinationDir}'");
                return;
            }
            if (!File.Exists(inputDoc))
            {
                Console.WriteLine($"Input File Doesn't Exist: '{inputDoc}'");
                return;
            }

            var text = new Html2Markdown.Converter().ConvertFile(inputDoc);

            // The namespace is derived from the output file path and may be remapped.
            var ns = ExtractNamespaceFromFile(destinationFile);
            if (NamespaceFileMappings.TryGetValue(ns, out var mappedNs))
            {
                ns = mappedNs;
            }

            // General replacers always run first, followed by any registered for this namespace.
            foreach (var general in JavaDocFormatters.Replacers)
            {
                text = general.Replace(text);
            }

            if (JavaDocFormatters.CustomReplacers.TryGetValue(ns, out var namespaceReplacers))
            {
                foreach (var custom in namespaceReplacers)
                {
                    text = custom.Replace(text);
                }
            }

            var output = ShouldAppendYamlHeader(inputDoc, ns) ? AppendYamlHeader(ns, text) : text;

            File.WriteAllText(destinationFile, output, Encoding.UTF8);
        }
예제 #10
0
        /// <summary>
        /// Fills the "title" and "description" entries from an Atom item, replacing existing values.
        /// See https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md#basics
        /// </summary>
        /// <param name="item">Atom item whose Title and Summary are read.</param>
        /// <param name="properties">Dictionary that receives the "title" and "description" entries.</param>
        private static void FillBasicsProperties(AtomItem item, Dictionary <string, object> properties)
        {
            // title
            if (item.Title != null)
            {
                properties.Remove("title");
                properties.Add("title", item.Title.Text);
            }

            // description
            if (item.Summary == null)
            {
                return;
            }

            properties.Remove("description");

            // A summary whose type mentions "html" is converted to Markdown; anything else is used as-is.
            bool   isHtml      = item.Summary.Type.Contains("html", StringComparison.InvariantCultureIgnoreCase);
            string description = isHtml
                                 ? new Html2Markdown.Converter().Convert(item.Summary.Text)
                                 : item.Summary.Text;

            properties.Add("description", description);
        }
예제 #11
0
        /// <summary>
        /// Converts mixed HTML/Markdown to pure Markdown, parses it, lets the caller transform
        /// the document, and renders the result as HTML.
        /// </summary>
        /// <param name="mixedHtmlAndMarkdown">Input text containing HTML and/or Markdown.</param>
        /// <param name="transform">Callback invoked on the parsed document before rendering.</param>
        /// <param name="pipeline">
        /// Overwritten before it is read: the method always builds its own pipeline, so the value
        /// passed in has no effect. NOTE(review): confirm whether callers expect it to be honoured.
        /// </param>
        /// <returns>The rendered HTML.</returns>
        public static string Converter2(string mixedHtmlAndMarkdown, Action <MarkdownDocument> transform, MarkdownPipeline pipeline)
        {
            string markdownText = new Html2Markdown.Converter().Convert(mixedHtmlAndMarkdown);

            // The pipeline is always rebuilt here, replacing the argument.
            pipeline = new MarkdownPipelineBuilder()
                       .UseAdvancedExtensions()
                       .UsePipeTables()
                       .Build();
            pipeline.Extensions.AddIfNotAlready <SoftlineBreakAsHardlineExtension>();

            MarkdownDocument document = Markdown.Parse(markdownText, pipeline);
            transform(document);

            using (var output = new StringWriter())
            {
                var htmlRenderer = new HtmlRenderer(output);
                pipeline.Setup(htmlRenderer);
                htmlRenderer.Render(document);
                output.Flush();

                return output.ToString();
            }
        }
예제 #12
0
        /// <summary>
        /// Replaces the workflow-notes block with a Markdown document listing the diagram's BPMN
        /// elements (sorted with <c>BpmnElement.AliasComparer</c>): a heading per element, its lane
        /// and its notes converted from HTML to normalized Markdown.
        /// </summary>
        /// <param name="extensionHtmlRenderer">Renderer supplied by the caller (not used by this method).</param>
        /// <param name="block">The block that is replaced by the generated document.</param>
        /// <param name="diagram">Diagram whose name selects the BPMN elements.</param>
        public override void Transform(ExtensionHtmlRenderer extensionHtmlRenderer, WorkflowNotesBlock block, Diagram diagram)
        {
            var(element, elementsEnumerable) = _provider.GetBpmnElements(new EaProvider.Path(diagram.Name));
            var elements = elementsEnumerable.ToList();

            elements.Sort(new BpmnElement.AliasComparer());

            var converter = new Html2Markdown.Converter();

            // The pipeline does not depend on the element, so it is built once and reused for every renderer.
            var pipeline = new MarkdownPipelineBuilder().Build();
            pipeline.Extensions.AddIfNotAlready <SoftlineBreakAsHardlineExtension>();

            var sb = new StringBuilder();
            sb.AppendLine($"# {element.Name}");

            foreach (BpmnElement e in elements)
            {
                // Fall back to the alias when the element has no name.
                string name = string.IsNullOrEmpty(e.Name) ? e.Alias : e.Name;

                string           notes   = converter.Convert(e.Notes);
                MarkdownDocument notesMd = Markdown.Parse(notes);

                // Shift headings in the notes by two levels before they go under the element heading.
                notesMd.IncreaseHeadingLevel(2);

                string normalizedNotes;
                using (var writer = new StringWriter())
                {
                    var renderer = new NormalizeRenderer(writer);
                    pipeline.Setup(renderer);
                    renderer.Render(notesMd);
                    writer.Flush();
                    normalizedNotes = writer.ToString();
                }

                sb.AppendLine($"## {name}");
                sb.AppendLine($"Lane: {e.Lane}");
                sb.AppendLine();
                sb.AppendLine("Description:");
                sb.AppendLine(normalizedNotes);
            }

            MarkdownDocument document = Markdown.Parse(sb.ToString());

            Replace(block, document);
        }
예제 #13
0
        /// <summary>
        /// Converts HTML to Markdown, padding the content of every &lt;pre&gt; element with line
        /// breaks first to work around a bug in the Html2Markdown library.
        /// </summary>
        /// <param name="html">HTML to convert.</param>
        /// <returns>The Markdown text.</returns>
        private static string HtmlToMarkdown(string html)
        {
            var document = new HtmlDocument();
            document.LoadHtml(html);

            // SelectNodes yields null when nothing matches, hence the explicit check.
            var preBlocks = document.DocumentNode.SelectNodes("//pre");
            if (preBlocks != null)
            {
                foreach (var pre in preBlocks)
                {
                    pre.InnerHtml = $"{Environment.NewLine}{pre.InnerHtml}{Environment.NewLine}";
                }
            }

            string paddedHtml = document.DocumentNode.InnerHtml;

            return new Html2Markdown.Converter().Convert(paddedHtml);
        }
예제 #14
0
 /// <summary>
 /// Static constructor: creates the MarkdownSharp formatter and the Html2Markdown converter
 /// and stores them in the static <c>formatter</c> and <c>converter</c> members.
 /// </summary>
 static TextHandler()
 {
     formatter = new MarkdownSharp.Markdown();
     converter = new Html2Markdown.Converter();
 }
예제 #15
0
 /// <summary>
 /// Runs the <c>html</c> member through Html2Markdown. The result is not asserted on, so this
 /// only verifies that the conversion completes without throwing.
 /// </summary>
 public void NestedList_test_with_Html2Markdown()
 {
     string result = new Html2Markdown.Converter().Convert(html);
 }
예제 #16
0
        /// <summary>
        /// Exports CloudScribe blog posts from SQL Server into Jekyll files. Reads settings from
        /// "cloudscribe.json", loads the posts (and, when enabled, their approved comments), then
        /// writes one file with YAML front matter per post into the _posts or _drafts folder.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Read configuration file
            var config = JsonConvert.DeserializeObject <CloudScribeSettings>(File.ReadAllText("cloudscribe.json"));

            // Create sql connection
            var builder = new SqlConnectionStringBuilder
            {
                DataSource     = config.server,
                UserID         = config.username,
                Password       = config.password,
                InitialCatalog = config.database
            };

            List <Post> posts;

            // Open sql connection to build the post objects
            using (var connection = new SqlConnection(builder.ConnectionString))
            {
                connection.Open();

                posts = ReadPosts(connection, config.blogid);

                // If we need to get comments, grab them for each post
                if (config.comments)
                {
                    foreach (var post in posts)
                    {
                        post.comments = ReadComments(connection, post.id);
                    }
                }
            }

            // Now that SQL is done, we can start processing the files.
            var markdownConverter = new Html2Markdown.Converter();
            var folderPath        = config.jekyll_location;

            Directory.CreateDirectory(folderPath + "\\_posts\\");
            Directory.CreateDirectory(folderPath + "\\_drafts\\");

            foreach (var post in posts)
            {
                WritePostFile(post, config, markdownConverter);
            }
        }

        /// <summary>
        /// Loads all posts, optionally restricted to one blog.
        /// </summary>
        /// <param name="connection">An open SQL connection.</param>
        /// <param name="blogId">Blog id to filter on; null or empty loads every post.</param>
        /// <returns>The posts read from the database.</returns>
        private static List <Post> ReadPosts(SqlConnection connection, string blogId)
        {
            var posts = new List <Post>();

            var sql = new StringBuilder();
            sql.Append("SELECT p.Id, p.Title, p.Slug, p.PubDate, p.DraftPubDate, p.IsPublished, p.CategoriesCsv, p.Content, p.DraftContent, u.DisplayName, u.Email, c.BlogPageText ");
            sql.Append("FROM cs_Post p LEFT JOIN cs_User u ON p.Author = u.Email LEFT JOIN cs_ContentProject c ON p.BlogId = c.Id");

            bool filterByBlog = !string.IsNullOrEmpty(blogId);
            if (filterByBlog)
            {
                // Leading space keeps WHERE separate from the join condition; the value is a parameter.
                sql.Append(" WHERE p.BlogId = @blogId");
            }

            using (var command = new SqlCommand(sql.ToString(), connection))
            {
                if (filterByBlog)
                {
                    command.Parameters.AddWithValue("@blogId", blogId);
                }

                using (var reader = command.ExecuteReader())
                {
                    while (reader.Read())
                    {
                        posts.Add(new Post()
                        {
                            id            = reader[0].ToString(),
                            title         = reader[1].ToString(),
                            slug          = reader[2].ToString(),
                            pubdate       = reader[3].ToString(),
                            draftpubdate  = reader[4].ToString(),
                            isPublished   = bool.Parse(reader[5].ToString()),
                            categoriescsv = reader[6].ToString(),
                            content       = reader[7].ToString(),
                            draftcontent  = reader[8].ToString(),
                            displayname   = reader[9].ToString(),
                            email         = reader[10].ToString(),
                            blogurl       = reader[11].ToString()
                        });
                    }
                }
            }

            return posts;
        }

        /// <summary>
        /// Loads the approved comments of one post.
        /// </summary>
        /// <param name="connection">An open SQL connection.</param>
        /// <param name="postId">Id of the post whose comments are read.</param>
        /// <returns>The approved comments of the post.</returns>
        private static List <Comment> ReadComments(SqlConnection connection, string postId)
        {
            const string commentSql = "SELECT Id, Content, PubDate, Author, Email, Website FROM cs_PostComment "
                                      + "WHERE IsApproved = 1 AND PostEntityId = @postId";

            var comments = new List <Comment>();

            using (var command = new SqlCommand(commentSql, connection))
            {
                command.Parameters.AddWithValue("@postId", postId);

                using (var reader = command.ExecuteReader())
                {
                    while (reader.Read())
                    {
                        comments.Add(new Comment()
                        {
                            id      = reader[0].ToString(),
                            content = reader[1].ToString(),
                            pubdate = reader[2].ToString(),
                            author  = reader[3].ToString(),
                            email   = reader[4].ToString(),
                            website = reader[5].ToString()
                        });
                    }
                }
            }

            return comments;
        }

        /// <summary>
        /// Writes one post as a Jekyll file: YAML front matter followed by the content, converted
        /// to Markdown when the configured file type is "md".
        /// </summary>
        /// <param name="post">The post to write.</param>
        /// <param name="config">Settings providing the target folder, file type and comments flag.</param>
        /// <param name="markdownConverter">Converter used for "md" output.</param>
        private static void WritePostFile(Post post, CloudScribeSettings config, Html2Markdown.Converter markdownConverter)
        {
            var subdir    = post.isPublished ? "\\_posts\\" : "\\_drafts\\";
            var pubDate   = string.IsNullOrEmpty(post.pubdate) ? DateTime.Parse(post.draftpubdate) : DateTime.Parse(post.pubdate);
            var permalink = "/" + post.slug;

            if (!string.IsNullOrEmpty(post.blogurl))
            {
                permalink = "/" + post.blogurl + permalink;
            }

            // Published posts get the date prefix in the file name; drafts use the slug alone.
            var dateString = pubDate.ToString("yyyy-MM-dd");
            var filename   = post.isPublished ? $"{dateString}-{post.slug}" : post.slug;

            var filePath = config.jekyll_location + subdir + filename + "." + config.filetype;
            using (var file = File.CreateText(filePath))
            {
                file.WriteLine("---");
                file.WriteLine("title: \"" + post.title + "\"");
                file.WriteLine("author: " + post.displayname);
                file.WriteLine("email: " + post.email);
                file.WriteLine("cloudscribe_id: \"" + post.id + "\"");
                file.WriteLine("cloudscribe_path: \"" + permalink + "\"");
                file.WriteLine("permalink: " + permalink);
                file.WriteLine("date: " + dateString);
                file.WriteLine("categories: [" + post.categoriescsv.Replace(",", ", ") + "]");
                file.WriteLine("tags: ");

                if (config.comments)
                {
                    file.WriteLine("comments: ");
                }
                file.WriteLine("---");
                file.WriteLine("");

                var content = post.isPublished ? post.content : post.draftcontent;
                content += "\n";

                // Ordinal, case-insensitive check instead of a culture-dependent ToLower().
                if (string.Equals(config.filetype, "md", StringComparison.OrdinalIgnoreCase))
                {
                    file.Write(markdownConverter.Convert(content));
                }
                else
                {
                    file.Write(content);
                }
            }
        }
		/// <summary>
		/// Creates the instance with a default Html2Markdown converter.
		/// </summary>
		public MarkdownConverter() => _converter = new Html2Markdown.Converter();
예제 #18
0
 /// <summary>
 /// Creates the instance with a default Html2Markdown converter.
 /// </summary>
 public EaNormalizer() => _converter = new Html2Markdown.Converter();
예제 #19
0
        static void Main(string[] args)
        {
            var sanitizer = new HtmlSanitizer();
            var html      = @"<script>alert('xss')</script><div onload=""alert('xss')"""
                            + @"style=""background-color: test"">Test<img src=""test.gif"""
                            + @"style=""background-image: url(javascript:alert('xss')); margin: 10px""></div>";
            var memStream = new MemoryStream(Encoding.UTF8.GetBytes(html));

            Stopwatch st;

            for (var j = 0; j < 6; j++)
            {
                st = Stopwatch.StartNew();
                for (var i = 0; i < 5000; i++)
                {
                    var sanitized = sanitizer.Sanitize(html, "http://www.example.com");
                }
                Console.WriteLine("HtmlSanitizer {0} ms", st.ElapsedMilliseconds);

                st = Stopwatch.StartNew();
                for (var i = 0; i < 5000; i++)
                {
                    var sanitized = Html.Sanitize(html);
                }
                Console.WriteLine("BracketPipe {0} ms", st.ElapsedMilliseconds);

                st = Stopwatch.StartNew();
                for (var i = 0; i < 5000; i++)
                {
                    memStream.Position = 0;
                    var sanitized = Html.Sanitize(memStream);
                }
                Console.WriteLine("BracketPipe {0} ms", st.ElapsedMilliseconds);
            }


            const string htmlInput = @"<!DOCTYPE html>
<html>
    <head>
        <meta charset=""utf-8"" />
        <title>The test document</title>
        <link href=""favicon.ico"" rel=""shortcut icon"" type=""image/x-icon"" />
        <meta name=""viewport"" content=""width=device-width"" />
        <link rel=""stylesheet"" type=""text/css"" href=""/Content/Site.css"" />
    </head>
    <body>
        <p>Lorem ipsum dolor sit amet...</p>

        <script src=""http://ajax.aspnetcdn.com/ajax/jQuery/jquery-1.9.1.min.js""></script>
        <script>
            (window.jquery) || document.write('<script src=""/Scripts/jquery-1.9.1.min.js""><\/script>');
        </script>
    </body>
</html>";

            memStream = new MemoryStream(Encoding.UTF8.GetBytes(htmlInput));

            var htmlMinifier = new HtmlMinifier();

            for (var j = 0; j < 5; j++)
            {
                st = Stopwatch.StartNew();
                for (var i = 0; i < 5000; i++)
                {
                    var result = htmlMinifier.Minify(htmlInput);
                }
                Console.WriteLine("WebMarkupMin {0} ms", st.ElapsedMilliseconds);

                st = Stopwatch.StartNew();
                for (var i = 0; i < 5000; i++)
                {
                    var result = Html.Minify(htmlInput);
                }
                Console.WriteLine("BracketPipe {0} ms", st.ElapsedMilliseconds);

                st = Stopwatch.StartNew();
                for (var i = 0; i < 5000; i++)
                {
                    memStream.Position = 0;
                    var result = Html.Minify(memStream);
                }
                Console.WriteLine("BracketPipe {0} ms", st.ElapsedMilliseconds);
            }

            var          mdConverter  = new Html2Markdown.Converter();
            var          revConverter = new ReverseMarkdown.Converter();
            const string mdHtml       = "<p>This is the second part of a two part series about building real-time web applications with server-sent events.</p>\r\n\r\n<ul>\r\n<li><a href=\"http://bayn.es/real-time-web-applications-with-server-sent-events-pt-1/\">Building Web Apps with Server-Sent Events - Part 1</a></li>\r\n</ul>\r\n\r\n<h2 id=\"reconnecting\">Reconnecting</h2>\r\n\r\n<p>In this post we are going to look at handling reconnection if the browser loses contact with the server. Thankfully the native JavaScript functionality for SSEs (the <a href=\"https://developer.mozilla.org/en-US/docs/Web/API/EventSource\">EventSource</a>) handles this natively. You just need to make sure that your server-side implementation supports the mechanism.</p>\r\n\r\n<p>When the server reconnects your SSE end point it will send a special HTTP header <code>Last-Event-Id</code> in the reconnection request. In the previous part of this blog series we looked at just sending events with the <code>data</code> component. Which looked something like this:-</p>\r\n\r\n<pre><code>data: The payload we are sending\\n\\n\r\n</code></pre>\r\n\r\n<p>Now while this is enough to make the events make it to your client-side implementation. We need more information to handle reconnection. To do this we need to add an event id to the output.</p>\r\n\r\n<p>E.g.</p>\r\n\r\n<pre><code>id: 1439887379635\\n\r\ndata: The payload we are sending\\n\\n\r\n</code></pre>\r\n\r\n<p>The important thing to understand here is that each event needs a unique identifier, so that the client can communicate back to the server (using the <code>Last-Event-Id</code> header) which was the last event it received on reconnection.</p>\r\n\r\n<h2 id=\"persistence\">Persistence</h2>\r\n\r\n<p>In our previous example we used <a href=\"http://redis.io/topics/pubsub\">Redis Pub/Sub</a> to inform <a href=\"https://nodejs.org/\">Node.js</a> that it needs to push a new SSE to the client. 
Redis Pub/Sub is a topic communication which means it will be delivered to all <em>connected clients</em>, and then it will be removed from the topic. So there is no persistence for when clients reconnect. To implement this we need to add a persistence layer and so in this demo I have chosen to use <a href=\"https://www.mongodb.org/\">MongoDB</a>.</p>\r\n\r\n<p>Essentially we will be pushing events into both Redis and MongoDB. Redis will still be our method of initiating an SSE getting sent to the browser, but we will also be be storing that event into MongoDB so we can query it on a reconnection to get anything we've missed.</p>\r\n\r\n<h2 id=\"thecode\">The Code</h2>\r\n\r\n<p>OK so let us look at how we can actually implement this.</p>\r\n\r\n<h3 id=\"updateserverevent\">Update ServerEvent</h3>\r\n\r\n<p>We need to update the ServerEvent object to support having an <code>id</code> for an event.</p>\r\n\r\n<pre><code>function ServerEvent(name) {\r\n    this.name = name || \"\";\r\n    this.data = \"\";\r\n};\r\n\r\nServerEvent.prototype.addData = function(data) {\r\n    var lines = data.split(/\\n/);\r\n\r\n    for (var i = 0; i &lt; lines.length; i++) {\r\n        var element = lines[i];\r\n        this.data += \"data:\" + element + \"\\n\";\r\n    }\r\n}\r\n\r\nServerEvent.prototype.payload = function() {\r\n    var payload = \"\";\r\n    if (this.name != \"\") {\r\n        payload += \"id: \" + this.name + \"\\n\";\r\n    }\r\n\r\n    payload += this.data;\r\n    return payload + \"\\n\";\r\n}\r\n</code></pre>\r\n\r\n<p>This is pretty straightforward string manipulation and won't impress anyone, but it is foundation for what will follow.</p>\r\n\r\n<h3 id=\"storeeventsinmongodb\">Store Events in MongoDB</h3>\r\n\r\n<p>We need to update the <code>post.js</code> code to also store new events in MongoDB.</p>\r\n\r\n<pre><code>app.put(\"/api/post-update\", function(req, res) {\r\n    var json = req.body;\r\n    json.timestamp = Date.now();\r\n\r\n    
eventStorage.save(json).then(function(doc) {\r\n        dataChannel.publish(JSON.stringify(json));\r\n    }, errorHandling);\r\n\r\n    res.status(204).end();\r\n});\r\n</code></pre>\r\n\r\n<p>The <code>event-storage</code> module looks as follows:</p>\r\n\r\n<pre><code>var Q = require(\"q\"),\r\n    config = require(\"./config\"),\r\n    mongo = require(\"mongojs\"),\r\n    db = mongo(config.mongoDatabase),\r\n    collection = db.collection(config.mongoScoresCollection);\r\n\r\nmodule.exports.save = function(data) {\r\n    var deferred = Q.defer();\r\n    collection.save(data, function(err, doc){\r\n        if(err) {\r\n            deferred.reject(err);\r\n        }\r\n        else {\r\n            deferred.resolve(doc);\r\n        }\r\n    });\r\n\r\n    return deferred.promise;\r\n};\r\n</code></pre>\r\n\r\n<p>Here we are just using basic MongoDB commands to save a new event into the collection. Yep that is it, we are now additionally persisting the events so they can be retrieved later.</p>\r\n\r\n<h3 id=\"retrievingeventsonreconnection\">Retrieving Events on Reconnection</h3>\r\n\r\n<p>When an <code>EventSource</code> reconnects after a disconnection it passes a special header <code>Last-Event-Id</code>. 
So we need to look for that and return the events that got broadcast while the client was disconnected.</p>\r\n\r\n<pre><code>app.get(\"/api/updates\", function(req, res){\r\n    initialiseSSE(req, res);\r\n\r\n    if (typeof(req.headers[\"last-event-id\"]) != \"undefined\") {\r\n        replaySSEs(req, res);\r\n    }\r\n});\r\n\r\nfunction replaySSEs(req, res) {\r\n    var lastId = req.headers[\"last-event-id\"];\r\n\r\n    eventStorage.findEventsSince(lastId).then(function(docs) {\r\n        for (var index = 0; index &lt; docs.length; index++) {\r\n            var doc = docs[index];\r\n            var messageEvent = new ServerEvent(doc.timestamp);\r\n            messageEvent.addData(doc.update);\r\n            outputSSE(req, res, messageEvent.payload());\r\n        }\r\n    }, errorHandling);\r\n};\r\n</code></pre>\r\n\r\n<p>What we are doing here is querying MongoDB for the events that were missed. We then iterate over them and output them to the browser.</p>\r\n\r\n<p>The code for querying MongoDB is as follows:</p>\r\n\r\n<pre><code>module.exports.findEventsSince = function(lastEventId) {\r\n    var deferred = Q.defer();\r\n\r\n    collection.find({\r\n        timestamp: {$gt: Number(lastEventId)}\r\n    })\r\n    .sort({timestamp: 1}, function(err, docs) {\r\n        if (err) {\r\n            deferred.reject(err);\r\n        }\r\n        else {\r\n            deferred.resolve(docs);\r\n        }\r\n    });\r\n\r\n    return deferred.promise;\r\n};\r\n</code></pre>\r\n\r\n<h2 id=\"testing\">Testing</h2>\r\n\r\n<p>To test this you will need to run both apps at the same time.</p>\r\n\r\n<pre><code>node app.js\r\n</code></pre>\r\n\r\n<p>and </p>\r\n\r\n<pre><code>node post.js\r\n</code></pre>\r\n\r\n<p>Once they are running open two browser windows <a href=\"http://localhost:8181/\">http://localhost:8181/</a> and <a href=\"http://localhost:8082/api/post-update\">http://localhost:8082/api/post-update</a></p>\r\n\r\n<p>Now you can post updates as before. 
If you stop <code>app.js</code> but continue posting events, when you restart <code>app.js</code> within 10 seconds the <code>EventSource</code> will reconnect. This will deliver all missed events.</p>\r\n\r\n<h2 id=\"conclusion\">Conclusion</h2>\r\n\r\n<p>This very simple code gives you a very elegant and powerful push architecture to create real-time apps.</p>\r\n\r\n<h3 id=\"improvements\">Improvements</h3>\r\n\r\n<p>A possible improvement would be to render the events from MongoDB server-side when the page is first output. Then we would get updates client-side as they are pushed to the browser.</p>\r\n\r\n<h3 id=\"download\">Download</h3>\r\n\r\n<p>If you want to play with this application you can fork or browse it on <a href=\"https://github.com/baynezy/RealtimeDemo/tree/part-2\">GitHub</a>.</p>";

            for (var j = 0; j < 5; j++)
            {
                st = Stopwatch.StartNew();
                for (var i = 0; i < 1000; i++)
                {
                    var result = mdConverter.Convert(mdHtml);
                }
                Console.WriteLine("Html2Markdown {0} ms", st.ElapsedMilliseconds);

                st = Stopwatch.StartNew();
                for (var i = 0; i < 1000; i++)
                {
                    var result = revConverter.Convert(mdHtml);
                }
                Console.WriteLine("ReverseMarkdown {0} ms", st.ElapsedMilliseconds);

                st = Stopwatch.StartNew();
                for (var i = 0; i < 1000; i++)
                {
                    var result = Html.ToMarkdown(mdHtml);
                }
                Console.WriteLine("BracketPipe {0} ms", st.ElapsedMilliseconds);
            }


            Console.ReadLine();
        }
예제 #20
0
        public async Task SeedAsync(ApplicationDbContext dbContext, IServiceProvider serviceProvider)
        {
            if (await dbContext.Articles.AnyAsync())
            {
                return;
            }

            var dmpService = serviceProvider.GetService <IDiffMatchPatchService>();
            var categories = await dbContext.Categories.ToListAsync();

            var users = await dbContext.Users
                        .Select(x => x.Id)
                        .ToArrayAsync();

            var editRegex = new Regex(@"\(\/w\/index.php\?.*title=(?<title>.*)&.*action=edit.*\)");

            var config  = Configuration.Default.WithDefaultLoader();
            var context = BrowsingContext.New(config);

            var elementsToRemove = string.Join(
                ", ",
                "script",
                "style",
                ".infobox");

            var parentRemoval = string.Join(
                ", ",
                "#See_also",
                "#References",
                "#Further_reading",
                "#External_links",
                "#Източници",
                "#Външни_препратки");

            var markdownConverter = new Html2Markdown.Converter();
            var htmlParser        = new HtmlParser();
            var articles          = (await Task.WhenAll(
                                         (await Task.WhenAll(new[]
            {
                "https://en.wikipedia.org/wiki/Microsoft",
                "https://en.wikipedia.org/wiki/SignalR",
                "https://en.wikipedia.org/wiki/Event-driven_programming",
                "https://en.wikipedia.org/wiki/Computer_network_programming",
                "https://en.wikipedia.org/wiki/.NET_Framework",
                "https://en.wikipedia.org/wiki/Microsoft_Windows",
                "https://en.wikipedia.org/wiki/MS-DOS",
                "https://en.wikipedia.org/wiki/Programming_language",
                "https://en.wikipedia.org/wiki/International_Organization_for_Standardization",
                "https://en.wikipedia.org/wiki/Graphical_user_interface",
                "https://en.wikipedia.org/wiki/Icon_(computing)",
                "https://en.wikipedia.org/wiki/Text-based_user_interface",
                "https://en.wikipedia.org/wiki/Character_(computing)",
                "https://en.wikipedia.org/wiki/Carriage_return",
                "https://en.wikipedia.org/wiki/Apple_Keyboard",
                "https://bg.wikipedia.org/wiki/%D0%9A%D0%BB%D0%B0%D0%B2%D0%B8%D0%B0%D1%82%D1%83%D1%80%D0%BD%D0%B0_%D0%BF%D0%BE%D0%B4%D1%80%D0%B5%D0%B4%D0%B1%D0%B0",
                "https://bg.wikipedia.org/wiki/%D0%A2%D0%B5%D1%84%D0%BB%D0%BE%D0%BD",
                "https://bg.wikipedia.org/wiki/%D0%9A%D0%BE%D0%BD%D1%86%D0%B5%D1%80%D0%BD",
                "https://bg.wikipedia.org/wiki/%D0%9A%D0%BE%D0%BD%D0%B3%D0%BB%D0%BE%D0%BC%D0%B5%D1%80%D0%B0%D1%82_(%D0%B8%D0%BA%D0%BE%D0%BD%D0%BE%D0%BC%D0%B8%D0%BA%D0%B0)",
                "https://bg.wikipedia.org/wiki/%D0%9F%D1%80%D0%B5%D0%B4%D0%BF%D1%80%D0%B8%D1%8F%D1%82%D0%B8%D0%B5",
                "https://en.wikipedia.org/wiki/Linux",
                "https://en.wikipedia.org/wiki/Unix",
                "https://en.wikipedia.org/wiki/Linus_Torvalds",
                "https://en.wikipedia.org/wiki/Linux_kernel",
                "https://en.wikipedia.org/wiki/Windowing_system",
                "https://en.wikipedia.org/wiki/X86",
                "https://en.wikipedia.org/wiki/GNU",
                "https://en.wikipedia.org/wiki/Richard_Stallman",
                "https://en.wikipedia.org/wiki/Emacs",
                "https://en.wikipedia.org/wiki/GNU_Compiler_Collection",
                "https://en.wikipedia.org/wiki/C%2B%2B17",
                "https://en.wikipedia.org/wiki/CXX",
                "https://en.wikipedia.org/wiki/C_(programming_language)",
            }
                                                             .OrderBy(x => ISeeder.Random.Next())
                                                             .Select(x => context.OpenAsync(x))))
                                         .Select(x =>
            {
                foreach (var element in x.QuerySelectorAll(elementsToRemove))
                {
                    element.Remove();
                }

                var breakingPoint = x.QuerySelector(parentRemoval)?.ParentElement;
                IElement next;
                while ((next = breakingPoint?.NextElementSibling) is { })
                {
                    breakingPoint.Remove();
                    breakingPoint = next;
                }

                var content = htmlParser.ParseDocumentAsync(
                    markdownConverter.Convert(
                        x.GetElementById("mw-content-text").InnerHtml));

                return(Task.Run(async() => new
                {
                    Content = await content,

                    // Trim the last 12 characters of the title instead of using Split: Split did not
                    // treat the "-" in "Заглавие - Уикипедия" as the same character as the one in
                    // "Title - Wikipedia", so splitting on it did not work for both.
                    Title = x.Title.Substring(0, x.Title.Length - 12),
                }));
            })))