Ejemplo n.º 1
0
        /// <summary>
        /// Process a full string block of a markdown table:
        /// split it into rows, create the table with one column per header cell,
        /// then fill the openXML table row by row.
        /// </summary>
        /// <remarks>
        /// The header row is always added. When the second row is missing, empty, or is not matched as
        /// <see cref="ParaPattern.TableHeaderSeparation"/>, processing stops there and the remaining
        /// rows are ignored.
        /// Column alignment is read from the separator row: <c>:---:</c> is centered, <c>---:</c> is
        /// right aligned, anything else is left aligned. The alignment list is only passed along with
        /// the body rows, not with the header row.
        /// </remarks>
        /// <param name="engine">The engine used to create the table and to append its rows</param>
        /// <param name="markdown">The complete markdown table, must be correctly formatted</param>
        private static void ProcessTable(Md2MlEngine engine, string markdown)
        {
            var rows        = Regex.Split(markdown, "\r\n|\r|\n");
            var headerCells = rows[0].Trim('|').Split('|').ToList();
            var table       = engine.CreateTable(headerCells.Count);

            engine.AddTableRow(table, headerCells);

            // The separator row must be validated BEFORE it is handed to the pattern matcher:
            // a one-row table has no second line at all.
            var separatorLine = rows.Length >= 2 ? rows[1] : null;
            if (string.IsNullOrEmpty(separatorLine))
            {
                return; // TODO : Throw error : Table not well formatted
            }

            if (PatternMatcher.GetMarkdownMatch(separatorLine).Key != ParaPattern.TableHeaderSeparation)
            {
                return; // TODO : Throw error : Table not well formatted
            }

            // Define the table alignment properties, one entry per separator cell
            var cellJustification = new List<JustificationValues>();
            foreach (var rawCell in separatorLine.Trim('|').Split('|'))
            {
                // Cells are usually padded ("| :---: |"), so the colons are only at the
                // edges once the surrounding whitespace is removed.
                var cell            = rawCell.Trim();
                var startsWithColon = cell.Length > 0 && cell[0] == ':';
                var endsWithColon   = cell.Length > 0 && cell[cell.Length - 1] == ':';

                var justification = JustificationValues.Left;
                if (endsWithColon)
                {
                    justification = startsWithColon ? JustificationValues.Center : JustificationValues.Right;
                }

                cellJustification.Add(justification);
            }

            // Process the rest of the table
            foreach (var row in rows.Skip(2))
            {
                engine.AddTableRow(table, row.Trim('|').Split('|').ToList(), cellJustification);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Parses markdown text line by line and writes the matching content through the engine.
        /// </summary>
        /// <remarks>
        /// Consecutive ordered-list, unordered-list and table lines are buffered and handed to
        /// <c>ProcessBullets</c> / <c>ProcessTable</c> as one block as soon as a line of another kind
        /// is met, or when the input ends. Every other line is written immediately.
        /// </remarks>
        /// <param name="engine">The engine that receives paragraphs, images, lists and tables</param>
        /// <param name="mdText">The markdown text; lines are separated on <c>'\n'</c></param>
        public static void Parse(Md2MlEngine engine, string mdText)
        {
            // Classify every line first, then process the classified lines in order.
            var lineAndPattern = new List<KeyValuePair<ParaPattern, string[]>>();
            foreach (var rawLine in mdText.Split('\n'))
            {
                lineAndPattern.Add(PatternMatcher.GetParagraphType(rawLine));
            }

            // At most one of these flags is set at a time: a flag is only raised
            // after the pending blocks of the other kinds have been emitted.
            bool inOrderedList   = false;
            bool inUnorderedList = false;
            bool inTable         = false;
            var  bulletItems     = new List<string>();
            var  numberItems     = new List<string>();
            var  tableData       = new List<string>();

            foreach (var line in lineAndPattern)
            {
                var kind = line.Key;

                // Emit the buffered block when the current line is of another kind.
                if (inTable && kind != ParaPattern.Table)
                {
                    ProcessTable(engine, tableData);
                    inTable = false;
                }
                if (inOrderedList && kind != ParaPattern.OrderedList)
                {
                    ProcessBullets(engine, numberItems, true);
                    inOrderedList = false;
                }
                if (inUnorderedList && kind != ParaPattern.UnorderedList)
                {
                    ProcessBullets(engine, bulletItems, false);
                    inUnorderedList = false;
                }

                // Set by the cases that only differ by the paragraph style they use.
                string styleName = null;

                switch (kind)
                {
                case ParaPattern.OrderedList:
                    inOrderedList = true;
                    numberItems.Add(line.Value[1]);
                    break;

                case ParaPattern.UnorderedList:
                    inUnorderedList = true;
                    bulletItems.Add(line.Value[1]);
                    break;

                case ParaPattern.Table:
                    inTable = true;
                    tableData.Add(line.Value[1]);
                    break;

                case ParaPattern.Image:
                    // The returned paragraph is not passed to AddImage; the call is kept
                    // because the original code made it before adding the image.
                    engine.CreateParagraph();

                    var source = line.Value[2];
                    // NOTE(review): the streams are closed right after AddImage returns, which
                    // assumes AddImage reads the stream before returning - confirm.
                    if (source.StartsWith("http://", System.StringComparison.Ordinal) ||
                        source.StartsWith("https://", System.StringComparison.Ordinal))
                    {
                        using (var client = new System.Net.WebClient())
                        using (var remoteImage = client.OpenRead(source))
                        {
                            engine.AddImage(remoteImage);
                        }
                    }
                    else
                    {
                        using (var localImage = System.IO.File.OpenRead(source))
                        {
                            engine.AddImage(localImage);
                        }
                    }
                    break;

                case ParaPattern.CodeBlock:
                    styleName = "CodeBlock";
                    break;

                case ParaPattern.Heading1:
                    styleName = "Heading1";
                    break;

                case ParaPattern.Heading2:
                    styleName = "Heading2";
                    break;

                case ParaPattern.Heading3:
                    styleName = "Heading3";
                    break;

                case ParaPattern.Quote:
                    styleName = "Quote";
                    break;

                case ParaPattern.CommanBlock:
                default:
                    // Plain paragraph: inline styles are resolved by FormatText.
                    var plainParagraph = engine.CreateParagraph();
                    FormatText(engine, plainParagraph, line.Value[1], new FontProperties());
                    break;
                }

                if (styleName != null)
                {
                    var styledParagraph = engine.CreateParagraph(new ParaProperties()
                    {
                        StyleName = styleName
                    });
                    engine.WriteText(styledParagraph, line.Value[1]);
                }
            }

            // End of input: emit whatever block is still buffered.
            if (inTable)
            {
                ProcessTable(engine, tableData);
            }
            if (inOrderedList)
            {
                ProcessBullets(engine, numberItems, true);
            }
            if (inUnorderedList)
            {
                ProcessBullets(engine, bulletItems, false);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Rebuilds the block that starts at the first line of <paramref name="mdText"/>, gathering the
        /// following lines that belong to the same block, and counts how many lines were consumed so the
        /// caller can delete them after processing.
        /// </summary>
        /// <remarks>
        /// Gathering stops at the first empty line, or at the first line that does not continue the block:
        /// <list type="bullet">
        /// <item>a table continues with table or header-separation lines;</item>
        /// <item>a quote continues with quote or plain-text lines;</item>
        /// <item>a plain paragraph continues with plain-text lines unless the previous line ends with a double space;</item>
        /// <item>a list continues with ordered or unordered list lines.</item>
        /// </list>
        /// Table, list and quote lines are joined with a new line (leading <c>&gt;</c> removed).
        /// Paragraph lines are joined on a single line, separated by one space when neither side of the
        /// line break already has whitespace, so that words on both sides do not run together.
        /// </remarks>
        /// <param name="mdText">The text to parse line by line</param>
        /// <param name="pattern">The pattern of the first line. Allows to create a block with same nature (lists, paragraphs...)</param>
        /// <returns>The number of lines consumed (at least 1) and the rebuilt text block</returns>
        private static (int counter, string textBlock) BuildWithoutBreakingLines(string mdText, ParaPattern pattern)
        {
            var lines        = Regex.Split(mdText, "\r\n|\r|\n");
            var previousLine = lines[0].TrimStart('>');
            var output       = new StringBuilder(previousLine);
            int count        = 1;

            if (string.IsNullOrEmpty(previousLine))
            {
                return (counter: count, textBlock: output.ToString());
            }

            bool isListBlock = pattern == ParaPattern.OrderedList || pattern == ParaPattern.UnorderedList;

            // These blocks keep one source line per output line.
            bool keepsLineBreaks = isListBlock ||
                                   pattern == ParaPattern.TableHeaderSeparation ||
                                   pattern == ParaPattern.Table ||
                                   pattern == ParaPattern.Quote;

            foreach (var line in lines.Skip(1))
            {
                // Break directly if processed line is empty
                if (string.IsNullOrEmpty(line))
                {
                    break;
                }
                var linePattern = PatternMatcher.GetMarkdownMatch(line).Key;

                // If first pattern is a table, do not break until pattern does not match to any table pattern
                bool isTableContinuing = pattern == ParaPattern.Table &&
                                         (linePattern == ParaPattern.Table ||
                                          linePattern == ParaPattern.TableHeaderSeparation);

                // Quotes are continuing if first pattern is quote
                // and the current one is a quote or plain text
                bool isQuoteContinuing = pattern == ParaPattern.Quote &&
                                         (linePattern == ParaPattern.Quote ||
                                          linePattern == ParaPattern.AnyChar);

                // Paragraphs are continuing if previous line does not end with a double space
                // and the current line is plain text as well
                bool isParagraphContinuing = !previousLine.EndsWith("  ", StringComparison.Ordinal) &&
                                             pattern == ParaPattern.AnyChar &&
                                             linePattern == pattern;

                bool isListContinuing = isListBlock &&
                                        (linePattern == ParaPattern.OrderedList ||
                                         linePattern == ParaPattern.UnorderedList);

                // Inside a list, a line detected as a code block is re-checked against the list patterns
                // (presumably an indented list item - confirm against PatternMatcher).
                if (isListBlock && !isListContinuing && linePattern == ParaPattern.CodeBlock)
                {
                    try
                    {
                        linePattern = PatternMatcher.GetMatchFromPattern(line, ParaPattern.OrderedList).Key;
                    }
                    catch (Exception)
                    {
                        // NOTE(review): relies on GetMatchFromPattern throwing when the ordered pattern
                        // does not apply; the unordered attempt is not guarded, as in the original code.
                        linePattern = PatternMatcher.GetMatchFromPattern(line, ParaPattern.UnorderedList).Key;
                    }

                    isListContinuing = linePattern == ParaPattern.OrderedList ||
                                       linePattern == ParaPattern.UnorderedList;
                }

                if (!isTableContinuing && !isQuoteContinuing && !isParagraphContinuing && !isListContinuing)
                {
                    break;
                }

                if (keepsLineBreaks)
                {
                    output.AppendLine().Append(line.TrimStart('>'));
                }
                else
                {
                    // A soft line break stands for a space: without it the last word of the
                    // previous line and the first word of this one would be glued together.
                    // Both output and line are non-empty here.
                    if (!char.IsWhiteSpace(output[output.Length - 1]) && !char.IsWhiteSpace(line[0]))
                    {
                        output.Append(' ');
                    }
                    output.Append(line);
                }

                previousLine = line;
                count++;
            }
            return (counter: count, textBlock: output.ToString());
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Parse the content block by block, detecting the real line breaks.
        /// Within markdown it is possible to write a single paragraph over several lines:
        /// if there is no double space before the line break, it is the same paragraph and the
        /// line break is not interpreted as such.
        ///
        /// So this method cuts the content into "blocks" of the same nature:
        /// <list type="number">
        /// <item>Detect the block type from the first line</item>
        /// <item>Process the content according to its type</item>
        /// <item>Remove the processed lines, and continue with the remaining content</item>
        /// </list>
        ///
        /// A block is detected by a <see cref="ParaPattern"/> which has an associated regex.
        /// </summary>
        /// <remarks>
        /// The loop ends when <paramref name="mdText"/> is empty, so it relies on <c>DeleteLines</c>
        /// shortening the text on every iteration.
        /// </remarks>
        /// <param name="engine">Describe an openXML object by code</param>
        /// <param name="mdText">The markdown text to parse</param>
        internal static void Parse(Md2MlEngine engine, string mdText)
        {
            while (!string.IsNullOrEmpty(mdText))
            {
                var       firstLine      = GetFirstLine(mdText);
                var       matchedPattern = PatternMatcher.GetMarkdownMatch(firstLine);
                Paragraph para;
                (int counter, string textBlock) rebuildText = default;

                switch (matchedPattern.Key)
                {
                case ParaPattern.InfiniteHeading:
                    // The heading level is the number of '#', capped at 9.
                    int titleLvl = matchedPattern.Value.Groups[1].Value.Count(c => c == '#');
                    if (titleLvl > 9)
                    {
                        titleLvl = 9;
                    }
                    para = engine.CreateParagraph(new ParaProperties()
                    {
                        StyleName = $"Heading{titleLvl}"
                    });
                    engine.WriteText(para, matchedPattern.Value.Groups[2].Value);
                    mdText = DeleteLines(mdText);
                    break;

                case ParaPattern.Image:
                    var link = matchedPattern.Value.Groups[2].Value;
                    if (link.StartsWith("http://", System.StringComparison.Ordinal) ||
                        link.StartsWith("https://", System.StringComparison.Ordinal))
                    {
                        // NOTE(review): the download stream is closed right after AddImage returns,
                        // which assumes AddImage reads the stream before returning - confirm.
                        using (var client = new System.Net.WebClient())
                        using (var remoteImage = client.OpenRead(link))
                        {
                            engine.AddImage(remoteImage);
                        }
                    }
                    else
                    {
                        engine.AddImage(ConvertRelativeToAbsolutePath(link, engine.GetFileDirectory()));
                    }
                    mdText = DeleteLines(mdText);
                    break;

                case ParaPattern.Table:
                    //case ParaPattern.TableHeaderSeparation:
                    rebuildText = BuildWithoutBreakingLines(mdText, ParaPattern.Table);
                    ProcessTable(engine, rebuildText.textBlock);
                    mdText = DeleteLines(mdText, rebuildText.counter);
                    break;

                case ParaPattern.OrderedList:
                case ParaPattern.UnorderedList:
                    // TODO : Ordered list can contain unordered items inside and vice versa
                    // Both list kinds are gathered with the OrderedList pattern.
                    rebuildText = BuildWithoutBreakingLines(mdText, ParaPattern.OrderedList);
                    ProcessBullets(engine, rebuildText.textBlock);
                    mdText = DeleteLines(mdText, rebuildText.counter);
                    break;

                case ParaPattern.CodeBlock:
                    // TODO : Improve the rendering - not a priority for my needs
                    rebuildText = BuildWithoutBreakingLines(mdText, ParaPattern.CodeBlock);
                    para        = engine.CreateParagraph(new ParaProperties()
                    {
                        StyleName = DocStyles.CodeBlock.ToDescriptionString()
                    });
                    FormatText(engine, para, rebuildText.textBlock, new StyleProperties());
                    mdText = DeleteLines(mdText, rebuildText.counter);
                    break;

                case ParaPattern.Quote:
                    // Markdown supports nested quotes, but word does not
                    // So whatever, put the "nested" paragraph in the same quote
                    rebuildText = BuildWithoutBreakingLines(mdText, ParaPattern.Quote);
                    para        = engine.CreateParagraph(new ParaProperties()
                    {
                        StyleName = DocStyles.Quote.ToDescriptionString()
                    });
                    // Each quoted line is written after a break, the first one included.
                    foreach (var text in Regex.Split(rebuildText.textBlock, "\r\n|\r|\n"))
                    {
                        para.AppendChild(new Break());
                        FormatText(engine, para, text, new StyleProperties());
                    }
                    mdText = DeleteLines(mdText, rebuildText.counter);
                    break;

                case ParaPattern.AnyChar:
                default:
                    // Anything else is a plain paragraph, gathered with its own pattern.
                    rebuildText = BuildWithoutBreakingLines(mdText, matchedPattern.Key);
                    para        = engine.CreateParagraph();
                    FormatText(engine, para, rebuildText.textBlock, new StyleProperties());
                    mdText = DeleteLines(mdText, rebuildText.counter);
                    break;
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Format a paragraph which could contain some text styles: bold, italic, images and so on.
        /// The markdown is split around the first pattern found, then the text before it, the styled
        /// text (or image) and the text after it are appended, in that order, to the same paragraph.
        /// </summary>
        /// <remarks>
        /// The three parts are processed recursively, so the text before and after the pattern may
        /// contain further patterns.
        /// NOTE(review): <paramref name="fontProperties"/> is only used when <paramref name="markdown"/>
        /// contains no pattern; when a pattern is found, each part gets fresh properties, so styles
        /// are not combined when patterns are nested.
        /// </remarks>
        /// <param name="core">The openXML object with a document, a body and a paragraph</param>
        /// <param name="paragraph">The Paragraph object previously created</param>
        /// <param name="markdown">The string to be processed</param>
        /// <param name="fontProperties">Style properties applied to the text when it contains no pattern</param>
        internal static void FormatText(Md2MlEngine core, Paragraph paragraph, string markdown,
                                        StyleProperties fontProperties)
        {
            // Plain text: write it as is with the requested style.
            if (!PatternMatcher.HasPatterns(markdown))
            {
                core.WriteText(paragraph, markdown, fontProperties);
                return;
            }

            var parts          = PatternMatcher.GetPatternsAndNonPatternText(markdown);
            var matchedStyle   = new StyleProperties();
            string delimiter;

            switch (parts.Key)
            {
            case StylePattern.BoldAndItalic:
                matchedStyle.Bold   = true;
                matchedStyle.Italic = true;
                delimiter           = "***";
                break;

            case StylePattern.Bold:
                matchedStyle.Bold = true;
                delimiter         = "**";
                break;

            case StylePattern.Italic:
                matchedStyle.Italic = true;
                delimiter           = "*";
                break;

            case StylePattern.MonospaceOrCode:
                matchedStyle.StyleName = DocStyles.CodeReference.ToDescriptionString();
                delimiter              = "`";
                break;

            case StylePattern.Strikethrough:
                matchedStyle.Strikeout = true;
                delimiter              = "~~";
                break;

            case StylePattern.Underline:
                matchedStyle.Underline = UnderlineValues.Single;
                delimiter              = "__";
                break;

            case StylePattern.Image:
                // An image is inserted instead of styled text; its path is the second group of the match.
                var imageMatch = PatternMatcher.GetStyleMatch(parts.Value[1]);
                FormatText(core, paragraph, parts.Value[0], new StyleProperties());
                core.AddImage(ConvertRelativeToAbsolutePath(imageMatch.Value.Groups[2].Value, core.GetFileDirectory()), paragraph);
                FormatText(core, paragraph, FramePendingString(parts.Value, ""), new StyleProperties());
                return;

            default:
                // No handler for this pattern: write the text unformatted instead of dropping it.
                core.WriteText(paragraph, markdown, fontProperties);
                return;
            }

            // Text before the pattern, the styled text, then whatever follows it.
            FormatText(core, paragraph, parts.Value[0], new StyleProperties());
            FormatText(core, paragraph, parts.Value[1], matchedStyle);
            FormatText(core, paragraph, FramePendingString(parts.Value, delimiter), new StyleProperties());
        }