Пример #1
0
        /// <summary>
        /// Derive the structure of a Wikimedia page by splitting its markup on headings of the given
        /// level and recursively parsing each piece one level deeper.
        /// </summary>
        /// <remarks>
        /// A heading is a line consisting of exactly <paramref name="level"/> equals signs, a name that
        /// contains no equals sign or line break, and the same number of equals signs again. Spaces or
        /// tabs may follow the closing equals signs, and the heading may be the very last line of the
        /// markup without a trailing line break. The captured name is stored as-is (it is not trimmed).
        /// </remarks>
        /// <param name="title">The title of the section being parsed</param>
        /// <param name="markup">The Wikimedia markup belonging to that section</param>
        /// <param name="level">
        /// The number of equals signs delimiting the headings to split on (eg, "==History==" is at the
        /// second level). Defaults to 2 because language headings use two equals signs (eg, "==English==").
        /// </param>
        /// <returns>
        /// A tree of WikiSections. <c>Content</c> holds the text that precedes the first heading (the whole
        /// markup when there is no heading at this level; left unset when the markup starts with a heading)
        /// and <c>SubSections</c> holds one parsed section per heading, in document order.
        /// </returns>
        internal WikiSection ParseSection(string title, string markup, int level = 2)
        {
            // "{{" and "}}" are string.Format escapes for literal braces, so "{{{0}}}" becomes the regex
            // quantifier "{<level>}". The tail accepts optional trailing blanks and then either one or
            // more line breaks or the end of the input, so a heading on the last line is still found.
            const string RawPattern = @"^={{{0}}}([^=\r\n]+?)={{{0}}}[ \t]*(?:[\r\n]+|\z)";
            var pattern = string.Format(RawPattern, level);

            var matches = Regex.Matches(markup, pattern, RegexOptions.Multiline);

            var section = new WikiSection
            {
                SectionName = title,
                SubSections = new List<WikiSection>()
            };

            // No heading at this level: the section is a leaf and owns all of the markup.
            if (matches.Count == 0)
            {
                section.Content = markup;
                return section;
            }

            // Text in front of the first heading belongs to the section itself.
            if (matches[0].Index > 0)
            {
                section.Content = markup.Substring(0, matches[0].Index);
            }

            for (var i = 0; i < matches.Count; i++)
            {
                var subsectionName = matches[i].Groups[1].Value;

                // The subsection body starts right after the heading line (and the line breaks it consumed).
                var contentStartIndex = matches[i].Index + matches[i].Length;

                var subsectionContent = i == matches.Count - 1
                    ? markup.Substring(contentStartIndex)                                            // last heading: everything that is left
                    : markup.Substring(contentStartIndex, matches[i + 1].Index - contentStartIndex); // up to the start of the next heading

                section.SubSections.Add(ParseSection(subsectionName, subsectionContent, level + 1));
            }

            return section;
        }
Пример #2
0
        /// <summary>
        /// Derive the structure of a Wikimedia page by its sections, subsections, and down the hierarchy:
        /// the markup is cut at every heading of the requested level and each piece is parsed again at
        /// the next level.
        /// </summary>
        /// <remarks>
        /// Only lines of the form <c>==Name==</c> with exactly <paramref name="level"/> equals signs on
        /// each side are treated as headings. The name may not contain an equals sign or a line break.
        /// Trailing spaces or tabs after the closing equals signs are tolerated, and a heading that ends
        /// the markup without a final line break is recognised too. The name is kept exactly as captured.
        /// </remarks>
        /// <param name="title">The title of the section being parsed</param>
        /// <param name="markup">The Wikimedia markup to be parsed</param>
        /// <param name="level">
        /// The numeric level of the hierarchy, i.e. the number of equals signs around the heading
        /// (eg, "==History==" is at the second level). Language headings such as "==English==" use two
        /// equals signs, hence the default of 2.
        /// </param>
        /// <returns>
        /// A tree of WikiSections: the returned node carries the text found before the first heading in
        /// <c>Content</c> (all of the markup if no heading matched; unset if a heading comes first) and
        /// one child per heading in <c>SubSections</c>.
        /// </returns>
        internal WikiSection ParseSection(string title, string markup, int level = 2)
        {
            // Doubled braces are literal braces for string.Format, so the result reads "={<level>}".
            // The heading must be followed by line break(s) or by the end of the input (\z); requiring a
            // line break alone would miss a heading sitting on the final, unterminated line.
            const string HeadingTemplate = @"^={{{0}}}([^=\r\n]+?)={{{0}}}[ \t]*(?:[\r\n]+|\z)";
            var headingPattern = string.Format(HeadingTemplate, level);

            var headings = Regex.Matches(markup, headingPattern, RegexOptions.Multiline);

            // Leaf section: nothing to split, the markup is the content.
            if (headings.Count == 0)
            {
                return new WikiSection
                {
                    SectionName = title,
                    Content = markup,
                    SubSections = new List<WikiSection>()
                };
            }

            var result = new WikiSection
            {
                SectionName = title,
                SubSections = new List<WikiSection>()
            };

            // Anything before the first heading is this section's own text.
            var firstHeadingStart = headings[0].Index;
            if (firstHeadingStart > 0)
            {
                result.Content = markup.Substring(0, firstHeadingStart);
            }

            var lastIndex = headings.Count - 1;
            for (var i = 0; i <= lastIndex; i++)
            {
                var heading = headings[i];

                // Body runs from just after this heading to the next heading, or to the end of the markup.
                var bodyStart = heading.Index + heading.Length;
                var bodyEnd = i == lastIndex ? markup.Length : headings[i + 1].Index;
                var body = markup.Substring(bodyStart, bodyEnd - bodyStart);

                result.SubSections.Add(ParseSection(heading.Groups[1].Value, body, level + 1));
            }

            return result;
        }