Example #1
0
 /// <inheritdoc cref="ParseAsync(WikiSite,IWikiFamily,string,int)"/>
 /// <summary>
 /// Parses a new instance from a Wikilink expression on the specified Wiki family.
 /// This overload has no originating site: the target interwiki site is resolved with the
 /// interwiki prefixes provided by the <see cref="IWikiFamily"/> instance, so
 /// <paramref name="text"/> is required to have an interwiki prefix.
 /// </summary>
 /// <exception cref="ArgumentNullException"><paramref name="family"/> is <c>null</c>.</exception>
 public static Task <WikiLink> ParseAsync(IWikiFamily family, string text, int defaultNamespaceId)
     // The family is validated here, before delegating with a null originating site.
     => ParseAsync(null, family ?? throw new ArgumentNullException(nameof(family)), text, defaultNamespaceId);
Example #2
0
 /// <summary>
 /// Builds a new instance by parsing the specified Wikilink expression.
 /// Parsing failures are reported by throwing rather than by returning <c>null</c>.
 /// </summary>
 /// <param name="site">Site instance.</param>
 /// <param name="family">Wiki family. Provide this argument if you want the expression to be parsed into interwiki links.</param>
 /// <param name="text">Wikilink expression, without square brackets.</param>
 /// <param name="defaultNamespaceId">Id of the default namespace. See <see cref="BuiltInNamespaces"/> for a list of possible values.</param>
 /// <exception cref="ArgumentNullException">Either <paramref name="site"/> or <paramref name="text"/> is <c>null</c>.</exception>
 /// <exception cref="ArgumentException"><paramref name="text"/> does not contain a valid page title.</exception>
 public static Task <WikiLink> ParseAsync(WikiSite site, IWikiFamily family, string text, int defaultNamespaceId)
     => ParseInternalAsync(site, family, text, defaultNamespaceId, exceptionOnFailure: true);
Example #3
0
 /// <summary>
 /// Builds a new instance by parsing the specified Wikilink expression,
 /// passing <c>0</c> as the default namespace id.
 /// </summary>
 /// <param name="site">Site instance.</param>
 /// <param name="family">Wiki family. Provide this argument if you want the expression to be parsed into interwiki links.</param>
 /// <param name="text">Wikilink expression, without square brackets.</param>
 /// <exception cref="ArgumentNullException">Either <paramref name="site"/> or <paramref name="text"/> is <c>null</c>.</exception>
 /// <exception cref="ArgumentException"><paramref name="text"/> does not contain a valid page title.</exception>
 public static Task <WikiLink> ParseAsync(WikiSite site, IWikiFamily family, string text)
     => ParseAsync(site, family, text, defaultNamespaceId: 0);
Example #4
0
        /// <summary>
        /// Splits a title expression into its interwiki prefix, namespace name and page title.
        /// </summary>
        /// <param name="site">Site whose namespaces and interwiki map are consulted. Must not be <c>null</c>.
        /// It is replaced locally by the site obtained from <paramref name="family"/> whenever an interwiki prefix is resolved.</param>
        /// <param name="family">Optional wiki family used to resolve interwiki prefixes into sites; may be <c>null</c>.</param>
        /// <param name="rawTitle">Title expression to split. Must not be <c>null</c>.</param>
        /// <param name="defaultNamespace">Id of the namespace whose name is used when the expression
        /// carries neither a namespace nor an interwiki prefix.</param>
        /// <returns>A tuple of (interwiki prefix, namespace name, page title).</returns>
        /// <exception cref="ArgumentException"><paramref name="rawTitle"/> does not contain a page title.</exception>
        private static async Task <Tuple <string, string, string> > TitlePartitionAsync(WikiSite site, IWikiFamily family, string rawTitle, int defaultNamespace)
        {
            // Tuple<interwiki, namespace, title>
            Debug.Assert(site != null);
            Debug.Assert(rawTitle != null);
            var title = rawTitle;

            if (title.Length == 0)
            {
                goto EMPTY_TITLE;
            }
            var state = 0;

            /*
             * state  accepts
             * 0      LeadingBlank
             * 1      Namespace / Interwiki
             * 2      Page title
             */
            string interwiki = null, nsname = null, pagetitle = null;

            while (title != null)
            {
                // Look at the text before the first colon; the rest (if any) is handled in the next round.
                var parts = title.Split(new[] { ':' }, 2);
                var part  = parts[0].Trim(' ', '_');
                switch (state)
                {
                case 0:
                    if (part.Length > 0)
                    {
                        goto case 1;
                    }
                    if (parts.Length == 1)
                    {
                        // Only blanks/underscores remain and no colon follows, so there is no page title.
                        // Without this check, parts[1] below would be out of range.
                        goto EMPTY_TITLE;
                    }
                    // Initial colon indicates main namespace rather than default.
                    nsname = "";
                    state  = 1;
                    break;

                case 1:
                    // Make sure there's a colon ahead; otherwise we just treat it as a normal title.
                    if (parts.Length == 1)
                    {
                        goto case 2;
                    }
                    NamespaceInfo ns;
                    if (site.Namespaces.TryGetValue(part, out ns))
                    {
                        // This is a namespace name.
                        nsname = ns.CustomName;
                        state  = 2;
                    }
                    else if (family != null)
                    {
                        var normalizedPart = family.TryNormalize(part);
                        if (normalizedPart == null)
                        {
                            // The family does not know this prefix: keep it as part of a normal title
                            // instead of silently discarding it.
                            goto case 2;
                        }
                        var nextSite = await family.GetSiteAsync(part);

                        if (nextSite == null)
                        {
                            // No site behind the prefix: likewise keep it as part of a normal title.
                            goto case 2;
                        }
                        // We have bumped into another wiki, hooray!
                        interwiki = normalizedPart;
                        site      = nextSite;
                        // state is left unchanged, to parse namespace or other interwikis (rare)
                    }
                    else if (site.InterwikiMap.Contains(part))
                    {
                        // Otherwise, check whether this is an interwiki prefix.
                        interwiki = part.ToLowerInvariant();
                        // For interwiki, we do not parse namespace name.
                        // Instead, we treat it as a part of page title.
                        nsname = null;
                        state  = 2;
                    }
                    else
                    {
                        // So this is only the beginning of a normal title.
                        goto case 2;
                    }
                    break;

                case 2:
                    pagetitle = Utility.NormalizeTitlePart(title, site.SiteInfo.IsTitleCaseSensitive);
                    goto END_OF_PARSING;
                }
                // Every path that reaches this point has seen a colon, so parts has two elements.
                title = parts[1];
            }
END_OF_PARSING:
            Debug.Assert(pagetitle != null, "pagetitle != null");
            if (pagetitle.Length == 0)
            {
                goto EMPTY_TITLE;
            }
            // nsname == null means that the expression has interwiki prefix, while family == null
            if (nsname == null && interwiki == null)
            {
                nsname = site.Namespaces[defaultNamespace].CustomName;
            }
            return(Tuple.Create(interwiki, nsname, pagetitle));

EMPTY_TITLE:
            throw new ArgumentException($"The title \"{rawTitle}\" does not contain page title.");
        }
Example #5
0
        /// <summary>
        /// Parses a Wikilink expression (<c>title#section|anchor</c>) into a <see cref="WikiLink"/>.
        /// </summary>
        /// <param name="site">Originating site. Must not be <c>null</c>.</param>
        /// <param name="family">Optional wiki family used to resolve interwiki prefixes into target sites.</param>
        /// <param name="text">Wikilink expression to parse. Must not be <c>null</c>.</param>
        /// <param name="defaultNamespaceId">Id of the namespace used when the expression has no namespace or interwiki prefix.</param>
        /// <param name="exceptionOnFailure"><c>true</c> to throw <see cref="ArgumentException"/> when the title is invalid;
        /// <c>false</c> to return <c>null</c> instead.</param>
        /// <returns>The parsed link, or <c>null</c> when the title is invalid and <paramref name="exceptionOnFailure"/> is <c>false</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="site"/> or <paramref name="text"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">The title is invalid and <paramref name="exceptionOnFailure"/> is <c>true</c>.</exception>
        private static async Task <WikiLink> ParseInternalAsync(WikiSite site, IWikiFamily family, string text, int defaultNamespaceId, bool exceptionOnFailure)
        {
            if (site == null)
            {
                throw new ArgumentNullException(nameof(site));
            }
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }
            var link = new WikiLink(site, text);
            //preprocess text (these changes aren't site-dependent)
            //First remove anchor, which is stored unchanged, if there is one
            var parts = text.Split(new[] { '|' }, 2);
            var title = parts[0];

            link.Anchor = parts.Length > 1 ? parts[1] : null;
            //This code was adapted from Title.php : secureAndSplit()
            if (title.IndexOf('\ufffd') >= 0)
            {
                if (exceptionOnFailure)
                {
                    throw new ArgumentException("Title contains illegal char (\\uFFFD 'REPLACEMENT CHARACTER')",
                                                nameof(text));
                }
                return(null);
            }
            //Then split off the section, i.e. everything after the first '#'.
            parts        = title.Split(new[] { '#' }, 2);
            title        = parts[0];
            link.Section = parts.Length > 1 ? parts[1] : null;
            var match = IllegalTitlesPattern.Match(title);

            if (match.Success)
            {
                if (exceptionOnFailure)
                {
                    throw new ArgumentException($"Title contains illegal char sequence: {match.Value} .");
                }
                return(null);
            }
            //Parse title parts.
            Tuple<string, string, string> parsedTitle;
            try
            {
                parsedTitle = await TitlePartitionAsync(site, family, title, defaultNamespaceId);
            }
            catch (ArgumentException ex) when (!exceptionOnFailure && ex.GetType() == typeof(ArgumentException))
            {
                // TitlePartitionAsync reports a missing page title with a plain ArgumentException.
                // In non-throwing mode this is a parse failure like the ones above, so return null.
                // Derived exception types are not caught, so genuine argument errors still surface.
                return(null);
            }

            link.InterwikiPrefix = parsedTitle.Item1;
            link.NamespaceName   = parsedTitle.Item2;
            link.Title           = parsedTitle.Item3;
            link.FullTitle       = link.Title;
            if (link.InterwikiPrefix == null)
            {
                link.TargetSite = link.Site;
            }
            else if (family != null)
            {
                link.TargetSite = await family.GetSiteAsync(link.InterwikiPrefix);

                Debug.Assert(link.TargetSite != null);
            }
            else
            {
                // If we do not have wiki family information, and there IS an interwiki prefix,
                // subsequent namespace will not be parsed and will be left as a part of Name
                Debug.Assert(parsedTitle.Item2 == null);
            }
            // Resolve the namespace only when both its name and the target site are known.
            // The assertions above are compiled out of release builds, so guard against null here.
            link.Namespace = parsedTitle.Item2 == null || link.TargetSite == null
                ? null
                : link.TargetSite.Namespaces[parsedTitle.Item2];
            //Format expression.
            var sb = new StringBuilder();

            if (link.InterwikiPrefix != null)
            {
                sb.Append(link.InterwikiPrefix);
                sb.Append(':');
            }
            if (!string.IsNullOrEmpty(link.NamespaceName))
            {
                sb.Append(link.NamespaceName);
                sb.Append(':');
                link.FullTitle = link.NamespaceName + ":" + link.Title;
            }
            sb.Append(link.Title);
            if (link.Section != null)
            {
                sb.Append('#');
                sb.Append(link.Section);
            }
            if (link.Anchor != null)
            {
                sb.Append('|');
                sb.Append(link.Anchor);
            }
            link._FormattedText = sb.ToString();
            return(link);
        }
Example #6
0
 /// <inheritdoc cref="ParseAsync(IWikiFamily,string,int)"/>
 /// <remarks>This overload passes <c>0</c> as the default namespace id.</remarks>
 public static Task <WikiLink> ParseAsync(IWikiFamily family, string text)
     => ParseAsync(family, text, defaultNamespaceId: 0);
Example #7
0
        /// <summary>
        /// Splits a title expression into its target site, interwiki prefix, namespace name and page title.
        /// </summary>
        /// <param name="site">Originating site, or <c>null</c> when only <paramref name="family"/> is available.
        /// It is replaced locally by the site obtained from <paramref name="family"/> whenever an interwiki prefix is resolved.</param>
        /// <param name="family">Wiki family used to resolve interwiki prefixes into sites, or <c>null</c>.
        /// At least one of <paramref name="site"/> and <paramref name="family"/> must be provided.</param>
        /// <param name="rawTitle">Title expression to split. Must not be <c>null</c>.</param>
        /// <param name="defaultNamespace">Id of the namespace whose name is used when the expression
        /// carries neither a namespace nor an interwiki prefix.</param>
        /// <returns>A tuple of (site, interwiki prefix, namespace name, page title),
        /// or <c>null</c> when the expression does not contain a page title.
        /// The site item is <c>null</c> when no site was given and none was resolved.</returns>
        private static async Task <Tuple <WikiSite, string, string, string> > TitlePartitionAsync(WikiSite site, IWikiFamily family, string rawTitle, int defaultNamespace)
        {
            // Tuple<targetSite, interwiki, namespace, title>
            Debug.Assert(site != null || family != null);
            Debug.Assert(rawTitle != null);
            var title = rawTitle;

            if (title.Length == 0)
            {
                return(null);
            }
            var state = 0;

            /*
             * state  accepts
             * 0      LeadingBlank
             * 1      Namespace / Interwiki
             * 2      Page title
             */
            string interwiki = null, nsname = null, pagetitle = null;

            while (title != null)
            {
                // Look at the text before the first colon; the rest (if any) is handled in the next round.
                var parts = title.Split(new[] { ':' }, 2);
                var part  = parts[0].Trim(' ', '_');
                switch (state)
                {
                case 0:
                    if (part.Length > 0)
                    {
                        goto case 1;
                    }
                    if (parts.Length == 1)
                    {
                        // Only blanks/underscores remain and no colon follows, so there is no page title.
                        // Without this check, parts[1] below would be out of range.
                        return(null);
                    }
                    // Initial colon indicates main namespace rather than default.
                    nsname = "";
                    state  = 1;
                    break;

                case 1:
                    // Make sure there's a colon ahead; otherwise we just treat it as a normal title.
                    if (parts.Length == 1)
                    {
                        goto case 2;
                    }
                    string normalizedInterwikiPrefix;
                    if (site != null && site.Namespaces.TryGetValue(part, out var ns))
                    {
                        // This is a namespace name.
                        nsname = ns.CustomName;
                        state  = 2;
                    }
                    else if (family != null && (normalizedInterwikiPrefix = family.TryNormalize(part)) != null)
                    {
                        // This is a known prefix in the specified WikiFamily.
                        var nextSite = await family.GetSiteAsync(part);

                        if (nextSite == null)
                        {
                            Debug.Assert(false, $"{family} returned null for prefix: {normalizedInterwikiPrefix}. " +
                                         "IWikiFamily.TryNormalize should return null for in-existent interwiki prefixes.");
                            // The assertion is compiled out of release builds. Keep the prefix as part of
                            // a normal title there instead of silently discarding it.
                            goto case 2;
                        }
                        // We have bumped into another wiki, hooray!
                        interwiki = normalizedInterwikiPrefix;
                        site      = nextSite;
                        // state is left unchanged, to parse namespace or other interwikis (rare)
                    }
                    else if (site != null && site.InterwikiMap.Contains(part))
                    {
                        // Otherwise, check whether this is an interwiki prefix.
                        interwiki = part.ToLowerInvariant();
                        // For interwiki, we do not parse namespace name.
                        // Instead, we treat it as a part of page title.
                        nsname = null;
                        state  = 2;
                    }
                    else
                    {
                        // So this is only the beginning of a normal title.
                        goto case 2;
                    }
                    break;

                case 2:
                    // With no site at hand, the title is normalized as case-sensitive.
                    pagetitle = Utility.NormalizeTitlePart(title, site?.SiteInfo.IsTitleCaseSensitive ?? true);
                    goto END_OF_PARSING;
                }
                // Every path that reaches this point has seen a colon, so parts has two elements.
                title = parts[1];
            }
END_OF_PARSING:
            Debug.Assert(pagetitle != null, "pagetitle != null");
            if (pagetitle.Length == 0)
            {
                return(null);
            }
            // nsname == null but interwiki != null means that the expression has interwiki prefix, while family == null
            if (nsname == null && interwiki == null)
            {
                // If site is (still) null, we will have error reported in the caller.
                nsname = site?.Namespaces[defaultNamespace].CustomName;
            }
            return(Tuple.Create(site, interwiki, nsname, pagetitle));
        }
Example #8
0
        /// <summary>
        /// Parses a Wikilink expression (<c>title#section|anchor</c>) into a <see cref="WikiLink"/>.
        /// </summary>
        /// <param name="site">Originating site; may be <c>null</c> when <paramref name="family"/> is given.</param>
        /// <param name="family">Wiki family used to resolve interwiki prefixes; may be <c>null</c> when <paramref name="site"/> is given.</param>
        /// <param name="text">Wikilink expression to parse. Must not be <c>null</c>.</param>
        /// <param name="defaultNamespaceId">Id of the namespace used when the expression has no namespace or interwiki prefix.</param>
        /// <param name="exceptionOnFailure"><c>true</c> to throw <see cref="ArgumentException"/> on an invalid expression;
        /// <c>false</c> to return <c>null</c> instead.</param>
        /// <returns>The parsed link, or <c>null</c> when parsing fails and <paramref name="exceptionOnFailure"/> is <c>false</c>.</returns>
        /// <exception cref="ArgumentNullException">Both <paramref name="site"/> and <paramref name="family"/> are <c>null</c>,
        /// or <paramref name="text"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">The expression is invalid and <paramref name="exceptionOnFailure"/> is <c>true</c>.</exception>
        private static async Task <WikiLink> ParseInternalAsync(WikiSite site, IWikiFamily family, string text, int defaultNamespaceId, bool exceptionOnFailure)
        {
            if (site == null && family == null)
            {
                throw new ArgumentNullException(nameof(site) + "/" + nameof(family));
            }
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            // Site-independent preprocessing: everything after the first '|' is the anchor, kept verbatim.
            string title  = text;
            string anchor = null;
            var pipeIndex = text.IndexOf('|');

            if (pipeIndex >= 0)
            {
                title  = text.Substring(0, pipeIndex);
                anchor = text.Substring(pipeIndex + 1);
            }

            // This code was adapted from Title.php : secureAndSplit()
            if (title.IndexOf('\ufffd') >= 0)
            {
                if (!exceptionOnFailure)
                {
                    return(null);
                }
                throw new ArgumentException(string.Format(Prompts.ExceptionTitleIllegalCharacter1, "\uFFFD 'REPLACEMENT CHARACTER'"), nameof(text));
            }

            // Everything after the first '#' is the section.
            string section = null;
            var hashIndex  = title.IndexOf('#');

            if (hashIndex >= 0)
            {
                section = title.Substring(hashIndex + 1);
                title   = title.Substring(0, hashIndex);
            }

            var illegalMatch = IllegalTitlesPattern.Match(title);

            if (illegalMatch.Success)
            {
                if (!exceptionOnFailure)
                {
                    return(null);
                }
                throw new ArgumentException(string.Format(Prompts.ExceptionTitleIllegalCharacterSequence1, illegalMatch.Value));
            }

            // Split the title into site / interwiki / namespace / local title.
            var partition = await TitlePartitionAsync(site, family, title, defaultNamespaceId);

            if (partition == null)
            {
                if (!exceptionOnFailure)
                {
                    return(null);
                }
                throw new ArgumentException(string.Format(Prompts.ExceptionTitleIsEmpty1, title));
            }

            var targetSite      = partition.Item1;
            var interwikiPrefix = partition.Item2;
            var nsPrefix        = partition.Item3;
            var localTitle      = partition.Item4;

            if (targetSite == null)
            {
                if (interwikiPrefix == null)
                {
                    // No interwiki prefix and no site: the link was parsed without an originating WikiSite.
                    Debug.Assert(site == null);
                    if (!exceptionOnFailure)
                    {
                        return(null);
                    }
                    throw new ArgumentException(Prompts.ExceptionWikiLinkRequireInterwikiPrefix, nameof(text));
                }
                // If we do not have wiki family information, and there IS an interwiki prefix,
                // subsequent namespace will not be parsed and will be left as a part of Name
                Debug.Assert(nsPrefix == null);
                Debug.Assert(localTitle != null);
            }

            var link = new WikiLink(site, text);

            link.Anchor          = anchor;
            link.Section         = section;
            link.InterwikiPrefix = interwikiPrefix;
            link.NamespaceName   = nsPrefix;
            link.Title           = localTitle;
            link.FullTitle       = localTitle;
            link.TargetSite      = targetSite;

            // The namespace can only be looked up when both its name and the target site are known.
            if (nsPrefix != null && link.TargetSite != null)
            {
                link.Namespace = link.TargetSite.Namespaces[nsPrefix];
            }
            else
            {
                link.Namespace = null;
            }

            // Build the formatted expression: [interwiki:][namespace:]title[#section][|anchor]
            var builder = new StringBuilder();

            if (link.InterwikiPrefix != null)
            {
                builder.Append(link.InterwikiPrefix).Append(':');
            }
            // Remember where the full title begins, i.e. just past the interwiki prefix (if any).
            var fullTitleOffset = builder.Length;

            if (!string.IsNullOrEmpty(link.NamespaceName))
            {
                builder.Append(link.NamespaceName).Append(':');
                link.FullTitle = link.NamespaceName + ":" + link.Title;
            }
            builder.Append(link.Title);
            if (link.Section != null)
            {
                builder.Append('#').Append(link.Section);
            }
            link.Target = builder.ToString();
            if (fullTitleOffset == 0)
            {
                // Without an interwiki prefix, the target already is the full title plus section.
                link.FullTitleAndSection = link.Target;
            }
            else
            {
                link.FullTitleAndSection = builder.ToString(fullTitleOffset, builder.Length - fullTitleOffset);
            }
            if (link.Anchor != null)
            {
                builder.Append('|').Append(link.Anchor);
            }
            link._FormattedText = builder.ToString();
            return(link);
        }
    public static async Task <(WikiSite site, string title)> ResolveSiteAndTitleAsync(string expr, IWikiFamily wikiFamily)
    {
        if (string.IsNullOrEmpty(expr))
        {
            throw new ArgumentException("Value cannot be null or empty.", nameof(expr));
        }
        var parts = expr.Split(':', 2);
        var site  = await wikiFamily.GetSiteAsync(parts[0]);

        return(site, parts[1]);