/// <summary>
/// Splits an expression of the form <c>prefix:title</c> at its first colon and
/// resolves the prefix to a site through <paramref name="wikiFamily"/>.
/// </summary>
/// <param name="expr">Expression to split; everything after the first colon is returned as the title.</param>
/// <param name="wikiFamily">Family whose <c>GetSiteAsync</c> is called with the prefix.</param>
/// <returns>
/// The site returned by <c>wikiFamily.GetSiteAsync</c> for the prefix, and the remaining title text.
/// NOTE(review): the site may presumably be null for an unknown prefix — confirm against IWikiFamily.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="expr"/> is null, empty, or contains no colon.</exception>
/// <exception cref="ArgumentNullException"><paramref name="wikiFamily"/> is null.</exception>
public static async Task <(WikiSite site, string title)> ResolveSiteAndTitleAsync(string expr, IWikiFamily wikiFamily)
    {
        if (string.IsNullOrEmpty(expr))
        {
            throw new ArgumentException("Value cannot be null or empty.", nameof(expr));
        }
        if (wikiFamily == null)
        {
            throw new ArgumentNullException(nameof(wikiFamily));
        }
        // Split only at the first colon so that the title may itself contain colons.
        var parts = expr.Split(':', 2);
        if (parts.Length < 2)
        {
            // Without a colon there is no site prefix; fail with a descriptive error
            // instead of an IndexOutOfRangeException on parts[1].
            throw new ArgumentException("Expression must be in the form of \"prefix:title\".", nameof(expr));
        }
        var site = await wikiFamily.GetSiteAsync(parts[0]);

        return (site, parts[1]);
    }
예제 #2
0
        /// <summary>
        /// Splits a raw title expression into its interwiki prefix, namespace name and page title.
        /// </summary>
        /// <param name="site">Site whose namespaces are consulted first; without a family its interwiki map is consulted too. Must not be null.</param>
        /// <param name="family">Optional wiki family used to resolve interwiki prefixes to other sites; may be null.</param>
        /// <param name="rawTitle">The title expression to split. Must not be null.</param>
        /// <param name="defaultNamespace">Id of the namespace used when the expression has neither a namespace nor an interwiki prefix.</param>
        /// <returns>
        /// A tuple of (interwiki prefix, namespace name, page title). The interwiki prefix is null when none
        /// was recognized. The namespace name is an empty string after a leading colon, and may be null
        /// when an interwiki prefix was recognized but no namespace was.
        /// </returns>
        /// <exception cref="ArgumentException">The expression does not contain a page title.</exception>
        private static async Task <Tuple <string, string, string> > TitlePartitionAsync(WikiSite site, IWikiFamily family, string rawTitle, int defaultNamespace)
        {
            // Tuple<interwiki, namespace, title>
            Debug.Assert(site != null);
            Debug.Assert(rawTitle != null);
            var title = rawTitle;

            if (title.Length == 0)
            {
                goto EMPTY_TITLE;
            }
            var state = 0;

            /*
             * state  accepts
             * 0      LeadingBlank
             * 1      Namespace / Interwiki
             * 2      Page title
             */
            string interwiki = null, nsname = null, pagetitle = null;

            while (title != null)
            {
                // Peel off one colon-delimited segment at a time; parts[1] (if any) is the unparsed remainder.
                var parts = title.Split(new[] { ':' }, 2);
                var part  = parts[0].Trim(' ', '_');
                switch (state)
                {
                case 0:
                    if (part.Length > 0)
                    {
                        goto case 1;
                    }
                    if (parts.Length == 1)
                    {
                        // Only blanks/underscores and no colon: there is no remainder to continue with,
                        // so report an empty title instead of indexing past the end of parts.
                        goto EMPTY_TITLE;
                    }
                    // Initial colon indicates main namespace rather than default.
                    nsname = "";
                    state  = 1;
                    break;

                case 1:
                    // Make sure there's a colon ahead; otherwise we just treat it as a normal title.
                    if (parts.Length == 1)
                    {
                        goto case 2;
                    }
                    NamespaceInfo ns;
                    if (site.Namespaces.TryGetValue(part, out ns))
                    {
                        // This is a namespace name.
                        nsname = ns.CustomName;
                        state  = 2;
                        break;
                    }
                    if (family != null)
                    {
                        var      normalizedPart = family.TryNormalize(part);
                        WikiSite nextSite       = null;

                        if (normalizedPart != null)
                        {
                            nextSite = await family.GetSiteAsync(part);
                        }
                        if (nextSite == null)
                        {
                            // The family does not know this prefix. Keep it as the beginning of the
                            // page title rather than silently dropping it from the expression.
                            goto case 2;
                        }
                        // We have bumped into another wiki, hooray!
                        interwiki = normalizedPart;
                        site      = nextSite;
                        // state will still be 1, to parse namespace or other interwikis (rare)
                        break;
                    }
                    if (site.InterwikiMap.Contains(part))
                    {
                        // Without family information, check whether this is an interwiki prefix.
                        interwiki = part.ToLowerInvariant();
                        // For interwiki, we do not parse namespace name.
                        // Instead, we treat it as a part of page title.
                        nsname = null;
                        state  = 2;
                        break;
                    }
                    // So this is only the beginning of a normal title.
                    goto case 2;

                case 2:
                    pagetitle = Utility.NormalizeTitlePart(title, site.SiteInfo.IsTitleCaseSensitive);
                    goto END_OF_PARSING;
                }
                // Every path that reaches here has seen a colon, so parts has two elements.
                title = parts[1];
            }
END_OF_PARSING:
            Debug.Assert(pagetitle != null, "pagetitle != null");
            if (pagetitle.Length == 0)
            {
                goto EMPTY_TITLE;
            }
            // nsname == null means that the expression has interwiki prefix, while family == null
            if (nsname == null && interwiki == null)
            {
                nsname = site.Namespaces[defaultNamespace].CustomName;
            }
            return Tuple.Create(interwiki, nsname, pagetitle);

EMPTY_TITLE:
            throw new ArgumentException($"The title \"{rawTitle}\" does not contain page title.");
        }
예제 #3
0
        /// <summary>
        /// Parses wikilink text of the form <c>title#section|anchor</c> into a <see cref="WikiLink"/>.
        /// </summary>
        /// <param name="site">Site the link text belongs to.</param>
        /// <param name="family">Optional wiki family used to resolve interwiki prefixes; may be null.</param>
        /// <param name="text">The link text to parse.</param>
        /// <param name="defaultNamespaceId">Namespace id passed to the title partitioning step as the default namespace.</param>
        /// <param name="exceptionOnFailure">
        /// When true, malformed titles raise <see cref="ArgumentException"/>; when false, null is returned instead.
        /// </param>
        /// <returns>The parsed link, or null when parsing fails and <paramref name="exceptionOnFailure"/> is false.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="site"/> or <paramref name="text"/> is null.</exception>
        /// <exception cref="ArgumentException">The title is malformed and <paramref name="exceptionOnFailure"/> is true.</exception>
        private static async Task <WikiLink> ParseInternalAsync(WikiSite site, IWikiFamily family, string text, int defaultNamespaceId, bool exceptionOnFailure)
        {
            if (site == null)
            {
                throw new ArgumentNullException(nameof(site));
            }
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }
            var link = new WikiLink(site, text);
            //preprocess text (these changes aren't site-dependent)
            //First remove anchor, which is stored unchanged, if there is one
            var parts = text.Split(new[] { '|' }, 2);
            var title = parts[0];

            link.Anchor = parts.Length > 1 ? parts[1] : null;
            //This code was adapted from Title.php : secureAndSplit()
            if (title.IndexOf('\ufffd') >= 0)
            {
                if (exceptionOnFailure)
                {
                    throw new ArgumentException("Title contains illegal char (\\uFFFD 'REPLACEMENT CHARACTER')",
                                                nameof(text));
                }
                return null;
            }
            // Everything after the first '#' is the section; it is stored as-is.
            parts        = title.Split(new[] { '#' }, 2);
            title        = parts[0];
            link.Section = parts.Length > 1 ? parts[1] : null;
            var match = IllegalTitlesPattern.Match(title);

            if (match.Success)
            {
                if (exceptionOnFailure)
                {
                    throw new ArgumentException($"Title contains illegal char sequence: {match.Value} .");
                }
                return null;
            }
            //Parse title parts.
            Tuple <string, string, string> parsedTitle;

            try
            {
                parsedTitle = await TitlePartitionAsync(site, family, title, defaultNamespaceId);
            }
            catch (ArgumentException) when (!exceptionOnFailure)
            {
                // TitlePartitionAsync reports a missing page title by throwing. In non-throwing mode
                // that is just another parse failure, so honor the contract and return null.
                return null;
            }

            link.InterwikiPrefix = parsedTitle.Item1;
            link.NamespaceName   = parsedTitle.Item2;
            link.Title           = parsedTitle.Item3;
            link.FullTitle       = link.Title;
            if (link.InterwikiPrefix == null)
            {
                link.TargetSite = link.Site;
            }
            else if (family != null)
            {
                link.TargetSite = await family.GetSiteAsync(link.InterwikiPrefix);

                Debug.Assert(link.TargetSite != null);
            }
            else
            {
                // If we do not have wiki family information, and there IS an interwiki prefix,
                // subsequent namespace will not be parsed and will be left as a part of Name
                Debug.Assert(parsedTitle.Item2 == null);
            }
            link.Namespace = parsedTitle.Item2 == null ? null : link.TargetSite.Namespaces[parsedTitle.Item2];
            //Format expression.
            var sb = new StringBuilder();

            if (link.InterwikiPrefix != null)
            {
                sb.Append(link.InterwikiPrefix);
                sb.Append(':');
            }
            if (!string.IsNullOrEmpty(link.NamespaceName))
            {
                sb.Append(link.NamespaceName);
                sb.Append(':');
                // FullTitle carries the namespace but never the interwiki prefix.
                link.FullTitle = link.NamespaceName + ":" + link.Title;
            }
            sb.Append(link.Title);
            if (link.Section != null)
            {
                sb.Append('#');
                sb.Append(link.Section);
            }
            if (link.Anchor != null)
            {
                sb.Append('|');
                sb.Append(link.Anchor);
            }
            link._FormattedText = sb.ToString();
            return link;
        }
예제 #4
0
        /// <summary>
        /// Splits a raw title expression into target site, interwiki prefix, namespace name and page title.
        /// </summary>
        /// <param name="site">Site whose namespaces and interwiki map are consulted; may be null if <paramref name="family"/> is not.</param>
        /// <param name="family">Wiki family used to resolve interwiki prefixes to other sites; may be null if <paramref name="site"/> is not.</param>
        /// <param name="rawTitle">The title expression to split. Must not be null.</param>
        /// <param name="defaultNamespace">Id of the namespace used when the expression has neither a namespace nor an interwiki prefix.</param>
        /// <returns>
        /// A tuple of (site, interwiki prefix, namespace name, page title), where site is the last site
        /// resolved through <paramref name="family"/> or the original <paramref name="site"/>;
        /// or null when the expression does not contain a page title.
        /// </returns>
        private static async Task <Tuple <WikiSite, string, string, string> > TitlePartitionAsync(WikiSite site, IWikiFamily family, string rawTitle, int defaultNamespace)
        {
            // Tuple<targetSite, interwiki, namespace, title>
            Debug.Assert(site != null || family != null);
            Debug.Assert(rawTitle != null);
            var title = rawTitle;

            if (title.Length == 0)
            {
                return null;
            }
            var state = 0;

            /*
             * state  accepts
             * 0      LeadingBlank
             * 1      Namespace / Interwiki
             * 2      Page title
             */
            string interwiki = null, nsname = null, pagetitle = null;

            while (title != null)
            {
                // Peel off one colon-delimited segment at a time; parts[1] (if any) is the unparsed remainder.
                var parts = title.Split(new[] { ':' }, 2);
                var part  = parts[0].Trim(' ', '_');
                switch (state)
                {
                case 0:
                    if (part.Length > 0)
                    {
                        goto case 1;
                    }
                    if (parts.Length == 1)
                    {
                        // Only blanks/underscores and no colon: there is no remainder to continue with,
                        // so report "no page title" instead of indexing past the end of parts.
                        return null;
                    }
                    // Initial colon indicates main namespace rather than default.
                    nsname = "";
                    state  = 1;
                    break;

                case 1:
                    // Make sure there's a colon ahead; otherwise we just treat it as a normal title.
                    if (parts.Length == 1)
                    {
                        goto case 2;
                    }
                    string normalizedInterwikiPrefix;
                    if (site != null && site.Namespaces.TryGetValue(part, out var ns))
                    {
                        // This is a namespace name.
                        nsname = ns.CustomName;
                        state  = 2;
                    }
                    else if (family != null && (normalizedInterwikiPrefix = family.TryNormalize(part)) != null)
                    {
                        // This is a known prefix in the specified WikiFamily.
                        var nextSite = await family.GetSiteAsync(part);

                        if (nextSite == null)
                        {
                            Debug.Assert(false, $"{family} returned null for prefix: {normalizedInterwikiPrefix}. " +
                                         "IWikiFamily.TryNormalize should return null for in-existent interwiki prefixes.");
                            // In release builds, keep the unresolved prefix as part of the page title
                            // rather than silently dropping it from the expression.
                            goto case 2;
                        }
                        // We have bumped into another wiki, hooray!
                        interwiki = normalizedInterwikiPrefix;
                        site      = nextSite;
                        // state will still be 1, to parse namespace or other interwikis (rare)
                    }
                    else if (site != null && site.InterwikiMap.Contains(part))
                    {
                        // Otherwise, check whether this is an interwiki prefix.
                        interwiki = part.ToLowerInvariant();
                        // For interwiki, we do not parse namespace name.
                        // Instead, we treat it as a part of page title.
                        nsname = null;
                        state  = 2;
                    }
                    else
                    {
                        // So this is only the beginning of a normal title.
                        goto case 2;
                    }
                    break;

                case 2:
                    // Without a site, the title is treated as case-sensitive.
                    pagetitle = Utility.NormalizeTitlePart(title, site?.SiteInfo.IsTitleCaseSensitive ?? true);
                    goto END_OF_PARSING;
                }
                // Every path that reaches here has seen a colon, so parts has two elements.
                title = parts[1];
            }
END_OF_PARSING:
            Debug.Assert(pagetitle != null, "pagetitle != null");
            if (pagetitle.Length == 0)
            {
                return null;
            }
            // nsname == null but interwiki != null means that the expression has interwiki prefix, while family == null
            if (nsname == null && interwiki == null)
            {
                // If site is (still) null, we will have error reported in the caller.
                nsname = site?.Namespaces[defaultNamespace].CustomName;
            }
            return Tuple.Create(site, interwiki, nsname, pagetitle);
        }