/// <summary>
/// Splits an expression of the form <c>prefix:title</c> at its first colon, asks
/// <paramref name="wikiFamily"/> for the site registered under the prefix, and returns
/// that site together with the text after the colon.
/// </summary>
/// <param name="expr">The expression to resolve, e.g. <c>prefix:title</c>.</param>
/// <param name="wikiFamily">The family used to look up the site by prefix.</param>
/// <returns>
/// The value returned by <c>wikiFamily.GetSiteAsync</c> for the prefix, and the
/// unmodified remainder of <paramref name="expr"/> after the first colon.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="expr"/> is null, empty, or does not contain a colon.
/// </exception>
/// <exception cref="ArgumentNullException"><paramref name="wikiFamily"/> is null.</exception>
public static async Task<(WikiSite site, string title)> ResolveSiteAndTitleAsync(string expr, IWikiFamily wikiFamily)
{
    if (string.IsNullOrEmpty(expr))
    {
        throw new ArgumentException("Value cannot be null or empty.", nameof(expr));
    }
    if (wikiFamily == null)
    {
        throw new ArgumentNullException(nameof(wikiFamily));
    }

    // Only the first colon separates prefix and title; later colons belong to the title.
    var parts = expr.Split(':', 2);
    if (parts.Length < 2)
    {
        // Without a colon there is no prefix to resolve; fail with a descriptive
        // error instead of an IndexOutOfRangeException on parts[1].
        throw new ArgumentException($"The expression \"{expr}\" does not contain a site prefix followed by ':'.", nameof(expr));
    }

    var site = await wikiFamily.GetSiteAsync(parts[0]);
    return (site, parts[1]);
}
/// <summary>
/// Splits a raw title expression into interwiki prefix, namespace name and page title.
/// </summary>
/// <param name="site">
/// The site the expression is initially resolved against; must not be null.
/// It is replaced by the target site whenever <paramref name="family"/> resolves an interwiki prefix.
/// </param>
/// <param name="family">
/// Optional wiki family used to recognise interwiki prefixes. When null,
/// <c>site.InterwikiMap</c> is consulted instead.
/// </param>
/// <param name="rawTitle">The title expression to partition; must not be null.</param>
/// <param name="defaultNamespace">
/// Id of the namespace used when the expression has neither a namespace nor an interwiki prefix.
/// </param>
/// <returns>
/// <c>Tuple&lt;interwiki, namespace, title&gt;</c>. The interwiki item is null when no prefix
/// was recognised. The namespace item is an empty string for a leading colon that is not
/// followed by a recognised namespace, and may be null when an interwiki prefix is present
/// and no namespace was recognised.
/// </returns>
/// <exception cref="ArgumentException">The expression does not contain a page title.</exception>
private static async Task<Tuple<string, string, string>> TitlePartitionAsync(WikiSite site, IWikiFamily family, string rawTitle, int defaultNamespace)
{
    // Tuple<interwiki, namespace, title>
    Debug.Assert(site != null);
    Debug.Assert(rawTitle != null);
    var title = rawTitle;
    if (title.Length == 0)
    {
        goto EMPTY_TITLE;
    }
    var state = 0;
    /*
     * state   accepts
     * 0       LeadingBlank
     * 1       Namespace / Interwiki
     * 2       Page title
     */
    string interwiki = null, nsname = null, pagetitle = null;
    while (title != null)
    {
        // parts[0] is the segment before the first colon; parts[1] (if any) is the rest.
        var parts = title.Split(new[] { ':' }, 2);
        var part = parts[0].Trim(' ', '_');
        switch (state)
        {
            case 0:
                if (part.Length > 0)
                {
                    goto case 1;
                }
                if (parts.Length == 1)
                {
                    // Only blanks/underscores and no colon: there is nothing left to parse.
                    // Without this check, parts[1] below would be out of range.
                    goto EMPTY_TITLE;
                }
                // Initial colon indicates main namespace rather than default.
                nsname = "";
                state = 1;
                break;
            case 1:
                // Make sure there's a colon ahead; otherwise we just treat it as a normal title.
                if (parts.Length == 1)
                {
                    goto case 2;
                }
                NamespaceInfo ns;
                if (site.Namespaces.TryGetValue(part, out ns))
                {
                    // This is a namespace name.
                    nsname = ns.CustomName;
                    state = 2;
                }
                else if (family != null)
                {
                    var normalizedPart = family.TryNormalize(part);
                    var nextSite = normalizedPart == null ? null : await family.GetSiteAsync(part);
                    if (nextSite == null)
                    {
                        // The family does not know this prefix (or has no site for it), so the
                        // segment is just the beginning of a normal title. It must be kept:
                        // breaking out here would silently discard it.
                        goto case 2;
                    }
                    // We have bumped into another wiki, hooray!
                    interwiki = normalizedPart;
                    site = nextSite;
                    // state will still be 1, to parse namespace or other interwikis (rare)
                }
                else if (site.InterwikiMap.Contains(part))
                {
                    // Otherwise, check whether this is an interwiki prefix.
                    interwiki = part.ToLowerInvariant();
                    // For interwiki, we do not parse namespace name.
                    // Instead, we treat it as a part of page title.
                    nsname = null;
                    state = 2;
                }
                else
                {
                    // So this is only the beginning of a normal title.
                    goto case 2;
                }
                break;
            case 2:
                pagetitle = Utility.NormalizeTitlePart(title, site.SiteInfo.IsTitleCaseSensitive);
                goto END_OF_PARSING;
        }
        // Every path that reaches this point has seen a colon, so parts[1] exists.
        title = parts[1];
    }
END_OF_PARSING:
    Debug.Assert(pagetitle != null, "pagetitle != null");
    if (pagetitle.Length == 0)
    {
        goto EMPTY_TITLE;
    }
    // nsname == null means that the expression has interwiki prefix, while family == null
    if (nsname == null && interwiki == null)
    {
        nsname = site.Namespaces[defaultNamespace].CustomName;
    }
    return Tuple.Create(interwiki, nsname, pagetitle);
EMPTY_TITLE:
    throw new ArgumentException($"The title \"{rawTitle}\" does not contain page title.");
}
/// <summary>
/// Parses a wikilink expression of the form <c>title#section|anchor</c> into a <see cref="WikiLink"/>.
/// </summary>
/// <param name="site">The site the link is written on.</param>
/// <param name="family">Optional wiki family used to resolve interwiki prefixes to sites; may be null.</param>
/// <param name="text">The link expression to parse.</param>
/// <param name="defaultNamespaceId">
/// Id of the namespace assumed when the expression has neither a namespace nor an interwiki prefix.
/// </param>
/// <param name="exceptionOnFailure">
/// When true, a malformed expression raises <see cref="ArgumentException"/>;
/// when false, null is returned instead.
/// </param>
/// <returns>The parsed link, or null when parsing fails and <paramref name="exceptionOnFailure"/> is false.</returns>
/// <exception cref="ArgumentNullException"><paramref name="site"/> or <paramref name="text"/> is null.</exception>
/// <exception cref="ArgumentException">
/// The expression is malformed and <paramref name="exceptionOnFailure"/> is true.
/// </exception>
private static async Task<WikiLink> ParseInternalAsync(WikiSite site, IWikiFamily family, string text, int defaultNamespaceId, bool exceptionOnFailure)
{
    if (site == null)
    {
        throw new ArgumentNullException(nameof(site));
    }
    if (text == null)
    {
        throw new ArgumentNullException(nameof(text));
    }
    var link = new WikiLink(site, text);
    //preprocess text (these changes aren't site-dependent)
    //First remove anchor, which is stored unchanged, if there is one
    var parts = text.Split(new[] { '|' }, 2);
    var title = parts[0];
    link.Anchor = parts.Length > 1 ? parts[1] : null;
    //This code was adapted from Title.php : secureAndSplit()
    if (title.IndexOf('\ufffd') >= 0)
    {
        if (exceptionOnFailure)
        {
            throw new ArgumentException("Title contains illegal char (\\uFFFD 'REPLACEMENT CHARACTER')", nameof(text));
        }
        return null;
    }
    // Everything after the first '#' is the section, stored unchanged.
    parts = title.Split(new[] { '#' }, 2);
    title = parts[0];
    link.Section = parts.Length > 1 ? parts[1] : null;
    var match = IllegalTitlesPattern.Match(title);
    if (match.Success)
    {
        if (exceptionOnFailure)
        {
            throw new ArgumentException($"Title contains illegal char sequence: {match.Value} .");
        }
        return null;
    }
    //Parse title parts.
    Tuple<string, string, string> parsedTitle;
    try
    {
        parsedTitle = await TitlePartitionAsync(site, family, title, defaultNamespaceId);
    }
    catch (ArgumentException) when (!exceptionOnFailure)
    {
        // TitlePartitionAsync reports a missing page title by throwing. In the
        // non-throwing mode that must surface as null, like the other failures above.
        return null;
    }
    link.InterwikiPrefix = parsedTitle.Item1;
    link.NamespaceName = parsedTitle.Item2;
    link.Title = parsedTitle.Item3;
    link.FullTitle = link.Title;
    if (link.InterwikiPrefix == null)
    {
        link.TargetSite = link.Site;
    }
    else if (family != null)
    {
        link.TargetSite = await family.GetSiteAsync(link.InterwikiPrefix);
        Debug.Assert(link.TargetSite != null);
    }
    else
    {
        // If we do not have wiki family information, and there IS an interwiki prefix,
        // subsequent namespace will not be parsed and will be left as a part of Name
        Debug.Assert(parsedTitle.Item2 == null);
    }
    link.Namespace = parsedTitle.Item2 == null ? null : link.TargetSite.Namespaces[parsedTitle.Item2];
    //Format expression.
    var sb = new StringBuilder();
    if (link.InterwikiPrefix != null)
    {
        sb.Append(link.InterwikiPrefix);
        sb.Append(':');
    }
    if (!string.IsNullOrEmpty(link.NamespaceName))
    {
        sb.Append(link.NamespaceName);
        sb.Append(':');
        // FullTitle carries the namespace but never the interwiki prefix.
        link.FullTitle = link.NamespaceName + ":" + link.Title;
    }
    sb.Append(link.Title);
    if (link.Section != null)
    {
        sb.Append('#');
        sb.Append(link.Section);
    }
    if (link.Anchor != null)
    {
        sb.Append('|');
        sb.Append(link.Anchor);
    }
    link._FormattedText = sb.ToString();
    return link;
}
/// <summary>
/// Splits a raw title expression into target site, interwiki prefix, namespace name and page title.
/// </summary>
/// <param name="site">
/// The site the expression is initially resolved against. May be null when
/// <paramref name="family"/> is not null. It is replaced by the target site whenever
/// <paramref name="family"/> resolves an interwiki prefix.
/// </param>
/// <param name="family">
/// Optional wiki family used to recognise interwiki prefixes. May be null when
/// <paramref name="site"/> is not null.
/// </param>
/// <param name="rawTitle">The title expression to partition; must not be null.</param>
/// <param name="defaultNamespace">
/// Id of the namespace used when the expression has neither a namespace nor an interwiki prefix.
/// </param>
/// <returns>
/// <c>Tuple&lt;site, interwiki, namespace, title&gt;</c>, or null when the expression does not
/// contain a page title. The site item is null if no site was given and none was resolved;
/// the namespace item may then be null as well.
/// </returns>
private static async Task<Tuple<WikiSite, string, string, string>> TitlePartitionAsync(WikiSite site, IWikiFamily family, string rawTitle, int defaultNamespace)
{
    // Tuple<site, interwiki, namespace, title>
    Debug.Assert(site != null || family != null);
    Debug.Assert(rawTitle != null);
    var title = rawTitle;
    if (title.Length == 0)
    {
        return null;
    }
    var state = 0;
    /*
     * state   accepts
     * 0       LeadingBlank
     * 1       Namespace / Interwiki
     * 2       Page title
     */
    string interwiki = null, nsname = null, pagetitle = null;
    while (title != null)
    {
        // parts[0] is the segment before the first colon; parts[1] (if any) is the rest.
        var parts = title.Split(new[] { ':' }, 2);
        var part = parts[0].Trim(' ', '_');
        switch (state)
        {
            case 0:
                if (part.Length > 0)
                {
                    goto case 1;
                }
                if (parts.Length == 1)
                {
                    // Only blanks/underscores and no colon: there is no page title.
                    // Without this check, parts[1] below would be out of range.
                    return null;
                }
                // Initial colon indicates main namespace rather than default.
                nsname = "";
                state = 1;
                break;
            case 1:
                // Make sure there's a colon ahead; otherwise we just treat it as a normal title.
                if (parts.Length == 1)
                {
                    goto case 2;
                }
                string normalizedInterwikiPrefix;
                if (site != null && site.Namespaces.TryGetValue(part, out var ns))
                {
                    // This is a namespace name.
                    nsname = ns.CustomName;
                    state = 2;
                }
                else if (family != null && (normalizedInterwikiPrefix = family.TryNormalize(part)) != null)
                {
                    // This is a known prefix in the specified WikiFamily.
                    var nextSite = await family.GetSiteAsync(part);
                    if (nextSite == null)
                    {
                        Debug.Assert(false, $"{family} returned null for prefix: {normalizedInterwikiPrefix}. "
                                            + "IWikiFamily.TryNormalize should return null for in-existent interwiki prefixes.");
                        // Release builds get past the assertion; keep the segment as title
                        // text instead of silently discarding it.
                        goto case 2;
                    }
                    // We have bumped into another wiki, hooray!
                    interwiki = normalizedInterwikiPrefix;
                    site = nextSite;
                    // state will still be 1, to parse namespace or other interwikis (rare)
                }
                else if (site != null && site.InterwikiMap.Contains(part))
                {
                    // Otherwise, check whether this is an interwiki prefix.
                    interwiki = part.ToLowerInvariant();
                    // For interwiki, we do not parse namespace name.
                    // Instead, we treat it as a part of page title.
                    nsname = null;
                    state = 2;
                }
                else
                {
                    // So this is only the beginning of a normal title.
                    goto case 2;
                }
                break;
            case 2:
                // With no site available, the title is normalized as case-sensitive.
                pagetitle = Utility.NormalizeTitlePart(title, site?.SiteInfo.IsTitleCaseSensitive ?? true);
                goto END_OF_PARSING;
        }
        // Every path that reaches this point has seen a colon, so parts[1] exists.
        title = parts[1];
    }
END_OF_PARSING:
    Debug.Assert(pagetitle != null, "pagetitle != null");
    if (pagetitle.Length == 0)
    {
        return null;
    }
    // nsname == null but interwiki != null means that the expression has interwiki prefix, while family == null
    if (nsname == null && interwiki == null)
    {
        // If site is (still) null, we will have error reported in the caller.
        nsname = site?.Namespaces[defaultNamespace].CustomName;
    }
    return Tuple.Create(site, interwiki, nsname, pagetitle);
}