/// <inheritdoc cref="ParseAsync(WikiSite,IWikiFamily,string,int)"/>
/// <summary>
/// Parses a new instance from a Wikilink expression on the specified Wiki family.
/// This overload supplies no originating site: the target interwiki site is resolved with
/// the interwiki prefixes provided by the <see cref="IWikiFamily"/> instance, and
/// <paramref name="text"/> is required to have an interwiki prefix.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref name="family"/> is <c>null</c>.</exception>
public static Task<WikiLink> ParseAsync(IWikiFamily family, string text, int defaultNamespaceId)
{
    if (family != null)
    {
        // No originating site is passed on; everything is resolved through the family.
        return ParseAsync(null, family, text, defaultNamespaceId);
    }
    throw new ArgumentNullException(nameof(family));
}
/// <summary>
/// Parses a new <see cref="WikiLink"/> from the specified Wikilink expression.
/// Failures are reported by exception rather than by a <c>null</c> result.
/// </summary>
/// <param name="site">Site instance.</param>
/// <param name="family">Wiki family. You need to provide this argument if you want to parse into interwiki links.</param>
/// <param name="text">Wikilink expression, without square brackets.</param>
/// <param name="defaultNamespaceId">Id of default namespace. See <see cref="BuiltInNamespaces"/> for a list of possible values.</param>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>, or the site/family information required for parsing was not supplied.</exception>
/// <exception cref="ArgumentException"><paramref name="text"/> does not contain a valid page title.</exception>
public static Task<WikiLink> ParseAsync(WikiSite site, IWikiFamily family, string text, int defaultNamespaceId)
    => ParseInternalAsync(site, family, text, defaultNamespaceId, exceptionOnFailure: true);
/// <summary>
/// Parses a new <see cref="WikiLink"/> from the specified Wikilink expression,
/// using namespace id <c>0</c> as the default namespace.
/// </summary>
/// <param name="site">Site instance.</param>
/// <param name="family">Wiki family. You need to provide this argument if you want to parse into interwiki links.</param>
/// <param name="text">Wikilink expression, without square brackets.</param>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>, or the site/family information required for parsing was not supplied.</exception>
/// <exception cref="ArgumentException"><paramref name="text"/> does not contain a valid page title.</exception>
public static Task<WikiLink> ParseAsync(WikiSite site, IWikiFamily family, string text)
    => ParseAsync(site, family, text, 0);
/// <summary>
/// Splits a raw title (no anchor, no section) into interwiki prefix, namespace name and page title.
/// </summary>
/// <param name="site">
/// Originating site (asserted to be non-null). Its namespaces and interwiki map are consulted;
/// it is replaced by the site returned from <paramref name="family"/> each time a family prefix is consumed.
/// </param>
/// <param name="family">Optional wiki family used to recognize and resolve interwiki prefixes.</param>
/// <param name="rawTitle">The title expression to split (asserted to be non-null).</param>
/// <param name="defaultNamespace">
/// Namespace id looked up on the current site when the expression carries neither
/// a namespace nor an interwiki prefix.
/// </param>
/// <returns>
/// <c>Tuple&lt;interwiki, namespace, title&gt;</c>. The interwiki item is <c>null</c> when no prefix was found.
/// The namespace item is <c>null</c> when an interwiki prefix was found without a recognized namespace,
/// and is the empty string when the expression starts with a colon and names no namespace.
/// </returns>
/// <exception cref="ArgumentException">The expression does not contain a page title.</exception>
private static async Task<Tuple<string, string, string>> TitlePartitionAsync(WikiSite site, IWikiFamily family, string rawTitle, int defaultNamespace)
{
    // Tuple<interwiki, namespace, title>
    Debug.Assert(site != null);
    Debug.Assert(rawTitle != null);
    var title = rawTitle;
    if (title.Length == 0)
    {
        goto EMPTY_TITLE;
    }
    var state = 0;
    /*
     * state   accepts
     * 0       LeadingBlank
     * 1       Namespace / Interwiki
     * 2       Page title
     */
    string interwiki = null, nsname = null, pagetitle = null;
    while (title != null)
    {
        var parts = title.Split(new[] { ':' }, 2);
        var part = parts[0].Trim(' ', '_');
        switch (state)
        {
            case 0:
                if (part.Length > 0)
                {
                    goto case 1;
                }
                if (parts.Length == 1)
                {
                    // Only blanks/underscores and no colon: there is no parts[1] to advance to,
                    // and there is no page title either.
                    goto EMPTY_TITLE;
                }
                // Initial colon indicates main namespace rather than default.
                nsname = "";
                state = 1;
                break;
            case 1:
                // Make sure there's a colon ahead; otherwise we just treat it as a normal title.
                if (parts.Length == 1)
                {
                    goto case 2;
                }
                NamespaceInfo ns;
                if (site.Namespaces.TryGetValue(part, out ns))
                {
                    // This is a namespace name.
                    nsname = ns.CustomName;
                    state = 2;
                }
                else if (family != null)
                {
                    var normalizedPart = family.TryNormalize(part);
                    if (normalizedPart == null)
                    {
                        // Not a prefix known to the family. Keep the segment as the beginning
                        // of a normal title instead of silently discarding it.
                        goto case 2;
                    }
                    var nextSite = await family.GetSiteAsync(part);
                    if (nextSite == null)
                    {
                        // The family could not provide a site for this prefix;
                        // likewise keep the segment as part of the title.
                        goto case 2;
                    }
                    // We have bumped into another wiki, hooray!
                    interwiki = normalizedPart;
                    site = nextSite;
                    // state will still be 1, to parse namespace or other interwikis (rare)
                }
                else if (site.InterwikiMap.Contains(part))
                {
                    // Otherwise, check whether this is an interwiki prefix.
                    interwiki = part.ToLowerInvariant();
                    // For interwiki, we do not parse namespace name.
                    // Instead, we treat it as a part of page title.
                    nsname = null;
                    state = 2;
                }
                else
                {
                    // So this is only the beginning of a normal title.
                    goto case 2;
                }
                break;
            case 2:
                pagetitle = Utility.NormalizeTitlePart(title, site.SiteInfo.IsTitleCaseSensitive);
                goto END_OF_PARSING;
        }
        // Every path reaching here has seen a colon, so parts[1] exists.
        title = parts[1];
    }
END_OF_PARSING:
    Debug.Assert(pagetitle != null, "pagetitle != null");
    if (pagetitle.Length == 0)
    {
        goto EMPTY_TITLE;
    }
    // nsname == null means that the expression has interwiki prefix, while family == null
    if (nsname == null && interwiki == null)
    {
        nsname = site.Namespaces[defaultNamespace].CustomName;
    }
    return Tuple.Create(interwiki, nsname, pagetitle);
EMPTY_TITLE:
    throw new ArgumentException($"The title \"{rawTitle}\" does not contain page title.");
}
/// <summary>
/// Parses a Wikilink expression (<c>title#section|anchor</c>, without square brackets)
/// into a <see cref="WikiLink"/>.
/// </summary>
/// <param name="site">Originating site; must not be <c>null</c>.</param>
/// <param name="family">Optional wiki family used to resolve interwiki prefixes to target sites.</param>
/// <param name="text">Wikilink expression, without square brackets.</param>
/// <param name="defaultNamespaceId">Id of the namespace to assume when the expression names none.</param>
/// <param name="exceptionOnFailure">
/// <c>true</c> to throw <see cref="ArgumentException"/> when the title is invalid;
/// <c>false</c> to return <c>null</c> instead.
/// </param>
/// <returns>The parsed link, or <c>null</c> when parsing fails and <paramref name="exceptionOnFailure"/> is <c>false</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="site"/> or <paramref name="text"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException">The title is invalid and <paramref name="exceptionOnFailure"/> is <c>true</c>.</exception>
private static async Task<WikiLink> ParseInternalAsync(WikiSite site, IWikiFamily family, string text, int defaultNamespaceId, bool exceptionOnFailure)
{
    if (site == null)
    {
        throw new ArgumentNullException(nameof(site));
    }
    if (text == null)
    {
        throw new ArgumentNullException(nameof(text));
    }
    var link = new WikiLink(site, text);
    //preprocess text (these changes aren't site-dependent)
    //First remove anchor, which is stored unchanged, if there is one
    var parts = text.Split(new[] { '|' }, 2);
    var title = parts[0];
    link.Anchor = parts.Length > 1 ? parts[1] : null;
    //This code was adapted from Title.php : secureAndSplit()
    if (title.IndexOf('\ufffd') >= 0)
    {
        if (exceptionOnFailure)
        {
            throw new ArgumentException("Title contains illegal char (\\uFFFD 'REPLACEMENT CHARACTER')", nameof(text));
        }
        return null;
    }
    parts = title.Split(new[] { '#' }, 2);
    title = parts[0];
    link.Section = parts.Length > 1 ? parts[1] : null;
    var match = IllegalTitlesPattern.Match(title);
    if (match.Success)
    {
        if (exceptionOnFailure)
        {
            throw new ArgumentException($"Title contains illegal char sequence: {match.Value} .");
        }
        return null;
    }
    //Parse title parts.
    Tuple<string, string, string> parsedTitle;
    try
    {
        parsedTitle = await TitlePartitionAsync(site, family, title, defaultNamespaceId);
    }
    catch (ArgumentException) when (!exceptionOnFailure)
    {
        // TitlePartitionAsync reports a missing page title by throwing ArgumentException.
        // In non-throwing mode that has to surface as a null result, like the checks above.
        return null;
    }
    link.InterwikiPrefix = parsedTitle.Item1;
    link.NamespaceName = parsedTitle.Item2;
    link.Title = parsedTitle.Item3;
    link.FullTitle = link.Title;
    if (link.InterwikiPrefix == null)
    {
        link.TargetSite = link.Site;
    }
    else if (family != null)
    {
        link.TargetSite = await family.GetSiteAsync(link.InterwikiPrefix);
        Debug.Assert(link.TargetSite != null);
    }
    else
    {
        // If we do not have wiki family information, and there IS an interwiki prefix,
        // subsequent namespace will not be parsed and will be left as a part of Name
        Debug.Assert(parsedTitle.Item2 == null);
    }
    link.Namespace = parsedTitle.Item2 == null ? null : link.TargetSite.Namespaces[parsedTitle.Item2];
    //Format expression.
    var sb = new StringBuilder();
    if (link.InterwikiPrefix != null)
    {
        sb.Append(link.InterwikiPrefix);
        sb.Append(':');
    }
    if (!string.IsNullOrEmpty(link.NamespaceName))
    {
        sb.Append(link.NamespaceName);
        sb.Append(':');
        // The full title carries the namespace but not the interwiki prefix.
        link.FullTitle = link.NamespaceName + ":" + link.Title;
    }
    sb.Append(link.Title);
    if (link.Section != null)
    {
        sb.Append('#');
        sb.Append(link.Section);
    }
    if (link.Anchor != null)
    {
        sb.Append('|');
        sb.Append(link.Anchor);
    }
    link._FormattedText = sb.ToString();
    return link;
}
/// <inheritdoc cref="ParseAsync(IWikiFamily,string,int)"/>
/// <remarks>This overload uses namespace id <c>0</c> as the default namespace.</remarks>
public static Task<WikiLink> ParseAsync(IWikiFamily family, string text)
    => ParseAsync(family, text, 0);
/// <summary>
/// Splits a raw title (no anchor, no section) into target site, interwiki prefix,
/// namespace name and page title.
/// </summary>
/// <param name="site">
/// Originating site, or <c>null</c> when only <paramref name="family"/> is available.
/// It is replaced by the site returned from <paramref name="family"/> each time a family prefix is consumed.
/// </param>
/// <param name="family">Optional wiki family used to recognize and resolve interwiki prefixes.</param>
/// <param name="rawTitle">The title expression to split (asserted to be non-null).</param>
/// <param name="defaultNamespace">
/// Namespace id looked up on the current site when the expression carries neither
/// a namespace nor an interwiki prefix.
/// </param>
/// <returns>
/// <c>Tuple&lt;targetSite, interwiki, namespace, title&gt;</c>, or <c>null</c> when the expression
/// contains no page title. The site item is whatever <paramref name="site"/> ended up as
/// (possibly <c>null</c>); the interwiki and namespace items may each be <c>null</c>.
/// </returns>
private static async Task<Tuple<WikiSite, string, string, string>> TitlePartitionAsync(WikiSite site, IWikiFamily family, string rawTitle, int defaultNamespace)
{
    // Tuple<targetSite, interwiki, namespace, title>
    Debug.Assert(site != null || family != null);
    Debug.Assert(rawTitle != null);
    var title = rawTitle;
    if (title.Length == 0)
    {
        return null;
    }
    var state = 0;
    /*
     * state   accepts
     * 0       LeadingBlank
     * 1       Namespace / Interwiki
     * 2       Page title
     */
    string interwiki = null, nsname = null, pagetitle = null;
    while (title != null)
    {
        var parts = title.Split(new[] { ':' }, 2);
        var part = parts[0].Trim(' ', '_');
        switch (state)
        {
            case 0:
                if (part.Length > 0)
                {
                    goto case 1;
                }
                if (parts.Length == 1)
                {
                    // Only blanks/underscores and no colon: there is no parts[1] to advance to,
                    // and there is no page title either. Report it like any other empty title.
                    return null;
                }
                // Initial colon indicates main namespace rather than default.
                nsname = "";
                state = 1;
                break;
            case 1:
                // Make sure there's a colon ahead; otherwise we just treat it as a normal title.
                if (parts.Length == 1)
                {
                    goto case 2;
                }
                string normalizedInterwikiPrefix;
                if (site != null && site.Namespaces.TryGetValue(part, out var ns))
                {
                    // This is a namespace name.
                    nsname = ns.CustomName;
                    state = 2;
                }
                else if (family != null && (normalizedInterwikiPrefix = family.TryNormalize(part)) != null)
                {
                    // This is a known prefix in the specified WikiFamily.
                    var nextSite = await family.GetSiteAsync(part);
                    if (nextSite == null)
                    {
                        Debug.Assert(false, $"{family} returned null for prefix: {normalizedInterwikiPrefix}. "
                                            + "IWikiFamily.TryNormalize should return null for in-existent interwiki prefixes.");
                    }
                    else
                    {
                        // We have bumped into another wiki, hooray!
                        interwiki = normalizedInterwikiPrefix;
                        site = nextSite;
                        // state will still be 1, to parse namespace or other interwikis (rare)
                    }
                }
                else if (site != null && site.InterwikiMap.Contains(part))
                {
                    // Otherwise, check whether this is an interwiki prefix.
                    interwiki = part.ToLowerInvariant();
                    // For interwiki, we do not parse namespace name.
                    // Instead, we treat it as a part of page title.
                    nsname = null;
                    state = 2;
                }
                else
                {
                    // So this is only the beginning of a normal title.
                    goto case 2;
                }
                break;
            case 2:
                // Without a site, titles are treated as case-sensitive.
                pagetitle = Utility.NormalizeTitlePart(title, site?.SiteInfo.IsTitleCaseSensitive ?? true);
                goto END_OF_PARSING;
        }
        // Every path reaching here has seen a colon, so parts[1] exists.
        title = parts[1];
    }
END_OF_PARSING:
    Debug.Assert(pagetitle != null, "pagetitle != null");
    if (pagetitle.Length == 0)
    {
        return null;
    }
    // nsname == null but interwiki != null means that the expression has interwiki prefix, while family == null
    if (nsname == null && interwiki == null)
    {
        // If site is (still) null, we will have error reported in the caller.
        nsname = site?.Namespaces[defaultNamespace].CustomName;
    }
    return Tuple.Create(site, interwiki, nsname, pagetitle);
}
/// <summary>
/// Parses a Wikilink expression (<c>title#section|anchor</c>, without square brackets)
/// into a <see cref="WikiLink"/>.
/// </summary>
/// <param name="site">Originating site; may be <c>null</c> when <paramref name="family"/> is given.</param>
/// <param name="family">Wiki family used to resolve interwiki prefixes; may be <c>null</c> when <paramref name="site"/> is given.</param>
/// <param name="text">Wikilink expression, without square brackets.</param>
/// <param name="defaultNamespaceId">Id of the namespace to assume when the expression names none.</param>
/// <param name="exceptionOnFailure">
/// <c>true</c> to throw <see cref="ArgumentException"/> when the expression is invalid;
/// <c>false</c> to return <c>null</c> instead.
/// </param>
/// <returns>The parsed link, or <c>null</c> when parsing fails and <paramref name="exceptionOnFailure"/> is <c>false</c>.</returns>
/// <exception cref="ArgumentNullException">Both <paramref name="site"/> and <paramref name="family"/> are <c>null</c>, or <paramref name="text"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException">The expression is invalid and <paramref name="exceptionOnFailure"/> is <c>true</c>.</exception>
private static async Task<WikiLink> ParseInternalAsync(WikiSite site, IWikiFamily family, string text, int defaultNamespaceId, bool exceptionOnFailure)
{
    if (site == null && family == null)
    {
        throw new ArgumentNullException(nameof(site) + "/" + nameof(family));
    }
    if (text == null)
    {
        throw new ArgumentNullException(nameof(text));
    }

    // Step 1: peel off the anchor (everything after the first '|'); it is stored unchanged.
    // None of this preprocessing depends on the site.
    string title = text;
    string anchor = null;
    var pipeIndex = text.IndexOf('|');
    if (pipeIndex >= 0)
    {
        title = text.Substring(0, pipeIndex);
        anchor = text.Substring(pipeIndex + 1);
    }

    // Step 2: reject U+FFFD. (Adapted from Title.php : secureAndSplit().)
    if (title.IndexOf('\ufffd') >= 0)
    {
        if (!exceptionOnFailure)
        {
            return null;
        }
        throw new ArgumentException(string.Format(Prompts.ExceptionTitleIllegalCharacter1, "\uFFFD 'REPLACEMENT CHARACTER'"), nameof(text));
    }

    // Step 3: peel off the section (everything after the first '#').
    string section = null;
    var hashIndex = title.IndexOf('#');
    if (hashIndex >= 0)
    {
        section = title.Substring(hashIndex + 1);
        title = title.Substring(0, hashIndex);
    }

    // Step 4: reject illegal character sequences in what remains.
    var illegal = IllegalTitlesPattern.Match(title);
    if (illegal.Success)
    {
        if (!exceptionOnFailure)
        {
            return null;
        }
        throw new ArgumentException(string.Format(Prompts.ExceptionTitleIllegalCharacterSequence1, illegal.Value));
    }

    // Step 5: split the title into site / interwiki / namespace / local title.
    var partition = await TitlePartitionAsync(site, family, title, defaultNamespaceId);
    if (partition == null)
    {
        // A null partition means there was no page title.
        if (!exceptionOnFailure)
        {
            return null;
        }
        throw new ArgumentException(string.Format(Prompts.ExceptionTitleIsEmpty1, title));
    }
    var resolvedSite = partition.Item1;
    var interwiki = partition.Item2;
    var namespaceName = partition.Item3;
    var pageTitle = partition.Item4;
    if (resolvedSite == null)
    {
        if (interwiki != null)
        {
            // If we do not have wiki family information, and there IS an interwiki prefix,
            // subsequent namespace will not be parsed and will be left as a part of Name
            Debug.Assert(namespaceName == null);
            Debug.Assert(pageTitle != null);
        }
        else
        {
            // No interwiki prefix and no site:
            // we are parsing a WikiLink without an originating WikiSite.
            Debug.Assert(site == null);
            if (!exceptionOnFailure)
            {
                return null;
            }
            throw new ArgumentException(Prompts.ExceptionWikiLinkRequireInterwikiPrefix, nameof(text));
        }
    }

    // Step 6: populate the link.
    var link = new WikiLink(site, text)
    {
        Anchor = anchor,
        Section = section,
        InterwikiPrefix = interwiki,
        NamespaceName = namespaceName,
        Title = pageTitle,
        FullTitle = pageTitle,
        TargetSite = resolvedSite
    };
    if (namespaceName != null && link.TargetSite != null)
    {
        link.Namespace = link.TargetSite.Namespaces[namespaceName];
    }
    else
    {
        link.Namespace = null;
    }

    // Step 7: build the formatted expressions.
    var builder = new StringBuilder();
    if (link.InterwikiPrefix != null)
    {
        builder.Append(link.InterwikiPrefix).Append(':');
    }
    // Everything appended from here on belongs to the full title (no interwiki prefix).
    var fullTitleOffset = builder.Length;
    if (!string.IsNullOrEmpty(link.NamespaceName))
    {
        builder.Append(link.NamespaceName).Append(':');
        link.FullTitle = link.NamespaceName + ":" + link.Title;
    }
    builder.Append(link.Title);
    if (link.Section != null)
    {
        builder.Append('#').Append(link.Section);
    }
    link.Target = builder.ToString();
    if (fullTitleOffset == 0)
    {
        link.FullTitleAndSection = link.Target;
    }
    else
    {
        link.FullTitleAndSection = builder.ToString(fullTitleOffset, builder.Length - fullTitleOffset);
    }
    if (link.Anchor != null)
    {
        builder.Append('|').Append(link.Anchor);
    }
    link._FormattedText = builder.ToString();
    return link;
}
public static async Task <(WikiSite site, string title)> ResolveSiteAndTitleAsync(string expr, IWikiFamily wikiFamily) { if (string.IsNullOrEmpty(expr)) { throw new ArgumentException("Value cannot be null or empty.", nameof(expr)); } var parts = expr.Split(':', 2); var site = await wikiFamily.GetSiteAsync(parts[0]); return(site, parts[1]);