public static FirstDifference ( string a, string b ) : int | ||
a | string | First string |
b | string | Second string |
return | int |
/// <summary>
/// Rebuilds the regular expressions whose patterns depend on the current
/// <c>Variables</c> settings (namespace patterns, stub pattern, language code and site URLs).
/// </summary>
/// <remarks>
/// Assigns <c>TemplateStart</c>, <c>Category</c>, <c>Images</c>, <c>Stub</c>,
/// <c>PossiblyCommentedStub</c>, <c>TemplateCall</c>, <c>Redirect</c>, <c>Disambigs</c>,
/// <c>Defaultsort</c> and <c>ExtractTitle</c>.
/// </remarks>
public static void MakeLangSpecificRegexes()
{
    // Keys of the Variables.NamespacesCaseInsensitive entries used below.
    const int templateNamespace = 10;
    const int categoryNamespace = 14;
    const int imageNamespace = 6;

    // Shared by Category and Images: either the rest of a [[...]] link or a whole <gallery> block.
    const string linkRestOrGallery = @"(.*?)\]\]|<[Gg]allery\b([^>]*?)>[\s\S]*?</ ?[Gg]allery>";

    // Add an empty alternative to the template namespace pattern: just before its last
    // character when it already opens with '(', otherwise by wrapping it in a new group.
    TemplateStart = Variables.NamespacesCaseInsensitive[templateNamespace];
    TemplateStart = @"\{\{\s*" + (TemplateStart[0] == '('
        ? TemplateStart.Insert(TemplateStart.Length - 1, "|")
        : "(?:" + TemplateStart + "|)");

    Category = new Regex(
        @"\[\[" + Variables.NamespacesCaseInsensitive[categoryNamespace] + linkRestOrGallery,
        RegexOptions.Compiled);
    Images = new Regex(
        @"\[\[" + Variables.NamespacesCaseInsensitive[imageNamespace] + linkRestOrGallery,
        RegexOptions.Compiled);

    Stub = new Regex(@"{{.*?" + Variables.Stub + @"}}", RegexOptions.Compiled);
    PossiblyCommentedStub = new Regex(
        @"(<!-- ?\{\{[^}]*?" + Variables.Stub + @"\b\}\}.*?-->"
        + @"|\{\{[^}]*?" + Variables.Stub + @"\}\})",
        RegexOptions.Compiled);
    TemplateCall = new Regex(
        TemplateStart + @"\s*([^\]\|]*)\s*(.*)}}",
        RegexOptions.Compiled | RegexOptions.Singleline);

    // Keyword(s) that introduce a redirect, chosen by language code.
    string redirectWords;
    switch (Variables.LangCode)
    {
        case LangCodeEnum.ar:
            redirectWords = "(?:تحويل|REDIRECT)";
            break;
        case LangCodeEnum.bg:
            redirectWords = "(?:redirect|пренасочване|виж)";
            break;
        case LangCodeEnum.fi:
            redirectWords = "(?:OHJAUS|UUDELLEENOHJAUS|REDIRECT)";
            break;
        case LangCodeEnum.he:
            redirectWords = "(?:הפניה|REDIRECT)";
            break;
        case LangCodeEnum.Is:
            redirectWords = "(?:tilvísun|TILVÍSUN|REDIRECT)";
            break;
        case LangCodeEnum.nl:
            redirectWords = "(?:REDIRECT|DOORVERWIJZING)";
            break;
        case LangCodeEnum.ru:
            redirectWords = "(?:REDIRECT|ПЕРЕНАПРАВЛЕНИЕ|ПЕРЕНАПР)";
            break;
        case LangCodeEnum.sk:
            redirectWords = "(?:redirect|presmeruj)";
            break;
        case LangCodeEnum.uk:
            redirectWords = "(?:REDIRECT|ПЕРЕНАПРАВЛЕННЯ|ПЕРЕНАПР)";
            break;
        default:
            redirectWords = "REDIRECT";
            break;
    }

    Redirect = new Regex(
        @"^#" + redirectWords + @".*?\[\[\s*:?\s*([^\|]*?)\s*(|\|.*?)]\]",
        RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Multiline);

    // The Russian pattern is built on TemplateStart; every other language uses a fixed list.
    string disambigPattern = Variables.LangCode == LangCodeEnum.ru
        ? TemplateStart + @"([Dd]isambiguation|[Dd]isambig|[Нн]еоднозначность)}}"
        : @"{{([234]CC|[Dd]isambig|[Gg]eodis|[Hh]ndis|[Ss]urname|[Nn]umberdis|[Rr]oaddis|[Ll]etter-disambig)}}";
    Disambigs = new Regex(disambigPattern, RegexOptions.Compiled);

    string defaultsortWords = Variables.LangCode == LangCodeEnum.en
        ? "(?:(?i:defaultsort|lifetime|BIRTH-DEATH-SORT)|BD)"
        : "(?i:defaultsort)";
    Defaultsort = new Regex(
        TemplateStart + defaultsortWords + @"\s*[:|](?<key>[^\}]*)}}",
        RegexOptions.Compiled | RegexOptions.ExplicitCapture);

    // Variables.URL and Variables.URLLong are split at the index returned by
    // Tools.FirstDifference. The part of URLLong before that index is matched literally,
    // followed by either the rest of URLLong plus "index.php?title=" / "index.php/",
    // or the rest of URL plus "/wiki/".
    int pos = Tools.FirstDifference(Variables.URL, Variables.URLLong);
    string urlPattern = Regex.Escape(Variables.URLLong.Substring(0, pos))
        + "(?:" + Regex.Escape(Variables.URLLong.Substring(pos)) + @"index\.php(?:\?title=|/)|"
        + Regex.Escape(Variables.URL.Substring(pos)) + "/wiki/)";

    ExtractTitle = new Regex("^" + urlPattern + "([^?&]*)$", RegexOptions.Compiled);
}
/// <summary>
/// Rebuilds every regular expression and pattern fragment that depends on the current
/// <c>Variables</c> settings: namespace patterns, stub pattern, month names, magic words,
/// language code and site URLs.
/// </summary>
public static void MakeLangSpecificRegexes()
{
    // Compiled copy of each case-insensitive namespace pattern, stored under the same key.
    NamespacesCaseInsensitive = new Dictionary<int, Regex>();
    foreach (var ns in Variables.NamespacesCaseInsensitive)
    {
        NamespacesCaseInsensitive[ns.Key] = new Regex(ns.Value, RegexOptions.Compiled);
    }

    TemplateStart = @"\{\{\s*(:?" + Variables.NamespacesCaseInsensitive[Namespace.Template] + ")?";

    Category = new Regex(
        @"\[\[\s*" + Variables.NamespacesCaseInsensitive[Namespace.Category]
        + @"\s*(.*?)\s*(?:|\|([^\|\]]*))\s*\]\]",
        RegexOptions.Compiled);

    // Alternatives, in order:
    //  1. [[File:... : an allowed-character list, then a file extension (mandatory on
    //     MediaWiki), then an optional closing ]] or pipe. Stopping at the file name lets
    //     typo fixing and find & replace still operate on the image description.
    //     (A stricter alternative would require the pipe or ]] after the file name:
    //     [^\[\]\|\{\}]+\.[a-zA-Z]{3,4}\b(?:\s*(?:\]\]|\|)) )
    //  2. a whole <gallery> ... </gallery> block
    //  3. a {{gallery}} template call with balanced inner braces
    //  4. a "| name = file.ext" template parameter
    Images = new Regex(
        @"\[\[\s*" + Variables.NamespacesCaseInsensitive[Namespace.File]
        + @"[ \%\!""$&'\(\)\*,\-.\/0-9:;=\?@A-Z\\\^_`a-z~\x80-\xFF\+]+\.[a-zA-Z]{3,4}\b(?:\s*(?:\]\]|\|))?"
        + @"|<[Gg]allery\b([^>]*?)>[\s\S]*?</ ?[Gg]allery>"
        + @"|{{\s*[Gg]allery\s*(?:\|(?>[^\{\}]+|\{(?<DEPTH>)|\}(?<-DEPTH>))*(?(DEPTH)(?!)))?}}"
        + @"|\|\s*[a-zA-Z\d_ ]+\s*=[^\|{}]+?\.[a-zA-Z]{3,4}\s*(?=\||}})",
        RegexOptions.Compiled | RegexOptions.Singleline);

    FileNamespaceLink = new Regex(
        @"\[\[\s*" + Variables.NamespacesCaseInsensitive[Namespace.File]
        + @"((?>[^\[\]]+|\[\[(?<DEPTH>)|\]\](?<-DEPTH>))*(?(DEPTH)(?!)))\]\]",
        RegexOptions.Compiled);

    Stub = new Regex(@"{{.*?" + Variables.Stub + @"}}", RegexOptions.Compiled);
    PossiblyCommentedStub = new Regex(
        @"(<!-- ?\{\{[^}]*?" + Variables.Stub + @"\b\}\}.*?-->"
        + @"|\{\{[^}]*?" + Variables.Stub + @"\}\})",
        RegexOptions.Compiled);
    TemplateCall = new Regex(
        TemplateStart + @"\s*([^\]\|]*)\s*(.*)}}",
        RegexOptions.Compiled | RegexOptions.Singleline);

    LooseCategory = new Regex(
        @"\[\[[\s_]*" + Variables.NamespacesCaseInsensitive[Namespace.Category]
        + @"[\s_]*([^\|]*?)(|\|.*?)\]\]",
        RegexOptions.Compiled);
    LooseImage = new Regex(
        @"\[\[\s*?(" + Variables.NamespacesCaseInsensitive[Namespace.File]
        + @")\s*([^\|\]]+)(.*?)\]\]",
        RegexOptions.Compiled);

    // Join the month names once; both the capturing and non-capturing forms share it.
    string monthAlternation = string.Join("|", Variables.MonthNames);
    Months = "(" + monthAlternation + ")";
    MonthsNoGroup = "(?:" + monthAlternation + ")";

    Dates = new Regex("^(0?[1-9]|[12][0-9]|3[01]) " + Months + "$", RegexOptions.Compiled);
    Dates2 = new Regex("^" + Months + " (0?[1-9]|[12][0-9]|3[01])$", RegexOptions.Compiled);
    InternationalDates = new Regex(
        @"\b([1-9]|[12][0-9]|3[01]) +" + Months + @" +([12]\d{3})\b", RegexOptions.Compiled);
    AmericanDates = new Regex(
        Months + @" +([1-9]|[12][0-9]|3[01]),? +([12]\d{3})\b", RegexOptions.Compiled);
    DayMonth = new Regex(@"\b([1-9]|[12][0-9]|3[01]) +" + Months + @"\b", RegexOptions.Compiled);
    MonthDay = new Regex(Months + @" +([1-9]|[12][0-9]|3[01])\b", RegexOptions.Compiled);

    // Two day numbers joined by a literal en dash, the &ndash; entity, the {{ndash}}
    // template or a slash. The entity form replaces a second, redundant copy of the
    // literal dash that the alternation used to contain.
    const string dayRange =
        @"((?:[1-9]|[12][0-9]|3[01])(?:–|&ndash;|{{ndash}}|\/)(?:[1-9]|[12][0-9]|3[01]))";
    DayMonthRangeSpan = new Regex(@"\b" + dayRange + " " + Months + @"\b", RegexOptions.Compiled);
    MonthDayRangeSpan = new Regex(Months + " " + dayRange + @"\b", RegexOptions.Compiled);

    // Redirect keywords come from the site's magic words (with '#' removed) when available.
    List<string> magic;
    string redirectWords = Variables.MagicWords.TryGetValue("redirect", out magic)
        ? string.Join("|", magic.ToArray()).Replace("#", "")
        : "REDIRECT";
    Redirect = new Regex(
        @"#(?:" + redirectWords + @")\s*:?\s*\[\[\s*:?\s*([^\|\[\]]*?)\s*(\|.*?)?\]\]",
        RegexOptions.IgnoreCase);

    // Disambiguation template names. In the default list "Number" and "Road" are separate
    // alternatives (so both "Numberdis" and "Roaddis" match) and "School" is spelled in
    // full (so "Schooldis" matches).
    string disambigNames = Variables.LangCode == "ru"
        ? "([Dd]isambiguation|[Dd]isambig|[Нн]еоднозначность)"
        : "([234]CC|[Dd]isamb(?:ig(?:uation)?)?|[Dd]ab|[Mm]athdab|[Ss]urname|(?:[Nn]umber|[Rr]oad|[Hh]ospital|[Gg]eo|[Hh]n|[Ss]chool)dis|SIA|[Ll]etter-disambig|[Ss]hipindex|[Mm]ountainindex)";
    Disambigs = new Regex(
        TemplateStart + disambigNames + @"\s*(?:\|[^{}]*?)?}}", RegexOptions.Compiled);

    // DEFAULTSORT keywords: magic words (with ':' removed) when available, else built-in names.
    string defaultsortWords;
    if (Variables.MagicWords.TryGetValue("defaultsort", out magic))
    {
        defaultsortWords = "(?i:" + string.Join("|", magic.ToArray()).Replace(":", "") + ")";
    }
    else if (Variables.LangCode == "en")
    {
        defaultsortWords = "(?:(?i:defaultsort(key|CATEGORYSORT)?))";
    }
    else
    {
        defaultsortWords = "(?i:defaultsort)";
    }

    // The key is either brace-balanced text or, failing that, text up to the first '}' or
    // line break; the match ends at "}}" or at a line break.
    Defaultsort = new Regex(
        TemplateStart + defaultsortWords
        + @"\s*[:|](?<key>(?>[^\{\}]+|\{(?<DEPTH>)|\}(?<-DEPTH>))*(?(DEPTH)(?!))|[^\}\r\n]*?)(?:}}|\r|\n)",
        RegexOptions.Compiled | RegexOptions.ExplicitCapture);

    // Variables.URL and Variables.URLLong are split at the index returned by
    // Tools.FirstDifference. The part of URLLong before that index is matched literally,
    // followed by either the rest of URLLong plus "index.php?title=" / "index.php/",
    // or the rest of URL plus "/wiki/".
    int pos = Tools.FirstDifference(Variables.URL, Variables.URLLong);
    string urlPattern = Regex.Escape(Variables.URLLong.Substring(0, pos))
        + "(?:" + Regex.Escape(Variables.URLLong.Substring(pos)) + @"index\.php(?:\?title=|/)|"
        + Regex.Escape(Variables.URL.Substring(pos)) + "/wiki/" + ")";
    ExtractTitle = new Regex("^" + urlPattern + "([^?&]*)$", RegexOptions.Compiled);

    string cat = Variables.NamespacesCaseInsensitive[Namespace.Category];
    string img = Variables.NamespacesCaseInsensitive[Namespace.Image];
    EmptyLink = new Regex(
        "\\[\\[(:?" + cat + "|" + img + "|)(|" + img + "|" + cat + "|.*?)\\]\\]",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);
    EmptyTemplate = new Regex(
        @"{{(" + Variables.NamespacesCaseInsensitive[Namespace.Template] + @")?[|\s]*}}",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);
}
/// <summary>
/// Rebuilds the regular expressions whose patterns depend on the current
/// <c>Variables</c> settings: namespace patterns, stub pattern, month names, magic words,
/// language code and site URLs.
/// </summary>
/// <remarks>
/// When a "redirect" or "defaultsort" magic-word entry exists but contains no words, the
/// built-in keyword is used, exactly as if the entry were absent.
/// </remarks>
public static void MakeLangSpecificRegexes()
{
    TemplateStart = @"\{\{\s*(:?" + Variables.NamespacesCaseInsensitive[Namespace.Template] + ")?";
    Category = new Regex(
        @"\[\[" + Variables.NamespacesCaseInsensitive[Namespace.Category] + @"(.*?)\]\]",
        RegexOptions.Compiled);

    // The images mask was formerly [^\]]*?(?:\[\[?.*?(?:\[\[.*?\]\].*?)?\]\]?[^\]]*?)*)\]\]
    // It now uses an allowed-character list, then a file extension (mandatory on MediaWiki),
    // then an optional closing ]] or pipe, so that typo fixing and find & replace can still
    // operate on image descriptions.
    // TODO: replace these two with direct returns from API, when available
    // (A stricter alternative would require the pipe or ]] after the file name:
    // [^\[\]\|\{\}]+\.[a-zA-Z]{3,4}\b(?:\s*(?:\]\]|\|)) )
    Images = new Regex(
        @"\[\[" + Variables.NamespacesCaseInsensitive[Namespace.File]
        + @"[ \%\!""$&'\(\)\*,\-.\/0-9:;=\?@A-Z\\\^_`a-z~\x80-\xFF\+]+\.[a-zA-Z]{3,4}\b(?:\s*(?:\]\]|\|))?"
        + @"|<[Gg]allery\b([^>]*?)>[\s\S]*?</ ?[Gg]allery>"
        + @"|\|\s*(?:[Pp]hoto|[Ii]mg|[Ii]mage\d*|[Cc]over)(?:[_ ]\w+)?\s*=.+?\.[a-zA-Z]{3,4}\s*(?:\||}})",
        RegexOptions.Compiled | RegexOptions.Singleline);

    Stub = new Regex(@"{{.*?" + Variables.Stub + @"}}", RegexOptions.Compiled);
    PossiblyCommentedStub = new Regex(
        @"(<!-- ?\{\{[^}]*?" + Variables.Stub + @"\b\}\}.*?-->"
        + @"|\{\{[^}]*?" + Variables.Stub + @"\}\})",
        RegexOptions.Compiled);
    TemplateCall = new Regex(
        TemplateStart + @"\s*([^\]\|]*)\s*(.*)}}",
        RegexOptions.Compiled | RegexOptions.Singleline);

    LooseCategory = new Regex(
        @"\[\[[\s_]*" + Variables.NamespacesCaseInsensitive[Namespace.Category]
        + @"[\s_]*([^\|]*?)(|\|.*?)\]\]",
        RegexOptions.Compiled);
    LooseImage = new Regex(
        @"\[\[\s*?(" + Variables.NamespacesCaseInsensitive[Namespace.File]
        + @")\s*([^\|\]]*?)(.*?)\]\]",
        RegexOptions.Compiled);

    // Every configured month name takes part, not just a fixed count of twelve.
    string months = "(" + string.Join("|", Variables.MonthNames) + ")";
    Dates = new Regex("^(0?[1-9]|[12][0-9]|3[01]) " + months + "$", RegexOptions.Compiled);
    Dates2 = new Regex("^" + months + " (0?[1-9]|[12][0-9]|3[01])$", RegexOptions.Compiled);

    // Redirect keywords: the site's magic words with '#' removed, else the built-in keyword.
    string redirectWords = null;
    if (Variables.MagicWords.ContainsKey("redirect"))
    {
        redirectWords = JoinMagicWords(Variables.MagicWords["redirect"], "#");
    }
    Redirect = new Regex(
        @"#(?:" + (redirectWords ?? "REDIRECT") + @")\s*:?\s*\[\[\s*:?\s*([^\|]*?)\s*(|\|.*?)]\]",
        RegexOptions.IgnoreCase | RegexOptions.Multiline);

    string disambigNames;
    switch (Variables.LangCode)
    {
        case LangCodeEnum.ru:
            disambigNames = "([Dd]isambiguation|[Dd]isambig|[Нн]еоднозначность)";
            break;
        default:
            disambigNames = "([234]CC|[Dd]isambig|[Gg]eodis|[Hh]ndis|[Ss]urname|[Nn]umberdis|[Rr]oaddis|[Ll]etter-disambig)";
            break;
    }
    Disambigs = new Regex(TemplateStart + disambigNames + "}}", RegexOptions.Compiled);

    // DEFAULTSORT keywords: the site's magic words with ':' removed, else built-in names.
    string defaultsortWords = null;
    if (Variables.MagicWords.ContainsKey("defaultsort"))
    {
        defaultsortWords = JoinMagicWords(Variables.MagicWords["defaultsort"], ":");
    }

    string defaultsortPattern;
    if (defaultsortWords != null)
    {
        defaultsortPattern = "(?i:" + defaultsortWords + ")";
    }
    else if (Variables.LangCode == LangCodeEnum.en)
    {
        defaultsortPattern = "(?:(?i:defaultsort(key|CATEGORYSORT)?|lifetime|BIRTH-DEATH-SORT)|BD)";
    }
    else
    {
        defaultsortPattern = "(?i:defaultsort)";
    }
    Defaultsort = new Regex(
        TemplateStart + defaultsortPattern + @"\s*[:|](?<key>[^\}]*)}}",
        RegexOptions.Compiled | RegexOptions.ExplicitCapture);

    // Variables.URL and Variables.URLLong are split at the index returned by
    // Tools.FirstDifference. The part of URLLong before that index is matched literally,
    // followed by either the rest of URLLong plus "index.php?title=" / "index.php/",
    // or the rest of URL plus "/wiki/".
    int pos = Tools.FirstDifference(Variables.URL, Variables.URLLong);
    string urlPattern = Regex.Escape(Variables.URLLong.Substring(0, pos))
        + "(?:" + Regex.Escape(Variables.URLLong.Substring(pos)) + @"index\.php(?:\?title=|/)|"
        + Regex.Escape(Variables.URL.Substring(pos)) + "/wiki/" + ")";
    ExtractTitle = new Regex("^" + urlPattern + "([^?&]*)$", RegexOptions.Compiled);
}

/// <summary>
/// Joins magic words into the body of a regex alternation, removing every occurrence of
/// <paramref name="strip"/> from each word first.
/// </summary>
/// <param name="words">Sequence of strings to join.</param>
/// <param name="strip">Text removed from each word before joining.</param>
/// <returns>
/// The words separated by "|", or <c>null</c> when <paramref name="words"/> yields no items.
/// </returns>
private static string JoinMagicWords(System.Collections.IEnumerable words, string strip)
{
    StringBuilder joined = new StringBuilder();
    bool any = false;

    foreach (string word in words)
    {
        // Separator goes between words, so nothing has to be trimmed afterwards.
        if (any)
        {
            joined.Append('|');
        }

        joined.Append(word.Replace(strip, ""));
        any = true;
    }

    return any ? joined.ToString() : null;
}
/// <summary>
/// Rebuilds the regular expressions and the <c>Months</c> pattern fragment from the current
/// <c>Variables</c> settings: namespace patterns, stub pattern, month names, magic words,
/// language code and site URLs.
/// </summary>
public static void MakeLangSpecificRegexes()
{
    TemplateStart = @"\{\{\s*(:?" + Variables.NamespacesCaseInsensitive[Namespace.Template] + ")?";
    Category = new Regex(
        @"\[\[" + Variables.NamespacesCaseInsensitive[Namespace.Category] + @"(.*?)\]\]",
        RegexOptions.Compiled);

    // Alternatives, in order:
    //  1. [[File:... : an allowed-character list, then a file extension (mandatory on
    //     MediaWiki), then an optional closing ]] or pipe. Stopping at the file name lets
    //     typo fixing and find & replace still operate on the image description.
    //     (A stricter alternative would require the pipe or ]] after the file name:
    //     [^\[\]\|\{\}]+\.[a-zA-Z]{3,4}\b(?:\s*(?:\]\]|\|)) )
    //  2. a whole <gallery> ... </gallery> block
    //  3. a "| photo/img/image/cover = file.ext" template parameter
    Images = new Regex(
        @"\[\[" + Variables.NamespacesCaseInsensitive[Namespace.File]
        + @"[ \%\!""$&'\(\)\*,\-.\/0-9:;=\?@A-Z\\\^_`a-z~\x80-\xFF\+]+\.[a-zA-Z]{3,4}\b(?:\s*(?:\]\]|\|))?"
        + @"|<[Gg]allery\b([^>]*?)>[\s\S]*?</ ?[Gg]allery>"
        + @"|\|\s*(?:[Pp]hoto|[Ii]mg|[Ii]mage\d*|[Cc]over)(?:[_ ]\w+)?\s*=[^\|{}]+?\.[a-zA-Z]{3,4}\s*(?=\||}})",
        RegexOptions.Compiled | RegexOptions.Singleline);

    Stub = new Regex(@"{{.*?" + Variables.Stub + @"}}", RegexOptions.Compiled);
    PossiblyCommentedStub = new Regex(
        @"(<!-- ?\{\{[^}]*?" + Variables.Stub + @"\b\}\}.*?-->"
        + @"|\{\{[^}]*?" + Variables.Stub + @"\}\})",
        RegexOptions.Compiled);
    TemplateCall = new Regex(
        TemplateStart + @"\s*([^\]\|]*)\s*(.*)}}",
        RegexOptions.Compiled | RegexOptions.Singleline);

    LooseCategory = new Regex(
        @"\[\[[\s_]*" + Variables.NamespacesCaseInsensitive[Namespace.Category]
        + @"[\s_]*([^\|]*?)(|\|.*?)\]\]",
        RegexOptions.Compiled);
    LooseImage = new Regex(
        @"\[\[\s*?(" + Variables.NamespacesCaseInsensitive[Namespace.File]
        + @")\s*([^\|\]]+)(.*?)\]\]",
        RegexOptions.Compiled);

    Months = "(" + string.Join("|", Variables.MonthNames) + ")";
    Dates = new Regex("^(0?[1-9]|[12][0-9]|3[01]) " + Months + "$", RegexOptions.Compiled);
    Dates2 = new Regex("^" + Months + " (0?[1-9]|[12][0-9]|3[01])$", RegexOptions.Compiled);

    // Redirect keywords: the site's magic words with '#' removed, else the built-in keyword.
    string redirectWords;
    if (Variables.MagicWords.ContainsKey("redirect"))
    {
        redirectWords = string.Join("|", Variables.MagicWords["redirect"].ToArray()).Replace("#", "");
    }
    else
    {
        redirectWords = "REDIRECT";
    }
    Redirect = new Regex(
        @"#(?:" + redirectWords + @")\s*:?\s*\[\[\s*:?\s*([^\|]*?)\s*(|\|.*?)]\]",
        RegexOptions.IgnoreCase | RegexOptions.Multiline);

    string disambigNames = Variables.LangCode == LangCodeEnum.ru
        ? "([Dd]isambiguation|[Dd]isambig|[Нн]еоднозначность)"
        : "([234]CC|[Dd]isambig|[Gg]eodis|[Hh]ndis|[Ss]urname|[Nn]umberdis|[Rr]oaddis|[Ll]etter-disambig)";
    Disambigs = new Regex(TemplateStart + disambigNames + "}}", RegexOptions.Compiled);

    // DEFAULTSORT keywords: the site's magic words with ':' removed, else built-in names.
    string defaultsortWords;
    if (Variables.MagicWords.ContainsKey("defaultsort"))
    {
        defaultsortWords = "(?i:"
            + string.Join("|", Variables.MagicWords["defaultsort"].ToArray()).Replace(":", "")
            + ")";
    }
    else if (Variables.LangCode == LangCodeEnum.en)
    {
        defaultsortWords = "(?:(?i:defaultsort(key|CATEGORYSORT)?))";
    }
    else
    {
        defaultsortWords = "(?i:defaultsort)";
    }
    Defaultsort = new Regex(
        TemplateStart + defaultsortWords + @"\s*[:|](?<key>[^\}]*)}}",
        RegexOptions.Compiled | RegexOptions.ExplicitCapture);

    // Variables.URL and Variables.URLLong are split at the index returned by
    // Tools.FirstDifference. The part of URLLong before that index is matched literally,
    // followed by either the rest of URLLong plus "index.php?title=" / "index.php/",
    // or the rest of URL plus "/wiki/".
    int pos = Tools.FirstDifference(Variables.URL, Variables.URLLong);
    string urlPattern = Regex.Escape(Variables.URLLong.Substring(0, pos))
        + "(?:" + Regex.Escape(Variables.URLLong.Substring(pos)) + @"index\.php(?:\?title=|/)|"
        + Regex.Escape(Variables.URL.Substring(pos)) + "/wiki/" + ")";
    ExtractTitle = new Regex("^" + urlPattern + "([^?&]*)$", RegexOptions.Compiled);

    string categoryNs = Variables.NamespacesCaseInsensitive[Namespace.Category];
    string imageNs = Variables.NamespacesCaseInsensitive[Namespace.Image];
    EmptyLink = new Regex(
        "\\[\\[(:?" + categoryNs + "|" + imageNs + "|)(|" + imageNs + "|" + categoryNs + "|.*?)\\]\\]",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);
    EmptyTemplate = new Regex(
        @"{{(" + Variables.NamespacesCaseInsensitive[Namespace.Template] + @")?[|\s]*}}",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);
}