FirstDifference() public static method

Returns the index of the first character at which the two strings differ
public static FirstDifference ( string a, string b ) : int
a string First string
b string Second string
return int
Exemplo n.º 1
0
        /// <summary>
        /// Builds the regular expressions whose patterns depend on the active wiki's settings
        /// read from <c>Variables</c>: namespace patterns, the stub pattern, the language code
        /// and the site URLs. The results are stored in the static regex members assigned below.
        /// </summary>
        public static void MakeLangSpecificRegexes()
        {
            // Namespace pattern with key 10 (presumably the Template namespace -- confirm), made
            // optional: a pattern that already starts with '(' gets an empty alternative inserted
            // before its last character; anything else is wrapped in "(?:...|)".
            TemplateStart = Variables.NamespacesCaseInsensitive[10];
            TemplateStart = (TemplateStart[0] == '(')
                ? TemplateStart.Insert(TemplateStart.Length - 1, "|")
                : "(?:" + TemplateStart + "|)";
            TemplateStart = @"\{\{\s*" + TemplateStart;

            // Category and Images share the same <gallery>...</gallery> alternative.
            const string galleryAlternative = @"|<[Gg]allery\b([^>]*?)>[\s\S]*?</ ?[Gg]allery>";

            Category = new Regex(@"\[\[" + Variables.NamespacesCaseInsensitive[14] + @"(.*?)\]\]" + galleryAlternative,
                                 RegexOptions.Compiled);
            Images = new Regex(@"\[\[" + Variables.NamespacesCaseInsensitive[6] + @"(.*?)\]\]" + galleryAlternative,
                               RegexOptions.Compiled);

            Stub = new Regex(@"{{.*?" + Variables.Stub + @"}}", RegexOptions.Compiled);

            // A stub template either on its own or wrapped in an HTML comment.
            PossiblyCommentedStub = new Regex(
                @"(<!-- ?\{\{[^}]*?" + Variables.Stub + @"\b\}\}.*?-->|\{\{[^}]*?" + Variables.Stub + @"\}\})",
                RegexOptions.Compiled);

            TemplateCall = new Regex(TemplateStart + @"\s*([^\]\|]*)\s*(.*)}}",
                                     RegexOptions.Compiled | RegexOptions.Singleline);

            // Keyword(s) that may follow '#' in a redirect, chosen by language code.
            string redirectWords;

            switch (Variables.LangCode)
            {
            case LangCodeEnum.ar:
                redirectWords = "(?:تحويل|REDIRECT)";
                break;

            case LangCodeEnum.bg:
                redirectWords = "(?:redirect|пренасочване|виж)";
                break;

            case LangCodeEnum.fi:
                redirectWords = "(?:OHJAUS|UUDELLEENOHJAUS|REDIRECT)";
                break;

            case LangCodeEnum.he:
                redirectWords = "(?:הפניה|REDIRECT)";
                break;

            case LangCodeEnum.Is:
                redirectWords = "(?:tilvísun|TILVÍSUN|REDIRECT)";
                break;

            case LangCodeEnum.nl:
                redirectWords = "(?:REDIRECT|DOORVERWIJZING)";
                break;

            case LangCodeEnum.ru:
                redirectWords = "(?:REDIRECT|ПЕРЕНАПРАВЛЕНИЕ|ПЕРЕНАПР)";
                break;

            case LangCodeEnum.sk:
                redirectWords = "(?:redirect|presmeruj)";
                break;

            case LangCodeEnum.uk:
                redirectWords = "(?:REDIRECT|ПЕРЕНАПРАВЛЕННЯ|ПЕРЕНАПР)";
                break;

            default:
                redirectWords = "REDIRECT";
                break;
            }

            Redirect = new Regex(@"^#" + redirectWords + @".*?\[\[\s*:?\s*([^\|]*?)\s*(|\|.*?)]\]",
                                 RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Multiline);

            // Only the Russian pattern is prefixed with TemplateStart; every other language
            // uses a fixed list of template names directly after "{{".
            string disambigPattern = (Variables.LangCode == LangCodeEnum.ru)
                ? TemplateStart + @"([Dd]isambiguation|[Dd]isambig|[Нн]еоднозначность)}}"
                : @"{{([234]CC|[Dd]isambig|[Gg]eodis|[Hh]ndis|[Ss]urname|[Nn]umberdis|[Rr]oaddis|[Ll]etter-disambig)}}";
            Disambigs = new Regex(disambigPattern, RegexOptions.Compiled);

            // English accepts extra sort-key template names besides DEFAULTSORT.
            string sortWords = (Variables.LangCode == LangCodeEnum.en)
                ? "(?:(?i:defaultsort|lifetime|BIRTH-DEATH-SORT)|BD)"
                : "(?i:defaultsort)";

            Defaultsort = new Regex(TemplateStart + sortWords + @"\s*[:|](?<key>[^\}]*)}}",
                                    RegexOptions.Compiled | RegexOptions.ExplicitCapture);

            // Split both URLs at the index returned by Tools.FirstDifference: the shared prefix is
            // emitted once, then either the rest of URLLong followed by "index.php?title=" or
            // "index.php/", or the rest of URL followed by "/wiki/".
            int split = Tools.FirstDifference(Variables.URL, Variables.URLLong);
            string commonPrefix = Regex.Escape(Variables.URLLong.Substring(0, split));
            string longTail = Regex.Escape(Variables.URLLong.Substring(split));
            string shortTail = Regex.Escape(Variables.URL.Substring(split));

            string urlPattern = commonPrefix
                                + "(?:" + longTail + @"index\.php(?:\?title=|/)|"
                                + shortTail + "/wiki/" + ")";

            // The captured title is everything up to the end of input, with no '?' or '&' allowed.
            ExtractTitle = new Regex("^" + urlPattern + "([^?&]*)$", RegexOptions.Compiled);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds the regular expressions whose patterns depend on the active wiki's settings
        /// read from <c>Variables</c>: namespace patterns, stub pattern, month names, magic words,
        /// language code and site URLs. The results are stored in the static members assigned below.
        /// </summary>
        public static void MakeLangSpecificRegexes()
        {
            // Compile every namespace pattern once, keyed by the same namespace key.
            NamespacesCaseInsensitive = new Dictionary<int, Regex>();
            foreach (var ns in Variables.NamespacesCaseInsensitive)
            {
                NamespacesCaseInsensitive[ns.Key] = new Regex(ns.Value, RegexOptions.Compiled);
            }

            // "{{", optional whitespace, then an optional group: an optional ':' followed by the
            // Template namespace pattern.
            TemplateStart = @"\{\{\s*(:?" + Variables.NamespacesCaseInsensitive[Namespace.Template] + ")?";

            Category = new Regex(
                @"\[\[\s*" + Variables.NamespacesCaseInsensitive[Namespace.Category] + @"\s*(.*?)\s*(?:|\|([^\|\]]*))\s*\]\]",
                RegexOptions.Compiled);

            // Use allowed character list, then a file extension (these are mandatory on mediawiki), then optional closing ]]
            // this allows typo fixing and find&replace to operate on image descriptions
            // or, alternatively, an image filename has to have a pipe or ]] after it if using the [[Image: start, so just set first one to
            // @"[^\[\]\|\{\}]+\.[a-zA-Z]{3,4}\b(?:\s*(?:\]\]|\|))
            // handles <gallery> and {{gallery}} too
            string imagesPattern =
                @"\[\[\s*" + Variables.NamespacesCaseInsensitive[Namespace.File] +
                @"[ \%\!""$&'\(\)\*,\-.\/0-9:;=\?@A-Z\\\^_`a-z~\x80-\xFF\+]+\.[a-zA-Z]{3,4}\b(?:\s*(?:\]\]|\|))?|<[Gg]allery\b([^>]*?)>[\s\S]*?</ ?[Gg]allery>|{{\s*[Gg]allery\s*(?:\|(?>[^\{\}]+|\{(?<DEPTH>)|\}(?<-DEPTH>))*(?(DEPTH)(?!)))?}}|\|\s*[a-zA-Z\d_ ]+\s*=[^\|{}]+?\.[a-zA-Z]{3,4}\s*(?=\||}})";
            Images = new Regex(imagesPattern, RegexOptions.Compiled | RegexOptions.Singleline);

            // Whole [[File:...]] link; the DEPTH balancing group keeps nested [[...]] pairs matched.
            FileNamespaceLink = new Regex(
                @"\[\[\s*" + Variables.NamespacesCaseInsensitive[Namespace.File] +
                @"((?>[^\[\]]+|\[\[(?<DEPTH>)|\]\](?<-DEPTH>))*(?(DEPTH)(?!)))\]\]",
                RegexOptions.Compiled);

            Stub = new Regex(@"{{.*?" + Variables.Stub + @"}}", RegexOptions.Compiled);

            // A stub template either on its own or wrapped in an HTML comment.
            PossiblyCommentedStub = new Regex(
                @"(<!-- ?\{\{[^}]*?" + Variables.Stub + @"\b\}\}.*?-->|\{\{[^}]*?" + Variables.Stub + @"\}\})",
                RegexOptions.Compiled);

            TemplateCall = new Regex(TemplateStart + @"\s*([^\]\|]*)\s*(.*)}}",
                                     RegexOptions.Compiled | RegexOptions.Singleline);

            // "Loose" variants also tolerate whitespace/underscores around the namespace.
            LooseCategory = new Regex(
                @"\[\[[\s_]*" + Variables.NamespacesCaseInsensitive[Namespace.Category] + @"[\s_]*([^\|]*?)(|\|.*?)\]\]",
                RegexOptions.Compiled);

            LooseImage = new Regex(
                @"\[\[\s*?(" + Variables.NamespacesCaseInsensitive[Namespace.File] + @")\s*([^\|\]]+)(.*?)\]\]",
                RegexOptions.Compiled);

            // Month-name alternation, once as a capturing and once as a non-capturing group.
            string monthAlternation = string.Join("|", Variables.MonthNames);
            Months        = "(" + monthAlternation + ")";
            MonthsNoGroup = "(?:" + monthAlternation + ")";

            // Whole-string "day month" / "month day" (leading zero allowed on the day).
            Dates  = new Regex("^(0?[1-9]|[12][0-9]|3[01]) " + Months + "$", RegexOptions.Compiled);
            Dates2 = new Regex("^" + Months + " (0?[1-9]|[12][0-9]|3[01])$", RegexOptions.Compiled);

            // Dates embedded in text, with a four-digit year starting with 1 or 2.
            InternationalDates = new Regex(@"\b([1-9]|[12][0-9]|3[01]) +" + Months + @" +([12]\d{3})\b", RegexOptions.Compiled);
            AmericanDates      = new Regex(Months + @" +([1-9]|[12][0-9]|3[01]),? +([12]\d{3})\b", RegexOptions.Compiled);

            DayMonth = new Regex(@"\b([1-9]|[12][0-9]|3[01]) +" + Months + @"\b", RegexOptions.Compiled);
            MonthDay = new Regex(Months + @" +([1-9]|[12][0-9]|3[01])\b", RegexOptions.Compiled);

            // Day ranges joined by an en dash, "&ndash;", "{{ndash}}" or '/'.
            DayMonthRangeSpan = new Regex(@"\b((?:[1-9]|[12][0-9]|3[01])(?:–|&ndash;|{{ndash}}|\/)(?:[1-9]|[12][0-9]|3[01])) " + Months + @"\b", RegexOptions.Compiled);
            MonthDayRangeSpan = new Regex(Months + @" ((?:[1-9]|[12][0-9]|3[01])(?:–|&ndash;|{{ndash}}|\/)(?:[1-9]|[12][0-9]|3[01]))\b", RegexOptions.Compiled);

            // Redirect keywords come from the "redirect" magic words (with '#' removed) when
            // available, otherwise the literal REDIRECT is used.
            List<string> words;
            string redirectWords = "REDIRECT";
            if (Variables.MagicWords.TryGetValue("redirect", out words))
            {
                redirectWords = string.Join("|", words.ToArray()).Replace("#", "");
            }

            Redirect = new Regex(@"#(?:" + redirectWords + @")\s*:?\s*\[\[\s*:?\s*([^\|\[\]]*?)\s*(\|.*?)?\]\]",
                                 RegexOptions.IgnoreCase);

            // Disambiguation template names: a dedicated list for "ru", a common list otherwise.
            string disambigNames = (Variables.LangCode == "ru")
                ? "([Dd]isambiguation|[Dd]isambig|[Нн]еоднозначность)"
                : "([234]CC|[Dd]isamb(?:ig(?:uation)?)?|[Dd]ab|[Mm]athdab|[Ss]urname|(?:[Nn]umber[Rr]oad|[Hh]ospital|[Gg]eo|[Hh]n|[Ss]choo)dis|SIA|[Ll]etter-disambig|[Ss]hipindex|[Mm]ountainindex)";
            Disambigs = new Regex(TemplateStart + disambigNames + @"\s*(?:\|[^{}]*?)?}}", RegexOptions.Compiled);

            // Sort-key keywords: "defaultsort" magic words (with ':' removed) when available,
            // otherwise a built-in pattern that is wider for "en".
            string sortWords;
            if (Variables.MagicWords.TryGetValue("defaultsort", out words))
            {
                sortWords = "(?i:" + string.Join("|", words.ToArray()).Replace(":", "") + ")";
            }
            else if (Variables.LangCode == "en")
            {
                sortWords = "(?:(?i:defaultsort(key|CATEGORYSORT)?))";
            }
            else
            {
                sortWords = "(?i:defaultsort)";
            }

            // The key is either a brace-balanced run or, failing that, a lazy run without '}' or
            // line breaks; the match ends at "}}" or at a line break.
            Defaultsort = new Regex(
                TemplateStart + sortWords + @"\s*[:|](?<key>(?>[^\{\}]+|\{(?<DEPTH>)|\}(?<-DEPTH>))*(?(DEPTH)(?!))|[^\}\r\n]*?)(?:}}|\r|\n)",
                RegexOptions.Compiled | RegexOptions.ExplicitCapture);

            // Split both URLs at the index returned by Tools.FirstDifference: the shared prefix is
            // emitted once, then either the rest of URLLong followed by "index.php?title=" or
            // "index.php/", or the rest of URL followed by "/wiki/".
            int split = Tools.FirstDifference(Variables.URL, Variables.URLLong);
            string commonPrefix = Regex.Escape(Variables.URLLong.Substring(0, split));
            string longTail = Regex.Escape(Variables.URLLong.Substring(split));
            string shortTail = Regex.Escape(Variables.URL.Substring(split));

            string urlPattern = commonPrefix
                                + "(?:" + longTail + @"index\.php(?:\?title=|/)|"
                                + shortTail + "/wiki/" + ")";

            // The captured title is everything up to the end of input, with no '?' or '&' allowed.
            ExtractTitle = new Regex("^" + urlPattern + "([^?&]*)$", RegexOptions.Compiled);

            string categoryNs = Variables.NamespacesCaseInsensitive[Namespace.Category];
            string imageNs = Variables.NamespacesCaseInsensitive[Namespace.Image];

            EmptyLink = new Regex(
                "\\[\\[(:?" + categoryNs + "|" + imageNs + "|)(|" + imageNs + "|" + categoryNs + "|.*?)\\]\\]",
                RegexOptions.IgnoreCase | RegexOptions.Compiled);

            // "{{", optional Template namespace, then only pipes/whitespace before "}}".
            EmptyTemplate = new Regex(
                @"{{(" + Variables.NamespacesCaseInsensitive[Namespace.Template] + @")?[|\s]*}}",
                RegexOptions.IgnoreCase | RegexOptions.Compiled);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Builds the regular expressions whose patterns depend on the active wiki's settings
        /// read from <c>Variables</c>: namespace patterns, stub pattern, month names, magic words,
        /// language code and site URLs. The results are stored in the static members assigned below.
        /// </summary>
        /// <remarks>
        /// A "redirect" or "defaultsort" magic-word entry that exists but contains no words is
        /// treated like a missing entry, so the built-in keywords are used instead.
        /// </remarks>
        public static void MakeLangSpecificRegexes()
        {
            // "{{", optional whitespace, then an optional group: an optional ':' followed by the
            // Template namespace pattern.
            TemplateStart = @"\{\{\s*(:?" + Variables.NamespacesCaseInsensitive[Namespace.Template] + ")?";

            Category = new Regex(@"\[\[" + Variables.NamespacesCaseInsensitive[Namespace.Category]
                                 + @"(.*?)\]\]", RegexOptions.Compiled);

            // images mask was [^\]]*?(?:\[\[?.*?(?:\[\[.*?\]\].*?)?\]\]?[^\]]*?)*)\]\]
            // now instead use allowed character list, then a file extension (these are mandatory on mediawiki), then optional closing ]]
            // this allows typo fixing and find&replace to operate on image descriptions
            // TODO: replace these two with direct returns from API, when available
            // or, alternatively, an image filename has to have a pipe or ]] after it if using the [[Image: start, so just set first one to
            // @"[^\[\]\|\{\}]+\.[a-zA-Z]{3,4}\b(?:\s*(?:\]\]|\|))
            Images =
                new Regex(
                    @"\[\[" + Variables.NamespacesCaseInsensitive[Namespace.File] +
                    @"[ \%\!""$&'\(\)\*,\-.\/0-9:;=\?@A-Z\\\^_`a-z~\x80-\xFF\+]+\.[a-zA-Z]{3,4}\b(?:\s*(?:\]\]|\|))?|<[Gg]allery\b([^>]*?)>[\s\S]*?</ ?[Gg]allery>|\|\s*(?:[Pp]hoto|[Ii]mg|[Ii]mage\d*|[Cc]over)(?:[_ ]\w+)?\s*=.+?\.[a-zA-Z]{3,4}\s*(?:\||}})",
                    RegexOptions.Compiled | RegexOptions.Singleline);

            Stub = new Regex(@"{{.*?" + Variables.Stub + @"}}", RegexOptions.Compiled);

            // A stub template either on its own or wrapped in an HTML comment.
            PossiblyCommentedStub =
                new Regex(
                    @"(<!-- ?\{\{[^}]*?" + Variables.Stub + @"\b\}\}.*?-->|\{\{[^}]*?" + Variables.Stub + @"\}\})",
                    RegexOptions.Compiled);

            TemplateCall = new Regex(TemplateStart + @"\s*([^\]\|]*)\s*(.*)}}",
                                     RegexOptions.Compiled | RegexOptions.Singleline);

            // "Loose" variants also tolerate whitespace/underscores around the namespace.
            LooseCategory =
                new Regex(@"\[\[[\s_]*" + Variables.NamespacesCaseInsensitive[Namespace.Category]
                          + @"[\s_]*([^\|]*?)(|\|.*?)\]\]",
                          RegexOptions.Compiled);

            LooseImage = new Regex(@"\[\[\s*?(" + Variables.NamespacesCaseInsensitive[Namespace.File]
                                   + @")\s*([^\|\]]*?)(.*?)\]\]",
                                   RegexOptions.Compiled);

            // Capturing alternation of the first twelve entries of Variables.MonthNames.
            StringBuilder builder = new StringBuilder("(" + Variables.MonthNames[0]);

            for (int i = 1; i < 12; i++)
            {
                builder.Append("|" + Variables.MonthNames[i]);
            }

            builder.Append(")");

            // Whole-string "day month" / "month day" (leading zero allowed on the day).
            Dates  = new Regex("^(0?[1-9]|[12][0-9]|3[01]) " + builder + "$", RegexOptions.Compiled);
            Dates2 = new Regex("^" + builder + " (0?[1-9]|[12][0-9]|3[01])$", RegexOptions.Compiled);

            // Redirect keywords: every "redirect" magic word with '#' removed, each followed by '|'.
            builder = new StringBuilder();

            if (Variables.MagicWords.ContainsKey("redirect"))
            {
                foreach (string r in Variables.MagicWords["redirect"])
                {
                    builder.Append(r.Replace("#", "") + "|");
                }
            }

            if (builder.Length > 0)
            {
                // Drop the trailing '|' left by the loop.
                builder.Remove((builder.Length - 1), 1);
            }
            else
            {
                // No entry, or an entry without words: removing a character from the empty
                // builder would throw, so use the built-in keyword instead.
                builder.Append("REDIRECT");
            }

            Redirect = new Regex(@"#(?:" + builder + @")\s*:?\s*\[\[\s*:?\s*([^\|]*?)\s*(|\|.*?)]\]",
                                 RegexOptions.IgnoreCase | RegexOptions.Multiline);
            string s;

            // Disambiguation template names: a dedicated list for ru, a common list otherwise.
            switch (Variables.LangCode)
            {
            case LangCodeEnum.ru:
                s = "([Dd]isambiguation|[Dd]isambig|[Нн]еоднозначность)";
                break;

            default:
                s = "([234]CC|[Dd]isambig|[Gg]eodis|[Hh]ndis|[Ss]urname|[Nn]umberdis|[Rr]oaddis|[Ll]etter-disambig)";
                break;
            }
            Disambigs = new Regex(TemplateStart + s + "}}", RegexOptions.Compiled);

            // Sort-key keywords: every "defaultsort" magic word with ':' removed, each followed by '|'.
            builder = new StringBuilder();

            if (Variables.MagicWords.ContainsKey("defaultsort"))
            {
                foreach (string d in Variables.MagicWords["defaultsort"])
                {
                    builder.Append(d.Replace(":", "") + "|");
                }
            }

            if (builder.Length > 0)
            {
                // Drop the trailing '|' and wrap the alternation in a case-insensitive group.
                builder.Remove((builder.Length - 1), 1);
                s = "(?i:" + builder + ")";
            }
            else
            {
                // No entry, or an entry without words (which previously produced the pattern
                // "(?i)"): use the built-in keywords, with a wider set for en.
                s = (Variables.LangCode == LangCodeEnum.en)
                    ? "(?:(?i:defaultsort(key|CATEGORYSORT)?|lifetime|BIRTH-DEATH-SORT)|BD)"
                    : "(?i:defaultsort)";
            }

            Defaultsort = new Regex(TemplateStart + s + @"\s*[:|](?<key>[^\}]*)}}",
                                    RegexOptions.Compiled | RegexOptions.ExplicitCapture);

            // Split both URLs at the index returned by Tools.FirstDifference: the shared prefix is
            // emitted once, then either the rest of URLLong followed by "index.php?title=" or
            // "index.php/", or the rest of URL followed by "/wiki/".
            int pos = Tools.FirstDifference(Variables.URL, Variables.URLLong);

            s  = Regex.Escape(Variables.URLLong.Substring(0, pos));
            s += "(?:" + Regex.Escape(Variables.URLLong.Substring(pos)) + @"index\.php(?:\?title=|/)|"
                 + Regex.Escape(Variables.URL.Substring(pos)) + "/wiki/" + ")";

            // The captured title is everything up to the end of input, with no '?' or '&' allowed.
            ExtractTitle = new Regex("^" + s + "([^?&]*)$", RegexOptions.Compiled);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds the regular expressions whose patterns depend on the active wiki's settings
        /// read from <c>Variables</c>: namespace patterns, stub pattern, month names, magic words,
        /// language code and site URLs. The results are stored in the static members assigned below.
        /// </summary>
        public static void MakeLangSpecificRegexes()
        {
            // "{{", optional whitespace, then an optional group: an optional ':' followed by the
            // Template namespace pattern.
            TemplateStart = @"\{\{\s*(:?" + Variables.NamespacesCaseInsensitive[Namespace.Template] + ")?";

            Category = new Regex(
                @"\[\[" + Variables.NamespacesCaseInsensitive[Namespace.Category] + @"(.*?)\]\]",
                RegexOptions.Compiled);

            // Use allowed character list, then a file extension (these are mandatory on mediawiki), then optional closing ]]
            // this allows typo fixing and find&replace to operate on image descriptions
            // or, alternatively, an image filename has to have a pipe or ]] after it if using the [[Image: start, so just set first one to
            // @"[^\[\]\|\{\}]+\.[a-zA-Z]{3,4}\b(?:\s*(?:\]\]|\|))
            string imagesPattern =
                @"\[\[" + Variables.NamespacesCaseInsensitive[Namespace.File] +
                @"[ \%\!""$&'\(\)\*,\-.\/0-9:;=\?@A-Z\\\^_`a-z~\x80-\xFF\+]+\.[a-zA-Z]{3,4}\b(?:\s*(?:\]\]|\|))?|<[Gg]allery\b([^>]*?)>[\s\S]*?</ ?[Gg]allery>|\|\s*(?:[Pp]hoto|[Ii]mg|[Ii]mage\d*|[Cc]over)(?:[_ ]\w+)?\s*=[^\|{}]+?\.[a-zA-Z]{3,4}\s*(?=\||}})";
            Images = new Regex(imagesPattern, RegexOptions.Compiled | RegexOptions.Singleline);

            Stub = new Regex(@"{{.*?" + Variables.Stub + @"}}", RegexOptions.Compiled);

            // A stub template either on its own or wrapped in an HTML comment.
            PossiblyCommentedStub = new Regex(
                @"(<!-- ?\{\{[^}]*?" + Variables.Stub + @"\b\}\}.*?-->|\{\{[^}]*?" + Variables.Stub + @"\}\})",
                RegexOptions.Compiled);

            TemplateCall = new Regex(TemplateStart + @"\s*([^\]\|]*)\s*(.*)}}",
                                     RegexOptions.Compiled | RegexOptions.Singleline);

            // "Loose" variants also tolerate whitespace/underscores around the namespace.
            LooseCategory = new Regex(
                @"\[\[[\s_]*" + Variables.NamespacesCaseInsensitive[Namespace.Category] + @"[\s_]*([^\|]*?)(|\|.*?)\]\]",
                RegexOptions.Compiled);

            LooseImage = new Regex(
                @"\[\[\s*?(" + Variables.NamespacesCaseInsensitive[Namespace.File] + @")\s*([^\|\]]+)(.*?)\]\]",
                RegexOptions.Compiled);

            // Capturing alternation of all month names.
            Months = "(" + string.Join("|", Variables.MonthNames) + ")";

            // Whole-string "day month" / "month day" (leading zero allowed on the day).
            Dates  = new Regex("^(0?[1-9]|[12][0-9]|3[01]) " + Months + "$", RegexOptions.Compiled);
            Dates2 = new Regex("^" + Months + " (0?[1-9]|[12][0-9]|3[01])$", RegexOptions.Compiled);

            // Redirect keywords come from the "redirect" magic words (with '#' removed) when
            // available, otherwise the literal REDIRECT is used.
            string redirectWords = "REDIRECT";
            if (Variables.MagicWords.ContainsKey("redirect"))
            {
                redirectWords = string.Join("|", Variables.MagicWords["redirect"].ToArray()).Replace("#", "");
            }

            Redirect = new Regex(@"#(?:" + redirectWords + @")\s*:?\s*\[\[\s*:?\s*([^\|]*?)\s*(|\|.*?)]\]",
                                 RegexOptions.IgnoreCase | RegexOptions.Multiline);

            // Disambiguation template names: a dedicated list for ru, a common list otherwise.
            string disambigNames = (Variables.LangCode == LangCodeEnum.ru)
                ? "([Dd]isambiguation|[Dd]isambig|[Нн]еоднозначность)"
                : "([234]CC|[Dd]isambig|[Gg]eodis|[Hh]ndis|[Ss]urname|[Nn]umberdis|[Rr]oaddis|[Ll]etter-disambig)";
            Disambigs = new Regex(TemplateStart + disambigNames + "}}", RegexOptions.Compiled);

            // Sort-key keywords: "defaultsort" magic words (with ':' removed) when available,
            // otherwise a built-in pattern that is wider for en.
            string sortWords;
            if (Variables.MagicWords.ContainsKey("defaultsort"))
            {
                sortWords = "(?i:" + string.Join("|", Variables.MagicWords["defaultsort"].ToArray()).Replace(":", "") + ")";
            }
            else if (Variables.LangCode == LangCodeEnum.en)
            {
                sortWords = "(?:(?i:defaultsort(key|CATEGORYSORT)?))";
            }
            else
            {
                sortWords = "(?i:defaultsort)";
            }

            Defaultsort = new Regex(TemplateStart + sortWords + @"\s*[:|](?<key>[^\}]*)}}",
                                    RegexOptions.Compiled | RegexOptions.ExplicitCapture);

            // Split both URLs at the index returned by Tools.FirstDifference: the shared prefix is
            // emitted once, then either the rest of URLLong followed by "index.php?title=" or
            // "index.php/", or the rest of URL followed by "/wiki/".
            int split = Tools.FirstDifference(Variables.URL, Variables.URLLong);
            string commonPrefix = Regex.Escape(Variables.URLLong.Substring(0, split));
            string longTail = Regex.Escape(Variables.URLLong.Substring(split));
            string shortTail = Regex.Escape(Variables.URL.Substring(split));

            string urlPattern = commonPrefix
                                + "(?:" + longTail + @"index\.php(?:\?title=|/)|"
                                + shortTail + "/wiki/" + ")";

            // The captured title is everything up to the end of input, with no '?' or '&' allowed.
            ExtractTitle = new Regex("^" + urlPattern + "([^?&]*)$", RegexOptions.Compiled);

            string categoryNs = Variables.NamespacesCaseInsensitive[Namespace.Category];
            string imageNs = Variables.NamespacesCaseInsensitive[Namespace.Image];

            EmptyLink = new Regex(
                "\\[\\[(:?" + categoryNs + "|" + imageNs + "|)(|" + imageNs + "|" + categoryNs + "|.*?)\\]\\]",
                RegexOptions.IgnoreCase | RegexOptions.Compiled);

            // "{{", optional Template namespace, then only pipes/whitespace before "}}".
            EmptyTemplate = new Regex(
                @"{{(" + Variables.NamespacesCaseInsensitive[Namespace.Template] + @")?[|\s]*}}",
                RegexOptions.IgnoreCase | RegexOptions.Compiled);
        }