Exemplo n.º 1
0
        /// <summary>
        /// Registers a synonym: builds a compiled, case-insensitive regular expression that finds
        /// <paramref name="from"/> when it is not directly preceded by a letter or digit, and
        /// stores it together with the original and the converted word.
        /// </summary>
        /// <param name="from">
        /// Original word. A trailing <c>#</c> is a marker, not part of the word: it is removed and
        /// the word then matches only when it is immediately followed by one or more digits.
        /// Without the marker the word must be followed by the end of input or by a character
        /// that is neither a letter nor a digit.
        /// </param>
        /// <param name="to">Converted word, stored at the same index as the pattern.</param>
        public void Add(string from, string to)
        {
            var    fromPattern = StrMatch.ReplaceMetaStr(from);
            string expression;

            // Ordinal last-character test. The culture-sensitive EndsWith("#") can report true for
            // strings whose final character is ignorable in linguistic comparisons, in which case
            // the Substring below would strip the wrong character.
            if (from.Length > 0 && from[from.Length - 1] == '#')
            {
                from = from.Substring(0, from.Length - 1);

                // NOTE(review): assumes ReplaceMetaStr leaves the trailing '#' as exactly one
                // character at the end of its result - confirm against that helper.
                fromPattern = fromPattern.Substring(0, fromPattern.Length - 1);
                expression  = string.Format("(^|[^A-Z0-9]){0}[0-9]+", fromPattern);
            }
            else
            {
                expression = string.Format("(^|[^A-Z0-9]){0}($|[^A-Z0-9])", fromPattern);
            }

            // Build the Regex first so that nothing is registered if the expression is rejected;
            // the three lists must stay index-aligned.
            var reg = new Regex(expression, RegexOptions.IgnoreCase | RegexOptions.Compiled);
            _regs.Add(reg);
            _froms.Add(from);
            _tos.Add(to);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Makes an instance from a match pattern written in a small search syntax and compiles
        /// it into lists of regular expressions (case-insensitive, single-line mode).
        /// </summary>
        /// <remarks>
        /// Pattern syntax as implemented by this constructor:
        /// <list type="bullet">
        /// <item><description>White space outside double quotes separates tokens.</description></item>
        /// <item><description><c>|</c> outside double quotes starts a new alternative (OR) group; each group gets its own collect/remove list.</description></item>
        /// <item><description>A backslash and the character after it are copied into the token unchanged.</description></item>
        /// <item><description><c>{...}</c> is a raw regular expression; it ends at the first <c>}</c>.</description></item>
        /// <item><description>A token starting with <c>-</c> is stored in the remove list, every other token in the collect list.</description></item>
        /// <item><description>In plain tokens unescaped double quotes are dropped, <c>.</c> is escaped, <c>*</c> becomes <c>.*</c>, <c>?</c> becomes <c>.?</c>, an ampersand tolerates surrounding white space and a run of spaces matches one or more spaces.</description></item>
        /// </list>
        /// Plain tokens that do not form a valid regular expression are skipped (reported through
        /// <c>Debug.WriteLine</c>); an invalid raw <c>{...}</c> expression is not caught here.
        /// </remarks>
        /// <param name="pattern">Match pattern; leading and trailing white space is ignored.</param>
        /// <exception cref="ArgumentException">A raw <c>{...}</c> token is not a valid regular expression.</exception>
        public StrMatch(string pattern)
        {
            var reopt = RegexOptions.IgnoreCase | RegexOptions.Singleline;

            var isRegularExpressioning = false;   // inside a raw {...} token
            var isDoubleQuatationing   = false;   // inside "..."
            var isEscaping             = false;   // previous character was an unconsumed backslash

            var expss = new List<List<string>>(); // one token list per OR group
            var str   = new StringBuilder();      // token being accumulated
            var exps  = new List<string>();       // token list of the current OR group

            expss.Add(exps);

            // ---- Phase 1: split the pattern into OR groups of tokens ----
            pattern = pattern.Trim();
            for (var ci = 0; ci < pattern.Length; ci++)
            {
                var c = pattern[ci];
                if (isRegularExpressioning == false)
                {
                    if (c == '\\' || isEscaping)
                    {
                        // Copy the backslash and the character following it verbatim.
                        isEscaping = !isEscaping;
                        str.Append(c);
                        continue;
                    }
                    if (c == '{')
                    {
                        // The brace itself is appended by the final else branch below.
                        isRegularExpressioning = true;
                    }
                    if (c == '\"')
                    {
                        // Quotes stay in the token for now; they are stripped in phase 2.
                        isDoubleQuatationing = !isDoubleQuatationing;
                        str.Append(c);
                        continue;
                    }
                    if (c == '|' && !isDoubleQuatationing)
                    {
                        // Close the current token and always open the next OR group. The token
                        // may be empty (e.g. "a | b", where the space already flushed "a");
                        // empty tokens are skipped in phase 2.
                        exps.Add(str.ToString().Trim());
                        expss.Add(exps = new List<string>());
                        str = new StringBuilder();
                        continue;
                    }
                    if (char.IsWhiteSpace(c) && !isDoubleQuatationing)
                    {
                        var token = str.ToString().Trim();
                        if (token != "")
                        {
                            exps.Add(token);
                        }
                        str = new StringBuilder();
                    }
                    else
                    {
                        str.Append(c);
                    }
                }
                else
                {
                    // Raw regular expression: copy everything up to and including the first '}'.
                    str.Append(c);
                    if (c == '}')
                    {
                        isRegularExpressioning = false;
                        exps.Add(str.ToString());
                        str = new StringBuilder();
                    }
                }
            }
            if (str.ToString().Trim() != "")
            {
                exps.Add(str.ToString());
            }

            // ---- Phase 2: compile every token of every OR group ----
            foreach (var strs in expss)
            {
                var resRemove = new List<Regex>();
                _resRemoves.Add(resRemove);
                var resCollect = new List<Regex>();
                _resCollects.Add(resCollect);

                // Diagnostics only: more than one ^ or $ anchored token in one group is suspicious.
                var nStartMark = 0; // number of ^ character
                var nEndMark   = 0; // number of $ character
                foreach (var com in strs)
                {
                    var com2 = StrMatch.ConvertSimpleAsterisk(com);
                    if (com2.StartsWith("^", StringComparison.Ordinal))
                    {
                        nStartMark++;
                    }

                    if (com2.EndsWith("$", StringComparison.Ordinal) && !com2.StartsWith("-", StringComparison.Ordinal))
                    {
                        nEndMark++;
                    }
                }
                if (nStartMark > 1)
                {
                    Debug.WriteLine("Warning : Found ^ mark twice or more");
                }
                if (nEndMark > 1)
                {
                    Debug.WriteLine("Warning : Found $ mark twice or more");
                }

                foreach (var com in strs)
                {
                    if (com.Length < 1)
                    {
                        continue;
                    }

                    var isRawRegex = (com.StartsWith("{", StringComparison.Ordinal) || com.StartsWith("-{", StringComparison.Ordinal))
                                     && com.EndsWith("}", StringComparison.Ordinal);
                    if (isRawRegex)
                    {
                        // Take the text between the opening brace and the final '}' as-is.
                        // An invalid expression deliberately propagates to the caller here.
                        var open = com.IndexOf('{');
                        var re   = new Regex(com.Substring(open + 1, com.Length - open - 2), reopt);
                        if (com[0] == '-')
                        {
                            resRemove.Add(re);
                            NRemove++;
                        }
                        else
                        {
                            resCollect.Add(re);
                        }
                    }
                    else
                    {
                        var    isRemove = false;
                        string restr;
                        if (com.StartsWith("-", StringComparison.Ordinal))
                        {
                            restr = com.Substring(1, com.Length - 1);
                            if (restr == "")
                            {
                                restr = " ";    // Restore the space character that Trim removed
                            }
                            isRemove = true;
                        }
                        else
                        {
                            restr = com;
                        }

                        // Remove double quotations (a quote preceded by a backslash is kept)
                        var preC   = '\0';
                        var restr2 = new StringBuilder();
                        for (var i = 0; i < restr.Length; i++)
                        {
                            var c = restr[i];
                            if (c == '\"' && preC != '\\')
                            {
                                preC = c;
                                continue;
                            }
                            restr2.Append(c);
                            preC = c;
                        }
                        restr = restr2.ToString();

                        // Support simple asterisk expression
                        restr = StrMatch.ConvertSimpleAsterisk(restr);

                        // Change * or ? character to regular expression
                        restr = restr.Replace(".", "\\.");
                        restr = restr.Replace("*", ".*");
                        restr = restr.Replace("?", ".?");

                        // Improvement: do not care about space characters around the & mark
                        restr = Regex.Replace(restr, "&", "\\s*&\\s*");

                        // Collapse consecutive spaces, then let one space match one or more
                        int ll;
                        do
                        {
                            ll    = restr.Length;
                            restr = restr.Replace("  ", " ");
                        } while (ll != restr.Length);
                        restr = restr.Replace(" ", " +");

                        if (restr.Length > 0)
                        {
                            try
                            {
                                var re = new Regex(restr, reopt);
                                if (isRemove)
                                {
                                    resRemove.Add(re);
                                    NRemove++;
                                }
                                else
                                {
                                    resCollect.Add(re);
                                }
                            }
                            catch (ArgumentException ex)
                            {
                                // Best effort: a plain token that is not a valid regular expression
                                // is skipped, but no longer silently.
                                Debug.WriteLine("Warning : Ignored invalid expression '" + restr + "' : " + ex.Message);
                            }
                        }
                    }
                }
            }

            // With no remove expressions, probe IsMatch once and cache the result.
            // NOTE(review): the probe text is presumably chosen as something no ordinary pattern
            // matches, so a hit means "matches everything" - confirm against IsMatch.
            _isAllMatch = NRemove == 0 && IsMatch("龗鱻麤");
        }