Ejemplo n.º 1
0
        // Labeled form "公司:<name>": the label "公司" (whitespace allowed between the two
        // characters), an ASCII or full-width (U+FF1A) colon, then a run of CJK ideographs,
        // ASCII letters or digits, terminated by end of input, '#', or whitespace.
        // Built once and reused; Regex instances are safe to share for matching.
        private static readonly Regex FuzzyLabeledCompanyPattern = new Regex(
            "(?:公\\s*司)[:\uff1a](?<company>[\u4e00-\u9fa5a-z0-9]+)(?:$|#|\\s)",
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

        // Suffix form: a run of CJK ideographs (the match may not start at 于, 在 or '#'),
        // followed by 2-20 ideographs / digits / ASCII or full-width parentheses, ending in
        // one of the organisation suffixes. Note that '|' is NOT used as a separator inside
        // the character class: within [...] it would be a literal pipe and let a match run
        // across a '|' delimiter.
        private static readonly Regex FuzzySuffixedCompanyPattern = new Regex(
            "(?!于|在|#)[\u4e00-\u9fa5]+?[0-9\u4e00-\u9fa5()\uff08\uff09]{2,20}"
            + "(公司|集团|机构|代表处|办事处|营业部|经营部|事务所|学校|中心|研究所|研究院|酒店|商行|工作室|银行|俱乐部|加盟店|门诊部)",
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

        /// <summary>
        /// Extracts a company name from free text. The input is lower-cased first, then
        /// three strategies are tried in order and the first hit is returned:
        /// (1) the labeled form "公司:&lt;name&gt;", (2) the first result of
        /// <c>CompanyTokenAnalyzer.segment</c> (returned upper-cased), (3) a name ending
        /// in a known organisation suffix such as 公司 or 集团.
        /// </summary>
        /// <param name="line">Text to search. <c>null</c> is treated as "nothing found".</param>
        /// <returns>
        /// The extracted company name, or an empty string when no strategy matches.
        /// Results of strategies (1) and (3) are taken from the lower-cased input.
        /// </returns>
        public string extractCompany_fuzzy(string line)
        {
            if (line == null)
            {
                return "";
            }

            // Invariant casing keeps ASCII letters inside [a-z] regardless of the
            // thread's current culture (e.g. Turkish 'I' -> dotless 'ı').
            line = line.ToLowerInvariant();

            Match labeled = FuzzyLabeledCompanyPattern.Match(line);
            if (labeled.Success)
            {
                return labeled.Groups["company"].Value;
            }

            // TODO: at delivery time, move this step after the regexes and use it for
            // recall; find maximum length position.
            List<string> companies = CompanyTokenAnalyzer.segment(line);
            if (companies.Count > 0)
            {
                return companies[0].ToUpperInvariant();
            }

            Match suffixed = FuzzySuffixedCompanyPattern.Match(line);
            return suffixed.Success ? suffixed.Value : "";
        }
Ejemplo n.º 2
0
        // Labeled form "公司:<name>": the label "公司" (whitespace allowed between the two
        // characters), an ASCII or full-width (U+FF1A) colon, then a run of CJK ideographs,
        // ASCII letters or digits, terminated by end of input, '#', or whitespace.
        // Built once and reused; Regex instances are safe to share for matching.
        private static readonly Regex ExactLabeledCompanyPattern = new Regex(
            "(?:公\\s*司)[:\uff1a](?<company>[\u4e00-\u9fa5a-z0-9]+)(?:$|#|\\s)",
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

        // Suffix form: a run of CJK ideographs (the match may not start at 于, 在 or '#'),
        // followed by 2-15 ideographs / digits / ASCII or full-width parentheses, ending in
        // one or more organisation suffixes. Note that '|' is NOT used as a separator inside
        // the character class: within [...] it would be a literal pipe and let a match run
        // across a '|' delimiter.
        private static readonly Regex ExactSuffixedCompanyPattern = new Regex(
            "(?!于|在|#)[\u4e00-\u9fa5]+?[0-9\u4e00-\u9fa5()\uff08\uff09]{2,15}"
            + "(公司|集团|代表处|办事处|营业部|经营部|事务所|学校|中心|研究所|研究院|酒店|商行|工作室|银行|俱乐部|加盟店|门诊部)+",
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

        /// <summary>
        /// Extracts a company name from a line of text. Three strategies are tried in
        /// order and the first hit is returned: (1) the labeled form "公司:&lt;name&gt;",
        /// (2) a name ending in a known organisation suffix such as 公司 or 集团,
        /// (3) the first result of <c>CompanyTokenAnalyzer.segment</c>.
        /// </summary>
        /// <param name="line">Text to search. <c>null</c> is treated as "nothing found".</param>
        /// <returns>
        /// The extracted company name with the casing of the input preserved, or an
        /// empty string when no strategy matches.
        /// </returns>
        public string extractCompany_exact(string line)
        {
            if (line == null)
            {
                return "";
            }

            Match labeled = ExactLabeledCompanyPattern.Match(line);
            if (labeled.Success)
            {
                return labeled.Groups["company"].Value;
            }

            Match suffixed = ExactSuffixedCompanyPattern.Match(line);
            if (suffixed.Success)
            {
                return suffixed.Value;
            }

            // Last resort: fall back to the tokenizer and take its first candidate.
            List<string> companies = CompanyTokenAnalyzer.segment(line);
            return companies.Count > 0 ? companies[0] : "";
        }