예제 #1
0
        /// <summary>
        /// Creates a parser whose "city" matcher is built from the supplied city names.
        /// </summary>
        /// <param name="citys">City names passed to <c>NewElementsWordmatch</c> under the name "city".</param>
        public ChineseAddressWordParser(string[] citys)
        {
            // The nation matcher doubles as the "shared matchers already built" flag:
            // InitBasicWordmatch assigns it, so it is only null before the first run.
            bool basicsMissing = ms_nationwordmatch == null;
            if (basicsMissing)
            {
                InitBasicWordmatch();
            }

            // Replace the city matcher with one built from the caller's list, then
            // run the chain initialisation.
            // NOTE(review): ms_citywordmatch is assigned by the static initialiser too, so it
            // appears to be shared state that each new instance overwrites - confirm intended.
            ms_citywordmatch = NewElementsWordmatch("city", citys);
            InitBasicWordmatchChain();
        }
 /// <summary>
 /// Returns the text of the first matching block that was assigned to the given matcher.
 /// </summary>
 /// <param name="wm">Matcher to look for among the current matching blocks.</param>
 /// <returns>
 /// The result of <c>MatchingBlockGetString</c> for the first block whose matcher equals
 /// <paramref name="wm"/>, or <c>null</c> when no block matches.
 /// </returns>
 public string GetStringByWordmatch(AbstractWordmatch wm)
 {
     string found = null;

     foreach (MatchingBlock block in m_matchblocklist)
     {
         if (block.wordmatch == wm)
         {
             // Only the first hit (in list order) is reported.
             found = MatchingBlockGetString(block);
             break;
         }
     }

     return found;
 }
        /// <summary>
        /// Collects the text of every matching block that was assigned to the given matcher.
        /// </summary>
        /// <param name="wm">Matcher to look for among the current matching blocks.</param>
        /// <returns>
        /// A new collection holding one string per matching block, in list order;
        /// empty (never <c>null</c>) when no block matches.
        /// </returns>
        public StringCollection GetStringsByWordmatch(AbstractWordmatch wm)
        {
            StringCollection matches = new StringCollection();

            foreach (MatchingBlock block in m_matchblocklist)
            {
                if (block.wordmatch == wm)
                {
                    string text = MatchingBlockGetString(block);
                    matches.Add(text);
                }
            }

            return matches;
        }
예제 #4
0
        /// <summary>
        /// Builds the shared static matchers, one per kind of address component
        /// (nation, province, city, county, district, road, house number, ...).
        /// </summary>
        private static void InitBasicWordmatch()
        {
            // Every pattern below is compiled with the same option.
            const RegexOptions options = RegexOptions.Compiled;

            // Country name.
            ms_nationwordmatch = NewSingleElementWordmatch("nation", "中国");

            // Provinces and direct-administered municipalities
            // (presumably loaded from province.txt - confirm in AggregateWordmatch).
            ms_provincewordmatch = new AggregateWordmatch("province", "province.txt", true);

            // Prefecture-level cities (presumably loaded from city.txt - confirm in AggregateWordmatch).
            ms_citywordmatch = new AggregateWordmatch("city", "city.txt", true);

            // County-level cities and counties.
            Regex countyPattern = new Regex(@"[\u4e00-\u9fa5]+?(县|市)", options);
            ms_countywordmatch = new RegexWordmatch("county", countyPattern);

            // Industrial parks and high-tech zones.
            // NOTE(review): registered under the name "plaza", same as the plaza matcher below - confirm intended.
            Regex industrialParkPattern = new Regex(@"[\u4e00-\u9fa5]+(园区|工业区|工业城)", options);
            ms_industrialparkwordmatch = new RegexWordmatch("plaza", industrialParkPattern);

            // Districts.
            Regex districtPattern = new Regex(@"近郊|[\u4e00-\u9fa5]+?区", options);
            ms_districtwordmatch = new RegexWordmatch("district", districtPattern);

            // Sub-districts and roads.
            Regex streetPattern = new Regex(@"[\u4e00-\u9fa5]+街道", options);
            ms_streetwordmatch = new RegexWordmatch("street", streetPattern);
            Regex roadPattern = new Regex(@"[\u4e00-\u9fa5]+?(胡同|弄堂|街|巷|路|道)", options);
            ms_roadwordmatch = new RegexWordmatch("road", roadPattern);

            // House numbers; the negative lookahead skips a number followed by the building suffix.
            Regex numberPattern = new Regex(@"(\d|-|甲|乙|丙)+?号(?!楼)", options);
            ms_numberwordmatch = new RegexWordmatch("number", numberPattern);

            // Residential zones.
            // NOTE(review): registered under the name "number", same as the house-number matcher -
            // looks like a copy-paste slip for "zone"; confirm before changing, callers may key on the name.
            Regex zonePattern = new Regex(@"[\u4e00-\u9fa5]+?(社区|小区)", options);
            ms_zonewordmatch = new RegexWordmatch("number", zonePattern);

            // Plazas, shopping centres, hotels and similar buildings, with an optional tower letter.
            Regex plazaPattern = new Regex(@"[\u4e00-\u9fa5]+(层|楼|广场|商城|商场|酒店|购物中心|市场|大厦|校区|百货)([A-Z]座)?", options);
            ms_plazawordmatch = new RegexWordmatch("plaza", plazaPattern);

            // Towns and townships.
            Regex townPattern = new Regex(@"[\u4e00-\u9fa5]+?(镇|乡)", options);
            ms_townwordmatch = new RegexWordmatch("town", townPattern);

            // Villages.
            Regex villagePattern = new Regex(@"[\u4e00-\u9fa5]+?村", options);
            ms_villagewordmatch = new RegexWordmatch("village", villagePattern);

            // Head of the matcher chain.
            ms_headwordmatch = new HeadWordmatch();

            // Remarks written in ASCII parentheses.
            Regex notePattern = new Regex(@"\(.+\)", options);
            ms_notewordmatch = new RegexWordmatch("note", notePattern, false);

            // Noise collector.
            ms_noisecollector = new NoiseCollector();
        }
        /// <summary>
        /// Builds the shared static matchers, one per kind of address component
        /// (nation, province, city, county, district, road, house number, ...).
        /// </summary>
        private static void InitBasicWordmatch()
        {
            // Every pattern below is compiled with the same option.
            const RegexOptions options = RegexOptions.Compiled;

            // Country name.
            ms_nationwordmatch = NewSingleElementWordmatch("nation", "中国");

            // Provinces and direct-administered municipalities
            // (presumably loaded from province.txt - confirm in AggregateWordmatch).
            ms_provincewordmatch = new AggregateWordmatch("province", "province.txt", true);

            // Prefecture-level cities (presumably loaded from city.txt - confirm in AggregateWordmatch).
            ms_citywordmatch = new AggregateWordmatch("city", "city.txt", true);

            // County-level cities and counties.
            Regex countyPattern = new Regex(@"[\u4e00-\u9fa5]+?(县|市)", options);
            ms_countywordmatch = new RegexWordmatch("county", countyPattern);

            // Industrial parks and high-tech zones.
            // NOTE(review): registered under the name "plaza", same as the plaza matcher below - confirm intended.
            Regex industrialParkPattern = new Regex(@"[\u4e00-\u9fa5]+(园区|工业区|工业城)", options);
            ms_industrialparkwordmatch = new RegexWordmatch("plaza", industrialParkPattern);

            // Districts.
            Regex districtPattern = new Regex(@"近郊|[\u4e00-\u9fa5]+?区", options);
            ms_districtwordmatch = new RegexWordmatch("district", districtPattern);

            // Sub-districts and roads.
            Regex streetPattern = new Regex(@"[\u4e00-\u9fa5]+街道", options);
            ms_streetwordmatch = new RegexWordmatch("street", streetPattern);
            Regex roadPattern = new Regex(@"[\u4e00-\u9fa5]+?(胡同|弄堂|街|巷|路|道)", options);
            ms_roadwordmatch = new RegexWordmatch("road", roadPattern);

            // House numbers; the negative lookahead skips a number followed by the building suffix.
            Regex numberPattern = new Regex(@"(\d|-|甲|乙|丙)+?号(?!楼)", options);
            ms_numberwordmatch = new RegexWordmatch("number", numberPattern);

            // Residential zones.
            // NOTE(review): registered under the name "number", same as the house-number matcher -
            // looks like a copy-paste slip for "zone"; confirm before changing, callers may key on the name.
            Regex zonePattern = new Regex(@"[\u4e00-\u9fa5]+?(社区|小区)", options);
            ms_zonewordmatch = new RegexWordmatch("number", zonePattern);

            // Plazas, shopping centres, hotels and similar buildings, with an optional tower letter.
            Regex plazaPattern = new Regex(@"[\u4e00-\u9fa5]+(层|楼|广场|商城|商场|酒店|购物中心|市场|大厦|校区|百货)([A-Z]座)?", options);
            ms_plazawordmatch = new RegexWordmatch("plaza", plazaPattern);

            // Towns and townships.
            Regex townPattern = new Regex(@"[\u4e00-\u9fa5]+?(镇|乡)", options);
            ms_townwordmatch = new RegexWordmatch("town", townPattern);

            // Villages.
            Regex villagePattern = new Regex(@"[\u4e00-\u9fa5]+?村", options);
            ms_villagewordmatch = new RegexWordmatch("village", villagePattern);

            // Head of the matcher chain.
            ms_headwordmatch = new HeadWordmatch();

            // Remarks written in ASCII parentheses.
            Regex notePattern = new Regex(@"\(.+\)", options);
            ms_notewordmatch = new RegexWordmatch("note", notePattern, false);

            // Noise collector.
            ms_noisecollector = new NoiseCollector();
        }
        /// <summary>
        /// Assigns <paramref name="wordmatch"/> to the character range starting at
        /// <paramref name="absolutestartpoint"/> and spanning <paramref name="length"/> characters,
        /// by splitting the existing matching block that fully contains that range.
        /// </summary>
        /// <param name="wordmatch">Matcher to record for the new block.</param>
        /// <param name="absolutestartpoint">Start offset of the range, measured from the start of the string.</param>
        /// <param name="length">Number of characters in the range; must not be negative.</param>
        /// <returns>
        /// <c>true</c> when a containing block was found and split; <c>false</c> when the range is
        /// invalid or no single existing block contains it.
        /// </returns>
        public bool AddMatchingBlock(AbstractWordmatch wordmatch, int absolutestartpoint, int length)
        {
            int total = this.m_string.Length;

            // Reject ranges that cannot lie inside the string. A negative length used to slip
            // through and could satisfy the containment test below, producing a negative-length
            // block plus overlapping remainders. The end check is written as a subtraction so
            // that start + length cannot overflow.
            if (absolutestartpoint < 0 || length < 0 || length > total || absolutestartpoint > total - length)
            {
                return(false);
            }

            // Find the existing block that contains the whole range.
            foreach (MatchingBlock mpexist in m_matchblocklist)
            {
                if (mpexist.startpoint <= absolutestartpoint && absolutestartpoint + length <= mpexist.startpoint + mpexist.length)
                {
                    // Split that block. The split mutates the list, so return immediately
                    // instead of continuing the enumeration.
                    SplitMatchingBlock(mpexist, absolutestartpoint, length, wordmatch);
                    return(true);
                }
            }
            return(false);
        }
 /// <summary>
 /// Stores the matcher that follows this one.
 /// </summary>
 /// <param name="next">Matcher to store as this instance's successor.</param>
 /// <returns>The same <paramref name="next"/> instance that was passed in.</returns>
 public AbstractWordmatch SetNext(AbstractWordmatch next)
 {
     AbstractWordmatch successor = next;
     this.next = successor;

     // Returning the argument lets a caller continue from the newly linked matcher.
     return successor;
 }
        /// <summary>
        /// Creates a parser whose "city" matcher is built from the supplied city names.
        /// </summary>
        /// <param name="citys">City names passed to <c>NewElementsWordmatch</c> under the name "city".</param>
        public ChineseAddressWordParser(string[] citys)
        {
            // A null nation matcher means the shared matchers have not been built yet.
            bool alreadyInitialised = ms_nationwordmatch != null;
            if (!alreadyInitialised)
            {
                InitBasicWordmatch();
            }

            // Swap in a city matcher built from the caller's list, then run the chain initialisation.
            ms_citywordmatch = NewElementsWordmatch("city", citys);
            InitBasicWordmatchChain();
        }
        /// <summary>
        /// Replaces <paramref name="oldmb"/> in the block list with a block for the matched range,
        /// plus unmatched (null-matcher) blocks for whatever part of the old block lies before
        /// and after that range.
        /// </summary>
        /// <param name="oldmb">Existing block to remove; callers pass a block that contains the range.</param>
        /// <param name="startpoint">Start offset of the matched range.</param>
        /// <param name="length">Length of the matched range.</param>
        /// <param name="wordmatch">Matcher recorded on the new block.</param>
        private void SplitMatchingBlock(MatchingBlock oldmb, int startpoint, int length, AbstractWordmatch wordmatch)
        {
            // Capture the old extent before the block leaves the list.
            int previousStart = oldmb.startpoint;
            int previousEnd = previousStart + oldmb.length;
            int matchedEnd = startpoint + length;

            m_matchblocklist.Remove(oldmb);

            // The matched block is appended first, followed by the leftovers; the list is
            // therefore not kept in positional order.
            MatchingBlock matched = new MatchingBlock(wordmatch, startpoint, length);
            m_matchblocklist.Add(matched);

            // Leftover in front of the matched range.
            if (previousStart < startpoint)
            {
                MatchingBlock leading = new MatchingBlock(null, previousStart, startpoint - previousStart);
                m_matchblocklist.Add(leading);
            }

            // Leftover behind the matched range.
            if (previousEnd > matchedEnd)
            {
                MatchingBlock trailing = new MatchingBlock(null, matchedEnd, previousEnd - matchedEnd);
                m_matchblocklist.Add(trailing);
            }
        }
예제 #10
0
 /// <summary>
 /// Adds a matching block whose start is given relative to an existing block.
 /// </summary>
 /// <param name="wordmatch">Matcher to record for the new block.</param>
 /// <param name="oldmb">Block whose start offset is the origin for <paramref name="relativestartpoint"/>.</param>
 /// <param name="relativestartpoint">Start offset measured from the start of <paramref name="oldmb"/>.</param>
 /// <param name="length">Number of characters in the new block.</param>
 /// <returns>The result of the absolute-offset <c>AddMatchingBlock</c> overload.</returns>
 public bool AddMatchingBlock(AbstractWordmatch wordmatch, MatchingBlock oldmb, int relativestartpoint, int length)
 {
     // Convert the block-relative offset to an absolute one and delegate.
     int absoluteStart = oldmb.startpoint + relativestartpoint;
     bool added = this.AddMatchingBlock(wordmatch, absoluteStart, length);
     return added;
 }
예제 #11
0
 /// <summary>
 /// Creates a block describing a range and the matcher assigned to it.
 /// </summary>
 /// <param name="wm">Matcher stored in <c>wordmatch</c>; may be <c>null</c>.</param>
 /// <param name="startp">Value stored in <c>startpoint</c>.</param>
 /// <param name="len">Value stored in <c>length</c>.</param>
 public MatchingBlock(AbstractWordmatch wm, int startp, int len)
 {
     wordmatch = wm;
     startpoint = startp;
     length = len;
 }
 /// <summary>
 /// Stores the matcher that follows this one.
 /// </summary>
 /// <param name="next">Matcher to store as this instance's successor.</param>
 /// <returns>The same <paramref name="next"/> instance that was passed in.</returns>
 public AbstractWordmatch SetNext(AbstractWordmatch next)
 {
     AbstractWordmatch successor = next;
     this.next = successor;

     // Returning the argument lets a caller continue from the newly linked matcher.
     return successor;
 }