/// <summary>
/// Creates a parser whose city matcher is built from <paramref name="citys"/>.
/// </summary>
/// <param name="citys">City names handed to NewElementsWordmatch under the label "city".</param>
public ChineseAddressWordParser(string[] citys)
{
    // The basic matchers are only built while the nation matcher is still unset.
    bool basicMatchersMissing = ms_nationwordmatch == null;
    if (basicMatchersMissing)
    {
        InitBasicWordmatch();
    }

    // Replace the city matcher with one built from the caller's list.
    // NOTE(review): the ms_ prefix suggests this is shared static state, so each
    // construction would overwrite it for every parser — confirm this is intended.
    ms_citywordmatch = NewElementsWordmatch("city", citys);

    InitBasicWordmatchChain();
}
/// <summary>
/// Looks up the first block in m_matchblocklist whose wordmatch is <paramref name="wm"/>
/// and returns the string MatchingBlockGetString produces for it.
/// </summary>
/// <param name="wm">The matcher to search for.</param>
/// <returns>The string for the first matching block, or null when no block matches.</returns>
public string GetStringByWordmatch(AbstractWordmatch wm)
{
    string found = null;

    foreach (MatchingBlock block in m_matchblocklist)
    {
        if (block.wordmatch == wm)
        {
            // Only the first hit is reported.
            found = MatchingBlockGetString(block);
            break;
        }
    }

    return found;
}
/// <summary>
/// Collects the strings of every block in m_matchblocklist whose wordmatch is
/// <paramref name="wm"/>, in list order.
/// </summary>
/// <param name="wm">The matcher to search for.</param>
/// <returns>A new collection; empty when no block matches.</returns>
public StringCollection GetStringsByWordmatch(AbstractWordmatch wm)
{
    StringCollection results = new StringCollection();

    foreach (MatchingBlock block in m_matchblocklist)
    {
        bool belongsToMatcher = block.wordmatch == wm;
        if (belongsToMatcher)
        {
            results.Add(MatchingBlockGetString(block));
        }
    }

    return results;
}
/// <summary>
/// Builds the static matchers used to recognise the parts of a Chinese address
/// (nation, province, city, county, district, road, house number, and so on).
/// </summary>
private static void InitBasicWordmatch()
{
    const RegexOptions options = RegexOptions.Compiled;

    // Country name.
    ms_nationwordmatch = NewSingleElementWordmatch("nation", "中国");

    // Provinces and directly-administered municipalities.
    ms_provincewordmatch = new AggregateWordmatch("province", "province.txt", true);

    // Prefecture-level cities.
    ms_citywordmatch = new AggregateWordmatch("city", "city.txt", true);

    // County-level cities and counties.
    Regex countyPattern = new Regex(@"[\u4e00-\u9fa5]+?(县|市)", options);
    ms_countywordmatch = new RegexWordmatch("county", countyPattern);

    // Industrial zones and high-tech parks.
    // NOTE(review): labelled "plaza", the same label as the plaza matcher below — confirm this is intentional.
    Regex industrialParkPattern = new Regex(@"[\u4e00-\u9fa5]+(园区|工业区|工业城)", options);
    ms_industrialparkwordmatch = new RegexWordmatch("plaza", industrialParkPattern);

    // Districts.
    Regex districtPattern = new Regex(@"近郊|[\u4e00-\u9fa5]+?区", options);
    ms_districtwordmatch = new RegexWordmatch("district", districtPattern);

    // Sub-districts and roads.
    Regex streetPattern = new Regex(@"[\u4e00-\u9fa5]+街道", options);
    ms_streetwordmatch = new RegexWordmatch("street", streetPattern);
    Regex roadPattern = new Regex(@"[\u4e00-\u9fa5]+?(胡同|弄堂|街|巷|路|道)", options);
    ms_roadwordmatch = new RegexWordmatch("road", roadPattern);

    // House numbers; the lookahead excludes "号" followed by "楼".
    Regex numberPattern = new Regex(@"(\d|-|甲|乙|丙)+?号(?!楼)", options);
    ms_numberwordmatch = new RegexWordmatch("number", numberPattern);

    // Residential areas.
    // NOTE(review): labelled "number", the same label as the house-number matcher above — looks like a copy-paste; confirm.
    Regex zonePattern = new Regex(@"[\u4e00-\u9fa5]+?(社区|小区)", options);
    ms_zonewordmatch = new RegexWordmatch("number", zonePattern);

    // Plazas, shopping centres, hotels and similar buildings, with an optional "[A-Z]座" suffix.
    Regex plazaPattern = new Regex(@"[\u4e00-\u9fa5]+(层|楼|广场|商城|商场|酒店|购物中心|市场|大厦|校区|百货)([A-Z]座)?", options);
    ms_plazawordmatch = new RegexWordmatch("plaza", plazaPattern);

    // Towns and townships.
    Regex townPattern = new Regex(@"[\u4e00-\u9fa5]+?(镇|乡)", options);
    ms_townwordmatch = new RegexWordmatch("town", townPattern);

    // Villages.
    Regex villagePattern = new Regex(@"[\u4e00-\u9fa5]+?村", options);
    ms_villagewordmatch = new RegexWordmatch("village", villagePattern);

    // Head of the matcher chain.
    ms_headwordmatch = new HeadWordmatch();

    // Remarks: text enclosed in parentheses.
    Regex notePattern = new Regex(@"\(.+\)", options);
    ms_notewordmatch = new RegexWordmatch("note", notePattern, false);

    // Noise collector.
    ms_noisecollector = new NoiseCollector();
}
/// <summary>
/// Builds the static matchers used to recognise the parts of a Chinese address
/// (nation, province, city, county, district, road, house number, and so on).
/// </summary>
private static void InitBasicWordmatch()
{
    const RegexOptions options = RegexOptions.Compiled;

    // Country name.
    ms_nationwordmatch = NewSingleElementWordmatch("nation", "中国");

    // Provinces and directly-administered municipalities.
    ms_provincewordmatch = new AggregateWordmatch("province", "province.txt", true);

    // Prefecture-level cities.
    ms_citywordmatch = new AggregateWordmatch("city", "city.txt", true);

    // County-level cities and counties.
    Regex countyPattern = new Regex(@"[\u4e00-\u9fa5]+?(县|市)", options);
    ms_countywordmatch = new RegexWordmatch("county", countyPattern);

    // Industrial zones and high-tech parks.
    // NOTE(review): labelled "plaza", the same label as the plaza matcher below — confirm this is intentional.
    Regex industrialParkPattern = new Regex(@"[\u4e00-\u9fa5]+(园区|工业区|工业城)", options);
    ms_industrialparkwordmatch = new RegexWordmatch("plaza", industrialParkPattern);

    // Districts.
    Regex districtPattern = new Regex(@"近郊|[\u4e00-\u9fa5]+?区", options);
    ms_districtwordmatch = new RegexWordmatch("district", districtPattern);

    // Sub-districts and roads.
    Regex streetPattern = new Regex(@"[\u4e00-\u9fa5]+街道", options);
    ms_streetwordmatch = new RegexWordmatch("street", streetPattern);
    Regex roadPattern = new Regex(@"[\u4e00-\u9fa5]+?(胡同|弄堂|街|巷|路|道)", options);
    ms_roadwordmatch = new RegexWordmatch("road", roadPattern);

    // House numbers; the lookahead excludes "号" followed by "楼".
    Regex numberPattern = new Regex(@"(\d|-|甲|乙|丙)+?号(?!楼)", options);
    ms_numberwordmatch = new RegexWordmatch("number", numberPattern);

    // Residential areas.
    // NOTE(review): labelled "number", the same label as the house-number matcher above — looks like a copy-paste; confirm.
    Regex zonePattern = new Regex(@"[\u4e00-\u9fa5]+?(社区|小区)", options);
    ms_zonewordmatch = new RegexWordmatch("number", zonePattern);

    // Plazas, shopping centres, hotels and similar buildings, with an optional "[A-Z]座" suffix.
    Regex plazaPattern = new Regex(@"[\u4e00-\u9fa5]+(层|楼|广场|商城|商场|酒店|购物中心|市场|大厦|校区|百货)([A-Z]座)?", options);
    ms_plazawordmatch = new RegexWordmatch("plaza", plazaPattern);

    // Towns and townships.
    Regex townPattern = new Regex(@"[\u4e00-\u9fa5]+?(镇|乡)", options);
    ms_townwordmatch = new RegexWordmatch("town", townPattern);

    // Villages.
    Regex villagePattern = new Regex(@"[\u4e00-\u9fa5]+?村", options);
    ms_villagewordmatch = new RegexWordmatch("village", villagePattern);

    // Head of the matcher chain.
    ms_headwordmatch = new HeadWordmatch();

    // Remarks: text enclosed in parentheses.
    Regex notePattern = new Regex(@"\(.+\)", options);
    ms_notewordmatch = new RegexWordmatch("note", notePattern, false);

    // Noise collector.
    ms_noisecollector = new NoiseCollector();
}
/// <summary>
/// Records that <paramref name="wordmatch"/> matched the range starting at
/// <paramref name="absolutestartpoint"/> with the given <paramref name="length"/>,
/// by splitting the existing block that fully contains that range.
/// </summary>
/// <param name="wordmatch">The matcher that owns the new block.</param>
/// <param name="absolutestartpoint">Start offset of the range; must not be negative.</param>
/// <param name="length">Length of the range; must not be negative or exceed the length of m_string.</param>
/// <returns>
/// true when a containing block was found and split; false when the arguments are
/// rejected or no existing block fully contains the range.
/// </returns>
public bool AddMatchingBlock(AbstractWordmatch wordmatch, int absolutestartpoint, int length)
{
    // Reject impossible ranges up front. A negative length would otherwise pass the
    // containment test below and lead to a block with a negative length being recorded.
    if (absolutestartpoint < 0 || length < 0 || length > m_string.Length)
    {
        return false;
    }

    // Find the existing block that contains the whole requested range.
    foreach (MatchingBlock mpexist in m_matchblocklist)
    {
        if (mpexist.startpoint <= absolutestartpoint
            && absolutestartpoint + length <= mpexist.startpoint + mpexist.length)
        {
            // Split that block, then return at once so enumeration does not
            // continue after the split.
            SplitMatchingBlock(mpexist, absolutestartpoint, length, wordmatch);
            return true;
        }
    }

    return false;
}
/// <summary>
/// Stores <paramref name="next"/> as this matcher's successor.
/// </summary>
/// <param name="next">The matcher to link after this one.</param>
/// <returns>The same <paramref name="next"/> instance that was passed in, so calls can be chained.</returns>
public AbstractWordmatch SetNext(AbstractWordmatch next)
{
    // The parameter hides the field of the same name, so the field needs "this.".
    this.next = next;

    return next;
}
/// <summary>
/// Creates a parser whose city matcher is built from <paramref name="citys"/>.
/// </summary>
/// <param name="citys">City names handed to NewElementsWordmatch under the label "city".</param>
public ChineseAddressWordParser(string[] citys)
{
    // The basic matchers are only built while the nation matcher is still unset.
    bool basicMatchersMissing = ms_nationwordmatch == null;
    if (basicMatchersMissing)
    {
        InitBasicWordmatch();
    }

    // Replace the city matcher with one built from the caller's list.
    // NOTE(review): the ms_ prefix suggests this is shared static state, so each
    // construction would overwrite it for every parser — confirm this is intended.
    ms_citywordmatch = NewElementsWordmatch("city", citys);

    InitBasicWordmatchChain();
}
/// <summary>
/// Replaces <paramref name="oldmb"/> in m_matchblocklist with a block owned by
/// <paramref name="wordmatch"/> covering the given range, plus blocks with a null
/// wordmatch for any part of the old block left before or after that range.
/// </summary>
/// <param name="oldmb">The existing block being split.</param>
/// <param name="startpoint">Start offset of the new block.</param>
/// <param name="length">Length of the new block.</param>
/// <param name="wordmatch">The matcher that owns the new block.</param>
private void SplitMatchingBlock(MatchingBlock oldmb, int startpoint, int length, AbstractWordmatch wordmatch)
{
    // Capture the old block's extent before it is removed from the list.
    int previousStart = oldmb.startpoint;
    int previousEnd = previousStart + oldmb.length;
    int insertedEnd = startpoint + length;

    m_matchblocklist.Remove(oldmb);
    m_matchblocklist.Add(new MatchingBlock(wordmatch, startpoint, length));

    // Part of the old block before the new range.
    if (previousStart < startpoint)
    {
        int leadingLength = startpoint - previousStart;
        m_matchblocklist.Add(new MatchingBlock(null, previousStart, leadingLength));
    }

    // Part of the old block after the new range.
    if (previousEnd > insertedEnd)
    {
        int trailingLength = previousEnd - insertedEnd;
        m_matchblocklist.Add(new MatchingBlock(null, insertedEnd, trailingLength));
    }
}
/// <summary>
/// Adds a matching block whose start is given relative to <paramref name="oldmb"/>.
/// The offset is converted to an absolute start and passed to the overload that
/// takes an absolute start point.
/// </summary>
/// <param name="wordmatch">The matcher that owns the new block.</param>
/// <param name="oldmb">The block that <paramref name="relativestartpoint"/> is measured from.</param>
/// <param name="relativestartpoint">Offset from the start of <paramref name="oldmb"/>.</param>
/// <param name="length">Length of the new block.</param>
/// <returns>The result of the absolute-start overload.</returns>
public bool AddMatchingBlock(AbstractWordmatch wordmatch, MatchingBlock oldmb, int relativestartpoint, int length)
{
    int absoluteStart = oldmb.startpoint + relativestartpoint;

    return AddMatchingBlock(wordmatch, absoluteStart, length);
}
/// <summary>
/// Creates a block recording which matcher owns a range.
/// </summary>
/// <param name="wm">Stored in wordmatch.</param>
/// <param name="startp">Stored in startpoint.</param>
/// <param name="len">Stored in length.</param>
public MatchingBlock(AbstractWordmatch wm, int startp, int len)
{
    wordmatch = wm;
    startpoint = startp;
    length = len;
}
/// <summary>
/// Stores <paramref name="next"/> as this matcher's successor.
/// </summary>
/// <param name="next">The matcher to link after this one.</param>
/// <returns>The same <paramref name="next"/> instance that was passed in, so calls can be chained.</returns>
public AbstractWordmatch SetNext(AbstractWordmatch next)
{
    // The parameter hides the field of the same name, so the field needs "this.".
    this.next = next;

    return next;
}