/// <summary>
/// Assigns a new <c>LowerCaseNormalizeCleaner</c> to <c>_sub</c> and registers the
/// patterns for the "a/s", "a\s" and "a/l" spellings with their replacements
/// "as" and "al".
/// </summary>
/// <remarks>
/// NOTE(review): <c>Add</c> is defined elsewhere; presumably it swaps the captured
/// group of each pattern for the given replacement — confirm against its definition.
/// </remarks>
public NorwegianCompanyNameCleaner()
        {
            _sub = new LowerCaseNormalizeCleaner();

            // Patterns whose first capture group is rewritten to "as". Registration
            // order is kept exactly as before.
            string[] asPatterns =
            {
                "\\s(a/s)(\\s|$)",
                "\\s(a\\\\s)(\\s|$)",
                "^(a/s)\\s",
                "^(a\\\\s)\\s"
            };
            foreach (string asPattern in asPatterns)
            {
                Add(asPattern, "as");
            }

            // Patterns whose first capture group is rewritten to "al".
            string[] alPatterns =
            {
                "\\s(a/l)(\\s|$)",
                "^(a/l)\\s"
            };
            foreach (string alPattern in alPatterns)
            {
                Add(alPattern, "al");
            }
        }
        /// <summary>
        /// Assigns a new <c>LowerCaseNormalizeCleaner</c> to <c>_sub</c>, then registers
        /// each (pattern, replacement) pair from the table below via <c>Add</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): <c>Add</c> is defined elsewhere; presumably the replacement is
        /// applied to the pattern's captured group — confirm against its definition.
        /// </remarks>
        public NorwegianCompanyNameCleaner()
        {
            _sub = new LowerCaseNormalizeCleaner();

            // { regex, replacement } — rows are registered top to bottom.
            string[][] rules =
            {
                new string[] { "\\s(a/s)(\\s|$)", "as" },
                new string[] { "\\s(a\\\\s)(\\s|$)", "as" },
                new string[] { "^(a/s)\\s", "as" },
                new string[] { "^(a\\\\s)\\s", "as" },
                new string[] { "\\s(a/l)(\\s|$)", "al" },
                new string[] { "^(a/l)\\s", "al" }
            };

            foreach (string[] rule in rules)
            {
                Add(rule[0], rule[1]);
            }
        }
// Example #3
        /// <summary>
        /// Assigns a new <c>LowerCaseNormalizeCleaner</c> to <c>_sub</c> and tries to load
        /// the token translation mapping into <c>_mapping</c>.
        /// </summary>
        /// <remarks>
        /// A failure in <c>LoadMapping</c> is logged and not rethrown, so construction
        /// still completes; in that case this constructor does not assign <c>_mapping</c>.
        /// </remarks>
        public PersonNameCleaner()
        {
            _sub = new LowerCaseNormalizeCleaner();

            // load token translation _mapping (FIXME: move to static init?)
            try
            {
                _mapping = LoadMapping();
            }
            catch (System.Exception ex)
            {
                // Log the full exception text (type, message, stack trace and inner
                // exceptions) instead of only ex.Message, so a failed mapping load can
                // actually be diagnosed. Passing a string keeps the same logger call shape.
                logger.Error("Error initializing object: {0}", ex.ToString());
            }
        }
// Example #4
        /// <summary>
        /// Assigns a new <c>LowerCaseNormalizeCleaner</c> to <c>_sub</c> and tries to load
        /// the token translation mapping into <c>_mapping</c>.
        /// </summary>
        /// <remarks>
        /// A failure in <c>LoadMapping</c> is logged and not rethrown, so construction
        /// still completes; in that case this constructor does not assign <c>_mapping</c>.
        /// </remarks>
        public PersonNameCleaner()
        {
            _sub = new LowerCaseNormalizeCleaner();

            // load token translation _mapping (FIXME: move to static init?)
            try
            {
                _mapping = LoadMapping();
            }
            catch (System.Exception ex)
            {
                // Log the full exception text (type, message, stack trace and inner
                // exceptions) instead of only ex.Message, so a failed mapping load can
                // actually be diagnosed. Passing a string keeps the same logger call shape.
                logger.Error("Error initializing object: {0}", ex.ToString());
            }
        }
        /// <summary>
        /// Assigns a new <c>LowerCaseNormalizeCleaner</c> to <c>_sub</c> and registers the
        /// address rewrite rules in the order listed.
        /// </summary>
        /// <remarks>
        /// NOTE(review): <c>Add</c> is defined elsewhere. Judging from the calls here,
        /// <c>Add(pattern, replacement[, group])</c> swaps the text of one capture group
        /// for <c>replacement</c>, defaulting to group 1 — confirm against its definition.
        /// </remarks>
        public NorwegianAddressCleaner()
        {
            _sub = new LowerCaseNormalizeCleaner();

            // Leading "co/" (optionally followed by a space) and "c\o" variants -> "c/o".
            Add("^(co/ ?)", "c/o ");
            Add("^(c\\\\o)", "c/o");

            // Abbreviated suffixes (g., gt., v., vn., gaten) followed by a number.
            Add("[A-Za-z]+(g\\.) [0-9]+", "gata");
            Add("[A-Za-z]+ (gt?\\.?) [0-9]+", "gate");
            Add("[A-Za-z]+(v\\.) [0-9]+", "veien");
            Add("[A-Za-z]+ (v\\.?) [0-9]+", "vei");
            Add("[A-Za-z]+(vn\\.?)[0-9]+", "veien ");
            Add("[A-Za-z]+(vn\\.?) [0-9]+", "veien");
            Add("[A-Za-z]+(gt\\.?) [0-9]+", "gata");
            Add("[A-Za-z]+(gaten) [0-9]+", "gata");

            // "pb"/"pb."/"boks" before a number -> "postboks". Group 1 is the leading
            // whitespace/start anchor, so the token to replace is group 2.
            Add("(\\s|^)(pb\\.?) [0-9]+", "postboks", 2);
            Add("(\\s|^)(boks) [0-9]+", "postboks", 2);

            // Whitespace between a number and a following single letter is removed.
            Add("[A-Za-z]+ [0-9]+(\\s+)[A-Za-z](\\s|$)", "");

            // The empty group () marks the position between "gata"/"veien" and the
            // digits that follow. Target it explicitly (group 2) so a space is inserted
            // there; without the index the rule would act on group 1 and replace
            // "gata"/"veien" itself with a space (assuming Add defaults to group 1).
            Add("[A-Za-z]+(gata|veien)()[0-9]+[a-z]?(\\s|$)", " ", 2);

            // FIXME: not sure about the following rules
            Add("postboks\\s+[0-9]+(\\s*-\\s*)", " ");
        }
// Example #6
        /// <summary>
        /// Assigns a new <c>LowerCaseNormalizeCleaner</c> to <c>_sub</c> and registers the
        /// address rewrite rules in the order listed.
        /// </summary>
        /// <remarks>
        /// NOTE(review): <c>Add</c> is defined elsewhere. Judging from the calls here,
        /// <c>Add(pattern, replacement[, group])</c> swaps the text of one capture group
        /// for <c>replacement</c>, defaulting to group 1 — confirm against its definition.
        /// </remarks>
        public NorwegianAddressCleaner()
        {
            _sub = new LowerCaseNormalizeCleaner();

            // Leading "co/" (optionally followed by a space) and "c\o" variants -> "c/o".
            Add("^(co/ ?)", "c/o ");
            Add("^(c\\\\o)", "c/o");

            // Abbreviated suffixes (g., gt., v., vn., gaten) followed by a number.
            Add("[A-Za-z]+(g\\.) [0-9]+", "gata");
            Add("[A-Za-z]+ (gt?\\.?) [0-9]+", "gate");
            Add("[A-Za-z]+(v\\.) [0-9]+", "veien");
            Add("[A-Za-z]+ (v\\.?) [0-9]+", "vei");
            Add("[A-Za-z]+(vn\\.?)[0-9]+", "veien ");
            Add("[A-Za-z]+(vn\\.?) [0-9]+", "veien");
            Add("[A-Za-z]+(gt\\.?) [0-9]+", "gata");
            Add("[A-Za-z]+(gaten) [0-9]+", "gata");

            // "pb"/"pb."/"boks" before a number -> "postboks". Group 1 is the leading
            // whitespace/start anchor, so the token to replace is group 2.
            Add("(\\s|^)(pb\\.?) [0-9]+", "postboks", 2);
            Add("(\\s|^)(boks) [0-9]+", "postboks", 2);

            // Whitespace between a number and a following single letter is removed.
            Add("[A-Za-z]+ [0-9]+(\\s+)[A-Za-z](\\s|$)", "");

            // The empty group () marks the position between "gata"/"veien" and the
            // digits that follow. Target it explicitly (group 2) so a space is inserted
            // there; without the index the rule would act on group 1 and replace
            // "gata"/"veien" itself with a space (assuming Add defaults to group 1).
            Add("[A-Za-z]+(gata|veien)()[0-9]+[a-z]?(\\s|$)", " ", 2);

            // FIXME: not sure about the following rules
            Add("postboks\\s+[0-9]+(\\s*-\\s*)", " ");
        }
// Example #7
 /// <summary>
 /// Creates the cleaner and assigns a new <c>LowerCaseNormalizeCleaner</c> to <c>_sub</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): how <c>_sub</c> is used is not visible here; presumably it is applied
 /// to the input before this cleaner's own logic — confirm in the enclosing class.
 /// </remarks>
 public CapitalCleaner()
 {
     _sub = new LowerCaseNormalizeCleaner();
 }
 /// <summary>
 /// Stores a fresh <c>LowerCaseNormalizeCleaner</c> in <c>cleaner</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably a per-test setup hook; the test-framework attribute is
 /// not visible here — confirm.
 /// </remarks>
 public void Init()
 {
     // Start from a brand-new cleaner instance.
     cleaner = new LowerCaseNormalizeCleaner();
 }
 /// <summary>
 /// Clears <c>cleaner</c> by setting it to <c>null</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably a per-test teardown hook; the test-framework attribute is
 /// not visible here — confirm.
 /// </remarks>
 public void Cleanup()
 {
     // Drop the reference to the cleaner instance.
     cleaner = null;
 }
// Example #10
 /// <summary>
 /// Creates the cleaner and assigns a new <c>LowerCaseNormalizeCleaner</c> to <c>_sub</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): how <c>_sub</c> is used is not visible here; presumably it is applied
 /// to the input before this cleaner's own logic — confirm in the enclosing class.
 /// </remarks>
 public CountryNameCleaner()
 {
     _sub = new LowerCaseNormalizeCleaner();
 }