예제 #1
2
		/// <summary>
		/// Creates the automaton. All three arguments are handed unchanged to the
		/// base-class constructor; this constructor does nothing else.
		/// </summary>
		/// <param name="lemmatizer">Lemmatizer passed through to the base class.</param>
		/// <param name="language">Language passed through to the base class.</param>
		/// <param name="annotChar">Annotation character passed through to the base class.</param>
		public MorphAutomat(
			Lemmatizer lemmatizer,
			InternalMorphLanguage language,
			char annotChar)
			: base(lemmatizer, language, annotChar)
		{
			// Intentionally empty: the base constructor receives everything.
		}
예제 #2
0
파일: ABCEncoder.cs 프로젝트: tvi123/rep123
		/// <summary>
		/// Fills the two lookup tables that translate between character codes
		/// (0 .. Constants.AlphabetSize - 1) and dense alphabet positions.
		/// A code belongs to the alphabet when it is an upper-case letter of
		/// <paramref name="language"/>, the hyphen, <paramref name="annotChar"/>,
		/// one of the extra English/German characters (for those languages), or,
		/// for morphURL, any character accepted by Lang.is_alpha.
		/// </summary>
		/// <param name="language">Language that selects which codes are alphabet members.</param>
		/// <param name="pCode2Alphabet">Output table: entry [position] receives the character code stored at that alphabet position.</param>
		/// <param name="pAlphabet2Code">Output table: entry [code] receives the alphabet position of that code, or -1 when the code is not a member.</param>
		/// <param name="annotChar">Annotation character that is always admitted to the alphabet; must not be upper case.</param>
		/// <returns>The number of codes admitted to the alphabet.</returns>
		/// <exception cref="MorphException">
		/// Thrown when <paramref name="annotChar"/> is upper case, or when more than
		/// Constants.MaxAlphabetSize codes would be admitted.
		/// </exception>
		private static int InitAlphabet(InternalMorphLanguage language, int[] pCode2Alphabet, int[] pAlphabet2Code, char annotChar) {
			// The guard fires when the annotator IS upper case, so the message states
			// the actual requirement (the previous text claimed the opposite).
			if (char.IsUpper(annotChar)) {
				throw new MorphException("annotChar must not be an upper-case character");
			}
			var AdditionalEnglishChars = "'1234567890";
			var AdditionalGermanChars = "";
			var AlphabetSize = 0;
			for (var i = 0; i < Constants.AlphabetSize; i++) {
				var ch = Convert.ToChar(i);
				var inAlphabet = Lang.is_upper_alpha((byte)i, language)
					|| (ch == '-')
					|| (ch == annotChar)
					|| ((language == InternalMorphLanguage.morphEnglish)
						&& (AdditionalEnglishChars.IndexOf(ch) >= 0))
					|| ((language == InternalMorphLanguage.morphGerman)
						&& (AdditionalGermanChars.IndexOf(ch) >= 0))
					|| ((language == InternalMorphLanguage.morphURL)
						&& Lang.is_alpha((byte)i, language));

				if (!inAlphabet) {
					pAlphabet2Code[i] = -1;
					continue;
				}

				// Validate BEFORE writing: the size limit is then reported as a
				// MorphException instead of depending on how large the caller-supplied
				// pCode2Alphabet happens to be. The throw condition is unchanged
				// (it triggers exactly when the final count would exceed the maximum).
				if (AlphabetSize >= Constants.MaxAlphabetSize) {
					throw new MorphException("Error! The  ABC is too large");
				}

				pCode2Alphabet[AlphabetSize] = i;
				pAlphabet2Code[i] = AlphabetSize;
				AlphabetSize++;
			}
			return AlphabetSize;
		}
예제 #3
0
        /// <summary>
        /// Translates a vowel ordinal counted from the END of <paramref name="form"/>
        /// (0 = last vowel) into the index of that vowel inside the string.
        /// </summary>
        /// <param name="form">Word form that is scanned from its last character backwards.</param>
        /// <param name="accentCharNo">Zero-based vowel ordinal from the end, or Constants.UnknownAccent.</param>
        /// <param name="language">Language used for the vowel tests.</param>
        /// <param name="codePage">Code page forwarded to GetByte for each character.</param>
        /// <returns>
        /// The character index of the requested vowel, or Constants.UnknownAccent when
        /// the input is UnknownAccent or the form has too few vowels.
        /// </returns>
        /// <exception cref="MorphException">
        /// Thrown when <paramref name="accentCharNo"/> is not smaller than the form length,
        /// or when the last index of the form is not smaller than Constants.UnknownAccent.
        /// </exception>
        public byte TransferReverseVowelNoToCharNo(string form, byte accentCharNo, InternalMorphLanguage language, int codePage)
        {
            if (accentCharNo == Constants.UnknownAccent)
            {
                return Constants.UnknownAccent;
            }
            if (form.Length <= accentCharNo)
            {
                throw new MorphException("AccentCharNo >= form.Length");
            }

            var pos = form.Length - 1;
            if (pos >= Constants.UnknownAccent)
            {
                throw new MorphException("i >= UnknownAccent");
            }

            // Starts at -1 so that the first vowel met gets ordinal 0.
            var vowelsSeen = -1;
            while (pos >= 0)
            {
                var isVowel = Lang.is_lower_vowel(GetByte(form[pos], codePage), language)
                              || Lang.is_upper_vowel(GetByte(form[pos], codePage), language);
                if (isVowel)
                {
                    vowelsSeen++;
                    // The counter only changes on a vowel, so testing here is enough.
                    if (vowelsSeen == accentCharNo)
                    {
                        return (byte)pos;
                    }
                }
                pos--;
            }
            return Constants.UnknownAccent;
        }
예제 #4
0
 /// <summary>
 /// Tells whether <paramref name="x"/> is an upper-case letter of
 /// <paramref name="Langua"/> that is not an upper-case vowel.
 /// </summary>
 /// <param name="x">Character code to classify.</param>
 /// <param name="Langua">Language whose letter tables are consulted.</param>
 /// <returns>true for an upper-case non-vowel letter; false otherwise.</returns>
 internal static bool is_upper_consonant(byte x, InternalMorphLanguage Langua)
 {
     // Short-circuit keeps the vowel test from running for non-letters.
     return is_upper_alpha(x, Langua) && !is_upper_vowel(x, Langua);
 }
예제 #5
0
파일: Lemmatizer.cs 프로젝트: tvi123/rep123
		/// <summary>
		/// Creates a lemmatizer for <paramref name="language"/>: clears the four option
		/// flags, creates the prediction helper and installs a new MorphAutomat.
		/// </summary>
		/// <param name="language">Language forwarded to the base class, PredictBase and MorphAutomat.</param>
		public Lemmatizer(InternalMorphLanguage language)
			: base(language)
		{
			// All flags start out switched off.
			_loaded = false;
			_useStatistic = false;
			_maximalPrediction = false;
			_allowRussianJo = false;

			_predict = new PredictBase(this, language);

			var automat = new MorphAutomat(this, language, Constants.MorphAnnotChar);
			InitAutomat(automat);
		}
예제 #6
0
 /// <summary>
 /// Builds a lemmatizer for the given language. The option flags are reset to
 /// false, a PredictBase is created, and a MorphAutomat using
 /// Constants.MorphAnnotChar is passed to InitAutomat.
 /// </summary>
 /// <param name="language">Language handed to the base constructor and to the helper objects.</param>
 public Lemmatizer(InternalMorphLanguage language) : base(language)
 {
     _loaded = false;
     _useStatistic = false;
     _maximalPrediction = false;
     _allowRussianJo = false;

     _predict = new PredictBase(this, language);

     var morphAutomat = new MorphAutomat(this, language, Constants.MorphAnnotChar);
     InitAutomat(morphAutomat);
 }
예제 #7
0
파일: ABCEncoder.cs 프로젝트: tvi123/rep123
		/// <summary>
		/// Stores the constructor arguments and builds two alphabets with InitAlphabet:
		/// one that includes <paramref name="annotChar"/> and one built with a
		/// placeholder annotator. The two sizes must differ by exactly one.
		/// </summary>
		/// <param name="lemmatizer">Lemmatizer kept in _lemmatizer.</param>
		/// <param name="language">Language kept in _language and used for both alphabets.</param>
		/// <param name="annotChar">Annotation character kept in _annotChar.</param>
		/// <exception cref="MorphException">
		/// Thrown when the alphabet with the annotator is not exactly one entry
		/// larger than the alphabet without it.
		/// </exception>
		public ABCEncoder(Lemmatizer lemmatizer, InternalMorphLanguage language, char annotChar)
		{
			// Described by the original author as a non-existing symbol.
			const char missingAnnotator = (char)257;

			_lemmatizer = lemmatizer;
			_language = language;
			_annotChar = annotChar;

			_alphabetSize = InitAlphabet(
				language,
				_code2Alphabet,
				_alphabet2Code,
				_annotChar);
			_alphabetSizeWithoutAnnotator = InitAlphabet(
				language,
				_code2AlphabetWithoutAnnotator,
				_alphabet2CodeWithoutAnnotator,
				missingAnnotator);

			if (_alphabetSize != _alphabetSizeWithoutAnnotator + 1)
			{
				throw new MorphException("_alphabetSizeWithoutAnnotator + 1 != _alphabetSize");
			}
		}
예제 #8
0
        /// <summary>
        /// Dispatches the lower-case vowel test to the routine of the given language.
        /// </summary>
        /// <param name="x">Character code to test.</param>
        /// <param name="Langua">Language that selects the routine.</param>
        /// <returns>
        /// The result of the Russian, English or German test; false for every
        /// other language value.
        /// </returns>
        internal static bool is_lower_vowel(byte x, InternalMorphLanguage Langua)
        {
            if (Langua == InternalMorphLanguage.morphRussian)
            {
                return is_russian_lower_vowel(x);
            }
            if (Langua == InternalMorphLanguage.morphEnglish)
            {
                return is_english_lower_vowel(x);
            }
            if (Langua == InternalMorphLanguage.morphGerman)
            {
                return is_german_lower_vowel(x);
            }
            // No vowel table is consulted for any other language.
            return false;
        }
예제 #9
0
 /// <summary>
 /// Remembers the arguments and initialises both alphabets through InitAlphabet:
 /// the regular one (with <paramref name="annotChar"/>) and one built with a
 /// placeholder annotator, then verifies that they differ in size by one.
 /// </summary>
 /// <param name="lemmatizer">Stored in _lemmatizer.</param>
 /// <param name="language">Stored in _language; also selects the alphabet contents.</param>
 /// <param name="annotChar">Stored in _annotChar and admitted to the regular alphabet.</param>
 /// <exception cref="MorphException">
 /// Thrown when _alphabetSizeWithoutAnnotator + 1 differs from _alphabetSize.
 /// </exception>
 public ABCEncoder(Lemmatizer lemmatizer, InternalMorphLanguage language, char annotChar)
 {
     _lemmatizer = lemmatizer;
     _language = language;
     _annotChar = annotChar;

     _alphabetSize = InitAlphabet(language, _code2Alphabet, _alphabet2Code, _annotChar);

     // The original author labels code 257 a non-existing symbol.
     const char placeholderAnnotator = (char)257;
     _alphabetSizeWithoutAnnotator = InitAlphabet(
         language,
         _code2AlphabetWithoutAnnotator,
         _alphabet2CodeWithoutAnnotator,
         placeholderAnnotator);

     var sizesConsistent = _alphabetSizeWithoutAnnotator + 1 == _alphabetSize;
     if (!sizesConsistent)
     {
         throw new MorphException("_alphabetSizeWithoutAnnotator + 1 != _alphabetSize");
     }
 }
예제 #10
0
        /// <summary>
        /// Dispatches the upper-case letter test to the routine of the given language.
        /// </summary>
        /// <param name="x">Character code to test.</param>
        /// <param name="Langua">Language that selects the routine.</param>
        /// <returns>
        /// The result of the Russian, English, German or generic test; always false
        /// for morphURL and for any other language value.
        /// </returns>
        internal static bool is_upper_alpha(byte x, InternalMorphLanguage Langua)
        {
            switch (Langua)
            {
                case InternalMorphLanguage.morphRussian:
                    return is_russian_upper(x);

                case InternalMorphLanguage.morphEnglish:
                    return is_english_upper(x);

                case InternalMorphLanguage.morphGerman:
                    return is_german_upper(x);

                case InternalMorphLanguage.morphGeneric:
                    return is_generic_upper(x);

                default:
                    // Covers morphURL as well as unrecognised values.
                    return false;
            }
        }
예제 #11
0
        /// <summary>
        /// Dispatches the letter test to the routine of the given language.
        /// </summary>
        /// <param name="x">Character code to test.</param>
        /// <param name="Langua">Language that selects the routine.</param>
        /// <returns>The result of the language-specific letter test.</returns>
        /// <exception cref="MorphException">
        /// Thrown when <paramref name="Langua"/> is none of the five handled languages.
        /// </exception>
        internal static bool is_alpha(byte x, InternalMorphLanguage Langua)
        {
            if (Langua == InternalMorphLanguage.morphRussian)
            {
                return is_russian_alpha(x);
            }
            if (Langua == InternalMorphLanguage.morphEnglish)
            {
                return is_english_alpha(x);
            }
            if (Langua == InternalMorphLanguage.morphGerman)
            {
                return is_german_alpha(x);
            }
            if (Langua == InternalMorphLanguage.morphGeneric)
            {
                return is_generic_alpha(x);
            }
            if (Langua == InternalMorphLanguage.morphURL)
            {
                return is_URL_alpha(x);
            }
            // NOTE(review): the text mentions a char, yet this line is reached only
            // when the language matched none of the branches above.
            throw new MorphException("unknown char x");
        }
예제 #12
0
        /// <summary>
        /// Returns the display name associated with a language value.
        /// </summary>
        /// <param name="langua">Language to name.</param>
        /// <returns>
        /// "Russian", "English", "German", "Generic" or "URL_ABC" for the matching
        /// language; "unk" for any other value.
        /// </returns>
        public static string GetStringByLanguage(InternalMorphLanguage langua)
        {
            string name;
            switch (langua)
            {
                case InternalMorphLanguage.morphRussian:
                    name = "Russian";
                    break;

                case InternalMorphLanguage.morphEnglish:
                    name = "English";
                    break;

                case InternalMorphLanguage.morphGerman:
                    name = "German";
                    break;

                case InternalMorphLanguage.morphGeneric:
                    name = "Generic";
                    break;

                case InternalMorphLanguage.morphURL:
                    name = "URL_ABC";
                    break;

                default:
                    name = "unk";
                    break;
            }
            return name;
        }
예제 #13
0
        /// <summary>
        /// Fills the two lookup tables that translate between character codes
        /// (0 .. Constants.AlphabetSize - 1) and dense alphabet positions.
        /// A code is admitted when it is an upper-case letter of
        /// <paramref name="language"/>, the hyphen, <paramref name="annotChar"/>,
        /// one of the extra English/German characters (for those languages), or,
        /// for morphURL, any character accepted by Lang.is_alpha.
        /// </summary>
        /// <param name="language">Language that selects which codes are alphabet members.</param>
        /// <param name="pCode2Alphabet">Output table: entry [position] receives the character code stored at that alphabet position.</param>
        /// <param name="pAlphabet2Code">Output table: entry [code] receives the alphabet position of that code, or -1 when the code is not a member.</param>
        /// <param name="annotChar">Annotation character that is always admitted; must not be upper case.</param>
        /// <returns>The number of codes admitted to the alphabet.</returns>
        /// <exception cref="MorphException">
        /// Thrown when <paramref name="annotChar"/> is upper case, or when more than
        /// Constants.MaxAlphabetSize codes would be admitted.
        /// </exception>
        private static int InitAlphabet(InternalMorphLanguage language, int[] pCode2Alphabet, int[] pAlphabet2Code, char annotChar)
        {
            // The guard fires when the annotator IS upper case, so the message states
            // the actual requirement (the previous text claimed the opposite).
            if (char.IsUpper(annotChar))
            {
                throw new MorphException("annotChar must not be an upper-case character");
            }
            var AdditionalEnglishChars = "'1234567890";
            var AdditionalGermanChars  = "";
            var AlphabetSize           = 0;

            for (var i = 0; i < Constants.AlphabetSize; i++)
            {
                var ch = Convert.ToChar(i);
                var inAlphabet = Lang.is_upper_alpha((byte)i, language) ||
                                 (ch == '-') ||
                                 (ch == annotChar) ||
                                 ((language == InternalMorphLanguage.morphEnglish) &&
                                  (AdditionalEnglishChars.IndexOf(ch) >= 0)) ||
                                 ((language == InternalMorphLanguage.morphGerman) &&
                                  (AdditionalGermanChars.IndexOf(ch) >= 0)) ||
                                 ((language == InternalMorphLanguage.morphURL) &&
                                  Lang.is_alpha((byte)i, language));

                if (!inAlphabet)
                {
                    pAlphabet2Code[i] = -1;
                    continue;
                }

                // Validate BEFORE writing: the size limit is then reported as a
                // MorphException instead of depending on how large the caller-supplied
                // pCode2Alphabet happens to be. The throw condition is unchanged
                // (it triggers exactly when the final count would exceed the maximum).
                if (AlphabetSize >= Constants.MaxAlphabetSize)
                {
                    throw new MorphException("Error! The  ABC is too large");
                }

                pCode2Alphabet[AlphabetSize] = i;
                pAlphabet2Code[i]            = AlphabetSize;
                AlphabetSize++;
            }
            return AlphabetSize;
        }
예제 #14
0
파일: Lang.cs 프로젝트: tvi123/rep123
		/// <summary>
		/// Checks whether <paramref name="x"/> is an upper-case letter according to
		/// the routine that belongs to <paramref name="Langua"/>.
		/// </summary>
		/// <param name="x">Character code to test.</param>
		/// <param name="Langua">Language that selects the routine.</param>
		/// <returns>
		/// The language-specific result for Russian, English, German and generic;
		/// false for morphURL and for every other value.
		/// </returns>
		internal static bool is_upper_alpha(byte x, InternalMorphLanguage Langua)
		{
			if (Langua == InternalMorphLanguage.morphRussian) {
				return is_russian_upper(x);
			}
			if (Langua == InternalMorphLanguage.morphEnglish) {
				return is_english_upper(x);
			}
			if (Langua == InternalMorphLanguage.morphGerman) {
				return is_german_upper(x);
			}
			if (Langua == InternalMorphLanguage.morphGeneric) {
				return is_generic_upper(x);
			}
			// morphURL and any unrecognised language have no upper-case letters here.
			return false;
		}
예제 #15
0
 /// <summary>
 /// Base-class constructor: creates the comparer over _bases and leaves the
 /// form automaton unset.
 /// </summary>
 /// <param name="language">Accepted for the derived classes; not read inside this constructor body.</param>
 protected MorphDict(InternalMorphLanguage language)
 {
     // _bases must already hold a value here (presumably via a field
     // initialiser - confirm against the class declaration).
     _comparer = new InternalComparer(_bases);

     // No automaton yet; nothing in this body assigns one.
     _formAutomat = null;
 }
예제 #16
0
 /// <summary>
 /// Creates the predictor and its suffix automaton, a MorphAutomat built from
 /// the given lemmatizer and language with Constants.MorphAnnotChar.
 /// </summary>
 /// <param name="lemmatizer">Lemmatizer handed to the suffix automaton.</param>
 /// <param name="lang">Language handed to the suffix automaton.</param>
 public PredictBase(Lemmatizer lemmatizer, InternalMorphLanguage lang)
 {
     var suffixAutomat = new MorphAutomat(lemmatizer, lang, Constants.MorphAnnotChar);
     _suffixAutomat = suffixAutomat;
 }
예제 #17
0
 /// <summary>
 /// Creates the automaton: forwards all arguments to the base-class constructor
 /// and then allocates a fresh Tools instance for _tools.
 /// </summary>
 /// <param name="lemmatizer">Lemmatizer passed through to the base class.</param>
 /// <param name="language">Language passed through to the base class.</param>
 /// <param name="annotChar">Annotation character passed through to the base class.</param>
 public MorphAutomat(
     Lemmatizer lemmatizer,
     InternalMorphLanguage language,
     char annotChar) : base(lemmatizer, language, annotChar)
 {
     var tools = new Tools();
     _tools = tools;
 }
예제 #18
0
		/// <summary>
		/// Initialises the predictor by creating its suffix automaton: a MorphAutomat
		/// for the given lemmatizer and language that uses Constants.MorphAnnotChar.
		/// </summary>
		/// <param name="lemmatizer">Lemmatizer forwarded to the MorphAutomat.</param>
		/// <param name="lang">Language forwarded to the MorphAutomat.</param>
		public PredictBase(Lemmatizer lemmatizer, InternalMorphLanguage lang)
		{
			_suffixAutomat = new MorphAutomat(
				lemmatizer,
				lang,
				Constants.MorphAnnotChar);
		}
예제 #19
0
파일: Lang.cs 프로젝트: tvi123/rep123
		/// <summary>
		/// Checks whether <paramref name="x"/> is a letter according to the routine
		/// that belongs to <paramref name="Langua"/>.
		/// </summary>
		/// <param name="x">Character code to test.</param>
		/// <param name="Langua">Language that selects the routine.</param>
		/// <returns>The result of the language-specific letter test.</returns>
		/// <exception cref="MorphException">
		/// Thrown when <paramref name="Langua"/> is none of the five handled languages.
		/// </exception>
		internal static bool is_alpha(byte x, InternalMorphLanguage Langua)
		{
			bool result;
			switch (Langua) {
				case InternalMorphLanguage.morphRussian:
					result = is_russian_alpha(x);
					break;
				case InternalMorphLanguage.morphEnglish:
					result = is_english_alpha(x);
					break;
				case InternalMorphLanguage.morphGerman:
					result = is_german_alpha(x);
					break;
				case InternalMorphLanguage.morphGeneric:
					result = is_generic_alpha(x);
					break;
				case InternalMorphLanguage.morphURL:
					result = is_URL_alpha(x);
					break;
				default:
					// NOTE(review): the text mentions a char, yet this branch is taken
					// only when the language matched none of the cases above.
					throw new MorphException("unknown char x");
			}
			return result;
		}
예제 #20
0
파일: Lang.cs 프로젝트: tvi123/rep123
		/// <summary>
		/// Checks whether <paramref name="x"/> is a lower-case vowel according to the
		/// routine that belongs to <paramref name="Langua"/>.
		/// </summary>
		/// <param name="x">Character code to test.</param>
		/// <param name="Langua">Language that selects the routine.</param>
		/// <returns>
		/// The Russian, English or German result; false for every other language value.
		/// </returns>
		internal static bool is_lower_vowel(byte x, InternalMorphLanguage Langua)
		{
			switch (Langua) {
				case InternalMorphLanguage.morphRussian:
					return is_russian_lower_vowel(x);
				case InternalMorphLanguage.morphEnglish:
					return is_english_lower_vowel(x);
				case InternalMorphLanguage.morphGerman:
					return is_german_lower_vowel(x);
				default:
					// Languages without a vowel routine never report a vowel.
					return false;
			}
		}
예제 #21
0
파일: Lang.cs 프로젝트: tvi123/rep123
		/// <summary>
		/// Reports whether <paramref name="x"/> is an upper-case letter of
		/// <paramref name="Langua"/> that is not classified as an upper-case vowel.
		/// </summary>
		/// <param name="x">Character code to classify.</param>
		/// <param name="Langua">Language whose letter tables are consulted.</param>
		/// <returns>true for an upper-case non-vowel letter; false otherwise.</returns>
		internal static bool is_upper_consonant(byte x, InternalMorphLanguage Langua)
		{
			if (is_upper_alpha(x, Langua)) {
				// A letter is a consonant exactly when it is not a vowel.
				return !is_upper_vowel(x, Langua);
			}
			return false;
		}