/// <summary>
/// Initializes a new <see cref="MorphAutomat"/>; all three arguments are
/// forwarded unchanged to the base-class constructor.
/// </summary>
/// <param name="lemmatizer">Lemmatizer passed through to the base class.</param>
/// <param name="language">Language passed through to the base class.</param>
/// <param name="annotChar">Annotation character passed through to the base class.</param>
public MorphAutomat(Lemmatizer lemmatizer, InternalMorphLanguage language, char annotChar)
    : base(lemmatizer, language, annotChar)
{
    // Nothing to initialize beyond what the base constructor does.
}
/// <summary>
/// Fills the two code/alphabet lookup tables for <paramref name="language"/>
/// and returns how many character codes were accepted into the alphabet.
/// </summary>
/// <remarks>
/// A code in the range [0, Constants.AlphabetSize) is accepted when it is an
/// upper-case letter of the language, the hyphen, the annotation character,
/// one of the extra English characters (apostrophe and digits) for English,
/// or any URL alphabet character for the URL pseudo-language.
/// </remarks>
/// <param name="language">Language whose alphabet is being built.</param>
/// <param name="pCode2Alphabet">Output: alphabet index -> character code.</param>
/// <param name="pAlphabet2Code">Output: character code -> alphabet index, or -1 when the code is not in the alphabet.</param>
/// <param name="annotChar">Annotation character to include in the alphabet; must not be upper case.</param>
/// <returns>The number of codes accepted into the alphabet.</returns>
/// <exception cref="MorphException">
/// <paramref name="annotChar"/> is upper case, or more than
/// Constants.MaxAlphabetSize codes qualify for the alphabet.
/// </exception>
private static int InitAlphabet(InternalMorphLanguage language, int[] pCode2Alphabet, int[] pAlphabet2Code, char annotChar)
{
    if (char.IsUpper(annotChar))
    {
        // The previous message ("annotChar is not upper") stated the opposite
        // of the condition that triggers it.
        throw new MorphException("annotChar must not be an upper-case character");
    }

    const string additionalEnglishChars = "'1234567890";
    // Currently empty, so the German clause below never matches; kept as an extension point.
    const string additionalGermanChars = "";

    var alphabetSize = 0;
    for (var code = 0; code < Constants.AlphabetSize; code++)
    {
        var ch = Convert.ToChar(code);
        var belongsToAlphabet =
            Lang.is_upper_alpha((byte)code, language)
            || ch == '-'
            || ch == annotChar
            || (language == InternalMorphLanguage.morphEnglish && additionalEnglishChars.IndexOf(ch) >= 0)
            || (language == InternalMorphLanguage.morphGerman && additionalGermanChars.IndexOf(ch) >= 0)
            || (language == InternalMorphLanguage.morphURL && Lang.is_alpha((byte)code, language));

        if (!belongsToAlphabet)
        {
            pAlphabet2Code[code] = -1;
            continue;
        }

        // Check the limit before writing: previously the check ran only after
        // the loop, so an oversized alphabet could overrun pCode2Alphabet and
        // fail with IndexOutOfRangeException instead of the intended error.
        if (alphabetSize >= Constants.MaxAlphabetSize)
        {
            throw new MorphException("Error! The ABC is too large");
        }

        pCode2Alphabet[alphabetSize] = code;
        pAlphabet2Code[code] = alphabetSize;
        alphabetSize++;
    }

    return alphabetSize;
}
/// <summary>
/// Converts a vowel ordinal counted from the end of <paramref name="form"/>
/// (0 = last vowel) into the index of that vowel within the string.
/// </summary>
/// <param name="form">Word form to scan.</param>
/// <param name="accentCharNo">Zero-based vowel ordinal counted from the end, or Constants.UnknownAccent.</param>
/// <param name="language">Language used to decide which characters are vowels.</param>
/// <param name="codePage">Code page handed to GetByte when converting each character.</param>
/// <returns>
/// The character index of the requested vowel, or Constants.UnknownAccent when
/// the input is Constants.UnknownAccent or the form has too few vowels.
/// </returns>
/// <exception cref="MorphException">
/// <paramref name="accentCharNo"/> is not smaller than the form length, or the
/// last index of the form is not smaller than Constants.UnknownAccent.
/// </exception>
public byte TransferReverseVowelNoToCharNo(string form, byte accentCharNo, InternalMorphLanguage language, int codePage)
{
    if (accentCharNo == Constants.UnknownAccent)
    {
        return Constants.UnknownAccent;
    }

    if (accentCharNo >= form.Length)
    {
        throw new MorphException("AccentCharNo >= form.Length");
    }

    var lastIndex = form.Length - 1;
    if (lastIndex >= Constants.UnknownAccent)
    {
        throw new MorphException("i >= UnknownAccent");
    }

    // Starts at -1 so that the first vowel encountered gets ordinal 0.
    var vowelOrdinal = -1;
    var pos = lastIndex;
    while (pos >= 0)
    {
        var isVowel = Lang.is_lower_vowel(GetByte(form[pos], codePage), language)
            || Lang.is_upper_vowel(GetByte(form[pos], codePage), language);

        if (isVowel)
        {
            vowelOrdinal++;

            // The ordinal can only reach the target right after an increment,
            // so testing here is equivalent to testing on every iteration.
            if (vowelOrdinal == accentCharNo)
            {
                return (byte)pos;
            }
        }

        pos--;
    }

    return Constants.UnknownAccent;
}
/// <summary>
/// Returns true when <paramref name="x"/> is an upper-case letter of
/// <paramref name="Langua"/> that is not an upper-case vowel.
/// </summary>
/// <param name="x">Character code to classify.</param>
/// <param name="Langua">Language whose alphabet rules apply.</param>
internal static bool is_upper_consonant(byte x, InternalMorphLanguage Langua)
{
    // Short-circuit: the vowel test only runs for upper-case letters.
    return is_upper_alpha(x, Langua) && !is_upper_vowel(x, Langua);
}
/// <summary>
/// Creates a lemmatizer for <paramref name="language"/> with all option flags
/// switched off, a fresh predictor and a fresh automaton.
/// </summary>
/// <param name="language">Language passed to the base class, the predictor and the automaton.</param>
public Lemmatizer(InternalMorphLanguage language)
    : base(language)
{
    // All flags start switched off.
    _loaded = false;
    _useStatistic = false;
    _maximalPrediction = false;
    _allowRussianJo = false;

    _predict = new PredictBase(this, language);

    var automat = new MorphAutomat(this, language, Constants.MorphAnnotChar);
    InitAutomat(automat);
}
/// <summary>
/// Creates a lemmatizer for <paramref name="language"/> with all option flags
/// switched off, a fresh predictor and a fresh automaton.
/// </summary>
/// <param name="language">Language passed to the base class, the predictor and the automaton.</param>
public Lemmatizer(InternalMorphLanguage language)
    : base(language)
{
    // All flags start switched off.
    _loaded = false;
    _useStatistic = false;
    _maximalPrediction = false;
    _allowRussianJo = false;

    _predict = new PredictBase(this, language);

    var automat = new MorphAutomat(this, language, Constants.MorphAnnotChar);
    InitAutomat(automat);
}
/// <summary>
/// Stores the constructor arguments and builds two alphabets via InitAlphabet:
/// one with the annotation character and one built with a placeholder symbol
/// in its place.
/// </summary>
/// <param name="lemmatizer">Lemmatizer stored in this instance.</param>
/// <param name="language">Language whose alphabet is built.</param>
/// <param name="annotChar">Annotation character used for the first alphabet.</param>
/// <exception cref="MorphException">
/// The alphabet with the annotation character is not exactly one entry larger
/// than the alphabet without it.
/// </exception>
public ABCEncoder(Lemmatizer lemmatizer, InternalMorphLanguage language, char annotChar)
{
    _lemmatizer = lemmatizer;
    _language = language;
    _annotChar = annotChar;

    _alphabetSize = InitAlphabet(language, _code2Alphabet, _alphabet2Code, _annotChar);

    // Placeholder ("non-existing symbol") passed instead of the annotation character.
    const char nonExistingSymbol = (char)257;
    _alphabetSizeWithoutAnnotator = InitAlphabet(
        language,
        _code2AlphabetWithoutAnnotator,
        _alphabet2CodeWithoutAnnotator,
        nonExistingSymbol);

    // The annotation character must account for exactly one extra entry.
    var sizesAgree = _alphabetSizeWithoutAnnotator + 1 == _alphabetSize;
    if (!sizesAgree)
    {
        throw new MorphException("_alphabetSizeWithoutAnnotator + 1 != _alphabetSize");
    }
}
/// <summary>
/// Tests whether <paramref name="x"/> is a lower-case vowel in
/// <paramref name="Langua"/> by delegating to the per-language predicate.
/// </summary>
/// <param name="x">Character code to classify.</param>
/// <param name="Langua">Language whose vowel set applies.</param>
/// <returns>
/// The per-language result for Russian, English and German; false for any
/// other language.
/// </returns>
internal static bool is_lower_vowel(byte x, InternalMorphLanguage Langua)
{
    if (Langua == InternalMorphLanguage.morphRussian)
    {
        return is_russian_lower_vowel(x);
    }

    if (Langua == InternalMorphLanguage.morphEnglish)
    {
        return is_english_lower_vowel(x);
    }

    if (Langua == InternalMorphLanguage.morphGerman)
    {
        return is_german_lower_vowel(x);
    }

    // Every other language has no vowel predicate here.
    return false;
}
/// <summary>
/// Stores the constructor arguments and builds two alphabets via InitAlphabet:
/// one with the annotation character and one built with a placeholder symbol
/// in its place.
/// </summary>
/// <param name="lemmatizer">Lemmatizer stored in this instance.</param>
/// <param name="language">Language whose alphabet is built.</param>
/// <param name="annotChar">Annotation character used for the first alphabet.</param>
/// <exception cref="MorphException">
/// The alphabet with the annotation character is not exactly one entry larger
/// than the alphabet without it.
/// </exception>
public ABCEncoder(Lemmatizer lemmatizer, InternalMorphLanguage language, char annotChar)
{
    _lemmatizer = lemmatizer;
    _language = language;
    _annotChar = annotChar;

    _alphabetSize = InitAlphabet(language, _code2Alphabet, _alphabet2Code, _annotChar);

    // Placeholder ("non-existing symbol") passed instead of the annotation character.
    const char nonExistingSymbol = (char)257;
    _alphabetSizeWithoutAnnotator = InitAlphabet(
        language,
        _code2AlphabetWithoutAnnotator,
        _alphabet2CodeWithoutAnnotator,
        nonExistingSymbol);

    // The annotation character must account for exactly one extra entry.
    var sizesAgree = _alphabetSizeWithoutAnnotator + 1 == _alphabetSize;
    if (!sizesAgree)
    {
        throw new MorphException("_alphabetSizeWithoutAnnotator + 1 != _alphabetSize");
    }
}
/// <summary>
/// Tests whether <paramref name="x"/> is an upper-case letter in
/// <paramref name="Langua"/> by delegating to the per-language predicate.
/// </summary>
/// <param name="x">Character code to classify.</param>
/// <param name="Langua">Language whose alphabet rules apply.</param>
/// <returns>
/// The per-language result for Russian, English, German and Generic; false for
/// the URL pseudo-language and for any other value.
/// </returns>
internal static bool is_upper_alpha(byte x, InternalMorphLanguage Langua)
{
    switch (Langua)
    {
        case InternalMorphLanguage.morphRussian:
            return is_russian_upper(x);

        case InternalMorphLanguage.morphEnglish:
            return is_english_upper(x);

        case InternalMorphLanguage.morphGerman:
            return is_german_upper(x);

        case InternalMorphLanguage.morphGeneric:
            return is_generic_upper(x);

        // The URL alphabet is never treated as upper case, same as unknown languages.
        case InternalMorphLanguage.morphURL:
        default:
            return false;
    }
}
/// <summary>
/// Tests whether <paramref name="x"/> is an alphabet character in
/// <paramref name="Langua"/> by delegating to the per-language predicate.
/// </summary>
/// <param name="x">Character code to classify.</param>
/// <param name="Langua">Language whose alphabet rules apply.</param>
/// <returns>The result of the predicate for the selected language.</returns>
/// <exception cref="MorphException">
/// <paramref name="Langua"/> is none of Russian, English, German, Generic or URL.
/// </exception>
internal static bool is_alpha(byte x, InternalMorphLanguage Langua)
{
    switch (Langua)
    {
        case InternalMorphLanguage.morphRussian:
            return is_russian_alpha(x);

        case InternalMorphLanguage.morphEnglish:
            return is_english_alpha(x);

        case InternalMorphLanguage.morphGerman:
            return is_german_alpha(x);

        case InternalMorphLanguage.morphGeneric:
            return is_generic_alpha(x);

        case InternalMorphLanguage.morphURL:
            return is_URL_alpha(x);

        default:
            // NOTE(review): the message mentions a char, but this is reached
            // for an unhandled language value; text kept unchanged.
            throw new MorphException("unknown char x");
    }
}
/// <summary>
/// Maps a language value to its display name.
/// </summary>
/// <param name="langua">Language to name.</param>
/// <returns>
/// "Russian", "English", "German", "Generic" or "URL_ABC" for the matching
/// language; "unk" for any other value.
/// </returns>
public static string GetStringByLanguage(InternalMorphLanguage langua)
{
    // Fallback used when no known language matches.
    var name = "unk";

    if (langua == InternalMorphLanguage.morphRussian)
    {
        name = "Russian";
    }
    else if (langua == InternalMorphLanguage.morphEnglish)
    {
        name = "English";
    }
    else if (langua == InternalMorphLanguage.morphGerman)
    {
        name = "German";
    }
    else if (langua == InternalMorphLanguage.morphGeneric)
    {
        name = "Generic";
    }
    else if (langua == InternalMorphLanguage.morphURL)
    {
        name = "URL_ABC";
    }

    return name;
}
/// <summary>
/// Fills the two code/alphabet lookup tables for <paramref name="language"/>
/// and returns how many character codes were accepted into the alphabet.
/// </summary>
/// <remarks>
/// A code in the range [0, Constants.AlphabetSize) is accepted when it is an
/// upper-case letter of the language, the hyphen, the annotation character,
/// one of the extra English characters (apostrophe and digits) for English,
/// or any URL alphabet character for the URL pseudo-language.
/// </remarks>
/// <param name="language">Language whose alphabet is being built.</param>
/// <param name="pCode2Alphabet">Output: alphabet index -> character code.</param>
/// <param name="pAlphabet2Code">Output: character code -> alphabet index, or -1 when the code is not in the alphabet.</param>
/// <param name="annotChar">Annotation character to include in the alphabet; must not be upper case.</param>
/// <returns>The number of codes accepted into the alphabet.</returns>
/// <exception cref="MorphException">
/// <paramref name="annotChar"/> is upper case, or more than
/// Constants.MaxAlphabetSize codes qualify for the alphabet.
/// </exception>
private static int InitAlphabet(InternalMorphLanguage language, int[] pCode2Alphabet, int[] pAlphabet2Code, char annotChar)
{
    if (char.IsUpper(annotChar))
    {
        // The previous message ("annotChar is not upper") stated the opposite
        // of the condition that triggers it.
        throw new MorphException("annotChar must not be an upper-case character");
    }

    const string additionalEnglishChars = "'1234567890";
    // Currently empty, so the German clause below never matches; kept as an extension point.
    const string additionalGermanChars = "";

    var alphabetSize = 0;
    for (var code = 0; code < Constants.AlphabetSize; code++)
    {
        var ch = Convert.ToChar(code);
        var belongsToAlphabet =
            Lang.is_upper_alpha((byte)code, language)
            || ch == '-'
            || ch == annotChar
            || (language == InternalMorphLanguage.morphEnglish && additionalEnglishChars.IndexOf(ch) >= 0)
            || (language == InternalMorphLanguage.morphGerman && additionalGermanChars.IndexOf(ch) >= 0)
            || (language == InternalMorphLanguage.morphURL && Lang.is_alpha((byte)code, language));

        if (!belongsToAlphabet)
        {
            pAlphabet2Code[code] = -1;
            continue;
        }

        // Check the limit before writing: previously the check ran only after
        // the loop, so an oversized alphabet could overrun pCode2Alphabet and
        // fail with IndexOutOfRangeException instead of the intended error.
        if (alphabetSize >= Constants.MaxAlphabetSize)
        {
            throw new MorphException("Error! The ABC is too large");
        }

        pCode2Alphabet[alphabetSize] = code;
        pAlphabet2Code[code] = alphabetSize;
        alphabetSize++;
    }

    return alphabetSize;
}
/// <summary>
/// Tests whether <paramref name="x"/> is an upper-case letter in
/// <paramref name="Langua"/> by delegating to the per-language predicate.
/// </summary>
/// <param name="x">Character code to classify.</param>
/// <param name="Langua">Language whose alphabet rules apply.</param>
/// <returns>
/// The per-language result for Russian, English, German and Generic; false for
/// the URL pseudo-language and for any other value.
/// </returns>
internal static bool is_upper_alpha(byte x, InternalMorphLanguage Langua)
{
    if (Langua == InternalMorphLanguage.morphRussian)
    {
        return is_russian_upper(x);
    }

    if (Langua == InternalMorphLanguage.morphEnglish)
    {
        return is_english_upper(x);
    }

    if (Langua == InternalMorphLanguage.morphGerman)
    {
        return is_german_upper(x);
    }

    if (Langua == InternalMorphLanguage.morphGeneric)
    {
        return is_generic_upper(x);
    }

    // morphURL and any unrecognized language: never upper case.
    return false;
}
/// <summary>
/// Initializes the dictionary with no form automaton and a comparer built
/// over <c>_bases</c>.
/// </summary>
/// <param name="language">Not used by this constructor body.</param>
protected MorphDict(InternalMorphLanguage language)
{
    // No automaton is assigned here.
    _formAutomat = null;

    _comparer = new InternalComparer(_bases);
}
/// <summary>
/// Creates the predictor and its suffix automaton, which is built with
/// Constants.MorphAnnotChar as the annotation character.
/// </summary>
/// <param name="lemmatizer">Lemmatizer handed to the suffix automaton.</param>
/// <param name="lang">Language handed to the suffix automaton.</param>
public PredictBase(Lemmatizer lemmatizer, InternalMorphLanguage lang)
{
    var suffixAutomat = new MorphAutomat(lemmatizer, lang, Constants.MorphAnnotChar);
    _suffixAutomat = suffixAutomat;
}
/// <summary>
/// Initializes a new <see cref="MorphAutomat"/>: forwards all arguments to the
/// base-class constructor and creates a new <c>Tools</c> instance.
/// </summary>
/// <param name="lemmatizer">Lemmatizer passed through to the base class.</param>
/// <param name="language">Language passed through to the base class.</param>
/// <param name="annotChar">Annotation character passed through to the base class.</param>
public MorphAutomat(Lemmatizer lemmatizer, InternalMorphLanguage language, char annotChar)
    : base(lemmatizer, language, annotChar)
{
    var tools = new Tools();
    _tools = tools;
}
/// <summary>
/// Creates the predictor and its suffix automaton, which is built with
/// Constants.MorphAnnotChar as the annotation character.
/// </summary>
/// <param name="lemmatizer">Lemmatizer handed to the suffix automaton.</param>
/// <param name="lang">Language handed to the suffix automaton.</param>
public PredictBase(Lemmatizer lemmatizer, InternalMorphLanguage lang)
{
    var suffixAutomat = new MorphAutomat(lemmatizer, lang, Constants.MorphAnnotChar);
    _suffixAutomat = suffixAutomat;
}
/// <summary>
/// Tests whether <paramref name="x"/> is an alphabet character in
/// <paramref name="Langua"/> by delegating to the per-language predicate.
/// </summary>
/// <param name="x">Character code to classify.</param>
/// <param name="Langua">Language whose alphabet rules apply.</param>
/// <returns>The result of the predicate for the selected language.</returns>
/// <exception cref="MorphException">
/// <paramref name="Langua"/> is none of Russian, English, German, Generic or URL.
/// </exception>
internal static bool is_alpha(byte x, InternalMorphLanguage Langua)
{
    if (Langua == InternalMorphLanguage.morphRussian)
    {
        return is_russian_alpha(x);
    }

    if (Langua == InternalMorphLanguage.morphEnglish)
    {
        return is_english_alpha(x);
    }

    if (Langua == InternalMorphLanguage.morphGerman)
    {
        return is_german_alpha(x);
    }

    if (Langua == InternalMorphLanguage.morphGeneric)
    {
        return is_generic_alpha(x);
    }

    if (Langua == InternalMorphLanguage.morphURL)
    {
        return is_URL_alpha(x);
    }

    // NOTE(review): the message mentions a char, but this is reached for an
    // unhandled language value; text kept unchanged.
    throw new MorphException("unknown char x");
}
/// <summary>
/// Tests whether <paramref name="x"/> is a lower-case vowel in
/// <paramref name="Langua"/> by delegating to the per-language predicate.
/// </summary>
/// <param name="x">Character code to classify.</param>
/// <param name="Langua">Language whose vowel set applies.</param>
/// <returns>
/// The per-language result for Russian, English and German; false for any
/// other language.
/// </returns>
internal static bool is_lower_vowel(byte x, InternalMorphLanguage Langua)
{
    switch (Langua)
    {
        case InternalMorphLanguage.morphRussian:
            return is_russian_lower_vowel(x);

        case InternalMorphLanguage.morphEnglish:
            return is_english_lower_vowel(x);

        case InternalMorphLanguage.morphGerman:
            return is_german_lower_vowel(x);

        default:
            // No vowel predicate exists for the remaining languages.
            return false;
    }
}
/// <summary>
/// Returns true when <paramref name="x"/> is an upper-case letter of
/// <paramref name="Langua"/> that is not an upper-case vowel.
/// </summary>
/// <param name="x">Character code to classify.</param>
/// <param name="Langua">Language whose alphabet rules apply.</param>
internal static bool is_upper_consonant(byte x, InternalMorphLanguage Langua)
{
    if (is_upper_alpha(x, Langua))
    {
        // An upper-case letter is a consonant exactly when it is not a vowel.
        return !is_upper_vowel(x, Langua);
    }

    return false;
}