Exemplo n.º 1
2
		/// <summary>
		/// Creates the automaton by handing every argument unchanged to the base-class constructor.
		/// </summary>
		/// <param name="lemmatizer">Lemmatizer forwarded to the base constructor.</param>
		/// <param name="language">Language forwarded to the base constructor.</param>
		/// <param name="annotChar">Annotation character forwarded to the base constructor.</param>
		public MorphAutomat(Lemmatizer lemmatizer, InternalMorphLanguage language, char annotChar)
			: base(lemmatizer, language, annotChar)
		{
			// Nothing to set up here beyond what the base constructor already did.
		}
Exemplo n.º 2
0
		/// <summary>
		/// Builds the form description for <paramref name="inputWordForm"/>: the flexion (ending) of the
		/// selected flexia model and the prefixes are stripped from the word, and what remains is kept
		/// in <c>_inputWordBase</c>. <c>_flexiaWasCut</c> and <c>_prefixesWereCut</c> record which of the
		/// two cuts were actually performed.
		/// </summary>
		/// <param name="parent">Lemmatizer whose <c>Prefixes</c> table is indexed with <c>a.PrefixNo</c>.</param>
		/// <param name="a">Annotation supplying <c>ItemNo</c> (flexia model index) and <c>PrefixNo</c>.</param>
		/// <param name="inputWordForm">The word form to be split into prefixes, base and flexion.</param>
		/// <param name="found">
		/// When true, flexion and prefixes are removed without first checking that the word carries them.
		/// </param>
		public FormInfo(Lemmatizer parent, AutomAnnotationInner a, string inputWordForm, bool found) {
			// NOTE(review): FlexiaModel is declared outside this constructor and presumably reads the
			// fields assigned here, so these assignments stay ahead of the lookup - confirm.
			_innerAnnot = a;
			_parent = parent;
			_found = found;
			_inputWordBase = inputWordForm;
			var m = FlexiaModel[a.ItemNo];
			var flexLength = m.FlexiaStr.Length;

			//  It can be so (if CLemmatizer::PredictByDataBase was used) that
			//  the flexion is not a suffix of m_InputWordBase, but only a part of it.
			//  If so, then we cannot generate the paradigm, since the current form cannot be
			//  divided into two parts: the base and a known flexion.

			// Ordinal EndsWith is the same test as "long enough and the tail equals the flexion".
			_flexiaWasCut = _found || _inputWordBase.EndsWith(m.FlexiaStr, System.StringComparison.Ordinal);

			// An empty flexion leaves nothing to remove, so Remove is skipped in that case.
			if (_flexiaWasCut && flexLength > 0) {
				_inputWordBase = _inputWordBase.Remove(_inputWordBase.Length - flexLength);
			}

			var lemmPrefix = _parent.Prefixes[_innerAnnot.PrefixNo];

			// The prefixes are compared with CompareOrdinal rather than Substring: when the remaining
			// base is shorter than the prefixes, CompareOrdinal reports a mismatch, whereas Substring
			// would throw ArgumentOutOfRangeException and the "prefixes were not cut" outcome could
			// never be reached for such words.
			_prefixesWereCut = _found
				|| (string.CompareOrdinal(_inputWordBase, 0, lemmPrefix, 0, lemmPrefix.Length) == 0
				&& string.CompareOrdinal(_inputWordBase, lemmPrefix.Length, m.PrefixStr, 0, m.PrefixStr.Length) == 0);

			if (_prefixesWereCut) {
				_inputWordBase = _inputWordBase.Remove(0, lemmPrefix.Length + m.PrefixStr.Length);
			}
		}
Exemplo n.º 3
0
 /// <summary>
 /// Loads <c>_homoWeights</c> and <c>_wordWeights</c> from two files obtained through
 /// <paramref name="manager"/>. Each file name is <paramref name="prefix"/> followed by the
 /// matching path constant from <c>Constants</c>.
 /// </summary>
 /// <param name="lemmatizer">Supplies the <c>Registry</c> and <c>CodePage</c> passed to <c>GetFile</c>.</param>
 /// <param name="prefix">Text prepended to both path constants.</param>
 /// <param name="manager">File manager used to open both files.</param>
 public void Load(Lemmatizer lemmatizer, string prefix, FileManager manager)
 {
     // Homonym weights first; the file is disposed before the next one is opened.
     var homoWeightPath = prefix + Constants.HomoweightBinPath;
     using (var homoWeightFile = manager.GetFile(lemmatizer.Registry, homoWeightPath, lemmatizer.CodePage))
     {
         Tools.LoadList(homoWeightFile, _homoWeights);
     }

     // Then the word weights, read the same way.
     var wordWeightPath = prefix + Constants.WordweightBinPath;
     using (var wordWeightFile = manager.GetFile(lemmatizer.Registry, wordWeightPath, lemmatizer.CodePage))
     {
         Tools.LoadList(wordWeightFile, _wordWeights);
     }
 }
Exemplo n.º 4
0
		/// <summary>
		/// Stores the constructor arguments and initialises two alphabet tables through
		/// <c>InitAlphabet</c>: one built with the annotation character and one built without it.
		/// </summary>
		/// <param name="lemmatizer">Stored in <c>_lemmatizer</c>.</param>
		/// <param name="language">Stored in <c>_language</c> and passed to both <c>InitAlphabet</c> calls.</param>
		/// <param name="annotChar">Stored in <c>_annotChar</c> and used for the first table.</param>
		/// <exception cref="MorphException">
		/// Thrown when the table with the annotation character is not exactly one entry larger
		/// than the table without it.
		/// </exception>
		public ABCEncoder(Lemmatizer lemmatizer, InternalMorphLanguage language, char annotChar)
		{
			_lemmatizer = lemmatizer;
			_language = language;
			_annotChar = annotChar;

			// Table that includes the annotation character.
			_alphabetSize = InitAlphabet(language, _code2Alphabet, _alphabet2Code, _annotChar);

			// Table without it: (char)257 is passed as a non-existing symbol.
			const char nonExistingSymbol = (char)257;
			_alphabetSizeWithoutAnnotator = InitAlphabet(
				language,
				_code2AlphabetWithoutAnnotator,
				_alphabet2CodeWithoutAnnotator,
				nonExistingSymbol);

			// The two sizes must differ by exactly one entry.
			var sizesAgree = _alphabetSizeWithoutAnnotator + 1 == _alphabetSize;
			if (!sizesAgree)
			{
				throw new MorphException("_alphabetSizeWithoutAnnotator + 1 != _alphabetSize");
			}
		}
Exemplo n.º 5
0
 /// <summary>
 /// Remembers the lemmatizer, language and annotation character, then builds the alphabet
 /// tables twice via <c>InitAlphabet</c>: once with the annotation character and once with a
 /// stand-in symbol, and verifies that the resulting sizes differ by one.
 /// </summary>
 /// <param name="lemmatizer">Value assigned to <c>_lemmatizer</c>.</param>
 /// <param name="language">Value assigned to <c>_language</c>; also given to <c>InitAlphabet</c>.</param>
 /// <param name="annotChar">Value assigned to <c>_annotChar</c>.</param>
 /// <exception cref="MorphException">
 /// Thrown when <c>_alphabetSizeWithoutAnnotator + 1</c> differs from <c>_alphabetSize</c>.
 /// </exception>
 public ABCEncoder(Lemmatizer lemmatizer, InternalMorphLanguage language, char annotChar)
 {
     _lemmatizer = lemmatizer;
     _language = language;
     _annotChar = annotChar;

     // (char)257 is used as a non-existing symbol for the annotator-free tables.
     const char placeholderSymbol = (char)257;

     _alphabetSize = InitAlphabet(language, _code2Alphabet, _alphabet2Code, _annotChar);
     _alphabetSizeWithoutAnnotator = InitAlphabet(
         language,
         _code2AlphabetWithoutAnnotator,
         _alphabet2CodeWithoutAnnotator,
         placeholderSymbol);

     // Consistency check between the two tables.
     if (_alphabetSize != _alphabetSizeWithoutAnnotator + 1)
     {
         throw new MorphException("_alphabetSizeWithoutAnnotator + 1 != _alphabetSize");
     }
 }
Exemplo n.º 6
0
        /// <summary>
        /// Builds the form description for <paramref name="inputWordForm"/>: the flexion (ending) of the
        /// selected flexia model and the prefixes are stripped from the word, and what remains is kept
        /// in <c>_inputWordBase</c>. <c>_flexiaWasCut</c> and <c>_prefixesWereCut</c> record which of the
        /// two cuts were actually performed.
        /// </summary>
        /// <param name="parent">Lemmatizer whose <c>Prefixes</c> table is indexed with <c>a.PrefixNo</c>.</param>
        /// <param name="a">Annotation supplying <c>ItemNo</c> (flexia model index) and <c>PrefixNo</c>.</param>
        /// <param name="inputWordForm">The word form to be split into prefixes, base and flexion.</param>
        /// <param name="found">
        /// When true, flexion and prefixes are removed without first checking that the word carries them.
        /// </param>
        public FormInfo(Lemmatizer parent, AutomAnnotationInner a, string inputWordForm, bool found)
        {
            // NOTE(review): FlexiaModel is declared outside this constructor and presumably reads the
            // fields assigned here, so these assignments stay ahead of the lookup - confirm.
            _innerAnnot    = a;
            _parent        = parent;
            _found         = found;
            _inputWordBase = inputWordForm;
            var m          = FlexiaModel[a.ItemNo];
            var flexLength = m.FlexiaStr.Length;

            _tools = new Tools();

            //  It can be so (if CLemmatizer::PredictByDataBase was used) that
            //  the flexion is not a suffix of m_InputWordBase, but only a part of it.
            //  If so, then we cannot generate the paradigm, since the current form cannot be
            //  divided into two parts: the base and a known flexion.

            // Ordinal EndsWith is the same test as "long enough and the tail equals the flexion".
            _flexiaWasCut = _found || _inputWordBase.EndsWith(m.FlexiaStr, System.StringComparison.Ordinal);

            // An empty flexion leaves nothing to remove, so Remove is skipped in that case.
            if (_flexiaWasCut && flexLength > 0)
            {
                _inputWordBase = _inputWordBase.Remove(_inputWordBase.Length - flexLength);
            }

            var lemmPrefix = _parent.Prefixes[_innerAnnot.PrefixNo];

            // The prefixes are compared with CompareOrdinal rather than Substring: when the remaining
            // base is shorter than the prefixes, CompareOrdinal reports a mismatch, whereas Substring
            // would throw ArgumentOutOfRangeException and the "prefixes were not cut" outcome could
            // never be reached for such words.
            _prefixesWereCut = _found ||
                (string.CompareOrdinal(_inputWordBase, 0, lemmPrefix, 0, lemmPrefix.Length) == 0 &&
                 string.CompareOrdinal(_inputWordBase, lemmPrefix.Length, m.PrefixStr, 0, m.PrefixStr.Length) == 0);

            if (_prefixesWereCut)
            {
                _inputWordBase = _inputWordBase.Remove(0, lemmPrefix.Length + m.PrefixStr.Length);
            }
        }
Exemplo n.º 7
0
		/// <summary>
		/// Replaces the lemmatizer held in <c>_parent</c> with <paramref name="parent"/>.
		/// </summary>
		/// <param name="parent">Lemmatizer to store; it is assigned as given, without validation.</param>
		public void AttachLemmatizer(Lemmatizer parent)
		{
			this._parent = parent;
		}
Exemplo n.º 8
0
 /// <summary>
 /// Creates the suffix automaton held in <c>_suffixAutomat</c>, using
 /// <c>Constants.MorphAnnotChar</c> as its annotation character.
 /// </summary>
 /// <param name="lemmatizer">Lemmatizer passed to the <c>MorphAutomat</c> constructor.</param>
 /// <param name="lang">Language passed to the <c>MorphAutomat</c> constructor.</param>
 public PredictBase(Lemmatizer lemmatizer, InternalMorphLanguage lang)
 {
     this._suffixAutomat = new MorphAutomat(
         lemmatizer,
         lang,
         Constants.MorphAnnotChar);
 }
Exemplo n.º 9
0
 /// <summary>
 /// Creates the automaton: all arguments go unchanged to the base-class constructor, after
 /// which a fresh <c>Tools</c> instance is stored in <c>_tools</c>.
 /// </summary>
 /// <param name="lemmatizer">Lemmatizer forwarded to the base constructor.</param>
 /// <param name="language">Language forwarded to the base constructor.</param>
 /// <param name="annotChar">Annotation character forwarded to the base constructor.</param>
 public MorphAutomat(
     Lemmatizer lemmatizer,
     InternalMorphLanguage language,
     char annotChar)
     : base(lemmatizer, language, annotChar)
 {
     this._tools = new Tools();
 }
Exemplo n.º 10
0
		/// <summary>
		/// Initialises <c>_suffixAutomat</c> with a new <c>MorphAutomat</c> whose annotation
		/// character is <c>Constants.MorphAnnotChar</c>.
		/// </summary>
		/// <param name="lemmatizer">Lemmatizer given to the automaton.</param>
		/// <param name="lang">Language given to the automaton.</param>
		public PredictBase(Lemmatizer lemmatizer, InternalMorphLanguage lang)
		{
			this._suffixAutomat = new MorphAutomat(lemmatizer, lang, Constants.MorphAnnotChar);
		}
Exemplo n.º 11
0
 /// <summary>
 /// Stores <paramref name="parent"/> in <c>_parent</c>, overwriting whatever was there before.
 /// </summary>
 /// <param name="parent">Lemmatizer to keep; no null check is performed.</param>
 public void AttachLemmatizer(Lemmatizer parent)
 {
     this._parent = parent;
 }