예제 #1
0
파일: FormInfo.cs 프로젝트: tvi123/rep123
		/// <summary>
		/// Builds a form description from an automaton annotation and the input word form,
		/// cutting the flexion (suffix) and the prefixes off the stored word base when possible.
		/// </summary>
		/// <param name="parent">Lemmatizer whose <c>Prefixes</c> table is indexed by the annotation's <c>PrefixNo</c>.</param>
		/// <param name="a">Annotation; its <c>ItemNo</c> selects the entry of <c>FlexiaModel</c> used here.</param>
		/// <param name="inputWordForm">The word form to split into prefixes, base and flexion.</param>
		/// <param name="found">
		/// When <c>true</c> the flexion and the prefixes are removed unconditionally (no match test is
		/// performed); when <c>false</c> they are removed only if the word actually ends / starts with them.
		/// </param>
		public FormInfo(Lemmatizer parent, AutomAnnotationInner a, string inputWordForm, bool found) {
			_innerAnnot = a;
			_parent = parent;
			_found = found;
			_inputWordBase = inputWordForm;
			var m = FlexiaModel[a.ItemNo];

			//  It can be so (if CLemmatizer::PredictByDataBase was used) that
			//  the flexion is not a suffix of _inputWordBase, but only a part of it.
			//  If so, then we cannot generate the paradigm, since the current form cannot be
			//  divided into two parts: the base and a known flexion.
			//  An ordinal EndsWith is the same test as "long enough and the tail equals the flexion".
			if (_found || _inputWordBase.EndsWith(m.FlexiaStr, System.StringComparison.Ordinal)) {
				_flexiaWasCut = true;

				// NOTE(review): when found is true no length check is made, so a word shorter than
				// the flexion yields a negative index and Remove throws — presumably callers
				// guarantee this cannot happen; confirm.
				var rempos = _inputWordBase.Length - m.FlexiaStr.Length;
				if (rempos < _inputWordBase.Length) {
					_inputWordBase = _inputWordBase.Remove(rempos);
				}
			} else {
				_flexiaWasCut = false;
			}

			// The word must start with the lemma prefix immediately followed by the model prefix.
			// StartsWith returns false for a too-short word, whereas slicing with Substring
			// would throw ArgumentOutOfRangeException in that case.
			var lemmPrefix = _parent.Prefixes[_innerAnnot.PrefixNo];
			var wholePrefix = lemmPrefix + m.PrefixStr;
			if (_found || _inputWordBase.StartsWith(wholePrefix, System.StringComparison.Ordinal)) {
				_inputWordBase = _inputWordBase.Remove(0, wholePrefix.Length);
				_prefixesWereCut = true;
			} else {
				_prefixesWereCut = false;
			}
		}
예제 #2
0
        /// <summary>
        /// Visits the node <paramref name="nodeNo"/> and, recursively, all of its children.
        /// For every final node an annotation decoded from the path walked so far is appended
        /// to <paramref name="infos"/>.
        /// </summary>
        /// <param name="nodeNo">Index of the node to visit.</param>
        /// <param name="currPath">Characters collected on the way from the start node to this node.</param>
        /// <param name="infos">Output list that receives one annotation per final node reached.</param>
        private void GetAllMorphInterpsRecursive(int nodeNo, string currPath, IList <AutomAnnotationInner> infos)
        {
            if (_nodes[nodeNo].IsFinal)
            {
                var annotation = new AutomAnnotationInner();
                int encoded = DecodeFromAlphabet(currPath);
                int modelNo, itemNo, prefixNo;

                DecodeMorphAutomatInfo(encoded, out modelNo, out itemNo, out prefixNo);
                annotation.ItemNo   = (short)itemNo;
                annotation.ModelNo  = (short)modelNo;
                annotation.PrefixNo = (short)prefixNo;
                infos.Add(annotation);
            }

            var childCount = GetChildrenCount(nodeNo);
            var tailIndex  = currPath.Length;

            // Scratch buffer: the current path plus one trailing slot that is overwritten
            // with each child's character in turn.
            var childPath = new char[tailIndex + 1];
            currPath.CopyTo(0, childPath, 0, tailIndex);

            var childIndex = 0;
            while (childIndex < childCount)
            {
                var child = GetChildren(nodeNo, childIndex);
                childPath[tailIndex] = _tools.GetChar(child.RelationalChar, Lemmatizer.CodePage);
                GetAllMorphInterpsRecursive(child.ChildNo, new string(childPath), infos);
                childIndex++;
            }
        }
예제 #3
0
        /// <summary>
        /// Re-initialises this form info from a paradigm id.
        /// </summary>
        /// <param name="newVal">Paradigm id; it is split into its parts by <c>SplitParadigmId</c>.</param>
        /// <returns>
        /// <c>false</c> (leaving the object untouched) when the lemma-info number or the prefix number
        /// obtained from <paramref name="newVal"/> lies beyond the parent's tables; otherwise <c>true</c>.
        /// </returns>
        /// <exception cref="MorphException">Thrown when no parent lemmatizer is attached.</exception>
        public bool SetParadigmId(int newVal)
        {
            if (_parent == null)
            {
                throw new MorphException("_parent == null");
            }
            var a = new AutomAnnotationInner();

            a.SplitParadigmId(newVal);

            // Both numbers are used as indices (LemmaInfos is indexed below, Prefixes in the
            // constructor), so a value equal to Count is already out of range and must be rejected.
            if (a.LemmaInfoNo >= _parent.LemmaInfos.Count)
            {
                return(false);
            }
            if (a.PrefixNo >= _parent.Prefixes.Count)
            {
                return(false);
            }
            a.ItemNo    = 0;
            a.Weight    = _parent.Statistic.GetHomoWeight(a.ParadigmId, 0);
            a.ModelNo   = _parent.LemmaInfos[a.LemmaInfoNo].LemmaInfo.FlexiaModelNo;
            _innerAnnot = a;

            _prefixesWereCut = true;
            _flexiaWasCut    = true;
            _found           = true;

            // The word base is the normal form without the trailing first flexion of the model.
            _inputWordBase   = SrcNorm;
            _inputWordBase   = _inputWordBase.Remove(_inputWordBase.Length - FlexiaModel.FirstFlex.Length);

            return(true);
        }
예제 #4
0
파일: Lemmatizer.cs 프로젝트: tvi123/rep123
		/// <summary>
		/// Creates an annotation from a prediction tuple: the lemma-info number and item number are
		/// copied from <paramref name="input"/>, the model number is looked up in <c>LemmaInfos</c>,
		/// and weight and prefix number are set to zero.
		/// </summary>
		/// <param name="input">Prediction tuple to convert.</param>
		/// <returns>The newly created annotation.</returns>
		private AutomAnnotationInner ConvertPredictTupleToAnnot(PredictTuple input) {
			var annot = new AutomAnnotationInner { LemmaInfoNo = input.LemmaInfoNo };

			// The flexion model number is taken from the lemma info the tuple points at.
			annot.ModelNo = LemmaInfos[annot.LemmaInfoNo].LemmaInfo.FlexiaModelNo;

			annot.Weight = 0;
			annot.PrefixNo = 0;
			annot.ItemNo = input.ItemNo;
			return annot;
		}
예제 #5
0
        /// <summary>
        /// Converts a prediction tuple into an annotation. Lemma-info number and item number come
        /// from <paramref name="input"/>; the model number is read from the matching entry of
        /// <c>LemmaInfos</c>; weight and prefix number are zero.
        /// </summary>
        /// <param name="input">Prediction tuple to convert.</param>
        /// <returns>The resulting annotation.</returns>
        private AutomAnnotationInner ConvertPredictTupleToAnnot(PredictTuple input)
        {
            var result = new AutomAnnotationInner
            {
                LemmaInfoNo = input.LemmaInfoNo
            };

            // Look the flexion model up through the lemma info that was just stored.
            result.ModelNo  = LemmaInfos[result.LemmaInfoNo].LemmaInfo.FlexiaModelNo;
            result.Weight   = 0;
            result.PrefixNo = 0;
            result.ItemNo   = input.ItemNo;

            return(result);
        }
예제 #6
0
        /// <summary>
        /// Builds a form description from an automaton annotation and the input word form,
        /// cutting the flexion (suffix) and the prefixes off the stored word base when possible.
        /// </summary>
        /// <param name="parent">Lemmatizer whose <c>Prefixes</c> table is indexed by the annotation's <c>PrefixNo</c>.</param>
        /// <param name="a">Annotation; its <c>ItemNo</c> selects the entry of <c>FlexiaModel</c> used here.</param>
        /// <param name="inputWordForm">The word form to split into prefixes, base and flexion.</param>
        /// <param name="found">
        /// When <c>true</c> the flexion and the prefixes are removed unconditionally (no match test is
        /// performed); when <c>false</c> they are removed only if the word actually ends / starts with them.
        /// </param>
        public FormInfo(Lemmatizer parent, AutomAnnotationInner a, string inputWordForm, bool found)
        {
            _innerAnnot    = a;
            _parent        = parent;
            _found         = found;
            _inputWordBase = inputWordForm;
            var m          = FlexiaModel[a.ItemNo];

            _tools = new Tools();

            //  It can be so (if CLemmatizer::PredictByDataBase was used) that
            //  the flexion is not a suffix of _inputWordBase, but only a part of it.
            //  If so, then we cannot generate the paradigm, since the current form cannot be
            //  divided into two parts: the base and a known flexion.
            //  An ordinal EndsWith is the same test as "long enough and the tail equals the flexion".
            if (_found || _inputWordBase.EndsWith(m.FlexiaStr, System.StringComparison.Ordinal))
            {
                _flexiaWasCut = true;

                // NOTE(review): when found is true no length check is made, so a word shorter than
                // the flexion yields a negative index and Remove throws — presumably callers
                // guarantee this cannot happen; confirm.
                var rempos = _inputWordBase.Length - m.FlexiaStr.Length;
                if (rempos < _inputWordBase.Length)
                {
                    _inputWordBase = _inputWordBase.Remove(rempos);
                }
            }
            else
            {
                _flexiaWasCut = false;
            }

            // The word must start with the lemma prefix immediately followed by the model prefix.
            // StartsWith returns false for a too-short word, whereas slicing with Substring
            // would throw ArgumentOutOfRangeException in that case.
            var lemmPrefix  = _parent.Prefixes[_innerAnnot.PrefixNo];
            var wholePrefix = lemmPrefix + m.PrefixStr;

            if (_found || _inputWordBase.StartsWith(wholePrefix, System.StringComparison.Ordinal))
            {
                _inputWordBase   = _inputWordBase.Remove(0, wholePrefix.Length);
                _prefixesWereCut = true;
            }
            else
            {
                _prefixesWereCut = false;
            }
        }
예제 #7
0
		/// <summary>
		/// Visits the node <paramref name="nodeNo"/> and, recursively, all of its children.
		/// For every final node an annotation decoded from the path walked so far is appended
		/// to <paramref name="infos"/>.
		/// </summary>
		/// <param name="nodeNo">Index of the node to visit.</param>
		/// <param name="currPath">Characters collected on the way from the start node to this node.</param>
		/// <param name="infos">Output list that receives one annotation per final node reached.</param>
		private void GetAllMorphInterpsRecursive(int nodeNo, string currPath, IList<AutomAnnotationInner> infos) {
			if (_nodes[nodeNo].IsFinal) {
				var annotation = new AutomAnnotationInner();
				int encoded = DecodeFromAlphabet(currPath);
				int modelNo, itemNo, prefixNo;
				DecodeMorphAutomatInfo(encoded, out modelNo, out itemNo, out prefixNo);
				annotation.ItemNo = (short)itemNo;
				annotation.ModelNo = (short)modelNo;
				annotation.PrefixNo = (short)prefixNo;
				infos.Add(annotation);
			}

			var childCount = GetChildrenCount(nodeNo);
			var tailIndex = currPath.Length;

			// Scratch buffer: the current path plus one trailing slot that is overwritten
			// with each child's character in turn.
			var childPath = new char[tailIndex + 1];
			currPath.CopyTo(0, childPath, 0, tailIndex);

			var childIndex = 0;
			while (childIndex < childCount) {
				var child = GetChildren(nodeNo, childIndex);
				childPath[tailIndex] = Tools.GetChar(child.RelationalChar);
				GetAllMorphInterpsRecursive(child.ChildNo, new string(childPath), infos);
				childIndex++;
			}
		}
예제 #8
0
파일: FormInfo.cs 프로젝트: tvi123/rep123
		/// <summary>
		/// Re-initialises this form info from a paradigm id.
		/// </summary>
		/// <param name="newVal">Paradigm id; it is split into its parts by <c>SplitParadigmId</c>.</param>
		/// <returns>
		/// <c>false</c> (leaving the object untouched) when the lemma-info number or the prefix number
		/// obtained from <paramref name="newVal"/> lies beyond the parent's tables; otherwise <c>true</c>.
		/// </returns>
		/// <exception cref="MorphException">Thrown when no parent lemmatizer is attached.</exception>
		public bool SetParadigmId(int newVal) {
			if (_parent == null) {
				throw new MorphException("_parent == null");
			}
			var a = new AutomAnnotationInner();
			a.SplitParadigmId(newVal);

			// Both numbers are used as indices (LemmaInfos is indexed below, Prefixes in the
			// constructor), so a value equal to Count is already out of range and must be rejected.
			if (a.LemmaInfoNo >= _parent.LemmaInfos.Count) {
				return false;
			}
			if (a.PrefixNo >= _parent.Prefixes.Count) {
				return false;
			}
			a.ItemNo = 0;
			a.Weight = _parent.Statistic.GetHomoWeight(a.ParadigmId, 0);
			a.ModelNo = _parent.LemmaInfos[a.LemmaInfoNo].LemmaInfo.FlexiaModelNo;
			_innerAnnot = a;

			_prefixesWereCut = true;
			_flexiaWasCut = true;
			_found = true;

			// The word base is the normal form without the trailing first flexion of the model.
			_inputWordBase = SrcNorm;
			_inputWordBase = _inputWordBase.Remove(_inputWordBase.Length - FlexiaModel.FirstFlex.Length);

			return true;
		}