/// <summary>
/// Creates a form description for <paramref name="inputWordForm"/> and reduces the stored
/// word base by cutting the flexion from its end and the prefixes from its start.
/// </summary>
/// <param name="parent">Lemmatizer whose <c>Prefixes</c> table is indexed by the annotation's prefix number.</param>
/// <param name="a">Annotation; its <c>ItemNo</c> selects the flexia-model entry and its <c>PrefixNo</c> the lemma prefix.</param>
/// <param name="inputWordForm">Word form that is used as the initial word base.</param>
/// <param name="found">
/// When true, the flexion and the prefixes are cut without being compared with the word form.
/// </param>
public FormInfo(Lemmatizer parent, AutomAnnotationInner a, string inputWordForm, bool found)
{
    _innerAnnot = a;
    _parent = parent;
    _found = found;
    _inputWordBase = inputWordForm;

    var m = FlexiaModel[a.ItemNo];
    var flexLength = m.FlexiaStr.Length;

    // It can happen (if the form was predicted rather than found) that the flexion is
    // not a suffix of the word base but only a part of it. In that case the paradigm
    // cannot be generated, because the current form cannot be divided into two parts:
    // the base and a known flexion.
    if (_found
        || ((_inputWordBase.Length >= flexLength)
            && (_inputWordBase.Substring(_inputWordBase.Length - flexLength) == m.FlexiaStr)))
    {
        _flexiaWasCut = true;
        var rempos = _inputWordBase.Length - m.FlexiaStr.Length;

        // rempos equals the length when the flexion is empty: nothing to remove then.
        if (rempos < _inputWordBase.Length)
        {
            _inputWordBase = _inputWordBase.Remove(rempos);
        }
    }
    else
    {
        _flexiaWasCut = false;
    }

    var lemmPrefix = _parent.Prefixes[_innerAnnot.PrefixNo];
    var prefixesLength = lemmPrefix.Length + m.PrefixStr.Length;

    // The length guard keeps Substring from throwing when the remaining base is shorter
    // than the two prefixes; such a base simply does not start with them.
    if (_found
        || ((_inputWordBase.Length >= prefixesLength)
            && (_inputWordBase.Substring(0, lemmPrefix.Length) == lemmPrefix)
            && (_inputWordBase.Substring(lemmPrefix.Length, m.PrefixStr.Length) == m.PrefixStr)))
    {
        _inputWordBase = _inputWordBase.Remove(0, prefixesLength);
        _prefixesWereCut = true;
    }
    else
    {
        _prefixesWereCut = false;
    }
}
/// <summary>
/// Walks the automaton depth-first from <paramref name="nodeNo"/> and appends one
/// annotation to <paramref name="infos"/> for every final node that is reached.
/// </summary>
/// <param name="nodeNo">Index of the node to start from.</param>
/// <param name="currPath">Characters accumulated on the way to <paramref name="nodeNo"/>.</param>
/// <param name="infos">Receives the decoded annotations.</param>
private void GetAllMorphInterpsRecursive(int nodeNo, string currPath, IList<AutomAnnotationInner> infos)
{
    if (_nodes[nodeNo].IsFinal)
    {
        // Only ItemNo, ModelNo and PrefixNo are filled in from the decoded path.
        var annotation = new AutomAnnotationInner();
        int modelNo;
        int itemNo;
        int prefixNo;
        DecodeMorphAutomatInfo(DecodeFromAlphabet(currPath), out modelNo, out itemNo, out prefixNo);
        annotation.ItemNo = (short)itemNo;
        annotation.ModelNo = (short)modelNo;
        annotation.PrefixNo = (short)prefixNo;
        infos.Add(annotation);
    }

    var childCount = GetChildrenCount(nodeNo);

    // Scratch buffer: the current path plus one trailing slot that is overwritten
    // with each child's character in turn.
    var lastSlot = currPath.Length;
    var pathBuffer = new char[lastSlot + 1];
    currPath.CopyTo(0, pathBuffer, 0, lastSlot);

    var childIndex = 0;
    while (childIndex < childCount)
    {
        var child = GetChildren(nodeNo, childIndex);
        pathBuffer[lastSlot] = _tools.GetChar(child.RelationalChar, Lemmatizer.CodePage);
        GetAllMorphInterpsRecursive(child.ChildNo, new string(pathBuffer), infos);
        childIndex++;
    }
}
/// <summary>
/// Re-initialises this form from a paradigm id: the id is split into an annotation,
/// validated against the parent's tables, and the word base is recomputed from
/// <c>SrcNorm</c> with the first flexion of the flexia model removed.
/// </summary>
/// <param name="newVal">Paradigm id that is decoded with <c>AutomAnnotationInner.SplitParadigmId</c>.</param>
/// <returns>
/// <c>false</c>, leaving the object unchanged, when the decoded lemma-info number or prefix
/// number is outside the parent's tables; otherwise <c>true</c>.
/// </returns>
/// <exception cref="MorphException">The form has no parent lemmatizer.</exception>
public bool SetParadigmId(int newVal)
{
    if (_parent == null)
    {
        throw new MorphException("_parent == null");
    }

    var a = new AutomAnnotationInner();
    a.SplitParadigmId(newVal);

    // LemmaInfoNo is used as an index into LemmaInfos below, so a value equal to
    // Count is already out of range (the check must be >=, not >).
    if (a.LemmaInfoNo >= _parent.LemmaInfos.Count)
    {
        return false;
    }

    // Same reasoning for the prefix table, which PrefixNo indexes.
    if (a.PrefixNo >= _parent.Prefixes.Count)
    {
        return false;
    }

    a.ItemNo = 0;
    a.Weight = _parent.Statistic.GetHomoWeight(a.ParadigmId, 0);
    a.ModelNo = _parent.LemmaInfos[a.LemmaInfoNo].LemmaInfo.FlexiaModelNo;

    _innerAnnot = a;
    _prefixesWereCut = true;
    _flexiaWasCut = true;
    _found = true;

    // SrcNorm is read only after the new annotation and flags are in place.
    _inputWordBase = SrcNorm;
    _inputWordBase = _inputWordBase.Remove(_inputWordBase.Length - FlexiaModel.FirstFlex.Length);
    return true;
}
/// <summary>
/// Builds an annotation from a prediction tuple: the lemma-info and item numbers are
/// copied from <paramref name="input"/>, the model number is looked up in
/// <c>LemmaInfos</c>, and weight and prefix number are set to zero.
/// </summary>
/// <param name="input">Prediction tuple to convert.</param>
/// <returns>The newly created annotation.</returns>
private AutomAnnotationInner ConvertPredictTupleToAnnot(PredictTuple input)
{
    var annot = new AutomAnnotationInner { LemmaInfoNo = input.LemmaInfoNo };

    // The flexia model number comes from the lemma info the tuple points at.
    var lemmaInfo = LemmaInfos[annot.LemmaInfoNo].LemmaInfo;
    annot.ModelNo = lemmaInfo.FlexiaModelNo;

    annot.Weight = 0;
    annot.PrefixNo = 0;
    annot.ItemNo = input.ItemNo;

    return annot;
}
/// <summary>
/// Converts a prediction tuple into an annotation. Lemma-info number and item number
/// are taken from the tuple, the model number from the matching <c>LemmaInfos</c> entry;
/// weight and prefix number are zero.
/// </summary>
/// <param name="input">Prediction tuple to convert.</param>
/// <returns>The annotation built from <paramref name="input"/>.</returns>
private AutomAnnotationInner ConvertPredictTupleToAnnot(PredictTuple input)
{
    var result = new AutomAnnotationInner();

    result.LemmaInfoNo = input.LemmaInfoNo;

    // Look the model number up through the lemma-info number just stored.
    var flexiaModelNo = LemmaInfos[result.LemmaInfoNo].LemmaInfo.FlexiaModelNo;
    result.ModelNo = flexiaModelNo;

    result.Weight = 0;
    result.PrefixNo = 0;
    result.ItemNo = input.ItemNo;

    return result;
}
/// <summary>
/// Creates a form description for <paramref name="inputWordForm"/> and reduces the stored
/// word base by cutting the flexion from its end and the prefixes from its start.
/// </summary>
/// <param name="parent">Lemmatizer whose <c>Prefixes</c> table is indexed by the annotation's prefix number.</param>
/// <param name="a">Annotation; its <c>ItemNo</c> selects the flexia-model entry and its <c>PrefixNo</c> the lemma prefix.</param>
/// <param name="inputWordForm">Word form that is used as the initial word base.</param>
/// <param name="found">
/// When true, the flexion and the prefixes are cut without being compared with the word form.
/// </param>
public FormInfo(Lemmatizer parent, AutomAnnotationInner a, string inputWordForm, bool found)
{
    _innerAnnot = a;
    _parent = parent;
    _found = found;
    _inputWordBase = inputWordForm;

    var m = FlexiaModel[a.ItemNo];
    var flexLength = m.FlexiaStr.Length;
    _tools = new Tools();

    // It can happen (if the form was predicted rather than found) that the flexion is
    // not a suffix of the word base but only a part of it. In that case the paradigm
    // cannot be generated, because the current form cannot be divided into two parts:
    // the base and a known flexion.
    if (_found
        || ((_inputWordBase.Length >= flexLength)
            && (_inputWordBase.Substring(_inputWordBase.Length - flexLength) == m.FlexiaStr)))
    {
        _flexiaWasCut = true;
        var rempos = _inputWordBase.Length - m.FlexiaStr.Length;

        // rempos equals the length when the flexion is empty: nothing to remove then.
        if (rempos < _inputWordBase.Length)
        {
            _inputWordBase = _inputWordBase.Remove(rempos);
        }
    }
    else
    {
        _flexiaWasCut = false;
    }

    var lemmPrefix = _parent.Prefixes[_innerAnnot.PrefixNo];
    var prefixesLength = lemmPrefix.Length + m.PrefixStr.Length;

    // The length guard keeps Substring from throwing when the remaining base is shorter
    // than the two prefixes; such a base simply does not start with them.
    if (_found
        || ((_inputWordBase.Length >= prefixesLength)
            && (_inputWordBase.Substring(0, lemmPrefix.Length) == lemmPrefix)
            && (_inputWordBase.Substring(lemmPrefix.Length, m.PrefixStr.Length) == m.PrefixStr)))
    {
        _inputWordBase = _inputWordBase.Remove(0, prefixesLength);
        _prefixesWereCut = true;
    }
    else
    {
        _prefixesWereCut = false;
    }
}
/// <summary>
/// Recursively visits the automaton below <paramref name="nodeNo"/>; each final node
/// contributes one annotation, decoded from the path that led to it, to
/// <paramref name="infos"/>.
/// </summary>
/// <param name="nodeNo">Index of the node being visited.</param>
/// <param name="currPath">Characters collected on the way to this node.</param>
/// <param name="infos">List the decoded annotations are appended to.</param>
private void GetAllMorphInterpsRecursive(int nodeNo, string currPath, IList<AutomAnnotationInner> infos)
{
    if (_nodes[nodeNo].IsFinal)
    {
        // Only ItemNo, ModelNo and PrefixNo are filled in from the decoded path.
        var annotation = new AutomAnnotationInner();
        int encoded = DecodeFromAlphabet(currPath);
        int modelNo;
        int itemNo;
        int prefixNo;
        DecodeMorphAutomatInfo(encoded, out modelNo, out itemNo, out prefixNo);
        annotation.ItemNo = (short)itemNo;
        annotation.ModelNo = (short)modelNo;
        annotation.PrefixNo = (short)prefixNo;
        infos.Add(annotation);
    }

    var childCount = GetChildrenCount(nodeNo);

    // The buffer holds the current path followed by one slot for the child's character;
    // that slot is rewritten for every child.
    var tail = currPath.Length;
    var extended = new char[tail + 1];
    currPath.CopyTo(0, extended, 0, tail);

    for (var childIndex = 0; childIndex < childCount; childIndex++)
    {
        var child = GetChildren(nodeNo, childIndex);
        extended[tail] = Tools.GetChar(child.RelationalChar);

        var childPath = new string(extended);
        GetAllMorphInterpsRecursive(child.ChildNo, childPath, infos);
    }
}
/// <summary>
/// Re-initialises this form from a paradigm id: the id is split into an annotation,
/// validated against the parent's tables, and the word base is recomputed from
/// <c>SrcNorm</c> with the first flexion of the flexia model removed.
/// </summary>
/// <param name="newVal">Paradigm id that is decoded with <c>AutomAnnotationInner.SplitParadigmId</c>.</param>
/// <returns>
/// <c>false</c>, leaving the object unchanged, when the decoded lemma-info number or prefix
/// number is outside the parent's tables; otherwise <c>true</c>.
/// </returns>
/// <exception cref="MorphException">The form has no parent lemmatizer.</exception>
public bool SetParadigmId(int newVal)
{
    if (_parent == null)
    {
        throw new MorphException("_parent == null");
    }

    var a = new AutomAnnotationInner();
    a.SplitParadigmId(newVal);

    // LemmaInfoNo is used as an index into LemmaInfos below, so a value equal to
    // Count is already out of range (the check must be >=, not >).
    if (a.LemmaInfoNo >= _parent.LemmaInfos.Count)
    {
        return false;
    }

    // Same reasoning for the prefix table, which PrefixNo indexes.
    if (a.PrefixNo >= _parent.Prefixes.Count)
    {
        return false;
    }

    a.ItemNo = 0;
    a.Weight = _parent.Statistic.GetHomoWeight(a.ParadigmId, 0);
    a.ModelNo = _parent.LemmaInfos[a.LemmaInfoNo].LemmaInfo.FlexiaModelNo;

    _innerAnnot = a;
    _prefixesWereCut = true;
    _flexiaWasCut = true;
    _found = true;

    // SrcNorm is read only after the new annotation and flags are in place.
    _inputWordBase = SrcNorm;
    _inputWordBase = _inputWordBase.Remove(_inputWordBase.Length - FlexiaModel.FirstFlex.Length);
    return true;
}