/// <summary>
/// Performs morphological analysis for every word, records exactly one morpho-ambiguity entry per word
/// (in list order) in <c>_wordMorphoAmbiguities</c>, then pre-processes and resolves the collected entries.
/// </summary>
/// <remarks>
/// <para>
/// In DEBUG builds the method takes a second argument, <c>applyMorphoAmbiguityPreProcess</c>;
/// the pre-processing step runs only when it is <c>true</c>. In other builds pre-processing always runs.
/// </para>
/// <para>
/// <c>_wordMorphoAmbiguities</c> is a member-level buffer that is reused between calls. It is cleared in a
/// <c>finally</c> block so that an exception thrown part-way through cannot leave stale entries
/// (with indices belonging to a previous input) behind for the next call.
/// </para>
/// </remarks>
/// <param name="words">Words to analyze; a word's position in the list is passed to the factory as its index.</param>
public unsafe void Run(List<Word> words
#if DEBUG
    , bool applyMorphoAmbiguityPreProcess
#endif
    )
{
    try
    {
        var wordMorphology = default(WordMorphology);
        for (int i = 0, wordsLength = words.Count; i < wordsLength; i++)
        {
            var word = words[i];

            // Words with a non-default extra word type get an entry without morphology.
            if (word.posTaggerExtraWordType != PosTaggerExtraWordType.__DEFAULT__)
            {
                _wordMorphoAmbiguities.Add(_wordMorphoAmbiguityFactory.Create(word, i));
                continue;
            }

            switch (word.posTaggerInputType)
            {
                case PosTaggerInputType.O:      // "O" - other
                case PosTaggerInputType.AllLat: // Latin only: no lowercase letters and no dots
                case PosTaggerInputType.FstC:   // first letter is a capital, contains no spaces
                {
                    if (word.valueUpper == null)
                    {
                        _wordMorphoAmbiguities.Add(_wordMorphoAmbiguityFactory.Create(word, i));
                        continue;
                    }

                    var mode = GetWordFormMorphologyMode(word, i);
                    wordMorphology = _morphoAnalyzer.GetWordMorphology_NoToUpper(word.valueUpper, mode);
                }
                break;

                case PosTaggerInputType.Num:    // contains at least one digit and no letters
                {
                    if (word.posTaggerLastValueUpperInNumeralChain == null)
                    {
                        _wordMorphoAmbiguities.Add(_wordMorphoAmbiguityFactory.Create(word, i));
                        continue;
                    }

                    var mode = GetWordFormMorphologyMode(word, i);
                    wordMorphology = _morphoAnalyzer.GetWordMorphology_4LastValueUpperInNumeralChain(
                        word.posTaggerLastValueUpperInNumeralChain, mode);
                }
                break;

                default:
                    // Every other input type is not analyzed.
                    _wordMorphoAmbiguities.Add(_wordMorphoAmbiguityFactory.Create(word, i));
                    continue;
            }

            // ---- post-process the morpho-analysis result ----
            if (!wordMorphology.HasWordFormMorphologies)
            {
                _wordMorphoAmbiguities.Add(_wordMorphoAmbiguityFactory.Create(word, i));
                continue;
            }

            var wfms = default(WordFormMorphology[]);

            // If the word has only one part of speech in the morpho-dictionary,
            // use it instead of the one determined by the PoS-tagger.
            if (wordMorphology.IsSinglePartOfSpeech)
            {
                CorrectPosTaggerOutputType(word, wordMorphology.PartOfSpeech);
                wfms = wordMorphology.WordFormMorphologies.ToArray();
                _wordMorphoAmbiguities.Add(_wordMorphoAmbiguityFactory.Create(word, i, wfms));
                continue;
            }

            // ---- clause #1 ----
            // The morpho-dictionary defines several parts of speech for this word.
            // Look through the word's morpho-info for the part of speech reported by the PoS-tagger;
            // if it is there, take it.
            // The two explicit cases exist because these tagger outputs each map to two parts of speech:
            //   PosTaggerOutputType.AdjectivePronoun => PartOfSpeechEnum.Adjective, PartOfSpeechEnum.Pronoun
            //   PosTaggerOutputType.AdverbialPronoun => PartOfSpeechEnum.Adverb,    PartOfSpeechEnum.Pronoun
            var partOfSpeech = default(PartOfSpeechEnum?);
            switch (word.posTaggerOutputType)
            {
                case PosTaggerOutputType.AdjectivePronoun:
                {
                    wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Adjective, PartOfSpeechEnum.Pronoun);
                    if (wfms != null)
                    {
                        _wordMorphoAmbiguities.Add(_wordMorphoAmbiguityFactory.Create(word, i, wfms));
                        continue;
                    }
                }
                break;

                case PosTaggerOutputType.AdverbialPronoun:
                {
                    wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Adverb, PartOfSpeechEnum.Pronoun);
                    if (wfms != null)
                    {
                        _wordMorphoAmbiguities.Add(_wordMorphoAmbiguityFactory.Create(word, i, wfms));
                        continue;
                    }
                }
                break;

                default:
                {
                    partOfSpeech = ToPartOfSpeech(word.posTaggerOutputType);
                    if (partOfSpeech.HasValue)
                    {
                        wfms = TryGetByPosTaggerOutputType(ref wordMorphology, partOfSpeech.Value);
                        if (wfms != null)
                        {
                            _wordMorphoAmbiguities.Add(_wordMorphoAmbiguityFactory.Create(word, i, wfms));
                            continue;
                        }
                    }
                }
                break;
            }

            // ---- clause #2 ----
            // No direct match. Try the following correspondences first
            // (left: PoS-tagger output, right: morpho-dictionary; the order of the items does not matter).
            switch (word.posTaggerOutputType)
            {
                /* 2.3. Participle = Adjective */
                case PosTaggerOutputType.Participle:
                    wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Adjective);
                    break;

                default:
                    // ---- clause #3 ----
                    if (partOfSpeech.HasValue)
                    {
                        switch (partOfSpeech.Value)
                        {
                            /* 3.1. Pronoun = Noun */
                            case PartOfSpeechEnum.Pronoun:
                                wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Noun);
                                break;

                            /* 3.2. Noun = Pronoun */
                            case PartOfSpeechEnum.Noun:
                                wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Pronoun);
                                break;

                            /* 3.3. Conjunction = Particle */
                            case PartOfSpeechEnum.Conjunction:
                                wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Particle);
                                break;

                            /* 3.4. Particle = Conjunction */
                            case PartOfSpeechEnum.Particle:
                                wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Conjunction);
                                break;

                            /* 3.5. Numeral = Noun, Adjective */
                            case PartOfSpeechEnum.Numeral:
                                wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Noun, PartOfSpeechEnum.Adjective);
                                break;

                            /* 3.6. Adjective = Verb, Adverb */
                            case PartOfSpeechEnum.Adjective:
                                wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Verb, PartOfSpeechEnum.Adverb);
                                break;

                            /* 3.7. Adverb = Adjective */
                            case PartOfSpeechEnum.Adverb:
                                wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Adjective);
                                break;
                        }
                    }
                    break;
            }

            // If no correspondence was found, take the first part of speech returned by the
            // morpho-dictionary and overwrite the PoS-tagger output type of the word accordingly.
            if (wfms == null)
            {
                var firstPartOfSpeech = wordMorphology.WordFormMorphologies[0].PartOfSpeech;
                word.posTaggerOutputType = ToPosTaggerOutputType(firstPartOfSpeech);
                wfms = TryGetByPosTaggerOutputType(ref wordMorphology, firstPartOfSpeech);
            }

            _wordMorphoAmbiguities.Add(_wordMorphoAmbiguityFactory.Create(word, i, wfms));
        }

        // Pre-process the collected morpho-ambiguities (optional in DEBUG builds, unconditional otherwise).
#if DEBUG
        if (applyMorphoAmbiguityPreProcess)
#endif
        {
            _morphoAmbiguityPreProcessor.Run(_wordMorphoAmbiguities);
        }

        _morphoAmbiguityResolver.Resolve(_wordMorphoAmbiguities);
    }
    finally
    {
        // Always reset the reused buffer, including when an exception escapes,
        // so the next call never sees entries from this one.
        _wordMorphoAmbiguities.Clear();
    }
}
/// <summary>
/// Performs morphological analysis for every word, records exactly one morpho-ambiguity entry per word
/// (in list order) in <c>_WordMorphoAmbiguities</c>, then pre-processes and resolves the collected entries.
/// </summary>
/// <remarks>
/// <para>
/// In DEBUG builds the method takes a second argument, <c>applyMorphoAmbiguityPreProcess</c>;
/// the pre-processing step runs only when it is <c>true</c>. DEBUG builds additionally assert the
/// first-character case flag of the word and store the chosen morphologies in <c>word.morphologies</c>.
/// In other builds pre-processing always runs.
/// </para>
/// <para>
/// <c>_WordMorphoAmbiguities</c> is a member-level buffer that is reused between calls. It is cleared in a
/// <c>finally</c> block so that an exception thrown part-way through cannot leave stale entries
/// (with indices belonging to a previous input) behind for the next call.
/// </para>
/// </remarks>
/// <param name="words">Words to analyze; a word's position in the list is passed to the factory as its index.</param>
public unsafe void Run(List<word_t> words
#if DEBUG
    , bool applyMorphoAmbiguityPreProcess
#endif
    )
{
    try
    {
        // ---- apply morpho-analysis ----
        var wordMorphology = default(WordMorphology_t);
        for (int i = 0, wordsLength = words.Count; i < wordsLength; i++)
        {
            var word = words[i];

            // ---- skip what is not analyzed & run the morpho-analyzer ----

            // Words with a non-default extra word type get an entry without morphology.
            if (word.posTaggerExtraWordType != PosTaggerExtraWordType.__DEFAULT__)
            {
                _WordMorphoAmbiguities.Add(_WordMorphoAmbiguityFactory.Create(word, i));
                continue;
            }

            switch (word.posTaggerInputType)
            {
                case PosTaggerInputType.O:      // "O" - other
                case PosTaggerInputType.AllLat: // Latin only: no lowercase letters and no dots
                case PosTaggerInputType.FstC:   // first letter is a capital, contains no spaces
                {
                    if (word.valueUpper == null)
                    {
                        _WordMorphoAmbiguities.Add(_WordMorphoAmbiguityFactory.Create(word, i));
                        continue;
                    }

#if DEBUG
                    // Cross-check the cached first-char-is-upper flag against the char-type table.
                    var wordFirstCharIsUpper = (*(_CTM + word.valueOriginal[0]) & CharType.IsUpper) == CharType.IsUpper;
                    Debug.Assert(wordFirstCharIsUpper == word.posTaggerFirstCharIsUpper, "(wordFirstCharIsUpper != word.posTaggerFirstCharIsUpper)");
#endif
                    var mode = GetWordFormMorphologyMode(word, i);
                    wordMorphology = _MorphoAnalyzer.GetWordMorphology_NoToUpper(word.valueUpper, mode);

                    // NOTE: do NOT null out word.valueUpper here to free it; per the original
                    // author's note the value is still used further on.
                }
                break;

                case PosTaggerInputType.Num:    // contains at least one digit and no letters
                {
                    if (word.posTaggerLastValueUpperInNumeralChain == null)
                    {
                        _WordMorphoAmbiguities.Add(_WordMorphoAmbiguityFactory.Create(word, i));
                        continue;
                    }

                    var mode = GetWordFormMorphologyMode(word, i);
                    wordMorphology = _MorphoAnalyzer.GetWordMorphology_4LastValueUpperInNumeralChain(
                        word.posTaggerLastValueUpperInNumeralChain, mode);

                    // NOTE: do NOT null out word.posTaggerLastValueOriginalInNumeralChain here to free it;
                    // per the original author's note the value is still used further on.
                }
                break;

                default:
                    // Input types that are skipped, as listed by the original author:
                    //   CompPh - compound (contains at least one space);
                    //   Col    - colon;
                    //   Com    - comma;
                    //   Dush   - dash;
                    //   OneCP  - first capital letter followed by a dot.
                    _WordMorphoAmbiguities.Add(_WordMorphoAmbiguityFactory.Create(word, i));
                    continue;
            }

            // ---- post-process the morpho-analysis result ----
            if (!wordMorphology.HasWordFormMorphologies)
            {
                _WordMorphoAmbiguities.Add(_WordMorphoAmbiguityFactory.Create(word, i));
                continue;
            }

            var wfms = default(WordFormMorphology_t[]);

            // If the word has only one part of speech in the morpho-dictionary,
            // use it instead of the one determined by the PoS-tagger.
            if (wordMorphology.IsSinglePartOfSpeech)
            {
                CorrectPosTaggerOutputType(word, wordMorphology.PartOfSpeech);
                wfms = wordMorphology.WordFormMorphologies.ToArray();
                _WordMorphoAmbiguities.Add(_WordMorphoAmbiguityFactory.Create(word, i, wfms));
#if DEBUG
                word.morphologies = wfms;
#endif
                continue;
            }

            // ---- clause #1 ----
            // The morpho-dictionary defines several parts of speech for this word.
            // Look through the word's morpho-info for the part of speech reported by the PoS-tagger;
            // if it is there, take it.
            // The two explicit cases exist because these tagger outputs each map to two parts of speech:
            //   PosTaggerOutputType.AdjectivePronoun => PartOfSpeechEnum.Adjective, PartOfSpeechEnum.Pronoun
            //   PosTaggerOutputType.AdverbialPronoun => PartOfSpeechEnum.Adverb,    PartOfSpeechEnum.Pronoun
            var partOfSpeech = default(PartOfSpeechEnum?);
            switch (word.posTaggerOutputType)
            {
                case PosTaggerOutputType.AdjectivePronoun:
                {
                    wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Adjective, PartOfSpeechEnum.Pronoun);
                    if (wfms != null)
                    {
                        _WordMorphoAmbiguities.Add(_WordMorphoAmbiguityFactory.Create(word, i, wfms));
#if DEBUG
                        word.morphologies = wfms;
#endif
                        continue;
                    }
                }
                break;

                case PosTaggerOutputType.AdverbialPronoun:
                {
                    wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Adverb, PartOfSpeechEnum.Pronoun);
                    if (wfms != null)
                    {
                        _WordMorphoAmbiguities.Add(_WordMorphoAmbiguityFactory.Create(word, i, wfms));
#if DEBUG
                        word.morphologies = wfms;
#endif
                        continue;
                    }
                }
                break;

                default:
                {
                    partOfSpeech = ToPartOfSpeech(word.posTaggerOutputType);
                    if (partOfSpeech.HasValue)
                    {
                        wfms = TryGetByPosTaggerOutputType(ref wordMorphology, partOfSpeech.Value);
                        if (wfms != null)
                        {
                            _WordMorphoAmbiguities.Add(_WordMorphoAmbiguityFactory.Create(word, i, wfms));
#if DEBUG
                            word.morphologies = wfms;
#endif
                            continue;
                        }
                    }
                }
                break;
            }

            // ---- clause #2 ----
            // No direct match. Try the following correspondences first
            // (left: PoS-tagger output, right: morpho-dictionary; the order of the items does not matter).
            // Items 2.1 (AdjectivePronoun) and 2.2 (AdverbialPronoun) are covered by clause #1 above.
            switch (word.posTaggerOutputType)
            {
                /* 2.3. Participle = Adjective */
                case PosTaggerOutputType.Participle:
                    wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Adjective);
#if DEBUG
                    word.morphologies = wfms;
#endif
                    break;

                default:
                    // ---- clause #3 ----
                    if (partOfSpeech.HasValue)
                    {
                        switch (partOfSpeech.Value)
                        {
                            /* 3.1. Pronoun = Noun */
                            case PartOfSpeechEnum.Pronoun:
                                wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Noun);
#if DEBUG
                                word.morphologies = wfms;
#endif
                                break;

                            /* 3.2. Noun = Pronoun */
                            case PartOfSpeechEnum.Noun:
                                wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Pronoun);
#if DEBUG
                                word.morphologies = wfms;
#endif
                                break;

                            /* 3.3. Conjunction = Particle */
                            case PartOfSpeechEnum.Conjunction:
                                wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Particle);
#if DEBUG
                                word.morphologies = wfms;
#endif
                                break;

                            /* 3.4. Particle = Conjunction */
                            case PartOfSpeechEnum.Particle:
                                wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Conjunction);
#if DEBUG
                                word.morphologies = wfms;
#endif
                                break;

                            /* 3.5. Numeral = Noun, Adjective */
                            case PartOfSpeechEnum.Numeral:
                                wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Noun, PartOfSpeechEnum.Adjective);
#if DEBUG
                                word.morphologies = wfms;
#endif
                                break;

                            /* 3.6. Adjective = Verb, Adverb */
                            case PartOfSpeechEnum.Adjective:
                                wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Verb, PartOfSpeechEnum.Adverb);
#if DEBUG
                                word.morphologies = wfms;
#endif
                                break;

                            /* 3.7. Adverb = Adjective */
                            case PartOfSpeechEnum.Adverb:
                                wfms = TryGetByPosTaggerOutputType(ref wordMorphology, PartOfSpeechEnum.Adjective);
#if DEBUG
                                word.morphologies = wfms;
#endif
                                break;
                        }
                    }
                    break;
            }

            // If no correspondence was found, take the first part of speech returned by the
            // morpho-dictionary and overwrite the PoS-tagger output type of the word accordingly.
            if (wfms == null)
            {
                var firstPartOfSpeech = wordMorphology.WordFormMorphologies[0].PartOfSpeech;
                word.posTaggerOutputType = ToPosTaggerOutputType(firstPartOfSpeech);
                wfms = TryGetByPosTaggerOutputType(ref wordMorphology, firstPartOfSpeech);
#if DEBUG
                word.morphologies = wfms;
#endif
            }

            _WordMorphoAmbiguities.Add(_WordMorphoAmbiguityFactory.Create(word, i, wfms));
        }

        // ---- pre-process morpho-ambiguity (optional in DEBUG builds, unconditional otherwise) ----
#if DEBUG
        if (applyMorphoAmbiguityPreProcess)
#endif
        {
            _MorphoAmbiguityPreProcessor.Run(_WordMorphoAmbiguities);
        }

        // ---- resolve morpho-ambiguity ----
        _MorphoAmbiguityResolver.Resolve(_WordMorphoAmbiguities);
    }
    finally
    {
        // Always reset the reused buffer, including when an exception escapes,
        // so the next call never sees entries from this one.
        _WordMorphoAmbiguities.Clear();
    }
}