Пример #1
0
        /// <summary>
        /// Builds the decomposition of <paramref name="word"/> out of every construction
        /// found by <c>FindPossibleWordConstructions</c>, plus one extra construction made of
        /// the morphemes that <c>FindNonLexemes</c> returns for the whole word (when there are any).
        /// </summary>
        /// <param name="word">The word to decompose.</param>
        /// <param name="maxMorphDistance">
        /// Passed through unchanged to <c>FindPossibleWordConstructions</c>.
        /// </param>
        /// <returns>
        /// A <see cref="WordDecomposition"/> created from <paramref name="word"/> and the collected
        /// constructions; the collection is empty when nothing was found.
        /// </returns>
        public WordDecomposition DecomposeWord(string word, int maxMorphDistance)
        {
            using (Trace.Entering())
            {
                // The seed list handed to the search is mutated while the lazy result is being
                // enumerated, so the result has to be drained exactly once - right here.
                var seedSequence = new List<Morpheme>();
                List<WordConstruct> compositions =
                    FindPossibleWordConstructions(word, maxMorphDistance, seedSequence).ToList();

                // Morphemes matching the whole word form one additional construction,
                // appended after the searched ones.
                List<Morpheme> wholeWordNonLexemes = FindNonLexemes(word).ToList();
                if (wholeWordNonLexemes.Count > 0)
                {
                    compositions.Add(new WordConstruct(wholeWordNonLexemes));
                }

                return new WordDecomposition(word, compositions);
            }
        }
Пример #2
0
        /// <summary>
        /// Lazily enumerates the constructions of <paramref name="word"/> in three passes:
        /// the word as a lexeme, the word as a lexeme followed by suffixes, and the word as a
        /// prefix followed by any construction of the remainder (found recursively).
        /// </summary>
        /// <param name="word">The word, or the remainder of a word, to analyze.</param>
        /// <param name="morphDistance">
        /// Passed through unchanged to <c>FindLexemes</c>, <c>FindLexemeAndItsSuffixes</c>
        /// and the recursive calls.
        /// </param>
        /// <param name="localSequence">
        /// Accumulator holding the prefixes chosen so far. It is mutated while the result is
        /// being enumerated and is back in its initial state once enumeration finishes or the
        /// enumerator is disposed.
        /// </param>
        /// <returns>
        /// A deferred sequence of constructions. Because <paramref name="localSequence"/> is shared
        /// and mutated during enumeration, the sequence must be enumerated at most once at a time.
        /// </returns>
        private IEnumerable<WordConstruct> FindPossibleWordConstructions(
            string word,
            int morphDistance,
            List<Morpheme> localSequence)
        {
            using (Trace.Entering())
            {
                // Pass 1: the word itself is a lexeme.
                foreach (Morpheme lexeme in FindLexemes(word, morphDistance))
                {
                    localSequence.Add(lexeme);
                    try
                    {
                        // Snapshot the accumulator; it keeps changing after this yield.
                        yield return new WordConstruct(localSequence.ToList());
                    }
                    finally
                    {
                        // Runs on normal continuation, on early disposal and on exceptions,
                        // so the caller's list never keeps a stale entry.
                        localSequence.RemoveAt(localSequence.Count - 1);
                    }
                }

                // Pass 2: the word is a lexeme with suffixes.
                // Each sequence from the helper is reversed before it is appended to the prefixes
                // (presumably the helper reports the morphemes last-to-first - TODO confirm).
                foreach (IReadOnlyList<Morpheme> sequence in
                         FindLexemeAndItsSuffixes(word, morphDistance, new List<Morpheme>()))
                {
                    yield return new WordConstruct(localSequence.Concat(sequence.Reverse()).ToList());
                }

                // Pass 3: split the word into a prefix (first i characters) and a remainder.
                for (int i = 1; i < word.Length; ++i)
                {
                    string prefixCandidate = word.Substring(0, i);

                    // Materialize once so the lookup and the prefix filter are not executed
                    // twice (once for the emptiness check and once for the loop).
                    List<Morpheme> prefixHomonyms = FindNonLexemes(prefixCandidate)
                        .Where(x => AttributesModel.IsPrefix(x.Attributes))
                        .ToList();
                    if (prefixHomonyms.Count == 0)
                    {
                        continue;
                    }

                    string remainder = word.Substring(i);

                    foreach (Morpheme prefix in prefixHomonyms)
                    {
                        localSequence.Add(prefix);
                        try
                        {
                            // The recursive call sees the prefix through the shared accumulator,
                            // so its constructions are already complete and can be forwarded as-is.
                            foreach (WordConstruct construct in
                                     FindPossibleWordConstructions(remainder, morphDistance, localSequence))
                            {
                                yield return construct;
                            }
                        }
                        finally
                        {
                            localSequence.RemoveAt(localSequence.Count - 1);
                        }
                    }
                }
            }
        }