Exemplo n.º 1
0
        /// <summary>
        /// Copies the word found at <c>startIndex</c> into <c>results</c> as a new
        /// <see cref="WordInformation"/> and then advances <c>startIndex</c> by one.
        /// </summary>
        /// <remarks>
        /// Only the surface text and the unknown-word flag of the source word are carried over;
        /// the remaining constructor arguments, and the reading and pronunciation of the single
        /// word part, are passed as null.
        /// NOTE(review): <c>startIndex</c> and <c>results</c> are not declared in this method;
        /// presumably they are members of the enclosing class - confirm.
        /// </remarks>
        /// <param name="words">List that is indexed with <c>startIndex</c> to obtain the source word.</param>
        private void HandleSpecialChars(List <WordInformation> words)
        {
            var source  = words[startIndex];
            var surface = source.Surface;

            // Build a word that consists of exactly one part: the unchanged surface text.
            var copy = new WordInformation(null, null, surface, null, source.IsUnknownWord);
            copy.AddWordPart(surface, null, null);

            results.Add(copy);
            startIndex++;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Wraps <paramref name="unTokenized"/> in a new <see cref="WordInformation"/> that has a
        /// single word part.
        /// </summary>
        /// <remarks>
        /// The text is passed both as the third constructor argument and as the surface of the word
        /// part. The reading and pronunciation of that part are whatever
        /// <c>GetUntokenReadingAndPronunication</c> returns through its out parameters.
        /// </remarks>
        /// <param name="unTokenized">Text for which a word should be created.</param>
        /// <returns>The newly created word.</returns>
        private WordInformation GetUntokenWord(string unTokenized)
        {
            WordInformation word = new WordInformation(null, null, unTokenized, null);
            string          reading;
            string          pronunciation;

            GetUntokenReadingAndPronunication(unTokenized, out reading, out pronunciation);

            // Both values are already strings, so they are handed over as-is. Calling ToString()
            // on them added nothing and would throw a NullReferenceException if the helper left
            // either one null.
            word.AddWordPart(unTokenized, reading, pronunciation);
            return word;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Creates a <see cref="WordInformation"/> from the given surface and base form, marks it as
        /// found in the dictionary and appends it to <c>results</c>.
        /// </summary>
        /// <remarks>
        /// The reading and pronunciation start out as the string forms of <c>reading</c> and
        /// <c>pronunciation</c> and are passed by reference to
        /// <c>MakeSureReadingAndPronunIsCorrect</c>, which may replace them before they are stored
        /// on the word part. The link word group is copied from <c>words[startIndex]</c>.
        /// NOTE(review): <c>reading</c>, <c>pronunciation</c>, <c>words</c>, <c>startIndex</c> and
        /// <c>results</c> are not declared in this method; presumably they are members of the
        /// enclosing class - confirm.
        /// </remarks>
        /// <param name="wordSurface">Surface text used for the word part.</param>
        /// <param name="wordBaseform">Value passed as the third constructor argument of the new word.</param>
        /// <param name="matchWords">Entries forwarded to <c>MakeSureReadingAndPronunIsCorrect</c>.</param>
        private void CreateAndAddNewWord(string wordSurface, string wordBaseform, List <JmdictEntity> matchWords)
        {
            var created = new WordInformation(null, null, wordBaseform, null);

            // Take snapshots of the current reading/pronunciation; the helper may adjust them.
            string checkedReading       = reading.ToString();
            string checkedPronunciation = pronunciation.ToString();
            MakeSureReadingAndPronunIsCorrect(wordSurface, matchWords, ref checkedReading, ref checkedPronunciation);

            created.AddWordPart(wordSurface, checkedReading, checkedPronunciation);
            created.IsInDictionary = true;
            created.LinkWordGroup  = words[startIndex].LinkWordGroup;

            results.Add(created);
        }
        /// <summary>
        /// Repeats the dictionary lookup for <paramref name="selectedWord"/> using the result of
        /// <c>KataHiraConvert.ConvertKataToHira</c> on its surface, and merges any hits into
        /// <paramref name="perfectMatches"/>.
        /// </summary>
        /// <param name="selectedWord">Word whose conjugation data, base form, part of speech, reading and pronunciation are copied into the lookup word.</param>
        /// <param name="japEngDictionary">Dictionary passed on to <c>FindTokenPerfectMatchInDictionary</c>.</param>
        /// <param name="perfectMatches">Matches that were already found.</param>
        /// <returns>
        /// A new list holding the union of <paramref name="perfectMatches"/> and the new hits
        /// (compared with <c>JmdictEntity.EqualComparer</c>) when the lookup finds at least one
        /// entry; otherwise the <paramref name="perfectMatches"/> instance itself.
        /// </returns>
        private static List <JmdictEntity> TrySearchByConvertToHira(WordInformation selectedWord, Database japEngDictionary, List <JmdictEntity> perfectMatches)
        {
            var convertedSurface = KataHiraConvert.ConvertKataToHira(selectedWord.Surface);

            // Same word as the selected one, except that its surface is the converted text.
            var lookupWord = new WordInformation(selectedWord.FirstConjugationType,
                                                 selectedWord.FirstConjugationForm,
                                                 selectedWord.BaseForm,
                                                 selectedWord.PartOfSpeech);
            lookupWord.AddWordPart(convertedSurface, selectedWord.Reading, selectedWord.Pronunciation);

            var convertedMatches = FindTokenPerfectMatchInDictionary(lookupWord, japEngDictionary);

            return convertedMatches.Count > 0
                ? perfectMatches.Union(convertedMatches, JmdictEntity.EqualComparer).ToList()
                : perfectMatches;
        }
        /// <summary>
        /// Tries to look up the selected word and the word that follows it as one compound word.
        /// </summary>
        /// <remarks>
        /// The lookup is only attempted when the selected word reports <c>IsVerb()</c> and
        /// <c>IsMasuConjugation()</c>, is not the last element of <paramref name="words"/>, and the
        /// following word also reports <c>IsVerb()</c>.
        /// </remarks>
        /// <param name="currentSelectedWord">Word that forms the first half of the compound.</param>
        /// <param name="selectedIndex">Index used to locate the following word in <paramref name="words"/>.</param>
        /// <param name="words">Word list that contains the following word.</param>
        /// <param name="japEngDictionary">Dictionary used for the potential-form check and for the lookup.</param>
        /// <returns>
        /// The result of <c>FindTokenPerfectMatchInDictionary</c> for the compound word, or
        /// <c>null</c> when the preconditions above are not met.
        /// </returns>
        private static List <JmdictEntity> TrySearchCompoundVerbs(WordInformation currentSelectedWord, int selectedIndex, List <WordInformation> words, Database japEngDictionary)
        {
            if (!currentSelectedWord.IsVerb() ||
                !currentSelectedWord.IsMasuConjugation() ||
                selectedIndex >= words.Count - 1 ||
                !words[selectedIndex + 1].IsVerb())
            {
                return null;
            }

            var following = words[selectedIndex + 1];

            // Strip a potential conjugation, if there is one, so that the following word is in its
            // most basic form before the compound is built.
            var withoutPotential = WordInformation.TryRemoveGodanPotential(following, japEngDictionary);
            if (withoutPotential != null)
            {
                following = withoutPotential;
            }

            // The compound takes its conjugation data and part of speech from the following word.
            string compoundBaseForm = currentSelectedWord.Surface + following.BaseForm;
            var    compound         = new WordInformation(following.FirstConjugationType,
                                                          following.FirstConjugationForm,
                                                          compoundBaseForm,
                                                          following.PartOfSpeech,
                                                          false,
                                                          following.Conjugation);

            compound.AddWordPart(currentSelectedWord.Surface + following.Surface,
                                 currentSelectedWord.Reading + following.Reading,
                                 currentSelectedWord.Pronunciation + following.Pronunciation);

            return FindTokenPerfectMatchInDictionary(compound, japEngDictionary);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Turns a token sequence into a list of <see cref="WordInformation"/> objects, attaching
        /// tokens to the previously created word where that word accepts them as a conjugation part.
        /// </summary>
        /// <remarks>
        /// For each token, if the previous step flagged a conjugation and the current word accepts
        /// the token through <c>TryAddConjungationPart</c>, the token is added to that word.
        /// Otherwise a new word is created from the token and appended, followed by its
        /// <c>SplitWords</c> when <c>IsHaveSplitWords</c> is set. After the loop the list is passed
        /// to <c>FinalizeWordList</c>.
        /// </remarks>
        /// <typeparam name="T">Token type; a reference type implementing <c>IToken</c>.</typeparam>
        /// <param name="tokens">Tokens to combine, processed in list order.</param>
        /// <param name="dictionary">Dictionary handed to each new word and to <c>FinalizeWordList</c>.</param>
        /// <returns>The list of words that was built.</returns>
        public static List <WordInformation> Combine <T>(List <T> tokens, Database dictionary) where T : class, IToken
        {
            var             combined              = new List <WordInformation>();
            WordInformation activeWord            = null;
            T               precedingToken        = null;
            bool            expectConjugationPart = false;

            for (int index = 0; index < tokens.Count; index++)
            {
                T token     = tokens[index];
                T following = (index < tokens.Count - 1) ? tokens[index + 1] : null;

                // The flag is only ever set after a word has been created, so activeWord is
                // assigned whenever the second operand is evaluated.
                if (expectConjugationPart &&
                    activeWord.TryAddConjungationPart(precedingToken, token, following))
                {
                    activeWord.AddWordPart(token);
                    expectConjugationPart = following != null && IsConjugationForm(token, following);
                    precedingToken        = token;
                    continue;
                }

                expectConjugationPart = following != null && IsConjugationForm(token, following);

                activeWord = new WordInformation(token, following, dictionary);
                combined.Add(activeWord);

                if (activeWord.IsHaveSplitWords)
                {
                    // The split words are appended after the word itself, and the next token is
                    // not offered to this word as a conjugation part.
                    combined.AddRange(activeWord.SplitWords);
                    expectConjugationPart = false;
                }

                precedingToken = token;
            }

            FinalizeWordList(dictionary, combined);

            return combined;
        }