/// <summary>
/// Wraps the word at <c>words[startIndex]</c> in a fresh <see cref="WordInformation"/>
/// that carries only its surface (no reading, no pronunciation), appends it to
/// <c>results</c> and advances <c>startIndex</c> by one.
/// </summary>
/// <param name="words">Word list indexed by <c>startIndex</c>.</param>
/// <remarks>
/// <c>startIndex</c> and <c>results</c> are declared outside this method
/// (presumably members of the enclosing class).
/// </remarks>
private void HandleSpecialChars(List <WordInformation> words)
{
    var current = words[startIndex];
    var surface = current.Surface;

    // The surface doubles as the base-form argument; every other descriptor is left null.
    var passthrough = new WordInformation(null, null, surface, null, current.IsUnknownWord);
    passthrough.AddWordPart(surface, null, null);

    results.Add(passthrough);
    startIndex++;
}
/// <summary>
/// Builds a <see cref="WordInformation"/> for a piece of text that was not tokenized.
/// The text is used both as the third constructor argument and as the surface of the
/// single word part; reading and pronunciation come from
/// <c>GetUntokenReadingAndPronunication</c>.
/// </summary>
/// <param name="unTokenized">The raw text to wrap.</param>
/// <returns>The newly created word with one word part attached.</returns>
private WordInformation GetUntokenWord(string unTokenized)
{
    WordInformation word = new WordInformation(null, null, unTokenized, null);

    string reading;
    string pronunciation;
    GetUntokenReadingAndPronunication(unTokenized, out reading, out pronunciation);

    // Both values are already strings, so they are passed through directly. Calling
    // ToString() on them added nothing and threw NullReferenceException whenever the
    // helper produced null for either value.
    word.AddWordPart(unTokenized, reading, pronunciation);
    return word;
}
/// <summary>
/// Creates a <see cref="WordInformation"/> for a dictionary hit and appends it to
/// <c>results</c>.
/// </summary>
/// <param name="wordSurface">Surface text stored in the word part.</param>
/// <param name="wordBaseform">Base form passed to the <see cref="WordInformation"/> constructor.</param>
/// <param name="matchWords">
/// Dictionary entries handed to <c>MakeSureReadingAndPronunIsCorrect</c>, which may
/// rewrite the reading and pronunciation it receives by reference.
/// </param>
/// <remarks>
/// <c>reading</c>, <c>pronunciation</c>, <c>words</c>, <c>startIndex</c> and
/// <c>results</c> are declared outside this method (presumably members of the
/// enclosing class).
/// </remarks>
private void CreateAndAddNewWord(string wordSurface, string wordBaseform, List <JmdictEntity> matchWords)
{
    var created = new WordInformation(null, null, wordBaseform, null);

    // Snapshot the current reading/pronunciation as strings so the helper can adjust them by ref.
    string finalReading = reading.ToString();
    string finalPronunciation = pronunciation.ToString();
    MakeSureReadingAndPronunIsCorrect(wordSurface, matchWords, ref finalReading, ref finalPronunciation);

    created.AddWordPart(wordSurface, finalReading, finalPronunciation);
    created.IsInDictionary = true;

    // The new word takes over the link group of the word at the current start index.
    created.LinkWordGroup = words[startIndex].LinkWordGroup;

    results.Add(created);
}
/// <summary>
/// Repeats the dictionary lookup for <paramref name="selectedWord"/> after passing its
/// surface through <c>KataHiraConvert.ConvertKataToHira</c>, and merges any hits into
/// <paramref name="perfectMatches"/>.
/// </summary>
/// <param name="selectedWord">Word whose surface is converted before the lookup.</param>
/// <param name="japEngDictionary">Dictionary database forwarded to the lookup.</param>
/// <param name="perfectMatches">Matches found so far.</param>
/// <returns>
/// A new list holding the union of <paramref name="perfectMatches"/> and the new hits
/// (compared with <c>JmdictEntity.EqualComparer</c>) when the lookup finds anything;
/// otherwise the same <paramref name="perfectMatches"/> instance.
/// </returns>
private static List <JmdictEntity> TrySearchByConvertToHira(WordInformation selectedWord, Database japEngDictionary, List <JmdictEntity> perfectMatches)
{
    var convertedSurface = KataHiraConvert.ConvertKataToHira(selectedWord.Surface);

    // Same grammatical descriptors as the selected word; only the surface of the part differs.
    var convertedWord = new WordInformation(
        selectedWord.FirstConjugationType,
        selectedWord.FirstConjugationForm,
        selectedWord.BaseForm,
        selectedWord.PartOfSpeech);
    convertedWord.AddWordPart(convertedSurface, selectedWord.Reading, selectedWord.Pronunciation);

    var extraMatches = FindTokenPerfectMatchInDictionary(convertedWord, japEngDictionary);
    if (extraMatches.Count <= 0)
    {
        return perfectMatches;
    }

    return perfectMatches.Union(extraMatches, JmdictEntity.EqualComparer).ToList();
}
/// <summary>
/// Tries to look up the selected word and the word that follows it as one compound verb.
/// </summary>
/// <param name="currentSelectedWord">Candidate first half of the compound.</param>
/// <param name="selectedIndex">Index of <paramref name="currentSelectedWord"/> in <paramref name="words"/>.</param>
/// <param name="words">Word list that contains the following word.</param>
/// <param name="japEngDictionary">Dictionary database used for the lookups.</param>
/// <returns>
/// The result of <c>FindTokenPerfectMatchInDictionary</c> for the compound, or
/// <c>null</c> when the pair does not qualify: the selected word is not a verb with
/// masu conjugation, it is the last word, or the following word is not a verb.
/// </returns>
private static List <JmdictEntity> TrySearchCompoundVerbs(WordInformation currentSelectedWord, int selectedIndex, List <WordInformation> words, Database japEngDictionary)
{
    if (!currentSelectedWord.IsVerb() || !currentSelectedWord.IsMasuConjugation())
    {
        return null;
    }

    // A following word must exist.
    if (selectedIndex >= (words.Count - 1))
    {
        return null;
    }

    var followingWord = words[selectedIndex + 1];
    if (!followingWord.IsVerb())
    {
        return null;
    }

    // If a godan-potential form can be removed, continue with the reduced word so the
    // compound is built from the most basic form.
    var reducedFollowing = WordInformation.TryRemoveGodanPotential(followingWord, japEngDictionary);
    if (reducedFollowing != null)
    {
        followingWord = reducedFollowing;
    }

    // Base form of the compound: surface of the first word + base form of the second.
    string compoundBaseForm = currentSelectedWord.Surface + followingWord.BaseForm;
    var compound = new WordInformation(
        followingWord.FirstConjugationType,
        followingWord.FirstConjugationForm,
        compoundBaseForm,
        followingWord.PartOfSpeech,
        false,
        followingWord.Conjugation);

    compound.AddWordPart(
        currentSelectedWord.Surface + followingWord.Surface,
        currentSelectedWord.Reading + followingWord.Reading,
        currentSelectedWord.Pronunciation + followingWord.Pronunciation);

    return FindTokenPerfectMatchInDictionary(compound, japEngDictionary);
}
/// <summary>
/// Folds a token sequence into a list of <see cref="WordInformation"/> entries.
/// A token is appended to the word currently being built when the previous step
/// flagged a conjugation and <c>TryAddConjungationPart</c> accepts it; otherwise it
/// starts a new word. The finished list is passed to <c>FinalizeWordList</c> before
/// it is returned.
/// </summary>
/// <typeparam name="T">Token type.</typeparam>
/// <param name="tokens">Tokens in their original order.</param>
/// <param name="dictionary">Dictionary database forwarded to word construction and finalization.</param>
/// <returns>The combined word list.</returns>
public static List <WordInformation> Combine <T>(List <T> tokens, Database dictionary) where T : class, IToken
{
    var combined = new List<WordInformation>();
    WordInformation openWord = null;
    T previous = null;
    bool expectConjugationPart = false;

    for (int index = 0; index < tokens.Count; index++)
    {
        T current = tokens[index];

        // Look one token ahead; stays null on the last token.
        T upcoming = null;
        if (index < tokens.Count - 1)
        {
            upcoming = tokens[index + 1];
        }

        // Only try to extend the open word when the previous step flagged a conjugation.
        bool attached = expectConjugationPart
                        && openWord.TryAddConjungationPart(previous, current, upcoming);
        if (attached)
        {
            openWord.AddWordPart(current);
        }

        // Decide whether the next token may continue the current word.
        expectConjugationPart = upcoming != null && IsConjugationForm(current, upcoming);

        if (!attached)
        {
            openWord = new WordInformation(current, upcoming, dictionary);
            combined.Add(openWord);

            if (openWord.IsHaveSplitWords)
            {
                // Split words are appended right after their parent, and the next
                // token is not attached to the parent.
                combined.AddRange(openWord.SplitWords);
                expectConjugationPart = false;
            }
        }

        previous = current;
    }

    FinalizeWordList(dictionary, combined);
    return combined;
}