/// <summary>
/// Looks up dictionary entries for a verb handled by the generic (non suru/kuru) path
/// and hands every match to <c>AddDictionaryEntry</c>.
/// </summary>
/// <param name="word">Word whose <c>BaseForm</c> is used as the search key.</param>
/// <param name="dictionary">Dictionary database to query.</param>
/// <param name="entries">Map that receives the matched entries.</param>
/// <param name="conjungation">Conjugation text forwarded unchanged to <c>AddDictionaryEntry</c>.</param>
private static void AddVerbExceptSuruKuruDictionaryEntry(WordInformation word, Database dictionary, Dictionary<int, JmdictEntity> entries, string conjungation)
{
    List<JmdictEntity> matches;

    // Word is already in base form: search across all verb classes except suru/kuru.
    if (word.IsBaseForm())
    {
        matches = JmdictEntity.GetJapMatchWordAndPOS(word.BaseForm, dictionary, JmdictEntity.POS_VERB_EXCEPT_SURU_KURU);
        AddDictionaryEntry(matches, entries, conjungation);
        return;
    }

    // Godan conjugation: restrict to godan verbs, then also try the "-す" -> "-する" reading.
    if (word.IsGodanConjugation())
    {
        matches = JmdictEntity.GetJapMatchWordAndPOS(word.BaseForm, dictionary, JmdictEntity.POS_VERB_GODAN_EXCEPT_SURU_KURU);
        AddDictionaryEntry(matches, entries, conjungation);
        AddPossibleSpecialSuruVerb(word, dictionary, conjungation, entries);
        return;
    }

    // Ichidan conjugation: restrict to ichidan verbs.
    if (word.IsIchidanConjugation())
    {
        matches = JmdictEntity.GetJapMatchWordAndPOS(word.BaseForm, dictionary, JmdictEntity.POS_VERB_ICHIDAN_EXCEPT_SURU_KURU);
        AddDictionaryEntry(matches, entries, conjungation);
        return;
    }

    // Special suru verb (or a compound ending in kuru): fall back to the plain verb lookup.
    string baseForm = word.BaseForm;
    bool endsLikeSuruOrKuru = baseForm.EndsWith("する") || baseForm.EndsWith("くる") || baseForm.EndsWith("来る");
    if (endsLikeSuruOrKuru)
    {
        matches = JmdictEntity.GetJapMatchVerb(baseForm, dictionary);
        AddDictionaryEntry(matches, entries, conjungation);
    }
}
/// <summary>
/// Returns the word's base form when <c>WordInformation.IsHave</c> accepts it,
/// otherwise the word's surface form.
/// </summary>
/// <param name="word">Word to read the form from.</param>
/// <returns><c>word.BaseForm</c> or, as a fallback, <c>word.Surface</c>.</returns>
private string GetBaseForm(WordInformation word)
{
    return WordInformation.IsHave(word.BaseForm)
        ? word.BaseForm
        : word.Surface;
}
/// <summary>
/// Collects the dictionary entries matching <paramref name="word"/> via <c>GetMatchedWords</c>
/// (empty query suffix, the word's own conjugation) and then reorders them by reading and part of speech.
/// </summary>
/// <param name="word">Word to look up.</param>
/// <param name="dictionary">Dictionary database to query.</param>
/// <param name="isSkipSurfaceIfHasBase">Forwarded to <c>GetMatchedWords</c> as its skip-surface flag.</param>
/// <returns>The matched entries after both reordering passes.</returns>
public static List<JmdictEntity> FindTokenPerfectMatchInDictionary(WordInformation word, Database dictionary, bool isSkipSurfaceIfHasBase = false)
{
    var matches = GetMatchedWords(word, dictionary, "", isSkipSurfaceIfHasBase, word.Conjugation);

    // Both helpers reorder the list in place.
    ReoderFirstMatchedReadingIfNeeded(matches, word);
    ReorderByPartOfSpeech(matches, word);

    return matches;
}
/// <summary>
/// Copies the word at <c>startIndex</c> into <c>results</c> as a new word that carries only its
/// surface text and unknown-word flag (no reading or pronunciation), then advances <c>startIndex</c>.
/// </summary>
/// <param name="words">Source word list indexed by the <c>startIndex</c> field.</param>
private void HandleSpecialChars(List<WordInformation> words)
{
    var current = words[startIndex];
    var surface = current.Surface;

    var copy = new WordInformation(null, null, surface, null, current.IsUnknownWord);
    copy.AddWordPart(surface, null, null);
    results.Add(copy);

    startIndex++;
}
/// <summary>
/// Picks a font for a word: size 60 when its frequency equals <c>maxFriquency</c>, size 8 otherwise.
/// </summary>
/// <param name="word">Word whose <c>Frequency</c> is compared.</param>
/// <param name="fontname">Font family name passed to the <c>Font</c> constructor.</param>
/// <returns>A newly created <c>Font</c>.</returns>
public Font GetFont(WordInformation word, string fontname)
{
    bool isMostFrequent = word.Frequency == maxFriquency;
    return isMostFrequent
        ? new Font(fontname, 60)
        : new Font(fontname, 8);
}
/// <summary>
/// For a conjugated word whose base form ends in "す", also looks up the variant with that last
/// character replaced by "する" among suru verbs and adds any matches.
/// </summary>
/// <remarks>
/// Nothing is done when <paramref name="conjungation"/> is null, when the base form does not end
/// in "す", or when the surface ends in "せば".
/// </remarks>
/// <param name="word">Word supplying <c>BaseForm</c> and <c>Surface</c>.</param>
/// <param name="dictionary">Dictionary database to query.</param>
/// <param name="conjungation">Conjugation text; null disables the lookup.</param>
/// <param name="entries">Map that receives the matched entries.</param>
private static void AddPossibleSpecialSuruVerb(WordInformation word, Database dictionary, string conjungation, Dictionary<int, JmdictEntity> entries)
{
    if (conjungation == null || !word.BaseForm.EndsWith("す") || word.Surface.EndsWith("せば"))
    {
        return;
    }

    // Drop the final character and append "する".
    string stem = word.BaseForm.Substring(0, word.BaseForm.Length - 1);
    var suruMatches = JmdictEntity.GetJapMatchWordAndPOS(stem + "する", dictionary, JmdictEntity.POS_VERB_SURU);
    AddDictionaryEntry(suruMatches, entries, conjungation);
}
/// <summary>
/// Adds dictionary entries for a word recognised as the verb kuru, choosing extra look-alike
/// verbs from the start of the word's katakana reading.
/// </summary>
/// <remarks>
/// Reading prefix handling (checked in this order):
/// "コラ" adds the special kuru entry plus godan matches for the kanji base form or "こる";
/// "コサ" adds the special kuru entry plus godan matches for "来す" or "こす";
/// "コ" adds only the special kuru entry;
/// "キ" adds the special kuru entry and, when the surface starts with "き", ichidan matches for "きる";
/// anything else falls back to a plain verb lookup of the base form.
/// </remarks>
/// <param name="word">Word supplying <c>Reading</c>, <c>BaseForm</c> and <c>Surface</c>.</param>
/// <param name="dictionary">Dictionary database to query.</param>
/// <param name="conjungation">Conjugation text attached to the entries; may be null.</param>
/// <param name="entries">Map that receives the matched entries.</param>
private static void AddKuruVerb(WordInformation word, Database dictionary, string conjungation, Dictionary<int, JmdictEntity> entries)
{
    if (word.Reading.StartsWith("コラ", StringComparison.OrdinalIgnoreCase))
    {
        AddKuruSpecialOnly(conjungation, entries, dictionary);

        string searchWord = StringHelper.IsHaveKanji(word.BaseForm) ? word.BaseForm : "こる";
        var list = JmdictEntity.GetJapMatchGodanVerb(searchWord, dictionary);

        // A word can reach this method without any conjugation text; calling Replace on null
        // used to throw NullReferenceException here, so null is passed through unchanged.
        string newConjungation = conjungation == null
            ? null
            : conjungation.Replace("[passive or potential]", "[passive]");
        AddDictionaryEntry(list, entries, newConjungation);
    }
    else if (word.Reading.StartsWith("コサ", StringComparison.OrdinalIgnoreCase))
    {
        AddKuruSpecialOnly(conjungation, entries, dictionary);

        string searchWord = StringHelper.IsHaveKanji(word.BaseForm) ? "来す" : "こす";
        var list = JmdictEntity.GetJapMatchGodanVerb(searchWord, dictionary);
        AddDictionaryEntry(list, entries, conjungation);
    }
    else if (word.Reading.StartsWith("コ", StringComparison.OrdinalIgnoreCase))
    {
        AddKuruSpecialOnly(conjungation, entries, dictionary);
    }
    else if (word.Reading.StartsWith("キ", StringComparison.OrdinalIgnoreCase))
    {
        AddKuruSpecialOnly(conjungation, entries, dictionary);

        // Only a kana surface starting with "き" is also looked up as the ichidan verb "きる".
        if (word.Surface.StartsWith("き", StringComparison.OrdinalIgnoreCase))
        {
            var list = JmdictEntity.GetJapMatchIChidan("きる", dictionary);
            AddDictionaryEntry(list, entries, conjungation);
        }
    }
    else
    {
        var list = JmdictEntity.GetJapMatchVerb(word.BaseForm, dictionary);
        AddDictionaryEntry(list, entries, conjungation);
    }
}
/// <summary>
/// Builds a <c>WordInformation</c> for a piece of text the tokenizer did not split, using the
/// text itself as base form and surface and the reading/pronunciation produced by
/// <c>GetUntokenReadingAndPronunication</c>.
/// </summary>
/// <param name="unTokenized">The raw text to wrap.</param>
/// <returns>A word with a single word part for <paramref name="unTokenized"/>.</returns>
private WordInformation GetUntokenWord(string unTokenized)
{
    WordInformation word = new WordInformation(null, null, unTokenized, null);

    string reading;
    string pronunciation;
    GetUntokenReadingAndPronunication(unTokenized, out reading, out pronunciation);

    // The out values are already strings; the former reading.ToString()/pronunciation.ToString()
    // calls added nothing and threw NullReferenceException if either came back null.
    word.AddWordPart(unTokenized, reading, pronunciation);
    return word;
}
/// <summary>
/// Picks a font size from the word's frequency relative to <c>averageFrequency</c>:
/// <c>BigSize</c> above the average, <c>SmallSize</c> below 40% of it, <c>MediumSize</c> otherwise.
/// </summary>
/// <param name="word">Word whose <c>Frequency</c> is compared.</param>
/// <param name="fontname">Font family name passed to the <c>Font</c> constructor.</param>
/// <returns>A newly created <c>Font</c>.</returns>
public Font GetFont(WordInformation word, string fontname)
{
    var frequency = word.Frequency;

    if (frequency > averageFrequency)
    {
        return new Font(fontname, BigSize);
    }
    else if (frequency < 0.4 * averageFrequency)
    {
        return new Font(fontname, SmallSize);
    }
    else
    {
        return new Font(fontname, MediumSize);
    }
}
/// <summary>
/// Serializes <paramref name="word"/> and inserts it into <c>definitionsTable</c> as a new row.
/// </summary>
/// <remarks>
/// The partition key is "<paramref name="instanceId"/>_<c>word.WordName</c>" and the row key is a
/// freshly generated GUID.
/// </remarks>
/// <param name="instanceId">Identifier used as the first half of the partition key.</param>
/// <param name="word">Word definition to store.</param>
private async Task AddDefinitionToTableAsync(Guid instanceId, WordInformation word)
{
    word.Serialize();

    var row = new TableEntityAdapter<WordInformation>();
    row.PartitionKey = $"{instanceId}_{word.WordName}";
    row.RowKey = Guid.NewGuid().ToString();
    row.OriginalEntity = word;

    var insert = TableOperation.Insert(row);
    await definitionsTable.ExecuteAsync(insert);
}
/// <summary>
/// Consumes one word from the front of <c>currentTokenizedSentence</c>: builds it with
/// <c>GetUntokenWord</c>, appends it to <c>unTokenizedWords</c> and removes the consumed
/// characters from the start of the sentence.
/// </summary>
/// <remarks>
/// Steps, as implemented:
/// 1. Grow a prefix one character at a time while <c>JmdictEntity.HasJapWord(prefix + "*", japDict)</c>
///    succeeds, remembering the last prefix that passed (the trailing "*" is presumably a
///    wildcard/prefix query - confirm against <c>HasJapWord</c>).
/// 2. On the first prefix that fails, if an earlier prefix passed, shorten that remembered prefix
///    from the end until <c>HasJapWord(previousWord, japDict)</c> succeeds without the "*"; that
///    match is emitted with <c>IsInDictionary = true</c>. The scan always stops at the first failure.
/// 3. If nothing was found, the first single character is emitted and <c>IsInDictionary</c> is
///    left untouched.
/// 4. If every prefix up to the full sentence passed the "*" lookup, the full sentence is emitted
///    with <c>IsInDictionary = true</c> without a separate lookup lacking the "*".
///    NOTE(review): confirm that marking it as in-dictionary on the "*" lookup alone is intended.
/// NOTE(review): with an empty <c>currentTokenizedSentence</c> the loop is skipped and
/// <c>currentTokenizedSentence[0]</c> is read, which would throw; presumably callers only invoke
/// this while text remains - verify.
/// </remarks>
private void FindUntokenWordInDict() { bool isInDict = false; string previousWord = null; WordInformation word = null; for (int index = 0; index < currentTokenizedSentence.Length; index++) { string unTokenized = currentTokenizedSentence.Substring(0, index + 1); if (JmdictEntity.HasJapWord(unTokenized + "*", japDict)) { previousWord = unTokenized; isInDict = true; } else { if (isInDict) { isInDict = false; while (!String.IsNullOrWhiteSpace(previousWord)) { if (JmdictEntity.HasJapWord(previousWord, japDict)) { word = GetUntokenWord(previousWord); word.IsInDictionary = true; unTokenizedWords.Add(word); currentTokenizedSentence = currentTokenizedSentence.Remove(0, previousWord.Length); isInDict = true; break; } previousWord = previousWord.Remove(previousWord.Length - 1); } } break; } } if (!isInDict) { word = GetUntokenWord(currentTokenizedSentence[0].ToString()); unTokenizedWords.Add(word); currentTokenizedSentence = currentTokenizedSentence.Remove(0, 1); } else if (word == null) { word = GetUntokenWord(previousWord); word.IsInDictionary = true; unTokenizedWords.Add(word); currentTokenizedSentence = currentTokenizedSentence.Remove(0, previousWord.Length); } }
/// <summary>
/// Adds the dictionary entry for the verb suru. Every surface except "すれば" gets only the single
/// special suru entry; "すれば" gets a regular verb lookup of the base form instead.
/// </summary>
/// <param name="word">Word supplying <c>Surface</c> and <c>BaseForm</c>.</param>
/// <param name="dictionary">Dictionary database to query.</param>
/// <param name="conjungation">Conjugation text attached to the added entries.</param>
/// <param name="entries">Map that receives the entries, keyed by entry sequence.</param>
private static void AddSuruVerb(WordInformation word, Database dictionary, string conjungation, Dictionary<int, JmdictEntity> entries)
{
    bool isSureba = word.Surface.Equals("すれば", StringComparison.OrdinalIgnoreCase);
    if (!isSureba)
    {
        var suruEntry = JmdictEntity.GetSpecialSuruVerb(dictionary);
        suruEntry.Conjugation = conjungation;
        entries[suruEntry.EntrySequence] = suruEntry;
        return;
    }

    var verbMatches = JmdictEntity.GetJapMatchVerb(word.BaseForm, dictionary);
    AddDictionaryEntry(verbMatches, entries, conjungation);
}
/// <summary>
/// Decides whether <paramref name="token"/> and <paramref name="nextToken"/> should be treated as
/// one conjugated form.
/// </summary>
/// <remarks>
/// Checks run in this order and the first one that applies decides:
/// a token without a conjugation form qualifies only through <c>WordInformation.IsDeIru</c>;
/// "ん" followed by "です" (base form) or "で" (surface) qualifies;
/// a token whose conjugation form is <c>JAP_BASE_FORM</c> does not;
/// a next token whose base form is in the conjugation list qualifies;
/// "ませ" + "んで" qualifies;
/// "で" followed by "は" (surface) or by "ある"/"ない" (base form) qualifies.
/// </remarks>
/// <param name="token">Current token.</param>
/// <param name="nextToken">Token that follows <paramref name="token"/>.</param>
/// <returns>True when one of the rules above matches; otherwise false.</returns>
public static bool IsConjugationForm(IToken token, IToken nextToken)
{
    const StringComparison IgnoreCase = StringComparison.OrdinalIgnoreCase;

    if (!IsHave(token.ConjugationForm))
    {
        // Deal with de-iru
        return WordInformation.IsDeIru(token, nextToken) ? true : false;
    }

    // Deal with nde*
    if (token.BaseForm.Equals("ん", IgnoreCase)
        && (nextToken.BaseForm.Equals("です", IgnoreCase) || nextToken.Surface.Equals("で", IgnoreCase)))
    {
        return true;
    }

    if (token.ConjugationForm.Equals(WordInformation.JAP_BASE_FORM, IgnoreCase))
    {
        return false;
    }

    if (WordInformation.IsInConjugationList(nextToken.BaseForm))
    {
        return true;
    }

    // Deal with -masende
    if (token.Surface.Equals("ませ", IgnoreCase) && nextToken.Surface.Equals("んで", IgnoreCase))
    {
        return true;
    }

    // Deal with dewanai & dearu
    return token.Surface.Equals("で", IgnoreCase)
        && (nextToken.Surface.Equals("は", IgnoreCase)
            || nextToken.BaseForm.Equals("ある", IgnoreCase)
            || nextToken.BaseForm.Equals("ない", IgnoreCase));
}
/// <summary>
/// Requests <c>baseAddress + word</c> with a GET, reads the response body as a string and converts
/// the JSON it contains into word information via <c>WordInformation.FromOedResponse</c>.
/// </summary>
/// <param name="word">Text appended verbatim to <c>baseAddress</c> to form the request URI.</param>
/// <returns>The word information produced from the deserialized <c>OEDWordResponse</c>.</returns>
public async Task<IEnumerable<WordInformation>> GetInformation(string word)
{
    using (var req = new HttpRequestMessage(HttpMethod.Get, baseAddress + word))
    {
        // NOTE(review): API credentials are hard-coded in source; they should be moved to
        // configuration/secret storage and rotated. Left unchanged here to keep behaviour identical.
        req.Headers.Add("app_id", "757a4cb7");
        req.Headers.Add("app_key", "391f43296b91d426a938d22eee70050d");
        req.Headers.Add("Accept", "application/json");

        // The response was previously never disposed; dispose it once the body has been read
        // and deserialized.
        using (var res = await this.httpClient.SendAsync(req))
        {
            var content = await res.Content.ReadAsStringAsync();
            return WordInformation.FromOedResponse(JsonConvert.DeserializeObject<OEDWordResponse>(content));
        }
    }
}
/// <summary>
/// Creates a word marked as found in the dictionary from the accumulated <c>reading</c> and
/// <c>pronunciation</c> fields and appends it to <c>results</c>.
/// </summary>
/// <remarks>
/// The reading and pronunciation strings are passed by reference through
/// <c>MakeSureReadingAndPronunIsCorrect</c> before use. The new word takes its link group from the
/// word at <c>startIndex</c>.
/// </remarks>
/// <param name="wordSurface">Surface text of the new word.</param>
/// <param name="wordBaseform">Base form of the new word.</param>
/// <param name="matchWords">Dictionary matches forwarded to <c>MakeSureReadingAndPronunIsCorrect</c>.</param>
private void CreateAndAddNewWord(string wordSurface, string wordBaseform, List<JmdictEntity> matchWords)
{
    var created = new WordInformation(null, null, wordBaseform, null);

    string checkedReading = reading.ToString();
    string checkedPronunciation = pronunciation.ToString();
    MakeSureReadingAndPronunIsCorrect(wordSurface, matchWords, ref checkedReading, ref checkedPronunciation);

    created.AddWordPart(wordSurface, checkedReading, checkedPronunciation);
    created.IsInDictionary = true;
    created.LinkWordGroup = words[startIndex].LinkWordGroup;

    results.Add(created);
}
/// <summary>
/// Adds i-adjective entries for the word's base form and, when that base form ends in "たい",
/// also looks up the ichidan verb obtained by replacing "たい" with "る".
/// </summary>
/// <param name="word">Word supplying <c>BaseForm</c>.</param>
/// <param name="dictionary">Dictionary database to query.</param>
/// <param name="conjungation">Existing conjugation text; appended after the "-tai" tag when not null.</param>
/// <param name="entries">Map that receives the matched entries.</param>
private static void HandleIAdjective(WordInformation word, Database dictionary, string conjungation, Dictionary<int, JmdictEntity> entries)
{
    AddIAdjectiveDictionaryEntry(word.BaseForm, dictionary, entries, conjungation);

    if (!word.BaseForm.EndsWith("たい"))
    {
        return;
    }

    // Strip the two trailing characters ("たい") and try the result as an ichidan verb.
    string maybeVerb = word.BaseForm.Substring(0, word.BaseForm.Length - 2) + "る";

    string taiConjungation = WordInformation.ToConjungationTag("-tai") + " ";
    taiConjungation = conjungation != null ? taiConjungation + conjungation : taiConjungation;

    AddIchidanVerbDictionaryEntry(maybeVerb, dictionary, entries, taiConjungation);
}
/// <summary>
/// Builds the view model for one word.
/// </summary>
/// <remarks>
/// Symbols and words with a blank surface get no reading, pronunciation, base form or conjugation
/// and are never checked. For any other word, the first one that is in the dictionary while
/// <c>CurrentSelectedIndex</c> is still <c>NO_INDEX</c> becomes the selected (checked) word, which
/// updates <c>CurrentSelectedIndex</c>.
/// </remarks>
/// <param name="wordInfor">Source word.</param>
/// <param name="romajiWords">Text used as the model's pronunciation.</param>
/// <param name="index">Position assigned to the model's <c>Index</c>.</param>
/// <returns>The populated <c>WordInformationModel</c>.</returns>
private WordInformationModel GetWordInformationModel(WordInformation wordInfor, string romajiWords, int index)
{
    // Defaults describe a symbol / blank word.
    string baseForm = null;
    string reading = null;
    string pronunciation = null;
    string conjugation = null;
    bool isChecked = false;
    bool isInDicionary = false;

    bool isRealWord = !wordInfor.IsSymbol() && !String.IsNullOrWhiteSpace(wordInfor.Surface);
    if (isRealWord)
    {
        reading = wordInfor.Reading;
        pronunciation = romajiWords;
        isInDicionary = wordInfor.IsInDictionary;
        baseForm = wordInfor.BaseForm;

        var rawConjugation = wordInfor.Conjugation;
        if (WordInformation.IsHave(rawConjugation))
        {
            conjugation = rawConjugation;
        }

        // Select the first dictionary word encountered.
        if (CurrentSelectedIndex == NO_INDEX && isInDicionary)
        {
            CurrentSelectedIndex = index;
            isChecked = true;
        }
    }

    SolidColorBrush borderColor = GetBorderColor(wordInfor);
    return new WordInformationModel(wordInfor.Surface, conjugation, baseForm, reading, pronunciation, isInDicionary, isChecked, borderColor)
    {
        Index = index
    };
}
/// <summary>
/// Chooses the border brush for a word from its link group: green for group 0, orange for any
/// other even group, dodger blue for the rest.
/// </summary>
/// <param name="wordInfor">Word whose <c>LinkWordGroup</c> is inspected.</param>
/// <returns>One of the shared <c>UIUtilities</c> brushes.</returns>
private static SolidColorBrush GetBorderColor(WordInformation wordInfor)
{
    var group = wordInfor.LinkWordGroup;

    if (group == 0)
    {
        return UIUtilities.Green;
    }

    bool isEvenGroup = group % 2 == 0;
    if (isEvenGroup)
    {
        return UIUtilities.Orange;
    }

    return UIUtilities.DodgerBlue;
}
/// <summary>
/// Adds the dictionary entries for a verb, routing suru and kuru to their dedicated handlers and
/// everything else to the generic verb lookup.
/// </summary>
/// <remarks>
/// After the generic lookup, some words that could also be a form of kuru get the special kuru
/// entry added: base form "くる" (with no conjugation text), "こら…"/"来ら…" forms whose
/// conjugation contains "[passive]" (rewritten to "[passive or potential]"), and "来さ…"/"こさ…"
/// forms whose conjugation contains "causative".
/// </remarks>
/// <param name="word">Verb to look up.</param>
/// <param name="dictionary">Dictionary database to query.</param>
/// <param name="conjungation">Conjugation text for the entries; may be null.</param>
/// <param name="entries">Map that receives the matched entries.</param>
private static void HandleVerb(WordInformation word, Database dictionary, string conjungation, Dictionary<int, JmdictEntity> entries)
{
    // Deal with suru verb alone to avoid showing noun with conjugation.
    if (!word.Reading.StartsWith("ナ", StringComparison.OrdinalIgnoreCase)
        && WordInformation.IsSpecialSuVerb(word.FirstConjugationType)
        && (word.BaseForm.Equals("為る", StringComparison.OrdinalIgnoreCase)
            || word.BaseForm.Equals("する", StringComparison.OrdinalIgnoreCase)))
    {
        AddSuruVerb(word, dictionary, conjungation, entries);
        return;
    }

    // Deal with kuru alone.
    if ((word.BaseForm.Equals("来る", StringComparison.OrdinalIgnoreCase)
            || word.BaseForm.Equals("くる", StringComparison.OrdinalIgnoreCase))
        && WordInformation.IsSpecialKuVerb(word.FirstConjugationType))
    {
        AddKuruVerb(word, dictionary, conjungation, entries);
        return;
    }

    AddVerbExceptSuruKuruDictionaryEntry(word, dictionary, entries, conjungation);

    if (word.BaseForm.Equals("くる", StringComparison.OrdinalIgnoreCase))
    {
        AddKuruSpecialOnly(null, entries, dictionary);
        return;
    }

    bool looksLikeKuruPassive =
        (word.Surface.StartsWith("こら", StringComparison.OrdinalIgnoreCase)
            && word.BaseForm.Equals("こる", StringComparison.OrdinalIgnoreCase))
        || (word.Surface.StartsWith("来ら", StringComparison.OrdinalIgnoreCase)
            && word.BaseForm.Equals("来る", StringComparison.OrdinalIgnoreCase));
    if (looksLikeKuruPassive)
    {
        if (conjungation != null && conjungation.Contains("[passive]"))
        {
            AddKuruSpecialOnly(conjungation.Replace("[passive]", "[passive or potential]"), entries, dictionary);
        }

        return;
    }

    // The kanji and kana spellings are handled identically.
    bool looksLikeKuruCausative =
        (word.Surface.StartsWith("来さ", StringComparison.OrdinalIgnoreCase)
            && word.BaseForm.Equals("来す", StringComparison.OrdinalIgnoreCase))
        || (word.Surface.StartsWith("こさ", StringComparison.OrdinalIgnoreCase)
            && word.BaseForm.Equals("こす", StringComparison.OrdinalIgnoreCase));
    if (looksLikeKuruCausative && conjungation != null && conjungation.Contains("causative"))
    {
        AddKuruSpecialOnly(conjungation, entries, dictionary);
    }
}
/// <summary>
/// Collects the unique dictionary entries that match a word, first by base form (dispatching on
/// the kind of word) and then, where allowed, by surface form.
/// </summary>
/// <param name="word">Word to look up.</param>
/// <param name="dictionary">Dictionary database to query.</param>
/// <param name="queryCommmand">Suffix appended to the search text in the catch-all lookups.</param>
/// <param name="isSkipSurfaceIfHasBaseForm">When true, the surface lookup runs only if the base-form lookup found nothing.</param>
/// <param name="conjungation">Conjugation text attached to verb/adjective entries; may be null.</param>
/// <returns>The values of the entry map as a list.</returns>
private static List<JmdictEntity> GetMatchedWords(WordInformation word, Database dictionary, string queryCommmand, bool isSkipSurfaceIfHasBaseForm, string conjungation = null)
{
    // Keyed map guarantees each entry appears once.
    var entries = new Dictionary<int, JmdictEntity>();

    if (WordInformation.IsHave(word.BaseForm))
    {
        if (word.IsMaybeAmbiguousGodan())
        {
            var allVariants = GetPossibleGodanVerb(word);
            AddPossibleGodanVerbsDictionaryEntry(allVariants, dictionary, entries, conjungation);
            if (allVariants.Count == 1)
            {
                AddPossibleSpecialSuruVerb(word, dictionary, conjungation, entries);
            }
        }
        else if (word.IsIAdjectiveConjugation())
        {
            HandleIAdjective(word, dictionary, conjungation, entries);
        }
        else if (word.IsVerb())
        {
            HandleVerb(word, dictionary, conjungation, entries);
        }
        else if (word.IsAuxiliaryVerb())
        {
            AddVerbDictionaryEntry(word.BaseForm, dictionary, entries, conjungation);
        }
        else
        {
            AddAllDictionaryEntry(word.BaseForm + queryCommmand, dictionary, entries);
        }
    }

    // The surface lookup is worth doing when there is no base form, nothing was found, or the
    // surface differs from the base form, and only if the caller has not asked to skip it.
    bool surfaceWorthTrying = word.BaseForm == null
        || entries.Count == 0
        || !word.BaseForm.EqualsOrdinalIgnore(word.Surface);
    if (surfaceWorthTrying && (entries.Count == 0 || !isSkipSurfaceIfHasBaseForm))
    {
        AddAllDictionaryEntry(word.Surface + queryCommmand, dictionary, entries);
    }

    return new List<JmdictEntity>(entries.Values);
}
/// <summary>
/// Repeats the exact-match search with the selected word's surface converted by
/// <c>KataHiraConvert.ConvertKataToHira</c> and merges any hits into the existing matches.
/// </summary>
/// <param name="selectedWord">Word whose surface is converted.</param>
/// <param name="japEngDictionary">Dictionary database to query.</param>
/// <param name="perfectMatches">Matches found so far.</param>
/// <returns>
/// <paramref name="perfectMatches"/> itself when the converted search finds nothing; otherwise a
/// new list holding the union of both result sets.
/// </returns>
private static List<JmdictEntity> TrySearchByConvertToHira(WordInformation selectedWord, Database japEngDictionary, List<JmdictEntity> perfectMatches)
{
    var hiraSurface = KataHiraConvert.ConvertKataToHira(selectedWord.Surface);

    // Same word data as the selection, only with the converted surface.
    var hiraWord = new WordInformation(
        selectedWord.FirstConjugationType,
        selectedWord.FirstConjugationForm,
        selectedWord.BaseForm,
        selectedWord.PartOfSpeech);
    hiraWord.AddWordPart(hiraSurface, selectedWord.Reading, selectedWord.Pronunciation);

    var hiraMatches = FindTokenPerfectMatchInDictionary(hiraWord, japEngDictionary);
    if (hiraMatches.Count == 0)
    {
        return perfectMatches;
    }

    return perfectMatches.Union(hiraMatches, JmdictEntity.EqualComparer).ToList();
}
/// <summary>
/// Tells whether a word is a verb that is either in base form or has non-blank conjugation text.
/// </summary>
/// <param name="word">Word to test.</param>
/// <returns>True for such a verb; false otherwise.</returns>
private static bool IsValidVerb(WordInformation word)
{
    return word.IsVerb()
        && (word.IsBaseForm() || !String.IsNullOrWhiteSpace(word.Conjugation));
}
/// <summary>
/// Handles a press of the registration button: builds a <c>WordInformation</c> from the current
/// input values, wraps it in a view model and passes it to <c>RegistrationDataGrid</c>.
/// </summary>
public void RegistrationButtonClick()
{
    // TODO (12/1): add a check that the identifier is unique.
    var entry = new WordInformation
    {
        Identifier = Identifier,
        Text = VoiceText,
        Category = Category,
        Volume = VolumeChange,
        Start = StartTime,
        End = EndTime,
        File = VoiceFile
    };

    var row = new WordInformationViewModel(this)
    {
        Item = entry
    };

    RegistrationDataGrid(row);
}
/// <summary>
/// Searches the dictionary for the selected token, combining several strategies.
/// </summary>
/// <remarks>
/// Order of work: compound-verb search; longest-word search if that found nothing; the exact-match
/// search, merged into any earlier results; a second exact-match search on the word returned by
/// <c>WordInformation.TryRemoveGodanPotential</c> when there is one; a converted-surface search
/// when the surface is katakana only. If all of that yields nothing, the partial-match search is
/// returned instead.
/// </remarks>
/// <param name="selectedWord">Token being looked up.</param>
/// <param name="selectedIndex">Index of the token inside <paramref name="words"/>.</param>
/// <param name="words">All tokens of the current text.</param>
/// <param name="japEngDictionary">Dictionary database to query.</param>
/// <returns>The merged matches, or the partial matches when there are none.</returns>
public static List<JmdictEntity> SearchTokenWord(WordInformation selectedWord, int selectedIndex, List<WordInformation> words, Database japEngDictionary)
{
    List<JmdictEntity> results = TrySearchCompoundVerbs(selectedWord, selectedIndex, words, japEngDictionary);
    if (results == null || results.Count == 0)
    {
        results = TrylongestWordSearch(selectedWord, selectedIndex, words, japEngDictionary);
    }

    // The exact-match search always runs; it either becomes the result or is merged in.
    List<JmdictEntity> perfectMatches = FindTokenPerfectMatchInDictionary(selectedWord, japEngDictionary);
    bool hasEarlierResults = results != null && results.Count != 0;
    results = hasEarlierResults
        ? results.Union(perfectMatches, JmdictEntity.EqualComparer).ToList()
        : perfectMatches;

    var withoutPotential = WordInformation.TryRemoveGodanPotential(selectedWord, japEngDictionary);
    if (withoutPotential != null)
    {
        var extraMatches = FindTokenPerfectMatchInDictionary(withoutPotential, japEngDictionary);
        results = results.Union(extraMatches, JmdictEntity.EqualComparer).ToList();
    }

    if (StringHelper.IsKatakanaOnly(selectedWord.Surface))
    {
        results = TrySearchByConvertToHira(selectedWord, japEngDictionary, results);
    }

    if (results.Count != 0)
    {
        return results;
    }

    return FindTokenPartialMatchInDictionary(selectedWord, japEngDictionary);
}
/// <summary>
/// Derives a reading for the word's base form from the converted reading of its surface form.
/// </summary>
/// <remarks>
/// The surface and the converted reading are compared from their ends to skip the trailing
/// characters they share; the remaining reading prefix is then combined with the tail of the base
/// form. A one-character surface or a missing base form returns the converted reading unchanged.
/// </remarks>
/// <param name="word">Word supplying <c>Reading</c>, <c>Surface</c> and <c>BaseForm</c>.</param>
/// <returns>The derived reading.</returns>
private static string GetBaseFormReadingOfWordWithKanji(WordInformation word)
{
    string hiragana = KataHiraConvert.ConvertKataToHira(word.Reading);
    string surface = word.Surface;

    // A single-character surface (only one kanji): the reading is already the base-form reading.
    if (surface.Length == 1)
    {
        return hiragana;
    }

    if (word.BaseForm == null)
    {
        return hiragana;
    }

    string baseForm = word.BaseForm;

    // Walk backwards over the suffix shared by surface and reading; index 0 is never compared.
    int surfaceIndex = surface.Length - 1;
    int readingIndex = hiragana.Length - 1;
    while (surfaceIndex > 0 && readingIndex > 0 && surface[surfaceIndex] == hiragana[readingIndex])
    {
        surfaceIndex--;
        readingIndex--;
    }

    string readingPrefix = hiragana.Substring(0, readingIndex + 1);
    int baseTailStart = surfaceIndex + 1;

    if (baseTailStart >= baseForm.Length)
    {
        // Base form has no tail at that position: return the reading cut one character shorter.
        return hiragana.Substring(0, readingIndex);
    }

    return readingPrefix + baseForm.Substring(baseTailStart);
}
/// <summary>
/// Uses <c>WordListReducer.ReduceOnce</c> at the selected index to get a longer word and returns
/// its exact dictionary matches when there are few enough of them.
/// </summary>
/// <remarks>
/// The matches are accepted when their count is at most <c>VALID_LONGEST_MATCH_COUNT</c>, or when
/// the longer word's surface contains kanji and the count is at most
/// <c>MAX_VALID_LONGEST_MATCH_COUNT</c>.
/// </remarks>
/// <param name="currentSelectedWord">Token currently selected.</param>
/// <param name="selectedIndex">Index of that token in <paramref name="words"/>.</param>
/// <param name="words">All tokens of the current text.</param>
/// <param name="japEngDictionary">Dictionary database to query.</param>
/// <returns>
/// The accepted matches, or null when the reduced word has the same surface as the selection or
/// the matches are rejected.
/// </returns>
private static List<JmdictEntity> TrylongestWordSearch(WordInformation currentSelectedWord, int selectedIndex, List<WordInformation> words, Database japEngDictionary)
{
    var reducer = new WordListReducer();
    var longestWord = reducer.ReduceOnce(selectedIndex, words, japEngDictionary, true);

    // Reduction produced nothing longer than the selection itself.
    if (longestWord.Surface.Equals(currentSelectedWord.Surface, StringComparison.OrdinalIgnoreCase))
    {
        return null;
    }

    var perfectMatches = FindTokenPerfectMatchInDictionary(longestWord, japEngDictionary, true);

    bool accepted = perfectMatches.Count <= VALID_LONGEST_MATCH_COUNT
        || (StringHelper.IsHaveKanji(longestWord.Surface) && perfectMatches.Count <= MAX_VALID_LONGEST_MATCH_COUNT);

    return accepted ? perfectMatches : null;
}
/// <summary>
/// Reorders <paramref name="results"/> in place so that entries whose part of speech agrees with
/// the word come first; the relative order inside each group is kept.
/// </summary>
/// <remarks>
/// An entry is moved to the back when its part of speech does not contain the word's English part
/// of speech, or, for a verb, when it is neither a "Godan" nor an "Ichidan" entry.
/// Nothing happens for fewer than two results or when the word has no English part of speech.
/// </remarks>
/// <param name="results">Entries to reorder; modified in place.</param>
/// <param name="word">Word that supplies the part of speech to prefer.</param>
private static void ReorderByPartOfSpeech(List<JmdictEntity> results, WordInformation word)
{
    if (results.Count < 2)
    {
        return;
    }

    string pos = word.GetPartOfSpeechInEnglish();
    if (String.IsNullOrEmpty(pos))
    {
        return;
    }

    bool isVerb = word.IsVerb();
    var preferred = new List<JmdictEntity>(results.Count);
    var demoted = new List<JmdictEntity>();

    foreach (JmdictEntity entry in results)
    {
        // Fix: the old test "!Godan || !Ichidan" was true for every entry that is not both at
        // once, so for verbs everything was demoted and the order never changed. A verb entry is
        // only out of place when it is neither Godan nor Ichidan.
        bool wrongVerbClass = isVerb
            && !entry.PartOfSpeech.Contains("Godan")
            && !entry.PartOfSpeech.Contains("Ichidan");

        if (wrongVerbClass || !entry.PartOfSpeech.Contains(pos))
        {
            demoted.Add(entry);
        }
        else
        {
            preferred.Add(entry);
        }
    }

    // Stable partition: preferred entries first, demoted ones after, each in original order.
    results.Clear();
    results.AddRange(preferred);
    results.AddRange(demoted);
}
/// <summary>
/// Lists the base-form candidates for a godan verb whose base form may be ambiguous, by
/// substituting the final character with each ending of its group.
/// </summary>
/// <remarks>
/// Final "ぶ"/"む"/"ぬ" yields the three variants ending in those characters. Final "る"/"つ"/"う",
/// the base form "いく", or a two-character base form starting with "行" yields the variants
/// ending in "る"/"つ"/"う", preceded by "行く" when the base form is two characters long and
/// starts with "い" or "行". Any other base form is returned as the only candidate.
/// </remarks>
/// <param name="word">Word supplying <c>BaseForm</c>.</param>
/// <returns>The candidate base forms.</returns>
private static List<string> GetPossibleGodanVerb(WordInformation word)
{
    var candidates = new List<string>();
    string baseForm = word.BaseForm;
    int lastIndex = baseForm.Length - 1;
    char lastLetter = baseForm[lastIndex];

    bool endsInBuMuNu = lastLetter == 'ぶ' || lastLetter == 'む' || lastLetter == 'ぬ';
    bool endsInRuTsuU = lastLetter == 'る' || lastLetter == 'つ' || lastLetter == 'う';

    if (endsInBuMuNu)
    {
        var builder = new StringBuilder(baseForm);
        foreach (char ending in new[] { 'ぶ', 'む', 'ぬ' })
        {
            AddNewGodanVerb(candidates, lastIndex, builder, ending);
        }
    }
    else if (endsInRuTsuU
        || baseForm.EqualsOrdinalIgnore("いく")
        || (baseForm[0] == '行' && baseForm.Length == 2))
    {
        // iku exception
        if (baseForm.Length == 2 && (baseForm[0] == 'い' || baseForm[0] == '行'))
        {
            candidates.Add("行く");
        }

        var builder = new StringBuilder(baseForm);
        foreach (char ending in new[] { 'る', 'つ', 'う' })
        {
            AddNewGodanVerb(candidates, lastIndex, builder, ending);
        }
    }
    else
    {
        candidates.Add(baseForm);
    }

    return candidates;
}
/// <summary>
/// When the selected verb is in masu conjugation and is directly followed by another verb, joins
/// the two into one compound word and searches the dictionary for it.
/// </summary>
/// <remarks>
/// The following verb is first passed through <c>WordInformation.TryRemoveGodanPotential</c>; a
/// non-null result replaces it. The compound's base form is the selected surface plus the
/// following verb's base form.
/// </remarks>
/// <param name="currentSelectedWord">Token currently selected.</param>
/// <param name="selectedIndex">Index of that token in <paramref name="words"/>.</param>
/// <param name="words">All tokens of the current text.</param>
/// <param name="japEngDictionary">Dictionary database to query.</param>
/// <returns>The exact matches for the compound, or null when the tokens do not form one.</returns>
private static List<JmdictEntity> TrySearchCompoundVerbs(WordInformation currentSelectedWord, int selectedIndex, List<WordInformation> words, Database japEngDictionary)
{
    bool canFormCompound = currentSelectedWord.IsVerb()
        && currentSelectedWord.IsMasuConjugation()
        && selectedIndex < words.Count - 1
        && words[selectedIndex + 1].IsVerb();
    if (!canFormCompound)
    {
        return null;
    }

    // Remove potential conjugation if present, so the next word is in its most basic form.
    WordInformation following = words[selectedIndex + 1];
    WordInformation nextWord = WordInformation.TryRemoveGodanPotential(following, japEngDictionary) ?? following;

    string compoundBaseForm = currentSelectedWord.Surface + nextWord.BaseForm;
    var compoundWord = new WordInformation(
        nextWord.FirstConjugationType,
        nextWord.FirstConjugationForm,
        compoundBaseForm,
        nextWord.PartOfSpeech,
        false,
        nextWord.Conjugation);
    compoundWord.AddWordPart(
        currentSelectedWord.Surface + nextWord.Surface,
        currentSelectedWord.Reading + nextWord.Reading,
        currentSelectedWord.Pronunciation + nextWord.Pronunciation);

    return FindTokenPerfectMatchInDictionary(compoundWord, japEngDictionary);
}
/// <summary>
/// Replaces the stored example words.
/// </summary>
/// <param name="exampleWords">The example words to store; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="exampleWords"/> is null.</exception>
public void SetExampleWords(WordInformation[] exampleWords)
{
    // Reject null up front so the field never holds it.
    if (exampleWords != null)
    {
        _exampleWords = exampleWords;
        return;
    }

    throw new ArgumentNullException("exampleWords");
}
/// <summary>
/// Runs the token dictionary search for the selected word, stores the matches in
/// <c>searchResults</c> and tells <c>pageControl</c> how many there are.
/// </summary>
/// <param name="currentSelectedWord">Word to search for.</param>
private void ShowSearchResults(WordInformation currentSelectedWord)
{
    var selectedIndex = wordGridViewModel.CurrentSelectedIndex;
    var tokenizedWords = tokenizer.Words;

    searchResults = TokensDictSearcher.SearchTokenWord(currentSelectedWord, selectedIndex, tokenizedWords, japEngDictionary);
    pageControl.ChangeNumberOfItem(searchResults.Count);
}