/// <summary>
/// Enumerates every radical known to the radkfile, ordered by ascending stroke count.
/// </summary>
/// <returns>
/// A lazily evaluated sequence with one <see cref="Radical"/> per radkfile entry.
/// </returns>
public IEnumerable<Radical> AllRadicals()
{
    // Wrap each raw radkfile entry in a Radical; the code point is converted first, then the stroke count is read.
    var radicals = radkfile.Radicals
        .Select(entry => new Radical(CodePoint.FromInt(entry.CodePoint), entry.StrokeCount));

    return radicals.OrderBy(radical => radical.StrokeCount);
}
/// <summary>
/// Creates an <see cref="EasilyConfusedKana"/> from a UTF-8 text file.
/// </summary>
/// <remarks>
/// Lines that start with <c>#</c> are skipped. Every other line is converted into one
/// sequence of code points and handed to the constructor (presumably one group of
/// look-alike kana per line; confirm against the data file).
/// The file is read lazily via <see cref="File.ReadLines(string, Encoding)"/>, so it is
/// only opened once the resulting sequence is enumerated.
/// </remarks>
/// <param name="path">Path of the file to read.</param>
/// <returns>The instance built from the non-comment lines of the file.</returns>
public static EasilyConfusedKana FromFile(string path)
{
    var groups = File.ReadLines(path, Encoding.UTF8)
        // '#' is a syntactic marker, not linguistic text: compare ordinally so the result
        // does not depend on the current culture or on ignorable Unicode characters.
        .Where(line => !line.StartsWith("#", System.StringComparison.Ordinal))
        .Select(line => line.AsCodePoints().Select(cp => CodePoint.FromInt(cp)));

    return new EasilyConfusedKana(groups);
}
/// <summary>
/// Creates the description of a single word.
/// </summary>
/// <param name="word">The word as it appears in the text; also stored decomposed into code points.</param>
/// <param name="partOfSpeech">Estimated part of speech of the word.</param>
/// <param name="notInflected">Stored as <c>NotInflected</c> (presumably the uninflected form of the word).</param>
/// <param name="isIndependent">Stored as <c>Independent</c>; <c>null</c> when not known.</param>
/// <param name="type">Optional EDICT type of the word.</param>
/// <param name="posInfo">Additional part-of-speech details; copied into a list, or replaced by an empty sequence when <c>null</c>.</param>
public WordInfo(
    string word,
    PartOfSpeech partOfSpeech = PartOfSpeech.Unknown,
    string notInflected = null,
    bool? isIndependent = null,
    Option<EdictType> type = default(Option<EdictType>),
    IEnumerable<PartOfSpeechInfo> posInfo = null)
{
    RawWord = word;

    // Eagerly decompose the word into its code points.
    var decomposed = new List<CodePoint>();
    foreach (var rawCodePoint in word.AsCodePoints())
    {
        decomposed.Add(CodePoint.FromInt(rawCodePoint));
    }
    CodePoints = decomposed;

    EstimatedPartOfSpeech = partOfSpeech;
    NotInflected = notInflected;
    Independent = isIndependent;
    Type = type;

    // Take a snapshot of the caller's sequence so later changes to it do not leak in.
    if (posInfo == null)
    {
        this.PartOfSpeechInfo = Enumerable.Empty<PartOfSpeechInfo>();
    }
    else
    {
        this.PartOfSpeechInfo = posInfo.ToList();
    }
}
/// <summary>
/// Finds every kanji that contains all of the given radicals and reports, for each
/// known radical, whether it occurs in at least one of those kanji.
/// </summary>
/// <param name="radicals">
/// The selected radicals. Each one is looked up through the indexer of
/// <c>radicalToIndex</c>, so an unknown radical presumably throws. When the sequence
/// is empty the key is all zeroes and every kanji matches.
/// </param>
/// <param name="sortingCriteriaIndex">
/// Index selecting which pre-sorted table (<c>radkinfo</c> / <c>indexToKanji</c>) is used;
/// the matching kanji are returned in that table's order.
/// </param>
/// <returns>
/// A <c>Result</c> built from the matching kanji and an array with one entry per
/// radical index, pairing the radical with its "still possible" flag.
/// </returns>
public Result SelectRadical(IEnumerable<CodePoint> radicals, int sortingCriteriaIndex)
{
    var result = new List<CodePoint>();
    var possibleRadicals = new KeyValuePair<CodePoint, bool>[radicalCount];

    // Build the lookup key: a bit set with one bit per selected radical.
    // Writes through `vec` are read back through `key` below, so AsScalarSpan is
    // expected to return a ulong view over the same memory (helper not shown here).
    var key = new Vector<ulong>[elementSize].AsSpan();
    var vec = AsScalarSpan(key);
    foreach (var radical in radicals)
    {
        var radicalIndex = radicalToIndex[radical.Utf32];
        // C# masks the shift count of a 64-bit shift to its low six bits, so the
        // shift selects the bit inside the word while the division selects the word
        // (assuming ulongBitCount is 64).
        vec[radicalIndex / ulongBitCount] |= (ulong)(1UL << radicalIndex);
    }

    var s = sortingCriteriaIndex;
    var radk = radkinfo[s];

    // First pass: mask every kanji's radical bit set (elementSize vectors per kanji) with the key.
    var target = new Vector<ulong>[radk.Length];
    for (int i = 0; i < kanjiCount; ++i)
    {
        for (int j = 0; j < elementSize; ++j)
        {
            target[i * elementSize + j] = radk[i * elementSize + j] & key[j];
        }
    }

    // Second pass: a kanji matches when the masked bits equal the key, i.e. it has
    // every selected radical. The radicals of all matching kanji are OR-ed into `possible`.
    var possible = new Vector<ulong>[elementSize].AsSpan();
    for (int i = 0; i < kanjiCount; ++i)
    {
        bool isPresent = true;
        for (int j = 0; j < elementSize; ++j)
        {
            if (target[i * elementSize + j] != key[j])
                isPresent = false;
        }
        if (isPresent)
        {
            result.Add(CodePoint.FromInt(indexToKanji[s][i]));
            for (int j = 0; j < elementSize; ++j)
            {
                possible[j] |= radk[i * elementSize + j];
            }
        }
    }

    // Translate the accumulated bit set back into one (radical, flag) pair per radical index.
    var possibleUlong = AsScalarSpan(possible);
    for (int radicalIndex = 0; radicalIndex < radicalCount; ++radicalIndex)
    {
        var mask = (ulong)(1UL << radicalIndex);
        bool isPresent = (possibleUlong[radicalIndex / ulongBitCount] & mask) == mask;
        possibleRadicals[radicalIndex] = new KeyValuePair<CodePoint, bool>(CodePoint.FromInt(indexToRadical[radicalIndex]), isPresent);
    }
    return new Result(result, possibleRadicals);
}
/// <summary>
/// Creates the description of a single word.
/// </summary>
/// <param name="word">The word as it appears in the text; also stored decomposed into code points.</param>
/// <param name="partOfSpeech">Estimated part of speech of the word.</param>
/// <param name="dictionaryForm">Dictionary form of the word, if known.</param>
/// <param name="type">Optional EDICT part of speech of the word.</param>
/// <param name="reading">Reading of the word, if known.</param>
/// <param name="dictionaryFormReading">Reading of the dictionary form, if known.</param>
public WordInfo(
    string word,
    PartOfSpeech partOfSpeech = PartOfSpeech.Unknown,
    string dictionaryForm = null,
    Option<EdictPartOfSpeech> type = default,
    string reading = null,
    string dictionaryFormReading = null)
{
    RawWord = word;

    // Eagerly decompose the word into its code points.
    var decomposed = new List<CodePoint>();
    foreach (var rawCodePoint in word.AsCodePoints())
    {
        decomposed.Add(CodePoint.FromInt(rawCodePoint));
    }
    CodePoints = decomposed;

    EstimatedPartOfSpeech = partOfSpeech;
    DictionaryForm = dictionaryForm;
    Type = type;
    Reading = reading;
    DictionaryFormReading = dictionaryFormReading;
}
/// <summary>
/// Collects the characters related to <paramref name="codePoint"/>, one single-element
/// group per relation that applies.
/// </summary>
/// <remarks>
/// Groups are emitted in a fixed order: "Katakana", "Hiragana", "Large", "Small",
/// "Dakuten", "Handakuten", "Regular". A relation that does not apply to the
/// character produces no group.
/// </remarks>
/// <param name="codePoint">The character whose counterparts are looked up.</param>
/// <returns>The list of applicable groups; empty when nothing is related.</returns>
public IEnumerable<IGrouping<string, CodePoint>> FindRelated(CodePoint codePoint)
{
    var groups = new List<IGrouping<string, CodePoint>>();
    var utf32 = codePoint.Utf32;

    // Script counterparts.
    if (hiraganaKatakanaMap.TryGetValue(utf32, out var katakana))
    {
        AddGroup("Katakana", CodePoint.FromInt(katakana));
    }
    if (hiraganaKatakanaMap.TryGetKey(utf32, out var hiragana))
    {
        AddGroup("Hiragana", CodePoint.FromInt(hiragana));
    }

    // Size counterparts.
    if (smallLargeMap.TryGetValue(utf32, out var large))
    {
        AddGroup("Large", CodePoint.FromInt(large));
    }
    if (smallLargeMap.TryGetKey(utf32, out var small))
    {
        AddGroup("Small", CodePoint.FromInt(small));
    }

    // Dakuten form: reached directly from the regular form, or from a handakuten
    // form by first going back to its regular form.
    if (regularDakutenMap.TryGetValue(utf32, out var dakuten)
        || (regularHandakutenMap.TryGetKey(utf32, out var viaHandakuten)
            && regularDakutenMap.TryGetValue(viaHandakuten, out dakuten)))
    {
        AddGroup("Dakuten", CodePoint.FromInt(dakuten));
    }

    // Handakuten form: the mirror image of the lookup above.
    if (regularHandakutenMap.TryGetValue(utf32, out var handakuten)
        || (regularDakutenMap.TryGetKey(utf32, out var viaDakuten)
            && regularHandakutenMap.TryGetValue(viaDakuten, out handakuten)))
    {
        AddGroup("Handakuten", CodePoint.FromInt(handakuten));
    }

    // Regular form of a character that carries either mark.
    if (regularDakutenMap.TryGetKey(utf32, out var regular)
        || regularHandakutenMap.TryGetKey(utf32, out regular))
    {
        AddGroup("Regular", CodePoint.FromInt(regular));
    }

    return groups;

    // Appends a group that holds exactly one related character.
    void AddGroup(string category, CodePoint related)
    {
        groups.Add(new CategoryGrouping<CodePoint>(category, new[] { related }));
    }
}
/// <summary>
/// Returns the <see cref="CodePoint"/> for a raw integer code point.
/// </summary>
/// <param name="codePoint">The integer value passed to <c>CodePoint.FromInt</c>.</param>
/// <returns>The value produced by <c>CodePoint.FromInt</c>.</returns>
public CodePoint LookupCharacter(int codePoint) => CodePoint.FromInt(codePoint);
/// <summary>
/// Builds a predicate that tells whether a word fits <paramref name="template"/>.
/// </summary>
/// <remarks>
/// In the template, the two-character sequence <c>/\</c> and every private-use-area
/// character each stand for exactly one arbitrary character; everything else must match
/// literally. A private-use character that is a key of <paramref name="haystack"/>
/// additionally requires the character at that position to be a kanji containing all
/// radicals of the associated placeholder.
/// </remarks>
/// <param name="lang">Language service used to look up the radicals of a kanji.</param>
/// <param name="haystack">Maps placeholder characters to the radicals they demand.</param>
/// <param name="template">The pattern to match words against.</param>
/// <returns>A function returning <c>true</c> for words that fit the template.</returns>
public static Func<string, bool> CreateMatcher(
    this ILanguageService lang,
    IReadOnlyDictionary<CodePoint, KanjiPlaceholder> haystack,
    string template)
{
    var privateUseAreaMatch = new Regex(@"\p{Co}");

    // Regex.Escape turns the wildcard "/\" into "/\\", which is why the escaped
    // template is searched for the three-character literal below.
    var regex = new Regex(
        "^" + privateUseAreaMatch.Replace(Regex.Escape(template).Replace(@"/\\", "."), ".") + "$");

    return word => regex.IsMatch(word) && KanjiPlaceholdersMatch(template, word);

    // Checks the radical constraints of every placeholder in template `t` against word `c`.
    bool KanjiPlaceholdersMatch(string t, string c)
    {
        // `t` is the raw (unescaped) template, so the wildcard is the two-character
        // sequence "/\". Collapsing it to one character keeps the template aligned
        // position by position with the word; searching for the escaped form here
        // would leave the wildcard in place and shift every later placeholder.
        t = t.Replace(@"/\", ".");

        foreach (var (templateChar, concreteChar) in t.AsCodePoints().Zip(
            c.AsCodePoints(),
            (l, r) => (CodePoint.FromInt(l), CodePoint.FromInt(r))))
        {
            if (!haystack.TryGetValue(templateChar, out var placeholder))
            {
                // Not a placeholder: the regex has already validated this position.
                continue;
            }

            // Non-kanji characters, and kanji without radical data, have no radicals.
            var concreteRadicals = new HashSet<CodePoint>(concreteChar is Kanji k
                ? lang.LookupRadicals(k).ValueOr(Enumerable.Empty<CodePoint>())
                : Enumerable.Empty<CodePoint>());

            if (!placeholder.Radicals.All(templateRadical => concreteRadicals.Contains(templateRadical.CodePoint)))
            {
                return false;
            }
        }

        return true;
    }
}
/// <summary>
/// Builds the lookup tables used for radical-based kanji search: one kanji ordering
/// and one table of per-kanji radical bit sets for every sorting criterion.
/// </summary>
/// <param name="entries">
/// Radkfile entries, each giving a radical and the code points of the kanji that
/// contain it. The position of an entry becomes the radical's bit index.
/// </param>
/// <param name="kanjiDict">Dictionary handed to the orderings created for the sorting criteria.</param>
public KanjiRadicalLookup(IEnumerable<Radkfile.Entry> entries, KanjiDict kanjiDict)
{
    SortingCriteria = new ReadOnlyListWithSelector<IKanjiOrdering>(new IKanjiOrdering[]
    {
        KanjiOrdering.Create("Sort by stroke count", kanjiDict, x => x.StrokeCount),
        KanjiOrdering.Create("Sort by frequency", kanjiDict, x => x.FrequencyRating)
    });
    SortingCriteria.SelectedIndex = 0;

    // Materialise once: the entries are traversed several times below.
    var entryList = entries.ToList();
    radicalCount = entryList.Count;

    // Number of vectors needed to hold one bit per radical; never less than one so
    // that the per-kanji rows are non-empty even without any radicals.
    elementSize = DivideRoundUp(radicalCount, vectorBitCount);
    elementSize = elementSize == 0 ? 1 : elementSize;

    // Presumably inverts radical -> kanji into kanji -> radicals (helper not shown);
    // it is indexed by kanji code point in CreateRadkInfo below.
    var kradMapping = entryList
        .ToDictionary(entry => entry.Radical.CodePoint, entry => entry.KanjiCodePoints.AsEnumerable())
        .InvertMappingToSequence();

    var kanjiCodePoints = entryList
        .SelectMany(entry => entry.KanjiCodePoints)
        .Distinct()
        .ToArray();
    kanjiCount = kanjiCodePoints.Length;

    // One array of kanji code points per sorting criterion, ordered by that criterion.
    indexToKanji = SortingCriteria
        .Select(sortingCriterion => kanjiCodePoints
            .OrderBy(x => x, Comparer<int>.Create((l, r) => sortingCriterion.Compare(
                CodePoint.FromInt(l), CodePoint.FromInt(r))))
            .ToArray())
        .ToArray();

    // Radical index <-> radical code point, following the order of the entries.
    indexToRadical = entryList
        .Select(entry => entry.Radical.CodePoint)
        .ToArray();
    radicalToIndex = indexToRadical
        .Indexed()
        .ToDictionary(p => p.element, p => p.index);

    // Per sorting criterion: kanji code point -> position in that criterion's ordering.
    var kanjiToIndex = indexToKanji
        .Select(a => a
            .Indexed()
            .ToDictionary(p => p.element, p => p.index))
        .ToArray();

    radkinfo = Enumerable.Range(0, SortingCriteria.Count)
        .Select(CreateRadkInfo)
        .ToArray();

    // Builds, for sorting criterion x, a flat table with elementSize vectors per kanji
    // in which bit i is set when the kanji contains the radical with index i.
    Vector<ulong>[] CreateRadkInfo(int x)
    {
        var r = new Vector<ulong>[kanjiCount * elementSize];
        foreach (var kanji in kanjiCodePoints)
        {
            var v = new Vector<ulong>[elementSize];
            var kanjiIndex = kanjiToIndex[x][kanji];
            // Writes through `vec` are read back from `v` below, so AsScalarSpan is
            // expected to return a ulong view over the same memory (helper not shown).
            var vec = AsScalarSpan(v);
            foreach (var radical in kradMapping[kanji])
            {
                var radicalIndex = radicalToIndex[radical];
                // C# masks a 64-bit shift count to its low six bits, so the shift picks
                // the bit inside the word selected by the division (assuming ulongBitCount is 64).
                vec[radicalIndex / ulongBitCount] |= (ulong)(1UL << radicalIndex);
            }
            // Store the row at the kanji's position in this criterion's ordering.
            for (int i = 0; i < elementSize; ++i)
            {
                r[kanjiIndex * elementSize + i] = v[i];
            }
        }
        return r;
    }
}