/// <summary>
/// Creates an <see cref="EasilyConfusedKana"/> from the lines of a UTF-8 encoded text file.
/// </summary>
/// <param name="path">Path of the file to read.</param>
/// <returns>
/// An instance constructed from every line that does not start with <c>#</c>;
/// each such line is handed over as a sequence of its code points.
/// </returns>
public static EasilyConfusedKana FromFile(string path)
{
    // "#" is a syntactic marker, not linguistic text: compare ordinally so the
    // result does not depend on the current culture or on ignorable characters.
    var groups = File.ReadLines(path, Encoding.UTF8)
        .Where(line => !line.StartsWith("#", System.StringComparison.Ordinal))
        .Select(line => line.AsCodePoints().Select(cp => CodePoint.FromInt(cp)));

    return new EasilyConfusedKana(groups);
}
/// <summary>
/// Returns a single category, "Similarly looking", containing whatever
/// <c>FindSimilar</c> yields for the given code point.
/// </summary>
/// <param name="codePoint">The code point to find related characters for.</param>
/// <returns>A one-element sequence of groupings.</returns>
public IEnumerable<IGrouping<string, CodePoint>> FindRelated(CodePoint codePoint)
{
    var lookalikes = new CategoryGrouping<CodePoint>("Similarly looking", FindSimilar(codePoint));
    return new IGrouping<string, CodePoint>[] { lookalikes };
}
/// <summary>
/// Initializes a result from its start offset, length, text and radical.
/// </summary>
/// <param name="start">Value stored in <c>Start</c>.</param>
/// <param name="length">Value stored in <c>Length</c>.</param>
/// <param name="text">Value stored in <c>Text</c>; must not be null.</param>
/// <param name="radical">Value stored in <c>Radical</c>; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="text"/> or <paramref name="radical"/> is null.
/// </exception>
public RadicalSearcherResult(int start, int length, string text, CodePoint radical)
{
    if (text is null)
    {
        throw new ArgumentNullException(nameof(text));
    }

    if (radical is null)
    {
        throw new ArgumentNullException(nameof(radical));
    }

    Start = start;
    Length = length;
    Text = text;
    Radical = radical;
}
/// <summary>
/// Lists every radical known to <c>radkfile</c>, ordered by ascending stroke count.
/// </summary>
/// <returns>A lazily evaluated, ordered sequence of radicals.</returns>
public IEnumerable<Radical> AllRadicals()
{
    var radicals = radkfile.Radicals
        .Select(entry => new Radical(CodePoint.FromInt(entry.CodePoint), entry.StrokeCount));

    return radicals.OrderBy(radical => radical.StrokeCount);
}
/// <summary>
/// Returns a single category, "Similar Kanji", with the entries registered in
/// <c>similar</c> under the string form of the given code point.
/// </summary>
/// <param name="codePoint">The code point to look up.</param>
/// <returns>
/// A one-element sequence of groupings; the grouping is empty when nothing is registered.
/// </returns>
public IEnumerable<IGrouping<string, CodePoint>> FindRelated(CodePoint codePoint)
{
    IEnumerable<string> similarKanji = Enumerable.Empty<string>();
    if (similar.TryGetValue(codePoint.ToString(), out var found) && found != null)
    {
        similarKanji = found;
    }

    var grouping = new CategoryGrouping<CodePoint>(
        "Similar Kanji",
        similarKanji.Select(kanji => CodePoint.FromString(kanji)));

    return EnumerableExt.OfSingle(grouping);
}
/// <summary>
/// Looks up the entries matching the given radicals via <c>radkfile</c> and
/// orders them by stroke count as reported by <c>kanjidict</c>.
/// </summary>
/// <param name="radicals">Radicals passed (as strings) to <c>radkfile.LookupMatching</c>.</param>
/// <returns>
/// Matching code points in ascending stroke-count order; entries without a
/// dictionary record sort last (they are keyed as <see cref="int.MaxValue"/>).
/// </returns>
public IEnumerable<CodePoint> LookupByRadicals(IEnumerable<CodePoint> radicals)
{
    var radicalKeys = radicals.Select(radical => radical.ToString());
    var matches = radkfile.LookupMatching(radicalKeys);

    return matches
        .OrderBy(kanji =>
        {
            var entry = kanjidict.Lookup(kanji);
            var strokes = entry.Map(e => e.StrokeCount);
            return strokes.ValueOr(int.MaxValue);
        })
        .Select(kanji => CodePoint.FromString(kanji));
}
/// <summary>
/// Combines the similar characters reported by <c>kanaProperties</c> and
/// <c>confused</c> (in that order) using
/// <c>EnumerableExt.IntersperseSequencesWith</c> with a null separator.
/// </summary>
/// <param name="point">The code point to find related characters for.</param>
/// <returns>The combined sequence produced by <c>IntersperseSequencesWith</c>.</returns>
public IEnumerable<CodePoint> LookupRelatedCharacters(CodePoint point)
{
    var fromKanaProperties = kanaProperties.FindSimilar(point);
    var fromConfusables = confused.FindSimilar(point);
    CodePoint separator = null;

    return EnumerableExt.IntersperseSequencesWith(
        new[] { fromKanaProperties, fromConfusables },
        separator);
}
/// <summary>
/// Initializes a word description from the raw word and optional grammatical details.
/// </summary>
/// <param name="word">The raw word; also split into code points for <c>CodePoints</c>.</param>
/// <param name="partOfSpeech">Stored in <c>EstimatedPartOfSpeech</c>.</param>
/// <param name="notInflected">Stored in <c>NotInflected</c>.</param>
/// <param name="isIndependent">Stored in <c>Independent</c>.</param>
/// <param name="type">Stored in <c>Type</c>.</param>
/// <param name="posInfo">
/// Copied into a list for <c>PartOfSpeechInfo</c>; an empty sequence is stored when null.
/// </param>
public WordInfo(
    string word,
    PartOfSpeech partOfSpeech = PartOfSpeech.Unknown,
    string notInflected = null,
    bool? isIndependent = null,
    Option<EdictType> type = default(Option<EdictType>),
    IEnumerable<PartOfSpeechInfo> posInfo = null)
{
    RawWord = word;

    var wordCodePoints = word.AsCodePoints().Select(cp => CodePoint.FromInt(cp));
    CodePoints = new List<CodePoint>(wordCodePoints);

    EstimatedPartOfSpeech = partOfSpeech;
    NotInflected = notInflected;
    Independent = isIndependent;
    Type = type;

    if (posInfo != null)
    {
        this.PartOfSpeechInfo = posInfo.ToList();
    }
    else
    {
        this.PartOfSpeechInfo = Enumerable.Empty<PartOfSpeechInfo>();
    }
}
/// <summary>
/// Looks up the radicals of a kanji, using <c>remapper</c> when one is set and
/// <c>kradfile</c> otherwise.
/// </summary>
/// <param name="kanji">The kanji whose radicals are requested.</param>
/// <returns>
/// The lookup result with each radical converted to a <see cref="CodePoint"/>;
/// whether a value is present is decided by the underlying lookup.
/// </returns>
public Option<IEnumerable<CodePoint>> LookupRadicalsByKanji(Kanji kanji)
{
    var key = kanji.ToString();

    if (remapper != null)
    {
        return remapper
            .LookupRadicals(key)
            .Map(found => found.Select(radical => CodePoint.FromString(radical)));
    }

    return kradfile
        .LookupRadicals(key)
        .Map(found => found.Select(radical => CodePoint.FromString(radical)));
}
/// <summary>
/// Returns the other members of the similarity group of <paramref name="point"/>.
/// </summary>
/// <param name="point">The code point to look up in <c>similarityGroups</c>.</param>
/// <returns>
/// The distinct group members except <paramref name="point"/> itself, ordered by the
/// absolute difference between the <c>position</c> value of each member's runtime type
/// and that of <paramref name="point"/>'s runtime type. Empty when no group is registered.
/// </returns>
public IEnumerable<CodePoint> FindSimilar(CodePoint point)
{
    IEnumerable<CodePoint> candidates = Enumerable.Empty<CodePoint>();
    if (similarityGroups.TryGetValue(point, out var group) && group != null)
    {
        candidates = group;
    }

    return candidates
        .Except(new[] { point })
        .OrderBy(other => Math.Abs(position[point.GetType()] - position[other.GetType()]));
}
/// <summary>
/// Finds every kanji that contains all of the given radicals and reports, for each
/// known radical, whether it occurs in at least one of those matching kanji.
/// </summary>
/// <remarks>
/// The selected radicals are encoded as a bit set (one bit per radical index) stored in
/// <c>elementSize</c> vectors. Each kanji's bit set from <c>radkinfo</c> is ANDed with that
/// key; a kanji matches when the masked value equals the key, so with no radicals selected
/// every kanji matches. The bit sets of all matching kanji are ORed together and then
/// tested bit by bit to fill the per-radical flags.
/// Each radical is resolved through the <c>radicalToIndex</c> indexer, so a radical that is
/// not in that table presumably throws. NOTE(review): confirm against the table's type.
/// </remarks>
/// <param name="radicals">Radicals that must all be present in a matching kanji.</param>
/// <param name="sortingCriteriaIndex">
/// Index into <c>radkinfo</c> and <c>indexToKanji</c> selecting which precomputed ordering is used.
/// </param>
/// <returns>
/// A <c>Result</c> built from the matching kanji (in the index order of the selected table)
/// and one (radical, flag) pair per radical index.
/// </returns>
public Result SelectRadical(IEnumerable<CodePoint> radicals, int sortingCriteriaIndex) { var result = new List<CodePoint>(); var possibleRadicals = new KeyValuePair<CodePoint, bool>[radicalCount]; var key = new Vector<ulong>[elementSize].AsSpan(); var vec = AsScalarSpan(key); foreach (var radical in radicals) { var radicalIndex = radicalToIndex[radical.Utf32]; vec[radicalIndex / ulongBitCount] |= (ulong)(1UL << radicalIndex); } var s = sortingCriteriaIndex; var radk = radkinfo[s]; var target = new Vector<ulong>[radk.Length]; for (int i = 0; i < kanjiCount; ++i) { for (int j = 0; j < elementSize; ++j) { target[i * elementSize + j] = radk[i * elementSize + j] & key[j]; } } var possible = new Vector<ulong>[elementSize].AsSpan(); for (int i = 0; i < kanjiCount; ++i) { bool isPresent = true; for (int j = 0; j < elementSize; ++j) { if (target[i * elementSize + j] != key[j]) isPresent = false; } if (isPresent) { result.Add(CodePoint.FromInt(indexToKanji[s][i])); for (int j = 0; j < elementSize; ++j) { possible[j] |= radk[i * elementSize + j]; } } } var possibleUlong = AsScalarSpan(possible); for (int radicalIndex = 0; radicalIndex < radicalCount; ++radicalIndex) { var mask = (ulong)(1UL << radicalIndex); bool isPresent = (possibleUlong[radicalIndex / ulongBitCount] & mask) == mask; possibleRadicals[radicalIndex] = new KeyValuePair<CodePoint, bool>(CodePoint.FromInt(indexToRadical[radicalIndex]), isPresent); } return new Result(result, possibleRadicals); }
/// <summary>
/// Initializes a word description from the raw word and optional dictionary details.
/// </summary>
/// <param name="word">The raw word; also split into code points for <c>CodePoints</c>.</param>
/// <param name="partOfSpeech">Stored in <c>EstimatedPartOfSpeech</c>.</param>
/// <param name="dictionaryForm">Stored in <c>DictionaryForm</c>.</param>
/// <param name="type">Stored in <c>Type</c>.</param>
/// <param name="reading">Stored in <c>Reading</c>.</param>
/// <param name="dictionaryFormReading">Stored in <c>DictionaryFormReading</c>.</param>
public WordInfo(
    string word,
    PartOfSpeech partOfSpeech = PartOfSpeech.Unknown,
    string dictionaryForm = null,
    Option<EdictPartOfSpeech> type = default,
    string reading = null,
    string dictionaryFormReading = null)
{
    RawWord = word;

    var wordCodePoints = word.AsCodePoints().Select(cp => CodePoint.FromInt(cp));
    CodePoints = new List<CodePoint>(wordCodePoints);

    EstimatedPartOfSpeech = partOfSpeech;
    DictionaryForm = dictionaryForm;
    Type = type;
    Reading = reading;
    DictionaryFormReading = dictionaryFormReading;
}
/// <summary>
/// Finds the kanji matching the given radicals, using <c>remapper</c> when one is set and
/// <c>radkfile</c> otherwise, and sorts them with the supplied ordering.
/// </summary>
/// <param name="radicals">Radicals passed (as strings) to the underlying lookup.</param>
/// <param name="ordering">Comparer used to sort the resulting code points.</param>
/// <returns>A materialized, sorted list of matching code points.</returns>
public IEnumerable<CodePoint> LookupKanjiByRadicals(IEnumerable<CodePoint> radicals, IKanjiOrdering ordering)
{
    var radicalKeys = radicals.Select(radical => radical.ToString());

    if (remapper != null)
    {
        return remapper
            .LookupKanji(radicalKeys)
            .Select(kanji => CodePoint.FromString(kanji))
            .OrderBy(kanji => kanji, ordering)
            .ToList();
    }

    return radkfile
        .LookupMatching(radicalKeys)
        .Select(kanji => CodePoint.FromString(kanji))
        .OrderBy(kanji => kanji, ordering)
        .ToList();
}
/// <summary>
/// Returns the opposite-sized version of <paramref name="point"/> (when
/// <c>OppositeSizedVersionOf</c> yields one) followed by the single-character entries
/// registered for it in <c>mapping</c>.
/// </summary>
/// <param name="point">The code point to find similar characters for.</param>
/// <returns>A lazily evaluated sequence of similar code points.</returns>
public IEnumerable<CodePoint> FindSimilar(CodePoint point)
{
    var oppositeSizedCp = OppositeSizedVersionOf(point.Utf32);

    IEnumerable<string> oppositeSizedText = Enumerable.Empty<string>();
    if (oppositeSizedCp != null)
    {
        oppositeSizedText = new[] { char.ConvertFromUtf32(oppositeSizedCp.Value) };
    }

    var oppositeSized = oppositeSizedText.Select(text => CodePoint.FromString(text));

    if (!mapping.TryGetValue(point.ToString(), out var related) || related == null)
    {
        related = new List<string>();
    }

    // TOFIX: support combo kana
    var rest = related
        .Where(text => text.Length == 1)
        .Select(text => CodePoint.FromString(text));

    return oppositeSized.Concat(rest);
}
/// <summary>
/// Collects the kana variants related to the given code point, one single-element
/// category per variant found in the lookup maps.
/// </summary>
/// <remarks>
/// Categories are added in this order when applicable: "Katakana", "Hiragana", "Large",
/// "Small", "Dakuten", "Handakuten", "Regular". The dakuten and handakuten variants are
/// also reachable from each other through their shared regular form.
/// </remarks>
/// <param name="codePoint">The code point to find related characters for.</param>
/// <returns>The list of categories that apply; empty when no map contains the code point.</returns>
public IEnumerable<IGrouping<string, CodePoint>> FindRelated(CodePoint codePoint)
{
    var groups = new List<IGrouping<string, CodePoint>>();
    var utf32 = codePoint.Utf32;

    if (hiraganaKatakanaMap.TryGetValue(utf32, out var katakana))
    {
        AddGroup("Katakana", katakana);
    }

    if (hiraganaKatakanaMap.TryGetKey(utf32, out var hiragana))
    {
        AddGroup("Hiragana", hiragana);
    }

    if (smallLargeMap.TryGetValue(utf32, out var large))
    {
        AddGroup("Large", large);
    }

    if (smallLargeMap.TryGetKey(utf32, out var small))
    {
        AddGroup("Small", small);
    }

    if (regularDakutenMap.TryGetValue(utf32, out var dakuten)
        || (regularHandakutenMap.TryGetKey(utf32, out var regularOfHandakuten)
            && regularDakutenMap.TryGetValue(regularOfHandakuten, out dakuten)))
    {
        AddGroup("Dakuten", dakuten);
    }

    if (regularHandakutenMap.TryGetValue(utf32, out var handakuten)
        || (regularDakutenMap.TryGetKey(utf32, out var regularOfDakuten)
            && regularHandakutenMap.TryGetValue(regularOfDakuten, out handakuten)))
    {
        AddGroup("Handakuten", handakuten);
    }

    if (regularDakutenMap.TryGetKey(utf32, out var regular)
        || regularHandakutenMap.TryGetKey(utf32, out regular))
    {
        AddGroup("Regular", regular);
    }

    return groups;

    // Appends a category holding exactly one code point.
    void AddGroup(string category, int member)
    {
        groups.Add(new CategoryGrouping<CodePoint>(category, new[] { CodePoint.FromInt(member), }));
    }
}
/// <summary>
/// Initializes a radical from its code point and stroke count.
/// </summary>
/// <param name="cp">Value stored in <c>CodePoint</c>.</param>
/// <param name="strokeCount">Value stored in <c>StrokeCount</c>.</param>
public Radical(CodePoint cp, int strokeCount)
{
    this.CodePoint = cp;
    this.StrokeCount = strokeCount;
}
/// <summary>
/// Creates a predicate that tests whether a word matches a search template.
/// </summary>
/// <remarks>
/// In the template, the two-character token <c>/\</c> and every private-use character
/// (Unicode category Co) are turned into the regex wildcard <c>.</c>; everything else must
/// match literally and the whole word must match. In addition, for each template character
/// that is a key of <paramref name="haystack"/>, the word's character at the same position
/// must contain every radical of the associated placeholder. Characters that are not
/// <c>Kanji</c>, or whose radicals cannot be looked up, are treated as having no radicals.
/// </remarks>
/// <param name="lang">Language service used to look up the radicals of kanji in the word.</param>
/// <param name="haystack">Placeholder definitions keyed by the template character that stands for them.</param>
/// <param name="template">The search template.</param>
/// <returns>A function returning true for words that satisfy the template.</returns>
public static Func<string, bool> CreateMatcher(
    this ILanguageService lang,
    IReadOnlyDictionary<CodePoint, KanjiPlaceholder> haystack,
    string template)
{
    // The raw "any character" token as it appears in the template.
    const string wildcard = @"/\";

    var privateUseAreaMatch = new Regex(@"\p{Co}");

    // The template is regex-escaped first, so the wildcard has to be searched for in its
    // escaped form here.
    var pattern = privateUseAreaMatch.Replace(
        Regex.Escape(template).Replace(Regex.Escape(wildcard), "."),
        ".");
    var regex = new Regex("^" + pattern + "$");

    return word => regex.IsMatch(word) && KanjiPlaceholdersMatch(template, word);

    bool KanjiPlaceholdersMatch(string t, string c)
    {
        // This works on the UNESCAPED template, so the raw token must be replaced. Searching
        // for the escaped form (slash followed by two backslashes) never matched, which left
        // the two-character wildcard in place and shifted every later template character
        // against the word when zipping below.
        t = t.Replace(wildcard, ".");

        var pairs = t.AsCodePoints().Zip(
            c.AsCodePoints(),
            (l, r) => (CodePoint.FromInt(l), CodePoint.FromInt(r)));

        foreach (var (templateChar, concreteChar) in pairs)
        {
            if (!haystack.TryGetValue(templateChar, out var placeholder))
            {
                // Not a radical placeholder: the regex already validated this position.
                continue;
            }

            var concreteRadicals = new HashSet<CodePoint>(concreteChar is Kanji k
                ? lang.LookupRadicals(k).ValueOr(Enumerable.Empty<CodePoint>())
                : Enumerable.Empty<CodePoint>());

            if (!placeholder.Radicals.All(templateRadical => concreteRadicals.Contains(templateRadical.CodePoint)))
            {
                return false;
            }
        }

        return true;
    }
}
/// <summary>
/// Compares two code points by delegating to the wrapped <c>comparer</c>.
/// </summary>
/// <param name="x">The first code point.</param>
/// <param name="y">The second code point.</param>
/// <returns>The value returned by <c>comparer.Compare</c>.</returns>
public int Compare(CodePoint x, CodePoint y) => comparer.Compare(x, y);
/// <summary>
/// Builds the lookup tables used for bit-set based kanji-by-radical search.
/// </summary>
/// <remarks>
/// Registers two sorting criteria ("Sort by stroke count" and "Sort by frequency") and
/// selects the first. For each criterion it computes an ordering of all distinct kanji
/// (<c>indexToKanji</c>) and a table (<c>radkinfo</c>) that stores, per kanji in that
/// ordering, a bit set of the radicals the kanji contains, using <c>elementSize</c> vectors
/// per kanji. A radical's bit position is its index in <paramref name="entries"/>.
/// <c>elementSize</c> is the radical count divided by <c>vectorBitCount</c>, rounded up,
/// and is forced to at least 1.
/// </remarks>
/// <param name="entries">
/// Radkfile entries, each providing one radical and the kanji that contain it; the sequence
/// is materialized once with <c>ToList</c>.
/// </param>
/// <param name="kanjiDict">Dictionary handed to <c>KanjiOrdering.Create</c> for both orderings.</param>
public KanjiRadicalLookup(IEnumerable<Radkfile.Entry> entries, KanjiDict kanjiDict) { SortingCriteria = new ReadOnlyListWithSelector<IKanjiOrdering>(new IKanjiOrdering[] { KanjiOrdering.Create("Sort by stroke count", kanjiDict, x => x.StrokeCount), KanjiOrdering.Create("Sort by frequency", kanjiDict, x => x.FrequencyRating) }); SortingCriteria.SelectedIndex = 0; var entryList = entries.ToList(); radicalCount = entryList.Count; elementSize = DivideRoundUp(radicalCount, vectorBitCount); elementSize = elementSize == 0 ? 1 : elementSize; var kradMapping = entryList .ToDictionary(entry => entry.Radical.CodePoint, entry => entry.KanjiCodePoints.AsEnumerable()) .InvertMappingToSequence(); var kanjiCodePoints = entryList .SelectMany(entry => entry.KanjiCodePoints) .Distinct() .ToArray(); kanjiCount = kanjiCodePoints.Length; indexToKanji = SortingCriteria .Select(sortingCriterion => kanjiCodePoints .OrderBy(x => x, Comparer<int>.Create((l, r) => sortingCriterion.Compare( CodePoint.FromInt(l), CodePoint.FromInt(r)))) .ToArray()) .ToArray(); indexToRadical = entryList .Select(entry => entry.Radical.CodePoint) .ToArray(); radicalToIndex = indexToRadical .Indexed() .ToDictionary(p => p.element, p => p.index); var kanjiToIndex = indexToKanji .Select(a => a .Indexed() .ToDictionary(p => p.element, p => p.index)) .ToArray(); radkinfo = Enumerable.Range(0, SortingCriteria.Count) .Select(CreateRadkInfo) .ToArray(); Vector<ulong>[] CreateRadkInfo(int x) { var r = new Vector<ulong>[kanjiCount * elementSize]; foreach (var kanji in kanjiCodePoints) { var v = new Vector<ulong>[elementSize]; var kanjiIndex = kanjiToIndex[x][kanji]; var vec = AsScalarSpan(v); foreach (var radical in kradMapping[kanji]) { var radicalIndex = radicalToIndex[radical]; vec[radicalIndex / ulongBitCount] |= (ulong)(1UL << radicalIndex); } for (int i = 0; i < elementSize; ++i) { r[kanjiIndex * elementSize + i] = v[i]; } } return r; } }
/// <summary>
/// Obtains a code point from a string by delegating to <c>CodePoint.FromString</c>.
/// </summary>
/// <param name="character">The string passed to <c>CodePoint.FromString</c>.</param>
/// <param name="position">The position passed to <c>CodePoint.FromString</c>; defaults to 0.</param>
/// <returns>The value returned by <c>CodePoint.FromString</c>.</returns>
public CodePoint LookupCharacter(string character, int position = 0) =>
    CodePoint.FromString(character, position);
/// <summary>
/// Concatenates the related-character groupings of every provider in
/// <c>relatedProviders</c>, in provider order.
/// </summary>
/// <param name="codePoint">The code point forwarded to each provider.</param>
/// <returns>A lazily evaluated sequence of all providers' groupings.</returns>
public IEnumerable<IGrouping<string, CodePoint>> FindRelated(CodePoint codePoint) =>
    relatedProviders.SelectMany(provider => provider.FindRelated(codePoint));
/// <summary>
/// Obtains a code point from its integer value by delegating to <c>CodePoint.FromInt</c>.
/// </summary>
/// <param name="codePoint">The integer passed to <c>CodePoint.FromInt</c>.</param>
/// <returns>The value returned by <c>CodePoint.FromInt</c>.</returns>
public CodePoint LookupCharacter(int codePoint) => CodePoint.FromInt(codePoint);
/// <summary>
/// Always returns an empty sequence; this implementation reports no similar characters.
/// </summary>
/// <param name="point">Ignored.</param>
/// <returns>An empty sequence.</returns>
public IEnumerable<CodePoint> FindSimilar(CodePoint point) => Enumerable.Empty<CodePoint>();