/// <summary>
/// Creates an <see cref="EasilyConfusedKana"/> from a UTF-8 encoded text file.
/// </summary>
/// <param name="path">Path of the file to read.</param>
/// <returns>
/// An instance constructed from one sequence of code points per line of the file,
/// excluding lines that begin with <c>#</c>.
/// </returns>
public static EasilyConfusedKana FromFile(string path)
 {
     // The comment marker is a fixed ASCII character, so it is matched ordinally;
     // the culture-sensitive overload can ignore zero-width characters and depends
     // on the current culture.
     var groups = File.ReadLines(path, Encoding.UTF8)
                  .Where(line => !line.StartsWith("#", System.StringComparison.Ordinal))
                  .Select(line => line.AsCodePoints().Select(cp => CodePoint.FromInt(cp)));

     return new EasilyConfusedKana(groups);
 }
예제 #2
0
 /// <summary>
 /// Returns the characters related to <paramref name="codePoint"/> as a single group
 /// labelled "Similarly looking", filled with the results of <c>FindSimilar</c>.
 /// </summary>
 /// <param name="codePoint">The character to look up.</param>
 /// <returns>A one-element array holding that group.</returns>
 public IEnumerable <IGrouping <string, CodePoint> > FindRelated(CodePoint codePoint)
 {
     var lookalikes = new CategoryGrouping<CodePoint>("Similarly looking", FindSimilar(codePoint));

     return new IGrouping<string, CodePoint>[] { lookalikes };
 }
예제 #3
0
 /// <summary>
 /// Initializes a result from a start offset, a length, the associated text and a radical.
 /// </summary>
 /// <param name="start">Value stored in <c>Start</c>.</param>
 /// <param name="length">Value stored in <c>Length</c>.</param>
 /// <param name="text">Value stored in <c>Text</c>; must not be null.</param>
 /// <param name="radical">Value stored in <c>Radical</c>; must not be null.</param>
 /// <exception cref="ArgumentNullException">
 /// <paramref name="text"/> or <paramref name="radical"/> is null.
 /// </exception>
 public RadicalSearcherResult(int start, int length, string text, CodePoint radical)
 {
     // Guard clauses first; text is checked before radical.
     if (text is null)
     {
         throw new ArgumentNullException(nameof(text));
     }

     if (radical is null)
     {
         throw new ArgumentNullException(nameof(radical));
     }

     Start   = start;
     Length  = length;
     Text    = text;
     Radical = radical;
 }
 /// <summary>
 /// Enumerates every radical in <c>radkfile.Radicals</c> as a <see cref="Radical"/>,
 /// ordered by ascending stroke count.
 /// </summary>
 /// <returns>A lazily evaluated, stroke-count-ordered sequence of radicals.</returns>
 public IEnumerable <Radical> AllRadicals()
 {
     return radkfile.Radicals
         .Select(entry => new Radical(CodePoint.FromInt(entry.CodePoint), entry.StrokeCount))
         .OrderBy(radical => radical.StrokeCount);
 }
        /// <summary>
        /// Looks up the string form of <paramref name="codePoint"/> in <c>similar</c> and
        /// returns the entries found as a single group labelled "Similar Kanji".
        /// </summary>
        /// <param name="codePoint">The character to look up.</param>
        /// <returns>
        /// A one-element sequence; the group is empty when there is no entry (or a null entry)
        /// for the character.
        /// </returns>
        public IEnumerable <IGrouping <string, CodePoint> > FindRelated(CodePoint codePoint)
        {
            IEnumerable<string> matches = Enumerable.Empty<string>();
            if (similar.TryGetValue(codePoint.ToString(), out var known) && known != null)
            {
                matches = known;
            }

            var similarKanji = matches.Select(text => CodePoint.FromString(text));

            return EnumerableExt.OfSingle(new CategoryGrouping<CodePoint>("Similar Kanji", similarKanji));
        }
 /// <summary>
 /// Finds the characters that <c>radkfile.LookupMatching</c> reports for the given radicals,
 /// ordered by the stroke count found in <c>kanjidict</c>.
 /// </summary>
 /// <param name="radicals">Radicals to search by; passed on in their string form.</param>
 /// <returns>
 /// The matches as code points. Characters without a <c>kanjidict</c> entry sort as
 /// <see cref="int.MaxValue"/>, i.e. last.
 /// </returns>
 public IEnumerable <CodePoint> LookupByRadicals(IEnumerable <CodePoint> radicals)
 {
     var radicalKeys = radicals.Select(radical => radical.ToString());

     var byStrokeCount = radkfile
         .LookupMatching(radicalKeys)
         .OrderBy(kanji => kanjidict.Lookup(kanji).Map(entry => entry.StrokeCount).ValueOr(int.MaxValue));

     return byStrokeCount.Select(kanji => CodePoint.FromString(kanji));
 }
 /// <summary>
 /// Collects the characters similar to <paramref name="point"/> from <c>kanaProperties</c>
 /// and <c>confused</c>, in that order, and combines them with
 /// <c>EnumerableExt.IntersperseSequencesWith</c> using a null separator.
 /// </summary>
 /// <param name="point">The character to look up.</param>
 /// <returns>The combined sequence produced by <c>IntersperseSequencesWith</c>.</returns>
 public IEnumerable <CodePoint> LookupRelatedCharacters(CodePoint point)
 {
     var fromKanaProperties = kanaProperties.FindSimilar(point);
     var fromConfused       = confused.FindSimilar(point);
     CodePoint separator    = null;

     return EnumerableExt.IntersperseSequencesWith(new[] { fromKanaProperties, fromConfused }, separator);
 }
예제 #8
0
 /// <summary>
 /// Initializes a <c>WordInfo</c> for <paramref name="word"/>.
 /// </summary>
 /// <param name="word">The raw word; also split into code points stored in <c>CodePoints</c>.</param>
 /// <param name="partOfSpeech">Value stored in <c>EstimatedPartOfSpeech</c>.</param>
 /// <param name="notInflected">Value stored in <c>NotInflected</c>.</param>
 /// <param name="isIndependent">Value stored in <c>Independent</c>.</param>
 /// <param name="type">Value stored in <c>Type</c>.</param>
 /// <param name="posInfo">
 /// Copied into a list and stored in <c>PartOfSpeechInfo</c>; when null, an empty sequence is stored.
 /// </param>
 public WordInfo(
     string word,
     PartOfSpeech partOfSpeech = PartOfSpeech.Unknown,
     string notInflected = null,
     bool? isIndependent = null,
     Option <EdictType> type = default(Option <EdictType>),
     IEnumerable <PartOfSpeechInfo> posInfo = null)
 {
     RawWord = word;

     var codePoints = word.AsCodePoints().Select(value => CodePoint.FromInt(value));
     CodePoints = new List<CodePoint>(codePoints);

     EstimatedPartOfSpeech = partOfSpeech;
     NotInflected = notInflected;
     Independent = isIndependent;
     Type = type;

     if (posInfo == null)
     {
         this.PartOfSpeechInfo = Enumerable.Empty<PartOfSpeechInfo>();
     }
     else
     {
         this.PartOfSpeechInfo = posInfo.ToList();
     }
 }
예제 #9
0
        /// <summary>
        /// Looks up the radicals of <paramref name="kanji"/> and converts them to code points.
        /// </summary>
        /// <param name="kanji">The kanji to look up; passed on in its string form.</param>
        /// <returns>
        /// The result of <c>remapper.LookupRadicals</c> when a remapper is set, otherwise the
        /// result of <c>kradfile.LookupRadicals</c>, with each radical mapped through
        /// <c>CodePoint.FromString</c>.
        /// </returns>
        public Option <IEnumerable <CodePoint> > LookupRadicalsByKanji(Kanji kanji)
        {
            var kanjiText = kanji.ToString();

            // The remapper, when present, takes precedence over the plain kradfile lookup.
            if (remapper != null)
            {
                return remapper
                    .LookupRadicals(kanjiText)
                    .Map(found => found.Select(radical => CodePoint.FromString(radical)));
            }

            return kradfile
                .LookupRadicals(kanjiText)
                .Map(found => found.Select(radical => CodePoint.FromString(radical)));
        }
        /// <summary>
        /// Returns the members of the similarity group registered for <paramref name="point"/>,
        /// without <paramref name="point"/> itself.
        /// </summary>
        /// <param name="point">The character to look up in <c>similarityGroups</c>.</param>
        /// <returns>
        /// The distinct other members, ordered by the absolute difference between the
        /// <c>position</c> entries of the two characters' runtime types; empty when no group
        /// is registered.
        /// </returns>
        public IEnumerable <CodePoint> FindSimilar(CodePoint point)
        {
            IEnumerable<CodePoint> candidates = Enumerable.Empty<CodePoint>();
            if (similarityGroups.TryGetValue(point, out var members) && members != null)
            {
                candidates = members;
            }

            // The position lookups stay inside the key selector so they run only on enumeration.
            return candidates
                .Except(new[] { point })
                .OrderBy(other => Math.Abs(position[point.GetType()] - position[other.GetType()]));
        }
        /// <summary>
        /// Finds every kanji whose radical bit set contains all of the given radicals, and reports
        /// for each known radical whether it occurs in at least one of those kanji.
        /// </summary>
        /// <param name="radicals">
        /// The selected radicals. Each one's <c>Utf32</c> value must be a key of
        /// <c>radicalToIndex</c>, otherwise the dictionary lookup throws.
        /// </param>
        /// <param name="sortingCriteriaIndex">
        /// Index into <c>radkinfo</c> and <c>indexToKanji</c> choosing which pre-sorted kanji
        /// table is scanned; results come out in that table's order.
        /// </param>
        /// <returns>
        /// A <c>Result</c> built from the matching kanji and an array with one entry per radical
        /// index: the radical and a flag that is true when it appears in some matching kanji.
        /// </returns>
        public Result SelectRadical(IEnumerable<CodePoint> radicals, int sortingCriteriaIndex)
        {
            var matchingKanji = new List<CodePoint>();
            var possibleRadicals = new KeyValuePair<CodePoint, bool>[radicalCount];

            // Bit set of the requested radicals: elementSize vectors, written through a ulong view.
            // NOTE(review): AsScalarSpan presumably reinterprets the same memory as ulongs - confirm.
            var key = new Vector<ulong>[elementSize].AsSpan();
            var keyBits = AsScalarSpan(key);
            foreach (var radical in radicals)
            {
                var radicalIndex = radicalToIndex[radical.Utf32];
                // C# uses only the low six bits of the shift count for a ulong operand, so this
                // sets bit (radicalIndex mod 64) of the selected word.
                keyBits[radicalIndex / ulongBitCount] |= 1UL << radicalIndex;
            }

            var s = sortingCriteriaIndex;
            var radk = radkinfo[s];

            // Union of the radical bit sets of all matching kanji.
            var possible = new Vector<ulong>[elementSize].AsSpan();

            // Single pass over the table: a kanji matches when masking its row with the key gives
            // back the key. No scratch copy of the whole table is needed for that test.
            for (int kanjiIndex = 0; kanjiIndex < kanjiCount; ++kanjiIndex)
            {
                int rowStart = kanjiIndex * elementSize;

                bool containsAll = true;
                for (int j = 0; j < elementSize; ++j)
                {
                    if ((radk[rowStart + j] & key[j]) != key[j])
                    {
                        containsAll = false;
                        break;
                    }
                }

                if (!containsAll)
                {
                    continue;
                }

                matchingKanji.Add(CodePoint.FromInt(indexToKanji[s][kanjiIndex]));
                for (int j = 0; j < elementSize; ++j)
                {
                    possible[j] |= radk[rowStart + j];
                }
            }

            var possibleBits = AsScalarSpan(possible);
            for (int radicalIndex = 0; radicalIndex < radicalCount; ++radicalIndex)
            {
                var mask = 1UL << radicalIndex;
                bool isPresent = (possibleBits[radicalIndex / ulongBitCount] & mask) == mask;
                possibleRadicals[radicalIndex] = new KeyValuePair<CodePoint, bool>(
                    CodePoint.FromInt(indexToRadical[radicalIndex]),
                    isPresent);
            }

            return new Result(matchingKanji, possibleRadicals);
        }
 /// <summary>
 /// Initializes a <c>WordInfo</c> for <paramref name="word"/>.
 /// </summary>
 /// <param name="word">The raw word; also split into code points stored in <c>CodePoints</c>.</param>
 /// <param name="partOfSpeech">Value stored in <c>EstimatedPartOfSpeech</c>.</param>
 /// <param name="dictionaryForm">Value stored in <c>DictionaryForm</c>.</param>
 /// <param name="type">Value stored in <c>Type</c>.</param>
 /// <param name="reading">Value stored in <c>Reading</c>.</param>
 /// <param name="dictionaryFormReading">Value stored in <c>DictionaryFormReading</c>.</param>
 public WordInfo(
     string word,
     PartOfSpeech partOfSpeech       = PartOfSpeech.Unknown,
     string dictionaryForm           = null,
     Option <EdictPartOfSpeech> type = default,
     string reading = null,
     string dictionaryFormReading = null)
 {
     RawWord = word;

     var codePoints = word.AsCodePoints().Select(value => CodePoint.FromInt(value));
     CodePoints = new List<CodePoint>(codePoints);

     EstimatedPartOfSpeech = partOfSpeech;
     DictionaryForm = dictionaryForm;
     Type = type;
     Reading = reading;
     DictionaryFormReading = dictionaryFormReading;
 }
예제 #13
0
        /// <summary>
        /// Finds the kanji matching the given radicals and returns them sorted by
        /// <paramref name="ordering"/>.
        /// </summary>
        /// <param name="radicals">Radicals to search by; passed on in their string form.</param>
        /// <param name="ordering">Comparer used to sort the resulting code points.</param>
        /// <returns>
        /// A list of the matches, taken from <c>remapper.LookupKanji</c> when a remapper is set
        /// and from <c>radkfile.LookupMatching</c> otherwise.
        /// </returns>
        public IEnumerable <CodePoint> LookupKanjiByRadicals(IEnumerable <CodePoint> radicals, IKanjiOrdering ordering)
        {
            var radicalKeys = radicals.Select(radical => radical.ToString());

            // The remapper, when present, takes precedence over the plain radkfile lookup.
            if (remapper != null)
            {
                return remapper
                    .LookupKanji(radicalKeys)
                    .Select(kanji => CodePoint.FromString(kanji))
                    .OrderBy(codePoint => codePoint, ordering)
                    .ToList();
            }

            return radkfile
                .LookupMatching(radicalKeys)
                .Select(kanji => CodePoint.FromString(kanji))
                .OrderBy(codePoint => codePoint, ordering)
                .ToList();
        }
예제 #14
0
        /// <summary>
        /// Returns the characters considered similar to <paramref name="point"/>: first its
        /// opposite-sized version (when <c>OppositeSizedVersionOf</c> yields one), then the
        /// single-UTF-16-unit entries registered for it in <c>mapping</c>.
        /// </summary>
        /// <param name="point">The character to look up.</param>
        /// <returns>The opposite-sized character, if any, followed by the mapped characters.</returns>
        public IEnumerable <CodePoint> FindSimilar(CodePoint point)
        {
            var oppositeSizedCp = OppositeSizedVersionOf(point.Utf32);

            IEnumerable<string> oppositeSizedText;
            if (oppositeSizedCp != null)
            {
                oppositeSizedText = new[] { char.ConvertFromUtf32(oppositeSizedCp.Value) };
            }
            else
            {
                oppositeSizedText = Enumerable.Empty<string>();
            }

            var oppositeSized = oppositeSizedText.Select(text => CodePoint.FromString(text));

            mapping.TryGetValue(point.ToString(), out var mapped);

            // TOFIX: support combo kana (entries longer than one UTF-16 unit are dropped for now)
            var rest = (mapped ?? new List<string>())
                .Where(text => text.Length == 1)
                .Select(text => CodePoint.FromString(text));

            return oppositeSized.Concat(rest);
        }
예제 #15
0
        /// <summary>
        /// Collects the kana variants of <paramref name="codePoint"/> found in the lookup maps,
        /// one single-element group per variant kind.
        /// </summary>
        /// <param name="codePoint">The character whose <c>Utf32</c> value is looked up.</param>
        /// <returns>
        /// A list of groups, added in this order when a variant exists: "Katakana", "Hiragana",
        /// "Large", "Small", "Dakuten", "Handakuten", "Regular".
        /// </returns>
        public IEnumerable <IGrouping <string, CodePoint> > FindRelated(CodePoint codePoint)
        {
            var groups = new List<IGrouping<string, CodePoint>>();
            var utf32 = codePoint.Utf32;

            // Appends a group holding exactly one related character.
            void AddGroup(string category, CodePoint related)
            {
                groups.Add(new CategoryGrouping<CodePoint>(category, new[] { related }));
            }

            if (hiraganaKatakanaMap.TryGetValue(utf32, out var katakana))
            {
                AddGroup("Katakana", CodePoint.FromInt(katakana));
            }

            if (hiraganaKatakanaMap.TryGetKey(utf32, out var hiragana))
            {
                AddGroup("Hiragana", CodePoint.FromInt(hiragana));
            }

            if (smallLargeMap.TryGetValue(utf32, out var large))
            {
                AddGroup("Large", CodePoint.FromInt(large));
            }

            if (smallLargeMap.TryGetKey(utf32, out var small))
            {
                AddGroup("Small", CodePoint.FromInt(small));
            }

            // Dakuten form: directly from the regular form, or via the regular form of a handakuten character.
            if (regularDakutenMap.TryGetValue(utf32, out var dakuten) ||
                (regularHandakutenMap.TryGetKey(utf32, out var regularOfHandakuten) &&
                 regularDakutenMap.TryGetValue(regularOfHandakuten, out dakuten)))
            {
                AddGroup("Dakuten", CodePoint.FromInt(dakuten));
            }

            // Handakuten form: directly from the regular form, or via the regular form of a dakuten character.
            if (regularHandakutenMap.TryGetValue(utf32, out var handakuten) ||
                (regularDakutenMap.TryGetKey(utf32, out var regularOfDakuten) &&
                 regularHandakutenMap.TryGetValue(regularOfDakuten, out handakuten)))
            {
                AddGroup("Handakuten", CodePoint.FromInt(handakuten));
            }

            if (regularDakutenMap.TryGetKey(utf32, out var regular) ||
                regularHandakutenMap.TryGetKey(utf32, out regular))
            {
                AddGroup("Regular", CodePoint.FromInt(regular));
            }

            return groups;
        }
 /// <summary>
 /// Initializes a radical from its code point and stroke count.
 /// </summary>
 /// <param name="cp">Value stored in <c>CodePoint</c>.</param>
 /// <param name="strokeCount">Value stored in <c>StrokeCount</c>.</param>
 public Radical(CodePoint cp, int strokeCount)
     => (this.CodePoint, this.StrokeCount) = (cp, strokeCount);
예제 #17
0
        /// <summary>
        /// Builds a predicate that tests whether a word fits <paramref name="template"/>.
        /// </summary>
        /// <remarks>
        /// A word fits when it matches the template's shape - every <c>/\</c> marker and every
        /// private-use-area character stands for one arbitrary character - and when, for every
        /// template character present in <paramref name="haystack"/>, the word's character at the
        /// same position is a <c>Kanji</c> containing all of that placeholder's radicals.
        /// </remarks>
        /// <param name="lang">Service used to look up the radicals of a concrete kanji.</param>
        /// <param name="haystack">Placeholder definitions keyed by the template character that stands for them.</param>
        /// <param name="template">The template text.</param>
        /// <returns>A function returning true for words that fit the template.</returns>
        public static Func <string, bool> CreateMatcher(
            this ILanguageService lang,
            IReadOnlyDictionary <CodePoint, KanjiPlaceholder> haystack,
            string template)
        {
            var privateUseAreaMatch = new Regex(@"\p{Co}");

            // Regex.Escape turns the two-character marker "/\" into the three characters "/\\",
            // which is why the escaped template is searched for @"/\\" here.
            var regex = new Regex("^" + privateUseAreaMatch.Replace(Regex.Escape(template).Replace(@"/\\", "."), ".") + "$");

            return(word => regex.IsMatch(word) && KanjiPlaceholdersMatch(template, word));

            // Checks the radical constraints of every placeholder against the word's characters.
            bool KanjiPlaceholdersMatch(string t, string c)
            {
                // Here the template is NOT regex-escaped, so the marker is the plain two-character
                // "/\". Collapsing it to a single character keeps template and word positions
                // aligned for Zip; searching for the escaped form would never match and would
                // shift every later placeholder by one position.
                t = t.Replace(@"/\", ".");

                foreach (var(templateChar, concreteChar) in t.AsCodePoints().Zip(c.AsCodePoints(),
                                                                                 (l, r) => (CodePoint.FromInt(l), CodePoint.FromInt(r))))
                {
                    // Characters that are not placeholders impose no radical constraint.
                    if (!haystack.TryGetValue(templateChar, out var placeholder))
                    {
                        continue;
                    }

                    // A non-kanji character has no radicals, so it only satisfies an empty placeholder.
                    var concreteRadicals = new HashSet <CodePoint>(concreteChar is Kanji k
                        ? lang.LookupRadicals(k).ValueOr(Enumerable.Empty <CodePoint>())
                        : Enumerable.Empty <CodePoint>());

                    if (!placeholder.Radicals.All(templateRadical =>
                                                  concreteRadicals.Contains(templateRadical.CodePoint)))
                    {
                        return(false);
                    }
                }

                return(true);
            }
        }
예제 #18
0
 /// <summary>
 /// Compares two code points by delegating to the wrapped <c>comparer</c>.
 /// </summary>
 /// <param name="x">The first code point.</param>
 /// <param name="y">The second code point.</param>
 /// <returns>The value returned by <c>comparer.Compare(x, y)</c>.</returns>
 public int Compare(CodePoint x, CodePoint y) => comparer.Compare(x, y);
        /// <summary>
        /// Builds the lookup tables used to search kanji by radicals: the available sorting
        /// criteria, the index/code-point mappings for radicals and kanji, and, per sorting
        /// criterion, a flat bit-set table recording which radicals each kanji contains.
        /// </summary>
        /// <param name="entries">Radical entries, each providing a radical and the code points of the kanji listed for it.</param>
        /// <param name="kanjiDict">Dictionary handed to <c>KanjiOrdering.Create</c> to build the sorting criteria.</param>
        public KanjiRadicalLookup(IEnumerable<Radkfile.Entry> entries, KanjiDict kanjiDict)
        {
            // Two orderings are offered; the first one is selected initially.
            SortingCriteria = new ReadOnlyListWithSelector<IKanjiOrdering>(new IKanjiOrdering[]
            {
                KanjiOrdering.Create("Sort by stroke count", kanjiDict, x => x.StrokeCount),
                KanjiOrdering.Create("Sort by frequency", kanjiDict, x => x.FrequencyRating)
            });
            SortingCriteria.SelectedIndex = 0;
            // Materialize once: the entries are enumerated several times below.
            var entryList = entries.ToList();
            radicalCount = entryList.Count;
            // Number of Vector<ulong> elements stored per kanji; never less than one,
            // even when there are no radicals.
            elementSize = DivideRoundUp(radicalCount, vectorBitCount);
            elementSize = elementSize == 0 ? 1 : elementSize;

            // Built as radical -> kanji and then inverted; it is indexed by kanji below and
            // enumerated as that kanji's radicals.
            var kradMapping = entryList
                .ToDictionary(entry => entry.Radical.CodePoint, entry => entry.KanjiCodePoints.AsEnumerable())
                .InvertMappingToSequence();

            // Every distinct kanji mentioned by any entry.
            var kanjiCodePoints = entryList
                .SelectMany(entry => entry.KanjiCodePoints)
                .Distinct()
                .ToArray();
            kanjiCount = kanjiCodePoints.Length;

            // One kanji array per sorting criterion, sorted by that criterion.
            indexToKanji = SortingCriteria
                .Select(sortingCriterion => kanjiCodePoints
                    .OrderBy(x => x, Comparer<int>.Create((l, r) => sortingCriterion.Compare(
                        CodePoint.FromInt(l),
                        CodePoint.FromInt(r))))
                    .ToArray())
                .ToArray();

            // A radical's index is its position in the entry list.
            indexToRadical = entryList
                .Select(entry => entry.Radical.CodePoint)
                .ToArray();

            radicalToIndex = indexToRadical
                .Indexed()
                .ToDictionary(p => p.element, p => p.index);

            // Inverse of indexToKanji: per sorting criterion, kanji code point -> index.
            var kanjiToIndex = indexToKanji
                .Select(a => a
                    .Indexed()
                    .ToDictionary(p => p.element, p => p.index))
                .ToArray();

            // One bit-set table per sorting criterion, laid out in that criterion's kanji order.
            radkinfo = Enumerable.Range(0, SortingCriteria.Count)
                .Select(CreateRadkInfo)
                .ToArray();

            // Builds the table for sorting criterion x: kanjiCount rows of elementSize vectors,
            // where the row of a kanji has one bit set for each of its radicals.
            Vector<ulong>[] CreateRadkInfo(int x)
            {
                var r = new Vector<ulong>[kanjiCount * elementSize];
                foreach (var kanji in kanjiCodePoints)
                {
                    var v = new Vector<ulong>[elementSize];
                    var kanjiIndex = kanjiToIndex[x][kanji];
                    // NOTE(review): AsScalarSpan presumably exposes v's storage as ulongs, so the
                    // writes through vec land in v - confirm against its definition.
                    var vec = AsScalarSpan(v);
                    foreach (var radical in kradMapping[kanji])
                    {
                        var radicalIndex = radicalToIndex[radical];
                        // C# uses only the low six bits of the shift count for a ulong operand, so
                        // this sets bit (radicalIndex mod 64) of the selected word.
                        // Assumes ulongBitCount is 64 - TODO confirm.
                        vec[radicalIndex / ulongBitCount] |= (ulong)(1UL << radicalIndex);
                    }

                    // Copy the finished row into its slot in the flat table.
                    for (int i = 0; i < elementSize; ++i)
                    {
                        r[kanjiIndex * elementSize + i] = v[i];
                    }
                }

                return r;
            }
        }
예제 #20
0
 /// <summary>
 /// Returns the code point found in <paramref name="character"/> at <paramref name="position"/>,
 /// as produced by <c>CodePoint.FromString</c>.
 /// </summary>
 /// <param name="character">The text to read from.</param>
 /// <param name="position">Position passed to <c>CodePoint.FromString</c>; defaults to 0.</param>
 /// <returns>The resulting code point.</returns>
 public CodePoint LookupCharacter(string character, int position = 0)
     => CodePoint.FromString(character, position);
예제 #21
0
 /// <summary>
 /// Concatenates the related-character groups reported by every provider in
 /// <c>relatedProviders</c>, in provider order.
 /// </summary>
 /// <param name="codePoint">The character to look up.</param>
 /// <returns>A lazily evaluated sequence of all groups from all providers.</returns>
 public IEnumerable <IGrouping <string, CodePoint> > FindRelated(CodePoint codePoint)
     => relatedProviders.SelectMany(provider => provider.FindRelated(codePoint));
예제 #22
0
 /// <summary>
 /// Returns the code point for the given integer value, as produced by <c>CodePoint.FromInt</c>.
 /// </summary>
 /// <param name="codePoint">The integer value of the character.</param>
 /// <returns>The resulting code point.</returns>
 public CodePoint LookupCharacter(int codePoint) => CodePoint.FromInt(codePoint);
예제 #23
0
 /// <summary>
 /// Placeholder implementation that reports no similar characters.
 /// </summary>
 /// <param name="point">The character to look up; not used.</param>
 /// <returns>Always an empty sequence.</returns>
 public IEnumerable <CodePoint> FindSimilar(CodePoint point) => Enumerable.Empty<CodePoint>();