/// <summary>
/// Enumerates every radical known to the loaded radkfile, ordered by ascending stroke count.
/// </summary>
/// <returns>
/// A lazily evaluated sequence of <see cref="Radical"/> values built from
/// <c>radkfile.Radicals</c>; the sort is performed when the sequence is enumerated.
/// </returns>
public IEnumerable <Radical> AllRadicals()
 {
     // Build each Radical directly from the raw entry, then sort by its stroke count.
     var radicals = radkfile.Radicals
                    .Select(entry => new Radical(CodePoint.FromInt(entry.CodePoint), entry.StrokeCount));

     return radicals.OrderBy(radical => radical.StrokeCount);
 }
 /// <summary>
 /// Creates an <see cref="EasilyConfusedKana"/> from a UTF-8 encoded text file.
 /// </summary>
 /// <param name="path">Path of the file to read.</param>
 /// <returns>
 /// An instance constructed from one code point sequence per line of the file;
 /// lines beginning with <c>#</c> are skipped.
 /// </returns>
 /// <remarks>
 /// <c>File.ReadLines</c> and the LINQ operators are lazy, so the file is read when the
 /// constructor (or a later consumer) enumerates the sequence.
 /// NOTE(review): whether the constructor enumerates eagerly is not visible here — confirm.
 /// </remarks>
 public static EasilyConfusedKana FromFile(string path)
 {
     return(new EasilyConfusedKana(
                File.ReadLines(path, Encoding.UTF8)
                // The comment marker is a syntactic token, so compare ordinally rather than
                // with the current culture's linguistic rules.
                .Where(line => !line.StartsWith("#", System.StringComparison.Ordinal))
                .Select(line => line.AsCodePoints().Select(cp => CodePoint.FromInt(cp)))));
 }
Exemplo n.º 3
0
 /// <summary>
 /// Creates a word description from its surface text and optional grammatical details.
 /// </summary>
 /// <param name="word">The word's text; stored as-is and also split into code points.</param>
 /// <param name="partOfSpeech">Estimated part of speech of the word.</param>
 /// <param name="notInflected">Value stored in <c>NotInflected</c>; may be null.</param>
 /// <param name="isIndependent">Value stored in <c>Independent</c>; may be null.</param>
 /// <param name="type">Optional EDICT type of the word.</param>
 /// <param name="posInfo">
 /// Additional part-of-speech details. A snapshot copy is stored; null yields an empty sequence.
 /// </param>
 public WordInfo(string word, PartOfSpeech partOfSpeech = PartOfSpeech.Unknown, string notInflected = null, bool?isIndependent = null, Option <EdictType> type = default(Option <EdictType>), IEnumerable <PartOfSpeechInfo> posInfo = null)
 {
     RawWord = word;

     var wordCodePoints = word.AsCodePoints().Select(cp => CodePoint.FromInt(cp));
     CodePoints = new List <CodePoint>(wordCodePoints);

     EstimatedPartOfSpeech = partOfSpeech;
     NotInflected = notInflected;
     Independent = isIndependent;
     Type = type;

     // Copy the caller's sequence so later changes to it are not observed here.
     if (posInfo == null)
     {
         this.PartOfSpeechInfo = Enumerable.Empty <PartOfSpeechInfo>();
     }
     else
     {
         this.PartOfSpeechInfo = posInfo.ToList();
     }
 }
        /// <summary>
        /// Finds every kanji that contains all of the given radicals, and reports which radicals
        /// occur in at least one of those kanji.
        /// </summary>
        /// <param name="radicals">The radicals that a kanji must contain to be selected.</param>
        /// <param name="sortingCriteriaIndex">
        /// Index selecting which precomputed kanji ordering (<c>radkinfo</c> / <c>indexToKanji</c>)
        /// is used; kanji are returned in that ordering.
        /// </param>
        /// <returns>
        /// A <see cref="Result"/> holding the matching kanji and, for every known radical, a flag
        /// telling whether it appears in any matching kanji.
        /// </returns>
        /// <exception cref="KeyNotFoundException">
        /// A radical in <paramref name="radicals"/> is not present in <c>radicalToIndex</c>.
        /// </exception>
        public Result SelectRadical(IEnumerable<CodePoint> radicals, int sortingCriteriaIndex)
        {
            var result = new List<CodePoint>();
            var possibleRadicals = new KeyValuePair<CodePoint, bool>[radicalCount];

            // Bit set of the requested radicals, laid out as elementSize vectors.
            var key = new Vector<ulong>[elementSize].AsSpan();
            var keyBits = AsScalarSpan(key);
            foreach (var radical in radicals)
            {
                var radicalIndex = radicalToIndex[radical.Utf32];
                // C# uses only the low six bits of the shift count for a ulong operand, so this
                // sets bit (radicalIndex % 64) of the selected word.
                // NOTE(review): assumes ulongBitCount is 64 — confirm.
                keyBits[radicalIndex / ulongBitCount] |= 1UL << radicalIndex;
            }

            var radk = radkinfo[sortingCriteriaIndex];

            // Union of the radical bit sets of every matching kanji.
            var possible = new Vector<ulong>[elementSize].AsSpan();
            for (int i = 0; i < kanjiCount; ++i)
            {
                int rowStart = i * elementSize;

                // A kanji matches when masking its row with the key leaves the key unchanged,
                // i.e. every requested radical bit is set. Checked in place, so no temporary
                // copy of the whole table is needed, and we stop at the first mismatch.
                bool containsAllRadicals = true;
                for (int j = 0; j < elementSize; ++j)
                {
                    if ((radk[rowStart + j] & key[j]) != key[j])
                    {
                        containsAllRadicals = false;
                        break;
                    }
                }

                if (!containsAllRadicals)
                {
                    continue;
                }

                result.Add(CodePoint.FromInt(indexToKanji[sortingCriteriaIndex][i]));
                for (int j = 0; j < elementSize; ++j)
                {
                    possible[j] |= radk[rowStart + j];
                }
            }

            var possibleBits = AsScalarSpan(possible);
            for (int radicalIndex = 0; radicalIndex < radicalCount; ++radicalIndex)
            {
                var mask = 1UL << radicalIndex;
                bool isPresent = (possibleBits[radicalIndex / ulongBitCount] & mask) == mask;
                possibleRadicals[radicalIndex] = new KeyValuePair<CodePoint, bool>(
                    CodePoint.FromInt(indexToRadical[radicalIndex]),
                    isPresent);
            }

            return new Result(result, possibleRadicals);
        }
 /// <summary>
 /// Creates a word description from its surface text, with optional dictionary form and readings.
 /// </summary>
 /// <param name="word">The word's text; stored as-is and also split into code points.</param>
 /// <param name="partOfSpeech">Estimated part of speech of the word.</param>
 /// <param name="dictionaryForm">Dictionary form of the word; may be null.</param>
 /// <param name="type">Optional EDICT part-of-speech tag.</param>
 /// <param name="reading">Reading of <paramref name="word"/>; may be null.</param>
 /// <param name="dictionaryFormReading">Reading of the dictionary form; may be null.</param>
 public WordInfo(
     string word,
     PartOfSpeech partOfSpeech       = PartOfSpeech.Unknown,
     string dictionaryForm           = null,
     Option <EdictPartOfSpeech> type = default,
     string reading = null,
     string dictionaryFormReading = null)
 {
     RawWord = word;

     var wordCodePoints = word.AsCodePoints().Select(cp => CodePoint.FromInt(cp));
     CodePoints = new List <CodePoint>(wordCodePoints);

     EstimatedPartOfSpeech = partOfSpeech;
     DictionaryForm = dictionaryForm;
     Type = type;
     Reading = reading;
     DictionaryFormReading = dictionaryFormReading;
 }
Exemplo n.º 6
0
        /// <summary>
        /// Collects the characters related to <paramref name="codePoint"/> through the kana
        /// lookup tables, grouped under a category label.
        /// </summary>
        /// <param name="codePoint">The character whose relatives are looked up.</param>
        /// <returns>
        /// Zero or more single-element groups, in this order when present: "Katakana",
        /// "Hiragana", "Large", "Small", "Dakuten", "Handakuten", "Regular".
        /// </returns>
        public IEnumerable <IGrouping <string, CodePoint> > FindRelated(CodePoint codePoint)
        {
            var groups = new List <IGrouping <string, CodePoint> >();
            var utf32 = codePoint.Utf32;

            // Appends a one-element group with the given category label.
            void AddGroup(string category, CodePoint related)
            {
                groups.Add(new CategoryGrouping <CodePoint>(category, new[] { related }));
            }

            if (hiraganaKatakanaMap.TryGetValue(utf32, out var asKatakana))
            {
                AddGroup("Katakana", CodePoint.FromInt(asKatakana));
            }

            if (hiraganaKatakanaMap.TryGetKey(utf32, out var asHiragana))
            {
                AddGroup("Hiragana", CodePoint.FromInt(asHiragana));
            }

            if (smallLargeMap.TryGetValue(utf32, out var asLarge))
            {
                AddGroup("Large", CodePoint.FromInt(asLarge));
            }

            if (smallLargeMap.TryGetKey(utf32, out var asSmall))
            {
                AddGroup("Small", CodePoint.FromInt(asSmall));
            }

            // Dakuten form: looked up directly, or via the key of the handakuten map entry
            // when the input is itself a handakuten-map value.
            bool hasDakuten = regularDakutenMap.TryGetValue(utf32, out var withDakuten);
            if (!hasDakuten && regularHandakutenMap.TryGetKey(utf32, out var baseOfHandakuten))
            {
                hasDakuten = regularDakutenMap.TryGetValue(baseOfHandakuten, out withDakuten);
            }

            if (hasDakuten)
            {
                AddGroup("Dakuten", CodePoint.FromInt(withDakuten));
            }

            // Handakuten form: the mirror image of the dakuten lookup above.
            bool hasHandakuten = regularHandakutenMap.TryGetValue(utf32, out var withHandakuten);
            if (!hasHandakuten && regularDakutenMap.TryGetKey(utf32, out var baseOfDakuten))
            {
                hasHandakuten = regularHandakutenMap.TryGetValue(baseOfDakuten, out withHandakuten);
            }

            if (hasHandakuten)
            {
                AddGroup("Handakuten", CodePoint.FromInt(withHandakuten));
            }

            // Regular form: the key side of whichever of the two maps contains the input as a value.
            bool hasRegular = regularDakutenMap.TryGetKey(utf32, out var plain);
            if (!hasRegular)
            {
                hasRegular = regularHandakutenMap.TryGetKey(utf32, out plain);
            }

            if (hasRegular)
            {
                AddGroup("Regular", CodePoint.FromInt(plain));
            }

            return groups;
        }
 /// <summary>
 /// Wraps a raw integer code point in a <see cref="CodePoint"/> via <c>CodePoint.FromInt</c>.
 /// </summary>
 /// <param name="codePoint">The integer code point value to wrap.</param>
 /// <returns>The value produced by <c>CodePoint.FromInt</c> for <paramref name="codePoint"/>.</returns>
 public CodePoint LookupCharacter(int codePoint) => CodePoint.FromInt(codePoint);
Exemplo n.º 8
0
        /// <summary>
        /// Builds a predicate that tests whether a word fits <paramref name="template"/>.
        /// </summary>
        /// <param name="lang">Language service used to look up the radicals of a kanji.</param>
        /// <param name="haystack">
        /// Maps placeholder characters that may occur in the template to the radicals the
        /// corresponding character of the word must contain.
        /// </param>
        /// <param name="template">
        /// Pattern to match. The two-character sequence <c>/\</c> and every private-use
        /// character (<c>\p{Co}</c>) each stand for one arbitrary character; all other
        /// characters must match literally.
        /// </param>
        /// <returns>
        /// A function returning true when the word matches the template's shape and, for every
        /// template character found in <paramref name="haystack"/>, the word's character at the
        /// same position has all of the placeholder's radicals. A non-kanji character has no
        /// radicals, so it only satisfies a placeholder with an empty radical list.
        /// </returns>
        public static Func <string, bool> CreateMatcher(
            this ILanguageService lang,
            IReadOnlyDictionary <CodePoint, KanjiPlaceholder> haystack,
            string template)
        {
            var privateUseAreaMatch = new Regex(@"\p{Co}");
            // After Regex.Escape the wildcard `/\` reads `/\\`, hence the three-character search text.
            var regex = new Regex("^" + privateUseAreaMatch.Replace(Regex.Escape(template).Replace(@"/\\", "."), ".") + "$");

            return(word => regex.IsMatch(word) && KanjiPlaceholdersMatch(template, word));

            bool KanjiPlaceholdersMatch(string t, string c)
            {
                // Here the template is NOT escaped, so the wildcard is the two-character `/\`.
                // Collapsing it to a single character keeps template and word positions aligned;
                // searching for the escaped form would leave the wildcard two characters long and
                // shift every later placeholder onto the wrong character of the word.
                t = t.Replace(@"/\", ".");
                foreach (var(templateChar, concreteChar) in t.AsCodePoints().Zip(c.AsCodePoints(),
                                                                                 (l, r) => (CodePoint.FromInt(l), CodePoint.FromInt(r))))
                {
                    if (!haystack.TryGetValue(templateChar, out var placeholder))
                    {
                        // Not a placeholder: the regex has already validated this position.
                        continue;
                    }

                    var concreteRadicals = new HashSet <CodePoint>(concreteChar is Kanji k
                        ? lang.LookupRadicals(k).ValueOr(Enumerable.Empty <CodePoint>())
                        : Enumerable.Empty <CodePoint>());
                    if (!placeholder.Radicals.All(templateRadical =>
                                                  concreteRadicals.Contains(templateRadical.CodePoint)))
                    {
                        return(false);
                    }
                }

                return(true);
            }
        }
        /// <summary>
        /// Builds the lookup tables used to find kanji by radical: index mappings for radicals
        /// and kanji, and, for each sorting criterion, a table holding one radical bit set per kanji.
        /// </summary>
        /// <param name="entries">
        /// Radkfile entries; each supplies a radical and the code points of the kanji listed for it.
        /// </param>
        /// <param name="kanjiDict">Dictionary handed to the kanji orderings created here.</param>
        public KanjiRadicalLookup(IEnumerable<Radkfile.Entry> entries, KanjiDict kanjiDict)
        {
            // Two orderings are offered; the first one is selected initially.
            SortingCriteria = new ReadOnlyListWithSelector<IKanjiOrdering>(new IKanjiOrdering[]
            {
                KanjiOrdering.Create("Sort by stroke count", kanjiDict, x => x.StrokeCount),
                KanjiOrdering.Create("Sort by frequency", kanjiDict, x => x.FrequencyRating)
            });
            SortingCriteria.SelectedIndex = 0;
            // Materialize once: the entries are traversed several times below.
            var entryList = entries.ToList();
            radicalCount = entryList.Count;
            // Number of Vector<ulong> values per kanji row, at least one even with no radicals.
            // NOTE(review): presumably vectorBitCount is the bit width of Vector<ulong> — confirm.
            elementSize = DivideRoundUp(radicalCount, vectorBitCount);
            elementSize = elementSize == 0 ? 1 : elementSize;

            // Start from radical -> kanji and invert it; the result is indexed by kanji below
            // and yields that kanji's radicals.
            var kradMapping = entryList
                .ToDictionary(entry => entry.Radical.CodePoint, entry => entry.KanjiCodePoints.AsEnumerable())
                .InvertMappingToSequence();

            // Every distinct kanji mentioned by any entry.
            var kanjiCodePoints = entryList
                .SelectMany(entry => entry.KanjiCodePoints)
                .Distinct()
                .ToArray();
            kanjiCount = kanjiCodePoints.Length;

            // One kanji array per sorting criterion, each sorted by that criterion's comparison.
            indexToKanji = SortingCriteria
                .Select(sortingCriterion => kanjiCodePoints
                    .OrderBy(x => x, Comparer<int>.Create((l, r) => sortingCriterion.Compare(
                        CodePoint.FromInt(l),
                        CodePoint.FromInt(r))))
                    .ToArray())
                .ToArray();

            // Radical index = position of the radical's entry in the input.
            indexToRadical = entryList
                .Select(entry => entry.Radical.CodePoint)
                .ToArray();

            // Reverse of indexToRadical; ToDictionary throws if a radical code point repeats.
            radicalToIndex = indexToRadical
                .Indexed()
                .ToDictionary(p => p.element, p => p.index);

            // Reverse of indexToKanji, one dictionary per sorting criterion.
            var kanjiToIndex = indexToKanji
                .Select(a => a
                    .Indexed()
                    .ToDictionary(p => p.element, p => p.index))
                .ToArray();

            // One bit table per sorting criterion, with rows laid out in that criterion's kanji order.
            radkinfo = Enumerable.Range(0, SortingCriteria.Count)
                .Select(CreateRadkInfo)
                .ToArray();

            // Builds the bit table for sorting criterion x: row kanjiIndex occupies elementSize
            // consecutive vectors and has one bit set per radical of that kanji.
            Vector<ulong>[] CreateRadkInfo(int x)
            {
                var r = new Vector<ulong>[kanjiCount * elementSize];
                foreach (var kanji in kanjiCodePoints)
                {
                    var v = new Vector<ulong>[elementSize];
                    var kanjiIndex = kanjiToIndex[x][kanji];
                    // Scalar view over v, so individual ulong words can be modified.
                    var vec = AsScalarSpan(v);
                    foreach (var radical in kradMapping[kanji])
                    {
                        var radicalIndex = radicalToIndex[radical];
                        // For a ulong operand C# uses only the low six bits of the shift count,
                        // so this sets bit (radicalIndex % 64) of the selected word.
                        // NOTE(review): assumes ulongBitCount is 64 — confirm.
                        vec[radicalIndex / ulongBitCount] |= (ulong)(1UL << radicalIndex);
                    }

                    // Copy the finished row into its slot in the table.
                    for (int i = 0; i < elementSize; ++i)
                    {
                        r[kanjiIndex * elementSize + i] = v[i];
                    }
                }

                return r;
            }
        }