Beispiel #1
0
        /// <summary>
        /// Reads an array of <c>SubstData</c> records from a JSON file located in <c>dir</c>,
        /// keeps one record per distinct <c>SuffixFrom</c>, and returns the resulting
        /// suffix replacement map.
        /// </summary>
        /// <param name="words">Not referenced by this method.</param>
        /// <param name="fileName">File name, combined with <c>dir</c> to form the path that is read.</param>
        /// <param name="minRatio">A record is kept only when <c>Ok &gt;= Fail * minRatio</c>.</param>
        /// <returns>A dictionary mapping each kept <c>SuffixFrom</c> to its <c>SuffixTo</c>.</returns>
        /// <exception cref="InvalidOperationException">
        /// Several records share a <c>SuffixFrom</c> and the one chosen by the comparer has a
        /// larger <c>Fail</c> than the smallest <c>Fail</c> among them; the message is the
        /// serialized group.
        /// </exception>
        public Dictionary <string, string> GetAffixReplaceMap(string[] words, string fileName, double minRatio)
        {
            var path = Path.Combine(dir, fileName);
            var loaded = jsonSerializer.Deserialize<SubstData[]>(File.ReadAllText(path));

            var groupedBySource = loaded
                .Where(entry => entry.Ok >= entry.Fail * minRatio)
                .GroupBy(entry => entry.SuffixFrom);

            var selected = new List<SubstData>();
            foreach (var sourceGroup in groupedBySource)
            {
                var candidates = sourceGroup.ToArray();

                // A source suffix with a single candidate needs no tie-breaking.
                if (candidates.Length == 1)
                {
                    selected.Add(candidates[0]);
                    continue;
                }

                // Prefer the higher Ok; on equal Ok prefer the lower Fail.
                // NOTE(review): BestElementByCompare is defined elsewhere; presumably it returns
                // the element the comparer ranks first - confirm.
                var winner = candidates.BestElementByCompare(
                    (x, y) => x.Ok != y.Ok ? x.Ok > y.Ok : x.Fail < y.Fail);

                // The winner must also have the fewest failures, otherwise the data is ambiguous.
                var lowestFail = candidates.Min(candidate => candidate.Fail);
                if (winner.Fail > lowestFail)
                {
                    throw new InvalidOperationException(jsonSerializer.SerializeUserFriendly(candidates));
                }

                selected.Add(winner);
            }

            // One unit per entry plus the lengths of both suffixes.
            var suffixChars = selected.Sum(entry => entry.SuffixFrom.Length + entry.SuffixTo.Length);
            var totalLen = selected.Count + suffixChars;

            Console.WriteLine($"{fileName}, min ratio {minRatio}, loaded {selected.Count} affixes to replace, length {totalLen}");

            var replaceMap = selected.ToDictionary(entry => entry.SuffixFrom, entry => entry.SuffixTo);
            return replaceMap;
        }
        /// <summary>
        /// Builds every concatenation of three <c>Alphabet</c> entries, obtains a count for each
        /// one from <c>CountOfWordsContaining</c> against <c>words</c>, and writes the resulting
        /// <c>PartStat</c> array to <c>c3.json</c> inside <c>dir</c>.
        /// Each combination is echoed to the console just before it is counted.
        /// </summary>
        private static void SaveC3StatsCount()
        {
            // Every ordered pair of alphabet entries, joined into a string.
            var pairs = (from first in Alphabet
                         from second in Alphabet
                         select first.ToString() + second).ToArray();

            // Every alphabet entry prefixed onto every pair.
            var triples = (from head in Alphabet
                           from pair in pairs
                           select head + pair).ToArray();

            var stats = new PartStat[triples.Length];
            for (var index = 0; index < triples.Length; index++)
            {
                var triple = triples[index];
                Console.WriteLine(triple);
                stats[index] = new PartStat
                {
                    Part = triple,
                    Count = CountOfWordsContaining(words, triple)
                };
            }

            var outputPath = Path.Combine(dir, "c3.json");
            File.WriteAllText(outputPath, jsonSerializer.SerializeUserFriendly(stats));
        }