/// <summary>
/// Loads an array of <c>SubstData</c> entries from a JSON file and reduces it to a
/// map from <c>SuffixFrom</c> to <c>SuffixTo</c>, keeping one entry per distinct
/// <c>SuffixFrom</c>.
/// </summary>
/// <param name="words">Not used by this method.</param>
/// <param name="fileName">File name, combined with <c>dir</c> to locate the JSON file.</param>
/// <param name="minRatio">
/// An entry is kept only when <c>Ok &gt;= Fail * minRatio</c>.
/// </param>
/// <returns>A dictionary keyed by <c>SuffixFrom</c> with <c>SuffixTo</c> as the value.</returns>
/// <exception cref="InvalidOperationException">
/// Several entries share a <c>SuffixFrom</c> and the one chosen by
/// <c>BestElementByCompare</c> has a larger <c>Fail</c> than the smallest <c>Fail</c>
/// among them. The message is the user-friendly serialization of those entries.
/// </exception>
public Dictionary<string, string> GetAffixReplaceMap(string[] words, string fileName, double minRatio)
{
    var path = Path.Combine(dir, fileName);
    var loaded = jsonSerializer.Deserialize<SubstData[]>(File.ReadAllText(path));

    var selected = loaded
        .Where(candidate => candidate.Ok >= candidate.Fail * minRatio)
        .GroupBy(candidate => candidate.SuffixFrom)
        .Select(bucket =>
        {
            var rivals = bucket.ToArray();
            if (rivals.Length != 1)
            {
                // Prefer the higher Ok; on equal Ok prefer the lower Fail.
                // NOTE(review): presumably BestElementByCompare returns the element
                // that wins this pairwise comparison - confirm against the helper.
                var winner = rivals.BestElementByCompare(
                    (a, b) => (a.Ok > b.Ok) || (a.Ok == b.Ok && a.Fail < b.Fail));

                // Sanity check: the chosen entry must also have the fewest failures.
                var lowestFail = rivals.Min(r => r.Fail);
                if (winner.Fail > lowestFail)
                {
                    throw new InvalidOperationException(jsonSerializer.SerializeUserFriendly(rivals));
                }

                return winner;
            }

            return rivals[0];
        })
        .ToArray();

    // One unit per entry plus the characters of both suffixes.
    var totalLen = selected.Length + selected.Sum(s => s.SuffixFrom.Length + s.SuffixTo.Length);
    Console.WriteLine($"{fileName}, min ratio {minRatio}, loaded {selected.Length} affixes to replace, length {totalLen}");

    return selected.ToDictionary(s => s.SuffixFrom, s => s.SuffixTo);
}
/// <summary>
/// Issues <paramref name="requestCount"/> downloads against <c>baseUrl</c>, deserializes
/// each response as a string-to-bool dictionary, and tallies how many entries are
/// <c>true</c> ("words") versus <c>false</c> ("not words"). Per-request counts and the
/// overall totals are written to the console.
/// </summary>
/// <param name="jsonSerializer">Serializer used to parse each downloaded JSON payload.</param>
/// <param name="requestCount">Number of downloads to perform.</param>
/// <remarks>
/// The final <c>wordsMean</c> is a floating-point division, so it is NaN when no
/// entries were counted at all.
/// </remarks>
public static void CalcWordsCount(JsonSerializerMaster jsonSerializer, int requestCount)
{
    var totalWords = 0;
    var totalNotWords = 0;

    for (var i = 0; i < requestCount; i++)
    {
        // A fresh client is created and disposed for every request.
        using (var client = new WebClient())
        {
            client.BaseAddress = baseUrl;

            // Empty relative address: the request is made with only BaseAddress set.
            var flags = jsonSerializer.Deserialize<Dictionary<string, bool>>(client.DownloadString(""));

            var wordsHere = flags.Count(entry => entry.Value);
            var notWordsHere = flags.Count - wordsHere;

            totalWords += wordsHere;
            totalNotWords += notWordsHere;

            Console.WriteLine($"[{i}]: words {wordsHere}, not words {notWordsHere}");
        }
    }

    // Member names are spelled out so the printed anonymous object keeps its labels.
    Console.WriteLine(new
    {
        words = totalWords,
        notWords = totalNotWords,
        wordsMean = (double)totalWords / (totalWords + totalNotWords)
    });
}
/// <summary>
/// Reads <c>word_features.json</c> from <paramref name="dir"/> and deserializes it
/// into an array of <c>WordFeatureRow</c>.
/// </summary>
/// <param name="jsonSerializer">Serializer used to parse the file contents.</param>
/// <param name="dir">Directory that contains <c>word_features.json</c>.</param>
/// <returns>The deserialized rows, as returned by the serializer.</returns>
public static WordFeatureRow[] Read(JsonSerializerMaster jsonSerializer, string dir)
{
    var path = Path.Combine(dir, "word_features.json");
    var json = File.ReadAllText(path);
    return jsonSerializer.Deserialize<WordFeatureRow[]>(json);
}
/// <summary>
/// Reads the given file from <c>dir</c> and deserializes its JSON content into an
/// array of <c>PartStat</c>.
/// </summary>
/// <param name="file">File name, combined with <c>dir</c> to form the full path.</param>
/// <returns>The deserialized entries, as returned by the serializer.</returns>
/// <remarks>
/// Both <c>dir</c> and <c>jsonSerializer</c> are declared outside this method.
/// </remarks>
private static PartStat[] LoadStatsCount(string file)
{
    var path = Path.Combine(dir, file);
    return jsonSerializer.Deserialize<PartStat[]>(File.ReadAllText(path));
}