/// <summary>
/// Baseline experiment: puts every entry of <c>words</c> into a single
/// <c>BloomFilter</c> and scores the filter's membership test with <c>CalcErrors</c>.
/// </summary>
private static void BloomTrivial()
{
    Console.WriteLine("Bloom trivial");

    // NOTE(review): the two constructor arguments are presumably the filter size
    // and the number of hash functions - confirm against BloomFilter.
    var bloom = new BloomFilter(60000 * 8, 1);
    foreach (var entry in words)
    {
        bloom.Add(entry);
    }

    // The membership test itself is the predicate under evaluation.
    CalcErrors(candidate => bloom.Contains(candidate));
}
/// <summary>
/// Builds a Bloom filter from the <c>Word</c> of every row in <c>wordFeatures</c>,
/// hands it to <c>solutionCreator.WriteData</c>, and then scores a three-stage
/// predicate with <c>CalcErrors</c>: Bloom filter membership, then
/// <paramref name="isWord_Word"/>, then <paramref name="isUnknown_Word"/>.
/// Finally prints how many candidates each stage rejected.
/// </summary>
/// <param name="isWord_Word">Second-stage predicate; a false result rejects the candidate.</param>
/// <param name="isUnknown_Word">Third-stage predicate; a false result rejects the candidate.</param>
/// <param name="bloomFilterSize">First argument passed to the <c>BloomFilter</c> constructor.</param>
private static void ApostropheEnd_Func_Bloom(Func<WordFeatureRow, bool> isWord_Word, Func<WordFeatureRow, bool> isUnknown_Word, int bloomFilterSize)
{
    // Alternative filter left disabled by the original author:
    // new ArrayBloomFilter(6000, 60000 * 8, 2)
    var filter = new BloomFilter(bloomFilterSize, 1);
    foreach (var feature in wordFeatures)
    {
        filter.Add(feature.Word);
    }

    filter.ShowInfo();
    solutionCreator.WriteData(filter);

    // Counts, per rejection reason, how many candidates were turned down.
    var negativeStats = new WeightedSet<string>();
    CalcErrors(word =>
    {
        // Normalise the candidate the same way before every check.
        var w = GetApostropheEndBase(word);
        w = substCalc.SubstituteWordByCommands(w);

        if (!filter.Contains(w))
        {
            negativeStats.Increment("!filter.Contains", 1);
            return false;
        }

        var feature = new WordFeatureRow(w);
        if (!isWord_Word(feature))
        {
            negativeStats.Increment("!isWord_Word", 1);
            return false;
        }

        if (!isUnknown_Word(feature))
        {
            negativeStats.Increment("!isUnknown_Word", 1);
            return false;
        }

        return true;
    });

    Console.WriteLine("Negative stats:");
    foreach (var kvp in negativeStats)
    {
        Console.WriteLine(new { reason = kvp.Key, count = kvp.Value });
    }

    Console.WriteLine();
}
/// <summary>
/// Experiment: stores only the entries of <c>words</c> that do not end in <c>'s</c>
/// in a Bloom filter, then scores a predicate that strips a trailing <c>'s</c>
/// from the candidate before querying the filter.
/// </summary>
private static void BloomAndApostropheS()
{
    const string possessiveSuffix = "'s";

    Console.WriteLine("Bloom and 's trick");
    var filter = new BloomFilter(60000 * 8, 1);
    var n = 0;
    foreach (var s in words)
    {
        // Ordinal comparison: the suffix must be exactly the two characters that
        // the lookup below strips. A culture-sensitive match can succeed on
        // strings whose last two UTF-16 units are not literally "'s".
        if (s.EndsWith(possessiveSuffix, StringComparison.Ordinal))
        {
            continue;
        }

        filter.Add(s);
        n++;
    }

    Console.WriteLine($"Bloom filter contains {n} words");

    CalcErrors(w => w.EndsWith(possessiveSuffix, StringComparison.Ordinal)
        ? filter.Contains(w.Substring(0, w.Length - possessiveSuffix.Length))
        : filter.Contains(w));
}
/// <summary>
/// Builds three rejection heuristics from <c>wordFeatures</c> and runs
/// <c>ApostropheEnd_Func_Bloom</c> with them combined as the third-stage predicate:
/// the 2-character strings returned by <c>infixCalc.CalcInfixFreqs</c>,
/// alphabet letters that never occur at a given position, and a small Bloom
/// filter of fixed-length substrings (with the current configuration, the first
/// 3 characters of words of length 4 or more).
/// The 2-character set and the first substring filter are also stored on
/// <c>solutionCreator</c>.
/// </summary>
/// <param name="isWord_Word">Passed through unchanged as the second-stage predicate.</param>
private static void ApostropheEnd_C2_LetterPositions_BloomPrefix_Func_Bloom(Func<WordFeatureRow, bool> isWord_Word)
{
    const int infixFreqsThreshold = 15;

    // Subtract impossible (and unlikely) 2-character substrings.
    var notWordsC2strings = infixCalc.CalcInfixFreqs(wordFeatures, infixFreqsThreshold);
    solutionCreator.NotWordsC2strings = notWordsC2strings;

    // Subtract characters that are impossible at specific positions:
    // entry i holds the letters of Alphabet that no known word has at index i.
    var notPresentLetters = Enumerable.Range(0, wordFeatures.Max(f => f.Length))
        .Select(i => wordFeatures.Where(f => f.Word.Length > i).Select(f => f.Word[i]).Distinct().ToArray())
        .Select(a => new string(Alphabet.Except(a).ToArray()))
        .ToArray();
    for (var i = 0; i < notPresentLetters.Length; i++)
    {
        if (notPresentLetters[i].Length > 0)
        {
            Console.WriteLine($"On position {i} can't use letters {notPresentLetters[i]}");
        }
    }

    // Bloom filter of possible substrings. The three arrays are parallel:
    // a negative start means "take cutLength characters from the end of the word".
    var substrBloomStarts = new[] { 0 };
    var substrBloomCutLength = new[] { 3 };
    var substrBloomMinLength = new[] { 4 };
    var substrBlooms = Enumerable.Range(0, substrBloomStarts.Length)
        .Select(i =>
        {
            var start = substrBloomStarts[i];
            var minLength = substrBloomMinLength[i];
            var cutLength = substrBloomCutLength[i];
            var substrBloom = new BloomFilter(1000 * 8, 1);
            var substrs = wordFeatures.Where(f => f.Length >= minLength)
                .Select(f => start < 0
                    ? f.Word.Substring(f.Word.Length - cutLength, cutLength)
                    : f.Word.Substring(start, cutLength))
                .Distinct().ToArray();
            foreach (var substr in substrs)
            {
                substrBloom.Add(substr);
            }

            substrBloom.ShowInfo();
            return substrBloom;
        }).ToArray();
    solutionCreator.PrefixFilter = substrBlooms[0];

    ApostropheEnd_Func_Bloom(
        isWord_Word,
        w =>
        {
            if (w.C2Parts.Any(c2 => notWordsC2strings.Contains(c2)))
            {
                return false;
            }

            var word = w.Word;

            // The per-position table only covers positions up to the longest
            // known word. A longer candidate cannot be a known word, and indexing
            // the table with it would throw IndexOutOfRangeException.
            if (word.Length > notPresentLetters.Length)
            {
                return false;
            }

            for (var i = 0; i < word.Length; i++)
            {
                if (notPresentLetters[i].Contains(word[i]))
                {
                    return false;
                }
            }

            for (var i = 0; i < substrBloomStarts.Length; i++)
            {
                var start = substrBloomStarts[i];
                var minLength = substrBloomMinLength[i];
                var cutLength = substrBloomCutLength[i];
                if (word.Length >= minLength
                    && !substrBlooms[i].Contains(start < 0
                        ? word.Substring(word.Length - cutLength, cutLength)
                        : word.Substring(start, cutLength)))
                {
                    return false;
                }
            }

            return true;
        },
        mainBloomLength * 8);
}