/// <summary>
/// Builds the set of sample keys that are excluded under the given parameters.
/// </summary>
/// <remarks>
/// Every sample returned by <c>GetAllPossibleSamples</c> that is missing from the
/// <c>GetSamples</c> result is first added to that result with a value of 0 (the
/// collection returned by <c>GetSamples</c> is mutated in place). Each entry whose
/// value is less than or equal to <c>parameters.MaxCount</c> then ends up in the
/// returned set.
/// NOTE(review): the stored value is presumably an occurrence count - confirm
/// against <c>GetSamples</c>.
/// </remarks>
/// <param name="parameters">
/// Exclusion settings. When <c>Disabled</c> is true an empty set is returned and
/// no samples are computed.
/// </param>
/// <returns>A set of sample keys that compares strings ordinally.</returns>
private static HashSet<string> GetExcludedSamples(SampleExclusionParameters parameters)
{
    if (parameters.Disabled)
    {
        return new HashSet<string>(StringComparer.Ordinal);
    }

    var sampleValues = GetSamples(
        parameters.SampleSize,
        parameters.BeginAnchor,
        parameters.EndAnchor,
        parameters.GetMinWordSize(),
        parameters.StartIndex,
        parameters.Length);

    var candidates = GetAllPossibleSamples(
        parameters.SampleSize,
        parameters.BeginAnchor,
        parameters.EndAnchor);

    // Register every candidate that was not already present, with a value of 0,
    // so that it takes part in the threshold filter below.
    foreach (var candidate in candidates)
    {
        if (sampleValues.ContainsKey(candidate))
        {
            continue;
        }

        sampleValues.Add(candidate, 0);
    }

    var atOrBelowThreshold =
        from entry in sampleValues
        where entry.Value <= parameters.MaxCount
        select entry.Key;

    return new HashSet<string>(atOrBelowThreshold, StringComparer.Ordinal);
}
/// <summary>
/// Checks a value against a set of excluded samples.
/// </summary>
/// <param name="value">Text that is split into samples via <c>SampleSplitter.SplitSamples</c>.</param>
/// <param name="data">Set of samples; any split sample found in it rejects the value.</param>
/// <param name="parameters">
/// Exclusion settings passed on to the splitter. When <c>Disabled</c> is true the
/// value is accepted without being split.
/// </param>
/// <returns>
/// <c>false</c> as soon as one sample of <paramref name="value"/> is contained in
/// <paramref name="data"/>; otherwise <c>true</c>.
/// </returns>
private static bool MatchSampleExclusion(string value, HashSet<string> data, SampleExclusionParameters parameters)
{
    if (!parameters.Disabled)
    {
        var pieces = SampleSplitter.SplitSamples(
            value,
            parameters.SampleSize,
            parameters.BeginAnchor,
            parameters.EndAnchor,
            parameters.GetMinWordSize(),
            parameters.StartIndex,
            parameters.Length);

        foreach (var piece in pieces)
        {
            // A single hit is enough to reject the value.
            if (data.Contains(piece))
            {
                return false;
            }
        }
    }

    return true;
}