Beispiel #1
0
        /// <summary>
        /// Builds the set of samples to exclude: every sample whose associated value is
        /// less than or equal to <c>parameters.MaxCount</c>.
        /// </summary>
        /// <param name="parameters">
        /// Exclusion settings. When <c>Disabled</c> is set, nothing is computed and an
        /// empty set is returned.
        /// </param>
        /// <returns>
        /// An ordinal-compared set of sample keys; empty when exclusion is disabled.
        /// </returns>
        private static HashSet<string> GetExcludedSamples(SampleExclusionParameters parameters)
        {
            var excluded = new HashSet<string>(StringComparer.Ordinal);

            if (parameters.Disabled)
            {
                return excluded;
            }

            // Values returned by GetSamples are presumably occurrence counts per sample
            // (they are compared against MaxCount below) — confirm against GetSamples.
            var sampleCounts = GetSamples(
                parameters.SampleSize,
                parameters.BeginAnchor,
                parameters.EndAnchor,
                parameters.GetMinWordSize(),
                parameters.StartIndex,
                parameters.Length);

            var candidates = GetAllPossibleSamples(
                parameters.SampleSize,
                parameters.BeginAnchor,
                parameters.EndAnchor);

            // Seed every possible sample that is not already present with a value of 0,
            // so that it takes part in the threshold check below.
            foreach (var candidate in candidates)
            {
                if (sampleCounts.ContainsKey(candidate))
                {
                    continue;
                }

                sampleCounts.Add(candidate, 0);
            }

            // Keep only the keys whose value does not exceed the configured maximum.
            foreach (var entry in sampleCounts)
            {
                if (entry.Value <= parameters.MaxCount)
                {
                    excluded.Add(entry.Key);
                }
            }

            return excluded;
        }
        /// <summary>
        /// Checks that none of the samples split out of <paramref name="value"/> is
        /// contained in <paramref name="data"/>.
        /// </summary>
        /// <param name="value">The text handed to <c>SampleSplitter.SplitSamples</c>.</param>
        /// <param name="data">The set of samples that must not appear in the value.</param>
        /// <param name="parameters">
        /// Exclusion settings; they supply the splitting arguments. When <c>Disabled</c>
        /// is set the check is skipped.
        /// </param>
        /// <returns>
        /// <c>true</c> when exclusion is disabled or no split sample is found in
        /// <paramref name="data"/>; <c>false</c> as soon as one is found.
        /// </returns>
        private static bool MatchSampleExclusion(string value, HashSet<string> data, SampleExclusionParameters parameters)
        {
            if (!parameters.Disabled)
            {
                var pieces = SampleSplitter.SplitSamples(
                    value,
                    parameters.SampleSize,
                    parameters.BeginAnchor,
                    parameters.EndAnchor,
                    parameters.GetMinWordSize(),
                    parameters.StartIndex,
                    parameters.Length);

                foreach (var piece in pieces)
                {
                    // A single hit is enough to reject the value.
                    if (data.Contains(piece))
                    {
                        return false;
                    }
                }
            }

            return true;
        }