/// <summary>
/// Loads the false-positive list from <c>C:\temp\ID160-false-positives.txt</c>.
/// </summary>
/// <returns>
/// Every line of the file for which <c>SampleSplitter.PreExcludeValue</c> returns
/// <c>false</c>, in file order, materialized as an array. Duplicates are kept
/// (de-duplication is intentionally not applied).
/// </returns>
public static IEnumerable<string> GetFalsePositives()
{
    var sourcePath = @"C:\temp\ID160-false-positives.txt";

    var accepted =
        from entry in File.ReadLines(sourcePath)
        where !SampleSplitter.PreExcludeValue(entry)
        select entry;

    // Materialize eagerly so the file is read exactly once, here.
    return accepted.ToArray();
}
/// <summary>
/// Loads the word list from <c>c:\temp\words.txt</c>.
/// </summary>
/// <returns>
/// Every line of the file for which <c>SampleSplitter.PreExcludeValue</c> returns
/// <c>false</c>, in file order, materialized as an array. Duplicates are kept
/// (de-duplication is intentionally not applied).
/// </returns>
public static IEnumerable<string> GetWords()
{
    var sourcePath = @"c:\temp\words.txt";

    var accepted =
        from entry in File.ReadLines(sourcePath)
        where !SampleSplitter.PreExcludeValue(entry)
        select entry;

    // Materialize eagerly so the file is read exactly once, here.
    return accepted.ToArray();
}
/// <summary>
/// Reads <c>c:\temp\words.txt</c> and drops every line that
/// <c>SampleSplitter.PreExcludeValue</c> flags.
/// </summary>
/// <returns>
/// The surviving lines in file order, materialized as an array.
/// Duplicates are kept (de-duplication is intentionally not applied).
/// </returns>
private static IEnumerable<string> GetWordsPreExcluded()
{
    var sourcePath = @"c:\temp\words.txt";

    var kept =
        from candidate in File.ReadLines(sourcePath)
        where !SampleSplitter.PreExcludeValue(candidate)
        select candidate;

    // Materialize eagerly so the file is read exactly once, here.
    return kept.ToArray();
}
/// <summary>
/// Rebuilds <c>sampleSplitter</c> and <c>modelList</c> from the current
/// <c>samples</c>: a new <c>SampleSplitter</c> is created over
/// <c>samples</c> and <c>NumOfPhases</c>, and one
/// <c>Regression.GetModel</c> result is stored per element it yields.
/// </summary>
private void CalculateModel()
{
    sampleSplitter = new SampleSplitter(samples, NumOfPhases);

    // Reset first, then fill in enumeration order.
    modelList = new List<List<double>>();

    foreach (var subset in sampleSplitter)
    {
        var model = Regression.GetModel(subset);
        modelList.Add(model);
    }
}
/// <summary>
/// Runs one brute-force batch of random regex patterns and merges the results
/// into <c>c:/temp/hc-regex-bruteforce.txt</c>.
/// </summary>
/// <remarks>
/// Steps:
/// 1. Load the fake words and real words, dropping values flagged by
///    <c>SampleSplitter.PreExcludeValue</c>.
/// 2. Read the existing result lines; the first <c>;</c>-separated field of
///    each line is treated as an already-evaluated pattern.
/// 3. Generate a batch of new patterns via <c>GetRandomPattern</c> and score
///    each one with <c>EvalRegex</c> (scoring runs in parallel).
/// 4. Re-sort all lines by <c>diffRatio * max(boost - 0.7, 0)</c>, descending,
///    and write the file back.
/// The results file must already exist; <c>File.ReadAllLines</c> throws if it
/// does not. Every line must have at least five <c>;</c>-separated fields.
/// </remarks>
public void RegexFinder()
{
    var fakes = DataGenerator.FakeWords.Value
        .Where(w => !SampleSplitter.PreExcludeValue(w))
        .ToArray();

    var words = GetWords()
        .Where(w => !SampleSplitter.PreExcludeValue(w))
        .ToArray();

    var path = @"c:/temp/hc-regex-bruteforce.txt";

    // To start from an empty results file, create it once with:
    // File.Create(path).Dispose();
    var lines = File.ReadAllLines(path).ToList();

    var done = lines
        .Select(line => line.Split(';')[0])
        .ToList();

    // To re-score the stored patterns instead of keeping their old results:
    // lines = done.Select(pattern => EvalRegex(fakes, words, pattern)).ToList();

    var random = new Random();
    var batchSize = 16;

    // System.Random is not thread-safe, and 'done' is a plain List, so the
    // patterns are generated sequentially on this thread. Only the scoring,
    // which does not touch 'random' or 'done', is fanned out in parallel.
    var patterns = Enumerable.Range(0, batchSize)
        .Select(_ => GetRandomPattern(done, random))
        .ToArray();

    var newLines = patterns
        .AsParallel()
        .Select(pattern => EvalRegex(fakes, words, pattern))
        .ToArray();

    lines.AddRange(newLines);

    // Fields 1..4 are parsed after Slice(0, -1).
    // NOTE(review): Slice(0, -1) presumably strips a trailing unit character
    // (e.g. '%') - confirm against EvalRegex's output format.
    // NOTE(review): double.Parse uses the current culture; this assumes
    // EvalRegex formats its numbers with the same culture - confirm.
    lines = lines
        .Select(line => line.Split(';'))
        .Select(line => new
        {
            p = String.Join(";", line),
            fakeRatio = double.Parse(line[1].Slice(0, -1)),
            wordRatio = double.Parse(line[2].Slice(0, -1)),
            boost = double.Parse(line[3].Slice(0, -1)),
            diffRatio = double.Parse(line[4].Slice(0, -1)),
        })
        // Patterns with boost <= 0.7 all score 0 and sink to the bottom.
        .OrderByDescending(x => x.diffRatio * Math.Max(x.boost - 0.7, 0))
        .Select(x => x.p)
        .ToList();

    File.WriteAllLines(path, lines);
}
/// <summary>
/// Dumps the 96 most frequent samples produced by
/// <c>SampleSplitter.SplitSamples(prefix, 2)</c>, where each prefix is the
/// first (at most) six characters of a word from
/// <c>GetWordsPreExcluded</c>. Output is ordered from most to least frequent.
/// </summary>
public void OutputMostFreq2Letters()
{
    // Only the leading six characters of each word take part in the count.
    var prefixes = GetWordsPreExcluded()
        .Select(word => word.Length > 6 ? word.Substring(0, 6) : word);

    var pieces = prefixes
        .SelectMany(prefix => SampleSplitter.SplitSamples(prefix, 2))
        .ToArray();

    // Tally how often each distinct sample occurs.
    var tallies = pieces
        .GroupBy(piece => piece)
        .Select(bucket => new { Sample = bucket.Key, Count = bucket.Count() })
        .ToArray();

    // Keep the 96 most frequent entries (a cut-off of 26 was used previously).
    var mostFrequent = tallies
        .OrderByDescending(entry => entry.Count)
        .Take(96)
        .ToArray();

    Dump(mostFrequent.Select(entry => entry.Sample));
}