Beispiel #1
0
 /// <summary>
 /// Loads the false-positive list from disk, one entry per line, and drops
 /// every line for which <c>SampleSplitter.PreExcludeValue</c> returns true.
 /// </summary>
 /// <returns>The surviving lines, fully materialised as an array. Duplicates are kept.</returns>
 public static IEnumerable <string> GetFalsePositives()
 {
     var rawLines = File.ReadLines(@"C:\temp\ID160-false-positives.txt");
     var accepted = rawLines.Where(line => !SampleSplitter.PreExcludeValue(line));

     // Materialise eagerly so the file is read exactly once, here.
     return accepted.ToArray();
 }
Beispiel #2
0
 /// <summary>
 /// Loads the word list from disk, one word per line, and drops every line
 /// for which <c>SampleSplitter.PreExcludeValue</c> returns true.
 /// </summary>
 /// <returns>The surviving words, fully materialised as an array. Duplicates are kept.</returns>
 public static IEnumerable <string> GetWords()
 {
     var rawLines = File.ReadLines(@"c:\temp\words.txt");
     var accepted = rawLines.Where(word => !SampleSplitter.PreExcludeValue(word));

     // Materialise eagerly so the file is read exactly once, here.
     return accepted.ToArray();
 }
Beispiel #3
0
 /// <summary>
 /// Reads the word list from disk and keeps only the lines that
 /// <c>SampleSplitter.PreExcludeValue</c> does not reject.
 /// </summary>
 /// <returns>The remaining words as an array. No de-duplication is applied.</returns>
 private static IEnumerable <string> GetWordsPreExcluded()
 {
     var allWords = File.ReadLines(@"c:\temp\words.txt");
     var kept     = allWords.Where(candidate => !SampleSplitter.PreExcludeValue(candidate));

     // ToArray forces the whole file to be read before returning.
     return kept.ToArray();
 }
        /// <summary>
        /// Rebuilds <c>modelList</c> from scratch: creates a new
        /// <c>SampleSplitter</c> over <c>samples</c> and <c>NumOfPhases</c>,
        /// then stores one <c>Regression.GetModel</c> result for each sample
        /// list the splitter yields, in enumeration order.
        /// </summary>
        private void CalculateModel()
        {
            sampleSplitter = new SampleSplitter(samples, NumOfPhases);
            modelList      = new List<List<double>>();

            foreach (var part in sampleSplitter)
            {
                var model = Regression.GetModel(part);
                modelList.Add(model);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Runs one brute-force round of the regex search: draws 16 random
        /// patterns, evaluates each against the fake words and the real words,
        /// merges the new result lines with the ones already stored in
        /// <c>c:/temp/hc-regex-bruteforce.txt</c>, re-ranks everything and
        /// writes the file back.
        /// </summary>
        /// <remarks>
        /// Each result line is a ';'-separated record whose first field is the
        /// pattern and whose fields 1..4 are parsed as numbers after
        /// <c>Slice(0, -1)</c> (an extension defined elsewhere; presumably it
        /// strips a trailing unit character - confirm against EvalRegex).
        /// </remarks>
        public void RegexFinder()
        {
            var fakes = DataGenerator.FakeWords.Value
                        .Where(w => !SampleSplitter.PreExcludeValue(w))
                        .ToArray();

            var words = GetWords()
                        .Where(w => !SampleSplitter.PreExcludeValue(w))
                        .ToArray();

            var path = @"c:/temp/hc-regex-bruteforce.txt";

            // On the very first run the results file does not exist yet; start
            // from an empty list instead of throwing FileNotFoundException.
            var lines = (File.Exists(path) ? File.ReadAllLines(path) : new string[0]).ToList();

            // Patterns already evaluated in earlier rounds (first field of each line).
            var done = lines
                       .Select(line => line.Split(';')[0])
                       .ToList();

            var random = new Random();
            var i      = 16;

            // System.Random is not thread-safe, and 'done' is shared as well, so
            // the patterns are drawn sequentially on this thread. Only the
            // evaluation, which receives read-only inputs here, runs in parallel.
            var patterns = Enumerable.Range(0, i)
                           .Select(_ => GetRandomPattern(done, random))
                           .ToArray();

            var newLines = patterns
                           .AsParallel()
                           .Select(pattern => EvalRegex(fakes, words, pattern))
                           .ToArray();

            lines.AddRange(newLines);

            // Rank by diffRatio, weighted by how far boost exceeds 0.7
            // (entries with boost <= 0.7 get weight 0 and sink to the bottom).
            lines = lines
                    .Select(line => line.Split(';'))
                    .Select(line => new
            {
                p         = String.Join(";", line),
                fakeRatio = double.Parse(line[1].Slice(0, -1)),
                wordRatio = double.Parse(line[2].Slice(0, -1)),
                boost     = double.Parse(line[3].Slice(0, -1)),
                diffRatio = double.Parse(line[4].Slice(0, -1)),
            })
                    .OrderByDescending(x => x.diffRatio * Math.Max(x.boost - 0.7, 0))
                    .Select(x => x.p)
                    .ToList();

            File.WriteAllLines(path, lines);
        }
Beispiel #6
0
        /// <summary>
        /// Dumps the 96 most frequent samples produced by
        /// <c>SampleSplitter.SplitSamples(prefix, 2)</c> over the first (at most)
        /// six characters of every pre-excluded word, most frequent first.
        /// </summary>
        public void OutputMostFreq2Letters()
        {
            // Only the leading six characters of each word take part.
            var prefixes = GetWordsPreExcluded()
                           .Select(word => word.Substring(0, Math.Min(6, word.Length)));

            var pieces = prefixes
                         .SelectMany(prefix => SampleSplitter.SplitSamples(prefix, 2))
                         .ToArray();

            // Count how often each distinct sample occurs.
            var tallies = pieces
                          .GroupBy(piece => piece)
                          .Select(bucket => new { Sample = bucket.Key, Count = bucket.Count() })
                          .ToArray();

            var samples = tallies
                          .OrderByDescending(tally => tally.Count)
                          .Take(96)
                          .ToArray();

            Dump(samples.Select(s => s.Sample));
        }