Пример #1
0
        /// <summary>
        /// Writes the combined "data" file into <paramref name="dataPath"/>. The file consists of the binary
        /// data (only when that file exists), then the text data, then the JavaScript blob template with its
        /// two size placeholders replaced by the actual lengths.
        /// </summary>
        /// <param name="parameters">Generation parameters; forwarded to <c>GetJsBlobPath</c> to pick the template.</param>
        /// <param name="binaryData">Binary data file; skipped when it does not exist.</param>
        /// <param name="txtData">Text data file; its length and content are always read.</param>
        /// <param name="dataPath">Directory that receives the combined file.</param>
        /// <returns>A <see cref="FileInfo"/> for the file that was written.</returns>
        private static FileInfo CreateFullBlob(GenerationParameters parameters, FileInfo binaryData, FileInfo txtData, string dataPath)
        {
            var blobPath = Path.Combine(dataPath, "data");

            // Remove any previous output first: File.OpenWrite below would not truncate it.
            if (File.Exists(blobPath))
            {
                File.Delete(blobPath);
            }

            var binarySize = 0L;
            if (binaryData.Exists)
            {
                binarySize = binaryData.Length;
            }

            // Load the script template and patch both size placeholders.
            var template   = File.ReadAllText(GetJsBlobPath(parameters, txtData));
            var withTxt    = template.Replace(DATATXT_SIZE_PLACEHOLDER, txtData.Length.ToString());
            var scriptText = withTxt.Replace(DATABIN_SIZE_PLACEHOLDER, binarySize.ToString());

            using (var output = File.OpenWrite(blobPath))
            using (var textInput = File.OpenRead(txtData.FullName))
            using (var scriptWriter = new StreamWriter(output, Encoding.ASCII))
            {
                if (binaryData.Exists)
                {
                    using (var binaryInput = File.OpenRead(binaryData.FullName))
                    {
                        binaryInput.CopyTo(output);
                    }
                }

                textInput.CopyTo(output);

                // Written last so that the script text follows the raw bytes copied above.
                scriptWriter.Write(scriptText);
            }

            return new FileInfo(blobPath);
        }
Пример #2
0
        /// <summary>
        /// Builds a <see cref="GeneratedData"/> instance for the given parameters: computes the three
        /// excluded-sample collections and the two excluded-pair collections, then passes the result through
        /// <c>RemoveRedundantData</c> and <c>BuildBloomFilter</c>.
        /// </summary>
        /// <param name="generationParameters">Settings for every exclusion stage and for the bloom filter.</param>
        /// <returns>The populated data object.</returns>
        public GeneratedData Generate(GenerationParameters generationParameters)
        {
            using (new Timer("DataGenerator.Generate"))
            {
                // Sample exclusions first, then pair exclusions.
                var samplesFirst  = GetExcludedSamples(generationParameters.SampleExclusion_1);
                var samplesSecond = GetExcludedSamples(generationParameters.SampleExclusion_2);
                var samplesThird  = GetExcludedSamples(generationParameters.SampleExclusion_3);

                var pairsFirst  = GetExcludedPairs(generationParameters.PairExclusion_1);
                var pairsSecond = GetExcludedPairs(generationParameters.PairExclusion_2);

                var result = new GeneratedData
                {
                    Parameters        = generationParameters,
                    ExcludedSamples_1 = samplesFirst,
                    ExcludedSamples_2 = samplesSecond,
                    ExcludedSamples_3 = samplesThird,
                    ExcludedPairs_1   = pairsFirst,
                    ExcludedPairs_2   = pairsSecond,
                };

                RemoveRedundantData(result);
                BuildBloomFilter(generationParameters.BloomFilter, result);

                return result;
            }
        }
Пример #3
0
 /// <summary>
 /// Selects the JavaScript blob template path: the bloom-filter-only template when
 /// <paramref name="parameters"/> reports bloom-filter-only mode, the regular template otherwise.
 /// </summary>
 /// <param name="parameters">Generation parameters that decide which template is used.</param>
 /// <param name="txtData">Text data file; currently not consulted.</param>
 /// <returns>Path of the template file to read.</returns>
 private static string GetJsBlobPath(GenerationParameters parameters, FileInfo txtData)
 {
     // A disabled branch used to return Settings.JavascriptBlobNoBZ2Path for ".txt" files.
     return parameters.IsBloomFilterOnly()
            ? Settings.JavascriptBlobBloomFilterOnlyPath
            : Settings.JavascriptBlobPath;
 }
 /// <summary>
 /// Deserializes the data stored in the two given files and runs the data-based <c>Test</c> overload on it.
 /// </summary>
 /// <param name="generationParameters">Parameters the data was generated with.</param>
 /// <param name="dataTxtPath">Path of the text data file.</param>
 /// <param name="dataBinPath">Path of the binary data file.</param>
 /// <returns>The result produced by the data-based overload.</returns>
 public TestResult Test(GenerationParameters generationParameters, string dataTxtPath, string dataBinPath)
 {
     using (new Timer("TestExecutor.Test"))
     {
         var loaded = new DataSerializer().Deserialize(generationParameters, dataTxtPath, dataBinPath);

         return Test(loaded);
     }
 }
        /// <summary>
        /// Runs <paramref name="value"/> through the filter stages in a fixed order: pre-exclusion, the three
        /// sample exclusions, the two pair exclusions and finally (unless skipped) the bloom filter.
        /// The first stage that rejects the value increments its counter and ends the check.
        /// </summary>
        /// <param name="data">Generated data holding the exclusion collections and the bloom filter.</param>
        /// <param name="value">Value to check.</param>
        /// <param name="parameters">Parameters for the sample and pair exclusion stages.</param>
        /// <param name="counter">Receives one increment on the counter of the rejecting stage.</param>
        /// <param name="skipBloomFilter">When <c>true</c>, the bloom filter stage is not evaluated.</param>
        /// <returns><c>true</c> when no stage rejected the value; otherwise <c>false</c>.</returns>
        public static bool Match(GeneratedData data, string value, GenerationParameters parameters, MatchCounter counter, bool skipBloomFilter = false)
        {
            if (SampleSplitter.PreExcludeValue(value))
            {
                counter.PreExclusion++;
            }
            else if (!MatchSampleExclusion(value, data.ExcludedSamples_1, parameters.SampleExclusion_1))
            {
                counter.SampleExclusion_1++;
            }
            else if (!MatchSampleExclusion(value, data.ExcludedSamples_2, parameters.SampleExclusion_2))
            {
                counter.SampleExclusion_2++;
            }
            else if (!MatchSampleExclusion(value, data.ExcludedSamples_3, parameters.SampleExclusion_3))
            {
                counter.SampleExclusion_3++;
            }
            else if (!MatchPairExclusion(value, data.ExcludedPairs_1, parameters.PairExclusion_1))
            {
                counter.PairExclusion++;
            }
            else if (!MatchPairExclusion(value, data.ExcludedPairs_2, parameters.PairExclusion_2))
            {
                counter.PairExclusion2++;
            }
            // Note: the bloom filter settings are taken from data.Parameters, not from the parameters argument.
            else if (!skipBloomFilter && !MatchBloomFilter(value, data.BloomFilter, data.Parameters.BloomFilter))
            {
                counter.BloomFilter++;
            }
            else
            {
                // Every stage accepted the value.
                return true;
            }

            return false;
        }
Пример #6
0
        /// <summary>
        /// Concatenates the binary and text data files into one blob (see <c>CreateFullBlob</c>), compresses
        /// it with <c>CompressGzip</c> and reports the compressed file together with the input lengths.
        /// </summary>
        /// <param name="parameters">Generation parameters forwarded to <c>CreateFullBlob</c>.</param>
        /// <param name="dataTxtFilePath">Path of the text data file; its directory receives the blob.</param>
        /// <param name="dataBinaryFilePath">Path of the binary data file; it may be absent.</param>
        /// <returns>
        /// Blob description; <c>DataBinLength</c> is 0 when the binary file does not exist.
        /// </returns>
        internal static DataBlobInfo GenerateDataBlob(GenerationParameters parameters, string dataTxtFilePath, string dataBinaryFilePath)
        {
            var folder = Path.GetDirectoryName(dataTxtFilePath);

            // The text file is used unmodified; a BZip2 pre-compression step for files over 8 KiB is disabled.
            var txtFile = new FileInfo(dataTxtFilePath);
            var binFile = new FileInfo(dataBinaryFilePath);

            var compressedBlob = CompressGzip(CreateFullBlob(parameters, binFile, txtFile, folder));

            return new DataBlobInfo
            {
                BlobFile = compressedBlob,
                DataTxtLength = txtFile.Length,
                DataBinLength = binFile.Exists ? binFile.Length : 0L,
            };
        }
Пример #7
0
        /// <summary>
        /// Reads generated data back from its serialized form.
        /// </summary>
        /// <remarks>
        /// Layout of the text file as consumed here:
        /// <list type="bullet">
        /// <item>One section per entry of <c>parameters.GetSampleExclusions()</c>; each section lists one
        /// excluded sample per line and is terminated by an empty line.</item>
        /// <item>After the last sample section: pair blocks. The first line of a block is the key, the
        /// following lines are its values, and an empty line closes the block. A block goes to
        /// <c>ExcludedPairs_1</c> when the key length equals <c>PairExclusion_1.SampleSize</c>, otherwise to
        /// <c>ExcludedPairs_2</c>.</item>
        /// <item>With <c>DELTA_ENCODING</c> enabled, a line shorter than the sample size is completed with the
        /// leading characters of the previous entry.</item>
        /// </list>
        /// </remarks>
        /// <param name="parameters">Parameters the data was generated with.</param>
        /// <param name="txtPath">Path of the text file holding the exclusion lists.</param>
        /// <param name="binPath">Path of the bloom filter file; only read when the bloom filter is enabled.</param>
        /// <returns>The reconstructed data.</returns>
        /// <exception cref="InvalidDataException">
        /// The text file is malformed: a delta-encoded line has no predecessor, an empty line appears where no
        /// pair block is open, or the file ends inside a pair block.
        /// </exception>
        public GeneratedData Deserialize(GenerationParameters parameters, string txtPath, string binPath)
        {
            var data = new GeneratedData
            {
                Parameters = parameters,
            };

            var prev = default(string);
            var exclusionSampleIndex = 0;

            // Both arrays get a trailing default entry as a sentinel: reaching the last index means that all
            // sample sections have been consumed and the remaining lines are pair blocks.
            var allSampleExclusionParameters = parameters.GetSampleExclusions()
                                               .Concat(default(SampleExclusionParameters).Yield())
                                               .ToArray();

            var allExcludedParameters = data.GetExcludedSamples()
                                        .Concat(default(HashSet<string>).Yield())
                                        .ToArray();

            var excludedSamples           = allExcludedParameters[exclusionSampleIndex];
            var sampleExclusionParameters = allSampleExclusionParameters[exclusionSampleIndex];

            var exclusionPairKey    = default(string);
            var exclusionPairValues = default(List<string>);

            foreach (var line in File.ReadLines(txtPath))
            {
                if (exclusionSampleIndex < allSampleExclusionParameters.Length - 1)
                {
                    // Still inside the sample sections.
                    if (line.Length == 0)
                    {
                        // An empty line ends the current section; delta encoding restarts in the next one.
                        exclusionSampleIndex++;
                        sampleExclusionParameters = allSampleExclusionParameters[exclusionSampleIndex];
                        excludedSamples           = allExcludedParameters[exclusionSampleIndex];
                        prev = null;
                        continue;
                    }

                    var exclusionSample = line;

                    if (DELTA_ENCODING)
                    {
                        if (line.Length < sampleExclusionParameters.SampleSize)
                        {
                            // A shortened line needs a predecessor to borrow its prefix from.
                            if (prev == null)
                            {
                                throw new InvalidDataException("Delta-encoded sample without a preceding sample");
                            }

                            exclusionSample = prev.Substring(0, sampleExclusionParameters.SampleSize - line.Length) + line;
                        }
                    }

                    excludedSamples.Add(exclusionSample);
                    prev = exclusionSample;
                }
                else
                {
                    // Pair blocks: key line, value lines, terminating empty line.
                    if (line.Length == 0)
                    {
                        // An empty line is only valid as the terminator of an open block.
                        if (exclusionPairKey == null)
                        {
                            throw new InvalidDataException("Unexpected empty line in pair exclusion data");
                        }

                        var excludedPairs = exclusionPairKey.Length == data.Parameters.PairExclusion_1.SampleSize ?
                                            data.ExcludedPairs_1 : data.ExcludedPairs_2;
                        excludedPairs.Add(exclusionPairKey, new HashSet<string>(exclusionPairValues, StringComparer.Ordinal));
                        exclusionPairKey    = null;
                        exclusionPairValues = null;
                        prev = null;
                        continue;
                    }
                    else if (exclusionPairKey == null)
                    {
                        // First line of a block is its key; it also seeds delta decoding of the values.
                        exclusionPairKey    = line;
                        exclusionPairValues = new List<string>();
                        prev = line;
                    }
                    else
                    {
                        // Values have the same length as their key.
                        var sampleSize     = exclusionPairKey.Length;
                        var exclusionValue = line;
                        if (DELTA_ENCODING)
                        {
                            if (line.Length < sampleSize)
                            {
                                exclusionValue = prev.Substring(0, sampleSize - line.Length) + line;
                            }
                        }
                        exclusionPairValues.Add(exclusionValue);
                        prev = exclusionValue;
                    }
                }
            }

            // A block that was opened but never closed by an empty line means the file is truncated.
            if (exclusionPairKey != null)
            {
                throw new InvalidDataException("Unexpected end of data");
            }

            if (!parameters.BloomFilter.Disabled)
            {
                data.BloomFilter = BloomFilter.Deserialize(binPath, parameters.BloomFilter.HashFunctionsCount);
            }

            return data;
        }
Пример #8
0
        /// <summary>
        /// Evaluates one candidate parameter set: optionally regenerates and serializes its data, builds the
        /// data blob and the JavaScript file, checks the combined size against the limit, runs the tests and
        /// writes the false positives to the candidate's folder.
        /// </summary>
        /// <param name="candidate">Parameter set to evaluate; its <c>Id</c> names the working folder.</param>
        /// <param name="lock">
        /// Object used to serialize console output between concurrent evaluations. Every console write of this
        /// method happens under this lock so that the coloured result line cannot bleed into other messages.
        /// </param>
        /// <param name="generate">
        /// When <c>true</c>, existing files in the working folder are deleted and the data is generated anew;
        /// when <c>false</c>, the files already present are evaluated.
        /// </param>
        /// <returns>The candidate together with its test result and size information.</returns>
        private static EvaluationResult Evaluate(GenerationParameters candidate, object @lock, bool generate = true)
        {
            // Upper bound for blob plus script size.
            const int maxTotalSize = 64 * 1024;

            using (new Timer(String.Format("TestRunner.Evaluate(ID{0:000})", candidate.Id)))
            {
                try
                {
                    lock (@lock)
                    {
                        Console.WriteLine("Starting evaluation of candidate {0}...", candidate.Id);
                    }

                    var dataPath           = Path.Combine(Settings.TempFolder, String.Format("ID{0:000}", candidate.Id));
                    var dataTxtFilePath    = Path.Combine(dataPath, "data.txt");
                    var dataBinaryFilePath = Path.Combine(dataPath, "data.bin");

                    // Regeneration starts from an empty folder; subdirectories are left alone.
                    if (generate && Directory.Exists(dataPath))
                    {
                        foreach (var file in Directory.GetFiles(dataPath))
                        {
                            File.Delete(file);
                        }
                    }

                    Directory.CreateDirectory(dataPath);

                    if (generate)
                    {
                        var generator = new DataGenerator();
                        var generated = generator.Generate(candidate);

                        var serializer = new DataSerializer();
                        serializer.Serialize(generated, dataTxtFilePath, dataBinaryFilePath);
                    }

                    var dataBlobInfo   = Utils.GenerateDataBlob(candidate, dataTxtFilePath, dataBinaryFilePath);
                    var javascriptFile = Utils.GenerateJavascript(dataPath, dataBlobInfo.DataTxtLength, dataBlobInfo.DataBinLength);
                    var totalSize      = dataBlobInfo.BlobFile.Length + javascriptFile.Length;
                    var sizeSuccess    = totalSize <= maxTotalSize;

                    var executor   = new TestExecutor();
                    var testResult = executor.Test(candidate, dataTxtFilePath, dataBinaryFilePath);

                    lock (@lock)
                    {
                        // Green for a candidate within the size limit, red otherwise.
                        Console.BackgroundColor = sizeSuccess ? ConsoleColor.Green : ConsoleColor.Red;
                        Console.ForegroundColor = sizeSuccess ? ConsoleColor.Black : ConsoleColor.White;
                        Console.WriteLine("Evaluation of candidate {0} finished! Size: {1} Score: {2}",
                                          candidate.Id,
                                          sizeSuccess ? "OK" : "TOO BIG",
                                          testResult.AvgScore);
                        Console.ResetColor();
                    }

                    WriteFalsePositives(dataPath, testResult.FalsePositives.OrderBy(v => v, StringComparer.Ordinal));

                    return new EvaluationResult
                    {
                        Parameters = candidate,
                        TestResult = testResult,
                        TotalSize = totalSize,
                        SizeSuccess = sizeSuccess,
                    };
                }
                catch (Exception)
                {
                    // Report which candidate failed, then let the original exception propagate unchanged.
                    lock (@lock)
                    {
                        Console.WriteLine("Evaluation of candidate {0} has yielded an error", candidate.Id);
                    }

                    throw;
                }
            }
        }