/// <summary>
/// Creates the concatenated blob file named "data" inside <paramref name="dataPath"/>.
/// The file contains, in this order: the binary data (only when that file exists),
/// the text data, and the JavaScript blob template with its two size placeholders
/// replaced by the text and binary data lengths.
/// </summary>
/// <param name="parameters">Generation parameters; used to pick the JavaScript blob template.</param>
/// <param name="binaryData">Binary data file. Skipped (and counted as length 0) when it does not exist.</param>
/// <param name="txtData">Text data file; its bytes are copied unchanged.</param>
/// <param name="dataPath">Directory in which the "data" file is written. An existing file is deleted first.</param>
/// <returns>A <see cref="FileInfo"/> pointing at the written file.</returns>
private static FileInfo CreateFullBlob(GenerationParameters parameters, FileInfo binaryData, FileInfo txtData, string dataPath)
{
    var filePath = Path.Combine(dataPath, "data");
    if (File.Exists(filePath))
    {
        // File.OpenWrite does not truncate, so a stale file must be removed up front.
        File.Delete(filePath);
    }

    var binaryDataLength = binaryData.Exists ? binaryData.Length : 0L;

    // The sizes are substituted into script source, i.e. machine-readable text:
    // format them independently of the current thread culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var jsBlobPath = GetJsBlobPath(parameters, txtData);
    var javascriptBlob = File.ReadAllText(jsBlobPath)
        .Replace(DATATXT_SIZE_PLACEHOLDER, txtData.Length.ToString(invariant))
        .Replace(DATABIN_SIZE_PLACEHOLDER, binaryDataLength.ToString(invariant));

    using (var stream = File.OpenWrite(filePath))
    using (var dataStream = File.OpenRead(txtData.FullName))
    {
        if (binaryData.Exists)
        {
            using (var binaryStream = File.OpenRead(binaryData.FullName))
            {
                binaryStream.CopyTo(stream);
            }
        }

        dataStream.CopyTo(stream);

        // The buffered writer is created only after all raw copies are done, so the
        // on-disk order never depends on when the writer decides to flush.
        using (var writer = new StreamWriter(stream, Encoding.ASCII))
        {
            writer.Write(javascriptBlob);
        }
    }

    return new FileInfo(filePath);
}
/// <summary>
/// Builds a <see cref="GeneratedData"/> instance for the given parameters: computes the three
/// excluded-sample sets and the two excluded-pair sets, removes redundant data and
/// finally builds the Bloom filter. The whole operation is wrapped in a
/// "DataGenerator.Generate" timer.
/// </summary>
/// <param name="generationParameters">Parameters driving every generation stage.</param>
/// <returns>The populated data object.</returns>
public GeneratedData Generate(GenerationParameters generationParameters)
{
    using (new Timer("DataGenerator.Generate"))
    {
        // Sample exclusions first, then pair exclusions.
        var samplesA = GetExcludedSamples(generationParameters.SampleExclusion_1);
        var samplesB = GetExcludedSamples(generationParameters.SampleExclusion_2);
        var samplesC = GetExcludedSamples(generationParameters.SampleExclusion_3);

        var pairsA = GetExcludedPairs(generationParameters.PairExclusion_1);
        var pairsB = GetExcludedPairs(generationParameters.PairExclusion_2);

        var result = new GeneratedData
        {
            Parameters = generationParameters,
            ExcludedSamples_1 = samplesA,
            ExcludedSamples_2 = samplesB,
            ExcludedSamples_3 = samplesC,
            ExcludedPairs_1 = pairsA,
            ExcludedPairs_2 = pairsB,
        };

        RemoveRedundantData(result);
        BuildBloomFilter(generationParameters.BloomFilter, result);

        return result;
    }
}
/// <summary>
/// Selects the JavaScript blob template path: the Bloom-filter-only template when
/// <c>parameters.IsBloomFilterOnly()</c> is true, the regular template otherwise.
/// </summary>
/// <param name="parameters">Generation parameters that decide which template is used.</param>
/// <param name="txtData">Text data file. Currently not consulted.</param>
/// <returns>Path of the template file to read.</returns>
private static string GetJsBlobPath(GenerationParameters parameters, FileInfo txtData)
{
    // A separate template for plain ".txt" data (Settings.JavascriptBlobNoBZ2Path)
    // used to be selected here based on txtData.Name; that branch is disabled.
    return parameters.IsBloomFilterOnly()
        ? Settings.JavascriptBlobBloomFilterOnlyPath
        : Settings.JavascriptBlobPath;
}
/// <summary>
/// Loads previously serialized data from the two given files and runs the test
/// on it, timing the whole operation under "TestExecutor.Test".
/// </summary>
/// <param name="generationParameters">Parameters the data was generated with.</param>
/// <param name="dataTxtPath">Path of the serialized text data.</param>
/// <param name="dataBinPath">Path of the serialized binary data.</param>
/// <returns>The result of testing the deserialized data.</returns>
public TestResult Test(GenerationParameters generationParameters, string dataTxtPath, string dataBinPath)
{
    using (new Timer("TestExecutor.Test"))
    {
        var loaded = new DataSerializer().Deserialize(generationParameters, dataTxtPath, dataBinPath);
        return Test(loaded);
    }
}
/// <summary>
/// Runs <paramref name="value"/> through the filter stages in a fixed order:
/// pre-exclusion, sample exclusions 1 to 3, pair exclusions 1 and 2, and (unless
/// skipped) the Bloom filter. The first stage that rejects the value increments
/// its counter on <paramref name="counter"/> and ends the evaluation.
/// </summary>
/// <param name="data">Generated data holding the exclusion sets and the Bloom filter.</param>
/// <param name="value">Value to check.</param>
/// <param name="parameters">Parameters for the sample and pair exclusion stages.</param>
/// <param name="counter">Receives one increment for the rejecting stage, if any.</param>
/// <param name="skipBloomFilter">When true the Bloom filter stage is not evaluated.</param>
/// <returns><c>true</c> when no stage rejected the value; otherwise <c>false</c>.</returns>
public static bool Match(GeneratedData data, string value, GenerationParameters parameters, MatchCounter counter, bool skipBloomFilter = false)
{
    if (SampleSplitter.PreExcludeValue(value))
    {
        counter.PreExclusion++;
    }
    else if (!MatchSampleExclusion(value, data.ExcludedSamples_1, parameters.SampleExclusion_1))
    {
        counter.SampleExclusion_1++;
    }
    else if (!MatchSampleExclusion(value, data.ExcludedSamples_2, parameters.SampleExclusion_2))
    {
        counter.SampleExclusion_2++;
    }
    else if (!MatchSampleExclusion(value, data.ExcludedSamples_3, parameters.SampleExclusion_3))
    {
        counter.SampleExclusion_3++;
    }
    else if (!MatchPairExclusion(value, data.ExcludedPairs_1, parameters.PairExclusion_1))
    {
        counter.PairExclusion++;
    }
    else if (!MatchPairExclusion(value, data.ExcludedPairs_2, parameters.PairExclusion_2))
    {
        counter.PairExclusion2++;
    }
    // Note: this stage takes its settings from data.Parameters, not from the
    // parameters argument used by the stages above.
    else if (!skipBloomFilter && !MatchBloomFilter(value, data.BloomFilter, data.Parameters.BloomFilter))
    {
        counter.BloomFilter++;
    }
    else
    {
        return true;
    }

    return false;
}
/// <summary>
/// Concatenates the binary and text data files into a single blob (written to the
/// directory of <paramref name="dataTxtFilePath"/>), gzip-compresses it and reports
/// the resulting file together with the lengths of both inputs.
/// </summary>
/// <param name="parameters">Generation parameters forwarded to the blob builder.</param>
/// <param name="dataTxtFilePath">Path of the text data file.</param>
/// <param name="dataBinaryFilePath">Path of the binary data file; it may be absent.</param>
/// <returns>
/// The compressed blob file, the text data length, and the binary data length
/// (0 when the binary file does not exist).
/// </returns>
internal static DataBlobInfo GenerateDataBlob(GenerationParameters parameters, string dataTxtFilePath, string dataBinaryFilePath)
{
    var outputDirectory = Path.GetDirectoryName(dataTxtFilePath);

    // Bzip2 pre-compression of text data larger than 8 KiB is currently disabled;
    // the raw text file is used as-is.
    var txtFile = new FileInfo(dataTxtFilePath);
    var binFile = new FileInfo(dataBinaryFilePath);

    var fullBlob = CreateFullBlob(parameters, binFile, txtFile, outputDirectory);
    var compressedBlob = CompressGzip(fullBlob);

    return new DataBlobInfo
    {
        BlobFile = compressedBlob,
        DataTxtLength = txtFile.Length,
        DataBinLength = binFile.Exists ? binFile.Length : 0L,
    };
}
/// <summary>
/// Reads a serialized data set back from its text and binary files.
/// </summary>
/// <remarks>
/// The text file is processed line by line. It begins with one section per entry of
/// <c>parameters.GetSampleExclusions()</c>; every section is terminated by an empty line
/// and each of its lines is added to the matching excluded-sample set. All remaining
/// lines form pair records: a key line, any number of value lines, and a terminating
/// empty line. A record is stored in <c>ExcludedPairs_1</c> when the key length equals
/// <c>PairExclusion_1.SampleSize</c>, otherwise in <c>ExcludedPairs_2</c>.
/// When <c>DELTA_ENCODING</c> is enabled, a line shorter than the expected size is
/// completed with the leading characters of the previously decoded line.
/// The Bloom filter is loaded from <paramref name="binPath"/> unless it is disabled.
/// </remarks>
/// <param name="parameters">Parameters the data was generated with.</param>
/// <param name="txtPath">Path of the text data file.</param>
/// <param name="binPath">Path of the binary (Bloom filter) file.</param>
/// <returns>The reconstructed data.</returns>
/// <exception cref="InvalidDataException">
/// The text data is malformed: it ends inside a pair record, contains an empty line
/// where a pair key is expected, or has a shortened line with no usable previous line.
/// </exception>
public GeneratedData Deserialize(GenerationParameters parameters, string txtPath, string binPath)
{
    var data = new GeneratedData
    {
        Parameters = parameters,
    };

    var prev = default(string);
    var exclusionSampleIndex = 0;

    // Both arrays get one trailing default entry so that indexing stays valid once
    // the last sample section has been closed.
    var allSampleExclusionParameters = parameters.GetSampleExclusions()
        .Concat(default(SampleExclusionParameters).Yield())
        .ToArray();
    var allExcludedParameters = data.GetExcludedSamples()
        .Concat(default(HashSet<string>).Yield())
        .ToArray();

    var excludedSamples = allExcludedParameters[exclusionSampleIndex];
    var sampleExclusionParameters = allSampleExclusionParameters[exclusionSampleIndex];

    var exclusionPairKey = default(string);
    var exclusionPairValues = default(List<string>);

    foreach (var line in File.ReadLines(txtPath))
    {
        if (exclusionSampleIndex < allSampleExclusionParameters.Length - 1)
        {
            // Still inside the sample sections.
            if (line.Length == 0)
            {
                // Empty line: advance to the next section and restart delta decoding.
                exclusionSampleIndex++;
                sampleExclusionParameters = allSampleExclusionParameters[exclusionSampleIndex];
                excludedSamples = allExcludedParameters[exclusionSampleIndex];
                prev = null;
                continue;
            }

            var exclusionSample = ExpandDeltaEncodedLine(line, prev, sampleExclusionParameters.SampleSize);
            excludedSamples.Add(exclusionSample);
            prev = exclusionSample;
        }
        else if (line.Length == 0)
        {
            // Empty line in the pair area closes the record that is currently open.
            if (exclusionPairKey == null)
            {
                throw new InvalidDataException("Unexpected empty line: no pair exclusion record is open");
            }

            var excludedPairs = exclusionPairKey.Length == data.Parameters.PairExclusion_1.SampleSize
                ? data.ExcludedPairs_1
                : data.ExcludedPairs_2;
            excludedPairs.Add(exclusionPairKey, new HashSet<string>(exclusionPairValues, StringComparer.Ordinal));

            exclusionPairKey = null;
            exclusionPairValues = null;
            prev = null;
        }
        else if (exclusionPairKey == null)
        {
            // First line of a record is its key; it also seeds delta decoding of the values.
            exclusionPairKey = line;
            exclusionPairValues = new List<string>();
            prev = line;
        }
        else
        {
            // Values of a record have the same size as its key.
            var exclusionValue = ExpandDeltaEncodedLine(line, prev, exclusionPairKey.Length);
            exclusionPairValues.Add(exclusionValue);
            prev = exclusionValue;
        }
    }

    if (exclusionPairKey != null)
    {
        // The file ended before the open pair record was terminated.
        throw new InvalidDataException("Unexpected end of data");
    }

    if (!parameters.BloomFilter.Disabled)
    {
        data.BloomFilter = BloomFilter.Deserialize(binPath, parameters.BloomFilter.HashFunctionsCount);
    }

    return data;
}

/// <summary>
/// Restores a possibly shortened line: when delta encoding is enabled and the line is
/// shorter than <paramref name="expectedLength"/>, the missing leading characters are
/// taken from <paramref name="previous"/>. Otherwise the line is returned unchanged.
/// </summary>
private string ExpandDeltaEncodedLine(string line, string previous, int expectedLength)
{
    if (!DELTA_ENCODING || line.Length >= expectedLength)
    {
        return line;
    }

    var missing = expectedLength - line.Length;
    if (previous == null || previous.Length < missing)
    {
        throw new InvalidDataException("Shortened line '" + line + "' cannot be expanded: no suitable previous line");
    }

    return previous.Substring(0, missing) + line;
}
/// <summary>
/// Evaluates one candidate parameter set: optionally regenerates and serializes its
/// data into a per-candidate folder under <c>Settings.TempFolder</c>, builds the data
/// blob and the JavaScript file, checks their combined size against the 64 KiB limit,
/// runs the test, prints a colored summary line and writes the sorted false positives.
/// </summary>
/// <param name="candidate">Parameter set to evaluate.</param>
/// <param name="lock">Object locked while the colored summary line is printed.</param>
/// <param name="generate">
/// When true, existing files in the candidate folder are deleted and the data is
/// generated and serialized again; when false, the files already present are used.
/// </param>
/// <returns>The test result together with the total size and the size verdict.</returns>
private static EvaluationResult Evaluate(GenerationParameters candidate, object @lock, bool generate = true)
{
    using (new Timer(String.Format("TestRunner.Evaluate(ID{0:000})", candidate.Id)))
    {
        try
        {
            Console.WriteLine("Starting evaluation of candidate {0}...", candidate.Id);

            var workFolder = Path.Combine(Settings.TempFolder, String.Format("ID{0:000}", candidate.Id));
            var txtPath = Path.Combine(workFolder, "data.txt");
            var binPath = Path.Combine(workFolder, "data.bin");

            // A fresh generation starts from an empty folder.
            if (generate && Directory.Exists(workFolder))
            {
                foreach (var stale in Directory.GetFiles(workFolder))
                {
                    File.Delete(stale);
                }
            }

            Directory.CreateDirectory(workFolder);

            if (generate)
            {
                var generated = new DataGenerator().Generate(candidate);
                new DataSerializer().Serialize(generated, txtPath, binPath);
            }

            var blobInfo = Utils.GenerateDataBlob(candidate, txtPath, binPath);
            var scriptFile = Utils.GenerateJavascript(workFolder, blobInfo.DataTxtLength, blobInfo.DataBinLength);

            var totalSize = blobInfo.BlobFile.Length + scriptFile.Length;
            var fitsLimit = totalSize <= (64 * 1024);

            var testResult = new TestExecutor().Test(candidate, txtPath, binPath);

            // Colors are process-wide console state, hence the lock around the summary line.
            lock (@lock)
            {
                if (fitsLimit)
                {
                    Console.BackgroundColor = ConsoleColor.Green;
                    Console.ForegroundColor = ConsoleColor.Black;
                }
                else
                {
                    Console.BackgroundColor = ConsoleColor.Red;
                    Console.ForegroundColor = ConsoleColor.White;
                }

                Console.WriteLine(
                    "Evaluation of candidate {0} finished! Size: {1} Score: {2}",
                    candidate.Id,
                    fitsLimit ? "OK" : "TOO BIG",
                    testResult.AvgScore);
                Console.ResetColor();
            }

            var sortedFalsePositives = testResult.FalsePositives.OrderBy(v => v, StringComparer.Ordinal);
            WriteFalsePositives(workFolder, sortedFalsePositives);

            return new EvaluationResult
            {
                Parameters = candidate,
                TestResult = testResult,
                TotalSize = totalSize,
                SizeSuccess = fitsLimit,
            };
        }
        catch (Exception)
        {
            // Report which candidate failed, then let the original exception propagate.
            Console.WriteLine("Evaluation of candidate {0} has yielded an error", candidate.Id);
            throw;
        }
    }
}