Ejemplo n.º 1
0
        /// <summary>
        /// Writes the exclusion data held by <paramref name="data"/> to a text file and,
        /// unless the bloom filter is disabled in the data's parameters, the bloom filter
        /// to a binary file.
        /// </summary>
        /// <param name="data">Generated data whose excluded samples, excluded pairs and bloom filter are written.</param>
        /// <param name="txtFilePath">Destination of the text output; an existing file is deleted first.</param>
        /// <param name="dataBinaryFilePath">
        /// Destination of the bloom filter output; an existing file is deleted first,
        /// even when the bloom filter is disabled and nothing is written afterwards.
        /// </param>
        public void Serialize(GeneratedData data, string txtFilePath, string dataBinaryFilePath)
        {
            // Remove leftovers from a previous run: File.OpenWrite does not truncate an existing file.
            foreach (var stalePath in new[] { txtFilePath, dataBinaryFilePath })
            {
                if (File.Exists(stalePath))
                {
                    File.Delete(stalePath);
                }
            }

            using (var stream = File.OpenWrite(txtFilePath))
            using (var output = new StreamWriter(stream, Encoding.ASCII))
            {
                // Sample sets first, each one written in ordinal order.
                foreach (var sampleSet in data.GetExcludedSamples())
                {
                    WriteSameSizeLines(sampleSet.OrderBy(sample => sample, StringComparer.Ordinal), output);
                }

                // Then every pair entry: the key line followed by its values in ordinal order.
                foreach (var pairSet in data.GetExcludedPairs())
                {
                    foreach (var entry in pairSet)
                    {
                        var orderedValues = entry.Value.OrderBy(value => value, StringComparer.Ordinal);
                        WriteSameSizeLines(entry.Key.Yield().Concat(orderedValues), output);
                    }
                }

                output.Flush();
            }

            if (data.Parameters.BloomFilter.Disabled)
            {
                return;
            }

            BloomFilter.Serialize(data.BloomFilter, dataBinaryFilePath);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads exclusion data from a text file (and, unless disabled, a bloom filter from a
        /// binary file) into a new <see cref="GeneratedData"/> instance.
        /// </summary>
        /// <remarks>
        /// The text file is consumed as a sequence of blocks, each terminated by an empty line.
        /// The first blocks, one per entry of <c>parameters.GetSampleExclusions()</c>, are added
        /// to the corresponding set of <c>data.GetExcludedSamples()</c>. Every following block is
        /// a pair block: its first line is the key, the remaining lines are the values.
        /// When <c>DELTA_ENCODING</c> is on, a line shorter than the block's sample size is
        /// completed with the leading characters of the previously decoded line.
        /// </remarks>
        /// <param name="parameters">Generation parameters; stored on the returned data and used to interpret the file.</param>
        /// <param name="txtPath">Path of the text file to read.</param>
        /// <param name="binPath">Path of the bloom filter file; only read when the bloom filter is not disabled.</param>
        /// <returns>The populated <see cref="GeneratedData"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="parameters"/> is null.</exception>
        /// <exception cref="InvalidDataException">
        /// The text file is malformed: a pair block is not terminated by an empty line, an empty
        /// line appears where a pair key is expected, or a shortened line cannot be expanded
        /// from the previous line.
        /// </exception>
        public GeneratedData Deserialize(GenerationParameters parameters, string txtPath, string binPath)
        {
            if (parameters == null)
            {
                throw new ArgumentNullException("parameters");
            }

            var data = new GeneratedData
            {
                Parameters = parameters,
            };

            var prev = default(string);
            var exclusionSampleIndex = 0;

            // Both arrays get a trailing default entry so that indexing stays valid once the
            // last sample block has been consumed and the index moves one past the real entries.
            var allSampleExclusionParameters = parameters.GetSampleExclusions()
                                               .Concat(default(SampleExclusionParameters).Yield())
                                               .ToArray();

            var allExcludedSamples = data.GetExcludedSamples()
                                     .Concat(default(HashSet <string>).Yield())
                                     .ToArray();

            var excludedSamples           = allExcludedSamples[exclusionSampleIndex];
            var sampleExclusionParameters = allSampleExclusionParameters[exclusionSampleIndex];

            var exclusionPairKey    = default(string);
            var exclusionPairValues = default(List <string>);

            foreach (var line in File.ReadLines(txtPath))
            {
                if (exclusionSampleIndex < allSampleExclusionParameters.Length - 1)
                {
                    // Still inside the sample-exclusion blocks.
                    if (line.Length == 0)
                    {
                        // End of block: move on to the next sample-exclusion set.
                        exclusionSampleIndex++;
                        sampleExclusionParameters = allSampleExclusionParameters[exclusionSampleIndex];
                        excludedSamples           = allExcludedSamples[exclusionSampleIndex];
                        prev = null;
                        continue;
                    }

                    var exclusionSample = ExpandDeltaEncodedLine(line, prev, sampleExclusionParameters.SampleSize);

                    excludedSamples.Add(exclusionSample);
                    prev = exclusionSample;
                }
                else if (line.Length == 0)
                {
                    // End of a pair block: there must be a key to store the collected values under.
                    if (exclusionPairKey == null)
                    {
                        throw new InvalidDataException("Unexpected empty line: no pair key precedes it");
                    }

                    // The key length decides which of the two pair dictionaries the block belongs to.
                    var excludedPairs = exclusionPairKey.Length == data.Parameters.PairExclusion_1.SampleSize ?
                                        data.ExcludedPairs_1 : data.ExcludedPairs_2;
                    excludedPairs.Add(exclusionPairKey, new HashSet <string>(exclusionPairValues, StringComparer.Ordinal));
                    exclusionPairKey    = null;
                    exclusionPairValues = null;
                    prev = null;
                }
                else if (exclusionPairKey == null)
                {
                    // First line of a pair block is the key; it also seeds the delta decoding.
                    exclusionPairKey    = line;
                    exclusionPairValues = new List <string>();
                    prev = line;
                }
                else
                {
                    // Values have the same size as their key.
                    var exclusionValue = ExpandDeltaEncodedLine(line, prev, exclusionPairKey.Length);
                    exclusionPairValues.Add(exclusionValue);
                    prev = exclusionValue;
                }
            }

            // A key without a terminating empty line means the file was cut short.
            if (exclusionPairKey != null)
            {
                throw new InvalidDataException("Unexpected end of data");
            }

            if (!parameters.BloomFilter.Disabled)
            {
                data.BloomFilter = BloomFilter.Deserialize(binPath, parameters.BloomFilter.HashFunctionsCount);
            }

            return(data);
        }

        /// <summary>
        /// Restores a full-size line from its delta-encoded form: when delta encoding is on and
        /// <paramref name="line"/> is shorter than <paramref name="sampleSize"/>, the missing
        /// leading characters are copied from <paramref name="prev"/>.
        /// </summary>
        /// <exception cref="InvalidDataException">
        /// The line is shortened but there is no previous line, or the previous line is too short
        /// to supply the missing prefix.
        /// </exception>
        private string ExpandDeltaEncodedLine(string line, string prev, int sampleSize)
        {
            if (!DELTA_ENCODING || line.Length >= sampleSize)
            {
                return line;
            }

            var missingPrefixLength = sampleSize - line.Length;
            if (prev == null || prev.Length < missingPrefixLength)
            {
                throw new InvalidDataException("Delta-encoded line cannot be expanded: previous line is missing or too short");
            }

            return prev.Substring(0, missingPrefixLength) + line;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Prunes redundant entries from the excluded sample sets of <paramref name="data"/>.
        /// </summary>
        /// <remarks>
        /// Only step 1 is active: every non-empty sample set is handed, together with each set
        /// that sorts before it (by the length of its first element), to
        /// <c>RemoveRedundantExclusionSamples</c>. Steps 2 and 3 below are commented out and
        /// do not run.
        /// </remarks>
        /// <param name="data">Generated data whose excluded sample sets are pruned in place.</param>
        private void RemoveRedundantData(GeneratedData data)
        {
            // Non-empty sets only, smallest sample length first (taken from each set's first element).
            var excludedSamplesSets = data.GetExcludedSamples()
                                      .Where(set => set.Count > 0)
                                      .OrderBy(set => set.First().Length)
                                      .ToArray();

            // step 1: remove excluded samples already covered by smaller sized excluded samples
            for (var current = 1; current < excludedSamplesSets.Length; current++)
            {
                var candidates = excludedSamplesSets[current];

                for (var smaller = 0; smaller < current; smaller++)
                {
                    RemoveRedundantExclusionSamples(candidates, excludedSamplesSets[smaller]);
                }
            }

            // NOTE: steps 2 and 3 are disabled; the code is kept below for reference only.

            //// step 2: remove excluded samples already covered by pairs
            //{

            //	for (var index = 0; index < excludedSamplesSets.Length; index++)
            //	{
            //		var excludedSamples = excludedSamplesSets[index];
            //		var excludedSampleSize = excludedSamples.First().Length;
            //		var excludedPairsSets = data.GetExcludedPairs()
            //			.Where(s => s.Count > 0)
            //			.ToArray();

            //		foreach (var pairExclusion in excludedPairsSets)
            //		{
            //			var pairSampleSize = pairExclusion.First().Key.Length;

            //			if (excludedSampleSize <= pairSampleSize) continue;

            //			foreach (var excludedSample in excludedSamples.ToArray())
            //			{
            //				var lineSamples = SampleSplitter.SplitSamples(excludedSample, pairSampleSize);
            //				var linePairs =
            //					from x in lineSamples //.Select((s, i) => new { s, i })
            //					from y in lineSamples //.Select((s, i) => new { s, i })
            //					where x.CompareTo(y) < 0
            //					select Tuple.Create(x, y);
            //				//where x.i < y.i
            //				//select Tuple.Create(x.s, y.s);

            //				foreach (var linePair in linePairs)
            //				{
            //					var excludedItem2s = default(HashSet<string>);
            //					if (pairExclusion.TryGetValue(linePair.Item1, out excludedItem2s))
            //					{
            //						if (excludedItem2s.Contains(linePair.Item2))
            //						{
            //							excludedSamples.Remove(excludedSample);
            //							break;
            //						}
            //					}
            //				}
            //			}
            //		}
            //	}
            //}

            //// step 3: remove pairs already covered by smaller pairs
            //{
            //	var excludedPairsSets = data.GetExcludedPairs()
            //		.Where(es => es.Count > 0)
            //		.OrderBy(es => es.First().Key.Length)
            //		.ToArray();

            //	for (var index = 1; index < excludedPairsSets.Length; index++)
            //	{
            //		for (var pIndex = 0; pIndex < index; pIndex++)
            //		{
            //			var excludedPairs = excludedPairsSets[index];
            //			var alreadyExcludedPairs = excludedPairsSets[pIndex];
            //			RemoveRedundantExclusionPairs(excludedPairs, alreadyExcludedPairs);
            //		}
            //	}
            //}
        }