/// <summary>
/// Stores the balanced variant of the sequence [e1, e2, e1, e2] and checks what
/// <c>MatchPattern</c> returns for several patterns that use the
/// <c>ZeroOrMany</c> placeholder.
/// </summary>
public static void PatternMatchTest()
{
    var anyCount = Sequences.Sequences.ZeroOrMany;

    using (var scope = new TempLinksTestScope(useSequences: true))
    {
        var links = scope.Links;
        var sequences = scope.Sequences;

        var e1 = links.Create();
        var e2 = links.Create();

        // mama / papa
        var elements = new[] { e1, e2, e1, e2 };

        var converter = new BalancedVariantConverter<ulong>(links);
        var root = converter.Convert(elements);

        // Link layout noted by the original author:
        // 1: [1]
        // 2: [2]
        // 3: [1,2]
        // 4: [1,2,1,2]

        var pair = links.GetSource(root);

        // Patterns that must not match anything.
        Assert.True(sequences.MatchPattern(e2, e1, anyCount).Count == 0);
        Assert.True(sequences.MatchPattern(anyCount, e2, e1).Count == 0);
        Assert.True(sequences.MatchPattern(e1, anyCount, e1).Count == 0);

        // "e1 ... e2" must match both the inner doublet and the whole sequence.
        var startsWithE1EndsWithE2 = sequences.MatchPattern(e1, anyCount, e2);
        Assert.Contains(pair, startsWithE1EndsWithE2);
        Assert.Contains(root, startsWithE1EndsWithE2);

        // Clean up every element of the sequence, in order (duplicates included).
        foreach (var element in elements)
        {
            links.Delete(element);
        }
    }
}
/// <summary>
/// Creates a balanced variant of 2000 freshly created links and reads it back twice:
/// once through <c>Sequences.ToList</c> (configured with a <c>LeveledSequenceWalker</c>)
/// and once through <c>SequenceWalker.WalkRight</c>. Both reads must reproduce the
/// original element order. Timings are printed, not asserted.
/// </summary>
public static void ReadSequenceTest()
{
    const long elementCount = 2000;

    using (var scope = new TempLinksTestScope(useSequences: false))
    {
        var links = scope.Links;
        var options = new SequencesOptions<ulong> { Walker = new LeveledSequenceWalker<ulong>(links) };
        var sequences = new Sequences.Sequences(links, options);

        var elements = new ulong[elementCount];
        for (var index = 0; index < elementCount; index++)
        {
            elements[index] = links.Create();
        }

        var converter = new BalancedVariantConverter<ulong>(links);

        // Build the balanced variant.
        var buildTimer = Stopwatch.StartNew();
        var root = converter.Convert(elements);
        buildTimer.Stop();

        // Read via the sequences instance.
        var leveledTimer = Stopwatch.StartNew();
        var readByLevels = sequences.ToList(root);
        leveledTimer.Stop();

        // Read via the static right-walker.
        var stackTimer = Stopwatch.StartNew();
        var readByStack = new List<ulong>();
        SequenceWalker.WalkRight(root, links.GetSource, links.GetTarget, links.IsPartialPoint, readByStack.Add);
        stackTimer.Stop();

        Assert.True(elements.SequenceEqual(readByLevels));
        Assert.True(elements.SequenceEqual(readByStack));

        // Assert.True(sw2.Elapsed < sw3.Elapsed);

        Console.WriteLine($"Stack-based walker: {stackTimer.Elapsed}, Level-based reader: {leveledTimer.Elapsed}");

        for (var index = 0; index < elementCount; index++)
        {
            links.Delete(elements[index]);
        }
    }
}
/// <summary>
/// Stores a balanced variant of 200 freshly created links and verifies that both
/// <c>GetAllMatchingSequences0</c> and <c>GetAllMatchingSequences1</c> return exactly
/// one result, equal to that balanced variant.
/// </summary>
public static void BalancedVariantSearchTest()
{
    const long elementCount = 200;

    using (var scope = new TempLinksTestScope(useSequences: true))
    {
        var links = scope.Links;
        var sequences = scope.Sequences;

        var elements = new ulong[elementCount];
        for (var index = 0; index < elementCount; index++)
        {
            elements[index] = links.Create();
        }

        var converter = new BalancedVariantConverter<ulong>(links);

        var buildTimer = Stopwatch.StartNew();
        var root = converter.Convert(elements);
        buildTimer.Stop();

        var firstSearchTimer = Stopwatch.StartNew();
        var foundByVariant0 = sequences.GetAllMatchingSequences0(elements);
        firstSearchTimer.Stop();

        var secondSearchTimer = Stopwatch.StartNew();
        var foundByVariant1 = sequences.GetAllMatchingSequences1(elements);
        secondSearchTimer.Stop();

        // With 200 elements this would take forever
        //var sw4 = Stopwatch.StartNew();
        //var searchResults4 = sequences.Each(sequence); sw4.Stop();

        Assert.True(foundByVariant0.Count == 1 && root == foundByVariant0[0]);
        Assert.True(foundByVariant1.Count == 1 && root == foundByVariant1.First());

        //Assert.True(sw1.Elapsed < sw2.Elapsed);

        for (var index = 0; index < elementCount; index++)
        {
            links.Delete(elements[index]);
        }
    }
}
/// <summary>
/// Stores a balanced variant of 200 freshly created links, then searches by the same
/// sequence without its first and last element. Both
/// <c>GetAllPartiallyMatchingSequences0</c> and <c>GetAllPartiallyMatchingSequences1</c>
/// must return exactly one result, equal to the stored balanced variant.
/// </summary>
public static void BalancedPartialVariantsSearchTest()
{
    const long elementCount = 200;

    using (var scope = new TempLinksTestScope(useSequences: true))
    {
        var links = scope.Links;
        var sequences = scope.Sequences;

        var elements = new ulong[elementCount];
        for (var index = 0; index < elementCount; index++)
        {
            elements[index] = links.Create();
        }

        var converter = new BalancedVariantConverter<ulong>(links);
        var root = converter.Convert(elements);

        // Inner part of the sequence: everything except the first and the last element.
        var inner = new ulong[elementCount - 2];
        Array.Copy(elements, 1, inner, 0, (int)elementCount - 2);

        var firstSearchTimer = Stopwatch.StartNew();
        var foundByVariant0 = sequences.GetAllPartiallyMatchingSequences0(inner);
        firstSearchTimer.Stop();

        var secondSearchTimer = Stopwatch.StartNew();
        var foundByVariant1 = sequences.GetAllPartiallyMatchingSequences1(inner);
        secondSearchTimer.Stop();

        Assert.True(foundByVariant0.Count == 1 && root == foundByVariant0[0]);
        Assert.True(foundByVariant1.Count == 1 && root == foundByVariant1.First());

        for (var index = 0; index < elementCount; index++)
        {
            links.Delete(elements[index]);
        }
    }
}
/// <summary>
/// Writes the decimal strings of up to 500 distinct random 64-bit numbers into two
/// separate stores: one through a sequences instance with compression enabled, the
/// other through a <c>BalancedVariantConverter</c>. Checks that both variants decode
/// back to the source strings and that each store grew by fewer links than the total
/// number of characters written.
/// </summary>
public static void RundomNumbersCompressionQualityTest()
{
    const ulong N = 500;

    //const ulong minNumbers = 10000;
    //const ulong maxNumbers = 20000;

    //var strings = new List<string>();

    //for (ulong i = 0; i < N; i++)
    //    strings.Add(RandomHelpers.DefaultFactory.NextUInt64(minNumbers, maxNumbers).ToString());

    var strings = new List<string>();
    for (ulong n = 0; n < N; n++)
    {
        var number = RandomHelpers.Default.NextUInt64();
        strings.Add(number.ToString());
    }

    // Random values may repeat; keep each string once.
    strings = strings.Distinct().ToList();

    var arrays = strings.Select(UnicodeMap.FromStringToLinkArray).ToArray();
    var totalCharacters = arrays.Sum(x => x.Length);

    var compressingOptions = new SequencesOptions<ulong>
    {
        UseCompression = true,
        EnforceSingleSequenceVersionOnWriteBasedOnExisting = true
    };

    using (var scope1 = new TempLinksTestScope(useSequences: true, sequencesOptions: compressingOptions))
    using (var scope2 = new TempLinksTestScope(useSequences: true))
    {
        scope1.Links.UseUnicode();
        scope2.Links.UseUnicode();

        var compressor1 = scope1.Sequences;
        var compressor2 = scope2.Sequences;

        var compressed1 = new ulong[arrays.Length];
        var compressed2 = new ulong[arrays.Length];

        var first = 0;
        var last = arrays.Length;

        // Pass 1: compressing sequences.
        var sw1 = Stopwatch.StartNew();
        for (var index = first; index < last; index++)
        {
            compressed1[index] = compressor1.Create(arrays[index].ShiftRight());
        }
        var elapsed1 = sw1.Elapsed;

        // Pass 2: plain balanced variant.
        var balancedVariantConverter = new BalancedVariantConverter<ulong>(scope2.Links);
        var sw2 = Stopwatch.StartNew();
        for (var index = first; index < last; index++)
        {
            compressed2[index] = balancedVariantConverter.Convert(arrays[index]);
        }
        var elapsed2 = sw2.Elapsed;

        Debug.WriteLine($"Compressor: {elapsed1}, Balanced sequence creator: {elapsed2}");

        Assert.True(elapsed1 > elapsed2);

        // Checks
        for (var index = first; index < last; index++)
        {
            var sequence1 = compressed1[index];
            var sequence2 = compressed2[index];

            if (sequence1 == _constants.Null || sequence2 == _constants.Null)
            {
                continue;
            }

            var decompress1 = UnicodeMap.FromSequenceLinkToString(sequence1, scope1.Links);
            var decompress2 = UnicodeMap.FromSequenceLinkToString(sequence2, scope2.Links);

            Assert.True(strings[index] == decompress1 && decompress1 == decompress2);
        }

        Assert.True((int)(scope1.Links.Count() - UnicodeMap.MapSize) < totalCharacters);
        Assert.True((int)(scope2.Links.Count() - UnicodeMap.MapSize) < totalCharacters);

        Debug.WriteLine($"{(double)(scope1.Links.Count() - UnicodeMap.MapSize) / totalCharacters} | {(double)(scope2.Links.Count() - UnicodeMap.MapSize) / totalCharacters}");

        // Can be worse than balanced variant
        //Assert.True(scope1.Links.Count() <= scope2.Links.Count());

        //compressor1.ValidateFrequencies();
    }
}
/// <summary>
/// Writes the decimal strings of the numbers 10000..12499 twice each, both through a
/// sequences instance with compression enabled and through a
/// <c>BalancedVariantConverter</c>. A result is kept only when both writes of the same
/// string return the same link. Kept results must decode back to the source string,
/// each store must grow by fewer links than the total number of characters, and the
/// compressing store must not hold more links than the balanced one.
/// </summary>
public static void CompressionStabilityTest()
{
    // TODO: Fix bug (do a separate test)
    //const ulong minNumbers = 0;
    //const ulong maxNumbers = 1000;

    const ulong minNumbers = 10000;
    const ulong maxNumbers = 12500;

    var strings = new List<string>();
    for (var number = minNumbers; number < maxNumbers; number++)
    {
        strings.Add(number.ToString());
    }

    var arrays = strings.Select(UnicodeMap.FromStringToLinkArray).ToArray();
    var totalCharacters = arrays.Sum(x => x.Length);

    var compressingOptions = new SequencesOptions<ulong>
    {
        UseCompression = true,
        EnforceSingleSequenceVersionOnWriteBasedOnExisting = true
    };

    using (var scope1 = new TempLinksTestScope(useSequences: true, sequencesOptions: compressingOptions))
    using (var scope2 = new TempLinksTestScope(useSequences: true))
    {
        scope1.Links.UseUnicode();
        scope2.Links.UseUnicode();

        //var compressor1 = new Compressor(scope1.Links.Unsync, scope1.Sequences);
        var compressor1 = scope1.Sequences;
        var compressor2 = scope2.Sequences;

        var compressed1 = new ulong[arrays.Length];
        var compressed2 = new ulong[arrays.Length];

        var sw1 = Stopwatch.StartNew();

        var begin = 0;
        var end = arrays.Length;

        // Collisions proved (cannot be solved by max doublet comparison, no stable rule)
        // Stability issue starts at 10001 or 11000
        //for (int i = START; i < END; i++)
        //{
        //    var first = compressor1.Compress(arrays[i]);
        //    var second = compressor1.Compress(arrays[i]);

        //    if (first == second)
        //        compressed1[i] = first;
        //    else
        //    {
        //        // TODO: Find a solution for this case
        //    }
        //}

        for (var index = begin; index < end; index++)
        {
            var firstAttempt = compressor1.Create(arrays[index].ShiftRight());
            var secondAttempt = compressor1.Create(arrays[index].ShiftRight());

            // Unstable results are skipped (slot stays at its default value).
            // TODO: Find a solution for this case
            if (firstAttempt == secondAttempt)
            {
                compressed1[index] = firstAttempt;
            }
        }

        var elapsed1 = sw1.Elapsed;

        var balancedVariantConverter = new BalancedVariantConverter<ulong>(scope2.Links);

        var sw2 = Stopwatch.StartNew();

        for (var index = begin; index < end; index++)
        {
            var firstAttempt = balancedVariantConverter.Convert(arrays[index]);
            var secondAttempt = balancedVariantConverter.Convert(arrays[index]);

            if (firstAttempt == secondAttempt)
            {
                compressed2[index] = firstAttempt;
            }
        }

        var elapsed2 = sw2.Elapsed;

        Debug.WriteLine($"Compressor: {elapsed1}, Balanced sequence creator: {elapsed2}");

        Assert.True(elapsed1 > elapsed2);

        // Checks
        for (var index = begin; index < end; index++)
        {
            var sequence1 = compressed1[index];
            var sequence2 = compressed2[index];

            if (sequence1 == _constants.Null || sequence2 == _constants.Null)
            {
                continue;
            }

            var decompress1 = UnicodeMap.FromSequenceLinkToString(sequence1, scope1.Links);
            var decompress2 = UnicodeMap.FromSequenceLinkToString(sequence2, scope2.Links);

            //var structure1 = scope1.Links.FormatStructure(sequence1, link => link.IsPartialPoint());
            //var structure2 = scope2.Links.FormatStructure(sequence2, link => link.IsPartialPoint());

            //if (sequence1 != Constants.Null && sequence2 != Constants.Null && arrays[i].Length > 3)
            //    Assert.False(structure1 == structure2);

            Assert.True(strings[index] == decompress1 && decompress1 == decompress2);
        }

        Assert.True((int)(scope1.Links.Count() - UnicodeMap.MapSize) < totalCharacters);
        Assert.True((int)(scope2.Links.Count() - UnicodeMap.MapSize) < totalCharacters);

        Debug.WriteLine($"{(double)(scope1.Links.Count() - UnicodeMap.MapSize) / totalCharacters} | {(double)(scope2.Links.Count() - UnicodeMap.MapSize) / totalCharacters}");

        Assert.True(scope1.Links.Count() <= scope2.Links.Count());

        //compressor1.ValidateFrequencies();
    }
}
/// <summary>
/// Writes every non-empty line of the example lorem-ipsum text into three separate
/// stores using three converters: a <c>CompressingConverter</c> (scope1), a
/// <c>BalancedVariantConverter</c> (scope2) and an <c>OptimalVariantConverter</c>
/// (scope3). Verifies that all three decode back to the source lines, that each store
/// grew by fewer links than the number of characters written, and that scope1 and
/// scope3 each grew by fewer links than scope2. Timings and duplicate-segment counts
/// are printed, not asserted.
/// </summary>
public static void CompressionEfficiencyTest()
{
    var strings = _exampleLoremIpsumText.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
    var arrays = strings.Select(UnicodeMap.FromStringToLinkArray).ToArray();
    var totalCharacters = arrays.Select(x => x.Length).Sum();

    using (var scope1 = new TempLinksTestScope(useSequences: true))
    using (var scope2 = new TempLinksTestScope(useSequences: true))
    using (var scope3 = new TempLinksTestScope(useSequences: true))
    {
        scope1.Links.Unsync.UseUnicode();
        scope2.Links.Unsync.UseUnicode();
        scope3.Links.Unsync.UseUnicode();

        var balancedVariantConverter1 = new BalancedVariantConverter<ulong>(scope1.Links.Unsync);
        var totalSequenceSymbolFrequencyCounter = new TotalSequenceSymbolFrequencyCounter<ulong>(scope1.Links.Unsync);
        var linkFrequenciesCache1 = new LinkFrequenciesCache<ulong>(scope1.Links.Unsync, totalSequenceSymbolFrequencyCounter);
        var compressor1 = new CompressingConverter<ulong>(scope1.Links.Unsync, balancedVariantConverter1, linkFrequenciesCache1, doInitialFrequenciesIncrement: false);

        //var compressor2 = scope2.Sequences;
        var compressor3 = scope3.Sequences;

        var constants = Default<LinksConstants<ulong>>.Instance;

        var sequences = compressor3;
        //var meaningRoot = links.CreatePoint();
        //var unaryOne = links.CreateAndUpdate(meaningRoot, constants.Itself);
        //var frequencyMarker = links.CreateAndUpdate(meaningRoot, constants.Itself);
        //var frequencyPropertyMarker = links.CreateAndUpdate(meaningRoot, constants.Itself);

        //var unaryNumberToAddressConverter = new UnaryNumberToAddressAddOperationConverter<ulong>(links, unaryOne);
        //var unaryNumberIncrementer = new UnaryNumberIncrementer<ulong>(links, unaryOne);
        //var frequencyIncrementer = new FrequencyIncrementer<ulong>(links, frequencyMarker, unaryOne, unaryNumberIncrementer);
        //var frequencyPropertyOperator = new FrequencyPropertyOperator<ulong>(links, frequencyPropertyMarker, frequencyMarker);
        //var linkFrequencyIncrementer = new LinkFrequencyIncrementer<ulong>(links, frequencyPropertyOperator, frequencyIncrementer);
        //var linkToItsFrequencyNumberConverter = new LinkToItsFrequencyNumberConveter<ulong>(links, frequencyPropertyOperator, unaryNumberToAddressConverter);

        // NOTE(review): this cache belongs to scope3 but reuses the frequency counter that was
        // constructed over scope1's links — confirm whether a scope3-bound counter is intended.
        var linkFrequenciesCache3 = new LinkFrequenciesCache<ulong>(scope3.Links.Unsync, totalSequenceSymbolFrequencyCounter);

        var linkToItsFrequencyNumberConverter = new FrequenciesCacheBasedLinkToItsFrequencyNumberConverter<ulong>(linkFrequenciesCache3);

        var sequenceToItsLocalElementLevelsConverter = new SequenceToItsLocalElementLevelsConverter<ulong>(scope3.Links.Unsync, linkToItsFrequencyNumberConverter);
        var optimalVariantConverter = new OptimalVariantConverter<ulong>(scope3.Links.Unsync, sequenceToItsLocalElementLevelsConverter);

        var compressed1 = new ulong[arrays.Length];
        var compressed2 = new ulong[arrays.Length];
        var compressed3 = new ulong[arrays.Length];

        var START = 0;
        var END = arrays.Length;

        //for (int i = START; i < END; i++)
        //    linkFrequenciesCache1.IncrementFrequencies(arrays[i]);

        // Baseline for scope1 must be taken from scope1 itself: it is later subtracted from
        // scope1's link count. (It was previously read from scope2.)
        var initialCount1 = scope1.Links.Unsync.Count();

        var sw1 = Stopwatch.StartNew();

        for (int i = START; i < END; i++)
        {
            linkFrequenciesCache1.IncrementFrequencies(arrays[i]);
            compressed1[i] = compressor1.Convert(arrays[i]);
        }

        var elapsed1 = sw1.Elapsed;

        var balancedVariantConverter2 = new BalancedVariantConverter<ulong>(scope2.Links.Unsync);

        var initialCount2 = scope2.Links.Unsync.Count();

        var sw2 = Stopwatch.StartNew();

        for (int i = START; i < END; i++)
        {
            compressed2[i] = balancedVariantConverter2.Convert(arrays[i]);
        }

        var elapsed2 = sw2.Elapsed;

        // Frequencies for scope3 are accumulated up front, before the baseline count and
        // the timed conversion loop.
        for (int i = START; i < END; i++)
        {
            linkFrequenciesCache3.IncrementFrequencies(arrays[i]);
        }

        var initialCount3 = scope3.Links.Unsync.Count();

        var sw3 = Stopwatch.StartNew();

        for (int i = START; i < END; i++)
        {
            //linkFrequenciesCache3.IncrementFrequencies(arrays[i]);
            compressed3[i] = optimalVariantConverter.Convert(arrays[i]);
        }

        var elapsed3 = sw3.Elapsed;

        Console.WriteLine($"Compressor: {elapsed1}, Balanced variant: {elapsed2}, Optimal variant: {elapsed3}");

        // Assert.True(elapsed1 > elapsed2);

        // Checks
        for (int i = START; i < END; i++)
        {
            var sequence1 = compressed1[i];
            var sequence2 = compressed2[i];
            var sequence3 = compressed3[i];

            var decompress1 = UnicodeMap.FromSequenceLinkToString(sequence1, scope1.Links.Unsync);
            var decompress2 = UnicodeMap.FromSequenceLinkToString(sequence2, scope2.Links.Unsync);
            var decompress3 = UnicodeMap.FromSequenceLinkToString(sequence3, scope3.Links.Unsync);

            // The formatted structures are only consumed by the disabled assertions below.
            var structure1 = scope1.Links.Unsync.FormatStructure(sequence1, link => link.IsPartialPoint());
            var structure2 = scope2.Links.Unsync.FormatStructure(sequence2, link => link.IsPartialPoint());
            var structure3 = scope3.Links.Unsync.FormatStructure(sequence3, link => link.IsPartialPoint());

            //if (sequence1 != Constants.Null && sequence2 != Constants.Null && arrays[i].Length > 3)
            //    Assert.False(structure1 == structure2);
            //if (sequence3 != Constants.Null && sequence2 != Constants.Null && arrays[i].Length > 3)
            //    Assert.False(structure3 == structure2);

            Assert.True(strings[i] == decompress1 && decompress1 == decompress2);
            Assert.True(strings[i] == decompress3 && decompress3 == decompress2);
        }

        Assert.True((int)(scope1.Links.Unsync.Count() - initialCount1) < totalCharacters);
        Assert.True((int)(scope2.Links.Unsync.Count() - initialCount2) < totalCharacters);
        Assert.True((int)(scope3.Links.Unsync.Count() - initialCount3) < totalCharacters);

        Console.WriteLine($"{(double)(scope1.Links.Unsync.Count() - initialCount1) / totalCharacters} | {(double)(scope2.Links.Unsync.Count() - initialCount2) / totalCharacters} | {(double)(scope3.Links.Unsync.Count() - initialCount3) / totalCharacters}");

        Assert.True(scope1.Links.Unsync.Count() - initialCount1 < scope2.Links.Unsync.Count() - initialCount2);
        Assert.True(scope3.Links.Unsync.Count() - initialCount3 < scope2.Links.Unsync.Count() - initialCount2);

        var duplicateProvider1 = new DuplicateSegmentsProvider<ulong>(scope1.Links.Unsync, scope1.Sequences);
        var duplicateProvider2 = new DuplicateSegmentsProvider<ulong>(scope2.Links.Unsync, scope2.Sequences);
        var duplicateProvider3 = new DuplicateSegmentsProvider<ulong>(scope3.Links.Unsync, scope3.Sequences);

        var duplicateCounter1 = new DuplicateSegmentsCounter<ulong>(duplicateProvider1);
        var duplicateCounter2 = new DuplicateSegmentsCounter<ulong>(duplicateProvider2);
        var duplicateCounter3 = new DuplicateSegmentsCounter<ulong>(duplicateProvider3);

        var duplicates1 = duplicateCounter1.Count();

        ConsoleHelpers.Debug("------");

        var duplicates2 = duplicateCounter2.Count();

        ConsoleHelpers.Debug("------");

        var duplicates3 = duplicateCounter3.Count();

        Console.WriteLine($"{duplicates1} | {duplicates2} | {duplicates3}");

        linkFrequenciesCache1.ValidateFrequencies();
        linkFrequenciesCache3.ValidateFrequencies();
    }
}