예제 #1
0
        /// <summary>
        /// Verifies <c>MatchPattern</c> with the <c>ZeroOrMany</c> wildcard against the
        /// balanced variant built from the four-element sequence e1, e2, e1, e2.
        /// </summary>
        public static void PatternMatchTest()
        {
            var anyRun = Sequences.Sequences.ZeroOrMany;

            using (var scope = new TempLinksTestScope(useSequences: true))
            {
                var links = scope.Links;
                var sequences = scope.Sequences;

                var first = links.Create();
                var second = links.Create();

                // Two alternating elements, like the letters of "mama" / "papa".
                var elements = new[] { first, second, first, second };

                var converter = new BalancedVariantConverter<ulong>(links);
                var root = converter.Convert(elements);

                // Link layout as noted by the original author:
                //   1: [1]
                //   2: [2]
                //   3: [1,2]
                //   4: [1,2,1,2]
                var pair = links.GetSource(root);

                // Patterns that start with (e2, e1), end with (e2, e1),
                // or start and end with e1 are expected to match nothing.
                var reversedPairFirst = sequences.MatchPattern(second, first, anyRun);
                Assert.True(reversedPairFirst.Count == 0);

                var reversedPairLast = sequences.MatchPattern(anyRun, second, first);
                Assert.True(reversedPairLast.Count == 0);

                var firstToFirst = sequences.MatchPattern(first, anyRun, first);
                Assert.True(firstToFirst.Count == 0);

                // "e1 ... e2" is expected to match both the source doublet of the root and the root itself.
                var firstToSecond = sequences.MatchPattern(first, anyRun, second);
                Assert.Contains(pair, firstToSecond);
                Assert.Contains(root, firstToSecond);

                // Clean up every position of the sequence, in order.
                foreach (var element in elements)
                {
                    links.Delete(element);
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Builds a balanced variant out of 2000 freshly created links and checks that both
        /// <c>sequences.ToList</c> (with a <c>LeveledSequenceWalker</c> configured as the walker)
        /// and <c>SequenceWalker.WalkRight</c> read the original elements back in order.
        /// The elapsed time of each reader is written to the console.
        /// </summary>
        public static void ReadSequenceTest()
        {
            const long elementCount = 2000;

            using (var scope = new TempLinksTestScope(useSequences: false))
            {
                var links = scope.Links;

                // The scope is created without sequences, so they are built here with an explicit walker.
                var walkerOptions = new SequencesOptions<ulong>
                {
                    Walker = new LeveledSequenceWalker<ulong>(links)
                };
                var sequences = new Sequences.Sequences(links, walkerOptions);

                var elements = new ulong[elementCount];
                for (var index = 0; index < elements.Length; index++)
                {
                    elements[index] = links.Create();
                }

                var converter = new BalancedVariantConverter<ulong>(links);

                var convertWatch = Stopwatch.StartNew();
                var root = converter.Convert(elements);
                convertWatch.Stop();

                var leveledWatch = Stopwatch.StartNew();
                var leveledRead = sequences.ToList(root);
                leveledWatch.Stop();

                var stackWatch = Stopwatch.StartNew();
                var stackRead = new List<ulong>();
                SequenceWalker.WalkRight(root, links.GetSource, links.GetTarget, links.IsPartialPoint, stackRead.Add);
                stackWatch.Stop();

                Assert.True(elements.SequenceEqual(leveledRead));
                Assert.True(elements.SequenceEqual(stackRead));

                // The timing comparison (leveled reader faster than stack walker) is intentionally not asserted.
                Console.WriteLine($"Stack-based walker: {stackWatch.Elapsed}, Level-based reader: {leveledWatch.Elapsed}");

                foreach (var element in elements)
                {
                    links.Delete(element);
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Creates a balanced variant of 200 new links and checks that
        /// <c>GetAllMatchingSequences0</c> and <c>GetAllMatchingSequences1</c> each return
        /// exactly one result, equal to that balanced variant.
        /// </summary>
        public static void BalancedVariantSearchTest()
        {
            const long elementCount = 200;

            using (var scope = new TempLinksTestScope(useSequences: true))
            {
                var links = scope.Links;
                var sequences = scope.Sequences;

                var elements = new ulong[elementCount];
                for (var index = 0; index < elements.Length; index++)
                {
                    elements[index] = links.Create();
                }

                var converter = new BalancedVariantConverter<ulong>(links);

                var convertWatch = Stopwatch.StartNew();
                var root = converter.Convert(elements);
                convertWatch.Stop();

                var search0Watch = Stopwatch.StartNew();
                var found0 = sequences.GetAllMatchingSequences0(elements);
                search0Watch.Stop();

                var search1Watch = Stopwatch.StartNew();
                var found1 = sequences.GetAllMatchingSequences1(elements);
                search1Watch.Stop();

                // With 200 elements, sequences.Each(sequence) would take forever, so it is not exercised here.

                Assert.True(found0.Count == 1 && root == found0[0]);
                Assert.True(found1.Count == 1 && root == found1.First());

                // The timing comparison (conversion faster than search) is intentionally not asserted.

                foreach (var element in elements)
                {
                    links.Delete(element);
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Creates a balanced variant of 200 new links, then searches for the sub-sequence
        /// that omits the first and last element. Both <c>GetAllPartiallyMatchingSequences0</c>
        /// and <c>GetAllPartiallyMatchingSequences1</c> must return exactly one result,
        /// equal to the balanced variant.
        /// </summary>
        public static void BalancedPartialVariantsSearchTest()
        {
            const long elementCount = 200;

            using (var scope = new TempLinksTestScope(useSequences: true))
            {
                var links = scope.Links;
                var sequences = scope.Sequences;

                var elements = new ulong[elementCount];
                for (var index = 0; index < elements.Length; index++)
                {
                    elements[index] = links.Create();
                }

                var converter = new BalancedVariantConverter<ulong>(links);
                var root = converter.Convert(elements);

                // Inner part of the sequence: everything except the first and the last element.
                var inner = new ulong[elementCount - 2];
                Array.Copy(elements, 1, inner, 0, inner.Length);

                var search0Watch = Stopwatch.StartNew();
                var found0 = sequences.GetAllPartiallyMatchingSequences0(inner);
                search0Watch.Stop();

                var search1Watch = Stopwatch.StartNew();
                var found1 = sequences.GetAllPartiallyMatchingSequences1(inner);
                search1Watch.Stop();

                Assert.True(found0.Count == 1 && root == found0[0]);
                Assert.True(found1.Count == 1 && root == found1.First());

                foreach (var element in elements)
                {
                    links.Delete(element);
                }
            }
        }
예제 #5
0
        /// <summary>
        /// Stores the decimal text of up to 500 distinct random 64-bit numbers in two link stores:
        /// one through sequences configured with <c>UseCompression</c> and
        /// <c>EnforceSingleSequenceVersionOnWriteBasedOnExisting</c>, the other through a
        /// <c>BalancedVariantConverter</c>. Checks that both round-trip to the original strings
        /// and that each store uses fewer links (beyond the Unicode map) than there are characters.
        /// </summary>
        public static void RundomNumbersCompressionQualityTest()
        {
            const ulong stringCount = 500;

            // An earlier variant drew the numbers from the bounded range 10000..20000
            // via RandomHelpers.DefaultFactory; the full UInt64 range is used now.
            var strings = new List<string>();
            for (ulong n = 0; n < stringCount; n++)
            {
                strings.Add(RandomHelpers.Default.NextUInt64().ToString());
            }

            strings = strings.Distinct().ToList();

            var arrays = strings.Select(UnicodeMap.FromStringToLinkArray).ToArray();
            var totalCharacters = arrays.Select(array => array.Length).Sum();

            var compressingOptions = new SequencesOptions<ulong>
            {
                UseCompression = true,
                EnforceSingleSequenceVersionOnWriteBasedOnExisting = true
            };

            using (var scope1 = new TempLinksTestScope(useSequences: true, sequencesOptions: compressingOptions))
            using (var scope2 = new TempLinksTestScope(useSequences: true))
            {
                scope1.Links.UseUnicode();
                scope2.Links.UseUnicode();

                var compressor1 = scope1.Sequences;
                var compressor2 = scope2.Sequences;

                var count = arrays.Length;
                var compressed1 = new ulong[count];
                var compressed2 = new ulong[count];

                // Pass 1: compressing sequences (scope1).
                var compressorWatch = Stopwatch.StartNew();
                for (var index = 0; index < count; index++)
                {
                    compressed1[index] = compressor1.Create(arrays[index].ShiftRight());
                }
                var elapsed1 = compressorWatch.Elapsed;

                // Pass 2: balanced variant (scope2).
                var balancedVariantConverter = new BalancedVariantConverter<ulong>(scope2.Links);

                var balancedWatch = Stopwatch.StartNew();
                for (var index = 0; index < count; index++)
                {
                    compressed2[index] = balancedVariantConverter.Convert(arrays[index]);
                }
                var elapsed2 = balancedWatch.Elapsed;

                Debug.WriteLine($"Compressor: {elapsed1}, Balanced sequence creator: {elapsed2}");

                // NOTE(review): this assertion depends on wall-clock timing and may be environment-sensitive.
                Assert.True(elapsed1 > elapsed2);

                // Round-trip check for every pair where both stores produced a sequence.
                for (var index = 0; index < count; index++)
                {
                    var sequence1 = compressed1[index];
                    var sequence2 = compressed2[index];

                    if (sequence1 == _constants.Null || sequence2 == _constants.Null)
                    {
                        continue;
                    }

                    var decompress1 = UnicodeMap.FromSequenceLinkToString(sequence1, scope1.Links);
                    var decompress2 = UnicodeMap.FromSequenceLinkToString(sequence2, scope2.Links);

                    Assert.True(strings[index] == decompress1 && decompress1 == decompress2);
                }

                // Links created beyond the Unicode map must be fewer than the characters stored.
                Assert.True((int)(scope1.Links.Count() - UnicodeMap.MapSize) < totalCharacters);
                Assert.True((int)(scope2.Links.Count() - UnicodeMap.MapSize) < totalCharacters);

                Debug.WriteLine($"{(double)(scope1.Links.Count() - UnicodeMap.MapSize) / totalCharacters} | {(double)(scope2.Links.Count() - UnicodeMap.MapSize) / totalCharacters}");

                // The compressed store can be worse than the balanced variant here,
                // so scope1.Links.Count() <= scope2.Links.Count() is not asserted.
                // Frequency validation (compressor1.ValidateFrequencies()) is likewise disabled.
            }
        }
예제 #6
0
        /// <summary>
        /// Stores the decimal text of the numbers 10000..12499 twice in each of two link stores
        /// (compressing sequences in scope1, balanced variant in scope2) and keeps a result only
        /// when both writes of the same input return the same link. Kept results must round-trip
        /// to the original strings, each store must use fewer links (beyond the Unicode map) than
        /// there are characters, and scope1 must not hold more links than scope2.
        /// </summary>
        public static void CompressionStabilityTest()
        {
            // TODO: Fix bug (do a separate test) for the range 0..1000.
            const ulong minNumbers = 10000;
            const ulong maxNumbers = 12500;

            var strings = new List<string>();
            for (var number = minNumbers; number < maxNumbers; number++)
            {
                strings.Add(number.ToString());
            }

            var arrays = strings.Select(UnicodeMap.FromStringToLinkArray).ToArray();
            var totalCharacters = arrays.Select(array => array.Length).Sum();

            var compressingOptions = new SequencesOptions<ulong>
            {
                UseCompression = true,
                EnforceSingleSequenceVersionOnWriteBasedOnExisting = true
            };

            using (var scope1 = new TempLinksTestScope(useSequences: true, sequencesOptions: compressingOptions))
            using (var scope2 = new TempLinksTestScope(useSequences: true))
            {
                scope1.Links.UseUnicode();
                scope2.Links.UseUnicode();

                var compressor1 = scope1.Sequences;
                var compressor2 = scope2.Sequences;

                var count = arrays.Length;
                var compressed1 = new ulong[count];
                var compressed2 = new ulong[count];

                var compressorWatch = Stopwatch.StartNew();

                // Author's note on the former Compress-based variant of this loop:
                // collisions proved (cannot be solved by max doublet comparison, no stable rule);
                // the stability issue starts at 10001 or 11000.
                for (var index = 0; index < count; index++)
                {
                    var first = compressor1.Create(arrays[index].ShiftRight());
                    var second = compressor1.Create(arrays[index].ShiftRight());

                    // TODO: Find a solution for the case where the two writes differ;
                    // for now such entries are simply left unset.
                    if (first == second)
                    {
                        compressed1[index] = first;
                    }
                }

                var elapsed1 = compressorWatch.Elapsed;

                var balancedVariantConverter = new BalancedVariantConverter<ulong>(scope2.Links);

                var balancedWatch = Stopwatch.StartNew();

                for (var index = 0; index < count; index++)
                {
                    var first = balancedVariantConverter.Convert(arrays[index]);
                    var second = balancedVariantConverter.Convert(arrays[index]);

                    if (first == second)
                    {
                        compressed2[index] = first;
                    }
                }

                var elapsed2 = balancedWatch.Elapsed;

                Debug.WriteLine($"Compressor: {elapsed1}, Balanced sequence creator: {elapsed2}");

                // NOTE(review): this assertion depends on wall-clock timing and may be environment-sensitive.
                Assert.True(elapsed1 > elapsed2);

                // Round-trip check for every pair where both stores kept a sequence.
                for (var index = 0; index < count; index++)
                {
                    var sequence1 = compressed1[index];
                    var sequence2 = compressed2[index];

                    if (sequence1 == _constants.Null || sequence2 == _constants.Null)
                    {
                        continue;
                    }

                    var decompress1 = UnicodeMap.FromSequenceLinkToString(sequence1, scope1.Links);
                    var decompress2 = UnicodeMap.FromSequenceLinkToString(sequence2, scope2.Links);

                    // A structural comparison via FormatStructure (expecting different structures
                    // for arrays longer than 3 elements) is currently disabled.

                    Assert.True(strings[index] == decompress1 && decompress1 == decompress2);
                }

                // Links created beyond the Unicode map must be fewer than the characters stored.
                Assert.True((int)(scope1.Links.Count() - UnicodeMap.MapSize) < totalCharacters);
                Assert.True((int)(scope2.Links.Count() - UnicodeMap.MapSize) < totalCharacters);

                Debug.WriteLine($"{(double)(scope1.Links.Count() - UnicodeMap.MapSize) / totalCharacters} | {(double)(scope2.Links.Count() - UnicodeMap.MapSize) / totalCharacters}");

                Assert.True(scope1.Links.Count() <= scope2.Links.Count());

                // Frequency validation (compressor1.ValidateFrequencies()) is currently disabled.
            }
        }
예제 #7
0
        /// <summary>
        /// Stores every non-empty line of <c>_exampleLoremIpsumText</c> in three separate link stores
        /// using three converters: a <c>CompressingConverter</c> (scope1), a
        /// <c>BalancedVariantConverter</c> (scope2) and an <c>OptimalVariantConverter</c> (scope3).
        /// Checks that all three round-trip to the original text, that each store grows by fewer
        /// links than there are characters, and that scope1 and scope3 each grow by fewer links
        /// than the balanced baseline in scope2. Timings and duplicate-segment counts are printed.
        /// </summary>
        public static void CompressionEfficiencyTest()
        {
            var strings = _exampleLoremIpsumText.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
            var arrays = strings.Select(UnicodeMap.FromStringToLinkArray).ToArray();
            var totalCharacters = arrays.Select(x => x.Length).Sum();

            using (var scope1 = new TempLinksTestScope(useSequences: true))
            using (var scope2 = new TempLinksTestScope(useSequences: true))
            using (var scope3 = new TempLinksTestScope(useSequences: true))
            {
                scope1.Links.Unsync.UseUnicode();
                scope2.Links.Unsync.UseUnicode();
                scope3.Links.Unsync.UseUnicode();

                // scope1: compressing converter driven by a link-frequency cache.
                var balancedVariantConverter1 = new BalancedVariantConverter<ulong>(scope1.Links.Unsync);
                var totalSequenceSymbolFrequencyCounter = new TotalSequenceSymbolFrequencyCounter<ulong>(scope1.Links.Unsync);
                var linkFrequenciesCache1 = new LinkFrequenciesCache<ulong>(scope1.Links.Unsync, totalSequenceSymbolFrequencyCounter);
                var compressor1 = new CompressingConverter<ulong>(scope1.Links.Unsync, balancedVariantConverter1, linkFrequenciesCache1, doInitialFrequenciesIncrement: false);

                // scope3: optimal variant converter whose element levels come from a frequency cache.
                // NOTE(review): this cache wraps scope3's links but reuses the counter constructed
                // over scope1's links — confirm that sharing the counter is intended.
                var linkFrequenciesCache3 = new LinkFrequenciesCache<ulong>(scope3.Links.Unsync, totalSequenceSymbolFrequencyCounter);
                var linkToItsFrequencyNumberConverter = new FrequenciesCacheBasedLinkToItsFrequencyNumberConverter<ulong>(linkFrequenciesCache3);
                var sequenceToItsLocalElementLevelsConverter = new SequenceToItsLocalElementLevelsConverter<ulong>(scope3.Links.Unsync, linkToItsFrequencyNumberConverter);
                var optimalVariantConverter = new OptimalVariantConverter<ulong>(scope3.Links.Unsync, sequenceToItsLocalElementLevelsConverter);

                var compressed1 = new ulong[arrays.Length];
                var compressed2 = new ulong[arrays.Length];
                var compressed3 = new ulong[arrays.Length];

                // Baseline for scope1. The original read this from scope2 (copy-paste slip),
                // although it is only ever subtracted from scope1's link count below.
                var initialCount1 = scope1.Links.Unsync.Count();

                var sw1 = Stopwatch.StartNew();

                for (var i = 0; i < arrays.Length; i++)
                {
                    // Frequencies are incremented per sequence, right before it is compressed.
                    linkFrequenciesCache1.IncrementFrequencies(arrays[i]);
                    compressed1[i] = compressor1.Convert(arrays[i]);
                }

                var elapsed1 = sw1.Elapsed;

                // scope2: plain balanced variant, used as the baseline for link counts.
                var balancedVariantConverter2 = new BalancedVariantConverter<ulong>(scope2.Links.Unsync);

                var initialCount2 = scope2.Links.Unsync.Count();

                var sw2 = Stopwatch.StartNew();

                for (var i = 0; i < arrays.Length; i++)
                {
                    compressed2[i] = balancedVariantConverter2.Convert(arrays[i]);
                }

                var elapsed2 = sw2.Elapsed;

                // For scope3 all frequencies are incremented up front, before any conversion
                // and before the baseline count and the stopwatch are taken.
                for (var i = 0; i < arrays.Length; i++)
                {
                    linkFrequenciesCache3.IncrementFrequencies(arrays[i]);
                }

                var initialCount3 = scope3.Links.Unsync.Count();

                var sw3 = Stopwatch.StartNew();

                for (var i = 0; i < arrays.Length; i++)
                {
                    compressed3[i] = optimalVariantConverter.Convert(arrays[i]);
                }

                var elapsed3 = sw3.Elapsed;

                Console.WriteLine($"Compressor: {elapsed1}, Balanced variant: {elapsed2}, Optimal variant: {elapsed3}");

                // Timing relations between the converters are intentionally not asserted.

                // Round-trip checks.
                for (var i = 0; i < arrays.Length; i++)
                {
                    var sequence1 = compressed1[i];
                    var sequence2 = compressed2[i];
                    var sequence3 = compressed3[i];

                    var decompress1 = UnicodeMap.FromSequenceLinkToString(sequence1, scope1.Links.Unsync);
                    var decompress2 = UnicodeMap.FromSequenceLinkToString(sequence2, scope2.Links.Unsync);
                    var decompress3 = UnicodeMap.FromSequenceLinkToString(sequence3, scope3.Links.Unsync);

                    // The formatted structures are not compared (those assertions are disabled);
                    // the calls are kept so FormatStructure still runs against every sequence.
                    var structure1 = scope1.Links.Unsync.FormatStructure(sequence1, link => link.IsPartialPoint());
                    var structure2 = scope2.Links.Unsync.FormatStructure(sequence2, link => link.IsPartialPoint());
                    var structure3 = scope3.Links.Unsync.FormatStructure(sequence3, link => link.IsPartialPoint());

                    Assert.True(strings[i] == decompress1 && decompress1 == decompress2);
                    Assert.True(strings[i] == decompress3 && decompress3 == decompress2);
                }

                // Each store must have grown by fewer links than the number of characters stored.
                Assert.True((int)(scope1.Links.Unsync.Count() - initialCount1) < totalCharacters);
                Assert.True((int)(scope2.Links.Unsync.Count() - initialCount2) < totalCharacters);
                Assert.True((int)(scope3.Links.Unsync.Count() - initialCount3) < totalCharacters);

                Console.WriteLine($"{(double)(scope1.Links.Unsync.Count() - initialCount1) / totalCharacters} | {(double)(scope2.Links.Unsync.Count() - initialCount2) / totalCharacters} | {(double)(scope3.Links.Unsync.Count() - initialCount3) / totalCharacters}");

                // Both the compressing and the optimal variant must beat the balanced baseline.
                Assert.True(scope1.Links.Unsync.Count() - initialCount1 < scope2.Links.Unsync.Count() - initialCount2);
                Assert.True(scope3.Links.Unsync.Count() - initialCount3 < scope2.Links.Unsync.Count() - initialCount2);

                var duplicateProvider1 = new DuplicateSegmentsProvider<ulong>(scope1.Links.Unsync, scope1.Sequences);
                var duplicateProvider2 = new DuplicateSegmentsProvider<ulong>(scope2.Links.Unsync, scope2.Sequences);
                var duplicateProvider3 = new DuplicateSegmentsProvider<ulong>(scope3.Links.Unsync, scope3.Sequences);

                var duplicateCounter1 = new DuplicateSegmentsCounter<ulong>(duplicateProvider1);
                var duplicateCounter2 = new DuplicateSegmentsCounter<ulong>(duplicateProvider2);
                var duplicateCounter3 = new DuplicateSegmentsCounter<ulong>(duplicateProvider3);

                var duplicates1 = duplicateCounter1.Count();

                ConsoleHelpers.Debug("------");

                var duplicates2 = duplicateCounter2.Count();

                ConsoleHelpers.Debug("------");

                var duplicates3 = duplicateCounter3.Count();

                Console.WriteLine($"{duplicates1} | {duplicates2} | {duplicates3}");

                linkFrequenciesCache1.ValidateFrequencies();
                linkFrequenciesCache3.ValidateFrequencies();
            }
        }