Beispiel #1
0
        /// <summary>
        /// Fills in every option that is still unset with a default built on top of <paramref name="links"/>.
        /// </summary>
        /// <param name="links">
        /// Links storage used to create the sequence marker point and to construct the default
        /// matcher, converter, index and walker.
        /// </param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when a non-null sequence marker link is configured, does not exist in
        /// <paramref name="links"/>, and the point created in its place does not equal it.
        /// </exception>
        public void InitOptions(ISynchronizedLinks <TLink> links)
        {
            if (UseSequenceMarker)
            {
                var markerIsNull = _equalityComparer.Equals(SequenceMarkerLink, links.Constants.Null);
                if (markerIsNull)
                {
                    // No marker configured yet: take a freshly created point as the marker.
                    SequenceMarkerLink = links.CreatePoint();
                }
                else if (!links.Exists(SequenceMarkerLink))
                {
                    // A marker is configured but absent from the storage; the only accepted outcome
                    // is that the newly created point equals the configured marker.
                    var recreatedMarker = links.CreatePoint();
                    if (!_equalityComparer.Equals(recreatedMarker, SequenceMarkerLink))
                    {
                        throw new InvalidOperationException("Cannot recreate sequence marker link.");
                    }
                }

                if (MarkedSequenceMatcher == null)
                {
                    MarkedSequenceMatcher = new MarkedSequenceCriterionMatcher<TLink>(links, SequenceMarkerLink);
                }
            }

            // Constructed unconditionally: used directly without compression, or wrapped by the
            // compressing converter otherwise.
            var defaultConverter = new BalancedVariantConverter<TLink>(links);

            if (!UseCompression)
            {
                if (LinksToSequenceConverter == null)
                {
                    LinksToSequenceConverter = defaultConverter;
                }
            }
            else if (LinksToSequenceConverter == null)
            {
                // The frequency counter variant depends on whether sequences are marked.
                ICounter<TLink, TLink> symbolFrequencyCounter = UseSequenceMarker
                    ? (ICounter<TLink, TLink>)new TotalMarkedSequenceSymbolFrequencyCounter<TLink>(links, MarkedSequenceMatcher)
                    : new TotalSequenceSymbolFrequencyCounter<TLink>(links);

                var frequenciesCache = new LinkFrequenciesCache<TLink>(links, symbolFrequencyCounter);
                LinksToSequenceConverter = new CompressingConverter<TLink>(links, defaultConverter, frequenciesCache);
            }

            if (UseIndex)
            {
                if (Index == null)
                {
                    Index = new SequenceIndex<TLink>(links);
                }
            }

            if (Walker == null)
            {
                var walkerStack = new DefaultStack<TLink>();
                Walker = new RightSequenceWalker<TLink>(links, walkerStack);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Compresses the four-element sequence [e1, e2, e1, e2] and checks that every element can be
        /// read back from the resulting link in three ways: nested source/target navigation,
        /// <c>GetByKeys</c> and <c>GetSquareMatrixSequenceElementByIndex</c>.
        /// </summary>
        public static void CompressionTest()
        {
            using (var scope = new TempLinksTestScope(useSequences: true))
            {
                var links     = scope.Links;
                var sequences = scope.Sequences;

                var first  = links.Create();
                var second = links.Create();

                // Repeating pair pattern, like "mama" / "papa": [1] [2] [1] [2].
                var sequence = new[] { first, second, first, second };

                var unsyncLinks      = links.Unsync;
                var balancedVariant  = new BalancedVariantConverter<ulong>(unsyncLinks);
                var frequencyCounter = new TotalSequenceSymbolFrequencyCounter<ulong>(links.Unsync);
                var frequenciesCache = new LinkFrequenciesCache<ulong>(links.Unsync, frequencyCounter);
                var compressor       = new CompressingConverter<ulong>(links.Unsync, balancedVariant, frequenciesCache);

                var compressedVariant = compressor.Convert(sequence);

                // Expected layout of the storage after compression:
                //   1: [1]       (1->1) point
                //   2: [2]       (2->2) point
                //   3: [1,2]     (1->2) doublet
                //   4: [1,2,1,2] (3->3) doublet

                // Navigation through the source and target halves of the compressed link.
                var leftHalf  = links.GetSource(compressedVariant);
                var rightHalf = links.GetTarget(compressedVariant);

                Assert.True(links.GetSource(leftHalf) == sequence[0]);
                Assert.True(links.GetTarget(leftHalf) == sequence[1]);
                Assert.True(links.GetSource(rightHalf) == sequence[2]);
                Assert.True(links.GetTarget(rightHalf) == sequence[3]);

                // The same navigation expressed as key paths.
                var source = _constants.SourcePart;
                var target = _constants.TargetPart;

                Assert.True(links.GetByKeys(compressedVariant, source, source) == sequence[0]);
                Assert.True(links.GetByKeys(compressedVariant, source, target) == sequence[1]);
                Assert.True(links.GetByKeys(compressedVariant, target, source) == sequence[2]);
                Assert.True(links.GetByKeys(compressedVariant, target, target) == sequence[3]);

                // Index-based access; the literal 4 is the length of the sequence.
                Assert.True(links.GetSquareMatrixSequenceElementByIndex(compressedVariant, 4, 0) == sequence[0]);
                Assert.True(links.GetSquareMatrixSequenceElementByIndex(compressedVariant, 4, 1) == sequence[1]);
                Assert.True(links.GetSquareMatrixSequenceElementByIndex(compressedVariant, 4, 2) == sequence[2]);
                Assert.True(links.GetSquareMatrixSequenceElementByIndex(compressedVariant, 4, 3) == sequence[3]);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Converts every non-empty line of the example text with three converters (compressing,
        /// balanced variant and optimal variant), each writing into its own temporary links scope.
        /// Then checks that every line decompresses back to the original string, compares how many
        /// links each converter created, prints timings and duplicate counts, and validates the
        /// frequency caches.
        /// </summary>
        public static void CompressionEfficiencyTest()
        {
            var strings         = _exampleLoremIpsumText.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
            var arrays          = strings.Select(UnicodeMap.FromStringToLinkArray).ToArray();
            var totalCharacters = arrays.Select(x => x.Length).Sum();

            using (var scope1 = new TempLinksTestScope(useSequences: true))
            using (var scope2 = new TempLinksTestScope(useSequences: true))
            using (var scope3 = new TempLinksTestScope(useSequences: true))
            {
                scope1.Links.Unsync.UseUnicode();
                scope2.Links.Unsync.UseUnicode();
                scope3.Links.Unsync.UseUnicode();

                // Scope 1: compressing converter backed by a frequencies cache.
                var balancedVariantConverter1           = new BalancedVariantConverter<ulong>(scope1.Links.Unsync);
                var totalSequenceSymbolFrequencyCounter = new TotalSequenceSymbolFrequencyCounter<ulong>(scope1.Links.Unsync);
                var linkFrequenciesCache1 = new LinkFrequenciesCache<ulong>(scope1.Links.Unsync, totalSequenceSymbolFrequencyCounter);
                var compressor1           = new CompressingConverter<ulong>(scope1.Links.Unsync, balancedVariantConverter1, linkFrequenciesCache1, doInitialFrequenciesIncrement: false);

                // Scope 3: optimal variant converter driven by cached frequencies.
                // NOTE(review): this cache operates on scope3 but receives the counter that was
                // constructed over scope1 - confirm that sharing the counter is intended.
                var linkFrequenciesCache3 = new LinkFrequenciesCache<ulong>(scope3.Links.Unsync, totalSequenceSymbolFrequencyCounter);

                var linkToItsFrequencyNumberConverter = new FrequenciesCacheBasedLinkToItsFrequencyNumberConverter<ulong>(linkFrequenciesCache3);

                var sequenceToItsLocalElementLevelsConverter = new SequenceToItsLocalElementLevelsConverter<ulong>(scope3.Links.Unsync, linkToItsFrequencyNumberConverter);
                var optimalVariantConverter = new OptimalVariantConverter<ulong>(scope3.Links.Unsync, sequenceToItsLocalElementLevelsConverter);

                var compressed1 = new ulong[arrays.Length];
                var compressed2 = new ulong[arrays.Length];
                var compressed3 = new ulong[arrays.Length];

                var start = 0;
                var end   = arrays.Length;

                //for (int i = start; i < end; i++)
                //    linkFrequenciesCache1.IncrementFrequencies(arrays[i]);

                // Baseline for scope1. It must be read from scope1 itself, because it is later
                // subtracted from scope1's link count.
                var initialCount1 = scope1.Links.Unsync.Count();

                var sw1 = Stopwatch.StartNew();

                for (int i = start; i < end; i++)
                {
                    linkFrequenciesCache1.IncrementFrequencies(arrays[i]);
                    compressed1[i] = compressor1.Convert(arrays[i]);
                }

                var elapsed1 = sw1.Elapsed;

                // Scope 2: plain balanced variant converter.
                var balancedVariantConverter2 = new BalancedVariantConverter<ulong>(scope2.Links.Unsync);

                var initialCount2 = scope2.Links.Unsync.Count();

                var sw2 = Stopwatch.StartNew();

                for (int i = start; i < end; i++)
                {
                    compressed2[i] = balancedVariantConverter2.Convert(arrays[i]);
                }

                var elapsed2 = sw2.Elapsed;

                // Frequencies for scope 3 are accumulated up front, outside of the timed section.
                for (int i = start; i < end; i++)
                {
                    linkFrequenciesCache3.IncrementFrequencies(arrays[i]);
                }

                var initialCount3 = scope3.Links.Unsync.Count();

                var sw3 = Stopwatch.StartNew();

                for (int i = start; i < end; i++)
                {
                    //linkFrequenciesCache3.IncrementFrequencies(arrays[i]);
                    compressed3[i] = optimalVariantConverter.Convert(arrays[i]);
                }

                var elapsed3 = sw3.Elapsed;

                Console.WriteLine($"Compressor: {elapsed1}, Balanced variant: {elapsed2}, Optimal variant: {elapsed3}");

                // Assert.True(elapsed1 > elapsed2);

                // Checks: every converted sequence must decompress to the original line.
                for (int i = start; i < end; i++)
                {
                    var sequence1 = compressed1[i];
                    var sequence2 = compressed2[i];
                    var sequence3 = compressed3[i];

                    var decompress1 = UnicodeMap.FromSequenceLinkToString(sequence1, scope1.Links.Unsync);

                    var decompress2 = UnicodeMap.FromSequenceLinkToString(sequence2, scope2.Links.Unsync);

                    var decompress3 = UnicodeMap.FromSequenceLinkToString(sequence3, scope3.Links.Unsync);

                    // Formatted structures are only consumed by the disabled assertions below;
                    // the calls are kept so that formatting is still exercised.
                    var structure1 = scope1.Links.Unsync.FormatStructure(sequence1, link => link.IsPartialPoint());
                    var structure2 = scope2.Links.Unsync.FormatStructure(sequence2, link => link.IsPartialPoint());
                    var structure3 = scope3.Links.Unsync.FormatStructure(sequence3, link => link.IsPartialPoint());

                    //if (sequence1 != Constants.Null && sequence2 != Constants.Null && arrays[i].Length > 3)
                    //    Assert.False(structure1 == structure2);
                    //if (sequence3 != Constants.Null && sequence2 != Constants.Null && arrays[i].Length > 3)
                    //    Assert.False(structure3 == structure2);

                    Assert.True(strings[i] == decompress1 && decompress1 == decompress2);
                    Assert.True(strings[i] == decompress3 && decompress3 == decompress2);
                }

                // Number of links each converter added on top of its scope's baseline.
                var createdLinks1 = scope1.Links.Unsync.Count() - initialCount1;
                var createdLinks2 = scope2.Links.Unsync.Count() - initialCount2;
                var createdLinks3 = scope3.Links.Unsync.Count() - initialCount3;

                Assert.True((int)createdLinks1 < totalCharacters);
                Assert.True((int)createdLinks2 < totalCharacters);
                Assert.True((int)createdLinks3 < totalCharacters);

                Console.WriteLine($"{(double)createdLinks1 / totalCharacters} | {(double)createdLinks2 / totalCharacters} | {(double)createdLinks3 / totalCharacters}");

                // Both the compressing and the optimal variant converters are expected to create
                // fewer links than the balanced variant converter.
                Assert.True(createdLinks1 < createdLinks2);
                Assert.True(createdLinks3 < createdLinks2);

                var duplicateProvider1 = new DuplicateSegmentsProvider<ulong>(scope1.Links.Unsync, scope1.Sequences);
                var duplicateProvider2 = new DuplicateSegmentsProvider<ulong>(scope2.Links.Unsync, scope2.Sequences);
                var duplicateProvider3 = new DuplicateSegmentsProvider<ulong>(scope3.Links.Unsync, scope3.Sequences);

                var duplicateCounter1 = new DuplicateSegmentsCounter<ulong>(duplicateProvider1);
                var duplicateCounter2 = new DuplicateSegmentsCounter<ulong>(duplicateProvider2);
                var duplicateCounter3 = new DuplicateSegmentsCounter<ulong>(duplicateProvider3);

                var duplicates1 = duplicateCounter1.Count();

                ConsoleHelpers.Debug("------");

                var duplicates2 = duplicateCounter2.Count();

                ConsoleHelpers.Debug("------");

                var duplicates3 = duplicateCounter3.Count();

                Console.WriteLine($"{duplicates1} | {duplicates2} | {duplicates3}");

                linkFrequenciesCache1.ValidateFrequencies();
                linkFrequenciesCache3.ValidateFrequencies();
            }
        }