Example #1
0
        /// <summary>
        /// Fills in every option that has not been assigned yet, using the given links storage:
        /// the sequence marker link and its matcher (when <c>UseSequenceMarker</c> is set),
        /// the links-to-sequence converter, the index (when <c>UseIndex</c> is set) and the walker.
        /// Options that already hold a value are left untouched.
        /// </summary>
        /// <param name="links">Links storage that the created components operate on.</param>
        /// <exception cref="InvalidOperationException">
        /// The configured sequence marker link does not exist and the freshly created point
        /// is not equal to it.
        /// </exception>
        public void InitOptions(ISynchronizedLinks<TLink> links)
        {
            if (UseSequenceMarker)
            {
                var markerIsNull = _equalityComparer.Equals(SequenceMarkerLink, links.Constants.Null);
                if (markerIsNull)
                {
                    // No marker configured: allocate a new point and use it as the marker.
                    SequenceMarkerLink = links.CreatePoint();
                }
                else if (!links.Exists(SequenceMarkerLink))
                {
                    // A marker is configured but missing in the storage: the new point must be equal to it.
                    var recreatedPoint = links.CreatePoint();
                    if (!_equalityComparer.Equals(recreatedPoint, SequenceMarkerLink))
                    {
                        throw new InvalidOperationException("Cannot recreate sequence marker link.");
                    }
                }

                if (MarkedSequenceMatcher == null)
                {
                    MarkedSequenceMatcher = new MarkedSequenceCriterionMatcher<TLink>(links, SequenceMarkerLink);
                }
            }

            // Built unconditionally: it is either the converter itself or the base of the compressing one.
            var balancedConverter = new BalancedVariantConverter<TLink>(links);

            if (LinksToSequenceConverter == null)
            {
                if (UseCompression)
                {
                    var frequencyCounter = UseSequenceMarker
                        ? (ICounter<TLink, TLink>)new TotalMarkedSequenceSymbolFrequencyCounter<TLink>(links, MarkedSequenceMatcher)
                        : (ICounter<TLink, TLink>)new TotalSequenceSymbolFrequencyCounter<TLink>(links);
                    var frequenciesCache = new LinkFrequenciesCache<TLink>(links, frequencyCounter);

                    LinksToSequenceConverter = new CompressingConverter<TLink>(links, balancedConverter, frequenciesCache);
                }
                else
                {
                    LinksToSequenceConverter = balancedConverter;
                }
            }

            if (UseIndex)
            {
                if (Index == null)
                {
                    Index = new SequenceIndex<TLink>(links);
                }
            }

            if (Walker == null)
            {
                var walkerStack = new DefaultStack<TLink>();
                Walker = new RightSequenceWalker<TLink>(links, walkerStack);
            }
        }
Example #2
0
        /// <summary>
        /// Initializes a new <see cref="FileStorage"/> instance on top of a links storage whose memory
        /// is a <see cref="FileMappedResizableDirectMemory"/> built from <paramref name="DBFilename"/>.
        /// The constructor creates one point link, resolves the marker links (meaning root, unicode symbol,
        /// unicode sequence, set and file markers) and builds the string-to-sequence converters.
        /// </summary>
        /// <param name="DBFilename">
        /// File name handed to <see cref="FileMappedResizableDirectMemory"/>.
        /// </param>
        public FileStorage(string DBFilename)
        {
            var constants = new LinksConstants<TLinkAddress>(enableExternalReferencesSupport: true);
            var memory = new FileMappedResizableDirectMemory(DBFilename);
            _disposableLinks = new UnitedMemoryLinks<TLinkAddress>(memory, UnitedMemoryLinks<UInt64>.DefaultLinksSizeStep, constants, IndexTreeType.Default);
            _synchronizedLinks = new SynchronizedLinks<TLinkAddress>(_disposableLinks);

            // Create a link and make it a point (its source and target are the link itself).
            var point = _synchronizedLinks.Create();
            point = _synchronizedLinks.Update(point, newSource: point, newTarget: point);

            Any = _synchronizedLinks.Constants.Any;

            // Marker links are resolved by consecutive mapping indices, so the order below matters.
            ushort mappingIndex = 1;
            _meaningRoot = GetOrCreateMeaningRoot(mappingIndex++);
            _unicodeSymbolMarker = GetOrCreateNextMapping(mappingIndex++);
            _unicodeSequenceMarker = GetOrCreateNextMapping(mappingIndex++);
            _setMarker = GetOrCreateNextMapping(mappingIndex++);
            _fileMarker = GetOrCreateNextMapping(mappingIndex++);

            _addressToNumberConverter = new AddressToRawNumberConverter<TLinkAddress>();
            _numberToAddressConverter = new RawNumberToAddressConverter<TLinkAddress>();

            var balancedConverter = new BalancedVariantConverter<TLinkAddress>(_synchronizedLinks);
            var symbolMatcher = new TargetMatcher<TLinkAddress>(_synchronizedLinks, _unicodeSymbolMarker);
            var sequenceMatcher = new TargetMatcher<TLinkAddress>(_synchronizedLinks, _unicodeSequenceMarker);
            var charToSymbol = new CharToUnicodeSymbolConverter<TLinkAddress>(_synchronizedLinks, _addressToNumberConverter, _unicodeSymbolMarker);
            var symbolToChar = new UnicodeSymbolToCharConverter<TLinkAddress>(_synchronizedLinks, _numberToAddressConverter, symbolMatcher);
            var walker = new RightSequenceWalker<TLinkAddress>(_synchronizedLinks, new DefaultStack<TLinkAddress>(), symbolMatcher.IsMatched);

            // Both directions are wrapped in caching decorators.
            var stringToSequence = new StringToUnicodeSequenceConverter<TLinkAddress>(_synchronizedLinks, charToSymbol, balancedConverter, _unicodeSequenceMarker);
            _stringToUnicodeSequenceConverter = new CachingConverterDecorator<string, TLinkAddress>(stringToSequence);

            var sequenceToString = new UnicodeSequenceToStringConverter<TLinkAddress>(_synchronizedLinks, sequenceMatcher, walker, symbolToChar);
            _unicodeSequenceToStringConverter = new CachingConverterDecorator<TLinkAddress, string>(sequenceToString);
        }
Example #3
0
        /// <summary>
        /// Builds the balanced variant of the sequence [e1, e2, e1, e2] and checks which
        /// <c>MatchPattern</c> queries containing the <c>ZeroOrMany</c> wildcard find it.
        /// </summary>
        public static void PatternMatchTest()
        {
            var wildcard = Sequences.Sequences.ZeroOrMany;

            using (var scope = new TempLinksTestScope(useSequences: true))
            {
                var links = scope.Links;
                var sequences = scope.Sequences;

                var e1 = links.Create();
                var e2 = links.Create();

                // mama / papa
                var elements = new[] { e1, e2, e1, e2 };

                var converter = new BalancedVariantConverter<ulong>(links);
                var wholeSequence = converter.Convert(elements);

                // Expected storage layout:
                // 1: [1]
                // 2: [2]
                // 3: [1,2]
                // 4: [1,2,1,2]
                var pair = links.GetSource(wholeSequence);

                // Patterns that must not match anything.
                var startingWithE2E1 = sequences.MatchPattern(e2, e1, wildcard);
                Assert.True(startingWithE2E1.Count == 0);

                var endingWithE2E1 = sequences.MatchPattern(wildcard, e2, e1);
                Assert.True(endingWithE2E1.Count == 0);

                var fromE1ToE1 = sequences.MatchPattern(e1, wildcard, e1);
                Assert.True(fromE1ToE1.Count == 0);

                // e1 ... e2 matches both the doublet and the whole sequence.
                var fromE1ToE2 = sequences.MatchPattern(e1, wildcard, e2);
                Assert.Contains(pair, fromE1ToE2);
                Assert.Contains(wholeSequence, fromE1ToE2);

                foreach (var element in elements)
                {
                    links.Delete(element);
                }
            }
        }
Example #4
0
        /// <summary>
        /// Creates a sequence of 2000 links, converts it to its balanced variant and verifies that reading
        /// it back through <c>sequences.ToList</c> (configured with a <c>LeveledSequenceWalker</c>) and through
        /// <c>SequenceWalker.WalkRight</c> both reproduce the original elements. Timings are only printed.
        /// </summary>
        public static void ReadSequenceTest()
        {
            const long sequenceLength = 2000;

            using (var scope = new TempLinksTestScope(useSequences: false))
            {
                var links = scope.Links;
                var options = new SequencesOptions<ulong>
                {
                    Walker = new LeveledSequenceWalker<ulong>(links)
                };
                var sequences = new Sequences.Sequences(links, options);

                var elements = new ulong[sequenceLength];
                for (var index = 0; index < elements.Length; index++)
                {
                    elements[index] = links.Create();
                }

                var converter = new BalancedVariantConverter<ulong>(links);

                var conversionTimer = Stopwatch.StartNew();
                var balancedVariant = converter.Convert(elements);
                conversionTimer.Stop();

                var leveledTimer = Stopwatch.StartNew();
                var readByLevels = sequences.ToList(balancedVariant);
                leveledTimer.Stop();

                var stackTimer = Stopwatch.StartNew();
                var readByStack = new List<ulong>();
                SequenceWalker.WalkRight(balancedVariant, links.GetSource, links.GetTarget, links.IsPartialPoint, readByStack.Add);
                stackTimer.Stop();

                Assert.True(elements.SequenceEqual(readByLevels));
                Assert.True(elements.SequenceEqual(readByStack));

                // The relative speed of the two readers is reported but not asserted.
                // Assert.True(sw2.Elapsed < sw3.Elapsed);
                Console.WriteLine($"Stack-based walker: {stackTimer.Elapsed}, Level-based reader: {leveledTimer.Elapsed}");

                foreach (var element in elements)
                {
                    links.Delete(element);
                }
            }
        }
Example #5
0
        /// <summary>
        /// Compresses the sequence [e1, e2, e1, e2] with a <c>CompressingConverter</c> and checks that all four
        /// elements can be read back from the result through nested source/target lookups,
        /// <c>GetByKeys</c> and <c>GetSquareMatrixSequenceElementByIndex</c>.
        /// </summary>
        public static void CompressionTest()
        {
            using (var scope = new TempLinksTestScope(useSequences: true))
            {
                var links = scope.Links;
                var sequences = scope.Sequences;

                var e1 = links.Create();
                var e2 = links.Create();

                // mama / papa / template [(m/p), a] { [1] [2] [1] [2] }
                var sequence = new[] { e1, e2, e1, e2 };

                // The converters work on the unsynchronized view of the storage.
                var unsync = links.Unsync;
                var balancedConverter = new BalancedVariantConverter<ulong>(unsync);
                var frequencyCounter = new TotalSequenceSymbolFrequencyCounter<ulong>(unsync);
                var frequenciesCache = new LinkFrequenciesCache<ulong>(unsync, frequencyCounter);
                var compressor = new CompressingConverter<ulong>(unsync, balancedConverter, frequenciesCache);

                var compressed = compressor.Convert(sequence);

                // Expected storage layout:
                // 1: [1]       (1->1) point
                // 2: [2]       (2->2) point
                // 3: [1,2]     (1->2) doublet
                // 4: [1,2,1,2] (3->3) doublet

                var left = links.GetSource(compressed);
                Assert.True(links.GetSource(left) == sequence[0]);
                Assert.True(links.GetTarget(links.GetSource(compressed)) == sequence[1]);
                var right = links.GetTarget(compressed);
                Assert.True(links.GetSource(right) == sequence[2]);
                Assert.True(links.GetTarget(links.GetTarget(compressed)) == sequence[3]);

                var sourcePart = _constants.SourcePart;
                var targetPart = _constants.TargetPart;

                Assert.True(links.GetByKeys(compressed, sourcePart, sourcePart) == sequence[0]);
                Assert.True(links.GetByKeys(compressed, sourcePart, targetPart) == sequence[1]);
                Assert.True(links.GetByKeys(compressed, targetPart, sourcePart) == sequence[2]);
                Assert.True(links.GetByKeys(compressed, targetPart, targetPart) == sequence[3]);

                // The second argument (4) is the length of the sequence.
                Assert.True(links.GetSquareMatrixSequenceElementByIndex(compressed, 4, 0) == sequence[0]);
                Assert.True(links.GetSquareMatrixSequenceElementByIndex(compressed, 4, 1) == sequence[1]);
                Assert.True(links.GetSquareMatrixSequenceElementByIndex(compressed, 4, 2) == sequence[2]);
                Assert.True(links.GetSquareMatrixSequenceElementByIndex(compressed, 4, 3) == sequence[3]);
            }
        }
Example #6
0
        /// <summary>
        /// Initializes the context on top of a split links storage (separate data and index memory),
        /// resolves the marker links and builds the property operator and the number, date and
        /// string converters used by the context.
        /// </summary>
        /// <param name="dataDBFilename">File name handed to the data <see cref="FileMappedResizableDirectMemory"/>.</param>
        /// <param name="indexDBFilename">File name handed to the index <see cref="FileMappedResizableDirectMemory"/>.</param>
        public DoubletsDbContext(string dataDBFilename, string indexDBFilename)
        {
            var data = new FileMappedResizableDirectMemory(dataDBFilename);
            var index = new FileMappedResizableDirectMemory(indexDBFilename);
            var constants = new LinksConstants<TLinkAddress>(enableExternalReferencesSupport: true);

            // Low-level storage first, then the combined decorator that carries the main logic.
            _disposableLinks = new UInt32SplitMemoryLinks(data, index, UInt32SplitMemoryLinks.DefaultLinksSizeStep, constants);
            _links = new UInt32Links(_disposableLinks);

            // Constant links (markers, aka mapped links) are resolved by consecutive mapping indices,
            // so the order of the calls below matters.
            TLinkAddress mappingIndex = 1;
            _meaningRoot = GetOrCreateMeaningRoot(mappingIndex++);
            _unicodeSymbolMarker = GetOrCreateNextMapping(mappingIndex++);
            _unicodeSequenceMarker = GetOrCreateNextMapping(mappingIndex++);
            _titlePropertyMarker = GetOrCreateNextMapping(mappingIndex++);
            _contentPropertyMarker = GetOrCreateNextMapping(mappingIndex++);
            _publicationDateTimePropertyMarker = GetOrCreateNextMapping(mappingIndex++);
            _blogPostMarker = GetOrCreateNextMapping(mappingIndex++);

            // Operator for reading and writing properties of any link (object).
            _defaultLinkPropertyOperator = new PropertiesOperator<TLinkAddress>(_links);

            // Converters between a link address and its raw number representation.
            _numberToAddressConverter = new RawNumberToAddressConverter<TLinkAddress>();
            _addressToNumberConverter = new AddressToRawNumberConverter<TLinkAddress>();

            // Date converters built on top of long <-> raw number sequence converters.
            var sequenceToLong = new LongRawNumberSequenceToNumberConverter<TLinkAddress, long>(_links, _numberToAddressConverter);
            _longRawNumberToDateTimeConverter = new LongRawNumberSequenceToDateTimeConverter<TLinkAddress>(sequenceToLong);
            var longToSequence = new NumberToLongRawNumberSequenceConverter<long, TLinkAddress>(_links, _addressToNumberConverter);
            _dateTimeToLongRawNumberConverter = new DateTimeToLongRawNumberSequenceConverter<TLinkAddress>(longToSequence);

            // Converters between strings and unicode sequences stored as links.
            var balancedConverter = new BalancedVariantConverter<TLinkAddress>(_links);
            var symbolMatcher = new TargetMatcher<TLinkAddress>(_links, _unicodeSymbolMarker);
            var sequenceMatcher = new TargetMatcher<TLinkAddress>(_links, _unicodeSequenceMarker);
            var charToSymbol = new CharToUnicodeSymbolConverter<TLinkAddress>(_links, _addressToNumberConverter, _unicodeSymbolMarker);
            var symbolToChar = new UnicodeSymbolToCharConverter<TLinkAddress>(_links, _numberToAddressConverter, symbolMatcher);
            var walker = new RightSequenceWalker<TLinkAddress>(_links, new DefaultStack<TLinkAddress>(), symbolMatcher.IsMatched);

            var stringToSequence = new StringToUnicodeSequenceConverter<TLinkAddress>(_links, charToSymbol, balancedConverter, _unicodeSequenceMarker);
            _stringToUnicodeSequenceConverter = new CachingConverterDecorator<string, TLinkAddress>(stringToSequence);

            var sequenceToString = new UnicodeSequenceToStringConverter<TLinkAddress>(_links, sequenceMatcher, walker, symbolToChar);
            _unicodeSequenceToStringConverter = new CachingConverterDecorator<TLinkAddress, string>(sequenceToString);
        }
Example #7
0
        /// <summary>
        /// Creates a sequence of 200 links, stores its balanced variant and checks that both
        /// <c>GetAllMatchingSequences0</c> and <c>GetAllMatchingSequences1</c> return exactly that variant.
        /// </summary>
        public static void BalancedVariantSearchTest()
        {
            const long sequenceLength = 200;

            using (var scope = new TempLinksTestScope(useSequences: true))
            {
                var links = scope.Links;
                var sequences = scope.Sequences;

                var elements = new ulong[sequenceLength];
                for (var index = 0; index < elements.Length; index++)
                {
                    elements[index] = links.Create();
                }

                var converter = new BalancedVariantConverter<ulong>(links);

                var conversionTimer = Stopwatch.StartNew();
                var balancedVariant = converter.Convert(elements);
                conversionTimer.Stop();

                var firstSearchTimer = Stopwatch.StartNew();
                var foundByFirstSearch = sequences.GetAllMatchingSequences0(elements);
                firstSearchTimer.Stop();

                var secondSearchTimer = Stopwatch.StartNew();
                var foundBySecondSearch = sequences.GetAllMatchingSequences1(elements);
                secondSearchTimer.Stop();

                // With 200 elements this would take forever
                //var sw4 = Stopwatch.StartNew();
                //var searchResults4 = sequences.Each(sequence); sw4.Stop();

                // Each search must return the balanced variant and nothing else.
                Assert.True(foundByFirstSearch.Count == 1);
                Assert.True(balancedVariant == foundByFirstSearch[0]);

                Assert.True(foundBySecondSearch.Count == 1);
                Assert.True(balancedVariant == foundBySecondSearch.First());

                //Assert.True(sw1.Elapsed < sw2.Elapsed);

                foreach (var element in elements)
                {
                    links.Delete(element);
                }
            }
        }
Example #8
0
        /// <summary>
        /// Creates a sequence of 200 links, stores its balanced variant and checks that searching for the
        /// sequence without its first and last element finds exactly that variant, using both
        /// <c>GetAllPartiallyMatchingSequences0</c> and <c>GetAllPartiallyMatchingSequences1</c>.
        /// </summary>
        public static void BalancedPartialVariantsSearchTest()
        {
            const long sequenceLength = 200;

            using (var scope = new TempLinksTestScope(useSequences: true))
            {
                var links = scope.Links;
                var sequences = scope.Sequences;

                var elements = new ulong[sequenceLength];
                for (var index = 0; index < elements.Length; index++)
                {
                    elements[index] = links.Create();
                }

                var converter = new BalancedVariantConverter<ulong>(links);
                var balancedVariant = converter.Convert(elements);

                // Inner part of the sequence: everything except the first and the last element.
                var innerElements = new ulong[sequenceLength - 2];
                Array.Copy(elements, 1, innerElements, 0, innerElements.Length);

                var firstSearchTimer = Stopwatch.StartNew();
                var foundByFirstSearch = sequences.GetAllPartiallyMatchingSequences0(innerElements);
                firstSearchTimer.Stop();

                var secondSearchTimer = Stopwatch.StartNew();
                var foundBySecondSearch = sequences.GetAllPartiallyMatchingSequences1(innerElements);
                secondSearchTimer.Stop();

                // Each search must return the balanced variant and nothing else.
                Assert.True(foundByFirstSearch.Count == 1);
                Assert.True(balancedVariant == foundByFirstSearch[0]);

                Assert.True(foundBySecondSearch.Count == 1);
                Assert.True(balancedVariant == foundBySecondSearch.First());

                foreach (var element in elements)
                {
                    links.Delete(element);
                }
            }
        }
Example #9
0
        /// <summary>
        /// Initializes the database on top of a split links storage (separate data and index memory),
        /// resolves the marker links (meaning root, unicode symbol, unicode sequence and book markers)
        /// and builds the converters between strings and unicode sequences stored as links.
        /// </summary>
        /// <param name="indexFileName">File name handed to the index <see cref="FileMappedResizableDirectMemory"/>.</param>
        /// <param name="dataFileName">File name handed to the data <see cref="FileMappedResizableDirectMemory"/>.</param>
        public PlatformDataBase(string indexFileName, string dataFileName)
        {
            this.indexFileName = indexFileName;
            this.dataFileName = dataFileName;

            var data = new FileMappedResizableDirectMemory(this.dataFileName);
            var index = new FileMappedResizableDirectMemory(this.indexFileName);
            var constants = new LinksConstants<TLinkAddress>(enableExternalReferencesSupport: true);

            // Low-level storage first, then the combined decorator that carries the main logic.
            _disposableLinks = new UInt32SplitMemoryLinks(data, index, UInt32SplitMemoryLinks.DefaultLinksSizeStep, constants);
            links = new UInt32Links(_disposableLinks);

            // Constant links (markers, aka mapped links) are resolved by consecutive mapping indices,
            // so the order of the calls below matters.
            TLinkAddress mappingIndex = 1;
            _meaningRoot = GerOrCreateMeaningRoot(mappingIndex++);
            _unicodeSymbolMarker = GetOrCreateNextMapping(mappingIndex++);
            _unicodeSequenceMarker = GetOrCreateNextMapping(mappingIndex++);
            _bookMarker = GetOrCreateNextMapping(mappingIndex++);

            // Converters between a link address and its raw number representation.
            _numberToAddressConverter = new RawNumberToAddressConverter<TLinkAddress>();
            _addressToNumberConverter = new AddressToRawNumberConverter<TLinkAddress>();

            // Converters between strings and unicode sequences stored as links.
            var balancedConverter = new BalancedVariantConverter<TLinkAddress>(links);
            var symbolMatcher = new TargetMatcher<TLinkAddress>(links, _unicodeSymbolMarker);
            var sequenceMatcher = new TargetMatcher<TLinkAddress>(links, _unicodeSequenceMarker);
            var charToSymbol = new CharToUnicodeSymbolConverter<TLinkAddress>(links, _addressToNumberConverter, _unicodeSymbolMarker);
            var symbolToChar = new UnicodeSymbolToCharConverter<TLinkAddress>(links, _numberToAddressConverter, symbolMatcher);
            var walker = new RightSequenceWalker<TLinkAddress>(links, new DefaultStack<TLinkAddress>(), symbolMatcher.IsMatched);

            var stringToSequence = new StringToUnicodeSequenceConverter<TLinkAddress>(links, charToSymbol, balancedConverter, _unicodeSequenceMarker);
            _stringToUnicodeSequenceConverter = new CachingConverterDecorator<string, TLinkAddress>(stringToSequence);

            var sequenceToString = new UnicodeSequenceToStringConverter<TLinkAddress>(links, sequenceMatcher, walker, symbolToChar);
            _unicodeSequenceToStringConverter = new CachingConverterDecorator<TLinkAddress, string>(sequenceToString);
        }
Example #10
0
        /// <summary>
        /// Stores the decimal strings of 500 random <c>ulong</c> values twice: through a sequences instance
        /// configured with <c>UseCompression</c> (scope1) and through a plain <c>BalancedVariantConverter</c>
        /// (scope2). It then checks that both representations decode back to the original strings and that
        /// each storage uses fewer links (excluding the unicode map) than the total number of characters.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the method name looks like a typo for "Random"; it is left unchanged here.
        /// </remarks>
        public static void RundomNumbersCompressionQualityTest()
        {
            const ulong N = 500;

            //const ulong minNumbers = 10000;
            //const ulong maxNumbers = 20000;

            //var strings = new List<string>();

            //for (ulong i = 0; i < N; i++)
            //    strings.Add(RandomHelpers.DefaultFactory.NextUInt64(minNumbers, maxNumbers).ToString());

            var strings = new List <string>();

            for (ulong i = 0; i < N; i++)
            {
                strings.Add(RandomHelpers.Default.NextUInt64().ToString());
            }

            // Duplicates are dropped, so fewer than N strings may remain.
            strings = strings.Distinct().ToList();

            var arrays          = strings.Select(UnicodeMap.FromStringToLinkArray).ToArray();
            var totalCharacters = arrays.Select(x => x.Length).Sum();

            // scope1: sequences with compression enabled; scope2: default sequences options.
            using (var scope1 = new TempLinksTestScope(useSequences: true, sequencesOptions: new SequencesOptions <ulong> {
                UseCompression = true, EnforceSingleSequenceVersionOnWriteBasedOnExisting = true
            }))
                using (var scope2 = new TempLinksTestScope(useSequences: true))
                {
                    scope1.Links.UseUnicode();
                    scope2.Links.UseUnicode();

                    var compressor1 = scope1.Sequences;
                    // NOTE(review): compressor2 is never used below; scope2 is written via balancedVariantConverter.
                    var compressor2 = scope2.Sequences;

                    var compressed1 = new ulong[arrays.Length];
                    var compressed2 = new ulong[arrays.Length];

                    var sw1 = Stopwatch.StartNew();

                    var START = 0;
                    var END   = arrays.Length;

                    for (int i = START; i < END; i++)
                    {
                        compressed1[i] = compressor1.Create(arrays[i].ShiftRight());
                    }

                    // Elapsed is read while sw1 is still running; the stopwatch is never stopped.
                    var elapsed1 = sw1.Elapsed;

                    var balancedVariantConverter = new BalancedVariantConverter <ulong>(scope2.Links);

                    var sw2 = Stopwatch.StartNew();

                    for (int i = START; i < END; i++)
                    {
                        compressed2[i] = balancedVariantConverter.Convert(arrays[i]);
                    }

                    var elapsed2 = sw2.Elapsed;

                    Debug.WriteLine($"Compressor: {elapsed1}, Balanced sequence creator: {elapsed2}");

                    // NOTE(review): this asserts on wall-clock timing (compression expected to be slower),
                    // which may make the test sensitive to machine load - confirm this is intended.
                    Assert.True(elapsed1 > elapsed2);

                    // Checks: every pair of stored sequences must decode to the original string.
                    for (int i = START; i < END; i++)
                    {
                        var sequence1 = compressed1[i];
                        var sequence2 = compressed2[i];

                        if (sequence1 != _constants.Null && sequence2 != _constants.Null)
                        {
                            var decompress1 = UnicodeMap.FromSequenceLinkToString(sequence1, scope1.Links);

                            var decompress2 = UnicodeMap.FromSequenceLinkToString(sequence2, scope2.Links);

                            Assert.True(strings[i] == decompress1 && decompress1 == decompress2);
                        }
                    }

                    // Links beyond the unicode map must be fewer than the number of characters stored.
                    Assert.True((int)(scope1.Links.Count() - UnicodeMap.MapSize) < totalCharacters);
                    Assert.True((int)(scope2.Links.Count() - UnicodeMap.MapSize) < totalCharacters);

                    // Prints the links-per-character ratio for both storages.
                    Debug.WriteLine($"{(double)(scope1.Links.Count() - UnicodeMap.MapSize) / totalCharacters} | {(double)(scope2.Links.Count() - UnicodeMap.MapSize) / totalCharacters}");

                    // Can be worse than balanced variant
                    //Assert.True(scope1.Links.Count() <= scope2.Links.Count());

                    //compressor1.ValidateFrequencies();
                }
        }
Example #11
0
        /// <summary>
        /// Stores the decimal strings of the numbers 10000..12499 twice per string, both through a sequences
        /// instance configured with <c>UseCompression</c> (scope1) and through a plain
        /// <c>BalancedVariantConverter</c> (scope2). A result is kept only when both writes of the same string
        /// return the same link. It then checks that kept results decode back to the original strings, that
        /// each storage uses fewer links (excluding the unicode map) than the total number of characters,
        /// and that the compressed storage does not hold more links than the balanced one.
        /// </summary>
        public static void CompressionStabilityTest()
        {
            // TODO: Fix bug (do a separate test)
            //const ulong minNumbers = 0;
            //const ulong maxNumbers = 1000;

            const ulong minNumbers = 10000;
            const ulong maxNumbers = 12500;

            var strings = new List <string>();

            // maxNumbers is exclusive.
            for (ulong i = minNumbers; i < maxNumbers; i++)
            {
                strings.Add(i.ToString());
            }

            var arrays          = strings.Select(UnicodeMap.FromStringToLinkArray).ToArray();
            var totalCharacters = arrays.Select(x => x.Length).Sum();

            // scope1: sequences with compression enabled; scope2: default sequences options.
            using (var scope1 = new TempLinksTestScope(useSequences: true, sequencesOptions: new SequencesOptions <ulong> {
                UseCompression = true, EnforceSingleSequenceVersionOnWriteBasedOnExisting = true
            }))
                using (var scope2 = new TempLinksTestScope(useSequences: true))
                {
                    scope1.Links.UseUnicode();
                    scope2.Links.UseUnicode();

                    //var compressor1 = new Compressor(scope1.Links.Unsync, scope1.Sequences);
                    var compressor1 = scope1.Sequences;
                    // NOTE(review): compressor2 is never used below; scope2 is written via balancedVariantConverter.
                    var compressor2 = scope2.Sequences;

                    var compressed1 = new ulong[arrays.Length];
                    var compressed2 = new ulong[arrays.Length];

                    var sw1 = Stopwatch.StartNew();

                    var START = 0;
                    var END   = arrays.Length;

                    // Collisions proved (cannot be solved by max doublet comparison, no stable rule)
                    // Stability issue starts at 10001 or 11000
                    //for (int i = START; i < END; i++)
                    //{
                    //    var first = compressor1.Compress(arrays[i]);
                    //    var second = compressor1.Compress(arrays[i]);

                    //    if (first == second)
                    //        compressed1[i] = first;
                    //    else
                    //    {
                    //        // TODO: Find a solution for this case
                    //    }
                    //}

                    // Each array is written twice; the result is recorded only when both writes agree.
                    // Otherwise compressed1[i] keeps its default value.
                    for (int i = START; i < END; i++)
                    {
                        var first  = compressor1.Create(arrays[i].ShiftRight());
                        var second = compressor1.Create(arrays[i].ShiftRight());

                        if (first == second)
                        {
                            compressed1[i] = first;
                        }
                        else
                        {
                            // TODO: Find a solution for this case
                        }
                    }

                    // Elapsed is read while sw1 is still running; the stopwatch is never stopped.
                    var elapsed1 = sw1.Elapsed;

                    var balancedVariantConverter = new BalancedVariantConverter <ulong>(scope2.Links);

                    var sw2 = Stopwatch.StartNew();

                    // Same double-write check for the balanced variant converter.
                    for (int i = START; i < END; i++)
                    {
                        var first  = balancedVariantConverter.Convert(arrays[i]);
                        var second = balancedVariantConverter.Convert(arrays[i]);

                        if (first == second)
                        {
                            compressed2[i] = first;
                        }
                    }

                    var elapsed2 = sw2.Elapsed;

                    Debug.WriteLine($"Compressor: {elapsed1}, Balanced sequence creator: {elapsed2}");

                    // NOTE(review): this asserts on wall-clock timing (compression expected to be slower),
                    // which may make the test sensitive to machine load - confirm this is intended.
                    Assert.True(elapsed1 > elapsed2);

                    // Checks: entries equal to _constants.Null in either array are skipped.
                    for (int i = START; i < END; i++)
                    {
                        var sequence1 = compressed1[i];
                        var sequence2 = compressed2[i];

                        if (sequence1 != _constants.Null && sequence2 != _constants.Null)
                        {
                            var decompress1 = UnicodeMap.FromSequenceLinkToString(sequence1, scope1.Links);

                            var decompress2 = UnicodeMap.FromSequenceLinkToString(sequence2, scope2.Links);

                            //var structure1 = scope1.Links.FormatStructure(sequence1, link => link.IsPartialPoint());
                            //var structure2 = scope2.Links.FormatStructure(sequence2, link => link.IsPartialPoint());

                            //if (sequence1 != Constants.Null && sequence2 != Constants.Null && arrays[i].Length > 3)
                            //    Assert.False(structure1 == structure2);

                            Assert.True(strings[i] == decompress1 && decompress1 == decompress2);
                        }
                    }

                    // Links beyond the unicode map must be fewer than the number of characters stored.
                    Assert.True((int)(scope1.Links.Count() - UnicodeMap.MapSize) < totalCharacters);
                    Assert.True((int)(scope2.Links.Count() - UnicodeMap.MapSize) < totalCharacters);

                    // Prints the links-per-character ratio for both storages.
                    Debug.WriteLine($"{(double)(scope1.Links.Count() - UnicodeMap.MapSize) / totalCharacters} | {(double)(scope2.Links.Count() - UnicodeMap.MapSize) / totalCharacters}");

                    // The compressed storage must not hold more links than the balanced one.
                    Assert.True(scope1.Links.Count() <= scope2.Links.Count());

                    //compressor1.ValidateFrequencies();
                }
        }
Example #12
0
        /// <summary>
        /// Converts every line of the example text into a sequence with three different converters
        /// (a <c>CompressingConverter</c> in scope 1, a <c>BalancedVariantConverter</c> in scope 2 and an
        /// <c>OptimalVariantConverter</c> in scope 3) and compares the results.
        /// </summary>
        /// <remarks>
        /// The test asserts that every produced sequence converts back to its source line, that each
        /// converter created fewer links than there are characters in the input, and that the compressing
        /// and optimal converters each created fewer links than the balanced one. Timings, link-per-character
        /// ratios and duplicate-segment counts are written to the console for information only.
        /// </remarks>
        public static void CompressionEfficiencyTest()
        {
            var strings = _exampleLoremIpsumText.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
            var arrays = strings.Select(UnicodeMap.FromStringToLinkArray).ToArray();
            var totalCharacters = arrays.Sum(x => x.Length);

            using (var scope1 = new TempLinksTestScope(useSequences: true))
            using (var scope2 = new TempLinksTestScope(useSequences: true))
            using (var scope3 = new TempLinksTestScope(useSequences: true))
            {
                scope1.Links.Unsync.UseUnicode();
                scope2.Links.Unsync.UseUnicode();
                scope3.Links.Unsync.UseUnicode();

                // Scope 1: compressing converter driven by a frequencies cache.
                var balancedVariantConverter1 = new BalancedVariantConverter<ulong>(scope1.Links.Unsync);
                // NOTE(review): this counter is bound to scope1's links but is also handed to the scope3
                // cache below - confirm that sharing it across scopes is intended.
                var totalSequenceSymbolFrequencyCounter = new TotalSequenceSymbolFrequencyCounter<ulong>(scope1.Links.Unsync);
                var linkFrequenciesCache1 = new LinkFrequenciesCache<ulong>(scope1.Links.Unsync, totalSequenceSymbolFrequencyCounter);
                var compressor1 = new CompressingConverter<ulong>(scope1.Links.Unsync, balancedVariantConverter1, linkFrequenciesCache1, doInitialFrequenciesIncrement: false);

                // Scope 3: optimal variant converter driven by its own frequencies cache.
                var linkFrequenciesCache3 = new LinkFrequenciesCache<ulong>(scope3.Links.Unsync, totalSequenceSymbolFrequencyCounter);
                var linkToItsFrequencyNumberConverter = new FrequenciesCacheBasedLinkToItsFrequencyNumberConverter<ulong>(linkFrequenciesCache3);
                var sequenceToItsLocalElementLevelsConverter = new SequenceToItsLocalElementLevelsConverter<ulong>(scope3.Links.Unsync, linkToItsFrequencyNumberConverter);
                var optimalVariantConverter = new OptimalVariantConverter<ulong>(scope3.Links.Unsync, sequenceToItsLocalElementLevelsConverter);

                var compressed1 = new ulong[arrays.Length];
                var compressed2 = new ulong[arrays.Length];
                var compressed3 = new ulong[arrays.Length];

                // Range of input lines to process; narrow it to investigate a subset of the text.
                var start = 0;
                var end = arrays.Length;

                // Baseline for scope 1 must come from scope 1 itself: it is subtracted from
                // scope1's link count below (the original read it from scope2).
                var initialCount1 = scope1.Links.Unsync.Count();

                var sw1 = Stopwatch.StartNew();

                for (var i = start; i < end; i++)
                {
                    linkFrequenciesCache1.IncrementFrequencies(arrays[i]);
                    compressed1[i] = compressor1.Convert(arrays[i]);
                }

                var elapsed1 = sw1.Elapsed;

                // Scope 2: plain balanced variant converter, used as the comparison baseline.
                var balancedVariantConverter2 = new BalancedVariantConverter<ulong>(scope2.Links.Unsync);

                var initialCount2 = scope2.Links.Unsync.Count();

                var sw2 = Stopwatch.StartNew();

                for (var i = start; i < end; i++)
                {
                    compressed2[i] = balancedVariantConverter2.Convert(arrays[i]);
                }

                var elapsed2 = sw2.Elapsed;

                // Frequencies for scope 3 are collected up front, outside of the timed section
                // and before the scope 3 baseline count is taken.
                for (var i = start; i < end; i++)
                {
                    linkFrequenciesCache3.IncrementFrequencies(arrays[i]);
                }

                var initialCount3 = scope3.Links.Unsync.Count();

                var sw3 = Stopwatch.StartNew();

                for (var i = start; i < end; i++)
                {
                    compressed3[i] = optimalVariantConverter.Convert(arrays[i]);
                }

                var elapsed3 = sw3.Elapsed;

                Console.WriteLine($"Compressor: {elapsed1}, Balanced variant: {elapsed2}, Optimal variant: {elapsed3}");

                // Checks: every sequence must convert back to the line it was built from.
                for (var i = start; i < end; i++)
                {
                    var sequence1 = compressed1[i];
                    var sequence2 = compressed2[i];
                    var sequence3 = compressed3[i];

                    var decompress1 = UnicodeMap.FromSequenceLinkToString(sequence1, scope1.Links.Unsync);
                    var decompress2 = UnicodeMap.FromSequenceLinkToString(sequence2, scope2.Links.Unsync);
                    var decompress3 = UnicodeMap.FromSequenceLinkToString(sequence3, scope3.Links.Unsync);

                    // The formatted structures are not asserted on; the calls are kept so that
                    // structure formatting is still exercised for every produced sequence.
                    var structure1 = scope1.Links.Unsync.FormatStructure(sequence1, link => link.IsPartialPoint());
                    var structure2 = scope2.Links.Unsync.FormatStructure(sequence2, link => link.IsPartialPoint());
                    var structure3 = scope3.Links.Unsync.FormatStructure(sequence3, link => link.IsPartialPoint());

                    Assert.True(strings[i] == decompress1 && decompress1 == decompress2);
                    Assert.True(strings[i] == decompress3 && decompress3 == decompress2);
                }

                // Number of links each converter added on top of its scope's baseline.
                var created1 = scope1.Links.Unsync.Count() - initialCount1;
                var created2 = scope2.Links.Unsync.Count() - initialCount2;
                var created3 = scope3.Links.Unsync.Count() - initialCount3;

                Assert.True((int)created1 < totalCharacters);
                Assert.True((int)created2 < totalCharacters);
                Assert.True((int)created3 < totalCharacters);

                Console.WriteLine($"{(double)created1 / totalCharacters} | {(double)created2 / totalCharacters} | {(double)created3 / totalCharacters}");

                Assert.True(created1 < created2);
                Assert.True(created3 < created2);

                var duplicateProvider1 = new DuplicateSegmentsProvider<ulong>(scope1.Links.Unsync, scope1.Sequences);
                var duplicateProvider2 = new DuplicateSegmentsProvider<ulong>(scope2.Links.Unsync, scope2.Sequences);
                var duplicateProvider3 = new DuplicateSegmentsProvider<ulong>(scope3.Links.Unsync, scope3.Sequences);

                var duplicateCounter1 = new DuplicateSegmentsCounter<ulong>(duplicateProvider1);
                var duplicateCounter2 = new DuplicateSegmentsCounter<ulong>(duplicateProvider2);
                var duplicateCounter3 = new DuplicateSegmentsCounter<ulong>(duplicateProvider3);

                var duplicates1 = duplicateCounter1.Count();

                ConsoleHelpers.Debug("------");

                var duplicates2 = duplicateCounter2.Count();

                ConsoleHelpers.Debug("------");

                var duplicates3 = duplicateCounter3.Count();

                Console.WriteLine($"{duplicates1} | {duplicates2} | {duplicates3}");

                linkFrequenciesCache1.ValidateFrequencies();
                linkFrequenciesCache3.ValidateFrequencies();
            }
        }
 /// <summary>
 /// Builds a <see cref="DefaultXmlStorage{TLinkAddress}"/> on top of the given links.
 /// </summary>
 /// <remarks>
 /// As a side effect the static <c>BalancedVariantConverter</c> member is replaced with a new
 /// instance constructed from <paramref name="links"/>; that same instance is passed to the storage.
 /// </remarks>
 /// <param name="links">The links passed both to the converter and to the storage.</param>
 /// <returns>The newly created storage.</returns>
 public static DefaultXmlStorage<TLinkAddress> CreateXmlStorage(ILinks<TLinkAddress> links)
 {
     // The converter is stored on the static member first so the storage receives the stored instance.
     BalancedVariantConverter = new(links);
     var storage = new DefaultXmlStorage<TLinkAddress>(links, BalancedVariantConverter);
     return storage;
 }