/// <summary>
/// Fills in every option that is still unset, building the missing components on top of <paramref name="links"/>.
/// Options that already hold a value are left untouched.
/// </summary>
/// <param name="links">The links storage that the created components operate on.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>UseSequenceMarker</c> is set, the configured <c>SequenceMarkerLink</c> does not exist in
/// <paramref name="links"/>, and the point created to replace it does not equal the configured marker.
/// </exception>
public void InitOptions(ISynchronizedLinks<TLink> links)
{
    if (UseSequenceMarker)
    {
        var markerIsNull = _equalityComparer.Equals(SequenceMarkerLink, links.Constants.Null);
        if (markerIsNull)
        {
            // No marker configured yet: take a newly created point as the marker.
            SequenceMarkerLink = links.CreatePoint();
        }
        else if (!links.Exists(SequenceMarkerLink))
        {
            // A marker is configured but missing from the storage: create a point
            // and require it to be equal to the configured marker.
            var recreatedMarker = links.CreatePoint();
            var markerRestored = _equalityComparer.Equals(recreatedMarker, SequenceMarkerLink);
            if (!markerRestored)
            {
                throw new InvalidOperationException("Cannot recreate sequence marker link.");
            }
        }

        if (MarkedSequenceMatcher == null)
        {
            MarkedSequenceMatcher = new MarkedSequenceCriterionMatcher<TLink>(links, SequenceMarkerLink);
        }
    }

    // Used directly when compression is off, and as the inner converter when it is on.
    var defaultConverter = new BalancedVariantConverter<TLink>(links);

    if (!UseCompression)
    {
        if (LinksToSequenceConverter == null)
        {
            LinksToSequenceConverter = defaultConverter;
        }
    }
    else if (LinksToSequenceConverter == null)
    {
        // The symbol frequency counter depends on whether sequences are marked.
        ICounter<TLink, TLink> symbolFrequencyCounter;
        if (UseSequenceMarker)
        {
            symbolFrequencyCounter = new TotalMarkedSequenceSymbolFrequencyCounter<TLink>(links, MarkedSequenceMatcher);
        }
        else
        {
            symbolFrequencyCounter = new TotalSequenceSymbolFrequencyCounter<TLink>(links);
        }

        var frequenciesCache = new LinkFrequenciesCache<TLink>(links, symbolFrequencyCounter);
        LinksToSequenceConverter = new CompressingConverter<TLink>(links, defaultConverter, frequenciesCache);
    }

    if (UseIndex && Index == null)
    {
        Index = new SequenceIndex<TLink>(links);
    }

    if (Walker == null)
    {
        Walker = new RightSequenceWalker<TLink>(links, new DefaultStack<TLink>());
    }
}
/// <summary>
/// Compresses the sequence [e1, e2, e1, e2] with a <c>CompressingConverter</c> and verifies that all four
/// elements can be read back from the resulting link through three access paths:
/// nested <c>GetSource</c>/<c>GetTarget</c> calls, <c>GetByKeys</c> and
/// <c>GetSquareMatrixSequenceElementByIndex</c>.
/// </summary>
public static void CompressionTest()
{
    using (var scope = new TempLinksTestScope(useSequences: true))
    {
        var links = scope.Links;

        var e1 = links.Create();
        var e2 = links.Create();

        var sequence = new[]
        {
            e1, e2, e1, e2 // mama / papa / template [(m/p), a] { [1] [2] [1] [2] }
        };

        var balancedVariantConverter = new BalancedVariantConverter<ulong>(links.Unsync);
        var totalSequenceSymbolFrequencyCounter = new TotalSequenceSymbolFrequencyCounter<ulong>(links.Unsync);
        var doubletFrequenciesCache = new LinkFrequenciesCache<ulong>(links.Unsync, totalSequenceSymbolFrequencyCounter);
        var compressingConverter = new CompressingConverter<ulong>(links.Unsync, balancedVariantConverter, doubletFrequenciesCache);

        var compressedVariant = compressingConverter.Convert(sequence);

        // Expected links after compression:
        // 1: [1]       (1->1) point
        // 2: [2]       (2->2) point
        // 3: [1,2]     (1->2) doublet
        // 4: [1,2,1,2] (3->3) doublet

        // Assert.Equal (instead of Assert.True(a == b)) reports expected/actual values on failure.
        Assert.Equal(sequence[0], links.GetSource(links.GetSource(compressedVariant)));
        Assert.Equal(sequence[1], links.GetTarget(links.GetSource(compressedVariant)));
        Assert.Equal(sequence[2], links.GetSource(links.GetTarget(compressedVariant)));
        Assert.Equal(sequence[3], links.GetTarget(links.GetTarget(compressedVariant)));

        var source = _constants.SourcePart;
        var target = _constants.TargetPart;

        Assert.Equal(sequence[0], links.GetByKeys(compressedVariant, source, source));
        Assert.Equal(sequence[1], links.GetByKeys(compressedVariant, source, target));
        Assert.Equal(sequence[2], links.GetByKeys(compressedVariant, target, source));
        Assert.Equal(sequence[3], links.GetByKeys(compressedVariant, target, target));

        // 4 - length of sequence
        Assert.Equal(sequence[0], links.GetSquareMatrixSequenceElementByIndex(compressedVariant, 4, 0));
        Assert.Equal(sequence[1], links.GetSquareMatrixSequenceElementByIndex(compressedVariant, 4, 1));
        Assert.Equal(sequence[2], links.GetSquareMatrixSequenceElementByIndex(compressedVariant, 4, 2));
        Assert.Equal(sequence[3], links.GetSquareMatrixSequenceElementByIndex(compressedVariant, 4, 3));
    }
}
/// <summary>
/// Stores every line of the example text as a sequence in three separate temporary scopes, using
/// a <c>CompressingConverter</c> (scope1), a <c>BalancedVariantConverter</c> (scope2) and an
/// <c>OptimalVariantConverter</c> (scope3). It then checks that:
/// each stored sequence decompresses back to the original line;
/// each scope created fewer links than there are characters in the text;
/// scope1 and scope3 each created fewer links than scope2.
/// Timings, link-per-character ratios and duplicate segment counts are written to the console.
/// </summary>
public static void CompressionEfficiencyTest()
{
    var strings = _exampleLoremIpsumText.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
    var arrays = strings.Select(UnicodeMap.FromStringToLinkArray).ToArray();
    var totalCharacters = arrays.Select(x => x.Length).Sum();

    using (var scope1 = new TempLinksTestScope(useSequences: true))
    using (var scope2 = new TempLinksTestScope(useSequences: true))
    using (var scope3 = new TempLinksTestScope(useSequences: true))
    {
        scope1.Links.Unsync.UseUnicode();
        scope2.Links.Unsync.UseUnicode();
        scope3.Links.Unsync.UseUnicode();

        var balancedVariantConverter1 = new BalancedVariantConverter<ulong>(scope1.Links.Unsync);
        var totalSequenceSymbolFrequencyCounter = new TotalSequenceSymbolFrequencyCounter<ulong>(scope1.Links.Unsync);
        var linkFrequenciesCache1 = new LinkFrequenciesCache<ulong>(scope1.Links.Unsync, totalSequenceSymbolFrequencyCounter);
        var compressor1 = new CompressingConverter<ulong>(scope1.Links.Unsync, balancedVariantConverter1, linkFrequenciesCache1, doInitialFrequenciesIncrement: false);

        // Disabled alternative wiring of the frequency converter, kept for reference:
        //var meaningRoot = links.CreatePoint();
        //var unaryOne = links.CreateAndUpdate(meaningRoot, constants.Itself);
        //var frequencyMarker = links.CreateAndUpdate(meaningRoot, constants.Itself);
        //var frequencyPropertyMarker = links.CreateAndUpdate(meaningRoot, constants.Itself);
        //var unaryNumberToAddressConverter = new UnaryNumberToAddressAddOperationConverter<ulong>(links, unaryOne);
        //var unaryNumberIncrementer = new UnaryNumberIncrementer<ulong>(links, unaryOne);
        //var frequencyIncrementer = new FrequencyIncrementer<ulong>(links, frequencyMarker, unaryOne, unaryNumberIncrementer);
        //var frequencyPropertyOperator = new FrequencyPropertyOperator<ulong>(links, frequencyPropertyMarker, frequencyMarker);
        //var linkFrequencyIncrementer = new LinkFrequencyIncrementer<ulong>(links, frequencyPropertyOperator, frequencyIncrementer);
        //var linkToItsFrequencyNumberConverter = new LinkToItsFrequencyNumberConveter<ulong>(links, frequencyPropertyOperator, unaryNumberToAddressConverter);

        // NOTE(review): this cache operates on scope3 links but reuses the counter that was
        // constructed over scope1 links — confirm this is intended. Left unchanged here.
        var linkFrequenciesCache3 = new LinkFrequenciesCache<ulong>(scope3.Links.Unsync, totalSequenceSymbolFrequencyCounter);
        var linkToItsFrequencyNumberConverter = new FrequenciesCacheBasedLinkToItsFrequencyNumberConverter<ulong>(linkFrequenciesCache3);
        var sequenceToItsLocalElementLevelsConverter = new SequenceToItsLocalElementLevelsConverter<ulong>(scope3.Links.Unsync, linkToItsFrequencyNumberConverter);
        var optimalVariantConverter = new OptimalVariantConverter<ulong>(scope3.Links.Unsync, sequenceToItsLocalElementLevelsConverter);

        var compressed1 = new ulong[arrays.Length];
        var compressed2 = new ulong[arrays.Length];
        var compressed3 = new ulong[arrays.Length];

        // 1. Compressing converter: frequencies are incremented line by line, right before each conversion.
        // The baseline is taken from scope1 (it was previously read from scope2 by mistake).
        var initialCount1 = scope1.Links.Unsync.Count();
        var sw1 = Stopwatch.StartNew();
        for (var i = 0; i < arrays.Length; i++)
        {
            linkFrequenciesCache1.IncrementFrequencies(arrays[i]);
            compressed1[i] = compressor1.Convert(arrays[i]);
        }
        var elapsed1 = sw1.Elapsed;

        // 2. Balanced variant converter.
        var balancedVariantConverter2 = new BalancedVariantConverter<ulong>(scope2.Links.Unsync);
        var initialCount2 = scope2.Links.Unsync.Count();
        var sw2 = Stopwatch.StartNew();
        for (var i = 0; i < arrays.Length; i++)
        {
            compressed2[i] = balancedVariantConverter2.Convert(arrays[i]);
        }
        var elapsed2 = sw2.Elapsed;

        // 3. Optimal variant converter: frequencies of the whole text are accumulated first,
        // before the baseline count is taken and before the stopwatch starts.
        for (var i = 0; i < arrays.Length; i++)
        {
            linkFrequenciesCache3.IncrementFrequencies(arrays[i]);
        }
        var initialCount3 = scope3.Links.Unsync.Count();
        var sw3 = Stopwatch.StartNew();
        for (var i = 0; i < arrays.Length; i++)
        {
            compressed3[i] = optimalVariantConverter.Convert(arrays[i]);
        }
        var elapsed3 = sw3.Elapsed;

        Console.WriteLine($"Compressor: {elapsed1}, Balanced variant: {elapsed2}, Optimal variant: {elapsed3}");

        // Assert.True(elapsed1 > elapsed2);

        // Checks: every stored sequence must decompress back to its source line.
        for (var i = 0; i < arrays.Length; i++)
        {
            var sequence1 = compressed1[i];
            var sequence2 = compressed2[i];
            var sequence3 = compressed3[i];

            var decompress1 = UnicodeMap.FromSequenceLinkToString(sequence1, scope1.Links.Unsync);
            var decompress2 = UnicodeMap.FromSequenceLinkToString(sequence2, scope2.Links.Unsync);
            var decompress3 = UnicodeMap.FromSequenceLinkToString(sequence3, scope3.Links.Unsync);

            // The formatted structures are not compared at the moment (the asserts below are disabled);
            // the calls are kept so FormatStructure still runs for every sequence.
            var structure1 = scope1.Links.Unsync.FormatStructure(sequence1, link => link.IsPartialPoint());
            var structure2 = scope2.Links.Unsync.FormatStructure(sequence2, link => link.IsPartialPoint());
            var structure3 = scope3.Links.Unsync.FormatStructure(sequence3, link => link.IsPartialPoint());

            //if (sequence1 != Constants.Null && sequence2 != Constants.Null && arrays[i].Length > 3)
            //    Assert.False(structure1 == structure2);
            //if (sequence3 != Constants.Null && sequence2 != Constants.Null && arrays[i].Length > 3)
            //    Assert.False(structure3 == structure2);

            Assert.Equal(strings[i], decompress1);
            Assert.Equal(decompress1, decompress2);
            Assert.Equal(strings[i], decompress3);
            Assert.Equal(decompress3, decompress2);
        }

        // Number of links each approach added on top of its baseline.
        var createdLinks1 = scope1.Links.Unsync.Count() - initialCount1;
        var createdLinks2 = scope2.Links.Unsync.Count() - initialCount2;
        var createdLinks3 = scope3.Links.Unsync.Count() - initialCount3;

        Assert.True((int)createdLinks1 < totalCharacters);
        Assert.True((int)createdLinks2 < totalCharacters);
        Assert.True((int)createdLinks3 < totalCharacters);

        Console.WriteLine($"{(double)createdLinks1 / totalCharacters} | {(double)createdLinks2 / totalCharacters} | {(double)createdLinks3 / totalCharacters}");

        Assert.True(createdLinks1 < createdLinks2);
        Assert.True(createdLinks3 < createdLinks2);

        var duplicateProvider1 = new DuplicateSegmentsProvider<ulong>(scope1.Links.Unsync, scope1.Sequences);
        var duplicateProvider2 = new DuplicateSegmentsProvider<ulong>(scope2.Links.Unsync, scope2.Sequences);
        var duplicateProvider3 = new DuplicateSegmentsProvider<ulong>(scope3.Links.Unsync, scope3.Sequences);

        var duplicateCounter1 = new DuplicateSegmentsCounter<ulong>(duplicateProvider1);
        var duplicateCounter2 = new DuplicateSegmentsCounter<ulong>(duplicateProvider2);
        var duplicateCounter3 = new DuplicateSegmentsCounter<ulong>(duplicateProvider3);

        var duplicates1 = duplicateCounter1.Count();

        ConsoleHelpers.Debug("------");

        var duplicates2 = duplicateCounter2.Count();

        ConsoleHelpers.Debug("------");

        var duplicates3 = duplicateCounter3.Count();

        Console.WriteLine($"{duplicates1} | {duplicates2} | {duplicates3}");

        linkFrequenciesCache1.ValidateFrequencies();
        linkFrequenciesCache3.ValidateFrequencies();
    }
}