/// <summary>
/// Round-trips <c>_sequenceExample</c> through the optimal-variant converter while link
/// frequencies are tracked by a <c>LinkFrequenciesCache</c> and its cached incrementing index.
/// </summary>
public static void DictionaryBasedFrequencyStoredOptimalVariantSequenceTest()
{
    using (var testScope = new TempLinksTestScope(useSequences: false))
    {
        var storage = testScope.Links;
        storage.UseUnicode();

        var sourceSequence = UnicodeMap.FromStringToLinkArray(_sequenceExample);

        // Frequency bookkeeping: counter -> cache -> (index, frequency converter).
        var symbolFrequencyCounter = new TotalSequenceSymbolFrequencyCounter<ulong>(storage);
        var frequenciesCache = new LinkFrequenciesCache<ulong>(storage, symbolFrequencyCounter);
        var sequenceIndex = new CachedFrequencyIncrementingSequenceIndex<ulong>(frequenciesCache);
        var frequencyNumberConverter = new FrequenciesCacheBasedLinkToItsFrequencyNumberConverter<ulong>(frequenciesCache);

        // Converter chain: frequencies -> local element levels -> optimal variant.
        var localElementLevelsConverter = new SequenceToItsLocalElementLevelsConverter<ulong>(storage, frequencyNumberConverter);
        var optimalConverter = new OptimalVariantConverter<ulong>(storage, localElementLevelsConverter);

        var sequencesOptions = new SequencesOptions<ulong> { Walker = new LeveledSequenceWalker<ulong>(storage) };
        var sequenceStore = new Sequences.Sequences(storage, sequencesOptions);

        ExecuteTest(sequenceStore, sourceSequence, localElementLevelsConverter, sequenceIndex, optimalConverter);
    }
}
/// <summary>
/// Round-trips <c>_sequenceExample</c> through the optimal-variant converter while link
/// frequencies are maintained through a <c>PropertyOperator</c> and a <c>FrequencyIncrementer</c>
/// built on marker links created in the links store itself.
/// </summary>
public static void LinksBasedFrequencyStoredOptimalVariantSequenceTest()
{
    using (var testScope = new TempLinksTestScope(useSequences: false))
    {
        var storage = testScope.Links;
        var linksConstants = storage.Constants;
        storage.UseUnicode();

        var sourceSequence = UnicodeMap.FromStringToLinkArray(_sequenceExample);

        // Marker links; creation order is kept exactly as it determines the created links.
        var root = storage.CreatePoint();
        var one = storage.CreateAndUpdate(root, linksConstants.Itself);
        var frequencyMark = storage.CreateAndUpdate(root, linksConstants.Itself);
        var frequencyPropertyMark = storage.CreateAndUpdate(root, linksConstants.Itself);

        // Unary-number helpers and the frequency property built on top of them.
        var unaryToAddress = new UnaryNumberToAddressAddOperationConverter<ulong>(storage, one);
        var unaryIncrementer = new UnaryNumberIncrementer<ulong>(storage, one);
        var incrementer = new FrequencyIncrementer<ulong>(storage, frequencyMark, one, unaryIncrementer);
        var frequencyProperty = new PropertyOperator<ulong>(storage, frequencyPropertyMark, frequencyMark);

        var sequenceIndex = new FrequencyIncrementingSequenceIndex<ulong>(storage, frequencyProperty, incrementer);
        var frequencyNumberConverter = new LinkToItsFrequencyNumberConveter<ulong>(storage, frequencyProperty, unaryToAddress);
        var localElementLevelsConverter = new SequenceToItsLocalElementLevelsConverter<ulong>(storage, frequencyNumberConverter);
        var optimalConverter = new OptimalVariantConverter<ulong>(storage, localElementLevelsConverter);

        var sequencesOptions = new SequencesOptions<ulong> { Walker = new LeveledSequenceWalker<ulong>(storage) };
        var sequenceStore = new Sequences.Sequences(storage, sequencesOptions);

        ExecuteTest(sequenceStore, sourceSequence, localElementLevelsConverter, sequenceIndex, optimalConverter);
    }
}
/// <summary>
/// Converts the string "Hello" into a unicode sequence link and back again,
/// asserting that the decoded text equals the original.
/// </summary>
public static void StringAndUnicodeSequenceConvertersTest()
{
    using (var testScope = new TempLinksTestScope())
    {
        var storage = testScope.Links;
        var itselfConstant = storage.Constants.Itself;

        // Marker links; creation order is kept exactly as it determines the created links.
        var root = storage.CreatePoint();
        var one = storage.CreateAndUpdate(root, itselfConstant);
        var symbolMark = storage.CreateAndUpdate(root, itselfConstant);
        var sequenceMark = storage.CreateAndUpdate(root, itselfConstant);
        var frequencyMark = storage.CreateAndUpdate(root, itselfConstant);
        var frequencyPropertyMark = storage.CreateAndUpdate(root, itselfConstant);

        // Encoding side: char -> unicode symbol link.
        var powerOf2ToUnary = new PowerOf2ToUnaryNumberConverter<ulong>(storage, one);
        var addressToUnary = new AddressToUnaryNumberConverter<ulong>(storage, powerOf2ToUnary);
        var charToSymbol = new CharToUnicodeSymbolConverter<ulong>(storage, addressToUnary, symbolMark);
        var unaryToAddress = new UnaryNumberToAddressOrOperationConverter<ulong>(storage, powerOf2ToUnary);

        // Frequency tracking used by the index and by the optimal-variant converter.
        var unaryIncrementer = new UnaryNumberIncrementer<ulong>(storage, one);
        var incrementer = new FrequencyIncrementer<ulong>(storage, frequencyMark, one, unaryIncrementer);
        var frequencyProperty = new PropertyOperator<ulong>(storage, frequencyPropertyMark, frequencyMark);
        var sequenceIndex = new FrequencyIncrementingSequenceIndex<ulong>(storage, frequencyProperty, incrementer);
        var frequencyNumberConverter = new LinkToItsFrequencyNumberConveter<ulong>(storage, frequencyProperty, unaryToAddress);
        var localElementLevelsConverter = new SequenceToItsLocalElementLevelsConverter<ulong>(storage, frequencyNumberConverter);
        var optimalConverter = new OptimalVariantConverter<ulong>(storage, localElementLevelsConverter);

        var stringEncoder = new StringToUnicodeSequenceConverter<ulong>(storage, charToSymbol, sequenceIndex, optimalConverter, sequenceMark);
        var expected = "Hello";
        var encodedLink = stringEncoder.Convert(expected);

        // Decoding side: unicode sequence link -> string.
        var symbolMatcher = new TargetMatcher<ulong>(storage, symbolMark);
        var symbolToChar = new UnicodeSymbolToCharConverter<ulong>(storage, unaryToAddress, symbolMatcher);
        var sequenceMatcher = new TargetMatcher<ulong>(storage, sequenceMark);
        var walker = new LeveledSequenceWalker<ulong>(storage, symbolMatcher.IsMatched);
        var stringDecoder = new UnicodeSequenceToStringConverter<ulong>(storage, sequenceMatcher, walker, symbolToChar);

        var actual = stringDecoder.Convert(encodedLink);

        Assert.Equal(expected, actual);
    }
}
/// <summary>
/// Loads the source image and builds the links store together with the
/// frequency-cache based converter chain ending in an <c>OptimalVariantConverter</c>.
/// </summary>
/// <param name="sourceImagePath">Path to the image to load; its full path is stored in <c>_sourceImagePath</c>.</param>
public Patterns(string sourceImagePath)
{
    // Image and derived paths.
    _sourceImagePath = Path.GetFullPath(sourceImagePath);
    _image = new MagickImage(sourceImagePath);
    _pixels = _image.GetPixels();
    // Computed for the file-mapped alternative below, which is currently commented out.
    _linksPath = Path.ChangeExtension(_sourceImagePath, ".links");

    // Links store.
    var linksConstants = new LinksConstants<ulong>(enableExternalReferencesSupport: true);
    var linksMemory = new HeapResizableDirectMemory(); //new FileMappedResizableDirectMemory(_linksPath);
    var unitedMemoryLinks = new UInt64UnitedMemoryLinks(
        linksMemory,
        UInt64UnitedMemoryLinks.DefaultLinksSizeStep,
        linksConstants,
        Platform.Data.Doublets.Memory.IndexTreeType.SizedAndThreadedAVLBalancedTree);
    _links = new UInt64Links(unitedMemoryLinks);

    // Raw number <-> address converters.
    _addressToRawNumberConverter = new AddressToRawNumberConverter<ulong>();
    _rawNumberToAddressConverter = new RawNumberToAddressConverter<ulong>();

    // Frequency cache and the converters that depend on it.
    _totalSequenceSymbolFrequencyCounter = new TotalSequenceSymbolFrequencyCounter<ulong>(_links);
    _linkFrequenciesCache = new LinkFrequenciesCache<ulong>(_links, _totalSequenceSymbolFrequencyCounter);
    _index = new CachedFrequencyIncrementingSequenceIndex<ulong>(_linkFrequenciesCache);
    _linkToItsFrequencyNumberConverter = new FrequenciesCacheBasedLinkToItsFrequencyNumberConverter<ulong>(_linkFrequenciesCache);
    _sequenceToItsLocalElementLevelsConverter = new SequenceToItsLocalElementLevelsConverter<ulong>(_links, _linkToItsFrequencyNumberConverter);
    _optimalVariantConverter = new OptimalVariantConverter<ulong>(_links, _sequenceToItsLocalElementLevelsConverter);
}
/// <summary>
/// Creates one marked sequence per non-empty line of <c>_loremIpsumExample</c>, runs
/// <c>CompactAll</c> on them and asserts that the total number of links went down.
/// </summary>
public static void SavedSequencesOptimizationTest()
{
    var constants = new LinksConstants<ulong>((1, long.MaxValue), (long.MaxValue + 1UL, ulong.MaxValue));
    using (var memory = new HeapResizableDirectMemory())
    using (var disposableLinks = new UInt64UnitedMemoryLinks(memory, UInt64UnitedMemoryLinks.DefaultLinksSizeStep, constants, IndexTreeType.Default))
    {
        var links = new UInt64Links(disposableLinks);
        var root = links.CreatePoint();

        var addressToNumberConverter = new AddressToRawNumberConverter<ulong>();
        var symbolMark = links.GetOrCreate(root, addressToNumberConverter.Convert(1));
        var sequenceMark = links.GetOrCreate(root, addressToNumberConverter.Convert(2));

        // Frequency cache and the converters that depend on it.
        var symbolFrequencyCounter = new TotalSequenceSymbolFrequencyCounter<ulong>(links);
        var frequenciesCache = new LinkFrequenciesCache<ulong>(links, symbolFrequencyCounter);
        var sequenceIndex = new CachedFrequencyIncrementingSequenceIndex<ulong>(frequenciesCache);
        var frequencyNumberConverter = new FrequenciesCacheBasedLinkToItsFrequencyNumberConverter<ulong>(frequenciesCache);
        var localElementLevelsConverter = new SequenceToItsLocalElementLevelsConverter<ulong>(links, frequencyNumberConverter);
        var optimalConverter = new OptimalVariantConverter<ulong>(links, localElementLevelsConverter);

        // The walker treats external references and partial points as sequence elements.
        var rightWalker = new RightSequenceWalker<ulong>(
            links,
            new DefaultStack<ulong>(),
            candidate => constants.IsExternalReference(candidate) || links.IsPartialPoint(candidate));

        var sequencesOptions = new SequencesOptions<ulong>
        {
            UseSequenceMarker = true,
            SequenceMarkerLink = sequenceMark,
            UseIndex = true,
            Index = sequenceIndex,
            LinksToSequenceConverter = optimalConverter,
            Walker = rightWalker,
            UseGarbageCollection = true
        };
        var unicodeSequences = new Sequences.Sequences(new SynchronizedLinks<ulong>(links), sequencesOptions);

        // Create some sequences: one per non-empty line, each character converted to a raw number.
        var lines = _loremIpsumExample.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
        var rawSequences = lines.Select(line => line.Select(symbol => addressToNumberConverter.Convert(symbol)).ToArray()).ToArray();
        foreach (var rawSequence in rawSequences)
        {
            unicodeSequences.Create(rawSequence.ShiftRight());
        }

        var countBeforeCompaction = links.Count();

        // Original author's outline of the compaction (not verified against CompactAll here):
        //   get the list of sequence links; for each of them create a new sequence version;
        //   if the new sequence differs from the sequence link, delete the sequence link;
        //   finally collect garbage.
        unicodeSequences.CompactAll();

        var countAfterCompaction = links.Count();

        Assert.True(countAfterCompaction < countBeforeCompaction);
    }
}
/// <summary>
/// Adds <paramref name="sequence"/> to <paramref name="index"/>, converts it with
/// <paramref name="optimalVariantConverter"/>, reads the result back through
/// <paramref name="sequences"/> and asserts that the elements match the input.
/// </summary>
/// <param name="sequences">Sequences instance used to read the converted sequence back as a list.</param>
/// <param name="sequence">The source sequence of links.</param>
/// <param name="sequenceToItsLocalElementLevelsConverter">Not used inside this method.</param>
/// <param name="index">Index that receives the sequence before conversion.</param>
/// <param name="optimalVariantConverter">Converter that produces the sequence link being verified.</param>
private static void ExecuteTest(Sequences.Sequences sequences, ulong[] sequence, SequenceToItsLocalElementLevelsConverter<ulong> sequenceToItsLocalElementLevelsConverter, ISequenceIndex<ulong> index, OptimalVariantConverter<ulong> optimalVariantConverter)
{
    // The index is updated first, then the conversion runs.
    index.Add(sequence);

    var restored = sequences.ToList(optimalVariantConverter.Convert(sequence));

    Assert.True(sequence.SequenceEqual(restored));
}
/// <summary>
/// Compresses every non-empty line of <c>_exampleLoremIpsumText</c> with three strategies, each in
/// its own links scope: <c>CompressingConverter</c> (scope 1), <c>BalancedVariantConverter</c>
/// (scope 2) and <c>OptimalVariantConverter</c> (scope 3). It then checks that all three
/// decompress back to the source text, that each created fewer links than there are characters,
/// and that scopes 1 and 3 each created fewer links than scope 2.
/// </summary>
public static void CompressionEfficiencyTest()
{
    var strings = _exampleLoremIpsumText.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
    var arrays = strings.Select(UnicodeMap.FromStringToLinkArray).ToArray();
    var totalCharacters = arrays.Select(x => x.Length).Sum();

    using (var scope1 = new TempLinksTestScope(useSequences: true))
    using (var scope2 = new TempLinksTestScope(useSequences: true))
    using (var scope3 = new TempLinksTestScope(useSequences: true))
    {
        var links1 = scope1.Links.Unsync;
        var links2 = scope2.Links.Unsync;
        var links3 = scope3.Links.Unsync;

        links1.UseUnicode();
        links2.UseUnicode();
        links3.UseUnicode();

        // Scope 1: compressing converter on top of a balanced variant converter.
        var balancedVariantConverter1 = new BalancedVariantConverter<ulong>(links1);
        // NOTE(review): this counter is built over scope 1 but is also passed to the scope 3
        // cache below — confirm that sharing it across scopes is intended.
        var totalSequenceSymbolFrequencyCounter = new TotalSequenceSymbolFrequencyCounter<ulong>(links1);
        var linkFrequenciesCache1 = new LinkFrequenciesCache<ulong>(links1, totalSequenceSymbolFrequencyCounter);
        var compressor1 = new CompressingConverter<ulong>(links1, balancedVariantConverter1, linkFrequenciesCache1, doInitialFrequenciesIncrement: false);

        // Scope 3: optimal variant converter driven by a frequencies cache.
        var linkFrequenciesCache3 = new LinkFrequenciesCache<ulong>(links3, totalSequenceSymbolFrequencyCounter);
        var linkToItsFrequencyNumberConverter = new FrequenciesCacheBasedLinkToItsFrequencyNumberConverter<ulong>(linkFrequenciesCache3);
        var sequenceToItsLocalElementLevelsConverter = new SequenceToItsLocalElementLevelsConverter<ulong>(links3, linkToItsFrequencyNumberConverter);
        var optimalVariantConverter = new OptimalVariantConverter<ulong>(links3, sequenceToItsLocalElementLevelsConverter);

        var compressed1 = new ulong[arrays.Length];
        var compressed2 = new ulong[arrays.Length];
        var compressed3 = new ulong[arrays.Length];

        // Fix: the baseline for scope 1 must be read from scope 1 (it was read from scope 2).
        var initialCount1 = links1.Count();
        var sw1 = Stopwatch.StartNew();
        for (var i = 0; i < arrays.Length; i++)
        {
            // For scope 1 the frequency increments are part of the timed section.
            linkFrequenciesCache1.IncrementFrequencies(arrays[i]);
            compressed1[i] = compressor1.Convert(arrays[i]);
        }
        var elapsed1 = sw1.Elapsed;

        var balancedVariantConverter2 = new BalancedVariantConverter<ulong>(links2);
        var initialCount2 = links2.Count();
        var sw2 = Stopwatch.StartNew();
        for (var i = 0; i < arrays.Length; i++)
        {
            compressed2[i] = balancedVariantConverter2.Convert(arrays[i]);
        }
        var elapsed2 = sw2.Elapsed;

        // For scope 3 the frequencies are accumulated up front, outside the timed section.
        for (var i = 0; i < arrays.Length; i++)
        {
            linkFrequenciesCache3.IncrementFrequencies(arrays[i]);
        }
        var initialCount3 = links3.Count();
        var sw3 = Stopwatch.StartNew();
        for (var i = 0; i < arrays.Length; i++)
        {
            compressed3[i] = optimalVariantConverter.Convert(arrays[i]);
        }
        var elapsed3 = sw3.Elapsed;

        // Timings are only reported; nothing is asserted about them.
        Console.WriteLine($"Compressor: {elapsed1}, Balanced variant: {elapsed2}, Optimal variant: {elapsed3}");

        // Checks: every strategy must decompress back to the source line.
        for (var i = 0; i < arrays.Length; i++)
        {
            var sequence1 = compressed1[i];
            var sequence2 = compressed2[i];
            var sequence3 = compressed3[i];

            var decompress1 = UnicodeMap.FromSequenceLinkToString(sequence1, links1);
            var decompress2 = UnicodeMap.FromSequenceLinkToString(sequence2, links2);
            var decompress3 = UnicodeMap.FromSequenceLinkToString(sequence3, links3);

            // The structures are still formatted, but the former "structures must differ"
            // assertions are disabled, so the results are not compared.
            var structure1 = links1.FormatStructure(sequence1, link => link.IsPartialPoint());
            var structure2 = links2.FormatStructure(sequence2, link => link.IsPartialPoint());
            var structure3 = links3.FormatStructure(sequence3, link => link.IsPartialPoint());

            Assert.True(strings[i] == decompress1 && decompress1 == decompress2);
            Assert.True(strings[i] == decompress3 && decompress3 == decompress2);
        }

        // Number of links each strategy added on top of its baseline.
        var createdLinks1 = links1.Count() - initialCount1;
        var createdLinks2 = links2.Count() - initialCount2;
        var createdLinks3 = links3.Count() - initialCount3;

        Assert.True((int)createdLinks1 < totalCharacters);
        Assert.True((int)createdLinks2 < totalCharacters);
        Assert.True((int)createdLinks3 < totalCharacters);

        Console.WriteLine($"{(double)createdLinks1 / totalCharacters} | {(double)createdLinks2 / totalCharacters} | {(double)createdLinks3 / totalCharacters}");

        Assert.True(createdLinks1 < createdLinks2);
        Assert.True(createdLinks3 < createdLinks2);

        var duplicateProvider1 = new DuplicateSegmentsProvider<ulong>(links1, scope1.Sequences);
        var duplicateProvider2 = new DuplicateSegmentsProvider<ulong>(links2, scope2.Sequences);
        var duplicateProvider3 = new DuplicateSegmentsProvider<ulong>(links3, scope3.Sequences);

        var duplicateCounter1 = new DuplicateSegmentsCounter<ulong>(duplicateProvider1);
        var duplicateCounter2 = new DuplicateSegmentsCounter<ulong>(duplicateProvider2);
        var duplicateCounter3 = new DuplicateSegmentsCounter<ulong>(duplicateProvider3);

        var duplicates1 = duplicateCounter1.Count();
        ConsoleHelpers.Debug("------");
        var duplicates2 = duplicateCounter2.Count();
        ConsoleHelpers.Debug("------");
        var duplicates3 = duplicateCounter3.Count();

        Console.WriteLine($"{duplicates1} | {duplicates2} | {duplicates3}");

        linkFrequenciesCache1.ValidateFrequencies();
        linkFrequenciesCache3.ValidateFrequencies();
    }
}