/// <summary>
/// Fills every option that was not set explicitly with a default built over <paramref name="links"/>.
/// </summary>
/// <param name="links">The links storage the default components are created for.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when a sequence marker link is configured, does not exist in <paramref name="links"/>,
/// and the point created to replace it is not equal to the configured marker.
/// </exception>
public void InitOptions(ISynchronizedLinks<TLink> links)
{
    if (UseSequenceMarker)
    {
        var markerIsUnset = _equalityComparer.Equals(SequenceMarkerLink, links.Constants.Null);
        if (markerIsUnset)
        {
            SequenceMarkerLink = links.CreatePoint();
        }
        else if (!links.Exists(SequenceMarkerLink))
        {
            // The marker is configured but missing from the storage:
            // the freshly created point has to be the very same link.
            var recreatedMarker = links.CreatePoint();
            if (!_equalityComparer.Equals(recreatedMarker, SequenceMarkerLink))
            {
                throw new InvalidOperationException("Cannot recreate sequence marker link.");
            }
        }

        if (MarkedSequenceMatcher == null)
        {
            MarkedSequenceMatcher = new MarkedSequenceCriterionMatcher<TLink>(links, SequenceMarkerLink);
        }
    }

    var balancedVariantConverter = new BalancedVariantConverter<TLink>(links);
    if (LinksToSequenceConverter == null)
    {
        if (!UseCompression)
        {
            LinksToSequenceConverter = balancedVariantConverter;
        }
        else
        {
            // With a sequence marker only marked sequences are counted.
            ICounter<TLink, TLink> symbolFrequencyCounter;
            if (UseSequenceMarker)
            {
                symbolFrequencyCounter = new TotalMarkedSequenceSymbolFrequencyCounter<TLink>(links, MarkedSequenceMatcher);
            }
            else
            {
                symbolFrequencyCounter = new TotalSequenceSymbolFrequencyCounter<TLink>(links);
            }

            var frequenciesCache = new LinkFrequenciesCache<TLink>(links, symbolFrequencyCounter);
            LinksToSequenceConverter = new CompressingConverter<TLink>(links, balancedVariantConverter, frequenciesCache);
        }
    }

    if (UseIndex && Index == null)
    {
        Index = new SequenceIndex<TLink>(links);
    }

    if (Walker == null)
    {
        Walker = new RightSequenceWalker<TLink>(links, new DefaultStack<TLink>());
    }
}
/// <summary>
/// <para>
/// Initializes a new <see cref="FileStorage"/> instance: builds the links storage over the given file,
/// obtains the marker links and sets up the string &lt;-&gt; unicode sequence converters.
/// </para>
/// </summary>
/// <param name="DBFilename">
/// <para>The file name passed to <see cref="FileMappedResizableDirectMemory"/> as the storage location.</para>
/// </param>
public FileStorage(string DBFilename)
{
    var linksConstants = new LinksConstants<TLinkAddress>(enableExternalReferencesSupport: true);
    var dataMemory = new FileMappedResizableDirectMemory(DBFilename);
    // Take the size step from the same address type the storage is instantiated with
    // (it used to be read from UnitedMemoryLinks<UInt64> regardless of TLinkAddress).
    _disposableLinks = new UnitedMemoryLinks<TLinkAddress>(dataMemory, UnitedMemoryLinks<TLinkAddress>.DefaultLinksSizeStep, linksConstants, IndexTreeType.Default);
    _synchronizedLinks = new SynchronizedLinks<TLinkAddress>(_disposableLinks);

    // NOTE(review): a link whose source and target are itself is created on every construction — confirm this is intended.
    var link = _synchronizedLinks.Create();
    _synchronizedLinks.Update(link, newSource: link, newTarget: link);

    // Marker links are requested in a fixed order; the index is advanced after each request.
    ushort currentMappingLinkIndex = 1;
    Any = _synchronizedLinks.Constants.Any;
    _meaningRoot = GetOrCreateMeaningRoot(currentMappingLinkIndex++);
    _unicodeSymbolMarker = GetOrCreateNextMapping(currentMappingLinkIndex++);
    _unicodeSequenceMarker = GetOrCreateNextMapping(currentMappingLinkIndex++);
    _setMarker = GetOrCreateNextMapping(currentMappingLinkIndex++);
    _fileMarker = GetOrCreateNextMapping(currentMappingLinkIndex++);

    // Converters between a link address and a raw number.
    _addressToNumberConverter = new AddressToRawNumberConverter<TLinkAddress>();
    _numberToAddressConverter = new RawNumberToAddressConverter<TLinkAddress>();

    // Converters between a string and a unicode sequence stored as links.
    var balancedVariantConverter = new BalancedVariantConverter<TLinkAddress>(_synchronizedLinks);
    var unicodeSymbolCriterionMatcher = new TargetMatcher<TLinkAddress>(_synchronizedLinks, _unicodeSymbolMarker);
    var unicodeSequenceCriterionMatcher = new TargetMatcher<TLinkAddress>(_synchronizedLinks, _unicodeSequenceMarker);
    var charToUnicodeSymbolConverter = new CharToUnicodeSymbolConverter<TLinkAddress>(_synchronizedLinks, _addressToNumberConverter, _unicodeSymbolMarker);
    var unicodeSymbolToCharConverter = new UnicodeSymbolToCharConverter<TLinkAddress>(_synchronizedLinks, _numberToAddressConverter, unicodeSymbolCriterionMatcher);
    var sequenceWalker = new RightSequenceWalker<TLinkAddress>(_synchronizedLinks, new DefaultStack<TLinkAddress>(), unicodeSymbolCriterionMatcher.IsMatched);
    _stringToUnicodeSequenceConverter = new CachingConverterDecorator<string, TLinkAddress>(new StringToUnicodeSequenceConverter<TLinkAddress>(_synchronizedLinks, charToUnicodeSymbolConverter, balancedVariantConverter, _unicodeSequenceMarker));
    _unicodeSequenceToStringConverter = new CachingConverterDecorator<TLinkAddress, string>(new UnicodeSequenceToStringConverter<TLinkAddress>(_synchronizedLinks, unicodeSequenceCriterionMatcher, sequenceWalker, unicodeSymbolToCharConverter));
}
/// <summary>
/// Checks <c>MatchPattern</c> with the <c>ZeroOrMany</c> placeholder against the balanced variant
/// of the sequence [e1, e2, e1, e2].
/// </summary>
public static void PatternMatchTest()
{
    var anyNumberOfElements = Sequences.Sequences.ZeroOrMany;
    using (var scope = new TempLinksTestScope(useSequences: true))
    {
        var links = scope.Links;
        var sequences = scope.Sequences;
        var first = links.Create();
        var second = links.Create();
        // mama / papa
        var elements = new[] { first, second, first, second };
        var converter = new BalancedVariantConverter<ulong>(links);
        var wholeSequence = converter.Convert(elements);
        // 1: [1]
        // 2: [2]
        // 3: [1,2]
        // 4: [1,2,1,2]
        var pair = links.GetSource(wholeSequence);

        var startingWithSecondFirst = sequences.MatchPattern(second, first, anyNumberOfElements);
        Assert.True(startingWithSecondFirst.Count == 0);

        var endingWithSecondFirst = sequences.MatchPattern(anyNumberOfElements, second, first);
        Assert.True(endingWithSecondFirst.Count == 0);

        var firstToFirst = sequences.MatchPattern(first, anyNumberOfElements, first);
        Assert.True(firstToFirst.Count == 0);

        // Both the [1,2] doublet and the whole sequence start with e1 and end with e2.
        var firstToSecond = sequences.MatchPattern(first, anyNumberOfElements, second);
        Assert.Contains(pair, firstToSecond);
        Assert.Contains(wholeSequence, firstToSecond);

        foreach (var element in elements)
        {
            links.Delete(element);
        }
    }
}
/// <summary>
/// Builds a balanced variant of 2000 freshly created links and reads it back twice:
/// through <c>sequences.ToList</c> (configured with a <c>LeveledSequenceWalker</c>) and through
/// <c>SequenceWalker.WalkRight</c>. Both results must equal the original sequence.
/// </summary>
public static void ReadSequenceTest()
{
    const long sequenceLength = 2000;
    using (var scope = new TempLinksTestScope(useSequences: false))
    {
        var links = scope.Links;
        var options = new SequencesOptions<ulong> { Walker = new LeveledSequenceWalker<ulong>(links) };
        var sequences = new Sequences.Sequences(links, options);

        var elements = new ulong[sequenceLength];
        for (var index = 0; index < elements.Length; index++)
        {
            elements[index] = links.Create();
        }

        var converter = new BalancedVariantConverter<ulong>(links);

        var conversionTimer = Stopwatch.StartNew();
        var root = converter.Convert(elements);
        conversionTimer.Stop();

        var leveledTimer = Stopwatch.StartNew();
        var readByLevels = sequences.ToList(root);
        leveledTimer.Stop();

        var stackTimer = Stopwatch.StartNew();
        var readByStack = new List<ulong>();
        SequenceWalker.WalkRight(root, links.GetSource, links.GetTarget, links.IsPartialPoint, readByStack.Add);
        stackTimer.Stop();

        Assert.True(elements.SequenceEqual(readByLevels));
        Assert.True(elements.SequenceEqual(readByStack));

        // The relative speed of the two readers is only reported, not asserted.
        Console.WriteLine($"Stack-based walker: {stackTimer.Elapsed}, Level-based reader: {leveledTimer.Elapsed}");

        foreach (var element in elements)
        {
            links.Delete(element);
        }
    }
}
/// <summary>
/// Compresses the sequence [e1, e2, e1, e2] and checks that every element can be read back
/// from the resulting link in three ways: nested source/target reads, <c>GetByKeys</c>,
/// and <c>GetSquareMatrixSequenceElementByIndex</c>.
/// </summary>
public static void CompressionTest()
{
    using (var scope = new TempLinksTestScope(useSequences: true))
    {
        var links = scope.Links;
        var sequences = scope.Sequences;
        var first = links.Create();
        var second = links.Create();
        // mama / papa / template [(m/p), a] { [1] [2] [1] [2] }
        var sequence = new[] { first, second, first, second };

        var balancedVariantConverter = new BalancedVariantConverter<ulong>(links.Unsync);
        var symbolFrequencyCounter = new TotalSequenceSymbolFrequencyCounter<ulong>(links.Unsync);
        var frequenciesCache = new LinkFrequenciesCache<ulong>(links.Unsync, symbolFrequencyCounter);
        var compressor = new CompressingConverter<ulong>(links.Unsync, balancedVariantConverter, frequenciesCache);
        var compressed = compressor.Convert(sequence);
        // 1: [1]       (1->1) point
        // 2: [2]       (2->2) point
        // 3: [1,2]     (1->2) doublet
        // 4: [1,2,1,2] (3->3) doublet

        // Nested source/target reads
        var leftHalf = links.GetSource(compressed);
        Assert.True(links.GetSource(leftHalf) == sequence[0]);
        Assert.True(links.GetTarget(links.GetSource(compressed)) == sequence[1]);
        Assert.True(links.GetSource(links.GetTarget(compressed)) == sequence[2]);
        Assert.True(links.GetTarget(links.GetTarget(compressed)) == sequence[3]);

        // Reads by a path of link parts
        var sourcePart = _constants.SourcePart;
        var targetPart = _constants.TargetPart;
        Assert.True(links.GetByKeys(compressed, sourcePart, sourcePart) == sequence[0]);
        Assert.True(links.GetByKeys(compressed, sourcePart, targetPart) == sequence[1]);
        Assert.True(links.GetByKeys(compressed, targetPart, sourcePart) == sequence[2]);
        Assert.True(links.GetByKeys(compressed, targetPart, targetPart) == sequence[3]);

        // Reads by element index; 4 is the length of the sequence
        Assert.True(links.GetSquareMatrixSequenceElementByIndex(compressed, 4, 0) == sequence[0]);
        Assert.True(links.GetSquareMatrixSequenceElementByIndex(compressed, 4, 1) == sequence[1]);
        Assert.True(links.GetSquareMatrixSequenceElementByIndex(compressed, 4, 2) == sequence[2]);
        Assert.True(links.GetSquareMatrixSequenceElementByIndex(compressed, 4, 3) == sequence[3]);
    }
}
/// <summary>
/// Builds the links storage over a data file and an index file, obtains the marker links
/// and sets up the property operator and the date/string converters used by this context.
/// </summary>
/// <param name="dataDBFilename">File name passed to the data <c>FileMappedResizableDirectMemory</c>.</param>
/// <param name="indexDBFilename">File name passed to the index <c>FileMappedResizableDirectMemory</c>.</param>
public DoubletsDbContext(string dataDBFilename, string indexDBFilename)
{
    // Links storage split over two files
    var dataFileMemory = new FileMappedResizableDirectMemory(dataDBFilename);
    var indexFileMemory = new FileMappedResizableDirectMemory(indexDBFilename);
    var constants = new LinksConstants<TLinkAddress>(enableExternalReferencesSupport: true);
    _disposableLinks = new UInt32SplitMemoryLinks(dataFileMemory, indexFileMemory, UInt32SplitMemoryLinks.DefaultLinksSizeStep, constants);

    // Combined decorator that carries the main logic on top of the raw storage
    _links = new UInt32Links(_disposableLinks);

    // Constant links (markers, aka mapped links); requested in a fixed order
    TLinkAddress nextMarkerIndex = 1;
    _meaningRoot = GetOrCreateMeaningRoot(nextMarkerIndex++);
    _unicodeSymbolMarker = GetOrCreateNextMapping(nextMarkerIndex++);
    _unicodeSequenceMarker = GetOrCreateNextMapping(nextMarkerIndex++);
    _titlePropertyMarker = GetOrCreateNextMapping(nextMarkerIndex++);
    _contentPropertyMarker = GetOrCreateNextMapping(nextMarkerIndex++);
    _publicationDateTimePropertyMarker = GetOrCreateNextMapping(nextMarkerIndex++);
    _blogPostMarker = GetOrCreateNextMapping(nextMarkerIndex++);

    // Reads and writes properties of any link (object)
    _defaultLinkPropertyOperator = new PropertiesOperator<TLinkAddress>(_links);

    // Link address <-> raw number
    _numberToAddressConverter = new RawNumberToAddressConverter<TLinkAddress>();
    _addressToNumberConverter = new AddressToRawNumberConverter<TLinkAddress>();

    // Dates
    var rawNumberSequenceToLong = new LongRawNumberSequenceToNumberConverter<TLinkAddress, long>(_links, _numberToAddressConverter);
    _longRawNumberToDateTimeConverter = new LongRawNumberSequenceToDateTimeConverter<TLinkAddress>(rawNumberSequenceToLong);
    var longToRawNumberSequence = new NumberToLongRawNumberSequenceConverter<long, TLinkAddress>(_links, _addressToNumberConverter);
    _dateTimeToLongRawNumberConverter = new DateTimeToLongRawNumberSequenceConverter<TLinkAddress>(longToRawNumberSequence);

    // String <-> unicode sequence stored as a link
    var balancedVariantConverter = new BalancedVariantConverter<TLinkAddress>(_links);
    var symbolMatcher = new TargetMatcher<TLinkAddress>(_links, _unicodeSymbolMarker);
    var sequenceMatcher = new TargetMatcher<TLinkAddress>(_links, _unicodeSequenceMarker);
    var charToSymbol = new CharToUnicodeSymbolConverter<TLinkAddress>(_links, _addressToNumberConverter, _unicodeSymbolMarker);
    var symbolToChar = new UnicodeSymbolToCharConverter<TLinkAddress>(_links, _numberToAddressConverter, symbolMatcher);
    var walker = new RightSequenceWalker<TLinkAddress>(_links, new DefaultStack<TLinkAddress>(), symbolMatcher.IsMatched);

    var stringToSequence = new StringToUnicodeSequenceConverter<TLinkAddress>(_links, charToSymbol, balancedVariantConverter, _unicodeSequenceMarker);
    _stringToUnicodeSequenceConverter = new CachingConverterDecorator<string, TLinkAddress>(stringToSequence);
    var sequenceToString = new UnicodeSequenceToStringConverter<TLinkAddress>(_links, sequenceMatcher, walker, symbolToChar);
    _unicodeSequenceToStringConverter = new CachingConverterDecorator<TLinkAddress, string>(sequenceToString);
}
/// <summary>
/// Creates a balanced variant of 200 fresh links and checks that both
/// <c>GetAllMatchingSequences0</c> and <c>GetAllMatchingSequences1</c> find exactly that variant.
/// </summary>
public static void BalancedVariantSearchTest()
{
    const long sequenceLength = 200;
    using (var scope = new TempLinksTestScope(useSequences: true))
    {
        var links = scope.Links;
        var sequences = scope.Sequences;

        var elements = new ulong[sequenceLength];
        for (var index = 0; index < elements.Length; index++)
        {
            elements[index] = links.Create();
        }

        var converter = new BalancedVariantConverter<ulong>(links);

        var conversionTimer = Stopwatch.StartNew();
        var root = converter.Convert(elements);
        conversionTimer.Stop();

        var searchTimer0 = Stopwatch.StartNew();
        var foundBySearch0 = sequences.GetAllMatchingSequences0(elements);
        searchTimer0.Stop();

        var searchTimer1 = Stopwatch.StartNew();
        var foundBySearch1 = sequences.GetAllMatchingSequences1(elements);
        searchTimer1.Stop();

        // With 200 elements this would take forever:
        //var sw4 = Stopwatch.StartNew();
        //var searchResults4 = sequences.Each(sequence); sw4.Stop();

        Assert.True(foundBySearch0.Count == 1 && root == foundBySearch0[0]);
        Assert.True(foundBySearch1.Count == 1 && root == foundBySearch1.First());

        // Timings are measured but intentionally not asserted.

        foreach (var element in elements)
        {
            links.Delete(element);
        }
    }
}
/// <summary>
/// Creates a balanced variant of 200 fresh links and searches for it by the same sequence
/// without its first and last element; both partial-match searches must return exactly that variant.
/// </summary>
public static void BalancedPartialVariantsSearchTest()
{
    const long sequenceLength = 200;
    using (var scope = new TempLinksTestScope(useSequences: true))
    {
        var links = scope.Links;
        var sequences = scope.Sequences;

        var elements = new ulong[sequenceLength];
        for (var index = 0; index < elements.Length; index++)
        {
            elements[index] = links.Create();
        }

        var converter = new BalancedVariantConverter<ulong>(links);
        var root = converter.Convert(elements);

        // Everything except the first and the last element
        var inner = new ulong[sequenceLength - 2];
        Array.Copy(elements, 1, inner, 0, (int)sequenceLength - 2);

        var searchTimer0 = Stopwatch.StartNew();
        var foundBySearch0 = sequences.GetAllPartiallyMatchingSequences0(inner);
        searchTimer0.Stop();

        var searchTimer1 = Stopwatch.StartNew();
        var foundBySearch1 = sequences.GetAllPartiallyMatchingSequences1(inner);
        searchTimer1.Stop();

        Assert.True(foundBySearch0.Count == 1 && root == foundBySearch0[0]);
        Assert.True(foundBySearch1.Count == 1 && root == foundBySearch1.First());

        foreach (var element in elements)
        {
            links.Delete(element);
        }
    }
}
/// <summary>
/// Remembers both file names, builds the links storage over them, obtains the marker links
/// and sets up the string &lt;-&gt; unicode sequence converters.
/// </summary>
/// <param name="indexFileName">File name passed to the index <c>FileMappedResizableDirectMemory</c>.</param>
/// <param name="dataFileName">File name passed to the data <c>FileMappedResizableDirectMemory</c>.</param>
public PlatformDataBase(string indexFileName, string dataFileName)
{
    this.indexFileName = indexFileName;
    this.dataFileName = dataFileName;

    // Links storage split over two files
    var dataFileMemory = new FileMappedResizableDirectMemory(dataFileName);
    var indexFileMemory = new FileMappedResizableDirectMemory(indexFileName);
    var constants = new LinksConstants<TLinkAddress>(enableExternalReferencesSupport: true);
    _disposableLinks = new UInt32SplitMemoryLinks(dataFileMemory, indexFileMemory, UInt32SplitMemoryLinks.DefaultLinksSizeStep, constants);

    // Combined decorator that carries the main logic on top of the raw storage
    links = new UInt32Links(_disposableLinks);

    // Constant links (markers, aka mapped links); requested in a fixed order
    TLinkAddress nextMarkerIndex = 1;
    // NOTE(review): "GerOrCreateMeaningRoot" looks like a misspelling of "GetOrCreateMeaningRoot";
    // the method is declared outside this block, so the call is left as it is.
    _meaningRoot = GerOrCreateMeaningRoot(nextMarkerIndex++);
    _unicodeSymbolMarker = GetOrCreateNextMapping(nextMarkerIndex++);
    _unicodeSequenceMarker = GetOrCreateNextMapping(nextMarkerIndex++);
    _bookMarker = GetOrCreateNextMapping(nextMarkerIndex++);

    // Link address <-> raw number
    _numberToAddressConverter = new RawNumberToAddressConverter<TLinkAddress>();
    _addressToNumberConverter = new AddressToRawNumberConverter<TLinkAddress>();

    // String <-> unicode sequence stored as a link
    var balancedVariantConverter = new BalancedVariantConverter<TLinkAddress>(links);
    var symbolMatcher = new TargetMatcher<TLinkAddress>(links, _unicodeSymbolMarker);
    var sequenceMatcher = new TargetMatcher<TLinkAddress>(links, _unicodeSequenceMarker);
    var charToSymbol = new CharToUnicodeSymbolConverter<TLinkAddress>(links, _addressToNumberConverter, _unicodeSymbolMarker);
    var symbolToChar = new UnicodeSymbolToCharConverter<TLinkAddress>(links, _numberToAddressConverter, symbolMatcher);
    var walker = new RightSequenceWalker<TLinkAddress>(links, new DefaultStack<TLinkAddress>(), symbolMatcher.IsMatched);

    var stringToSequence = new StringToUnicodeSequenceConverter<TLinkAddress>(links, charToSymbol, balancedVariantConverter, _unicodeSequenceMarker);
    _stringToUnicodeSequenceConverter = new CachingConverterDecorator<string, TLinkAddress>(stringToSequence);
    var sequenceToString = new UnicodeSequenceToStringConverter<TLinkAddress>(links, sequenceMatcher, walker, symbolToChar);
    _unicodeSequenceToStringConverter = new CachingConverterDecorator<TLinkAddress, string>(sequenceToString);
}
/// <summary>
/// Stores up to 500 distinct random numbers (as strings) twice — through compressing sequences
/// and through a plain balanced variant converter — then checks that both round-trip to the
/// original strings and that each storage added fewer links than there are characters in total.
/// </summary>
public static void RundomNumbersCompressionQualityTest()
{
    const ulong N = 500;

    var strings = Enumerable.Range(0, (int)N)
        .Select(_ => RandomHelpers.Default.NextUInt64().ToString())
        .Distinct()
        .ToList();
    var arrays = strings.Select(UnicodeMap.FromStringToLinkArray).ToArray();
    var totalCharacters = arrays.Sum(x => x.Length);

    var compressingOptions = new SequencesOptions<ulong> { UseCompression = true, EnforceSingleSequenceVersionOnWriteBasedOnExisting = true };
    using (var scope1 = new TempLinksTestScope(useSequences: true, sequencesOptions: compressingOptions))
    using (var scope2 = new TempLinksTestScope(useSequences: true))
    {
        scope1.Links.UseUnicode();
        scope2.Links.UseUnicode();

        var compressor1 = scope1.Sequences;
        var compressor2 = scope2.Sequences;

        var count = arrays.Length;
        var compressed1 = new ulong[count];
        var compressed2 = new ulong[count];

        var compressorTimer = Stopwatch.StartNew();
        for (var i = 0; i < count; i++)
        {
            compressed1[i] = compressor1.Create(arrays[i].ShiftRight());
        }
        var elapsed1 = compressorTimer.Elapsed;

        var balancedVariantConverter = new BalancedVariantConverter<ulong>(scope2.Links);

        var balancedTimer = Stopwatch.StartNew();
        for (var i = 0; i < count; i++)
        {
            compressed2[i] = balancedVariantConverter.Convert(arrays[i]);
        }
        var elapsed2 = balancedTimer.Elapsed;

        Debug.WriteLine($"Compressor: {elapsed1}, Balanced sequence creator: {elapsed2}");

        Assert.True(elapsed1 > elapsed2);

        // Round-trip checks
        for (var i = 0; i < count; i++)
        {
            var sequence1 = compressed1[i];
            var sequence2 = compressed2[i];
            if (sequence1 == _constants.Null || sequence2 == _constants.Null)
            {
                continue;
            }

            var decompress1 = UnicodeMap.FromSequenceLinkToString(sequence1, scope1.Links);
            var decompress2 = UnicodeMap.FromSequenceLinkToString(sequence2, scope2.Links);
            Assert.True(strings[i] == decompress1 && decompress1 == decompress2);
        }

        Assert.True((int)(scope1.Links.Count() - UnicodeMap.MapSize) < totalCharacters);
        Assert.True((int)(scope2.Links.Count() - UnicodeMap.MapSize) < totalCharacters);

        Debug.WriteLine($"{(double)(scope1.Links.Count() - UnicodeMap.MapSize) / totalCharacters} | {(double)(scope2.Links.Count() - UnicodeMap.MapSize) / totalCharacters}");

        // The compressed storage can be worse than the balanced variant, so the link counts are not compared.
        //Assert.True(scope1.Links.Count() <= scope2.Links.Count());

        //compressor1.ValidateFrequencies();
    }
}
/// <summary>
/// Stores the decimal strings of 10000..12499 twice in a row through compressing sequences and
/// through a balanced variant converter. A result is kept only when both attempts return the
/// same link; kept results must round-trip to the original string, and the compressed storage
/// must not hold more links than the balanced one.
/// </summary>
public static void CompressionStabilityTest()
{
    // TODO: Fix bug (do a separate test)
    //const ulong minNumbers = 0;
    //const ulong maxNumbers = 1000;

    const ulong minNumbers = 10000;
    const ulong maxNumbers = 12500;

    var strings = new List<string>();
    for (var number = minNumbers; number < maxNumbers; number++)
    {
        strings.Add(number.ToString());
    }

    var arrays = strings.Select(UnicodeMap.FromStringToLinkArray).ToArray();
    var totalCharacters = arrays.Sum(x => x.Length);

    var compressingOptions = new SequencesOptions<ulong> { UseCompression = true, EnforceSingleSequenceVersionOnWriteBasedOnExisting = true };
    using (var scope1 = new TempLinksTestScope(useSequences: true, sequencesOptions: compressingOptions))
    using (var scope2 = new TempLinksTestScope(useSequences: true))
    {
        scope1.Links.UseUnicode();
        scope2.Links.UseUnicode();

        var compressor1 = scope1.Sequences;
        var compressor2 = scope2.Sequences;

        var count = arrays.Length;
        var compressed1 = new ulong[count];
        var compressed2 = new ulong[count];

        var compressorTimer = Stopwatch.StartNew();

        // Collisions proved (cannot be solved by max doublet comparison, no stable rule)
        // Stability issue starts at 10001 or 11000
        for (var i = 0; i < count; i++)
        {
            var firstAttempt = compressor1.Create(arrays[i].ShiftRight());
            var secondAttempt = compressor1.Create(arrays[i].ShiftRight());
            if (firstAttempt != secondAttempt)
            {
                // TODO: Find a solution for this case
                continue;
            }
            compressed1[i] = firstAttempt;
        }
        var elapsed1 = compressorTimer.Elapsed;

        var balancedVariantConverter = new BalancedVariantConverter<ulong>(scope2.Links);

        var balancedTimer = Stopwatch.StartNew();
        for (var i = 0; i < count; i++)
        {
            var firstAttempt = balancedVariantConverter.Convert(arrays[i]);
            var secondAttempt = balancedVariantConverter.Convert(arrays[i]);
            if (firstAttempt == secondAttempt)
            {
                compressed2[i] = firstAttempt;
            }
        }
        var elapsed2 = balancedTimer.Elapsed;

        Debug.WriteLine($"Compressor: {elapsed1}, Balanced sequence creator: {elapsed2}");

        Assert.True(elapsed1 > elapsed2);

        // Round-trip checks; entries left at Null (unstable results) are skipped
        for (var i = 0; i < count; i++)
        {
            var sequence1 = compressed1[i];
            var sequence2 = compressed2[i];
            if (sequence1 == _constants.Null || sequence2 == _constants.Null)
            {
                continue;
            }

            var decompress1 = UnicodeMap.FromSequenceLinkToString(sequence1, scope1.Links);
            var decompress2 = UnicodeMap.FromSequenceLinkToString(sequence2, scope2.Links);
            Assert.True(strings[i] == decompress1 && decompress1 == decompress2);
        }

        Assert.True((int)(scope1.Links.Count() - UnicodeMap.MapSize) < totalCharacters);
        Assert.True((int)(scope2.Links.Count() - UnicodeMap.MapSize) < totalCharacters);

        Debug.WriteLine($"{(double)(scope1.Links.Count() - UnicodeMap.MapSize) / totalCharacters} | {(double)(scope2.Links.Count() - UnicodeMap.MapSize) / totalCharacters}");

        Assert.True(scope1.Links.Count() <= scope2.Links.Count());

        //compressor1.ValidateFrequencies();
    }
}
/// <summary>
/// Stores every line of the example text in three separate scopes — with a compressing converter,
/// a balanced variant converter and an optimal variant converter — and compares them:
/// each must round-trip to the original line, add fewer links than there are characters in total,
/// and the compressing and optimal variants must add fewer links than the balanced one.
/// Duplicate segment counts are printed and the frequency caches are validated at the end.
/// </summary>
public static void CompressionEfficiencyTest()
{
    var strings = _exampleLoremIpsumText.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
    var arrays = strings.Select(UnicodeMap.FromStringToLinkArray).ToArray();
    var totalCharacters = arrays.Select(x => x.Length).Sum();

    using (var scope1 = new TempLinksTestScope(useSequences: true))
    using (var scope2 = new TempLinksTestScope(useSequences: true))
    using (var scope3 = new TempLinksTestScope(useSequences: true))
    {
        scope1.Links.Unsync.UseUnicode();
        scope2.Links.Unsync.UseUnicode();
        scope3.Links.Unsync.UseUnicode();

        // Scope 1: compressing converter
        var balancedVariantConverter1 = new BalancedVariantConverter<ulong>(scope1.Links.Unsync);
        var totalSequenceSymbolFrequencyCounter = new TotalSequenceSymbolFrequencyCounter<ulong>(scope1.Links.Unsync);
        var linkFrequenciesCache1 = new LinkFrequenciesCache<ulong>(scope1.Links.Unsync, totalSequenceSymbolFrequencyCounter);
        var compressor1 = new CompressingConverter<ulong>(scope1.Links.Unsync, balancedVariantConverter1, linkFrequenciesCache1, doInitialFrequenciesIncrement: false);

        // NOTE(review): this value is not used afterwards; the property read is kept in case it matters — confirm and remove.
        var compressor3 = scope3.Sequences;

        // Scope 3: optimal variant converter driven by a frequencies cache
        // NOTE(review): the cache works on scope3 links, but the counter passed to it was built over scope1 links — confirm this is intended.
        var linkFrequenciesCache3 = new LinkFrequenciesCache<ulong>(scope3.Links.Unsync, totalSequenceSymbolFrequencyCounter);
        var linkToItsFrequencyNumberConverter = new FrequenciesCacheBasedLinkToItsFrequencyNumberConverter<ulong>(linkFrequenciesCache3);
        var sequenceToItsLocalElementLevelsConverter = new SequenceToItsLocalElementLevelsConverter<ulong>(scope3.Links.Unsync, linkToItsFrequencyNumberConverter);
        var optimalVariantConverter = new OptimalVariantConverter<ulong>(scope3.Links.Unsync, sequenceToItsLocalElementLevelsConverter);

        var compressed1 = new ulong[arrays.Length];
        var compressed2 = new ulong[arrays.Length];
        var compressed3 = new ulong[arrays.Length];

        var start = 0;
        var end = arrays.Length;

        // Baseline for scope1 must be taken from scope1 itself (it used to be read from scope2 by mistake).
        var initialCount1 = scope1.Links.Unsync.Count();

        var sw1 = Stopwatch.StartNew();
        for (int i = start; i < end; i++)
        {
            linkFrequenciesCache1.IncrementFrequencies(arrays[i]);
            compressed1[i] = compressor1.Convert(arrays[i]);
        }
        var elapsed1 = sw1.Elapsed;

        // Scope 2: plain balanced variant
        var balancedVariantConverter2 = new BalancedVariantConverter<ulong>(scope2.Links.Unsync);
        var initialCount2 = scope2.Links.Unsync.Count();

        var sw2 = Stopwatch.StartNew();
        for (int i = start; i < end; i++)
        {
            compressed2[i] = balancedVariantConverter2.Convert(arrays[i]);
        }
        var elapsed2 = sw2.Elapsed;

        // For scope3 all frequencies are incremented up front, before the baseline is taken and conversion starts.
        for (int i = start; i < end; i++)
        {
            linkFrequenciesCache3.IncrementFrequencies(arrays[i]);
        }

        var initialCount3 = scope3.Links.Unsync.Count();

        var sw3 = Stopwatch.StartNew();
        for (int i = start; i < end; i++)
        {
            compressed3[i] = optimalVariantConverter.Convert(arrays[i]);
        }
        var elapsed3 = sw3.Elapsed;

        Console.WriteLine($"Compressor: {elapsed1}, Balanced variant: {elapsed2}, Optimal variant: {elapsed3}");

        // Timings are reported only; they are not asserted.

        // Round-trip checks
        for (int i = start; i < end; i++)
        {
            var sequence1 = compressed1[i];
            var sequence2 = compressed2[i];
            var sequence3 = compressed3[i];

            var decompress1 = UnicodeMap.FromSequenceLinkToString(sequence1, scope1.Links.Unsync);
            var decompress2 = UnicodeMap.FromSequenceLinkToString(sequence2, scope2.Links.Unsync);
            var decompress3 = UnicodeMap.FromSequenceLinkToString(sequence3, scope3.Links.Unsync);

            // The formatted structures are not compared; the calls are kept so FormatStructure is still exercised.
            var structure1 = scope1.Links.Unsync.FormatStructure(sequence1, link => link.IsPartialPoint());
            var structure2 = scope2.Links.Unsync.FormatStructure(sequence2, link => link.IsPartialPoint());
            var structure3 = scope3.Links.Unsync.FormatStructure(sequence3, link => link.IsPartialPoint());

            Assert.True(strings[i] == decompress1 && decompress1 == decompress2);
            Assert.True(strings[i] == decompress3 && decompress3 == decompress2);
        }

        // Every variant must add fewer links than there are characters in the input
        Assert.True((int)(scope1.Links.Unsync.Count() - initialCount1) < totalCharacters);
        Assert.True((int)(scope2.Links.Unsync.Count() - initialCount2) < totalCharacters);
        Assert.True((int)(scope3.Links.Unsync.Count() - initialCount3) < totalCharacters);

        Console.WriteLine($"{(double)(scope1.Links.Unsync.Count() - initialCount1) / totalCharacters} | {(double)(scope2.Links.Unsync.Count() - initialCount2) / totalCharacters} | {(double)(scope3.Links.Unsync.Count() - initialCount3) / totalCharacters}");

        // Compressing and optimal variants must add fewer links than the balanced variant
        Assert.True(scope1.Links.Unsync.Count() - initialCount1 < scope2.Links.Unsync.Count() - initialCount2);
        Assert.True(scope3.Links.Unsync.Count() - initialCount3 < scope2.Links.Unsync.Count() - initialCount2);

        var duplicateProvider1 = new DuplicateSegmentsProvider<ulong>(scope1.Links.Unsync, scope1.Sequences);
        var duplicateProvider2 = new DuplicateSegmentsProvider<ulong>(scope2.Links.Unsync, scope2.Sequences);
        var duplicateProvider3 = new DuplicateSegmentsProvider<ulong>(scope3.Links.Unsync, scope3.Sequences);

        var duplicateCounter1 = new DuplicateSegmentsCounter<ulong>(duplicateProvider1);
        var duplicateCounter2 = new DuplicateSegmentsCounter<ulong>(duplicateProvider2);
        var duplicateCounter3 = new DuplicateSegmentsCounter<ulong>(duplicateProvider3);

        var duplicates1 = duplicateCounter1.Count();

        ConsoleHelpers.Debug("------");

        var duplicates2 = duplicateCounter2.Count();

        ConsoleHelpers.Debug("------");

        var duplicates3 = duplicateCounter3.Count();

        Console.WriteLine($"{duplicates1} | {duplicates2} | {duplicates3}");

        linkFrequenciesCache1.ValidateFrequencies();
        linkFrequenciesCache3.ValidateFrequencies();
    }
}
/// <summary>
/// Creates a <see cref="DefaultXmlStorage{TLinkAddress}"/> over <paramref name="links"/>.
/// The static <c>BalancedVariantConverter</c> member is replaced with a new converter built for
/// the same <paramref name="links"/> and handed to the storage.
/// </summary>
/// <param name="links">The links storage the XML storage and the converter work on.</param>
/// <returns>The newly created XML storage.</returns>
public static DefaultXmlStorage<TLinkAddress> CreateXmlStorage(ILinks<TLinkAddress> links)
{
    // The converter is kept in the static member, so each call overwrites the previous one.
    BalancedVariantConverter = new(links);
    var storage = new DefaultXmlStorage<TLinkAddress>(links, BalancedVariantConverter);
    return storage;
}