/// <summary>
/// Initializes the context: opens the file-mapped data and index memories, builds the links
/// storage on top of them, resolves the marker links and creates the converters used for
/// properties, dates and strings.
/// </summary>
/// <param name="dataDBFilename">Path of the file backing the links data memory.</param>
/// <param name="indexDBFilename">Path of the file backing the links index memory.</param>
public DoubletsDbContext(string dataDBFilename, string indexDBFilename)
{
    // Storage: one file-mapped memory for the data part and one for the index part.
    var dataStorage = new FileMappedResizableDirectMemory(dataDBFilename);
    var indexStorage = new FileMappedResizableDirectMemory(indexDBFilename);
    var constants = new LinksConstants<TLinkAddress>(enableExternalReferencesSupport: true);

    // Low-level links storage first, then the combined decorator that carries the main logic.
    _disposableLinks = new UInt32SplitMemoryLinks(dataStorage, indexStorage, UInt32SplitMemoryLinks.DefaultLinksSizeStep, constants);
    _links = new UInt32Links(_disposableLinks);

    // Constant links (markers, aka mapped links). Every call receives the next consecutive
    // index, starting at 1, so the order of these statements decides which index a marker gets.
    TLinkAddress nextMarkerIndex = 1;
    _meaningRoot = GetOrCreateMeaningRoot(nextMarkerIndex++);
    _unicodeSymbolMarker = GetOrCreateNextMapping(nextMarkerIndex++);
    _unicodeSequenceMarker = GetOrCreateNextMapping(nextMarkerIndex++);
    _titlePropertyMarker = GetOrCreateNextMapping(nextMarkerIndex++);
    _contentPropertyMarker = GetOrCreateNextMapping(nextMarkerIndex++);
    _publicationDateTimePropertyMarker = GetOrCreateNextMapping(nextMarkerIndex++);
    _blogPostMarker = GetOrCreateNextMapping(nextMarkerIndex++);

    // Operator used to read and write properties of any link (object).
    _defaultLinkPropertyOperator = new PropertiesOperator<TLinkAddress>(_links);

    // Converters between a link address and its raw-number representation.
    _numberToAddressConverter = new RawNumberToAddressConverter<TLinkAddress>();
    _addressToNumberConverter = new AddressToRawNumberConverter<TLinkAddress>();

    // Date converters, each wrapping a long <-> raw number sequence converter.
    var sequenceToLong = new LongRawNumberSequenceToNumberConverter<TLinkAddress, long>(_links, _numberToAddressConverter);
    _longRawNumberToDateTimeConverter = new LongRawNumberSequenceToDateTimeConverter<TLinkAddress>(sequenceToLong);
    var longToSequence = new NumberToLongRawNumberSequenceConverter<long, TLinkAddress>(_links, _addressToNumberConverter);
    _dateTimeToLongRawNumberConverter = new DateTimeToLongRawNumberSequenceConverter<TLinkAddress>(longToSequence);

    // String <-> unicode sequence converters; both directions are wrapped in a CachingConverterDecorator.
    var variantConverter = new BalancedVariantConverter<TLinkAddress>(_links);
    var symbolMatcher = new TargetMatcher<TLinkAddress>(_links, _unicodeSymbolMarker);
    var sequenceMatcher = new TargetMatcher<TLinkAddress>(_links, _unicodeSequenceMarker);
    var charToSymbol = new CharToUnicodeSymbolConverter<TLinkAddress>(_links, _addressToNumberConverter, _unicodeSymbolMarker);
    var symbolToChar = new UnicodeSymbolToCharConverter<TLinkAddress>(_links, _numberToAddressConverter, symbolMatcher);
    var symbolWalker = new RightSequenceWalker<TLinkAddress>(_links, new DefaultStack<TLinkAddress>(), symbolMatcher.IsMatched);

    var stringToSequence = new StringToUnicodeSequenceConverter<TLinkAddress>(_links, charToSymbol, variantConverter, _unicodeSequenceMarker);
    _stringToUnicodeSequenceConverter = new CachingConverterDecorator<string, TLinkAddress>(stringToSequence);

    var sequenceToString = new UnicodeSequenceToStringConverter<TLinkAddress>(_links, sequenceMatcher, symbolWalker, symbolToChar);
    _unicodeSequenceToStringConverter = new CachingConverterDecorator<TLinkAddress, string>(sequenceToString);
}
/// <summary>
/// Loads the image at <paramref name="sourceImagePath"/> and prepares a links storage together
/// with the frequency counter, frequency cache, index and converters built on top of it.
/// </summary>
/// <param name="sourceImagePath">Path of the image to load; its full path is also used to derive the ".links" path.</param>
public Patterns(string sourceImagePath)
{
    // Image: the full path is stored, while the image itself is opened from the path as given.
    _sourceImagePath = Path.GetFullPath(sourceImagePath);
    _image = new MagickImage(sourceImagePath);
    _pixels = _image.GetPixels();
    _linksPath = Path.ChangeExtension(_sourceImagePath, ".links");

    // Links storage: backed by HeapResizableDirectMemory. A file-mapped memory over _linksPath
    // is not used here, so _linksPath is only computed by this constructor.
    var heapMemory = new HeapResizableDirectMemory();
    var linksConstants = new LinksConstants<ulong>(enableExternalReferencesSupport: true);
    var storage = new UInt64UnitedMemoryLinks(
        heapMemory,
        UInt64UnitedMemoryLinks.DefaultLinksSizeStep,
        linksConstants,
        Platform.Data.Doublets.Memory.IndexTreeType.SizedAndThreadedAVLBalancedTree);
    _links = new UInt64Links(storage);

    // Converters between a link address and its raw-number representation.
    _addressToRawNumberConverter = new AddressToRawNumberConverter<ulong>();
    _rawNumberToAddressConverter = new RawNumberToAddressConverter<ulong>();

    // Frequency pipeline: counter -> cache -> index and link-to-frequency converter.
    _totalSequenceSymbolFrequencyCounter = new TotalSequenceSymbolFrequencyCounter<ulong>(_links);
    _linkFrequenciesCache = new LinkFrequenciesCache<ulong>(_links, _totalSequenceSymbolFrequencyCounter);
    _index = new CachedFrequencyIncrementingSequenceIndex<ulong>(_linkFrequenciesCache);
    _linkToItsFrequencyNumberConverter = new FrequenciesCacheBasedLinkToItsFrequencyNumberConverter<ulong>(_linkFrequenciesCache);

    // Sequence converters that depend on the frequency converter above.
    _sequenceToItsLocalElementLevelsConverter = new SequenceToItsLocalElementLevelsConverter<ulong>(_links, _linkToItsFrequencyNumberConverter);
    _optimalVariantConverter = new OptimalVariantConverter<ulong>(_links, _sequenceToItsLocalElementLevelsConverter);
}
/// <summary>
/// Remembers the index and data file names, opens both files as file-mapped memories, builds
/// the links storage on top of them, resolves the marker links and creates the converters
/// used to turn strings into stored unicode sequences and back.
/// </summary>
/// <param name="indexFileName">Path of the file backing the links index memory.</param>
/// <param name="dataFileName">Path of the file backing the links data memory.</param>
public PlatformDataBase(string indexFileName, string dataFileName)
{
    // `this.` is required here: the parameters have the same names as the members.
    this.indexFileName = indexFileName;
    this.dataFileName = dataFileName;

    // Storage: one file-mapped memory for the data part and one for the index part.
    var dataStorage = new FileMappedResizableDirectMemory(this.dataFileName);
    var indexStorage = new FileMappedResizableDirectMemory(this.indexFileName);
    var constants = new LinksConstants<TLinkAddress>(enableExternalReferencesSupport: true);

    // Low-level links storage first, then the combined decorator that carries the main logic.
    _disposableLinks = new UInt32SplitMemoryLinks(dataStorage, indexStorage, UInt32SplitMemoryLinks.DefaultLinksSizeStep, constants);
    links = new UInt32Links(_disposableLinks);

    // Constant links (markers, aka mapped links). Every call receives the next consecutive
    // index, starting at 1, so the order of these statements decides which index a marker gets.
    // NOTE(review): "Ger" in GerOrCreateMeaningRoot looks like a typo for "Get"; the helper is
    // declared outside this block, so the call is left unchanged.
    TLinkAddress nextMarkerIndex = 1;
    _meaningRoot = GerOrCreateMeaningRoot(nextMarkerIndex++);
    _unicodeSymbolMarker = GetOrCreateNextMapping(nextMarkerIndex++);
    _unicodeSequenceMarker = GetOrCreateNextMapping(nextMarkerIndex++);
    _bookMarker = GetOrCreateNextMapping(nextMarkerIndex++);

    // Converters between a link address and its raw-number representation.
    _numberToAddressConverter = new RawNumberToAddressConverter<TLinkAddress>();
    _addressToNumberConverter = new AddressToRawNumberConverter<TLinkAddress>();

    // String <-> unicode sequence converters; both directions are wrapped in a CachingConverterDecorator.
    var variantConverter = new BalancedVariantConverter<TLinkAddress>(links);
    var symbolMatcher = new TargetMatcher<TLinkAddress>(links, _unicodeSymbolMarker);
    var sequenceMatcher = new TargetMatcher<TLinkAddress>(links, _unicodeSequenceMarker);
    var charToSymbol = new CharToUnicodeSymbolConverter<TLinkAddress>(links, _addressToNumberConverter, _unicodeSymbolMarker);
    var symbolToChar = new UnicodeSymbolToCharConverter<TLinkAddress>(links, _numberToAddressConverter, symbolMatcher);
    var symbolWalker = new RightSequenceWalker<TLinkAddress>(links, new DefaultStack<TLinkAddress>(), symbolMatcher.IsMatched);

    var stringToSequence = new StringToUnicodeSequenceConverter<TLinkAddress>(links, charToSymbol, variantConverter, _unicodeSequenceMarker);
    _stringToUnicodeSequenceConverter = new CachingConverterDecorator<string, TLinkAddress>(stringToSequence);

    var sequenceToString = new UnicodeSequenceToStringConverter<TLinkAddress>(links, sequenceMatcher, symbolWalker, symbolToChar);
    _unicodeSequenceToStringConverter = new CachingConverterDecorator<TLinkAddress, string>(sequenceToString);
}
/// <summary>
/// Creates one sequence per non-empty line of the lorem ipsum sample, compacts all sequences
/// and asserts that the total number of links is smaller after compaction than before it.
/// </summary>
public static void SavedSequencesOptimizationTest()
{
    var constants = new LinksConstants<ulong>((1, long.MaxValue), (long.MaxValue + 1UL, ulong.MaxValue));
    using (var memory = new HeapResizableDirectMemory())
    using (var disposableLinks = new UInt64UnitedMemoryLinks(memory, UInt64UnitedMemoryLinks.DefaultLinksSizeStep, constants, IndexTreeType.Default))
    {
        var links = new UInt64Links(disposableLinks);
        var root = links.CreatePoint();
        var addressToNumberConverter = new AddressToRawNumberConverter<ulong>();

        // The symbol marker is not referenced again in this method; the call is kept because
        // it is part of the links that exist when the counts below are taken.
        var unicodeSymbolMarker = links.GetOrCreate(root, addressToNumberConverter.Convert(1));
        var unicodeSequenceMarker = links.GetOrCreate(root, addressToNumberConverter.Convert(2));

        // Frequency pipeline feeding the index and the optimal variant converter.
        var frequencyCounter = new TotalSequenceSymbolFrequencyCounter<ulong>(links);
        var frequenciesCache = new LinkFrequenciesCache<ulong>(links, frequencyCounter);
        var sequenceIndex = new CachedFrequencyIncrementingSequenceIndex<ulong>(frequenciesCache);
        var linkToFrequency = new FrequenciesCacheBasedLinkToItsFrequencyNumberConverter<ulong>(frequenciesCache);
        var sequenceToLevels = new SequenceToItsLocalElementLevelsConverter<ulong>(links, linkToFrequency);
        var optimalVariantConverter = new OptimalVariantConverter<ulong>(links, sequenceToLevels);

        // The walker treats external references and partial points as sequence elements.
        var walker = new RightSequenceWalker<ulong>(
            links,
            new DefaultStack<ulong>(),
            candidate => constants.IsExternalReference(candidate) || links.IsPartialPoint(candidate));

        var unicodeSequencesOptions = new SequencesOptions<ulong>()
        {
            UseSequenceMarker = true,
            SequenceMarkerLink = unicodeSequenceMarker,
            UseIndex = true,
            Index = sequenceIndex,
            LinksToSequenceConverter = optimalVariantConverter,
            Walker = walker,
            UseGarbageCollection = true
        };
        var unicodeSequences = new Sequences.Sequences(new SynchronizedLinks<ulong>(links), unicodeSequencesOptions);

        // Create some sequences: one per non-empty line, each character converted to a raw number.
        var lines = _loremIpsumExample.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
        var rawNumberLines = lines
            .Select(line => line.Select(symbol => addressToNumberConverter.Convert(symbol)).ToArray())
            .ToArray();
        foreach (var rawNumbers in rawNumberLines)
        {
            unicodeSequences.Create(rawNumbers.ShiftRight());
        }
        var linksCountAfterCreation = links.Count();

        // Outline of the compaction step, as noted by the original author:
        //   get list of sequences links
        //   for each sequence link
        //     create new sequence version
        //     if new sequence is not the same as sequence link
        //       delete sequence link
        //   collect garbage
        unicodeSequences.CompactAll();
        var linksCountAfterCompactification = links.Count();

        Assert.True(linksCountAfterCompactification < linksCountAfterCreation);
    }
}
/// <summary>
/// Builds a sequence for each word of a fixed list, adds the sequences to a sequence index,
/// prints every stored link, then assembles a regular expression via AppendPattern,
/// simplifies it, reports the words that do not match it and prints the expression and
/// the links count.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // Applies the rule to the text repeatedly until the rule no longer matches.
    string ReplaceWhileMatching(Regex rule, string text, string replacement)
    {
        while (rule.IsMatch(text))
        {
            text = rule.Replace(text, replacement);
        }
        return text;
    }

    var constants = new LinksConstants<ulong>((1, long.MaxValue), (long.MaxValue + 1UL, ulong.MaxValue));
    using var memory = new UInt64ResizableDirectMemoryLinks(new HeapResizableDirectMemory());
    var links = memory.DecorateWithAutomaticUniquenessAndUsagesResolution();
    var addressToRawNumberConverter = new AddressToRawNumberConverter<ulong>();
    var rawNumberToAddressConverter = new RawNumberToAddressConverter<ulong>();

    // Markers: both hang off the same root link.
    var root = links.GetOrCreate(1UL, 1UL);
    var unicodeSymbolMarker = links.GetOrCreate(root, addressToRawNumberConverter.Convert(1));
    var patternRootMarker = links.GetOrCreate(root, addressToRawNumberConverter.Convert(2));
    var charToUnicodeSymbolConverter = new Platform.Data.Doublets.Unicode.CharToUnicodeSymbolConverter<ulong>(links, addressToRawNumberConverter, unicodeSymbolMarker);

    var words = new[]
    {
        "href", "target", "rel", "media", "hreflang", "type", "sizes", "content", "name", "src",
        "charset", "text", "cite", "ping", "alt", "sandbox", "width", "height", "data", "value",
        "poster", "coords", "shape", "scope", "action", "enctype", "method", "accept", "max", "min",
        "pattern", "placeholder", "step", "label", "wrap", "icon", "radiogroup"
    };

    // Every sequence is the word's sequence with the pattern root marker on both ends.
    var rootMarkerOnly = new[] { patternRootMarker };
    var sequences = words
        .Select((word, wordIndex) => rootMarkerOnly
            .Concat(BuildSequence(word, wordIndex, links, addressToRawNumberConverter, charToUnicodeSymbolConverter))
            .Concat(rootMarkerOnly)
            .ToArray())
        .ToArray();

    var index = new SequenceIndex<ulong>(links);
    var any = links.Constants.Any;
    var @continue = links.Constants.Continue;
    foreach (var sequence in sequences)
    {
        index.Add(sequence);
    }

    // Print every link. Links whose target is the unicode symbol marker are decoded to a
    // character and remembered in `chars`; all other links are rendered through LinkToString.
    var chars = new Dictionary<ulong, char>();
    links.Each(parts =>
    {
        var current = new UInt64Link(parts);
        string line;
        if (current.Target == unicodeSymbolMarker)
        {
            var symbol = (char)rawNumberToAddressConverter.Convert(current.Source);
            chars.Add(current.Index, symbol);
            line = $"({current.Index}: '{symbol}'->{current.Target})";
        }
        else
        {
            var sourceString = LinkToString(links, constants, current.Source, chars, rawNumberToAddressConverter);
            var targetString = LinkToString(links, constants, current.Target, chars, rawNumberToAddressConverter);
            line = $"({current.Index}: {sourceString}->{targetString})";
        }
        Console.WriteLine(line);
        return @continue;
    }, new UInt64Link(any, any, any));

    // Assemble the anchored expression; AppendPattern (declared elsewhere) receives the builder.
    var patternBuilder = new StringBuilder();
    patternBuilder.Append('^');
    AppendPattern(links, constants, patternRootMarker, patternRootMarker, chars, any, @continue, patternBuilder, 0UL, rawNumberToAddressConverter);
    patternBuilder.Append('$');
    var result = patternBuilder.ToString();

    // Drop parentheses around groups made only of lowercase letters and '?'.
    var unwrapGroupRule = new Regex(@"\(([a-z\?]*)\)", RegexOptions.Compiled);
    result = ReplaceWhileMatching(unwrapGroupRule, result, "$1");

    // Turn "(|t)" into "t?".
    var optionalLetterRule = new Regex(@"\(\|([a-z])\)", RegexOptions.Compiled);
    result = ReplaceWhileMatching(optionalLetterRule, result, "$1?");

    // Repeat the first simplification, since the previous step can expose new plain groups.
    result = ReplaceWhileMatching(unwrapGroupRule, result, "$1");

    // Verify that every input word matches the resulting expression.
    var finalPattern = new Regex(result);
    foreach (var word in words)
    {
        if (!finalPattern.IsMatch(word))
        {
            Console.WriteLine($"Error: {word} does not match the pattern.");
        }
    }

    Console.WriteLine(result);
    Console.WriteLine(links.Count());
    Console.WriteLine("Hello World!");
}
/// <summary>
/// Passes every character of <paramref name="s"/>, together with its zero-based position in
/// the string, to BuiltCharacterPosition and returns the results as an array.
/// </summary>
/// <param name="s">The string whose characters are converted.</param>
/// <param name="i">Not read by this method; kept so that existing callers keep compiling.</param>
/// <param name="links">Links storage forwarded to BuiltCharacterPosition.</param>
/// <param name="addressToRawNumberConverter">Converter forwarded to BuiltCharacterPosition.</param>
/// <param name="charToUnicodeSymbolConverter">Converter forwarded to BuiltCharacterPosition.</param>
/// <returns>One element per character of <paramref name="s"/>, in character order.</returns>
private static ulong[] BuildSequence(string s, int i, ILinks<ulong> links, AddressToRawNumberConverter<ulong> addressToRawNumberConverter, Platform.Data.Doublets.Unicode.CharToUnicodeSymbolConverter<ulong> charToUnicodeSymbolConverter)
{
    // The lambda parameters deliberately avoid the name `i`: reusing it shadowed the method
    // parameter, which is a compile error (CS0136) before C# 8 and made it look as if the
    // outer `i` was the value being passed on. The per-character position is what is used.
    return s
        .Select((character, position) => BuiltCharacterPosition(links, addressToRawNumberConverter, charToUnicodeSymbolConverter, character, position))
        .ToArray();
}