Exemplo n.º 1
0
        /// <summary>
        /// <para>
        /// Initializes a new <see cref="FileStorage"/> instance over the links database file at the given path.
        /// </para>
        /// <para>
        /// The constructor wraps the file in a <see cref="FileMappedResizableDirectMemory"/>, builds a synchronized
        /// links store on top of it, creates one link whose source and target are itself, resolves the marker links
        /// (meaning root, unicode symbol, unicode sequence, set, file) using consecutive mapping indexes starting at 1,
        /// and finally wires up the cached string-to-sequence and sequence-to-string converters.
        /// </para>
        /// </summary>
        /// <param name="DBFilename">
        /// <para>Path of the database file handed to <see cref="FileMappedResizableDirectMemory"/>.</para>
        /// </param>
        public FileStorage(string DBFilename)
        {
            // Storage layer: file-backed memory -> united memory links -> synchronized wrapper.
            var constants = new LinksConstants<TLinkAddress>(enableExternalReferencesSupport: true);
            var memory = new FileMappedResizableDirectMemory(DBFilename);
            _disposableLinks = new UnitedMemoryLinks<TLinkAddress>(memory, UnitedMemoryLinks<UInt64>.DefaultLinksSizeStep, constants, IndexTreeType.Default);
            _synchronizedLinks = new SynchronizedLinks<TLinkAddress>(_disposableLinks);

            // Create a link and point both its source and its target at the link itself.
            var selfLink = _synchronizedLinks.Create();
            _synchronizedLinks.Update(selfLink, newSource: selfLink, newTarget: selfLink);

            Any = _synchronizedLinks.Constants.Any;

            // Marker links are resolved in a fixed order; each call consumes the next mapping index (1, 2, 3, ...).
            ushort nextMappingIndex = 1;
            _meaningRoot = GetOrCreateMeaningRoot(nextMappingIndex++);
            _unicodeSymbolMarker = GetOrCreateNextMapping(nextMappingIndex++);
            _unicodeSequenceMarker = GetOrCreateNextMapping(nextMappingIndex++);
            _setMarker = GetOrCreateNextMapping(nextMappingIndex++);
            _fileMarker = GetOrCreateNextMapping(nextMappingIndex++);

            _addressToNumberConverter = new AddressToRawNumberConverter<TLinkAddress>();
            _numberToAddressConverter = new RawNumberToAddressConverter<TLinkAddress>();

            // Building blocks shared by the two string converters below.
            var balancedVariant = new BalancedVariantConverter<TLinkAddress>(_synchronizedLinks);
            var symbolMatcher = new TargetMatcher<TLinkAddress>(_synchronizedLinks, _unicodeSymbolMarker);
            var sequenceMatcher = new TargetMatcher<TLinkAddress>(_synchronizedLinks, _unicodeSequenceMarker);
            var charToSymbol = new CharToUnicodeSymbolConverter<TLinkAddress>(_synchronizedLinks, _addressToNumberConverter, _unicodeSymbolMarker);
            var symbolToChar = new UnicodeSymbolToCharConverter<TLinkAddress>(_synchronizedLinks, _numberToAddressConverter, symbolMatcher);
            var walker = new RightSequenceWalker<TLinkAddress>(_synchronizedLinks, new DefaultStack<TLinkAddress>(), symbolMatcher.IsMatched);

            // Both directions are wrapped in a caching decorator.
            var stringToSequence = new StringToUnicodeSequenceConverter<TLinkAddress>(_synchronizedLinks, charToSymbol, balancedVariant, _unicodeSequenceMarker);
            _stringToUnicodeSequenceConverter = new CachingConverterDecorator<string, TLinkAddress>(stringToSequence);

            var sequenceToString = new UnicodeSequenceToStringConverter<TLinkAddress>(_synchronizedLinks, sequenceMatcher, walker, symbolToChar);
            _unicodeSequenceToStringConverter = new CachingConverterDecorator<TLinkAddress, string>(sequenceToString);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Indexes a file into a links store.
        /// </summary>
        /// <remarks>
        /// The links file path and the path of the file to index are obtained through
        /// <c>ConsoleHelpers.GetOrReadArgument</c> (argument positions 0 and 1). Both files must already exist;
        /// otherwise a message is written to the console and nothing is indexed. In every case the method ends by
        /// calling <c>ConsoleHelpers.PressAnyKeyToContinue</c>.
        /// </remarks>
        /// <param name="args">Command-line arguments forwarded to <c>ConsoleHelpers.GetOrReadArgument</c>.</param>
        public void Run(string[] args)
        {
            var linksFile = ConsoleHelpers.GetOrReadArgument(0, "Links file", args);
            var fileToIndex = ConsoleHelpers.GetOrReadArgument(1, "File to index", args);

            if (!File.Exists(linksFile))
            {
                Console.WriteLine("Entered links file does not exist.");
            }
            else if (!File.Exists(fileToIndex))
            {
                Console.WriteLine("Entered file to index does not exist.");
            }
            else
            {
                var cancellationSource = ConsoleHelpers.HandleCancellation();

                using (var memoryManager = new UInt64LinksMemoryManager(linksFile, UInt64LinksMemoryManager.DefaultLinksSizeStep * 16))
                using (var links = new UInt64Links(memoryManager))
                {
                    var syncLinks = new SynchronizedLinks<ulong>(links);
                    UnicodeMap.InitNew(syncLinks);
                    var sequences = new Sequences(syncLinks);

                    var fileIndexer = new FileIndexer(syncLinks, sequences);

                    // Indexing runs synchronously; the token lets the cancellation source stop it.
                    fileIndexer.IndexSync(fileToIndex, cancellationSource.Token);
                }
            }

            // Reached on every path, including the two "file does not exist" branches.
            ConsoleHelpers.PressAnyKeyToContinue();
        }
Exemplo n.º 3
0
 /// <summary>
 /// Initializes a new <see cref="Sequences"/> instance over the given synchronized links store using explicit options.
 /// </summary>
 /// <param name="links">Synchronized links store; its <c>SyncRoot</c> and <c>Constants</c> are captured by this instance.</param>
 /// <param name="options">Options for this instance; they are validated and then initialized against the links store before the constructor returns.</param>
 public Sequences(SynchronizedLinks <LinkIndex> links, SequencesOptions <LinkIndex> options)
 {
     Links   = links;
     _sync   = links.SyncRoot; // NOTE(review): links is dereferenced here without a null check
     Options = options;
     // Validation runs before initialization; InitOptions receives the store assigned to Links above.
     Options.ValidateOptions();
     Options.InitOptions(Links);
     Constants = links.Constants;
 }
        /// <summary>
        /// Creates a test scope backed by freshly generated temporary files.
        /// </summary>
        /// <param name="sequencesOptions">Options passed to the sequences instance; only used when <paramref name="useSequences"/> is <c>true</c>.</param>
        /// <param name="deleteFiles">Stored in <c>_deleteFiles</c>; not otherwise used by this constructor.</param>
        /// <param name="useSequences">When <c>true</c>, a <c>Sequences</c> instance is created over <c>Links</c>.</param>
        /// <param name="useLog">When <c>true</c>, the split-memory store is wrapped in a transactions layer that writes to the temporary transaction log file.</param>
        public TempLinksTestScope(SequencesOptions<ulong> sequencesOptions, bool deleteFiles = true, bool useSequences = false, bool useLog = false)
        {
            _deleteFiles = deleteFiles;
            TempFilename = Path.GetTempFileName();
            TempTransactionLogFilename = Path.GetTempFileName();

            // Data lives in the temp file itself; indexes live next to it under the "indexes" extension.
            var dataMemory = new FileMappedResizableDirectMemory(TempFilename);
            var indexMemory = new FileMappedResizableDirectMemory(Path.ChangeExtension(TempFilename, "indexes"));
            var splitMemoryLinks = new UInt64SplitMemoryLinks(dataMemory, indexMemory, UInt64SplitMemoryLinks.DefaultLinksSizeStep, new LinksConstants<ulong>(), Memory.IndexTreeType.Default, useLinkedList: true);

            ILinks<ulong> adapter;
            if (useLog)
            {
                adapter = (ILinks<ulong>)new UInt64LinksTransactionsLayer(splitMemoryLinks, TempTransactionLogFilename);
            }
            else
            {
                adapter = splitMemoryLinks;
            }

            MemoryAdapter = adapter;
            Links = new SynchronizedLinks<ulong>(new UInt64Links(MemoryAdapter));

            if (useSequences)
            {
                Sequences = new Sequences.Sequences(Links, sequencesOptions);
            }
        }
Exemplo n.º 5
0
 /// <summary>
 /// Initializes a new <see cref="Sequences"/> instance over the given synchronized links store,
 /// delegating to the two-argument constructor with a default-constructed options object.
 /// </summary>
 /// <param name="links">Synchronized links store forwarded to the two-argument constructor.</param>
 public Sequences(SynchronizedLinks <LinkIndex> links) : this(links, new SequencesOptions <LinkIndex>())
 {
 }
Exemplo n.º 6
0
        /// <summary>
        /// Interactive manual test: opens a links store in <paramref name="filename"/>, creates a handful of links
        /// and sequences, and prints the store contents and link counts to the console.
        /// </summary>
        /// <remarks>
        /// The method blocks on <see cref="Console.ReadKey()"/> three times: after the initial allocate/free pair,
        /// after the sequence operations, and once more after the store has been disposed.
        /// </remarks>
        /// <param name="filename">Path passed to the links memory manager.</param>
        public static void Test(string filename)
        {
            using (var memoryManager = new UInt64LinksMemoryManager(filename, 512 * 1024 * 1024))
            using (var links = new UInt64Links(memoryManager))
            {
                var syncLinks = new SynchronizedLinks<ulong>(links);
                var any = syncLinks.Constants.Any;

                // Allocate one link and free it straight away.
                memoryManager.FreeLink(memoryManager.AllocateLink());

                Console.ReadKey();

                // Two fresh links plus three links built from them.
                var linkA = syncLinks.Create();
                var linkB = syncLinks.Create();
                var linkC = syncLinks.CreateAndUpdate(linkA, linkB);
                var linkD = syncLinks.CreateAndUpdate(linkA, linkC);
                var linkE = syncLinks.CreateAndUpdate(linkD, linkB);

                // Dump every link currently in the store.
                syncLinks.Each(any, any, x =>
                {
                    memoryManager.PrintLink(x);
                    return true;
                });

                Console.WriteLine("---");
                Console.WriteLine(syncLinks.Count());

                var sequences = new Sequences(syncLinks);

                // The same ten-element sequence is created, enumerated and then compacted.
                sequences.Create(linkA, linkE, linkB, linkA, linkB, linkC, linkB, linkD, linkA, linkE);

                var matches = 0;
                sequences.Each(x =>
                {
                    matches++;
                    return true;
                }, linkA, linkE, linkB, linkA, linkB, linkC, linkB, linkD, linkA, linkE);

                sequences.Compact(linkA, linkE, linkB, linkA, linkB, linkC, linkB, linkD, linkA, linkE);

                Console.WriteLine(matches);
                Console.WriteLine(syncLinks.Count());

                // A sequence made of thirteen repetitions of the first link.
                sequences.Create(
                    linkA, linkA, linkA, linkA, linkA, linkA, linkA,
                    linkA, linkA, linkA, linkA, linkA, linkA);

                Console.WriteLine(syncLinks.Count());

                Console.ReadKey();

                Console.WriteLine("---");

                // Dump the store again after the sequence operations.
                syncLinks.Each(any, any, x =>
                {
                    memoryManager.PrintLink(x);
                    return true;
                });

                Console.WriteLine("---");
            }

            Console.ReadKey();
        }
 /// <summary>
 /// Initializes a new <see cref="Compressor"/> bound to the given links store and sequences instance.
 /// </summary>
 /// <param name="links">Synchronized links store kept in <c>_links</c>.</param>
 /// <param name="sequences">Sequences instance kept in <c>_sequences</c>.</param>
 public Compressor(SynchronizedLinks<ulong> links, Sequences sequences)
 {
     // Collaborators.
     _links = links;
     _sequences = sequences;

     // Empty frequency table for pairs.
     _pairsFrequencies = new UnsafeDictionary<UInt64Link, ulong>();

     // Both "max pair" slots start as the null link with a frequency of 1.
     _maxPair = UInt64Link.Null;
     _maxPair2 = UInt64Link.Null;
     _maxFrequency = 1;
     _maxFrequency2 = 1;
 }
        /// <summary>
        /// Manual experiment: fetches a web page, stores its URL and precompressed contents as sequences in a fresh
        /// "web.links" store, links the two together, and runs full and partial matching queries for "div".
        /// </summary>
        /// <remarks>
        /// Prints the precompression time and the number of links created beyond <c>UnicodeMap.MapSize</c>, then waits
        /// for a key press. Several intermediate results are written to <c>Global.Trash</c>
        /// (NOTE(review): presumably a sink that keeps otherwise unused values observable — confirm).
        /// </remarks>
        public static void Test()
        {
            File.Delete("web.links");

            using (var memoryManager = new UInt64LinksMemoryManager("web.links", 8 * 1024 * 1024))
            using (var links = new UInt64Links(memoryManager))
            {
                var syncLinks = new SynchronizedLinks<ulong>(links);
                UnicodeMap.InitNew(syncLinks);

                var sequences = new Sequences(syncLinks);

                // Fetch the page.
                const string url = "https://en.wikipedia.org/wiki/Main_Page";
                var pageContents = GetPageContents(url);

                Global.Trash = url.Length + pageContents.Length;

                var urlLink = sequences.CreateBalancedVariant(UnicodeMap.FromStringToLinkArray(url));

                var contentLinks = UnicodeMap.FromStringToLinkArray(pageContents);

                // Design notes:
                // [+] One could search not for the most frequent pair but for the first pair that occurs at least
                //     twice - slow, high quality, though not the highest.
                // [+] Or use a global dictionary instead of a local one (i.e. count once, then only perform
                //     replacements) - fast, but low quality.
                // Precompress0 gives the best speed / quality ratio (same as Data.Core.Sequences.Compressor.Precompress).

                ulong[] compressedLinks = null;

                const int runs = 1;
                for (var run = 0; run < runs; run++)
                {
                    var timer = Stopwatch.StartNew();
                    var compressor = new Data.Core.Sequences.Compressor(syncLinks, sequences, 1);
                    compressedLinks = compressor.Precompress(contentLinks);
                    timer.Stop();

                    Console.WriteLine(timer.Elapsed);
                }

                // Combining groups with compression (compression first, then groups) has been tried:
                // however the result is grouped after compression, no further compression is gained
                // (strange, but true), so the compressed array is stored directly.
                var contentLink = sequences.CreateBalancedVariant(compressedLinks);

                Global.Trash = sequences.FormatSequence(contentLink);

                // Round trip: the stored sequence is converted back to a string and compared with the page.
                var unpacked = UnicodeMap.FromSequenceLinkToString(contentLink, syncLinks);
                Global.Trash = (unpacked == pageContents);

                var linksCreated = syncLinks.Count() - UnicodeMap.MapSize;
                Console.WriteLine(linksCreated);
                Global.Trash = linksCreated;

                // Associate the URL sequence with the contents sequence.
                syncLinks.CreateAndUpdate(urlLink, contentLink);

                var divLinks = UnicodeMap.FromStringToLinkArray("div");

                var fullMatches = sequences.GetAllMatchingSequences1(divLinks);
                var partialMatches = sequences.GetAllPartiallyMatchingSequences1(divLinks);

                // NOTE(review): the intersection is built but never read afterwards.
                var common = fullMatches.Intersect(partialMatches);
            }

            Console.ReadKey();
        }
        /// <summary>
        /// Manual benchmark: fetches a web page once and measures how long it takes, and how many links it costs,
        /// to store its contents in a fresh "stats.links" store.
        /// </summary>
        /// <remarks>
        /// Phase one stores the raw contents as a balanced variant three times. Phase two runs 199 iterations in which
        /// the contents are first precompressed with a growing minimum frequency and then stored. Each iteration
        /// deletes and recreates the store file and prints one line of results; the method ends by waiting for a key press.
        /// </remarks>
        public static void Stats()
        {
            const string storeFile = "stats.links";
            const int storeSizeStep = 8 * 1024 * 1024;

            // Fetch the page and convert it to links once; every iteration reuses the same array.
            const string url = "https://en.wikipedia.org/wiki/Main_Page";
            var sourceLinks = UnicodeMap.FromStringToLinkArray(GetPageContents(url));

            // Phase one: balanced variant without precompression.
            for (var pass = 0; pass < 3; pass++)
            {
                File.Delete(storeFile);

                using (var memoryManager = new UInt64LinksMemoryManager(storeFile, storeSizeStep))
                using (var links = new UInt64Links(memoryManager))
                {
                    var syncLinks = new SynchronizedLinks<ulong>(links);
                    UnicodeMap.InitNew(syncLinks);

                    var sequences = new Sequences(syncLinks);

                    var timer = Stopwatch.StartNew();
                    sequences.CreateBalancedVariant(sourceLinks);
                    timer.Stop();

                    var linksCreated = syncLinks.Count() - UnicodeMap.MapSize;

                    Console.WriteLine($"Balanced Variant: {timer.Elapsed}, {sourceLinks.Length}, {linksCreated}");
                }
            }

            // Phase two: precompression with a minimum frequency that grows by (ulong)(1 + ln(step)) per step.
            var minFrequency = 0UL;

            for (var step = 1; step < 200; step++)
            {
                minFrequency += (ulong)(1 + Math.Log(step));

                File.Delete(storeFile);

                using (var memoryManager = new UInt64LinksMemoryManager(storeFile, storeSizeStep))
                using (var links = new UInt64Links(memoryManager))
                {
                    var syncLinks = new SynchronizedLinks<ulong>(links);
                    UnicodeMap.InitNew(syncLinks);

                    var sequences = new Sequences(syncLinks);

                    // The timed region covers compressor construction, precompression and storing the result.
                    var timer = Stopwatch.StartNew();
                    var compressor = new Data.Core.Sequences.Compressor(syncLinks, sequences, minFrequency);
                    var compressedLinks = compressor.Precompress(sourceLinks);
                    sequences.CreateBalancedVariant(compressedLinks);
                    timer.Stop();

                    var linksCreated = syncLinks.Count() - UnicodeMap.MapSize;

                    Console.WriteLine($"{timer.Elapsed}, {minFrequency}, {sourceLinks.Length}, {linksCreated}");
                }
            }

            Console.ReadKey();
        }