Пример #1
0
        // Builds a balanced variant of the sequence e1, e2, e1, e2 and checks which stored
        // sequences Sequences.MatchPattern returns for patterns that contain the
        // Sequences.ZeroOrMany marker.
        //
        // The links storage lives in a temp file created by Path.GetTempFileName(); the file is
        // removed in a finally block so that a failing assertion does not leave it on disk.
        public void PatternMatchTest()
        {
            string tempFilename = Path.GetTempFileName();

            try
            {
                const ulong itself = Links.Itself;
                const ulong zeroOrMany = Sequences.ZeroOrMany;

                using (var links = new Links(tempFilename, LinksSizeStep))
                {
                    // Two links created with Links.Itself as both source and target.
                    var e1 = links.Create(itself, itself);
                    var e2 = links.Create(itself, itself);

                    var sequence = new[]
                    {
                        e1, e2, e1, e2 // mama / papa
                    };

                    var sequences = new Sequences(links);

                    var balancedVariant = sequences.CreateBalancedVariant(sequence);

                    // Expected storage layout (as sketched by the original author):
                    // 1: [1]
                    // 2: [2]
                    // 3: [1,2]
                    // 4: [1,2,1,2]

                    // Source of the balanced variant; presumably link 3 ([1,2]) from the sketch above.
                    var pair = links.GetSource(balancedVariant);

                    // No stored sequence is expected to match "e2, e1, ...".
                    var matchedSequences1 = sequences.MatchPattern(e2, e1, zeroOrMany);

                    Assert.IsTrue(matchedSequences1.Count == 0);

                    // No stored sequence is expected to match "..., e2, e1".
                    var matchedSequences2 = sequences.MatchPattern(zeroOrMany, e2, e1);

                    Assert.IsTrue(matchedSequences2.Count == 0);

                    // No stored sequence is expected to match "e1, ..., e1".
                    var matchedSequences3 = sequences.MatchPattern(e1, zeroOrMany, e1);

                    Assert.IsTrue(matchedSequences3.Count == 0);

                    // "e1, ..., e2" must match both the pair and the whole balanced variant.
                    var matchedSequences4 = sequences.MatchPattern(e1, zeroOrMany, e2);

                    Assert.IsTrue(matchedSequences4.Contains(pair));
                    Assert.IsTrue(matchedSequences4.Contains(balancedVariant));

                    // Same deletions, in the same order, as the original index loop
                    // (e1 and e2 are each passed to Delete twice).
                    foreach (var element in sequence)
                    {
                        links.Delete(element);
                    }
                }
            }
            finally
            {
                // Runs after the storage has been disposed, whether or not the test passed.
                File.Delete(tempFilename);
            }
        }
Пример #2
0
        // Creates a balanced variant from 200 freshly created links and checks that both
        // Sequences.GetAllMatchingSequences0 and Sequences.GetAllMatchingSequences1 return
        // exactly that one sequence when searching for the full element list.
        //
        // The links storage lives in a temp file created by Path.GetTempFileName(); the file is
        // removed in a finally block so that a failing assertion does not leave it on disk.
        public void BalancedVariantSearchTest()
        {
            string tempFilename = Path.GetTempFileName();

            try
            {
                const long sequenceLength = 200;

                const ulong itself = Links.Itself;

                using (var links = new Links(tempFilename, LinksSizeStep))
                {
                    var sequence = new ulong[sequenceLength];
                    for (int i = 0; i < sequenceLength; i++)
                    {
                        sequence[i] = links.Create(itself, itself);
                    }

                    var sequences = new Sequences(links);

                    // The stopwatches record how long each step takes; the values are not
                    // asserted on (the timing comparison is intentionally left out).
                    var sw1 = Stopwatch.StartNew();
                    var balancedVariant = sequences.CreateBalancedVariant(sequence);
                    sw1.Stop();

                    var sw2 = Stopwatch.StartNew();
                    var searchResults2 = sequences.GetAllMatchingSequences0(sequence);
                    sw2.Stop();

                    var sw3 = Stopwatch.StartNew();
                    var searchResults3 = sequences.GetAllMatchingSequences1(sequence);
                    sw3.Stop();

                    // sequences.Each(sequence) is deliberately not exercised here:
                    // with 200 elements it would take forever.

                    Assert.IsTrue(searchResults2.Count == 1 && balancedVariant == searchResults2[0]);

                    Assert.IsTrue(searchResults3.Count == 1 && balancedVariant == searchResults3.First());

                    foreach (var element in sequence)
                    {
                        links.Delete(element);
                    }
                }
            }
            finally
            {
                // Runs after the storage has been disposed, whether or not the test passed.
                File.Delete(tempFilename);
            }
        }
        // Console experiment: downloads the Wikipedia main page, converts the URL and the page
        // text to link arrays via UnicodeMap, precompresses the page with
        // Data.Core.Sequences.Compressor, stores it as a balanced variant in "web.links",
        // prints the compression time and the number of links beyond UnicodeMap.MapSize,
        // and finally runs full and partial searches for "div". Waits for a key press before returning.
        //
        // Intermediate values are assigned to Global.Trash, exactly as in the original code.
        public static void Test()
        {
            const string storageFile = "web.links";
            const string url = "https://en.wikipedia.org/wiki/Main_Page";

            // Always start from an empty storage file.
            File.Delete(storageFile);

            using (var memoryManager = new UInt64LinksMemoryManager(storageFile, 8 * 1024 * 1024))
            using (var rawLinks = new UInt64Links(memoryManager))
            {
                var syncLinks = new SynchronizedLinks<ulong>(rawLinks);
                UnicodeMap.InitNew(syncLinks);

                var sequences = new Sequences(syncLinks);

                // Get content
                var pageContents = GetPageContents(url);

                var totalChars = url.Length + pageContents.Length;
                Global.Trash = totalChars;

                var urlSymbols = UnicodeMap.FromStringToLinkArray(url);
                var urlLink = sequences.CreateBalancedVariant(urlSymbols);

                var pageSymbols = UnicodeMap.FromStringToLinkArray(pageContents);

                // Notes on alternative compression strategies:
                // [+] One could search not for the maximum, but for the first one that occurs
                //     at least twice - slow, high quality, though not the highest.
                // [+] Or use a global dictionary instead of a local one (i.e. count once, then
                //     only perform replacements) - fast, but low quality.
                // Precompress0 gives the best speed / quality ratio
                // (the same as Data.Core.Sequences.Compressor.Precompress).

                // Single timed compression pass; the compressor construction is part of the timing.
                var compressionTimer = Stopwatch.StartNew();
                var compressor = new Data.Core.Sequences.Compressor(syncLinks, sequences, 1);
                ulong[] compressedPage = compressor.Precompress(pageSymbols);
                compressionTimer.Stop();
                Console.WriteLine(compressionTimer.Elapsed);

                // Combining groups and compression (compression first, then groups) is DONE:
                // however you group after compression, no further compression is gained
                // (strange, but true). Grouping the compressed array first is equal to calling
                // sequences.CreateBalancedVariant on the compressed array directly.
                var pageLink = sequences.CreateBalancedVariant(compressedPage);

                var formattedPage = sequences.FormatSequence(pageLink);
                Global.Trash = formattedPage;

                // Round trip: the unpacked text is compared with the downloaded page.
                var unpackedPage = UnicodeMap.FromSequenceLinkToString(pageLink, syncLinks);
                Global.Trash = (unpackedPage == pageContents);

                // Number of links in the storage not counting UnicodeMap.MapSize.
                var totalLinks = syncLinks.Count() - UnicodeMap.MapSize;
                Console.WriteLine(totalLinks);
                Global.Trash = totalLinks;

                syncLinks.CreateAndUpdate(urlLink, pageLink);

                var divSymbols = UnicodeMap.FromStringToLinkArray("div");

                var fullMatches = sequences.GetAllMatchingSequences1(divSymbols);
                var partialMatches = sequences.GetAllPartiallyMatchingSequences1(divSymbols);

                // The result is not read afterwards in this method.
                var commonMatches = fullMatches.Intersect(partialMatches);
            }

            Console.ReadKey();
        }
Пример #4
0
        // Creates a balanced variant from 200 freshly created links, then searches for the
        // sub-sequence that omits the first and the last element. Both
        // Sequences.GetAllPartiallyMatchingSequences0 and
        // Sequences.GetAllPartiallyMatchingSequences1 must return exactly the balanced variant.
        //
        // The links storage lives in a temp file created by Path.GetTempFileName(); the file is
        // removed in a finally block so that a failing assertion does not leave it on disk.
        public void BalancedPartialVariantsSearchTest()
        {
            string tempFilename = Path.GetTempFileName();

            try
            {
                const long sequenceLength = 200;

                const ulong itself = Links.Itself;

                using (var links = new Links(tempFilename, LinksSizeStep))
                {
                    var sequence = new ulong[sequenceLength];
                    for (int i = 0; i < sequenceLength; i++)
                    {
                        sequence[i] = links.Create(itself, itself);
                    }

                    var sequences = new Sequences(links);

                    var balancedVariant = sequences.CreateBalancedVariant(sequence);

                    // Elements 1 .. sequenceLength - 2 of the original sequence
                    // (everything except the first and the last element).
                    var partialSequence = new ulong[sequenceLength - 2];

                    Array.Copy(sequence, 1, partialSequence, 0, sequenceLength - 2);

                    // The stopwatches record how long each search takes; the values are not asserted on.
                    var sw1 = Stopwatch.StartNew();
                    var searchResults1 = sequences.GetAllPartiallyMatchingSequences0(partialSequence);
                    sw1.Stop();

                    var sw2 = Stopwatch.StartNew();
                    var searchResults2 = sequences.GetAllPartiallyMatchingSequences1(partialSequence);
                    sw2.Stop();

                    Assert.IsTrue(searchResults1.Count == 1 && balancedVariant == searchResults1[0]);

                    Assert.IsTrue(searchResults2.Count == 1 && balancedVariant == searchResults2.First());

                    foreach (var element in sequence)
                    {
                        links.Delete(element);
                    }
                }
            }
            finally
            {
                // Runs after the storage has been disposed, whether or not the test passed.
                File.Delete(tempFilename);
            }
        }
        // Console experiment that prints timing and link-count statistics for storing the
        // Wikipedia main page in a fresh "stats.links" storage:
        //   1. three runs of a plain balanced variant over the uncompressed link array;
        //   2. 199 runs of Compressor.Precompress followed by a balanced variant, with a
        //      minimum frequency that grows by (ulong)(1 + Math.Log(i)) on each run.
        // Waits for a key press before returning.
        public static void Stats()
        {
            const string pageUrl = "https://en.wikipedia.org/wiki/Main_Page";
            const string storageFile = "stats.links";
            const int storageSize = 8 * 1024 * 1024;

            // Get content
            var pageContents = GetPageContents(pageUrl);
            var sourceLinks = UnicodeMap.FromStringToLinkArray(pageContents);

            // Baseline: balanced variant without precompression, measured three times.
            for (var run = 0; run < 3; run++)
            {
                // Every run starts from an empty storage file.
                File.Delete(storageFile);

                using (var memory = new UInt64LinksMemoryManager(storageFile, storageSize))
                using (var rawLinks = new UInt64Links(memory))
                {
                    var store = new SynchronizedLinks<ulong>(rawLinks);
                    UnicodeMap.InitNew(store);

                    var sequences = new Sequences(store);

                    var timer = Stopwatch.StartNew();
                    sequences.CreateBalancedVariant(sourceLinks);
                    timer.Stop();

                    // Links in the storage not counting UnicodeMap.MapSize.
                    var linkCount = store.Count() - UnicodeMap.MapSize;

                    Console.WriteLine($"Balanced Variant: {timer.Elapsed}, {sourceLinks.Length}, {linkCount}");
                }
            }

            // Precompression with a growing minimum frequency.
            var minFrequency = 0UL;

            for (var step = 1; step < 200; step++)
            {
                minFrequency += (ulong)(1 + Math.Log(step));

                // Every run starts from an empty storage file.
                File.Delete(storageFile);

                using (var memory = new UInt64LinksMemoryManager(storageFile, storageSize))
                using (var rawLinks = new UInt64Links(memory))
                {
                    var store = new SynchronizedLinks<ulong>(rawLinks);
                    UnicodeMap.InitNew(store);

                    var sequences = new Sequences(store);

                    // The timing covers compressor construction, precompression and the balanced variant.
                    var timer = Stopwatch.StartNew();
                    var compressor = new Data.Core.Sequences.Compressor(store, sequences, minFrequency);
                    var compressedLinks = compressor.Precompress(sourceLinks);
                    sequences.CreateBalancedVariant(compressedLinks);
                    timer.Stop();

                    // Links in the storage not counting UnicodeMap.MapSize.
                    var linkCount = store.Count() - UnicodeMap.MapSize;

                    Console.WriteLine($"{timer.Elapsed}, {minFrequency}, {sourceLinks.Length}, {linkCount}");
                }
            }

            Console.ReadKey();
        }