예제 #1
0
        /// <summary>
        /// Saves a small DAWG to an in-memory stream, loads it back and checks that every
        /// inserted key keeps its payload while the absent keys "cones" and "god" read as 0.
        /// </summary>
        public void PersistenceTest()
        {
            var dawgBuilder = new DawgBuilder<int> ();

            dawgBuilder.Insert ("cone", 10);
            dawgBuilder.Insert ("bone", 10);
            dawgBuilder.Insert ("gone", 9);
            dawgBuilder.Insert ("go", 5);

            var dawg = dawgBuilder.BuildDawg ();

            byte[] serialized;

            using (var memoryStream = new MemoryStream ())
            {
                dawg.SaveTo (memoryStream, (w, p) => w.Write (p));

                // ToArray copies only the bytes that were written. GetBuffer would hand back
                // the whole backing array, including unused capacity past the end of the data.
                serialized = memoryStream.ToArray ();
            }

            using (var input = new MemoryStream (serialized))
            {
                var rehydrated = Dawg<int>.Load (input, r => r.ReadInt32 ());

                Assert.AreEqual (10, rehydrated ["cone"]);
                Assert.AreEqual (10, rehydrated ["bone"]);
                Assert.AreEqual (0, rehydrated ["cones"]);
                Assert.AreEqual (9, rehydrated ["gone"]);
                Assert.AreEqual (5, rehydrated ["go"]);
                Assert.AreEqual (0, rehydrated ["god"]);
            }
        }
예제 #2
0
        /// <summary>
        /// Serializes a four-word DAWG into memory, deserializes it and verifies the payloads
        /// of the stored keys as well as the default value (0) for keys that were never inserted.
        /// </summary>
        public void PersistenceTest()
        {
            var dawgBuilder = new DawgBuilder <int> ();

            dawgBuilder.Insert("cone", 10);
            dawgBuilder.Insert("bone", 10);
            dawgBuilder.Insert("gone", 9);
            dawgBuilder.Insert("go", 5);

            var dawg = dawgBuilder.BuildDawg();

            byte[] payloadBytes;

            using (var output = new MemoryStream())
            {
                dawg.SaveTo(output, (w, p) => w.Write(p));

                // GetBuffer returns the stream's internal array, which is usually larger than
                // the data written to it; ToArray returns exactly the written bytes.
                payloadBytes = output.ToArray();
            }

            using (var input = new MemoryStream(payloadBytes))
            {
                var rehydrated = Dawg <int> .Load(input, r => r.ReadInt32());

                Assert.AreEqual(10, rehydrated ["cone"]);
                Assert.AreEqual(10, rehydrated ["bone"]);
                Assert.AreEqual(0, rehydrated ["cones"]);
                Assert.AreEqual(9, rehydrated ["gone"]);
                Assert.AreEqual(5, rehydrated ["go"]);
                Assert.AreEqual(0, rehydrated ["god"]);
            }
        }
예제 #3
0
        /// <summary>
        /// Expects GetPrefixes("readings") to yield the keys "read" and "reading", in that order.
        /// </summary>
        public void GetPrefixesTest()
        {
            var builder = new DawgBuilder<bool>();
            builder.Insert("read", true);
            builder.Insert("reading", true);

            var dawg = GetDawg(builder);

            // Flatten the matched keys into one comma-separated string for a single comparison.
            var prefixKeys = dawg.GetPrefixes("readings").Select(entry => entry.Key);
            string joinedKeys = string.Join(",", prefixKeys);

            Assert.AreEqual("read,reading", joinedKeys);
        }
예제 #4
0
        /// <summary>
        /// Builds a DAWG from "taps" and "tops" and expects it to report six nodes.
        /// </summary>
        public void AssertNodeCount()
        {
            var builder = new DawgBuilder<int>();
            builder.Insert("taps", 10);
            builder.Insert("tops", 10);

            var nodeCount = builder.BuildDawg().GetNodeCount();

            Assert.AreEqual(6, nodeCount);
        }
예제 #5
0
        /// <summary>
        /// Builds a DAWG from "probability" and "stability" and expects it to report 14 nodes.
        /// </summary>
        public void AssertNodeCount2()
        {
            var builder = new DawgBuilder<int>();
            builder.Insert("probability", 10);
            builder.Insert("stability", 10);

            var builtDawg = builder.BuildDawg();
            var nodeCount = builtDawg.GetNodeCount();

            Assert.AreEqual(14, nodeCount);
        }
예제 #6
0
        /// <summary>
        /// Inserts "tip" and "tap", obtains the DAWG through GetDawg and expects four nodes.
        /// </summary>
        public void AssertNodeCount()
        {
            var builder = new DawgBuilder<int>();
            builder.Insert("tip", 3);
            builder.Insert("tap", 3);

            var nodeCount = GetDawg(builder).GetNodeCount();

            Assert.AreEqual(4, nodeCount);
        }
예제 #7
0
        /// <summary>
        /// Checks that a DAWG holding "probability" and "stability" reports 14 nodes.
        /// </summary>
        public void AssertNodeCount2()
        {
            var builder = new DawgBuilder<int>();

            foreach (var word in new[] { "probability", "stability" })
            {
                builder.Insert(word, 10);
            }

            var nodeCount = builder.BuildDawg().GetNodeCount();

            Assert.AreEqual(14, nodeCount);
        }
예제 #8
0
        /// <summary>
        /// Checks that a DAWG holding "taps" and "tops" reports six nodes.
        /// </summary>
        public void AssertNodeCount()
        {
            var builder = new DawgBuilder<int>();

            foreach (var word in new[] { "taps", "tops" })
            {
                builder.Insert(word, 10);
            }

            var builtDawg = builder.BuildDawg();

            Assert.AreEqual(6, builtDawg.GetNodeCount());
        }
예제 #9
0
        /// <summary>
        /// Inserts "tip" and "tap" with payload 3 and verifies both read back as 3.
        /// </summary>
        public void TipTapTest()
        {
            var builder = new DawgBuilder<int>();
            builder.Insert("tip", 3);
            builder.Insert("tap", 3);

            var dawg = GetDawg(builder);

            // Same lookup order as before: "tap" first, then "tip".
            foreach (var key in new[] { "tap", "tip" })
            {
                Assert.AreEqual(3, dawg[key]);
            }
        }
예제 #10
0
        /// <summary>
        /// Inserts three reversed words and expects a prefix match on the reversed
        /// suffix "ility" to return exactly two entries.
        /// </summary>
        public void SuffixMatchTest()
        {
            var dawgBuilder = new DawgBuilder <bool> ();

            // Keys are inserted reversed, so the reversed suffix is queried as a prefix below.
            dawgBuilder.Insert("visibility".Reverse(), true);
            dawgBuilder.Insert("possibility".Reverse(), true);
            dawgBuilder.Insert("dexterity".Reverse(), true);

            var dawg = dawgBuilder.BuildDawg();

            // AreEqual reports the actual match count on failure; IsTrue(count == 2) only says "false".
            Assert.AreEqual(2, dawg.MatchPrefix("ility".Reverse()).Count());
        }
예제 #11
0
        /// <summary>
        /// Stores three words reversed and checks that matching the reversed suffix
        /// "ility" as a prefix yields exactly two entries.
        /// </summary>
        public void SuffixMatchTest()
        {
            var dawgBuilder = new DawgBuilder<bool> ();

            // Reversing the keys lets a suffix lookup be expressed as a prefix lookup.
            dawgBuilder.Insert ("visibility".Reverse (), true);
            dawgBuilder.Insert ("possibility".Reverse (), true);
            dawgBuilder.Insert ("dexterity".Reverse (), true);

            var dawg = dawgBuilder.BuildDawg ();

            // Comparing with AreEqual surfaces the actual count when the test fails.
            Assert.AreEqual (2, dawg.MatchPrefix ("ility".Reverse ()).Count ());
        }
예제 #12
0
        /// <summary>
        /// Inserts "ago" (9) and "ego" (10) and checks both payloads, plus that the
        /// truncated keys "ag" and "eg" read as 0.
        /// </summary>
        public void AgoEgo()
        {
            var builder = new DawgBuilder<int>();
            builder.Insert("ago", 9);
            builder.Insert("ego", 10);

            var builtDawg = builder.BuildDawg();

            // Full keys carry their payloads.
            Assert.AreEqual(9, builtDawg["ago"]);
            Assert.AreEqual(10, builtDawg["ego"]);

            // Truncated keys were never inserted.
            foreach (var partialKey in new[] { "ag", "eg" })
            {
                Assert.AreEqual(0, builtDawg[partialKey]);
            }
        }
예제 #13
0
        /// <summary>
        /// Verifies the payloads of "ago" and "ego" and that their two-letter
        /// prefixes "ag" and "eg" yield 0.
        /// </summary>
        public void AgoEgo()
        {
            var builder = new DawgBuilder<int>();
            builder.Insert("ago", 9);
            builder.Insert("ego", 10);

            var builtDawg = builder.BuildDawg();

            var agoValue = builtDawg["ago"];
            Assert.AreEqual(9, agoValue);

            var egoValue = builtDawg["ego"];
            Assert.AreEqual(10, egoValue);

            var agValue = builtDawg["ag"];
            Assert.AreEqual(0, agValue);

            var egValue = builtDawg["eg"];
            Assert.AreEqual(0, egValue);
        }
예제 #14
0
        /// <summary>
        /// Checks MatchPrefix against a DAWG holding "cat", "caterpillar" and "dog":
        /// matching prefixes return the expected keys in order, and the prefixes
        /// "boot" and "cats" return nothing.
        /// </summary>
        public void TestMethod1()
        {
            var dawgBuilder = new DawgBuilder <bool> ();

            dawgBuilder.Insert("cat", true);
            dawgBuilder.Insert("caterpillar", true);
            dawgBuilder.Insert("dog", true);

            var dawg = dawgBuilder.BuildDawg();

            // Comparing comma-joined keys with AreEqual shows expected vs. actual keys on
            // failure, which IsTrue(SequenceEqual(...)) cannot do.
            Assert.AreEqual("cat,caterpillar", string.Join(",", dawg.MatchPrefix("cat").Select(kvp => kvp.Key)));
            Assert.AreEqual("cat,caterpillar", string.Join(",", dawg.MatchPrefix("ca").Select(kvp => kvp.Key)));
            Assert.AreEqual("cat,caterpillar,dog", string.Join(",", dawg.MatchPrefix("").Select(kvp => kvp.Key)));

            // AreEqual reports the number of unexpected matches instead of a bare "false".
            Assert.AreEqual(0, dawg.MatchPrefix("boot").Count());
            Assert.AreEqual(0, dawg.MatchPrefix("cats").Count());
        }
예제 #15
0
        /// <summary>
        /// Reads the value stored under <paramref name="key"/> and writes back that value plus one.
        /// </summary>
        /// <param name="db">Builder whose entry is updated.</param>
        /// <param name="key">Key of the counter to bump.</param>
        private static void Increment(DawgBuilder <int> db, string key)
        {
            int current;

            // The boolean result is ignored; whatever TryGetValue leaves in 'current' is used.
            db.TryGetValue(key, out current);

            int next = current + 1;
            db.Insert(key, next);
        }
예제 #16
0
        /// <summary>
        /// Exercises MatchPrefix on a DAWG containing "cat", "caterpillar" and "dog":
        /// the prefixes "cat", "ca" and "" return the expected keys in order, while
        /// "boot" and "cats" match nothing.
        /// </summary>
        public void TestMethod1()
        {
            var dawgBuilder = new DawgBuilder<bool> ();

            dawgBuilder.Insert ("cat", true);
            dawgBuilder.Insert ("caterpillar", true);
            dawgBuilder.Insert ("dog", true);

            var dawg = dawgBuilder.BuildDawg ();

            // AreEqual on the comma-joined keys prints the actual keys when the test fails,
            // unlike IsTrue wrapped around SequenceEqual.
            Assert.AreEqual ("cat,caterpillar", string.Join (",", dawg.MatchPrefix ("cat").Select (kvp => kvp.Key)));
            Assert.AreEqual ("cat,caterpillar", string.Join (",", dawg.MatchPrefix ("ca").Select (kvp => kvp.Key)));
            Assert.AreEqual ("cat,caterpillar,dog", string.Join (",", dawg.MatchPrefix ("").Select (kvp => kvp.Key)));

            // A failing count is reported with its actual value.
            Assert.AreEqual (0, dawg.MatchPrefix ("boot").Count ());
            Assert.AreEqual (0, dawg.MatchPrefix ("cats").Count ());
        }
예제 #17
0
        /// <summary>
        /// Checks the MatchJoin output for several prefixes against a DAWG holding
        /// "cat", "caterpillar" and "dog".
        /// </summary>
        public void MatchPrefixTest()
        {
            var builder = new DawgBuilder<bool>();

            foreach (var word in new[] { "cat", "caterpillar", "dog" })
            {
                builder.Insert(word, true);
            }

            var target = GetDawg(builder);

            // Prefixes that match at least one key.
            Assert.AreEqual("cat,caterpillar", MatchJoin(target, "cat"));
            Assert.AreEqual("cat,caterpillar", MatchJoin(target, "ca"));
            Assert.AreEqual("cat,caterpillar,dog", MatchJoin(target, ""));

            // Prefixes that match nothing.
            Assert.AreEqual("", MatchJoin(target, "boot"));
            Assert.AreEqual("", MatchJoin(target, "cats"));
        }
예제 #18
0
        /// <summary>
        /// Builds a DAWG that maps each full word form found in <paramref name="mrdFile"/>
        /// to the <c>FormInterpretations</c> collecting every form with that spelling.
        /// Progress and timing information is written to the console.
        /// </summary>
        /// <param name="mrdFile">Reader whose <c>AllForms</c> sequence supplies the word forms.</param>
        /// <returns>The DAWG built from all inserted forms.</returns>
        public static Dawg <FormInterpretations> CreateDAWG(MRDFileReader mrdFile)
        {
            // Stopwatch measures elapsed time independently of the wall clock; a difference of
            // DateTime.Now values can be skewed by clock adjustments or daylight-saving changes.
            System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

            Console.WriteLine("Inserting forms in DAWG... Please wait...");
            var dawgBuilder = new DawgBuilder <FormInterpretations>();
            ulong cntForms = 0;

            foreach (WordForm f in mrdFile.AllForms)
            {
                // The key is the complete surface form: form prefix + flexion prefix + lemma base + flexion.
                string word = f.Prefix + f.Flexia.Prefix + f.Lemma.Base + f.Flexia.Flexion;

                // The null check on the payload, not TryGetValue's boolean result, decides
                // whether a fresh collection has to be registered for this word.
                FormInterpretations payload;
                dawgBuilder.TryGetValue(word, out payload);
                if (payload == null)
                {
                    payload = new FormInterpretations();
                    dawgBuilder.Insert(word, payload);
                }

                payload.Add(f);
                cntForms++;
            }

            Console.WriteLine("All forms count: " + cntForms);
            Console.WriteLine("Building... please wait...");
            Dawg <FormInterpretations> dawg = dawgBuilder.BuildDawg();

            Console.WriteLine("DAWG create time: {0}", stopwatch.Elapsed);
            return dawg;
        }
예제 #19
0
        /// <summary>
        /// Inserts "tip" with payload 0 and checks that it reads back as 0 from the DAWG
        /// returned by GetDawg.
        /// </summary>
        public void EmptyNodeTest()
        {
            var builder = new DawgBuilder<int>();
            builder.Insert("tip", 0);

            var dawg = GetDawg(builder);
            var storedValue = dawg["tip"];

            Assert.AreEqual(0, storedValue);
        }
예제 #20
0
        /// <summary>
        /// After inserting only "dates", looking up the partial key "date" on the builder
        /// is expected to return false and leave the out value false.
        /// </summary>
        public void TryGetValueOnPartialKey()
        {
            var dawgBuilder = new DawgBuilder<bool>();
            dawgBuilder.Insert("dates", true);

            bool found = dawgBuilder.TryGetValue("date", out var value);

            Assert.False(value);
            Assert.False(found);
        }
예제 #21
0
        /// <summary>
        /// Stores payload 5 under the empty string and expects to read it back from the built DAWG.
        /// </summary>
        public void EmptyKey()
        {
            var builder = new DawgBuilder<int>();
            builder.Insert("", 5);

            var builtDawg = builder.BuildDawg();
            var valueAtEmptyKey = builtDawg[""];

            Assert.AreEqual(5, valueAtEmptyKey);
        }
예제 #22
0
        /// <summary>
        /// Stores payload 5 under the empty string and expects the DAWG returned by
        /// GetDawg to yield 5 for that key.
        /// </summary>
        public void EmptyKey()
        {
            var builder = new DawgBuilder<int>();
            builder.Insert("", 5);

            var dawg = GetDawg(builder);
            var valueAtEmptyKey = dawg[""];

            Assert.AreEqual(5, valueAtEmptyKey);
        }
예제 #23
0
        /// <summary>
        /// With only the empty string inserted, GetPrefixes("readings") is expected to
        /// return exactly one entry whose key is the empty string.
        /// </summary>
        public void GetPrefixesOnEmptyString()
        {
            var builder = new DawgBuilder<bool>();
            builder.Insert("", true);

            var dawg = GetDawg(builder);

            // Single() also enforces that there is exactly one match.
            var onlyMatch = dawg.GetPrefixes("readings").Single();

            Assert.AreEqual("", onlyMatch.Key);
        }
예제 #24
0
        /// <summary>
        /// With "ab" inserted, GetPrefixes("ab") is expected to return that same key.
        /// </summary>
        public void GetPrefixesWithKeySameLengthAsItem()
        {
            var builder = new DawgBuilder<bool>();
            builder.Insert("ab", true);

            var dawg = GetDawg(builder);

            var matchedKeys = dawg.GetPrefixes("ab").Select(entry => entry.Key);
            string joinedKeys = string.Join(",", matchedKeys);

            Assert.AreEqual("ab", joinedKeys);
        }
예제 #25
0
        /// <summary>
        /// Checks that a payload stored under the empty key (5) can be read from the built DAWG.
        /// </summary>
        public void EmptyKey()
        {
            var builder = new DawgBuilder<int>();
            builder.Insert("", 5);

            var valueAtEmptyKey = builder.BuildDawg()[""];

            Assert.AreEqual(5, valueAtEmptyKey);
        }
예제 #26
0
        /// <summary>
        /// Adds a new lexeme to the model. If the builder reports an existing tag for the
        /// same word, the new tag is OR-ed into it before the entry is written back.
        /// </summary>
        /// <param name="lexem">The lexeme to add.</param>
        public void AddLexem(WordForm lexem)
        {
            ulong existingTag = (ulong)Tag.NoWord;
            ulong mergedTag   = (ulong)lexem.Tag;

            bool alreadyKnown = builder.TryGetValue(lexem.Word, out existingTag);

            if (alreadyKnown)
            {
                // Keep the bits recorded earlier for this word.
                mergedTag = mergedTag | existingTag;
            }

            builder.Insert(lexem.Word, mergedTag);
        }
예제 #27
0
        /// <summary>
        /// Pins the behaviour asserted here: after inserting only "dates", TryGetValue("date")
        /// on the builder returns true while the out value is null.
        /// </summary>
        public void TryGetValueUnexpectedBehaviour()
        {
            var dawgBuilder = new DawgBuilder<string>();
            dawgBuilder.Insert("dates", "dates");

            bool found = dawgBuilder.TryGetValue("date", out var value);

            Assert.True(found);
            Assert.Null(value);
        }
예제 #28
0
        /// <summary>
        /// Inserts a single key made of 200,000 'a' characters and checks that it can be
        /// looked up in the built DAWG.
        /// </summary>
        public void TestMethod1()
        {
            const int keyLength = 200 * 1000;
            string veryLongKey = new string('a', keyLength);

            var dawgBuilder = new DawgBuilder<bool>();
            dawgBuilder.Insert(veryLongKey, true);

            var builtDawg = dawgBuilder.BuildDawg();
            bool isPresent = builtDawg[veryLongKey];

            Assert.IsTrue(isPresent);
        }
예제 #29
0
        /// <summary>
        /// Builds a DAWG with one 200,000-character key and verifies the key is found.
        /// </summary>
        public void TestMethod1()
        {
            string veryLongKey = new string('a', 200 * 1000);

            var dawgBuilder = new DawgBuilder<bool>();
            dawgBuilder.Insert(veryLongKey, true);

            bool isPresent = dawgBuilder.BuildDawg()[veryLongKey];

            Assert.IsTrue(isPresent);
        }
예제 #30
0
        /// <summary>
        /// Inserts a key of 200,000 'a' characters, supplied as a character sequence, and
        /// checks that the DAWG returned by GetDawg finds it.
        /// </summary>
        public void LongStringTest()
        {
            var manyAs = Enumerable.Repeat('a', 200 * 1000);

            var builder = new DawgBuilder<bool>();
            builder.Insert(manyAs, true);

            var dawg = GetDawg(builder);
            bool isPresent = dawg[manyAs];

            Assert.IsTrue(isPresent);
        }
예제 #31
0
        /// <summary>
        /// Small demonstration of the DAWG API: exact lookups, longest common prefix
        /// length, and prefix enumeration printed to the console.
        /// </summary>
        private static void DAWGTest()
        {
            var dawgBuilder = new DawgBuilder <string>();
            dawgBuilder.Insert("МЕГА", "1");
            dawgBuilder.Insert("ГИГА", "2");
            dawgBuilder.Insert("СУПЕР", "3");
            dawgBuilder.Insert("ПРЕ", "4");
            dawgBuilder.Insert("ПРЕД", "5");
            dawgBuilder.Insert("СУПЕРГЕТЕРО", "6");
            var dawg = dawgBuilder.BuildDawg();

            // Key is present: found = "3".
            string found = dawg["СУПЕР"];
            // Key is absent: missing = null.
            string missing = dawg["НАНОФУСЬКА"];
            // A prefix of this word is stored, but the whole word is not: partial = null.
            string partial = dawg["СУПЕРШПИОН"];

            int sharedLength = dawg.GetLongestCommonPrefixLength("СУПЕРШПИОН");

            // Prints 5 (the length of "СУПЕР").
            Console.WriteLine(sharedLength);
            string sharedPrefix = "СУПЕРШПИОН".Substring(0, sharedLength);

            // The shared prefix itself is a stored key: viaPrefix = "3".
            string viaPrefix = dawg[sharedPrefix];

            // Search for СУПЕР*.
            foreach (var match in dawg.MatchPrefix(sharedPrefix))
            {
                Console.WriteLine("{0} {1}", match.Key, match.Value);
            }

            // Search for ПР*.
            foreach (var match in dawg.MatchPrefix("ПР"))
            {
                Console.WriteLine("{0} {1}", match.Key, match.Value);
            }
        }
예제 #32
0
        /// <summary>
        /// Reads every word from the given dictionary file into a DAWG and wraps the
        /// result in a <c>PrefixMatcher</c>.
        /// </summary>
        /// <param name="dictionaryFile">Argument passed to the <c>WordDictionary</c> constructor.</param>
        /// <returns>A prefix matcher backed by the built DAWG.</returns>
        protected override IPrefixMatcher Build(string dictionaryFile)
        {
            using (var words = new WordDictionary(dictionaryFile))
            {
                var builder = new DawgBuilder<bool>();

                foreach (var entry in words)
                {
                    // The payload carries no information; only key membership matters.
                    builder.Insert(entry, true);
                }

                var builtDawg = builder.BuildDawg();
                return new PrefixMatcher(builtDawg);
            }
        }
예제 #33
0
        /// <summary>
        /// Records that document <paramref name="id"/> contains <paramref name="word"/>,
        /// creating the word's postings list on first use, and adds the document to the
        /// set of all documents.
        /// </summary>
        /// <param name="id">Identifier of the document being indexed.</param>
        /// <param name="word">Term found in the document.</param>
        /// <param name="position">Not used by this method.</param>
        public void IndexTerm(DocumentId id, string word, int position)
        {
            builder.TryGetValue(word, out var postings);

            // The null check, not TryGetValue's result, decides whether a list must be created.
            if (postings == null)
            {
                postings = new RangePostingsList();
                builder.Insert(word, postings);
            }

            postings.Add(id);
            allDocuments.Add(id);
        }
예제 #34
0
        /// <summary>
        /// Draws 100 random items from a three-key DAWG with a fixed seed and expects
        /// all three keys to appear at least once.
        /// </summary>
        public void GetRandomItemTest()
        {
            var builder = new DawgBuilder<bool>();

            // Keys of very different lengths, to see how length affects the uniformity of the distribution.
            builder.Insert("aaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true);
            builder.Insert("aa", true);
            builder.Insert("b", true);

            var dawg = SaveToFileAndLoadBack(builder.BuildDawg());

            int sampleCount = 100;

            // Fixed seed for the generator.
            var random = new Random(1);

            var samples = Enumerable.Range(0, sampleCount)
                                    .Select(index => dawg.GetRandomItem(random));

            var hitsPerKey = samples.GroupBy(sample => sample.Key)
                                    .ToDictionary(group => group.Key, group => group.Count());

            Assert.AreEqual(3, hitsPerKey.Count);
        }
예제 #35
0
        /// <summary>
        /// Loads the word list "eneko-words.txt" from five directories above the working
        /// directory, inserts every line into a builder and builds the DAWG. The test
        /// makes no assertion; it passes when nothing throws.
        /// </summary>
        public void TestMethod1()
        {
            // Path.Combine uses the platform's directory separator, so the relative path also
            // resolves on systems where '\' is not a separator. On Windows it yields the same path.
            var wordListPath = Path.Combine("..", "..", "..", "..", "..", "eneko-words.txt");
            var words = File.ReadAllLines(wordListPath);

            var builder = new DawgBuilder <bool> ();

            foreach (var word in words)
            {
                builder.Insert(word, true);
            }

            builder.BuildDawg();
        }
예제 #36
0
        /// <summary>
        /// With only "cone" (10) inserted, checks its payload and that the shorter, longer
        /// and unrelated keys "con", "cones" and "pit" all read as 0.
        /// </summary>
        public void TestMethod1()
        {
            var builder = new DawgBuilder<int>();
            builder.Insert("cone", 10);

            var builtDawg = builder.BuildDawg();

            Assert.AreEqual(10, builtDawg["cone"]);

            // None of these keys was inserted.
            foreach (var absentKey in new[] { "con", "cones", "pit" })
            {
                Assert.AreEqual(0, builtDawg[absentKey]);
            }
        }
예제 #37
0
        /// <summary>
        /// Inserts every word into a fresh builder and wraps the root returned by
        /// <c>Finish</c> in a <c>TestDawgNode</c>.
        /// </summary>
        /// <param name="words">Words to insert, in enumeration order.</param>
        /// <returns>A test node wrapping the finished builder's root.</returns>
        protected override TestDawgNode CreateInstance(IEnumerable <string> words)
        {
            var dawgBuilder = new DawgBuilder();

            foreach (var entry in words)
            {
                dawgBuilder.Insert(entry);
            }

            return new TestDawgNode(dawgBuilder.Finish());
        }
예제 #38
0
        /// <summary>
        /// Inserts "cone" with payload 10 and verifies that only the exact key returns it;
        /// "con", "cones" and "pit" are expected to yield 0.
        /// </summary>
        public void TestMethod1()
        {
            var builder = new DawgBuilder<int>();
            builder.Insert("cone", 10);

            var builtDawg = builder.BuildDawg();

            var exactMatch = builtDawg["cone"];
            Assert.AreEqual(10, exactMatch);

            var shorterKey = builtDawg["con"];
            Assert.AreEqual(0, shorterKey);

            var longerKey = builtDawg["cones"];
            Assert.AreEqual(0, longerKey);

            var unrelatedKey = builtDawg["pit"];
            Assert.AreEqual(0, unrelatedKey);
        }
예제 #39
0
        /// <summary>
        /// Reads "eneko-words.txt" from five directories above the working directory,
        /// inserts each line into a builder and builds the DAWG. There is no assertion;
        /// the test passes when nothing throws.
        /// </summary>
        public void TestMethod1()
        {
            // Build the relative path with Path.Combine so it uses the platform's separator.
            // On Windows this produces the same path as the former backslash literal.
            var wordListPath = Path.Combine ("..", "..", "..", "..", "..", "eneko-words.txt");
            var words = File.ReadAllLines (wordListPath);

            var builder = new DawgBuilder<bool> ();

            foreach (var word in words)
            {
                builder.Insert (word, true);
            }

            builder.BuildDawg ();
        }
예제 #40
0
        /// <summary>
        /// Builds a DAWG containing only <paramref name="word"/>, saves it to an in-memory
        /// stream and returns what <c>WordExists</c> reports for that word and stream.
        /// </summary>
        /// <param name="word">The word to insert.</param>
        /// <returns>The result of <c>WordExists(word, stream)</c>.</returns>
        public static bool InsertWord(String word)
        {
            var builder = new DawgBuilder<bool>();
            builder.Insert(word, true);

            var singleWordDawg = builder.BuildDawg();

            using (Stream buffer = new MemoryStream(100))
            {
                // NOTE(review): the payload writer is null - confirm SaveTo accepts that.
                Action<BinaryWriter, bool> payloadWriter = null;
                singleWordDawg.SaveTo(buffer, payloadWriter);

                // Saving to disk is currently disabled:
                //bool saved = FileHelper.Save(_BASEPATH + _FILENAME, stream);

                // NOTE(review): the stream is passed on without being rewound - verify that
                // WordExists seeks to the start before reading.
                bool exists = WordExists(word, buffer);
                return exists;
            }
        }
예제 #41
0
        /// <summary>
        /// Loads "eneko-words.txt" from the test directory, inserts every line with payload
        /// true and checks that each word is found in the DAWG returned by GetDawg.
        /// </summary>
        public void EnekoWordListTest()
        {
            var wordListPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "eneko-words.txt");
            var words = File.ReadAllLines(wordListPath);

            var builder = new DawgBuilder<bool>();

            foreach (var entry in words)
            {
                builder.Insert(entry, true);
            }

            var dawg = GetDawg(builder);

            foreach (string expected in words)
            {
                // The word is passed as the failure message so a miss names the offending key.
                Assert.IsTrue(dawg[expected], expected);
            }
        }
예제 #42
0
 /// <summary>
 /// Looks up the value stored under <paramref name="key"/> and stores that value plus one.
 /// </summary>
 private static void Increment(DawgBuilder<int> db, string key)
 {
     int previous;

     // The return value is not inspected; the out value is used as-is.
     db.TryGetValue(key, out previous);

     int updated = previous + 1;
     db.Insert(key, updated);
 }