Ejemplo n.º 1
0
        /// <summary>
        /// Builds a DAWG keyed by the full text of every word form in
        /// <paramref name="mrdFile"/>; the payload stored under a key collects all
        /// forms that produce that text.
        /// </summary>
        /// <param name="mrdFile">Reader whose <c>AllForms</c> sequence is enumerated once.</param>
        /// <returns>The DAWG returned by <c>DawgBuilder.BuildDawg()</c> after all forms were inserted.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="mrdFile"/> is <c>null</c>.</exception>
        public static Dawg <FormInterpretations> CreateDAWG(MRDFileReader mrdFile)
        {
            if (mrdFile == null)
            {
                throw new ArgumentNullException("mrdFile");
            }

            // Stopwatch measures elapsed time independently of wall-clock adjustments
            // (DST switches, clock sync), unlike subtracting two DateTime.Now readings.
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

            Console.WriteLine("Inserting forms in DAWG... Please wait...");
            DawgSharp.DawgBuilder <FormInterpretations> dawgBuilder = new DawgBuilder <FormInterpretations>();
            UInt64 cntForms = 0;

            foreach (WordForm f in mrdFile.AllForms)
            {
                // The key is the complete string: form prefix, flexia prefix, lemma base and flexion.
                string word = f.Prefix + f.Flexia.Prefix + f.Lemma.Base + f.Flexia.Flexion;

                // Different forms can yield the same string, so reuse the payload
                // already stored under this key and create one only when none exists.
                FormInterpretations payload = null;
                dawgBuilder.TryGetValue(word, out payload);
                if (payload == null)
                {
                    payload = new FormInterpretations();
                    dawgBuilder.Insert(word, payload);
                }
                payload.Add(f);
                cntForms++;
            }
            Console.WriteLine("All forms count: " + cntForms);
            Console.WriteLine("Building... please wait...");
            Dawg <FormInterpretations> dawg = dawgBuilder.BuildDawg();

            // Elapsed is a TimeSpan, the same type the previous DateTime subtraction produced.
            Console.WriteLine("DAWG create time: {0}", timer.Elapsed);
            return(dawg);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Console demo of the DAWG API: builds a small dictionary of Cyrillic keys,
        /// performs exact lookups, finds the longest common prefix of a query and
        /// prints all entries matching two prefixes.
        /// </summary>
        private static void DAWGTest()
        {
            var demoBuilder = new DawgBuilder <string>();

            // Keys and their payloads, inserted pairwise in this order.
            string[] keys     = { "МЕГА", "ГИГА", "СУПЕР", "ПРЕ", "ПРЕД", "СУПЕРГЕТЕРО" };
            string[] payloads = { "1", "2", "3", "4", "5", "6" };
            for (int i = 0; i < keys.Length; i++)
            {
                demoBuilder.Insert(keys[i], payloads[i]);
            }

            DawgSharp.Dawg <string> dictionary = demoBuilder.BuildDawg();

            const string query = "СУПЕРШПИОН";

            string exactHit    = dictionary["СУПЕР"];         // present: expected "3"
            string missing     = dictionary["НАНОФУСЬКА"];    // absent: expected null
            string partialOnly = dictionary[query];           // starts with a stored key, but the whole word is not stored: expected null

            int sharedLength = dictionary.GetLongestCommonPrefixLength(query);

            Console.WriteLine(sharedLength);                  // expected 5, the length of "СУПЕР"
            string sharedPrefix = query.Substring(0, sharedLength);

            string prefixHit = dictionary[sharedPrefix];      // expected "3": the prefix itself is a stored key

            // First list everything matching СУПЕР*, then everything matching ПР*.
            foreach (string pattern in new [] { sharedPrefix, "ПР" })
            {
                foreach (var entry in dictionary.MatchPrefix(pattern))
                {
                    Console.WriteLine("{0} {1}", entry.Key, entry.Value);
                }
            }
        }
Ejemplo n.º 3
0
        // Round-trips a small DAWG through an in-memory stream and checks that the
        // reloaded instance returns the stored payloads, and 0 for keys that were
        // never inserted.
        public void PersistenceTest()
        {
            var dawgBuilder = new DawgBuilder<int> ();

            dawgBuilder.Insert ("cone", 10);
            dawgBuilder.Insert ("bone", 10);
            dawgBuilder.Insert ("gone", 9);
            dawgBuilder.Insert ("go", 5);

            var dawg = dawgBuilder.BuildDawg ();

            byte[] serialized;

            using (var memoryStream = new MemoryStream ())
            {
                dawg.SaveTo (memoryStream, (w, p) => w.Write (p));

                // ToArray copies exactly the bytes written. GetBuffer would expose the
                // whole backing array, including unused capacity past Length.
                serialized = memoryStream.ToArray ();
            }

            var rehydrated = Dawg<int>.Load (new MemoryStream (serialized), r => r.ReadInt32 ());

            Assert.AreEqual (10, rehydrated ["cone"]);
            Assert.AreEqual (10, rehydrated ["bone"]);
            Assert.AreEqual (0, rehydrated ["cones"]);
            Assert.AreEqual (9, rehydrated ["gone"]);
            Assert.AreEqual (5, rehydrated ["go"]);
            Assert.AreEqual (0, rehydrated ["god"]);
        }
Ejemplo n.º 4
0
        // Saves a four-word DAWG to memory, loads it back and verifies that stored
        // keys keep their payloads while unknown keys yield 0.
        public void PersistenceTest()
        {
            var dawgBuilder = new DawgBuilder <int> ();

            dawgBuilder.Insert("cone", 10);
            dawgBuilder.Insert("bone", 10);
            dawgBuilder.Insert("gone", 9);
            dawgBuilder.Insert("go", 5);

            var dawg = dawgBuilder.BuildDawg();

            byte[] serialized;

            using (var memoryStream = new MemoryStream())
            {
                dawg.SaveTo(memoryStream, (w, p) => w.Write(p));

                // Take only the bytes actually written: GetBuffer returns the full
                // backing array, whose tail beyond Length is unused capacity.
                serialized = memoryStream.ToArray();
            }

            var rehydrated = Dawg <int> .Load(new MemoryStream (serialized), r => r.ReadInt32());

            Assert.AreEqual(10, rehydrated ["cone"]);
            Assert.AreEqual(10, rehydrated ["bone"]);
            Assert.AreEqual(0, rehydrated ["cones"]);
            Assert.AreEqual(9, rehydrated ["gone"]);
            Assert.AreEqual(5, rehydrated ["go"]);
            Assert.AreEqual(0, rehydrated ["god"]);
        }
Ejemplo n.º 5
0
        // A DAWG built without any insertions must produce no prefix matches,
        // both for a non-empty prefix and for the empty prefix.
        public void EmptyDictioinaryTest()
        {
            var emptyDawg = new DawgBuilder<bool> ().BuildDawg ();

            int matchesForBoot = emptyDawg.MatchPrefix ("boot").Count ();
            Assert.IsTrue (matchesForBoot == 0);

            int matchesForEmptyPrefix = emptyDawg.MatchPrefix ("").Count ();
            Assert.IsTrue (matchesForEmptyPrefix == 0);
        }
Ejemplo n.º 6
0
        // Checks that prefix matching on a DAWG with no entries yields nothing,
        // for the prefix "boot" and for the empty prefix.
        public void EmptyDictioinaryTest()
        {
            var builder = new DawgBuilder <bool> ();
            var emptyDawg = builder.BuildDawg();

            // Same two probes, asserted one after the other.
            foreach (string prefix in new [] { "boot", "" })
            {
                Assert.IsTrue(emptyDawg.MatchPrefix(prefix).Count() == 0);
            }
        }
Ejemplo n.º 7
0
        // The empty string is a valid key: a payload stored under "" must be
        // returned by the indexer of the built DAWG.
        public void EmptyKey()
        {
            var builder = new DawgBuilder <int> ();
            builder.Insert("", 5);

            var built = builder.BuildDawg();
            int payloadForEmptyKey = built [""];

            Assert.AreEqual(5, payloadForEmptyKey);
        }
Ejemplo n.º 8
0
        // Inserts a payload under the empty key and verifies it can be read back
        // from the built DAWG.
        public void EmptyKey()
        {
            var builder = new DawgBuilder<int> ();
            builder.Insert ("", 5);

            var built = builder.BuildDawg ();
            int stored = built [""];

            Assert.AreEqual (5, stored);
        }
Ejemplo n.º 9
0
        // Builds a DAWG from "probability" and "stability" (same payload) and pins
        // the node count reported by GetNodeCount() to 14.
        public void AssertNodeCount2()
        {
            var builder = new DawgBuilder<int> ();

            foreach (string word in new [] { "probability", "stability" })
            {
                builder.Insert (word, 10);
            }

            int nodeCount = builder.BuildDawg ().GetNodeCount ();

            Assert.AreEqual (14, nodeCount);
        }
Ejemplo n.º 10
0
        // Builds a DAWG from "taps" and "tops" (same payload) and pins the node
        // count reported by GetNodeCount() to 6.
        public void AssertNodeCount()
        {
            var builder = new DawgBuilder <int> ();

            foreach (string word in new [] { "taps", "tops" })
            {
                builder.Insert(word, 10);
            }

            int nodeCount = builder.BuildDawg().GetNodeCount();

            Assert.AreEqual(6, nodeCount);
        }
Ejemplo n.º 11
0
        // Two four-letter words differing in one letter, stored with equal
        // payloads, are expected to produce a DAWG of exactly 6 nodes.
        public void AssertNodeCount()
        {
            var builder = new DawgBuilder<int> ();
            builder.Insert ("taps", 10);
            builder.Insert ("tops", 10);

            var built = builder.BuildDawg ();
            int actualNodes = built.GetNodeCount ();

            Assert.AreEqual (6, actualNodes);
        }
Ejemplo n.º 12
0
        // "probability" and "stability", stored with equal payloads, are expected
        // to produce a DAWG of exactly 14 nodes.
        public void AssertNodeCount2()
        {
            var builder = new DawgBuilder <int> ();
            builder.Insert("probability", 10);
            builder.Insert("stability", 10);

            var built = builder.BuildDawg();
            int actualNodes = built.GetNodeCount();

            Assert.AreEqual(14, actualNodes);
        }
Ejemplo n.º 13
0
        // Suffix search via prefix search: every word is inserted reversed, so
        // matching the reversed suffix "ility" as a prefix must find the two words
        // ending in it.
        public void SuffixMatchTest()
        {
            var builder = new DawgBuilder<bool> ();

            foreach (string word in new [] { "visibility", "possibility", "dexterity" })
            {
                builder.Insert (word.Reverse (), true);
            }

            var reversedDawg = builder.BuildDawg ();

            int hits = reversedDawg.MatchPrefix ("ility".Reverse ()).Count ();

            Assert.IsTrue (hits == 2);
        }
Ejemplo n.º 14
0
        // Stores three words character-reversed and checks that exactly two of
        // them match the reversed suffix "ility" used as a prefix.
        public void SuffixMatchTest()
        {
            var builder = new DawgBuilder <bool> ();

            string[] words = { "visibility", "possibility", "dexterity" };
            for (int i = 0; i < words.Length; i++)
            {
                builder.Insert(words[i].Reverse(), true);
            }

            var reversedDawg = builder.BuildDawg();
            var reversedSuffix = "ility".Reverse();

            Assert.IsTrue(reversedDawg.MatchPrefix(reversedSuffix).Count() == 2);
        }
Ejemplo n.º 15
0
        // Inserts a single key of 200,000 identical characters and verifies that
        // the built DAWG still resolves it.
        public void TestMethod1()
        {
            const int keyLength = 200 * 1000;
            string veryLongKey = new string ('a', keyLength);

            var dawgBuilder = new DawgBuilder <bool> ();
            dawgBuilder.Insert(veryLongKey, true);

            bool found = dawgBuilder.BuildDawg() [veryLongKey];

            Assert.IsTrue(found);
        }
Ejemplo n.º 16
0
        // Builds a DAWG containing one 200,000-character key and checks that the
        // key can be looked up afterwards.
        public void TestMethod1()
        {
            const int keyLength = 200 * 1000;
            string veryLongKey = new string ('a', keyLength);

            var dawgBuilder = new DawgBuilder<bool> ();
            dawgBuilder.Insert (veryLongKey, true);

            var built = dawgBuilder.BuildDawg ();
            bool found = built [veryLongKey];

            Assert.IsTrue (found);
        }
Ejemplo n.º 17
0
        // With only "cone" stored, the exact key returns its payload, while a
        // shorter prefix, a longer word and an unrelated word all return 0.
        public void TestMethod1()
        {
            var builder = new DawgBuilder <int> ();
            builder.Insert("cone", 10);

            var built = builder.BuildDawg();

            Assert.AreEqual(10, built ["cone"]);

            foreach (string absentKey in new [] { "con", "cones", "pit" })
            {
                Assert.AreEqual(0, built [absentKey]);
            }
        }
Ejemplo n.º 18
0
        // Single-entry dictionary: "cone" maps to 10; "con", "cones" and "pit" are
        // not stored and must map to 0.
        public void TestMethod1()
        {
            var builder = new DawgBuilder<int> ();
            builder.Insert ("cone", 10);

            var built = builder.BuildDawg ();

            int storedValue = built ["cone"];
            Assert.AreEqual (10, storedValue);

            string[] absentKeys = { "con", "cones", "pit" };
            for (int i = 0; i < absentKeys.Length; i++)
            {
                Assert.AreEqual (0, built [absentKeys [i]]);
            }
        }
Ejemplo n.º 19
0
        // Reads the word list (path is relative to the current working directory),
        // inserts every line and builds the DAWG. There are no assertions: the
        // test only exercises the build.
        public void TestMethod1()
        {
            string[] lines = File.ReadAllLines(@"..\..\..\..\..\eneko-words.txt");

            var dawgBuilder = new DawgBuilder <bool> ();

            for (int i = 0; i < lines.Length; i++)
            {
                dawgBuilder.Insert(lines[i], true);
            }

            dawgBuilder.BuildDawg();
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Creates a prefix matcher backed by a DAWG holding every word enumerated
        /// from the dictionary opened for <paramref name="dictionaryFile"/>.
        /// </summary>
        /// <param name="dictionaryFile">Value passed to the <c>WordDictionary</c> constructor.</param>
        /// <returns>A <c>PrefixMatcher</c> wrapping the built DAWG.</returns>
        protected override IPrefixMatcher Build(string dictionaryFile)
        {
            var wordSet = new DawgBuilder <bool>();

            // The dictionary source is disposed once the DAWG has been built.
            using (var source = new WordDictionary(dictionaryFile))
            {
                foreach (var entry in source)
                {
                    wordSet.Insert(entry, true);
                }

                var builtDawg = wordSet.BuildDawg();
                return new PrefixMatcher(builtDawg);
            }
        }
Ejemplo n.º 21
0
        // Loads the word list from a path relative to the current working
        // directory, inserts each line and builds the DAWG. Nothing is asserted.
        public void TestMethod1()
        {
            string[] lines = File.ReadAllLines (@"..\..\..\..\..\eneko-words.txt");

            var dawgBuilder = new DawgBuilder<bool> ();

            for (int index = 0; index < lines.Length; index++)
            {
                dawgBuilder.Insert (lines [index], true);
            }

            dawgBuilder.BuildDawg ();
        }
Ejemplo n.º 22
0
        // "ago" and "ego" differ only in the first letter and carry different
        // payloads: each must return its own value, and their two-letter prefixes
        // must return 0.
        public void AgoEgo()
        {
            var builder = new DawgBuilder <int> ();
            builder.Insert("ago", 9);
            builder.Insert("ego", 10);

            var built = builder.BuildDawg();

            int agoValue = built ["ago"];
            Assert.AreEqual(9, agoValue);

            int egoValue = built ["ego"];
            Assert.AreEqual(10, egoValue);

            foreach (string prefixOnly in new [] { "ag", "eg" })
            {
                Assert.AreEqual(0, built [prefixOnly]);
            }
        }
Ejemplo n.º 23
0
        // Stores "ago" -> 9 and "ego" -> 10, then checks both payloads and that the
        // prefixes "ag" and "eg" are not reported as stored keys (value 0).
        public void AgoEgo()
        {
            var builder = new DawgBuilder<int> ();
            builder.Insert ("ago", 9);
            builder.Insert ("ego", 10);

            var built = builder.BuildDawg ();

            Assert.AreEqual (9, built ["ago"]);
            Assert.AreEqual (10, built ["ego"]);

            string[] prefixesOnly = { "ag", "eg" };
            for (int i = 0; i < prefixesOnly.Length; i++)
            {
                Assert.AreEqual (0, built [prefixesOnly [i]]);
            }
        }
Ejemplo n.º 24
0
        // Verifies MatchPrefix: the keys returned for "cat", "ca" and "" must equal
        // the expected sequences in order, and "boot" / "cats" must match nothing.
        public void TestMethod1()
        {
            var builder = new DawgBuilder<bool> ();

            foreach (string word in new [] { "cat", "caterpillar", "dog" })
            {
                builder.Insert (word, true);
            }

            var built = builder.BuildDawg ();

            var catWords = new [] {"cat", "caterpillar"};
            var allWords = new [] {"cat", "caterpillar", "dog"};

            var keysForCat = built.MatchPrefix ("cat").Select (kvp => kvp.Key);
            Assert.IsTrue (keysForCat.SequenceEqual (catWords));

            var keysForCa = built.MatchPrefix ("ca").Select (kvp => kvp.Key);
            Assert.IsTrue (keysForCa.SequenceEqual (catWords));

            var keysForEmpty = built.MatchPrefix ("").Select (kvp => kvp.Key);
            Assert.IsTrue (keysForEmpty.SequenceEqual (allWords));

            Assert.IsTrue (built.MatchPrefix ("boot").Count () == 0);
            Assert.IsTrue (built.MatchPrefix ("cats").Count () == 0);
        }
Ejemplo n.º 25
0
        // Exercises MatchPrefix on a three-word dictionary: prefixes of stored
        // words return those words in order, the empty prefix returns everything,
        // and prefixes that lead to no stored word return nothing.
        public void TestMethod1()
        {
            var builder = new DawgBuilder <bool> ();
            builder.Insert("cat", true);
            builder.Insert("caterpillar", true);
            builder.Insert("dog", true);

            var built = builder.BuildDawg();

            var catWords = new [] { "cat", "caterpillar" };

            // Both "cat" and "ca" are expected to yield the same two keys.
            foreach (string prefix in new [] { "cat", "ca" })
            {
                var matchedKeys = built.MatchPrefix(prefix).Select(kvp => kvp.Key);
                Assert.IsTrue(matchedKeys.SequenceEqual(catWords));
            }

            var everyKey = built.MatchPrefix("").Select(kvp => kvp.Key);
            Assert.IsTrue(everyKey.SequenceEqual(new [] { "cat", "caterpillar", "dog" }));

            foreach (string unmatched in new [] { "boot", "cats" })
            {
                Assert.IsTrue(built.MatchPrefix(unmatched).Count() == 0);
            }
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Builds the DAWG, serializes it with <c>SaveAsMatrixDawg</c> into memory and
        /// returns the instance loaded back from those bytes.
        /// </summary>
        /// <param name="dawgBuilder">Builder holding the entries to build from.</param>
        /// <returns>The DAWG reloaded from the serialized form.</returns>
        protected override Dawg <TPayload> GetDawg <TPayload> (DawgBuilder <TPayload> dawgBuilder)
        {
            var dawg = dawgBuilder.BuildDawg();

            byte[] serialized;

            using (var memoryStream = new MemoryStream())
            {
                // Warnings 612/618 flag use of obsolete members; they are suppressed
                // for this one call only.
#pragma warning disable 612,618
                dawg.SaveAsMatrixDawg(memoryStream);
#pragma warning restore 612,618

                // ToArray copies exactly the bytes written. GetBuffer would return the
                // whole backing array, including unused capacity past Length.
                serialized = memoryStream.ToArray();
            }

            var rehydrated = Dawg <TPayload> .Load(new MemoryStream (serialized));

            return(rehydrated);
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Builds a DAWG containing only <paramref name="word"/>, serializes it to an
        /// in-memory stream and checks the word against that stream.
        /// </summary>
        /// <param name="word">The word to insert.</param>
        /// <returns>The result of <c>WordExists(word, stream)</c> on the serialized DAWG.</returns>
        public static bool InsertWord(String word)
        {
            var dawgBuilder = new DawgBuilder<bool>();

            dawgBuilder.Insert(word, true);

            var dawg = dawgBuilder.BuildDawg();

            using (Stream stream = new MemoryStream(100))
            {
                // NOTE(review): the payload writer is deliberately null here; presumably
                // SaveTo falls back to a built-in writer for bool - confirm against the
                // DawgSharp version in use.
                Action<BinaryWriter, bool> writePayload = null;
                dawg.SaveTo(stream, writePayload);

                // Writing leaves the stream positioned after the last written byte.
                // Rewind so a reader starts at the first byte; CanSeek is false for a
                // closed stream, so this cannot throw if SaveTo closed it.
                if (stream.CanSeek)
                {
                    stream.Position = 0;
                }

                // NOTE(review): persisting the stream via
                // FileHelper.Save(_BASEPATH + _FILENAME, stream) is disabled, so nothing
                // is written to disk by this method.

                return WordExists(word, stream);
            }
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Builds a DAWG containing only <paramref name="word"/>, serializes it to an
        /// in-memory stream and checks the word against that stream.
        /// </summary>
        /// <param name="word">The word to insert.</param>
        /// <returns>The result of <c>WordExists(word, stream)</c> on the serialized DAWG.</returns>
        public static bool InsertWord(String word)
        {
            var dawgBuilder = new DawgBuilder <bool>();

            dawgBuilder.Insert(word, true);

            var dawg = dawgBuilder.BuildDawg();

            using (Stream stream = new MemoryStream(100))
            {
                // NOTE(review): the payload writer is deliberately null here; presumably
                // SaveTo falls back to a built-in writer for bool - confirm against the
                // DawgSharp version in use.
                Action <BinaryWriter, bool> writePayload = null;
                dawg.SaveTo(stream, writePayload);

                // Writing leaves the stream positioned after the last written byte.
                // Rewind so a reader starts at the first byte; CanSeek is false for a
                // closed stream, so this cannot throw if SaveTo closed it.
                if (stream.CanSeek)
                {
                    stream.Position = 0;
                }

                // NOTE(review): persisting the stream via
                // FileHelper.Save(_BASEPATH + _FILENAME, stream) is disabled, so nothing
                // is written to disk by this method.

                return(WordExists(word, stream));
            }
        }
Ejemplo n.º 29
0
        // Builds a DAWG from every line of eneko-words.txt (located in the test
        // directory), passes it through SaveToFileAndLoadBack and checks that each
        // word is still present in the reloaded instance.
        public void EnekoWordListSaveToFileTest()
        {
            string wordListPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "eneko-words.txt");
            string[] words = File.ReadAllLines(wordListPath);

            var dawgBuilder = new DawgBuilder <bool> ();

            for (int i = 0; i < words.Length; i++)
            {
                dawgBuilder.Insert(words[i], true);
            }

            var rehydrated = SaveToFileAndLoadBack(dawgBuilder.BuildDawg());

            for (int i = 0; i < words.Length; i++)
            {
                // The word doubles as the failure message so a miss names itself.
                string current = words[i];
                Assert.IsTrue(rehydrated [current], current);
            }
        }
Ejemplo n.º 30
0
        // Draws 100 random items (fixed seed 1) from a reloaded three-word DAWG and
        // checks that all three distinct keys occur at least once.
        public void GetRandomItemTest()
        {
            var builder = new DawgBuilder <bool> ();

            // Keys of very different lengths, to see how word length affects the
            // uniformity of the distribution.
            foreach (string key in new [] { "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aa", "b" })
            {
                builder.Insert(key, true);
            }

            var reloaded = SaveToFileAndLoadBack(builder.BuildDawg());

            const int draws = 100;

            var rng = new Random(1);

            // Lazily draw the items, then tally how often each key was produced.
            var drawnItems = Enumerable.Range(0, draws).Select(i => reloaded.GetRandomItem(rng));
            var byKey = drawnItems.GroupBy(item => item.Key);
            var counters = byKey.ToDictionary(g => g.Key, g => g.Count());

            Assert.AreEqual(3, counters.Count);
        }
Ejemplo n.º 31
0
 /// <summary>
 /// Builds the DAWG from <c>builder</c> and wraps it, together with
 /// <c>allDocuments</c>, in a new <c>DawgSearchableIndex</c>.
 /// </summary>
 /// <returns>The newly created index.</returns>
 public DawgSearchableIndex Build()
 {
     var builtDawg = builder.BuildDawg();

     return new DawgSearchableIndex(builtDawg, allDocuments);
 }
Ejemplo n.º 32
0
 /// <summary>
 /// Saves the model to the given stream.
 /// </summary>
 /// <remarks>
 /// The DAWG is rebuilt from <c>builder</c> and stored in the <c>dawg</c> field
 /// before it is written.
 /// </remarks>
 /// <param name="fs">The destination stream.</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="fs"/> is <c>null</c>.</exception>
 public void SaveTo(System.IO.Stream fs)
 {
     // Fail fast: reject a null stream before building and before replacing
     // the dawg field.
     if (fs == null)
     {
         throw new System.ArgumentNullException("fs");
     }

     dawg = builder.BuildDawg();
     dawg.SaveTo(fs);
 }
Ejemplo n.º 33
0
 /// <summary>
 /// Returns the DAWG produced directly by <paramref name="dawgBuilder"/>,
 /// without any further processing.
 /// </summary>
 /// <param name="dawgBuilder">Builder holding the entries to build from.</param>
 /// <returns>The result of <c>BuildDawg()</c>.</returns>
 protected override Dawg <TPayload> GetDawg <TPayload> (DawgBuilder <TPayload> dawgBuilder)
 {
     var built = dawgBuilder.BuildDawg();

     return built;
 }
Ejemplo n.º 34
0
        /// <summary>
        /// Produces the DAWG under test from <paramref name="dawgBuilder"/>. This
        /// implementation simply builds it; being virtual, it can be overridden.
        /// </summary>
        /// <param name="dawgBuilder">Builder holding the entries to build from.</param>
        /// <returns>The result of <c>BuildDawg()</c>.</returns>
        protected virtual Dawg <TPayload> GetDawg <TPayload> (DawgBuilder <TPayload> dawgBuilder)
        {
            return dawgBuilder.BuildDawg();
        }