示例#1
0
        //public static void MakeEnglishDictionary()
        //{
        //    var dawgBuilder = new DawgBuilder<bool>(); // <bool> is the value type.
        //                                               // Key type is always string.
        //    string[] lines = File.ReadLines(@"C:\Users\Simeon\Desktop\Scrabble\Scrabble\Helpers\englishWords.txt").ToArray();

        //    foreach (string key in new[] { "Aaron", "abacus", "abashed" })
        //    {
        //        dawgBuilder.Insert(key, true);
        //    }

        //    var dawg = dawgBuilder.BuildDawg(); // Computer is working.  Please wait ...

        //    dawg.SaveTo(File.Create(@"C:\Users\Simeon\Desktop\Scrabble\Scrabble\Helpers\englishDawg.bin"));

        //}

        /// <summary>
        /// Loads a DAWG (a dictionary of words as a compact trie).
        /// </summary>
        /// <remarks>
        /// The dictionary is read from the file whose name is
        /// <c>language.Language + "Dawg.bin"</c>, opened read-only.
        /// </remarks>
        /// <param name="language">Language of dictionary to load</param>
        /// <returns>The DAWG produced by <c>Dawg&lt;bool&gt;.Load</c> for that file.</returns>
        public static Dawg <bool> LoadDawg(GameLanguage language)
        {
            // Release the file handle once loading is done; previously the stream was never disposed.
            // NOTE(review): assumes Load reads everything it needs before returning - confirm.
            using (Stream fs = File.Open(language.Language + "Dawg.bin", FileMode.Open, FileAccess.Read))
            {
                return(Dawg <bool> .Load(fs));
            }
        }
示例#2
0
        /// <summary>
        /// Looks up entries of the reverse index that match the given text and writes
        /// up to ten of them to the response as JSON.
        /// </summary>
        /// <remarks>
        /// Any exception raised while loading or querying the index is reported to the
        /// client as a fixed message and e-mailed to the administrator.
        /// </remarks>
        /// <param name="prefixText">Text to search for; stress marks are removed and it is lower-cased and reversed before lookup.</param>
        protected void GetRules(string prefixText)
        {
            // The query is reversed because it is matched against the reverse index
            var reversedQuery = DictionaryHelper.RemoveStressMarks(prefixText).ToLowerInvariant().Reverse();

            var dictionary = new FileBasedDictionary(Context.Server);

            try
            {
                Dawg <string> reverseIndex;

                using (Stream indexStream = dictionary.OpenReverseIndex())
                {
                    reverseIndex = Dawg <string> .Load(indexStream, reader =>
                    {
                        // Empty strings in the stream are turned back into null payloads
                        string payload = reader.ReadString();
                        return(payload == String.Empty ? null : payload);
                    });
                }

                // Only the part of the query that exists in the index is used as the prefix
                int matchedLength = reverseIndex.GetLongestCommonPrefixLength(reversedQuery);

                var suggestions = reverseIndex.MatchPrefix(reversedQuery.Take(matchedLength))
                                  .GroupBy(kvp => kvp.Value)
                                  .Select(sameValue => sameValue.First()) // one entry per distinct value
                                  .Select(kvp => kvp.Value + DictionaryHelper.RuleLineDelimiter + new string(kvp.Key.Reverse().ToArray()))
                                  .Take(10)
                                  .ToArray();

                WriteJSONToResponse(suggestions);
            }
            catch (Exception e)
            {
                WriteJSONToResponse(new [] { "Доступ к словарю в данный момент отсутствует. Возможно происходит построение индексов." });

                Email.SendAdminEmail("GetRules", e.ToString());
            }
        }
示例#3
0
        /// <summary>
        /// Saves a small DAWG with integer payloads to memory, loads it back and checks
        /// that stored keys return their payloads while absent keys return 0.
        /// </summary>
        public void PersistenceTest()
        {
            var dawgBuilder = new DawgBuilder <int> ();

            dawgBuilder.Insert("cone", 10);
            dawgBuilder.Insert("bone", 10);
            dawgBuilder.Insert("gone", 9);
            dawgBuilder.Insert("go", 5);

            var dawg = dawgBuilder.BuildDawg();

            byte[] serialized;

            using (var memoryStream = new MemoryStream())
            {
                dawg.SaveTo(memoryStream, (w, p) => w.Write(p));

                // ToArray returns exactly the bytes written; GetBuffer would also
                // expose the unused tail of the internal buffer.
                serialized = memoryStream.ToArray();
            }

            Dawg <int> rehydrated;

            using (var input = new MemoryStream(serialized))
            {
                rehydrated = Dawg <int> .Load(input, r => r.ReadInt32());
            }

            Assert.AreEqual(10, rehydrated ["cone"]);
            Assert.AreEqual(10, rehydrated ["bone"]);
            Assert.AreEqual(0, rehydrated ["cones"]);
            Assert.AreEqual(9, rehydrated ["gone"]);
            Assert.AreEqual(5, rehydrated ["go"]);
            Assert.AreEqual(0, rehydrated ["god"]);
        }
示例#4
0
        /// <summary>
        /// Builds a DAWG from the words found in a text file, saves it to a binary
        /// file and loads that file back.
        /// </summary>
        public void BoingDawg_Generate()
        {
            const string textFile = "boing_crosschecks.txt";
            //const string textFile = "englishWords.txt";

            const string binFile = "boingDAWG.bin";
            //const string binFile = "englishDawg.bin";

            // Every run of word characters in the file becomes one dictionary entry
            string        fileContents = File.ReadAllText(textFile);
            List <string> boingWords   = Regex.Matches(fileContents, "\\w+").Select(m => m.Value).ToList();

            DawgBuilder <bool> dawgBuilder = new();

            foreach (string word in boingWords)
            {
                dawgBuilder.Insert(word, true);
            }

            Dawg <bool> dawg = dawgBuilder.BuildDawg(); // Computer is working.  Please wait ...

            using (FileStream output = File.Create(binFile))
            {
                dawg.SaveTo(output);
            }

            // Read the file back in to confirm it loads. The stream is now disposed;
            // previously it was left open, keeping the file locked.
            using (FileStream input = File.Open(binFile, FileMode.Open))
            {
                _ = Dawg <bool> .Load(input);
            }
        }
示例#5
0
        /// <summary>
        /// Checks prefix search on the words "tap", "taps", "top" and "tops":
        /// matching prefixes return the expected words and an unknown prefix returns nothing.
        /// </summary>
        public void FindPrefixTapsTops()
        {
            var dawg = Dawg.CreateBuilder(new[] { "tap", "taps", "top", "tops" }).Build();

            // Prefix "to"
            var startingWithTo = dawg.WithPrefix("to").ToArray();
            Assert.Contains("top", startingWithTo);
            Assert.Contains("tops", startingWithTo);

            // Prefix "tap"
            var startingWithTap = dawg.WithPrefix("tap").ToArray();
            Assert.Contains("tap", startingWithTap);
            Assert.Contains("taps", startingWithTap);

            // A prefix that no word has
            Assert.Empty(dawg.WithPrefix("tu"));

            // Prefix "t" is shared by every word
            var startingWithT = dawg.WithPrefix("t").ToArray();
            Assert.Contains("tap", startingWithT);
            Assert.Contains("taps", startingWithT);
            Assert.Contains("top", startingWithT);
            Assert.Contains("tops", startingWithT);
        }
示例#6
0
        /// <summary>
        /// Builds a DAWG that maps every word form in the MRD file to the collection of
        /// its interpretations, reporting progress and elapsed time on the console.
        /// </summary>
        /// <param name="mrdFile">Reader whose <c>AllForms</c> are inserted.</param>
        /// <returns>The DAWG built from all inserted forms.</returns>
        public static Dawg <FormInterpretations> CreateDAWG(MRDFileReader mrdFile)
        {
            // Stopwatch measures elapsed time directly; a DateTime.Now difference
            // shifts if the wall clock is adjusted while the build runs.
            var timer = System.Diagnostics.Stopwatch.StartNew();

            Console.WriteLine("Inserting forms in DAWG... Please wait...");
            var    dawgBuilder = new DawgBuilder <FormInterpretations>();
            UInt64 cntForms    = 0;

            foreach (WordForm f in mrdFile.AllForms)
            {
                string word = f.Prefix + f.Flexia.Prefix + f.Lemma.Base + f.Flexia.Flexion;

                // The boolean result is deliberately ignored: only a non-null payload
                // counts as "this word already has an entry".
                FormInterpretations payload = null;
                dawgBuilder.TryGetValue(word, out payload);
                if (payload == null)
                {
                    payload = new FormInterpretations();
                    dawgBuilder.Insert(word, payload);
                }

                // Several forms can spell the same word; they share one payload
                payload.Add(f);
                cntForms++;
            }
            Console.WriteLine("All forms count: " + cntForms);
            Console.WriteLine("Building... please wait...");
            Dawg <FormInterpretations> dawg = dawgBuilder.BuildDawg();

            Console.WriteLine("DAWG create time: {0}", timer.Elapsed);
            return(dawg);
        }
示例#7
0
 /// <summary>
 /// Tells whether a word is accepted: either because every word is accepted,
 /// or because its upper-cased form is marked in the dictionary.
 /// </summary>
 /// <param name="dawg">Dawg to use</param>
 /// <param name="word">Word to check</param>
 /// <param name="alwaysExists">When true, any word is considered valid and the dawg is not consulted</param>
 /// <returns>True if the word is considered valid, false otherwise.</returns>
 public static bool CheckWordValidity(Dawg <bool> dawg, string word, bool alwaysExists = false)
 {
     // Short-circuit keeps the lookup from running when alwaysExists is set
     return(alwaysExists || dawg[word.ToUpper()]);
 }
示例#8
0
        /// <summary>
        /// Reads a searchable index from a gzip-compressed stream: first a length-prefixed
        /// postings list of all documents, then the DAWG itself.
        /// </summary>
        /// <param name="stream">Compressed input; it is left open when this method returns.</param>
        /// <returns>An index built from the loaded DAWG and postings list.</returns>
        public static DawgSearchableIndex Deserialize(Stream stream)
        {
            using var decompressed = new GZipStream(stream, CompressionMode.Decompress, leaveOpen: true);

            // Read order matters: the postings list precedes the DAWG in the stream
            var documents = Serializer.DeserializeWithLengthPrefix <RangePostingsList>(decompressed, PrefixStyle.Base128);
            var index     = Dawg <RangePostingsList> .Load(decompressed, readPayload : DeserializePayload);

            return(new DawgSearchableIndex(index, documents));
        }
示例#9
0
 /// <summary>
 /// Initializes the window's components, then creates an empty Dawg and a view model that wraps it.
 /// </summary>
 public MainWindow()
 {
     InitializeComponent();
     d      = new Dawg();
     vmDawg = new ViewModelDawg(d);
     // UI binding is currently disabled; the view model is created but not attached here
     //BindDawgToUI();
     //DataContext = vmDawg;
 }
示例#10
0
        /// <summary>
        /// Rebuilds the DAWG from the <c>Dicts\morphs.mrd</c> file under the work directory
        /// and saves it to the given path.
        /// </summary>
        /// <param name="path">File to create (or overwrite) with the serialized DAWG.</param>
        private void _rebuildDAWG(string path)
        {
            MRDFileReader mrdFile = new MRDFileReader(_gramtab);

            mrdFile.LoadMrd(Path.Combine(_workDir, @"Dicts\morphs.mrd"));
            _dawg = CreateDAWG(mrdFile);

            // Dispose the output stream so the data is flushed and the file handle
            // released; previously the stream from File.Create was never closed.
            using (var output = File.Create(path))
            {
                _dawg.SaveTo(output, WritePayload);
            }
        }
示例#11
0
 /// <summary>
 /// Checks whether the given URL is in the list, building the DAWG on first use.
 /// </summary>
 /// <param name="url">URL to look up.</param>
 /// <returns>The value stored in the DAWG for <paramref name="url"/>.</returns>
 public bool IsURLInList(string url)
 {
     // Built lazily on the first lookup; the builder reference is dropped afterwards
     if (dawg == null)
     {
         dawg = dawgBuilder.BuildDawg();
         dawgBuilder = null;
     }

     // Look up the argument itself; the previous code queried the literal "url"
     return dawg[url];
 }
示例#12
0
        /// <summary>
        /// Prepares the benchmark: loads the DAWG from <c>SaveLocation</c> and the query
        /// words from <c>Query1K</c>.
        /// </summary>
        public DawgBenchmark()
        {
            // Prepended to both file names; currently empty
            var pathPrefix = string.Empty;

            // Both streams stay open until the constructor returns
            using var dawgFile = File.OpenRead(pathPrefix + SaveLocation);
            _dawg = new Dawg(dawgFile);

            using var queryFile = File.OpenRead(pathPrefix + Query1K);
            _words = BuildQuery1K(queryFile);
        }
示例#13
0
文件: Program.cs 项目: vtortola/dawg
 /// <summary>
 /// Looks up every word of the set in the DAWG, repeating the whole pass
 /// <paramref name="times"/> times and asserting that each word is found.
 /// </summary>
 /// <param name="toFind">Words expected to be in the DAWG.</param>
 /// <param name="dawg">DAWG to query.</param>
 /// <param name="times">Number of passes over the set.</param>
 static void FindWords(HashSet <string> toFind, Dawg dawg, int times)
 {
     for (int pass = 0; pass < times; pass++)
     {
         foreach (string candidate in toFind)
         {
             bool found = dawg.Contains(candidate);
             Assert(true, found);
         }
     }
 }
示例#14
0
 /// <summary>
 /// Builds a DAWG from key/value pairs by appending them one at a time to a fresh instance.
 /// </summary>
 /// <param name="data">Pairs to append; each key is a sequence of <typeparamref name="TKey"/>.</param>
 /// <returns>The instance that received all pairs.</returns>
 public virtual Dawg <TKey, TValue> Build(IEnumerable <KeyValuePair <IEnumerable <TKey>, TValue> > data)
 {
     // Start from a clean registry and an empty graph on every call
     registry = new Dictionary <IDawgNode <TKey, TValue>, IDawgNode <TKey, TValue> >();
     instance = new Dawg <TKey, TValue>();

     foreach (KeyValuePair <IEnumerable <TKey>, TValue> entry in data)
     {
         Append(entry.Key, entry.Value);
     }

     return(instance);
 }
        /// <summary>
        /// Creates a prefix matcher backed by a DAWG built from the given dictionary file.
        /// </summary>
        /// <param name="dictionaryFile">Dictionary file passed to <c>WordDictionary</c>.</param>
        /// <returns>A <c>PrefixMatcher</c> wrapping the built DAWG.</returns>
        protected override IPrefixMatcher Build(string dictionaryFile)
        {
            // The word source is disposed once the DAWG has been built from it
            using (var wordSource = new WordDictionary(dictionaryFile))
            {
                var builtDawg = Dawg.CreateBuilder(wordSource).Build();
                return(new PrefixMatcher(builtDawg));
            }
        }
示例#16
0
        /// <summary>
        /// Round-trips a DAWG through a temporary file: saves it, then loads it back.
        /// </summary>
        /// <param name="dawg">DAWG to save.</param>
        /// <returns>The DAWG loaded from the temporary file.</returns>
        private static Dawg <bool> SaveToFileAndLoadBack(Dawg <bool> dawg)
        {
            string binFilePath = Path.GetTempFileName();

            try
            {
                using (var output = File.OpenWrite(binFilePath))
                {
                    dawg.SaveTo(output);
                }

                // Dispose the read stream too; previously it was left open.
                // NOTE(review): assumes Load reads everything it needs before returning - confirm.
                using (var input = File.OpenRead(binFilePath))
                {
                    return(Dawg <bool> .Load(input));
                }
            }
            finally
            {
                // GetTempFileName creates the file on disk, so remove it when done
                File.Delete(binFilePath);
            }
        }
示例#17
0
 /// <summary>
 /// Initializes the builder with an empty registry, an empty DAWG instance,
 /// an empty prefix path and cleared state references.
 /// </summary>
 protected AbstractDacukBuilder()
 {
     registry     = new Dictionary <IDawgNode <TKey, TValue>, IDawgNode <TKey, TValue> >();
     instance     = new Dawg <TKey, TValue>();
     // No states are tracked and no comparer is set until work begins
     confluxState = null;
     lastState    = null;
     comparer     = null;
     prefixLenght = 0;
     prefixPath   = new Stack <IDawgNode <TKey, TValue> >();
     Registered   = new RegisteredPropertyHandler();
 }
示例#18
0
        /// <summary>
        /// Creates a builder from "AAHS", "AALIIS" and "AALS"; the test makes no
        /// assertions and passes as long as the call completes.
        /// </summary>
        public void Case15()
        {
            string[] input = { "AAHS", "AALIIS", "AALS" };

            var builder = Dawg.CreateBuilder(input);
        }
示例#19
0
        /// <summary>
        /// Collects the keys of all DAWG entries that match the given prefix.
        /// </summary>
        /// <param name="prefix">Prefix passed to <c>MatchPrefix</c>.</param>
        /// <returns>The matching keys, in the order <c>MatchPrefix</c> yields them.</returns>
        public IEnumerable <string> GetWordsWithGivenPrefix(string prefix)
        {
            var matchedKeys = new Queue <string>();

            // Only the key of each entry is kept; the boolean payload is dropped
            foreach (KeyValuePair <string, bool> entry in Dawg.MatchPrefix(prefix))
            {
                matchedKeys.Enqueue(entry.Key);
            }

            return(matchedKeys);
        }
示例#20
0
        /// <summary>
        /// Loads a DAWG from the stream and looks the word up in it.
        /// </summary>
        /// <param name="word">Word to look up.</param>
        /// <param name="fs">Stream the DAWG is loaded from on every call.</param>
        /// <returns>TRUE if found</returns>
        public static bool WordExists(String word, Stream fs)
        {
            // A null payload reader is passed explicitly, typed so the call resolves as before
            Func <BinaryReader, bool> readPayload = null;

            return(Dawg <bool> .Load(fs, readPayload)[word]);
        }
        /// <summary>
        /// Creates a prefix matcher backed by a DAWG filled with the upper-cased
        /// words of the given dictionary file.
        /// </summary>
        /// <param name="dictionaryFile">Dictionary file passed to <c>WordDictionary</c>.</param>
        /// <returns>A <c>PrefixMatcher</c> wrapping the filled DAWG.</returns>
        protected override IPrefixMatcher Build(string dictionaryFile)
        {
            using (var wordSource = new WordDictionary(dictionaryFile))
            {
                var lexicon = new Dawg();

                foreach (var entry in wordSource)
                {
                    // Words are stored upper-cased
                    var upperCased = entry.ToUpper();
                    lexicon.Insert(upperCased);
                }

                return(new PrefixMatcher(lexicon));
            }
        }
示例#22
0
        /// <summary>
        /// Builds a DAWG from "bcn", "acn", "bon", "boa" and "asn" (in that insertion order)
        /// and checks that "boa" is found.
        /// </summary>
        public void Case7()
        {
            string[] input = { "bcn", "acn", "bon", "boa", "asn" };

            var dawg = Dawg.CreateBuilder(input).Build();

            bool containsBoa = dawg.Contains("boa");
            Assert.True(containsBoa);
        }
示例#23
0
        /// <summary>
        /// Builds a DAWG from "RIG", "RIN" and "RIE" and checks that the prefix "R"
        /// yields exactly three words.
        /// </summary>
        public void Case14()
        {
            string[] input = { "RIG", "RIN", "RIE" };

            var dawg = Dawg.CreateBuilder(input).Build();

            var startingWithR = dawg.WithPrefix("R").ToArray();

            Assert.Equal(3, startingWithR.Length);
        }
示例#24
0
        /// <summary>
        /// Builds the DAWG, saves it to memory in the matrix format and returns the
        /// copy loaded back from those bytes.
        /// </summary>
        /// <param name="dawgBuilder">Builder holding the entries.</param>
        /// <returns>The DAWG loaded from the serialized bytes.</returns>
        protected override Dawg <TPayload> GetDawg <TPayload> (DawgBuilder <TPayload> dawgBuilder)
        {
            var dawg = dawgBuilder.BuildDawg();

            var memoryStream = new MemoryStream();

            // 612/618 are the "member is obsolete" warnings, silenced for this call only
#pragma warning disable 612,618
            dawg.SaveAsMatrixDawg(memoryStream);
#pragma warning restore 612,618

            // ToArray returns exactly the bytes written; GetBuffer would also
            // expose the unused tail of the internal buffer.
            var serialized = memoryStream.ToArray();

            var rehydrated = Dawg <TPayload> .Load(new MemoryStream (serialized));

            return(rehydrated);
        }
示例#25
0
        /// <summary>
        /// Builds a DAWG from five similar words and checks that "AAHED" is found
        /// while the near-misses "AAHEDS" and "AHAED" are not.
        /// </summary>
        public void Case8()
        {
            string[] input = { "AAHEED", "AAHED", "OOHED", "AALID", "AAED" };

            var dawg = Dawg.CreateBuilder(input).Build();

            // Present
            Assert.True(dawg.Contains("AAHED"));

            // Absent: one with an extra trailing letter, one with letters swapped
            Assert.False(dawg.Contains("AAHEDS"));
            Assert.False(dawg.Contains("AHAED"));
        }
示例#26
0
        /// <summary>
        /// Loads the DAWG from <c>forms.dawg</c> in the work directory and prints load
        /// statistics; if the file does not exist, the DAWG is rebuilt instead.
        /// </summary>
        public void LoadDAWG()
        {
            string path = Path.Combine(_workDir, "forms.dawg");

            if (File.Exists(path))
            {
                // Stopwatch measures elapsed time directly; a DateTime.Now difference
                // shifts if the wall clock is adjusted while loading.
                var timer = System.Diagnostics.Stopwatch.StartNew();

                // Dispose the stream after loading; previously the file handle was never released.
                // NOTE(review): assumes Load reads everything it needs before returning - confirm.
                using (var input = File.Open(path, FileMode.Open))
                {
                    _dawg = Dawg <FormInterpretations> .Load(input, ReadPayload);
                }

                Console.WriteLine("DAWG load time: {0}", timer.Elapsed);
                Console.WriteLine("DAWG nodes: {0}", _dawg.GetNodeCount());
                Console.WriteLine("DAWG count {0}", _dawg.Count());
            }
            else
            {
                _rebuildDAWG(path);
            }
        }
示例#27
0
        /// <summary>
        /// Loads the list of valid words from <c>Resources\ods4.txt</c> next to the executing
        /// assembly, and the DAWG from the embedded resource whose name ends with
        /// <c>ODS4_DAWG.bin</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the previous comment described these as words from the Collins
        /// dictionary of valid Scrabble words, while the file names refer to "ods4" - confirm
        /// which word list this actually is.
        /// </remarks>
        private void LoadWords()
        {
            var assembly = Assembly.GetExecutingAssembly();

            string path = Path.Combine(Path.GetDirectoryName(assembly.Location), @"Resources\ods4.txt");

            // One word per line; the list is filled directly from the file's lines
            ValidWords = new List <string>(File.ReadAllLines(path));

            // Single() throws unless exactly one embedded resource matches
            string resourceName = assembly.GetManifestResourceNames().Single(str => str.EndsWith("ODS4_DAWG.bin"));

            // The binary stream is passed straight to Load; the StreamReader that used to
            // wrap it was only ever used to get the same stream back.
            using (Stream stream = assembly.GetManifestResourceStream(resourceName))
            {
                Dawg = Dawg <bool> .Load(stream);
            }
        }
示例#28
0
        /// <summary>
        /// Creates a DAWG from the fixture nodes, saves it to the lexicon file and
        /// constructs a Dawg from that file. There are no assertions; the test passes
        /// if nothing throws.
        /// </summary>
        public void DawgCreator_SaveDawg_ReadDawg()
        {
            var creator = new DawgCreator(lexiconName, numNodes, numWords, numReversePartWords);

            // Feed the first numNodes fixture nodes to the creator, in order
            int nodeIndex = 0;
            while (nodeIndex < numNodes)
            {
                creator.AddNode(nodes[nodeIndex]);
                nodeIndex++;
            }

            creator.SaveDawg(lexiconFileName);

            // Reading the file back must not throw
            Dawg reloaded = new Dawg(lexiconFileName);
        }
示例#29
0
文件: Program.cs 项目: vtortola/dawg
        /// <summary>
        /// Verifies that the DAWG holds the same words as the input array: equal
        /// distinct counts, every input word in the DAWG's set, and every input word
        /// reported by <c>Contains</c>.
        /// </summary>
        /// <param name="words">Words the DAWG is expected to hold.</param>
        /// <param name="dawg">DAWG under test.</param>
        static void TestOwnWordsExits(string[] words, Dawg dawg)
        {
            var expected = words.ToHashSet();
            var actual   = dawg.ToHashSet();

            bool sameSize = expected.Count == actual.Count;
            Assert(true, sameSize, "The hash do not contains the same words");

            if (!expected.IsSubsetOf(actual))
            {
                // Not used afterwards; presumably kept so the differences can be inspected while debugging
                var missingFromDawg = expected.Except(actual).ToArray();
                var extraInDawg     = actual.Except(expected).ToArray();
                Assert(true, false, "different sets");
            }

            foreach (string word in words)
            {
                Assert(true, dawg.Contains(word), $"DAWG Should contain {word}");
            }
        }
示例#30
0
文件: Program.cs 项目: vtortola/dawg
        /// <summary>
        /// For every input word, generates shuffled, truncated and letter-inserted variants
        /// and asserts each one through the <c>Assert(word, hash, dawg, message)</c> helper.
        /// Words are processed in parallel; the number of variants tried is written to the console.
        /// </summary>
        /// <param name="words">Source words the variants are derived from.</param>
        /// <param name="hash">Reference set passed to the assertion helper.</param>
        /// <param name="dawg">DAWG under test.</param>
        static void TestRandomGeneratedWords(string[] words, HashSet <string> hash, Dawg dawg)
        {
            const int factor = 1;
            long counter     = 0;

            var options = new ParallelOptions()
            {
                MaxDegreeOfParallelism = Environment.ProcessorCount
            };

            Parallel.For(0, words.Length, options, i =>
            {
                // Seeded per index so every word gets a repeatable random sequence
                var random     = new Random(123 + i);
                var letters    = words[i].ToCharArray();
                int iterations = letters.Length * factor;

                // Variant 1: the same letters in a shuffled order (shuffled cumulatively, in place)
                for (int round = 0; round < iterations; round++)
                {
                    Shuffle(letters, random);
                    var shuffled = new string(letters);
                    Assert(shuffled, hash, dawg, $"Differs on shuffled {shuffled}");
                    Interlocked.Increment(ref counter);
                }

                // Words shorter than three characters skip the remaining variants
                var original = words[i];
                if (original.Length < 3)
                {
                    return;
                }

                // Variant 2: a leading part of the word
                for (int round = 0; round < iterations; round++)
                {
                    int cutLength = random.Next(1, original.Length - 1);
                    var cut       = original.Substring(0, cutLength);
                    Assert(cut, hash, dawg, $"Differs on substring {cut}");
                    Interlocked.Increment(ref counter);
                }

                // Variant 3: the word with a run of 's' inserted at a random position.
                // The position is drawn before the run length, as in the original expression.
                for (int round = 0; round < iterations; round++)
                {
                    int position = random.Next(0, original.Length - 1);
                    var padding  = "".PadLeft(random.Next(1, factor), 's');
                    var insert   = original.Insert(position, padding);
                    Assert(insert, hash, dawg, $"Differs on insert {insert}");
                    Interlocked.Increment(ref counter);
                }
            });

            Console.Write($"{counter:n0} of random words tried ");
        }
示例#31
0
文件: Program.cs 项目: vtortola/dawg
        /// <summary>
        /// Runs a prefix search for every prefix, repeating each search
        /// <paramref name="times"/> times, and returns the words found per prefix.
        /// </summary>
        /// <param name="dawg">DAWG to query.</param>
        /// <param name="prefixes">Prefixes to search for.</param>
        /// <param name="times">How many times each search is repeated.</param>
        /// <param name="max">Optional cap on the number of results taken per search.</param>
        /// <returns>
        /// One set per prefix, at the same index as the prefix. An entry stays null
        /// if <paramref name="times"/> is not positive.
        /// </returns>
        static HashSet <string>[] DawgPrefixSearch(Dawg dawg, string[] prefixes, int times, int? max = null)
        {
            var found = new HashSet <string> [prefixes.Length];

            for (int slot = 0; slot < prefixes.Length; slot++)
            {
                string currentPrefix = prefixes[slot];

                for (int run = 0; run < times; run++)
                {
                    var matches = dawg.WithPrefix(currentPrefix);
                    if (max != null)
                    {
                        matches = matches.Take(max.Value);
                    }

                    // Each repetition overwrites the previous result for this prefix
                    found[slot] = matches.ToHashSet();
                }
            }

            return(found);
        }