//public static void MakeEnglishDictionary()
//{
//    var dawgBuilder = new DawgBuilder<bool>(); // <bool> is the value type.
//                                               // Key type is always string.
//    string[] lines = File.ReadLines(@"C:\Users\Simeon\Desktop\Scrabble\Scrabble\Helpers\englishWords.txt").ToArray();
//    foreach (string key in new[] { "Aaron", "abacus", "abashed" })
//    {
//        dawgBuilder.Insert(key, true);
//    }
//    var dawg = dawgBuilder.BuildDawg(); // Computer is working. Please wait ...
//    dawg.SaveTo(File.Create(@"C:\Users\Simeon\Desktop\Scrabble\Scrabble\Helpers\englishDawg.bin"));
//}

/// <summary>
/// Loads a DAWG (a dictionary of words as a compact trie) from the file whose
/// name is the language's <c>Language</c> value followed by "Dawg.bin".
/// </summary>
/// <param name="language">Language of dictionary to load</param>
/// <returns>The DAWG read from that file.</returns>
public static Dawg<bool> LoadDawg(GameLanguage language)
{
    // Release the file handle as soon as loading is finished instead of
    // leaving it open until the garbage collector finalizes the stream.
    // NOTE(review): assumes Dawg<bool>.Load reads the whole stream eagerly — confirm.
    using (Stream fs = File.Open(language.Language + "Dawg.bin", FileMode.Open, FileAccess.Read))
    {
        return Dawg<bool>.Load(fs);
    }
}
/// <summary>
/// Writes up to ten rule suggestions for <paramref name="prefixText"/> to the
/// response as JSON. The prefix is stripped of stress marks, lower-cased and
/// reversed before it is matched against the reverse index.
/// </summary>
/// <param name="prefixText">Text typed so far by the user.</param>
protected void GetRules(string prefixText)
{
    var reversedPrefix = DictionaryHelper.RemoveStressMarks(prefixText).ToLowerInvariant().Reverse();
    var dictionary = new FileBasedDictionary(Context.Server);

    try
    {
        Dawg<string> reverseIndex;

        // The index stream is only needed while the DAWG is being loaded.
        using (Stream indexStream = dictionary.OpenReverseIndex())
        {
            reverseIndex = Dawg<string>.Load(indexStream, reader =>
            {
                string payload = reader.ReadString();

                // Empty strings are stored in place of null payloads.
                return payload == String.Empty ? null : payload;
            });
        }

        // Match only as much of the prefix as the index actually contains.
        int matchedLength = reverseIndex.GetLongestCommonPrefixLength(reversedPrefix);

        var suggestions = reverseIndex.MatchPrefix(reversedPrefix.Take(matchedLength))
            .GroupBy(entry => entry.Value)
            // Keep one representative entry per distinct rule.
            .Select(sameRule => sameRule.First())
            .Select(entry => entry.Value + DictionaryHelper.RuleLineDelimiter + new string(entry.Key.Reverse().ToArray()))
            .Take(10)
            .ToArray();

        WriteJSONToResponse(suggestions);
    }
    catch (Exception e)
    {
        // Best effort: tell the user the dictionary is unavailable and notify the admin.
        WriteJSONToResponse(new[] { "Доступ к словарю в данный момент отсутствует. Возможно происходит построение индексов." });
        Email.SendAdminEmail("GetRules", e.ToString());
    }
}
/// <summary>
/// Builds a small DAWG with integer payloads, serializes it to memory, loads it
/// back and checks that present keys keep their payloads while absent keys
/// yield 0.
/// </summary>
public void PersistenceTest()
{
    var dawgBuilder = new DawgBuilder<int>();

    dawgBuilder.Insert("cone", 10);
    dawgBuilder.Insert("bone", 10);
    dawgBuilder.Insert("gone", 9);
    dawgBuilder.Insert("go", 5);

    var dawg = dawgBuilder.BuildDawg();

    byte[] serialized;
    using (var memoryStream = new MemoryStream())
    {
        dawg.SaveTo(memoryStream, (w, p) => w.Write(p));

        // ToArray copies exactly the bytes written. GetBuffer would hand back
        // the whole internal buffer, including unused capacity past the data.
        serialized = memoryStream.ToArray();
    }

    Dawg<int> rehydrated;
    using (var input = new MemoryStream(serialized))
    {
        rehydrated = Dawg<int>.Load(input, r => r.ReadInt32());
    }

    Assert.AreEqual(10, rehydrated["cone"]);
    Assert.AreEqual(10, rehydrated["bone"]);
    Assert.AreEqual(0, rehydrated["cones"]);
    Assert.AreEqual(9, rehydrated["gone"]);
    Assert.AreEqual(5, rehydrated["go"]);
    Assert.AreEqual(0, rehydrated["god"]);
}
/// <summary>
/// Reads every word (regex <c>\w+</c>) from the text file, builds a DAWG from
/// those words, saves it to the binary file and loads it back again.
/// </summary>
public void BoingDawg_Generate()
{
    const string textFile = "boing_crosschecks.txt"; //const string textFile = "englishWords.txt";
    const string binFile = "boingDAWG.bin"; //const string binFile = "englishDawg.bin";

    string fileContents = File.ReadAllText(textFile);
    List<string> boingWords = Regex.Matches(fileContents, "\\w+").Select(m => m.Value).ToList();

    DawgBuilder<bool> dawgBuilder = new();
    foreach (string word in boingWords)
    {
        dawgBuilder.Insert(word, true);
    }

    Dawg<bool> dawg = dawgBuilder.BuildDawg(); // Computer is working. Please wait ...

    using (FileStream file = File.Create(binFile))
    {
        dawg.SaveTo(file);
    }

    // Now read the file back in. The stream is disposed when the method exits,
    // so the file handle is no longer leaked.
    using FileStream input = File.Open(binFile, FileMode.Open);
    Dawg<bool> dawg2 = Dawg<bool>.Load(input);
}
/// <summary>
/// Checks prefix lookups on a DAWG built from "tap", "taps", "top" and "tops".
/// </summary>
public void FindPrefixTapsTops()
{
    var dawg = Dawg.CreateBuilder(new[] { "tap", "taps", "top", "tops" }).Build();

    // "to" matches both "top" words.
    var startingWithTo = dawg.WithPrefix("to").ToArray();
    Assert.Contains("top", startingWithTo);
    Assert.Contains("tops", startingWithTo);

    // "tap" matches both "tap" words.
    var startingWithTap = dawg.WithPrefix("tap").ToArray();
    Assert.Contains("tap", startingWithTap);
    Assert.Contains("taps", startingWithTap);

    // No word starts with "tu".
    Assert.Empty(dawg.WithPrefix("tu"));

    // "t" matches every word.
    var startingWithT = dawg.WithPrefix("t").ToArray();
    Assert.Contains("tap", startingWithT);
    Assert.Contains("taps", startingWithT);
    Assert.Contains("top", startingWithT);
    Assert.Contains("tops", startingWithT);
}
/// <summary>
/// Builds a DAWG that maps every word form in <paramref name="mrdFile"/> to the
/// collection of its interpretations, logging progress and timing to the console.
/// </summary>
/// <param name="mrdFile">Reader whose <c>AllForms</c> are inserted.</param>
/// <returns>The built DAWG.</returns>
public static Dawg<FormInterpretations> CreateDAWG(MRDFileReader mrdFile)
{
    // Stopwatch is monotonic, so the reported duration is not affected by
    // system clock adjustments the way a DateTime.Now difference is.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    Console.WriteLine("Inserting forms in DAWG... Please wait...");
    DawgSharp.DawgBuilder<FormInterpretations> dawgBuilder = new DawgBuilder<FormInterpretations>();
    UInt64 cntForms = 0;

    foreach (WordForm f in mrdFile.AllForms)
    {
        // The surface form is assembled from its four parts.
        string word = f.Prefix + f.Flexia.Prefix + f.Lemma.Base + f.Flexia.Flexion;

        // Several forms can share one spelling; they accumulate in a single payload.
        FormInterpretations payload = null;
        dawgBuilder.TryGetValue(word, out payload);
        if (payload == null)
        {
            payload = new FormInterpretations();
            dawgBuilder.Insert(word, payload);
        }

        payload.Add(f);
        cntForms++;
    }

    Console.WriteLine("All forms count: " + cntForms);
    Console.WriteLine("Building... please wait...");
    Dawg<FormInterpretations> dawg = dawgBuilder.BuildDawg();
    Console.WriteLine("DAWG create time: {0}", stopwatch.Elapsed);
    return dawg;
}
/// <summary>
/// Tells whether a word counts as valid: either every word is accepted, or the
/// upper-cased word is looked up in the given DAWG.
/// </summary>
/// <param name="dawg">Dawg to use</param>
/// <param name="word">Word to check</param>
/// <param name="alwaysExists">When true, any word is considered valid and the DAWG is not consulted</param>
/// <returns>True when the word is considered valid</returns>
public static bool CheckWordValidity(Dawg<bool> dawg, string word, bool alwaysExists = false)
{
    // Short-circuit keeps the lookup from running when every word is accepted.
    return alwaysExists || dawg[word.ToUpper()];
}
/// <summary>
/// Reads a <c>DawgSearchableIndex</c> from a gzip-compressed stream: first a
/// length-prefixed <c>RangePostingsList</c>, then the DAWG itself.
/// </summary>
/// <param name="stream">Compressed input; it is left open after reading.</param>
/// <returns>The index built from the DAWG and the list read before it.</returns>
public static DawgSearchableIndex Deserialize(Stream stream)
{
    using (var decompressed = new GZipStream(stream, CompressionMode.Decompress, leaveOpen: true))
    {
        // Order matters: the postings list precedes the DAWG in the stream.
        var documents = Serializer.DeserializeWithLengthPrefix<RangePostingsList>(decompressed, PrefixStyle.Base128);
        var index = Dawg<RangePostingsList>.Load(decompressed, readPayload: DeserializePayload);

        return new DawgSearchableIndex(index, documents);
    }
}
/// <summary>
/// Initializes the window's components, creates the <c>Dawg</c> and hands it to
/// a new <c>ViewModelDawg</c>.
/// </summary>
public MainWindow()
{
    InitializeComponent();

    // The view model is constructed from the DAWG, so the DAWG comes first.
    d = new Dawg();
    vmDawg = new ViewModelDawg(d);

    // UI binding is currently switched off:
    //BindDawgToUI();
    //DataContext = vmDawg;
}
/// <summary>
/// Rebuilds the DAWG from <c>Dicts\morphs.mrd</c> under the working directory,
/// stores it in <c>_dawg</c> and saves it to <paramref name="path"/>.
/// </summary>
/// <param name="path">File the serialized DAWG is written to (created or overwritten).</param>
private void _rebuildDAWG(string path)
{
    MRDFileReader mrdFile = new MRDFileReader(_gramtab);
    mrdFile.LoadMrd(Path.Combine(_workDir, @"Dicts\morphs.mrd"));

    _dawg = CreateDAWG(mrdFile);

    // Dispose the output stream so the data is flushed and the file handle is
    // released right away; previously the stream was never closed.
    using (var output = File.Create(path))
    {
        _dawg.SaveTo(output, WritePayload);
    }
}
/// <summary>
/// Tells whether <paramref name="url"/> is in the list. The DAWG is built from
/// the builder on first use, after which the builder is released.
/// </summary>
/// <param name="url">URL to look up.</param>
/// <returns>The DAWG's value for <paramref name="url"/>.</returns>
public bool IsURLInList(string url)
{
    if (dawg == null)
    {
        dawg = dawgBuilder.BuildDawg();
        dawgBuilder = null;
    }

    // Look up the argument itself; the previous code queried the literal
    // string "url", so every call returned the same answer.
    return dawg[url];
}
/// <summary>
/// Loads the benchmark inputs: the DAWG from <c>SaveLocation</c> and the query
/// words from <c>Query1K</c>.
/// </summary>
public DawgBenchmark()
{
    var basePath = string.Empty;

    // Nested blocks keep the DAWG stream open until the query words are built,
    // matching the lifetime of the original using declarations.
    using (var dawgStream = File.OpenRead(basePath + SaveLocation))
    {
        _dawg = new Dawg(dawgStream);

        using (var queryStream = File.OpenRead(basePath + Query1K))
        {
            _words = BuildQuery1K(queryStream);
        }
    }
}
/// <summary>
/// Asserts, <paramref name="times"/> times over, that the DAWG contains every
/// word in <paramref name="toFind"/>.
/// </summary>
/// <param name="toFind">Words expected to be present.</param>
/// <param name="dawg">DAWG to query.</param>
/// <param name="times">Number of passes over the word set.</param>
static void FindWords(HashSet<string> toFind, Dawg dawg, int times)
{
    for (var pass = 0; pass < times; pass++)
    {
        foreach (var expectedWord in toFind)
        {
            var found = dawg.Contains(expectedWord);
            Assert(true, found);
        }
    }
}
/// <summary>
/// Starts from a fresh registry and a fresh DAWG instance, appends every
/// key/value pair from <paramref name="data"/> and returns the instance.
/// </summary>
/// <param name="data">Key sequences with their associated values.</param>
/// <returns>The DAWG instance the pairs were appended to.</returns>
public virtual Dawg<TKey, TValue> Build(IEnumerable<KeyValuePair<IEnumerable<TKey>, TValue>> data)
{
    // Reset builder state so earlier builds do not leak into this one.
    registry = new Dictionary<IDawgNode<TKey, TValue>, IDawgNode<TKey, TValue>>();
    instance = new Dawg<TKey, TValue>();

    foreach (var entry in data)
    {
        Append(entry.Key, entry.Value);
    }

    return instance;
}
/// <summary>
/// Builds a prefix matcher backed by a DAWG created from the given dictionary file.
/// </summary>
/// <param name="dictionaryFile">Path passed to <c>WordDictionary</c>.</param>
/// <returns>A <c>PrefixMatcher</c> wrapping the built DAWG.</returns>
protected override IPrefixMatcher Build(string dictionaryFile)
{
    using (var source = new WordDictionary(dictionaryFile))
    {
        // The DAWG is fully built while the dictionary source is still open.
        return new PrefixMatcher(Dawg.CreateBuilder(source).Build());
    }
}
/// <summary>
/// Round-trips a DAWG through a temporary file: saves it, loads it back and
/// returns the loaded copy.
/// </summary>
/// <param name="dawg">DAWG to serialize.</param>
/// <returns>The DAWG read back from the temporary file.</returns>
private static Dawg<bool> SaveToFileAndLoadBack(Dawg<bool> dawg)
{
    string binFilePath = Path.GetTempFileName();

    try
    {
        using (var output = File.OpenWrite(binFilePath))
        {
            dawg.SaveTo(output);
        }

        // Dispose the read stream too; it used to be left open.
        // NOTE(review): assumes Dawg<bool>.Load reads the stream eagerly — confirm.
        using (var input = File.OpenRead(binFilePath))
        {
            return Dawg<bool>.Load(input);
        }
    }
    finally
    {
        // GetTempFileName creates the file on disk, so remove it when done.
        File.Delete(binFilePath);
    }
}
/// <summary>
/// Sets the builder to its initial state: an empty registry, a new DAWG
/// instance, no tracked states or comparer, a zero prefix length, an empty
/// prefix path and a new <c>RegisteredPropertyHandler</c>.
/// </summary>
protected AbstractDacukBuilder()
{
    // Node registry and the DAWG under construction.
    registry = new Dictionary<IDawgNode<TKey, TValue>, IDawgNode<TKey, TValue>>();
    instance = new Dawg<TKey, TValue>();

    // Nothing is tracked yet.
    confluxState = null;
    lastState = null;
    comparer = null;

    // Prefix bookkeeping starts out empty.
    prefixLenght = 0;
    prefixPath = new Stack<IDawgNode<TKey, TValue>>();

    Registered = new RegisteredPropertyHandler();
}
/// <summary>
/// Creates a builder from "AAHS", "AALIIS" and "AALS"; the case passes as long
/// as builder creation does not throw.
/// </summary>
public void Case15()
{
    var words = new[]
    {
        "AAHS",
        "AALIIS",
        "AALS",
    };

    // No assertions: only builder creation is exercised.
    var createdBuilder = Dawg.CreateBuilder(words);
}
/// <summary>
/// Collects the keys of every DAWG entry that matches the given prefix.
/// </summary>
/// <param name="prefix">Prefix to match.</param>
/// <returns>The matching words, in the order the DAWG yields them.</returns>
public IEnumerable<string> GetWordsWithGivenPrefix(string prefix)
{
    var words = new Queue<string>();

    // Only the key (the word) of each match is kept; the payload is ignored.
    foreach (var match in Dawg.MatchPrefix(prefix))
    {
        words.Enqueue(match.Key);
    }

    return words;
}
/// <summary>
/// Loads a DAWG from the stream and looks the word up in it.
/// </summary>
/// <param name="word">Word to look up</param>
/// <param name="fs">Stream the DAWG is loaded from</param>
/// <returns>TRUE if found</returns>
public static bool WordExists(String word, Stream fs)
{
    // A null payload reader is passed explicitly to Load.
    Func<BinaryReader, bool> readPayload = null;

    return Dawg<bool>.Load(fs, readPayload)[word];
}
/// <summary>
/// Builds a prefix matcher by inserting every word of the dictionary file,
/// upper-cased, into a new DAWG.
/// </summary>
/// <param name="dictionaryFile">Path passed to <c>WordDictionary</c>.</param>
/// <returns>A <c>PrefixMatcher</c> wrapping the filled DAWG.</returns>
protected override IPrefixMatcher Build(string dictionaryFile)
{
    using (var source = new WordDictionary(dictionaryFile))
    {
        var index = new Dawg();

        foreach (var entry in source)
        {
            // Words are stored in upper case.
            var upperCased = entry.ToUpper();
            index.Insert(upperCased);
        }

        return new PrefixMatcher(index);
    }
}
/// <summary>
/// Builds a DAWG from five three-letter words and checks that "boa" is found.
/// </summary>
public void Case7()
{
    var dawg = Dawg.CreateBuilder(new[] { "bcn", "acn", "bon", "boa", "asn" }).Build();

    var containsBoa = dawg.Contains("boa");

    Assert.True(containsBoa);
}
/// <summary>
/// Builds a DAWG from "RIG", "RIN" and "RIE" and checks that the prefix "R"
/// yields exactly three words.
/// </summary>
public void Case14()
{
    var builtDawg = Dawg.CreateBuilder(new[] { "RIG", "RIN", "RIE" }).Build();

    var startingWithR = builtDawg.WithPrefix("R").ToArray();

    Assert.Equal(3, startingWithR.Length);
}
/// <summary>
/// Builds the DAWG, serializes it with <c>SaveAsMatrixDawg</c> into memory and
/// returns the copy loaded back from those bytes.
/// </summary>
/// <param name="dawgBuilder">Builder to build the DAWG from.</param>
/// <returns>The DAWG rehydrated from the serialized bytes.</returns>
protected override Dawg<TPayload> GetDawg<TPayload>(DawgBuilder<TPayload> dawgBuilder)
{
    var dawg = dawgBuilder.BuildDawg();

    byte[] serialized;
    using (var memoryStream = new MemoryStream())
    {
        // 612/618 are the obsolete-member warnings; they are suppressed only
        // around the SaveAsMatrixDawg call.
#pragma warning disable 612,618
        dawg.SaveAsMatrixDawg(memoryStream);
#pragma warning restore 612,618

        // ToArray copies exactly the bytes written. GetBuffer would hand back
        // the whole internal buffer, including unused capacity past the data.
        serialized = memoryStream.ToArray();
    }

    using (var input = new MemoryStream(serialized))
    {
        return Dawg<TPayload>.Load(input);
    }
}
/// <summary>
/// Builds a DAWG from five similar words and checks that "AAHED" is found
/// while "AAHEDS" and "AHAED" are not.
/// </summary>
public void Case8()
{
    var dawg = Dawg.CreateBuilder(new[] { "AAHEED", "AAHED", "OOHED", "AALID", "AAED" }).Build();

    // Present word.
    Assert.True(dawg.Contains("AAHED"));

    // Absent: an extra trailing letter, and transposed letters.
    Assert.False(dawg.Contains("AAHEDS"));
    Assert.False(dawg.Contains("AHAED"));
}
/// <summary>
/// Loads the DAWG from <c>forms.dawg</c> in the working directory and prints
/// load statistics; when the file does not exist the DAWG is rebuilt instead.
/// </summary>
public void LoadDAWG()
{
    string path = Path.Combine(_workDir, "forms.dawg");

    if (!File.Exists(path))
    {
        _rebuildDAWG(path);
        return;
    }

    // Stopwatch is monotonic, unlike a DateTime.Now difference.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    // Dispose the stream so the file handle is released once loading is done;
    // previously the stream was never closed.
    // NOTE(review): assumes Dawg.Load reads the stream eagerly — confirm.
    using (var input = File.Open(path, FileMode.Open))
    {
        _dawg = Dawg<FormInterpretations>.Load(input, ReadPayload);
    }

    Console.WriteLine("DAWG load time: {0}", stopwatch.Elapsed);
    Console.WriteLine("DAWG nodes: {0}", _dawg.GetNodeCount());
    Console.WriteLine("DAWG count {0}", _dawg.Count());
}
/// <summary>
/// Loads the list of valid words from <c>Resources\ods4.txt</c> next to the
/// executing assembly, then loads the DAWG from the embedded resource whose
/// name ends with "ODS4_DAWG.bin".
/// The original author describes the words as coming from the Collins
/// dictionary of valid Scrabble words.
/// </summary>
private void LoadWords()
{
    var assembly = Assembly.GetExecutingAssembly();

    ValidWords = new List<string>();
    string path = Path.Combine(Path.GetDirectoryName(assembly.Location), @"Resources\ods4.txt");
    foreach (var w in File.ReadAllLines(path))
    {
        ValidWords.Add(w);
    }

    // Single() throws unless exactly one embedded resource matches.
    string resourceName = assembly.GetManifestResourceNames().Single(str => str.EndsWith("ODS4_DAWG.bin"));

    // The DAWG is read straight from the resource stream; the StreamReader
    // wrapper was only ever used for its BaseStream, so it is dropped.
    using (Stream stream = assembly.GetManifestResourceStream(resourceName))
    {
        Dawg = Dawg<bool>.Load(stream);
    }
}
/// <summary>
/// Fills a <c>DawgCreator</c> with the first <c>numNodes</c> nodes, saves it to
/// the lexicon file and constructs a <c>Dawg</c> from that file. The test
/// passes when none of these steps throws.
/// </summary>
public void DawgCreator_SaveDawg_ReadDawg()
{
    // create
    var creator = new DawgCreator(lexiconName, numNodes, numWords, numReversePartWords);

    // add nodes
    int nodeIndex = 0;
    while (nodeIndex < numNodes)
    {
        creator.AddNode(nodes[nodeIndex]);
        nodeIndex++;
    }

    // save
    creator.SaveDawg(lexiconFileName);

    // read it back:-)
    var reloaded = new Dawg(lexiconFileName);

    // let us hope for no exceptions!
}
/// <summary>
/// Verifies that the DAWG holds exactly the given words: the two sets have the
/// same size, every given word is among the DAWG's words, and
/// <c>Contains</c> succeeds for each given word.
/// </summary>
/// <param name="words">Words the DAWG was built from.</param>
/// <param name="dawg">DAWG under test.</param>
static void TestOwnWordsExits(string[] words, Dawg dawg)
{
    var original = words.ToHashSet();
    var dawgWords = dawg.ToHashSet();
    Assert(true, original.Count == dawgWords.Count, "The hash do not contains the same words");

    if (!original.IsSubsetOf(dawgWords))
    {
        // Report the actual differences; they used to be computed and then
        // thrown away, leaving only a generic failure message.
        var missing = string.Join(", ", original.Except(dawgWords).ToArray());
        var unexpected = string.Join(", ", dawgWords.Except(original).ToArray());
        Assert(true, false, $"different sets; missing from DAWG: [{missing}]; unexpected in DAWG: [{unexpected}]");
    }

    foreach (var word in words)
    {
        Assert(true, dawg.Contains(word), $"DAWG Should contain {word}");
    }
}
/// <summary>
/// For every word, in parallel, generates variants (shuffled letters, leading
/// substrings, and copies with 's' characters inserted) and runs the
/// four-argument <c>Assert</c> on each variant with the hash set and the DAWG.
/// Prints how many variants were tried.
/// </summary>
/// <param name="words">Source words the variants are derived from.</param>
/// <param name="hash">Reference word set.</param>
/// <param name="dawg">DAWG under test.</param>
static void TestRandomGeneratedWords(string[] words, HashSet<string> hash, Dawg dawg)
{
    var factor = 1;
    var counter = 0L;
    var options = new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount };

    Parallel.For(0, words.Length, options, wordIndex =>
    {
        // Seeded per word, so each word sees the same random sequence on every run.
        var random = new Random(123 + wordIndex);
        var source = words[wordIndex];
        var letters = source.ToCharArray();
        var attempts = letters.Length * factor;

        // Variant 1: the word's letters shuffled in place.
        for (var attempt = 0; attempt < attempts; attempt++)
        {
            Shuffle(letters, random);
            var shuffled = new string(letters);
            Assert(shuffled, hash, dawg, $"Differs on shuffled {shuffled}");
            Interlocked.Increment(ref counter);
        }

        // The remaining variants are only generated for words of 3+ characters.
        if (source.Length >= 3)
        {
            // Variant 2: a leading substring of random length.
            for (var attempt = 0; attempt < attempts; attempt++)
            {
                var cutLength = random.Next(1, source.Length - 1);
                var cut = source.Substring(0, cutLength);
                Assert(cut, hash, dawg, $"Differs on substring {cut}");
                Interlocked.Increment(ref counter);
            }

            // Variant 3: a run of 's' inserted at a random position.
            // The position is drawn before the run length, as in the original call.
            for (var attempt = 0; attempt < attempts; attempt++)
            {
                var position = random.Next(0, source.Length - 1);
                var filler = "".PadLeft(random.Next(1, factor), 's');
                var insert = source.Insert(position, filler);
                Assert(insert, hash, dawg, $"Differs on insert {insert}");
                Interlocked.Increment(ref counter);
            }
        }
    });

    Console.Write($"{counter:n0} of random words tried ");
}
/// <summary>
/// Runs a prefix query against the DAWG for each prefix, repeating every query
/// <paramref name="times"/> times, and returns the result set of the last run
/// per prefix.
/// </summary>
/// <param name="dawg">DAWG to query.</param>
/// <param name="prefixes">Prefixes to search for.</param>
/// <param name="times">How many times each query is executed.</param>
/// <param name="max">Optional cap on the number of matches taken per query.</param>
/// <returns>
/// One set per prefix, in the same order as <paramref name="prefixes"/>; an
/// entry stays null when <paramref name="times"/> is not positive.
/// </returns>
static HashSet<string>[] DawgPrefixSearch(Dawg dawg, string[] prefixes, int times, int? max = null)
{
    var results = new HashSet<string>[prefixes.Length];

    for (var slot = 0; slot < prefixes.Length; slot++)
    {
        var prefix = prefixes[slot];

        for (var run = 0; run < times; run++)
        {
            var matches = dawg.WithPrefix(prefix);

            // Each run overwrites the previous result for this prefix.
            results[slot] = (max.HasValue ? matches.Take(max.Value) : matches).ToHashSet();
        }
    }

    return results;
}