/// <summary>
/// Builds a DAWG that maps the full spelling of every word form in
/// <paramref name="mrdFile"/> to the <see cref="FormInterpretations"/> collected for that spelling.
/// </summary>
/// <remarks>
/// Progress messages, the number of processed forms and the elapsed time are written to the console.
/// </remarks>
/// <param name="mrdFile">Reader whose <c>AllForms</c> sequence supplies the word forms to insert.</param>
/// <returns>The DAWG produced by <c>DawgBuilder.BuildDawg()</c> after all forms were inserted.</returns>
public static Dawg<FormInterpretations> CreateDAWG(MRDFileReader mrdFile)
{
    // Stopwatch measures elapsed time directly; subtracting two DateTime.Now values
    // gives wrong durations whenever the wall clock is adjusted (DST, NTP sync).
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    Console.WriteLine("Inserting forms in DAWG... Please wait...");

    var dawgBuilder = new DawgBuilder<FormInterpretations>();
    ulong cntForms = 0;

    foreach (WordForm f in mrdFile.AllForms)
    {
        // Full spelling = form prefix + flexia prefix + lemma base + flexion.
        string word = f.Prefix + f.Flexia.Prefix + f.Lemma.Base + f.Flexia.Flexion;

        // Several forms can share one spelling, so reuse the payload already stored for it.
        // The payload itself is tested for null (not the TryGetValue result), exactly as
        // before. NOTE(review): presumably TryGetValue can succeed for a key that carries
        // no payload yet; confirm against DawgSharp before simplifying.
        FormInterpretations payload;
        dawgBuilder.TryGetValue(word, out payload);
        if (payload == null)
        {
            payload = new FormInterpretations();
            dawgBuilder.Insert(word, payload);
        }

        payload.Add(f);
        cntForms++;
    }

    Console.WriteLine("All forms count: " + cntForms);
    Console.WriteLine("Building... please wait...");

    Dawg<FormInterpretations> dawg = dawgBuilder.BuildDawg();

    Console.WriteLine("DAWG create time: {0}", stopwatch.Elapsed);
    return dawg;
}
/// <summary>
/// Looks up <paramref name="word"/> in the DAWG after converting it to upper case
/// with the invariant culture.
/// </summary>
/// <param name="word">Word to look up; any letter casing is accepted.</param>
/// <returns>Whatever the DAWG indexer yields for the upper-cased word.</returns>
public FormInterpretations Lookup(string word)
{
    // The DAWG is queried with the upper-cased spelling only.
    return _dawg[word.ToUpperInvariant()];
}
/// <summary>
/// Reads one <see cref="FormInterpretations"/> payload from <paramref name="binaryReader"/>.
/// </summary>
/// <param name="binaryReader">Reader positioned at the start of a serialized payload.</param>
/// <returns>A new payload whose <c>AccentToAncodes</c> holds every entry that was read.</returns>
private FormInterpretations ReadPayload(BinaryReader binaryReader)
{
    // Serialized layout:
    //   entry count          (1 byte)
    //   per entry:
    //     accent             (1 byte)
    //     ancode count       (1 byte)
    //     ancode numbers     (ancode count x ushort)
    var result = new FormInterpretations();

    for (int entriesLeft = binaryReader.ReadByte(); entriesLeft > 0; entriesLeft--)
    {
        int accent = binaryReader.ReadByte();
        int ancodeCount = binaryReader.ReadByte();

        List<ushort> ancodes = new List<ushort>();
        for (int i = 0; i < ancodeCount; i++)
        {
            ancodes.Add(binaryReader.ReadUInt16());
        }

        result.AccentToAncodes.Add(accent, ancodes);
    }

    return result;
}
/// <summary>
/// Writes <paramref name="formInterpretations"/> to <paramref name="binaryWriter"/> in the
/// layout that <c>ReadPayload</c> reads back.
/// </summary>
/// <param name="binaryWriter">Destination writer.</param>
/// <param name="formInterpretations">Payload whose <c>AccentToAncodes</c> entries are serialized.</param>
/// <exception cref="InvalidOperationException">
/// The number of accent entries, or the number of ancodes in one entry, exceeds 255 and
/// therefore cannot be stored in the single count byte of the format.
/// </exception>
private void WritePayload(BinaryWriter binaryWriter, FormInterpretations formInterpretations)
{
    // Serialized layout:
    //   entry count          (1 byte)
    //   per entry:
    //     accent             (1 byte)
    //     ancode count       (1 byte)
    //     ancode numbers     (ancode count x ushort)
    int entryCount = formInterpretations.AccentToAncodes.Count;
    if (entryCount > byte.MaxValue)
    {
        // An unchecked (byte) cast would wrap silently and desynchronize the reader.
        throw new InvalidOperationException(
            "Cannot serialize " + entryCount + " accent entries: the format stores the count in one byte.");
    }

    binaryWriter.Write((byte)entryCount);

    foreach (var kvp in formInterpretations.AccentToAncodes)
    {
        int ancodeCount = kvp.Value.Count;
        if (ancodeCount > byte.MaxValue)
        {
            throw new InvalidOperationException(
                "Cannot serialize " + ancodeCount + " ancodes for one accent: the format stores the count in one byte.");
        }

        // NOTE(review): the accent key is narrowed to a byte without a range check, as before;
        // confirm that keys always fit into 0..255.
        binaryWriter.Write((byte)kvp.Key);
        binaryWriter.Write((byte)ancodeCount);

        foreach (ushort ancodeNo in kvp.Value)
        {
            binaryWriter.Write(ancodeNo);
        }
    }
}
/// <summary>
/// Looks up <paramref name="word"/> and prints the result to the console: one line per
/// accent with the accented spelling, followed by one line per ancode of that accent,
/// or a "not found" line when the lookup yields <c>null</c>.
/// </summary>
/// <param name="word">Word to look up and print.</param>
public void PrintLookup(string word)
{
    FormInterpretations found = Lookup(word);
    if (found == null)
    {
        Console.WriteLine(word + " not found");
        return;
    }

    foreach (var entry in found.AccentToAncodes)
    {
        // The accent is applied to the word exactly as the caller passed it in.
        Console.WriteLine("Accent {0} {1}", entry.Key, AccentHelper.SetAccent(word, entry.Key));

        foreach (ushort ancodeNo in entry.Value)
        {
            Console.WriteLine(" {0}", _gramtab.LookupByNo(ancodeNo));
        }
    }
}
/// <summary>
/// Prints the current word and files it into one of three collections depending on the
/// lookup result: <c>NotFoundWords</c> when nothing is found, <c>SingleAccentWords</c>
/// (with the accent applied) when exactly one accent is known, and
/// <c>ManyAccentWords</c> for any other accent count.
/// </summary>
public virtual void ProcessWord()
{
    Console.WriteLine(_currentWord);

    FormInterpretations found = _morph.Lookup(_currentWord);
    if (found == null)
    {
        NotFoundWords.Add(_currentWord);
        return;
    }

    if (found.AccentToAncodes.Count != 1)
    {
        // Any count other than exactly one lands here.
        ManyAccentWords.Add(_currentWord);
        return;
    }

    // Exactly one accent: store the word with that accent applied.
    SingleAccentWords.Add(AccentHelper.SetAccent(_currentWord, found.AccentToAncodes.Keys.First()));
}