/// <summary>
/// Creates a tagger and loads its lexeme dictionary from the file named by
/// <c>config.DictionaryFilename</c>.
/// </summary>
/// <param name="config">Configuration that supplies the dictionary file path.</param>
/// <remarks>
/// Each line is trimmed before use. Lines starting with <c>#</c> are skipped,
/// and lines for which <c>POSDictionaryEntry.Parse</c> returns <c>null</c> are
/// ignored. Entries are stored with <c>Dictionary.Add</c>, so a lexeme that
/// occurs twice in the file raises <see cref="System.ArgumentException"/>.
/// </remarks>
public MaximumLikelihoodTagger(Config config)
{
    _config = config;
    _dictionary = new Dictionary<string, POSDictionaryEntry>();

    // The using block guarantees the file handle is released even when a
    // malformed line or a duplicate lexeme throws part-way through loading.
    using (StreamReader reader = new StreamReader(_config.DictionaryFilename))
    {
        string rawLine;
        while ((rawLine = reader.ReadLine()) != null)
        {
            string line = rawLine.Trim();

            // Ordinal comparison: '#' is a file-format marker, not linguistic text,
            // so the check must not depend on the current culture.
            if (line.StartsWith("#", System.StringComparison.Ordinal))
            {
                continue;
            }

            POSDictionaryEntry? entry = POSDictionaryEntry.Parse(line);
            if (entry.HasValue)
            {
                _dictionary.Add(entry.Value.Lexeme, entry.Value);
            }
        }
    }
}
/// <summary>
/// Parses one dictionary line of the form
/// <c>lexeme&lt;TAB&gt;TAG:weight[, TAG:weight ...]</c> into an entry.
/// </summary>
/// <param name="line">The line to parse.</param>
/// <returns>
/// The parsed entry, or <c>null</c> when the line does not consist of exactly
/// two non-empty tab-separated fields.
/// </returns>
/// <remarks>
/// Tag elements in the second field are separated by commas and/or spaces, and
/// each element is split on <c>:</c> into a <c>PersianPartOfSpeech</c> name and
/// a weight. Weights are read with the invariant culture, so <c>.</c> is always
/// the decimal separator regardless of the current thread culture.
/// </remarks>
/// <exception cref="System.IndexOutOfRangeException">An element lacks the tag or the weight part.</exception>
/// <exception cref="System.ArgumentException">A tag name is not a defined enum member, or a tag is repeated on the line.</exception>
/// <exception cref="System.FormatException">A weight is not a valid number.</exception>
public static POSDictionaryEntry? Parse(string line)
{
    string[] parts = line.Split(new[] { '\t' }, StringSplitOptions.RemoveEmptyEntries);
    if (parts.Length != 2)
    {
        return null;
    }

    // Allocate the entry only once the line is known to have the right shape.
    POSDictionaryEntry entry = new POSDictionaryEntry
    {
        Lexeme = parts[0],
        Tags = new Dictionary<PersianPartOfSpeech, double>()
    };

    string[] elements = parts[1].Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (string element in elements)
    {
        // Split each "TAG:weight" element once and reuse both halves.
        string[] pieces = element.Split(new[] { ':' }, StringSplitOptions.RemoveEmptyEntries);

        PersianPartOfSpeech pos = (PersianPartOfSpeech)Enum.Parse(typeof(PersianPartOfSpeech), pieces[0]);

        // The dictionary file is machine-readable data: parse with the invariant
        // culture so the result does not change with the user's locale.
        double weight = double.Parse(pieces[1], System.Globalization.CultureInfo.InvariantCulture);

        entry.Tags.Add(pos, weight);
    }

    return entry;
}