/// <summary>
/// Creates a <c>Root</c> from a root dictionary line and registers it in
/// <paramref name="roots"/> under the line's main surface and under every
/// additional surface listed on the line.
/// </summary>
/// <param name="entry">Dictionary line; its Surfaces, Flags and Rules fields are split on commas and spaces, dropping empty tokens.</param>
/// <param name="roots">Surface dictionary that receives one entry per surface form.</param>
private void AddRoots(RootLine entry, MorphemeSurfaceDictionary<Root> roots)
{
    char[] separators = {',', ' '};

    string mainSurface = entry.Root;
    string[] extraSurfaces = entry.Surfaces.Split(separators, StringSplitOptions.RemoveEmptyEntries);
    string[] flagNames = entry.Flags.Split(separators, StringSplitOptions.RemoveEmptyEntries);
    string rootType = entry.Id;
    string[] ruleNames = entry.Rules.Split(separators, StringSplitOptions.RemoveEmptyEntries);

    // A line without a lexical form uses its main surface as the lexical form.
    string lexicalForm = string.IsNullOrEmpty(entry.Lex) ? mainSurface : entry.Lex;

    // These types additionally hand the main surface to the Root constructor.
    bool passesMainSurface;
    switch (rootType)
    {
        case "KISALTMA":
        case "ALINTI":
        case "KISALTMA_NOKTALI":
        case "HARF":
            passesMainSurface = true;
            break;
        default:
            passesMainSurface = false;
            break;
    }

    Root created;
    if (passesMainSurface)
    {
        created = new Root(rootType, lexicalForm, LabelSet.ConvertLabelNamesToIndexes(flagNames),
            _orthography.GetRules(ruleNames), mainSurface);
    }
    else
    {
        created = new Root(rootType, lexicalForm, LabelSet.ConvertLabelNamesToIndexes(flagNames),
            _orthography.GetRules(ruleNames));
    }

    // Register the word under its main surface...
    roots.Add(mainSurface, created);

    // ...and, if it has extra surfaces, under each of those as well.
    for (int i = 0; i < extraSurfaces.Length; i++)
    {
        roots.Add(extraSurfaces[i], created);
    }
}
/// <summary>
/// Creates a <c>Suffix</c> from a suffix dictionary line, stores it in
/// <paramref name="suffixesById"/> under its id and registers it in
/// <paramref name="suffixes"/> under each of its surfaces.
/// </summary>
/// <param name="entry">Dictionary line; its Flags, Rules and Surfaces fields are split on commas and spaces, dropping empty tokens.</param>
/// <param name="suffixesById">Id-to-suffix map; <c>Add</c> is called unconditionally with the line's id.</param>
/// <param name="suffixes">Surface dictionary that receives one entry per surface form.</param>
private void AddSuffix(SuffixDictionaryLine entry, Dictionary<string, Suffix> suffixesById, MorphemeSurfaceDictionary<Suffix> suffixes)
{
    char[] separators = {',', ' '};
    string suffixId = entry.Id;
    string lexicalForm = entry.Lex;

    // An unparsable type is reported on the console and replaced by MorphemeType.O.
    MorphemeType parsedType;
    bool typeIsValid = Enum.TryParse(entry.Type, out parsedType);
    if (!typeIsValid)
    {
        parsedType = MorphemeType.O;
        Console.WriteLine("Invalid Morpheme Type: " + entry.Type);
    }

    string[] flagNames = entry.Flags.Split(separators, StringSplitOptions.RemoveEmptyEntries);
    string[] ruleNames = entry.Rules.Split(separators, StringSplitOptions.RemoveEmptyEntries);

    Debug.Assert(entry.Surfaces != null, "entry.Surfaces != null");
    string[] surfaceForms = entry.Surfaces.Split(separators, StringSplitOptions.RemoveEmptyEntries);

    // Rules are resolved before the labels are converted, as in the original statement order.
    List<OrthographyRule> resolvedRules = _orthography.GetRules(ruleNames);
    var created = new Suffix(suffixId, lexicalForm, parsedType,
        LabelSet.ConvertLabelNamesToIndexes(flagNames), resolvedRules);

    suffixesById.Add(suffixId, created);

    for (int i = 0; i < surfaceForms.Length; i++)
    {
        suffixes.Add(surfaceForms[i], created);
    }
}
/// <summary>
/// Turns one suffix dictionary line into a <c>Suffix</c> and indexes it twice:
/// by id in <paramref name="suffixesById"/> and by every surface form in
/// <paramref name="suffixes"/>.
/// </summary>
/// <param name="entry">Source line. Flags, Rules and Surfaces are comma/space separated lists; empty tokens are ignored.</param>
/// <param name="suffixesById">Receives the suffix under <c>entry.Id</c> via <c>Add</c>.</param>
/// <param name="suffixes">Receives the suffix once per surface form.</param>
private void AddSuffix(SuffixDictionaryLine entry, Dictionary<string, Suffix> suffixesById, MorphemeSurfaceDictionary<Suffix> suffixes)
{
    const StringSplitOptions skipEmpty = StringSplitOptions.RemoveEmptyEntries;
    var delimiters = new[] { ',', ' ' };

    string key = entry.Id;
    string lexical = entry.Lex;

    MorphemeType kind;
    if (Enum.TryParse(entry.Type, out kind) == false)
    {
        // Unknown type names fall back to MorphemeType.O after being reported.
        kind = MorphemeType.O;
        Console.WriteLine("Invalid Morpheme Type: " + entry.Type);
    }

    string[] flagTokens = entry.Flags.Split(delimiters, skipEmpty);
    string[] ruleTokens = entry.Rules.Split(delimiters, skipEmpty);

    Debug.Assert(entry.Surfaces != null, "entry.Surfaces != null");
    var surfaceList = new List<string>(entry.Surfaces.Split(delimiters, skipEmpty));

    List<OrthographyRule> orthographyRules = _orthography.GetRules(ruleTokens);
    var result = new Suffix(
        key,
        lexical,
        kind,
        LabelSet.ConvertLabelNamesToIndexes(flagTokens),
        orthographyRules);

    suffixesById.Add(key, result);
    surfaceList.ForEach(form => suffixes.Add(form, result));
}
/// <summary>
/// Turns one root dictionary line into a <c>Root</c> and adds it to
/// <paramref name="roots"/> once for the main surface and once for each
/// additional surface on the line.
/// </summary>
/// <param name="entry">Source line. Surfaces, Flags and Rules are comma/space separated lists; empty tokens are ignored.</param>
/// <param name="roots">Surface dictionary to populate.</param>
private void AddRoots(RootLine entry, MorphemeSurfaceDictionary<Root> roots)
{
    const StringSplitOptions skipEmpty = StringSplitOptions.RemoveEmptyEntries;
    var delimiters = new[] { ',', ' ' };

    string primary = entry.Root;
    string[] alternates = entry.Surfaces.Split(delimiters, skipEmpty);
    string[] flagTokens = entry.Flags.Split(delimiters, skipEmpty);
    string kind = entry.Id;
    string[] ruleTokens = entry.Rules.Split(delimiters, skipEmpty);

    // With no lexical form on the line, the main surface stands in for it.
    string lexical = entry.Lex;
    if (string.IsNullOrEmpty(lexical))
    {
        lexical = primary;
    }

    // For these four types the main surface is passed to the Root constructor too.
    bool withSurface = kind == "KISALTMA"
                       || kind == "ALINTI"
                       || kind == "KISALTMA_NOKTALI"
                       || kind == "HARF";

    Root result = withSurface
        ? new Root(kind, lexical, LabelSet.ConvertLabelNamesToIndexes(flagTokens), _orthography.GetRules(ruleTokens), primary)
        : new Root(kind, lexical, LabelSet.ConvertLabelNamesToIndexes(flagTokens), _orthography.GetRules(ruleTokens));

    // Add the word under its main surface first,
    roots.Add(primary, result);

    // then under any extra surfaces it has.
    foreach (string alternate in alternates)
    {
        roots.Add(alternate, result);
    }
}
/// <summary>
/// Creates a <c>Root</c> from a root dictionary line, stores it in
/// <paramref name="rootsById"/> under the id <c>lex/pos</c> and registers it in
/// <paramref name="rootsBySurface"/> under the main surface and every additional
/// surface listed on the line.
/// </summary>
/// <param name="entry">Dictionary line; its Surfaces, Labels and Rules fields are split on commas and spaces, dropping empty tokens.</param>
/// <param name="rootsById">Id-to-root map. If the id is already present the existing root is kept and a warning is traced.</param>
/// <param name="rootsBySurface">Surface dictionary; receives one entry per surface form, even when the id was a duplicate.</param>
private void AddRoots(RootLine entry, Dictionary<string, Root> rootsById, MorphemeSurfaceDictionary<Root> rootsBySurface)
{
    var separators = new[] { ',', ' ' };

    var mainSurface = entry.Root;
    var surfaces = new List<string> { mainSurface };
    surfaces.AddRange(entry.Surfaces.Split(separators, StringSplitOptions.RemoveEmptyEntries));

    var labels = entry.Labels.Split(separators, StringSplitOptions.RemoveEmptyEntries);
    var pos = entry.Pos;
    var rules = entry.Rules.Split(separators, StringSplitOptions.RemoveEmptyEntries).ToList();

    var lex = entry.Lex;
    if (string.IsNullOrEmpty(lex))
    {
        // No lexical form on the line: fall back to the main surface. This check now
        // runs before the regex, because Regex.IsMatch throws ArgumentNullException
        // on a null input and would otherwise prevent the fallback.
        lex = mainSurface;
    }
    else if (Regex.IsMatch(lex, @"\p{L}[2-9]"))
    {
        // The lexical form contains a letter followed by a digit 2-9
        // (presumably a homonym index - confirm), so the extra rule is attached.
        rules.Add("DROP_ID_DIGIT");
    }

    var root = new Root(pos, lex, new ImmutableSortedSet<string>(surfaces), new ImmutableHashSet<string>(labels), _orthography.GetRules(rules));

    var id = lex + "/" + pos;
    if (!rootsById.ContainsKey(id))
    {
        rootsById.Add(id, root);
    }
    else
    {
        Trace.TraceEvent(TraceEventType.Warning, 0, $"Duplicate root: {id}");
    }

    foreach (var lexicalForm in surfaces)
    {
        rootsBySurface.Add(lexicalForm, root);
    }
}
/// <summary>
/// Creates a <c>Suffix</c> from a suffix dictionary line, stores it in
/// <paramref name="suffixesById"/> unless that id is already taken, and registers
/// it in <paramref name="suffixes"/> under each surface with underscores
/// replaced by spaces.
/// </summary>
/// <param name="entry">Dictionary line; its Labels, Rules and Surfaces fields are split on commas and spaces, dropping empty tokens.</param>
/// <param name="suffixesById">Id-to-suffix map. If the id is already present the existing suffix is kept and a warning is traced.</param>
/// <param name="suffixes">Surface dictionary; receives one entry per surface form, even when the id was a duplicate.</param>
private void AddSuffix(SuffixDictionaryLine entry, Dictionary<string, Suffix> suffixesById, MorphemeSurfaceDictionary<Suffix> suffixes)
{
    char[] separators = {',', ' '};
    string suffixId = entry.Id;
    string lexicalForm = entry.Lex;

    // An unparsable type is traced as an error and replaced by MorphemeType.O.
    MorphemeType parsedType;
    bool typeIsValid = Enum.TryParse(entry.Type, out parsedType);
    if (!typeIsValid)
    {
        parsedType = MorphemeType.O;
        Trace.TraceEvent(TraceEventType.Error, 0, $"Invalid Morpheme Type: {entry.Type}");
    }

    string[] labelNames = entry.Labels.Split(separators, StringSplitOptions.RemoveEmptyEntries);
    string[] ruleNames = entry.Rules.Split(separators, StringSplitOptions.RemoveEmptyEntries);

    Debug.Assert(entry.Surfaces != null, "entry.Surfaces != null");
    var surfaceForms = new List<string>(entry.Surfaces.Split(separators, StringSplitOptions.RemoveEmptyEntries));

    List<OrthographyRule> resolvedRules = _orthography.GetRules(ruleNames);
    var created = new Suffix(
        suffixId,
        lexicalForm,
        new ImmutableSortedSet<string>(surfaceForms),
        parsedType,
        new ImmutableHashSet<string>(labelNames),
        resolvedRules);

    if (!suffixesById.ContainsKey(suffixId))
    {
        suffixesById.Add(suffixId, created);
    }
    else
    {
        Trace.TraceEvent(TraceEventType.Warning, 0, $"Duplicate suffix: {suffixId}");
    }

    // The Suffix keeps the raw surfaces; only the lookup keys have '_' turned into ' '.
    for (int i = 0; i < surfaceForms.Count; i++)
    {
        suffixes.Add(surfaceForms[i].Replace('_', ' '), created);
    }
}
/// <summary>
/// Turns one root dictionary line into a <c>Root</c>, indexes it by the id
/// <c>lex/pos</c> in <paramref name="rootsById"/> and by every surface form
/// (main surface first) in <paramref name="rootsBySurface"/>.
/// </summary>
/// <param name="entry">Source line. Surfaces, Labels and Rules are comma/space separated lists; empty tokens are ignored.</param>
/// <param name="rootsById">Receives the root under its id; an id that is already present is left untouched and a warning is traced.</param>
/// <param name="rootsBySurface">Receives the root once per surface form, regardless of whether the id was a duplicate.</param>
private void AddRoots(RootLine entry, Dictionary<string, Root> rootsById, MorphemeSurfaceDictionary<Root> rootsBySurface)
{
    var mainSurface = entry.Root;
    var surfaces = new List<string> {mainSurface};
    surfaces.AddRange(entry.Surfaces.Split(new[] {',', ' '}, StringSplitOptions.RemoveEmptyEntries));
    var lex = entry.Lex;
    var labels = entry.Labels.Split(new[] {',', ' '}, StringSplitOptions.RemoveEmptyEntries);
    var pos = entry.Pos;
    var rules = entry.Rules.Split(new[] {',', ' '}, StringSplitOptions.RemoveEmptyEntries).ToList();

    // Guard the regex: Regex.IsMatch throws ArgumentNullException for a null input,
    // which previously made a line with a null Lex fail before the fallback below
    // could run. An empty Lex never matched, so skipping it changes nothing.
    if (!string.IsNullOrEmpty(lex) && Regex.IsMatch(lex, @"\p{L}[2-9]"))
    {
        // Letter followed by a digit 2-9 in the lexical form
        // (presumably a homonym index - confirm): attach the extra rule.
        rules.Add("DROP_ID_DIGIT");
    }

    // A line without a lexical form uses its main surface instead.
    if (string.IsNullOrEmpty(lex))
    {
        lex = mainSurface;
    }

    var root = new Root(pos, lex, new ImmutableSortedSet<string>(surfaces), new ImmutableHashSet<string>(labels), _orthography.GetRules(rules));

    var id = lex + "/" + pos;
    if (rootsById.ContainsKey(id))
    {
        Trace.TraceEvent(TraceEventType.Warning, 0, $"Duplicate root: {id}");
    }
    else
    {
        rootsById.Add(id, root);
    }

    foreach (var lexicalForm in surfaces)
    {
        rootsBySurface.Add(lexicalForm, root);
    }
}
/// <summary>
/// Turns one suffix dictionary line into a <c>Suffix</c>, indexes it by id in
/// <paramref name="suffixesById"/> (first occurrence wins) and by every surface
/// form in <paramref name="suffixes"/>, where underscores in the surface are
/// replaced by spaces for the lookup key.
/// </summary>
/// <param name="entry">Source line. Labels, Rules and Surfaces are comma/space separated lists; empty tokens are ignored.</param>
/// <param name="suffixesById">Receives the suffix under <c>entry.Id</c>; an id that is already present is left untouched and a warning is traced.</param>
/// <param name="suffixes">Receives the suffix once per surface form, regardless of whether the id was a duplicate.</param>
private void AddSuffix(SuffixDictionaryLine entry, Dictionary<string, Suffix> suffixesById, MorphemeSurfaceDictionary<Suffix> suffixes)
{
    const StringSplitOptions skipEmpty = StringSplitOptions.RemoveEmptyEntries;
    var delimiters = new[] {',', ' '};

    string key = entry.Id;
    string lexical = entry.Lex;

    MorphemeType kind;
    if (Enum.TryParse(entry.Type, out kind) == false)
    {
        // Unknown type names fall back to MorphemeType.O after being traced.
        kind = MorphemeType.O;
        Trace.TraceEvent(TraceEventType.Error, 0, $"Invalid Morpheme Type: {entry.Type}");
    }

    string[] labelTokens = entry.Labels.Split(delimiters, skipEmpty);
    string[] ruleTokens = entry.Rules.Split(delimiters, skipEmpty);

    Debug.Assert(entry.Surfaces != null, "entry.Surfaces != null");
    var surfaceList = new List<string>(entry.Surfaces.Split(delimiters, skipEmpty));

    List<OrthographyRule> orthographyRules = _orthography.GetRules(ruleTokens);
    var result = new Suffix(key, lexical, new ImmutableSortedSet<string>(surfaceList), kind,
        new ImmutableHashSet<string>(labelTokens), orthographyRules);

    bool alreadyKnown = suffixesById.ContainsKey(key);
    if (alreadyKnown)
    {
        Trace.TraceEvent(TraceEventType.Warning, 0, $"Duplicate suffix: {key}");
    }
    else
    {
        suffixesById.Add(key, result);
    }

    // Lookup keys use spaces where the dictionary line used underscores.
    surfaceList.ForEach(form => suffixes.Add(form.Replace('_', ' '), result));
}