Example #1
0
        /// <summary>
        /// Creates a <see cref="Root"/> from a root dictionary line and registers it in
        /// <paramref name="roots"/> under its main surface and under each additional surface.
        /// </summary>
        /// <param name="entry">Dictionary line supplying the root text, extra surfaces, lex, flags, id and rules.</param>
        /// <param name="roots">Surface-keyed dictionary that receives the created root.</param>
        private void AddRoots(RootLine entry, MorphemeSurfaceDictionary<Root> roots)
        {
            // Surfaces, flags and rules are all lists delimited by commas and/or spaces.
            char[] separators = {',', ' '};

            string mainSurface = entry.Root;
            string[] extraSurfaces = entry.Surfaces.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            string[] flagNames = entry.Flags.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            string rootType = entry.Id;
            string[] ruleNames = entry.Rules.Split(separators, StringSplitOptions.RemoveEmptyEntries);

            // When the line carries no lex, the main surface is used in its place.
            string lexeme = string.IsNullOrEmpty(entry.Lex) ? mainSurface : entry.Lex;

            var labelIndexes = LabelSet.ConvertLabelNamesToIndexes(flagNames);
            var orthographyRules = _orthography.GetRules(ruleNames);

            Root root;
            switch (rootType)
            {
                case "KISALTMA":
                case "ALINTI":
                case "KISALTMA_NOKTALI":
                case "HARF":
                    // These types additionally hand the raw root text to the constructor.
                    root = new Root(rootType, lexeme, labelIndexes, orthographyRules, mainSurface);
                    break;
                default:
                    root = new Root(rootType, lexeme, labelIndexes, orthographyRules);
                    break;
            }

            // Register the word under its main surface first...
            roots.Add(mainSurface, root);

            // ...then under any extra surfaces it has.
            for (int i = 0; i < extraSurfaces.Length; i++)
            {
                roots.Add(extraSurfaces[i], root);
            }
        }
Example #2
0
        /// <summary>
        /// Creates a <see cref="Suffix"/> from a suffix dictionary line, registers it by id in
        /// <paramref name="suffixesById"/> and by each of its surfaces in <paramref name="suffixes"/>.
        /// </summary>
        /// <param name="entry">Dictionary line supplying the id, lex, type, flags, rules and surfaces.</param>
        /// <param name="suffixesById">Id-keyed dictionary that receives the suffix.</param>
        /// <param name="suffixes">Surface-keyed dictionary that receives the suffix once per surface.</param>
        private void AddSuffix(SuffixDictionaryLine entry,
            Dictionary<string, Suffix> suffixesById,
            MorphemeSurfaceDictionary<Suffix> suffixes)
        {
            // Flags, rules and surfaces are all lists delimited by commas and/or spaces.
            char[] separators = {',', ' '};

            string suffixId = entry.Id;
            string lexeme = entry.Lex;

            MorphemeType parsedType;
            bool typeRecognised = Enum.TryParse(entry.Type, out parsedType);
            if (!typeRecognised)
            {
                // Unrecognised type names fall back to MorphemeType.O and are reported on the console.
                parsedType = MorphemeType.O;
                Console.WriteLine("Invalid Morpheme Type: " + entry.Type);
            }

            string[] flagNames = entry.Flags.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            string[] ruleNames = entry.Rules.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            Debug.Assert(entry.Surfaces != null, "entry.Surfaces != null");
            string[] surfaceForms = entry.Surfaces.Split(separators, StringSplitOptions.RemoveEmptyEntries);

            List<OrthographyRule> orthographyRules = _orthography.GetRules(ruleNames);
            var suffix = new Suffix(suffixId, lexeme, parsedType,
                LabelSet.ConvertLabelNamesToIndexes(flagNames), orthographyRules);

            suffixesById.Add(suffixId, suffix);

            // The same suffix instance is registered under every surface form.
            for (int i = 0; i < surfaceForms.Length; i++)
            {
                suffixes.Add(surfaceForms[i], suffix);
            }
        }
Example #3
0
        /// <summary>
        /// Turns one suffix dictionary line into a <see cref="Suffix"/> and stores it twice:
        /// keyed by id in <paramref name="suffixesById"/> and keyed by every surface in
        /// <paramref name="suffixes"/>.
        /// </summary>
        /// <param name="entry">Line providing the id, lex, type, flags, rules and surfaces of the suffix.</param>
        /// <param name="suffixesById">Dictionary that maps suffix ids to suffixes.</param>
        /// <param name="suffixes">Dictionary that maps surface forms to suffixes.</param>
        private void AddSuffix(SuffixDictionaryLine entry,
                               Dictionary <string, Suffix> suffixesById,
                               MorphemeSurfaceDictionary <Suffix> suffixes)
        {
            var delimiters = new[] { ',', ' ' };
            const StringSplitOptions skipEmpty = StringSplitOptions.RemoveEmptyEntries;

            string key    = entry.Id;
            string lexeme = entry.Lex;

            MorphemeType kind;
            if (Enum.TryParse(entry.Type, out kind) == false)
            {
                // Report the unknown type name and continue with MorphemeType.O.
                kind = MorphemeType.O;
                Console.WriteLine("Invalid Morpheme Type: " + entry.Type);
            }

            string[] flagNames = entry.Flags.Split(delimiters, skipEmpty);
            string[] ruleNames = entry.Rules.Split(delimiters, skipEmpty);
            Debug.Assert(entry.Surfaces != null, "entry.Surfaces != null");
            string[] surfaceForms = entry.Surfaces.Split(delimiters, skipEmpty);

            List <OrthographyRule> resolvedRules = _orthography.GetRules(ruleNames);
            var labelIndexes = LabelSet.ConvertLabelNamesToIndexes(flagNames);
            var created = new Suffix(key, lexeme, kind, labelIndexes, resolvedRules);

            suffixesById.Add(key, created);

            // Every surface form maps to the same suffix instance.
            foreach (string form in surfaceForms)
            {
                suffixes.Add(form, created);
            }
        }
Example #4
0
        /// <summary>
        /// Turns one root dictionary line into a <see cref="Root"/> and adds it to
        /// <paramref name="roots"/> under its main surface and every extra surface.
        /// </summary>
        /// <param name="entry">Line providing the root text, extra surfaces, lex, flags, id and rules.</param>
        /// <param name="roots">Dictionary that maps surface forms to roots.</param>
        private void AddRoots(RootLine entry, MorphemeSurfaceDictionary <Root> roots)
        {
            var delimiters = new[] { ',', ' ' };
            const StringSplitOptions skipEmpty = StringSplitOptions.RemoveEmptyEntries;

            string   surfaceText = entry.Root;
            string[] alternates  = entry.Surfaces.Split(delimiters, skipEmpty);
            string[] flagNames   = entry.Flags.Split(delimiters, skipEmpty);
            string   kind        = entry.Id;
            string[] ruleNames   = entry.Rules.Split(delimiters, skipEmpty);

            // An absent lex defaults to the root text itself.
            string lexeme = string.IsNullOrEmpty(entry.Lex) ? surfaceText : entry.Lex;

            // Types for which the root text is also passed to the Root constructor.
            var typesTakingRootText = new[] { "KISALTMA", "ALINTI", "KISALTMA_NOKTALI", "HARF" };
            bool passRootText = Array.IndexOf(typesTakingRootText, kind) >= 0;

            var labelIndexes     = LabelSet.ConvertLabelNamesToIndexes(flagNames);
            var orthographyRules = _orthography.GetRules(ruleNames);

            Root created = passRootText
                ? new Root(kind, lexeme, labelIndexes, orthographyRules, surfaceText)
                : new Root(kind, lexeme, labelIndexes, orthographyRules);

            // Add the word under its main surface.
            roots.Add(surfaceText, created);

            // If it has additional surfaces, add it under those as well.
            foreach (string alternate in alternates)
            {
                roots.Add(alternate, created);
            }
        }
Example #5
0
        /// <summary>
        /// Creates a <see cref="Root"/> from a root dictionary line, registers it by id
        /// (<c>lex + "/" + pos</c>) in <paramref name="rootsById"/> and by every surface in
        /// <paramref name="rootsBySurface"/>.
        /// </summary>
        /// <param name="entry">Dictionary line supplying the root text, extra surfaces, lex, labels, pos and rules.</param>
        /// <param name="rootsById">
        /// Id-keyed dictionary. When the id is already present the existing entry is kept and a
        /// warning is traced.
        /// </param>
        /// <param name="rootsBySurface">
        /// Surface-keyed dictionary. The root is added here for every surface, including when its
        /// id turned out to be a duplicate.
        /// </param>
        private void AddRoots(RootLine entry, Dictionary <string, Root> rootsById,
                              MorphemeSurfaceDictionary <Root> rootsBySurface)
        {
            var mainSurface = entry.Root;

            // The main surface always comes first, followed by any extra surfaces on the line.
            var surfaces    = new List <string> {
                mainSurface
            };

            surfaces.AddRange(entry.Surfaces.Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries));

            var lex    = entry.Lex;
            var labels = entry.Labels.Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries);
            var pos    = entry.Pos;
            var rules  = entry.Rules.Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries).ToList();

            // Regex.IsMatch throws ArgumentNullException for a null input, and the fallback below
            // shows that Lex may be missing, so the pattern is only tested on a non-empty lex.
            // A lex containing a letter directly followed by a digit 2-9 gets the extra rule.
            if (!string.IsNullOrEmpty(lex) && Regex.IsMatch(lex, @"\p{L}[2-9]"))
            {
                rules.Add("DROP_ID_DIGIT");
            }

            // A missing lex defaults to the main surface.
            if (string.IsNullOrEmpty(lex))
            {
                lex = mainSurface;
            }

            var root = new Root(pos, lex,
                                new ImmutableSortedSet <string>(surfaces),
                                new ImmutableHashSet <string>(labels),
                                _orthography.GetRules(rules));

            var id = lex + "/" + pos;

            if (rootsById.ContainsKey(id))
            {
                Trace.TraceEvent(TraceEventType.Warning, 0, $"Duplicate root: {id}");
            }
            else
            {
                rootsById.Add(id, root);
            }

            foreach (var lexicalForm in surfaces)
            {
                rootsBySurface.Add(lexicalForm, root);
            }
        }
Example #6
0
        /// <summary>
        /// Turns one suffix dictionary line into a <see cref="Suffix"/>, stores it by id in
        /// <paramref name="suffixesById"/> (unless that id is already present) and by every
        /// surface in <paramref name="suffixes"/>.
        /// </summary>
        /// <param name="entry">Line providing the id, lex, type, labels, rules and surfaces of the suffix.</param>
        /// <param name="suffixesById">Dictionary that maps suffix ids to suffixes; an existing id is kept and a warning is traced.</param>
        /// <param name="suffixes">Dictionary that maps surface forms to suffixes.</param>
        private void AddSuffix(SuffixDictionaryLine entry,
                               Dictionary <string, Suffix> suffixesById,
                               MorphemeSurfaceDictionary <Suffix> suffixes)
        {
            var delimiters = new[] { ',', ' ' };
            const StringSplitOptions skipEmpty = StringSplitOptions.RemoveEmptyEntries;

            string key    = entry.Id;
            string lexeme = entry.Lex;

            MorphemeType kind;
            bool typeRecognised = Enum.TryParse(entry.Type, out kind);

            if (!typeRecognised)
            {
                // Trace the unknown type name and continue with MorphemeType.O.
                kind = MorphemeType.O;
                Trace.TraceEvent(TraceEventType.Error, 0, $"Invalid Morpheme Type: {entry.Type}");
            }

            string[] labelNames = entry.Labels.Split(delimiters, skipEmpty);
            string[] ruleNames  = entry.Rules.Split(delimiters, skipEmpty);
            Debug.Assert(entry.Surfaces != null, "entry.Surfaces != null");
            var surfaceForms = new List <string>(entry.Surfaces.Split(delimiters, skipEmpty));

            List <OrthographyRule> resolvedRules = _orthography.GetRules(ruleNames);
            var surfaceSet = new ImmutableSortedSet <string>(surfaceForms);
            var labelSet   = new ImmutableHashSet <string>(labelNames);
            var created    = new Suffix(key, lexeme, surfaceSet, kind, labelSet, resolvedRules);

            if (!suffixesById.ContainsKey(key))
            {
                suffixesById.Add(key, created);
            }
            else
            {
                Trace.TraceEvent(TraceEventType.Warning, 0, $"Duplicate suffix: {key}");
            }

            // Underscores in a surface are replaced with spaces before it is used as a key.
            foreach (string form in surfaceForms)
            {
                string surfaceKey = form.Replace('_', ' ');
                suffixes.Add(surfaceKey, created);
            }
        }
Example #7
0
        /// <summary>
        /// Creates a <see cref="Root"/> from a root dictionary line, registers it by id
        /// (<c>lex + "/" + pos</c>) in <paramref name="rootsById"/> and by every surface in
        /// <paramref name="rootsBySurface"/>.
        /// </summary>
        /// <param name="entry">Dictionary line supplying the root text, extra surfaces, lex, labels, pos and rules.</param>
        /// <param name="rootsById">
        /// Id-keyed dictionary. When the id is already present the existing entry is kept and a
        /// warning is traced.
        /// </param>
        /// <param name="rootsBySurface">
        /// Surface-keyed dictionary. The root is added here for every surface, including when its
        /// id turned out to be a duplicate.
        /// </param>
        private void AddRoots(RootLine entry, Dictionary<string, Root> rootsById,
            MorphemeSurfaceDictionary<Root> rootsBySurface)
        {
            var mainSurface = entry.Root;

            // The main surface always comes first, followed by any extra surfaces on the line.
            var surfaces = new List<string> {mainSurface};
            surfaces.AddRange(entry.Surfaces.Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries));

            var lex = entry.Lex;
            var labels = entry.Labels.Split(new[] {',', ' '}, StringSplitOptions.RemoveEmptyEntries);
            var pos = entry.Pos;
            var rules = entry.Rules.Split(new[] {',', ' '}, StringSplitOptions.RemoveEmptyEntries).ToList();

            // Regex.IsMatch throws ArgumentNullException for a null input, and the fallback below
            // shows that Lex may be missing, so the pattern is only tested on a non-empty lex.
            // A lex containing a letter directly followed by a digit 2-9 gets the extra rule.
            if (!string.IsNullOrEmpty(lex) && Regex.IsMatch(lex, @"\p{L}[2-9]"))
            {
                rules.Add("DROP_ID_DIGIT");
            }

            // A missing lex defaults to the main surface.
            if (string.IsNullOrEmpty(lex))
            {
                lex = mainSurface;
            }

            var root = new Root(pos, lex,
                new ImmutableSortedSet<string>(surfaces),
                new ImmutableHashSet<string>(labels),
                _orthography.GetRules(rules));

            var id = lex + "/" + pos;

            if (rootsById.ContainsKey(id))
            {
                Trace.TraceEvent(TraceEventType.Warning, 0, $"Duplicate root: {id}");
            }
            else
            {
                rootsById.Add(id, root);
            }

            foreach (var lexicalForm in surfaces)
            {
                rootsBySurface.Add(lexicalForm, root);
            }
        }
Example #8
0
        /// <summary>
        /// Creates a <see cref="Suffix"/> from a suffix dictionary line, registers it by id in
        /// <paramref name="suffixesById"/> (unless that id is already present) and by each of
        /// its surfaces in <paramref name="suffixes"/>.
        /// </summary>
        /// <param name="entry">Dictionary line supplying the id, lex, type, labels, rules and surfaces.</param>
        /// <param name="suffixesById">Id-keyed dictionary; an already-present id is kept and a warning is traced.</param>
        /// <param name="suffixes">Surface-keyed dictionary that receives the suffix once per surface.</param>
        private void AddSuffix(SuffixDictionaryLine entry,
            Dictionary<string, Suffix> suffixesById,
            MorphemeSurfaceDictionary<Suffix> suffixes)
        {
            // Labels, rules and surfaces are all lists delimited by commas and/or spaces.
            char[] separators = {',', ' '};

            string suffixId = entry.Id;
            string lexeme = entry.Lex;

            MorphemeType parsedType;
            bool typeRecognised = Enum.TryParse(entry.Type, out parsedType);
            if (!typeRecognised)
            {
                // Unrecognised type names fall back to MorphemeType.O and are traced as an error.
                parsedType = MorphemeType.O;
                Trace.TraceEvent(TraceEventType.Error, 0, $"Invalid Morpheme Type: {entry.Type}");
            }

            string[] labelNames = entry.Labels.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            string[] ruleNames = entry.Rules.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            Debug.Assert(entry.Surfaces != null, "entry.Surfaces != null");
            var surfaceForms =
                new List<string>(entry.Surfaces.Split(separators, StringSplitOptions.RemoveEmptyEntries));

            List<OrthographyRule> orthographyRules = _orthography.GetRules(ruleNames);
            var surfaceSet = new ImmutableSortedSet<string>(surfaceForms);
            var labelSet = new ImmutableHashSet<string>(labelNames);
            var suffix = new Suffix(suffixId, lexeme, surfaceSet, parsedType, labelSet, orthographyRules);

            if (!suffixesById.ContainsKey(suffixId))
            {
                suffixesById.Add(suffixId, suffix);
            }
            else
            {
                Trace.TraceEvent(TraceEventType.Warning, 0, $"Duplicate suffix: {suffixId}");
            }

            // Underscores in a surface are replaced with spaces before it is used as a key.
            for (int i = 0; i < surfaceForms.Count; i++)
            {
                suffixes.Add(surfaceForms[i].Replace('_', ' '), suffix);
            }
        }