Ejemplo n.º 1
0
        /// <summary>
        /// Loads the orthography, morphotactics, roots and suffixes (in that order),
        /// traces how long each stage took, and assembles the results into a
        /// <see cref="Language"/>.
        /// </summary>
        /// <returns>A new <see cref="Language"/> built from the loaded components.</returns>
        public Language Read()
        {
            // A single stopwatch is restarted before every stage, so each trace line
            // reports only the time spent in the stage that has just finished.
            var timer = Stopwatch.StartNew();

            // The orthography is kept in a field rather than a local.
            _orthography = ReadOrthography();
            _trace.TraceInformation($"{_dirPath} orthography loading time is {timer.ElapsedMilliseconds} ms");

            timer.Restart();
            var morphotactics = ReadMorphotactics();
            _trace.TraceInformation($"{_dirPath} morphotactics loading time is {timer.ElapsedMilliseconds} ms");

            timer.Restart();
            var roots = ReadRoots();
            _trace.TraceInformation($"{_dirPath} roots loading time is {timer.ElapsedMilliseconds} ms");

            timer.Restart();
            var suffixes = ReadSuffixes();
            _trace.TraceInformation($"{_dirPath} suffixes loading time is {timer.ElapsedMilliseconds} ms");

            // Nothing else is timed, so stop the stopwatch instead of restarting it.
            timer.Stop();

            return new Language(_languageType, _orthography, morphotactics, roots, suffixes);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Loads the orthography, morphotactics, roots and suffixes (in that order),
        /// printing the duration of each stage to the debug output, and builds a
        /// <see cref="Language"/> whose code is the last segment of the directory path.
        /// </summary>
        /// <returns>A new <see cref="Language"/> built from the loaded components.</returns>
        public Language Read()
        {
            // A single stopwatch is restarted before every stage, so each debug line
            // reports only the time spent in the stage that has just finished.
            var timer = Stopwatch.StartNew();

            // The orthography is kept in a field rather than a local.
            _orthography = ReadOrthography();
            Debug.Print($"orthograpy: {timer.ElapsedMilliseconds} ms");

            timer.Restart();
            Morphotactics morphotactics = ReadMorphotactics();
            Debug.Print($"morphotactics: {timer.ElapsedMilliseconds} ms");

            timer.Restart();
            MorphemeSurfaceDictionary<Root> roots = ReadRoots();
            Debug.Print($"roots: {timer.ElapsedMilliseconds} ms");

            timer.Restart();
            Suffixes suffixes = ReadSuffixes();
            Debug.Print($"suffixes: {timer.ElapsedMilliseconds} ms");

            // Nothing else is timed, so stop the stopwatch instead of restarting it.
            timer.Stop();

            // The language code is the last segment of the directory path.
            // Trailing separators are dropped first so that "langs\tr\" still yields
            // "tr" instead of an empty code, and both '\' and '/' are accepted so
            // that forward-slash paths are split as well.
            string trimmedPath    = _dirPath.TrimEnd('\\', '/');
            int    separatorIndex = trimmedPath.LastIndexOfAny(new[] { '\\', '/' });

            string langCode = separatorIndex > -1
                ? trimmedPath.Substring(separatorIndex + 1)
                : trimmedPath;

            return new Language(langCode, morphotactics, roots, suffixes);
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Creates a <see cref="MutableLanguage"/> by forwarding every component,
 /// unchanged, to the base-class constructor.
 /// </summary>
 /// <param name="type">Language type passed to the base constructor.</param>
 /// <param name="orthography">Orthography passed to the base constructor.</param>
 /// <param name="morphotactics">Morphotactics passed to the base constructor.</param>
 /// <param name="roots">Root container passed to the base constructor.</param>
 /// <param name="suffixes">Suffix container passed to the base constructor.</param>
 private MutableLanguage(
     LanguageType type,
     Orthography orthography,
     Morphotactics morphotactics,
     MorphemeContainer<Root> roots,
     MorphemeContainer<Suffix> suffixes)
     : base(type, orthography, morphotactics, roots, suffixes)
 {
     // Intentionally empty: all initialisation happens in the base constructor.
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds a <see cref="Language"/> from the orthography, morphotactics, root
        /// and suffix definitions carried by <paramref name="data"/>.
        /// </summary>
        /// <param name="data">Source of the four definitions and of the language type.</param>
        /// <returns>A new <see cref="Language"/> built from the parsed components.</returns>
        public Language Parse(LanguageData data)
        {
            // The orthography is parsed first and kept in a field rather than a local.
            _orthography = ParseOrthography(data.OrthographyXml);

            var parsedMorphotactics = ParseMorphotactics(data.MorphotacticsXml);
            var parsedRoots         = ParseRoots(data.RootTxt);
            var parsedSuffixes      = ParseSuffixes(data.SuffixTxt);

            return new Language(data.Type, _orthography, parsedMorphotactics, parsedRoots, parsedSuffixes);
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Creates a language from its components and attaches a
 /// <see cref="WordAnalyzer"/> constructed over this instance.
 /// </summary>
 /// <param name="type">Value stored in <c>Type</c>.</param>
 /// <param name="orthography">Value stored in <c>Orthography</c>.</param>
 /// <param name="morphotactics">Value stored in <c>Morphotactics</c>.</param>
 /// <param name="roots">Value stored in <c>Roots</c>.</param>
 /// <param name="suffixes">Value stored in <c>Suffixes</c>.</param>
 internal Language(
     LanguageType type,
     Orthography orthography,
     Morphotactics morphotactics,
     MorphemeContainer<Root> roots,
     MorphemeContainer<Suffix> suffixes)
 {
     this.Type = type;
     this.Orthography = orthography;
     this.Morphotactics = morphotactics;
     this.Roots = roots;
     this.Suffixes = suffixes;

     // Created last: the analyzer is handed this instance, so the other
     // members are assigned before it is constructed.
     this.Analyzer = new WordAnalyzer(this);
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds a <see cref="Root"/> from <paramref name="entry"/> and registers it
        /// in <c>Roots</c> under the entry's id and under each of its surface forms.
        /// </summary>
        /// <param name="entry">Entry supplying the root's data, id, surfaces and rule references.</param>
        private void Add(RootEntry entry)
        {
            // Turn the entry's rule references into rules via the orthography.
            var resolvedRules = Orthography.GetRules(entry.Rules);
            var newRoot = new Root(entry.Pos, entry.Lex, entry.Surfaces, entry.Labels, resolvedRules);

            Roots.ById.Add(entry.Id, newRoot);

            // The same root instance is registered once per surface form.
            foreach (string surfaceForm in entry.Surfaces)
            {
                Roots.BySurface.Add(surfaceForm, newRoot);
            }
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Creates a suffix lexicon reader that keeps a reference to the given orthography.
 /// </summary>
 /// <param name="orthography">Orthography stored for later use by this reader.</param>
 public SuffixLexiconReader(Orthography orthography)
 {
     this._orthography = orthography;
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Creates a root lexicon reader that keeps a reference to the given orthography.
 /// </summary>
 /// <param name="orthography">Orthography stored for later use by this reader.</param>
 public RootLexiconReader(Orthography orthography)
 {
     this._orthography = orthography;
 }
Ejemplo n.º 9
0
        /// <summary>
        /// <c>DoWork</c> handler that clears the lexicon, splits the submitted text into
        /// paragraphs and lines, breaks every line into word and punctuation elements,
        /// and appends the results to <c>Text</c>. Progress is reported once per line,
        /// and <c>UpdateLocalLexicon</c> is called when all paragraphs are processed.
        /// </summary>
        /// <param name="sender">
        /// Event source. Progress is reported only when this is a
        /// <see cref="BackgroundWorker"/>; otherwise the text is still parsed, silently.
        /// </param>
        /// <param name="e">Event data; <c>e.Argument</c> is read as the text to parse.</param>
        private void ParseText_DoWork(object sender, DoWorkEventArgs e)
        {
            Lexicon.Clear();

            // Capture group 1 wraps the punctuation alternative; it is empty whenever
            // the word alternative is the one that matched.
            var expElementPattern = new Regex($"({PunctuationPattern.ToString()})|({WordPattern.ToString()})");
            var whiteSpacePattern = new Regex(@"[\s\n\r]+", RegexOptions.Singleline | RegexOptions.Multiline);

            // May be null when the handler is not raised by a BackgroundWorker;
            // progress reporting below tolerates that.
            var worker = sender as BackgroundWorker;

            // NOTE(review): text is null when e.Argument is not a string, and the
            // Matches call below presumably throws in that case - confirm callers
            // always pass a string.
            var text     = e.Argument as string;
            int progress = 0;

            // TODO: handling of paragraph breaks and section headers, etc
            foreach (Match p in ParagraphPattern.Matches(text))
            {
                // Collapse every run of whitespace inside the paragraph to one space.
                string paragraphText = whiteSpacePattern.Replace(p.Value.Trim(), " ");

                var paragraph = new Lx.Discourse();
                Text.Discourse.AddLast(paragraph);

                foreach (Match l in LinePattern.Matches(paragraphText))
                {
                    // Store the line, then section it up into words and punctuation.
                    string cleanedLine = l.Value.Trim();

                    var expression = new Lx.Expression(cleanedLine);
                    paragraph.Expressions.AddLast(expression);

                    foreach (Match m in expElementPattern.Matches(expression.Graph))
                    {
                        // string m => List<Glyph>
                        var glyphs = Script.AddGlyphs(m.Value.ToCharArray());

                        // List<Glyph> => List<Grapheme>
                        // Pre-analysis, graphemes are 1:1 with glyphs
                        var graphemes = Orthography.AddGraphemes(glyphs);

                        if (string.IsNullOrEmpty(m.Groups[1].Value))
                        {
                            // Word element: List<Grapheme> => Morpheme
                            Lx.Morpheme morph = Text.Lexicon.Add(graphemes);
                            expression.Sequence.AddLast(morph);
                        }
                        else
                        {
                            // Punctuation element: added to the paralexicon by its text.
                            expression.Sequence.AddLast(Text.Paralexicon.Add(m.Groups[1].Value));
                        }
                    }

                    // One progress tick per processed line; the line itself is the user state.
                    worker?.ReportProgress(++progress, cleanedLine);
                }
            }

            UpdateLocalLexicon();
        }