Пример #1
0
        /// <summary>
        /// <see cref="BackgroundWorker.DoWork"/> handler that parses raw text into
        /// paragraphs, lines and word/punctuation elements.
        /// </summary>
        /// <remarks>
        /// Clears <c>Lexicon</c>, then for every <c>ParagraphPattern</c> match appends a new
        /// <c>Lx.Discourse</c> to <c>Text.Discourse</c>; for every <c>LinePattern</c> match inside
        /// it appends an <c>Lx.Expression</c> whose sequence is filled with entries from
        /// <c>Text.Lexicon</c> (words) or <c>Text.Paralexicon</c> (punctuation). Progress is
        /// reported once per parsed line, and <c>UpdateLocalLexicon</c> is called at the end.
        /// </remarks>
        /// <param name="sender">The <see cref="BackgroundWorker"/> raising the event; used only to report progress.</param>
        /// <param name="e">Event data whose <see cref="DoWorkEventArgs.Argument"/> carries the text to parse as a string.</param>
        private void ParseText_DoWork(object sender, DoWorkEventArgs e)
        {
            Lexicon.Clear();

            // Group 1 captures a punctuation element, the second alternative a word element.
            var expElementPattern = new Regex($"({PunctuationPattern.ToString()})|({WordPattern.ToString()})");
            var whiteSpacePattern = new Regex(@"[\s\n\r]+", RegexOptions.Singleline | RegexOptions.Multiline);

            var worker = sender as BackgroundWorker;

            // A missing or non-string argument is treated as empty text instead of
            // letting Regex.Matches throw ArgumentNullException.
            string text = e.Argument as string ?? string.Empty;
            int progress = 0;

            // TODO: handling of paragraph breaks and section headers, etc
            foreach (Match p in ParagraphPattern.Matches(text))
            {
                // Collapse every run of whitespace (including line breaks) into a single space.
                string paragraphText = whiteSpacePattern.Replace(p.Value.Trim(), " ");

                var paragraph = new Lx.Discourse();
                Text.Discourse.AddLast(paragraph);

                foreach (Match l in LinePattern.Matches(paragraphText))
                {
                    // Store the line, then section it up into words and punctuation.
                    string cleanedLine = l.Value.Trim();

                    var expression = new Lx.Expression(cleanedLine);
                    paragraph.Expressions.AddLast(expression);

                    // Every Match always contains group 0, so no group-count check is needed.
                    foreach (Match m in expElementPattern.Matches(expression.Graph))
                    {
                        // string m => List<Glyph>
                        var glyphs = Script.AddGlyphs(m.Value.ToCharArray());

                        // List<Glyph> => List<Grapheme>
                        // Pre-analysis, graphemes are 1:1 with glyphs
                        var graphemes = Orthography.AddGraphemes(glyphs);

                        if (string.IsNullOrEmpty(m.Groups[1].Value))
                        {
                            // Punctuation group is empty, so this element is a word:
                            // List<Grapheme> => Morpheme
                            Lx.Morpheme morph = Text.Lexicon.Add(graphemes);
                            expression.Sequence.AddLast(morph);
                        }
                        else
                        {
                            expression.Sequence.AddLast(Text.Paralexicon.Add(m.Groups[1].Value));
                        }
                    }

                    // NOTE(review): the reported value is a running line count, not a 0-100
                    // percentage; confirm the ProgressChanged handler expects that.
                    worker?.ReportProgress(++progress, cleanedLine);
                }
            }

            UpdateLocalLexicon();
        }