/// <summary>
/// Loads the orthography, morphotactics, roots and suffixes (in that order) and
/// builds a <see cref="Language"/> from them. The elapsed time of each stage is
/// written to <c>_trace</c> in milliseconds.
/// </summary>
/// <returns>A new <see cref="Language"/> built from the loaded parts.</returns>
public Language Read()
{
    var timer = Stopwatch.StartNew();

    // The orthography is stored in a field rather than a local.
    _orthography = ReadOrthography();
    _trace.TraceInformation($"{_dirPath} orthography loading time is {timer.ElapsedMilliseconds} ms");

    timer.Restart();
    var morphotactics = ReadMorphotactics();
    _trace.TraceInformation($"{_dirPath} morphotactics loading time is {timer.ElapsedMilliseconds} ms");

    timer.Restart();
    var roots = ReadRoots();
    _trace.TraceInformation($"{_dirPath} roots loading time is {timer.ElapsedMilliseconds} ms");

    timer.Restart();
    var suffixes = ReadSuffixes();
    _trace.TraceInformation($"{_dirPath} suffixes loading time is {timer.ElapsedMilliseconds} ms");

    timer.Restart();
    return new Language(_languageType, _orthography, morphotactics, roots, suffixes);
}
/// <summary>
/// Loads the orthography, morphotactics, roots and suffixes (in that order),
/// printing the elapsed milliseconds of each stage to the debug output, and
/// builds a <see cref="Language"/> whose code is the last segment of
/// <c>_dirPath</c>.
/// </summary>
/// <returns>A new <see cref="Language"/> built from the loaded parts.</returns>
public Language Read()
{
    var sw = Stopwatch.StartNew();

    // The orthography is stored in a field rather than a local.
    _orthography = ReadOrthography();
    Debug.Print($"orthography: {sw.ElapsedMilliseconds} ms");

    sw.Restart();
    Morphotactics morphotactics = ReadMorphotactics();
    Debug.Print($"morphotactics: {sw.ElapsedMilliseconds} ms");

    sw.Restart();
    MorphemeSurfaceDictionary<Root> roots = ReadRoots();
    Debug.Print($"roots: {sw.ElapsedMilliseconds} ms");

    sw.Restart();
    Suffixes suffixes = ReadSuffixes();
    Debug.Print($"suffixes: {sw.ElapsedMilliseconds} ms");

    // The language code is the last path segment. Trailing separators are
    // dropped first so "langs\tr\" yields "tr" instead of an empty code, and
    // both '\' and '/' are accepted as separators.
    string trimmedPath = _dirPath.TrimEnd('\\', '/');
    int index = trimmedPath.LastIndexOfAny(new[] { '\\', '/' });
    string langCode = index > -1 ? trimmedPath.Substring(index + 1) : trimmedPath;

    return new Language(langCode, morphotactics, roots, suffixes);
}
/// <summary>
/// Creates a mutable language by passing every argument unchanged to the base
/// constructor; no additional initialization is done here.
/// </summary>
private MutableLanguage(
    LanguageType type,
    Orthography orthography,
    Morphotactics morphotactics,
    MorphemeContainer<Root> roots,
    MorphemeContainer<Suffix> suffixes)
    : base(type, orthography, morphotactics, roots, suffixes)
{
}
/// <summary>
/// Builds a <see cref="Language"/> from the orthography, morphotactics, root
/// and suffix sources carried by <paramref name="data"/>.
/// </summary>
/// <param name="data">Supplies the language type and the four sources to parse.</param>
/// <returns>A new <see cref="Language"/> of type <c>data.Type</c>.</returns>
public Language Parse(LanguageData data)
{
    // The orthography is parsed first and kept in a field.
    _orthography = ParseOrthography(data.OrthographyXml);

    var parsedMorphotactics = ParseMorphotactics(data.MorphotacticsXml);
    var parsedRoots = ParseRoots(data.RootTxt);
    var parsedSuffixes = ParseSuffixes(data.SuffixTxt);

    return new Language(data.Type, _orthography, parsedMorphotactics, parsedRoots, parsedSuffixes);
}
/// <summary>
/// Initializes a language from its already-built parts and creates its
/// <see cref="WordAnalyzer"/>.
/// </summary>
internal Language(
    LanguageType type,
    Orthography orthography,
    Morphotactics morphotactics,
    MorphemeContainer<Root> roots,
    MorphemeContainer<Suffix> suffixes)
{
    Type = type;
    Orthography = orthography;
    Morphotactics = morphotactics;
    Roots = roots;
    Suffixes = suffixes;

    // Created last: the analyzer receives this instance, so every other
    // property is assigned before it is constructed.
    Analyzer = new WordAnalyzer(this);
}
/// <summary>
/// Builds a <see cref="Root"/> from <paramref name="entry"/> and registers it
/// under the entry's id and under each of the entry's surfaces.
/// </summary>
/// <param name="entry">The root entry to convert and register.</param>
private void Add(RootEntry entry)
{
    // Resolve the entry's rules through the orthography before building the root.
    var resolvedRules = Orthography.GetRules(entry.Rules);
    var newRoot = new Root(entry.Pos, entry.Lex, entry.Surfaces, entry.Labels, resolvedRules);

    Roots.ById.Add(entry.Id, newRoot);

    // The same root instance is indexed under every one of its surfaces.
    foreach (string surfaceForm in entry.Surfaces)
    {
        Roots.BySurface.Add(surfaceForm, newRoot);
    }
}
/// <summary>
/// Creates a suffix lexicon reader that keeps the given orthography for later use.
/// </summary>
/// <param name="orthography">The orthography stored in <c>_orthography</c>.</param>
public SuffixLexiconReader(Orthography orthography)
{
    _orthography = orthography;
}
/// <summary>
/// Creates a root lexicon reader that keeps the given orthography for later use.
/// </summary>
/// <param name="orthography">The orthography stored in <c>_orthography</c>.</param>
public RootLexiconReader(Orthography orthography)
{
    _orthography = orthography;
}
/// <summary>
/// <c>DoWork</c> handler that parses the text passed in <c>e.Argument</c>.
/// The text is split into paragraphs, each paragraph into lines, and each line
/// into word and punctuation elements; the results are appended to
/// <c>Text</c>. Progress is reported once per line, with the line text as the
/// user state.
/// </summary>
/// <param name="sender">The raising object; progress is reported only when it is a <see cref="BackgroundWorker"/>.</param>
/// <param name="e">Event data whose <c>Argument</c> is read as the string to parse.</param>
private void ParseText_DoWork(object sender, DoWorkEventArgs e)
{
    Lexicon.Clear();

    // An element is either punctuation (group 1) or a word (group 2).
    var expElementPattern = new Regex($"({PunctuationPattern.ToString()})|({WordPattern.ToString()})");
    var whiteSpacePattern = new Regex(@"[\s\n\r]+", RegexOptions.Singleline | RegexOptions.Multiline);

    // 'as' yields null when sender is not a BackgroundWorker; reporting is
    // skipped in that case instead of failing after the first line is parsed.
    var worker = sender as BackgroundWorker;

    // NOTE(review): a non-string Argument leaves text null, which is passed
    // straight to ParagraphPattern.Matches - confirm callers always pass a string.
    var text = e.Argument as string;
    int progress = 0;

    // TODO: handling of paragraph breaks and section headers, etc
    foreach (Match p in ParagraphPattern.Matches(text))
    {
        // Collapse every whitespace run inside the paragraph to a single space.
        string paragraphText = whiteSpacePattern.Replace(p.Value.Trim(), " ");

        var paragraph = new Lx.Discourse();
        Text.Discourse.AddLast(paragraph);

        foreach (Match l in LinePattern.Matches(paragraphText))
        {
            // Store line, section up into words and punctuation.
            string cleanedLine = l.Value.Trim();

            var expression = new Lx.Expression(cleanedLine);
            paragraph.Expressions.AddLast(expression);

            foreach (Match m in expElementPattern.Matches(expression.Graph))
            {
                // string m => List<Glyph>
                var glyphs = Script.AddGlyphs(m.Value.ToCharArray());

                // List<Glyph> => List<Grapheme>
                // Pre-analysis, graphemes are 1:1 with glyphs.
                var graphemes = Orthography.AddGraphemes(glyphs);

                if (string.IsNullOrEmpty(m.Groups[1].Value))
                {
                    // No punctuation captured, so this element is a word:
                    // List<Grapheme> => Morpheme
                    Lx.Morpheme morph = Text.Lexicon.Add(graphemes);
                    expression.Sequence.AddLast(morph);
                }
                else
                {
                    expression.Sequence.AddLast(Text.Paralexicon.Add(m.Groups[1].Value));
                }
            }

            worker?.ReportProgress(++progress, cleanedLine);
        }
    }

    UpdateLocalLexicon();
}