Ejemplo n.º 1
0
        /// <summary>
        /// DoWork handler that parses the text supplied in <c>e.Argument</c>: the text is split
        /// with <c>ParagraphPattern</c>, each paragraph is split with <c>LinePattern</c>, and each
        /// line is split into punctuation and word elements that are appended to the line's
        /// expression sequence. Progress is reported once per processed line.
        /// </summary>
        /// <param name="sender">
        /// Expected to be the <see cref="BackgroundWorker"/> raising the event. When it is not,
        /// parsing still runs but no progress is reported.
        /// </param>
        /// <param name="e">Event data; <c>e.Argument</c> must be the text to parse, as a string.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <c>e.Argument</c> is null or is not a string.
        /// </exception>
        private void ParseText_DoWork(object sender, DoWorkEventArgs e)
        {
            // Validate the input before mutating any state. Without this check a null text
            // would only be detected when handed to ParagraphPattern.Matches, after the
            // lexicon had already been cleared.
            var text = e.Argument as string;
            if (text == null)
            {
                throw new System.ArgumentNullException(
                    nameof(e),
                    "ParseText_DoWork requires the text to parse as a string in DoWorkEventArgs.Argument.");
            }

            // May be null if the handler is invoked by something other than a BackgroundWorker;
            // progress reporting below is therefore null-conditional.
            var worker = sender as BackgroundWorker;

            Lexicon.Clear();

            // Group 1 captures punctuation, the following group captures words.
            var expElementPattern = new Regex($"({PunctuationPattern.ToString()})|({WordPattern.ToString()})");
            var whiteSpacePattern = new Regex(@"[\s\n\r]+", RegexOptions.Singleline | RegexOptions.Multiline);

            int progress = 0;

            // TODO: handling of paragraph breaks and section headers, etc
            foreach (Match p in ParagraphPattern.Matches(text))
            {
                // Collapse every run of whitespace inside the paragraph to a single space.
                string paragraphText = whiteSpacePattern.Replace(p.Value.Trim(), " ");
                var paragraph = new Lx.Discourse();
                Text.Discourse.AddLast(paragraph);

                foreach (Match l in LinePattern.Matches(paragraphText))
                {
                    // Store the line, then section it up into words and punctuation.
                    string cleanedLine = l.Value.Trim();

                    var expression = new Lx.Expression(cleanedLine);
                    paragraph.Expressions.AddLast(expression);

                    // Every Match exposes at least group 0, so no group-count check is needed.
                    foreach (Match m in expElementPattern.Matches(expression.Graph))
                    {
                        // string m => List<Glyphs>
                        var glyphs = Script.AddGlyphs(m.Value.ToCharArray());

                        // List<Glyph> => List<Grapheme>
                        // Pre-analysis, graphemes are 1:1 with glyphs
                        var graphemes = Orthography.AddGraphemes(glyphs);

                        // List<Grapheme> => Morpheme
                        if (string.IsNullOrEmpty(m.Groups[1].Value))
                        {
                            // The punctuation group did not participate, so this is a word.
                            Lx.Morpheme morph = Text.Lexicon.Add(graphemes);
                            expression.Sequence.AddLast(morph);
                        }
                        else
                        {
                            expression.Sequence.AddLast(Text.Paralexicon.Add(m.Groups[1].Value));
                        }
                    }

                    // The reported value is a running count of processed lines; the line
                    // text itself is passed along as the user state.
                    worker?.ReportProgress(++progress, cleanedLine);
                }
            }

            UpdateLocalLexicon();
        }
Ejemplo n.º 2
0
		public void Roundtrip_LdmlDelimiters()
		{
			// Round-trip check for delimiter data: write a writing system to an LDML file,
			// verify the XML, read it back and compare, then rewrite the file using its own
			// previous content as the base and verify the XML again.
			const string ldmlFileName = "test.ldml";

			// Each of these XPath expressions must match exactly once in the written file,
			// both after the first write and after the rewrite.
			string[] singleMatchXpaths =
			{
				"/ldml/delimiters/special/sil:matched-pairs/sil:matched-pair[@open='mpOpen1' and @close='mpClose2' and @paraClose='false']",
				"/ldml/delimiters/special/sil:punctuation-patterns/sil:punctuation-pattern[@pattern='pattern1' and @context='medial']",
				"/ldml/delimiters/quotationStart[text()='\"']",
				"/ldml/delimiters/quotationEnd[text()='\"']",
				"/ldml/delimiters/alternateQuotationStart[text()='{']",
				"/ldml/delimiters/alternateQuotationEnd[text()='}']",
				"/ldml/delimiters/special/sil:quotation-marks/sil:quotationContinue[text()='\"']",
				"/ldml/delimiters/special/sil:quotation-marks/sil:alternateQuotationContinue[text()='{']",
				"/ldml/delimiters/special/sil:quotation-marks/sil:quotation[@open='open1' and @close='close2' and @continue='cont3' and @level='3']",
				"/ldml/delimiters/special/sil:quotation-marks/sil:quotation[@open and string-length(@open)=0 and @level='1' and @type='narrative']"
			};

			using (var environment = new TestEnvironment())
			{
				var matchedPair = new MatchedPair("mpOpen1", "mpClose2", false);
				var punctuationPattern = new PunctuationPattern("pattern1", PunctuationPatternContext.Medial);

				// Quotation marks, in the order they are expected to come back:
				//   [0] level 1 normal    (quotationStart / quotationEnd)
				//   [1] level 2 normal    (alternateQuotationStart / alternateQuotationEnd)
				//   [2] level 3 normal    (special: sil:quotation-marks)
				//   [3] level 1 narrative (special: sil:quotation-marks)
				var quotationMarks = new[]
				{
					new QuotationMark("\"", "\"", "\"", 1, QuotationMarkingSystemType.Normal),
					new QuotationMark("{", "}", "{", 2, QuotationMarkingSystemType.Normal),
					new QuotationMark("open1", "close2", "cont3", 3, QuotationMarkingSystemType.Normal),
					new QuotationMark("", null, null, 1, QuotationMarkingSystemType.Narrative)
				};

				var wsToLdml = new WritingSystemDefinition("en", "Latn", "", "");
				wsToLdml.MatchedPairs.Add(matchedPair);
				wsToLdml.PunctuationPatterns.Add(punctuationPattern);
				foreach (QuotationMark quotationMark in quotationMarks)
				{
					wsToLdml.QuotationMarks.Add(quotationMark);
				}
				wsToLdml.QuotationParagraphContinueType = QuotationParagraphContinueType.Outermost;

				// First pass: write from scratch and inspect the resulting XML.
				var ldmlAdaptor = new LdmlDataMapper(new TestWritingSystemFactory());
				ldmlAdaptor.Write(environment.FilePath(ldmlFileName), wsToLdml, null);
				foreach (string xpath in singleMatchXpaths)
				{
					AssertThatXmlIn.File(environment.FilePath(ldmlFileName))
						.HasSpecifiedNumberOfMatchesForXpath(xpath, 1, environment.NamespaceManager);
				}

				// Read the file back and compare against what was written.
				var wsFromLdml = new WritingSystemDefinition();
				ldmlAdaptor.Read(environment.FilePath(ldmlFileName), wsFromLdml);
				Assert.That(wsFromLdml.MatchedPairs.FirstOrDefault(), Is.EqualTo(matchedPair));
				Assert.That(wsFromLdml.PunctuationPatterns.FirstOrDefault(), Is.EqualTo(punctuationPattern));
				Assert.That(wsFromLdml.QuotationParagraphContinueType, Is.EqualTo(QuotationParagraphContinueType.Outermost));
				for (int index = 0; index < quotationMarks.Length; index++)
				{
					Assert.That(wsFromLdml.QuotationMarks[index], Is.EqualTo(quotationMarks[index]));
				}

				// Second pass: rewrite the loaded definition while using the original file
				// content as the base, to make sure no duplicate elements are created.
				var rewriteTarget = environment.FilePath(ldmlFileName);
				var previousContent = new MemoryStream(File.ReadAllBytes(environment.FilePath(ldmlFileName)));
				ldmlAdaptor.Write(rewriteTarget, wsFromLdml, previousContent);
				foreach (string xpath in singleMatchXpaths)
				{
					AssertThatXmlIn.File(environment.FilePath(ldmlFileName))
						.HasSpecifiedNumberOfMatchesForXpath(xpath, 1, environment.NamespaceManager);
				}
			}
		}
Ejemplo n.º 3
0
		/// <summary>
		/// Reads matched pairs, punctuation patterns and quotation marks from a delimiters
		/// element and adds them to <paramref name="ws"/>.
		/// </summary>
		/// <remarks>
		/// Quotation marks are added in this order: level 1 (quotationStart/quotationEnd),
		/// level 2 (alternateQuotationStart/alternateQuotationEnd), then every
		/// <c>sil:quotation</c> entry found under the <c>special</c> element.
		/// </remarks>
		/// <param name="delimitersElem">The delimiters element to read from.</param>
		/// <param name="ws">The writing system definition that receives the values.</param>
		private void ReadDelimitersElement(XElement delimitersElem, WritingSystemDefinition ws)
		{
			// The continue marks for levels 1 and 2 are stored inside the special element, so
			// that element is processed first. The quotation marks it lists are held back in
			// this list and appended only after levels 1 and 2 have been added.
			string firstLevelContinue = null;
			string secondLevelContinue = null;
			var deferredQuotationMarks = new List<QuotationMark>();

			XElement special = delimitersElem.NonAltElement("special");
			if (special != null)
			{
				// sil:matched-pairs
				XElement pairsContainer = special.Element(Sil + "matched-pairs");
				if (pairsContainer != null)
				{
					foreach (XElement pairElem in pairsContainer.NonAltElements(Sil + "matched-pair"))
					{
						var pairOpen = (string) pairElem.Attribute("open");
						var pairClose = (string) pairElem.Attribute("close");
						// A missing paraClose attribute is read as false.
						bool paraClose = (bool?) pairElem.Attribute("paraClose") ?? false;
						ws.MatchedPairs.Add(new MatchedPair(pairOpen, pairClose, paraClose));
					}
				}

				// sil:punctuation-patterns
				XElement patternsContainer = special.Element(Sil + "punctuation-patterns");
				if (patternsContainer != null)
				{
					foreach (XElement patternElem in patternsContainer.NonAltElements(Sil + "punctuation-pattern"))
					{
						var patternText = (string) patternElem.Attribute("pattern");
						var contextName = (string) patternElem.Attribute("context");
						PunctuationPatternContext context = ContextToPunctuationPatternContext[contextName];
						ws.PunctuationPatterns.Add(new PunctuationPattern(patternText, context));
					}
				}

				// sil:quotation-marks
				XElement marksContainer = special.Element(Sil + "quotation-marks");
				if (marksContainer != null)
				{
					// A missing paraContinueType attribute is looked up as the empty string.
					string continueTypeName = (string)marksContainer.Attribute("paraContinueType") ?? string.Empty;
					ws.QuotationParagraphContinueType = QuotationToQuotationParagraphContinueTypes[continueTypeName];

					firstLevelContinue = (string)marksContainer.Element(Sil + "quotationContinue");
					secondLevelContinue = (string)marksContainer.Element(Sil + "alternateQuotationContinue");

					foreach (XElement markElem in marksContainer.NonAltElements(Sil + "quotation"))
					{
						var markOpen = (string) markElem.Attribute("open");
						var markClose = (string) markElem.Attribute("close");
						var markContinue = (string) markElem.Attribute("continue");
						// A missing level attribute is read as level 1.
						int markLevel = (int?) markElem.Attribute("level") ?? 1;
						var typeName = (string) markElem.Attribute("type");

						// A missing or empty type attribute means a normal quotation mark.
						QuotationMarkingSystemType systemType = QuotationMarkingSystemType.Normal;
						if (!string.IsNullOrEmpty(typeName))
						{
							systemType = QuotationToQuotationMarkingSystemTypes[typeName];
						}

						deferredQuotationMarks.Add(new QuotationMark(markOpen, markClose, markContinue, markLevel, systemType));
					}
				}
			}

			// Level 1: quotationStart, quotationEnd. Skipped only when open, close and
			// continue are all null or empty.
			var firstOpen = (string)delimitersElem.NonAltElement("quotationStart");
			var firstClose = (string)delimitersElem.NonAltElement("quotationEnd");
			bool firstLevelBlank = string.IsNullOrEmpty(firstOpen)
				&& string.IsNullOrEmpty(firstClose)
				&& string.IsNullOrEmpty(firstLevelContinue);
			if (!firstLevelBlank)
			{
				ws.QuotationMarks.Add(new QuotationMark(firstOpen, firstClose, firstLevelContinue, 1, QuotationMarkingSystemType.Normal));
			}

			// Level 2: alternateQuotationStart, alternateQuotationEnd. Same rule as level 1.
			var secondOpen = (string)delimitersElem.NonAltElement("alternateQuotationStart");
			var secondClose = (string)delimitersElem.NonAltElement("alternateQuotationEnd");
			bool secondLevelBlank = string.IsNullOrEmpty(secondOpen)
				&& string.IsNullOrEmpty(secondClose)
				&& string.IsNullOrEmpty(secondLevelContinue);
			if (!secondLevelBlank)
			{
				ws.QuotationMarks.Add(new QuotationMark(secondOpen, secondClose, secondLevelContinue, 2, QuotationMarkingSystemType.Normal));
			}

			ws.QuotationMarks.AddRange(deferredQuotationMarks);
		}