Replacement reader class for byte reading.
		public void BasicConversion()
		{
			// End-to-end check: feed the input1 SFM sample through Sfm2FlexText and inspect the XML it produces.
			var markerMappings = GetMappings();
			// For this test the \id marker is mapped to the text abbreviation.
			markerMappings.Add(new InterlinearMapping
			{
				Marker = "id",
				Destination = InterlinDestination.Abbreviation,
				WritingSystem = "en"
			});
			var wsFactory = GetWsf();
			var sfmReader = new ByteReader("input1", Encoding.UTF8.GetBytes(input1));
			var flexTextBytes = new Sfm2FlexText().Convert(sfmReader, markerMappings, wsFactory);
			using (var xmlStream = new MemoryStream(flexTextBytes))
			using (var xmlReader = new StreamReader(xmlStream))
			{
				var document = XElement.Load(xmlReader);
				Assert.That(document.Name.LocalName, Is.EqualTo("document"));

				// Text-level items of the first interlinear text.
				var firstText = document.Element("interlinear-text");
				Assert.That(firstText, Is.Not.Null);
				VerifyItem(firstText, "./item[@type='title']", "fr", "Abu Nawas");
				VerifyItem(firstText, "./item[@type='title-abbreviation']", "en", "Abu");
				VerifyItem(firstText, "./item[@type='source']", "en", "Guna Bte Rintal");
				VerifyItem(firstText, "./item[@type='comment']", "en", "a funny story (folk tale?) about the relationship between Abu Nawas and a king");

				// Walk down paragraphs -> paragraph -> phrases, checking each level exists.
				var paragraphsElt = firstText.Element("paragraphs");
				Assert.That(paragraphsElt, Is.Not.Null);
				var firstParagraph = paragraphsElt.Element("paragraph");
				Assert.That(firstParagraph, Is.Not.Null);
				var phrasesElt = firstParagraph.Element("phrases");
				Assert.That(phrasesElt, Is.Not.Null);

				// Phrase 1: words plus the tokens listed in the punctuation set, and several annotation items.
				var firstPhrase = phrasesElt.Element("phrase");
				Assert.That(firstPhrase, Is.Not.Null);
				VerifyItem(firstPhrase, "./item[@type='reference-label']", "en", "Abu Nawas 001");
				VerifyText(
					firstPhrase,
					new[] { "$$", "Uun", "kono'", "serita'", ",", "dau-dau", "(", "tu", ")", "kisa", "Abu", "Nawas", "." },
					new HashSet<string> { ".", ",", "$$", "(", ")" },
					"qaa-x-kal");
				VerifyItem(firstPhrase, "./item[@type='gls']", "en", "There was, it is said, a story about Abu Nawas. JT added some more.");
				VerifyItem(firstPhrase, "./item[@type='lit']", "en", "There existed, it is said, long ago, this story about Abu Nawas");
				VerifyItem(firstPhrase, "./item[@type='note']", "en", "sentence adjunct: reported speech");
				VerifyItem(firstPhrase, "./item[@type='note'][2]", "en", "Example hacked by JohnT to exemplify more cases");

				// Phrase 2: second phrase of the same paragraph.
				var secondPhrase = phrasesElt.Elements("phrase").Skip(1).First();
				VerifyItem(secondPhrase, "./item[@type='reference-label']", "en", "Abu Nawas 002");
				VerifyText(secondPhrase, new[] { "Abu", "Nawas", "kerjo", "ta'", "rojo" }, new HashSet<string>(), "qaa-x-kal");
				VerifyItem(secondPhrase, "./item[@type='gls']", "en", "Abu Nawas worked for the king.");

				// Phrase 3: first phrase of the second paragraph.
				var thirdPhrase = paragraphsElt.XPathSelectElement("./paragraph[2]/phrases/phrase");
				VerifyItem(thirdPhrase, "./item[@type='reference-label']", "en", "Abu Nawas 003");
				VerifyText(thirdPhrase, new[] { "John", "added", "this" }, new HashSet<string>(), "qaa-x-kal");
				VerifyItem(thirdPhrase, "./item[@type='gls']", "en", "and this");

				// The same input also yields a second interlinear text.
				var secondText = document.Elements("interlinear-text").Skip(1).First();
				VerifyItem(secondText, "./item[@type='title-abbreviation']", "en", "Jt");

				var fourthPhrase = secondText.XPathSelectElement("./paragraphs/paragraph/phrases/phrase");
				VerifyItem(fourthPhrase, "./item[@type='reference-label']", "en", "Jt 001");
				VerifyText(fourthPhrase, new[] { "A", "second", "text", "in", "two", "parts" }, new HashSet<string>(), "qaa-x-kal");
				VerifyItem(fourthPhrase, "./item[@type='gls']", "en", "its free translation");
				// Exactly one free translation item is expected on this phrase.
				Assert.That(fourthPhrase.XPathSelectElements("./item[@type='gls']").Count(), Is.EqualTo(1));

				var fifthPhrase = secondText.XPathSelectElement("./paragraphs/paragraph[2]/phrases/phrase");
				VerifyText(fifthPhrase, new[] { "second", "para" }, new HashSet<string>(), "qaa-x-kal");

				// A second text line that arrives AFTER some other known field, without a ref line in
				// between, must still begin a new phrase.
				var sixthPhrase = secondText.XPathSelectElement("./paragraphs/paragraph[2]/phrases/phrase[2]");
				VerifyText(sixthPhrase, new[] { "second", "para", "second", "sentence" }, new HashSet<string>(), "qaa-x-kal");
			}
		}
		public void MultipleTexts()
		{
			// Converts the input3 sample, which contains several texts, and checks each resulting
			// interlinear-text element in document order.
			var markerMappings = GetMappings();
			markerMappings.Add(new InterlinearMapping
			{
				Marker = "id",
				Destination = InterlinDestination.Id,
				WritingSystem = "en"
			});
			markerMappings.Add(new InterlinearMapping
			{
				Marker = "ab",
				Destination = InterlinDestination.Abbreviation,
				WritingSystem = "en"
			});
			var wsFactory = GetWsf();
			var sfmReader = new ByteReader("input3", Encoding.GetEncoding(1252).GetBytes(input3));
			var flexTextBytes = new Sfm2FlexText().Convert(sfmReader, markerMappings, wsFactory);
			using (var xmlStream = new MemoryStream(flexTextBytes))
			using (var xmlReader = new StreamReader(xmlStream))
			{
				var document = XElement.Load(xmlReader);

				// First text.
				var currentText = document.Element("interlinear-text");
				VerifyItem(currentText, "./item[@type='title-abbreviation']", "en", "Abu");
				var currentPhrase = currentText.XPathSelectElement("./paragraphs/paragraph/phrases/phrase");
				VerifyText(currentPhrase, new[] { "John", "added", "this" }, new HashSet<string>(), "qaa-x-kal");

				// Second text, produced from this part of the input:
				//\id
				//\ab MyT
				//\name MyText
				//\com Some coments
				//\com more comments
				//\p
				//\ref MyText 001
				//\t Some text
				currentText = document.Elements("interlinear-text").Skip(1).First();
				VerifyItem(currentText, "./item[@type='title-abbreviation']", "en", "MyT");
				VerifyItem(currentText, "./item[@type='title']", "fr", "MyText");
				VerifyItem(currentText, "./item[@type='comment']", "en", "Some coments more comments");
				currentPhrase = currentText.XPathSelectElement("./paragraphs/paragraph/phrases/phrase");
				VerifyText(currentPhrase, new[] { "Some", "text" }, new HashSet<string>(), "qaa-x-kal");

				// Third text. Verifies that:
				//	- \name can occur twice and be concatenated
				//	- \name can force the start of a new text
				//	- a subsequent \name not following some content is ignored
				//\name Another
				//\name Text
				//\com More comments
				//\name this is ignored
				//\p
				//\ref AT 001
				//\t third text
				currentText = document.Elements("interlinear-text").Skip(2).First();
				VerifyItem(currentText, "./item[@type='title']", "fr", "Another Text");
				VerifyItem(currentText, "./item[@type='comment']", "en", "More comments");
				currentPhrase = currentText.XPathSelectElement("./paragraphs/paragraph/phrases/phrase");
				VerifyText(currentPhrase, new[] { "third", "text" }, new HashSet<string>(), "qaa-x-kal");

				// Fourth text. Verifies that:
				//	- \id can force the start of a new text
				//	- \ab does not start yet another (when no intervening content)
				//\id
				//\ab Yet
				//\name Yet another
				//\p
				//\ref Yet 001
				//\t fourth text
				currentText = document.Elements("interlinear-text").Skip(3).First();
				VerifyItem(currentText, "./item[@type='title']", "fr", "Yet another");
				VerifyItem(currentText, "./item[@type='title-abbreviation']", "en", "Yet");
				currentPhrase = currentText.XPathSelectElement("./paragraphs/paragraph/phrases/phrase");
				VerifyText(currentPhrase, new[] { "fourth", "text" }, new HashSet<string>(), "qaa-x-kal");
			}
		}
		public void EncodingConverters()
		{
			// Checks that a converter named on a mapping is applied to that marker's data: the input is
			// encoded as code page 1252 bytes and the expected output contains U+2026 and U+017D.
			const string converterName = "XXYTestConverter";
			var encConv = new EncConverters();
			encConv.AddConversionMap(converterName, "1252",
						ECInterfaces.ConvType.Legacy_to_from_Unicode, "cp", "", "",
						ECInterfaces.ProcessTypeFlags.CodePageConversion);
			try
			{
				var mappings = new List<InterlinearMapping>();
				mappings.Add(new InterlinearMapping()
				{
					Marker = "id",
					Destination = InterlinDestination.Abbreviation,
					WritingSystem = "en",
					Converter = converterName
				});
				mappings.Add(new InterlinearMapping()
				{
					Marker = "t",
					Destination = InterlinDestination.Baseline,
					WritingSystem = "qaa-x-kal",
					Converter = converterName
				});
				var wsf = GetWsf();
				var input = new ByteReader("input2", Encoding.GetEncoding(1252).GetBytes(input2));
				var converter = new Sfm2FlexText();
				var output = converter.Convert(input, mappings, wsf);
				using (var outputStream = new MemoryStream(output))
				{
					using (var reader = new StreamReader(outputStream))
					{
						var outputElt = XElement.Load(reader);
						var textElt = outputElt.Element("interlinear-text");
						VerifyItem(textElt, "./item[@type='title-abbreviation']", "en", "Abu\x2026");
						var phrase1 = textElt.XPathSelectElement("./paragraphs/paragraph/phrases/phrase");
						VerifyText(phrase1, new[] { "John", "added", "this\x017D" },
							new HashSet<string>(), "qaa-x-kal");
					}
				}
			}
			finally
			{
				// Unregister the temporary converter even when the conversion or an assertion fails;
				// previously a failing run skipped this call and left the test converter registered.
				encConv.Remove(converterName);
			}
		}
		/// <summary>
		/// Runs the conversion for every entry in InputFiles. The method has this shape because
		/// ProgressDialogWithTask.RunTask requires it; <paramref name="parameters"/> is never read
		/// and the return value is always null.
		/// </summary>
		/// <param name="dlg">Progress object handed on to the stage 2 conversion.</param>
		/// <param name="parameters">Unused.</param>
		/// <returns>Always null.</returns>
		private object DoConversion(IThreadedProgress dlg, object[] parameters)
		{
			m_firstNewText = null;
			foreach (var rawPath in InputFiles)
			{
				var trimmedPath = rawPath.Trim();
				// Entries that do not name an existing file are silently skipped (report?).
				if (File.Exists(trimmedPath))
				{
					var reader = new ByteReader(trimmedPath);
					var sfmConverter = GetSfmConverter();
					var stage1Output = sfmConverter.Convert(reader, m_mappings, m_cache.WritingSystemFactory);
					// Stage 1 always runs; the actual import is skipped if SHIFT was held down.
					if (!secretShiftText.Visible)
					{
						DoStage2Conversion(stage1Output, dlg);
					}
				}
			}
			return null;
		}
Esempio n. 5
0
		public void BasicConversion()
		{
			// Three \lx records: one with a single gloss, one with two glosses, and one whose
			// word form contains spaces.
			const string sfmSource = @"\lx glossedonce
\ge onlygloss
\del 0
\wc Pyle:PYLEE-1007
\cdt 2012-06-04T08:06:14Z

\lx glossedtwice
\ge firstgloss
\del 0
\wc Pyle:PYLEE-1007
\cdt 2012-06-05T08:06:14Z
\ge secondgloss
\del 1
\wc Pyle:PYLEE-1007
\cdt 2012-06-05T08:06:29Z
\ddt 2012-06-07T08:49:08Z

\lx support a phrase
\ge phrase gloss
\del 0
\wc Pyle:PYLEE-1007
\cdt 2012-06-05T08:23:54Z
";
			// Only \lx and \ge are mapped; the sample's other markers have no mapping here.
			var markerMappings = new List<InterlinearMapping>
			{
				new InterlinearMapping { Marker = "lx", Destination = InterlinDestination.Wordform, WritingSystem = "qaa-x-kal" },
				new InterlinearMapping { Marker = "ge", Destination = InterlinDestination.WordGloss, WritingSystem = "en" }
			};
			var wsFactory = GetWsf();
			var sfmReader = new ByteReader("input1", Encoding.UTF8.GetBytes(sfmSource));
			var fragmentBytes = new Sfm2FlexTextWordsFrag().Convert(sfmReader, markerMappings, wsFactory);
			using (var xmlStream = new MemoryStream(fragmentBytes))
			using (var xmlReader = new StreamReader(xmlStream))
			{
				var document = XElement.Load(xmlReader);
				Assert.That(document.Name.LocalName, Is.EqualTo("document"));
				var wordElts = document.Elements("word").ToList();
				Assert.That(wordElts, Has.Count.EqualTo(3));

				// Word 1: one form, one gloss.
				var onceGlossed = wordElts[0];
				var onceGlossedForms = onceGlossed.XPathSelectElements("item[@type='txt']").ToList();
				var onceGlossedGlosses = onceGlossed.XPathSelectElements("item[@type='gls']").ToList();
				Assert.That(onceGlossedForms, Has.Count.EqualTo(1));
				Assert.That(onceGlossedGlosses, Has.Count.EqualTo(1));
				VerifyItem(onceGlossed, "./item[@type='txt']", "qaa-x-kal", "glossedonce");
				VerifyItem(onceGlossed, "./item[@type='gls']", "en", "onlygloss");

				// Word 2: one form, two glosses in input order.
				var twiceGlossed = wordElts[1];
				var twiceGlossedForms = twiceGlossed.XPathSelectElements("item[@type='txt']").ToList();
				var twiceGlossedGlosses = twiceGlossed.XPathSelectElements("item[@type='gls']").ToList();
				Assert.That(twiceGlossedForms, Has.Count.EqualTo(1));
				Assert.That(twiceGlossedGlosses, Has.Count.EqualTo(2));
				VerifyItem(twiceGlossed, "./item[@type='txt']", "qaa-x-kal", "glossedtwice");
				VerifyItem(twiceGlossed, "./item[@type='gls'][1]", "en", "firstgloss");
				VerifyItem(twiceGlossed, "./item[@type='gls'][2]", "en", "secondgloss");

				// Word 3: the multi-word form stays a single item.
				var phraseWord = wordElts[2];
				var phraseWordForms = phraseWord.XPathSelectElements("item[@type='txt']").ToList();
				var phraseWordGlosses = phraseWord.XPathSelectElements("item[@type='gls']").ToList();
				Assert.That(phraseWordForms, Has.Count.EqualTo(1));
				Assert.That(phraseWordGlosses, Has.Count.EqualTo(1));
				VerifyItem(phraseWord, "./item[@type='txt']", "qaa-x-kal", "support a phrase");
				VerifyItem(phraseWord, "./item[@type='gls']", "en", "phrase gloss");
			}
		}
Esempio n. 6
0
		public void WordsWithoutGlosses()
		{
			// Three consecutive \lx lines with no \ge lines in between.
			const string sfmSource = @"\lx wordone
\lx wordtwo
\lx wordthree
";
			var markerMappings = new List<InterlinearMapping>
			{
				new InterlinearMapping { Marker = "lx", Destination = InterlinDestination.Wordform, WritingSystem = "qaa-x-kal" },
				new InterlinearMapping { Marker = "ge", Destination = InterlinDestination.WordGloss, WritingSystem = "en" }
			};
			var wsFactory = GetWsf();
			var sfmReader = new ByteReader("input2", Encoding.UTF8.GetBytes(sfmSource));
			var fragmentBytes = new Sfm2FlexTextWordsFrag().Convert(sfmReader, markerMappings, wsFactory);
			using (var xmlStream = new MemoryStream(fragmentBytes))
			using (var xmlReader = new StreamReader(xmlStream))
			{
				var document = XElement.Load(xmlReader);
				Assert.That(document.Name.LocalName, Is.EqualTo("document"));
				var wordElts = document.Elements("word").ToList();
				Assert.That(wordElts, Has.Count.EqualTo(3));

				// Each word element must carry exactly one txt item holding the matching form.
				var expectedForms = new[] { "wordone", "wordtwo", "wordthree" };
				for (var index = 0; index < expectedForms.Length; index++)
				{
					var wordElt = wordElts[index];
					var formItems = wordElt.XPathSelectElements("item[@type='txt']").ToList();
					Assert.That(formItems, Has.Count.EqualTo(1));
					VerifyItem(wordElt, "./item[@type='txt']", "qaa-x-kal", expectedForms[index]);
				}
			}
		}