/// <summary>
/// Converts the <c>input1</c> sample (UTF-8 encoded) using the mappings from <c>GetMappings()</c>
/// plus an <c>id</c> marker mapped to the text abbreviation, then checks the resulting document:
/// the header items of the first text, its first three phrases (spread over two paragraphs),
/// and the abbreviation and phrases of the second text.
/// </summary>
public void BasicConversion()
{
    const string textWs = "qaa-x-kal";

    var markerMappings = GetMappings();
    markerMappings.Add(new InterlinearMapping
    {
        Marker = "id",
        Destination = InterlinDestination.Abbreviation,
        WritingSystem = "en"
    });
    var wsFactory = GetWsf();
    var sfmInput = new ByteReader("input1", Encoding.UTF8.GetBytes(input1));
    var sfmConverter = new Sfm2FlexText();
    var flexTextBytes = sfmConverter.Convert(sfmInput, markerMappings, wsFactory);

    using (var outputStream = new MemoryStream(flexTextBytes))
    using (var outputReader = new StreamReader(outputStream))
    {
        XElement document = XElement.Load(outputReader);
        Assert.That(document.Name.LocalName, Is.EqualTo("document"));

        // Header items of the first text.
        XElement firstText = document.Element("interlinear-text");
        Assert.That(firstText, Is.Not.Null);
        VerifyItem(firstText, "./item[@type='title']", "fr", "Abu Nawas");
        VerifyItem(firstText, "./item[@type='title-abbreviation']", "en", "Abu");
        VerifyItem(firstText, "./item[@type='source']", "en", "Guna Bte Rintal");
        VerifyItem(firstText, "./item[@type='comment']", "en",
            "a funny story (folk tale?) about the relationship between Abu Nawas and a king");

        // Walk down to the first phrase, checking every level exists.
        XElement paragraphs = firstText.Element("paragraphs");
        Assert.That(paragraphs, Is.Not.Null);
        XElement firstParagraph = paragraphs.Element("paragraph");
        Assert.That(firstParagraph, Is.Not.Null);
        XElement phrases = firstParagraph.Element("phrases");
        Assert.That(phrases, Is.Not.Null);
        XElement phrase1 = phrases.Element("phrase");
        Assert.That(phrase1, Is.Not.Null);

        // First phrase: words mixed with punctuation, plus free translation, literal translation and notes.
        var phrase1Words = new[]
        {
            "$$", "Uun", "kono'", "serita'", ",", "dau-dau", "(", "tu", ")", "kisa", "Abu", "Nawas", "."
        };
        var phrase1Punctuation = new HashSet<string> { ".", ",", "$$", "(", ")" };
        VerifyItem(phrase1, "./item[@type='reference-label']", "en", "Abu Nawas 001");
        VerifyText(phrase1, phrase1Words, phrase1Punctuation, textWs);
        VerifyItem(phrase1, "./item[@type='gls']", "en",
            "There was, it is said, a story about Abu Nawas. JT added some more.");
        VerifyItem(phrase1, "./item[@type='lit']", "en",
            "There existed, it is said, long ago, this story about Abu Nawas");
        VerifyItem(phrase1, "./item[@type='note']", "en", "sentence adjunct: reported speech");
        VerifyItem(phrase1, "./item[@type='note'][2]", "en", "Example hacked by JohnT to exemplify more cases");

        // Second phrase of the first paragraph.
        XElement phrase2 = phrases.Elements("phrase").Skip(1).First();
        VerifyItem(phrase2, "./item[@type='reference-label']", "en", "Abu Nawas 002");
        VerifyText(phrase2, new[] { "Abu", "Nawas", "kerjo", "ta'", "rojo" }, new HashSet<string>(), textWs);
        VerifyItem(phrase2, "./item[@type='gls']", "en", "Abu Nawas worked for the king.");

        // Third phrase lives in the second paragraph.
        XElement phrase3 = paragraphs.XPathSelectElement("./paragraph[2]/phrases/phrase");
        VerifyItem(phrase3, "./item[@type='reference-label']", "en", "Abu Nawas 003");
        VerifyText(phrase3, new[] { "John", "added", "this" }, new HashSet<string>(), textWs);
        VerifyItem(phrase3, "./item[@type='gls']", "en", "and this");

        // Second text in the same document.
        XElement secondText = document.Elements("interlinear-text").Skip(1).First();
        VerifyItem(secondText, "./item[@type='title-abbreviation']", "en", "Jt");

        XElement phrase4 = secondText.XPathSelectElement("./paragraphs/paragraph/phrases/phrase");
        VerifyItem(phrase4, "./item[@type='reference-label']", "en", "Jt 001");
        VerifyText(phrase4, new[] { "A", "second", "text", "in", "two", "parts" }, new HashSet<string>(), textWs);
        VerifyItem(phrase4, "./item[@type='gls']", "en", "its free translation");
        Assert.That(phrase4.XPathSelectElements("./item[@type='gls']").Count(), Is.EqualTo(1));

        XElement phrase5 = secondText.XPathSelectElement("./paragraphs/paragraph[2]/phrases/phrase");
        VerifyText(phrase5, new[] { "second", "para" }, new HashSet<string>(), textWs);

        // If we unexpectedly get a second text line AFTER some other known field without a ref line,
        // start a new phrase anyway.
        XElement phrase6 = secondText.XPathSelectElement("./paragraphs/paragraph[2]/phrases/phrase[2]");
        VerifyText(phrase6, new[] { "second", "para", "second", "sentence" }, new HashSet<string>(), textWs);
    }
}
/// <summary>
/// Converts the <c>input3</c> sample (encoded with code page 1252) using the mappings from
/// <c>GetMappings()</c> plus <c>id</c> (text id) and <c>ab</c> (abbreviation) markers, and checks
/// that the output contains four separate <c>interlinear-text</c> elements with the expected
/// titles, abbreviations, comments and first phrases.
/// </summary>
public void MultipleTexts()
{
    const string phrasePath = "./paragraphs/paragraph/phrases/phrase";

    var mappings = GetMappings();
    mappings.Add(new InterlinearMapping { Marker = "id", Destination = InterlinDestination.Id, WritingSystem = "en" });
    mappings.Add(new InterlinearMapping { Marker = "ab", Destination = InterlinDestination.Abbreviation, WritingSystem = "en" });
    var wsf = GetWsf();
    var input = new ByteReader("input3", Encoding.GetEncoding(1252).GetBytes(input3));
    var converter = new Sfm2FlexText();
    var output = converter.Convert(input, mappings, wsf);

    using (var outputStream = new MemoryStream(output))
    using (var reader = new StreamReader(outputStream))
    {
        var outputElt = XElement.Load(reader);

        // Enumerate the texts once; every check below indexes into this list.
        var texts = outputElt.Elements("interlinear-text").ToList();
        Assert.That(texts, Has.Count.GreaterThanOrEqualTo(4), "expected at least four interlinear-text elements");

        // First text.
        var textElt = texts[0];
        VerifyItem(textElt, "./item[@type='title-abbreviation']", "en", "Abu");
        var phrase1 = textElt.XPathSelectElement(phrasePath);
        VerifyText(phrase1, new[] { "John", "added", "this" }, new HashSet<string>(), "qaa-x-kal");

        // Second text:
        //\id
        //\ab MyT
        //\name MyText
        //\com Some coments
        //\com more comments
        //\p
        //\ref MyText 001
        //\t Some text
        textElt = texts[1];
        VerifyItem(textElt, "./item[@type='title-abbreviation']", "en", "MyT");
        VerifyItem(textElt, "./item[@type='title']", "fr", "MyText");
        VerifyItem(textElt, "./item[@type='comment']", "en", "Some coments more comments");
        phrase1 = textElt.XPathSelectElement(phrasePath);
        VerifyText(phrase1, new[] { "Some", "text" }, new HashSet<string>(), "qaa-x-kal");

        // Third text. Verifies that:
        // - \name can occur twice and be concatenated
        // - \name can force the start of a new text
        // - a subsequent \name not following some content is ignored
        //\name Another
        //\name Text
        //\com More comments
        //\name this is ignored
        //\p
        //\ref AT 001
        //\t third text
        textElt = texts[2];
        VerifyItem(textElt, "./item[@type='title']", "fr", "Another Text");
        VerifyItem(textElt, "./item[@type='comment']", "en", "More comments");
        phrase1 = textElt.XPathSelectElement(phrasePath);
        VerifyText(phrase1, new[] { "third", "text" }, new HashSet<string>(), "qaa-x-kal");

        // Fourth text. Verifies that:
        // - \id can force the start of a new text
        // - \ab does not start yet another (when no intervening content)
        //\id
        //\ab Yet
        //\name Yet another
        //\p
        //\ref Yet 001
        //\t fourth text
        textElt = texts[3];
        VerifyItem(textElt, "./item[@type='title']", "fr", "Yet another");
        VerifyItem(textElt, "./item[@type='title-abbreviation']", "en", "Yet");
        phrase1 = textElt.XPathSelectElement(phrasePath);
        VerifyText(phrase1, new[] { "fourth", "text" }, new HashSet<string>(), "qaa-x-kal");
    }
}
/// <summary>
/// Registers a temporary code-page conversion map ("XXYTestConverter" for "1252"), assigns it to
/// the <c>id</c> and <c>t</c> marker mappings, converts the <c>input2</c> sample (encoded with code
/// page 1252) and checks that the abbreviation and the baseline words come out with the expected
/// Unicode characters (U+2026 and U+017D).
/// </summary>
public void EncodingConverters()
{
    const string converterName = "XXYTestConverter";

    var encConv = new EncConverters();
    encConv.AddConversionMap(converterName, "1252", ECInterfaces.ConvType.Legacy_to_from_Unicode, "cp", "", "",
        ECInterfaces.ProcessTypeFlags.CodePageConversion);
    try
    {
        var mappings = new List<InterlinearMapping>
        {
            new InterlinearMapping
            {
                Marker = "id",
                Destination = InterlinDestination.Abbreviation,
                WritingSystem = "en",
                Converter = converterName
            },
            new InterlinearMapping
            {
                Marker = "t",
                Destination = InterlinDestination.Baseline,
                WritingSystem = "qaa-x-kal",
                Converter = converterName
            }
        };
        var wsf = GetWsf();
        var input = new ByteReader("input2", Encoding.GetEncoding(1252).GetBytes(input2));
        var converter = new Sfm2FlexText();
        var output = converter.Convert(input, mappings, wsf);

        using (var outputStream = new MemoryStream(output))
        using (var reader = new StreamReader(outputStream))
        {
            var outputElt = XElement.Load(reader);
            var textElt = outputElt.Element("interlinear-text");
            VerifyItem(textElt, "./item[@type='title-abbreviation']", "en", "Abu\x2026");
            var phrase1 = textElt.XPathSelectElement("./paragraphs/paragraph/phrases/phrase");
            VerifyText(phrase1, new[] { "John", "added", "this\x017D" }, new HashSet<string>(), "qaa-x-kal");
        }
    }
    finally
    {
        // Remove the temporary map even when an assertion above fails, so it does not outlive the test.
        // NOTE(review): presumably EncConverters keeps registered maps in a shared repository - confirm.
        encConv.Remove(converterName);
    }
}
/// <summary>
/// Runs the conversion for every entry in InputFiles. The signature is the one required by
/// ProgressDialogWithTask.RunTask: <paramref name="parameters"/> is not used and the result is
/// always null; <paramref name="dlg"/> is only handed on to the stage 2 conversion.
/// </summary>
/// <param name="dlg">Progress dialog passed through to DoStage2Conversion.</param>
/// <param name="parameters">Unused.</param>
/// <returns>Always null.</returns>
private object DoConversion(IThreadedProgress dlg, object[] parameters)
{
    m_firstNewText = null;

    foreach (var rawPath in InputFiles)
    {
        var path = rawPath.Trim();
        if (File.Exists(path))
        {
            // Stage 1: convert the SFM file contents using the current mappings.
            var sfmBytes = new ByteReader(path);
            var sfmConverter = GetSfmConverter();
            var stage1Output = sfmConverter.Convert(sfmBytes, m_mappings, m_cache.WritingSystemFactory);

            // Skip actual import if SHIFT was held down.
            var skipImport = secretShiftText.Visible == true;
            if (!skipImport)
            {
                DoStage2Conversion(stage1Output, dlg);
            }
        }
        // else: the file is silently skipped - report?
    }

    return null;
}
/// <summary>
/// Converts a small word-list sample with Sfm2FlexTextWordsFrag (<c>lx</c> mapped to the wordform,
/// <c>ge</c> to the word gloss) and checks that the document contains three <c>word</c> elements:
/// one with a single gloss, one with two glosses, and one whose form is a multi-word phrase.
/// </summary>
public void BasicConversion()
{
    // NOTE(review): the sample appears here as a single line; confirm that the line breaks
    // before the backslash markers have not been lost.
    const string input1 = @"\lx glossedonce \ge onlygloss \del 0 \wc Pyle:PYLEE-1007 \cdt 2012-06-04T08:06:14Z \lx glossedtwice \ge firstgloss \del 0 \wc Pyle:PYLEE-1007 \cdt 2012-06-05T08:06:14Z \ge secondgloss \del 1 \wc Pyle:PYLEE-1007 \cdt 2012-06-05T08:06:29Z \ddt 2012-06-07T08:49:08Z \lx support a phrase \ge phrase gloss \del 0 \wc Pyle:PYLEE-1007 \cdt 2012-06-05T08:23:54Z ";

    var markerMappings = new List<InterlinearMapping>
    {
        new InterlinearMapping { Marker = "lx", Destination = InterlinDestination.Wordform, WritingSystem = "qaa-x-kal" },
        new InterlinearMapping { Marker = "ge", Destination = InterlinDestination.WordGloss, WritingSystem = "en" }
    };
    var wsFactory = GetWsf();
    var sfmInput = new ByteReader("input1", Encoding.UTF8.GetBytes(input1));
    var fragConverter = new Sfm2FlexTextWordsFrag();
    var convertedBytes = fragConverter.Convert(sfmInput, markerMappings, wsFactory);

    using (var outputStream = new MemoryStream(convertedBytes))
    using (var outputReader = new StreamReader(outputStream))
    {
        XElement document = XElement.Load(outputReader);
        Assert.That(document.Name.LocalName, Is.EqualTo("document"));
        List<XElement> words = document.Elements("word").ToList();
        Assert.That(words, Has.Count.EqualTo(3));

        // Word with exactly one gloss.
        XElement glossedOnce = words[0];
        var onceTxt = glossedOnce.XPathSelectElements("item[@type='txt']").ToList();
        var onceGls = glossedOnce.XPathSelectElements("item[@type='gls']").ToList();
        Assert.That(onceTxt, Has.Count.EqualTo(1));
        Assert.That(onceGls, Has.Count.EqualTo(1));
        VerifyItem(glossedOnce, "./item[@type='txt']", "qaa-x-kal", "glossedonce");
        VerifyItem(glossedOnce, "./item[@type='gls']", "en", "onlygloss");

        // Word with two glosses, in input order.
        XElement glossedTwice = words[1];
        var twiceTxt = glossedTwice.XPathSelectElements("item[@type='txt']").ToList();
        var twiceGls = glossedTwice.XPathSelectElements("item[@type='gls']").ToList();
        Assert.That(twiceTxt, Has.Count.EqualTo(1));
        Assert.That(twiceGls, Has.Count.EqualTo(2));
        VerifyItem(glossedTwice, "./item[@type='txt']", "qaa-x-kal", "glossedtwice");
        VerifyItem(glossedTwice, "./item[@type='gls'][1]", "en", "firstgloss");
        VerifyItem(glossedTwice, "./item[@type='gls'][2]", "en", "secondgloss");

        // Form and gloss that both contain spaces.
        XElement phraseWord = words[2];
        var phraseTxt = phraseWord.XPathSelectElements("item[@type='txt']").ToList();
        var phraseGls = phraseWord.XPathSelectElements("item[@type='gls']").ToList();
        Assert.That(phraseTxt, Has.Count.EqualTo(1));
        Assert.That(phraseGls, Has.Count.EqualTo(1));
        VerifyItem(phraseWord, "./item[@type='txt']", "qaa-x-kal", "support a phrase");
        VerifyItem(phraseWord, "./item[@type='gls']", "en", "phrase gloss");
    }
}
/// <summary>
/// Converts a sample containing only <c>lx</c> fields with Sfm2FlexTextWordsFrag and checks that
/// the document contains three <c>word</c> elements, each with exactly one <c>txt</c> item
/// holding the expected form.
/// </summary>
public void WordsWithoutGlosses()
{
    // NOTE(review): the sample appears here as a single line; confirm that the line breaks
    // before the backslash markers have not been lost.
    const string input2 = @"\lx wordone \lx wordtwo \lx wordthree ";

    var markerMappings = new List<InterlinearMapping>
    {
        new InterlinearMapping { Marker = "lx", Destination = InterlinDestination.Wordform, WritingSystem = "qaa-x-kal" },
        new InterlinearMapping { Marker = "ge", Destination = InterlinDestination.WordGloss, WritingSystem = "en" }
    };
    var wsFactory = GetWsf();
    var sfmInput = new ByteReader("input2", Encoding.UTF8.GetBytes(input2));
    var fragConverter = new Sfm2FlexTextWordsFrag();
    var convertedBytes = fragConverter.Convert(sfmInput, markerMappings, wsFactory);

    using (var outputStream = new MemoryStream(convertedBytes))
    using (var outputReader = new StreamReader(outputStream))
    {
        XElement document = XElement.Load(outputReader);
        Assert.That(document.Name.LocalName, Is.EqualTo("document"));
        List<XElement> words = document.Elements("word").ToList();
        Assert.That(words, Has.Count.EqualTo(3));

        // Each word, in input order, carries a single txt item and nothing is checked about glosses.
        var expectedForms = new[] { "wordone", "wordtwo", "wordthree" };
        for (var index = 0; index < expectedForms.Length; index++)
        {
            XElement word = words[index];
            var txtItems = word.XPathSelectElements("item[@type='txt']").ToList();
            Assert.That(txtItems, Has.Count.EqualTo(1));
            VerifyItem(word, "./item[@type='txt']", "qaa-x-kal", expectedForms[index]);
        }
    }
}