/// <summary>
/// Imports one word whose gloss item carries no analysisStatus attribute and checks that the
/// import produced a wordform, an analysis, a morph bundle and a gloss (one of each), and that
/// the helper assertions for the morpheme form and the human-approved opinion pass.
/// </summary>
public void ImportNewHumanApprovedByDefaultWordGloss()
{
    var wsf = Cache.WritingSystemFactory;
    const string xml = "<document><interlinear-text>" +
        "<paragraphs><paragraph><phrases><phrase><words>" +
        "<word>" +
        "<item type='txt' lang='en'>supercalifragilisticexpialidocious</item>" +
        "<item type='gls' lang='pt'>absurdo</item>" +
        "</word>" +
        "</words></phrase></phrases></paragraph></paragraphs></interlinear-text></document>";

    var importer = new BIRDFormatImportTests.LLIMergeExtension(Cache, null, null);
    FDO.IText importedText = null;
    var options = CreateImportInterlinearOptions(xml);
    importer.ImportInterlinear(options, ref importedText);

    using (var texts = Cache.LanguageProject.Texts.GetEnumerator())
    {
        // Inspect the first text in the language project.
        texts.MoveNext();
        var imported = texts.Current;
        Assert.IsNotNull(imported);

        var para = imported.ContentsOA.ParagraphsOS[0] as IStTxtPara;
        Assert.IsNotNull(para);
        Assert.That(para.Analyses.Count(), Is.EqualTo(1));

        var wordform = para.Analyses.First().Wordform;
        int wsEn = wsf.get_Engine("en").Handle;
        Assert.That(wordform.Form.get_String(wsEn).Text,
            Is.EqualTo("supercalifragilisticexpialidocious"));
        Assert.That(wordform.AnalysesOC.Count, Is.GreaterThan(0));
        var analysis = wordform.AnalysesOC.First();

        // The import should also have created a morpheme form for the word...
        AssertMorphemeFormMatchesWordform(wordform, analysis, wsEn);
        // ...and a human-approved opinion on the analysis.
        AssertHumanApprovedOpinion(wordform, analysis);

        // The paragraph's analysis must resolve to a gloss holding the imported text.
        var tree = new AnalysisTree(para.Analyses.First());
        Assert.IsNotNull(tree.Gloss, "IAnalysis should be WfiGloss");
        Assert.That(tree.Gloss.Form.get_String(wsf.get_Engine("pt").Handle).Text,
            Is.EqualTo("absurdo"));

        // Exactly one object of each kind should exist afterwards.
        var services = Cache.ServiceLocator;
        Assert.That(services.GetInstance<IWfiGlossRepository>().Count, Is.EqualTo(1));
        Assert.That(services.GetInstance<IWfiMorphBundleRepository>().Count, Is.EqualTo(1));
        Assert.That(services.GetInstance<IWfiAnalysisRepository>().Count, Is.EqualTo(1));
        Assert.That(services.GetInstance<IWfiWordformRepository>().Count, Is.EqualTo(1));
    }
}
/// <summary>
/// Imports an interlinear text whose single word carries two 'txt' items in different
/// writing systems (wbl-Arab-AF and an IPA variant) and checks that the paragraph baseline
/// uses the first writing system while the wordform stores both forms.
/// </summary>
/// <remarks>
/// The document contains a non-ASCII (Arabic) title, so the stream is encoded as UTF-8.
/// Encoding it as ASCII would replace those characters with '?' before the importer reads them.
/// </remarks>
public void ImportWordsWithMultipleWss()
{
    IWritingSystem wsWbl;
    Cache.ServiceLocator.WritingSystemManager.GetOrSet("wbl-Arab-AF", out wsWbl);
    wsWbl.RightToLeftScript = true;
    IWritingSystem wsWblIpa;
    Cache.ServiceLocator.WritingSystemManager.GetOrSet("wbl-Qaaa-AF-fonipa-x-Zipa", out wsWblIpa);

    const string xml = @"<document version='2'> <interlinear-text guid='5eecc8be-f41b-4433-be94-8950a8ce75e5'> <item type='title' lang='wbl-Arab-AF'>تست</item> <item type='title' lang='en'>Test</item> <item type='comment' lang='en'></item> <paragraphs> <paragraph guid='b21daced-5c85-4610-8023-8d7d4b3191f4'> <phrases> <phrase guid='0b0346e0-3bb8-40e7-a0a4-f7771d233e93'> <item type='segnum' lang='en'>1</item> <words> <word guid='0b548dff-6a8e-4c21-a977-fcc4ddc268be'> <item type='txt' lang='wbl-Arab-AF'>baseline</item> <item type='txt' lang='wbl-Qaaa-AF-fonipa-x-Zipa'>beslain</item> <item type='gls' lang='en'>gloss</item> </word> </words> </phrase> </phrases> </paragraph> </paragraphs> <languages> <language lang='wbl-Arab-AF' font='Times New Roman' vernacular='true' RightToLeft='true' /> <language lang='en' font='Times New Roman' /> <language lang='wbl-Qaaa-AF-fonipa-x-Zipa' font='Doulos SIL' vernacular='true' /> </languages> </interlinear-text> </document>";

    // UTF-8 keeps the Arabic title intact; plain-ASCII content encodes to the same bytes as before.
    using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(xml)))
    {
        var options = new LinguaLinksImport.ImportInterlinearOptions
        {
            Progress = new DummyProgressDlg(),
            AnalysesLevel = LinguaLinksImport.ImportAnalysesLevel.Wordform,
            BirdData = stream,
            AllottedProgress = 0,
            CheckAndAddLanguages = DummyCheckAndAddLanguagesInternal
        };
        FDO.IText importedText = null;
        var li = new BIRDFormatImportTests.LLIMergeExtension(Cache, null, null);

        var result = li.ImportInterlinear(options, ref importedText);

        Assert.True(result, "ImportInterlinear was not successful.");
        Assert.That(importedText.ContentsOA.ParagraphsOS.Count, Is.EqualTo(1));

        // The baseline is a single run in the first 'txt' writing system.
        var paraImported = importedText.ContentsOA[0];
        var testPara = paraImported.Contents;
        Assert.That(testPara.Text, Is.EqualTo("baseline"));
        Assert.That(TsStringUtils.GetWsAtOffset(testPara, 0), Is.EqualTo(wsWbl.Handle));
        Assert.That(testPara.RunCount, Is.EqualTo(1));

        // main writing system should be the first one in the import
        var mainWs = importedText.ContentsOA.MainWritingSystem;
        Assert.That(mainWs, Is.EqualTo(wsWbl.Handle));

        // Next verify that the IPA content got added to the imported word form
        var a0 = paraImported.SegmentsOS[0].AnalysesRS[0];
        var wf0 = a0 as IWfiWordform;
        Assert.That(wf0, Is.Not.Null);
        Assert.That(wf0.Form.get_String(wsWbl.Handle).Text, Is.EqualTo("baseline"));
        Assert.That(wf0.Form.get_String(wsWblIpa.Handle).Text, Is.EqualTo("beslain"));
    }
}
/// <summary>
/// Imports an interlinear text whose single word carries two 'txt' items in different
/// writing systems (wbl-Arab-AF and an IPA variant) and checks that the paragraph baseline
/// uses the first writing system while the wordform stores both forms.
/// </summary>
/// <remarks>
/// The document contains a non-ASCII (Arabic) title, so the stream is encoded as UTF-8.
/// Encoding it as ASCII would replace those characters with '?' before the importer reads them.
/// NOTE(review): a method with this same name and parameter list also appears directly before
/// this one in the visible source. If both are members of the same class that will not
/// compile - confirm whether one copy should be removed or renamed.
/// </remarks>
public void ImportWordsWithMultipleWss()
{
    IWritingSystem wsWbl;
    Cache.ServiceLocator.WritingSystemManager.GetOrSet("wbl-Arab-AF", out wsWbl);
    wsWbl.RightToLeftScript = true;
    IWritingSystem wsWblIpa;
    Cache.ServiceLocator.WritingSystemManager.GetOrSet("wbl-Qaaa-AF-fonipa-x-Zipa", out wsWblIpa);

    const string xml = @"<document version='2'> <interlinear-text guid='5eecc8be-f41b-4433-be94-8950a8ce75e5'> <item type='title' lang='wbl-Arab-AF'>تست</item> <item type='title' lang='en'>Test</item> <item type='comment' lang='en'></item> <paragraphs> <paragraph guid='b21daced-5c85-4610-8023-8d7d4b3191f4'> <phrases> <phrase guid='0b0346e0-3bb8-40e7-a0a4-f7771d233e93'> <item type='segnum' lang='en'>1</item> <words> <word guid='0b548dff-6a8e-4c21-a977-fcc4ddc268be'> <item type='txt' lang='wbl-Arab-AF'>baseline</item> <item type='txt' lang='wbl-Qaaa-AF-fonipa-x-Zipa'>beslain</item> <item type='gls' lang='en'>gloss</item> </word> </words> </phrase> </phrases> </paragraph> </paragraphs> <languages> <language lang='wbl-Arab-AF' font='Times New Roman' vernacular='true' RightToLeft='true' /> <language lang='en' font='Times New Roman' /> <language lang='wbl-Qaaa-AF-fonipa-x-Zipa' font='Doulos SIL' vernacular='true' /> </languages> </interlinear-text> </document>";

    // UTF-8 keeps the Arabic title intact; plain-ASCII content encodes to the same bytes as before.
    using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(xml)))
    {
        var options = new LinguaLinksImport.ImportInterlinearOptions
        {
            Progress = new DummyProgressDlg(),
            AnalysesLevel = LinguaLinksImport.ImportAnalysesLevel.Wordform,
            BirdData = stream,
            AllottedProgress = 0,
            CheckAndAddLanguages = DummyCheckAndAddLanguagesInternal
        };
        FDO.IText importedText = null;
        var li = new BIRDFormatImportTests.LLIMergeExtension(Cache, null, null);

        var result = li.ImportInterlinear(options, ref importedText);

        Assert.True(result, "ImportInterlinear was not successful.");
        Assert.That(importedText.ContentsOA.ParagraphsOS.Count, Is.EqualTo(1));

        // The baseline is a single run in the first 'txt' writing system.
        var paraImported = importedText.ContentsOA[0];
        var testPara = paraImported.Contents;
        Assert.That(testPara.Text, Is.EqualTo("baseline"));
        Assert.That(TsStringUtils.GetWsAtOffset(testPara, 0), Is.EqualTo(wsWbl.Handle));
        Assert.That(testPara.RunCount, Is.EqualTo(1));

        // main writing system should be the first one in the import
        var mainWs = importedText.ContentsOA.MainWritingSystem;
        Assert.That(mainWs, Is.EqualTo(wsWbl.Handle));

        // Next verify that the IPA content got added to the imported word form
        var a0 = paraImported.SegmentsOS[0].AnalysesRS[0];
        var wf0 = a0 as IWfiWordform;
        Assert.That(wf0, Is.Not.Null);
        Assert.That(wf0.Form.get_String(wsWbl.Handle).Text, Is.EqualTo("baseline"));
        Assert.That(wf0.Form.get_String(wsWblIpa.Handle).Text, Is.EqualTo("beslain"));
    }
}
/// <summary>
/// Imports the same one-word words-fragment document twice at the WordGloss level and checks
/// that the second import adds nothing: one wordform, one analysis and one gloss remain.
/// </summary>
public void WordsFragDoc_OneWordPhraseAndOneGloss_AvoidDuplicates()
{
    const string xml = @"<document> <word> <item type='txt' lang='qaa-x-kal'>support a phrase</item> <item type='gls' lang='en'>phrase gloss</item> </word> </document>";

    var wsEn = Cache.WritingSystemFactory.GetWsFromStr("en");
    IWritingSystem wsKal;
    Cache.ServiceLocator.WritingSystemManager.GetOrSet("qaa-x-kal", out wsKal);

    var importer = new BIRDFormatImportTests.LLIMergeExtension(Cache, null, null);

    // First Import
    Assert.DoesNotThrow(() => importer.ImportWordsFrag(
        () => new MemoryStream(Encoding.ASCII.GetBytes(xml)),
        LinguaLinksImport.ImportAnalysesLevel.WordGloss));

    // Second Import (identical content)
    Assert.DoesNotThrow(() => importer.ImportWordsFrag(
        () => new MemoryStream(Encoding.ASCII.GetBytes(xml)),
        LinguaLinksImport.ImportAnalysesLevel.WordGloss));

    var services = Cache.ServiceLocator;
    var wordform = services.GetInstance<IWfiWordformRepository>()
        .GetMatchingWordform(wsKal.Handle, "support a phrase");
    Assert.That(wordform, Is.Not.Null);

    // The phrase wordform owns exactly one analysis with exactly one gloss.
    Assert.That(wordform.AnalysesOC, Has.Count.EqualTo(1));
    var analysis = wordform.AnalysesOC.ElementAt(0);
    Assert.That(analysis.MeaningsOC, Has.Count.EqualTo(1));
    Assert.That(analysis.MeaningsOC.ElementAt(0).Form.get_String(wsEn).Text,
        Is.EqualTo("phrase gloss"));

    // Repository-wide counts confirm that nothing was duplicated.
    Assert.That(services.GetInstance<IWfiGlossRepository>().Count, Is.EqualTo(1));
    Assert.That(services.GetInstance<IWfiAnalysisRepository>().Count, Is.EqualTo(1));
    Assert.That(services.GetInstance<IWfiWordformRepository>().Count, Is.EqualTo(1));
}
/// <summary>
/// Imports a words-fragment document containing one word with two gloss items and checks that
/// each gloss ends up in its own analysis on the single imported wordform.
/// </summary>
public void WordsFragDoc_OneWordAndMultiGloss()
{
    const string xml = @"<document> <word> <item type='txt' lang='qaa-x-kal'>glossedtwice</item> <item type='gls' lang='en'>firstgloss</item> <item type='gls' lang='en'>secondgloss</item> </word> </document>";

    var wsEn = Cache.WritingSystemFactory.GetWsFromStr("en");
    IWritingSystem wsKal;
    Cache.ServiceLocator.WritingSystemManager.GetOrSet("qaa-x-kal", out wsKal);

    var importer = new BIRDFormatImportTests.LLIMergeExtension(Cache, null, null);
    Assert.DoesNotThrow(() => importer.ImportWordsFrag(
        () => new MemoryStream(Encoding.ASCII.GetBytes(xml)),
        LinguaLinksImport.ImportAnalysesLevel.WordGloss));

    var services = Cache.ServiceLocator;
    var wordform = services.GetInstance<IWfiWordformRepository>()
        .GetMatchingWordform(wsKal.Handle, "glossedtwice");
    Assert.That(wordform, Is.Not.Null);
    Assert.That(wordform.AnalysesOC, Has.Count.EqualTo(2),
        "multiple word glosses (without specifying morphology) should create separate WfiAnalyses with separate glosses");

    // NOTE(review): the checks below rely on AnalysesOC enumerating in import order - confirm
    // that this ordering is guaranteed.
    var firstAnalysis = wordform.AnalysesOC.ElementAt(0);
    Assert.That(firstAnalysis.MeaningsOC, Has.Count.EqualTo(1));
    Assert.That(firstAnalysis.MeaningsOC.ElementAt(0).Form.get_String(wsEn).Text,
        Is.EqualTo("firstgloss"));

    var secondAnalysis = wordform.AnalysesOC.ElementAt(1);
    Assert.That(secondAnalysis.MeaningsOC, Has.Count.EqualTo(1));
    Assert.That(secondAnalysis.MeaningsOC.ElementAt(0).Form.get_String(wsEn).Text,
        Is.EqualTo("secondgloss"));

    // Two glosses and two analyses overall, all hanging off a single wordform.
    Assert.That(services.GetInstance<IWfiGlossRepository>().Count, Is.EqualTo(2));
    Assert.That(services.GetInstance<IWfiAnalysisRepository>().Count, Is.EqualTo(2));
    Assert.That(services.GetInstance<IWfiWordformRepository>().Count, Is.EqualTo(1));
}
/// <summary>
/// Smoke test: importing a words-fragment document with three words (one gloss, two glosses,
/// and a multi-word phrase) at the WordGloss level must not throw.
/// </summary>
public void DeserializeWordsFragDocument()
{
    const string xml = @"<document> <word> <item type='txt' lang='qaa-x-kal'>glossedonce</item> <item type='gls' lang='en'>onlygloss</item> </word> <word> <item type='txt' lang='qaa-x-kal'>glossedtwice</item> <item type='gls' lang='en'>firstgloss</item> <item type='gls' lang='en'>secondgloss</item> </word> <word> <item type='txt' lang='qaa-x-kal'>support a phrase</item> <item type='gls' lang='en'>phrase gloss</item> </word> </document>";

    var importer = new BIRDFormatImportTests.LLIMergeExtension(Cache, null, null);

    Assert.DoesNotThrow(() => importer.ImportWordsFrag(
        () => new MemoryStream(Encoding.ASCII.GetBytes(xml)),
        LinguaLinksImport.ImportAnalysesLevel.WordGloss));
}
/// <summary>
/// Imports a document marked version="2" whose only word item is the multi-word text
/// "this is not a phrase" and checks that the segment ends up with a single analysis.
/// </summary>
public void ImportUnknownPhraseWholeSegmentVersion_MakesPhrase()
{
    // One word element whose text contains spaces; no gloss is supplied.
    const string xml = "<document version=\"2\"><interlinear-text guid='AAAAAAAA-AAAA-AAAA-AAAA-AAAAAAAAAAAA'>" +
        "<paragraphs><paragraph><phrases><phrase><words>" +
        "<word>" +
        "<item type='txt' lang='en'>this is not a phrase</item>" +
        "</word>" +
        "</words></phrase></phrases></paragraph></paragraphs></interlinear-text></document>";

    var importer = new BIRDFormatImportTests.LLIMergeExtension(Cache, null, null);
    var options = CreateImportInterlinearOptions(xml);
    FDO.IText importedText = null;
    importer.ImportInterlinear(options, ref importedText);

    var firstPara = (IStTxtPara)importedText.ContentsOA.ParagraphsOS[0];
    var firstSegment = firstPara.SegmentsOS[0];
    Assert.That(firstSegment.AnalysesRS.Count, Is.EqualTo(1));
}
/// <summary>
/// Creates a wordform "this is a phrase" up front, then imports a document (no version
/// attribute) whose only word item has that same text, and checks that the segment ends up
/// with a single analysis.
/// </summary>
public void ImportKnownPhraseWholeSegmentNoVersion_MakesPhrase()
{
    // One word element whose text contains spaces; no gloss is supplied.
    const string xml = "<document><interlinear-text guid='AAAAAAAA-AAAA-AAAA-AAAA-AAAAAAAAAAAA'>" +
        "<paragraphs><paragraph><phrases><phrase><words>" +
        "<word>" +
        "<item type='txt' lang='en'>this is a phrase</item>" +
        "</word>" +
        "</words></phrase></phrases></paragraph></paragraphs></interlinear-text></document>";

    // Pre-create the phrase as a wordform so that it already exists when the import runs.
    UndoableUnitOfWorkHelper.Do("undo", "redo", m_actionHandler, () =>
    {
        var phraseWordform = Cache.ServiceLocator.GetInstance<IWfiWordformFactory>().Create();
        int wsEn = Cache.WritingSystemFactory.GetWsFromStr("en");
        phraseWordform.Form.set_String(wsEn, Cache.TsStrFactory.MakeString("this is a phrase", wsEn));
    });

    var importer = new BIRDFormatImportTests.LLIMergeExtension(Cache, null, null);
    var options = CreateImportInterlinearOptions(xml);
    FDO.IText importedText = null;
    importer.ImportInterlinear(options, ref importedText);

    var firstPara = (IStTxtPara)importedText.ContentsOA.ParagraphsOS[0];
    var firstSegment = firstPara.SegmentsOS[0];
    Assert.That(firstSegment.AnalysesRS.Count, Is.EqualTo(1));
}
/// <summary>
/// Imports a document (no version attribute) whose only word item is the multi-word text
/// "this is not a phrase" and checks that the paragraph text is preserved and the segment
/// has either five analyses or none.
/// </summary>
public void ImportUnknownPhraseWholeSegmentNoVersion_MakesSeparateWords()
{
    // One word element whose text contains spaces; no gloss is supplied.
    const string xml = "<document><interlinear-text guid='AAAAAAAA-AAAA-AAAA-AAAA-AAAAAAAAAAAA'>" +
        "<paragraphs><paragraph><phrases><phrase><words>" +
        "<word>" +
        "<item type='txt' lang='en'>this is not a phrase</item>" +
        "</word>" +
        "</words></phrase></phrases></paragraph></paragraphs></interlinear-text></document>";

    var importer = new BIRDFormatImportTests.LLIMergeExtension(Cache, null, null);
    var options = CreateImportInterlinearOptions(xml);
    FDO.IText importedText = null;
    importer.ImportInterlinear(options, ref importedText);

    var firstPara = (IStTxtPara)importedText.ContentsOA.ParagraphsOS[0];
    var firstSegment = firstPara.SegmentsOS[0];
    Assert.That(firstPara.Contents.Text, Is.EqualTo("this is not a phrase"));

    // Two outcomes are acceptable: the segment has not been parsed yet (no analyses; it will be
    // parsed when it is first looked at), or it has been parsed into five words. A single
    // phrase is the other likely outcome and is not acceptable for parsing Saymore output
    // (LT-12621).
    Assert.That(firstSegment.AnalysesRS.Count, Is.EqualTo(5).Or.EqualTo(0));
}
/// <summary>
/// Builds a text whose single segment references an existing wordform that already owns an
/// (empty) analysis, then imports a gloss for that word into the same text and segment.
/// Checks that the gloss lands in a new, second analysis and that the text, paragraph and
/// wordform are otherwise unchanged.
/// </summary>
public void ImportNewUserConfirmedWordGlossSeparatedFromToExistingWfiAnalysis()
{
    var sl = Cache.ServiceLocator;
    var wsf = Cache.WritingSystemFactory;

    // Values captured from the pre-existing data for use after the import.
    IWfiWordform word = null;
    ITsString paraContents = null;
    var segGuid = Guid.Empty;

    // build pre-existing data
    NonUndoableUnitOfWorkHelper.Do(Cache.ActionHandlerAccessor, () =>
    {
        FDO.IText text = sl.GetInstance<ITextFactory>().Create(Cache, new Guid("AAAAAAAA-AAAA-AAAA-AAAA-AAAAAAAAAAAA"));
        var stText = sl.GetInstance<IStTextFactory>().Create();
        text.ContentsOA = stText;

        IStTxtPara para = sl.GetInstance<IStTxtParaFactory>().Create();
        stText.ParagraphsOS.Add(para);
        para.Contents = Cache.TsStrFactory.MakeString("supercalifragilisticexpialidocious", wsf.get_Engine("en").Handle);
        paraContents = para.Contents;

        ISegment segment = sl.GetInstance<ISegmentFactory>().Create();
        para.SegmentsOS.Add(segment);
        ITsString wordText = TsStringUtils.MakeTss("supercalifragilisticexpialidocious", wsf.get_Engine("en").Handle);
        segGuid = segment.Guid;

        // The wordform gets one analysis (with no gloss) and is referenced by the segment.
        word = sl.GetInstance<IWfiWordformFactory>().Create(wordText);
        var existingAnalysis = sl.GetInstance<IWfiAnalysisFactory>().Create();
        word.AnalysesOC.Add(existingAnalysis);
        segment.AnalysesRS.Add(word);
    });

    // import an analysis with word gloss, addressed to the same text and segment by guid
    string xml = "<document><interlinear-text guid='AAAAAAAA-AAAA-AAAA-AAAA-AAAAAAAAAAAA'>" +
        "<paragraphs><paragraph><phrases><phrase guid='" + segGuid + "'><words>" +
        "<word>" +
        "<item type='txt' lang='en'>supercalifragilisticexpialidocious</item>" +
        "<item type='gls' lang='pt'>absurdo</item>" +
        "</word>" +
        "</words></phrase></phrases></paragraph></paragraphs></interlinear-text></document>";

    var importer = new BIRDFormatImportTests.LLIMergeExtension(Cache, null, null);
    var options = CreateImportInterlinearOptions(xml);
    FDO.IText importedText = null;
    importer.ImportInterlinear(options, ref importedText);

    using (var texts = Cache.LanguageProject.Texts.GetEnumerator())
    {
        // Inspect the first text in the language project.
        texts.MoveNext();
        var imported = texts.Current;
        Assert.IsNotNull(imported);
        var importedPara = imported.ContentsOA.ParagraphsOS[0] as IStTxtPara;
        Assert.IsNotNull(importedPara);

        // assert that new Analysis was created
        Assert.That(Cache.ServiceLocator.GetInstance<IWfiAnalysisRepository>().Count, Is.EqualTo(2));

        // make sure imported word gloss is correct
        Assert.That(importedPara.SegmentsOS[0].AnalysesRS.Count, Is.EqualTo(1));
        var importedAnalysis = importedPara.SegmentsOS[0].AnalysesRS[0];
        var skippedWord = importedAnalysis.Wordform;
        var tree = new AnalysisTree(importedAnalysis);
        Assert.IsNotNull(tree.Gloss, "IAnalysis should be WfiGloss");
        var newGloss = tree.Gloss;
        Assert.That(newGloss.Form.get_String(wsf.get_Engine("pt").Handle).Text, Is.EqualTo("absurdo"));
        Assert.That(skippedWord.Guid, Is.EqualTo(word.Guid));

        // make sure nothing else has changed:
        Assert.That(Cache.LanguageProject.Texts.Count, Is.EqualTo(1));
        Assert.That(imported.ContentsOA.ParagraphsOS.Count, Is.EqualTo(1));
        Assert.AreEqual(paraContents.Text, importedPara.Contents.Text,
            "Imported Para contents differ from original");
        Assert.IsTrue(paraContents.Equals(importedPara.Contents),
            "Ws mismatch between imported and original paragraph");
        Assert.That(skippedWord.Form.get_String(wsf.get_Engine("en").Handle).Text,
            Is.EqualTo("supercalifragilisticexpialidocious"));
        Assert.That(skippedWord.Guid, Is.EqualTo(word.Guid));

        // make sure nothing else changed
        Assert.That(Cache.ServiceLocator.GetInstance<IWfiGlossRepository>().Count, Is.EqualTo(1));
        Assert.That(Cache.ServiceLocator.GetInstance<IWfiWordformRepository>().Count, Is.EqualTo(1));
    }
}
/// <summary>
/// Imports one word whose gloss item is marked analysisStatus='guessByHumanApproved' and
/// checks that only the wordform is created: the paragraph's analysis is not a gloss, and no
/// gloss or analysis objects exist afterwards.
/// </summary>
public void SkipNewGuessedWordGloss()
{
    var wsf = Cache.WritingSystemFactory;
    const string xml = "<document><interlinear-text>" +
        "<paragraphs><paragraph><phrases><phrase><words>" +
        "<word>" +
        "<item type='txt' lang='en'>supercalifragilisticexpialidocious</item>" +
        "<item type='gls' lang='pt' analysisStatus='guessByHumanApproved'>absurdo</item>" +
        "</word>" +
        "</words></phrase></phrases></paragraph></paragraphs></interlinear-text></document>";

    var importer = new BIRDFormatImportTests.LLIMergeExtension(Cache, null, null);
    FDO.IText importedText = null;
    var options = CreateImportInterlinearOptions(xml);
    importer.ImportInterlinear(options, ref importedText);

    using (var texts = Cache.LanguageProject.Texts.GetEnumerator())
    {
        // Inspect the first text in the language project.
        texts.MoveNext();
        var imported = texts.Current;
        Assert.IsNotNull(imported);

        var para = imported.ContentsOA.ParagraphsOS[0] as IStTxtPara;
        Assert.IsNotNull(para);
        Assert.That(para.Analyses.Count(), Is.EqualTo(1));

        int wsEn = wsf.get_Engine("en").Handle;
        Assert.That(para.Analyses.First().Wordform.Form.get_String(wsEn).Text,
            Is.EqualTo("supercalifragilisticexpialidocious"));

        // The guessed gloss must not have been attached to the paragraph's analysis.
        var tree = new AnalysisTree(para.Analyses.First());
        Assert.IsNull(tree.Gloss, "Analysis should not be WfiGloss");

        // assert that nothing else was created
        var services = Cache.ServiceLocator;
        Assert.That(services.GetInstance<IWfiGlossRepository>().Count, Is.EqualTo(0));
        Assert.That(services.GetInstance<IWfiAnalysisRepository>().Count, Is.EqualTo(0));
        Assert.That(services.GetInstance<IWfiWordformRepository>().Count, Is.EqualTo(1));
    }
}