/// <summary>
/// Builds paragraph contents consisting of a leading sentence followed by two verse
/// numbers, each with a single sentence after it, collects the segments and checks that
/// five segments are produced and that segments 0, 2 and 4 have an empty verse-segment label.
/// </summary>
public void OneSegPerVerse()
{
    string leadingText = "Das Buch ist rot. ";
    string firstVerseNum = "9";
    string firstVerseText = "Der Herr ist gross.";
    string secondVerseNum = "10";
    string secondVerseText = "Ich spreche nicht viel Deutsch.";

    string fullText = leadingText + firstVerseNum + firstVerseText + secondVerseNum + secondVerseText;
    ITsStrBldr builder = m_tsf.MakeString(fullText, m_wsVern).GetBldr();

    // Apply the verse-number style to the first verse number.
    int ichFirstVerse = leadingText.Length;
    builder.SetStrPropValue(ichFirstVerse, ichFirstVerse + firstVerseNum.Length,
        (int)FwTextPropType.ktptNamedStyle, ScrStyleNames.VerseNumber);

    // Apply the verse-number style to the second verse number.
    int ichSecondVerse = leadingText.Length + firstVerseNum.Length + firstVerseText.Length;
    builder.SetStrPropValue(ichSecondVerse, ichSecondVerse + secondVerseNum.Length,
        (int)FwTextPropType.ktptNamedStyle, ScrStyleNames.VerseNumber);

    m_para.Contents = builder.GetString();

    using (var parser = new ParagraphParser(m_para))
    {
        List<int> eosIndexes;
        var collected = parser.CollectSegments(m_para.Contents, out eosIndexes);
        Assert.AreEqual(5, collected.Count);

        // Segments 0, 2 and 4 are the ones checked; each is expected to have no label.
        foreach (int iSegment in new[] { 0, 2, 4 })
        {
            Assert.AreEqual("", ScriptureServices.VerseSegLabel(m_para, iSegment));
        }
    }
}
/// <summary>
/// Fixture setup (described by the original author as a non-undoable task): creates a text
/// with two paragraphs, parses every paragraph, and records the expected analysis
/// occurrences for the whole text and for the first paragraph alone.
/// </summary>
private void DoSetupFixture()
{
    var factoryForTexts = Cache.ServiceLocator.GetInstance<ITextFactory>();
    var factoryForStTexts = Cache.ServiceLocator.GetInstance<IStTextFactory>();

    m_text = factoryForTexts.Create();
    //Cache.LangProject.TextsOC.Add(m_text);
    m_stText = factoryForStTexts.Create();
    m_text.ContentsOA = m_stText;

    m_para0 = m_stText.AddNewTextPara(null);
    m_para0.Contents = TsStringUtils.MakeTss(
        "Xxxhope xxxthis xxxwill xxxdo. xxxI xxxhope.", Cache.DefaultVernWs);

    m_para1 = m_stText.AddNewTextPara(null);
    m_para1.Contents = TsStringUtils.MakeTss(
        "Xxxcertain xxxto xxxcatch xxxa xxxfrog. xxxCertainly xxxcan. xxxOn xxxLake xxxMonroe.",
        Cache.DefaultVernWs);

    m_para2 = null;

    // Parse every paragraph of the text with one shared parser.
    using (var parser = new ParagraphParser(Cache))
    {
        foreach (IStTxtPara paragraph in m_stText.ParagraphsOS)
        {
            parser.Parse(paragraph);
        }
    }

    // One occurrence per analysis in every segment of every paragraph.
    m_expectedAnOcs = new List<AnalysisOccurrence>();
    foreach (IStTxtPara paragraph in m_stText.ParagraphsOS)
    {
        foreach (ISegment segment in paragraph.SegmentsOS)
        {
            for (int iAnalysis = 0; iAnalysis < segment.AnalysesRS.Count; iAnalysis++)
            {
                m_expectedAnOcs.Add(new AnalysisOccurrence(segment, iAnalysis));
            }
        }
    }

    // The same, restricted to the first paragraph.
    m_expectedAnOcsPara0 = new List<AnalysisOccurrence>();
    foreach (ISegment segment in m_para0.SegmentsOS)
    {
        for (int iAnalysis = 0; iAnalysis < segment.AnalysesRS.Count; iAnalysis++)
        {
            m_expectedAnOcsPara0.Add(new AnalysisOccurrence(segment, iAnalysis));
        }
    }
}
/// <summary>
/// Disposes the ParagraphParser held in m_pp (if any) and clears the field, then runs the
/// base class teardown.
/// </summary>
public override void TestTearDown()
{
    if (m_pp != null)
    {
        m_pp.Dispose();
        m_pp = null;
    }

    base.TestTearDown();
}
/// <summary>
/// Parses m_txtPara and fills m_occurrences with one AnalysisOccurrence for each analysis
/// in the paragraph's first segment.
/// </summary>
private void ParseText()
{
    using (var parser = new ParagraphParser(Cache))
    {
        parser.Parse(m_txtPara);
    }

    // Only the first segment of the paragraph is used.
    var firstSegment = m_txtPara.SegmentsOS[0];

    // Read the count directly instead of copying the analyses into a temporary array
    // whose only use was its Length.
    var cAnalyses = firstSegment.AnalysesRS.Count;
    m_occurrences = new AnalysisOccurrence[cAnalyses];
    for (var i = 0; i < cAnalyses; i++)
    {
        m_occurrences[i] = new AnalysisOccurrence(firstSegment, i);
    }
}
/// <summary>
/// Breaks a phrase occurrence apart: collects the forms found in this occurrence's text
/// span and, when more than one is found, replaces this occurrence's single analysis in the
/// segment with them and then tries to delete the wordform that was replaced.
/// </summary>
public void BreakPhrase()
{
    using (var parser = new ParagraphParser(Paragraph))
    {
        // The parser is brand new and no pre-existing analyses have been set up on it,
        // so the value passed for cWfAnalysisPrev (0 here) does not matter.
        IList<IAnalysis> constituentForms = parser.CollectSegmentForms(
            GetMyBeginOffsetInPara(), GetMyEndOffsetInPara(), 0, false);

        // Nothing to break apart unless more than one form came back.
        if (constituentForms.Count <= 1)
        {
            return;
        }

        var replacedWordform = Analysis.Wordform;
        Segment.AnalysesRS.Replace(Index, 1, constituentForms.Cast<ICmObject>());

        // Enhance JohnT: for this sort of automatic deletion, should there be stronger
        // checks, such as that the wordform has no analysis or glosses?
        DeleteWordformIfPossible(replacedWordform);
    }
}
/// <summary>
/// Parses every paragraph of the given text using a single ParagraphParser created from
/// the text's cache.
/// </summary>
/// <param name="sttext">The text whose paragraphs are parsed.</param>
public static void ParseText(IStText sttext)
{
    using (var paraParser = new ParagraphParser(sttext.Cache))
    {
        foreach (IStTxtPara paragraph in sttext.ParagraphsOS)
        {
            paraParser.Parse(paragraph);
        }
    }
}
/// <summary>
/// Parses one paragraph with the supplied options, unless the paragraph reports that its
/// parse is already current.
/// </summary>
/// <param name="para">The paragraph to parse.</param>
/// <param name="options">Options handed to the parser's ParseWithOptions call.</param>
public static void ParseParagraph(IStTxtPara para, ParagraphParserOptions options)
{
    if (!para.ParseIsCurrent)
    {
        using (var parser = new ParagraphParser(para.Cache))
        {
            parser.ParseWithOptions(para, options);
        }
    }
}
/// <summary>
/// Initializes a SegmentMaker for the given text and remembers the paragraph parser it
/// was created for.
/// </summary>
/// <param name="text">The text, forwarded to the base class constructor.</param>
/// <param name="wsf">The writing system factory, forwarded to the base class constructor.</param>
/// <param name="pp">The paragraph parser, stored in m_paraParser.</param>
internal SegmentMaker(ITsString text, ILgWritingSystemFactory wsf, ParagraphParser pp)
    : base(text, wsf)
{
    m_paraParser = pp;
}
/// <summary>
/// Retrieves the wordforms collected during the last parsing session.
/// </summary>
/// <param name="cache">The cache used to create the temporary ParagraphParser.</param>
/// <returns>
/// A new set built from the parser's WordformIdOccurrencesTable (presumably wordform ids;
/// confirm against the table's definition).
/// </returns>
public static Set<int> WordformsFromLastParseSession(FdoCache cache)
{
    using (var parser = new ParagraphParser(cache))
    {
        // The set is fully built before the parser is disposed on the way out.
        return new Set<int>(parser.WordformIdOccurrencesTable);
    }
}
/// <summary>
/// Runs the base class setup and then creates a fresh ParagraphParser over m_para,
/// stored in m_pp, for the test about to run.
/// </summary>
public override void TestSetup()
{
    base.TestSetup();

    var freshParser = new ParagraphParser(m_para);
    m_pp = freshParser;
}
/// <summary>
/// Builds paragraph contents with two sentences before the first verse number and two
/// sentences after each of the two verse numbers, collects the segments and checks that
/// eight segments are produced and that each pair of sentences is labelled "a" and "b".
/// </summary>
public void TwoSegsPerVerse()
{
    string leadSentence1 = "Das Buch ist rot. ";
    string leadSentence2 = "Das Maedchen ist schoen.";
    string firstVerseNum = "9";
    string firstVerseSentence1 = "Der Herr ist gross.";
    string firstVerseSentence2 = "Ich spreche nicht viel Deutsch.";
    string secondVerseNum = "10";
    string secondVerseSentence1 = "Was ist das?";
    string secondVerseSentence2 = "Wie gehts?";

    string fullText = leadSentence1 + leadSentence2
        + firstVerseNum + firstVerseSentence1 + firstVerseSentence2
        + secondVerseNum + secondVerseSentence1 + secondVerseSentence2;
    ITsStrBldr builder = m_tsf.MakeString(fullText, m_wsVern).GetBldr();

    // Apply the verse-number style to the first verse number.
    int ichFirstVerse = leadSentence1.Length + leadSentence2.Length;
    builder.SetStrPropValue(ichFirstVerse, ichFirstVerse + firstVerseNum.Length,
        (int)FwTextPropType.ktptNamedStyle, ScrStyleNames.VerseNumber);

    // Apply the verse-number style to the second verse number.
    int ichSecondVerse = leadSentence1.Length + leadSentence2.Length + firstVerseNum.Length
        + firstVerseSentence1.Length + firstVerseSentence2.Length;
    builder.SetStrPropValue(ichSecondVerse, ichSecondVerse + secondVerseNum.Length,
        (int)FwTextPropType.ktptNamedStyle, ScrStyleNames.VerseNumber);

    m_para.Contents = builder.GetString();

    using (var parser = new ParagraphParser(m_para))
    {
        List<int> eosIndexes;
        var collected = parser.CollectSegments(m_para.Contents, out eosIndexes);
        Assert.AreEqual(8, collected.Count);

        // Segment pairs (0,1), (3,4) and (6,7) are checked in that order.
        foreach (int iPairStart in new[] { 0, 3, 6 })
        {
            Assert.AreEqual("a", ScriptureServices.VerseSegLabel(m_para, iPairStart));
            Assert.AreEqual("b", ScriptureServices.VerseSegLabel(m_para, iPairStart + 1));
        }
    }
}
/// <summary>
/// Sets the paragraph's contents from the string builder and returns the segments that a
/// ParagraphParser collects for those contents.
/// </summary>
/// <param name="bldr">Builder providing the new paragraph contents.</param>
/// <param name="para">Paragraph whose contents are replaced and then segmented.</param>
/// <returns>The result of the parser's CollectSegments call.</returns>
private IList<ISegment> GetSegments(ITsStrBldr bldr, IScrTxtPara para)
{
    para.Contents = bldr.GetString();

    using (var parser = new ParagraphParser(para))
    {
        // The end-of-sentence indexes are required by the call but not used here.
        List<int> ignoredEosIndexes;
        return parser.CollectSegments(para.Contents, out ignoredEosIndexes);
    }
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Fixes the paragraph's analysis: reparses the paragraph when any of its segments has
/// analyses; otherwise just re-segments it and removes surplus trailing segments.
/// </summary>
/// <param name="para">The paragraph.</param>
/// ------------------------------------------------------------------------------------
private static void FixParaAnalysis(IScrTxtPara para)
{
    // Any word-level analysis at all means the whole paragraph has to be reparsed.
    bool hasWordLevelAnalysis = para.SegmentsOS.Any(seg => seg.AnalysesRS.Count > 0);
    if (hasWordLevelAnalysis)
    {
        ParagraphParser.ParseParagraph(para);
        return;
    }

    // No analyses; just resegment it.
    using (var paraParser = new ParagraphParser(para))
    {
        paraParser.CollectPreExistingParaAnnotations();
        var maker = new SegmentMaker(para.Contents, para.Cache.WritingSystemFactory, paraParser);
        maker.Run();

        // If the paragraph holds more segments than the segment maker produced, drop the
        // extras from the end, last one first.
        while (para.SegmentsOS.Count > maker.Segments.Count)
        {
            para.SegmentsOS.RemoveAt(para.SegmentsOS.Count - 1);
        }
    }
}
/// <summary>
/// Creates the test text with a single paragraph (Para0), creates the wordforms
/// "A", "a", "b" and "B", appends them to Words_para0, sets the paragraph contents to two
/// sentences built from entries 0-5 of Words_para0, and parses the text's paragraphs.
/// </summary>
internal void DoDataSetup()
{
    var factoryForTexts = Cache.ServiceLocator.GetInstance<ITextFactory>();
    var factoryForStTexts = Cache.ServiceLocator.GetInstance<IStTextFactory>();

    Text = factoryForTexts.Create();
    //Cache.LangProject.TextsOC.Add(Text);
    StText = factoryForStTexts.Create();
    Text.ContentsOA = StText;
    Para0 = (StTxtPara)StText.AddNewTextPara(null);

    var wordformFactory = Cache.ServiceLocator.GetInstance<IWfiWordformFactory>();
    var vernWs = Cache.DefaultVernWs;

    /* A a a a. */
    IWfiWordform upperA = wordformFactory.Create(TsStringUtils.MakeTss("A", vernWs));
    IWfiWordform lowerA = wordformFactory.Create(TsStringUtils.MakeTss("a", vernWs));
    Words_para0.Add(upperA);
    for (int repeat = 0; repeat < 3; repeat++)
    {
        Words_para0.Add(lowerA);
    }

    // NOTE(review): the fixed indexes below assume Words_para0 was empty on entry - confirm.
    string firstSentence = string.Join(" ", new[]
        {
            Words_para0[0].Form.BestVernacularAlternative.Text,
            Words_para0[1].Form.BestVernacularAlternative.Text,
            Words_para0[2].Form.BestVernacularAlternative.Text,
            Words_para0[3].Form.BestVernacularAlternative.Text
        }) + ".";
    Para0.Contents = TsStringUtils.MakeTss(firstSentence, vernWs);

    /* b B. */
    IWfiWordform lowerB = wordformFactory.Create(TsStringUtils.MakeTss("b", vernWs));
    IWfiWordform upperB = wordformFactory.Create(TsStringUtils.MakeTss("B", vernWs));
    Words_para0.Add(lowerB);
    Words_para0.Add(upperB);

    // The second sentence is appended to the existing contents, separated by a space.
    string secondSentence = " " + string.Join(" ", new[]
        {
            Words_para0[4].Form.BestVernacularAlternative.Text,
            Words_para0[5].Form.BestVernacularAlternative.Text
        }) + ".";
    var contentsBuilder = Para0.Contents.GetIncBldr();
    contentsBuilder.AppendTsString(TsStringUtils.MakeTss(secondSentence, vernWs));
    Para0.Contents = contentsBuilder.GetString();

    using (var parser = new ParagraphParser(Cache))
    {
        foreach (IStTxtPara paragraph in StText.ParagraphsOS)
        {
            parser.Parse(paragraph);
        }
    }
}
/// <summary>
/// Creates a second text containing one short paragraph (m_para2), parses it, and records
/// the expected analysis occurrences for that text in m_expectedAnOcsPara2.
/// </summary>
private void Setup2ndText()
{
    var factoryForTexts = Cache.ServiceLocator.GetInstance<ITextFactory>();
    var factoryForStTexts = Cache.ServiceLocator.GetInstance<IStTextFactory>();

    var secondText = factoryForTexts.Create();
    //Cache.LangProject.TextsOC.Add(text2);
    m_stText2 = factoryForStTexts.Create();
    secondText.ContentsOA = m_stText2;

    m_para2 = m_stText2.AddNewTextPara(null);
    m_para2.Contents = TsStringUtils.MakeTss("Small one segment paragraph.", Cache.DefaultVernWs);

    // Parse every paragraph of the second text.
    using (var parser = new ParagraphParser(Cache))
    {
        foreach (IStTxtPara paragraph in m_stText2.ParagraphsOS)
        {
            parser.Parse(paragraph);
        }
    }

    // One occurrence per analysis in every segment of every paragraph.
    m_expectedAnOcsPara2 = new List<AnalysisOccurrence>();
    foreach (IStTxtPara paragraph in m_stText2.ParagraphsOS)
    {
        foreach (ISegment segment in paragraph.SegmentsOS)
        {
            for (int iAnalysis = 0; iAnalysis < segment.AnalysesRS.Count; iAnalysis++)
            {
                m_expectedAnOcsPara2.Add(new AnalysisOccurrence(segment, iAnalysis));
            }
        }
    }
}
/// <summary>
/// Creates a new text with a single two-sentence paragraph, parses the text's paragraphs
/// and returns that paragraph.
/// </summary>
/// <returns>The paragraph that was created and parsed.</returns>
private IStTxtPara MakeSimpleParsedText()
{
    var factoryForTexts = Cache.ServiceLocator.GetInstance<ITextFactory>();
    var factoryForStTexts = Cache.ServiceLocator.GetInstance<IStTextFactory>();

    var newText = factoryForTexts.Create();
    //Cache.LangProject.TextsOC.Add(text);
    var newStText = factoryForStTexts.Create();
    newText.ContentsOA = newStText;

    var firstPara = newStText.AddNewTextPara(null);
    firstPara.Contents = TsStringUtils.MakeTss(
        "the book is red. the pages in the book are the color of the paper.",
        Cache.DefaultVernWs);

    using (var parser = new ParagraphParser(Cache))
    {
        foreach (IStTxtPara paragraph in newStText.ParagraphsOS)
        {
            parser.Parse(paragraph);
        }
    }

    return firstPara;
}