/// <summary>
		/// Builds a paragraph of three sentences separated by two runs carrying the verse-number
		/// style ("9" and "10"), collects its segments, and expects five segments with an empty
		/// verse segment label for segments 0, 2 and 4.
		/// </summary>
		public void OneSegPerVerse()
		{
			const string leadingText = "Das Buch ist rot. ";
			const string firstVerseNumber = "9";
			const string firstVerseText = "Der Herr ist gross.";
			const string secondVerseNumber = "10";
			const string secondVerseText = "Ich spreche nicht viel Deutsch.";

			// Character offsets of the two verse-number runs within the concatenated text.
			int firstVerseStart = leadingText.Length;
			int firstVerseEnd = firstVerseStart + firstVerseNumber.Length;
			int secondVerseStart = firstVerseEnd + firstVerseText.Length;
			int secondVerseEnd = secondVerseStart + secondVerseNumber.Length;

			ITsStrBldr builder = m_tsf.MakeString(
				leadingText + firstVerseNumber + firstVerseText + secondVerseNumber + secondVerseText,
				m_wsVern).GetBldr();
			builder.SetStrPropValue(firstVerseStart, firstVerseEnd,
				(int)FwTextPropType.ktptNamedStyle, ScrStyleNames.VerseNumber);
			builder.SetStrPropValue(secondVerseStart, secondVerseEnd,
				(int)FwTextPropType.ktptNamedStyle, ScrStyleNames.VerseNumber);
			m_para.Contents = builder.GetString();

			using (var parser = new ParagraphParser(m_para))
			{
				// The out parameter is required by CollectSegments but not inspected here.
				List<int> ignoredEosIndexes;
				var collected = parser.CollectSegments(m_para.Contents, out ignoredEosIndexes);
				Assert.AreEqual(5, collected.Count);

				// Segments 0, 2 and 4 are each expected to carry no label.
				foreach (int segmentIndex in new[] { 0, 2, 4 })
					Assert.AreEqual("", ScriptureServices.VerseSegLabel(m_para, segmentIndex));
			}
		}
		/// <summary>
		/// Creates a text whose StText holds two vernacular paragraphs, parses every paragraph,
		/// and records the expected analysis occurrences for the whole text and for the first
		/// paragraph alone. Described by the original author as a non-undoable task.
		/// </summary>
		private void DoSetupFixture()
		{
			m_text = Cache.ServiceLocator.GetInstance<ITextFactory>().Create();
			// The text is not added to Cache.LangProject.TextsOC; the original author left that
			// call commented out.
			m_stText = Cache.ServiceLocator.GetInstance<IStTextFactory>().Create();
			m_text.ContentsOA = m_stText;

			m_para0 = m_stText.AddNewTextPara(null);
			m_para0.Contents = TsStringUtils.MakeTss("Xxxhope xxxthis xxxwill xxxdo. xxxI xxxhope.", Cache.DefaultVernWs);
			m_para1 = m_stText.AddNewTextPara(null);
			m_para1.Contents = TsStringUtils.MakeTss("Xxxcertain xxxto xxxcatch xxxa xxxfrog. xxxCertainly xxxcan. xxxOn xxxLake xxxMonroe.", Cache.DefaultVernWs);
			m_para2 = null;

			// Parse every paragraph of the text with a single parser instance.
			using (var parser = new ParagraphParser(Cache))
			{
				foreach (IStTxtPara paragraph in m_stText.ParagraphsOS)
				{
					parser.Parse(paragraph);
				}
			}

			// One occurrence per analysis of every segment of every paragraph.
			m_expectedAnOcs = new List<AnalysisOccurrence>();
			foreach (IStTxtPara paragraph in m_stText.ParagraphsOS)
			{
				foreach (ISegment segment in paragraph.SegmentsOS)
				{
					for (int index = 0; index < segment.AnalysesRS.Count; index++)
					{
						m_expectedAnOcs.Add(new AnalysisOccurrence(segment, index));
					}
				}
			}

			// The same, restricted to the first paragraph.
			m_expectedAnOcsPara0 = new List<AnalysisOccurrence>();
			foreach (ISegment segment in m_para0.SegmentsOS)
			{
				for (int index = 0; index < segment.AnalysesRS.Count; index++)
				{
					m_expectedAnOcsPara0.Add(new AnalysisOccurrence(segment, index));
				}
			}
		}
		/// <summary>
		/// Disposes the per-test ParagraphParser (if one exists), clears the field,
		/// and then runs the base class tear-down.
		/// </summary>
		public override void TestTearDown()
		{
			if (m_pp != null)
			{
				m_pp.Dispose();
				// Clear the field so the disposed parser is not reused.
				m_pp = null;
			}

			base.TestTearDown();
		}
		/// <summary>
		/// Parses m_txtPara and fills m_occurrences with one AnalysisOccurrence for each
		/// analysis in the paragraph's first segment.
		/// </summary>
		private void ParseText()
		{
			using (var parser = new ParagraphParser(Cache))
			{
				parser.Parse(m_txtPara);
			}

			// Only the first segment of the paragraph is considered.
			var firstSegment = m_txtPara.SegmentsOS[0];
			var analysisCount = firstSegment.AnalysesRS.ToArray().Length;
			m_occurrences = new AnalysisOccurrence[analysisCount];
			for (var index = 0; index < analysisCount; index++)
			{
				m_occurrences[index] = new AnalysisOccurrence(firstSegment, index);
			}
		}
示例#5
0
		/// <summary>
		/// Break an occurrence that is a phrase into its constituent wordforms.
		/// If parsing this occurrence's character range yields more than one form, the single
		/// analysis at Index is replaced by those forms and the old wordform is handed to
		/// DeleteWordformIfPossible. Otherwise nothing changes.
		/// </summary>
		public void BreakPhrase()
		{
			using (var parser = new ParagraphParser(Paragraph))
			{
				var beginOffset = GetMyBeginOffsetInPara();
				var endOffset = GetMyEndOffsetInPara();

				// The parser is brand new and no pre-existing analyses have been set up,
				// so the value passed for cWfAnalysisPrev does not matter.
				IList<IAnalysis> constituents = parser.CollectSegmentForms(beginOffset, endOffset, 0, false);
				if (constituents.Count <= 1)
					return;

				// Capture the old wordform before the replacement changes what Analysis refers to.
				var replacedWordform = Analysis.Wordform;
				Segment.AnalysesRS.Replace(Index, 1, constituents.Cast<ICmObject>());

				// Enhance JohnT: for this sort of automatic deletion, should stronger checks be
				// made, such as that the wordform has no analysis or glosses?
				DeleteWordformIfPossible(replacedWordform);
			}
		}
示例#6
0
		/// <summary>
		/// Parse all the paragraphs in the text, using one parser created from the text's cache.
		/// </summary>
		/// <param name="sttext">The text whose paragraphs (ParagraphsOS) are parsed.</param>
		public static void ParseText(IStText sttext)
		{
			using (var paragraphParser = new ParagraphParser(sttext.Cache))
				foreach (IStTxtPara paragraph in sttext.ParagraphsOS)
					paragraphParser.Parse(paragraph);
		}
示例#7
0
		/// <summary>
		/// Parse a single paragraph with the specified options.
		/// Does nothing when the paragraph reports that its parse is already current.
		/// </summary>
		/// <param name="para">The paragraph to parse; its Cache is used to create the parser.</param>
		/// <param name="options">Options passed through to ParseWithOptions.</param>
		public static void ParseParagraph(IStTxtPara para, ParagraphParserOptions options)
		{
			if (!para.ParseIsCurrent)
			{
				using (var parser = new ParagraphParser(para.Cache))
					parser.ParseWithOptions(para, options);
			}
		}
示例#8
0
		/// <summary>
		/// Creates a segment maker, forwarding <paramref name="text"/> and <paramref name="wsf"/>
		/// to the base class constructor and remembering the paragraph parser.
		/// </summary>
		/// <param name="text">The string passed to the base constructor.</param>
		/// <param name="wsf">The writing system factory passed to the base constructor.</param>
		/// <param name="pp">The paragraph parser stored in m_paraParser.</param>
		internal SegmentMaker(ITsString text, ILgWritingSystemFactory wsf, ParagraphParser pp)
			: base(text, wsf)
		{
			m_paraParser = pp;
		}
示例#9
0
		/// <summary>
		/// Retrieve the wordforms collected during the last parsing session.
		/// A temporary ParagraphParser is created for the cache and its
		/// WordformIdOccurrencesTable is copied into a new set before the parser is disposed.
		/// </summary>
		/// <param name="cache">The cache used to construct the temporary parser.</param>
		/// <returns>A new set built from the parser's WordformIdOccurrencesTable
		/// (presumably wordform ids; confirm against ParagraphParser).</returns>
		public static Set<int> WordformsFromLastParseSession(FdoCache cache)
		{
			using (var parser = new ParagraphParser(cache))
			{
				// Copy while the parser is still alive; the copy outlives the disposal.
				return new Set<int>(parser.WordformIdOccurrencesTable);
			}
		}
示例#10
0
		/// <summary>
		/// Runs the base class setup, then creates a new ParagraphParser over m_para
		/// and stores it in m_pp for the test to use.
		/// </summary>
		public override void TestSetup()
		{
			base.TestSetup();

			// Created after base setup has run; stored in the m_pp field.
			m_pp = new ParagraphParser(m_para);
		}
		/// <summary>
		/// Builds a paragraph with two sentences before, between and after two runs carrying the
		/// verse-number style ("9" and "10"), collects its segments, and expects eight segments
		/// where segment pairs (0,1), (3,4) and (6,7) are labelled "a" and "b".
		/// </summary>
		public void TwoSegsPerVerse()
		{
			const string introFirst = "Das Buch ist rot. ";
			const string introSecond = "Das Maedchen ist schoen.";
			const string firstVerseNumber = "9";
			const string firstVerseFirst = "Der Herr ist gross.";
			const string firstVerseSecond = "Ich spreche nicht viel Deutsch.";
			const string secondVerseNumber = "10";
			const string secondVerseFirst = "Was ist das?";
			const string secondVerseSecond = "Wie gehts?";

			// Character offsets of the two verse-number runs within the concatenated text.
			int firstVerseStart = introFirst.Length + introSecond.Length;
			int firstVerseEnd = firstVerseStart + firstVerseNumber.Length;
			int secondVerseStart = firstVerseEnd + firstVerseFirst.Length + firstVerseSecond.Length;
			int secondVerseEnd = secondVerseStart + secondVerseNumber.Length;

			ITsStrBldr builder = m_tsf.MakeString(
				introFirst + introSecond + firstVerseNumber + firstVerseFirst + firstVerseSecond +
				secondVerseNumber + secondVerseFirst + secondVerseSecond,
				m_wsVern).GetBldr();
			builder.SetStrPropValue(firstVerseStart, firstVerseEnd,
				(int)FwTextPropType.ktptNamedStyle, ScrStyleNames.VerseNumber);
			builder.SetStrPropValue(secondVerseStart, secondVerseEnd,
				(int)FwTextPropType.ktptNamedStyle, ScrStyleNames.VerseNumber);
			m_para.Contents = builder.GetString();

			using (var parser = new ParagraphParser(m_para))
			{
				// The out parameter is required by CollectSegments but not inspected here.
				List<int> ignoredEosIndexes;
				var collected = parser.CollectSegments(m_para.Contents, out ignoredEosIndexes);
				Assert.AreEqual(8, collected.Count);

				// Each labelled pair starts at index 0, 3 or 6: first "a", then "b".
				foreach (int pairStart in new[] { 0, 3, 6 })
				{
					Assert.AreEqual("a", ScriptureServices.VerseSegLabel(m_para, pairStart));
					Assert.AreEqual("b", ScriptureServices.VerseSegLabel(m_para, pairStart + 1));
				}
			}
		}
		/// <summary>
		/// Assigns the builder's string to the paragraph's Contents and returns the segments
		/// that a ParagraphParser collects from those contents.
		/// </summary>
		/// <param name="bldr">Builder whose string becomes the paragraph contents.</param>
		/// <param name="para">Paragraph that is modified and then segmented.</param>
		/// <returns>The segments returned by CollectSegments.</returns>
		private IList<ISegment> GetSegments(ITsStrBldr bldr, IScrTxtPara para)
		{
			para.Contents = bldr.GetString();
			using (var parser = new ParagraphParser(para))
			{
				// The out parameter is required by CollectSegments but not used by callers.
				List<int> ignoredEosIndexes;
				return parser.CollectSegments(para.Contents, out ignoredEosIndexes);
			}
		}
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Fixes the paragraph's analysis. A paragraph with any word-level analysis is fully
		/// reparsed. A paragraph without analyses is only resegmented, and segments beyond the
		/// number produced by the segment maker are removed.
		/// </summary>
		/// <param name="para">The paragraph.</param>
		/// ------------------------------------------------------------------------------------
		private static void FixParaAnalysis(IScrTxtPara para)
		{
			bool hasWordLevelAnalysis = para.SegmentsOS.Any(segment => segment.AnalysesRS.Count > 0);
			if (hasWordLevelAnalysis)
			{
				// Word-level analysis exists, so reparse the whole thing.
				ParagraphParser.ParseParagraph(para);
				return;
			}

			// No analyses; just resegment it.
			using (var parser = new ParagraphParser(para))
			{
				parser.CollectPreExistingParaAnnotations();
				var maker = new SegmentMaker(para.Contents, para.Cache.WritingSystemFactory, parser);
				maker.Run();
				if (maker.Segments.Count < para.SegmentsOS.Count)
				{
					// The paragraph has more segments than it should have, so remove the
					// extras, working backwards from the end.
					for (int index = para.SegmentsOS.Count - 1; index >= maker.Segments.Count; index--)
					{
						para.SegmentsOS.RemoveAt(index);
					}
				}
			}
		}
			/// <summary>
			/// Creates a text with one paragraph, creates the wordforms "A", "a", "b" and "B",
			/// records them in Words_para0, builds the paragraph contents from the recorded
			/// wordforms in two steps (four words, then two more appended), and parses every
			/// paragraph of the text.
			/// </summary>
			internal void DoDataSetup()
			{
				Text = Cache.ServiceLocator.GetInstance<ITextFactory>().Create();
				// The text is not added to Cache.LangProject.TextsOC; the original author left
				// that call commented out.
				StText = Cache.ServiceLocator.GetInstance<IStTextFactory>().Create();
				Text.ContentsOA = StText;
				Para0 = (StTxtPara)StText.AddNewTextPara(null);

				var wordformFactory = Cache.ServiceLocator.GetInstance<IWfiWordformFactory>();
				var vernacularWs = Cache.DefaultVernWs;

				/* First sentence: A a a a. */
				IWfiWordform upperA = wordformFactory.Create(TsStringUtils.MakeTss("A", vernacularWs));
				IWfiWordform lowerA = wordformFactory.Create(TsStringUtils.MakeTss("a", vernacularWs));
				Words_para0.Add(upperA);
				for (int repeat = 0; repeat < 3; repeat++)
					Words_para0.Add(lowerA);
				Para0.Contents = TsStringUtils.MakeTss(
					string.Format("{0} {1} {2} {3}.",
						Words_para0[0].Form.BestVernacularAlternative.Text,
						Words_para0[1].Form.BestVernacularAlternative.Text,
						Words_para0[2].Form.BestVernacularAlternative.Text,
						Words_para0[3].Form.BestVernacularAlternative.Text),
					vernacularWs);

				/* Second sentence, appended to the first: b B. */
				IWfiWordform lowerB = wordformFactory.Create(TsStringUtils.MakeTss("b", vernacularWs));
				IWfiWordform upperB = wordformFactory.Create(TsStringUtils.MakeTss("B", vernacularWs));
				Words_para0.Add(lowerB);
				Words_para0.Add(upperB);
				var contentsBuilder = Para0.Contents.GetIncBldr();
				contentsBuilder.AppendTsString(TsStringUtils.MakeTss(
					string.Format(" {0} {1}.",
						Words_para0[4].Form.BestVernacularAlternative.Text,
						Words_para0[5].Form.BestVernacularAlternative.Text),
					vernacularWs));
				Para0.Contents = contentsBuilder.GetString();

				// Parse every paragraph of the text with a single parser instance.
				using (var parser = new ParagraphParser(Cache))
				{
					foreach (IStTxtPara paragraph in StText.ParagraphsOS)
					{
						parser.Parse(paragraph);
					}
				}
			}
		/// <summary>
		/// Creates a second text containing a single one-segment paragraph (m_para2), parses
		/// it, and records the expected analysis occurrences in m_expectedAnOcsPara2.
		/// </summary>
		private void Setup2ndText()
		{
			var secondText = Cache.ServiceLocator.GetInstance<ITextFactory>().Create();
			// The text is not added to Cache.LangProject.TextsOC; the original author left that
			// call commented out.
			m_stText2 = Cache.ServiceLocator.GetInstance<IStTextFactory>().Create();
			secondText.ContentsOA = m_stText2;

			m_para2 = m_stText2.AddNewTextPara(null);
			m_para2.Contents = TsStringUtils.MakeTss("Small one segment paragraph.", Cache.DefaultVernWs);

			// Parse every paragraph of the second text with a single parser instance.
			using (var parser = new ParagraphParser(Cache))
			{
				foreach (IStTxtPara paragraph in m_stText2.ParagraphsOS)
				{
					parser.Parse(paragraph);
				}
			}

			// One occurrence per analysis of every segment of every paragraph.
			m_expectedAnOcsPara2 = new List<AnalysisOccurrence>();
			foreach (IStTxtPara paragraph in m_stText2.ParagraphsOS)
			{
				foreach (ISegment segment in paragraph.SegmentsOS)
				{
					for (int index = 0; index < segment.AnalysesRS.Count; index++)
					{
						m_expectedAnOcsPara2.Add(new AnalysisOccurrence(segment, index));
					}
				}
			}
		}
		/// <summary>
		/// Creates a new text with a single two-sentence paragraph in the default vernacular
		/// writing system, parses every paragraph of the text, and returns that paragraph.
		/// </summary>
		/// <returns>The paragraph that was created and parsed.</returns>
		private IStTxtPara MakeSimpleParsedText()
		{
			var newText = Cache.ServiceLocator.GetInstance<ITextFactory>().Create();
			// The text is not added to Cache.LangProject.TextsOC; the original author left that
			// call commented out.
			var body = Cache.ServiceLocator.GetInstance<IStTextFactory>().Create();
			newText.ContentsOA = body;

			var paragraph = body.AddNewTextPara(null);
			paragraph.Contents = TsStringUtils.MakeTss(
				"the book is red. the pages in the book are the color of the paper.",
				Cache.DefaultVernWs);

			// Parse every paragraph of the text with a single parser instance.
			using (var parser = new ParagraphParser(Cache))
			{
				foreach (IStTxtPara current in body.ParagraphsOS)
				{
					parser.Parse(current);
				}
			}

			return paragraph;
		}