/// <summary>
		/// Builds a paragraph of three sentences separated by two verse numbers ("9" and "10"),
		/// collects its segment annotations, caches them under ktagParaSegments, and checks that
		/// five segments result and that segments 0, 2 and 4 get an empty verse-segment label.
		/// </summary>
		public void OneSegPerVerse()
		{
			string pc1 = "Das buch ist rot. ";
			string verse1 = "9";
			string pc2 = "Der Herr ist gross.";
			string verse2 = "10";
			string pc3 = "Ich spreche nicht viel Deutsch.";

			ITsStrBldr bldr = m_tsf.MakeString(pc1 + verse1 + pc2 + verse2 + pc3, m_wsVern).GetBldr();
			// Give each verse number the verse-number named style.
			bldr.SetStrPropValue(pc1.Length, pc1.Length + verse1.Length, (int)FwTextPropType.ktptNamedStyle,
				ScrStyleNames.VerseNumber);
			int ichV2 = pc1.Length + verse1.Length + pc2.Length;
			bldr.SetStrPropValue(ichV2, ichV2 + verse2.Length, (int)FwTextPropType.ktptNamedStyle,
				ScrStyleNames.VerseNumber);
			m_para.Contents.UnderlyingTsString = bldr.GetString();

			// ParagraphParser is disposable; the using block releases it even if an assertion throws.
			using (ParagraphParser pp = new ParagraphParser(m_para))
			{
				List<int> eosIndexes;
				List<int> segments = pp.CollectSegmentAnnotations(m_para.Contents.UnderlyingTsString, out eosIndexes);
				// VerseSegLabel reads the segments from the cache, so store them there first.
				Cache.VwCacheDaAccessor.CacheVecProp(m_para.Hvo, ktagParaSegments, segments.ToArray(), segments.Count);
				Assert.AreEqual(5, segments.Count);
				Assert.AreEqual("", AnnotationRefHandler.VerseSegLabel(m_para, 0, ktagParaSegments));
				Assert.AreEqual("", AnnotationRefHandler.VerseSegLabel(m_para, 2, ktagParaSegments));
				Assert.AreEqual("", AnnotationRefHandler.VerseSegLabel(m_para, 4, ktagParaSegments));
			}
		}
		/// <summary>
		/// Exercises ParagraphParser.CollectSegmentAnnotations on a series of strings and checks both the
		/// reported end-of-sentence offsets and the begin/end offsets of every resulting segment.
		/// </summary>
		public void SegmentBreaks()
		{
			ITsStrFactory tsf = TsStrFactoryClass.Create();
			// ParagraphParser is disposable; the using block releases it even if an assertion throws.
			using (ParagraphParser pp = new ParagraphParser(m_para))
			{
				// No punctuation at all: no EOS offsets, one segment covering the whole string.
				string test1 = "This is a simple sentence";
				ITsString tss = tsf.MakeString(test1, 1);
				m_para.Contents.UnderlyingTsString = tss;
				List<int> results;
				List<int> segments = pp.CollectSegmentAnnotations(tss, out results);
				VerifyBreaks(new int[0], results, "no punct string");
				Assert.AreEqual(1, segments.Count);
				VerifySegment(segments[0], 0, test1.Length, m_para.Hvo, "no punct string");

				// Empty string: no EOS offsets and no segments.
				ITsString tssEmpty = tsf.MakeString("", 1);
				m_para.Contents.UnderlyingTsString = tssEmpty;
				segments = pp.CollectSegmentAnnotations(tssEmpty, out results);
				VerifyBreaks(new int[0], results, "empty string");
				Assert.AreEqual(0, segments.Count);

				// String with multiple segments.
				string test2 = "This is a more complex sentence (ending with a 'quote').";
				string test3 = "  2 ";
				string test4 = "This is the second sentence.";
				ITsString tssMulti = tsf.MakeString(test2 + test3 + test4, 1);
				m_para.Contents.UnderlyingTsString = tssMulti;
				segments = pp.CollectSegmentAnnotations(tssMulti, out results);
				// The EOS offsets are the final period of each sentence.
				VerifyBreaks(new int[] { test2.Length - 1, test2.Length + test3.Length + test4.Length - 1 }, results, "multi-sentence string");
				Assert.AreEqual(2, segments.Count);
				// The segments end and begin at the '2' in test3.
				VerifySegment(segments[0], 0, test2.Length + 2, m_para.Hvo, "first seg of multi-sentence");
				VerifySegment(segments[1], test2.Length + 2,
					test2.Length + test3.Length + test4.Length, m_para.Hvo, "second seg of multi-sentence");

				// Same string, but with the '2' in test3 styled as a verse number.
				ITsStrBldr bldr = tssMulti.GetBldr();
				bldr.SetStrPropValue(test2.Length + 2, test2.Length + 3, (int)FwTextPropType.ktptNamedStyle,
					ScrStyleNames.VerseNumber);
				ITsString tssMultiV = bldr.GetString();
				m_para.Contents.UnderlyingTsString = tssMultiV;
				segments = pp.CollectSegmentAnnotations(tssMultiV, out results);
				VerifyBreaks(new int[] { test2.Length - 1, test2.Length + 4, test2.Length + test3.Length + test4.Length - 1 },
					results, "multi-sentence string with verse");
				Assert.AreEqual(3, segments.Count);
				// The verse number and the space after it form a segment of their own.
				VerifySegment(segments[0], 0, test2.Length + 2, m_para.Hvo, "first seg of multi-sentence w. verse");
				VerifySegment(segments[1], test2.Length + 2, test2.Length + 4, m_para.Hvo, "second seg of multi-sentence w. verse");
				VerifySegment(segments[2], test2.Length + 4,
					test2.Length + test3.Length + test4.Length, m_para.Hvo, "third seg of multi-sentence w. verse");

				// Chapter number plus verse number at the start, and a verse number at the very end.
				string test6 = "13 1 ";
				string test7 = "121";
				ITsString tssStartFinish = tsf.MakeString(test6 + test2 + test7, 1);
				bldr = tssStartFinish.GetBldr();
				bldr.SetStrPropValue(0, 2, (int)FwTextPropType.ktptNamedStyle,
					ScrStyleNames.ChapterNumber);
				bldr.SetStrPropValue(3, test6.Length, (int)FwTextPropType.ktptNamedStyle,
					ScrStyleNames.VerseNumber);
				bldr.SetStrPropValue(test6.Length + test2.Length, tssStartFinish.Length, (int)FwTextPropType.ktptNamedStyle,
					ScrStyleNames.VerseNumber);
				tssStartFinish = bldr.GetString();
				m_para.Contents.UnderlyingTsString = tssStartFinish;
				segments = pp.CollectSegmentAnnotations(tssStartFinish, out results);
				VerifyBreaks(new int[] { test6.Length, test6.Length + test2.Length - 1 },
					results, "start/finish breaks");
				Assert.AreEqual(3, segments.Count);
				// The chapter and verse runs, separated only by a space, are expected to share one segment.
				VerifySegment(segments[0], 0, test6.Length, m_para.Hvo, "first seg of start/finish");
				VerifySegment(segments[1], test6.Length, test6.Length + test2.Length, m_para.Hvo, "second seg of start/finish");
				VerifySegment(segments[2], test6.Length + test2.Length,
					tssStartFinish.Length, m_para.Hvo, "third seg of start/finish");

				// However, anything non-white between two label-style runs separates them. Change the space between the
				// two runs to something that's neither an EOS nor a letter.
				bldr = tssStartFinish.GetBldr();
				bldr.ReplaceTsString(2, 3, tsf.MakeString(":", 1));
				ITsString tssSplitLabelRuns = bldr.GetString();
				m_para.Contents.UnderlyingTsString = tssSplitLabelRuns;
				segments = pp.CollectSegmentAnnotations(tssSplitLabelRuns, out results);
				VerifyBreaks(new int[] { 2, 3, test6.Length, test6.Length + test2.Length - 1 },
					results, "broken pair breaks");
				Assert.AreEqual(5, segments.Count);
				// Chapter number, the ':' and the verse number are now expected as three separate segments.
				// (One character replaced one character, so the length equals tssStartFinish.Length.)
				VerifySegment(segments[0], 0, 2, m_para.Hvo, "first seg of broken pair");
				VerifySegment(segments[1], 2, 3, m_para.Hvo, "second seg of broken pair");
				VerifySegment(segments[2], 3, test6.Length, m_para.Hvo, "third seg of broken pair");
				VerifySegment(segments[3], test6.Length, test6.Length + test2.Length, m_para.Hvo, "fourth seg of broken pair");
				VerifySegment(segments[4], test6.Length + test2.Length,
					tssSplitLabelRuns.Length, m_para.Hvo, "fifth seg of broken pair");

				// Check that we get the correct breaks when the material before a label segment doesn't have an EOS.
				string test8 = "This text has no EOS ";
				ITsString tssMultiNoEos = tsf.MakeString(test8 + test3 + test4, 1);
				bldr = tssMultiNoEos.GetBldr();
				bldr.SetStrPropValue(test8.Length + 2, test8.Length + 3, (int)FwTextPropType.ktptNamedStyle,
					ScrStyleNames.VerseNumber);
				tssMultiNoEos = bldr.GetString();
				m_para.Contents.UnderlyingTsString = tssMultiNoEos;
				segments = pp.CollectSegmentAnnotations(tssMultiNoEos, out results);
				VerifyBreaks(new int[] { test8.Length + 2, test8.Length + 4, test8.Length + test3.Length + test4.Length - 1 },
					results, "no EOS before label");
				Assert.AreEqual(3, segments.Count);
				// The segments end and begin at the '2' in test3.
				VerifySegment(segments[0], 0, test8.Length + 2, m_para.Hvo, "first seg of no EOS before label");
				VerifySegment(segments[1], test8.Length + 2, test8.Length + 4, m_para.Hvo, "second seg of no EOS before label");
				VerifySegment(segments[2], test8.Length + 4,
					test8.Length + test3.Length + test4.Length, m_para.Hvo, "third seg of no EOS before label");
			}
		}
		/// <summary>
		/// Checks how CollectSegmentAnnotations treats a hard line break (U+2028): the break character is
		/// expected to form a segment of its own, with or without EOS punctuation before it.
		/// </summary>
		public void HardLineBreaks()
		{
			ITsStrFactory tsf = TsStrFactoryClass.Create();
			// ParagraphParser is disposable; the using block releases it even if an assertion throws.
			using (ParagraphParser pp = new ParagraphParser(m_para))
			{
				// String with an embedded hard line break and no EOS before it.
				string test1 = "This is a simple sentence";
				// \u is fixed-width, unlike \x, so appending text to this literal can never change the character.
				string lineBreak = "\u2028";
				string test3 = "with a hard break.";
				ITsString tss = tsf.MakeString(test1 + lineBreak + test3, 1);
				m_para.Contents.UnderlyingTsString = tss;
				List<int> results;
				List<int> segments = pp.CollectSegmentAnnotations(tss, out results);
				VerifyBreaks(new int[] { test1.Length, test1.Length + 1, tss.Length - 1 },
					results, "simple string with hard break");
				Assert.AreEqual(3, segments.Count);
				// The segments break around the line break character.
				VerifySegment(segments[0], 0, test1.Length, m_para.Hvo, "simple string with hard break");
				VerifySegment(segments[1], test1.Length, test1.Length + 1, m_para.Hvo, "simple string with hard break");
				VerifySegment(segments[2], test1.Length + 1, tss.Length, m_para.Hvo, "simple string with hard break");

				// Now try with an EOS before the hard break.
				string test1a = "This is a proper sentence?!";
				tss = tsf.MakeString(test1a + lineBreak + test3, 1);
				m_para.Contents.UnderlyingTsString = tss;
				segments = pp.CollectSegmentAnnotations(tss, out results);
				// The first expected offset is the '?', i.e. the first of the two EOS characters.
				VerifyBreaks(new int[] { test1a.Length - 2, test1a.Length + 1, tss.Length - 1 },
					results, "EOS before hard break");
				Assert.AreEqual(3, segments.Count);
				// The segments break around the line break character.
				VerifySegment(segments[0], 0, test1a.Length, m_para.Hvo, "EOS before hard break");
				VerifySegment(segments[1], test1a.Length, test1a.Length + 1, m_para.Hvo, "EOS before hard break");
				VerifySegment(segments[2], test1a.Length + 1, tss.Length, m_para.Hvo, "EOS before hard break");
			}
		}
		/// <summary>
		/// Checks that an object replacement character (U+FFFC) carrying object data is segmented like a
		/// label: the text before it, the ORC plus the following space, and the remaining text are expected
		/// as three separate segments.
		/// </summary>
		public void OrcIsLabel()
		{
			ITsStrFactory tsf = TsStrFactoryClass.Create();
			// ParagraphParser is disposable; the using block releases it even if an assertion throws.
			using (ParagraphParser pp = new ParagraphParser(m_para))
			{
				// String with embedded ORC.
				string test1 = "This is a simple sentence";
				// \u is fixed-width, unlike \x, so appending text to this literal can never change the character.
				string test2 = "\ufffc";
				string test3 = " with a footnote.";
				ITsString tss = tsf.MakeString(test1 + test2 + test3, 1);
				// To be recognized an ORC must have unique properties.
				ITsStrBldr bldr = tss.GetBldr();
				bldr.SetStrPropValue(test1.Length, test1.Length + test2.Length, (int)FwTextPropType.ktptObjData, "nonsence");
				tss = bldr.GetString();
				m_para.Contents.UnderlyingTsString = tss;
				List<int> results;
				List<int> segments = pp.CollectSegmentAnnotations(tss, out results);
				VerifyBreaks(new int[] { test1.Length, test1.Length + test2.Length + 1, test1.Length + test2.Length + test3.Length - 1 },
					results, "multi-sentence string with ORC");
				Assert.AreEqual(3, segments.Count);
				// The segments break around the ORC.
				VerifySegment(segments[0], 0, test1.Length, m_para.Hvo, "first seg of multi-sentence w. ORC");
				VerifySegment(segments[1], test1.Length, test1.Length + test2.Length + 1, m_para.Hvo, "second seg of multi-sentence w. ORC");
				VerifySegment(segments[2], test1.Length + test2.Length + 1,
					test1.Length + test2.Length + test3.Length, m_para.Hvo, "third seg of multi-sentence w. ORC");
			}
		}
		/// <summary>
		/// Checks that a question mark at the very start of the text (or right after a verse number) does
		/// not produce a segment break; only the final question mark is reported as an EOS.
		/// </summary>
		public void LeadingPunctuation()
		{
			ITsStrFactory tsf = TsStrFactoryClass.Create();
			// ParagraphParser is disposable; the using block releases it even if an assertion throws.
			using (ParagraphParser pp = new ParagraphParser(m_para))
			{
				string test1 = "?This is a question with special punctuation?";
				ITsString tss = tsf.MakeString(test1, 1);
				m_para.Contents.UnderlyingTsString = tss;
				List<int> results;
				List<int> segments = pp.CollectSegmentAnnotations(tss, out results);
				VerifyBreaks(new int[] { test1.Length - 1 }, results, "leading QM");
				Assert.AreEqual(1, segments.Count);
				VerifySegment(segments[0], 0, test1.Length, m_para.Hvo, "leading QM");

				// Now try leading punctuation following a verse number.
				ITsStrBldr bldr = tss.GetBldr();
				string verse = "5 ";
				bldr.Replace(0, 0, verse, null);
				// Only the digit gets the verse-number style; the space after it is left unstyled.
				bldr.SetStrPropValue(0, 1, (int)FwTextPropType.ktptNamedStyle,
					ScrStyleNames.VerseNumber);
				ITsString tssMultiV = bldr.GetString();
				m_para.Contents.UnderlyingTsString = tssMultiV;
				segments = pp.CollectSegmentAnnotations(tssMultiV, out results);
				VerifyBreaks(new int[] { verse.Length, tssMultiV.Length - 1 },
					results, "leading verse and QM");
				Assert.AreEqual(2, segments.Count);
				VerifySegment(segments[0], 0, verse.Length, m_para.Hvo, "first seg of leading verse and QM");
				VerifySegment(segments[1], verse.Length, tssMultiV.Length, m_para.Hvo, "second seg of leading verse and QM");
			}
		}
		/// <summary>
		/// Checks segment breaking around runs of periods and periods between digits: three-dot ellipses and
		/// reference-style periods (e.g. "1.2") are expected not to break, while runs of two or four periods are.
		/// </summary>
		public void EllipsesAndRefs()
		{
			ITsStrFactory tsf = TsStrFactoryClass.Create();
			// ParagraphParser is disposable; the using block releases it even if an assertion throws.
			using (ParagraphParser pp = new ParagraphParser(m_para))
			{
				// Ellipses and periods inside references: only the final period is an EOS.
				string test1 = "This is...not ... a simple sentence; it discusses Scripture (Gen 1.2 and Rom 1.2-4.5) and has ellipses.";
				ITsString tss = tsf.MakeString(test1, 1);
				m_para.Contents.UnderlyingTsString = tss;
				List<int> results;
				List<int> segments = pp.CollectSegmentAnnotations(tss, out results);
				VerifyBreaks(new int[] { test1.Length - 1 }, results, "ellipses verse period string");
				Assert.AreEqual(1, segments.Count);
				VerifySegment(segments[0], 0, test1.Length, m_para.Hvo, "ellipses verse period");

				// Two periods in a row do break; the reported offset is the first period of each pair.
				string test2a = "Here we have";
				string twoDots = "..";
				string test2b = "just two periods, and at the end, another two";
				tss = tsf.MakeString(test2a + twoDots + test2b + twoDots, 1);
				m_para.Contents.UnderlyingTsString = tss;
				segments = pp.CollectSegmentAnnotations(tss, out results);
				VerifyBreaks(new int[] { test2a.Length, test2a.Length + 2 + test2b.Length }, results, "string with double dots");
				Assert.AreEqual(2, segments.Count);
				VerifySegment(segments[0], 0, test2a.Length + 2, m_para.Hvo, "string with double dots(1)");
				VerifySegment(segments[1], test2a.Length + 2, tss.Length, m_para.Hvo, "string with double dots(2)");

				// A trailing three-dot ellipsis is not reported as an EOS at all.
				string test3 = "This sentence ends with an ellipsis...";
				tss = tsf.MakeString(test3, 1);
				m_para.Contents.UnderlyingTsString = tss;
				segments = pp.CollectSegmentAnnotations(tss, out results);
				VerifyBreaks(new int[] { }, results, "string with final ellipsis");
				Assert.AreEqual(1, segments.Count);
				VerifySegment(segments[0], 0, test3.Length, m_para.Hvo, "string with final ellipsis");

				// Four periods in a row behave like two: they break.
				string fourDots = "....";
				tss = tsf.MakeString(test2a + fourDots + test2b + fourDots, 1);
				m_para.Contents.UnderlyingTsString = tss;
				segments = pp.CollectSegmentAnnotations(tss, out results);
				VerifyBreaks(new int[] { test2a.Length, test2a.Length + 4 + test2b.Length }, results, "string with four dots");
				Assert.AreEqual(2, segments.Count);
				VerifySegment(segments[0], 0, test2a.Length + 4, m_para.Hvo, "string with four dots(1)");
				VerifySegment(segments[1], test2a.Length + 4, tss.Length, m_para.Hvo, "string with four dots(2)");

				// Case 2 periods with surrounding numbers
				string test5a = "Here is a number and two dots: 5";
				string test5b = "2 and another number, and the final dot has a number before it: 2.";
				tss = tsf.MakeString(test5a + twoDots + test5b, 1);
				m_para.Contents.UnderlyingTsString = tss;
				segments = pp.CollectSegmentAnnotations(tss, out results);
				VerifyBreaks(new int[] { test5a.Length, test5a.Length + 2 + test5b.Length - 1 }, results, "string with numbers and double dots");
				Assert.AreEqual(2, segments.Count);
				// One plus 2 for the two dots, but the following digit and space go in the previous segment, too.
				VerifySegment(segments[0], 0, test5a.Length + 2 + 2, m_para.Hvo, "string with numbers and double dots(1)");
				VerifySegment(segments[1], test5a.Length + 2 + 2, tss.Length, m_para.Hvo, "string with numbers and double dots(2)");
			}
		}
Exemple #7
0
			/// <summary>
			/// Breaks the current phrase annotation (m_hvoCurrentAnnotation) using a temporary paragraph
			/// parser and records the first annotation it returns in HvoNewAnnotation.
			/// </summary>
			public void BreakPhraseAnnotation()
			{
				using (ParagraphParser parser = new ParagraphParser(m_para))
				{
					List<int> brokenOut = parser.BreakPhraseAnnotation(m_hvoCurrentAnnotation, ParasInView);
					// Only the first resulting annotation is remembered.
					HvoNewAnnotation = brokenOut[0];
				}
			}
			/// <summary>
			/// Breaks the phrase annotation found at the given segment/segment-form position and flags the
			/// paragraph as needing a reparse.
			/// </summary>
			/// <param name="iSegment">segment index passed to GetSegmentForm</param>
			/// <param name="iSegForm">segment-form index passed to GetSegmentForm</param>
			/// <returns>the list returned by ParagraphParser.BreakPhraseAnnotation</returns>
			virtual internal protected List<int> BreakPhrase(int iSegment, int iSegForm)
			{
				// The using block disposes the parser even when BreakPhraseAnnotation throws;
				// a trailing explicit Dispose() call would be skipped in that case.
				using (ParagraphParser pp = new ParagraphParser(m_para))
				{
					List<int> newCbas = pp.BreakPhraseAnnotation(GetSegmentForm(iSegment, iSegForm), new int[] { m_para.Hvo });
					// Recompute other segment forms that match this combination of phrases.
					NeedReparseParagraph = true;
					return newCbas;
				}
			}
Exemple #9
0
		/// <summary>
		/// Parses one text with a temporary ParagraphParser: applies the options, optionally resets the
		/// wordform occurrences, reloads the paragraph data if the text was parsed before, then salvages
		/// dummy annotations, parses, records the parse timestamp, adds entry guesses and cleans up.
		/// </summary>
		/// <param name="text">the text to parse; its Cache is used for the parser and for reloading</param>
		/// <param name="options">parser options; ResetConcordance controls the occurrence reset</param>
		/// <param name="progress">progress reporter handed on to the parser</param>
		private static void ParseTextCore(IStText text, ParagraphParserOptions options, ProgressState progress)
		{
			using (ParagraphParser parser = new ParagraphParser(text.Cache))
			{
				parser.SetOptions(options);

				if (options.ResetConcordance)
				{
					WordformInventory inventory = text.Cache.LangProject.WordformInventoryOA as WordformInventory;
					inventory.ResetAllWordformOccurrences();
				}

				bool parsedBefore = text.LastParsedTimestamp != 0;
				if (parsedBefore)
				{
					// We actually have parsed before...yet we have to again. Possibly another program changed
					// the data. Reload it as efficiently as possible.
					string query = "select Id, UpdStmp, Contents, Contents_Fmt from StTxtPara_ where Owner$ = " + text.Hvo + " order by OwnOrd$";
					IDbColSpec columns = DbColSpecClass.Create();
					columns.Push((int)DbColType.koctObjVecOwn, 0, (int)StText.StTextTags.kflidParagraphs, 0);
					columns.Push((int)DbColType.koctTimeStamp, 1, 0, 0);
					columns.Push((int)DbColType.koctString, 1, (int)StTxtPara.StTxtParaTags.kflidContents, 0);
					columns.Push((int)DbColType.koctFmt, 1, (int)StTxtPara.StTxtParaTags.kflidContents, 0);
					text.Cache.VwOleDbDaAccessor.Load(query, columns, text.Hvo, 0, null, false);
				}

				// The order of the following steps is significant and is kept exactly as it was.
				parser.SalvageDummyAnnotations(text);
				parser.Parse(text, progress);
				text.RecordParseTimestamp();
				parser.AddEntryGuesses(progress);
				parser.CleanupLeftoverAnnotations(progress);
			}
		}
Exemple #10
0
		/// <summary>
		/// Parses a single paragraph with a temporary ParagraphParser built from the paragraph's cache and
		/// the two given tags, then disposes the parser.
		/// </summary>
		/// <param name="para">the paragraph to parse</param>
		/// <param name="tagSegments">tag passed to the ParagraphParser constructor for segments</param>
		/// <param name="tagSegForms">tag passed to the ParagraphParser constructor for segment forms</param>
		/// <param name="options">options handed to ParseWithOptions</param>
		private static void ParseParagraph(IStTxtPara para, int tagSegments, int tagSegForms, ParagraphParserOptions options)
		{
			ParagraphParser parser = new ParagraphParser(para.Cache, tagSegments, tagSegForms);
			try
			{
				parser.ParseWithOptions(para, options);
			}
			finally
			{
				// Always release the parser, whether or not parsing succeeded.
				parser.Dispose();
			}
		}
Exemple #11
0
		/// <summary>
		/// Parses the given paragraphs with a temporary ParagraphParser and returns the parser's list of
		/// matching annotations. Updating of concordance wordforms is suspended for the duration of the
		/// parse and is re-enabled afterwards, even when parsing throws.
		/// </summary>
		/// <param name="cache">cache used to build the parser and to preload paragraph contents</param>
		/// <param name="hvosStTxtPara">hvos of the StTxtPara objects to parse</param>
		/// <param name="progress">progress reporter; must not be null, it is used unconditionally</param>
		/// <param name="matcher">passed to the parser for each paragraph; when non-null a match list is collected</param>
		/// <param name="line">concordance line passed to the parser for each paragraph</param>
		/// <returns>the parser's m_matchingAnnotations list; it is only created here when matcher is non-null</returns>
		internal static List<int> ConcordParagraphs(FdoCache cache, int[] hvosStTxtPara, ProgressState progress,
			IMatcher matcher, ConcordanceControl.ConcordanceLines line)
		{
			using (ParagraphParser pp = new ParagraphParser(cache))
			{
				// this will effectively clear ConcordanceWordforms, which seems overkill, but
				// since we are changing the occurrences on those wordforms,
				// and also possibly adding many new wordforms, we should just allow RecordLists that use
				// ConcordanceWordforms to reload the list.
				// (Enhance: is there any way we can make those lists be smart about when they need to reload,
				// rather than forcing them to?)
				WordformInventory wfi = pp.m_wfi as WordformInventory;
				wfi.SuspendUpdatingConcordanceWordforms = true;
				try
				{
					pp.CreateDummyWordforms = true;
					pp.m_hvosStTxtPara = hvosStTxtPara;
					if (matcher != null)
						pp.m_matchingAnnotations = new List<int>();
					ParagraphParser.ResetParseSessionDependentStaticData();

					// Estimate the number of total number of milestones we'll set.
					// Enhance: we could construct a way to set percentage done based upon
					// number of texts and paragraphs in each text.
					if (progress is MilestoneProgressState)
					{
						MilestoneProgressState mp = progress as SIL.FieldWorks.Common.Controls.MilestoneProgressState;
						for (int i = 0; i < pp.m_hvosStTxtPara.Length; ++i)
						{
							mp.AddMilestone(1);
						}
					}

					// Preload all the paragraphs.
					cache.PreloadIfMissing(hvosStTxtPara, (int)StTxtPara.StTxtParaTags.kflidContents, 0, false);

					// Parse each text to load our paragraph and wordform segment annotations.
					int cPara = 0;
					using (SuppressSubTasks suppressor = new SuppressSubTasks(cache, true))
					{
						foreach (IStTxtPara para in new FdoObjectSet<IStTxtPara>(cache, pp.m_hvosStTxtPara, false))
						{
							++cPara;
							pp.Parse(para, matcher, line);
							progress.SetMilestone();
							progress.Breath();
							// Stop early, and tell the user, once the maximum number of matches is reached.
							if (pp.m_matchingAnnotations != null &&
								pp.m_matchingAnnotations.Count >= ConcordanceControl.MaxConcordanceMatches())
							{
								MessageBox.Show(String.Format(ITextStrings.ksShowingOnlyTheFirstXXXMatches,
									pp.m_matchingAnnotations.Count, cPara, pp.m_hvosStTxtPara.Length),
									ITextStrings.ksNotice, MessageBoxButtons.OK, MessageBoxIcon.Information);
								break;
							}
						}
						pp.CleanupLeftoverAnnotations(progress);
					}
					progress.SetMilestone();
					progress.Breath();
					return pp.m_matchingAnnotations;
				}
				finally
				{
					// Re-enable updating on every exit path so that an exception during the parse
					// cannot leave the inventory suspended.
					wfi.SuspendUpdatingConcordanceWordforms = false;
				}
			}
		}
Exemple #12
0
		/// <summary>
		/// Creates a segment maker: the text and writing system factory go to the base class, and the
		/// paragraph parser is remembered in m_paraParaser.
		/// </summary>
		/// <param name="text">passed to the base constructor</param>
		/// <param name="wsf">passed to the base constructor</param>
		/// <param name="pp">the paragraph parser stored for later use</param>
		internal SegmentMaker(ITsString text, ILgWritingSystemFactory wsf, ParagraphParser pp)
			: base(text, wsf)
		{
			this.m_paraParaser = pp;
		}
Exemple #13
0
		/// <summary>
		/// Parse through all the given texts, even if they've been fully analyzed.
		/// Collect occurrences of words and cache all paragraph and wordform related virtual properties.
		/// </summary>
		/// <param name="cache">cache used to load the text data and to build the parser</param>
		/// <param name="hvosStText">list of hvos for StText objects</param>
		/// <param name="progress">progress reporter; a NullProgressState is substituted when this is null</param>
		public static void ConcordTexts(FdoCache cache, int[] hvosStText, ProgressState progress)
		{
			if (progress == null)
			{
				progress = new NullProgressState();
			}

			using (ParagraphParser parser = new ParagraphParser(cache))
			{
#if PROFILING
				long ticks = DateTime.Now.Ticks;
#endif
				// Ensure all info about paragraphs of texts and contents of paragraphs is in cache and current.
				// Enhance JohnT: possibly performance would be helped, especially in cases where we have a lot
				// of archived Scripture versions, by restricting this to just the texts in hvosStText.
				cache.LoadAllOfAnOwningVectorProp((int)StText.StTextTags.kflidParagraphs, "StText");
				cache.LoadAllOfAStringProp((int)StTxtPara.StTxtParaTags.kflidContents);
#if PROFILING
				Debug.WriteLine("Time to end of loading text data = " + (DateTime.Now.Ticks - ticks));
#endif
				parser.m_hvosStText = hvosStText;
				parser.RebuildingConcordanceWordforms = true;
				(cache.LangProject.WordformInventoryOA as WordformInventory).ResetConcordanceWordformsAndOccurrences();

#if PROFILING
				Debug.WriteLine("Time to end of reset occurrenes = " + (DateTime.Now.Ticks - ticks));
#endif

				ParagraphParser.ResetParseSessionDependentStaticData();

				// Estimate the number of total number of milestones we'll set.
				// Enhance: we could construct a way to set percentage done based upon
				// number of texts and paragraphs in each text.
				MilestoneProgressState milestones = progress as MilestoneProgressState;
				if (milestones != null)
				{
					// One batch of parse-text milestones per text.
					for (int cRemaining = parser.m_hvosStText.Length; cRemaining > 0; --cRemaining)
					{
						AddParseTextMilestones(milestones);
					}
				}

				// Parse each text to load our paragraph and wordform segment annotations.
				using (SuppressSubTasks suppressor = new SuppressSubTasks(cache, true))
				{
					FdoObjectSet<IStText> textSet = new FdoObjectSet<IStText>(cache, parser.m_hvosStText, false);
					List<IStText> texts = new List<IStText>(textSet);
					// No per-text reload of previously parsed texts is needed here: the contents of ALL
					// paragraphs were loaded above.

#if PROFILING
					Debug.WriteLine("Time to end of preliminaries = " + (DateTime.Now.Ticks - ticks));
#endif
					// Need a separate loop for these, otherwise things get confused as we start to reuse
					// annotations in Parse() and then re-encounter them in later attempts to salvage Pfics and segments.
					foreach (IStText text in texts)
					{
						parser.SalvageDummyAnnotations(text);
					}
#if PROFILING
					Debug.WriteLine("Time to start of main parse loop = " + (DateTime.Now.Ticks - ticks));
#endif
					foreach (IStText text in texts)
					{
						parser.Parse(text, progress);
					}
#if PROFILING
					Debug.WriteLine("Time to end of main parse loop = " + (DateTime.Now.Ticks - ticks));
#endif
					StText.RecordParseTimestamps(texts);
					parser.CleanupLeftoverAnnotations(progress);
				}

				progress.SetMilestone();
				progress.Breath();
#if PROFILING
				Debug.WriteLine("Parse required " + parser.m_cDummyAnnotations + " dummy annotations"
					+ " but could only reuse " + parser.m_dummyAnnotationsToReuse.Count);
				Debug.WriteLine("  Parse created " + parser.m_cWficsMade + " Wfics, " + parser.m_cPficsMade + " Pfics, and "
					+ parser.m_cSegmentsMade + " Segments");
				Debug.WriteLine("  So far we made a total of " + s_cTotalDummiesMade + "; this parse making dummies took " + parser.m_cTicksMakingDummies);
				Debug.WriteLine("  This parse we reset " + parser.m_cTotalDummiesReset + " in a time of " + parser.m_cTicksResettingDummies);
#endif
			}
		}
Exemple #14
0
		/// <summary>
		/// Retrieve the wordforms collected during the last parsing session.
		/// </summary>
		/// <param name="cache">cache used to construct the temporary parser</param>
		/// <returns>a new set built from the parser's WordformIdOccurrencesTable</returns>
		public static Set<int> WordformsFromLastParseSession(FdoCache cache)
		{
			using (ParagraphParser parser = new ParagraphParser(cache))
			{
				// Build the set before the parser is disposed.
				return new Set<int>(parser.WordformIdOccurrencesTable);
			}
		}
		/// <summary>
		/// Sets the paragraph contents from the builder, collects the paragraph's segment annotations and
		/// caches them on the paragraph under ktagParaSegments.
		/// </summary>
		/// <param name="bldr">builder whose current string becomes the paragraph contents</param>
		/// <param name="para">the paragraph to update and segment</param>
		/// <returns>the segment list produced by CollectSegmentAnnotations</returns>
		private List<int> GetSegments(ITsStrBldr bldr, ScrTxtPara para)
		{
			para.Contents.UnderlyingTsString = bldr.GetString();
			// ParagraphParser is disposable; the using block releases it on every exit path.
			using (ParagraphParser pp = new ParagraphParser(para))
			{
				List<int> eosIndexes;
				List<int> segments = pp.CollectSegmentAnnotations(para.Contents.UnderlyingTsString, out eosIndexes);
				Cache.VwCacheDaAccessor.CacheVecProp(para.Hvo, ktagParaSegments, segments.ToArray(), segments.Count);
				return segments;
			}
		}
Exemple #16
0
		/// <summary>
		/// Ensure that the segments property of the paragraph is consistent with its contents and consists of real
		/// database objects.
		/// </summary>
		/// <param name="para">the paragraph whose segments are recomputed and cached</param>
		/// <param name="wsBt">writing system passed to LoadSegmentFreeTranslations</param>
		/// <returns>the paragraph's cache</returns>
		internal static FdoCache EnsureMainParaSegments(IStTxtPara para, int wsBt)
		{
			// ParagraphParser is disposable; the using block releases it on every exit path.
			using (ParagraphParser pp = new ParagraphParser(para))
			{
				List<int> eosOffsets;
				List<int> segs = pp.CollectSegmentAnnotationsOfPara(out eosOffsets);
				// Make sure the segments list is up to date.
				FdoCache cache = para.Cache;
				cache.VwCacheDaAccessor.CacheVecProp(para.Hvo, StTxtPara.SegmentsFlid(cache), segs.ToArray(), segs.Count);
				// This further makes sure all are real.
				StTxtPara.LoadSegmentFreeTranslations(new int[] { para.Hvo }, cache, wsBt);
				return cache;
			}
		}