Example #1
0
		private bool TryGetTwficLowercaseSentenceInitialForm(int hvoAnnotation, out int hvoWfiLower)
		{
			hvoWfiLower = 0;
			int ktagMatchingLowercaseForm = InterlinVc.MatchingLowercaseWordForm(Cache);
			if (Cache.MainCacheAccessor.get_IsPropInCache(hvoAnnotation, ktagMatchingLowercaseForm, (int)CellarModuleDefns.kcptReferenceAtom, 0))
				hvoWfiLower = Cache.GetObjProperty(hvoAnnotation, ktagMatchingLowercaseForm);
			else
			{
				StTxtPara.TwficInfo twficInfo = new StTxtPara.TwficInfo(m_fdoCache, hvoAnnotation);
				if (twficInfo.IsFirstTwficInSegment)
				{
					ITsString tssWfBaseline = StTxtPara.TssSubstring(twficInfo.Object);
					CpeTracker tracker = new CpeTracker(Cache.LanguageWritingSystemFactoryAccessor, tssWfBaseline);
					ILgCharacterPropertyEngine cpe = tracker.CharPropEngine(0);
					string sLower = cpe.ToLower(tssWfBaseline.Text);
					hvoWfiLower = Cache.LangProject.WordformInventoryOA.GetWordformId(sLower,
						StringUtils.GetWsAtOffset(tssWfBaseline, 0));
				}
			}
			return hvoWfiLower != 0;
		}
Example #2
0
		public SegmentBreaker(ITsString text, ILgWritingSystemFactory wsf)
		{
			tssText = text;
			m_cpeTracker = new CpeTracker(wsf, text);
			// Make sure this is always something.
			m_cpe = m_cpeTracker.CharPropEngine(0);
		}
Example #3
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Determine if the specified text exists in the string
		/// </summary>
		/// <param name="wordFormTss">text to look for</param>
		/// <param name="sourceTss">text to search in</param>
		/// <param name="wsf">source of char prop engines</param>
		/// <param name="fMatchWholeWord">True to match a whole word, false otherwise</param>
		/// <param name="ichMin">The start of the string where the text was found (undefined if
		/// this method returns false)</param>
		/// <param name="ichLim">The limit (one character position past the end) of the string
		/// where the text was found (undefined if this method returns false)</param>
		/// <returns><c>true</c> if the text is matched exactly (matching case)
		/// </returns>
		/// <remarks>
		/// TODO: Add a parameter to make it possible to do a case-insensitive match?</remarks>
		/// ------------------------------------------------------------------------------------
		public static bool FindTextInString(ITsString wordFormTss, ITsString sourceTss,
			ILgWritingSystemFactory wsf, bool fMatchWholeWord, out int ichMin, out int ichLim)
		{
			ichMin = ichLim = 0;

			if (wordFormTss == null || sourceTss == null || wordFormTss.Length == 0 ||
				sourceTss.Length == 0)
			{
				// Nothing we can really do
				return false;
			}

			int ichWordForm = 0;
			bool fMatchInProgress = false;
			CpeTracker sourceTracker = new CpeTracker(wsf, sourceTss);
			CpeTracker wordTracker = new CpeTracker(wsf, wordFormTss);
			string wordForm = wordFormTss.Text;
			string sourceText = sourceTss.Text;
			bool fWordFormAndSrcAreBothNormalized = wordForm.IsNormalized() && sourceText.IsNormalized();
			bool fPrevCharWasWordForming = false;
			// Must use local temp variable because some callers pass same variable as both
			// ichMin and ichLim.
			int ichLimT;
			for (int ichSrc = 0; ichSrc < sourceText.Length; ichSrc = ichLimT)
			{
				ichLimT = ichSrc + 1;
				bool fWordForming = sourceTracker.CharPropEngine(ichSrc).get_IsWordForming(sourceText[ichSrc]);
				if (!fMatchInProgress && fPrevCharWasWordForming && fWordForming && fMatchWholeWord)
					continue;

				fPrevCharWasWordForming = fWordForming;
				if (!fMatchInProgress && !fWordForming)
					continue;

				bool fMatch = (wordForm[ichWordForm] == sourceText[ichSrc]);

				if (fMatch)
				{
					ichWordForm++;
				}
				else
				{
					if (sourceText[ichSrc] == kchObject)
						fMatch = true;
					else if (!fWordFormAndSrcAreBothNormalized)
					{
						int ichWfCharLim = ichWordForm + 1;

						while (ichWfCharLim < wordForm.Length &&
							!wordTracker.CharPropEngine(ichWfCharLim).get_IsLetter(wordForm[ichWfCharLim]) &&
							wordTracker.CharPropEngine(ichWfCharLim).get_IsWordForming(wordForm[ichWfCharLim]))
						{
							ichWfCharLim++;
						}

						while (ichLimT < sourceText.Length &&
							!sourceTracker.CharPropEngine(ichLimT).get_IsLetter(sourceText[ichLimT]) &&
							sourceTracker.CharPropEngine(ichLimT).get_IsWordForming(sourceText[ichLimT]))
						{
							ichLimT++;
						}

						if (wordForm.Substring(ichWordForm, ichWfCharLim - ichWordForm).Normalize() ==
							sourceText.Substring(ichSrc, ichLimT - ichSrc).Normalize())
						{
							ichWordForm = ichWfCharLim;
							fMatch = true;
						}
					}
				}

				if (fMatch)
				{
					// If this character is the start of a match but the previous character was
					// word-forming, then this is a bogus match, so we keep looking.
					if (!fMatchInProgress)
					{
						if (ichSrc == 0 || !sourceTracker.CharPropEngine(ichSrc - 1).get_IsWordForming(sourceText[ichSrc - 1]) || !fMatchWholeWord)
						{
							ichMin = ichSrc;
							fMatchInProgress = true;
						}
						else
							ichWordForm = 0;
					}

					if (fMatchInProgress && ichWordForm == wordForm.Length)
					{
						if (++ichSrc < sourceText.Length && sourceTracker.CharPropEngine(ichSrc).get_IsWordForming(sourceText[ichSrc]) &&
							fMatchWholeWord)
						{
							ichWordForm = 0;
							fMatchInProgress = false;
						}
						else
						{
							ichLim = ichLimT;
							return true;
						}
					}
				}
				else
				{
					ichWordForm = 0;
					fMatchInProgress = false;
				}
			}
			return false;
		}
Example #4
0
		/// <summary>
		/// Start it off analyzing a string.
		/// </summary>
		/// <param name="tss"></param>
		/// <param name="cpe">engine to use.</param>
		public WordMaker(ITsString tss, ILgWritingSystemFactory wsf)
		{
			Init(tss);
			m_tracker = new CpeTracker(wsf, tss);
			m_cpe = m_tracker.CharPropEngine(0); // a default for functions that don't depend on wordforming.
		}