Quotation Mark Categorizer (don't ask!)
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Creates a new <see cref="ProcessPunctationTokens"/> processor that remembers the
		/// given categorizers and checking level.
		/// </summary>
		/// <param name="categorizer">The character categorizer.</param>
		/// <param name="quotationCategorizer">The quotation mark categorizer.</param>
		/// <param name="level">Indicator to determine how much to combine contiguous
		/// punctuation sequences into patterns. Advanced = All contiguous punctuation and
		/// whitespace characters form a single pattern; Intermediate = Contiguous punctuation
		/// forms a single pattern (delimited by whitespace); Basic = Each punctuation character
		/// stands alone. In all three modes, whitespace before and/or after a punctuation token
		/// indicates whether it is word-initial, word-medial, word-final, or isolated.</param>
		/// ------------------------------------------------------------------------------------
		public ProcessPunctationTokens(CharacterCategorizer categorizer,
			QuotationMarkCategorizer quotationCategorizer, CheckingLevel level)
		{
			// Plain field captures; nothing is validated or computed here.
			this.m_level = level;
			this.m_quotationCategorizer = quotationCategorizer;
			this.m_categorizer = categorizer;
		}
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Returns a TextTokenSubstring for every occurrence of the desiredKey found in the
		/// given tokens. The check settings (punctuation patterns or valid/invalid item lists,
		/// checking level, whitespace representation and preferred locale) are re-read from
		/// the checks data source on every call.
		/// </summary>
		/// <param name="tokens">The tokens to scan. Tokens whose Text is null, or whose Locale
		/// (null is treated as empty) differs from the "PreferredLocale" parameter, are
		/// skipped.</param>
		/// <param name="desiredKey">e.g., _[_ or empty string to look for all patterns</param>
		/// <returns>The list of punctuation sequences collected by the note and body
		/// processors (the same list that is stored in m_punctuationSequences).</returns>
		/// ------------------------------------------------------------------------------------
		public List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens, string desiredKey)
		{
#if DEBUG
			// Presumably a debugging aid: a materialized copy of the tokens for inspection.
			List<ITextToken> AllTokens = new List<ITextToken>(tokens);
			if (AllTokens.Count == 0)
			{
				// Keep the compiler from complaining about assigning to a variable, but not using it.
			}
#endif
			m_characterCategorizer = m_checksDataSource.CharacterCategorizer;

			string patternsXml = m_checksDataSource.GetParameterValue("PunctuationPatterns");
			if (patternsXml == null || patternsXml.Trim().Length == 0)
			{
				// No pattern list available: use the plain valid/invalid item parameters.
				ValidItems = m_checksDataSource.GetParameterValue(kValidItemsParameter);
				InvalidItems = m_checksDataSource.GetParameterValue(kInvalidItemsParameter);
			}
			else
			{
				// Sort every loaded pattern into the valid or the invalid list.
				m_validItemsList = new List<string>();
				m_invalidItemsList = new List<string>();
				string writingSystemName =
					m_checksDataSource.GetParameterValue("DefaultWritingSystemName");
				foreach (PuncPattern puncPattern in
					PuncPatternsList.Load(patternsXml, writingSystemName))
				{
					if (!puncPattern.Valid)
						m_invalidItemsList.Add(puncPattern.Pattern);
					else
						m_validItemsList.Add(puncPattern.Pattern);
				}
			}

			// Anything other than "Advanced" or "Intermediate" (including null) means Basic.
			string levelName = m_checksDataSource.GetParameterValue("PunctCheckLevel");
			CheckingLevel level = CheckingLevel.Basic;
			if (levelName == "Advanced")
				level = CheckingLevel.Advanced;
			else if (levelName == "Intermediate")
				level = CheckingLevel.Intermediate;

			// Only the first character of the configured whitespace representation is used;
			// when the parameter is missing or empty the current static value is kept.
			string whitespaceRep = m_checksDataSource.GetParameterValue("PunctWhitespaceChar");
			if (!String.IsNullOrEmpty(whitespaceRep))
				s_whitespaceRep = whitespaceRep.Substring(0, 1);

			string preferredLocale = m_checksDataSource.GetParameterValue("PreferredLocale");
			if (preferredLocale == null)
				preferredLocale = string.Empty;

			QuotationMarkCategorizer quoteCategorizer =
				new QuotationMarkCategorizer(m_checksDataSource);

			// Two independent processors: one for body text, one for notes.
			ProcessPunctationTokens bodyState = new ProcessPunctationTokens(
				m_characterCategorizer, quoteCategorizer, level);
			ProcessPunctationTokens noteState = new ProcessPunctationTokens(
				m_characterCategorizer, quoteCategorizer, level);

			m_punctuationSequences = new List<TextTokenSubstring>();

			foreach (ITextToken token in tokens)
			{
				if (token.Text == null)
					continue;
				if ((token.Locale ?? string.Empty) != preferredLocale)
					continue;

				switch (token.TextType)
				{
					case TextType.Note:
						// A new note closes any sequence still pending from the previous note.
						if (token.IsNoteStart)
							noteState.FinalizeResult(desiredKey, m_punctuationSequences, true);
						noteState.ProcessToken(token, desiredKey, m_punctuationSequences);
						break;

					case TextType.Verse:
					case TextType.Other:
						// Body text ends any note in progress before it is processed itself.
						noteState.FinalizeResult(desiredKey, m_punctuationSequences, true);
						bodyState.ProcessToken(token, desiredKey, m_punctuationSequences);
						break;

					default:
						// Tokens of any other type only matter when they start a paragraph.
						if (token.IsParagraphStart)
						{
							bodyState.FinalizeResult(desiredKey, m_punctuationSequences, true);
							bodyState.TreatAsParagraphStart = true;
						}
						break;
				}
			}

			// Flush whatever is still pending: notes first, then body text.
			noteState.FinalizeResult(desiredKey, m_punctuationSequences, true);
			bodyState.FinalizeResult(desiredKey, m_punctuationSequences, true);

			return m_punctuationSequences;
		}
Exemple #3
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Gets a list of TextTokenSubstrings containing the references and character offsets
		/// where quotation problems occur.
		/// </summary>
		/// <param name="tokens">The tokens (from the data source) to check for quotation problems.</param>
		/// <param name="desiredKey">The key handed to both token processors (documented by the
		/// original author as: empty string).</param>
		/// <returns>The list of problems filled in by the note and body processors (the same
		/// list that is stored in m_qmProblems).</returns>
		/// ------------------------------------------------------------------------------------
		public List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens, string desiredKey)
		{
			// Refresh the settings from the data source on every run.
			m_charCategorizer = m_chkDataSource.CharacterCategorizer;
			ValidItems = m_chkDataSource.GetParameterValue(m_validItemsParameter);
			InvalidItems = m_chkDataSource.GetParameterValue(m_invalidItemsParameter);

			QuotationMarkCategorizer quoteCategorizer = new QuotationMarkCategorizer(m_chkDataSource);
			m_qmProblems = new List<TextTokenSubstring>();

			// Body text and notes are tracked by separate processors that share one result list.
			QTokenProcessor bodyHandler = new QTokenProcessor(m_chkDataSource,
				m_charCategorizer, quoteCategorizer, desiredKey, m_qmProblems);
			QTokenProcessor noteHandler = new QTokenProcessor(m_chkDataSource,
				m_charCategorizer, quoteCategorizer, desiredKey, m_qmProblems);

			// A single VerseTextToken instance is reused for every body token.
			VerseTextToken verseToken = new VerseTextToken();

			foreach (ITextToken token in tokens)
			{
				if (token.TextType == TextType.Note)
				{
					// A new note closes any sequences still pending from the previous note.
					if (token.IsNoteStart)
						noteHandler.FinalizeResult();
					// Notes are processed without an accompanying VerseTextToken.
					noteHandler.ProcessToken(token, null);
					continue;
				}

				bool isBodyText = token.TextType == TextType.Verse ||
					token.TextType == TextType.Other || token.IsParagraphStart;
				if (!isBodyText)
					continue;

				// Body text: finish any note in progress, then carry on with the body.
				verseToken.Token = token;
				noteHandler.FinalizeResult();
				bodyHandler.ProcessToken(token, verseToken);
			}

			// Flush whatever is still pending: notes first, then body text.
			noteHandler.FinalizeResult();
			bodyHandler.FinalizeResult();
			return m_qmProblems;
		}
Exemple #4
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Initializes a new instance of the <see cref="QTokenProcessor"/> class: stores the
		/// collaborators, reads the "VerboseQuotes" setting, prepares the localized messages
		/// and builds the regular expressions used for quote and non-quote matching.
		/// </summary>
		/// <param name="dataSource">The checks data source.</param>
		/// <param name="charCategorizer">The character categorizer.</param>
		/// <param name="qmCategorizer">The quotation mark categorizer; its Pattern is the basis
		/// of both regular expressions.</param>
		/// <param name="desiredKey">The desired key (can be string.Empty).</param>
		/// <param name="results">The list that is stored as the result list.</param>
		/// ------------------------------------------------------------------------------------
		internal QTokenProcessor(IChecksDataSource dataSource,
			CharacterCategorizer charCategorizer, QuotationMarkCategorizer qmCategorizer,
			string desiredKey, List<TextTokenSubstring> results)
		{
			m_chkDataSource = dataSource;
			m_charCategorizer = charCategorizer;
			m_qmCategorizer = qmCategorizer;
			m_desiredKey = desiredKey;
			m_results = results;

			// Verbose mode is on only when the parameter is exactly "Yes".
			string verboseSetting = m_chkDataSource.GetParameterValue("VerboseQuotes");
			m_verboseQuotes = verboseSetting == "Yes";

			m_noCloserMsg = Localize("Unmatched opening mark: level {0}");
			m_noOpenerMsg = Localize("Unmatched closing mark: level {0}");

			string quotePattern = qmCategorizer.Pattern;
			m_regExQuotes = new Regex(quotePattern);

			// The quote pattern is embedded in a negated character class, so any closing
			// bracket in it has to be escaped first.
			// NOTE(review): inside [...] the '|' is a literal pipe, and only ']' is escaped
			// here - confirm this is intended for the patterns the categorizer produces.
			string bracketSafePattern = quotePattern.Replace("]", "\\]");
			m_regExNonQuotes = new Regex("[^" + bracketSafePattern + "|\\s]");
		}
Exemple #5
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Initializes a new instance of the <see cref="QuotationMarkToken"/> class from the
		/// given values, which are stored unchanged.
		/// </summary>
		/// <param name="tts">The text token substring, assigned to Tts.</param>
		/// <param name="categorizer">The quotation mark categorizer.</param>
		/// <param name="fIsOpener">Stored as the opener flag (going by the name, whether the
		/// mark is an opening mark).</param>
		/// <param name="fPossibleContinuer">Stored as the possible-continuer flag (going by
		/// the name, whether the mark may be a continuer).</param>
		/// ------------------------------------------------------------------------------------
		internal QuotationMarkToken(TextTokenSubstring tts, QuotationMarkCategorizer categorizer,
			bool fIsOpener, bool fPossibleContinuer)
		{
			this.Tts = tts;
			this.m_categorizer = categorizer;
			this.m_fPossibleContinuer = fPossibleContinuer;
			this.m_fIsOpener = fIsOpener;
		}