/// ------------------------------------------------------------------------------------
/// <summary>
/// Creates a punctuation token processor that remembers the supplied categorizers and
/// checking level for later use.
/// </summary>
/// <param name="categorizer">The character categorizer to store.</param>
/// <param name="quotationCategorizer">The quotation mark categorizer to store.</param>
/// <param name="level">Indicates how much contiguous punctuation is combined into a
/// single pattern. Advanced = all contiguous punctuation and whitespace characters form
/// a single pattern; Intermediate = contiguous punctuation forms a single pattern
/// (delimited by whitespace); Basic = each punctuation character stands alone. In all
/// three modes, whitespace before and/or after a punctuation token indicates whether
/// it is word-initial, word-medial, word-final, or isolated.</param>
/// ------------------------------------------------------------------------------------
public ProcessPunctationTokens(CharacterCategorizer categorizer,
	QuotationMarkCategorizer quotationCategorizer, CheckingLevel level)
{
	// The three assignments are independent of each other.
	m_level = level;
	m_quotationCategorizer = quotationCategorizer;
	m_categorizer = categorizer;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Scans the given tokens and returns a TextTokenSubstring for every occurrence of the
/// punctuation pattern identified by desiredKey.
/// </summary>
/// <remarks>
/// The check settings are re-read from the checks data source on every call:
/// "PunctuationPatterns" (falling back to the valid/invalid items parameters when it is
/// null or blank), "PunctCheckLevel", "PunctWhitespaceChar" and "PreferredLocale".
/// Note tokens and body tokens are handled by two separate processors that append to
/// the same result list.
/// </remarks>
/// <param name="tokens">The tokens to scan. A token is skipped when its Text is null or
/// when its Locale (null treated as empty) differs from the preferred locale.</param>
/// <param name="desiredKey">e.g., _[_ or empty string to look for all patterns</param>
/// <returns>The list of punctuation sequences found; the same list is also stored in
/// m_punctuationSequences.</returns>
/// ------------------------------------------------------------------------------------
public List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens, string desiredKey)
{
#if DEBUG
	// Debug builds copy the tokens into a list up front.
	List<ITextToken> AllTokens = new List<ITextToken>(tokens);
	if (AllTokens.Count == 0)
	{
		// Keep the compiler from complaining about assigning to a variable, but not using it.
	}
#endif
	m_characterCategorizer = m_checksDataSource.CharacterCategorizer;

	// Prefer the XML pattern list; fall back to the plain valid/invalid item parameters.
	string patternsXml = m_checksDataSource.GetParameterValue("PunctuationPatterns");
	bool hasPatternsXml = patternsXml != null && patternsXml.Trim().Length > 0;
	if (!hasPatternsXml)
	{
		ValidItems = m_checksDataSource.GetParameterValue(kValidItemsParameter);
		InvalidItems = m_checksDataSource.GetParameterValue(kInvalidItemsParameter);
	}
	else
	{
		m_validItemsList = new List<string>();
		m_invalidItemsList = new List<string>();
		PuncPatternsList loadedPatterns = PuncPatternsList.Load(patternsXml,
			m_checksDataSource.GetParameterValue("DefaultWritingSystemName"));
		foreach (PuncPattern puncPattern in loadedPatterns)
		{
			if (puncPattern.Valid)
			{
				m_validItemsList.Add(puncPattern.Pattern);
			}
			else
			{
				m_invalidItemsList.Add(puncPattern.Pattern);
			}
		}
	}

	// Anything other than "Advanced" or "Intermediate" (including null) means Basic.
	string levelName = m_checksDataSource.GetParameterValue("PunctCheckLevel");
	CheckingLevel checkingLevel;
	if (levelName == "Advanced")
	{
		checkingLevel = CheckingLevel.Advanced;
	}
	else if (levelName == "Intermediate")
	{
		checkingLevel = CheckingLevel.Intermediate;
	}
	else
	{
		checkingLevel = CheckingLevel.Basic;
	}

	// Only the first character of the configured whitespace representation is used.
	string whitespaceSetting = m_checksDataSource.GetParameterValue("PunctWhitespaceChar");
	if (!String.IsNullOrEmpty(whitespaceSetting))
	{
		s_whitespaceRep = whitespaceSetting.Substring(0, 1);
	}

	string targetLocale = m_checksDataSource.GetParameterValue("PreferredLocale");
	if (targetLocale == null)
	{
		targetLocale = string.Empty;
	}

	QuotationMarkCategorizer quoteCategorizer = new QuotationMarkCategorizer(m_checksDataSource);

	// create processing state machines, one for body text, one for notes
	ProcessPunctationTokens bodyProcessor = new ProcessPunctationTokens(
		m_characterCategorizer, quoteCategorizer, checkingLevel);
	ProcessPunctationTokens noteProcessor = new ProcessPunctationTokens(
		m_characterCategorizer, quoteCategorizer, checkingLevel);

	List<TextTokenSubstring> sequences = new List<TextTokenSubstring>();
	m_punctuationSequences = sequences;

	foreach (ITextToken token in tokens)
	{
		if (token.Text == null)
		{
			continue;
		}
		if ((token.Locale ?? string.Empty) != targetLocale)
		{
			continue;
		}

		if (token.TextType == TextType.Note)
		{
			// A new note closes off any punctuation sequence pending from the previous note.
			if (token.IsNoteStart)
			{
				noteProcessor.FinalizeResult(desiredKey, sequences, true);
			}
			noteProcessor.ProcessToken(token, desiredKey, sequences);
			continue;
		}

		if (token.TextType == TextType.Verse || token.TextType == TextType.Other)
		{
			// Body text: close off any note in progress, then continue with the body.
			noteProcessor.FinalizeResult(desiredKey, sequences, true);
			bodyProcessor.ProcessToken(token, desiredKey, sequences);
			continue;
		}

		if (token.IsParagraphStart)
		{
			bodyProcessor.FinalizeResult(desiredKey, sequences, true);
			bodyProcessor.TreatAsParagraphStart = true;
		}
	}

	// Flush whatever is still pending: notes first, then body text.
	noteProcessor.FinalizeResult(desiredKey, sequences, true);
	bodyProcessor.FinalizeResult(desiredKey, sequences, true);

	return sequences;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Gets a list of TextTokenSubstrings containing the references and character offsets
/// where quotation problems occur.
/// </summary>
/// <remarks>
/// Note tokens and body tokens are handled by two separate QTokenProcessor instances
/// that report into the same result list. Tokens that are neither notes, verse text,
/// other text, nor paragraph starts are ignored.
/// </remarks>
/// <param name="tokens">The tokens (from the data source) to check for quotation
/// problems.</param>
/// <param name="desiredKey">Key handed to both token processors (documented by the
/// original author as: empty string).</param>
/// <returns>The list of problems found; the same list is also stored in m_qmProblems.</returns>
/// ------------------------------------------------------------------------------------
public List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens, string desiredKey)
{
	m_charCategorizer = m_chkDataSource.CharacterCategorizer;
	ValidItems = m_chkDataSource.GetParameterValue(m_validItemsParameter);
	InvalidItems = m_chkDataSource.GetParameterValue(m_invalidItemsParameter);

	QuotationMarkCategorizer quoteCategorizer = new QuotationMarkCategorizer(m_chkDataSource);

	List<TextTokenSubstring> problems = new List<TextTokenSubstring>();
	m_qmProblems = problems;

	// One processor for body text and one for notes, both reporting into the same list.
	QTokenProcessor bodyProcessor = new QTokenProcessor(m_chkDataSource,
		m_charCategorizer, quoteCategorizer, desiredKey, problems);
	QTokenProcessor noteProcessor = new QTokenProcessor(m_chkDataSource,
		m_charCategorizer, quoteCategorizer, desiredKey, problems);

	// A single wrapper instance is re-pointed at each body token in turn.
	VerseTextToken verseWrapper = new VerseTextToken();

	foreach (ITextToken token in tokens)
	{
		if (token.TextType == TextType.Note)
		{
			// A new note closes off any sequences pending from the previous note.
			if (token.IsNoteStart)
			{
				noteProcessor.FinalizeResult();
			}
			noteProcessor.ProcessToken(token, null);
			continue;
		}

		bool isBodyToken = token.TextType == TextType.Verse ||
			token.TextType == TextType.Other || token.IsParagraphStart;
		if (!isBodyToken)
		{
			continue;
		}

		verseWrapper.Token = token;
		// Body text: close off any note in progress, then continue with the body.
		noteProcessor.FinalizeResult();
		bodyProcessor.ProcessToken(token, verseWrapper);
	}

	// Flush whatever is still pending: notes first, then body text.
	noteProcessor.FinalizeResult();
	bodyProcessor.FinalizeResult();

	return problems;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="QTokenProcessor"/> class.
/// </summary>
/// <param name="dataSource">The checks data source; also queried here for the
/// "VerboseQuotes" parameter.</param>
/// <param name="charCategorizer">The character categorizer.</param>
/// <param name="qmCategorizer">The quotation mark categorizer; its Pattern is used to
/// build the two regular expressions held by this processor.</param>
/// <param name="desiredKey">The desired key (can be string.Empty).</param>
/// <param name="results">The list stored as this processor's result list.</param>
/// ------------------------------------------------------------------------------------
internal QTokenProcessor(IChecksDataSource dataSource, CharacterCategorizer charCategorizer,
	QuotationMarkCategorizer qmCategorizer, string desiredKey, List<TextTokenSubstring> results)
{
	m_chkDataSource = dataSource;
	m_charCategorizer = charCategorizer;
	m_qmCategorizer = qmCategorizer;
	m_desiredKey = desiredKey;
	m_results = results;

	// Verbose mode is on only when the parameter is exactly "Yes".
	string verboseSetting = dataSource.GetParameterValue("VerboseQuotes");
	m_verboseQuotes = verboseSetting == "Yes";

	m_noCloserMsg = Localize("Unmatched opening mark: level {0}");
	m_noOpenerMsg = Localize("Unmatched closing mark: level {0}");

	string quotePattern = qmCategorizer.Pattern;
	m_regExQuotes = new Regex(quotePattern);

	// Negated character class built from the pattern text plus '|' and whitespace.
	// Closing brackets in the pattern are escaped so they do not end the class early.
	string escapedPattern = quotePattern.Replace("]", "\\]");
	m_regExNonQuotes = new Regex("[^" + escapedPattern + "|\\s]");
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="QuotationMarkToken"/> class from the
/// given substring, categorizer and flags.
/// </summary>
/// <param name="tts">The text token substring, stored in Tts.</param>
/// <param name="categorizer">The quotation mark categorizer to store.</param>
/// <param name="fIsOpener">Value stored as the is-opener flag (presumably true when the
/// mark opens a quotation).</param>
/// <param name="fPossibleContinuer">Value stored as the possible-continuer flag
/// (presumably true when the mark may be a continuation mark).</param>
/// ------------------------------------------------------------------------------------
internal QuotationMarkToken(TextTokenSubstring tts, QuotationMarkCategorizer categorizer,
	bool fIsOpener, bool fPossibleContinuer)
{
	// Flags first, then the references; the assignments are independent.
	m_fPossibleContinuer = fPossibleContinuer;
	m_fIsOpener = fIsOpener;
	m_categorizer = categorizer;
	Tts = tts;
}