/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="ProcessPunctationTokens"/> class,
/// storing the supplied categorizers and checking level for later use.
/// </summary>
/// <param name="categorizer">The character categorizer.</param>
/// <param name="quotationCategorizer">The quotation mark categorizer.</param>
/// <param name="level">Indicator to determine how much to combine contiguous
/// punctuation sequences into patterns. Advanced = all contiguous punctuation and
/// whitespace characters form a single pattern; Intermediate = contiguous punctuation
/// forms a single pattern (delimited by whitespace); Basic = each punctuation character
/// stands alone. In all three modes, whitespace before and/or after a punctuation token
/// indicates whether it is word-initial, word-medial, word-final, or isolated.</param>
/// ------------------------------------------------------------------------------------
public ProcessPunctationTokens(
    CharacterCategorizer categorizer,
    QuotationMarkCategorizer quotationCategorizer,
    CheckingLevel level)
{
    // The three assignments are independent of one another.
    this.m_level = level;
    this.m_quotationCategorizer = quotationCategorizer;
    this.m_categorizer = categorizer;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Returns a TextTokenSubstring for all occurrences of the desired key in the given
/// tokens. The check parameters (punctuation patterns or valid/invalid item lists,
/// checking level, whitespace representation and preferred locale) are read from the
/// checks data source on every call.
/// </summary>
/// <param name="tokens">The tokens to scan. Tokens whose text is null, or whose locale
/// (null is treated as empty) differs from the "PreferredLocale" parameter, are
/// skipped.</param>
/// <param name="desiredKey">e.g., _[_ or empty string to look for all patterns</param>
/// <returns>The list of punctuation sequences gathered by the body-text and note
/// processors; the same list is kept in m_punctuationSequences.</returns>
/// ------------------------------------------------------------------------------------
public List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens, string desiredKey)
{
#if DEBUG
    // Debug-only materialized copy of the incoming tokens (enumerates them once here).
    List<ITextToken> tokenSnapshot = new List<ITextToken>(tokens);
    if (tokenSnapshot.Count == 0)
    {
        // Keep the compiler from complaining about assigning to a variable, but not using it.
    }
#endif

    m_characterCategorizer = m_checksDataSource.CharacterCategorizer;

    // Prefer the XML pattern list; fall back to the plain valid/invalid item parameters
    // when it is missing or blank.
    string patternsXml = m_checksDataSource.GetParameterValue("PunctuationPatterns");
    if (patternsXml == null || patternsXml.Trim().Length == 0)
    {
        ValidItems = m_checksDataSource.GetParameterValue(kValidItemsParameter);
        InvalidItems = m_checksDataSource.GetParameterValue(kInvalidItemsParameter);
    }
    else
    {
        m_validItemsList = new List<string>();
        m_invalidItemsList = new List<string>();
        PuncPatternsList loadedPatterns = PuncPatternsList.Load(
            patternsXml,
            m_checksDataSource.GetParameterValue("DefaultWritingSystemName"));
        foreach (PuncPattern entry in loadedPatterns)
        {
            (entry.Valid ? m_validItemsList : m_invalidItemsList).Add(entry.Pattern);
        }
    }

    // Any value other than "Advanced" or "Intermediate" (including null) means Basic.
    string levelSetting = m_checksDataSource.GetParameterValue("PunctCheckLevel");
    CheckingLevel level;
    if (levelSetting == "Advanced")
    {
        level = CheckingLevel.Advanced;
    }
    else if (levelSetting == "Intermediate")
    {
        level = CheckingLevel.Intermediate;
    }
    else
    {
        level = CheckingLevel.Basic;
    }

    // Only the first character of the configured whitespace representation is used.
    string whitespaceSetting = m_checksDataSource.GetParameterValue("PunctWhitespaceChar");
    if (!string.IsNullOrEmpty(whitespaceSetting))
    {
        s_whitespaceRep = whitespaceSetting.Substring(0, 1);
    }

    string preferredLocale = m_checksDataSource.GetParameterValue("PreferredLocale");
    if (preferredLocale == null)
    {
        preferredLocale = string.Empty;
    }

    QuotationMarkCategorizer quotationCategorizer =
        new QuotationMarkCategorizer(m_checksDataSource);

    // Two independent processing state machines: one for body text, one for notes.
    ProcessPunctationTokens bodyProcessor = new ProcessPunctationTokens(
        m_characterCategorizer, quotationCategorizer, level);
    ProcessPunctationTokens noteProcessor = new ProcessPunctationTokens(
        m_characterCategorizer, quotationCategorizer, level);

    m_punctuationSequences = new List<TextTokenSubstring>();

    // Route each usable token to the note or body processor.
    foreach (ITextToken token in tokens)
    {
        if (token.Text == null)
        {
            continue;
        }

        if ((token.Locale ?? string.Empty) != preferredLocale)
        {
            continue;
        }

        if (token.TextType == TextType.Note)
        {
            // A new note is starting: finalize any punctuation sequences from the
            // previous note first.
            if (token.IsNoteStart)
            {
                noteProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);
            }

            noteProcessor.ProcessToken(token, desiredKey, m_punctuationSequences);
        }
        else if (token.TextType == TextType.Verse || token.TextType == TextType.Other)
        {
            // Body text: finalize any note that was in progress, then continue with
            // the body text.
            noteProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);
            bodyProcessor.ProcessToken(token, desiredKey, m_punctuationSequences);
        }
        else if (token.IsParagraphStart)
        {
            // Paragraph start on a token of any other text type: finalize pending
            // body results and flag the body processor.
            bodyProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);
            bodyProcessor.TreatAsParagraphStart = true;
        }
    }

    // Finalize whatever is still pending, notes first and then body text.
    noteProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);
    bodyProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);

    return m_punctuationSequences;
}