/// ------------------------------------------------------------------------------------
/// <summary>
/// Scans the given tokens and returns a TextTokenSubstring for every occurrence of
/// the desiredKey.
/// </summary>
/// <remarks>
/// Configuration is read from m_checksDataSource on every call: the punctuation
/// patterns (or, when none are supplied, the valid/invalid item parameters), the
/// checking level, the whitespace representation and the preferred locale.
/// Note tokens and body-text tokens are fed to two separate ProcessPunctationTokens
/// instances. The returned list is the same instance stored in
/// m_punctuationSequences.
/// </remarks>
/// <param name="tokens">The tokens to scan. Tokens whose Text is null or whose
/// Locale (null treated as empty) differs from the "PreferredLocale" parameter are
/// skipped.</param>
/// <param name="desiredKey">e.g., _[_ or empty string to look for all patterns</param>
/// <returns>The list of punctuation sequences that were collected.</returns>
/// ------------------------------------------------------------------------------------
public List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens, string desiredKey)
{
#if DEBUG
    // Debug-only snapshot of the incoming tokens (presumably for inspection in a debugger).
    List<ITextToken> debugTokenSnapshot = new List<ITextToken>(tokens);
    if (debugTokenSnapshot.Count == 0)
    {
        // Touching the list keeps the compiler from complaining about a variable
        // that is assigned but never used.
    }
#endif

    m_characterCategorizer = m_checksDataSource.CharacterCategorizer;

    // Decide where the valid/invalid pattern inventories come from.
    string patternsXml = m_checksDataSource.GetParameterValue("PunctuationPatterns");
    bool hasPatternsXml = patternsXml != null && patternsXml.Trim().Length > 0;
    if (!hasPatternsXml)
    {
        // No pattern XML supplied: fall back to the separate item parameters.
        ValidItems = m_checksDataSource.GetParameterValue(kValidItemsParameter);
        InvalidItems = m_checksDataSource.GetParameterValue(kInvalidItemsParameter);
    }
    else
    {
        m_validItemsList = new List<string>();
        m_invalidItemsList = new List<string>();

        string defaultWsName = m_checksDataSource.GetParameterValue("DefaultWritingSystemName");
        PuncPatternsList loadedPatterns = PuncPatternsList.Load(patternsXml, defaultWsName);
        foreach (PuncPattern entry in loadedPatterns)
        {
            if (entry.Valid)
            {
                m_validItemsList.Add(entry.Pattern);
            }
            else
            {
                m_invalidItemsList.Add(entry.Pattern);
            }
        }
    }

    // Anything other than "Advanced" or "Intermediate" (including null) means Basic.
    string levelName = m_checksDataSource.GetParameterValue("PunctCheckLevel");
    CheckingLevel checkingLevel;
    if (levelName == "Advanced")
    {
        checkingLevel = CheckingLevel.Advanced;
    }
    else if (levelName == "Intermediate")
    {
        checkingLevel = CheckingLevel.Intermediate;
    }
    else
    {
        checkingLevel = CheckingLevel.Basic;
    }

    // Only the first character of the configured whitespace representation is used.
    string configuredWhitespaceRep = m_checksDataSource.GetParameterValue("PunctWhitespaceChar");
    if (!String.IsNullOrEmpty(configuredWhitespaceRep))
    {
        s_whitespaceRep = configuredWhitespaceRep.Substring(0, 1);
    }

    string preferredLocale = m_checksDataSource.GetParameterValue("PreferredLocale");
    if (preferredLocale == null)
    {
        preferredLocale = string.Empty;
    }

    QuotationMarkCategorizer quoteCategorizer = new QuotationMarkCategorizer(m_checksDataSource);

    // Two independent processing state machines: one for body text, one for notes.
    ProcessPunctationTokens bodyTextProcessor = new ProcessPunctationTokens(
        m_characterCategorizer, quoteCategorizer, checkingLevel);
    ProcessPunctationTokens noteTextProcessor = new ProcessPunctationTokens(
        m_characterCategorizer, quoteCategorizer, checkingLevel);

    m_punctuationSequences = new List<TextTokenSubstring>();

    // Route each usable token to the note or body processor.
    foreach (ITextToken token in tokens)
    {
        if (token.Text == null)
        {
            continue;
        }

        string tokenLocale = token.Locale ?? string.Empty;
        if (tokenLocale != preferredLocale)
        {
            continue;
        }

        if (token.TextType == TextType.Note)
        {
            // A new note is starting: finalize whatever the previous note left pending.
            if (token.IsNoteStart)
            {
                noteTextProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);
            }

            noteTextProcessor.ProcessToken(token, desiredKey, m_punctuationSequences);
            continue;
        }

        if (token.TextType == TextType.Verse || token.TextType == TextType.Other)
        {
            // Body text: finalize any note that was in progress, then continue with the body.
            noteTextProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);
            bodyTextProcessor.ProcessToken(token, desiredKey, m_punctuationSequences);
            continue;
        }

        // Any other text type only matters when it starts a paragraph.
        if (token.IsParagraphStart)
        {
            bodyTextProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);
            bodyTextProcessor.TreatAsParagraphStart = true;
        }
    }

    // Finalize both processors: notes first, then body text.
    noteTextProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);
    bodyTextProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);

    return m_punctuationSequences;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Returns a TextTokenSubstring for each occurrence of the desiredKey found in the
/// supplied tokens.
/// </summary>
/// <remarks>
/// Each call re-reads its settings from m_checksDataSource ("PunctuationPatterns",
/// "PunctCheckLevel", "PunctWhitespaceChar", "PreferredLocale", and the valid/invalid
/// item parameters when no pattern XML is present). Results accumulate in
/// m_punctuationSequences, which is also the value returned.
/// </remarks>
/// <param name="tokens">Tokens to examine; those with a null Text, or with a Locale
/// (null counted as empty) that is not the preferred locale, are ignored.</param>
/// <param name="desiredKey">e.g., _[_ or empty string to look for all patterns</param>
/// <returns>The collected punctuation sequences.</returns>
/// ------------------------------------------------------------------------------------
public List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens, string desiredKey)
{
#if DEBUG
    // Materialized copy of the tokens, built only in debug builds.
    List<ITextToken> tokensForDebugging = new List<ITextToken>(tokens);
    if (tokensForDebugging.Count == 0)
    {
        // Reading Count keeps the compiler from complaining that the variable is
        // assigned but never used.
    }
#endif

    m_characterCategorizer = m_checksDataSource.CharacterCategorizer;

    string xmlPatterns = m_checksDataSource.GetParameterValue("PunctuationPatterns");
    if (xmlPatterns == null || xmlPatterns.Trim().Length == 0)
    {
        // Without pattern XML the inventories come from the item parameters instead.
        ValidItems = m_checksDataSource.GetParameterValue(kValidItemsParameter);
        InvalidItems = m_checksDataSource.GetParameterValue(kInvalidItemsParameter);
    }
    else
    {
        // Rebuild both inventories from the pattern XML.
        m_validItemsList = new List<string>();
        m_invalidItemsList = new List<string>();

        PuncPatternsList parsedPatterns = PuncPatternsList.Load(
            xmlPatterns,
            m_checksDataSource.GetParameterValue("DefaultWritingSystemName"));

        foreach (PuncPattern candidate in parsedPatterns)
        {
            if (candidate.Valid)
            {
                m_validItemsList.Add(candidate.Pattern);
                continue;
            }

            m_invalidItemsList.Add(candidate.Pattern);
        }
    }

    // "Advanced" and "Intermediate" are recognized; every other value maps to Basic.
    string levelSetting = m_checksDataSource.GetParameterValue("PunctCheckLevel");
    CheckingLevel level =
        levelSetting == "Advanced" ? CheckingLevel.Advanced
        : levelSetting == "Intermediate" ? CheckingLevel.Intermediate
        : CheckingLevel.Basic;

    // Keep just the leading character of the whitespace representation, if one is set.
    string whitespaceSetting = m_checksDataSource.GetParameterValue("PunctWhitespaceChar");
    bool hasWhitespaceSetting = !String.IsNullOrEmpty(whitespaceSetting);
    if (hasWhitespaceSetting)
    {
        s_whitespaceRep = whitespaceSetting.Substring(0, 1);
    }

    string localeSetting = m_checksDataSource.GetParameterValue("PreferredLocale");
    string preferredLocale = localeSetting ?? string.Empty;

    QuotationMarkCategorizer quotationCategorizer =
        new QuotationMarkCategorizer(m_checksDataSource);

    // Separate state machines so note text and body text never share pending state.
    ProcessPunctationTokens bodyMachine = new ProcessPunctationTokens(
        m_characterCategorizer, quotationCategorizer, level);
    ProcessPunctationTokens noteMachine = new ProcessPunctationTokens(
        m_characterCategorizer, quotationCategorizer, level);

    m_punctuationSequences = new List<TextTokenSubstring>();

    foreach (ITextToken current in tokens)
    {
        // Only tokens that have text and are in the preferred locale are processed.
        bool isUsable = current.Text != null
            && (current.Locale ?? string.Empty) == preferredLocale;
        if (!isUsable)
        {
            continue;
        }

        if (current.TextType == TextType.Note)
        {
            if (current.IsNoteStart)
            {
                // Starting a new note: finalize sequences pending from the previous one.
                noteMachine.FinalizeResult(desiredKey, m_punctuationSequences, true);
            }

            noteMachine.ProcessToken(current, desiredKey, m_punctuationSequences);
        }
        else
        {
            bool isBodyText = current.TextType == TextType.Verse
                || current.TextType == TextType.Other;

            if (isBodyText)
            {
                // Body text ends any note in progress before it is processed itself.
                noteMachine.FinalizeResult(desiredKey, m_punctuationSequences, true);
                bodyMachine.ProcessToken(current, desiredKey, m_punctuationSequences);
            }
            else if (current.IsParagraphStart)
            {
                bodyMachine.FinalizeResult(desiredKey, m_punctuationSequences, true);
                bodyMachine.TreatAsParagraphStart = true;
            }
        }
    }

    // Finalize the note processor, then the body processor.
    noteMachine.FinalizeResult(desiredKey, m_punctuationSequences, true);
    bodyMachine.FinalizeResult(desiredKey, m_punctuationSequences, true);

    return m_punctuationSequences;
}