/// ------------------------------------------------------------------------------------
/// <summary>
/// Gets a list of TextTokenSubstrings containing the references and character offsets
/// where repeated words occur.
/// </summary>
/// <param name="tokens">The tokens (from the data source) to check for repeated words.
/// </param>
/// <param name="desiredKey">If looking for occurrences of a specific repeated word,
/// set this to be that word; otherwise pass an empty string.</param>
/// <returns>The list held in m_repeatedWords. It is re-created on every call and is the
/// same list instance that is handed to the body and note processors.</returns>
/// ------------------------------------------------------------------------------------
public List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens, string desiredKey)
{
#if DEBUG
    // Snapshot the tokens so they can be inspected in the debugger. The loop below
    // iterates the snapshot, so the source sequence is enumerated only once and the
    // local is genuinely used (no unused-variable warning).
    List<ITextToken> allTokens = new List<ITextToken>(tokens);
    tokens = allTokens;
#endif
    characterCategorizer = m_checksDataSource.CharacterCategorizer;

    // Get a string of words that may be validly repeated.
    // Words are separated by blanks.
    ValidItems = m_checksDataSource.GetParameterValue("RepeatableWords");

    // List of words that are known to be not repeatable.
    InvalidItems = m_checksDataSource.GetParameterValue("NonRepeatableWords");

    TextType prevTextType = TextType.Other;
    m_repeatedWords = new List<TextTokenSubstring>();

    // Body text and note text are tracked by separate processors; both are given the
    // same result list.
    ProcessRepeatedWords bodyProcessor =
        new ProcessRepeatedWords(characterCategorizer, m_repeatedWords, desiredKey);
    ProcessRepeatedWords noteProcessor =
        new ProcessRepeatedWords(characterCategorizer, m_repeatedWords, desiredKey);

    foreach (ITextToken tok in tokens)
    {
        // A new paragraph starts both processors over.
        if (tok.IsParagraphStart)
        {
            noteProcessor.Reset();
            bodyProcessor.Reset();
        }

        if (tok.TextType == TextType.Note)
        {
            // Each note is checked on its own.
            if (tok.IsNoteStart)
            {
                noteProcessor.Reset();
            }
            noteProcessor.ProcessToken(tok);
        }

        // When we leave a caption, we start over checking for repeated words.
        // A caption is a start of a paragraph, so we already start over
        // when we encounter a picture caption.
        // NOTE(review): only the note processor is reset here, and the reset happens
        // after a Note token has already been processed above - confirm this is intended.
        if (prevTextType == TextType.PictureCaption)
        {
            noteProcessor.Reset();
        }

        if (tok.TextType == TextType.Verse || tok.TextType == TextType.Other)
        {
            // Body text ends any note run that was in progress.
            noteProcessor.Reset();
            bodyProcessor.ProcessToken(tok);
        }

        // A chapter number starts the body processor over.
        if (tok.TextType == TextType.ChapterNumber)
        {
            bodyProcessor.Reset();
        }

        prevTextType = tok.TextType;
    }

    return m_repeatedWords;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Gets a list of TextTokenSubstrings containing the references and character offsets
/// where repeated words occur.
/// </summary>
/// <param name="tokens">The tokens (from the data source) to check for repeated words.
/// </param>
/// <param name="desiredKey">If looking for occurrences of a specific repeated word,
/// set this to be that word; otherwise pass an empty string.</param>
/// <returns>The list held in m_repeatedWords. It is re-created on every call and is the
/// same list instance that is handed to the body and note processors.</returns>
/// ------------------------------------------------------------------------------------
public List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens, string desiredKey)
{
#if DEBUG
    // Keep a materialized copy of the tokens for inspection in the debugger. Iterating
    // this copy below means the local is really used (so no dummy statement is needed
    // to quiet the compiler) and the source sequence is enumerated only once.
    List<ITextToken> allTokens = new List<ITextToken>(tokens);
    tokens = allTokens;
#endif
    characterCategorizer = m_checksDataSource.CharacterCategorizer;

    // Get a string of words that may be validly repeated.
    // Words are separated by blanks.
    ValidItems = m_checksDataSource.GetParameterValue("RepeatableWords");

    // List of words that are known to be not repeatable.
    InvalidItems = m_checksDataSource.GetParameterValue("NonRepeatableWords");

    TextType prevTextType = TextType.Other;
    m_repeatedWords = new List<TextTokenSubstring>();

    // One processor for body text and one for notes; both receive the same result list.
    ProcessRepeatedWords bodyProcessor =
        new ProcessRepeatedWords(characterCategorizer, m_repeatedWords, desiredKey);
    ProcessRepeatedWords noteProcessor =
        new ProcessRepeatedWords(characterCategorizer, m_repeatedWords, desiredKey);

    foreach (ITextToken tok in tokens)
    {
        // Both processors start over at the beginning of a paragraph.
        if (tok.IsParagraphStart)
        {
            noteProcessor.Reset();
            bodyProcessor.Reset();
        }

        if (tok.TextType == TextType.Note)
        {
            // The start of a note begins a fresh note run.
            if (tok.IsNoteStart)
            {
                noteProcessor.Reset();
            }
            noteProcessor.ProcessToken(tok);
        }

        // When we leave a caption, we start over checking for repeated words.
        // A caption is a start of a paragraph, so we already start over
        // when we encounter a picture caption.
        // NOTE(review): this resets only the note processor, and does so after a Note
        // token has already been processed above - confirm this is intended.
        if (prevTextType == TextType.PictureCaption)
        {
            noteProcessor.Reset();
        }

        if (tok.TextType == TextType.Verse || tok.TextType == TextType.Other)
        {
            // Body text interrupts any note run in progress.
            noteProcessor.Reset();
            bodyProcessor.ProcessToken(tok);
        }

        // The body processor starts over at a chapter number.
        if (tok.TextType == TextType.ChapterNumber)
        {
            bodyProcessor.Reset();
        }

        prevTextType = tok.TextType;
    }

    return m_repeatedWords;
}