Ejemplo n.º 1
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Gets a list of TextTokenSubstrings containing the references and character offsets
        /// where repeated words occur.
        /// </summary>
        /// <param name="tokens">The tokens (from the data source) to check for repeated words.
        /// </param>
        /// <param name="desiredKey">If looking for occurrences of a specific repeated word,
        /// set this to be that word; otherwise pass an empty string.</param>
        /// <returns>The list of TextTokenSubstrings collected while processing the tokens.</returns>
        /// ------------------------------------------------------------------------------------
        public List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens, string desiredKey)
        {
#if DEBUG
            // Debug builds materialize the incoming tokens into a list; nothing below reads
            // this local. Note that it enumerates 'tokens' one extra time in debug builds.
            List<ITextToken> debugTokens = new List<ITextToken>(tokens);
#endif
            characterCategorizer = m_checksDataSource.CharacterCategorizer;

            // "RepeatableWords": blank-separated words that may legitimately appear twice
            // in a row. "NonRepeatableWords": words known not to be repeatable.
            // Both values are only stored here; this method does not read them back.
            ValidItems = m_checksDataSource.GetParameterValue("RepeatableWords");
            InvalidItems = m_checksDataSource.GetParameterValue("NonRepeatableWords");

            // Body text and notes are scanned by separate processors, but both are handed
            // the same result list.
            m_repeatedWords = new List<TextTokenSubstring>();
            ProcessRepeatedWords bodyScanner =
                new ProcessRepeatedWords(characterCategorizer, m_repeatedWords, desiredKey);
            ProcessRepeatedWords noteScanner =
                new ProcessRepeatedWords(characterCategorizer, m_repeatedWords, desiredKey);

            TextType previousType = TextType.Other;
            foreach (ITextToken token in tokens)
            {
                // A paragraph start makes both processors start over.
                if (token.IsParagraphStart)
                {
                    noteScanner.Reset();
                    bodyScanner.Reset();
                }

                TextType currentType = token.TextType;

                if (currentType == TextType.Note)
                {
                    // Each note is checked on its own.
                    if (token.IsNoteStart)
                    {
                        noteScanner.Reset();
                    }
                    noteScanner.ProcessToken(token);
                }

                // The token right after a picture caption starts a fresh check. The caption
                // itself begins a paragraph, so entering one is covered by the paragraph reset.
                // NOTE(review): only the note processor is reset here - confirm that the body
                // processor is intentionally left alone.
                if (previousType == TextType.PictureCaption)
                {
                    noteScanner.Reset();
                }

                if (currentType == TextType.ChapterNumber)
                {
                    // A chapter number makes the body processor start over.
                    bodyScanner.Reset();
                }
                else if (currentType == TextType.Verse || currentType == TextType.Other)
                {
                    // Body text resets the note processor before being processed itself.
                    noteScanner.Reset();
                    bodyScanner.ProcessToken(token);
                }

                previousType = currentType;
            }

            return m_repeatedWords;
        }
Ejemplo n.º 2
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Gets a list of TextTokenSubstrings containing the references and character offsets
		/// where repeated words occur.
		/// </summary>
		/// <param name="tokens">The tokens (from the data source) to check for repeated words.
		/// </param>
		/// <param name="desiredKey">If looking for occurrences of a specific repeated word,
		/// set this to be that word; otherwise pass an empty string.</param>
		/// <returns>The list of TextTokenSubstrings collected while processing the tokens.</returns>
		/// ------------------------------------------------------------------------------------
		public List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens, string desiredKey)
		{
#if DEBUG
			// Debug-only copy of the incoming tokens. The empty Count check exists solely so
			// the compiler does not report the local as assigned but never used.
			List<ITextToken> tokenSnapshot = new List<ITextToken>(tokens);
			if (tokenSnapshot.Count == 0)
			{
			}
#endif
			characterCategorizer = m_checksDataSource.CharacterCategorizer;
			// Blank-separated words that are allowed to be repeated.
			ValidItems = m_checksDataSource.GetParameterValue("RepeatableWords");
			// Words that are known not to be repeatable.
			InvalidItems = m_checksDataSource.GetParameterValue("NonRepeatableWords");

			TextType lastType = TextType.Other;
			m_repeatedWords = new List<TextTokenSubstring>();

			// One processor for body text and one for notes; both are given the same result list.
			ProcessRepeatedWords bodyWords =
				new ProcessRepeatedWords(characterCategorizer, m_repeatedWords, desiredKey);
			ProcessRepeatedWords noteWords =
				new ProcessRepeatedWords(characterCategorizer, m_repeatedWords, desiredKey);

			foreach (ITextToken token in tokens)
			{
				// Both processors start over at a paragraph start.
				if (token.IsParagraphStart)
				{
					noteWords.Reset();
					bodyWords.Reset();
				}

				// Classify the token once; the steps below must run in this order.
				TextType tokenType = token.TextType;
				bool isNote = tokenType == TextType.Note;
				bool isBodyText = tokenType == TextType.Verse || tokenType == TextType.Other;
				bool isChapterNumber = tokenType == TextType.ChapterNumber;

				if (isNote)
				{
					// The note processor starts over at the start of each note.
					if (token.IsNoteStart)
						noteWords.Reset();
					noteWords.ProcessToken(token);
				}

				// Start over once a picture caption has been left. Entering a caption needs no
				// extra handling because a caption starts a paragraph.
				// NOTE(review): only the note processor is reset here - confirm that is intended.
				if (lastType == TextType.PictureCaption)
					noteWords.Reset();

				if (isBodyText)
				{
					// Body text resets the note processor, then goes to the body processor.
					noteWords.Reset();
					bodyWords.ProcessToken(token);
				}

				// The body processor starts over at a chapter number.
				if (isChapterNumber)
					bodyWords.Reset();

				lastType = tokenType;
			}

			return m_repeatedWords;
		}