Пример #1
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Counts how many characters make up the character at the given index once the run
        /// of diacritics that immediately follows it is included.
        /// </summary>
        /// <param name="tok">The text token whose text is examined.</param>
        /// <param name="iBaseCharacter">The index of the base character in the token text.</param>
        /// <returns>1 for the base character plus the number of consecutive diacritics that
        /// directly follow it.</returns>
        /// ------------------------------------------------------------------------------------
        private int GetLengthOfChar(ITextToken tok, int iBaseCharacter)
        {
            string text = tok.Text;
            int    end  = iBaseCharacter + 1;

            // Step over every diacritic that directly follows the base character; stop at
            // the first non-diacritic or at the end of the text.
            for (; end < text.Length; end++)
            {
                if (!m_categorizer.IsDiacritic(text[end]))
                {
                    break;
                }
            }

            // "end" is now one past the last character that belongs to the base character.
            return end - iBaseCharacter;
        }
Пример #2
0
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Remembers the paragraph style of a token that starts a paragraph and resets the
 /// per-paragraph tracking state. Tokens that do not start a paragraph are ignored.
 /// </summary>
 /// <param name="tok">The Scripture token.</param>
 /// ------------------------------------------------------------------------------------
 private void RecordParagraphStyle(ITextToken tok)
 {
     // Nothing to record unless this token begins a new paragraph.
     if (!tok.IsParagraphStart)
     {
         return;
     }

     m_foundParagraphText = false;
     m_paragraphStyle     = tok.ParaStyleName;

     // When paragraphs are processed separately the sentence-start flag is cleared at
     // each paragraph boundary.
     if (m_processParagraphsSeparately)
     {
         m_fAtSentenceStart = false;
     }
 }
Пример #3
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Initializes a new instance of the <see cref="TextTokenSubstring"/> class with a
		/// single source token.
		/// </summary>
		/// <param name="token">The token that contains the substring.</param>
		/// <param name="offset">The zero-based offset of the substring in the token text.</param>
		/// <param name="length">The number of characters in the substring.</param>
		/// <param name="msg">The error message.</param>
		/// <exception cref="ArgumentNullException"><paramref name="token"/> is null.</exception>
		/// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> or
		/// <paramref name="length"/> is negative, or the range they describe does not fit
		/// inside the token text.</exception>
		/// ------------------------------------------------------------------------------------
		public TextTokenSubstring(ITextToken token, int offset, int length, string msg)
		{
			if (token == null)
				throw new ArgumentNullException("token");
			if (offset < 0)
				throw new ArgumentOutOfRangeException("offset", "Offset must be 0 or greater.");

			int textLength = token.Text.Length;
			if (offset > textLength)
				throw new ArgumentOutOfRangeException("offset");
			if (length < 0)
				throw new ArgumentOutOfRangeException("length", "Length must be 0 or greater.");
			// Compare by subtraction: "offset + length" can overflow int and wrap to a
			// negative value, which would let an out-of-range length slip through.
			if (length > textLength - offset)
				throw new ArgumentOutOfRangeException("length");

			m_tokens = new List<ITextToken>(new ITextToken[] { token });
			m_offset = offset;
			m_length = length;
			m_message = msg;
		}
Пример #4
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Scans one Scripture token for lowercase word-forming characters that should have
        /// been capitalized and appends a substring for each problem found.
        /// </summary>
        /// <param name="tok">The token to scan.</param>
        /// <param name="result">The list that receives a substring for every error.</param>
        /// ------------------------------------------------------------------------------------
        public void ProcessToken(ITextToken tok, List <TextTokenSubstring> result)
        {
            // Abbreviations are blanked out so that their characters are not examined.
            string text = RemoveAbbreviations(tok);

            RecordParagraphStyle(tok);
            RecordCharacterStyle(tok);

            // An empty token has no character whose case could be checked.
            if (tok.Text == String.Empty)
            {
                return;
            }

            for (int pos = 0; pos < text.Length; pos++)
            {
                char current = text[pos];

                if (IsSentenceFinalPunctuation(current))
                {
                    // A new sentence starts unless the punctuation is directly followed by
                    // a digit; punctuation at the very end of the token always counts.
                    int next = pos + 1;
                    m_fAtSentenceStart = next >= text.Length || !char.IsDigit(text[next]);
                    continue;
                }

                if (!m_categorizer.IsWordFormingCharacter(current))
                {
                    continue;
                }

                if (m_categorizer.IsLower(current))
                {
                    TextTokenSubstring tts = GetSubstring(tok, pos);

                    // The style checks run first (paragraph style, then character style);
                    // the sentence-initial error is only reported when neither of them
                    // has reported an error for this letter.
                    bool styleErrorReported =
                        CheckForParaCapitalizationError(tok, tts, result) ||
                        CheckForCharStyleCapilizationError(tok, tts, result);

                    if (!styleErrorReported && m_fAtSentenceStart)
                    {
                        tts.Message = CapitalizationCheck.GetErrorMessage(m_checksDataSource,
                                                                          StyleCapInfo.CapCheckTypes.SentenceInitial, string.Empty);
                        result.Add(tts);
                    }
                }

                // A word-forming character has now been seen in the sentence, the character
                // style run, and the paragraph.
                m_fAtSentenceStart   = false;
                m_foundCharacterText = true;
                m_foundParagraphText = true;
            }
        }
Пример #5
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Reports every verse (or run of consecutive verses) of the current chapter that has
        /// no entry in <paramref name="versesFound"/>.
        /// </summary>
        /// <param name="versesFound">Tokens indexed by verse number; a null entry at index 1
        /// or above marks a missing verse. Index 0 supplies the token that errors are
        /// attached to until a found verse is encountered.</param>
        /// <param name="bookId">The book number used to build the missing references.</param>
        /// <param name="chapNumber">The chapter number used to build the missing references.</param>
        /// ------------------------------------------------------------------------------------
        private void CheckForMissingVerses(ITextToken[] versesFound, int bookId, int chapNumber)
        {
            // The token most recently seen before the current position; errors for missing
            // verses are attached to it.
            ITextToken anchor    = versesFound[0];
            int        lastIndex = versesFound.Length - 1;

            int verse = 1;
            while (verse <= lastIndex)
            {
                ITextToken found = versesFound[verse];
                if (found != null)
                {
                    anchor = found;
                    verse++;
                    continue;
                }

                // A missing verse was hit. Extend the range over all directly following
                // verses that are missing as well.
                int firstMissing = verse;
                int lastMissing  = verse;
                while (lastMissing < lastIndex && versesFound[lastMissing + 1] == null)
                {
                    lastMissing++;
                }

                anchor.MissingStartRef = new BCVRef(bookId, chapNumber, firstMissing);

                // When verse 1 is missing and the anchor is itself a verse number token,
                // the error is placed at the start of that token; in every other case it
                // is placed at the end of the anchor token's text.
                int offset;
                if (anchor.TextType == TextType.VerseNumber && firstMissing == 1)
                {
                    offset = 0;
                }
                else
                {
                    offset = anchor.Text.Length;
                }

                if (firstMissing != lastMissing)
                {
                    anchor.MissingEndRef = new BCVRef(bookId, chapNumber, lastMissing);
                    AddError(anchor, offset, 0, Localize("Missing verse numbers {0}-{1}"),
                             firstMissing, lastMissing);
                }
                else
                {
                    AddError(anchor, offset, 0, Localize("Missing verse number {0}"), firstMissing);
                }

                // Resume scanning after the range that was just reported.
                verse = lastMissing + 1;
            }
        }
Пример #6
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Extracts the punctuation sequences from this token by classifying each character
        /// as punctuation, digit, whitespace, or anything else.
        /// </summary>
        /// <param name="tok">The token whose text is scanned.</param>
        /// <param name="desiredKey">The key passed on when a sequence is finalized.</param>
        /// <param name="result">The list passed on when a sequence is finalized.</param>
        /// ------------------------------------------------------------------------------------
        public void ProcessToken(ITextToken tok, string desiredKey, List <TextTokenSubstring> result)
        {
            // A paragraph start (real or requested through the flag) is handled before any
            // character is looked at; the flag is consumed by this token.
            if (tok.IsParagraphStart || m_fTreatAsParagraphStart)
            {
                ProcessWhitespaceOrParagraph(true);
                m_fTreatAsParagraphStart = false;
            }

            for (int pos = 0; pos < tok.Text.Length; ++pos)
            {
                char current = tok.Text[pos];

                if (m_categorizer.IsPunctuation(current))
                {
                    ProcessPunctuation(tok, pos);
                    continue;
                }

                if (char.IsDigit(current))
                {
                    // If the previous sequence was finalized with a number and exactly one
                    // punctuation mark has been collected since, the mark sits between two
                    // numbers (e.g. 3:14) and the sequence is ignored.
                    bool singleMarkBetweenNumbers =
                        m_finalizedWithNumber && m_puncts.Count == 1 &&
                        m_puncts[0].TokenType == PunctuationTokenType.punctuation;

                    if (singleMarkBetweenNumbers)
                    {
                        m_puncts.Clear();
                        continue;
                    }

                    ProcessDigit(tok, pos);
                    FinalizeResult(desiredKey, result, false);
                    continue;
                }

                if (char.IsWhiteSpace(current))
                {
                    ProcessWhitespaceOrParagraph(false);
                    continue;
                }

                // Not punctuation, digit, or whitespace: this must be the start of a new
                // word, so any open punctuation sequence is finalized.
                FinalizeResult(desiredKey, result, false);
            }
        }
Пример #7
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Reports an error for every chapter of the current book that was not found.
        /// </summary>
        /// <param name="chaptersFound">Flags indexed by chapter number; index 0 is not
        /// examined. A false entry marks a missing chapter.</param>
        /// ------------------------------------------------------------------------------------
        private void CheckForMissingChapters(bool[] chaptersFound)
        {
            for (int chap = 1; chap < chaptersFound.Length; chap++)
            {
                // When a specific chapter is being checked (non-zero), all others are skipped.
                bool notTheRequestedChapter = m_nChapterToCheck != 0 && chap != m_nChapterToCheck;
                if (chaptersFound[chap] || notTheRequestedChapter)
                {
                    continue;
                }

                // Walk the chapter tokens in list order and keep the last one seen before
                // the first token whose number is greater than the missing chapter.
                ChapterToken precedingChapter = null;
                for (int i = 0; i < m_chapTokens.Count; i++)
                {
                    ChapterToken candidate = m_chapTokens[i];
                    if (candidate.ChapterNumber > chap)
                    {
                        break;
                    }
                    precedingChapter = candidate;
                }

                // TODO: Deal with what token to use if a book has no chapters at all.
                ITextToken token;
                int        offset = 0;
                if (precedingChapter != null)
                {
                    // Report after the preceding chapter's text, or at its start when that
                    // chapter token is implicit.
                    token = precedingChapter.Token;
                    if (!precedingChapter.Implicit)
                    {
                        offset = token.Text.Length;
                    }
                }
                else if (m_chapTokens.Count > 0)
                {
                    // No chapter precedes the missing one: report at the start of the
                    // first chapter token.
                    token = m_chapTokens[0].Token;
                }
                else
                {
                    token = null;
                }

                if (token == null)
                {
                    continue;
                }

                token.MissingStartRef = new BCVRef(BCVRef.BookToNumber(token.ScrRefString), chap, 0);
                token.MissingEndRef   = null;
                AddError(token, offset, 0, Localize("Missing chapter number {0}"), chap);
            }
        }
Пример #8
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Scans the token text one character at a time and records every character that
        /// belongs to a punctuation pair, then runs the matching and overlap bookkeeping.
        /// </summary>
        /// <param name="tok">The token whose text is scanned.</param>
        /// <param name="desiredKey">The key passed on when results are finalized.</param>
        /// <param name="result">The list passed on when results are finalized.</param>
        /// ------------------------------------------------------------------------------------
        public void ProcessToken(ITextToken tok, string desiredKey, List <TextTokenSubstring> result)
        {
            // Finalize first when pairs that are closed by a paragraph have been found and
            // this token starts a paragraph whose style is not poetic.
            bool finalizeAtParagraph =
                AnyFoundPairsClosedByPara && tok.IsParagraphStart &&
                !m_styleCategorizer.IsPoeticStyle(tok.ParaStyleName);

            if (finalizeAtParagraph)
            {
                FinalizeResult(desiredKey, result);
            }

            for (int pos = 0; pos < tok.Text.Length; pos++)
            {
                string current = tok.Text[pos].ToString();
                if (!m_pairList.BelongsToPair(current))
                {
                    continue;
                }

                StoreFoundPairToken(tok, pos);
                RemoveMatchedPunctAtEndOfFirstWordInIntroOutline(tok, pos);
                RemoveIfMatchedPairFound();
                RecordOverlappingPairs();
            }
        }
Пример #9
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Blanks out every known abbreviation in the text of a Scripture token.
        /// </summary>
        /// <param name="tok">The Scripture token.</param>
        /// <returns>The token text with each occurrence of an abbreviation replaced by the
        /// same number of spaces, so the result has the same length as the original text
        /// and character indices stay valid.</returns>
        /// ------------------------------------------------------------------------------------
        private string RemoveAbbreviations(ITextToken tok)
        {
            string tokenText = tok.Text;

            foreach (string abbreviation in m_abbreviations)
            {
                // Skip null as well as empty entries: string.Replace rejects both, and a
                // null entry would fail on the Length access below.
                if (string.IsNullOrEmpty(abbreviation))
                {
                    continue;
                }

                string spaces = new string(' ', abbreviation.Length);
                tokenText = tokenText.Replace(abbreviation, spaces);
            }

            Debug.Assert(tok.Text.Length == tokenText.Length,
                         "Length of text should not change",
                         "Abbreviations are replaced by spaces, but the overall text length should stay the same.");
            return tokenText;
        }
Пример #10
0
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Initializes a new instance of the <see cref="ChapterToken"/> class from a chapter
 /// number token, parsing the chapter number from the token text. The token is marked
 /// as not implicit.
 /// </summary>
 /// <param name="token">The chapter number token.</param>
 /// <param name="chapterNumberFormat">The expression the token text must match; when it
 /// does not match, the chapter token is flagged as not valid.</param>
 /// ------------------------------------------------------------------------------------
 internal ChapterToken(ITextToken token, Regex chapterNumberFormat)
 {
     Token = token;
     if (!chapterNumberFormat.IsMatch(token.Text))
     {
         Valid = false;
     }

     // Accumulate the number digit by digit. Any non-digit character, or a digit run too
     // long to fit in an int, makes the chapter invalid with a number of -1 (previously
     // an overlong run silently wrapped around to a meaningless value).
     int number = 0;
     foreach (char ch in token.Text)
     {
         int digit = Char.IsDigit(ch) ? (int)Char.GetNumericValue(ch) : -1;
         if (digit < 0 || number > (int.MaxValue - digit) / 10)
         {
             Valid  = false;
             number = -1;
             break;
         }
         number = number * 10 + digit;
     }
     m_chapNumber = number;
     Implicit     = false;
 }
Пример #11
0
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Initializes a new instance of the <see cref="TextTokenSubstring"/> class with a
 /// single source token.
 /// </summary>
 /// <param name="token">The token that contains the substring.</param>
 /// <param name="offset">The zero-based offset of the substring in the token text.</param>
 /// <param name="length">The number of characters in the substring.</param>
 /// <param name="msg">The error message.</param>
 /// <exception cref="ArgumentNullException"><paramref name="token"/> is null.</exception>
 /// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> or
 /// <paramref name="length"/> is negative, or the range they describe does not fit
 /// inside the token text.</exception>
 /// ------------------------------------------------------------------------------------
 public TextTokenSubstring(ITextToken token, int offset, int length, string msg)
 {
     if (token == null)
     {
         throw new ArgumentNullException("token");
     }
     if (offset < 0)
     {
         throw new ArgumentOutOfRangeException("offset", "Offset must be 0 or greater.");
     }

     int textLength = token.Text.Length;
     if (offset > textLength)
     {
         throw new ArgumentOutOfRangeException("offset");
     }
     if (length < 0)
     {
         throw new ArgumentOutOfRangeException("length", "Length must be 0 or greater.");
     }
     // Compare by subtraction: "offset + length" can overflow int and wrap to a negative
     // value, which would let an out-of-range length slip through.
     if (length > textLength - offset)
     {
         throw new ArgumentOutOfRangeException("length");
     }

     m_tokens  = new List <ITextToken>(new ITextToken[] { token });
     m_offset  = offset;
     m_length  = length;
     m_message = msg;
 }
Пример #12
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Groups the given tokens into chapter tokens and verse tokens, then runs the
        /// chapter number checks, which report errors through the given handler.
        /// </summary>
        /// <param name="toks">The tokens to check.</param>
        /// <param name="record">Method to call to record errors.</param>
        /// ------------------------------------------------------------------------------------
        public void Check(IEnumerable <ITextToken> toks, RecordErrorHandler record)
        {
            GetParameters();

            m_recordError = record;
            m_versesFound = new List <int>();
            m_chapTokens.Clear();

            ChapterToken chapter = null;
            VerseToken   verse   = null;

            foreach (ITextToken token in toks)
            {
                // The first token seen is kept as a fallback; it is only necessary when a
                // chapter one is missing and a token is needed to report that.
                // NOTE(review): the fallback is only assigned while it is null and is not
                // cleared in this method - confirm it is reset elsewhere between runs.
                if (m_fallbackToken == null)
                {
                    m_fallbackToken = token;
                }

                if (token.TextType == TextType.ChapterNumber)
                {
                    // A new chapter begins; no verse has been seen in it yet.
                    chapter = new ChapterToken(token, m_chapterNumberFormat);
                    verse   = null;
                    m_chapTokens.Add(chapter);
                    continue;
                }

                if (token.TextType == TextType.VerseNumber)
                {
                    if (chapter == null)
                    {
                        // Verse number before any chapter number: assume chapter one.
                        chapter = new ChapterToken(token, 1);
                        m_chapTokens.Add(chapter);
                    }

                    verse = new VerseToken(token);
                    chapter.VerseTokens.Add(verse);
                    continue;
                }

                if (token.TextType != TextType.Verse)
                {
                    continue;
                }

                if (chapter == null && verse != null)
                {
                    // This case should not happen because chapter tokens are automatically
                    // created if a verse number token is encountered first.
                    Debug.Assert(false, "verse number token found without chapter number token");
                }
                else
                {
                    if (chapter == null)
                    {
                        // Verse text with neither a chapter nor a verse number before it:
                        // assume chapter one, using the verse text token as the default.
                        chapter = new ChapterToken(token, 1);
                        m_chapTokens.Add(chapter);
                    }

                    if (verse == null)
                    {
                        // Verse text with no verse number before it: assume verse one.
                        verse = new VerseToken(token, 1);
                        chapter.VerseTokens.Add(verse);
                    }
                }

                verse.IncrementVerseTextCount(token);
            }

            CheckChapterNumbers();
        }
Пример #13
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Adds the word described by <paramref name="wap"/> to the result list when the
		/// desired key is empty or equals the inventory text of the word's substring.
		/// </summary>
		/// <param name="tok">The token that contains the word.</param>
		/// <param name="wap">The word, with its offset in the token text.</param>
		/// ------------------------------------------------------------------------------------
		private void AddWord(ITextToken tok, WordAndPunct wap)
		{
			TextTokenSubstring wordSubstring =
				new TextTokenSubstring(tok, wap.Offset, wap.Word.Length);

			// An empty key means every word is wanted.
			bool wanted = desiredKey == "" || desiredKey == wordSubstring.InventoryText;
			if (wanted)
			{
				result.Add(wordSubstring);
			}
		}
Пример #14
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Initializes a new instance of the <see cref="TextTokenSubstring"/> class with a
		/// single source token and no error message (the message is passed on as null).
		/// </summary>
		/// <param name="token">The token that contains the substring.</param>
		/// <param name="offset">The offset of the substring in the token text.</param>
		/// <param name="length">The length of the substring.</param>
		/// ------------------------------------------------------------------------------------
		public TextTokenSubstring(ITextToken token, int offset, int length)
			: this(token, offset, length, null)
		{
			// All work is done by the four-argument constructor.
		}
Пример #15
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Reports a capitalization error when the current character style is registered as
		/// a capitalized character style. The check is made at most once per run of character
		/// text: it does nothing when character text has already been found, and it marks
		/// character text as found when it runs.
		/// </summary>
		/// <param name="tok">The Scripture token.</param>
		/// <param name="ttsFirstLetter">The token substring of the first word-forming character
		/// in the given token; the caller passes it only when that character is lowercase.</param>
		/// <param name="result">The list that receives the error substring.</param>
		/// <returns><c>true</c> if an error was added to the list of results; otherwise
		/// <c>false</c></returns>
		/// ------------------------------------------------------------------------------------
		private bool CheckForCharStyleCapilizationError(ITextToken tok,
			TextTokenSubstring ttsFirstLetter, List<TextTokenSubstring> result)
		{
			// Only the first word-forming character of the character style is of interest.
			if (m_foundCharacterText)
			{
				return false;
			}
			m_foundCharacterText = true;

			// Look the style up in the capitalized styles dictionary; only an entry of the
			// character style type requires an uppercase first letter here.
			StyleCapInfo capInfo;
			bool requiresCapital =
				m_allCapitalizedStyles.TryGetValue(m_characterStyle, out capInfo) &&
				capInfo.m_type == StyleInfo.StyleTypes.character;

			if (!requiresCapital)
			{
				return false;
			}

			ttsFirstLetter.InventoryText = m_characterStyle;
			ttsFirstLetter.Message = CapitalizationCheck.GetErrorMessage(m_checksDataSource,
				capInfo.m_capCheck, m_characterStyle);
			result.Add(ttsFirstLetter);
			return true;
		}
Пример #16
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Splits the token text into word-and-punctuation units with the character
		/// categorizer and processes each unit in turn.
		/// </summary>
		/// <param name="tok">The token whose text is processed.</param>
		/// ------------------------------------------------------------------------------------
		public void ProcessToken(ITextToken tok)
		{
			string text = tok.Text;

			foreach (WordAndPunct wap in characterCategorizer.WordAndPuncts(text))
			{
				ProcessWord(tok, wap);
			}
		}
Пример #17
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Adds an error for a missing chapter number and records the missing reference on
		/// the token the error is attached to.
		/// </summary>
		/// <param name="token">The token the error is reported against.</param>
		/// <param name="missingChapter">The number of the missing chapter.</param>
		/// <param name="offset">The offset in the token text at which the error is placed.</param>
		/// ------------------------------------------------------------------------------------
		private void AddMissingChapterError(ITextToken token, int missingChapter, int offset)
		{
			// Start from the token's own reference and point it at verse 0 of the missing
			// chapter.
			BCVRef missingRef = new BCVRef(token.ScrRefString)
			{
				Chapter = missingChapter,
				Verse = 0
			};

			token.MissingStartRef = missingRef;
			AddError(token, offset, 0, Localize("Missing chapter number {0}"), missingChapter);
		}
Пример #18
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Initializes a new instance of the <see cref="ChapterToken"/> class with an
		/// explicitly supplied chapter number; the chapter token is marked as implicit.
		/// </summary>
		/// <param name="token">The token that stands in for the chapter.</param>
		/// <param name="chapNumber">The chapter number to use.</param>
		/// ------------------------------------------------------------------------------------
		internal ChapterToken(ITextToken token, int chapNumber)
		{
			this.Token = token;
			this.m_chapNumber = chapNumber;

			// The number was supplied by the caller rather than parsed from the token text.
			this.Implicit = true;
		}
Пример #19
0
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Initializes a new instance of the <see cref="VerseToken"/> class from a verse
 /// number token.
 /// </summary>
 /// <param name="verseNumber">The verse number token to store.</param>
 /// ------------------------------------------------------------------------------------
 internal VerseToken(ITextToken verseNumber)
 {
     this.m_verseNumberToken = verseNumber;
 }
Пример #20
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Checks the verse number tokens of one chapter. For each verse token it reports
        /// spaces in the verse number, invalid numbers, duplicate or unexpected numbers,
        /// numbers out of range or out of order, missing verse parts (a/b), and missing verse
        /// text. After all verse tokens have been examined, the verses that were never found
        /// are reported through CheckForMissingVerses.
        /// </summary>
        /// <param name="chapToken">The chapter token whose verse tokens are checked.</param>
        /// <param name="bookId">The book number, used to look up the last verse of the chapter
        /// and passed on when missing verses are reported.</param>
        /// ------------------------------------------------------------------------------------
        private void CheckVerseNumbers(ChapterToken chapToken, int bookId)
        {
            int  lastVrsInChap     = m_versification.LastVerse(bookId, chapToken.ChapterNumber);
            int  nextExpectedVerse = 1;
            bool expectingPartB    = false;
            int  prevVerseStart    = 0;
            int  prevVerseEnd      = 0;

            // Indexed by verse number (1..lastVrsInChap); slot 0 holds the chapter token.
            ITextToken[] versesFound = new ITextToken[lastVrsInChap + 1];
            versesFound[0] = chapToken.Token;

            foreach (VerseToken verseToken in chapToken.VerseTokens)
            {
                ITextToken token            = verseToken.VerseNumber;
                ITextToken reportedToken    = token;
                string     msg              = null;
                int        offset           = 0;
                int        length           = token.Text.Length;
                object[]   errorArgs        = null;
                // When true, the verses of this token are recorded as found even though an
                // error is reported for it.
                bool       countFoundVerses = false;
                int        curVerseStart;
                int        curVerseEnd;
                VersePart  vrsPart;

                // A verse token with an implicit verse number of 1 is recorded as verse 1
                // and skips all further checks.
                if (verseToken.ImplicitVerseNumber == 1)
                {
                    versesFound[1] = token;
                    continue;
                }

                ParseVerseResult parseResult = ParseVerseNumber(token.Text,
                                                                out curVerseStart, out curVerseEnd, out vrsPart);

                if (parseResult == ParseVerseResult.ValidWithSpaceInVerse)
                {
                    // Log error telling user there are spaces before or after the verse
                    // number. This means the space(s) have the verse number style. This isn't
                    // considered an invalid verse number, but we do need to tell the user.
                    AddError(token, 0, token.Text.Length,
                             Localize("Space found in verse number"), token.Text);
                }
                else if (parseResult == ParseVerseResult.ValidWithSpaceInVerseBridge)
                {
                    // Log error telling user there are spaces in a verse bridge. This
                    // means the space(s) have the verse number style. This isn't considered
                    // an invalid verse number, but we do need to tell the user.
                    AddError(token, 0, token.Text.Length,
                             Localize("Space found in verse bridge"), token.Text);
                }

                // The chain below selects at most one message for this token; the first
                // matching condition wins. The last branch reports its two errors directly
                // and leaves msg null.
                if (parseResult == ParseVerseResult.Invalid)
                {
                    msg = Localize("Invalid verse number");
                }
                else if ((parseResult != ParseVerseResult.InvalidFormat) && VersesAlreadyFound(curVerseStart, curVerseEnd, versesFound) &&
                         !(expectingPartB && vrsPart == VersePart.PartB))
                {
                    // The verse(s) were already recorded, and this is not the part B that
                    // was expected after a part A.
                    if (AnyOverlappingVerses(curVerseStart, curVerseEnd,
                                             prevVerseStart, prevVerseEnd, out errorArgs))
                    {
                        // Duplicate verse(s) found.
                        msg = (errorArgs.Length == 1 ?
                               Localize("Duplicate verse number") :
                               Localize("Duplicate verse numbers"));
                    }
                    else
                    {
                        // Verse number(s) are unexpected
                        msg = (curVerseStart == curVerseEnd ?
                               Localize("Unexpected verse number") :
                               Localize("Unexpected verse numbers"));
                    }
                }
                else if (AnyOverlappingVerses(curVerseStart, curVerseEnd,
                                              lastVrsInChap + 1, int.MaxValue, out errorArgs))
                {
                    countFoundVerses = true;
                    // Start and/or end verse is out of range
                    msg = (errorArgs.Length == 1 ?
                           Localize("Verse number out of range") :
                           Localize("Verse numbers out of range"));
                }
                else if (curVerseStart < nextExpectedVerse)
                {
                    // Verse number(s) are out of order
                    countFoundVerses = true;
                    if (nextExpectedVerse <= lastVrsInChap)
                    {
                        // The expected verse still exists in this chapter, so name it.
                        errorArgs = new object[] { nextExpectedVerse };
                        msg       = (curVerseStart == curVerseEnd ?
                                     Localize("Verse number out of order; expected verse {0}") :
                                     Localize("Verse numbers out of order; expected verse {0}"));
                    }
                    else
                    {
                        msg = (curVerseStart == curVerseEnd ?
                               Localize("Verse number out of order") :
                               Localize("Verse numbers out of order"));
                    }
                }
                else if (((vrsPart == VersePart.PartB) != expectingPartB) &&
                         (curVerseStart == curVerseEnd))
                {
                    // Missing part A or B
                    // TODO: cover cases like "4a 5-7" and "4 5b-7". This would require
                    // ParseVerseNumber() to detect verse parts at the beginning of bridges.
                    // An unexpected part B means its part A is missing: report at the start
                    // of this token. Otherwise the expected part B is missing: report at
                    // the end of the token recorded for the previous verse.
                    reportedToken = (vrsPart == VersePart.PartB ? token : versesFound[prevVerseEnd]);
                    msg           = Localize("Missing verse number {0}");
                    offset        = (vrsPart == VersePart.PartB ? 0 : reportedToken.Text.Length);
                    length        = 0;
                    int    reportedVrsNum = (vrsPart == VersePart.PartB ? curVerseStart : prevVerseEnd);
                    string fmt            = (vrsPart == VersePart.PartB ? "{0}a" : "{0}b");
                    errorArgs        = new object[] { string.Format(fmt, reportedVrsNum) };
                    countFoundVerses = true;
                }
                else if ((vrsPart == VersePart.PartB && curVerseStart > prevVerseEnd) &&
                         (curVerseStart == curVerseEnd))
                {
                    // Missing both a part B and A
                    // Two errors are added here: part b of the previous verse (at the end
                    // of its token) and part a of the current verse (at the start of this
                    // token).
                    reportedToken = versesFound[prevVerseEnd];

                    AddError(reportedToken, reportedToken.Text.Length, 0,
                             Localize("Missing verse number {0}"),
                             new object[] { string.Format("{0}b", prevVerseEnd) });

                    AddError(token, 0, 0, Localize("Missing verse number {0}"),
                             new object[] { string.Format("{0}a", curVerseStart) });
                }

                if (msg != null)
                {
                    // Report the error found.
                    if (errorArgs == null)
                    {
                        AddError(reportedToken, offset, length, msg);
                    }
                    else
                    {
                        AddError(reportedToken, offset, length, msg, errorArgs);
                    }
                }

                if (msg == null || countFoundVerses)
                {
                    // Either no message was selected for the current verse range, or the
                    // error found still counts the verses as present: set all the verses
                    // in our found verse list corresponding to those in the range (clamped
                    // to the last verse of the chapter).
                    for (int i = curVerseStart; i <= Math.Min(curVerseEnd, lastVrsInChap); i++)
                    {
                        versesFound[i] = token;
                    }
                }

                // A badly formatted verse number is reported in addition to anything
                // reported above.
                if (parseResult == ParseVerseResult.InvalidFormat)
                {
                    AddError(token, 0, token.Text.Length, Localize("Invalid verse number"), token.Text);
                }

                // only worry about this if the chapter and/or verse tokens are in order
                // NOTE(review): no ordering condition is tested here; the error is added
                // whenever the verse token has no counted verse text - confirm the intent.
                if (verseToken.VerseTextCount < 1)
                {
                    AddError(verseToken.VerseNumber, 0, verseToken.VerseNumber.Text.Length,
                             Localize("Missing verse text in verse {0}"), verseToken.VerseNumber.Text);
                }

                // Determine next expected verse.
                // Don't expect a partB if there was an error with partA
                expectingPartB = (vrsPart == VersePart.PartA && msg == null);
                // The expected verse only advances when no part B is pending and the
                // current verse range ends inside the chapter.
                if (!expectingPartB && curVerseEnd <= lastVrsInChap)
                {
                    nextExpectedVerse = curVerseEnd + 1;
                }

                prevVerseStart = curVerseStart;
                prevVerseEnd   = curVerseEnd;
            }

            CheckForMissingVerses(versesFound, bookId, chapToken.ChapterNumber);
        }
Пример #21
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Bumps the text-token counter for the given token, unless the token's text is
		/// empty or consists solely of whitespace.
		/// </summary>
		/// <param name="token">The text token to be counted.</param>
		/// ------------------------------------------------------------------------------------
		internal void IncrementVerseTextCount(ITextToken token)
		{
			// Tokens that are nothing but whitespace are skipped.
			if (token.Text.Trim().Length == 0)
			{
				return;
			}

			m_nbrTextTokens++;
		}
Пример #22
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Initializes a new instance of the <see cref="VerseToken"/> class from a token
		/// and a separately supplied verse number.
		/// </summary>
		/// <param name="implicitVerseNumber">The token stored as the verse-number token.</param>
		/// <param name="verseNumber">The verse number stored as the implicit verse number.</param>
		/// ------------------------------------------------------------------------------------
		internal VerseToken(ITextToken implicitVerseNumber, int verseNumber)
		{
			this.m_verseNumberToken = implicitVerseNumber;
			this.m_implicitVerseNumber = verseNumber;
		}
Пример #23
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Initializes a new instance of the <see cref="VerseToken"/> class from a
		/// verse-number token.
		/// </summary>
		/// <param name="verseNumber">The token stored as the verse-number token.</param>
		/// ------------------------------------------------------------------------------------
		internal VerseToken(ITextToken verseNumber)
		{
			this.m_verseNumberToken = verseNumber;
		}
Пример #24
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Walks the collected chapter tokens and reports invalid, out-of-range, duplicated
        /// and out-of-order chapter numbers. Chapters that pass these checks are marked as
        /// found and have their verse numbers checked; finally the set of found chapters is
        /// handed to <c>CheckForMissingChapters</c>.
        /// </summary>
        /// ------------------------------------------------------------------------------------
        private void CheckChapterNumbers()
        {
            int bookNum        = BCVRef.BookToNumber(m_sBookId);
            int chapterCount   = m_versification.LastChapter(bookNum);
            int expectedNext   = 1;
            int previousNumber = 0;

            // Indexed by chapter number; slot 0 is never set by this method.
            bool[] seenChapters = new bool[chapterCount + 1];

            foreach (ChapterToken chapToken in m_chapTokens)
            {
                int chapNum = chapToken.ChapterNumber;

                // When a specific chapter was requested, ignore every other chapter.
                if (m_nChapterToCheck != 0 && chapNum != m_nChapterToCheck)
                {
                    continue;
                }

                ITextToken token = chapToken.Token;

                if (!chapToken.Valid)
                {
                    // The chapter token itself is flagged as invalid.
                    AddError(token, 0, token.Text.Length, Localize("Invalid chapter number"), chapNum);
                }

                if (chapNum >= 1)
                {
                    string problem  = null;
                    int    errorArg = chapNum;

                    if (chapNum > chapterCount)
                    {
                        // Beyond the last chapter of the book.
                        problem = Localize("Chapter number out of range");
                    }
                    else if (chapNum == previousNumber)
                    {
                        // Same number as the chapter token just before it.
                        problem = Localize("Duplicate chapter number");
                    }
                    else if (chapNum < expectedNext)
                    {
                        // Lower than the next chapter we were expecting.
                        problem  = Localize("Chapter out of order; expected chapter {0}");
                        errorArg = expectedNext;
                    }

                    if (problem == null)
                    {
                        seenChapters[chapNum] = true;
                        CheckVerseNumbers(chapToken, bookNum);
                    }
                    else
                    {
                        AddError(token, 0, token.Text.Length, problem, errorArg);
                    }
                }

                previousNumber = chapNum;

                // The expectation never moves backwards.
                expectedNext = Math.Max(chapNum + 1, expectedNext);
            }

            CheckForMissingChapters(seenChapters);
        }
Пример #25
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Scans the found-verse table for empty slots and reports each run of consecutive
		/// empty slots as a single "missing verse(s)" error.
		/// </summary>
		/// <param name="versesFound">Table indexed by verse number; a null entry means no
		/// token was recorded for that verse. Entry 0 is used as the initial token to attach
		/// errors to.</param>
		/// <param name="bookId">Book number used when building the missing references.</param>
		/// <param name="chapNumber">Chapter number used when building the missing
		/// references.</param>
		/// ------------------------------------------------------------------------------------
		private void CheckForMissingVerses(ITextToken[] versesFound, int bookId, int chapNumber)
		{
			// The most recent non-null entry seen; errors are attached to it.
			ITextToken anchorToken = versesFound[0];
			int lastIndex = versesFound.Length - 1;
			int verse = 1;

			while (verse <= lastIndex)
			{
				ITextToken found = versesFound[verse];
				if (found != null)
				{
					anchorToken = found;
					verse++;
					continue;
				}

				// A gap starts here; extend it across all directly following empty slots.
				int firstMissing = verse;
				int lastMissing = verse;
				while (lastMissing < lastIndex && versesFound[lastMissing + 1] == null)
				{
					lastMissing++;
				}

				anchorToken.MissingStartRef = new BCVRef(bookId, chapNumber, firstMissing);

				// When verse 1 is missing and the anchor is itself a verse number, the error
				// is placed at the start of the anchor; otherwise it goes at the anchor's end.
				int offset;
				if (anchorToken.TextType == TextType.VerseNumber && firstMissing == 1)
				{
					offset = 0;
				}
				else
				{
					offset = anchorToken.Text.Length;
				}

				if (firstMissing == lastMissing)
				{
					AddError(anchorToken, offset, 0, Localize("Missing verse number {0}"), firstMissing);
				}
				else
				{
					anchorToken.MissingEndRef = new BCVRef(bookId, chapNumber, lastMissing);
					AddError(anchorToken, offset, 0, Localize("Missing verse numbers {0}-{1}"),
						firstMissing, lastMissing);
				}

				// Resume scanning right after the gap.
				verse = lastMissing + 1;
			}
		}
Пример #26
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Formats the message with the given arguments, wraps it together with the token
		/// location in a <see cref="TextTokenSubstring"/> and passes it to the error recorder.
		/// </summary>
		/// <param name="token">The token the error is attached to.</param>
		/// <param name="offset">Offset in the token where the offending text begins.</param>
		/// <param name="length">The length of the offending text.</param>
		/// <param name="message">The composite format string for the message.</param>
		/// <param name="args">The format arguments; a null array is treated as no
		/// arguments.</param>
		/// ------------------------------------------------------------------------------------
		private void AddError(ITextToken token, int offset, int length, string message,
			params object[] args)
		{
			// The message always goes through string.Format, even without arguments.
			object[] formatArgs = args ?? new object[0];
			string text = string.Format(message, formatArgs);

			m_recordError(new RecordErrorEventArgs(
				new TextTokenSubstring(token, offset, length, text), CheckId));
		}
Пример #27
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Initializes a new instance of the <see cref="ChapterToken"/> class from a chapter
		/// number token, parsing the chapter number out of the token's text.
		/// </summary>
		/// <remarks>
		/// The token is marked invalid when its text does not match the supplied format,
		/// contains a non-digit character, or holds a digit run too long to fit in an
		/// <see cref="int"/>. In the latter two cases the chapter number is set to -1.
		/// NOTE(review): this constructor only ever clears <c>Valid</c>; presumably it
		/// defaults to true elsewhere - confirm.
		/// </remarks>
		/// <param name="token">The token holding the chapter number text.</param>
		/// <param name="chapterNumberFormat">Pattern the token text is expected to
		/// match.</param>
		/// ------------------------------------------------------------------------------------
		internal ChapterToken(ITextToken token, Regex chapterNumberFormat)
		{
			Token = token;
			m_chapNumber = 0;
			if (!chapterNumberFormat.IsMatch(Token.Text))
				Valid = false;
			foreach (char ch in token.Text)
			{
				int digit = Char.IsDigit(ch) ? (int) Char.GetNumericValue(ch) : -1;

				// Reject non-digits, and also digit runs whose value would overflow an int:
				// unchecked arithmetic would otherwise wrap silently and could produce a
				// bogus chapter number that looks valid.
				if (digit < 0 || m_chapNumber > (int.MaxValue - digit) / 10)
				{
					Valid = false;
					m_chapNumber = -1;
					break;
				}

				m_chapNumber = m_chapNumber * 10 + digit;
			}
			Implicit = false;
		}
Пример #28
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Builds a substring covering the character at position iChar, with the length
		/// reported by <c>GetLengthOfChar</c>.
		/// </summary>
		/// <param name="tok">The token</param>
		/// <param name="iChar">The index of the character.</param>
		/// <returns>A substring anchored on the token, or on the token wrapped by it when
		/// the token is a <c>VerseTextToken</c>.</returns>
		/// ------------------------------------------------------------------------------------
		private TextTokenSubstring GetSubstring(ITextToken tok, int iChar)
		{
			int length = GetLengthOfChar(tok, iChar);

			// A VerseTextToken is unwrapped so the substring refers to the token it holds.
			ITextToken sourceToken;
			if (tok is VerseTextToken)
			{
				sourceToken = ((VerseTextToken)tok).Token;
			}
			else
			{
				sourceToken = tok;
			}

			return new TextTokenSubstring(sourceToken, iChar, length);
		}
Пример #29
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Tracks the current character style: when the token's character style differs
		/// from the remembered one, remembers the new style and clears the
		/// found-character-text flag.
		/// </summary>
		/// <param name="tok">The Scripture token.</param>
		/// ------------------------------------------------------------------------------------
		private void RecordCharacterStyle(ITextToken tok)
		{
			bool styleChanged = tok.CharStyleName != m_characterStyle;
			if (!styleChanged)
			{
				return;
			}

			m_characterStyle = tok.CharStyleName;
			m_foundCharacterText = false;
		}
Пример #30
0
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Initializes a new instance of the <see cref="ChapterToken"/> class with an
 /// explicitly supplied chapter number and marks the token as implicit.
 /// </summary>
 /// <param name="token">The token associated with the chapter.</param>
 /// <param name="chapNumber">The chapter number to store.</param>
 /// ------------------------------------------------------------------------------------
 internal ChapterToken(ITextToken token, int chapNumber)
 {
     this.Token = token;
     this.m_chapNumber = chapNumber;
     this.Implicit = true;
 }
Пример #31
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Determines whether any verse in the inclusive range has already been recorded
		/// in the found-verse table.
		/// </summary>
		/// <param name="curVerseStart">First verse of the range.</param>
		/// <param name="curVerseEnd">Last verse of the range.</param>
		/// <param name="versesFound">Table indexed by verse number; a non-null entry means
		/// the verse was found. Verse numbers outside 1..Length-1 are ignored.</param>
		/// <returns><c>true</c> if at least one verse in the range has an entry; otherwise
		/// <c>false</c>.</returns>
		/// ------------------------------------------------------------------------------------
		private bool VersesAlreadyFound(int curVerseStart, int curVerseEnd,
			ITextToken[] versesFound)
		{
			int verse = curVerseStart;
			while (verse <= curVerseEnd)
			{
				// Only consult the table for verse numbers it can actually hold.
				bool insideTable = verse < versesFound.Length && verse > 0;
				if (insideTable && versesFound[verse] != null)
				{
					return true;
				}

				verse++;
			}

			return false;
		}
Пример #32
0
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Initializes a new instance of the <see cref="VerseToken"/> class from a token
 /// and a separately supplied verse number.
 /// </summary>
 /// <param name="implicitVerseNumber">The token stored as the verse-number token.</param>
 /// <param name="verseNumber">The verse number stored as the implicit verse number.</param>
 /// ------------------------------------------------------------------------------------
 internal VerseToken(ITextToken implicitVerseNumber, int verseNumber)
 {
     this.m_verseNumberToken = implicitVerseNumber;
     this.m_implicitVerseNumber = verseNumber;
 }
Пример #33
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Examines a word's prefix and suffix (as split by <c>AWord</c>) and records the
		/// word via <c>AddWord</c> unless one of the configured prefix/suffix lists
		/// exempts it.
		/// </summary>
		/// <remarks>
		/// The word is skipped when: both prefix and suffix are empty; the prefix is in the
		/// uncapitalized-prefix list; that list contains "*" followed by the last character
		/// of the prefix; that list contains the bare wildcard "*"; the suffix is in the
		/// capitalized-suffix list; or the prefix is in the capitalized-prefix list.
		/// </remarks>
		/// <param name="tok">The token containing the word.</param>
		/// <param name="wap">The word (and its following punctuation) to examine.</param>
		/// <param name="desiredKey">Key passed through to <c>AddWord</c>.</param>
		/// ------------------------------------------------------------------------------------
		public void ProcessWord(ITextToken tok, WordAndPunct wap, string desiredKey)
		{
			AWord word = new AWord(wap.Word, m_categorizer);
			string prefix = word.Prefix;
			string suffix = word.Suffix;

			if (prefix == string.Empty && suffix == string.Empty)
				return;
			if (m_uncapitalizedPrefixes.Contains(prefix))
				return;

			// The "*<last char>" wildcard needs a last character to look at. The guard above
			// only returns when BOTH parts are empty, so an empty prefix with a non-empty
			// suffix can reach this point; indexing it would throw.
			if (!string.IsNullOrEmpty(prefix) &&
				m_uncapitalizedPrefixes.Contains("*" + prefix[prefix.Length - 1]))
			{
				return;
			}

			if (m_uncapitalizedPrefixes.Contains("*"))
				return;
			if (m_capitalizedSuffixes.Contains(suffix))
				return;
			if (m_capitalizedPrefixes.Contains(prefix))
				return;

			AddWord(tok, wap, desiredKey);
		}
Пример #34
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Scans a Scripture token character by character, tracking sentence starts and
		/// adding a substring to the result for each lowercase word-forming character
		/// found where a capital is expected.
		/// </summary>
		/// <param name="tok">The token.</param>
		/// <param name="result">The list that receives the offending substrings.</param>
		/// ------------------------------------------------------------------------------------
		public void ProcessToken(ITextToken tok, List<TextTokenSubstring> result)
		{
			// Abbreviations are blanked out; the text keeps its length, so indexes still
			// line up with the original token.
			string text = RemoveAbbreviations(tok);

			RecordParagraphStyle(tok);
			RecordCharacterStyle(tok);

			// Nothing to check in an empty token.
			if (tok.Text == String.Empty)
			{
				return;
			}

			int textLength = text.Length;
			for (int pos = 0; pos < textLength; pos++)
			{
				char current = text[pos];

				if (IsSentenceFinalPunctuation(current))
				{
					// A new sentence starts unless the punctuation is directly followed by
					// a digit.
					int next = pos + 1;
					m_fAtSentenceStart = (next == textLength) || !char.IsDigit(text[next]);
					continue;
				}

				if (!m_categorizer.IsWordFormingCharacter(current))
				{
					continue;
				}

				if (m_categorizer.IsLower(current))
				{
					TextTokenSubstring tts = GetSubstring(tok, pos);

					// The paragraph check runs first; the character-style check only runs
					// when the paragraph check found nothing.
					bool alreadyReported = CheckForParaCapitalizationError(tok, tts, result) ||
						CheckForCharStyleCapilizationError(tok, tts, result);

					if (!alreadyReported && m_fAtSentenceStart)
					{
						tts.Message = CapitalizationCheck.GetErrorMessage(m_checksDataSource,
							StyleCapInfo.CapCheckTypes.SentenceInitial, string.Empty);
						result.Add(tts);
					}
				}

				// A word-forming character has been consumed.
				m_fAtSentenceStart = false;
				m_foundCharacterText = true;
				m_foundParagraphText = true;
			}
		}
Пример #35
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Creates a substring for the word and adds it to the result list, optionally
		/// filtered by a desired key.
		/// </summary>
		/// <param name="tok">The token containing the word.</param>
		/// <param name="wap">Supplies the word's offset and text.</param>
		/// <param name="desiredKey">When null or empty, every word is added; otherwise only
		/// words whose inventory text equals this key are added.</param>
		/// ------------------------------------------------------------------------------------
		private void AddWord(ITextToken tok, WordAndPunct wap, string desiredKey)
		{
			TextTokenSubstring tts = new TextTokenSubstring(tok, wap.Offset, wap.Word.Length);

			bool filterByKey = !String.IsNullOrEmpty(desiredKey);
			if (filterByKey && desiredKey != tts.InventoryText)
			{
				return;
			}

			m_result.Add(tts);
		}
Пример #36
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// When the token starts a paragraph, remembers its paragraph style and clears the
		/// found-paragraph-text flag; if paragraphs are processed separately, the
		/// sentence-start flag is cleared as well.
		/// </summary>
		/// <param name="tok">The Scripture token.</param>
		/// ------------------------------------------------------------------------------------
		private void RecordParagraphStyle(ITextToken tok)
		{
			if (!tok.IsParagraphStart)
			{
				return;
			}

			m_paragraphStyle = tok.ParaStyleName;
			m_foundParagraphText = false;

			if (m_processParagraphsSeparately)
			{
				m_fAtSentenceStart = false;
			}
		}
Пример #37
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Scans the token for characters that belong to a punctuation pair and feeds each
		/// one through the pair-matching bookkeeping.
		/// </summary>
		/// <param name="tok">The token to scan.</param>
		/// <param name="desiredKey">Key passed through to <c>FinalizeResult</c>.</param>
		/// <param name="result">The result list passed through to
		/// <c>FinalizeResult</c>.</param>
		/// ------------------------------------------------------------------------------------
		public void ProcessToken(ITextToken tok, string desiredKey, List<TextTokenSubstring> result)
		{
			// Finalize first when pairs are closed by a paragraph break and this token
			// starts a paragraph whose style is not a poetic style.
			bool finalizeFirst = AnyFoundPairsClosedByPara && tok.IsParagraphStart &&
				!m_styleCategorizer.IsPoeticStyle(tok.ParaStyleName);
			if (finalizeFirst)
			{
				FinalizeResult(desiredKey, result);
			}

			int pos = 0;
			while (pos < tok.Text.Length)
			{
				string oneChar = tok.Text[pos].ToString();
				if (m_pairList.BelongsToPair(oneChar))
				{
					StoreFoundPairToken(tok, pos);
					RemoveMatchedPunctAtEndOfFirstWordInIntroOutline(tok, pos);
					RemoveIfMatchedPairFound();
					RecordOverlappingPairs();
				}

				pos++;
			}
		}
Пример #38
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Removes the abbreviations from a Scripture token by overwriting every occurrence
		/// of each configured abbreviation with the same number of spaces.
		/// </summary>
		/// <param name="tok">The Scripture token.</param>
		/// <returns>The token's text with any abbreviations replaced with spaces; the length
		/// is the same as that of the original text.</returns>
		/// ------------------------------------------------------------------------------------
		private string RemoveAbbreviations(ITextToken tok)
		{
			string tokenText = tok.Text;
			foreach (string abbreviation in m_abbreviations)
			{
				// Skip unusable entries. A null entry would otherwise throw on .Length
				// below, and string.Replace rejects an empty search string.
				if (string.IsNullOrEmpty(abbreviation))
					continue;

				// Same-length padding keeps character offsets aligned with the token.
				string spaces = new string(' ', abbreviation.Length);
				tokenText = tokenText.Replace(abbreviation, spaces);
			}

			Debug.Assert(tok.Text.Length == tokenText.Length,
				"Length of text should not change",
				"Abbreviations are replaced by spaces, but the overall text length should stay the same.");
			return tokenText;
		}
Пример #39
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Records the single character at the given index as a found pair token, tagged
		/// with a default "unmatched" message.
		/// </summary>
		/// <param name="tok">The token containing the character.</param>
		/// <param name="i">The index of the character in the token.</param>
		/// ------------------------------------------------------------------------------------
		private void StoreFoundPairToken(ITextToken tok, int i)
		{
			// The message is only an initial default and may be changed later.
			TextTokenSubstring found = new TextTokenSubstring(tok, i, 1)
			{
				Message = m_checksDataSource.GetLocalizedString("Unmatched punctuation")
			};

			m_pairTokensFound.Add(found);
		}
Пример #40
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Measures a character together with the unbroken run of diacritics that directly
		/// follows it.
		/// </summary>
		/// <param name="tok">The text token.</param>
		/// <param name="iBaseCharacter">The index of the base character in the text token.</param>
		/// <returns>1 for the base character plus the number of diacritics immediately
		/// following it.</returns>
		/// ------------------------------------------------------------------------------------
		private int GetLengthOfChar(ITextToken tok, int iBaseCharacter)
		{
			// Advance past every diacritic that trails the base character.
			int end = iBaseCharacter + 1;
			while (end < tok.Text.Length && m_categorizer.IsDiacritic(tok.Text[end]))
			{
				end++;
			}

			return end - iBaseCharacter;
		}
Пример #41
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Drops the most recently stored pair token when the token has an introduction
		/// outline style, the character at index i ends the first whitespace-delimited
		/// word, and the stored pair token is a closing punctuation mark.
		/// </summary>
		/// <param name="tok">The token being processed.</param>
		/// <param name="i">The index of the pair character in the token's text.</param>
		/// ------------------------------------------------------------------------------------
		private void RemoveMatchedPunctAtEndOfFirstWordInIntroOutline(ITextToken tok, int i)
		{
			if (!m_styleCategorizer.IsIntroductionOutlineStyle(tok.ParaStyleName))
			{
				return;
			}

			// Is position i the last character of the first word?
			string firstWord = tok.Text.Split()[0];
			if (firstWord.Length != i + 1)
			{
				return;
			}

			// Only a closing mark is removed.
			int newestIndex = m_pairTokensFound.Count - 1;
			if (m_pairList.IsClose(m_pairTokensFound[newestIndex].Text))
			{
				m_pairTokensFound.RemoveAt(newestIndex);
			}
		}
Пример #42
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Compares the lowercased word with the previously seen word and records it via
		/// <c>AddWord</c> when they are equal. Empty words are ignored.
		/// </summary>
		/// <param name="tok">The token containing the word.</param>
		/// <param name="wap">The word and the punctuation that follows it.</param>
		/// ------------------------------------------------------------------------------------
		private void ProcessWord(ITextToken tok, WordAndPunct wap)
		{
			if (wap.Word == "")
			{
				return;
			}

			string current = wap.Word.ToLower();
			bool repeatsPrevious = (prevWord == current);
			if (repeatsPrevious)
			{
				AddWord(tok, wap);
			}

			prevWord = current;

			// If there are characters (such as quotes) between words, two identical words
			// are not considered repeating, so the state is reset.
			bool separatedByNonWhitespace = false;
			foreach (char separator in wap.Punct)
			{
				if (!char.IsWhiteSpace(separator))
				{
					separatedByNonWhitespace = true;
					break;
				}
			}

			if (separatedByNonWhitespace)
			{
				Reset();
			}
		}
Пример #43
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Extracts the punctuation sequences from this token by classifying each character
		/// as punctuation, digit, whitespace or other, and dispatching accordingly.
		/// </summary>
		/// <param name="tok">The token to scan.</param>
		/// <param name="desiredKey">Key passed through to <c>FinalizeResult</c>.</param>
		/// <param name="result">The result list passed through to
		/// <c>FinalizeResult</c>.</param>
		/// ------------------------------------------------------------------------------------
		public void ProcessToken(ITextToken tok, string desiredKey, List<TextTokenSubstring> result)
		{
			bool atParagraphStart = tok.IsParagraphStart || m_fTreatAsParagraphStart;
			if (atParagraphStart)
			{
				ProcessWhitespaceOrParagraph(true);
				m_fTreatAsParagraphStart = false;
			}

			for (int pos = 0; pos < tok.Text.Length; pos++)
			{
				char current = tok.Text[pos];

				if (m_categorizer.IsPunctuation(current))
				{
					ProcessPunctuation(tok, pos);
					continue;
				}

				if (char.IsDigit(current))
				{
					// A lone punctuation mark sandwiched between two numbers (e.g. 3:14)
					// is ignored rather than reported.
					bool punctBetweenNumbers = m_finalizedWithNumber && m_puncts.Count == 1 &&
						m_puncts[0].TokenType == PunctuationTokenType.punctuation;

					if (punctBetweenNumbers)
					{
						m_puncts.Clear();
					}
					else
					{
						ProcessDigit(tok, pos);
						FinalizeResult(desiredKey, result, false);
					}

					continue;
				}

				if (char.IsWhiteSpace(current))
				{
					ProcessWhitespaceOrParagraph(false);
					continue;
				}

				// Neither punctuation, digit nor whitespace: it must be the start of a
				// new word, so any open punctuation sequence is finalized.
				FinalizeResult(desiredKey, result, false);
			}
		}
Пример #44
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Appends a token to the token list. Tokens that start a paragraph are rejected.
		/// </summary>
		/// <param name="token">The token to append.</param>
		/// <exception cref="ArgumentException">The token starts a paragraph.</exception>
		/// ------------------------------------------------------------------------------------
		public void AddToken(ITextToken token)
		{
			if (!token.IsParagraphStart)
			{
				m_tokens.Add(token);
				return;
			}

			throw new ArgumentException("A substring must be wholly contained within a single paragraph.");
		}
Пример #45
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Appends the punctuation character at the given index to the punctuation list and,
		/// where it completes a same-direction punctuation/space/punctuation sequence,
		/// re-labels the space as a quote separator.
		/// </summary>
		/// <param name="tok">The text token</param>
		/// <param name="i">The index of the punctuation character</param>
		/// ------------------------------------------------------------------------------------
		private void ProcessPunctuation(ITextToken tok, int i)
		{
			TextTokenSubstring tts = new TextTokenSubstring(tok, i, 1);
			bool isInitial = m_quotationCategorizer.IsInitialPunctuation(tts.Text);
			bool isFinal = m_quotationCategorizer.IsFinalPunctuation(tts.Text);
			m_puncts.Add(new PunctuationToken(PunctuationTokenType.punctuation, tts, isInitial, isFinal));

			// Special case: a sequence such as
			//   U+201C LEFT DOUBLE QUOTATION MARK
			//   U+0020 SPACE
			//   U+2018 LEFT SINGLE QUOTATION MARK
			// is treated as if the space were not there, so a quotation mark can still be
			// considered word initial even if it is followed by a space.
			int count = m_puncts.Count;
			if (count < 3)
			{
				return;
			}

			PunctuationToken newest = m_puncts[count - 1];
			PunctuationToken middle = m_puncts[count - 2];
			PunctuationToken oldest = m_puncts[count - 3];

			// The last three tokens must be punctuation / non-paragraph whitespace /
			// punctuation.
			bool spaceBetweenPunctuation =
				middle.TokenType == PunctuationTokenType.whitespace &&
				!middle.IsParaBreak &&
				oldest.TokenType == PunctuationTokenType.punctuation;
			if (!spaceBetweenPunctuation)
			{
				return;
			}

			// Both marks must point in the same direction (both initial or both final).
			bool sameDirection = (oldest.IsInitial && newest.IsInitial) ||
				(oldest.IsFinal && newest.IsFinal);
			if (sameDirection)
			{
				middle.TokenType = PunctuationTokenType.quoteSeparator;
			}
		}
Пример #46
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Appends a number token to the punctuation list. The number token carries no
		/// substring and is neither initial nor final.
		/// </summary>
		/// <param name="tok">The token containing the digit (not used by the active
		/// code).</param>
		/// <param name="i">The index of the digit (not used by the active code).</param>
		/// ------------------------------------------------------------------------------------
		private void ProcessDigit(ITextToken tok, int i)
		{
			PunctuationToken numberToken =
				new PunctuationToken(PunctuationTokenType.number, null, false, false);
			m_puncts.Add(numberToken);

#if UNUSED
			// special case: treat a sequence like
			// number/punctuation/number
			// as if the punctuation were not there. an example of this would be 1:2
			// this allows the : in 1:2 not to be counted as punctuation
			if (tokens.Count >= 3)
			{
				// If the last three tokens are number/select punctuation/number
				if (tokens[tokens.Count - 3].TokenType == PunctuationTokenType.number)
				{
					string separator = tokens[tokens.Count - 2].ToString();
					//! make the list of separator characters configurable
					if (separator == "," || separator == "." || separator == "-" || separator == ":")
					{
						tokens.RemoveAt(tokens.Count - 2);

						// The offset (-2) stays the same as the line of code above
						// since after the previous line is executed some of the tokens shift position.
						tokens.RemoveAt(tokens.Count - 2);
					}
				}
			}
#endif
		}
Пример #47
0
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Initializes a new instance of the <see cref="TextTokenSubstring"/> class with a
 /// single source token. Delegates to the four-argument overload, passing null as its
 /// final argument.
 /// </summary>
 /// <param name="token">The source token the substring is taken from.</param>
 /// <param name="offset">The offset of the substring within the token.</param>
 /// <param name="length">The length of the substring.</param>
 /// ------------------------------------------------------------------------------------
 public TextTokenSubstring(ITextToken token, int offset, int length) :
     this(token, offset, length, null)
 {
 }