Ejemplo n.º 1
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Appends the one-character substring at index <paramref name="i"/> of the token to
        /// the punctuation list, then checks whether the whitespace token sitting between this
        /// punctuation and the previous punctuation should be reclassified as a quote
        /// separator.
        /// </summary>
        /// <param name="tok">The text token containing the punctuation character</param>
        /// <param name="i">The index of the punctuation character within the token</param>
        /// ------------------------------------------------------------------------------------
        private void ProcessPunctuation(ITextToken tok, int i)
        {
            TextTokenSubstring punctSubstring = new TextTokenSubstring(tok, i, 1);
            bool isInitialPunct = m_quotationCategorizer.IsInitialPunctuation(punctSubstring.Text);
            bool isFinalPunct = m_quotationCategorizer.IsFinalPunctuation(punctSubstring.Text);

            m_puncts.Add(new PunctuationToken(PunctuationTokenType.punctuation, punctSubstring,
                isInitialPunct, isFinalPunct));

            // Special case: a sequence such as
            //     U+201C LEFT DOUBLE QUOTATION MARK
            //     U+0020 SPACE
            //     U+2018 LEFT SINGLE QUOTATION MARK
            // (opening quote / space / opening quote) is handled as if the space were absent.
            // This lets a quotation mark count as word-initial even when a space follows it.
            int count = m_puncts.Count;
            if (count < 3)
            {
                return;
            }

            int iFirst = count - 3;
            int iSeparator = count - 2;
            int iLast = count - 1;

            // The middle token must be whitespace that is not a paragraph break, and the token
            // before it must be punctuation (the last token is the punctuation just added).
            if (m_puncts[iSeparator].TokenType != PunctuationTokenType.whitespace ||
                m_puncts[iSeparator].IsParaBreak ||
                m_puncts[iFirst].TokenType != PunctuationTokenType.punctuation)
            {
                return;
            }

            // Both punctuation tokens must point in the same direction (both initial or both
            // final) for the whitespace between them to become a quote separator.
            if ((m_puncts[iFirst].IsInitial && m_puncts[iLast].IsInitial) ||
                (m_puncts[iFirst].IsFinal && m_puncts[iLast].IsFinal))
            {
                m_puncts[iSeparator].TokenType = PunctuationTokenType.quoteSeparator;
            }
        }
Ejemplo n.º 2
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Adds a checking error for <paramref name="ttsFirstLetter"/> when the current
        /// character style is listed in the capitalized styles dictionary as a character
        /// style. The check is only made while no character text has been found yet
        /// (m_foundCharacterText is false); calling this method sets that flag.
        /// </summary>
        /// <param name="tok">The Scripture token (not used by this method).</param>
        /// <param name="ttsFirstLetter">The token substring of the first word-forming character
        /// in the given token. The caller passes it only when that character is lowercase.</param>
        /// <param name="result">The list that receives the error substring.</param>
        /// <returns><c>true</c> if an error was added to the list of results; otherwise
        /// <c>false</c></returns>
        /// ------------------------------------------------------------------------------------
        private bool CheckForCharStyleCapilizationError(ITextToken tok,
            TextTokenSubstring ttsFirstLetter, List<TextTokenSubstring> result)
        {
            // Character text was already seen, so there is nothing to check.
            if (m_foundCharacterText)
            {
                return false;
            }

            m_foundCharacterText = true;

            // Look the current character style up in the capitalized styles dictionary to
            // decide whether its first word-forming character should have been uppercase.
            StyleCapInfo capInfo;
            bool styleRequiresCapital =
                m_allCapitalizedStyles.TryGetValue(m_characterStyle, out capInfo) &&
                capInfo.m_type == StyleInfo.StyleTypes.character;

            if (!styleRequiresCapital)
            {
                return false;
            }

            ttsFirstLetter.InventoryText = m_characterStyle;
            ttsFirstLetter.Message = CapitalizationCheck.GetErrorMessage(m_checksDataSource,
                capInfo.m_capCheck, m_characterStyle);
            result.Add(ttsFirstLetter);
            return true;
        }
Ejemplo n.º 3
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Stores the one-character substring at index <paramref name="i"/> of the token in
        /// the list of found pair tokens, tagged with a default "Unmatched punctuation"
        /// message.
        /// </summary>
        /// <param name="tok">The token containing the pair character.</param>
        /// <param name="i">The index of the pair character within the token.</param>
        /// ------------------------------------------------------------------------------------
        private void StoreFoundPairToken(ITextToken tok, int i)
        {
            TextTokenSubstring pairToken = new TextTokenSubstring(tok, i, 1);

            // Start with a default message; it may be replaced later.
            string defaultMessage = m_checksDataSource.GetLocalizedString("Unmatched punctuation");
            pairToken.Message = defaultMessage;

            m_pairTokensFound.Add(pairToken);
        }
Ejemplo n.º 4
0
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Initializes a new instance of the <see cref="PunctuationToken"/> class, storing each
 /// argument in the property of the same name.
 /// </summary>
 /// <param name="tokenType">Type of the token.</param>
 /// <param name="tts">The TextTokenSubstring this token refers to.</param>
 /// <param name="isInitial"><c>true</c> if this is an opening quotation mark.</param>
 /// <param name="isFinal"><c>true</c> if this is a closing quotation mark.</param>
 /// ------------------------------------------------------------------------------------
 public PunctuationToken(
     PunctuationTokenType tokenType,
     TextTokenSubstring tts,
     bool isInitial,
     bool isFinal)
 {
     this.TokenType = tokenType;
     this.Tts = tts;
     this.IsInitial = isInitial;
     this.IsFinal = isFinal;
 }
Ejemplo n.º 5
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Builds the substring for the character starting at position
        /// <paramref name="iChar"/>. Its length comes from GetLengthOfChar. When the token is
        /// a <see cref="VerseTextToken"/>, the substring is built on that token's
        /// <c>Token</c> property instead of on the token itself.
        /// </summary>
        /// <param name="tok">The token</param>
        /// <param name="iChar">The index of the character.</param>
        /// <returns>The substring covering the character.</returns>
        /// ------------------------------------------------------------------------------------
        private TextTokenSubstring GetSubstring(ITextToken tok, int iChar)
        {
            int charLength = GetLengthOfChar(tok, iChar);

            // For a VerseTextToken, build the substring on the token it exposes.
            ITextToken target = tok;
            if (tok is VerseTextToken)
            {
                target = ((VerseTextToken)tok).Token;
            }

            return new TextTokenSubstring(target, iChar, charLength);
        }
Ejemplo n.º 6
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Builds a substring for the word described by <paramref name="wap"/> and adds it to
        /// <c>result</c> when no key filter is active (<c>desiredKey</c> is null or empty) or
        /// when the substring's inventory text equals <c>desiredKey</c>.
        /// </summary>
        /// <param name="tok">The token containing the word.</param>
        /// <param name="wap">Supplies the word's offset in the token and the word text, whose
        /// length is used as the substring length.</param>
        /// ------------------------------------------------------------------------------------
        private void AddWord(ITextToken tok, WordAndPunct wap)
        {
            TextTokenSubstring tts = new TextTokenSubstring(tok, wap.Offset, wap.Word.Length);

            // A null key means "no filter", just like an empty one. Comparing against ""
            // alone made a null key filter out every word whose inventory text is non-null.
            if (string.IsNullOrEmpty(desiredKey) || desiredKey == tts.InventoryText)
            {
                result.Add(tts);
            }
        }
Ejemplo n.º 7
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Builds a substring for the word described by <paramref name="wap"/> and adds it to
        /// m_result, unless a key filter is given and the substring's inventory text does not
        /// equal it.
        /// </summary>
        /// <param name="tok">The token containing the word.</param>
        /// <param name="wap">Supplies the word's offset in the token and the word text, whose
        /// length is used as the substring length.</param>
        /// <param name="desiredKey">The inventory text to filter on; when null or empty every
        /// word is added.</param>
        /// ------------------------------------------------------------------------------------
        private void AddWord(ITextToken tok, WordAndPunct wap, string desiredKey)
        {
            TextTokenSubstring wordSubstring =
                new TextTokenSubstring(tok, wap.Offset, wap.Word.Length);

            // Skip the word only when a filter is active and it does not match.
            if (!String.IsNullOrEmpty(desiredKey) && desiredKey != wordSubstring.InventoryText)
            {
                return;
            }

            m_result.Add(wordSubstring);
        }
Ejemplo n.º 8
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Formats the message with the supplied arguments, wraps it together with the
        /// offending text span in a <see cref="TextTokenSubstring"/> and passes it to the
        /// m_recordError delegate along with this check's id.
        /// </summary>
        /// <param name="token">The current token being processed.</param>
        /// <param name="offset">Offset in the token where the offending text begins.</param>
        /// <param name="length">The length of the offending text.</param>
        /// <param name="message">The message, treated as a composite format string.</param>
        /// <param name="args">The arguments to format the message; may be null or empty.</param>
        /// ------------------------------------------------------------------------------------
        private void AddError(ITextToken token, int offset, int length, string message,
            params object[] args)
        {
            // A null argument array is formatted exactly as if no arguments were supplied.
            object[] formatArgs = args ?? new object[0];
            string errorText = string.Format(message, formatArgs);

            TextTokenSubstring offendingText =
                new TextTokenSubstring(token, offset, length, errorText);

            m_recordError(new RecordErrorEventArgs(offendingText, CheckId));
        }
Ejemplo n.º 9
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Processes the Scripture token: records its paragraph and character style, then
        /// walks the token text (as returned by RemoveAbbreviations) looking for lowercase
        /// word-forming characters that should have been capitalized. An error is added for a
        /// lowercase letter when the paragraph-style or character-style check reports one, or
        /// otherwise when the letter is at the start of a sentence.
        /// </summary>
        /// <param name="tok">The token.</param>
        /// <param name="result">The list that receives the error substrings.</param>
        /// ------------------------------------------------------------------------------------
        public void ProcessToken(ITextToken tok, List<TextTokenSubstring> result)
        {
            // NOTE(review): the loop below indexes this text but builds substrings on tok at
            // the same index, so RemoveAbbreviations presumably preserves offsets - confirm.
            string text = RemoveAbbreviations(tok);

            RecordParagraphStyle(tok);
            RecordCharacterStyle(tok);

            // An empty token has no character whose case could be checked.
            if (tok.Text == String.Empty)
            {
                return;
            }

            int length = text.Length;

            for (int iChar = 0; iChar < length; iChar++)
            {
                char ch = text[iChar];

                if (IsSentenceFinalPunctuation(ch))
                {
                    // A sentence starts after this punctuation when it ends the token text
                    // or when the character right after it is not a digit.
                    int iNext = iChar + 1;
                    m_fAtSentenceStart = iNext == length || !char.IsDigit(text[iNext]);
                    continue;
                }

                if (!m_categorizer.IsWordFormingCharacter(ch))
                {
                    continue;
                }

                if (m_categorizer.IsLower(ch))
                {
                    TextTokenSubstring lowerLetter = GetSubstring(tok, iChar);

                    // The style checks run in this order and stop at the first one that adds
                    // an error; the sentence-initial error is only added when neither did.
                    bool styleErrorAdded =
                        CheckForParaCapitalizationError(tok, lowerLetter, result) ||
                        CheckForCharStyleCapilizationError(tok, lowerLetter, result);

                    if (!styleErrorAdded && m_fAtSentenceStart)
                    {
                        lowerLetter.Message = CapitalizationCheck.GetErrorMessage(
                            m_checksDataSource, StyleCapInfo.CapCheckTypes.SentenceInitial,
                            string.Empty);
                        result.Add(lowerLetter);
                    }
                }

                // Any word-forming character, whatever its case, ends the "start of
                // sentence / paragraph / character style" state.
                m_fAtSentenceStart = false;
                m_foundCharacterText = true;
                m_foundParagraphText = true;
            }
        }
Ejemplo n.º 10
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Checks whether the last two entries in the list of found pair tokens form a
        /// matched pair. If so, both are removed from the list, since a matched set has been
        /// completed.
        /// </summary>
        /// ------------------------------------------------------------------------------------
        private void RemoveIfMatchedPairFound()
        {
            int count = m_pairTokensFound.Count;
            if (count < 2)
            {
                return;
            }

            int iClose = count - 1;
            int iOpen = count - 2;
            TextTokenSubstring closeCandidate = m_pairTokensFound[iClose];
            TextTokenSubstring openCandidate = m_pairTokensFound[iOpen];

            if (!m_pairList.IsMatchedPair(openCandidate.Text, closeCandidate.Text))
            {
                return;
            }

            // Matched pair: drop the closing token first so the opening index stays valid.
            m_pairTokensFound.RemoveAt(iClose);
            m_pairTokensFound.RemoveAt(iOpen);
        }
Ejemplo n.º 11
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Checks whether the last four found pair tokens form two overlapping pairs: the
        /// first and second are both opening tokens, the first matches the third, and the
        /// second matches the fourth (for example <c>( [ ) ]</c>). If so, the message of all
        /// four tokens is changed to "Overlapping pair".
        /// </summary>
        /// ------------------------------------------------------------------------------------
        private void RecordOverlappingPairs()
        {
            int count = m_pairTokensFound.Count;
            if (count < 4)
            {
                return;
            }

            TextTokenSubstring first = m_pairTokensFound[count - 4];
            TextTokenSubstring second = m_pairTokensFound[count - 3];
            TextTokenSubstring third = m_pairTokensFound[count - 2];
            TextTokenSubstring fourth = m_pairTokensFound[count - 1];

            bool pairsOverlap =
                m_pairList.IsOpen(first.Text) &&
                m_pairList.IsOpen(second.Text) &&
                m_pairList.IsMatchedPair(first.Text, third.Text) &&
                m_pairList.IsMatchedPair(second.Text, fourth.Text);

            if (!pairsOverlap)
            {
                return;
            }

            // Record the overlap by replacing the message on each of the four tokens.
            string overlapMessage = m_checksDataSource.GetLocalizedString("Overlapping pair");
            fourth.Message = overlapMessage;
            third.Message = overlapMessage;
            second.Message = overlapMessage;
            first.Message = overlapMessage;
        }
Ejemplo n.º 12
0
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Initializes a new instance of the <see cref="ContextInfo"/> class from a
 /// <see cref="TextTokenSubstring"/>. The substring's <c>Offset</c>, its
 /// <c>FullTokenText</c> and the <c>ScrRefString</c> of its <c>FirstToken</c> are passed
 /// on to another constructor overload.
 /// </summary>
 /// <param name="chr">The character or pattern to which this context applies.</param>
 /// <param name="tts">The TextTokenSubstring that supplies the offset, text and
 /// reference.</param>
 /// ------------------------------------------------------------------------------------
 internal ContextInfo(string chr, TextTokenSubstring tts) :
     this(chr,
          tts.Offset,
          tts.FullTokenText,
          tts.FirstToken.ScrRefString)
 {
 }
Ejemplo n.º 13
0
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Initializes a new instance of the <see cref="ContextInfo"/> class for a punctuation
 /// pattern found in a <see cref="TextTokenSubstring"/>. The substring's <c>Offset</c>,
 /// its <c>FullTokenText</c> and the <c>ScrRefString</c> of its <c>FirstToken</c> are
 /// passed on to another constructor overload.
 /// </summary>
 /// <param name="pattern">The punctuation pattern.</param>
 /// <param name="tts">The TextTokenSubstring that supplies the offset, text and
 /// reference.</param>
 /// ------------------------------------------------------------------------------------
 internal ContextInfo(PuncPattern pattern, TextTokenSubstring tts)
     : this(pattern,
            tts.Offset,
            tts.FullTokenText,
            tts.FirstToken.ScrRefString)
 {
 }
Ejemplo n.º 14
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Scans the tokens and collects a substring for each character sequence that
        /// qualifies. A sequence is collected when any of the following holds:
        /// a specific key is requested and the sequence equals it; no specific key is
        /// requested and <paramref name="invalidCharactersOnly"/> is <c>false</c>; or
        /// <paramref name="invalidCharactersOnly"/> is <c>true</c> and the sequence is found
        /// neither in m_alwaysValidCharacters nor in the valid characters of the token's
        /// locale.
        /// Tokens with null text are skipped. When <paramref name="invalidCharactersOnly"/>
        /// is <c>false</c>, tokens whose locale differs from the "PreferredLocale" parameter
        /// are skipped as well.
        /// </summary>
        /// <param name="tokens">The tokens to scan.</param>
        /// <param name="desiredKey">The character sequence to look for; null or empty means
        /// no specific key is requested.</param>
        /// <param name="invalidCharactersOnly"><c>true</c> to collect invalid character
        /// sequences.</param>
        /// <returns>The collected substrings (also stored in m_characterSequences).</returns>
        /// ------------------------------------------------------------------------------------
        private List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens, string desiredKey,
            bool invalidCharactersOnly)
        {
            if (m_categorizer == null)
            {
                m_categorizer = m_checksDataSource.CharacterCategorizer;
            }

            m_characterSequences = new List<TextTokenSubstring>();

            // Valid-character lookup tables, built once per locale as tokens are encountered.
            Dictionary<string, Dictionary<string, bool>> htValidChars =
                new Dictionary<string, Dictionary<string, bool>>();
            Dictionary<string, bool> currentDictionary = null;
            string preferredLocale = m_checksDataSource.GetParameterValue("PreferredLocale") ?? string.Empty;

            // Loop-invariant, so computed once. A null key is treated like an empty one;
            // comparing against "" alone made a null key count as a specific key that no
            // sequence could match.
            bool lookingForASpecificKey = !string.IsNullOrEmpty(desiredKey);

            foreach (ITextToken tok in tokens)
            {
                string locale = tok.Locale ?? string.Empty;

                if (tok.Text == null || (!invalidCharactersOnly && locale != preferredLocale))
                {
                    continue;
                }

                if (!htValidChars.TryGetValue(locale, out currentDictionary))
                {
                    currentDictionary = StringToDictionary(GetValidCharacters(locale));
                    htValidChars.Add(locale, currentDictionary);
                }

                // Running position of the current sequence within the token text.
                int offset = 0;

                foreach (string key in ParseCharacterSequences(tok.Text))
                {
                    bool keyMatches = (desiredKey == key);

                    // REVIEW (BobbydV): IndexOf causes false positives for certain
                    // characters (e.g., U+0234 & U+1234). I think Contains is easier to read
                    // and should work for both TE and Paratext for the "AlwaysValidCharacters"
                    // list. (TomB)
                    bool invalidItem = invalidCharactersOnly &&
                        !m_alwaysValidCharacters.Contains(key) &&
                        !currentDictionary.ContainsKey(key);

                    if ((lookingForASpecificKey && keyMatches) ||
                        (!lookingForASpecificKey && !invalidCharactersOnly) ||
                        (invalidCharactersOnly && invalidItem))
                    {
                        TextTokenSubstring tts = new TextTokenSubstring(tok, offset, key.Length);
                        m_characterSequences.Add(tts);
                    }

                    offset += key.Length;
                }
            }

            return m_characterSequences;
        }