/// ------------------------------------------------------------------------------------
/// <summary>
/// Appends the single punctuation character at the given index to the punctuation
/// token list, then checks whether the last three tokens form a
/// punctuation/whitespace/punctuation run whose two punctuation marks point the same
/// way. If so, the whitespace token between them is re-typed as a quote separator.
/// </summary>
/// <param name="tok">The text token</param>
/// <param name="i">The index of the punctuation character</param>
/// ------------------------------------------------------------------------------------
private void ProcessPunctuation(ITextToken tok, int i)
{
    TextTokenSubstring punctChar = new TextTokenSubstring(tok, i, 1);
    bool opensQuote = m_quotationCategorizer.IsInitialPunctuation(punctChar.Text);
    bool closesQuote = m_quotationCategorizer.IsFinalPunctuation(punctChar.Text);
    m_puncts.Add(new PunctuationToken(PunctuationTokenType.punctuation, punctChar,
        opensQuote, closesQuote));

    // Special case: a sequence such as
    //     U+201C LEFT DOUBLE QUOTATION MARK
    //     U+0020 SPACE
    //     U+2018 LEFT SINGLE QUOTATION MARK
    // is treated as if the space were not there, so that a quotation mark can still
    // be considered word-initial even though a space follows it.
    int iLast = m_puncts.Count - 1;
    if (iLast < 2)
    {
        // Fewer than three tokens collected so far; nothing to inspect.
        return;
    }

    // The middle token must be ordinary (non-paragraph-break) whitespace and the token
    // before it must be punctuation.
    bool spaceBetweenPunctuation =
        m_puncts[iLast - 1].TokenType == PunctuationTokenType.whitespace &&
        !m_puncts[iLast - 1].IsParaBreak &&
        m_puncts[iLast - 2].TokenType == PunctuationTokenType.punctuation;
    if (!spaceBetweenPunctuation)
    {
        return;
    }

    // Both punctuation marks must be initial, or both must be final.
    bool sameDirection =
        (m_puncts[iLast - 2].IsInitial && m_puncts[iLast].IsInitial) ||
        (m_puncts[iLast - 2].IsFinal && m_puncts[iLast].IsFinal);
    if (sameDirection)
    {
        m_puncts[iLast - 1].TokenType = PunctuationTokenType.quoteSeparator;
    }
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Reports a capitalization error for the first word-forming character found in the
/// current character style run, when that style is listed in the capitalized styles
/// dictionary as a character style. Only the first character seen since
/// m_foundCharacterText was last cleared is considered; the flag is set on every call.
/// </summary>
/// <param name="tok">The Scripture token (not read by this method).</param>
/// <param name="ttsFirstLetter">The token substring of the first word-forming character
/// in the given token. Its InventoryText and Message are overwritten when an error is
/// reported.</param>
/// <param name="result">The list that receives the error substring.</param>
/// <returns><c>true</c> if an error was added to the list of results; otherwise
/// <c>false</c></returns>
/// ------------------------------------------------------------------------------------
private bool CheckForCharStyleCapilizationError(ITextToken tok,
    TextTokenSubstring ttsFirstLetter, List<TextTokenSubstring> result)
{
    // Text for this character style has already been seen; nothing more to check.
    if (m_foundCharacterText)
    {
        return false;
    }
    m_foundCharacterText = true;

    // The caller has established that the first word-forming character is lowercase.
    // Consult the capitalized styles dictionary to decide whether that is an error.
    StyleCapInfo capInfo;
    bool isCapitalizedCharStyle =
        m_allCapitalizedStyles.TryGetValue(m_characterStyle, out capInfo) &&
        capInfo.m_type == StyleInfo.StyleTypes.character;
    if (!isCapitalizedCharStyle)
    {
        return false;
    }

    ttsFirstLetter.InventoryText = m_characterStyle;
    ttsFirstLetter.Message = CapitalizationCheck.GetErrorMessage(m_checksDataSource,
        capInfo.m_capCheck, m_characterStyle);
    result.Add(ttsFirstLetter);
    return true;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Wraps the single character at the given index in a TextTokenSubstring, gives it the
/// default "Unmatched punctuation" message and appends it to the list of pair tokens
/// found so far.
/// </summary>
/// <param name="tok">The text token containing the character.</param>
/// <param name="i">The index of the character within the token.</param>
/// ------------------------------------------------------------------------------------
private void StoreFoundPairToken(ITextToken tok, int i)
{
    TextTokenSubstring pairChar = new TextTokenSubstring(tok, i, 1);

    // This is only a starting message; it may be replaced later.
    string defaultMessage = m_checksDataSource.GetLocalizedString("Unmatched punctuation");
    pairChar.Message = defaultMessage;

    m_pairTokensFound.Add(pairChar);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="PunctuationToken"/> class by storing
/// the four supplied values in the corresponding members.
/// </summary>
/// <param name="tokenType">Type of the token.</param>
/// <param name="tts">The TextTokenSubstring.</param>
/// <param name="isInitial">if set to <c>true</c> is opening quotation mark.</param>
/// <param name="isFinal">if set to <c>true</c> is closing quotation mark.</param>
/// ------------------------------------------------------------------------------------
public PunctuationToken(PunctuationTokenType tokenType, TextTokenSubstring tts,
    bool isInitial, bool isFinal)
{
    this.TokenType = tokenType;
    this.Tts = tts;
    this.IsInitial = isInitial;
    this.IsFinal = isFinal;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Gets the substring for the character starting at position iChar. When the token is
/// a VerseTextToken, the substring is built on the token it wraps rather than on the
/// wrapper itself.
/// </summary>
/// <param name="tok">The token</param>
/// <param name="iChar">The index of the character.</param>
/// <returns>A substring starting at iChar whose length is the value returned by
/// GetLengthOfChar for that position.</returns>
/// ------------------------------------------------------------------------------------
private TextTokenSubstring GetSubstring(ITextToken tok, int iChar)
{
    int charLength = GetLengthOfChar(tok, iChar);

    // Unwrap a verse token so the substring refers to the wrapped token.
    ITextToken target = tok;
    if (tok is VerseTextToken)
    {
        target = ((VerseTextToken)tok).Token;
    }

    return new TextTokenSubstring(target, iChar, charLength);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Builds a substring covering the given word and adds it to the result list when it
/// passes the key filter. A null or empty desiredKey means "no filter", so every word
/// is added; otherwise only words whose InventoryText equals desiredKey are added.
/// </summary>
/// <remarks>
/// desiredKey and result are not parameters; they are resolved from the enclosing
/// scope, which is not visible in this excerpt.
/// </remarks>
/// <param name="tok">The text token that contains the word.</param>
/// <param name="wap">The word whose Offset and Word.Length delimit the substring.</param>
/// ------------------------------------------------------------------------------------
private void AddWord(ITextToken tok, WordAndPunct wap)
{
    TextTokenSubstring tts = new TextTokenSubstring(tok, wap.Offset, wap.Word.Length);

    // Treat null the same as "" so that a missing key does not silently filter out
    // every word (a null key can never equal a real inventory text).
    if (string.IsNullOrEmpty(desiredKey) || desiredKey == tts.InventoryText)
    {
        result.Add(tts);
    }
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Builds a substring covering the given word and appends it to m_result, unless a
/// non-empty desired key was supplied and the substring's InventoryText differs from
/// it.
/// </summary>
/// <param name="tok">The text token that contains the word.</param>
/// <param name="wap">The word whose Offset and Word.Length delimit the substring.</param>
/// <param name="desiredKey">The only inventory text to accept, or null/empty to accept
/// every word.</param>
/// ------------------------------------------------------------------------------------
private void AddWord(ITextToken tok, WordAndPunct wap, string desiredKey)
{
    TextTokenSubstring wordSubstring =
        new TextTokenSubstring(tok, wap.Offset, wap.Word.Length);

    bool acceptAll = string.IsNullOrEmpty(desiredKey);
    if (!acceptAll && !(desiredKey == wordSubstring.InventoryText))
    {
        // A specific key was requested and this word is not it.
        return;
    }

    m_result.Add(wordSubstring);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Records an error by formatting the message, wrapping the offending text in a
/// TextTokenSubstring and passing it, together with CheckId, to m_recordError.
/// </summary>
/// <param name="token">The current token being processed.</param>
/// <param name="offset">Offset in the token where the offending text begins.</param>
/// <param name="length">The length of the offending text.</param>
/// <param name="message">The message. It is always treated as a composite format
/// string, even when no arguments are supplied, so literal braces must be doubled.
/// </param>
/// <param name="args">The arguments to format the message.</param>
/// ------------------------------------------------------------------------------------
private void AddError(ITextToken token, int offset, int length, string message,
    params object[] args)
{
    string text;
    if (args == null)
    {
        // An explicit null array was passed; format the message with no arguments.
        text = String.Format(message);
    }
    else
    {
        text = string.Format(message, args);
    }

    m_recordError(new RecordErrorEventArgs(
        new TextTokenSubstring(token, offset, length, text), CheckId));
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Processes the Scripture token: records its paragraph and character styles, then
/// walks the token text (as returned by RemoveAbbreviations) looking for lowercase
/// word-forming characters that should have been capitalized. A lowercase character
/// is first offered to the paragraph-style check, then to the character-style check;
/// only if neither reports an error and the character is at the start of a sentence
/// is a sentence-initial error added.
/// </summary>
/// <param name="tok">The token.</param>
/// <param name="result">The list that receives any error substrings.</param>
/// ------------------------------------------------------------------------------------
public void ProcessToken(ITextToken tok, List<TextTokenSubstring> result)
{
    string text = RemoveAbbreviations(tok);
    RecordParagraphStyle(tok);
    RecordCharacterStyle(tok);

    // There must be at least one character in the token to check the case of.
    if (tok.Text == String.Empty)
    {
        return;
    }

    int length = text.Length;
    for (int iChar = 0; iChar < length; iChar++)
    {
        char ch = text[iChar];

        if (IsSentenceFinalPunctuation(ch))
        {
            // A new sentence starts after this character unless it is immediately
            // followed by a digit. Punctuation at the very end of the token counts
            // as starting a sentence.
            int iNext = iChar + 1;
            m_fAtSentenceStart = (iNext == length) || !char.IsDigit(text[iNext]);
        }
        else if (m_categorizer.IsWordFormingCharacter(ch))
        {
            if (m_categorizer.IsLower(ch))
            {
                TextTokenSubstring tts = GetSubstring(tok, iChar);

                // The character-style check runs only when the paragraph-style check
                // reported nothing.
                bool styleErrorReported =
                    CheckForParaCapitalizationError(tok, tts, result) ||
                    CheckForCharStyleCapilizationError(tok, tts, result);

                if (!styleErrorReported && m_fAtSentenceStart)
                {
                    tts.Message = CapitalizationCheck.GetErrorMessage(m_checksDataSource,
                        StyleCapInfo.CapCheckTypes.SentenceInitial, string.Empty);
                    result.Add(tts);
                }
            }

            // Any word-forming character, whatever its case, ends the "start of
            // sentence / paragraph / character style" state.
            m_fAtSentenceStart = false;
            m_foundCharacterText = true;
            m_foundParagraphText = true;
        }
    }
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Looks at the two most recently found pair tokens. If the earlier one opens and the
/// later one closes a matched pair (as decided by m_pairList.IsMatchedPair), both are
/// removed from the found list because that pair is complete.
/// </summary>
/// ------------------------------------------------------------------------------------
private void RemoveIfMatchedPairFound()
{
    int iLast = m_pairTokensFound.Count - 1;
    if (iLast < 1)
    {
        // Fewer than two tokens; there is no pair to test.
        return;
    }

    TextTokenSubstring closer = m_pairTokensFound[iLast];
    TextTokenSubstring opener = m_pairTokensFound[iLast - 1];
    if (!m_pairList.IsMatchedPair(opener.Text, closer.Text))
    {
        return;
    }

    // Matched pair: drop the closing token, then the opening token.
    m_pairTokensFound.RemoveAt(iLast);
    m_pairTokensFound.RemoveAt(iLast - 1);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Inspects the four most recently found pair tokens. When the first two are both
/// opening marks, the first matches the third and the second matches the fourth
/// (i.e. the pairs interleave), the message on all four tokens is changed to the
/// localized "Overlapping pair" text.
/// </summary>
/// ------------------------------------------------------------------------------------
private void RecordOverlappingPairs()
{
    int count = m_pairTokensFound.Count;
    if (count < 4)
    {
        return;
    }

    TextTokenSubstring first = m_pairTokensFound[count - 4];
    TextTokenSubstring second = m_pairTokensFound[count - 3];
    TextTokenSubstring third = m_pairTokensFound[count - 2];
    TextTokenSubstring fourth = m_pairTokensFound[count - 1];

    bool twoOpeners = m_pairList.IsOpen(first.Text) && m_pairList.IsOpen(second.Text);
    if (!twoOpeners)
    {
        return;
    }

    bool interleaved = m_pairList.IsMatchedPair(first.Text, third.Text) &&
        m_pairList.IsMatchedPair(second.Text, fourth.Text);
    if (!interleaved)
    {
        return;
    }

    // Overlapping pairs found: replace the message on each of the four tokens.
    string overlapMessage = m_checksDataSource.GetLocalizedString("Overlapping pair");
    fourth.Message = overlapMessage;
    third.Message = overlapMessage;
    second.Message = overlapMessage;
    first.Message = overlapMessage;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="ContextInfo"/> class from a
/// TextTokenSubstring. All work is delegated to another constructor, which receives
/// the substring's offset, its full token text and the Scripture reference string of
/// its first token.
/// </summary>
/// <param name="chr">The character or pattern to which this context applies.</param>
/// <param name="tts">The TextTokenSubstring.</param>
/// ------------------------------------------------------------------------------------
internal ContextInfo(string chr, TextTokenSubstring tts)
    : this(chr, tts.Offset, tts.FullTokenText, tts.FirstToken.ScrRefString)
{
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="ContextInfo"/> class for a punctuation
/// pattern. All work is delegated to another constructor, which receives the
/// substring's offset, its full token text and the Scripture reference string of its
/// first token.
/// </summary>
/// <param name="pattern">The punctuation pattern.</param>
/// <param name="tts">The TextTokenSubstring.</param>
/// ------------------------------------------------------------------------------------
internal ContextInfo(PuncPattern pattern, TextTokenSubstring tts)
    : this(pattern, tts.Offset, tts.FullTokenText, tts.FirstToken.ScrRefString)
{
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Get (invalid) character references. Each token's text is split into character
/// sequences, and a substring is recorded for every sequence that is selected by the
/// desired key and/or the invalid-characters filter. The list is also stored in
/// m_characterSequences.
/// </summary>
/// <param name="tokens">The tokens to scan. Tokens with null text are skipped; when
/// invalidCharactersOnly is false, tokens whose locale differs from the
/// "PreferredLocale" parameter are skipped too.</param>
/// <param name="desiredKey">A specific character sequence to look for, or "" to not
/// filter by key.</param>
/// <param name="invalidCharactersOnly">If <c>true</c>, sequences that are neither in
/// the always-valid list nor in the locale's valid characters are recorded.</param>
/// <returns>The list of recorded substrings.</returns>
/// ------------------------------------------------------------------------------------
private List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens,
    string desiredKey, bool invalidCharactersOnly)
{
    if (m_categorizer == null)
    {
        m_categorizer = m_checksDataSource.CharacterCategorizer;
    }

    m_characterSequences = new List<TextTokenSubstring>();

    // Valid-character dictionaries, built once per locale encountered.
    Dictionary<string, Dictionary<string, bool>> validCharsByLocale =
        new Dictionary<string, Dictionary<string, bool>>();
    Dictionary<string, bool> validChars = null;

    string preferredLocale =
        m_checksDataSource.GetParameterValue("PreferredLocale") ?? string.Empty;
    bool wantSpecificKey = (desiredKey != "");

    foreach (ITextToken tok in tokens)
    {
        string locale = tok.Locale ?? string.Empty;
        if (tok.Text == null || (!invalidCharactersOnly && locale != preferredLocale))
        {
            continue;
        }

        if (!validCharsByLocale.TryGetValue(locale, out validChars))
        {
            validChars = StringToDictionary(GetValidCharacters(locale));
            validCharsByLocale.Add(locale, validChars);
        }

        int offset = 0;
        foreach (string key in ParseCharacterSequences(tok.Text))
        {
            // REVIEW (BobbydV): IndexOf causes false positives for certain characters
            // (e.g., U+0234 & U+1234). Contains is easier to read and should work for
            // both TE and Paratext for the "AlwaysValidCharacters" list. (TomB)
            bool isInvalid = invalidCharactersOnly &&
                !m_alwaysValidCharacters.Contains(key) &&
                !validChars.ContainsKey(key);

            // With a specific key: take matches, plus any invalid item.
            // Without one: take everything, unless restricted to invalid items.
            bool include = wantSpecificKey
                ? (desiredKey == key || isInvalid)
                : (!invalidCharactersOnly || isInvalid);

            if (include)
            {
                m_characterSequences.Add(new TextTokenSubstring(tok, offset, key.Length));
            }

            offset += key.Length;
        }
    }

    return m_characterSequences;
}