/// ------------------------------------------------------------------------------------
/// <summary>
/// Intermediate-level finalization: walks the collected token sequence and produces
/// one inventory pattern per whitespace-delimited run of tokens. A whitespace token
/// (or the end of the sequence) closes the current pattern; a quote separator (the
/// whitespace between two quotes facing the same direction) does not.
/// </summary>
/// <param name="desiredKey">If specified, indicates a specific punctuation pattern to
/// seek (all others will be discarded); to retrieve all punctuation substrings, specify
/// the empty string.</param>
/// <param name="result">List that receives the TextTokenSubstring of every pattern
/// that is kept.</param>
/// ------------------------------------------------------------------------------------
private void IntermediateFinalize(string desiredKey, List<TextTokenSubstring> result)
{
	// Text of the pattern currently being assembled.
	string pattern = "";
	// Token whose substring represents the current pattern. It stays null until a
	// punctuation token is met after the previous pattern was closed.
	PunctuationToken anchor = null;

	for (int i = 0; i < m_puncts.Count; ++i)
	{
		PunctuationToken current = m_puncts[i];
		PunctuationTokenType type = current.TokenType;

		pattern += current.ToString();

		if (type == PunctuationTokenType.punctuation ||
			type == PunctuationTokenType.quoteSeparator)
		{
			Debug.Assert(anchor != null || type == PunctuationTokenType.punctuation,
				"Quote separator should never be the first non-whitespace character in a sequence (after all, it IS whitespace!)");

			if (anchor != null)
			{
				// Fold this token into the anchor's substring. When it starts in a
				// different text token than the one the anchor currently ends in,
				// that token is appended to the anchor's substring first.
				if (current.Tts != null && anchor.Tts.LastToken != current.Tts.FirstToken)
				{
					Debug.Assert(current.Tts.FirstToken == current.Tts.LastToken);
					anchor.Tts.AddToken(current.Tts.FirstToken);
				}
				// NOTE(review): relies on TextTokenSubstring's ++ operator, presumably
				// lengthening the substring by one character - confirm.
				anchor.Tts++;
			}
			else
			{
				// Every reported pattern is represented by its first punctuation token.
				anchor = current;
			}
		}

		// Only a whitespace token or the final token of the sequence closes a pattern.
		bool closesPattern = type == PunctuationTokenType.whitespace ||
			i == m_puncts.Count - 1;
		if (!closesPattern)
		{
			continue;
		}

		// Without an anchor there is nothing to report (e.g. leading whitespace).
		if (anchor != null)
		{
			anchor.Tts.InventoryText = pattern;
			if (desiredKey == "" || desiredKey == anchor.Tts.InventoryText)
			{
				result.Add(anchor.Tts);
			}
		}

		// The whitespace that closed this pattern also opens the next one.
		pattern = (type == PunctuationTokenType.whitespace) ? current.ToString() : "";
		anchor = null;
	}
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Converts the punctuation tokens collected so far into inventory results, using the
/// finalize routine that matches the current checking level, and then empties the
/// collected sequence. When the sequence contains no punctuation token, nothing is
/// reported and the sequence is simply emptied.
/// </summary>
/// <param name="desiredKey">Handed unchanged to the level-specific finalize routine:
/// a specific punctuation pattern to seek, or the empty string to retrieve all
/// patterns.</param>
/// <param name="result">List that the level-specific finalize routine adds
/// TextTokenSubstring items to.</param>
/// <param name="addWhitespace">True to treat the sequence as if it were followed by
/// whitespace (for example at the end of a footnote), so that trailing punctuation is
/// not considered word-medial.</param>
/// ------------------------------------------------------------------------------------
public void FinalizeResult(string desiredKey, List<TextTokenSubstring> result,
	bool addWhitespace)
{
	// Remember whether the sequence ends in a number token. The flag is tested
	// elsewhere to help detect a punctuation character sitting between two digits.
	int tokenCount = m_puncts.Count;
	m_finalizedWithNumber = tokenCount > 0 &&
		m_puncts[tokenCount - 1].TokenType == PunctuationTokenType.number;

	// Find the first punctuation token in the sequence, if there is one.
	PunctuationToken firstPunctuation = null;
	for (int i = 0; i < m_puncts.Count; i++)
	{
		PunctuationToken candidate = m_puncts[i];
		if (candidate.TokenType == PunctuationTokenType.punctuation)
		{
			firstPunctuation = candidate;
			break;
		}
	}

	if (firstPunctuation != null)
	{
		// Pretend the sequence is followed by whitespace when the caller asks for it,
		// e.g. "\f + text.\f*": otherwise the "." would be treated as word-medial
		// instead of word-final.
		if (addWhitespace)
		{
			ProcessWhitespaceOrParagraph(false);
		}

		switch (m_level)
		{
			case CheckingLevel.Basic:
				BasicFinalize(desiredKey, result);
				break;
			case CheckingLevel.Intermediate:
				IntermediateFinalize(desiredKey, result);
				break;
			case CheckingLevel.Advanced:
				AdvancedFinalize(firstPunctuation, desiredKey, result);
				break;
		}
	}

	// The sequence is consumed whether or not anything was reported.
	m_puncts.Clear();
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Advanced-level finalization: reports the entire collected sequence, punctuation and
/// whitespace alike, as one single pattern. It is called advanced because it yields
/// many more inventory items for the user to look at, and only advanced users (we
/// hope) will look at these results.
/// </summary>
/// <param name="pTok">The punctuation token that represents the sequence; its
/// TextTokenSubstring receives the combined pattern as its inventory text.</param>
/// <param name="desiredKey">If specified, indicates a specific punctuation pattern to
/// seek (all others will be discarded); to retrieve all punctuation substrings, specify
/// the empty string.</param>
/// <param name="result">List that receives the TextTokenSubstring when the pattern is
/// kept.</param>
/// ------------------------------------------------------------------------------------
private void AdvancedFinalize(PunctuationToken pTok, string desiredKey,
	List<TextTokenSubstring> result)
{
	// Concatenate the text of every collected token into a single pattern.
	// (An assertion that each token's substring shares pTok's text token used to be
	// checked here; it is disabled in the original code.)
	string combined = String.Empty;
	for (int i = 0; i < m_puncts.Count; i++)
	{
		combined += m_puncts[i].ToString();
	}

	TextTokenSubstring substring = pTok.Tts;
	substring.InventoryText = combined;

	// Keep the pattern when no filter was given or when it matches the filter.
	bool keep = desiredKey == String.Empty || desiredKey == substring.InventoryText;
	if (keep)
	{
		result.Add(substring);
	}
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Advanced-level finalization: treats the whole collected run of punctuation and
/// whitespace tokens as a single pattern. It is called advanced because it produces
/// many more inventory items for the user to look at, and only advanced users (we
/// hope) will look at these results.
/// </summary>
/// <param name="pTok">The punctuation token standing for the sequence; the inventory
/// text of its TextTokenSubstring is set to the combined pattern.</param>
/// <param name="desiredKey">If specified, indicates a specific punctuation pattern to
/// seek (all others will be discarded); to retrieve all punctuation substrings, specify
/// the empty string.</param>
/// <param name="result">List to which the TextTokenSubstring is added when the pattern
/// passes the filter.</param>
/// ------------------------------------------------------------------------------------
private void AdvancedFinalize(PunctuationToken pTok, string desiredKey,
	List<TextTokenSubstring> result)
{
	// Join the text of all collected tokens, in order, into one inventory key.
	string inventoryKey = String.Empty;
	foreach (PunctuationToken token in m_puncts)
	{
		inventoryKey = inventoryKey + token.ToString();
	}

	pTok.Tts.InventoryText = inventoryKey;

	// A non-empty filter that does not match means this pattern is discarded.
	if (desiredKey != String.Empty && desiredKey != pTok.Tts.InventoryText)
	{
		return;
	}

	result.Add(pTok.Tts);
}