Example #1
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Gets the sentence final punctuation from the punctuation patterns for the given
        /// writing system.
        /// </summary>
        /// <param name="ws">The writing system.</param>
        /// <param name="unicodeCharProps">The Unicode character properties engine.</param>
        /// <returns>sentence final punctuation patterns for this writing system</returns>
        /// ------------------------------------------------------------------------------------
        private string GetSentenceFinalPunctuation(IWritingSystem ws, ILgCharacterPropertyEngine unicodeCharProps)
        {
            // Nothing to collect when the writing system has no (non-blank) punctuation patterns.
            string punctuationPatterns = ws.PunctuationPatterns;
            if (string.IsNullOrEmpty(punctuationPatterns) || punctuationPatterns.Trim().Length == 0)
            {
                return string.Empty;
            }

            // The builder keeps the characters in first-seen order. The set provides the
            // "already added?" test so the builder does not have to be converted to a string
            // and rescanned for every candidate character.
            var sentenceFinalChars = new StringBuilder();
            var seenChars = new System.Collections.Generic.HashSet<char>();

            PuncPatternsList puncPatternsList = PuncPatternsList.Load(punctuationPatterns,
                ws.DisplayLabel);

            // Scan through all the punctuation patterns for this writing system.
            foreach (PuncPattern pattern in puncPatternsList)
            {
                // Only valid, word-final patterns are considered.
                if (pattern.Status != PuncPatternStatus.Valid ||
                    pattern.ContextPos != ContextPosition.WordFinal)
                {
                    continue;
                }

                foreach (char puncChar in pattern.Pattern)
                {
                    // HashSet.Add returns false when the character was recorded earlier,
                    // so each sentence-final character is appended at most once.
                    if (TsStringUtils.IsEndOfSentenceChar(puncChar,
                            unicodeCharProps.get_GeneralCategory(puncChar)) &&
                        seenChars.Add(puncChar))
                    {
                        sentenceFinalChars.Append(puncChar);
                    }
                }
            }

            return sentenceFinalChars.ToString();
        }
Example #2
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Returns a TextTokenSubstring for all occurrences of the desiredKey.
        /// </summary>
        /// <param name="tokens">The text tokens to scan for punctuation sequences.</param>
        /// <param name="desiredKey">e.g., _[_ or empty string to look for all patterns</param>
        /// <returns>The list of punctuation sequences collected from the tokens.</returns>
        /// ------------------------------------------------------------------------------------
        public List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens, string desiredKey)
        {
#if DEBUG
            // Debug builds materialize the tokens into a list; the Count test exists only so
            // the compiler does not warn about a variable that is assigned but never used.
            List<ITextToken> AllTokens = new List<ITextToken>(tokens);
            if (AllTokens.Count == 0)
            {
            }
#endif
            m_characterCategorizer = m_checksDataSource.CharacterCategorizer;

            // Prefer the XML punctuation patterns; when none are supplied, fall back to the
            // separate valid/invalid item parameters.
            string patternsXml = m_checksDataSource.GetParameterValue("PunctuationPatterns");
            bool hasPatterns = patternsXml != null && patternsXml.Trim().Length > 0;
            if (!hasPatterns)
            {
                ValidItems = m_checksDataSource.GetParameterValue(kValidItemsParameter);
                InvalidItems = m_checksDataSource.GetParameterValue(kInvalidItemsParameter);
            }
            else
            {
                m_validItemsList = new List<string>();
                m_invalidItemsList = new List<string>();

                string writingSystemName =
                    m_checksDataSource.GetParameterValue("DefaultWritingSystemName");
                PuncPatternsList loadedPatterns = PuncPatternsList.Load(patternsXml, writingSystemName);

                // Sort each pattern into the valid or invalid list.
                foreach (PuncPattern loaded in loadedPatterns)
                {
                    if (!loaded.Valid)
                    {
                        m_invalidItemsList.Add(loaded.Pattern);
                    }
                    else
                    {
                        m_validItemsList.Add(loaded.Pattern);
                    }
                }
            }

            // Any value other than "Advanced" or "Intermediate" (including null) means Basic.
            string levelName = m_checksDataSource.GetParameterValue("PunctCheckLevel");
            CheckingLevel checkingLevel = CheckingLevel.Basic;
            if (levelName == "Advanced")
            {
                checkingLevel = CheckingLevel.Advanced;
            }
            else if (levelName == "Intermediate")
            {
                checkingLevel = CheckingLevel.Intermediate;
            }

            // Only the first character of the configured whitespace representation is used.
            string configuredWhitespace = m_checksDataSource.GetParameterValue("PunctWhitespaceChar");
            if (!String.IsNullOrEmpty(configuredWhitespace))
            {
                s_whitespaceRep = configuredWhitespace.Substring(0, 1);
            }

            string preferredLocale = m_checksDataSource.GetParameterValue("PreferredLocale");
            if (preferredLocale == null)
            {
                preferredLocale = string.Empty;
            }

            QuotationMarkCategorizer quoteCategorizer = new QuotationMarkCategorizer(m_checksDataSource);

            // Two independent processors: one for body text and one for notes.
            ProcessPunctationTokens bodyText = new ProcessPunctationTokens(
                m_characterCategorizer, quoteCategorizer, checkingLevel);
            ProcessPunctationTokens noteText = new ProcessPunctationTokens(
                m_characterCategorizer, quoteCategorizer, checkingLevel);

            m_punctuationSequences = new List<TextTokenSubstring>();

            foreach (ITextToken token in tokens)
            {
                // Skip tokens without text and tokens outside the preferred locale.
                if (token.Text == null)
                {
                    continue;
                }
                if ((token.Locale ?? string.Empty) != preferredLocale)
                {
                    continue;
                }

                if (token.TextType == TextType.Note)
                {
                    // A note start closes off whatever the previous note left pending.
                    if (token.IsNoteStart)
                    {
                        noteText.FinalizeResult(desiredKey, m_punctuationSequences, true);
                    }
                    noteText.ProcessToken(token, desiredKey, m_punctuationSequences);
                    continue;
                }

                if (token.TextType == TextType.Verse || token.TextType == TextType.Other)
                {
                    // Body text: finish any note in progress, then feed the body processor.
                    noteText.FinalizeResult(desiredKey, m_punctuationSequences, true);
                    bodyText.ProcessToken(token, desiredKey, m_punctuationSequences);
                    continue;
                }

                // Remaining text types only matter when they begin a paragraph.
                if (token.IsParagraphStart)
                {
                    bodyText.FinalizeResult(desiredKey, m_punctuationSequences, true);
                    bodyText.TreatAsParagraphStart = true;
                }
            }

            // Flush whatever is still pending: notes first, then body text.
            noteText.FinalizeResult(desiredKey, m_punctuationSequences, true);
            bodyText.FinalizeResult(desiredKey, m_punctuationSequences, true);

            return m_punctuationSequences;
        }