Example #1
0
        /// <summary>
        /// Runs the punctuation check at the "Intermediate" level with one valid pattern
        /// ("._", word-final) and one invalid pattern (",", word-breaking). Expects exactly
        /// two errors: the invalid "," and the "!" that is not in the pattern list.
        /// </summary>
        public void Check_ValidPatternsAreNotReported()
        {
            // Paragraph contents (without the leading marker) as expected in reported errors.
            const string expectedText = "This is nice. By nice,I mean really nice!";

            var puncPatterns = new PuncPatternsList
            {
                new PuncPattern
                {
                    Pattern    = "._",
                    ContextPos = ContextPosition.WordFinal,
                    Status     = PuncPatternStatus.Valid
                },
                new PuncPattern
                {
                    Pattern    = ",",
                    ContextPos = ContextPosition.WordBreaking,
                    Status     = PuncPatternStatus.Invalid
                }
            };

            m_dataSource.SetParameterValue("PunctuationPatterns", puncPatterns.XmlString);
            m_dataSource.SetParameterValue("PunctCheckLevel", "Intermediate");

            var check = new PunctuationCheck(m_dataSource);
            m_dataSource.Text = "\\p This is nice. By nice,I mean really nice!";
            check.Check(m_dataSource.TextTokens(), RecordError);

            Assert.AreEqual(2, m_errors.Count);
            CheckError(0, expectedText, 21, ",", "Invalid punctuation pattern");
            CheckError(1, expectedText, 40, "!", "Unspecified use of punctuation pattern");
        }
Example #2
0
        /// <summary>
        /// Runs the punctuation check at the "Intermediate" level on text whose closing
        /// quotation marks are separated by U+202F. With ",_", "_\u201C" and "_\u2018"
        /// registered as valid, expects a single error covering the whole trailing
        /// sequence "!\u2019\u202F\u201D".
        /// </summary>
        public void Check_PatternsWithSpaceSeparatedQuoteMarks()
        {
            // Paragraph contents (without the leading marker) as expected in the reported error.
            const string expectedText = "Tom replied, \u201CBill said, \u2018Yes!\u2019\u202F\u201D";

            var puncPatterns = new PuncPatternsList
            {
                new PuncPattern
                {
                    Pattern    = ",_",
                    ContextPos = ContextPosition.WordFinal,
                    Status     = PuncPatternStatus.Valid
                },
                new PuncPattern
                {
                    Pattern    = "_\u201C",
                    ContextPos = ContextPosition.WordInitial,
                    Status     = PuncPatternStatus.Valid
                },
                new PuncPattern
                {
                    Pattern    = "_\u2018",
                    ContextPos = ContextPosition.WordInitial,
                    Status     = PuncPatternStatus.Valid
                }
            };

            m_dataSource.SetParameterValue("PunctuationPatterns", puncPatterns.XmlString);
            m_dataSource.SetParameterValue("PunctCheckLevel", "Intermediate");

            var check = new PunctuationCheck(m_dataSource);
            m_dataSource.Text = "\\p Tom replied, \u201CBill said, \u2018Yes!\u2019\u202F\u201D";
            check.Check(m_dataSource.TextTokens(), RecordError);

            Assert.AreEqual(1, m_errors.Count);
            CheckError(0, expectedText, 29, "!\u2019\u202F\u201D", "Unspecified use of punctuation pattern");
        }
Example #3
0
        /// <summary>
        /// Stores six valid, word-final Arabic punctuation patterns on the default vernacular
        /// writing system and expects the "SentenceFinalPunctuation" parameter to contain
        /// only the full stop (U+06D4) and the question mark (U+061F).
        /// </summary>
        public void GetParameterValue_SentenceFinalPunctuationNR()
        {
            // Look up the default vernacular writing system.
            int            vernWsHandle = Cache.DefaultVernWs;
            IWritingSystem vernWs       = Cache.ServiceLocator.WritingSystemManager.Get(vernWsHandle);

            // Arabic punctuation, in order: percent sign (066A), decimal separator (066B),
            // thousands separator (066C), five pointed star (066D), full stop (06D4),
            // question mark (061F).
            string[] arabicPunctuation =
            {
                "\u066A ", "\u066B ", "\u066C ", "\u066D ", "\u06D4 ", "\u061F "
            };

            var patterns = new PuncPatternsList();
            foreach (string punctuation in arabicPunctuation)
            {
                patterns.Add(new PuncPattern(punctuation, ContextPosition.WordFinal, PuncPatternStatus.Valid));
            }

            vernWs.PunctuationPatterns = patterns.XmlString;

            // Only the sentence-final characters should come back.
            string actual = m_dataSource.GetParameterValue("SentenceFinalPunctuation");
            Assert.AreEqual("\u06D4\u061F", actual);
        }
Example #4
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Gets the sentence final punctuation from the punctuation patterns for the given
        /// writing system.
        /// </summary>
        /// <param name="ws">The writing system whose punctuation patterns are scanned.</param>
        /// <param name="unicodeCharProps">The unicode character properties engine, used to
        /// look up the general category of each candidate character.</param>
        /// <returns>Each distinct end-of-sentence character found in the valid, word-final
        /// patterns, in order of first appearance; an empty string when the writing system
        /// has no (or only blank) punctuation patterns.</returns>
        /// ------------------------------------------------------------------------------------
        private string GetSentenceFinalPunctuation(IWritingSystem ws, ILgCharacterPropertyEngine unicodeCharProps)
        {
            string punctuationPatterns = ws.PunctuationPatterns;

            // Nothing to scan when the patterns are missing or contain only whitespace.
            if (punctuationPatterns == null || punctuationPatterns.Trim().Length == 0)
            {
                return(string.Empty);
            }

            var strBldr = new StringBuilder();
            PuncPatternsList puncPatternsList = PuncPatternsList.Load(punctuationPatterns,
                                                                      ws.DisplayLabel);
            // Scan through all the punctuation patterns for this writing system.
            foreach (PuncPattern pattern in puncPatternsList)
            {
                // Only valid, word-final patterns can contribute sentence-final punctuation.
                if (pattern.Status != PuncPatternStatus.Valid ||
                    pattern.ContextPos != ContextPosition.WordFinal)
                {
                    continue;
                }

                // A pattern without text has nothing to contribute (and cannot be enumerated).
                if (string.IsNullOrEmpty(pattern.Pattern))
                {
                    continue;
                }

                // Scan through the pattern string...
                foreach (char puncChar in pattern.Pattern)
                {
                    if (!TsStringUtils.IsEndOfSentenceChar(puncChar,
                                                           unicodeCharProps.get_GeneralCategory(puncChar)))
                    {
                        continue;
                    }

                    // ...and add each sentence-final character only once. Search the builder
                    // directly instead of allocating a new string for every candidate.
                    bool alreadyAdded = false;
                    for (int i = 0; i < strBldr.Length && !alreadyAdded; i++)
                    {
                        alreadyAdded = strBldr[i] == puncChar;
                    }

                    if (!alreadyAdded)
                    {
                        strBldr.Append(puncChar);
                    }
                }
            }

            return(strBldr.ToString());
        }
Example #5
0
        /// <summary>
        /// Runs the punctuation check at the "Intermediate" level on two paragraphs, one
        /// ending in U+201D and one consisting solely of U+2019. Only "._" is registered as
        /// valid, so each quotation mark is expected to be reported as an unspecified pattern.
        /// </summary>
        public void Check_ParaWithSingleQuotationMark()
        {
            const string unspecifiedMessage = "Unspecified use of punctuation pattern";

            var puncPatterns = new PuncPatternsList
            {
                new PuncPattern
                {
                    Pattern    = "._",
                    ContextPos = ContextPosition.WordFinal,
                    Status     = PuncPatternStatus.Valid
                }
            };

            m_dataSource.SetParameterValue("PunctuationPatterns", puncPatterns.XmlString);
            m_dataSource.SetParameterValue("PunctCheckLevel", "Intermediate");

            var check = new PunctuationCheck(m_dataSource);
            m_dataSource.Text = "\\p wow\u201D\\p \u2019";
            check.Check(m_dataSource.TextTokens(), RecordError);

            Assert.AreEqual(2, m_errors.Count);
            CheckError(0, "wow\u201D", 3, "\u201D", unspecifiedMessage);
            CheckError(1, "\u2019", 0, "\u2019", unspecifiedMessage);
        }
Example #6
0
        /// <summary>
        /// Stores eight valid, word-final punctuation patterns on the default vernacular
        /// writing system and expects the "SentenceFinalPunctuation" parameter to contain
        /// only ".?!".
        /// </summary>
        public void GetParameterValue_SentenceFinalPunctuation()
        {
            // Look up the default vernacular writing system.
            int            vernWsHandle = Cache.DefaultVernWs;
            IWritingSystem vernWs       = Cache.ServiceLocator.WritingSystemManager.Get(vernWsHandle);

            var patterns = new PuncPatternsList
            {
                new PuncPattern(". ", ContextPosition.WordFinal, PuncPatternStatus.Valid),
                new PuncPattern("? ", ContextPosition.WordFinal, PuncPatternStatus.Valid),
                new PuncPattern("! ", ContextPosition.WordFinal, PuncPatternStatus.Valid),
                new PuncPattern("; ", ContextPosition.WordFinal, PuncPatternStatus.Valid),
                new PuncPattern("- ", ContextPosition.WordFinal, PuncPatternStatus.Valid),
                new PuncPattern(") ", ContextPosition.WordFinal, PuncPatternStatus.Valid),
                new PuncPattern("( ", ContextPosition.WordFinal, PuncPatternStatus.Valid),
                new PuncPattern(": ", ContextPosition.WordFinal, PuncPatternStatus.Valid)
            };

            vernWs.PunctuationPatterns = patterns.XmlString;

            // Only the sentence-final characters should come back.
            string actual = m_dataSource.GetParameterValue("SentenceFinalPunctuation");
            Assert.AreEqual(".?!", actual);
        }
		/// <summary>
		/// Registers ". ", "? ", "! ", "; ", "- ", ") ", "( " and ": " as valid word-final
		/// patterns on the default vernacular writing system, then verifies that the
		/// "SentenceFinalPunctuation" parameter yields ".?!".
		/// </summary>
		public void GetParameterValue_SentenceFinalPunctuation()
		{
			// Fetch the default vernacular writing system.
			int wsHandle = Cache.DefaultVernWs;
			IWritingSystem writingSystem = Cache.ServiceLocator.WritingSystemManager.Get(wsHandle);

			// Every pattern shares the same position and status; only the text differs.
			string[] patternTexts = { ". ", "? ", "! ", "; ", "- ", ") ", "( ", ": " };
			var patternList = new PuncPatternsList();
			foreach (string text in patternTexts)
			{
				patternList.Add(new PuncPattern(text, ContextPosition.WordFinal, PuncPatternStatus.Valid));
			}
			writingSystem.PunctuationPatterns = patternList.XmlString;

			// Ask the data source for the sentence-final punctuation.
			string result = m_dataSource.GetParameterValue("SentenceFinalPunctuation");

			// Characters that do not end a sentence must have been filtered out.
			Assert.AreEqual(".?!", result);
		}
		/// <summary>
		/// Registers six Arabic punctuation characters as valid word-final patterns on the
		/// default vernacular writing system, then verifies that the
		/// "SentenceFinalPunctuation" parameter yields only U+06D4 and U+061F.
		/// </summary>
		public void GetParameterValue_SentenceFinalPunctuationNR()
		{
			// Fetch the default vernacular writing system.
			int wsHandle = Cache.DefaultVernWs;
			IWritingSystem writingSystem = Cache.ServiceLocator.WritingSystemManager.Get(wsHandle);

			var patternList = new PuncPatternsList();
			// Percent sign
			patternList.Add(new PuncPattern("\u066A ", ContextPosition.WordFinal, PuncPatternStatus.Valid));
			// Decimal separator
			patternList.Add(new PuncPattern("\u066B ", ContextPosition.WordFinal, PuncPatternStatus.Valid));
			// Thousands separator
			patternList.Add(new PuncPattern("\u066C ", ContextPosition.WordFinal, PuncPatternStatus.Valid));
			// Five pointed star
			patternList.Add(new PuncPattern("\u066D ", ContextPosition.WordFinal, PuncPatternStatus.Valid));
			// Full stop
			patternList.Add(new PuncPattern("\u06D4 ", ContextPosition.WordFinal, PuncPatternStatus.Valid));
			// Question mark
			patternList.Add(new PuncPattern("\u061F ", ContextPosition.WordFinal, PuncPatternStatus.Valid));
			writingSystem.PunctuationPatterns = patternList.XmlString;

			// Ask the data source for the sentence-final punctuation.
			string result = m_dataSource.GetParameterValue("SentenceFinalPunctuation");

			// Only the full stop and the question mark end a sentence.
			Assert.AreEqual("\u06D4\u061F", result);
		}
Example #9
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Return a TextTokenSubstring for all occurrences of the desiredKey.
        /// Reads the check's configuration from the checks data source, then feeds the
        /// tokens through two punctuation processors (one for body text, one for notes)
        /// that add their results to the returned list.
        /// </summary>
        /// <param name="tokens">The tokens to scan. Tokens with null text, or whose locale
        /// differs from the "PreferredLocale" parameter, are skipped.</param>
        /// <param name="desiredKey">e.g., _[_ or empty string to look for all patterns</param>
        /// <returns>The list of punctuation sequences collected by the body and note
        /// processors (also stored in m_punctuationSequences).</returns>
        /// ------------------------------------------------------------------------------------
        public List <TextTokenSubstring> GetReferences(IEnumerable <ITextToken> tokens, string desiredKey)
        {
#if DEBUG
            // Debug builds copy the tokens into a list (presumably so they can be inspected
            // in a debugger - TODO confirm). Note that this enumerates tokens an extra time.
            List <ITextToken> AllTokens = new List <ITextToken>(tokens);
            if (AllTokens.Count == 0)
            {
                // Keep the compiler from complaining about assigning to a variable, but not using it.
            }
#endif
            m_characterCategorizer = m_checksDataSource.CharacterCategorizer;
            string sXmlMatchedPairs = m_checksDataSource.GetParameterValue("PunctuationPatterns");
            if (sXmlMatchedPairs != null && sXmlMatchedPairs.Trim().Length > 0)
            {
                // Punctuation patterns are available: rebuild the valid and invalid item
                // lists from them.
                m_validItemsList   = new List <string>();
                m_invalidItemsList = new List <string>();
                PuncPatternsList puncPatternsList = PuncPatternsList.Load(sXmlMatchedPairs,
                                                                          m_checksDataSource.GetParameterValue("DefaultWritingSystemName"));
                foreach (PuncPattern pattern in puncPatternsList)
                {
                    if (pattern.Valid)
                    {
                        m_validItemsList.Add(pattern.Pattern);
                    }
                    else
                    {
                        m_invalidItemsList.Add(pattern.Pattern);
                    }
                }
            }
            else
            {
                // No punctuation patterns: fall back to the valid/invalid item parameters.
                ValidItems   = m_checksDataSource.GetParameterValue(kValidItemsParameter);
                InvalidItems = m_checksDataSource.GetParameterValue(kInvalidItemsParameter);
            }

            // Map the "PunctCheckLevel" parameter to a checking level; anything other than
            // "Advanced" or "Intermediate" (including a missing value) means Basic.
            string        sLevel = m_checksDataSource.GetParameterValue("PunctCheckLevel");
            CheckingLevel level;
            switch (sLevel)
            {
            case "Advanced": level = CheckingLevel.Advanced; break;

            case "Intermediate": level = CheckingLevel.Intermediate; break;

            case "Basic":
            default:
                level = CheckingLevel.Basic;
                break;
            }
            // Only the first character of the "PunctWhitespaceChar" parameter is used; when
            // the parameter is missing or empty, s_whitespaceRep keeps its current value.
            string sWhitespaceRep = m_checksDataSource.GetParameterValue("PunctWhitespaceChar");
            if (!String.IsNullOrEmpty(sWhitespaceRep))
            {
                s_whitespaceRep = sWhitespaceRep.Substring(0, 1);
            }
            // A missing preferred locale is treated as the empty string, matching how token
            // locales are normalized in the loop below.
            string preferredLocale =
                m_checksDataSource.GetParameterValue("PreferredLocale") ?? string.Empty;

            QuotationMarkCategorizer quotationCategorizer =
                new QuotationMarkCategorizer(m_checksDataSource);

            // create processing state machines, one for body text, one for notes
            ProcessPunctationTokens bodyProcessor = new ProcessPunctationTokens(
                m_characterCategorizer, quotationCategorizer, level);

            ProcessPunctationTokens noteProcessor = new ProcessPunctationTokens(
                m_characterCategorizer, quotationCategorizer, level);

            m_punctuationSequences = new List <TextTokenSubstring>();

            // Route each token to the note or body processor according to its text type.
            foreach (ITextToken tok in tokens)
            {
                // Skip tokens that have no text or are not in the preferred locale.
                if (tok.Text == null || (tok.Locale ?? string.Empty) != preferredLocale)
                {
                    continue;
                }

                if (tok.TextType == TextType.Note)
                {
                    // if a new note is starting finalize any punctuation sequences from the previous note
                    if (tok.IsNoteStart)
                    {
                        noteProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);
                    }
                    noteProcessor.ProcessToken(tok, desiredKey, m_punctuationSequences);
                }
                else if (tok.TextType == TextType.Verse || tok.TextType == TextType.Other)
                {
                    // body text: finalize any note that was in progress and continue with body text
                    noteProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);
                    bodyProcessor.ProcessToken(tok, desiredKey, m_punctuationSequences);
                }
                else if (tok.IsParagraphStart)
                {
                    // Any other text type is not processed itself, but a paragraph start
                    // finalizes the pending body result and flags the body processor so it
                    // treats what follows as the start of a paragraph.
                    bodyProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);
                    bodyProcessor.TreatAsParagraphStart = true;
                }
            }

            // Finalize whatever is still pending in either processor.
            noteProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);
            bodyProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);

            return(m_punctuationSequences);
        }
		/// <summary>
		/// With only "._" registered as a valid word-final pattern, checks two paragraphs
		/// ("wow" followed by U+201D, and a lone U+2019) at the "Intermediate" level and
		/// verifies that each quotation mark is reported as an unspecified pattern.
		/// </summary>
		public void Check_ParaWithSingleQuotationMark()
		{
			var patternList = new PuncPatternsList();
			patternList.Add(new PuncPattern
			{
				Pattern = "._",
				ContextPos = ContextPosition.WordFinal,
				Status = PuncPatternStatus.Valid
			});
			m_dataSource.SetParameterValue("PunctuationPatterns", patternList.XmlString);
			m_dataSource.SetParameterValue("PunctCheckLevel", "Intermediate");

			var punctuationCheck = new PunctuationCheck(m_dataSource);
			m_dataSource.Text = "\\p wow\u201D\\p \u2019";

			punctuationCheck.Check(m_dataSource.TextTokens(), RecordError);

			// One error per paragraph, each for its quotation mark.
			Assert.AreEqual(2, m_errors.Count);
			CheckError(0, "wow\u201D", 3, "\u201D", "Unspecified use of punctuation pattern");
			CheckError(1, "\u2019", 0, "\u2019", "Unspecified use of punctuation pattern");
		}
		/// <summary>
		/// With ",_" (word-final), "_\u201C" and "_\u2018" (word-initial) registered as
		/// valid, checks text whose closing quotation marks are separated by U+202F at the
		/// "Intermediate" level and verifies that the trailing "!\u2019\u202F\u201D" is the
		/// only error reported.
		/// </summary>
		public void Check_PatternsWithSpaceSeparatedQuoteMarks()
		{
			var patternList = new PuncPatternsList();
			patternList.Add(new PuncPattern
			{
				Pattern = ",_",
				ContextPos = ContextPosition.WordFinal,
				Status = PuncPatternStatus.Valid
			});
			patternList.Add(new PuncPattern
			{
				Pattern = "_\u201C",
				ContextPos = ContextPosition.WordInitial,
				Status = PuncPatternStatus.Valid
			});
			patternList.Add(new PuncPattern
			{
				Pattern = "_\u2018",
				ContextPos = ContextPosition.WordInitial,
				Status = PuncPatternStatus.Valid
			});
			m_dataSource.SetParameterValue("PunctuationPatterns", patternList.XmlString);
			m_dataSource.SetParameterValue("PunctCheckLevel", "Intermediate");

			var punctuationCheck = new PunctuationCheck(m_dataSource);

			m_dataSource.Text = "\\p Tom replied, \u201CBill said, \u2018Yes!\u2019\u202F\u201D";

			punctuationCheck.Check(m_dataSource.TextTokens(), RecordError);

			// The whole trailing sequence is reported as a single error.
			Assert.AreEqual(1, m_errors.Count);
			CheckError(0, "Tom replied, \u201CBill said, \u2018Yes!\u2019\u202F\u201D", 29, "!\u2019\u202F\u201D", "Unspecified use of punctuation pattern");
		}
		/// <summary>
		/// With "._" registered as valid (word-final) and "," as invalid (word-breaking),
		/// checks a sentence at the "Intermediate" level and verifies that the "." is not
		/// reported while the "," (invalid) and the "!" (unspecified) are.
		/// </summary>
		public void Check_ValidPatternsAreNotReported()
		{
			var patternList = new PuncPatternsList();
			patternList.Add(new PuncPattern
			{
				Pattern = "._",
				ContextPos = ContextPosition.WordFinal,
				Status = PuncPatternStatus.Valid
			});
			patternList.Add(new PuncPattern
			{
				Pattern = ",",
				ContextPos = ContextPosition.WordBreaking,
				Status = PuncPatternStatus.Invalid
			});
			m_dataSource.SetParameterValue("PunctuationPatterns", patternList.XmlString);
			m_dataSource.SetParameterValue("PunctCheckLevel", "Intermediate");

			var punctuationCheck = new PunctuationCheck(m_dataSource);

			m_dataSource.Text = "\\p This is nice. By nice,I mean really nice!";

			punctuationCheck.Check(m_dataSource.TextTokens(), RecordError);

			// Two errors: the invalid comma and the exclamation mark that has no pattern.
			Assert.AreEqual(2, m_errors.Count);
			CheckError(0, "This is nice. By nice,I mean really nice!", 21, ",", "Invalid punctuation pattern");
			CheckError(1, "This is nice. By nice,I mean really nice!", 40, "!", "Unspecified use of punctuation pattern");
		}