public void Check_ValidPatternsAreNotReported()
{
    // Configure one valid word-final pattern ("._") and one invalid
    // word-breaking pattern (",").
    var configuredPatterns = new PuncPatternsList
    {
        new PuncPattern
        {
            Pattern = "._",
            ContextPos = ContextPosition.WordFinal,
            Status = PuncPatternStatus.Valid
        },
        new PuncPattern
        {
            Pattern = ",",
            ContextPos = ContextPosition.WordBreaking,
            Status = PuncPatternStatus.Invalid
        }
    };
    m_dataSource.SetParameterValue("PunctuationPatterns", configuredPatterns.XmlString);
    m_dataSource.SetParameterValue("PunctCheckLevel", "Intermediate");

    var punctuationCheck = new PunctuationCheck(m_dataSource);
    m_dataSource.Text = "\\p This is nice. By nice,I mean really nice!";

    punctuationCheck.Check(m_dataSource.TextTokens(), RecordError);

    // The period matches the valid pattern, so only the comma (explicitly invalid)
    // and the exclamation mark (no pattern configured) are expected as errors.
    Assert.AreEqual(2, m_errors.Count);
    CheckError(0, "This is nice. By nice,I mean really nice!", 21, ",",
        "Invalid punctuation pattern");
    CheckError(1, "This is nice. By nice,I mean really nice!", 40, "!",
        "Unspecified use of punctuation pattern");
}
public void Check_PatternsWithSpaceSeparatedQuoteMarks()
{
    // Valid patterns: a word-final comma and word-initial opening double/single quotes.
    var configuredPatterns = new PuncPatternsList
    {
        new PuncPattern
        {
            Pattern = ",_",
            ContextPos = ContextPosition.WordFinal,
            Status = PuncPatternStatus.Valid
        },
        new PuncPattern
        {
            Pattern = "_\u201C",
            ContextPos = ContextPosition.WordInitial,
            Status = PuncPatternStatus.Valid
        },
        new PuncPattern
        {
            Pattern = "_\u2018",
            ContextPos = ContextPosition.WordInitial,
            Status = PuncPatternStatus.Valid
        }
    };
    m_dataSource.SetParameterValue("PunctuationPatterns", configuredPatterns.XmlString);
    m_dataSource.SetParameterValue("PunctCheckLevel", "Intermediate");

    var punctuationCheck = new PunctuationCheck(m_dataSource);
    // The closing quotes are separated by U+202F (narrow no-break space).
    m_dataSource.Text = "\\p Tom replied, \u201CBill said, \u2018Yes!\u2019\u202F\u201D";

    punctuationCheck.Check(m_dataSource.TextTokens(), RecordError);

    // The trailing "!" + closing quotes sequence is expected to be reported as a
    // single unspecified pattern.
    Assert.AreEqual(1, m_errors.Count);
    CheckError(0, "Tom replied, \u201CBill said, \u2018Yes!\u2019\u202F\u201D", 29,
        "!\u2019\u202F\u201D", "Unspecified use of punctuation pattern");
}
public void GetParameterValue_SentenceFinalPunctuationNR()
{
    // Configure Arabic punctuation patterns on the default vernacular writing system.
    IWritingSystem vernWs =
        Cache.ServiceLocator.WritingSystemManager.Get(Cache.DefaultVernWs);

    PuncPatternsList arabicPatterns = new PuncPatternsList();
    // Percent sign (066A)
    arabicPatterns.Add(new PuncPattern("\u066A ", ContextPosition.WordFinal, PuncPatternStatus.Valid));
    // Decimal separator (066B)
    arabicPatterns.Add(new PuncPattern("\u066B ", ContextPosition.WordFinal, PuncPatternStatus.Valid));
    // Thousands separator (066C)
    arabicPatterns.Add(new PuncPattern("\u066C ", ContextPosition.WordFinal, PuncPatternStatus.Valid));
    // Five pointed star (066D)
    arabicPatterns.Add(new PuncPattern("\u066D ", ContextPosition.WordFinal, PuncPatternStatus.Valid));
    // Full stop (06D4)
    arabicPatterns.Add(new PuncPattern("\u06D4 ", ContextPosition.WordFinal, PuncPatternStatus.Valid));
    // Question mark (061F)
    arabicPatterns.Add(new PuncPattern("\u061F ", ContextPosition.WordFinal, PuncPatternStatus.Valid));
    vernWs.PunctuationPatterns = arabicPatterns.XmlString;

    // Ask the data source for the sentence-final punctuation.
    string actual = m_dataSource.GetParameterValue("SentenceFinalPunctuation");

    // Only the full stop and the question mark are expected to come back.
    Assert.AreEqual("\u06D4\u061F", actual);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Collects the distinct sentence-final punctuation characters found in the valid,
/// word-final punctuation patterns of the given writing system.
/// </summary>
/// <param name="ws">The writing system whose punctuation patterns are examined.</param>
/// <param name="unicodeCharProps">The Unicode character property engine used to look
/// up the general category of each pattern character.</param>
/// <returns>The sentence-final punctuation characters, in the order they are first
/// encountered; an empty string if the writing system has no (non-blank) punctuation
/// patterns.</returns>
/// ------------------------------------------------------------------------------------
private string GetSentenceFinalPunctuation(IWritingSystem ws,
    ILgCharacterPropertyEngine unicodeCharProps)
{
    string patternsXml = ws.PunctuationPatterns;

    // Nothing configured (null, empty or only whitespace): nothing to report.
    if (string.IsNullOrEmpty(patternsXml) || patternsXml.Trim().Length == 0)
    {
        return string.Empty;
    }

    var collected = new StringBuilder();
    foreach (PuncPattern candidate in PuncPatternsList.Load(patternsXml, ws.DisplayLabel))
    {
        // Only valid, word-final patterns are of interest.
        if (candidate.Status != PuncPatternStatus.Valid ||
            candidate.ContextPos != ContextPosition.WordFinal)
        {
            continue;
        }

        foreach (char ch in candidate.Pattern)
        {
            bool endsSentence = TsStringUtils.IsEndOfSentenceChar(ch,
                unicodeCharProps.get_GeneralCategory(ch));

            // Append each sentence-final character only the first time it is seen.
            if (endsSentence && collected.ToString().IndexOf(ch) < 0)
            {
                collected.Append(ch);
            }
        }
    }

    return collected.ToString();
}
public void Check_ParaWithSingleQuotationMark()
{
    // The only configured pattern is a valid word-final period.
    var configuredPatterns = new PuncPatternsList
    {
        new PuncPattern
        {
            Pattern = "._",
            ContextPos = ContextPosition.WordFinal,
            Status = PuncPatternStatus.Valid
        }
    };
    m_dataSource.SetParameterValue("PunctuationPatterns", configuredPatterns.XmlString);
    m_dataSource.SetParameterValue("PunctCheckLevel", "Intermediate");

    var punctuationCheck = new PunctuationCheck(m_dataSource);
    // Two paragraphs: a word followed by a closing double quote, then a lone
    // closing single quote.
    m_dataSource.Text = "\\p wow\u201D\\p \u2019";

    punctuationCheck.Check(m_dataSource.TextTokens(), RecordError);

    // Each quotation mark is expected to be reported on its own.
    Assert.AreEqual(2, m_errors.Count);
    CheckError(0, "wow\u201D", 3, "\u201D", "Unspecified use of punctuation pattern");
    CheckError(1, "\u2019", 0, "\u2019", "Unspecified use of punctuation pattern");
}
public void GetParameterValue_SentenceFinalPunctuation()
{
    // Configure Roman-script punctuation patterns on the default vernacular
    // writing system.
    IWritingSystem vernWs =
        Cache.ServiceLocator.WritingSystemManager.Get(Cache.DefaultVernWs);

    var romanPatterns = new PuncPatternsList
    {
        new PuncPattern(". ", ContextPosition.WordFinal, PuncPatternStatus.Valid),
        new PuncPattern("? ", ContextPosition.WordFinal, PuncPatternStatus.Valid),
        new PuncPattern("! ", ContextPosition.WordFinal, PuncPatternStatus.Valid),
        new PuncPattern("; ", ContextPosition.WordFinal, PuncPatternStatus.Valid),
        new PuncPattern("- ", ContextPosition.WordFinal, PuncPatternStatus.Valid),
        new PuncPattern(") ", ContextPosition.WordFinal, PuncPatternStatus.Valid),
        new PuncPattern("( ", ContextPosition.WordFinal, PuncPatternStatus.Valid),
        new PuncPattern(": ", ContextPosition.WordFinal, PuncPatternStatus.Valid)
    };
    vernWs.PunctuationPatterns = romanPatterns.XmlString;

    // Ask the data source for the sentence-final punctuation.
    string actual = m_dataSource.GetParameterValue("SentenceFinalPunctuation");

    // Of the eight configured marks, only the period, question mark and
    // exclamation mark are expected to come back.
    Assert.AreEqual(".?!", actual);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Scans the given tokens and returns a TextTokenSubstring for every occurrence of
/// the desired punctuation pattern key. Body text and notes are processed by separate
/// processors.
/// </summary>
/// <param name="tokens">The text tokens to scan. Tokens whose text is null, or whose
/// locale differs from the "PreferredLocale" parameter, are skipped.</param>
/// <param name="desiredKey">The pattern to look for (e.g., _[_), or an empty string
/// to look for all patterns.</param>
/// <returns>The punctuation sequences that were found (the same list that is stored
/// in m_punctuationSequences).</returns>
/// ------------------------------------------------------------------------------------
public List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens,
    string desiredKey)
{
#if DEBUG
    // Debug builds copy the tokens into a list (this enumerates the tokens once more).
    List<ITextToken> AllTokens = new List<ITextToken>(tokens);
    if (AllTokens.Count == 0)
    {
        // Keep the compiler from complaining about assigning to a variable, but not using it.
    }
#endif
    m_characterCategorizer = m_checksDataSource.CharacterCategorizer;

    // Prefer the XML punctuation pattern list; fall back to the plain valid/invalid
    // item parameters when no pattern list is available.
    string patternsXml = m_checksDataSource.GetParameterValue("PunctuationPatterns");
    bool hasPatternList = patternsXml != null && patternsXml.Trim().Length > 0;
    if (!hasPatternList)
    {
        ValidItems = m_checksDataSource.GetParameterValue(kValidItemsParameter);
        InvalidItems = m_checksDataSource.GetParameterValue(kInvalidItemsParameter);
    }
    else
    {
        m_validItemsList = new List<string>();
        m_invalidItemsList = new List<string>();
        string wsName = m_checksDataSource.GetParameterValue("DefaultWritingSystemName");
        foreach (PuncPattern entry in PuncPatternsList.Load(patternsXml, wsName))
        {
            (entry.Valid ? m_validItemsList : m_invalidItemsList).Add(entry.Pattern);
        }
    }

    // Map the configured level name to a checking level; anything unrecognized
    // (including a missing value) is treated as Basic.
    string levelName = m_checksDataSource.GetParameterValue("PunctCheckLevel");
    CheckingLevel level;
    if (levelName == "Advanced")
    {
        level = CheckingLevel.Advanced;
    }
    else if (levelName == "Intermediate")
    {
        level = CheckingLevel.Intermediate;
    }
    else
    {
        level = CheckingLevel.Basic;
    }

    // Only the first character of the configured whitespace representation is used.
    string whitespaceSetting = m_checksDataSource.GetParameterValue("PunctWhitespaceChar");
    if (!String.IsNullOrEmpty(whitespaceSetting))
    {
        s_whitespaceRep = whitespaceSetting.Substring(0, 1);
    }

    string preferredLocale =
        m_checksDataSource.GetParameterValue("PreferredLocale") ?? string.Empty;

    QuotationMarkCategorizer quotationCategorizer =
        new QuotationMarkCategorizer(m_checksDataSource);

    // Two independent processing state machines: one for body text, one for notes.
    ProcessPunctationTokens bodyProcessor = new ProcessPunctationTokens(
        m_characterCategorizer, quotationCategorizer, level);
    ProcessPunctationTokens noteProcessor = new ProcessPunctationTokens(
        m_characterCategorizer, quotationCategorizer, level);

    m_punctuationSequences = new List<TextTokenSubstring>();

    foreach (ITextToken token in tokens)
    {
        bool usable = token.Text != null &&
            (token.Locale ?? string.Empty) == preferredLocale;
        if (!usable)
        {
            continue;
        }

        switch (token.TextType)
        {
            case TextType.Note:
                // A new note is starting: finalize any punctuation sequences from
                // the previous note first.
                if (token.IsNoteStart)
                {
                    noteProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);
                }
                noteProcessor.ProcessToken(token, desiredKey, m_punctuationSequences);
                break;

            case TextType.Verse:
            case TextType.Other:
                // Body text: finalize any note that was in progress, then continue
                // with the body text.
                noteProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);
                bodyProcessor.ProcessToken(token, desiredKey, m_punctuationSequences);
                break;

            default:
                if (token.IsParagraphStart)
                {
                    bodyProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);
                    bodyProcessor.TreatAsParagraphStart = true;
                }
                break;
        }
    }

    // Flush whatever is still pending in either processor.
    noteProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);
    bodyProcessor.FinalizeResult(desiredKey, m_punctuationSequences, true);

    return m_punctuationSequences;
}