/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="TextTokenSubstring"/> class that
/// duplicates an existing TextTokenSubstring but carries a different message. The
/// tokens, offset, length and inventory string are taken over from the source instance
/// by plain assignment.
/// </summary>
/// <param name="tts">The instance whose tokens, offset, length and inventory string
/// are copied.</param>
/// <param name="msg">The message for the new instance.</param>
/// ------------------------------------------------------------------------------------
public TextTokenSubstring(TextTokenSubstring tts, string msg)
{
    // The message is the only member that differs from the source instance.
    m_message = msg;

    // Everything else is carried over unchanged.
    m_inventoryString = tts.m_inventoryString;
    m_length = tts.m_length;
    m_offset = tts.m_offset;
    m_tokens = tts.m_tokens;
}
public void RecordError_PictureCaptionToken()
{
    // Verifies the annotation that RecordError creates for an error reported against
    // a picture-caption token.

    // Arrange: seed the data source's m_bookChecksFailed table (via reflection) with an
    // entry for the token's book and the check being run.
    ScrChecksDataSource checksData = new ScrChecksDataSource(Cache);
    ScrCheckingToken captionToken = new DummyPictureCheckingToken(m_scr, Cache.DefaultUserWs, "en");
    int bookNum = captionToken.StartRef.Book;

    Dictionary<Guid, ScrCheckRunResult> resultsForBook = new Dictionary<Guid, ScrCheckRunResult>();
    resultsForBook[kCheckId1] = ScrCheckRunResult.NoInconsistencies;
    Dictionary<int, Dictionary<Guid, ScrCheckRunResult>> failedChecksByBook =
        new Dictionary<int, Dictionary<Guid, ScrCheckRunResult>>();
    failedChecksByBook[bookNum] = resultsForBook;
    ReflectionHelper.SetField(checksData, "m_bookChecksFailed", failedChecksByBook);

    // Act: report a 9-character error starting at offset 15 of the token.
    TextTokenSubstring captionError = new TextTokenSubstring(captionToken, 15, 9,
        "Weird bilingual picture caption");
    checksData.RecordError(new RecordErrorEventArgs(captionError, kCheckId1));

    // Assert: the first note of the book's annotations describes the error.
    IScrBookAnnotations bookNotes = m_scr.BookAnnotationsOS[bookNum - 1];
    IScrScriptureNote note = bookNotes.NotesOS[0];
    Assert.IsNotNull(note);
    Assert.AreEqual(NoteType.CheckingError, note.AnnotationType);
    Assert.AreEqual(m_scr, note.BeginObjectRA);
    Assert.AreEqual(m_scr, note.EndObjectRA);
    Assert.AreEqual(15, note.BeginOffset);
    Assert.AreEqual(24, note.EndOffset); // 15 + 9
    Assert.AreEqual(0, note.CategoriesRS.Count);

    // The quote holds the offending text in the user writing system.
    Assert.AreEqual(1, note.QuoteOA.ParagraphsOS.Count);
    ITsStrFactory strFactory = TsStrFactoryClass.Create();
    ITsString expectedQuote = strFactory.MakeString("in Monroe", Cache.DefaultUserWs);
    StTxtPara quotePara = (StTxtPara)note.QuoteOA.ParagraphsOS[0];
    AssertEx.AreTsStringsEqual(expectedQuote, quotePara.Contents.UnderlyingTsString);

    // The discussion holds the error message.
    Assert.AreEqual(1, note.DiscussionOA.ParagraphsOS.Count);
    StTxtPara discussionPara = (StTxtPara)note.DiscussionOA.ParagraphsOS[0];
    Assert.AreEqual("Weird bilingual picture caption", discussionPara.Contents.Text);

    Assert.AreEqual(NoteStatus.Open, note.ResolutionStatus);
    VerifyEmptyStJournalText(note.RecommendationOA);
    VerifyEmptyStJournalText(note.ResolutionOA);

    // Picture-specific expectations: the caption field and the vernacular writing system.
    Assert.AreEqual((int)CmPicture.CmPictureTags.kflidCaption, note.Flid);
    Assert.AreEqual(Cache.DefaultVernWs, note.WsSelector);
    Assert.AreEqual(01003034, note.BeginRef);
    Assert.AreEqual(01003034, note.EndRef);
    Assert.AreEqual(Cache.LangProject.DefaultComputerAgent, note.SourceRA);
}
public void RecordError_ParaContentsToken_SecondRun()
{
    // Verifies the annotation that RecordError creates for an error reported against
    // a paragraph-contents token.

    // Arrange: seed the data source's m_bookChecksFailed table (via reflection) with an
    // entry for the token's book and the check being run.
    ScrChecksDataSource checksData = new ScrChecksDataSource(Cache);
    ScrCheckingToken paraToken = new DummyParaCheckingToken(m_scr, Cache.DefaultVernWs, 10);
    int bookNum = paraToken.StartRef.Book;

    Dictionary<Guid, ScrCheckRunResult> resultsForBook = new Dictionary<Guid, ScrCheckRunResult>();
    resultsForBook[kCheckId1] = ScrCheckRunResult.NoInconsistencies;
    Dictionary<int, Dictionary<Guid, ScrCheckRunResult>> failedChecksByBook =
        new Dictionary<int, Dictionary<Guid, ScrCheckRunResult>>();
    failedChecksByBook[bookNum] = resultsForBook;
    ReflectionHelper.SetField(checksData, "m_bookChecksFailed", failedChecksByBook);

    // Act: report an 8-character error starting at offset 5 of the token.
    TextTokenSubstring paraError = new TextTokenSubstring(paraToken, 5, 8, "Lousy message");
    checksData.RecordError(new RecordErrorEventArgs(paraError, kCheckId1));

    // Assert: the first note of the book's annotations describes the error.
    IScrBookAnnotations bookNotes = m_scr.BookAnnotationsOS[bookNum - 1];
    IScrScriptureNote note = bookNotes.NotesOS[0];
    Assert.IsNotNull(note);
    Assert.AreEqual(NoteType.CheckingError, note.AnnotationType);
    Assert.AreEqual(m_scr, note.BeginObjectRA);
    Assert.AreEqual(m_scr, note.EndObjectRA);
    // NOTE(review): the expected offsets are 10 greater than the substring's own offsets
    // (5 and 5 + 8); presumably the 10 passed to DummyParaCheckingToken is the token's
    // offset within its paragraph - confirm against DummyParaCheckingToken.
    Assert.AreEqual(15, note.BeginOffset);
    Assert.AreEqual(23, note.EndOffset);
    Assert.AreEqual(0, note.CategoriesRS.Count);

    // The quote holds the offending text in the vernacular writing system.
    Assert.AreEqual(1, note.QuoteOA.ParagraphsOS.Count);
    ITsStrFactory strFactory = TsStrFactoryClass.Create();
    ITsString expectedQuote = strFactory.MakeString("is lousy", Cache.DefaultVernWs);
    StTxtPara quotePara = (StTxtPara)note.QuoteOA.ParagraphsOS[0];
    AssertEx.AreTsStringsEqual(expectedQuote, quotePara.Contents.UnderlyingTsString);

    // The discussion holds the error message.
    Assert.AreEqual(1, note.DiscussionOA.ParagraphsOS.Count);
    StTxtPara discussionPara = (StTxtPara)note.DiscussionOA.ParagraphsOS[0];
    Assert.AreEqual("Lousy message", discussionPara.Contents.Text);

    Assert.AreEqual(NoteStatus.Open, note.ResolutionStatus);
    VerifyEmptyStJournalText(note.RecommendationOA);
    VerifyEmptyStJournalText(note.ResolutionOA);
    Assert.AreEqual((int)StTxtPara.StTxtParaTags.kflidContents, note.Flid);
    // TODO: Test this for an annotation on a CmPicture:
    Assert.AreEqual(0, note.WsSelector);
    Assert.AreEqual(01003034, note.BeginRef);
    Assert.AreEqual(01003034, note.EndRef);
    // TODO: Assert.AreEqual(???, note.SourceRA);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Runs the check by reporting every error held in m_ErrorsToReport through the
/// supplied delegate. The tokens passed in are not examined by this implementation.
/// </summary>
/// <param name="toks">ITextToken's corresponding to the text to be checked (unused
/// here).</param>
/// <param name="record">Delegate invoked once for each error to report.</param>
/// ------------------------------------------------------------------------------------
public void Check(IEnumerable<ITextToken> toks, RecordErrorHandler record)
{
    foreach (DummyError pending in m_ErrorsToReport)
    {
        // Wrap the pre-defined error details in the event args expected by the handler.
        record(new RecordErrorEventArgs(
            new TextTokenSubstring(pending.m_token, pending.m_ichStart, pending.m_length,
                pending.m_sMessage),
            m_checkId));
    }
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Builds the list of character-sequence references found in the given tokens and
/// stores it in m_characterSequences. A sequence is included when any of the following
/// holds:
/// - a specific key was requested and the sequence equals it;
/// - no specific key was requested and <paramref name="invalidCharactersOnly"/> is
///   <c>false</c>;
/// - <paramref name="invalidCharactersOnly"/> is <c>true</c> and the sequence is neither
///   in m_alwaysValidCharacters nor among the valid characters of the token's locale.
/// Tokens without text are skipped. When <paramref name="invalidCharactersOnly"/> is
/// <c>false</c>, tokens whose locale differs from the "PreferredLocale" parameter are
/// skipped too.
/// </summary>
/// <param name="tokens">The tokens to scan.</param>
/// <param name="desiredKey">The specific character sequence to look for; null or empty
/// means no specific key is requested.</param>
/// <param name="invalidCharactersOnly">if set to <c>true</c>, invalid character
/// sequences are reported.</param>
/// <returns>The list of matching substrings (the same list that is stored in
/// m_characterSequences).</returns>
/// ------------------------------------------------------------------------------------
private List<TextTokenSubstring> GetReferences(IEnumerable<ITextToken> tokens, string desiredKey,
    bool invalidCharactersOnly)
{
    if (m_categorizer == null)
        m_categorizer = m_checksDataSource.CharacterCategorizer;

    m_characterSequences = new List<TextTokenSubstring>();

    // Cache of valid-character dictionaries, keyed by locale.
    Dictionary<string, Dictionary<string, bool>> htValidChars =
        new Dictionary<string, Dictionary<string, bool>>();

    string preferredLocale =
        m_checksDataSource.GetParameterValue("PreferredLocale") ?? string.Empty;

    // Loop-invariant, so computed once. A null key is treated like an empty one (no
    // specific key requested) rather than as a key that can never match.
    bool lookingForASpecificKey = !string.IsNullOrEmpty(desiredKey);

    foreach (ITextToken tok in tokens)
    {
        string locale = tok.Locale ?? string.Empty;

        if (tok.Text == null || (!invalidCharactersOnly && locale != preferredLocale))
            continue;

        Dictionary<string, bool> currentDictionary;
        if (!htValidChars.TryGetValue(locale, out currentDictionary))
        {
            currentDictionary = StringToDictionary(GetValidCharacters(locale));
            htValidChars.Add(locale, currentDictionary);
        }

        int offset = 0;
        foreach (string key in ParseCharacterSequences(tok.Text))
        {
            bool keyMatches = (desiredKey == key);

            // REVIEW (BobbydV): IndexOf causes false positives for certain
            // characters (e.g., U+0234 & U+1234). I think Contains is easier to read
            // and should work for both TE and Paratext for the "AlwaysValidCharacters"
            // list. (TomB)
            bool invalidItem = invalidCharactersOnly &&
                !m_alwaysValidCharacters.Contains(key) &&
                !currentDictionary.ContainsKey(key);

            // invalidItem can only be true when invalidCharactersOnly is true.
            if ((lookingForASpecificKey && keyMatches) ||
                (!lookingForASpecificKey && !invalidCharactersOnly) ||
                invalidItem)
            {
                m_characterSequences.Add(new TextTokenSubstring(tok, offset, key.Length));
            }

            // Advance past this sequence whether or not it was recorded.
            offset += key.Length;
        }
    }

    return m_characterSequences;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="ContextInfo"/> class from a
/// TextTokenSubstring. The substring's offset, the full text of its token and the
/// Scripture reference string of its first token are handed on to another constructor
/// of this class.
/// </summary>
/// <param name="chr">The character or pattern to which this context applies.</param>
/// <param name="tts">The TextTokenSubstring supplying the offset, text and
/// reference.</param>
/// ------------------------------------------------------------------------------------
internal ContextInfo(string chr, TextTokenSubstring tts)
    : this(chr, tts.Offset, tts.FullTokenText, tts.FirstToken.ScrRefString)
{
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Creates a substring covering the word of the given word/punctuation item and adds it
/// to the results when no specific key is requested (desiredKey is null or empty) or
/// when the substring's inventory text equals the requested key.
/// </summary>
/// <param name="tok">The token containing the word.</param>
/// <param name="wap">The word/punctuation item supplying the word and its offset in the
/// token.</param>
/// ------------------------------------------------------------------------------------
private void AddWord(ITextToken tok, WordAndPunct wap)
{
    TextTokenSubstring tts = new TextTokenSubstring(tok, wap.Offset, wap.Word.Length);

    // A null key is treated like an empty one: no filtering by key.
    if (string.IsNullOrEmpty(desiredKey) || desiredKey == tts.InventoryText)
        result.Add(tts);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Creates the substring for the character that starts at position iChar. When the
/// token is a VerseTextToken, the substring is created on the token it wraps.
/// </summary>
/// <param name="tok">The token</param>
/// <param name="iChar">The index of the character.</param>
/// <returns>A substring starting at iChar whose length is given by GetLengthOfChar.
/// </returns>
/// ------------------------------------------------------------------------------------
private TextTokenSubstring GetSubstring(ITextToken tok, int iChar)
{
    // The length is determined from the token as it was passed in.
    int charLength = GetLengthOfChar(tok, iChar);

    // Unwrap a VerseTextToken so that the substring refers to the wrapped token.
    VerseTextToken verseToken = tok as VerseTextToken;
    ITextToken targetToken = (verseToken != null) ? verseToken.Token : tok;

    return new TextTokenSubstring(targetToken, iChar, charLength);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Formats the message and reports an error for the given part of the token through
/// m_recordError.
/// </summary>
/// <param name="token">The current token being processed.</param>
/// <param name="offset">Offset in the token where the offending text begins.</param>
/// <param name="length">The length of the offending text.</param>
/// <param name="message">The composite format string for the message.</param>
/// <param name="args">The arguments to format the message (may be null).</param>
/// ------------------------------------------------------------------------------------
private void AddError(ITextToken token, int offset, int length, string message,
    params object[] args)
{
    // The message always goes through string.Format; a null argument array is
    // replaced by an empty one because Format rejects a null array.
    object[] formatArgs = args ?? new object[0];
    string errorText = string.Format(message, formatArgs);

    m_recordError(new RecordErrorEventArgs(
        new TextTokenSubstring(token, offset, length, errorText), CheckId));
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="RecordErrorEventArgs"/> class, storing
/// the substring the error applies to and the id of the check reporting it.
/// </summary>
/// <param name="tts">The TextTokenSubstring.</param>
/// <param name="checkId">The GUID identifying the check.</param>
/// ------------------------------------------------------------------------------------
public RecordErrorEventArgs(TextTokenSubstring tts, Guid checkId)
{
    m_checkId = checkId;
    m_tts = tts;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Adds the text token substring of a quotation related token to the list of results
/// when no specific key is requested (m_desiredKey is null or empty) or when the
/// substring's inventory text equals the requested key.
/// At this point the message of the substring is either an error or trace message.
/// </summary>
/// <param name="tts">The text token substring being processed</param>
/// ------------------------------------------------------------------------------------
private void Output(TextTokenSubstring tts)
{
    // A null key is treated like an empty one: no filtering by key.
    if (string.IsNullOrEmpty(m_desiredKey) || m_desiredKey == tts.InventoryText)
        m_results.Add(tts);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Collects the quotation-related tokens found in the given token. If the token starts
/// a paragraph, a paragraph-start token (a zero-length substring plus the paragraph
/// style name) is stored first. The token is then passed to AddTextToParaStartTokens,
/// and a quotation mark token is stored for every match of m_regExQuotes in the token's
/// text.
/// </summary>
/// <param name="tok">The token being processed (must not be a VerseTextToken)</param>
/// <param name="verseTok">Optional verse token; when it is not null, its
/// IsParagraphStart value is used instead of that of <paramref name="tok"/> to decide
/// whether quotation marks may be continuers.</param>
/// <exception cref="ArgumentNullException"><paramref name="tok"/> is null.</exception>
/// ------------------------------------------------------------------------------------
internal void ProcessToken(ITextToken tok, VerseTextToken verseTok)
{
    if (tok == null)
        throw new ArgumentNullException("tok");
    Debug.Assert(!(tok is VerseTextToken));

    if (tok.IsParagraphStart)
    {
        // A zero-length substring at the start of the token marks the paragraph start.
        TextTokenSubstring paraStart = new TextTokenSubstring(tok, 0, 0);
        m_quotationRelatedTokens.Add(new ParaStartToken(paraStart, tok.ParaStyleName));
    }

    AddTextToParaStartTokens(tok);

    // Position of the first match of m_regExNonQuotes (intended to be the first
    // character that is neither white space nor a quotation mark), or -1 when there is
    // none. Quotation marks before this position precede all other characters in the
    // token and are therefore possible continuers.
    Match firstNonQuote = m_regExNonQuotes.Match(tok.Text);
    int iFirstNonQuoteChar = -1;
    if (firstNonQuote.Success)
        iFirstNonQuoteChar = firstNonQuote.Index;

    // The same for every quotation mark in this token, so determined once.
    bool startsParagraph = (verseTok == null) ? tok.IsParagraphStart : verseTok.IsParagraphStart;

    // Create a quotation mark token for each quotation mark found in the text.
    foreach (Match quoteMatch in m_regExQuotes.Matches(tok.Text))
    {
        TextTokenSubstring quoteSubstring =
            new TextTokenSubstring(tok, quoteMatch.Index, quoteMatch.Length);
        bool opensQuote = m_qmCategorizer.IsInitialPunctuation(quoteSubstring.Text);
        bool mayBeContinuer = startsParagraph && quoteMatch.Index < iFirstNonQuoteChar;

        m_quotationRelatedTokens.Add(
            new QuotationMarkToken(quoteSubstring, m_qmCategorizer, opensQuote, mayBeContinuer));
    }
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="ParaStartToken"/> class from a
/// substring and a style name.
/// </summary>
/// <param name="tts">The TextTokenSubstring stored in Tts.</param>
/// <param name="styleName">The style name stored in StyleName.</param>
/// ------------------------------------------------------------------------------------
internal ParaStartToken(TextTokenSubstring tts, string styleName)
{
    this.Tts = tts;
    this.StyleName = styleName;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="QuotationMarkToken"/> class.
/// </summary>
/// <param name="tts">The TextTokenSubstring stored in Tts.</param>
/// <param name="categorizer">The quotation mark categorizer kept for later use.</param>
/// <param name="fIsOpener">Value stored as the token's "is opener" flag.</param>
/// <param name="fPossibleContinuer">Value stored as the token's "possible continuer"
/// flag.</param>
/// ------------------------------------------------------------------------------------
internal QuotationMarkToken(TextTokenSubstring tts, QuotationMarkCategorizer categorizer,
    bool fIsOpener, bool fPossibleContinuer)
{
    this.Tts = tts;
    this.m_categorizer = categorizer;
    this.m_fIsOpener = fIsOpener;
    this.m_fPossibleContinuer = fPossibleContinuer;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="PunctuationToken"/> class with the
/// given type, substring and initial/final flags.
/// </summary>
/// <param name="tokenType">Type of the token.</param>
/// <param name="tts">The TextTokenSubstring.</param>
/// <param name="isInitial">if set to <c>true</c> is opening quotation mark.</param>
/// <param name="isFinal">if set to <c>true</c> is closing quotation mark.</param>
/// ------------------------------------------------------------------------------------
public PunctuationToken(PunctuationTokenType tokenType, TextTokenSubstring tts,
    bool isInitial, bool isFinal)
{
    this.TokenType = tokenType;
    this.Tts = tts;
    this.IsInitial = isInitial;
    this.IsFinal = isFinal;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Appends the punctuation character at the given index to m_puncts. Afterwards, if
/// the last three entries are punctuation / whitespace / punctuation and both
/// punctuation entries are initial or both are final, the whitespace entry is
/// re-typed as a quote separator.
/// </summary>
/// <param name="tok">The text token</param>
/// <param name="i">The index of the punctuation character</param>
/// ------------------------------------------------------------------------------------
private void ProcessPunctuation(ITextToken tok, int i)
{
    TextTokenSubstring tts = new TextTokenSubstring(tok, i, 1);
    bool isInitial = m_quotationCategorizer.IsInitialPunctuation(tts.Text);
    bool isFinal = m_quotationCategorizer.IsFinalPunctuation(tts.Text);
    m_puncts.Add(new PunctuationToken(PunctuationTokenType.punctuation, tts, isInitial, isFinal));

    // Special case: treat a sequence like
    //   opening quotation punctuation/space/opening quotation punctuation
    // as if the space were not there. An example of this would be
    //   U+201C LEFT DOUBLE QUOTATION MARK
    //   U+0020 SPACE
    //   U+2018 LEFT SINGLE QUOTATION MARK
    // This allows a quotation mark to be considered word initial even if it is
    // followed by a space.
    int count = m_puncts.Count;
    if (count < 3)
        return;

    int iEarlier = count - 3;
    int iSeparator = count - 2;
    int iLatest = count - 1;

    // The middle entry must be whitespace that is not a paragraph break...
    if (m_puncts[iSeparator].TokenType != PunctuationTokenType.whitespace ||
        m_puncts[iSeparator].IsParaBreak)
    {
        return;
    }

    // ...and it must be preceded by punctuation.
    if (m_puncts[iEarlier].TokenType != PunctuationTokenType.punctuation)
        return;

    // Both punctuation entries must point in the same direction.
    if ((m_puncts[iEarlier].IsInitial && m_puncts[iLatest].IsInitial) ||
        (m_puncts[iEarlier].IsFinal && m_puncts[iLatest].IsFinal))
    {
        m_puncts[iSeparator].TokenType = PunctuationTokenType.quoteSeparator;
    }
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Creates a substring covering the word of the given word/punctuation item and adds it
/// to m_result, unless a specific key is requested and the substring's inventory text
/// differs from it.
/// </summary>
/// <param name="tok">The token containing the word.</param>
/// <param name="wap">The word/punctuation item supplying the word and its offset in the
/// token.</param>
/// <param name="desiredKey">The key to filter on; null or empty means every word is
/// added.</param>
/// ------------------------------------------------------------------------------------
private void AddWord(ITextToken tok, WordAndPunct wap, string desiredKey)
{
    TextTokenSubstring wordSubstring =
        new TextTokenSubstring(tok, wap.Offset, wap.Word.Length);

    bool filterByKey = !String.IsNullOrEmpty(desiredKey);
    if (filterByKey && desiredKey != wordSubstring.InventoryText)
        return;

    m_result.Add(wordSubstring);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Creates a checking error if the current character style (m_characterStyle) is listed
/// in m_allCapitalizedStyles as a character style, i.e. requires an initial uppercase
/// letter, but ttsFirstLetter is lowercase. Nothing is done when character text was
/// already found (m_foundCharacterText); otherwise that flag is set by this call.
/// </summary>
/// <param name="tok">The Scripture token (not used by this method).</param>
/// <param name="ttsFirstLetter">The token substring of the first word-forming character
/// in the given token; its inventory text and message are overwritten when an error is
/// reported.</param>
/// <param name="result">The list to which the error is added.</param>
/// <returns><c>true</c> if an error was added to the list of results; otherwise
/// <c>false</c></returns>
/// ------------------------------------------------------------------------------------
private bool CheckForCharStyleCapilizationError(ITextToken tok,
    TextTokenSubstring ttsFirstLetter, List<TextTokenSubstring> result)
{
    if (m_foundCharacterText)
        return false;
    m_foundCharacterText = true;

    // The first word-forming character of the character style is lowercase.
    // Look the style up in the capitalized styles dictionary to determine whether the
    // character should have been uppercase.
    StyleCapInfo capInfo;
    bool styleIsListed = m_allCapitalizedStyles.TryGetValue(m_characterStyle, out capInfo);
    if (!styleIsListed || capInfo.m_type != StyleInfo.StyleTypes.character)
        return false;

    ttsFirstLetter.InventoryText = m_characterStyle;
    ttsFirstLetter.Message = CapitalizationCheck.GetErrorMessage(m_checksDataSource,
        capInfo.m_capCheck, m_characterStyle);
    result.Add(ttsFirstLetter);
    return true;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Records the single character at the given index of the token in m_pairTokensFound,
/// giving it the localized "Unmatched punctuation" message as a default.
/// </summary>
/// <param name="tok">The token containing the character.</param>
/// <param name="i">The index of the character in the token.</param>
/// ------------------------------------------------------------------------------------
private void StoreFoundPairToken(ITextToken tok, int i)
{
    TextTokenSubstring pairToken = new TextTokenSubstring(tok, i, 1);

    // Assign an initial, default message which may be changed later
    string defaultMessage = m_checksDataSource.GetLocalizedString("Unmatched punctuation");
    pairToken.Message = defaultMessage;

    m_pairTokensFound.Add(pairToken);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="ContextInfo"/> class from a
/// TextTokenSubstring. The substring's offset, the full text of its token and the
/// Scripture reference string of its first token are handed on to another constructor
/// of this class.
/// </summary>
/// <param name="pattern">The punctuation pattern.</param>
/// <param name="tts">The TextTokenSubstring supplying the offset, text and
/// reference.</param>
/// ------------------------------------------------------------------------------------
internal ContextInfo(PuncPattern pattern, TextTokenSubstring tts)
    : this(pattern, tts.Offset, tts.FullTokenText, tts.FirstToken.ScrRefString)
{
}