Ejemplo n.º 1
0
        /// <summary>
        /// Finds the words in the given spans that the spell checker rejects.
        /// </summary>
        /// <param name="spans">The snapshot spans whose text is scanned for words.</param>
        /// <returns>
        /// A lazily evaluated sequence with one tag (error span plus the suggestions produced by the
        /// spell checker) for every word that passes <c>ProbablyARealWord</c>, fails
        /// <c>_spellChecker.CheckText</c> and is not ignored by <c>_dictionary</c>.
        /// </returns>
        IEnumerable<MisspellingTag> GetMisspellingsInSpans(NormalizedSnapshotSpanCollection spans)
        {
            foreach (var span in spans)
            {
                string text = span.GetText();

                foreach (var word in GetWordsInText(text))
                {
                    // Word offsets are relative to the start of the span.
                    string wordToParse = span.Snapshot.GetText(span.Start + word.Start, word.Length);
                    if (!ProbablyARealWord(wordToParse))
                    {
                        continue;
                    }

                    // CheckText returning true means the word was accepted.
                    List<string> suggests;
                    if (_spellChecker.CheckText(wordToParse, out suggests))
                    {
                        continue;
                    }

                    // The error span covers exactly the characters of wordToParse, which already passed
                    // ProbablyARealWord above, so the text is not fetched from the snapshot again and
                    // only the ignore list remains to be consulted.
                    if (_dictionary.ShouldIgnoreWord(wordToParse))
                    {
                        continue;
                    }

                    var errorSpan = new SnapshotSpan(span.Start + word.Start, wordToParse.Length);
                    yield return new MisspellingTag(errorSpan, suggests);
                }
            }
        }
        /// <summary>
        /// Finds misspellings in the given spans by feeding each word to a WPF <c>TextBox</c> and
        /// walking the spelling errors it reports.
        /// </summary>
        /// <param name="spans">The snapshot spans whose text is scanned for words.</param>
        /// <param name="textBox">
        /// The text box used as the spell checker. Its <c>Text</c> is overwritten with every word checked.
        /// Presumably it has spell checking enabled; that is configured outside this method.
        /// </param>
        /// <returns>
        /// A lazily evaluated sequence with one tag per reported error whose text passes
        /// <c>ProbablyARealWord</c> and is not ignored by <c>_dictionary</c>.
        /// </returns>
        IEnumerable<MisspellingTag> GetMisspellingsInSpans(NormalizedSnapshotSpanCollection spans, TextBox textBox)
        {
            foreach (var span in spans)
            {
                string text = span.GetText();

                foreach (var word in GetWordsInText(text))
                {
                    string textToParse = span.Snapshot.GetText(span.Start + word.Start, word.Length);

                    if (!ProbablyARealWord(textToParse))
                    {
                        continue;
                    }

                    // Now pass these off to WPF.
                    textBox.Text = textToParse;

                    int nextSearchIndex = 0;
                    int nextSpellingErrorIndex;

                    while (-1 != (nextSpellingErrorIndex = textBox.GetNextSpellingErrorCharacterIndex(nextSearchIndex, LogicalDirection.Forward)))
                    {
                        var spellingError = textBox.GetSpellingError(nextSpellingErrorIndex);
                        int length = textBox.GetSpellingErrorLength(nextSpellingErrorIndex);

                        // Work around what looks to be a WPF bug; if the spelling error is followed by a 's,
                        // then include that in the error span. The comparison is ordinal and case-insensitive
                        // so that it neither allocates a lower-cased copy nor depends on the current culture.
                        int errorEnd = nextSpellingErrorIndex + length;
                        if (string.Compare(textToParse, errorEnd, "'s", 0, 2, System.StringComparison.OrdinalIgnoreCase) == 0)
                        {
                            length += 2;
                        }

                        // Error offsets are relative to the word, which is itself relative to the span.
                        SnapshotSpan errorSpan = new SnapshotSpan(span.Start + word.Start + nextSpellingErrorIndex, length);
                        string errorText = errorSpan.GetText();

                        if (ProbablyARealWord(errorText) && !_dictionary.ShouldIgnoreWord(errorText))
                        {
                            yield return new MisspellingTag(errorSpan, spellingError.Suggestions.ToArray());
                        }

                        // Continue searching after this error; stop once the end of the word is reached.
                        nextSearchIndex = nextSpellingErrorIndex + length;
                        if (nextSearchIndex >= textToParse.Length)
                        {
                            break;
                        }
                    }
                }
            }
        }
Ejemplo n.º 3
0
        // Minimum number of consecutive words in another language, within one part of a sentence,
        // for those words not to be counted as misspelled.
        const int MinForeignWordSequence = 3;


        // Checks spelling per sentence in multiple languages, using one TextBox per language as the spell checker
        // (presumably each text box is configured for a different language; that setup is not visible here).
        // Normally it checks in one language until a misspelling is found. It then checks this word in the other
        // languages and continues checking in the first language that accepts it.
        // After checking the whole sentence, it chooses the language that has the most matching text (bestlang).
        // Sequences of at least MinForeignWordSequence error-free words that are in the same other language are
        // not treated as misspellings; other foreign spans are rechecked with bestlang.
        //
        // spans:     the snapshot spans to check.
        // textBoxes: the spell-checking text boxes; the first one is the initial current language. Their Text is
        //            overwritten while checking. With a single entry, errors are yielded as soon as they are found.
        // Returns a lazily evaluated sequence of MisspellingTag values.
        IEnumerable <MisspellingTag> GetMisspellingsInSpans(NormalizedSnapshotSpanCollection spans, List <TextBox> textBoxes)
        {
            // The current language is kept across words, sentences and spans.
            var currentLang = textBoxes.First();

            foreach (var span in spans)
            {
                string text = span.GetText();
                if (string.IsNullOrWhiteSpace(text))
                {
                    continue;
                }

                // Offset of the current sentence within text.
                int sentenceStart = 0;

                // process all sentences in text
                // NOTE(review): sentenceStart is advanced by sentence.Length + 2 for every part, but the separator "', "
                // is three characters long, so offsets may drift after that separator - confirm.
                foreach (var sentence in text.Split(new string[] { ". ", ", ", "; ", ": ", "? ", "! ", " \"", "\" ", "\".", "\",", "\";", "\"?", "\"!", "\":", " '", "' ", "'.", "', ", "';", "':", "'?", "'!" }, StringSplitOptions.None))
                {
                    if (string.IsNullOrWhiteSpace(sentence))
                    {
                        sentenceStart += sentence.Length + 2;
                        continue;
                    }

                    // Collects, for this sentence, which stretch of text was checked with which language.
                    List <LanguageSpan> languageSpans = new List <LanguageSpan>();

                    foreach (var word in GetWordsInText(sentence))                                                        // process all words in sentence
                    {
                        string textToParse = span.Snapshot.GetText(span.Start + sentenceStart + word.Start, word.Length); // get word from span

                        if (!ProbablyARealWord(textToParse))
                        {
                            continue;
                        }

                        // Now pass these off to WPF.
                        currentLang.Text = textToParse;

                        // System.Diagnostics.Debugger.Log(1, "debug",  textToParse + " ");

                        int nextSearchIndex                 = 0;
                        int nextSpellingErrorIndex          = -1;
                        int nextSpellingErrorIndexOtherLang = -1;       // written by the lambda below as a side effect

                        var currentLanguageSpan = new LanguageSpan {
                            Language = currentLang, Text = textToParse, Start = sentenceStart + word.Start, Length = 0
                        };

                        while (-1 != (nextSpellingErrorIndex = currentLang.GetNextSpellingErrorCharacterIndex(nextSearchIndex, LogicalDirection.Forward)))                                         // get next spelling error
                        {
                            // Keep switching to another language for as long as one exists that reports no error at
                            // nextSpellingErrorIndex (its next error is absent or lies further on).
                            TextBox validInLang;
                            while (                                              // if spelling error, check other languages too.
                                (validInLang = textBoxes
                                               .Where(lang => lang != currentLang)
                                               .FirstOrDefault(lang =>                                                  // searches first language where word is spelled correctly.
                            {
                                if (lang.Text != textToParse)
                                {
                                    lang.Text = textToParse;
                                }
                                nextSpellingErrorIndexOtherLang = lang.GetNextSpellingErrorCharacterIndex(nextSpellingErrorIndex, LogicalDirection.Forward);
                                return(nextSpellingErrorIndexOtherLang == -1 || nextSpellingErrorIndexOtherLang > nextSpellingErrorIndex);
                            }))
                                != null)
                            {
                                // Continue from the error position reported by the language that accepted the text.
                                nextSpellingErrorIndex = nextSpellingErrorIndexOtherLang;
                                currentLang            = validInLang;
                                if (nextSpellingErrorIndex > currentLanguageSpan.Length)
                                {
                                    if (currentLanguageSpan.Length > 0)
                                    {
                                        // Close the span collected so far and start a new one for the new language.
                                        languageSpans.Add(currentLanguageSpan);
                                        currentLanguageSpan = new LanguageSpan {
                                            Language = currentLang, Text = textToParse, Start = sentenceStart + word.Start, Length = 0
                                        };
                                    }
                                    else
                                    {
                                        currentLanguageSpan.Length = nextSpellingErrorIndex;
                                    }
                                }
                                else if (nextSpellingErrorIndex == -1)
                                {
                                    // The new language reports no further errors: the span covers the whole word.
                                    currentLanguageSpan.Length   = textToParse.Length;
                                    currentLanguageSpan.Language = currentLang;
                                    break;
                                }
                                currentLanguageSpan.Language = currentLang;
                            }

                            if (nextSpellingErrorIndex == -1)
                            {
                                break;
                            }

                            // An error remains that no other language accepts: store the span collected so far and
                            // start a new one (Length is not set here) that will hold the error.
                            languageSpans.Add(currentLanguageSpan);
                            currentLanguageSpan = new LanguageSpan {
                                Language = currentLang, Text = textToParse, Start = sentenceStart + word.Start
                            };


                            var spellingError = currentLang.GetSpellingError(nextSpellingErrorIndex);
                            int length        = currentLang.GetSpellingErrorLength(nextSpellingErrorIndex);

                            // Work around what looks to be a WPF bug; if the spelling error is followed by a 's, then include that in the error span.
                            string nextChars = textToParse.Substring(nextSpellingErrorIndex + length).ToLowerInvariant();
                            if (nextChars.StartsWith("'s"))
                            {
                                length += 2;
                            }

                            // Error position = span start + word start within the span + error index within the word.
                            SnapshotSpan errorSpan = new SnapshotSpan(span.Start + currentLanguageSpan.Start + nextSpellingErrorIndex, length);

                            if (ProbablyARealWord(errorSpan.GetText()) && !_dictionary.ShouldIgnoreWord(errorSpan.GetText()))
                            {
                                var err = new MisspellingTag(errorSpan, spellingError.Suggestions.ToArray());
                                if (textBoxes.Count > 1)
                                {                                                       // support for multiple languages
                                    // The error is deferred until the best language is known.
                                    // NOTE(review): a second, equivalent tag is constructed here instead of reusing err.
                                    currentLanguageSpan.Errors.Add(new MisspellingTag(errorSpan, spellingError.Suggestions.ToArray()));
                                    if (currentLanguageSpan.EndOfFirstError == 0)
                                    {
                                        currentLanguageSpan.EndOfFirstError = nextSpellingErrorIndex + length;
                                    }
                                }
                                else
                                {                                                   // only one language
                                    yield return(err);
                                }
                            }

                            nextSearchIndex = nextSpellingErrorIndex + length;
                            if (nextSearchIndex >= textToParse.Length)
                            {
                                break;
                            }
                            else
                            {
                                // NOTE(review): this instance is added here and, unless it is replaced first, added again
                                // on the next iteration or after the loop - confirm the duplicates are intended.
                                currentLanguageSpan.Length = nextSearchIndex;
                                languageSpans.Add(currentLanguageSpan);
                            }
                        }

                        // Store the span for the remainder of the word after the last position searched.
                        currentLanguageSpan.Length = textToParse.Length - nextSearchIndex;
                        languageSpans.Add(currentLanguageSpan);
                    }

                    if (textBoxes.Count <= 1)
                    {                               // only one language to check, so we're finished with this sentence.
                        sentenceStart += sentence.Length + 2;
                        continue;
                    }

                    // select language that matches best: among the spans without errors, the language with the
                    // largest sum of (Length + 1) wins.
                    TextBox bestlang = null;
                    var     groups   = languageSpans
                                       .Where(s => s.Errors.Count == 0)                          // only select spans without spelling errors.
                                       .GroupBy(s => s.Language)
                                       .Select(g => new { Language = g.Key, Length = g.Sum(s => s.Length + 1) });
                    int max = 0;
                    foreach (var group in groups)
                    {
                        if (group.Length > max)
                        {
                            max      = group.Length;
                            bestlang = group.Language;
                        }
                    }

                    // process spans;
                    int  i = 0;
                    bool preceedingForeignSpans = true;     // true until the first bestlang span has been seen
                    int  foreignSpanIndex       = 0;        // 1-based position within the current run of foreign spans
                    int  foreignSequence        = 0;        // run position up to which foreign spans are accepted as they are
                    while (i < languageSpans.Count)
                    {
                        var lspan = languageSpans[i];
                        if (lspan.Language != bestlang)
                        {                                         // this span is a foreign span i.e. checked with a different language than bestlang
                            foreignSpanIndex++;
                            if (foreignSpanIndex > foreignSequence)
                            {
                                foreignSequence = languageSpans                                                   // check for a sequence of at least MinForeignWordSequence words in the same language.
                                                  .Skip(i)
                                                  .TakeWhile(s => s.Language == lspan.Language && s.Errors.Count == 0)
                                                  .Count();
                                if (foreignSequence < MinForeignWordSequence)
                                {
                                    foreignSequence = 0;
                                }
                                else
                                {
                                    // Convert the run length into a position within the current foreign run.
                                    foreignSequence += foreignSpanIndex - 1;
                                }
                            }
                            if ((foreignSpanIndex > foreignSequence) && (preceedingForeignSpans || foreignSpanIndex > 1 || lspan.Errors.Count != 1))
                            {
                                // recheck preceding foreign spans & foreign span sequences not bigger than MinForeignWordSequence, excluding leading errors & foreign spans with more than one error.
                                // recheck span
                                // NOTE(review): bestlang is still null here when no span in the sentence was free of
                                // errors - confirm that cannot happen, otherwise the next line throws.
                                bestlang.Text = lspan.Text;
                                int nextSearchIndex        = 0;
                                int nextSpellingErrorIndex = -1;
                                if (!preceedingForeignSpans && foreignSpanIndex == 1 && lspan.Errors.Count > 0)
                                {
                                    yield return(lspan.Errors[0]);                                                            // no need to check the first spell error as it was preceded by a bestlang span, so it's also a bestlang spelling error.

                                    nextSearchIndex = lspan.EndOfFirstError;
                                }
                                while (-1 != (nextSpellingErrorIndex = bestlang.GetNextSpellingErrorCharacterIndex(nextSearchIndex, LogicalDirection.Forward)))
                                {
                                    var spellingError = bestlang.GetSpellingError(nextSpellingErrorIndex);
                                    int length        = bestlang.GetSpellingErrorLength(nextSpellingErrorIndex);

                                    // Work around what looks to be a WPF bug; if the spelling error is followed by a 's, then include that in the error span.
                                    string nextChars = bestlang.Text.Substring(nextSpellingErrorIndex + length).ToLowerInvariant();
                                    if (nextChars.StartsWith("'s"))
                                    {
                                        length += 2;
                                    }

                                    SnapshotSpan errorSpan = new SnapshotSpan(span.Start + lspan.Start + nextSpellingErrorIndex, length);

                                    if (ProbablyARealWord(errorSpan.GetText()) && !_dictionary.ShouldIgnoreWord(errorSpan.GetText()))
                                    {
                                        yield return(new MisspellingTag(errorSpan, spellingError.Suggestions.ToArray()));
                                    }

                                    nextSearchIndex = nextSpellingErrorIndex + length;
                                    if (nextSearchIndex >= bestlang.Text.Length)
                                    {
                                        break;
                                    }
                                }
                                // The span was rechecked with bestlang; its stored errors are not yielded below.
                                i++;
                                continue;
                            }
                        }
                        else
                        {
                            // A bestlang span ends any run of foreign spans.
                            foreignSpanIndex       = 0;
                            foreignSequence        = 0;
                            preceedingForeignSpans = false;
                        }
                        foreach (var err in lspan.Errors)
                        {
                            yield return(err);                                                                      // yield errors of bestlang spans.
                        }
                        i++;
                    }
                    sentenceStart += sentence.Length + 2;
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Get misspelled and doubled words in the given set of spans
        /// </summary>
        /// <param name="spans">The spans to check</param>
        /// <returns>An enumerable list of misspelling tags.  A doubled word yields a tag that carries the
        /// span to delete; a misspelled word yields a tag that carries the dictionary's suggested
        /// corrections.  Words ignored once at their current location are skipped.</returns>
        private IEnumerable<MisspellingTag> GetMisspellingsInSpans(NormalizedSnapshotSpanCollection spans)
        {
            List<Match> xmlTags;
            SnapshotSpan errorSpan, deleteWordSpan;

            Microsoft.VisualStudio.Text.Span lastWord;
            string text, textToParse;
            var ignoredWords = wordsIgnoredOnce;

            foreach (var span in spans)
            {
                text = span.GetText();

                // Note the location of all XML elements if needed.  This is recomputed for every span and
                // cleared when the option is off so that matches found in an earlier span's text are never
                // applied to this one (the option can change while this iterator is being enumerated).
                xmlTags = SpellCheckerConfiguration.IgnoreXmlElementsInText ?
                    reXml.Matches(text).OfType<Match>().ToList() : null;

                // A zero-length span means "no previous word" for the doubled word check
                lastWord = new Microsoft.VisualStudio.Text.Span();

                foreach (var word in GetWordsInText(text))
                {
                    textToParse = text.Substring(word.Start, word.Length);

                    // Only spell check the word if it looks like one and does not start within an XML element
                    if (!IsProbablyARealWord(textToParse) || (xmlTags != null &&
                        xmlTags.Any(match => word.Start >= match.Index &&
                                    word.Start <= match.Index + match.Length - 1)))
                    {
                        continue;
                    }

                    // Check for a doubled word.  This isn't perfect as it won't detect doubled words
                    // across a line break.
                    if (lastWord.Length != 0 && text.Substring(lastWord.Start, lastWord.Length).Equals(
                            textToParse, StringComparison.OrdinalIgnoreCase) && String.IsNullOrWhiteSpace(
                            text.Substring(lastWord.Start + lastWord.Length, word.Start - lastWord.Start - lastWord.Length)))
                    {
                        errorSpan = new SnapshotSpan(span.Start + word.Start, word.Length);

                        // If the doubled word is not being ignored at the current location, return it
                        if (!ignoredWords.Any(w => w.StartPoint == errorSpan.Start && w.Word.Equals(textToParse,
                                                                                                    StringComparison.OrdinalIgnoreCase)))
                        {
                            // Delete the whitespace ahead of it too
                            deleteWordSpan = new SnapshotSpan(span.Start + lastWord.Start + lastWord.Length,
                                                              word.Length + word.Start - lastWord.Start - lastWord.Length);

                            yield return new MisspellingTag(errorSpan, deleteWordSpan);

                            lastWord = word;
                            continue;
                        }
                    }

                    lastWord = word;

                    if (_dictionary.ShouldIgnoreWord(textToParse) || _dictionary.IsSpelledCorrectly(textToParse))
                    {
                        continue;
                    }

                    // Sometimes it flags a word as misspelled if it ends with "'s".  Try checking the
                    // word without the "'s".  If ignored or correct without it, don't flag it.  This
                    // appears to be caused by the definitions in the dictionary rather than Hunspell.
                    // The stripped form is kept in its own variable so that textToParse retains the
                    // word exactly as written, including the casing of the suffix.
                    if (textToParse.EndsWith("'s", StringComparison.OrdinalIgnoreCase))
                    {
                        string wordWithoutPossessive = textToParse.Substring(0, textToParse.Length - 2);

                        if (_dictionary.ShouldIgnoreWord(wordWithoutPossessive) ||
                            _dictionary.IsSpelledCorrectly(wordWithoutPossessive))
                        {
                            continue;
                        }
                    }

                    // Some dictionaries include a trailing period on certain words such as "etc." which
                    // we don't include.  If the word is followed by a period, try it with the period to
                    // see if we get a match.  If so, consider it valid.
                    if (word.Start + word.Length < text.Length && text[word.Start + word.Length] == '.')
                    {
                        if (_dictionary.ShouldIgnoreWord(textToParse + ".") ||
                            _dictionary.IsSpelledCorrectly(textToParse + "."))
                        {
                            continue;
                        }
                    }

                    errorSpan = new SnapshotSpan(span.Start + word.Start, word.Length);

                    // If the word is not being ignored at the current location, return it and its
                    // suggested corrections.
                    if (!ignoredWords.Any(w => w.StartPoint == errorSpan.Start && w.Word.Equals(textToParse,
                                                                                                StringComparison.OrdinalIgnoreCase)))
                    {
                        yield return new MisspellingTag(errorSpan, _dictionary.SuggestCorrections(textToParse));
                    }
                }
            }
        }