コード例 #1
0
        internal List <Affix <PrefixEntry> > GetMatchingAffixes(string word)
        {
            if (string.IsNullOrEmpty(word))
            {
                return(new List <Affix <PrefixEntry> >(0));
            }

            var results = new List <Affix <PrefixEntry> >();

            if (AffixesByIndexedByKey.TryGetValue(word[0], out AffixEntryGroupCollection <PrefixEntry> indexedGroups))
            {
                foreach (var group in indexedGroups)
                {
                    foreach (var entry in group.Entries)
                    {
                        if (HunspellTextFunctions.IsSubset(entry.Key, word))
                        {
                            results.Add(Affix <PrefixEntry> .Create(entry, group));
                        }
                    }
                }
            }

            if (AffixesWithDots.HasItems)
            {
                results.AddRange(GetMatchingWithDotAffixes(word, HunspellTextFunctions.IsSubset));
            }

            return(results);
        }
コード例 #2
0
        internal List <Affix <SuffixEntry> > GetMatchingAffixes(string word, FlagSet groupFlagFilter = null)
        {
            if (string.IsNullOrEmpty(word))
            {
                return(new List <Affix <SuffixEntry> >(0));
            }

            var results = new List <Affix <SuffixEntry> >();

            if (AffixesByIndexedByKey.TryGetValue(word[word.Length - 1], out AffixEntryGroupCollection <SuffixEntry> indexedGroups))
            {
                foreach (var group in indexedGroups)
                {
                    if (groupFlagFilter == null || groupFlagFilter.Contains(group.AFlag))
                    {
                        foreach (var entry in group.Entries)
                        {
                            if (HunspellTextFunctions.IsReverseSubset(entry.Key, word))
                            {
                                results.Add(Affix <SuffixEntry> .Create(entry, group));
                            }
                        }
                    }
                }
            }

            if (AffixesWithDots.HasItems)
            {
                results.AddRange(GetMatchingWithDotAffixes(word, HunspellTextFunctions.IsReverseSubset));
            }

            return(results);
        }
コード例 #3
0
        /// <summary>
        /// Forbid compoundings when there are special patterns at word bound.
        /// </summary>
        internal bool Check(string word, int pos, WordEntry r1, WordEntry r2, bool affixed)
        {
#if DEBUG
            if (word == null)
            {
                throw new ArgumentNullException(nameof(word));
            }
            if (r1 == null)
            {
                throw new ArgumentNullException(nameof(r1));
            }
            if (r2 == null)
            {
                throw new ArgumentNullException(nameof(r2));
            }
#endif

            var wordAfterPos = word.Subslice(pos);

            foreach (var patternEntry in items)
            {
                if (
                    HunspellTextFunctions.IsSubset(patternEntry.Pattern2, wordAfterPos)
                    &&
                    (
                        patternEntry.Condition.IsZero
                        ||
                        r1.ContainsFlag(patternEntry.Condition)
                    )
                    &&
                    (
                        patternEntry.Condition2.IsZero
                        ||
                        r2.ContainsFlag(patternEntry.Condition2)
                    )
                    &&
                    // zero length pattern => only TESTAFF
                    // zero pattern (0/flag) => unmodified stem (zero affixes allowed)
                    (
                        string.IsNullOrEmpty(patternEntry.Pattern)
                        ||
                        PatternWordCheck(word, pos, patternEntry.Pattern.StartsWith('0') ? r1.Word : patternEntry.Pattern)
                    )
                    )
                {
                    return(true);
                }
            }

            return(false);
        }
コード例 #4
0
            private WordEntry CheckDetailsInitCap(int abbv, CapitalizationType capType, ref string scw, ref SpellCheckResultType resultType, out string root)
            {
                var u8buffer = HunspellTextFunctions.MakeAllSmall(scw, TextInfo);

                scw = HunspellTextFunctions.MakeInitCap(u8buffer, TextInfo);

                resultType |= SpellCheckResultType.OrigCap;
                if (capType == CapitalizationType.Init)
                {
                    resultType |= SpellCheckResultType.InitCap;
                }

                var rv = CheckWord(scw, ref resultType, out root);

                if (capType == CapitalizationType.Init)
                {
                    resultType &= ~SpellCheckResultType.InitCap;
                }

                // forbid bad capitalization
                // (for example, ijs -> Ijs instead of IJs in Dutch)
                // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)

                if (EnumEx.HasFlag(resultType, SpellCheckResultType.Forbidden))
                {
                    rv = null;
                    return(rv);
                }

                if (capType == CapitalizationType.All && rv != null && IsKeepCase(rv))
                {
                    rv = null;
                }

                if (rv != null || (!Affix.CultureUsesDottedI && scw.StartsWith('İ')))
                {
                    return(rv);
                }

                rv = CheckWord(u8buffer, ref resultType, out root);

                if (abbv != 0 && rv == null)
                {
                    u8buffer += ".";
                    rv        = CheckWord(u8buffer, ref resultType, out root);
                    if (rv == null)
                    {
                        u8buffer = scw + ".";
                        if (capType == CapitalizationType.Init)
                        {
                            resultType |= SpellCheckResultType.InitCap;
                        }

                        rv = CheckWord(u8buffer, ref resultType, out root);

                        if (capType == CapitalizationType.Init)
                        {
                            resultType &= ~SpellCheckResultType.InitCap;
                        }

                        if (capType == CapitalizationType.All && rv != null && IsKeepCase(rv))
                        {
                            rv = null;
                        }

                        return(rv);
                    }
                }

                if (
                    rv != null
                    &&
                    IsKeepCase(rv)
                    &&
                    (
                        capType == CapitalizationType.All
                        ||
                        // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed in INITCAP form, too.
                        !(Affix.CheckSharps && u8buffer.Contains('ß'))
                    )
                    )
                {
                    rv = null;
                }

                return(rv);
            }
コード例 #5
0
            private WordEntry CheckDetailsAllCap(int abbv, ref string scw, ref SpellCheckResultType resultType, out string root)
            {
                resultType |= SpellCheckResultType.OrigCap;
                var rv = CheckWord(scw, ref resultType, out root);

                if (rv != null)
                {
                    return(rv);
                }

                if (abbv != 0)
                {
                    rv = CheckWord(scw + ".", ref resultType, out root);
                    if (rv != null)
                    {
                        return(rv);
                    }
                }

                // Spec. prefix handling for Catalan, French, Italian:
                // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
                var textInfo = TextInfo;
                var apos     = scw.IndexOf('\'');

                if (apos >= 0)
                {
                    scw = HunspellTextFunctions.MakeAllSmall(scw, textInfo);

                    // conversion may result in string with different len than before MakeAllSmall2 so re-scan
                    if (apos < scw.Length - 1)
                    {
                        scw = StringEx.ConcatString(scw, 0, apos + 1, HunspellTextFunctions.MakeInitCap(scw.Subslice(apos + 1), textInfo));
                        rv  = CheckWord(scw, ref resultType, out root);
                        if (rv != null)
                        {
                            return(rv);
                        }

                        scw = HunspellTextFunctions.MakeInitCap(scw, textInfo);
                        rv  = CheckWord(scw, ref resultType, out root);
                        if (rv != null)
                        {
                            return(rv);
                        }
                    }
                }

                if (Affix.CheckSharps && scw.Contains("SS"))
                {
                    scw = HunspellTextFunctions.MakeAllSmall(scw, textInfo);
                    var u8buffer = scw;
                    rv = SpellSharps(ref u8buffer, ref resultType, out root);
                    if (rv == null)
                    {
                        scw = HunspellTextFunctions.MakeInitCap(scw, textInfo);
                        rv  = SpellSharps(ref scw, ref resultType, out root);
                    }

                    if (abbv != 0 && rv == null)
                    {
                        u8buffer += ".";
                        rv        = SpellSharps(ref u8buffer, ref resultType, out root);
                        if (rv == null)
                        {
                            u8buffer = scw + ".";
                            rv       = SpellSharps(ref u8buffer, ref resultType, out root);
                        }
                    }
                }

                return(rv);
            }
コード例 #6
0
            public SpellCheckResult CheckDetails()
            {
                var word = WordToCheck;

                if (string.IsNullOrEmpty(word) || word.Length >= MaxWordUtf8Len || !WordList.HasEntries)
                {
                    return(new SpellCheckResult(false));
                }
                if (word == DefaultXmlToken)
                {
                    // Hunspell supports XML input of the simplified API (see manual)
                    return(new SpellCheckResult(true));
                }

                // input conversion
                if (!Affix.InputConversions.HasReplacements || !Affix.InputConversions.TryConvert(word, out string convertedWord))
                {
                    convertedWord = word;
                }

                if (!CleanWord2(out string scw, convertedWord, out CapitalizationType capType, out int abbv))
                {
                    return(new SpellCheckResult(false));
                }

                if (HunspellTextFunctions.IsNumericWord(word))
                {
                    // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
                    return(new SpellCheckResult(true));
                }

                var       resultType = SpellCheckResultType.None;
                string    root       = null;
                WordEntry rv         = null;

                if (capType == CapitalizationType.Huh || capType == CapitalizationType.HuhInit || capType == CapitalizationType.None)
                {
                    if (capType == CapitalizationType.HuhInit)
                    {
                        resultType |= SpellCheckResultType.OrigCap;
                    }

                    rv = CheckWord(scw, ref resultType, out root);
                    if (abbv != 0 && rv == null)
                    {
                        rv = CheckWord(scw + ".", ref resultType, out root);
                    }
                }
                else if (capType == CapitalizationType.All)
                {
                    rv = CheckDetailsAllCap(abbv, ref scw, ref resultType, out root);
                }

                if (capType == CapitalizationType.Init || (capType == CapitalizationType.All && rv == null))
                {
                    rv = CheckDetailsInitCap(abbv, capType, ref scw, ref resultType, out root);
                }

                if (rv != null)
                {
                    if (rv.ContainsFlag(Affix.Warn))
                    {
                        resultType |= SpellCheckResultType.Warn;

                        if (Affix.ForbidWarn)
                        {
                            return(new SpellCheckResult(root, resultType, false));
                        }
                    }

                    return(new SpellCheckResult(root, resultType, true));
                }

                // recursive breaking at break points
                if (Affix.BreakPoints.HasItems && !EnumEx.HasFlag(resultType, SpellCheckResultType.Forbidden))
                {
                    // calculate break points for recursion limit
                    if (Affix.BreakPoints.FindRecursionLimit(scw) >= 10)
                    {
                        return(new SpellCheckResult(root, resultType, false));
                    }

                    // check boundary patterns (^begin and end$)
                    foreach (var breakEntry in Affix.BreakPoints)
                    {
                        if (breakEntry.Length == 1 || breakEntry.Length > scw.Length)
                        {
                            continue;
                        }

                        var pLastIndex = breakEntry.Length - 1;
                        if (
                            breakEntry.StartsWith('^') &&
                            StringEx.EqualsOffset(scw, 0, breakEntry, 1, pLastIndex) &&
                            Check(scw.Substring(pLastIndex))
                            )
                        {
                            return(new SpellCheckResult(root, resultType, true));
                        }

                        if (breakEntry.EndsWith('$'))
                        {
                            var wlLessBreakIndex = scw.Length - breakEntry.Length + 1;
                            if (
                                StringEx.EqualsOffset(scw, wlLessBreakIndex, breakEntry, 0, pLastIndex) &&
                                Check(scw.Substring(0, wlLessBreakIndex))
                                )
                            {
                                return(new SpellCheckResult(root, resultType, true));
                            }
                        }
                    }

                    // other patterns
                    foreach (var breakEntry in Affix.BreakPoints)
                    {
                        var found           = scw.IndexOfOrdinal(breakEntry);
                        var remainingLength = scw.Length - breakEntry.Length;
                        if (found > 0 && found < remainingLength)
                        {
                            var found2 = scw.IndexOfOrdinal(breakEntry, found + 1);
                            // try to break at the second occurance
                            // to recognize dictionary words with wordbreak
                            if (found2 > 0 && (found2 < remainingLength))
                            {
                                found = found2;
                            }

                            if (!Check(scw.Substring(found + breakEntry.Length)))
                            {
                                continue;
                            }

                            // examine 2 sides of the break point
                            if (Check(scw.Substring(0, found)))
                            {
                                return(new SpellCheckResult(root, resultType, true));
                            }

                            // LANG_hu: spec. dash rule
                            if (Affix.IsHungarian && "-".Equals(breakEntry, StringComparison.Ordinal))
                            {
                                if (Check(scw.Substring(0, found + 1)))
                                {
                                    return(new SpellCheckResult(root, resultType, true));
                                }
                            }
                        }
                    }

                    // other patterns (break at first break point)
                    foreach (var breakEntry in Affix.BreakPoints)
                    {
                        var found           = scw.IndexOfOrdinal(breakEntry);
                        var remainingLength = scw.Length - breakEntry.Length;
                        if (found > 0 && found < remainingLength)
                        {
                            if (!Check(scw.Substring(found + breakEntry.Length)))
                            {
                                continue;
                            }

                            // examine 2 sides of the break point
                            if (Check(scw.Substring(0, found)))
                            {
                                return(new SpellCheckResult(root, resultType, true));
                            }

                            // LANG_hu: spec. dash rule
                            if (Affix.IsHungarian && "-".Equals(breakEntry, StringComparison.Ordinal))
                            {
                                if (Check(scw.Substring(0, found + 1)))
                                {
                                    return(new SpellCheckResult(root, resultType, true));
                                }
                            }
                        }
                    }
                }

                return(new SpellCheckResult(root, resultType, false));
            }