public static WordList CreateFromWords(IEnumerable <string> words, AffixConfig affix)
        {
            var wordListBuilder = new Builder(affix ?? new AffixConfig.Builder().MoveToImmutable());

            if (words is IList <string> wordsAsList)
            {
                wordListBuilder.InitializeEntriesByRoot(wordsAsList.Count);
            }
            else
            {
                wordListBuilder.InitializeEntriesByRoot(0);
            }

            foreach (var word in words)
            {
                var wordEntry = new WordEntry(word, FlagSet.Empty, MorphSet.Empty, WordEntryOptions.None);

                WordEntrySet entryList = !wordListBuilder.EntriesByRoot.TryGetValue(word, out entryList)
                    ? WordEntrySet.Create(wordEntry)
                    : WordEntrySet.CopyWithItemAdded(entryList, wordEntry);

                wordListBuilder.EntriesByRoot.Add(word, entryList);
            }

            return(wordListBuilder.MoveToImmutable());
        }
Example #2
0
        /// <summary>
        /// Forbid compoundings when there are special patterns at word bound.
        /// </summary>
        public bool Check(string word, int pos, WordEntry r1, WordEntry r2, bool affixed)
        {
            var wordAfterPos = word.Substring(pos);

            foreach (var patternEntry in items)
            {
                int len;
                if (
                    StringEx.IsSubset(patternEntry.Pattern2, wordAfterPos)
                    &&
                    (
                        r1 == null
                        ||
                        patternEntry.Condition.IsZero
                        ||
                        r1.ContainsFlag(patternEntry.Condition)
                    )
                    &&
                    (
                        r2 == null
                        ||
                        patternEntry.Condition2.IsZero
                        ||
                        r2.ContainsFlag(patternEntry.Condition2)
                    )
                    &&
                    // zero length pattern => only TESTAFF
                    // zero pattern (0/flag) => unmodified stem (zero affixes allowed)
                    (
                        string.IsNullOrEmpty(patternEntry.Pattern)
                        ||
                        (
                            (
                                patternEntry.Pattern.StartsWith('0') &&
                                r1.Word.Length <= pos &&
                                StringEx.EqualsOffset(word, pos - r1.Word.Length, r1.Word, 0, r1.Word.Length)
                            )
                            ||
                            (
                                !patternEntry.Pattern.StartsWith('0')
                                &&
                                (
                                    (
                                        len = patternEntry.Pattern.Length
                                    ) != 0
                                )
                                &&
                                StringEx.EqualsOffset(word, pos - len, patternEntry.Pattern, 0, len)
                            )
                        )
                    )
                    )
                {
                    return(true);
                }
            }

            return(false);
        }
        /// <summary>
        /// Forbid compoundings when there are special patterns at word bound.
        /// </summary>
        internal bool Check(string word, int pos, WordEntry r1, WordEntry r2, bool affixed)
        {
#if DEBUG
            if (word == null)
            {
                throw new ArgumentNullException(nameof(word));
            }
            if (r1 == null)
            {
                throw new ArgumentNullException(nameof(r1));
            }
            if (r2 == null)
            {
                throw new ArgumentNullException(nameof(r2));
            }
#endif

            var wordAfterPos = word.Subslice(pos);

            foreach (var patternEntry in items)
            {
                if (
                    HunspellTextFunctions.IsSubset(patternEntry.Pattern2, wordAfterPos)
                    &&
                    (
                        patternEntry.Condition.IsZero
                        ||
                        r1.ContainsFlag(patternEntry.Condition)
                    )
                    &&
                    (
                        patternEntry.Condition2.IsZero
                        ||
                        r2.ContainsFlag(patternEntry.Condition2)
                    )
                    &&
                    // zero length pattern => only TESTAFF
                    // zero pattern (0/flag) => unmodified stem (zero affixes allowed)
                    (
                        string.IsNullOrEmpty(patternEntry.Pattern)
                        ||
                        PatternWordCheck(word, pos, patternEntry.Pattern.StartsWith('0') ? r1.Word : patternEntry.Pattern)
                    )
                    )
                {
                    return(true);
                }
            }

            return(false);
        }
        public bool EntryContainsRuleFlags(WordEntry rv)
        {
            foreach (var rule in items)
            {
                foreach (var flag in rule)
                {
                    if (!flag.Equals('*') && !flag.Equals('?') && rv.ContainsFlag(flag))
                    {
                        return(true);
                    }
                }
            }

            return(false);
        }
 private bool IsKeepCase(WordEntry rv) => rv.ContainsFlag(Affix.KeepCase);
            public SpellCheckResult CheckDetails()
            {
                var word = WordToCheck;

                if (string.IsNullOrEmpty(word) || word.Length >= MaxWordUtf8Len || !WordList.HasEntries)
                {
                    return(new SpellCheckResult(false));
                }
                if (word == DefaultXmlToken)
                {
                    // Hunspell supports XML input of the simplified API (see manual)
                    return(new SpellCheckResult(true));
                }

                // input conversion
                if (!Affix.InputConversions.HasReplacements || !Affix.InputConversions.TryConvert(word, out string convertedWord))
                {
                    convertedWord = word;
                }

                if (!CleanWord2(out string scw, convertedWord, out CapitalizationType capType, out int abbv))
                {
                    return(new SpellCheckResult(false));
                }

                if (HunspellTextFunctions.IsNumericWord(word))
                {
                    // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
                    return(new SpellCheckResult(true));
                }

                var       resultType = SpellCheckResultType.None;
                string    root       = null;
                WordEntry rv         = null;

                if (capType == CapitalizationType.Huh || capType == CapitalizationType.HuhInit || capType == CapitalizationType.None)
                {
                    if (capType == CapitalizationType.HuhInit)
                    {
                        resultType |= SpellCheckResultType.OrigCap;
                    }

                    rv = CheckWord(scw, ref resultType, out root);
                    if (abbv != 0 && rv == null)
                    {
                        rv = CheckWord(scw + ".", ref resultType, out root);
                    }
                }
                else if (capType == CapitalizationType.All)
                {
                    rv = CheckDetailsAllCap(abbv, ref scw, ref resultType, out root);
                }

                if (capType == CapitalizationType.Init || (capType == CapitalizationType.All && rv == null))
                {
                    rv = CheckDetailsInitCap(abbv, capType, ref scw, ref resultType, out root);
                }

                if (rv != null)
                {
                    if (rv.ContainsFlag(Affix.Warn))
                    {
                        resultType |= SpellCheckResultType.Warn;

                        if (Affix.ForbidWarn)
                        {
                            return(new SpellCheckResult(root, resultType, false));
                        }
                    }

                    return(new SpellCheckResult(root, resultType, true));
                }

                // recursive breaking at break points
                if (Affix.BreakPoints.HasItems && !EnumEx.HasFlag(resultType, SpellCheckResultType.Forbidden))
                {
                    // calculate break points for recursion limit
                    if (Affix.BreakPoints.FindRecursionLimit(scw) >= 10)
                    {
                        return(new SpellCheckResult(root, resultType, false));
                    }

                    // check boundary patterns (^begin and end$)
                    foreach (var breakEntry in Affix.BreakPoints)
                    {
                        if (breakEntry.Length == 1 || breakEntry.Length > scw.Length)
                        {
                            continue;
                        }

                        var pLastIndex = breakEntry.Length - 1;
                        if (
                            breakEntry.StartsWith('^') &&
                            StringEx.EqualsOffset(scw, 0, breakEntry, 1, pLastIndex) &&
                            Check(scw.Substring(pLastIndex))
                            )
                        {
                            return(new SpellCheckResult(root, resultType, true));
                        }

                        if (breakEntry.EndsWith('$'))
                        {
                            var wlLessBreakIndex = scw.Length - breakEntry.Length + 1;
                            if (
                                StringEx.EqualsOffset(scw, wlLessBreakIndex, breakEntry, 0, pLastIndex) &&
                                Check(scw.Substring(0, wlLessBreakIndex))
                                )
                            {
                                return(new SpellCheckResult(root, resultType, true));
                            }
                        }
                    }

                    // other patterns
                    foreach (var breakEntry in Affix.BreakPoints)
                    {
                        var found           = scw.IndexOfOrdinal(breakEntry);
                        var remainingLength = scw.Length - breakEntry.Length;
                        if (found > 0 && found < remainingLength)
                        {
                            var found2 = scw.IndexOfOrdinal(breakEntry, found + 1);
                            // try to break at the second occurance
                            // to recognize dictionary words with wordbreak
                            if (found2 > 0 && (found2 < remainingLength))
                            {
                                found = found2;
                            }

                            if (!Check(scw.Substring(found + breakEntry.Length)))
                            {
                                continue;
                            }

                            // examine 2 sides of the break point
                            if (Check(scw.Substring(0, found)))
                            {
                                return(new SpellCheckResult(root, resultType, true));
                            }

                            // LANG_hu: spec. dash rule
                            if (Affix.IsHungarian && "-".Equals(breakEntry, StringComparison.Ordinal))
                            {
                                if (Check(scw.Substring(0, found + 1)))
                                {
                                    return(new SpellCheckResult(root, resultType, true));
                                }
                            }
                        }
                    }

                    // other patterns (break at first break point)
                    foreach (var breakEntry in Affix.BreakPoints)
                    {
                        var found           = scw.IndexOfOrdinal(breakEntry);
                        var remainingLength = scw.Length - breakEntry.Length;
                        if (found > 0 && found < remainingLength)
                        {
                            if (!Check(scw.Substring(found + breakEntry.Length)))
                            {
                                continue;
                            }

                            // examine 2 sides of the break point
                            if (Check(scw.Substring(0, found)))
                            {
                                return(new SpellCheckResult(root, resultType, true));
                            }

                            // LANG_hu: spec. dash rule
                            if (Affix.IsHungarian && "-".Equals(breakEntry, StringComparison.Ordinal))
                            {
                                if (Check(scw.Substring(0, found + 1)))
                                {
                                    return(new SpellCheckResult(root, resultType, true));
                                }
                            }
                        }
                    }
                }

                return(new SpellCheckResult(root, resultType, false));
            }
Example #7
0
        private bool AddWord(string word, FlagSet flags, MorphSet morphs, bool onlyUpperCase)
        {
            if (Affix.IgnoredChars.HasItems)
            {
                word = word.RemoveChars(Affix.IgnoredChars);
            }

            if (Affix.ComplexPrefixes)
            {
                word = word.Reverse();

                if (morphs.HasItems && !Affix.IsAliasM)
                {
                    var newMorphs = new string[morphs.Count];
                    for (int i = 0; i < morphs.Count; i++)
                    {
                        newMorphs[i] = morphs[morphs.Count - i - 1].Reverse();
                    }

                    morphs = MorphSet.TakeArray(newMorphs);
                }
            }

            WordEntryOptions options;

            if (morphs.HasItems)
            {
                if (Affix.IsAliasM)
                {
                    options = WordEntryOptions.AliasM;
                    var morphBuilder = new List <string>();
                    foreach (var originalValue in morphs)
                    {
                        if (IntEx.TryParseInvariant(originalValue, out int morphNumber) && Affix.TryGetAliasM(morphNumber, out MorphSet aliasedMorph))
                        {
                            morphBuilder.AddRange(aliasedMorph);
                        }
                        else
                        {
                            morphBuilder.Add(originalValue);
                        }
                    }

                    morphs = MorphSet.Create(morphBuilder);
                }
                else
                {
                    options = WordEntryOptions.None;
                }

                if (morphs.AnyStartsWith(MorphologicalTags.Phon))
                {
                    options |= WordEntryOptions.Phon;
                }
            }
            else
            {
                options = WordEntryOptions.None;
            }

            bool saveEntryList = false;

            word = Builder.Dedup(word);
            if (!Builder.EntriesByRoot.TryGetValue(word, out WordEntrySet entryList))
            {
                saveEntryList = true;
                entryList     = WordEntrySet.Empty;
            }

            var upperCaseHomonym = false;

            for (var i = 0; i < entryList.Count; i++)
            {
                var existingEntry = entryList[i];

                if (!onlyUpperCase)
                {
                    if (existingEntry.ContainsFlag(SpecialFlags.OnlyUpcaseFlag))
                    {
                        existingEntry = new WordEntry(
                            existingEntry.Word,
                            flags,
                            existingEntry.Morphs,
                            existingEntry.Options);
                        entryList.DestructiveReplace(i, existingEntry);
                        return(false);
                    }
                }
                else
                {
                    upperCaseHomonym = true;
                }
            }

            if (!upperCaseHomonym)
            {
                saveEntryList = true;
                entryList     = WordEntrySet.CopyWithItemAdded(entryList, new WordEntry(
                                                                   word,
                                                                   flags,
                                                                   Builder.Dedup(morphs),
                                                                   options));
            }

            if (saveEntryList)
            {
                Builder.EntriesByRoot[word] = entryList;
            }

            return(false);
        }