/// <summary>
/// Builds a <see cref="WordList"/> from a plain sequence of words, giving every word an entry
/// with no flags, no morphological data and no options.
/// </summary>
/// <param name="words">The words to add; must not be null. The same word may appear more than once.</param>
/// <param name="affix">The affix configuration to use; when null, an empty configuration is built.</param>
/// <returns>The immutable word list produced by the builder.</returns>
public static WordList CreateFromWords(IEnumerable<string> words, AffixConfig affix)
{
    var wordListBuilder = new Builder(affix ?? new AffixConfig.Builder().MoveToImmutable());

    // Use the element count as the initial size when it is known up front.
    if (words is IList<string> wordsAsList)
    {
        wordListBuilder.InitializeEntriesByRoot(wordsAsList.Count);
    }
    else
    {
        wordListBuilder.InitializeEntriesByRoot(0);
    }

    foreach (var word in words)
    {
        var wordEntry = new WordEntry(word, FlagSet.Empty, MorphSet.Empty, WordEntryOptions.None);

        var entryList = wordListBuilder.EntriesByRoot.TryGetValue(word, out WordEntrySet existingEntries)
            ? WordEntrySet.CopyWithItemAdded(existingEntries, wordEntry)
            : WordEntrySet.Create(wordEntry);

        // Assign through the indexer: the key already exists whenever a word repeats,
        // and Add(...) would throw on the duplicate key instead of storing the merged set.
        wordListBuilder.EntriesByRoot[word] = entryList;
    }

    return wordListBuilder.MoveToImmutable();
}
/// <summary>
/// Forbid compoundings when there are special patterns at word bound.
/// </summary>
/// <param name="word">The full compound word being examined.</param>
/// <param name="pos">The index in <paramref name="word"/> where the second compound part starts.</param>
/// <param name="r1">The entry for the first part; may be null, in which case its flag condition is not tested.</param>
/// <param name="r2">The entry for the second part; may be null, in which case its flag condition is not tested.</param>
/// <param name="affixed">Not used by this method.</param>
/// <returns><c>true</c> when any pattern entry matches at the boundary; otherwise <c>false</c>.</returns>
public bool Check(string word, int pos, WordEntry r1, WordEntry r2, bool affixed)
{
    var wordAfterPos = word.Substring(pos);

    foreach (var patternEntry in items)
    {
        if (!StringEx.IsSubset(patternEntry.Pattern2, wordAfterPos))
        {
            continue;
        }

        if (r1 != null && !patternEntry.Condition.IsZero && !r1.ContainsFlag(patternEntry.Condition))
        {
            continue;
        }

        if (r2 != null && !patternEntry.Condition2.IsZero && !r2.ContainsFlag(patternEntry.Condition2))
        {
            continue;
        }

        var pattern = patternEntry.Pattern;

        // zero length pattern => only TESTAFF
        if (string.IsNullOrEmpty(pattern))
        {
            return true;
        }

        if (pattern.StartsWith('0'))
        {
            // zero pattern (0/flag) => unmodified stem (zero affixes allowed)
            // r1 is allowed to be null by the flag test above, so it must be guarded here too;
            // without an entry there is no stem to compare against.
            if (
                r1 != null
                && r1.Word.Length <= pos
                && StringEx.EqualsOffset(word, pos - r1.Word.Length, r1.Word, 0, r1.Word.Length)
            )
            {
                return true;
            }
        }
        else if (
            // A pattern longer than the text before pos cannot match; the guard also
            // keeps the start offset passed to EqualsOffset from going negative.
            pattern.Length <= pos
            && StringEx.EqualsOffset(word, pos - pattern.Length, pattern, 0, pattern.Length)
        )
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Forbid compoundings when there are special patterns at word bound.
/// </summary>
/// <param name="word">The full compound word being examined.</param>
/// <param name="pos">The index in <paramref name="word"/> where the second compound part starts.</param>
/// <param name="r1">The entry for the first compound part.</param>
/// <param name="r2">The entry for the second compound part.</param>
/// <param name="affixed">Not used by this method.</param>
/// <returns><c>true</c> when any pattern entry matches at the boundary; otherwise <c>false</c>.</returns>
internal bool Check(string word, int pos, WordEntry r1, WordEntry r2, bool affixed)
{
#if DEBUG
    if (word == null)
    {
        throw new ArgumentNullException(nameof(word));
    }
    if (r1 == null)
    {
        throw new ArgumentNullException(nameof(r1));
    }
    if (r2 == null)
    {
        throw new ArgumentNullException(nameof(r2));
    }
#endif

    var wordAfterPos = word.Subslice(pos);

    foreach (var patternEntry in items)
    {
        // The text starting at pos has to satisfy the second pattern.
        if (!HunspellTextFunctions.IsSubset(patternEntry.Pattern2, wordAfterPos))
        {
            continue;
        }

        // A non-zero condition flag must be carried by the corresponding entry.
        if (!patternEntry.Condition.IsZero && !r1.ContainsFlag(patternEntry.Condition))
        {
            continue;
        }

        if (!patternEntry.Condition2.IsZero && !r2.ContainsFlag(patternEntry.Condition2))
        {
            continue;
        }

        // zero length pattern => only TESTAFF
        if (string.IsNullOrEmpty(patternEntry.Pattern))
        {
            return true;
        }

        // zero pattern (0/flag) => unmodified stem (zero affixes allowed)
        var expectedBeforePos = patternEntry.Pattern.StartsWith('0')
            ? r1.Word
            : patternEntry.Pattern;

        if (PatternWordCheck(word, pos, expectedBeforePos))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Determines whether the given entry carries at least one flag that appears in any rule.
/// </summary>
/// <param name="rv">The word entry whose flags are tested.</param>
/// <returns>
/// <c>true</c> as soon as a rule flag other than '*' or '?' is found on <paramref name="rv"/>;
/// otherwise <c>false</c>.
/// </returns>
public bool EntryContainsRuleFlags(WordEntry rv)
{
    foreach (var rule in items)
    {
        foreach (var ruleFlag in rule)
        {
            // '*' and '?' are never treated as entry flags here.
            if (ruleFlag.Equals('*') || ruleFlag.Equals('?'))
            {
                continue;
            }

            if (rv.ContainsFlag(ruleFlag))
            {
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// Determines whether the entry carries the KeepCase flag of the affix configuration.
/// </summary>
/// <param name="rv">The word entry to test.</param>
/// <returns><c>true</c> when <paramref name="rv"/> contains <c>Affix.KeepCase</c>.</returns>
private bool IsKeepCase(WordEntry rv)
{
    return rv.ContainsFlag(Affix.KeepCase);
}
/// <summary>
/// Spell checks <c>WordToCheck</c> and reports whether it is accepted, together with the
/// root and result flags gathered during the lookup.
/// </summary>
/// <returns>
/// A result marked correct when the word is the XML token, is numeric, is found by one of the
/// capitalization-specific lookups (and is not rejected by the warn flag), or can be split at a
/// configured break point into parts that pass <c>Check</c>; otherwise a result marked incorrect.
/// </returns>
public SpellCheckResult CheckDetails()
{
    var word = WordToCheck;

    if (string.IsNullOrEmpty(word) || word.Length >= MaxWordUtf8Len || !WordList.HasEntries)
    {
        return new SpellCheckResult(false);
    }

    if (word == DefaultXmlToken)
    {
        // Hunspell supports XML input of the simplified API (see manual)
        return new SpellCheckResult(true);
    }

    // input conversion: fall back to the raw word when nothing is converted
    string convertedWord = null;
    var wasConverted = Affix.InputConversions.HasReplacements
        && Affix.InputConversions.TryConvert(word, out convertedWord);
    if (!wasConverted)
    {
        convertedWord = word;
    }

    if (!CleanWord2(out string cleaned, convertedWord, out CapitalizationType capitalization, out int abbreviation))
    {
        return new SpellCheckResult(false);
    }

    if (HunspellTextFunctions.IsNumericWord(word))
    {
        // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
        return new SpellCheckResult(true);
    }

    var resultType = SpellCheckResultType.None;
    string root = null;
    WordEntry entry = null;

    if (capitalization == CapitalizationType.All)
    {
        entry = CheckDetailsAllCap(abbreviation, ref cleaned, ref resultType, out root);
    }
    else if (
        capitalization == CapitalizationType.Huh
        || capitalization == CapitalizationType.HuhInit
        || capitalization == CapitalizationType.None
    )
    {
        if (capitalization == CapitalizationType.HuhInit)
        {
            resultType |= SpellCheckResultType.OrigCap;
        }

        entry = CheckWord(cleaned, ref resultType, out root);

        // retry with a trailing dot when the cleaner reported an abbreviation
        if (abbreviation != 0 && entry == null)
        {
            entry = CheckWord(cleaned + ".", ref resultType, out root);
        }
    }

    var tryInitCap = capitalization == CapitalizationType.Init
        || (capitalization == CapitalizationType.All && entry == null);
    if (tryInitCap)
    {
        entry = CheckDetailsInitCap(abbreviation, capitalization, ref cleaned, ref resultType, out root);
    }

    if (entry != null)
    {
        var accepted = true;

        if (entry.ContainsFlag(Affix.Warn))
        {
            resultType |= SpellCheckResultType.Warn;

            if (Affix.ForbidWarn)
            {
                accepted = false;
            }
        }

        return new SpellCheckResult(root, resultType, accepted);
    }

    // recursive breaking at break points
    if (!Affix.BreakPoints.HasItems || EnumEx.HasFlag(resultType, SpellCheckResultType.Forbidden))
    {
        return new SpellCheckResult(root, resultType, false);
    }

    // calculate break points for recursion limit
    if (Affix.BreakPoints.FindRecursionLimit(cleaned) >= 10)
    {
        return new SpellCheckResult(root, resultType, false);
    }

    // check boundary patterns (^begin and end$)
    foreach (var breakEntry in Affix.BreakPoints)
    {
        if (breakEntry.Length == 1 || breakEntry.Length > cleaned.Length)
        {
            continue;
        }

        var patternLength = breakEntry.Length - 1;

        if (
            breakEntry.StartsWith('^')
            && StringEx.EqualsOffset(cleaned, 0, breakEntry, 1, patternLength)
            && Check(cleaned.Substring(patternLength))
        )
        {
            return new SpellCheckResult(root, resultType, true);
        }

        if (!breakEntry.EndsWith('$'))
        {
            continue;
        }

        var tailStart = cleaned.Length - breakEntry.Length + 1;

        if (
            StringEx.EqualsOffset(cleaned, tailStart, breakEntry, 0, patternLength)
            && Check(cleaned.Substring(0, tailStart))
        )
        {
            return new SpellCheckResult(root, resultType, true);
        }
    }

    // other patterns
    foreach (var breakEntry in Affix.BreakPoints)
    {
        var breakIndex = cleaned.IndexOfOrdinal(breakEntry);
        var upperBound = cleaned.Length - breakEntry.Length;

        if (breakIndex <= 0 || breakIndex >= upperBound)
        {
            continue;
        }

        // try to break at the second occurrence
        // to recognize dictionary words with wordbreak
        var secondIndex = cleaned.IndexOfOrdinal(breakEntry, breakIndex + 1);
        if (secondIndex > 0 && secondIndex < upperBound)
        {
            breakIndex = secondIndex;
        }

        // examine 2 sides of the break point, right side first
        if (!Check(cleaned.Substring(breakIndex + breakEntry.Length)))
        {
            continue;
        }

        if (Check(cleaned.Substring(0, breakIndex)))
        {
            return new SpellCheckResult(root, resultType, true);
        }

        // LANG_hu: spec. dash rule
        if (
            Affix.IsHungarian
            && "-".Equals(breakEntry, StringComparison.Ordinal)
            && Check(cleaned.Substring(0, breakIndex + 1))
        )
        {
            return new SpellCheckResult(root, resultType, true);
        }
    }

    // other patterns (break at first break point)
    foreach (var breakEntry in Affix.BreakPoints)
    {
        var breakIndex = cleaned.IndexOfOrdinal(breakEntry);
        var upperBound = cleaned.Length - breakEntry.Length;

        if (breakIndex <= 0 || breakIndex >= upperBound)
        {
            continue;
        }

        // examine 2 sides of the break point, right side first
        if (!Check(cleaned.Substring(breakIndex + breakEntry.Length)))
        {
            continue;
        }

        if (Check(cleaned.Substring(0, breakIndex)))
        {
            return new SpellCheckResult(root, resultType, true);
        }

        // LANG_hu: spec. dash rule
        if (
            Affix.IsHungarian
            && "-".Equals(breakEntry, StringComparison.Ordinal)
            && Check(cleaned.Substring(0, breakIndex + 1))
        )
        {
            return new SpellCheckResult(root, resultType, true);
        }
    }

    return new SpellCheckResult(root, resultType, false);
}
/// <summary>
/// Adds a word with its flags and morphological data to the entries kept by the builder.
/// </summary>
/// <param name="word">The word to add; ignored characters are removed and, with complex prefixes, it is reversed.</param>
/// <param name="flags">The flags for the new entry, or for the replaced entry when a hidden upper-case-only homonym exists.</param>
/// <param name="morphs">The morphological data; alias numbers are expanded when the affix configuration uses AM aliases.</param>
/// <param name="onlyUpperCase">When true, the word is not added if any entry already exists for the same root.</param>
/// <returns>Always <c>false</c>.</returns>
private bool AddWord(string word, FlagSet flags, MorphSet morphs, bool onlyUpperCase)
{
    if (Affix.IgnoredChars.HasItems)
    {
        word = word.RemoveChars(Affix.IgnoredChars);
    }

    if (Affix.ComplexPrefixes)
    {
        word = word.Reverse();

        if (morphs.HasItems && !Affix.IsAliasM)
        {
            // Reverse both the order of the morphs and the text of each one.
            var total = morphs.Count;
            var reversedMorphs = new string[total];
            for (int target = 0, source = total - 1; target < total; target++, source--)
            {
                reversedMorphs[target] = morphs[source].Reverse();
            }

            morphs = MorphSet.TakeArray(reversedMorphs);
        }
    }

    var options = WordEntryOptions.None;
    if (morphs.HasItems)
    {
        if (Affix.IsAliasM)
        {
            options = WordEntryOptions.AliasM;

            // Replace each numeric alias with the morphs it refers to; keep anything else as-is.
            var expandedMorphs = new List<string>();
            foreach (var originalValue in morphs)
            {
                if (
                    IntEx.TryParseInvariant(originalValue, out int morphNumber)
                    && Affix.TryGetAliasM(morphNumber, out MorphSet aliasedMorph)
                )
                {
                    expandedMorphs.AddRange(aliasedMorph);
                }
                else
                {
                    expandedMorphs.Add(originalValue);
                }
            }

            morphs = MorphSet.Create(expandedMorphs);
        }

        if (morphs.AnyStartsWith(MorphologicalTags.Phon))
        {
            options |= WordEntryOptions.Phon;
        }
    }

    word = Builder.Dedup(word);

    // A root that is not stored yet must always be written back.
    var mustStore = !Builder.EntriesByRoot.TryGetValue(word, out WordEntrySet entryList);
    if (mustStore)
    {
        entryList = WordEntrySet.Empty;
    }

    var upperCaseHomonym = false;
    if (onlyUpperCase)
    {
        // Any existing entry for this root counts as a homonym of the upper-case-only word.
        upperCaseHomonym = entryList.Count > 0;
    }
    else
    {
        for (var index = 0; index < entryList.Count; index++)
        {
            var current = entryList[index];
            if (!current.ContainsFlag(SpecialFlags.OnlyUpcaseFlag))
            {
                continue;
            }

            // Replace the hidden upper-case-only homonym in place, giving it the new flags.
            var replacement = new WordEntry(current.Word, flags, current.Morphs, current.Options);
            entryList.DestructiveReplace(index, replacement);
            return false;
        }
    }

    if (!upperCaseHomonym)
    {
        mustStore = true;
        var addedEntry = new WordEntry(word, flags, Builder.Dedup(morphs), options);
        entryList = WordEntrySet.CopyWithItemAdded(entryList, addedEntry);
    }

    if (mustStore)
    {
        Builder.EntriesByRoot[word] = entryList;
    }

    return false;
}