/// <summary>
/// Returns a set containing every item of <paramref name="set"/> plus <paramref name="value"/>.
/// </summary>
/// <param name="set">The set to extend. Its backing array is binary-searched, so it is assumed to be sorted.</param>
/// <param name="value">The value that must be present in the result.</param>
/// <returns>
/// <paramref name="set"/> itself when it already holds <paramref name="value"/>;
/// otherwise a new set whose array has the value inserted at the position reported by the search.
/// </returns>
internal static FlagSet Union(FlagSet set, FlagValue value)
{
    var source = set.items;
    var searchResult = Array.BinarySearch(source, value);
    if (searchResult >= 0)
    {
        // The value is already a member, so no new set is needed.
        return set;
    }

    // A negative search result is the bitwise complement of the insertion index.
    var insertAt = ~searchResult;
    var grown = new FlagValue[source.Length + 1];

    // Items before the insertion point keep their positions.
    Array.Copy(source, grown, insertAt);
    grown[insertAt] = value;

    // Items at or after the insertion point shift right by one slot.
    var tailLength = source.Length - insertAt;
    if (tailLength > 0)
    {
        Array.Copy(source, insertAt, grown, insertAt + 1, tailLength);
    }

    return new FlagSet(grown);
}
/// <summary>
/// Determines whether two flag sets share at least one item.
/// </summary>
/// <param name="a">The first set; may be null.</param>
/// <param name="b">The second set; may be null.</param>
/// <returns>
/// <c>false</c> when either set is null or empty; otherwise <c>true</c> as soon as
/// an item of one set is found in the other.
/// </returns>
public static bool ContainsAny(FlagSet a, FlagSet b)
{
    if (a == null || a.IsEmpty)
    {
        return false;
    }

    if (b == null || b.IsEmpty)
    {
        return false;
    }

    // Single-item sets need only one lookup in the other set.
    if (a.Count == 1)
    {
        return b.Contains(a[0]);
    }

    if (b.Count == 1)
    {
        return a.Contains(b[0]);
    }

    // After this, "a" is the set with fewer items, so the loop below walks the shorter one.
    // NOTE(review): relies on Swapper.Swap exchanging the two references - confirm.
    if (a.Count > b.Count)
    {
        Swapper.Swap(ref a, ref b);
    }

    for (var index = 0; index < a.Count; index++)
    {
        if (b.Contains(a[index]))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Adds an initial-capital variant of <paramref name="word"/> (first character upper-cased,
/// the rest lower-cased) flagged with <c>SpecialFlags.OnlyUpcaseFlag</c>.
/// </summary>
/// <remarks>
/// Inner capitalized forms are added to handle the following all-caps forms:
/// mixed caps (OpenOffice.org -> OPENOFFICE.ORG) and all-caps with suffixes (CIA's -> CIA'S).
/// </remarks>
/// <param name="word">The word as it was read.</param>
/// <param name="flags">The flags attached to the word.</param>
/// <param name="morphs">The morphological data attached to the word.</param>
/// <param name="capType">The capitalization type detected for <paramref name="word"/>.</param>
/// <returns>
/// The result of the four-argument <c>AddWord</c> call when a variant is added;
/// <c>false</c> when the capitalization type or flags rule the variant out.
/// </returns>
private bool AddWordCapitalized(string word, FlagSet flags, MorphSet morphs, CapitalizationType capType)
{
    var wantsInitCapVariant =
        capType == CapitalizationType.Huh
        || capType == CapitalizationType.HuhInit
        || (capType == CapitalizationType.All && flags.HasItems);

    if (!wantsInitCapVariant || flags.Contains(Affix.ForbiddenWord))
    {
        return false;
    }

    flags = Builder.Dedup(FlagSet.Union(flags, SpecialFlags.OnlyUpcaseFlag));

    var casing = Affix.Culture.TextInfo;
    var buffer = StringBuilderPool.Get(word);

    // Upper-case the first character and lower-case every following one.
    for (var position = 0; position < buffer.Length; position++)
    {
        buffer[position] = position == 0
            ? casing.ToUpper(buffer[position])
            : casing.ToLower(buffer[position]);
    }

    var initCapWord = StringBuilderPool.GetStringAndReturn(buffer);
    return AddWord(initCapWord, flags, morphs, true);
}
/// <summary>
/// Looks up a flag-set alias by its one-based number.
/// </summary>
/// <param name="number">The one-based alias number; valid values run from 1 to the alias count.</param>
/// <param name="result">The aliased flag set when found; otherwise <c>null</c>.</param>
/// <returns><c>true</c> when <paramref name="number"/> is in range; otherwise <c>false</c>.</returns>
public bool TryGetAliasF(int number, out FlagSet result)
{
    var inRange = number > 0 && number <= aliasF.Count;

    // Alias numbers are one-based while the backing list is zero-based.
    result = inRange ? aliasF[number - 1] : null;
    return inRange;
}
/// <summary>
/// Builds a <c>WordList</c> from the state currently held by this builder.
/// </summary>
/// <param name="destructive">
/// When <c>true</c>, the builder's <c>EntriesByRoot</c> dictionary is handed to the result and the
/// builder's reference is cleared; when <c>false</c>, the result receives a copy of it.
/// </param>
/// <returns>
/// A word list whose n-gram restricted flags and entries are computed from the affix configuration
/// and the root entries.
/// </returns>
private WordList ToImmutable(bool destructive)
{
    var affix = Affix ?? new AffixConfig.Builder().MoveToImmutable();

    // Only flags that actually carry a value take part in the restricted set.
    var candidateFlags = new[]
    {
        affix.ForbiddenWord,
        affix.NoSuggest,
        affix.NoNgramSuggest,
        affix.OnlyInCompound,
        SpecialFlags.OnlyUpcaseFlag
    };
    var nGramRestrictedFlags = Dedup(FlagSet.Create(candidateFlags.Where(flag => flag.HasValue)));

    var result = new WordList(affix)
    {
        NGramRestrictedFlags = nGramRestrictedFlags,
    };

    Dictionary<string, WordEntrySet> entries;
    if (destructive)
    {
        // Hand the dictionary over and drop the builder's reference to it.
        entries = EntriesByRoot ?? new Dictionary<string, WordEntrySet>();
        EntriesByRoot = null;
    }
    else if (EntriesByRoot == null)
    {
        entries = new Dictionary<string, WordEntrySet>();
    }
    else
    {
        entries = new Dictionary<string, WordEntrySet>(EntriesByRoot);
    }

    result.EntriesByRoot = entries;

    // Collect every entry that carries at least one of the restricted flags.
    var restrictedEntries = new HashSet<WordEntry>();
    foreach (var rootAndEntries in result.EntriesByRoot)
    {
        foreach (var entry in rootAndEntries.Value)
        {
            if (nGramRestrictedFlags.ContainsAny(entry.Flags))
            {
                restrictedEntries.Add(entry);
            }
        }
    }

    result.NGramRestrictedEntries = restrictedEntries;
    return result;
}
/// <summary>
/// Initializes the collection from lookup structures that have already been built.
/// </summary>
/// <param name="affixesByFlag">Affix groups keyed by flag; also decides <c>HasAffixes</c> and <c>IsEmpty</c>.</param>
/// <param name="affixesByIndexedByKey">Affix entries keyed by a single character.</param>
/// <param name="affixesWithDots">The entries stored as the "with dots" collection.</param>
/// <param name="affixesWithEmptyKeys">The entries stored as the "empty keys" collection.</param>
/// <param name="contClasses">The flag set stored as <c>ContClasses</c>.</param>
private AffixCollection
(
    Dictionary<FlagValue, AffixEntryGroup<TEntry>> affixesByFlag,
    Dictionary<char, AffixEntryWithDetailCollection<TEntry>> affixesByIndexedByKey,
    AffixEntryWithDetailCollection<TEntry> affixesWithDots,
    AffixEntryWithDetailCollection<TEntry> affixesWithEmptyKeys,
    FlagSet contClasses
)
{
    this.affixesByFlag = affixesByFlag;
    this.affixesByIndexedByKey = affixesByIndexedByKey;
    this.affixesWithDots = affixesWithDots;
    AffixesWithEmptyKeys = affixesWithEmptyKeys;
    ContClasses = contClasses;

    // The two status properties are always opposites of each other.
    var anyGroups = affixesByFlag.Count != 0;
    HasAffixes = anyGroups;
    IsEmpty = !anyGroups;
}
/// <summary>
/// Creates and populates an affix entry of type <typeparamref name="TEntry"/>.
/// </summary>
/// <typeparam name="TEntry">The concrete affix entry type to instantiate.</typeparam>
/// <param name="strip">The value assigned to <c>Strip</c>.</param>
/// <param name="affixText">The value assigned to <c>Append</c>.</param>
/// <param name="conditions">The value assigned to <c>Conditions</c>.</param>
/// <param name="morph">The morphological data; <c>MorphSet.Empty</c> is used when null.</param>
/// <param name="contClass">The continuation flags; <c>FlagSet.Empty</c> is used when null.</param>
/// <returns>The newly created entry.</returns>
public static TEntry Create<TEntry>
(
    string strip,
    string affixText,
    CharacterConditionGroup conditions,
    MorphSet morph,
    FlagSet contClass
)
    where TEntry : AffixEntry, new()
{
    var entry = new TEntry();
    entry.Strip = strip;
    entry.Append = affixText;
    entry.Conditions = conditions;

    // Null inputs are replaced by the shared empty instances.
    entry.MorphCode = morph ?? MorphSet.Empty;
    entry.ContClass = contClass ?? FlagSet.Empty;
    return entry;
}
/// <summary>
/// Builds an affix collection from a list of group builders.
/// </summary>
/// <param name="builders">The group builders to convert; may be null or empty.</param>
/// <returns>
/// <c>Empty</c> when there are no builders; otherwise a collection with each group indexed by flag
/// and each entry sorted into one of three buckets: empty key, key containing a dot, or keyed by
/// the first character of its key.
/// </returns>
public static AffixCollection<TEntry> Create(List<AffixEntryGroup.Builder<TEntry>> builders)
{
    if (builders == null || builders.Count == 0)
    {
        return Empty;
    }

    var groupsByFlag = new Dictionary<FlagValue, AffixEntryGroup<TEntry>>(builders.Count);
    var listsByFirstChar = new Dictionary<char, List<AffixEntryWithDetail<TEntry>>>();
    var dottedEntries = new List<AffixEntryWithDetail<TEntry>>();
    var emptyKeyEntries = new List<AffixEntryWithDetail<TEntry>>();
    var continuationFlags = new HashSet<FlagValue>();

    foreach (var groupBuilder in builders)
    {
        var affixGroup = groupBuilder.ToGroup();
        groupsByFlag.Add(affixGroup.AFlag, affixGroup);

        foreach (var entry in affixGroup.Entries)
        {
            var key = entry.Key;
            continuationFlags.UnionWith(entry.ContClass);
            var detailed = new AffixEntryWithDetail<TEntry>(affixGroup, entry);

            if (string.IsNullOrEmpty(key))
            {
                emptyKeyEntries.Add(detailed);
                continue;
            }

            if (key.Contains('.'))
            {
                dottedEntries.Add(detailed);
                continue;
            }

            // Every remaining entry is bucketed by the first character of its key.
            var firstChar = key[0];
            List<AffixEntryWithDetail<TEntry>> bucket;
            if (!listsByFirstChar.TryGetValue(firstChar, out bucket))
            {
                bucket = new List<AffixEntryWithDetail<TEntry>>();
                listsByFirstChar.Add(firstChar, bucket);
            }

            bucket.Add(detailed);
        }
    }

    // Convert each working list into its collection form.
    var collectionsByFirstChar =
        new Dictionary<char, AffixEntryWithDetailCollection<TEntry>>(listsByFirstChar.Count);
    foreach (var pair in listsByFirstChar)
    {
        collectionsByFirstChar.Add(pair.Key, AffixEntryWithDetailCollection<TEntry>.TakeList(pair.Value));
    }

    var dottedCollection = AffixEntryWithDetailCollection<TEntry>.TakeList(dottedEntries);
    var emptyKeyCollection = AffixEntryWithDetailCollection<TEntry>.TakeList(emptyKeyEntries);
    var contClasses = FlagSet.Create(continuationFlags);

    return new AffixCollection<TEntry>
    (
        groupsByFlag,
        collectionsByFirstChar,
        dottedCollection,
        emptyKeyCollection,
        contClasses
    );
}
/// <summary>
/// Parses the flags in <paramref name="text"/> and wraps them in a <c>FlagSet</c>.
/// </summary>
/// <param name="text">The slice of text holding the encoded flags.</param>
/// <param name="mode">The flag mode passed through to <c>ParseFlagsInOrder</c>.</param>
/// <returns>The set produced by <c>FlagSet.TakeArray</c> from the parsed flags.</returns>
internal static FlagSet ParseFlags(StringSlice text, FlagMode mode)
{
    var parsedFlags = ParseFlagsInOrder(text, mode);
    return FlagSet.TakeArray(parsedFlags);
}
/// <summary>
/// Parses the flags in a section of <paramref name="text"/> and wraps them in a <c>FlagSet</c>.
/// </summary>
/// <param name="text">The text holding the encoded flags.</param>
/// <param name="startIndex">The start index passed through to <c>ParseFlagsInOrder</c>.</param>
/// <param name="length">The length passed through to <c>ParseFlagsInOrder</c>.</param>
/// <param name="mode">The flag mode passed through to <c>ParseFlagsInOrder</c>.</param>
/// <returns>The set produced by <c>FlagSet.TakeArray</c> from the parsed flags.</returns>
public static FlagSet ParseFlags(string text, int startIndex, int length, FlagMode mode)
{
    var parsedFlags = ParseFlagsInOrder(text, startIndex, length, mode);
    return FlagSet.TakeArray(parsedFlags);
}
/// <summary>
/// Determines whether this set shares at least one item with <paramref name="values"/>.
/// </summary>
/// <param name="values">The set to compare against.</param>
/// <returns>The result of the static two-argument <c>ContainsAny</c> for this set and <paramref name="values"/>.</returns>
public bool ContainsAny(FlagSet values)
{
    return ContainsAny(this, values);
}
/// <summary>
/// Passes a flag set through the builder's flag-set deduper.
/// </summary>
/// <param name="value">The set to deduplicate; may be null.</param>
/// <returns>
/// <c>null</c> when <paramref name="value"/> is null; otherwise whatever
/// <c>FlagSetDeduper.GetEqualOrAdd</c> returns for it.
/// </returns>
public FlagSet Dedup(FlagSet value)
{
    if (value == null)
    {
        return null;
    }

    return FlagSetDeduper.GetEqualOrAdd(value);
}
/// <summary>
/// Converts the characters of <paramref name="text"/> to flags and wraps them in a <c>FlagSet</c>.
/// </summary>
/// <param name="text">The slice of text to convert.</param>
/// <returns>The set produced by <c>FlagSet.TakeArray</c> from the converted flags.</returns>
private static FlagSet ConvertCharsToFlags(StringSlice text)
{
    var convertedFlags = ConvertCharsToFlagsInOrder(text);
    return FlagSet.TakeArray(convertedFlags);
}
/// <summary>
/// Converts the characters in a section of <paramref name="text"/> to flags and wraps them in a <c>FlagSet</c>.
/// </summary>
/// <param name="text">The text to convert.</param>
/// <param name="startIndex">The start index passed through to <c>ConvertCharsToFlagsInOrder</c>.</param>
/// <param name="length">The length passed through to <c>ConvertCharsToFlagsInOrder</c>.</param>
/// <returns>The set produced by <c>FlagSet.TakeArray</c> from the converted flags.</returns>
private static FlagSet ConvertCharsToFlags(string text, int startIndex, int length)
{
    var convertedFlags = ConvertCharsToFlagsInOrder(text, startIndex, length);
    return FlagSet.TakeArray(convertedFlags);
}
/// <summary>
/// Wraps <paramref name="values"/> in a <c>FlagSet</c> and passes it through the flag-set deduper.
/// </summary>
/// <param name="values">The array handed to <c>FlagSet.TakeArray</c>.</param>
/// <returns>Whatever <c>FlagSetDeduper.GetEqualOrAdd</c> returns for the wrapped set.</returns>
public FlagSet TakeArrayForFlagSet(FlagValue[] values)
{
    var wrapped = FlagSet.TakeArray(values);
    return FlagSetDeduper.GetEqualOrAdd(wrapped);
}
/// <summary>
/// Creates a flag set from the items of <paramref name="a"/> followed by the items of <paramref name="b"/>.
/// </summary>
/// <param name="a">The first set.</param>
/// <param name="b">The second set.</param>
/// <returns>The set that <c>Create</c> builds from the concatenated sequence.</returns>
public static FlagSet Union(FlagSet a, FlagSet b)
{
    var combined = Enumerable.Concat(a, b);
    return Create(combined);
}
/// <summary>
/// Parses the long-format flags in <paramref name="text"/> and wraps them in a <c>FlagSet</c>.
/// </summary>
/// <param name="text">The slice of text holding the encoded flags.</param>
/// <returns>The set produced by <c>FlagSet.TakeArray</c> from the parsed flags.</returns>
internal static FlagSet ParseLongFlags(StringSlice text)
{
    var parsedFlags = ParseLongFlagsInOrder(text);
    return FlagSet.TakeArray(parsedFlags);
}
/// <summary>
/// Builds an <c>AffixConfig</c> from the state currently held by this builder.
/// </summary>
/// <param name="destructive">
/// When <c>true</c>, the builder's collections are taken over by the result (via <c>Steal</c> or by
/// clearing the builder's reference); when <c>false</c>, the result is created from them and the
/// builder keeps its own.
/// </param>
/// <returns>The populated configuration.</returns>
private AffixConfig ToImmutable(bool destructive)
{
    var culture = CultureInfo.ReadOnly(Culture ?? CultureInfo.InvariantCulture);
    var config = new AffixConfig();

    // General options and text settings.
    config.Options = Options;
    config.FlagMode = FlagMode;
    config.KeyString = Dedup(KeyString ?? DefaultKeyString);
    config.TryString = Dedup(TryString ?? string.Empty);
    config.Language = Dedup(Language ?? string.Empty);

    // Culture-dependent settings.
    config.Culture = culture;
    config.IsHungarian = string.Equals(culture?.TwoLetterISOLanguageName, "HU", StringComparison.OrdinalIgnoreCase);
    config.StringComparer = new CulturedStringComparer(culture);

    // Compounding flags and limits.
    config.CompoundFlag = CompoundFlag;
    config.CompoundBegin = CompoundBegin;
    config.CompoundEnd = CompoundEnd;
    config.CompoundMiddle = CompoundMiddle;
    config.CompoundWordMax = CompoundWordMax;
    config.CompoundMin = CompoundMin ?? DefaultCompoundMinLength;
    config.CompoundRoot = CompoundRoot;
    config.CompoundPermitFlag = CompoundPermitFlag;
    config.CompoundForbidFlag = CompoundForbidFlag;

    // Suggestion-related and special flags, with defaults where the builder has no value.
    config.NoSuggest = NoSuggest;
    config.NoNgramSuggest = NoNgramSuggest;
    config.ForbiddenWord = ForbiddenWord ?? SpecialFlags.ForbiddenWord;
    config.LemmaPresent = LemmaPresent;
    config.Circumfix = Circumfix;
    config.OnlyInCompound = OnlyInCompound;
    config.NeedAffix = NeedAffix;
    config.MaxNgramSuggestions = MaxNgramSuggestions ?? DefaultMaxNgramSuggestions;
    config.MaxDifferency = MaxDifferency;
    config.MaxCompoundSuggestions = MaxCompoundSuggestions ?? DefaultMaxCompoundSuggestions;
    config.KeepCase = KeepCase;
    config.ForceUpperCase = ForceUpperCase;
    config.Warn = Warn;
    config.SubStandard = SubStandard;

    // Syllable settings, encoding and character sets.
    config.CompoundSyllableNum = CompoundSyllableNum;
    config.Encoding = Encoding;
    config.CompoundMaxSyllable = CompoundMaxSyllable;
    config.CompoundVowels = CompoundVowels ?? CharacterSet.Empty;
    config.WordChars = WordChars ?? CharacterSet.Empty;
    config.IgnoredChars = IgnoredChars ?? CharacterSet.Empty;
    config.Version = Dedup(Version);
    config.BreakPoints = BreakSet.Create(BreakPoints);

    if (destructive)
    {
        // Transfer the builder's collections to the result instead of creating from them.
        config.Replacements = SingleReplacementSet.TakeList(Steal(ref Replacements));
        config.CompoundRules = CompoundRuleSet.TakeList(Steal(ref CompoundRules));
        config.CompoundPatterns = PatternSet.TakeList(Steal(ref CompoundPatterns));
        config.RelatedCharacterMap = MapTable.TakeList(Steal(ref RelatedCharacterMap));
        config.Phone = PhoneTable.TakeList(Steal(ref Phone));
        config.InputConversions = MultiReplacementTable.TakeDictionary(Steal(ref InputConversions));
        config.OutputConversions = MultiReplacementTable.TakeDictionary(Steal(ref OutputConversions));
        config.Warnings = WarningList.TakeList(Steal(ref Warnings));

        config.aliasF = AliasF ?? new List<FlagSet>(0);
        AliasF = null;
        config.aliasM = AliasM ?? new List<MorphSet>(0);
        AliasM = null;
    }
    else
    {
        config.Replacements = SingleReplacementSet.Create(Replacements);
        config.CompoundRules = CompoundRuleSet.Create(CompoundRules);
        config.CompoundPatterns = PatternSet.Create(CompoundPatterns);
        config.RelatedCharacterMap = MapTable.Create(RelatedCharacterMap);
        config.Phone = PhoneTable.Create(Phone);
        config.InputConversions = MultiReplacementTable.Create(InputConversions);
        config.OutputConversions = MultiReplacementTable.Create(OutputConversions);
        config.Warnings = WarningList.Create(Warnings);

        // The alias lists are copied so the builder keeps its own instances.
        if (AliasF == null)
        {
            config.aliasF = new List<FlagSet>(0);
        }
        else
        {
            config.aliasF = AliasF.ToList();
        }

        if (AliasM == null)
        {
            config.aliasM = new List<MorphSet>(0);
        }
        else
        {
            config.aliasM = AliasM.ToList();
        }
    }

    config.Prefixes = AffixCollection<PrefixEntry>.Create(Prefixes);
    config.Suffixes = AffixCollection<SuffixEntry>.Create(Suffixes);

    // The configuration-wide continuation classes combine those of the prefixes and suffixes.
    config.ContClasses = FlagSet.Union(config.Prefixes.ContClasses, config.Suffixes.ContClasses);

    return config;
}
/// <summary>
/// Parses the flags in <paramref name="text"/> and wraps them in a <c>FlagSet</c>.
/// </summary>
/// <param name="text">The text holding the encoded flags.</param>
/// <param name="mode">The flag mode passed through to <c>ParseFlagsInOrder</c>.</param>
/// <returns>The set produced by <c>FlagSet.TakeArray</c> from the parsed flags.</returns>
public static FlagSet ParseFlags(string text, FlagMode mode)
{
    var parsedFlags = ParseFlagsInOrder(text, mode);
    return FlagSet.TakeArray(parsedFlags);
}
/// <summary>
/// Normalizes a word and its morphological data and records it in the builder's
/// <c>EntriesByRoot</c> dictionary.
/// </summary>
/// <param name="word">The word to add.</param>
/// <param name="flags">The flags stored with the entry.</param>
/// <param name="morphs">The morphological data stored with the entry.</param>
/// <param name="onlyUpperCase">
/// When <c>true</c>, the word is only added if no entry exists yet for the same root;
/// when <c>false</c>, an existing entry carrying <c>SpecialFlags.OnlyUpcaseFlag</c> is replaced instead.
/// </param>
/// <returns>Always <c>false</c>.</returns>
private bool AddWord(string word, FlagSet flags, MorphSet morphs, bool onlyUpperCase)
{
    if (Affix.IgnoredChars.HasItems)
    {
        word = word.RemoveChars(Affix.IgnoredChars);
    }

    if (Affix.ComplexPrefixes)
    {
        word = word.Reverse();

        if (morphs.HasItems && !Affix.IsAliasM)
        {
            // Reverse both the order of the morph strings and each string itself.
            var reversedMorphs = new string[morphs.Count];
            for (int target = 0, source = morphs.Count - 1; target < reversedMorphs.Length; target++, source--)
            {
                reversedMorphs[target] = morphs[source].Reverse();
            }

            morphs = MorphSet.TakeArray(reversedMorphs);
        }
    }

    var options = WordEntryOptions.None;
    if (morphs.HasItems)
    {
        if (Affix.IsAliasM)
        {
            options = WordEntryOptions.AliasM;

            // Replace each numeric alias reference by the aliased morphs; keep other values as they are.
            var expandedMorphs = new List<string>();
            foreach (var morphValue in morphs)
            {
                int aliasNumber;
                MorphSet aliasTarget;
                if (IntEx.TryParseInvariant(morphValue, out aliasNumber) && Affix.TryGetAliasM(aliasNumber, out aliasTarget))
                {
                    expandedMorphs.AddRange(aliasTarget);
                }
                else
                {
                    expandedMorphs.Add(morphValue);
                }
            }

            morphs = MorphSet.Create(expandedMorphs);
        }

        if (morphs.AnyStartsWith(MorphologicalTags.Phon))
        {
            options |= WordEntryOptions.Phon;
        }
    }

    word = Builder.Dedup(word);

    WordEntrySet entries;
    var mustStoreEntries = !Builder.EntriesByRoot.TryGetValue(word, out entries);
    if (mustStoreEntries)
    {
        entries = WordEntrySet.Empty;
    }

    var hasUpperCaseHomonym = false;
    for (var index = 0; index < entries.Count; index++)
    {
        var current = entries[index];

        if (onlyUpperCase)
        {
            // Any existing entry for this root suppresses the upper-case-only addition.
            hasUpperCaseHomonym = true;
            continue;
        }

        if (current.ContainsFlag(SpecialFlags.OnlyUpcaseFlag))
        {
            // Swap the upper-case-only entry for one carrying the new flags and stop here.
            current = new WordEntry(current.Word, flags, current.Morphs, current.Options);
            entries.DestructiveReplace(index, current);
            return false;
        }
    }

    if (!hasUpperCaseHomonym)
    {
        mustStoreEntries = true;
        var addedEntry = new WordEntry(word, flags, Builder.Dedup(morphs), options);
        entries = WordEntrySet.CopyWithItemAdded(entries, addedEntry);
    }

    if (mustStoreEntries)
    {
        Builder.EntriesByRoot[word] = entries;
    }

    return false;
}
/// <summary>
/// Adds a word and, when the plain addition returns <c>false</c>, also attempts to add its
/// capitalized variant.
/// </summary>
/// <param name="word">The word to add.</param>
/// <param name="flags">The flags attached to the word.</param>
/// <param name="morphs">The morphological data attached to the word.</param>
/// <returns>
/// <c>true</c> when the four-argument <c>AddWord</c> returns <c>true</c>; otherwise the result of
/// <c>AddWordCapitalized</c>.
/// </returns>
private bool AddWord(string word, FlagSet flags, MorphSet morphs)
{
    if (AddWord(word, flags, morphs, false))
    {
        return true;
    }

    // The capitalization type is only computed when the capitalized variant is attempted.
    var capType = CapitalizationTypeEx.GetCapitalizationType(word, Affix);
    return AddWordCapitalized(word, flags, morphs, capType);
}
/// <summary>
/// Parses the numeric flags in <paramref name="text"/> and builds a <c>FlagSet</c> from them.
/// </summary>
/// <param name="text">The slice of text holding the encoded flags.</param>
/// <returns>The set produced by <c>FlagSet.Create</c> from the parsed flags.</returns>
internal static FlagSet ParseNumberFlags(StringSlice text)
{
    var parsedFlags = ParseNumberFlagsInOrder(text);
    return FlagSet.Create(parsedFlags);
}
/// <summary>
/// Parses the numeric flags in a section of <paramref name="text"/> and builds a <c>FlagSet</c> from them.
/// </summary>
/// <param name="text">The text holding the encoded flags.</param>
/// <param name="startIndex">The start index passed through to <c>ParseNumberFlagsInOrder</c>.</param>
/// <param name="length">The length passed through to <c>ParseNumberFlagsInOrder</c>.</param>
/// <returns>The set produced by <c>FlagSet.Create</c> from the parsed flags.</returns>
public static FlagSet ParseNumberFlags(string text, int startIndex, int length)
{
    var parsedFlags = ParseNumberFlagsInOrder(text, startIndex, length);
    return FlagSet.Create(parsedFlags);
}