Beispiel #1
0
        /// <summary>
        /// Produces a flag set containing every item of <paramref name="set"/> plus <paramref name="value"/>.
        /// </summary>
        /// <param name="set">The existing set. Its backing array is binary-searched, so it is assumed to be sorted.</param>
        /// <param name="value">The flag value to include.</param>
        /// <returns>
        /// <paramref name="set"/> itself when the search finds <paramref name="value"/>; otherwise a new
        /// <see cref="FlagSet"/> over a copy of the items with <paramref name="value"/> placed at the
        /// insertion point reported by the search.
        /// </returns>
        internal static FlagSet Union(FlagSet set, FlagValue value)
        {
            var existing = set.items;
            var searchResult = Array.BinarySearch(existing, value);

            if (searchResult >= 0)
            {
                // Already present: nothing to add.
                return set;
            }

            // A negative result is the bitwise complement of the insertion index.
            var insertAt = ~searchResult;
            var merged = new FlagValue[existing.Length + 1];

            // Items before the insertion point keep their positions.
            Array.Copy(existing, merged, insertAt);
            merged[insertAt] = value;

            // Items from the insertion point onward shift right by one slot.
            var tailLength = existing.Length - insertAt;
            if (tailLength > 0)
            {
                Array.Copy(existing, insertAt, merged, insertAt + 1, tailLength);
            }

            return new FlagSet(merged);
        }
Beispiel #2
0
        /// <summary>
        /// Determines whether two flag sets have at least one item in common.
        /// </summary>
        /// <param name="a">The first set; may be null.</param>
        /// <param name="b">The second set; may be null.</param>
        /// <returns>
        /// <c>false</c> when either set is null or empty; otherwise <c>true</c> as soon as an item of one
        /// set is found in the other via <c>Contains</c>.
        /// </returns>
        public static bool ContainsAny(FlagSet a, FlagSet b)
        {
            if (a == null || a.IsEmpty)
            {
                return false;
            }

            if (b == null || b.IsEmpty)
            {
                return false;
            }

            // Single-item sets need only one membership probe.
            if (a.Count == 1)
            {
                return b.Contains(a[0]);
            }

            if (b.Count == 1)
            {
                return a.Contains(b[0]);
            }

            // Enumerate the set with fewer items and probe the other one
            // (Swapper.Swap is expected to exchange the two references).
            if (b.Count < a.Count)
            {
                Swapper.Swap(ref a, ref b);
            }

            foreach (var candidate in a)
            {
                if (b.Contains(candidate))
                {
                    return true;
                }
            }

            return false;
        }
        /// <summary>
        /// Adds an initial-capital variant of <paramref name="word"/>, flagged with
        /// <c>SpecialFlags.OnlyUpcaseFlag</c>, when the capitalization type calls for it.
        /// </summary>
        /// <param name="word">The word as it was originally given.</param>
        /// <param name="flags">The flags of the original word.</param>
        /// <param name="morphs">The morphological data passed through to the added entry.</param>
        /// <param name="capType">The capitalization type of <paramref name="word"/>.</param>
        /// <returns>
        /// <c>false</c> when no variant is added; otherwise the result of the four-argument <c>AddWord</c>.
        /// </returns>
        private bool AddWordCapitalized(string word, FlagSet flags, MorphSet morphs, CapitalizationType capType)
        {
            // Inner capitalized forms are stored so the following all-caps forms can be handled:
            //   mixed caps:            OpenOffice.org -> OPENOFFICE.ORG
            //   all caps with suffix:  CIA's -> CIA'S
            var wantsInitCapForm =
                capType == CapitalizationType.Huh
                || capType == CapitalizationType.HuhInit
                || (capType == CapitalizationType.All && flags.HasItems);

            if (!wantsInitCapForm || flags.Contains(Affix.ForbiddenWord))
            {
                return false;
            }

            var upcaseOnlyFlags = Builder.Dedup(FlagSet.Union(flags, SpecialFlags.OnlyUpcaseFlag));

            // Upper-case the first character and lower-case all the others using the affix culture.
            var casing = Affix.Culture.TextInfo;
            var buffer = StringBuilderPool.Get(word);
            for (var index = 0; index < buffer.Length; index++)
            {
                var current = buffer[index];
                buffer[index] = index == 0 ? casing.ToUpper(current) : casing.ToLower(current);
            }

            return AddWord(StringBuilderPool.GetStringAndReturn(buffer), upcaseOnlyFlags, morphs, true);
        }
Beispiel #4
0
 /// <summary>
 /// Looks up a flag-set alias by its one-based number.
 /// </summary>
 /// <param name="number">The one-based alias number.</param>
 /// <param name="result">The aliased flag set when found; otherwise null.</param>
 /// <returns><c>true</c> when <paramref name="number"/> is between 1 and the alias count, inclusive.</returns>
 public bool TryGetAliasF(int number, out FlagSet result)
 {
     var isKnownAlias = number > 0 && number <= aliasF.Count;

     // Alias numbers are one-based while the backing list is zero-based.
     result = isKnownAlias ? aliasF[number - 1] : null;

     return isKnownAlias;
 }
Beispiel #5
0
            /// <summary>
            /// Builds a <see cref="WordList"/> from the state accumulated in this builder.
            /// </summary>
            /// <param name="destructive">
            /// When <c>true</c>, the builder's root dictionary is handed to the result and the builder's
            /// reference is cleared; when <c>false</c>, the result receives a new dictionary copied from it.
            /// </param>
            /// <returns>The constructed word list, including its n-gram restricted flags and entries.</returns>
            private WordList ToImmutable(bool destructive)
            {
                var affix = Affix ?? new AffixConfig.Builder().MoveToImmutable();

                // Flags that mark an entry as n-gram restricted; only those that have a value are kept.
                var restrictionCandidates = new[]
                {
                    affix.ForbiddenWord,
                    affix.NoSuggest,
                    affix.NoNgramSuggest,
                    affix.OnlyInCompound,
                    SpecialFlags.OnlyUpcaseFlag
                };
                var nGramRestrictedFlags = Dedup(FlagSet.Create(restrictionCandidates.Where(flag => flag.HasValue)));

                var result = new WordList(affix)
                {
                    NGramRestrictedFlags = nGramRestrictedFlags,
                };

                if (!destructive)
                {
                    // Leave the builder's dictionary in place and give the result its own dictionary instance.
                    if (EntriesByRoot == null)
                    {
                        result.EntriesByRoot = new Dictionary<string, WordEntrySet>();
                    }
                    else
                    {
                        result.EntriesByRoot = new Dictionary<string, WordEntrySet>(EntriesByRoot);
                    }
                }
                else
                {
                    // Hand the dictionary over and drop the builder's reference to it.
                    result.EntriesByRoot = EntriesByRoot ?? new Dictionary<string, WordEntrySet>();
                    EntriesByRoot = null;
                }

                // Collect every entry carrying at least one of the restricting flags.
                var nGramRestrictedEntries = new HashSet<WordEntry>();
                foreach (var rootAndEntries in result.EntriesByRoot)
                {
                    foreach (var candidate in rootAndEntries.Value)
                    {
                        if (nGramRestrictedFlags.ContainsAny(candidate.Flags))
                        {
                            nGramRestrictedEntries.Add(candidate);
                        }
                    }
                }

                result.NGramRestrictedEntries = nGramRestrictedEntries;

                return result;
            }
 /// <summary>
 /// Initializes the collection from already-built lookup structures.
 /// </summary>
 /// <param name="affixesByFlag">Affix groups keyed by flag; its count determines <c>HasAffixes</c> and <c>IsEmpty</c>.</param>
 /// <param name="affixesByIndexedByKey">Affix entries keyed by a single character.</param>
 /// <param name="affixesWithDots">Affix entries stored separately from the keyed ones.</param>
 /// <param name="affixesWithEmptyKeys">Affix entries exposed through <c>AffixesWithEmptyKeys</c>.</param>
 /// <param name="contClasses">The flag set exposed through <c>ContClasses</c>.</param>
 private AffixCollection
 (
     Dictionary <FlagValue, AffixEntryGroup <TEntry> > affixesByFlag,
     Dictionary <char, AffixEntryWithDetailCollection <TEntry> > affixesByIndexedByKey,
     AffixEntryWithDetailCollection <TEntry> affixesWithDots,
     AffixEntryWithDetailCollection <TEntry> affixesWithEmptyKeys,
     FlagSet contClasses
 )
 {
     // Private lookup structures.
     this.affixesByFlag = affixesByFlag;
     this.affixesByIndexedByKey = affixesByIndexedByKey;
     this.affixesWithDots = affixesWithDots;

     // Exposed members.
     AffixesWithEmptyKeys = affixesWithEmptyKeys;
     ContClasses = contClasses;

     // The two emptiness indicators are always opposites of each other.
     var anyAffixes = affixesByFlag.Count != 0;
     HasAffixes = anyAffixes;
     IsEmpty = !anyAffixes;
 }
Beispiel #7
0
 /// <summary>
 /// Creates and populates a new affix entry of type <typeparamref name="TEntry"/>.
 /// </summary>
 /// <typeparam name="TEntry">The affix entry type to instantiate.</typeparam>
 /// <param name="strip">Value assigned to <c>Strip</c>.</param>
 /// <param name="affixText">Value assigned to <c>Append</c>.</param>
 /// <param name="conditions">Value assigned to <c>Conditions</c>.</param>
 /// <param name="morph">Value assigned to <c>MorphCode</c>; null is replaced by <c>MorphSet.Empty</c>.</param>
 /// <param name="contClass">Value assigned to <c>ContClass</c>; null is replaced by <c>FlagSet.Empty</c>.</param>
 /// <returns>The populated entry.</returns>
 public static TEntry Create <TEntry>
 (
     string strip,
     string affixText,
     CharacterConditionGroup conditions,
     MorphSet morph,
     FlagSet contClass
 )
     where TEntry : AffixEntry, new()
 {
     var entry = new TEntry();

     entry.Strip = strip;
     entry.Append = affixText;
     entry.Conditions = conditions;

     // Null sets are normalized to the shared empty instances.
     entry.MorphCode = morph ?? MorphSet.Empty;
     entry.ContClass = contClass ?? FlagSet.Empty;

     return entry;
 }
        /// <summary>
        /// Builds an affix collection from a list of group builders.
        /// </summary>
        /// <param name="builders">The group builders; null or an empty list yields <c>Empty</c>.</param>
        /// <returns>
        /// A collection in which every group is indexed by its flag and every entry is sorted into one of
        /// three places: entries with a null or empty key, entries whose key contains a '.', and all
        /// remaining entries bucketed by the first character of their key. The continuation classes of
        /// all entries are gathered into a single flag set.
        /// </returns>
        public static AffixCollection <TEntry> Create(List <AffixEntryGroup.Builder <TEntry> > builders)
        {
            if (builders == null || builders.Count == 0)
            {
                return Empty;
            }

            var groupsByFlag = new Dictionary<FlagValue, AffixEntryGroup<TEntry>>(builders.Count);
            var bucketsByFirstChar = new Dictionary<char, List<AffixEntryWithDetail<TEntry>>>();
            var dottedEntries = new List<AffixEntryWithDetail<TEntry>>();
            var keylessEntries = new List<AffixEntryWithDetail<TEntry>>();
            var continuationFlags = new HashSet<FlagValue>();

            foreach (var builder in builders)
            {
                var group = builder.ToGroup();
                groupsByFlag.Add(group.AFlag, group);

                foreach (var entry in group.Entries)
                {
                    var key = entry.Key;
                    continuationFlags.UnionWith(entry.ContClass);
                    var detailed = new AffixEntryWithDetail<TEntry>(group, entry);

                    if (string.IsNullOrEmpty(key))
                    {
                        keylessEntries.Add(detailed);
                        continue;
                    }

                    if (key.Contains('.'))
                    {
                        dottedEntries.Add(detailed);
                        continue;
                    }

                    // Everything else is bucketed by the first character of its key.
                    var firstChar = key[0];
                    List<AffixEntryWithDetail<TEntry>> bucket;
                    if (!bucketsByFirstChar.TryGetValue(firstChar, out bucket))
                    {
                        bucket = new List<AffixEntryWithDetail<TEntry>>();
                        bucketsByFirstChar.Add(firstChar, bucket);
                    }

                    bucket.Add(detailed);
                }
            }

            // Convert each per-character bucket into its final collection type.
            var entriesByFirstChar = new Dictionary<char, AffixEntryWithDetailCollection<TEntry>>(bucketsByFirstChar.Count);
            foreach (var bucketPair in bucketsByFirstChar)
            {
                entriesByFirstChar.Add(bucketPair.Key, AffixEntryWithDetailCollection<TEntry>.TakeList(bucketPair.Value));
            }

            return new AffixCollection<TEntry>(
                groupsByFlag,
                entriesByFirstChar,
                AffixEntryWithDetailCollection<TEntry>.TakeList(dottedEntries),
                AffixEntryWithDetailCollection<TEntry>.TakeList(keylessEntries),
                FlagSet.Create(continuationFlags));
        }
Beispiel #9
0
 /// <summary>
 /// Parses the flags in <paramref name="text"/> using <paramref name="mode"/> and wraps them in a <see cref="FlagSet"/>.
 /// </summary>
 /// <param name="text">The text slice to parse.</param>
 /// <param name="mode">The flag mode passed through to <c>ParseFlagsInOrder</c>.</param>
 /// <returns>The set produced by <c>FlagSet.TakeArray</c> from the parsed flags.</returns>
 internal static FlagSet ParseFlags(StringSlice text, FlagMode mode)
 {
     var flagsInOrder = ParseFlagsInOrder(text, mode);
     return FlagSet.TakeArray(flagsInOrder);
 }
Beispiel #10
0
 /// <summary>
 /// Parses the flags in a portion of <paramref name="text"/> using <paramref name="mode"/> and wraps them in a <see cref="FlagSet"/>.
 /// </summary>
 /// <param name="text">The text containing the flags.</param>
 /// <param name="startIndex">Start position passed through to <c>ParseFlagsInOrder</c>.</param>
 /// <param name="length">Length passed through to <c>ParseFlagsInOrder</c>.</param>
 /// <param name="mode">The flag mode passed through to <c>ParseFlagsInOrder</c>.</param>
 /// <returns>The set produced by <c>FlagSet.TakeArray</c> from the parsed flags.</returns>
 public static FlagSet ParseFlags(string text, int startIndex, int length, FlagMode mode)
 {
     var flagsInOrder = ParseFlagsInOrder(text, startIndex, length, mode);
     return FlagSet.TakeArray(flagsInOrder);
 }
Beispiel #11
0
 /// <summary>
 /// Determines whether this set shares at least one item with <paramref name="values"/>.
 /// </summary>
 /// <param name="values">The set to compare against.</param>
 /// <returns>The result of the static two-argument <c>ContainsAny</c> for this set and <paramref name="values"/>.</returns>
 public bool ContainsAny(FlagSet values)
 {
     return ContainsAny(this, values);
 }
Beispiel #12
0
 /// <summary>
 /// Passes <paramref name="value"/> through the flag-set deduper.
 /// </summary>
 /// <param name="value">The flag set to look up; may be null.</param>
 /// <returns>Null when <paramref name="value"/> is null; otherwise the result of <c>FlagSetDeduper.GetEqualOrAdd</c>.</returns>
 public FlagSet Dedup(FlagSet value)
 {
     if (value == null)
     {
         return null;
     }

     return FlagSetDeduper.GetEqualOrAdd(value);
 }
Beispiel #13
0
 /// <summary>
 /// Converts the characters of <paramref name="text"/> to flags and wraps them in a <see cref="FlagSet"/>.
 /// </summary>
 /// <param name="text">The text slice passed to <c>ConvertCharsToFlagsInOrder</c>.</param>
 /// <returns>The set produced by <c>FlagSet.TakeArray</c> from the converted flags.</returns>
 private static FlagSet ConvertCharsToFlags(StringSlice text)
 {
     var flagsInOrder = ConvertCharsToFlagsInOrder(text);
     return FlagSet.TakeArray(flagsInOrder);
 }
Beispiel #14
0
 /// <summary>
 /// Converts the characters in a portion of <paramref name="text"/> to flags and wraps them in a <see cref="FlagSet"/>.
 /// </summary>
 /// <param name="text">The text containing the flag characters.</param>
 /// <param name="startIndex">Start position passed through to <c>ConvertCharsToFlagsInOrder</c>.</param>
 /// <param name="length">Length passed through to <c>ConvertCharsToFlagsInOrder</c>.</param>
 /// <returns>The set produced by <c>FlagSet.TakeArray</c> from the converted flags.</returns>
 private static FlagSet ConvertCharsToFlags(string text, int startIndex, int length)
 {
     var flagsInOrder = ConvertCharsToFlagsInOrder(text, startIndex, length);
     return FlagSet.TakeArray(flagsInOrder);
 }
 /// <summary>
 /// Wraps <paramref name="values"/> in a <see cref="FlagSet"/> and passes it through the flag-set deduper.
 /// </summary>
 /// <param name="values">The flag values handed to <c>FlagSet.TakeArray</c>.</param>
 /// <returns>The result of <c>FlagSetDeduper.GetEqualOrAdd</c> for the wrapped set.</returns>
 public FlagSet TakeArrayForFlagSet(FlagValue[] values)
 {
     var wrapped = FlagSet.TakeArray(values);
     return FlagSetDeduper.GetEqualOrAdd(wrapped);
 }
Beispiel #16
0
 /// <summary>
 /// Creates a flag set from the items of <paramref name="a"/> followed by the items of <paramref name="b"/>.
 /// </summary>
 /// <param name="a">The first set.</param>
 /// <param name="b">The second set.</param>
 /// <returns>The set produced by <c>Create</c> from the concatenated sequence.</returns>
 public static FlagSet Union(FlagSet a, FlagSet b)
 {
     var combined = Enumerable.Concat(a, b);
     return Create(combined);
 }
Beispiel #17
0
 /// <summary>
 /// Parses long-format flags from <paramref name="text"/> and wraps them in a <see cref="FlagSet"/>.
 /// </summary>
 /// <param name="text">The text slice passed to <c>ParseLongFlagsInOrder</c>.</param>
 /// <returns>The set produced by <c>FlagSet.TakeArray</c> from the parsed flags.</returns>
 internal static FlagSet ParseLongFlags(StringSlice text)
 {
     var flagsInOrder = ParseLongFlagsInOrder(text);
     return FlagSet.TakeArray(flagsInOrder);
 }
            /// <summary>
            /// Builds an <see cref="AffixConfig"/> from the values currently held by this builder.
            /// </summary>
            /// <param name="destructive">
            /// When <c>true</c>, the builder's collections are handed to the result through <c>Steal</c> and the
            /// <c>Take*</c> factories, and <c>AliasF</c>/<c>AliasM</c> are set to null on the builder. When
            /// <c>false</c>, the result is populated through the <c>Create</c> factories and <c>ToList</c> copies.
            /// </param>
            /// <returns>The populated configuration.</returns>
            private AffixConfig ToImmutable(bool destructive)
            {
                // Fall back to the invariant culture when none was configured, and use a read-only wrapper.
                var culture = CultureInfo.ReadOnly(Culture ?? CultureInfo.InvariantCulture);

                // Scalar settings are copied as-is; several fall back to a default when unset.
                var config = new AffixConfig
                {
                    Options                = Options,
                    FlagMode               = FlagMode,
                    KeyString              = Dedup(KeyString ?? DefaultKeyString),
                    TryString              = Dedup(TryString ?? string.Empty),
                    Language               = Dedup(Language ?? string.Empty),
                    Culture                = culture,
                    IsHungarian            = string.Equals(culture?.TwoLetterISOLanguageName, "HU", StringComparison.OrdinalIgnoreCase),
                    StringComparer         = new CulturedStringComparer(culture),
                    CompoundFlag           = CompoundFlag,
                    CompoundBegin          = CompoundBegin,
                    CompoundEnd            = CompoundEnd,
                    CompoundMiddle         = CompoundMiddle,
                    CompoundWordMax        = CompoundWordMax,
                    CompoundMin            = CompoundMin ?? DefaultCompoundMinLength,
                    CompoundRoot           = CompoundRoot,
                    CompoundPermitFlag     = CompoundPermitFlag,
                    CompoundForbidFlag     = CompoundForbidFlag,
                    NoSuggest              = NoSuggest,
                    NoNgramSuggest         = NoNgramSuggest,
                    ForbiddenWord          = ForbiddenWord ?? SpecialFlags.ForbiddenWord,
                    LemmaPresent           = LemmaPresent,
                    Circumfix              = Circumfix,
                    OnlyInCompound         = OnlyInCompound,
                    NeedAffix              = NeedAffix,
                    MaxNgramSuggestions    = MaxNgramSuggestions ?? DefaultMaxNgramSuggestions,
                    MaxDifferency          = MaxDifferency,
                    MaxCompoundSuggestions = MaxCompoundSuggestions ?? DefaultMaxCompoundSuggestions,
                    KeepCase               = KeepCase,
                    ForceUpperCase         = ForceUpperCase,
                    Warn                = Warn,
                    SubStandard         = SubStandard,
                    CompoundSyllableNum = CompoundSyllableNum,
                    Encoding            = Encoding,
                    CompoundMaxSyllable = CompoundMaxSyllable,
                    CompoundVowels      = CompoundVowels ?? CharacterSet.Empty,
                    WordChars           = WordChars ?? CharacterSet.Empty,
                    IgnoredChars        = IgnoredChars ?? CharacterSet.Empty,
                    Version             = Dedup(Version),
                    BreakPoints         = BreakSet.Create(BreakPoints)
                };

                if (destructive)
                {
                    // NOTE(review): Steal presumably returns the field's current value and clears the field — confirm against its definition.
                    config.Replacements        = SingleReplacementSet.TakeList(Steal(ref Replacements));
                    config.CompoundRules       = CompoundRuleSet.TakeList(Steal(ref CompoundRules));
                    config.CompoundPatterns    = PatternSet.TakeList(Steal(ref CompoundPatterns));
                    config.RelatedCharacterMap = MapTable.TakeList(Steal(ref RelatedCharacterMap));
                    config.Phone             = PhoneTable.TakeList(Steal(ref Phone));
                    config.InputConversions  = MultiReplacementTable.TakeDictionary(Steal(ref InputConversions));
                    config.OutputConversions = MultiReplacementTable.TakeDictionary(Steal(ref OutputConversions));
                    config.Warnings          = WarningList.TakeList(Steal(ref Warnings));

                    // The alias lists are handed over directly and the builder's references are dropped.
                    config.aliasF = AliasF ?? new List <FlagSet>(0);
                    AliasF        = null;
                    config.aliasM = AliasM ?? new List <MorphSet>(0);
                    AliasM        = null;
                }
                else
                {
                    // Non-destructive path: the builder keeps its own collections.
                    config.Replacements        = SingleReplacementSet.Create(Replacements);
                    config.CompoundRules       = CompoundRuleSet.Create(CompoundRules);
                    config.CompoundPatterns    = PatternSet.Create(CompoundPatterns);
                    config.RelatedCharacterMap = MapTable.Create(RelatedCharacterMap);
                    config.Phone             = PhoneTable.Create(Phone);
                    config.InputConversions  = MultiReplacementTable.Create(InputConversions);
                    config.OutputConversions = MultiReplacementTable.Create(OutputConversions);
                    config.Warnings          = WarningList.Create(Warnings);

                    // The alias lists are copied so the result does not share list instances with the builder.
                    config.aliasF = AliasF == null ? new List <FlagSet>(0) : AliasF.ToList();
                    config.aliasM = AliasM == null ? new List <MorphSet>(0) : AliasM.ToList();
                }

                // Prefixes and suffixes are built the same way on both paths.
                config.Prefixes = AffixCollection <PrefixEntry> .Create(Prefixes);

                config.Suffixes = AffixCollection <SuffixEntry> .Create(Suffixes);

                // The config-level continuation classes combine those of the prefixes and the suffixes.
                config.ContClasses = FlagSet.Union(config.Prefixes.ContClasses, config.Suffixes.ContClasses);

                return(config);
            }
Beispiel #19
0
 /// <summary>
 /// Parses the flags in <paramref name="text"/> using <paramref name="mode"/> and wraps them in a <see cref="FlagSet"/>.
 /// </summary>
 /// <param name="text">The text containing the flags.</param>
 /// <param name="mode">The flag mode passed through to <c>ParseFlagsInOrder</c>.</param>
 /// <returns>The set produced by <c>FlagSet.TakeArray</c> from the parsed flags.</returns>
 public static FlagSet ParseFlags(string text, FlagMode mode)
 {
     var flagsInOrder = ParseFlagsInOrder(text, mode);
     return FlagSet.TakeArray(flagsInOrder);
 }
        /// <summary>
        /// Normalizes <paramref name="word"/> and its morphological data, then records an entry for it in
        /// <c>Builder.EntriesByRoot</c>.
        /// </summary>
        /// <param name="word">The word to add; ignored characters are removed and, with complex prefixes, it is reversed.</param>
        /// <param name="flags">The flags stored on the new entry, or used to replace the flags of an existing upper-case-only entry.</param>
        /// <param name="morphs">The morphological data; expanded through the morph aliases when the affix config uses them.</param>
        /// <param name="onlyUpperCase">
        /// When <c>true</c>, the entry is only added if no entry exists yet for the word; when <c>false</c>, an
        /// existing entry carrying <c>SpecialFlags.OnlyUpcaseFlag</c> is replaced instead of adding a new one.
        /// </param>
        /// <returns>Always <c>false</c>; every return statement in this method returns that constant.</returns>
        private bool AddWord(string word, FlagSet flags, MorphSet morphs, bool onlyUpperCase)
        {
            // Strip the characters the affix config marks as ignored.
            if (Affix.IgnoredChars.HasItems)
            {
                word = word.RemoveChars(Affix.IgnoredChars);
            }

            // With complex prefixes the word is stored reversed.
            if (Affix.ComplexPrefixes)
            {
                word = word.Reverse();

                // Literal morph strings are reversed individually and in order; alias numbers are left untouched.
                if (morphs.HasItems && !Affix.IsAliasM)
                {
                    var newMorphs = new string[morphs.Count];
                    for (int i = 0; i < morphs.Count; i++)
                    {
                        newMorphs[i] = morphs[morphs.Count - i - 1].Reverse();
                    }

                    morphs = MorphSet.TakeArray(newMorphs);
                }
            }

            WordEntryOptions options;

            if (morphs.HasItems)
            {
                if (Affix.IsAliasM)
                {
                    options = WordEntryOptions.AliasM;
                    var morphBuilder = new List <string>();
                    foreach (var originalValue in morphs)
                    {
                        int      morphNumber;
                        MorphSet aliasedMorph;
                        // A value that parses as a known alias number is replaced by the aliased morphs;
                        // anything else is kept verbatim.
                        if (IntEx.TryParseInvariant(originalValue, out morphNumber) && Affix.TryGetAliasM(morphNumber, out aliasedMorph))
                        {
                            morphBuilder.AddRange(aliasedMorph);
                        }
                        else
                        {
                            morphBuilder.Add(originalValue);
                        }
                    }

                    morphs = MorphSet.Create(morphBuilder);
                }
                else
                {
                    options = WordEntryOptions.None;
                }

                // Flag the entry when any morph starts with the Phon tag.
                if (morphs.AnyStartsWith(MorphologicalTags.Phon))
                {
                    options |= WordEntryOptions.Phon;
                }
            }
            else
            {
                options = WordEntryOptions.None;
            }

            // Tracks whether entryList must be written back to the dictionary at the end.
            bool         saveEntryList = false;
            WordEntrySet entryList;

            word = Builder.Dedup(word);
            if (!Builder.EntriesByRoot.TryGetValue(word, out entryList))
            {
                saveEntryList = true;
                entryList     = WordEntrySet.Empty;
            }

            var upperCaseHomonym = false;

            for (var i = 0; i < entryList.Count; i++)
            {
                var existingEntry = entryList[i];

                if (!onlyUpperCase)
                {
                    // An existing upper-case-only entry receives the new flags in place and nothing else is
                    // added; its word, morphs and options are kept.
                    if (existingEntry.ContainsFlag(SpecialFlags.OnlyUpcaseFlag))
                    {
                        existingEntry = new WordEntry(
                            existingEntry.Word,
                            flags,
                            existingEntry.Morphs,
                            existingEntry.Options);
                        entryList.DestructiveReplace(i, existingEntry);
                        return(false);
                    }
                }
                else
                {
                    // Any existing entry for this word suppresses adding the upper-case-only variant.
                    upperCaseHomonym = true;
                }
            }

            if (!upperCaseHomonym)
            {
                saveEntryList = true;
                entryList     = WordEntrySet.CopyWithItemAdded(entryList, new WordEntry(
                                                                   word,
                                                                   flags,
                                                                   Builder.Dedup(morphs),
                                                                   options));
            }

            if (saveEntryList)
            {
                Builder.EntriesByRoot[word] = entryList;
            }

            return(false);
        }
 /// <summary>
 /// Adds <paramref name="word"/> and, unless that call returns <c>true</c>, also attempts to add its capitalized variant.
 /// </summary>
 /// <param name="word">The word to add.</param>
 /// <param name="flags">The flags for the word.</param>
 /// <param name="morphs">The morphological data for the word.</param>
 /// <returns>
 /// <c>true</c> when the four-argument <c>AddWord</c> returns <c>true</c>; otherwise the result of
 /// <c>AddWordCapitalized</c>.
 /// </returns>
 private bool AddWord(string word, FlagSet flags, MorphSet morphs)
 {
     if (AddWord(word, flags, morphs, false))
     {
         return true;
     }

     // The capitalization type is only computed when the capitalized variant is attempted.
     var capType = CapitalizationTypeEx.GetCapitalizationType(word, Affix);
     return AddWordCapitalized(word, flags, morphs, capType);
 }
Beispiel #22
0
 /// <summary>
 /// Parses numeric flags from <paramref name="text"/> and builds a <see cref="FlagSet"/> from them.
 /// </summary>
 /// <param name="text">The text slice passed to <c>ParseNumberFlagsInOrder</c>.</param>
 /// <returns>The set produced by <c>FlagSet.Create</c> from the parsed flags.</returns>
 internal static FlagSet ParseNumberFlags(StringSlice text)
 {
     var flagsInOrder = ParseNumberFlagsInOrder(text);
     return FlagSet.Create(flagsInOrder);
 }
Beispiel #23
0
 /// <summary>
 /// Parses numeric flags from a portion of <paramref name="text"/> and builds a <see cref="FlagSet"/> from them.
 /// </summary>
 /// <param name="text">The text containing the flags.</param>
 /// <param name="startIndex">Start position passed through to <c>ParseNumberFlagsInOrder</c>.</param>
 /// <param name="length">Length passed through to <c>ParseNumberFlagsInOrder</c>.</param>
 /// <returns>The set produced by <c>FlagSet.Create</c> from the parsed flags.</returns>
 public static FlagSet ParseNumberFlags(string text, int startIndex, int length)
 {
     var flagsInOrder = ParseNumberFlagsInOrder(text, startIndex, length);
     return FlagSet.Create(flagsInOrder);
 }