Example #1
0
 /// <summary>
 /// Creates a suffix entry and derives its lookup <c>Key</c> from the reversed affix text.
 /// </summary>
 /// <param name="strip">Text to strip; forwarded to the base constructor.</param>
 /// <param name="affixText">Affix text; a null value is treated as an empty string.</param>
 /// <param name="conditions">Conditions; forwarded to the base constructor.</param>
 /// <param name="morph">Morphological data; forwarded to the base constructor.</param>
 /// <param name="contClass">Continuation class flags; forwarded to the base constructor.</param>
 public SuffixEntry(
     string strip,
     string affixText,
     CharacterConditionGroup conditions,
     MorphSet morph,
     FlagSet contClass)
     : base(strip, affixText, conditions, morph, contClass)
 {
     // The base constructor accepts a null affix text and normalises it to empty,
     // so apply the same normalisation here instead of dereferencing the raw argument.
     Key = (affixText ?? string.Empty).GetReversed();
 }
 /// <summary>
 /// Initialises the shared affix entry state, substituting the corresponding
 /// empty value for every argument that is null.
 /// </summary>
 /// <param name="strip">Text to strip, or null for none.</param>
 /// <param name="affixText">Text to append, or null for none.</param>
 /// <param name="conditions">Conditions, or null for the empty group.</param>
 /// <param name="morph">Morphological data, or null for the empty set.</param>
 /// <param name="contClass">Continuation class flags, or null for the empty set.</param>
 protected AffixEntry(string strip, string affixText, CharacterConditionGroup conditions, MorphSet morph, FlagSet contClass)
 {
     Strip = strip ?? string.Empty;
     Append = affixText ?? string.Empty;
     Conditions = conditions ?? CharacterConditionGroup.Empty;
     MorphCode = morph ?? MorphSet.Empty;
     ContClass = contClass ?? FlagSet.Empty;
 }
Example #3
0
        /// <summary>
        /// Splits the parameter text on tabs/spaces, builds a de-duplicated
        /// <see cref="MorphSet"/> from the parts and appends it to <paramref name="entries"/>.
        /// The text is reversed first when the complex-prefixes option is set.
        /// </summary>
        /// <param name="parameterText">The raw parameter text of the alias line.</param>
        /// <param name="entries">The list that receives the parsed morph set.</param>
        /// <returns>Always <c>true</c>.</returns>
        private bool TryParseAliasM(string parameterText, List<MorphSet> entries)
        {
            var useComplexPrefixes = EnumEx.HasFlag(Builder.Options, AffixConfigOptions.ComplexPrefixes);
            var effectiveText = useComplexPrefixes ? parameterText.Reverse() : parameterText;

            var morphParts = effectiveText.SplitOnTabOrSpace();
            Builder.DedupInPlace(morphParts);

            var morphSet = MorphSet.TakeArray(morphParts);
            entries.Add(Builder.Dedup(morphSet));

            return true;
        }
 /// <summary>
 /// Creates a new affix entry of type <typeparamref name="TEntry"/>, replacing every
 /// null argument with the corresponding empty value so that no property is left null.
 /// </summary>
 /// <typeparam name="TEntry">The concrete affix entry type to instantiate.</typeparam>
 /// <param name="strip">Text to strip, or null for none.</param>
 /// <param name="affixText">Text to append, or null for none.</param>
 /// <param name="conditions">Conditions, or null for the empty group.</param>
 /// <param name="morph">Morphological data, or null for the empty set.</param>
 /// <param name="contClass">Continuation class flags, or null for the empty set.</param>
 /// <returns>The initialised entry.</returns>
 public static TEntry Create<TEntry>(
     string strip,
     string affixText,
     CharacterConditionGroup conditions,
     MorphSet morph,
     FlagSet contClass)
     where TEntry : AffixEntry, new()
 =>
 new TEntry
 {
     // Normalise all five values, not just the last two, so consumers never see null.
     Strip      = strip ?? string.Empty,
     Append     = affixText ?? string.Empty,
     Conditions = conditions ?? CharacterConditionGroup.Empty,
     MorphCode  = morph ?? MorphSet.Empty,
     ContClass  = contClass ?? FlagSet.Empty
 };
Example #5
0
 /// <summary>
 /// Returns the canonical instance for the given morph set.
 /// </summary>
 /// <param name="value">The set to de-duplicate; may be null.</param>
 /// <returns>
 /// Null when <paramref name="value"/> is null, <see cref="MorphSet.Empty"/> when it has no
 /// items, otherwise the equal instance held by the deduper (adding it if absent).
 /// </returns>
 public MorphSet Dedup(MorphSet value) =>
 value == null
     ? null
     : value.Count == 0
     // Empty sets map to the shared empty instance rather than occupying a deduper slot.
     ? MorphSet.Empty
     : MorphSetDeduper.GetEqualOrAdd(value);
 /// <summary>
 /// Returns the canonical instance for the given morph set.
 /// </summary>
 /// <param name="value">The set to de-duplicate; may be null.</param>
 /// <returns>
 /// The argument itself when it is null, <see cref="MorphSet.Empty"/> when it has no items,
 /// otherwise the equal instance held by the deduper (adding it if absent).
 /// </returns>
 public MorphSet Dedup(MorphSet value)
 {
     if (value == null)
     {
         return value;
     }

     if (value.Count == 0)
     {
         return MorphSet.Empty;
     }

     return MorphSetDeduper.GetEqualOrAdd(value);
 }
Example #7
0
        /// <summary>
        /// Registers a word with its flags and morphological data in <c>Builder.EntriesByRoot</c>.
        /// </summary>
        /// <remarks>
        /// Ignored characters are removed from the word first. With complex prefixes enabled the
        /// word is reversed, and (unless morph aliases are in use) the morphs are reversed both
        /// in order and in content. Morph aliases are expanded when enabled.
        /// When <paramref name="onlyUpperCase"/> is false and an existing entry carries the
        /// only-upcase flag, that entry's flags are replaced in place and nothing is appended.
        /// </remarks>
        /// <param name="word">The word to add.</param>
        /// <param name="flags">The flags for the new entry.</param>
        /// <param name="morphs">The morphological data for the new entry.</param>
        /// <param name="onlyUpperCase">Whether the word is being added as an upper-case-only variant.</param>
        /// <returns>Always <c>false</c>.</returns>
        private bool AddWord(string word, FlagSet flags, MorphSet morphs, bool onlyUpperCase)
        {
            if (Affix.IgnoredChars.HasItems)
            {
                word = word.RemoveChars(Affix.IgnoredChars);
            }

            if (Affix.ComplexPrefixes)
            {
                word = word.Reverse();

                if (morphs.HasItems && !Affix.IsAliasM)
                {
                    // Reverse both the order of the morphs and the text of each morph.
                    var total = morphs.Count;
                    var reversed = new string[total];
                    for (var target = 0; target < total; target++)
                    {
                        reversed[target] = morphs[total - target - 1].Reverse();
                    }

                    morphs = MorphSet.TakeArray(reversed);
                }
            }

            WordEntryOptions options = WordEntryOptions.None;

            if (morphs.HasItems)
            {
                if (Affix.IsAliasM)
                {
                    options = WordEntryOptions.AliasM;

                    // Replace every numeric alias reference with the morphs it stands for;
                    // anything that is not a known alias number is kept verbatim.
                    var expanded = new List<string>();
                    foreach (var rawMorph in morphs)
                    {
                        if (IntEx.TryParseInvariant(rawMorph, out int aliasNumber) && Affix.TryGetAliasM(aliasNumber, out MorphSet aliasMorphs))
                        {
                            expanded.AddRange(aliasMorphs);
                        }
                        else
                        {
                            expanded.Add(rawMorph);
                        }
                    }

                    morphs = MorphSet.Create(expanded);
                }

                if (morphs.AnyStartsWith(MorphologicalTags.Phon))
                {
                    options |= WordEntryOptions.Phon;
                }
            }

            word = Builder.Dedup(word);

            var mustStore = !Builder.EntriesByRoot.TryGetValue(word, out WordEntrySet homonyms);
            if (mustStore)
            {
                homonyms = WordEntrySet.Empty;
            }

            var hasUpperCaseHomonym = false;

            for (var index = 0; index < homonyms.Count; index++)
            {
                var current = homonyms[index];

                if (onlyUpperCase)
                {
                    hasUpperCaseHomonym = true;
                    continue;
                }

                if (current.ContainsFlag(SpecialFlags.OnlyUpcaseFlag))
                {
                    // Overwrite the only-upcase entry with the new flags and stop.
                    var replacement = new WordEntry(current.Word, flags, current.Morphs, current.Options);
                    homonyms.DestructiveReplace(index, replacement);
                    return false;
                }
            }

            if (!hasUpperCaseHomonym)
            {
                var addition = new WordEntry(word, flags, Builder.Dedup(morphs), options);
                homonyms = WordEntrySet.CopyWithItemAdded(homonyms, addition);
                mustStore = true;
            }

            if (mustStore)
            {
                Builder.EntriesByRoot[word] = homonyms;
            }

            return false;
        }
Example #8
0
 /// <summary>
 /// Adds the word as given and, when that call returns false, also invokes
 /// <c>AddWordCapitalized</c> with the word's capitalization type.
 /// </summary>
 /// <param name="word">The word to add.</param>
 /// <param name="flags">The flags for the word.</param>
 /// <param name="morphs">The morphological data for the word.</param>
 /// <returns>True when either call returns true.</returns>
 private bool AddWord(string word, FlagSet flags, MorphSet morphs)
 {
     if (AddWord(word, flags, morphs, false))
     {
         return true;
     }

     var capitalization = CapitalizationTypeEx.GetCapitalizationType(word, Affix);
     return AddWordCapitalized(word, flags, morphs, capitalization);
 }
Example #9
0
        /// <summary>
        /// Processes one line of input: either consumes it as the initialization line or parses
        /// it as a word entry (word, flags, morphs) and passes it to <c>AddWord</c>.
        /// </summary>
        /// <param name="line">The line to process.</param>
        /// <returns>
        /// True for an empty line or a consumed initialization line; false when the parsed word
        /// is empty or a flag alias cannot be resolved; otherwise the result of <c>AddWord</c>.
        /// </returns>
        private bool ParseLine(string line)
        {
            if (string.IsNullOrEmpty(line))
            {
                return true;
            }

            if (!hasInitialized && AttemptToProcessInitializationLine(line))
            {
                return true;
            }

            if (Builder.EntriesByRoot == null)
            {
                Builder.InitializeEntriesByRoot(-1);
            }

            var wordLine = ParsedWordLine.Parse(line);
            if (string.IsNullOrEmpty(wordLine.Word))
            {
                return false;
            }

            FlagSet flags;
            if (string.IsNullOrEmpty(wordLine.Flags))
            {
                flags = FlagSet.Empty;
            }
            else if (Affix.IsAliasF)
            {
                // With flag aliases the flag text must be a number naming a known alias.
                if (!IntEx.TryParseInvariant(wordLine.Flags, out int aliasNumber)
                    || !Affix.TryGetAliasF(aliasNumber, out FlagSet aliasFlags))
                {
                    // TODO: warn
                    return false;
                }

                flags = aliasFlags;
            }
            else if (Affix.FlagMode == FlagMode.Uni)
            {
                // Re-interpret the flag text's bytes (in the affix encoding) as UTF-8
                // before parsing the flags character by character.
                var rawBytes = Affix.Encoding.GetBytes(wordLine.Flags);
                var decodedFlags = Encoding.UTF8.GetString(rawBytes, 0, rawBytes.Length);
                flags = Builder.Dedup(FlagValue.ParseFlags(decodedFlags, FlagMode.Char));
            }
            else
            {
                flags = Builder.Dedup(FlagValue.ParseFlags(wordLine.Flags, Affix.FlagMode));
            }

            MorphSet morphs;
            var sourceMorphs = wordLine.Morphs;
            if (sourceMorphs == null || sourceMorphs.Length == 0)
            {
                morphs = MorphSet.Empty;
            }
            else
            {
                // Copy into a fresh array because TakeArray receives ownership of it.
                var copiedMorphs = new string[sourceMorphs.Length];
                for (var index = 0; index < copiedMorphs.Length; index++)
                {
                    copiedMorphs[index] = sourceMorphs[index];
                }

                morphs = Builder.Dedup(MorphSet.TakeArray(copiedMorphs));
            }

            return AddWord(wordLine.Word, flags, morphs);
        }
Example #10
0
        /// <summary>
        /// Parses one affix line and records it in <paramref name="groups"/>: either a group
        /// header (regex groups 2 and 3: cross-product marker and expected entry count) or an
        /// entry (regex groups 4 to 6: strip text, affix text with optional "/contclass" part,
        /// conditions; optional group 7: morph data).
        /// </summary>
        /// <typeparam name="TEntry">The entry type; must be <c>PrefixEntry</c> or <c>SuffixEntry</c> when conditions need a redundancy check.</typeparam>
        /// <param name="parameterText">The parameter text of the affix line.</param>
        /// <param name="groups">The list of group builders; created when null.</param>
        /// <returns>True when a header or entry was recorded; false after logging a warning.</returns>
        /// <exception cref="NotSupportedException">
        /// The redundancy check is required and <typeparamref name="TEntry"/> is neither
        /// <c>PrefixEntry</c> nor <c>SuffixEntry</c>.
        /// </exception>
        private bool TryParseAffixIntoList<TEntry>(string parameterText, ref List<AffixEntryGroup.Builder<TEntry>> groups)
            where TEntry : AffixEntry, new()
        {
            if (groups == null)
            {
                groups = new List<AffixEntryGroup.Builder<TEntry>>();
            }

            var lineMatch = AffixLineRegex.Match(parameterText);

            if (!lineMatch.Success)
            {
                Builder.LogWarning("Failed to parse affix line: " + parameterText);
                return false;
            }

            var lineMatchGroups = lineMatch.Groups;

            if (!TryParseFlag(lineMatchGroups[1].Value, out FlagValue characterFlag))
            {
                Builder.LogWarning($"Failed to parse affix flag for {lineMatchGroups[1].Value} from: {parameterText}");
                return false;
            }

            var affixGroup = groups.FindLast(g => g.AFlag == characterFlag);
            var contClass = FlagSet.Empty;

            if (lineMatchGroups[2].Success && lineMatchGroups[3].Success)
            {
                // Header line: a group for this flag must not already exist.
                if (affixGroup != null)
                {
                    Builder.LogWarning($"Duplicate affix group definition for {affixGroup.AFlag} from: {parameterText}");
                    return false;
                }

                var options = AffixEntryOptions.None;
                if (lineMatchGroups[2].Value.StartsWith('Y'))
                {
                    options |= AffixEntryOptions.CrossProduct;
                }
                if (Builder.IsAliasM)
                {
                    options |= AffixEntryOptions.AliasM;
                }
                if (Builder.IsAliasF)
                {
                    options |= AffixEntryOptions.AliasF;
                }

                // The count is only a capacity hint, so the parse result is deliberately ignored.
                IntEx.TryParseInvariant(lineMatchGroups[3].Value, out int expectedEntryCount);

                affixGroup = new AffixEntryGroup.Builder<TEntry>
                {
                    AFlag = characterFlag,
                    Options = options,
                    Entries = new List<TEntry>(expectedEntryCount)
                };

                groups.Add(affixGroup);

                return true;
            }

            if (lineMatchGroups[4].Success && lineMatchGroups[5].Success && lineMatchGroups[6].Success)
            {
                var complexPrefixes = EnumEx.HasFlag(Builder.Options, AffixConfigOptions.ComplexPrefixes);

                // piece 3 - is string to strip or 0 for null
                var strip = lineMatchGroups[4].Value;
                if (strip == "0")
                {
                    strip = string.Empty;
                }
                else if (complexPrefixes)
                {
                    strip = strip.Reverse();
                }

                // piece 4 - is affix string or 0 for null
                var affixInput = lineMatchGroups[5].Value;
                var affixSlashIndex = affixInput.IndexOf('/');

                // Parse the continuation classes BEFORE renting a pooled StringBuilder so that
                // the failure path below cannot leave a rented builder outside the pool.
                if (affixSlashIndex >= 0)
                {
                    var slashPartOffset = affixSlashIndex + 1;
                    var slashPartLength = affixInput.Length - slashPartOffset;

                    if (Builder.IsAliasF)
                    {
                        if (IntEx.TryParseInvariant(affixInput.Subslice(slashPartOffset, slashPartLength), out int aliasNumber) && aliasNumber > 0 && aliasNumber <= Builder.AliasF.Count)
                        {
                            contClass = Builder.AliasF[aliasNumber - 1];
                        }
                        else
                        {
                            Builder.LogWarning($"Failed to parse contclasses from : {parameterText}");
                            return false;
                        }
                    }
                    else
                    {
                        contClass = Builder.Dedup(FlagSet.TakeArray(ParseFlagsInOrder(affixInput.Subslice(slashPartOffset, slashPartLength))));
                    }
                }

                StringBuilder affixText = affixSlashIndex >= 0
                    ? StringBuilderPool.Get(affixInput, 0, affixSlashIndex)
                    : StringBuilderPool.Get(affixInput);

                if (Builder.IgnoredChars != null && Builder.IgnoredChars.HasItems)
                {
                    affixText.RemoveChars(Builder.IgnoredChars);
                }

                if (complexPrefixes)
                {
                    affixText.Reverse();
                }

                if (affixText.Length == 1 && affixText[0] == '0')
                {
                    affixText.Clear();
                }

                // Materialise the text and hand the builder back to the pool right away; the
                // later failure exits (morph alias warning, NotSupportedException) then have
                // nothing left to release.
                var affixString = StringBuilderPool.GetStringAndReturn(affixText);

                // piece 5 - is the conditions descriptions
                var conditionText = lineMatchGroups[6].Value;
                if (complexPrefixes)
                {
                    conditionText = ReverseCondition(conditionText);
                }

                var conditions = CharacterCondition.Parse(conditionText);
                if (!string.IsNullOrEmpty(strip) && !conditions.AllowsAnySingleCharacter)
                {
                    // Prefix and suffix entries use the same redundancy check; other entry types are rejected.
                    if (typeof(TEntry) != typeof(PrefixEntry) && typeof(TEntry) != typeof(SuffixEntry))
                    {
                        throw new NotSupportedException();
                    }

                    // A condition that can only match the strip text adds no information.
                    if (conditions.IsOnlyPossibleMatch(strip))
                    {
                        conditions = CharacterConditionGroup.AllowAnySingleCharacter;
                    }
                }

                // piece 6
                MorphSet morph;
                if (lineMatchGroups[7].Success)
                {
                    var morphAffixText = lineMatchGroups[7].Value;
                    if (Builder.IsAliasM)
                    {
                        if (IntEx.TryParseInvariant(morphAffixText, out int morphNumber) && morphNumber > 0 && morphNumber <= Builder.AliasM.Count)
                        {
                            morph = Builder.AliasM[morphNumber - 1];
                        }
                        else
                        {
                            Builder.LogWarning($"Failed to parse morph {morphAffixText} from: {parameterText}");
                            return false;
                        }
                    }
                    else
                    {
                        if (complexPrefixes)
                        {
                            morphAffixText = morphAffixText.Reverse();
                        }

                        morph = Builder.Dedup(MorphSet.TakeArray(Builder.DedupInPlace(morphAffixText.SplitOnTabOrSpace())));
                    }
                }
                else
                {
                    morph = MorphSet.Empty;
                }

                if (affixGroup == null)
                {
                    // NOTE(review): this fallback group is never added to `groups`, so an entry
                    // line without a preceding header is parsed and then discarded — confirm
                    // whether that is intended.
                    affixGroup = new AffixEntryGroup.Builder<TEntry>
                    {
                        AFlag = characterFlag,
                        Options = AffixEntryOptions.None,
                        Entries = new List<TEntry>()
                    };
                }

                if (!Builder.HasContClass && contClass.HasItems)
                {
                    Builder.HasContClass = true;
                }

                affixGroup.Entries.Add(AffixEntry.Create<TEntry>(
                    Builder.Dedup(strip),
                    Builder.Dedup(affixString),
                    Builder.Dedup(conditions),
                    morph,
                    contClass));

                return true;
            }

            Builder.LogWarning("Affix line not fully parsed: " + parameterText);
            return false;
        }
Example #11
0
 /// <summary>
 /// Creates the detail record for a word entry, substituting the empty set for a null
 /// <paramref name="flags"/> or <paramref name="morphs"/> argument.
 /// </summary>
 /// <param name="flags">The entry's flags, or null for none.</param>
 /// <param name="morphs">The entry's morphological data, or null for none.</param>
 /// <param name="options">The entry's options.</param>
 public WordEntryDetail(
     FlagSet flags,
     MorphSet morphs,
     WordEntryOptions options)
 {
     Options = options;
     Morphs = morphs ?? MorphSet.Empty;
     Flags = flags ?? FlagSet.Empty;
 }
Example #12
0
 /// <summary>
 /// Creates a word entry by wrapping the flags, morphs and options in a new
 /// <see cref="WordEntryDetail"/> and delegating to the (word, detail) constructor.
 /// </summary>
 /// <param name="word">The word text.</param>
 /// <param name="flags">The entry's flags.</param>
 /// <param name="morphs">The entry's morphological data.</param>
 /// <param name="options">The entry's options.</param>
 public WordEntry(
     string word,
     FlagSet flags,
     MorphSet morphs,
     WordEntryOptions options)
     : this(
         word,
         new WordEntryDetail(flags, morphs, options))
 {
 }