/// <summary>
/// Looks up a morph alias by its 1-based number.
/// </summary>
/// <param name="number">1-based alias number; valid values are 1 through <c>aliasM.Count</c>.</param>
/// <param name="result">The alias at position <paramref name="number"/>, or <c>MorphSet.Empty</c> when the number is out of range.</param>
/// <returns><c>true</c> when <paramref name="number"/> refers to an existing alias; otherwise <c>false</c>.</returns>
public bool TryGetAliasM(int number, out MorphSet result)
{
    // Guard first: anything outside [1, Count] yields the empty set.
    if (number <= 0 || number > aliasM.Count)
    {
        result = MorphSet.Empty;
        return false;
    }

    result = aliasM[number - 1];
    return true;
}
/// <summary>
/// Parses one morph-alias definition and appends the resulting set to <paramref name="entries"/>.
/// </summary>
/// <param name="parameterText">Text of the alias definition; split on tabs/spaces into individual morphs.</param>
/// <param name="entries">List that receives the parsed (and deduplicated) morph set.</param>
/// <returns>Always <c>true</c>.</returns>
private bool TryParseAliasM(string parameterText, List<MorphSet> entries)
{
    // With complex prefixes enabled the text is reversed before it is split.
    var usesComplexPrefixes = EnumEx.HasFlag(Builder.Options, AffixConfigOptions.ComplexPrefixes);
    string text = parameterText;
    if (usesComplexPrefixes)
    {
        text = parameterText.Reverse();
    }

    var morphParts = text.SplitOnTabOrSpace();
    Builder.DedupInPlace(morphParts);

    var morphSet = MorphSet.TakeArray(morphParts);
    var sharedMorphSet = Builder.Dedup(morphSet);
    entries.Add(sharedMorphSet);

    return true;
}
/// <summary>
/// Creates a new affix entry of type <typeparamref name="TEntry"/> and fills in its fields.
/// </summary>
/// <typeparam name="TEntry">Concrete affix entry type to instantiate.</typeparam>
/// <param name="strip">Value stored in <c>Strip</c>.</param>
/// <param name="affixText">Value stored in <c>Append</c>.</param>
/// <param name="conditions">Value stored in <c>Conditions</c>.</param>
/// <param name="morph">Value stored in <c>MorphCode</c>; <c>null</c> is replaced by <c>MorphSet.Empty</c>.</param>
/// <param name="contClass">Value stored in <c>ContClass</c>; <c>null</c> is replaced by <c>FlagSet.Empty</c>.</param>
/// <returns>The populated entry.</returns>
public static TEntry Create<TEntry>(
    string strip,
    string affixText,
    CharacterConditionGroup conditions,
    MorphSet morph,
    FlagSet contClass)
    where TEntry : AffixEntry, new()
{
    var entry = new TEntry();

    entry.Strip = strip;
    entry.Append = affixText;
    entry.Conditions = conditions;
    // Null inputs are normalized to the shared empty instances.
    entry.MorphCode = morph ?? MorphSet.Empty;
    entry.ContClass = contClass ?? FlagSet.Empty;

    return entry;
}
/// <summary>
/// Returns the instance that <c>MorphSetDeduper.GetEqualOrAdd</c> yields for <paramref name="value"/>.
/// </summary>
/// <param name="value">Morph set to deduplicate; may be <c>null</c>.</param>
/// <returns><c>null</c> when <paramref name="value"/> is <c>null</c>; otherwise the deduper's result.</returns>
public MorphSet Dedup(MorphSet value)
{
    // Null passes straight through without touching the deduper.
    if (value == null)
    {
        return null;
    }

    return MorphSetDeduper.GetEqualOrAdd(value);
}
/// <summary>
/// Parses a single affix line (either a group header or a group entry) and records the result in
/// <paramref name="groups"/>.
/// </summary>
/// <typeparam name="TEntry">Affix entry type; the redundancy check supports <c>PrefixEntry</c> and <c>SuffixEntry</c> only.</typeparam>
/// <param name="parameterText">The text of the affix line to parse.</param>
/// <param name="groups">Group builders collected so far; allocated here when <c>null</c>.</param>
/// <returns>
/// <c>true</c> when the line was parsed as a header or an entry; <c>false</c> (after logging a warning) when
/// the line does not match, its flag/alias numbers cannot be resolved, or a header repeats an existing group.
/// </returns>
/// <exception cref="NotSupportedException">
/// Thrown when a condition redundancy check is needed and <typeparamref name="TEntry"/> is neither
/// <c>PrefixEntry</c> nor <c>SuffixEntry</c>.
/// </exception>
private bool TryParseAffixIntoList<TEntry>(string parameterText, ref List<AffixEntryGroup.Builder<TEntry>> groups)
    where TEntry : AffixEntry, new()
{
    // The declared entry count is only a capacity hint, so it is bounded before it is trusted:
    // a negative value would make the List constructor throw and a huge one would force a
    // giant up-front allocation. The list still grows on demand past this bound.
    const int MaxPreallocatedEntries = 16384;

    if (groups == null)
    {
        groups = new List<AffixEntryGroup.Builder<TEntry>>();
    }

    var lineMatch = AffixLineRegex.Match(parameterText);
    if (!lineMatch.Success)
    {
        Builder.LogWarning("Failed to parse affix line: " + parameterText);
        return false;
    }

    var lineMatchGroups = lineMatch.Groups;

    FlagValue characterFlag;
    if (!TryParseFlag(lineMatchGroups[1].Value, out characterFlag))
    {
        Builder.LogWarning($"Failed to parse affix flag for {lineMatchGroups[1].Value} from: {parameterText}");
        return false;
    }

    var affixGroup = groups.FindLast(g => g.AFlag == characterFlag);
    var contClass = FlagSet.Empty;

    if (lineMatchGroups[2].Success && lineMatchGroups[3].Success)
    {
        // Header line: declares a new group for this flag.
        if (affixGroup != null)
        {
            Builder.LogWarning($"Duplicate affix group definition for {affixGroup.AFlag} from: {parameterText}");
            return false;
        }

        var options = AffixEntryOptions.None;
        if (lineMatchGroups[2].Value.StartsWith('Y'))
        {
            options |= AffixEntryOptions.CrossProduct;
        }

        if (Builder.IsAliasM)
        {
            options |= AffixEntryOptions.AliasM;
        }

        if (Builder.IsAliasF)
        {
            options |= AffixEntryOptions.AliasF;
        }

        int expectedEntryCount;
        if (!IntEx.TryParseInvariant(lineMatchGroups[3].Value, out expectedEntryCount) || expectedEntryCount < 0)
        {
            expectedEntryCount = 0;
        }
        else if (expectedEntryCount > MaxPreallocatedEntries)
        {
            expectedEntryCount = MaxPreallocatedEntries;
        }

        affixGroup = new AffixEntryGroup.Builder<TEntry>
        {
            AFlag = characterFlag,
            Options = options,
            Entries = new List<TEntry>(expectedEntryCount)
        };

        groups.Add(affixGroup);
        return true;
    }

    if (lineMatchGroups[4].Success && lineMatchGroups[5].Success && lineMatchGroups[6].Success)
    {
        var complexPrefixes = EnumEx.HasFlag(Builder.Options, AffixConfigOptions.ComplexPrefixes);

        // piece 3 - is string to strip or 0 for null
        var strip = lineMatchGroups[4].Value;
        if (strip == "0")
        {
            strip = string.Empty;
        }
        else if (complexPrefixes)
        {
            strip = strip.Reverse();
        }

        // piece 4 - is affix string or 0 for null, optionally followed by "/" and continuation classes
        var affixInput = lineMatchGroups[5].Value;
        var affixSlashIndex = affixInput.IndexOf('/');

        // The continuation class is resolved BEFORE a StringBuilder is taken from the pool, so the
        // failure return below cannot leave a pooled builder checked out.
        if (affixSlashIndex >= 0)
        {
            var slashPartOffset = affixSlashIndex + 1;
            var slashPartLength = affixInput.Length - slashPartOffset;

            if (Builder.IsAliasF)
            {
                int aliasNumber;
                if (
                    IntEx.TryParseInvariant(affixInput.Subslice(slashPartOffset, slashPartLength), out aliasNumber)
                    && aliasNumber > 0
                    && aliasNumber <= Builder.AliasF.Count)
                {
                    contClass = Builder.AliasF[aliasNumber - 1];
                }
                else
                {
                    Builder.LogWarning($"Failed to parse contclasses from : {parameterText}");
                    return false;
                }
            }
            else
            {
                contClass = ParseFlags(affixInput.Subslice(slashPartOffset, slashPartLength));
            }
        }

        var affixBuilder = affixSlashIndex >= 0
            ? StringBuilderPool.Get(affixInput, 0, affixSlashIndex)
            : StringBuilderPool.Get(affixInput);

        if (Builder.IgnoredChars != null && Builder.IgnoredChars.HasItems)
        {
            affixBuilder.RemoveChars(Builder.IgnoredChars);
        }

        if (complexPrefixes)
        {
            affixBuilder.Reverse();
        }

        if (affixBuilder.Length == 1 && affixBuilder[0] == '0')
        {
            affixBuilder.Clear();
        }

        // Finish with the pooled builder right away: nothing below edits the affix text, and the
        // later failure paths (alias lookup, unsupported entry type) then have nothing to give back.
        var affixText = StringBuilderPool.GetStringAndReturn(affixBuilder);

        // piece 5 - is the conditions descriptions
        var conditionText = lineMatchGroups[6].Value;
        if (complexPrefixes)
        {
            conditionText = ReverseCondition(conditionText);
        }

        var conditions = CharacterCondition.Parse(conditionText);
        if (!string.IsNullOrEmpty(strip) && !conditions.AllowsAnySingleCharacter)
        {
            bool isRedundant;
            if (typeof(TEntry) == typeof(PrefixEntry))
            {
                isRedundant = RedundantConditionPrefix(strip, conditions);
            }
            else if (typeof(TEntry) == typeof(SuffixEntry))
            {
                isRedundant = RedundantConditionSuffix(strip, conditions);
            }
            else
            {
                throw new NotSupportedException();
            }

            if (isRedundant)
            {
                conditions = CharacterConditionGroup.AllowAnySingleCharacter;
            }
        }

        // piece 6 - optional morph data, either an alias number or literal morph fields
        MorphSet morph;
        if (lineMatchGroups[7].Success)
        {
            var morphAffixText = lineMatchGroups[7].Value;
            if (Builder.IsAliasM)
            {
                int morphNumber;
                if (
                    IntEx.TryParseInvariant(morphAffixText, out morphNumber)
                    && morphNumber > 0
                    && morphNumber <= Builder.AliasM.Count)
                {
                    morph = Builder.AliasM[morphNumber - 1];
                }
                else
                {
                    Builder.LogWarning($"Failed to parse morph {morphAffixText} from: {parameterText}");
                    return false;
                }
            }
            else
            {
                if (complexPrefixes)
                {
                    morphAffixText = morphAffixText.Reverse();
                }

                morph = Builder.Dedup(MorphSet.TakeArray(Builder.DedupInPlace(morphAffixText.SplitOnTabOrSpace())));
            }
        }
        else
        {
            morph = MorphSet.Empty;
        }

        if (affixGroup == null)
        {
            // NOTE(review): this fallback group is never added to `groups`, so an entry line with no
            // matching header is parsed and reported as success but is not reachable through
            // `groups` afterwards. Behavior kept as-is; confirm whether that is intended.
            affixGroup = new AffixEntryGroup.Builder<TEntry>
            {
                AFlag = characterFlag,
                Options = AffixEntryOptions.None,
                Entries = new List<TEntry>()
            };
        }

        if (!Builder.HasContClass && contClass.HasItems)
        {
            Builder.HasContClass = true;
        }

        affixGroup.Entries.Add(AffixEntry.Create<TEntry>(
            Builder.Dedup(strip),
            Builder.Dedup(affixText),
            Builder.Dedup(conditions),
            morph,
            contClass));

        return true;
    }

    Builder.LogWarning("Affix line not fully parsed: " + parameterText);
    return false;
}
/// <summary>
/// Normalizes a word and its morph data and stores a corresponding entry in <c>Builder.EntriesByRoot</c>.
/// </summary>
/// <param name="word">The word to add; ignored characters are removed and, with complex prefixes, it is reversed.</param>
/// <param name="flags">Flags for the new entry, or for an existing entry that carries <c>SpecialFlags.OnlyUpcaseFlag</c>.</param>
/// <param name="morphs">Morph data; alias numbers are expanded when AliasM is in effect.</param>
/// <param name="onlyUpperCase">
/// When <c>true</c>, the word is not added if any entry already exists for it. When <c>false</c>, an existing
/// entry carrying <c>SpecialFlags.OnlyUpcaseFlag</c> is replaced in place instead of adding a new one.
/// </param>
/// <returns>Always <c>false</c>.</returns>
private bool AddWord(string word, FlagSet flags, MorphSet morphs, bool onlyUpperCase)
{
    if (Affix.IgnoredChars.HasItems)
    {
        word = word.RemoveChars(Affix.IgnoredChars);
    }

    if (Affix.ComplexPrefixes)
    {
        word = word.Reverse();

        if (morphs.HasItems && !Affix.IsAliasM)
        {
            // Reverse both the order of the morphs and the text of each one.
            var morphCount = morphs.Count;
            var reversedMorphs = new string[morphCount];
            for (int source = morphCount - 1, target = 0; target < morphCount; source--, target++)
            {
                reversedMorphs[target] = morphs[source].Reverse();
            }

            morphs = MorphSet.TakeArray(reversedMorphs);
        }
    }

    var options = WordEntryOptions.None;
    if (morphs.HasItems)
    {
        if (Affix.IsAliasM)
        {
            options = WordEntryOptions.AliasM;

            // Values that resolve to a morph alias are expanded; everything else is kept verbatim.
            var expandedMorphs = new List<string>();
            foreach (var morphText in morphs)
            {
                int aliasNumber;
                MorphSet aliasTarget;
                if (!(IntEx.TryParseInvariant(morphText, out aliasNumber) && Affix.TryGetAliasM(aliasNumber, out aliasTarget)))
                {
                    expandedMorphs.Add(morphText);
                    continue;
                }

                expandedMorphs.AddRange(aliasTarget);
            }

            morphs = MorphSet.Create(expandedMorphs);
        }

        if (morphs.AnyStartsWith(MorphologicalTags.Phon))
        {
            options |= WordEntryOptions.Phon;
        }
    }

    word = Builder.Dedup(word);

    WordEntrySet existingEntries;
    var mustStore = !Builder.EntriesByRoot.TryGetValue(word, out existingEntries);
    if (mustStore)
    {
        existingEntries = WordEntrySet.Empty;
    }

    bool hasUpperCaseHomonym;
    if (onlyUpperCase)
    {
        // Any existing entry counts as a homonym and blocks adding the upper-case-only form.
        hasUpperCaseHomonym = existingEntries.Count > 0;
    }
    else
    {
        hasUpperCaseHomonym = false;

        for (var index = 0; index < existingEntries.Count; index++)
        {
            var candidate = existingEntries[index];
            if (!candidate.ContainsFlag(SpecialFlags.OnlyUpcaseFlag))
            {
                continue;
            }

            // Replace the upper-case-only entry in place with one carrying the new flags, then stop.
            var replacement = new WordEntry(
                candidate.Word,
                flags,
                candidate.Morphs,
                candidate.Options);
            existingEntries.DestructiveReplace(index, replacement);
            return false;
        }
    }

    if (!hasUpperCaseHomonym)
    {
        var newEntry = new WordEntry(
            word,
            flags,
            Builder.Dedup(morphs),
            options);
        existingEntries = WordEntrySet.CopyWithItemAdded(existingEntries, newEntry);
        mustStore = true;
    }

    if (mustStore)
    {
        Builder.EntriesByRoot[word] = existingEntries;
    }

    return false;
}
/// <summary>
/// Adds a word as given and then, unless that call reports <c>true</c>, hands it to
/// <c>AddWordCapitalized</c> together with its capitalization type.
/// </summary>
/// <param name="word">The word to add.</param>
/// <param name="flags">Flags associated with the word.</param>
/// <param name="morphs">Morph data associated with the word.</param>
/// <returns><c>true</c> when either step returns <c>true</c>; otherwise <c>false</c>.</returns>
private bool AddWord(string word, FlagSet flags, MorphSet morphs)
{
    if (AddWord(word, flags, morphs, false))
    {
        return true;
    }

    // Only evaluated when the plain add did not already report true.
    var capitalization = CapitalizationTypeEx.GetCapitalizationType(word, Affix);
    return AddWordCapitalized(word, flags, morphs, capitalization);
}
/// <summary>
/// Parses one line of word-list input and adds the word it describes.
/// </summary>
/// <param name="line">The raw line text.</param>
/// <returns>
/// <c>true</c> for an empty line or a line consumed as the initialization line; <c>false</c> when the parsed
/// word is empty or a flag alias cannot be resolved; otherwise the result of <c>AddWord</c>.
/// </returns>
private bool ParseLine(string line)
{
    if (string.IsNullOrEmpty(line))
    {
        return true;
    }

    if (!hasInitialized && AttemptToProcessInitializationLine(line))
    {
        return true;
    }

    if (Builder.EntriesByRoot == null)
    {
        Builder.InitializeEntriesByRoot(-1);
    }

    var parsed = ParsedWordLine.Parse(line);
    if (string.IsNullOrEmpty(parsed.Word))
    {
        return false;
    }

    FlagSet flags;
    if (string.IsNullOrEmpty(parsed.Flags))
    {
        flags = FlagSet.Empty;
    }
    else if (Affix.IsAliasF)
    {
        // The flag text is an alias number that must resolve to a known flag set.
        int flagAliasNumber;
        FlagSet aliasedFlags;
        if (!(IntEx.TryParseInvariant(parsed.Flags, out flagAliasNumber) && Affix.TryGetAliasF(flagAliasNumber, out aliasedFlags)))
        {
            // TODO: warn
            return false;
        }

        flags = aliasedFlags;
    }
    else if (Affix.FlagMode == FlagMode.Uni)
    {
        // Re-encode the flag text with the affix encoding, decode the bytes as UTF-8, then parse char flags.
        var encodedBytes = Affix.Encoding.GetBytes(parsed.Flags);
        var utf8Flags = Encoding.UTF8.GetString(encodedBytes, 0, encodedBytes.Length);
        flags = Builder.Dedup(FlagValue.ParseFlags(utf8Flags, FlagMode.Char));
    }
    else
    {
        flags = Builder.Dedup(FlagValue.ParseFlags(parsed.Flags, Affix.FlagMode));
    }

    MorphSet morphs;
    if (parsed.Morphs == null || parsed.Morphs.Length == 0)
    {
        morphs = MorphSet.Empty;
    }
    else
    {
        // Copy into a fresh array before handing it to MorphSet.TakeArray.
        var morphCopy = new string[parsed.Morphs.Length];
        for (var index = 0; index < parsed.Morphs.Length; index++)
        {
            morphCopy[index] = parsed.Morphs[index];
        }

        morphs = Builder.Dedup(MorphSet.TakeArray(morphCopy));
    }

    return AddWord(parsed.Word, flags, morphs);
}