/// <summary>
/// Maps a part-of-speech type to the matching WordNet code-start constant.
/// </summary>
/// <param name="type">The part-of-speech type to map.</param>
/// <returns>
/// <c>CODE_ADJECTIVE</c> for <c>pos_type.A</c>, <c>CODE_NOUN</c> for <c>pos_type.N</c>,
/// <c>CODE_VERB</c> for <c>pos_type.V</c>, <c>CODE_ADVERB</c> for <c>pos_type.ADV</c>;
/// <c>CODE_ADJECTIVE_SATELLITE</c> for every other value.
/// </returns>
public static int GetWordNetCodeStart(this pos_type type)
{
    // Each case returns directly, so no break statements are needed (they would be unreachable).
    switch (type)
    {
        case pos_type.A:
            return CODE_ADJECTIVE;

        case pos_type.N:
            return CODE_NOUN;

        case pos_type.V:
            return CODE_VERB;

        case pos_type.ADV:
            return CODE_ADVERB;

        default:
            return CODE_ADJECTIVE_SATELLITE;
    }
}
/// <summary>
/// Builds a lexic query graph for the given word: every matching line of the resource file
/// contributes a lemma-in-type node, an inflection under it and the grammar case of that inflection.
/// </summary>
/// <param name="word">The word to look for</param>
/// <param name="limit">Limit on the number of entries to take; -1 selects <c>GENERAL_LEXIC_QUERY_SEARCH_LIMIT</c></param>
/// <param name="logger">Logger for error output; the current implementation does not write to it</param>
/// <returns>The query graph populated from the matched lines</returns>
public lexicQuery GetLexicQuery(string word, int limit = -1, ILogBuilder logger = null)
{
    // -1 is the "use the default" marker
    int entryLimit = (limit == -1) ? GENERAL_LEXIC_QUERY_SEARCH_LIMIT : limit;

    lexicQuery query = new lexicQuery(word);

    // No lemma / tag restriction: only the word itself narrows the search
    String pattern = GetSearchRegex(word, "", "");
    var hits = resourceFileOperater.Search(pattern, true, entryLimit, RegexOptions.IgnoreCase);

    foreach (string entry in hits.getLineContentList())
    {
        string form = "";
        string lemmaText = "";
        string tagCode = "";
        SelectFromLine(entry, out form, out lemmaText, out tagCode);

        var tags = grammTagConverter.ConvertFromString(tagCode);
        pos_type lemmaPos = tags.Get<pos_type>(pos_type.none);

        lexicLemmaInTypeNode lemmaNode = query.AddLemmaInType(lemmaText, lemmaPos);
        lexicInflection inflectionNode = lemmaNode.AddInflection(form);
        inflectionNode.AddGrammarCase(tags);
    }

    return query;
}
/// <summary>
/// Collects the grammar context flags (degree, gender, number, grammatical case, definitness, animatness,
/// person, negation) that apply to the given main type, taken from <c>graphTags</c>.
/// </summary>
/// <param name="mainType">Main type flag; selects which flag enums are collected.</param>
/// <param name="graphTags">The graph tags to filter</param>
/// <returns>List of the matching tags, without duplicates; empty for main types that have no case below</returns>
/// <seealso cref="getSubTypes(pos_type, List{object})"/>
public static List<Object> getGrammarContext(this pos_type mainType, List<Object> graphTags)
{
    List<Object> context = new List<Object>();

    switch (mainType)
    {
        case pos_type.A:
            foreach (var flag in graphTags.getAllOfType<pos_degree>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_gender>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_number>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_gramaticalCase>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_definitness>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_animatness>(false)) { context.AddUnique(flag); }
            break;

        case pos_type.N:
            foreach (var flag in graphTags.getAllOfType<pos_gender>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_number>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_gramaticalCase>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_animatness>(false)) { context.AddUnique(flag); }
            break;

        case pos_type.NUM:
            foreach (var flag in graphTags.getAllOfType<pos_gender>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_number>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_gramaticalCase>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_degree>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_animatness>(false)) { context.AddUnique(flag); }
            break;

        case pos_type.PRO:
            foreach (var flag in graphTags.getAllOfType<pos_gender>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_number>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_gramaticalCase>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_person>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_animatness>(false)) { context.AddUnique(flag); }
            break;

        case pos_type.V:
            foreach (var flag in graphTags.getAllOfType<pos_person>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_gender>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_number>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_negation>(false)) { context.AddUnique(flag); }
            break;

        case pos_type.ADV:
            foreach (var flag in graphTags.getAllOfType<pos_person>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_gender>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_number>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_negation>(false)) { context.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_degree>(false)) { context.AddUnique(flag); }
            break;

        case pos_type.PREP:
            foreach (var flag in graphTags.getAllOfType<pos_gramaticalCase>(false)) { context.AddUnique(flag); }
            break;
    }

    return context;
}
/// <summary>
/// Creates a temporary explore model for a term: the term itself becomes the lemma, the given type
/// becomes the only gram-flag entry and the model is flagged as <c>termExploreItemEnumFlag.temp</c>.
/// </summary>
/// <param name="term">The term the model is created for; also used as the lemma form.</param>
/// <param name="type">Part-of-speech type; its <c>ToString()</c> form is passed to the <c>gramFlags</c> constructor.</param>
/// <returns>The newly created model.</returns>
public termExploreModel makeTempModel(string term, pos_type type)
{
    var model = new termExploreModel(term);
    model.lemma = new termExploreItem(term);

    var typeFlags = new gramFlags(type.ToString());
    model.gramSet.Add(typeFlags);

    model.flags = termExploreItemEnumFlag.temp;
    return model;
}
/// <summary>
/// Creates a rule set of type <typeparamref name="T"/>, configures it through <c>setup</c> and
/// registers it in <c>rules</c>.
/// </summary>
/// <typeparam name="T">Concrete rule set type to instantiate.</typeparam>
/// <param name="__regex">Regex criteria passed on to <c>setup</c>.</param>
/// <param name="__type">Part-of-speech type passed on to <c>setup</c>.</param>
/// <returns>The rule set that was created and registered.</returns>
public morphRuleSet Add<T>(string __regex, pos_type __type) where T : morphRuleSet, new()
{
    morphRuleSet ruleSet = new T();
    ruleSet.setup(__regex, __type);

    rules.Add(ruleSet);
    return ruleSet;
}
/// <summary>
/// Creates a lemma node for the given lemma form and part-of-speech type.
/// </summary>
/// <param name="lemmaForm">Lemma form; used to build the default node name.</param>
/// <param name="_posType">Part-of-speech type stored on the node and used to build the default node name.</param>
/// <param name="__name">Node name; when null or empty it defaults to <c>lemmaForm + "|" + _posType</c>.</param>
public lexicLemmaInTypeNode(String lemmaForm, pos_type _posType, String __name = "") : base()
{
    if (String.IsNullOrEmpty(__name))
    {
        // Use the constructor argument here: the posType member is only assigned at the end of
        // this constructor, so reading it at this point would not reflect _posType.
        __name = lemmaForm + "|" + _posType.ToString();
    }

    name = __name;

    // NOTE(review): the lemma form is not stored on the node (a former `lemmaForm = lemmaForm;`
    // statement only assigned the parameter to itself and had no effect). If this class declares
    // a lemmaForm member, it presumably should be set with `this.lemmaForm = lemmaForm;` - confirm.

    nodeType = lexicGraphNodeType.lemma;
    posType = _posType;
}
/// <summary>
/// Looks up the given tokens in the table (LIKE match on the <c>SRB_COLUMN_TOKEN</c> column), adds the
/// code/token pair of every matched row to the result and, optionally, builds a
/// <c>termExploreModel</c> for each result entry.
/// </summary>
/// <param name="srb_tokens">Tokens to search for. Each token is used as a LIKE pattern.</param>
/// <param name="response">Log builder; the current implementation does not write to it.</param>
/// <param name="buildModel">When <c>true</c>, a model is created (or reused) in <c>output.models</c> for every result key.</param>
/// <returns>The results collected from all matched rows.</returns>
public wordnetSymsetResults query_srb(List<String> srb_tokens, ILogBuilder response, Boolean buildModel = true)
{
    wordnetSymsetResults output = new wordnetSymsetResults();

    getReady();

    List<DataRow> matches = new List<DataRow>();

    foreach (String tkn in srb_tokens)
    {
        // Inside a filter expression a string literal is delimited by single quotes, so any
        // apostrophe in the token must be doubled or the expression fails to parse.
        // A null token is treated as an empty pattern.
        String literal = (tkn ?? "").Replace("'", "''");
        matches.AddRangeUnique(table.Select(SRB_COLUMN_TOKEN + " LIKE '" + literal + "'"));
    }

    foreach (DataRow dr in matches)
    {
        String token = dr[SRB_COLUMN_TOKEN].toStringSafe();
        String code = dr[SRB_COLUMN_CODE].toStringSafe();
        output.Add(code, token);
    }

    if (buildModel)
    {
        foreach (var pair in output)
        {
            termExploreModel md = null;
            String srb = pair.Key;

            // One model per key: create it on first sight, reuse it afterwards
            if (!output.models.ContainsKey(srb))
            {
                md = new termExploreModel(srb);
                output.models.Add(srb, md);
            }
            else
            {
                md = output.models[srb];
            }

            // The first element of the value selects the part-of-speech type
            String symc = pair.Value[0].ToString();
            pos_type pt = posConverter.wordNetFirstNumToPosType.getValue(symc, pos_type.none);

            gramFlags gr = new gramFlags();
            gr.Set(pt);
            md.gramSet.Add(gr);
            md.wordnetPrimarySymsets.AddUnique(pair.Value);
        }
    }

    return output;
}
/// <summary>
/// Returns the child lemma node for the given lemma form and part-of-speech type, creating and
/// adding it when no child is registered under the key <c>lemmaForm + "|" + posType</c>.
/// </summary>
/// <param name="lemmaForm">The lemma form.</param>
/// <param name="posType">The part-of-speech type of the lemma.</param>
/// <returns>The existing child (cast with <c>as</c>) or the newly added node.</returns>
public lexicLemmaInTypeNode AddLemmaInType(String lemmaForm, pos_type posType)
{
    String nodeKey = lemmaForm + "|" + posType.ToString();

    if (!mychildren.ContainsKey(nodeKey))
    {
        lexicLemmaInTypeNode created = new lexicLemmaInTypeNode(lemmaForm, posType, nodeKey);
        Add(created);
        return created;
    }

    return mychildren[nodeKey] as lexicLemmaInTypeNode;
}
/// <summary>
/// Initialize converter with specification from <see cref="DataTable" />s
/// </summary>
/// <remarks>
/// Rows whose member name does not resolve to a value are skipped in both tables; for the format
/// table the skip is reported to <paramref name="logger"/> when one is supplied.
/// </remarks>
/// <param name="transTable">The translation table - must have columns: <see cref="TABLECOLUMN_MEMBERNAME" /> and <see cref="TABLECOLUMN_CODE" />.</param>
/// <param name="formatTable">The format table - must have columns: <see cref="TABLECOLUMN_MEMBERNAME" /> and <see cref="TABLECOLUMN_FORMAT" />.</param>
/// <param name="logger">The logger - receives a message for every skipped format table row; may be null.</param>
public void LoadSpecification(DataTable transTable, DataTable formatTable, ILogBuilder logger = null)
{
    // <--------- interpretation of the translation table
    foreach (DataRow row in transTable.Rows)
    {
        string code = row[TABLECOLUMN_CODE].toStringSafe();
        string memberName = row[TABLECOLUMN_MEMBERNAME].toStringSafe();

        object memberValue = null;
        interpretMemberName(memberName, out memberValue);

        if (memberValue != null)
        {
            posEnumVsString.Add(memberValue, code);
        }
    }

    // <---------- interpretation of format table
    foreach (DataRow row in formatTable.Rows)
    {
        string format_string = row[TABLECOLUMN_FORMAT].toStringSafe();
        string memberName = row[TABLECOLUMN_MEMBERNAME].toStringSafe();

        object memberValue = null;
        interpretMemberName(memberName, out memberValue);

        // Same rule as for the translation table: an unresolved member is skipped. Without this
        // guard the unboxing cast below fails with a NullReferenceException.
        if (memberValue == null)
        {
            if (logger != null)
            {
                logger.log("Format table row skipped: member name [" + memberName + "] was not resolved");
            }
            continue;
        }

        // The format string is a comma separated list of type names, resolved through pos_enum_types
        List<string> typeNames = format_string.SplitSmart(",", "", true, true);
        List<Type> typeList = new List<Type>();
        foreach (string tn in typeNames)
        {
            Type t = pos_enum_types[tn];
            typeList.Add(t);
        }

        pos_type pt = (pos_type)memberValue;
        posTypeVsPattern[pt] = typeList;
    }
}
/// <summary>
/// Creates a chunk match rule from a sequence of part-of-speech types and a set of flag types, and
/// adds it to this collection.
/// </summary>
/// <param name="posTypePattern">Part-of-speech types, in order; each one is formatted with <c>itemFormat</c> and joined with <c>textMapBase.SEPARATOR</c> (which also leads and ends the pattern).</param>
/// <param name="flagTypesToMatch">Flag types added to the rule; their type names are added to <c>flagTypesToMatchNames</c>.</param>
/// <param name="chunkType">Chunk type assigned to the rule.</param>
/// <returns>The rule that was created and added.</returns>
public chunkMatchRule AddTypeAndFlagRule(pos_type[] posTypePattern, Type[] flagTypesToMatch, pos_type chunkType)
{
    var matchRule = new chunkMatchRule();
    matchRule.renderMode = contentTokenSubjectRenderMode.posTypeTagForm;
    matchRule.chunkType = chunkType;

    // SEPARATOR item SEPARATOR item ... SEPARATOR
    String pattern = "";
    foreach (var posItem in posTypePattern)
    {
        pattern = pattern + textMapBase.SEPARATOR + String.Format(itemFormat, posItem.toString());
    }
    pattern = pattern + textMapBase.SEPARATOR;
    matchRule._regexPattern = pattern;

    matchRule.flagTypesToMatch.AddRange(flagTypesToMatch);
    foreach (var flagType in matchRule.flagTypesToMatch)
    {
        matchRule.flagTypesToMatchNames.AddUnique(flagType.Name);
    }

    Add(matchRule);
    return matchRule;
}
/// <summary>
/// Collects the sub type flags found in <c>graphTags</c> that belong to the specified <c>mainType</c>.
/// </summary>
/// <param name="mainType">Main type flag; selects which sub type enums are collected.</param>
/// <param name="graphTags">The graph tags to filter.</param>
/// <returns>List of the matching sub type flags, without duplicates; empty for main types that have no sub type enums.</returns>
public static List<Object> getSubTypes(this pos_type mainType, List<Object> graphTags)
{
    List<Object> subTypes = new List<Object>();

    switch (mainType)
    {
        case pos_type.A:
            foreach (var flag in graphTags.getAllOfType<pos_adjectiveType>(false)) { subTypes.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_degree>(false)) { subTypes.AddUnique(flag); }
            break;

        case pos_type.ADV:
            foreach (var flag in graphTags.getAllOfType<pos_adverbType>(false)) { subTypes.AddUnique(flag); }
            break;

        case pos_type.CONJ:
            foreach (var flag in graphTags.getAllOfType<pos_conjunctionType>(false)) { subTypes.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_conjunctionFormation>(false)) { subTypes.AddUnique(flag); }
            break;

        case pos_type.N:
            foreach (var flag in graphTags.getAllOfType<pos_nounGroup>(false)) { subTypes.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_nounType>(false)) { subTypes.AddUnique(flag); }
            break;

        case pos_type.NUMnumerical:
        case pos_type.NUM:
            foreach (var flag in graphTags.getAllOfType<pos_numeralType>(false)) { subTypes.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_numeralForm>(false)) { subTypes.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_number>(false)) { subTypes.AddUnique(flag); }
            break;

        case pos_type.PAR:
            foreach (var flag in graphTags.getAllOfType<pos_particleType>(false)) { subTypes.AddUnique(flag); }
            break;

        case pos_type.PRO:
            foreach (var flag in graphTags.getAllOfType<pos_pronounType>(false)) { subTypes.AddUnique(flag); }
            break;

        case pos_type.RES:
            foreach (var flag in graphTags.getAllOfType<pos_residualType>(false)) { subTypes.AddUnique(flag); }
            break;

        case pos_type.V:
            foreach (var flag in graphTags.getAllOfType<pos_verbform>(false)) { subTypes.AddUnique(flag); }
            foreach (var flag in graphTags.getAllOfType<pos_verbType>(false)) { subTypes.AddUnique(flag); }
            break;

        // Main types listed explicitly as having no sub type flags
        case pos_type.ABB:
        case pos_type.INT:
        case pos_type.none:
        case pos_type.PREF:
        case pos_type.PREP:
        case pos_type.PUNCT:
        case pos_type.TEMP:
            break;
    }

    return subTypes;
}
/// <summary>
/// Returns the grammatic tag collections of all resource entries matching the specified form; the search
/// can be narrowed by a <see cref="pos_type"/> and/or a lemma of preference.
/// </summary>
/// <param name="input">The form to search for.</param>
/// <param name="lemmaOfPreference">The lemma of preference, passed to the search regex builder.</param>
/// <param name="posTypePreference">The part-of-speech type preference; converted to its string code for the search regex.</param>
/// <param name="limit">Limit on the number of entries to take; -1 selects <c>GENERAL_SEARCH_LIMIT</c></param>
/// <param name="logger">The logger - receives a message when no entry matches; may be null.</param>
/// <returns>One tag collection per matched line, in the order the lines are returned by the search.</returns>
public List<grammaticTagCollection> GetGramTagsFor(string input, string lemmaOfPreference = "", pos_type posTypePreference = pos_type.none, int limit = -1, ILogBuilder logger = null)
{
    // -1 is the "use the default" marker
    int entryLimit = (limit == -1) ? GENERAL_SEARCH_LIMIT : limit;

    var tagCollections = new List<grammaticTagCollection>();

    string posCode = grammTagConverter.GetStringFor(posTypePreference, true);
    string pattern = GetSearchRegex(input, lemmaOfPreference, posCode);

    var hits = resourceFileOperater.Search(pattern, true, entryLimit, RegexOptions.IgnoreCase);

    if (hits.CountThreadSafe == 0 && logger != null)
    {
        logger.log("No entry found to fit regex [" + pattern + "] using " + GetType().Name);
    }

    foreach (string entry in hits.getLineContentList())
    {
        string form = "";
        string lemmaText = "";
        string tagCode = "";
        SelectFromLine(entry, out form, out lemmaText, out tagCode);

        // Only the grammatic tag part of the line is needed here
        tagCollections.Add(grammTagConverter.ConvertFromString(tagCode));
    }

    return tagCollections;
}
/// <summary>
/// Configures the rule set: stores the regex criteria text, builds the <see cref="Regex"/> from it
/// and stores the part-of-speech type.
/// </summary>
/// <param name="__regex">Regular expression pattern; kept as text in <c>regexCriteria</c> and as a <see cref="Regex"/> instance in <c>regex</c>.</param>
/// <param name="__type">Part-of-speech type assigned to <c>type</c>.</param>
public void setup(string __regex, pos_type __type)
{
    regexCriteria = __regex;

    Regex compiled = new Regex(__regex);
    regex = compiled;

    type = __type;
}
/// <summary>
/// Builds <see cref="grammaticTagCollection"/> instance from string form
/// </summary>
/// <remarks>
/// The first character selects the <see cref="pos_type"/>; every following character is resolved against
/// the enum type at the same position of the pattern registered for that <see cref="pos_type"/>
/// (<c>-</c> marks an unset position). Resolved flag lists are cached per tag string.
/// No exception is propagated from the resolution: on failure the message is logged and stored in
/// the <c>comment</c> of the returned collection, which then carries no flags.
/// </remarks>
/// <param name="tag">The string encoding that is to be interpreted into grammatic tag</param>
/// <returns>
/// Instance of grammaticTagCollection built from the string input; empty when <paramref name="tag"/> is
/// null or empty, or when its first character does not resolve to a <see cref="pos_type"/>.
/// </returns>
public grammaticTagCollection ConvertFromString(string tag)
{
    grammaticTagCollection output = new grammaticTagCollection();

    // Guard before touching the cache: a null tag would otherwise be used as the lookup key
    // (standard dictionaries reject null keys with an exception).
    if (tag.isNullOrEmpty())
    {
        return output;
    }

    if (cachedTags.ContainsKey(tag))
    {
        cachedTags[tag].ForEach(x => output.Add(x));
        return output;
    }

    string pos_type_str = tag[0].ToString();
    object pt = posEnumVsString.GetOfTypeByValue(pos_type_str, typeof(pos_type));

    List<Object> flags = new List<object>();

    try
    {
        if (pt != null)
        {
            pos_type pos_t = (pos_type)pt;
            flags.Add(pos_t);

            List<Type> typeList = posTypeVsPattern[pos_t];

            for (int i = 1; i < tag.Length; i++)
            {
                string s = tag[i].toStringSafe("");
                if (s != "-")
                {
                    // Position i of the tag is described by entry i - 1 of the pattern
                    Type t = typeList[i - 1];
                    object f = posEnumVsString.GetOfTypeByValue(s, t);

                    if (f == null)
                    {
                        // Fallback: take the first value registered for this code whose enum type
                        // is not yet represented among the collected flags
                        List<Object> values = posEnumVsString.GetByValue(s);
                        foreach (object vl in values)
                        {
                            if (!flags.Any(x => x.GetType() == vl.GetType()))
                            {
                                flags.Add(vl);
                                break;
                            }
                        }
                    }
                    else
                    {
                        flags.Add(f);
                    }
                }
            }

            flags.ForEach(x => output.Add(x));
            cachedTags.TryAdd(tag, flags);
        }
    }
    catch (Exception ex)
    {
        // Best effort: report the failure instead of propagating it
        String msg = "[" + ex.Message + "] ---> [" + ex.GetType().Name + "] ";
        output.comment = msg;
        aceLog.log(msg);
    }

    return output;
}