/// <summary>
/// Builds a <see cref="termExploreModel"/> for the given token: splits it via the rule set regex,
/// derives the lemma form from the first morphology rule, and populates the model's gram set,
/// instances and lemma item.
/// </summary>
/// <param name="token">The token to explore.</param>
/// <returns>The populated exploration model.</returns>
public termExploreModel createExploreItem(string token)
{
    // Decompose the token into root / stem / suffix using the rule set's regular expression.
    Match match = ruleSet.regex.Match(token);
    root = match.Groups[1].Value;
    stem = match.Groups[2].Value;
    sufix = match.Groups[3].Value;

    // The first morphology rule decides the lemma form for this token.
    morphRule firstRule = ruleSet.rules.First();
    string lemmaForm = firstRule.GetForm(this);

    termExploreModel model = new termExploreModel(lemmaForm);
    model.gramSet = ruleSet.GetGramSet(firstRule.sufix);
    model.instances.AddRangeUnique(ruleSet.GetItems(this, lemmaForm));

    // The lemma item shares the model's gram set reference (as in the original design).
    termExploreItem lemmaItem = new termExploreItem(lemmaForm);
    lemmaItem.gramSet = model.gramSet;
    model.lemma = lemmaItem;
    model.rootWord = root;

    return model;
}
/// <summary>
/// Saves the term model as XML into the metadata project folder. Prefix is clean filename prefix without spacing character.
/// </summary>
/// <param name="termModel">The term model to serialize.</param>
/// <param name="prefix">Optional clean filename prefix (no spacing character).</param>
public void saveTermModel(termExploreModel termModel, string prefix = "")
{
    // FIX: use Path.Combine instead of a hard-coded "\\" separator so the path is
    // built correctly regardless of whether the folder path ends with a separator.
    string filepath = Path.Combine(
        projectFolderStructure[lexiconConstructorProjectFolder.metadata].path,
        prefix + termModel.filename(".xml"));
    FileInfo fi = filepath.getWritableFile(getWritableFileMode.overwrite);
    objectSerialization.saveObjectToXML(termModel, fi.FullName);
}
/// <summary>
/// Executes the exploration stage: explores each word in the entry list with Unitex,
/// records failures (with optional debug dumps) and updates the lexicon for successes.
/// </summary>
/// <param name="response">Log builder receiving progress and diagnostics output.</param>
protected override void stageExecute(ILogBuilder response)
{
    foreach (string word in state.entryList)
    {
        // <-- Exploration: in verbose mode the Unitex exploration logs into the response.
        termExploreModel output = termExploreProcedures.exploreWithUnitex(word, state.verbose ? response : null);

        if (output.wasExploreFailed)
        {
            if (state.debug)
            {
                // Dump a DELAF debug search for the failed word into a per-word log file.
                response.consoleAltColorToggle();
                response.AppendLine("--- running debug search for [" + word + "]");
                var exp = languageManagerUnitex.manager.operatorDelaf.Search(word, false, 25);
                exp.ToString(response, true);
                string debugLines = exp.ToString();
                string debugPath = semanticLexiconManager.manager.constructor.projectFolderStructure[lexiconConstructorProjectFolder.logs].pathFor(word + "_failDebug.txt");
                debugLines.saveStringToFile(debugPath, getWritableFileMode.overwrite);
                response.consoleAltColorToggle();
            }
            state.shadowBuffer.Add(word);
            state.failedBuffer.Add(word);
        }
        else
        {
            if (state.saveModel)
            {
                semanticLexiconManager.manager.constructor.saveTermModel(output);
            }
            state.shadowBuffer.Add(word);
            state.shadowBuffer.AddRangeUnique(output.GetShadow());
            try
            {
                semanticLexiconManager.manager.constructor.addTermModelToLexicon(output);
                response.AppendLine("Lexicon update: Lemma [" + output.lemma.inputForm + "][" + output.instances.Count() + "]");
                state.processedBuffer.Add(output.lemma.inputForm);
            }
            catch (Exception ex)
            {
                // FIX: the caught exception was previously discarded entirely;
                // include its message so the failure cause is diagnosable from the log.
                state.failedBuffer.Add(word);
                response.AppendLine("Lexicon term update failed for [" + word + "][" + output.lemmaForm + "] :: " + ex.Message);
                output.ToString(response, true);
            }
        }
    }
}
/// <summary>
/// Adds the term model to the lexicon: creates the lemma entity (when not already present)
/// together with its term instances, then persists the context.
/// </summary>
/// <param name="termModel">The term model to insert.</param>
public void addTermModelToLexicon(termExploreModel termModel)
{
    ITermLemma tl = manager.lexiconContext.TermLemmas.Where(x => x.name.Equals(termModel.lemmaForm)).FirstOrDefault();

    // FIX: the original had an empty "lemma exists" branch (comment in Serbian: "nothing was done")
    // and large blocks of dead commented-out code; the condition is inverted here and dead code removed.
    if (tl == null)
    {
        tl = manager.lexiconContext.TermLemmas.Create();
        tl.name = termModel.lemmaForm.or(termModel.inputForm);
        tl.gramSet = termModel.getGramSet();
        tl.type = termModel.getPosType().toStringSafe("TEMP");

        foreach (termExploreItem item in termModel.instances)
        {
            ITermInstance ti = manager.lexiconContext.TermInstances.Where(x => x.name.Equals(item.inputForm)).FirstOrDefault();
            if (ti == null)
            {
                ti = manager.lexiconContext.TermInstances.Create();
                ti.name = item.inputForm;
                ti.gramSet = item.getGramSet();
                ti.type = item.getPosType().toStringSafe();
                ti.lemma = tl;
            }
            else if (manager.settings.doResolveWordsInDebugMode)
            {
                output.AppendLine("Item [" + item.inputForm + "] of lemma [" + tl.name + "] was already defined.");
            }
            // NOTE(review): pre-existing instances are re-Added to the context as well (original behavior) —
            // presumably an attach/no-op in this context implementation; confirm.
            manager.lexiconContext.TermInstances.Add(ti);
        }
    }
    // When the lemma already exists nothing is modified; SaveChanges is still invoked (original behavior).
    manager.lexiconContext.SaveChanges();
}
/// <summary>
/// Queries the Serbian WordNet table for the given tokens and, optionally,
/// builds exploration models for each matched token/symset pair.
/// </summary>
/// <param name="srb_tokens">Serbian tokens to look up.</param>
/// <param name="response">Log builder (currently unused in this method).</param>
/// <param name="buildModel">When true, per-token <see cref="termExploreModel"/>s are created/updated in the result.</param>
/// <returns>The symset query results, optionally with models attached.</returns>
public wordnetSymsetResults query_srb(List<String> srb_tokens, ILogBuilder response, Boolean buildModel = true)
{
    wordnetSymsetResults output = new wordnetSymsetResults();
    getReady();

    List<DataRow> matches = new List<DataRow>();
    foreach (String tkn in srb_tokens)
    {
        // FIX: a single quote inside a token would break the DataTable.Select filter
        // expression — escape it by doubling, per DataColumn expression syntax.
        String safeToken = tkn.Replace("'", "''");
        matches.AddRangeUnique(table.Select(SRB_COLUMN_TOKEN + " LIKE '" + safeToken + "'"));
    }

    foreach (DataRow dr in matches)
    {
        // NOTE(review): 'eng' is read from the SRB token column — naming looks off; confirm intent.
        String eng = dr[SRB_COLUMN_TOKEN].toStringSafe();
        String code = dr[SRB_COLUMN_CODE].toStringSafe();
        output.Add(code, eng);
    }

    if (buildModel)
    {
        foreach (var pair in output)
        {
            termExploreModel md = null;
            String srb = pair.Key;
            if (!output.models.ContainsKey(srb))
            {
                md = new termExploreModel(srb);
                output.models.Add(srb, md);
            }
            else
            {
                md = output.models[srb];
            }

            // First character of the symset code maps to a POS type via the WordNet numbering.
            String symc = pair.Value[0].ToString();
            pos_type pt = posConverter.wordNetFirstNumToPosType.getValue(symc, pos_type.none);
            gramFlags gr = new gramFlags();
            gr.Set(pt);
            md.gramSet.Add(gr);
            md.wordnetPrimarySymsets.AddUnique(pair.Value);
        }
    }
    return output;
}
/// <summary>
/// Gets the model for a Lexicon lemma entry: loads it from the metadata XML file when one exists,
/// otherwise reconstructs it from the TermLemma itself.
/// </summary>
/// <param name="lemma">The lemma.</param>
/// <param name="dontLoadFromFile">When true, skips the file lookup and always rebuilds from the lemma.</param>
/// <returns>The term exploration model.</returns>
public termExploreModel getTermModel(ITermLemma lemma, bool dontLoadFromFile = false)
{
    if (!dontLoadFromFile)
    {
        // Metadata file name convention: <name>_<type>.xml inside the metadata project folder.
        string filepath = projectFolderStructure[lexiconConstructorProjectFolder.metadata].pathFor(lemma.name + "_" + lemma.type.ToString() + ".xml");
        if (File.Exists(filepath))
        {
            termExploreModel fromFile = objectSerialization.loadObjectFromXML<termExploreModel>(filepath);
            fromFile.modelSource = termExploreModelSource.fromFile;
            return fromFile;
        }
    }
    // No cached file (or loading disabled): rebuild the model from the lemma entity.
    return new termExploreModel(lemma);
}
/// <summary>
/// Creates synonym-to-lemma, concept-to-lemma and concept-to-concept links for the term model,
/// writes a link log into the links project folder and persists the lexicon context.
/// </summary>
/// <param name="termModel">The term model whose synonyms and WordNet symsets are linked.</param>
/// <param name="saveModel">When true, the model is saved back to disk after linking.</param>
public void addSynonymsAndConceptLinks(termExploreModel termModel, bool saveModel = false)
{
    // <----------- ADDING SYNONYMS ----------- >
    var lemmas = manager.getLemma(termModel.lemmaForm);
    var lemmasyns = manager.getLemmas(termModel.synonyms);

    builderForLog linkLog = new builderForLog();
    linkLog.open("Creating synonym-2-lemma links");
    termModel.links_synonym = 0;
    foreach (ITermLemma lemma in lemmas)
    {
        foreach (ITermLemma lemsyn in lemmasyns)
        {
            bool added = false;
            // Add the link only when it does not already exist in either direction.
            if (!lemma.relatedTo.Contains(lemsyn))
            {
                if (!lemma.relatedFrom.Contains(lemsyn))
                {
                    lemma.relatedTo.Add(lemsyn);
                    added = true;
                }
            }
            if (added)
            {
                termModel.links_synonym++;
                linkLog.AppendLine("[" + termModel.links_synonym.ToString("D5") + "] " + lemma.name + " -> " + lemsyn.name);
            }
            else
            {
                linkLog.AppendLine("[Link exists] " + lemma.name + " -> " + lemsyn.name);
            }
        }
    }
    linkLog.close();

    linkLog.open("Creating concept 2 lemma links");
    // <----------- ADDING SYNSETS
    List<Concept> concepts = new List<Concept>();
    Concept conHead = null;
    termModel.links_lemmaConcept = 0;
    foreach (string code in termModel.wordnetPrimarySymsets)
    {
        // NOTE(review): isCleanWord() presumably means the entry is a plain word, not a symset code — confirm.
        if (code.isCleanWord())
        {
            aceLog.log("wrong symset code -- [" + code + "] -- ignored!");
            continue;
        }
        Concept con = manager.getConcept(code, true, "WordNet Code");
        foreach (TermLemma lemma in lemmas)
        {
            // FIX: 'added' was declared once per concept and never reset, so after the first
            // new link every remaining lemma was counted and logged as added even when the
            // link already existed. It is now reset per lemma, mirroring the synonym loop.
            bool added = false;
            if (!con.lemmas.Contains(lemma))
            {
                con.lemmas.Add(lemma);
                added = true;
            }
            if (added)
            {
                termModel.links_lemmaConcept++;
                linkLog.AppendLine("[" + termModel.links_lemmaConcept.ToString("D5") + "] " + con.name + " -> " + lemma.name);
            }
            else
            {
                linkLog.AppendLine("[Link exists] " + con.name + " -> " + lemma.name);
            }
        }
        concepts.Add(con);
        conHead = con;
    }
    linkLog.close();

    linkLog.open("Creating concept 2 concept links");
    // <--------------------------- linking SYNSET concepts
    // NOTE(review): the pair loop includes con2 == con, so a concept sharing lemmas with itself
    // may be self-linked (original behavior) — confirm whether self-links are intended.
    termModel.links_conceptConcept = 0;
    foreach (Concept con in concepts)
    {
        foreach (Concept con2 in concepts)
        {
            bool added = false;
            if (!con2.relatedTo.Contains(con))
            {
                if (!con2.relatedFrom.Contains(con))
                {
                    // Link two concepts only when they share at least one lemma.
                    if (con2.lemmas.Where(x => con.lemmas.Contains(x)).Any())
                    {
                        con2.relatedTo.Add(con);
                        added = true;
                    }
                }
            }
            if (added)
            {
                termModel.links_conceptConcept++;
                linkLog.AppendLine("[" + termModel.links_conceptConcept.ToString("D5") + "] " + con2.name + " -> " + con.name);
            }
            else
            {
                linkLog.AppendLine("[Link exists] " + con2.name + " -> " + con.name);
            }
        }
    }
    linkLog.close();

    manager.lexiconContext.SaveChanges();

    // Persist the link log next to the other link reports.
    string pth = projectFolderStructure[lexiconConstructorProjectFolder.links].pathFor(termModel.filename(".txt"));
    linkLog.ToString().saveStringToFile(pth, getWritableFileMode.overwrite);

    if (saveModel)
    {
        saveTermModel(termModel);
    }
}
/// <summary>
/// Explores the form against the named-entity database and, when the query is accepted,
/// enriches the model with a grammar tag and a WordNet symset code derived from the
/// first recognized entity flag.
/// </summary>
/// <param name="form">The input word form.</param>
/// <param name="exploreModel">The model that receives the named-entity information.</param>
/// <returns>The exploration item for the form (existing instance or a newly created one).</returns>
public termExploreItem exploreEntities(String form, termExploreModel exploreModel)
{
    tokenQuery tq = new tokenQuery(form, null, tokenQuerySourceEnum.imb_namedentities);
    tokenQueryResponse responseEntity = languageManagerDBNamedEntities.manager.exploreToken(tq);

    termExploreItem item = exploreModel.instances[form];
    if (item == null)
    {
        item = new termExploreItem(form);
    }

    // Guard clause: no named-entity acceptance -> return the item untouched.
    if (responseEntity.status != tokenQueryResultEnum.accept)
    {
        return item;
    }

    exploreModel.lemmaForm = item.inputForm;

    String gset = "";
    Boolean recognized = true; // remains true when the flag list is empty (original behavior)

    foreach (namedEntity ctf in responseEntity.flags)
    {
        // Map the entity flag to a grammar tag and symset code; null tag = unrecognized flag.
        // (businessDomain is deliberately unhandled, as in the original.)
        String tag = null;
        String symset = null;
        switch (ctf)
        {
            case namedEntity.countryName: tag = "N+Top"; symset = "MC04-GEO-C"; break;
            case namedEntity.languageName: tag = "N+Lang"; symset = "MC04-GEO-LN"; break;
            case namedEntity.personalName: tag = "N+First"; symset = "MC04-HUM-PN"; break;
            case namedEntity.personalPosition: tag = "N+Hum"; symset = "MC04-HUM-PP"; break;
            case namedEntity.presonalLastName: tag = "N+Last"; symset = "MC04-HUM-LN"; break;
            case namedEntity.title: tag = "N"; symset = "MC04-CON-AD-TITLE"; break;
            case namedEntity.townName: tag = "N+PGr1"; symset = "MC04-CON-AD-TN"; break;
            case namedEntity.townZip: tag = "NUMnumerical+Top"; symset = "MC04-CON-AD-TZ"; break;
        }

        if (tag == null)
        {
            // Unknown flag: remember the miss and keep scanning the remaining flags.
            recognized = false;
            continue;
        }

        gset = gset.add(tag, "|");
        exploreModel.wordnetPrimarySymsets.Add(symset);
        recognized = true;
        break; // the first recognized flag wins, as in the original switch
    }

    // The gram set is applied even when no flag was recognized (original behavior).
    item.gramSet.SetAll(gset);

    if (recognized)
    {
        exploreModel.instances.Add(item);
        if (exploreModel.lemma == null)
        {
            exploreModel.lemma = item;
        }
        exploreModel.synonyms.AddRange(responseEntity.dataTokens.getTokens());
        exploreModel.flags = termExploreItemEnumFlag.namedEntity;
    }
    return item;
}