/// <summary>
/// Describes the weighting model to the specified logger.
/// </summary>
/// <param name="logger">The logger.</param>
public void Describe(ILogBuilder logger)
{
    logger.AppendLine("Feature Weighting model");

    if (DoUseLocalFunction)
    {
        logger.AppendLine("Local weight model");
        logger.nextTabLevel();
        LocalFunction.Describe(logger);
        logger.prevTabLevel();
    }

    if (GlobalFactors.Any())
    {
        logger.AppendLine("Global weight model(s)");
    }

    logger.nextTabLevel();
    foreach (var lf in GlobalFactors)
    {
        lf.Describe(logger);
    }
    logger.prevTabLevel();
}
protected override void stageExecute(ILogBuilder response)
{
    foreach (string word in state.entryList)
    {
        // <-- Exploration
        termExploreModel output = null;
        if (state.verbose)
        {
            output = termExploreProcedures.exploreWithUnitex(word, response);
        }
        else
        {
            output = termExploreProcedures.exploreWithUnitex(word, null);
        }

        if (output.wasExploreFailed)
        {
            if (state.debug)
            {
                response.consoleAltColorToggle();
                response.AppendLine("--- running debug search for [" + word + "]");
                var exp = languageManagerUnitex.manager.operatorDelaf.Search(word, false, 25);
                exp.ToString(response, true);
                string debugLines = exp.ToString();
                string debugPath = semanticLexiconManager.manager.constructor.projectFolderStructure[lexiconConstructorProjectFolder.logs].pathFor(word + "_failDebug.txt");
                debugLines.saveStringToFile(debugPath, getWritableFileMode.overwrite);
                response.consoleAltColorToggle();
            }
            state.shadowBuffer.Add(word);
            state.failedBuffer.Add(word);
        }
        else
        {
            if (state.saveModel)
            {
                semanticLexiconManager.manager.constructor.saveTermModel(output);
            }
            state.shadowBuffer.Add(word);
            state.shadowBuffer.AddRangeUnique(output.GetShadow());

            try
            {
                semanticLexiconManager.manager.constructor.addTermModelToLexicon(output);
                response.AppendLine("Lexicon update: Lemma [" + output.lemma.inputForm + "][" + output.instances.Count() + "]");
                state.processedBuffer.Add(output.lemma.inputForm);
            }
            catch (Exception ex)
            {
                state.failedBuffer.Add(word);
                response.AppendLine("Lexicon term update failed for [" + word + "][" + output.lemmaForm + "]");
                output.ToString(response, true);
            }
        }
    }
}
public static termExploreModelSet exploreWithApertiumAndWordnet(string word, ILogBuilder response)
{
    //List<TermLemma> lemmas = semanticLexiconManager.manager.resolve(word);
    termExploreModelSet outset = semanticLexiconManager.manager.constructor.loadTermModels(word);
    response.AppendLine("term[" + word + "]->models[" + outset.Count() + "]");

    if (outset.Count() == 0)
    {
        outset.missingLemmas.Add(word);
        response.AppendLine("term[" + word + "]->missingLemma[]");
        return outset;
    }

    int c = 0;
    foreach (termExploreModel model in outset)
    {
        var result = languageManagerApertium.manager.queryForSynonyms(model.lemma.inputForm, apertiumDictQueryScope.exact);
        var srb = result.GetNativeWords();
        var eng = result.GetTranslatedWords();
        model.translations.AddRange(eng);
        model.synonyms.AddRange(srb);
        response.AppendLine("term[" + word + "]->model[" + c.ToString() + "]->lemma[" + model.lemma.inputForm + "] --> Apertium.dic ==> srb[" + srb.Count() + "] eng[" + eng.Count() + "]");

        // <-- wordnet
        wordnetSymsetResults wordnet_res = languageManagerWordnet.manager.query_eng(model.translations, response);
        model.wordnetSecondarySymsets.AddRange(wordnet_res.GetKeys());

        wordnetSymsetResults wordnet_2nd = languageManagerWordnet.manager.query_eng_symset(model.wordnetSecondarySymsets, response);
        model.wordnetSynonyms.AddRange(wordnet_2nd.GetEnglish());

        var synTrans = languageManagerApertium.manager.query(model.wordnetSynonyms, apertiumDictQueryScope.exact, apertiumDictNeedleSide.translated);
        model.wordnetSynonymSerbian.AddRange(synTrans.GetNativeWords());

        response.AppendLine("WordNet(" + eng.Count() + ") ==> synsets[" + model.wordnetSecondarySymsets.Count() + "] synEng[" + model.wordnetSynonyms.Count() + "] ==> synSrb[" + model.wordnetSynonymSerbian.Count() + "]");

        semanticLexiconManager.manager.constructor.saveTermModel(model);
        c++;
    }

    //termExploreModel output = semanticLexiconManager.manager.constructor.mode
    return outset;
}
public override void Describe(ILogBuilder logger)
{
    if (IsEnabled)
    {
        logger.AppendLine("Document filter enabled");
        logger.AppendPair("Ranking function", function.GetType().Name, true, "\t\t\t");
        logger.AppendPair("Select top", limit, true, "\t\t\t");
    }
    else
    {
        logger.AppendLine("Document filter disabled");
    }
}
/// <summary>
/// Reduces the document set.
/// </summary>
/// <param name="docSet">The document set - web site.</param>
/// <param name="settings">The settings.</param>
/// <param name="logger">The logger.</param>
/// <returns>Rate of reduction</returns>
public Double ReduceDocumentSet(WebSiteDocuments docSet, HtmlDocumentReductionSettings settings, ILogBuilder logger)
{
    Int32 input = 0;
    Int32 output = 0;

    foreach (WebSiteDocument document in docSet.documents)
    {
        input += document.HTMLSource.Length;
        String newHtml = ReduceDocument(document.HTMLSource, settings, logger);
        output += newHtml.Length;
        document.HTMLSource = newHtml;
    }

    Double reduction = output.GetRatio(input);
    if (settings.logSiteLevel)
    {
        logger.AppendLine("[" + docSet.domain + "] reduced to: " + reduction.ToString("P2"));
    }
    return reduction;
}
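// A minimal usage sketch (hypothetical caller, not part of the original source):
// averages the reduction rate over a crawled site collection. `sites`, `settings`
// and `logger` are assumed to be prepared by the hosting application.
public Double ReduceAllSites(IEnumerable<WebSiteDocuments> sites, HtmlDocumentReductionSettings settings, ILogBuilder logger)
{
    Double sum = 0;
    Int32 n = 0;
    foreach (WebSiteDocuments site in sites)
    {
        sum += ReduceDocumentSet(site, settings, logger); // per-site output/input length ratio
        n++;
    }
    Double avg = (n > 0) ? sum / n : 0;
    logger.AppendLine("Average reduction over [" + n + "] sites: " + avg.ToString("P2"));
    return avg;
}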
public void Describe(ILogBuilder logger)
{
    if (logger != null)
    {
        logger.AppendPair("Stemmer", stemmer.GetType().Name, true, "\t\t\t");
        logger.AppendPair("Tokenizer", tokenizer.GetType().Name, true, "\t\t\t");

        if (transliteration != null)
        {
            logger.AppendPair("Transliteration", transliteration.lang_A_id + "-" + transliteration.lang_B_id, true, "\t\t\t");
        }
        else
        {
            logger.AppendPair("Transliteration", "Disabled", true, "\t\t\t");
        }

        filter.Describe(logger);
        weightModel.Describe(logger);

        if (constructorSettings != null)
        {
            constructorSettings.Describe(logger);
        }
        else
        {
            logger.AppendLine("Vector model constructor - not set");
        }
    }
}
public void Describe(ILogBuilder logger)
{
    logger.AppendLine("Factor [" + GlobalFunction.shortName + "] W[" + weight.ToString("F2") + "]");
    logger.nextTabLevel();
    GlobalFunction.Describe(logger);
    logger.prevTabLevel();
}
public virtual void report(ILogBuilder output)
{
    output.AppendLine("Node - click depth[" + clickDepth + "] - links[" + links.Count + "]");
    reportLink(output);
    links.reportCollection(output);
}
/// <summary>
/// Describes the rendering instructions to the specified logger.
/// </summary>
/// <param name="logger">The logger.</param>
public override void Describe(ILogBuilder logger)
{
    if (instructions.Any())
    {
        logger.AppendLine("Rendering instructions:");
        Int32 c = 1;
        foreach (var inst in instructions)
        {
            logger.AppendLine("[" + c.ToString("D2") + "] " + inst.name + " Code=[" + inst.code + "] Flags=[" + inst.instructionFlags.ToString() + "]");
            c++;
        }
    }
    else
    {
        logger.AppendLine("No rendering instructions");
    }
}
/// <summary>
/// Logs a multiline description of the gramCaseSet
/// </summary>
/// <param name="log">The log.</param>
/// <param name="expanded">if set to <c>true</c>, each set is expanded into its flags.</param>
public void ToString(ILogBuilder log, bool expanded = false)
{
    log.AppendLine("Grammatical sets");
    int c = 0;
    foreach (KeyValuePair<string, gramFlags> gf in items)
    {
        log.AppendLine("[" + c + "] " + gf.Key);
        if (expanded)
        {
            log.AppendLine(gf.Value.ToString(!expanded));
            log.AppendLine("");
        }
        c++;
    }
}
/// <summary>
/// Describes the cross-validation settings
/// </summary>
/// <param name="logger">The logger.</param>
public void Describe(ILogBuilder logger)
{
    logger.AppendLine("--- k-fold cross-validation settings ---");
    logger.AppendPair("Number of folds [k]", K, true, "\t\t\t");
    logger.AppendPair("Training folds [t]", TrainingFolds, true, "\t\t\t");
    logger.AppendPair("Fold randomization", randomFolds.ToString(), true, "\t\t\t");
}
/// <summary>
/// Logs a multiline description of the term instance
/// </summary>
/// <param name="log">The log.</param>
/// <param name="itemName">Label used for the logged item.</param>
/// <param name="expanded">if set to <c>true</c> [expanded].</param>
public void ToString(ILogBuilder log, string itemName = "Instance", bool expanded = false)
{
    log.AppendLine(itemName + ": " + inputForm);
    log.consoleAltColorToggle();
    gramSet.ToString(log, expanded);
    log.consoleAltColorToggle();
}
/// <summary>
/// Creates message collections
/// </summary>
/// <param name="loger">The logger.</param>
/// <param name="workspace">The workspace.</param>
/// <param name="showMessage">if set to <c>true</c>, each message is echoed to the logger.</param>
/// <returns>List of message collections, split into blocks of <c>collectionSize</c> messages</returns>
public List<emailMessageCollection> createCollections(ILogBuilder loger, folderNode workspace, Boolean showMessage = true)
{
    List<emailMessageCollection> output = new List<emailMessageCollection>();

    stringTemplate subject = new stringTemplate(subjectTemplate);
    stringTemplate content = new stringTemplate(contentTemplate);
    stringTemplate address = new stringTemplate(addressTemplate);

    emailMessageCollection messageCollection = new emailMessageCollection();
    messageCollection.name = "block01";
    output.Add(messageCollection);

    foreach (DataRow row in dataSource.Rows)
    {
        emailMessage message = new emailMessage();
        message.from = fromAddress;
        message.content = content.applyToContent(row);
        message.subject = subject.applyToContent(row);
        message.address = address.applyToContent(row);

        if (showMessage)
        {
            loger.AppendLine("Subject: " + message.subject);
            loger.AppendLine("Content: " + message.content);
        }

        messageCollection.Add(message);

        if (messageCollection.Count >= collectionSize)
        {
            messageCollection.Save(workspace.pathFor(messageCollection.name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Block of messages", true));
            messageCollection = new emailMessageCollection();
            messageCollection.name = "block" + output.Count.ToString("D2");
            output.Add(messageCollection);
            loger.log("Message block created: " + messageCollection.name);
        }
    }

    messageCollection.Save(workspace.pathFor(messageCollection.name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Block of messages", true));
    return output;
}
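// A minimal usage sketch (hypothetical, assuming `mailing` exposes createCollections
// as above and that `log` and `workspace` are already configured; `workspace.path`
// is an assumed folderNode member): builds message blocks quietly and reports the count.
List<emailMessageCollection> blocks = mailing.createCollections(log, workspace, showMessage: false);
log.log("Prepared [" + blocks.Count + "] message block(s)");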
public void Describe(ILogBuilder logger)
{
    if (IsEnabled)
    {
        logger.AppendLine("Feature Selection");
        logger.AppendPair("Limit", limit, true, "\t\t\t");
        logger.AppendLine("Ranking method for n-dimensional Feature Weights: " + nVectorValueSelectionOperation.ToString());
        logger.AppendPair("Function", WeightModel.GetSignature(), true, "\t\t\t");
        WeightModel.Describe(logger);
    }
    else
    {
        logger.AppendLine("Feature Selection method is disabled");
    }
}
/// <summary>
/// Logs a multiline description of the term model
/// </summary>
/// <param name="loger">The logger.</param>
/// <param name="expanded">if set to <c>true</c> [expanded].</param>
/// <param name="showInstances">if set to <c>true</c>, instances are listed as well.</param>
public void ToString(ILogBuilder loger, bool expanded = false, bool showInstances = true)
{
    if (loger == null)
    {
        return;
    }

    loger.AppendLine("Input term: " + inputForm);

    if (!rootWord.isNullOrEmpty())
    {
        loger.log("Root: " + rootWord);
    }
    if (lemma != null)
    {
        lemma.ToString(loger, "Lemma");
    }
    if (showInstances)
    {
        foreach (termExploreItem sug in instances)
        {
            sug.ToString(loger, "Instance", expanded);
        }
    }
    if (synonyms.Any())
    {
        loger.AppendLine("Related terms: " + synonyms.Join(','));
    }
    if (wordnetSynonyms.Any())
    {
        loger.AppendLine("Not accepted: " + wordnetSynonyms.Join(','));
    }
    if (translations.Any())
    {
        loger.AppendLine("Translations: " + translations.Join(','));
    }
    if (translationRelated.Any())
    {
        loger.AppendLine("Not accepted: " + translationRelated.Join(','));
    }
    // if (wordnetSynonymSerbian.Any()) loger.AppendLine("WordNet translated: " + wordnetSynonymSerbian.Join(','));
    if (wordnetPrimarySymsets.Any())
    {
        loger.AppendLine("Symsets: " + wordnetPrimarySymsets.Join(','));
    }
    if (wordnetSecondarySymsets.Any())
    {
        loger.AppendLine("Not accepted: " + wordnetSecondarySymsets.Join(','));
    }
}
public override void Describe(ILogBuilder logger)
{
    constructor.Describe(logger);

    if (!ExportClassifierMemory.isNullOrEmpty())
    {
        logger.AppendLine("Export memory to: " + ExportClassifierMemory);
    }

    if (classifierSettings != null)
    {
        // no classifier-specific description is produced yet
    }
}
protected void DescribeDimensions(ILogBuilder logger, List<dimensionSpecification> dimensions, string heading)
{
    if (dimensions.Any())
    {
        logger.AppendLine(heading);
        logger.nextTabLevel();
        for (int i = 0; i < dimensions.Count; i++)
        {
            dimensionSpecification ds = dimensions[i];
            logger.AppendPair("[" + i.ToString("D2") + "]", ds.functionName, true, "\t\t\t");
        }
        logger.prevTabLevel();
    }
}
/// <summary>
/// Describes the unit via the specified logger
/// </summary>
/// <param name="loger">The logger.</param>
public void describe(ILogBuilder loger)
{
    // loger.log("deliveryUnit describe() call started");
    loger.AppendHeading("Delivery unit (" + GetType().Name + ")", 2);
    loger.AppendLine("Logical name: " + name);

    loger.open("items", "Delivery items", "List of all deliveryUnit items contained here");
    foreach (IDeliveryUnitItem item in items)
    {
        loger.AppendLine(" > " + item.name + ":" + item.itemType.ToString());
        loger.AppendLine(" > > Location: " + item.location.ToString());
        loger.AppendLine(" > > Description: " + item.description);
    }
    loger.close();

    loger.open("items", "Items by level", "Showing items triggered by scope level");
    reportElementLevel lev = reportElementLevel.none;
    foreach (KeyValuePair<reportElementLevel, List<deliveryUnitItem>> pair in itemByLevel)
    {
        lev = pair.Key;
        foreach (deliveryUnitItem it in pair.Value)
        {
            loger.AppendLine(lev.ToString() + " --> " + it.name + " (" + it.GetType().Name + ")");
        }
    }
    loger.close();

    loger.open("items", "Output by level", "Showing items designated as output items and triggered by scope level");
    foreach (KeyValuePair<reportElementLevel, List<deliveryUnitItem>> pair in outputByLevel)
    {
        lev = pair.Key;
        foreach (deliveryUnitItem it in pair.Value)
        {
            loger.AppendLine(lev.ToString() + " --> " + it.name + " (" + it.GetType().Name + ")");
        }
    }
    loger.close();

    // loger.log("deliveryUnit describe() call finished");
}
/// <summary>
/// Generates a compiled version of TF-IDF table. <see cref="weightTableCompiled"/>
/// </summary>
/// <param name="loger">The logger - for diagnostics</param>
/// <returns>The compiled table</returns>
public weightTableCompiled GetCompiledTable(ILogBuilder loger = null)
{
    weightTableCompiled output = new weightTableCompiled(name);

    int ti = 0;      // terms processed so far
    int ts = 10;     // progress report cycle
    int c = 0;
    int tc = Count();

    updateMaxValues();

    foreach (IWeightTableTerm t in terms.Values)
    {
        weightTableTermCompiled cterm = GetCompiledTerm(t);
        //output.Add(t, GetAFreq(t.nominalForm)) as weightTableTermCompiled;
        output.AddOrUpdate(cterm);

        ti++;
        c++;
        if (c > ts)
        {
            c = 0;
            double tp = ti.GetRatio(tc);
            if (loger != null)
            {
                loger.AppendLine("TF-IDF [" + name + "] table compiled [" + tp.ToString("P2") + "]");
            }
        }
    }

    output.updateMaxValues();
    return output;
}
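// A minimal usage sketch (hypothetical caller): compiles the table once and keeps
// the compiled form for fast lookups. `table` is assumed to be a populated TF-IDF
// weight table instance and `log` an ILogBuilder; `compiled.name` mirrors the
// source table name passed to the weightTableCompiled constructor above.
weightTableCompiled compiled = table.GetCompiledTable(log);
log.AppendLine("Compiled table [" + compiled.name + "] ready");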
/// <summary>
/// Method: word -- translation --- synset ---- other synsets --- collecting all words --- translation --- word
/// </summary>
/// <param name="model">The model.</param>
/// <param name="response">The response.</param>
/// <param name="disableCodePrefixFilter">if set to <c>true</c> [disable code prefix filter].</param>
/// <param name="disableCodeBranchFilter">if set to <c>true</c> [disable code branch filter].</param>
/// <returns>The model, enriched with translations, symsets and synonyms</returns>
public static termExploreModel getSynonymsWithWordnetViaApertium(termExploreModel model, ILogBuilder response, bool disableCodePrefixFilter = false, bool disableCodeBranchFilter = false)
{
    tokenGraph result = model.graph;
    result = languageManagerApertium.manager.queryForGraph(model.lemma.inputForm, apertiumDictQueryScope.exact);
    model.translations.AddRange(result.getAllLeafs().getNames());

    if (response != null)
    {
        response.consoleAltColorToggle();
        string rst = result.ToStringTreeview();
        response.Append(rst);
        response.consoleAltColorToggle();
    }

    languageManagerWordnet.manager.queryWithGraph(result, response, WordnetSource.english, WordnetQueryType.getSymsetCodesByWord);

    if (response != null)
    {
        response.consoleAltColorToggle();
        string st = result.ToStringTreeview();
        response.Append(st);
        response.consoleAltColorToggle();
    }

    model.wordnetSecondarySymsets.AddRange(result.getAllLeafs().getDeepest().getNames());

    if (!disableCodePrefixFilter)
    {
        string codeStart = model.lemma.gramSet.getPosType().GetWordNetCodeStart().ToString();
        Regex codeCriteria = new Regex("^" + codeStart);
        var badCodes = result.getAllLeafs(codeCriteria, true);

        if (response != null)
        {
            response.AppendHorizontalLine();
            response.AppendLine("Reducing to proper codes [" + codeStart + "]->filtered-out[" + badCodes.Count() + "]");
        }

        badCodes.removeFromParent();

        if (response != null)
        {
            response.consoleAltColorToggle();
            string rst = result.ToStringTreeview();
            response.Append(rst);
            response.consoleAltColorToggle();
        }
    }

    model.wordnetPrimarySymsets.AddRange(result.getAllLeafs().getDeepest().getNames());

    languageManagerWordnet.manager.queryWithGraph(result, response, WordnetSource.english, WordnetQueryType.getWordsBySymsetCode);
    model.translationRelated.AddRange(result.getAllLeafs().getDeepest().getNames());

    if (response != null)
    {
        response.AppendHorizontalLine();
        response.AppendLine("Getting English words by symset codes via WordNet");
        response.consoleAltColorToggle();
        string rst = result.ToStringTreeview();
        response.Append(rst);
        response.consoleAltColorToggle();
    }

    languageManagerApertium.manager.queryByGraphNode(result, apertiumDictQueryScope.exact, apertiumDictNeedleSide.translated);
    model.wordnetSynonyms.AddRange(result.getAllLeafs().getDeepest().getNames());

    if (response != null)
    {
        response.AppendHorizontalLine();
        response.AppendLine("Translating back to Serbian via Apertium");
        response.consoleAltColorToggle();
        string rst = result.ToStringTreeview();
        response.Append(rst);
        response.consoleAltColorToggle();
    }

    if (!disableCodeBranchFilter) // <-- removes the symset nodes that contain none of the first-level translation words
    {
        var codeLevel = result.getAllChildren().getOnLevel(3);
        List<IObjectWithPathAndChildren> toTakeOut = new List<IObjectWithPathAndChildren>();

        foreach (var clb in codeLevel)
        {
            foreach (var clb_c in clb)
            {
                bool takeOut = true;
                foreach (var clb_cc in clb_c)
                {
                    if (clb_cc.name == model.lemma.inputForm)
                    {
                        takeOut = false;
                        break;
                    }
                }
                if (takeOut)
                {
                    if (response != null)
                    {
                        response.AppendLine("-- take out: " + clb.path);
                    }
                    toTakeOut.Add(clb);
                    break;
                }
            }
        }

        toTakeOut.removeFromParent();

        int wps = model.wordnetSecondarySymsets.Count();
        int tr = model.translationRelated.Count();
        int ws = model.wordnetSynonyms.Count();

        if (response != null)
        {
            response.AppendLine("----- Branch-node filter ----");
            response.AppendLine("Symsets: " + wps);
            response.AppendLine("Translations: " + tr);
            response.AppendLine("Terms: " + ws);
            response.consoleAltColorToggle();
            string rst = result.ToStringTreeview();
            response.Append(rst);
            response.consoleAltColorToggle();
        }

        model.wordnetPrimarySymsets = result.getAllChildren().getOnLevel(3).getNames(true);
        model.translations = result.getAllChildren().getOnLevel(4).getNames(true);
        model.synonyms = result.getAllChildren().getOnLevel(5).getNames(true);

        wps = wps - model.wordnetPrimarySymsets.Count();
        tr = tr - model.translations.Count();
        ws = ws - model.synonyms.Count();

        if (response != null)
        {
            response.AppendLine("Reduction of Symsets: " + wps);
            response.AppendLine("Reduction of Translations: " + tr);
            response.AppendLine("Reduction of Terms: " + ws);
            response.consoleAltColorToggle();
            string rst = result.ToStringTreeview();
            response.Append(rst);
            response.consoleAltColorToggle();
        }
    }

    /*
    String rgex_pat = "^([\\w]*\\\\[\\w]*\\\\[\\w]*\\\\[\\w]*\\\\{0}$)";
    Regex rgex = new Regex(String.Format(rgex_pat, model.lemma.inputForm));
    var onlyWithLemma = result.getAllLeafs().getFilterOut(rgex);
    */

    //languageManagerApertium.manager.queryByGraphNode(result, apertiumDictQueryScope.exact, apertiumDictNeedleSide.english);

    model.graph = result;
    return model;
}
/// <summary>
/// Loads the lexic resource.
/// </summary>
/// <param name="output">The output.</param>
/// <param name="resourceFilePath">The resource file path.</param>
public void LoadLexicResource(ILogBuilder output, String resourceFilePath)
{
    if (isLoaded)
    {
        return;
    }

    List<String> lines = new List<String>();
    String pt = "";

    if (!localCache.isNullOrEmpty())
    {
        pt = localCache;
        lines.AddRange(File.ReadLines(localCache));
    }

    if (lines.Count < 100)
    {
        pt = resourceFilePath;
        lines = new List<string>();
        lines.AddRange(File.ReadAllLines(resourceFilePath));
    }

    Int32 i = 0;
    Int32 iCycle = lines.Count() / 20;
    Int32 l = lines.Count();
    Int32 c = 0;
    Double p = 0;

    output.logStartPhase("Loading", "Loading the lexic resource - with mode: " + mode.ToString());
    output.log("Start of loading lexic resource [" + pt + "]");

    // Parallel.ForEach(lines, (line) =>  // unconstrained parallel variant
    Parallel.ForEach(lines, new ParallelOptions { MaxDegreeOfParallelism = 1 }, (line) =>
    {
        string inflectForm = "";
        string lemma = "";
        string gramTag = "";
        SelectFromLine(line, out inflectForm, out lemma, out gramTag);

        lexicInflection inflect = null;

        if (!inflectForm.isNullOrEmpty())
        {
            if (!ContainsKey(inflectForm))
            {
                inflect = new lexicInflection(line);
                inflect.lemmaForm = lemma;
                inflect.name = inflectForm;
                inflect.inflectedForm = inflectForm;
                inflect.lexicalDefinitionLine = line;

                if (spellAlternator.IsInitiated)
                {
                    String altInflectedForm = spellAlternator.ConvertFromAtoB(inflectForm);
                    spellAlternatives.GetOrAdd(altInflectedForm, inflectForm);
                }

                Add(inflectForm, inflect);
            }
            else
            {
                inflect = base[inflectForm];
            }

            lexicGrammarCase gramCase = null;
            if (mode == textResourceIndexResolveMode.resolveOnLoad)
            {
                var gramTagColl = grammTagConverter.ConvertFromString(gramTag);
                gramCase = inflect.AddGrammarCase(gramTagColl);
                gramCase.lexicalDefinitionLine = gramTag;
            }
            else
            {
                gramCase = new lexicGrammarCase();
                gramCase.lexicalDefinitionLine = gramTag;
                gramCase.name = "gc" + i.ToString();
                inflect.Add(gramCase);
            }

            // <-- construction of the lemma-centered dictionary
            lexicGraphSetWithLemma lxSet = null;
            if (!registratedLemmaIndex.ContainsKey(lemma))
            {
                lock (LemmaIndexLock)
                {
                    if (!registratedLemmaIndex.ContainsKey(lemma))
                    {
                        lxSet = new lexicGraphSetWithLemma();
                        lxSet.lemmaForm = lemma;
                        registratedLemmaIndex.TryAdd(lemma, lxSet);
                    }
                }
            }
            lxSet = registratedLemmaIndex[lemma];

            if (!lxSet.ContainsKey(inflectForm))
            {
                lock (SetLock)
                {
                    if (!lxSet.ContainsKey(inflectForm))
                    {
                        lxSet.TryAdd(inflect.name, inflect);
                    }
                }
            }

            Interlocked.Increment(ref c);
            Interlocked.Increment(ref i);

            if (c > iCycle)
            {
                lock (loadStatusLock)
                {
                    if (c > iCycle)
                    {
                        c = 0;
                        p = i.GetRatio(l);
                        output.AppendLine("Done: _" + p.ToString("P2") + "_");
                    }
                }
            }
        }
    });

    output.logEndPhase();
    output.log("End of loading process");
    isLoaded = true;
}
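// A minimal usage sketch (hypothetical; `resource`, `log` and the file path are
// assumed): the resource is parsed once, and repeated calls are no-ops because the
// method returns early when isLoaded is set.
resource.LoadLexicResource(log, "resources\\dict\\delaf.dic"); // first call: parses the file
resource.LoadLexicResource(log, "resources\\dict\\delaf.dic"); // second call: returns immediately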
/// <summary>
/// Connects to the hooks
/// </summary>
/// <param name="response">The response.</param>
/// <param name="simulation">if set to <c>true</c> [simulation].</param>
public void connectToHooks(ILogBuilder response, Boolean simulation = true)
{
    response.AppendLine("Connecting concepts by the hooks");
    if (simulation)
    {
        response.AppendLine("Running in the simulation mode -- no changes will be saved. ");
    }

    foreach (domainConceptEntry parent in toConnect)
    {
        List<Concept> concepts = new List<Concept>();
        List<TermLemma> lemmas = new List<TermLemma>();

        foreach (String needle in parent.needles)
        {
            var reLem = semanticLexiconManager.manager.resolve(needle);
            List<IConcept> re_concepts = new List<IConcept>();
            List<ITermLemma> re_synonyms = new List<ITermLemma>();
            Boolean found = false;

            foreach (var re in reLem)
            {
                found = true;
                //re_concepts.AddRange(re.concepts);
                foreach (IConcept rec in re.concepts)
                {
                    if (!re_concepts.Any(x => x.name == rec.name))
                    {
                        re_concepts.Add(rec);
                    }
                }
                foreach (TermLemma rel in re.relatedTo)
                {
                    if (!re_synonyms.Any(x => x.name == rel.name))
                    {
                        re_synonyms.Add(rel);
                    }
                }
                foreach (TermLemma rel in re.relatedFrom)
                {
                    if (!re_synonyms.Any(x => x.name == rel.name))
                    {
                        re_synonyms.Add(rel);
                    }
                }
                re_synonyms.Add(re);
            }

            if (!simulation)
            {
                if (re_concepts.Contains(parent.concept))
                {
                    response.log("[" + parent.concept.name + "] [" + needle + "] hook is already deployed ");
                    continue;
                }
            }

            if (found)
            {
                String pname = parent.name;
                if (!simulation)
                {
                    pname = parent.concept.name;
                }
                response.log("[" + pname + "] [" + needle + "] lemmas[" + re_synonyms.Count + "] concepts[" + re_concepts.Count + "] ");

                if (re_concepts.Any())
                {
                    response.log("Connecting [" + pname + "] --> concepts[" + re_concepts.Count + "] ");
                    foreach (Concept c in re_concepts)
                    {
                        if (c != parent.concept)
                        {
                            response.log("--- [" + pname + "] --> concept[" + c.name + "] ");
                            if (!simulation)
                            {
                                parent.concept.hypoConcepts.Add(c);
                            }
                        }
                    }
                }
                else
                {
                    response.log("Connecting [" + pname + "] --> lemmas[" + re_synonyms.Count() + "] ");
                    foreach (var c in re_synonyms)
                    {
                        response.log("--- [" + pname + "] --> lemma[" + c.name + "] ");
                        if (!simulation)
                        {
                            parent.concept.lemmas.Add(c);
                        }
                    }
                }
            }
            else
            {
                response.log("Hook [" + needle + "] failed as no lemma found");
            }
        }
    }

    if (!simulation)
    {
        Boolean doSave = aceTerminalInput.askYesNo("Do you want to save changes to the triplestore?");
        if (doSave)
        {
            semanticLexiconManager.manager.lexiconContext.SaveChanges();
        }
    }
}
/// <summary>
/// Builds the conceptual mesh.
/// </summary>
/// <param name="response">The response.</param>
/// <param name="simulation">if set to <c>true</c> [simulation].</param>
public void buildConceptualMesh(ILogBuilder response, Boolean simulation = true)
{
    List<graphWrapNode<domainConceptEntry>> output = new List<graphWrapNode<domainConceptEntry>>();
    List<graphWrapNode<domainConceptEntry>> stack = new List<graphWrapNode<domainConceptEntry>>();

    response.AppendLine("Building the conceptual mesh");
    if (simulation)
    {
        response.AppendLine("Running in the simulation mode -- no changes will be saved. ");
    }

    stack.AddRange(this);

    while (stack.Any())
    {
        var n_stack = new List<graphWrapNode<domainConceptEntry>>();

        foreach (graphWrapNode<domainConceptEntry> parent in stack)
        {
            var pConcept = semanticLexiconManager.manager.getConcept(parent.path, !simulation, parent.item.description, false);
            parent.item.concept = pConcept;

            foreach (graphWrapNode<domainConceptEntry> child in parent)
            {
                if (!child.isNullNode)
                {
                    var cConcept = semanticLexiconManager.manager.getConcept(child.path, !simulation, child.item.description, false);
                    child.item.concept = cConcept;
                    if (!simulation)
                    {
                        pConcept.hypoConcepts.Add(cConcept);
                    }
                    response.AppendLine("[" + parent.name + "]->[" + child.name + "]");
                }
                else
                {
                    domainConceptEntry newit = new domainConceptEntry(child.name, "", "");
                    child.SetItem(newit);
                    var cConcept = semanticLexiconManager.manager.getConcept(child.path, !simulation, child.item.description, false);
                    child.item.concept = cConcept;
                    if (!simulation)
                    {
                        pConcept.hypoConcepts.Add(cConcept);
                    }
                    response.AppendLine("[" + parent.name + "]->[" + child.name + "] is a null node");
                }
            }

            if (!parent.isNullNode)
            {
                if (parent.item.needles.Any())
                {
                    toConnect.Add(parent);
                }
            }

            n_stack.AddRange(parent);
        }

        stack = n_stack;
    }

    response.AppendLine("Concepts with hooks to connect: " + toConnect.Count());

    if (!simulation)
    {
        semanticLexiconManager.manager.lexiconContext.SaveChanges();
    }
}
protected void AppendStatus(ILogBuilder loger)
{
    loger.AppendLine("Semantic Lexicon => Lemma[" + lemmaCount + "] Instance[" + instanceCount + "] Concept[" + conceptCount + "]");
    loger.AppendLine("Semantic Lexicon Cache => FQueries[" + failedCount + "]:[" + failed.Count + "] encoding twins[" + twins.LoadCount + "]");
}
public bool discoverGram(termExploreItem item, ILogBuilder loger, bool debug = true)
{
    //List<termExploreItem> inst = new List<termExploreItem>();
    //exploreModel.instances.ForEach(x => inst.Add(x));
    //inst.Add(exploreModel);
    //instanceCountCollection<pos_type> pct = new instanceCountCollection<pos_type>();

    // Returns true when grammar resolution FAILED for the item.
    bool failed = false;

    if (loger != null)
    {
        loger.AppendLine("Item:" + item.inputForm);
    }

    instanceCountCollection<object> res = termDiscoveryResolver.resolveQuery(item.inputForm);
    res.reCalculate();

    if (res.Count > 0)
    {
        List<object> sorted = res.getSorted();

        if (item.gramSet.getPosType() != pos_type.none)
        {
            sorted.RemoveAll(x => x is pos_type);
        }

        gramFlags gf = new gramFlags();
        if (sorted.Any(x => x is pos_type))
        {
            gf.Set((pos_type)sorted.First(x => x is pos_type));
        }
        //pct.AddInstance(gf.type, 1);

        var tl = posConverter.posTypeVsPattern[gf.type];
        sorted.RemoveAll(x => !tl.Contains(x.GetType()));

        if (loger != null)
        {
            loger.AppendLine("Votes:");
            // cap at 20 votes without overrunning the list
            for (int i = 0; i < Math.Min(sorted.Count(), 20); i++)
            {
                loger.Append(sorted[i].ToString() + "; ");
            }
        }

        if (sorted.Any(x => x is pos_gender))
        {
            gf.Set((pos_gender)sorted.First(x => x is pos_gender));
        }
        if (sorted.Any(x => x is pos_gramaticalCase))
        {
            gf.Set((pos_gramaticalCase)sorted.First(x => x is pos_gramaticalCase));
        }
        if (sorted.Any(x => x is pos_verbform))
        {
            gf.Set((pos_verbform)sorted.First(x => x is pos_verbform));
        }
        if (sorted.Any(x => x is pos_number))
        {
            gf.Set((pos_number)sorted.First(x => x is pos_number));
        }
        if (sorted.Any(x => x is pos_degree))
        {
            gf.Set((pos_degree)sorted.First(x => x is pos_degree));
        }
        if (sorted.Any(x => x is pos_person))
        {
            gf.Set((pos_person)sorted.First(x => x is pos_person));
        }

        if (loger != null)
        {
            loger.AppendLine("Final gram:" + gf.ToString());
        }
        item.gramSet.Add(gf);
    }
    else
    {
        if (item.inputForm.Length < 4)
        {
            return false;
        }
        //item.flags = termExploreItemEnumFlag.none;
        failed = true;
    }

    return failed;
}
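// A minimal usage sketch (hypothetical): note that discoverGram returns its local
// `failed` flag, i.e. true when grammar resolution did NOT succeed.
termExploreItem item = new termExploreItem("kućama"); // assumed constructor taking the input form
bool failed = discoverGram(item, log);
log.AppendLine(failed ? "No gram votes for [" + item.inputForm + "]" : "Resolved: " + item.gramSet.ToString());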
/// <summary>
/// Explores definition of an unknown term
/// </summary>
/// <param name="term">The term.</param>
/// <param name="loger">The logger.</param>
/// <param name="shortExplore">if set to <c>true</c> [short explore].</param>
/// <param name="debug">if set to <c>true</c> [debug].</param>
/// <param name="exploreModel">Optional model to continue exploration from.</param>
/// <returns>Models discovered for the term</returns>
public List<termExploreModel> explore(string term, ILogBuilder loger, bool shortExplore = true, bool debug = true, termExploreModel exploreModel = null)
{
    term = term.Trim();
    List<termExploreModel> output = new List<termExploreModel>();

    if (modelRegistry.ContainsKey(term))
    {
        return modelRegistry[term];
    }
    if (missing.Contains(term))
    {
        return GetModels(term);
    }

    if (term.isNumber())
    {
        termExploreModel tmp = makeTempModel(term, pos_type.NUMnumerical);
        tmp.flags = termExploreItemEnumFlag.datapoint;
        if (loger != null)
        {
            loger.AppendLine("Term [" + term + "] detected to be number.");
        }
        AddModel(tmp);
        return GetModels(term);
    }

    // <-- second test: Hunspell
    exploreModel = termExploreProcedures.exploreWithHunspell(new termExploreItem(term), loger);

    List<string> suggests = new List<string>();
    exploreModel.instances.ForEach(x => suggests.Add(x.inputForm));
    //languageManagerDBNamedEntities.manager.exploreEntities(exploreModel.rootWord, exploreModel);
    suggests.Add(exploreModel.rootWord);

    apertiumDictionaryResult result = languageManagerApertium.manager.query(suggests, apertiumDictQueryScope.exact, apertiumDictNeedleSide.serbian);

    if (result.Any())
    {
        List<termExploreItem> gramCheck = new List<termExploreItem>();
        gramFlags gr = null;

        if (result.termVsGramFlags.ContainsKey(exploreModel.inputForm))
        {
            exploreModel.gramSet.Add(new gramFlags(result.termVsGramFlags[exploreModel.inputForm]));
            if (exploreModel.lemma == null)
            {
                exploreModel.lemma = exploreModel.instances[exploreModel.inputForm];
            }
            gramCheck.Add(exploreModel);
            if (debug && loger != null)
            {
                loger.AppendLine("Apertium discovered model [" + exploreModel.inputForm + "]");
            }
        }
        else
        {
            //if (loger != null) loger.AppendLine("Apertium failed to discover [" + exploreModel.inputForm + "]");
        }

        foreach (termExploreItem item in exploreModel.instances)
        {
            if (result.termVsGramFlags.ContainsKey(item.inputForm))
            {
                item.gramSet.Add(new gramFlags(result.termVsGramFlags[item.inputForm]));
                gramCheck.Add(exploreModel);
                exploreModel.lemmaForm = item.inputForm;
                if (exploreModel.lemma == null)
                {
                    exploreModel.lemma = item;
                }
                if (debug && loger != null)
                {
                    loger.AppendLine("Apertium discovered model [" + item.inputForm + "]");
                }
            }
            else
            {
                //if (loger != null) loger.AppendLine("Apertium failed to discover [" + item.inputForm + "]");
            }
        }

        exploreModel.translations.AddRange(result.GetEnglish());
        gramCheck.RemoveAll(x => posConverter.posTypeVsPattern[x.gramSet.getPosType()].Count() == 0);

        int disc = 0;
        foreach (var gram in gramCheck)
        {
            if (discoverGram(gram, loger, debug))
            {
                disc++;
            }
        }
        if (loger != null)
        {
            loger.AppendLine("Gram [" + term + "] autodiscovered for [" + disc + "] / [" + gramCheck.Count() + "]");
        }

        if (debug && loger != null)
        {
            exploreModel.ToString(loger, true, false);
            manager.constructor.saveTermModel(exploreModel, "Apertium_");
        }

        AddModel(exploreModel);
        exploreModel.flags = termExploreItemEnumFlag.aper;
        if (shortExplore)
        {
            return GetModels(term);
        }
    }
    else
    {
        if (loger != null)
        {
            loger.AppendLine("Apertium failed to discover any information on [" + term + "]");
        }
    }

    if (loger != null)
    {
        loger.consoleAltColorToggle();
    }
    // <-- APERTIUM ^^

    foreach (string s in suggests)
    {
        languageManagerDBNamedEntities.manager.exploreEntities(s, exploreModel);
    }

    if (exploreModel.flags == termExploreItemEnumFlag.namedEntity)
    {
        AddModel(exploreModel);
        if (debug && loger != null)
        {
            exploreModel.ToString(loger, true, false);
            manager.constructor.saveTermModel(exploreModel, "NamedEntity_");
            loger.AppendLine("Named entities discovered model [" + exploreModel.inputForm + "]:" + exploreModel.gramSet.ToString());
        }
        if (shortExplore)
        {
            return GetModels(term);
        }
    }
    else
    {
        if (loger != null && debug)
        {
            loger.AppendLine("Named entities found nothing for [" + exploreModel.inputForm + "]:" + exploreModel.gramSet.ToString());
        }
    }

    if (loger != null)
    {
        loger.consoleAltColorToggle();
    }
    // <-- NAMED ENTITY ^^

    // <-- Wordnet
    wordnetSymsetResults resSrWordnet = languageManagerWordnet.manager.query_srb(suggests, loger);
    bool found = false;

    if (resSrWordnet.Any())
    {
        foreach (termExploreItem item in exploreModel.instances)
        {
            if (resSrWordnet.GetByKey(item.inputForm).Any())
            {
                exploreModel.lemma = item;
                exploreModel.lemmaForm = item.inputForm;
                exploreModel.translations.AddRange(resSrWordnet.GetValues());
                exploreModel.synonyms.AddRange(resSrWordnet.GetKeys());
                exploreModel.flags = termExploreItemEnumFlag.srWNet;
                found = true;
                item.gramSet.Add(new gramFlags(new Enum[] { resSrWordnet.models[item.inputForm].gramSet.getPosType() }));
            }
        }
        foreach (termExploreItem item in exploreModel.instances)
        {
            discoverGram(item, loger, debug);
        }
    }

    if (found)
    {
        if (loger != null)
        {
            loger.AppendLine("SerbianWordNet discovered model [" + term + "]:" + exploreModel.gramSet.ToString());
        }
        if (debug && loger != null)
        {
            exploreModel.ToString(loger, true, false);
            manager.constructor.saveTermModel(exploreModel, "SrWordNet_");
        }
        AddModel(exploreModel);
        exploreModel.flags = termExploreItemEnumFlag.srWNet;
        if (shortExplore)
        {
            return GetModels(term);
        }
    }
    else
    {
        if (loger != null && debug)
        {
            loger.AppendLine("Serbian wordnet found nothing for [" + term + "]");
        }
    }
    // <-- SERBIAN WORD NET ^^

    bool failed = discoverGram(exploreModel, loger, debug);
    exploreModel.instances.ForEach(x => discoverGram(x, loger, debug));

    int d = 0;
    List<termExploreItem> lastCheck = new List<termExploreItem>();
    foreach (var gram in lastCheck)
    {
        if (discoverGram(gram, loger, debug))
        {
            d++;
        }
    }
    if (debug && loger != null)
    {
        loger.AppendLine("The last check [" + term + "] autodiscovered for [" + d + "] / [" + lastCheck.Count() + "]");
    }
    if (d == 0)
    {
        failed = true;
    }

    if (loger != null)
    {
        loger.consoleAltColorToggle();
    }
    // <-- LAST CHECK ^^

    if (!failed)
    {
        exploreModel.flags = termExploreItemEnumFlag.termExplorer;
        AddModel(exploreModel);
        return GetModels(term);
    }
    else
    {
        if (debug && loger != null)
        {
            loger.AppendLine("Exploration failed for [" + term + "] -- creating temporary term model");
        }
        output.Add(makeTempModel(term, pos_type.TEMP));
        missing.Add(term);
        return output;
    }
}
public List<ILexiconItem> exploreAndSave(string term, ILogBuilder loger, bool shortExplore = true, bool debug = true, params termExploreItemEnumFlag[] flagsToSave)
{
    List<ILexiconItem> output = new List<ILexiconItem>();

    if (lexicalItemRegistry.ContainsKey(term))
    {
        return lexicalItemRegistry[term];
    }

    if (!manager.settings.doAutoexplore)
    {
        output.Add(makeTempLemma(term));
        if (loger != null)
        {
            loger.AppendLine("Autoexplore off [" + term + "] is temporarily created.");
        }
        return output;
    }

    var res = failedList.Search(term, false, 1);
    if (res.getLineContentList().Contains(term))
    {
        output.Add(makeTempLemma(term));
        if (loger != null)
        {
            loger.AppendLine("Term [" + term + "] is on black list - making temp term.");
        }
        return output;
    }

    List<termExploreModel> models = explore(term, loger, shortExplore, debug);

    if (flagsToSave == null)
    {
        flagsToSave = new termExploreItemEnumFlag[] { termExploreItemEnumFlag.aper, termExploreItemEnumFlag.namedEntity, termExploreItemEnumFlag.srWNet };
    }

    foreach (termExploreModel model in models)
    {
        if (debug)
        {
            manager.constructor.saveTermModel(model);
        }

        if (flagsToSave.Contains(model.flags))
        {
            manager.constructor.output.AppendLine("New term for Lexicon: " + model.inputForm);
            manager.constructor.addTermModelToLexicon(model);
            if (manager.settings.doAutoMakeSynonymRelationship)
            {
                manager.constructor.addSynonymsAndConceptLinks(model, true);
            }
            output.AddRange(manager.getLexiconItems(model.inputForm, loger, false));
        }
        else
        {
            output.Add(modelToLemma(model));
        }
    }

    if (!output.Any())
    {
        var md = new TempLemma(term);
        output.Add(makeTempLemma(term));
        md.type = pos_type.TEMP.ToString();
        failedList.Append(new string[] { md.name });
        if (loger != null)
        {
            loger.AppendLine("Term [" + term + "] not found. Using single-instance spark.");
        }
    }

    lexicalItemRegistry.Add(term, output);
    return output;
}
/// <summary>
/// Describes the sample group to <c>output</c>
/// </summary>
/// <param name="output">The output object</param>
public void describe(ILogBuilder output = null)
{
    if (output == null)
    {
        return;
    }

    int tl = output.tabLevel;
    output.rootTabLevel();

    output.open("desc", name, "");

    int ci = 1;
    foreach (sampleGroupItem item in this)
    {
        item.parent = this;
        output.open("group", item.groupTitle, item.groupDescription);
        output.AppendPair("ID", ci);
        output.AppendPair("Tag", item.groupTag);

        if (item.groupSizeLimit == -1)
        {
            output.AppendPair("Size (ratio)", item.weight + " / " + totalWeight.ToString());
        }
        else
        {
            output.AppendPair("Size (limit)", item.groupSizeLimit);
        }

        output.AppendPair("Count", item.count);
        output.AppendPair("Border", item.groupBorder);

        if (item.isClosed)
        {
            output.AppendLine("The group is closed for new members");
        }
        else
        {
            output.AppendLine("The group may receive new members");
        }

        ci++;
        output.close();
    }

    output.AppendHorizontalLine();
    output.open("info", "Info", "");
    output.AppendPair("Counted collection", countedCollectionName);
    output.AppendPair("Items with group tag/s", countHasGroup);
    output.AppendPair("Items without group tag", countNoGroup);
    output.close();

    output.log("-- end");
    output.close();
    output.tabLevel = tl;
}
/// <summary>
/// Builds a term model from the word input
/// </summary>
/// <param name="word">The word.</param>
/// <param name="response">The response.</param>
/// <param name="wordIsLemma">if set to <c>true</c>, the word is trusted to be a lemma and the lemma lookup is skipped.</param>
/// <returns>The explored term model; <c>wasExploreFailed</c> is set when no lemma definition was found</returns>
public static termExploreModel exploreWithUnitex(string word, ILogBuilder response, bool wordIsLemma = false)
{
    termExploreModel output = new termExploreModel();
    output.modelSource = termExploreModelSource.fromToken;
    output.inputForm = word;
    string lemma = word;

    var tls = semanticLexiconManager.manager.resolve(word);
    if (tls != null)
    {
        if (tls.Count() > 0)
        {
            if (response != null)
            {
                response.AppendLine("#1 Lemma already defined in the triplestore [" + word + "] ");
            }
            output = semanticLexiconManager.manager.constructor.getTermModel(tls.First());
            return output;
        }
    }

    if (!wordIsLemma)
    {
        if (response != null)
        {
            response.AppendLine("#1 Finding Lemma for [" + word + "] ");
        }

        string query = string.Format(posConverter.REGEX_UNITEX_InstanceToLemmaFormat, word);
        fileTextSearchResult reslt = languageManagerUnitex.manager.operatorDelaf.Search(query, true, 1, RegexOptions.IgnoreCase);
        if (response != null)
        {
            reslt.ToString(response, true);
        }

        Regex instanceToLemmaReg = new Regex(query);
        if (reslt.Count() > 0)
        {
            var lnp = reslt.First();
            Match mch = instanceToLemmaReg.Match(lnp.Value);
            lemma = mch.Groups[1].Value;
        }
    }
    else
    {
        if (response != null)
        {
            response.AppendLine("#1 The word is trusted to be a lemma [" + word + "] - skipping search");
        }
    }

    // <-- preparing the cache
    var cache = languageManagerUnitex.manager.operatorDelaf.Search(lemma, false, 300);
    if (response != null)
    {
        response.AppendLine("Cached definitions [" + cache.Count() + "] ");
    }

    // <-- 2. finding the lemma definition
    output.lemmaForm = lemma;
    output.lemma = new termExploreItem(lemma);
    if (response != null)
    {
        response.AppendLine("#2 Finding Lemma definition [" + lemma + "] ");
    }

    string lemmaQuery = string.Format(posConverter.REGEX_UNITEX_DeclarationForLemma, lemma);
    Regex lemmaQueryRegex = new Regex(lemmaQuery);
    fileTextSearchResult lemmaResult = languageManagerUnitex.manager.operatorDelaf.Search(cache, lemmaQuery, true, 5, RegexOptions.IgnoreCase);
    if (response != null)
    {
        lemmaResult.ToString(response, true);
    }

    if (lemmaResult.Count() == 0)
    {
        if (response != null)
        {
            response.consoleAltColorToggle();
            response.AppendLine("Failed to find lemma definition for [" + word + "]. Aborting exploration.");
            response.consoleAltColorToggle();
        }
        output.wasExploreFailed = true;
        return output;
    }

    foreach (var lr_lnp in lemmaResult)
    {
        Match lmch = lemmaQueryRegex.Match(lr_lnp.Value);
        if (lmch.Success)
        {
            output.lemma.gramSet.Add(lmch.Groups[1].Value);
        }
    }
    if (response != null)
    {
        output.lemma.ToString(response);
    }

    // <-- 3. getting all instances for the lemma
    if (response != null)
    {
        response.AppendLine("#3 Extracting all instances for the Lemma [" + lemma + "] ");
    }

    string instanceQuery = string.Format(posConverter.REGEX_UNITEX_LemmaToInstanceFormat, lemma);
    string instanceUnitexQuery = "," + lemma + ".";
    Regex instanceQueryRegex = new Regex(instanceQuery);
    fileTextSearchResult instanceResult = languageManagerUnitex.manager.operatorDelaf.Search(cache, instanceUnitexQuery, false, 100, RegexOptions.IgnoreCase);
    if (response != null)
    {
        instanceResult.ToString(response, true);
    }

    foreach (var lr_lnp in instanceResult)
    {
        Match lmch = instanceQueryRegex.Match(lr_lnp.Value);
        output.instances.Add(lmch.Groups[1].Value, lmch.Groups[2].Value);
    }

    // <-- 4. resulting term model
    if (response != null)
    {
        response.AppendLine("#4 Resulting term model [" + lemma + "] ");
        output.ToString(response);
    }

    return output;
}
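// A minimal usage sketch (hypothetical): explores a surface form against the
// Unitex DELAF dictionary and persists the model only when exploration succeeded.
termExploreModel m = exploreWithUnitex("kućama", log);
if (!m.wasExploreFailed)
{
    semanticLexiconManager.manager.constructor.saveTermModel(m);
}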
public static termExploreModel explore(string word, ILogBuilder response, termExploreMode mode, bool verbose = false)
{
    termExploreModel model = new termExploreModel(word);
    termExploreModelSet outset = semanticLexiconManager.manager.constructor.loadTermModels(word, true);

    if (response != null)
    {
        response.consoleAltColorToggle();
        response.AppendHorizontalLine();
        response.AppendLine("Exploring term[" + model.inputForm + "] with [" + mode.ToString() + "]");
        response.consoleAltColorToggle();
    }

    if (outset.Any())
    {
        model = outset.First();
        if (response != null)
        {
            response.AppendLine("term[" + model.inputForm + "]->lemma[" + model.lemma.inputForm + "]");
        }
    }
    else
    {
        model.lemmaForm = "";
        if (response != null)
        {
            response.AppendLine("term[" + word + "]->missingLemma[]");
        }
    }

    var output = response;
    if (!verbose)
    {
        response = null;
    }

    switch (mode)
    {
        case termExploreMode.apertium_direct:
            model = getSynonymsWithApertium(model, response);
            break;

        case termExploreMode.apertium_wordnet_eng:
            model = getSynonymsWithWordnetViaApertium(model, response);
            break;

        case termExploreMode.apertium_wordnet_srb:
            model = getSynonymsWithSerbianWordNetAndApertium(model, response);
            break;

        case termExploreMode.corpus:
            model = getSynonymsByCorpus(model, response);
            break;

        case termExploreMode.hunspell_srb:
            model = getSynonymsWithHunspell(model, response);
            break;

        case termExploreMode.none:
            break;

        case termExploreMode.wordnet_srb:
            model = getSynonymsWithSerbianWordNet(model, response);
            break;

        case termExploreMode.unitex:
            model = exploreWithUnitex(word, response);
            break;
    }

    model.PostProcess();

    if (output != null)
    {
        model.ToString(output, verbose, false);
    }

    return model;
}
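// A minimal usage sketch (hypothetical): runs the Apertium -> WordNet -> Apertium
// round-trip for a term, with verbose diagnostics routed to the logger.
termExploreModel model = explore("kuća", log, termExploreMode.apertium_wordnet_eng, verbose: true);
log.AppendLine("Synonyms: " + model.synonyms.Join(','));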