/// <summary>
/// Writes an overview of the plug-in groups into the supplied log builder: for every group key whose
/// plug-in list is non-empty, a header line with the group name and item count, followed by one line
/// per plug-in name.
/// </summary>
/// <param name="loger">Log builder that receives the lines.</param>
public void ToString(builderForLog loger)
{
    foreach (plugInGroupEnum groupKey in Keys)
    {
        var members = this[groupKey];

        // Empty groups are left out of the listing entirely.
        if (members.Count <= 0)
        {
            continue;
        }

        string header = "--- " + groupKey.ToString() + " [" + members.Count + "]";
        loger.AppendLine(header);

        foreach (IPlugInCommonBase member in members)
        {
            loger.AppendLine(member.name);
        }
    }
}
/// <summary>
/// Deprecated. Creates an instance of the plugin registered under <paramref name="plugin_className"/> in
/// <c>imbWEMManager.settings.supportEngine.plugins</c>, logs which plugin category the instance belongs to
/// and, when a collection is supplied, installs the instance into it.
/// </summary>
/// <param name="plugin_className">Name of the plugin class; used as the lookup key.</param>
/// <param name="loger">The loger that receives the outcome message.</param>
/// <param name="collection">The collection to install the instance into; skipped when <c>null</c>.</param>
/// <returns>
/// The created plugin, or <c>null</c> when the name is not registered or the created object
/// is not a <c>plugIn_base</c>.
/// </returns>
public static plugIn_base GetPluginInstance(string plugin_className, builderForLog loger, IAPlugInCollectionBase collection)
{
    if (!imbWEMManager.settings.supportEngine.plugins.Keys.Contains(plugin_className))
    {
        loger.AppendLine("Plugin [" + plugin_className + "] not found... ");
        return null;
    }

    plugIn_base plug = imbWEMManager.settings.supportEngine.plugins[plugin_className].getInstance() as plugIn_base;

    // The 'as' cast yields null when getInstance() returns nothing or an unrelated type; without this
    // guard the log calls below would throw on plug.name and a null would be installed into the collection.
    if (plug == null)
    {
        loger.AppendLine("Plugin [" + plugin_className + "] could not be instantiated as plugIn_base... ");
        return null;
    }

    // Order matters only if the plugin base types overlap; the first matching category wins.
    string outcome;
    if (plug is indexPlugIn_base)
    {
        outcome = "for Index Engine created";
    }
    else if (plug is enginePlugIn_base)
    {
        outcome = "for Crawl Job Engine created";
    }
    else if (plug is crawlerPlugIn_base)
    {
        outcome = "for Crawler created";
    }
    else if (plug is reportPlugIn_base)
    {
        outcome = "for Reporting created";
    }
    else
    {
        outcome = "of unknown category created... ";
    }

    loger.log("Plugin instance [" + plug.name + "] " + outcome);

    if (collection != null)
    {
        collection.installPlugIn(plug);
    }

    return plug;
}
/// <summary>
/// Builds three kinds of lexicon links for a term model and records them in a link log:
/// lemma-to-synonym-lemma links, concept-to-lemma links (one concept per primary WordNet synset code)
/// and concept-to-concept links between those concepts when they share a lemma.
/// The lexicon context is saved afterwards and the log is written to the project's links folder.
/// </summary>
/// <param name="termModel">
/// Term model supplying the lemma form, synonyms and primary synset codes; its <c>links_synonym</c>,
/// <c>links_lemmaConcept</c> and <c>links_conceptConcept</c> counters are reset and updated here.
/// </param>
/// <param name="saveModel">When <c>true</c>, <c>saveTermModel</c> is called for the model at the end.</param>
public void addSynonymsAndConceptLinks(termExploreModel termModel, bool saveModel = false)
{
    // <----------- ADDING SYNONYMS
    var lemmas = manager.getLemma(termModel.lemmaForm);
    var lemmasyns = manager.getLemmas(termModel.synonyms);

    builderForLog linkLog = new builderForLog();

    linkLog.open("Creating synonym-2-lemma links");
    termModel.links_synonym = 0;

    foreach (ITermLemma lemma in lemmas)
    {
        foreach (ITermLemma lemsyn in lemmasyns)
        {
            bool added = false;

            // A link in either direction counts as already existing.
            if (!lemma.relatedTo.Contains(lemsyn) && !lemma.relatedFrom.Contains(lemsyn))
            {
                lemma.relatedTo.Add(lemsyn);
                added = true;
            }

            if (added)
            {
                termModel.links_synonym++;
                linkLog.AppendLine("[" + termModel.links_synonym.ToString("D5") + "] " + lemma.name + " -> " + lemsyn.name);
            }
            else
            {
                linkLog.AppendLine("[Link exists] " + lemma.name + " -> " + lemsyn.name);
            }
        }
    }

    linkLog.close();

    // <----------- ADDING SYNSETS
    linkLog.open("Creating concept 2 lemma links");

    List<Concept> concepts = new List<Concept>();
    termModel.links_lemmaConcept = 0;

    foreach (string code in termModel.wordnetPrimarySymsets)
    {
        // Codes that pass isCleanWord() are treated as malformed synset codes and skipped.
        if (code.isCleanWord())
        {
            aceLog.log("wrong symset code -- [" + code + "] -- ignored!");
            continue;
        }

        Concept con = manager.getConcept(code, true, "WordNet Code");

        foreach (TermLemma lemma in lemmas)
        {
            // The flag is scoped per lemma: previously it was declared once per concept, so after the first
            // attached lemma every following lemma was counted and logged as new even when already linked.
            bool added = false;

            if (!con.lemmas.Contains(lemma))
            {
                con.lemmas.Add(lemma);
                added = true;
            }

            if (added)
            {
                termModel.links_lemmaConcept++;
                linkLog.AppendLine("[" + termModel.links_lemmaConcept.ToString("D5") + "] " + con.name + " -> " + lemma.name);
            }
            else
            {
                linkLog.AppendLine("[Link exists] " + con.name + " -> " + lemma.name);
            }
        }

        concepts.Add(con);
    }

    linkLog.close();

    // <--------------------------- linking SYNSET concepts
    linkLog.open("Creating concept 2 concept links");
    termModel.links_conceptConcept = 0;

    // NOTE(review): the pairing below also visits (con, con), so a concept with at least one lemma can be
    // linked to itself — confirm whether self-links are intended.
    foreach (Concept con in concepts)
    {
        foreach (Concept con2 in concepts)
        {
            bool added = false;

            if (!con2.relatedTo.Contains(con) && !con2.relatedFrom.Contains(con))
            {
                // Only concepts that share at least one lemma get linked.
                if (con2.lemmas.Any(x => con.lemmas.Contains(x)))
                {
                    con2.relatedTo.Add(con);
                    added = true;
                }
            }

            if (added)
            {
                termModel.links_conceptConcept++;
                linkLog.AppendLine("[" + termModel.links_conceptConcept.ToString("D5") + "] " + con2.name + " -> " + con.name);
            }
            else
            {
                linkLog.AppendLine("[Link exists] " + con2.name + " -> " + con.name);
            }
        }
    }

    linkLog.close();

    manager.lexiconContext.SaveChanges();

    string pth = projectFolderStructure[lexiconConstructorProjectFolder.links].pathFor(termModel.filename(".txt"));
    linkLog.ToString().saveStringToFile(pth, getWritableFileMode.overwrite);

    if (saveModel)
    {
        saveTermModel(termModel);
    }
}
/// <summary>
/// Performs full domain reevaluation: every loaded target of the site record is deployed into the index,
/// its page entry is (re)scored against the supplied weight table unless optimized mode allows the stored
/// values to be reused, and the domain entry is refreshed from the accumulated page results when the
/// <c>plugIn_indexDBUpdater_updateDomainEntry</c> setting is on.
/// </summary>
/// <param name="settings">The settings; only <c>plugIn_indexDBUpdater_optimizedMode</c> is read from it here.</param>
/// <param name="loger">The loger. Progress lines are skipped when it is <c>null</c>; it is also forwarded to <c>GetTermsForPage</c>.</param>
/// <param name="__wRecord">The w record. Each loaded target and the record itself are disposed by this method.</param>
/// <param name="evaluator">The evaluator, forwarded to <c>GetTermsForPage</c>.</param>
/// <param name="mTFIDF">Compiled weight table used to match page terms; the <c>tf_idf</c> of the matches is summed into the page InfoPrize.</param>
public void doDomainEvaluation(IndexEngineConfiguration settings, builderForLog loger, modelSpiderSiteRecord __wRecord, multiLanguageEvaluator evaluator, weightTableCompiled mTFIDF)
{
    indexDomain idomain = imbWEMManager.index.domainIndexTable.GetDomain(__wRecord.domainInfo.domainName);
    idomain.url = __wRecord.domain;

    // NOTE(review): mTFIDF is dereferenced below without a null check — confirm callers always supply it.

    double dIP = 0; // running sum of page InfoPrize values for the domain
    int p = 0;      // number of pages processed

    List<string> dDistinctTerms = new List<string>();
    List<string> dLemmas = new List<string>();
    List<string> dWords = new List<string>();
    List<string> urls = new List<string>();

    // Becomes false as soon as one page reuses its stored evaluation; the domain lemma data is then
    // taken from the experiment entry instead of the lists collected here.
    bool doEvalD = true;

    foreach (spiderTarget target in __wRecord.context.targets.GetLoaded())
    {
        indexPage ipage = imbWEMManager.index.deployTarget(target, __wRecord, idomain);

        bool doEval = true;

        if (settings.plugIn_indexDBUpdater_optimizedMode)
        {
            bool alreadyScored = (ipage.InfoPrize > 0) && (ipage.Lemmas > 0) && (ipage.relevancyText == nameof(indexPageRelevancyEnum.isRelevant));
            if (alreadyScored)
            {
                // Reuse is only possible when both stored term lists are present.
                doEval = ipage.AllWords.isNullOrEmpty() || ipage.AllLemmas.isNullOrEmpty();
            }
        }

        if (doEval)
        {
            List<string> terms;
            if (ipage.AllWords.isNullOrEmpty())
            {
                terms = GetTermsForPage(target, idomain, ipage, evaluator, loger);
            }
            else
            {
                terms = ipage.AllWords.SplitSmart(",", "", true);
            }

            ipage.AllWords = terms.toCsvInLine();

            double IP = 0;
            List<string> lemmas = new List<string>();
            List<IWeightTableTerm> mchl = mTFIDF.GetMatches(terms);

            if (ipage.AllLemmas.isNullOrEmpty())
            {
                lemmas.AddRange(mchl.Select(x => x.nominalForm));
            }
            else
            {
                lemmas = ipage.AllLemmas.SplitSmart(",", "", true);
            }

            foreach (weightTableTermCompiled cterm in mchl)
            {
                IP += cterm.tf_idf;

                // Terms with df == 1 are collected as the domain's distinct lemmas.
                if (cterm.df == 1)
                {
                    dDistinctTerms.AddUnique(cterm.nominalForm);
                }
            }

            ipage.InfoPrize = IP;
            dIP += IP;

            ipage.Lemmas = lemmas.Count;
            ipage.AllLemmas = lemmas.toCsvInLine();

            dWords.AddRange(terms);
            dLemmas.AddRange(lemmas);

            ipage.Note = "indexUpdate" + SessionID;

            imbWEMManager.index.pageIndexTable.AddOrUpdate(ipage);
        }
        else
        {
            dIP += ipage.InfoPrize;
            doEvalD = false;
        }

        urls.Add(ipage.url);
        p++;

        // Guarded like the summary line further down, which already tolerates a null loger.
        if (loger != null)
        {
            loger.AppendLine(string.Format("[{0,25}] [{1,70}] IP[{2,7}] LM[{3,6}]", idomain.domain, ipage.url.toWidthMaximum(60), ipage.InfoPrize.ToString("F4"), ipage.Lemmas.ToString("D5")));
        }

        target.Dispose();
    }

    // NOTE(review): this switch is read from the global imbWEMManager.settings.indexEngine rather than from
    // the 'settings' parameter — confirm that is intended.
    if (imbWEMManager.settings.indexEngine.plugIn_indexDBUpdater_updateDomainEntry)
    {
        if (!doEvalD)
        {
            var dlc_tf = imbWEMManager.index.experimentEntry.GetTFIDF_DLC(idomain);
            int dlc_c = dlc_tf.Count;
            idomain.TFIDFcompiled = (dlc_c > 0);
            idomain.Lemmas = dlc_c;
        }
        else
        {
            idomain.Lemmas = dLemmas.Count;
            idomain.Words = dWords.Count;
            idomain.TFIDFcompiled = (dLemmas.Count > 0);
            idomain.DistinctLemmas = dDistinctTerms.toCsvInLine();
            idomain.AllLemmas = dLemmas.toCsvInLine();
            idomain.AllWords = dWords.toCsvInLine();
        }

        idomain.InfoPrize = dIP;

        var urlAssert = imbWEMManager.index.pageIndexTable.GetUrlAssertion(urls);

        idomain.relevantPages = urlAssert[indexPageEvaluationEntryState.isRelevant].Count;
        idomain.notRelevantPages = urlAssert[indexPageEvaluationEntryState.notRelevant].Count;
        idomain.detected = urlAssert[indexPageEvaluationEntryState.haveNoEvaluationEntry].Count;
        idomain.Crawled = urlAssert.certainty;
        idomain.RelevantContentRatio = urlAssert.relevant;

        string rpp = string.Format("[{0,25}] Pages [{1,10}] IP[{2,10}] LM[{3,10}]", idomain.domain, p, idomain.InfoPrize.ToString("F5"), idomain.Lemmas.ToString("D7"));
        if (loger != null)
        {
            loger.AppendLine(rpp);
        }

        imbWEMManager.index.domainIndexTable.AddOrUpdate(idomain);
    }

    imbWEMManager.index.wRecordsDeployed++;

    __wRecord.Dispose();
}