/// <summary>
/// Recomputes the specified table: runs the term-weight kernel selected by <c>weightKernelName</c>
/// over <paramref name="lemmas"/> and then adds every lemma to <paramref name="table"/>.
/// </summary>
/// <param name="table">The table that receives the lemmas.</param>
/// <param name="logger">The logger used for progress and failure messages.</param>
/// <param name="forSingleWebSite">if set to <c>true</c> [for single web site]; passed on to the kernel task.</param>
/// <param name="lemmas">The lemmas handed to the kernel and afterwards stored in the table.</param>
/// <returns>The same <paramref name="table"/> instance that was passed in.</returns>
/// <exception cref="aceGeneralException">Permanent Lemma TF-IDF Add(Lemma) failure - failed <c>table.Add()</c> calls consumed the whole global retry budget (<c>retry_global_limit</c>).</exception>
public virtual webLemmaTermTable recompute(webLemmaTermTable table, ILogBuilder logger, Boolean forSingleWebSite, List<webLemmaTerm> lemmas)
{
    // The weighting itself is delegated to the kernel; what it writes into the lemmas and into
    // the task (weightMax, documentFrequencyMax) is defined by the kernel implementation.
    kernelComputeWeightTask kernelTask = new kernelComputeWeightTask(lemmas, logger, forSingleWebSite, settings);
    ITermWeightKernel kernel = kernelManager.GetKernel(weightKernelName);
    kernel.compute(kernelTask);

    // SAVING THE RESULTS
    // globalRetry is shared by all lemmas of this call, retry is the budget of a single lemma.
    Int32 globalRetry = retry_global_limit;

    foreach (webLemmaTerm lemma in lemmas)
    {
        Int32 retry = retry_limit;
        Boolean added = false;

        while (!added && (retry > 0))
        {
            try
            {
                table.Add(lemma);
                added = true;
            }
            catch (Exception ex)
            {
                retry--;
                globalRetry--;

                if (doBeep)
                {
                    logger.log("WFT [" + table.name + "] add lemma [" + lemma.name + "] retries left [" + retry + "] global[" + globalRetry + "]");
                    imbACE.Services.terminal.aceTerminalInput.doBeepViaConsole(1200, 200, 1);
                }

                // Check the global budget before pausing: there is no point in sleeping right before giving up.
                if (globalRetry < 0)
                {
                    throw new aceGeneralException("Permanent Add() lemma problem at [" + table.name + "]", ex, this, "Permanent Lemma TF-IDF Add(Lemma) failure");
                }

                // Short pause before the next attempt (or before moving on to the next lemma).
                Thread.Sleep(250);
            }
        }

        if (!added)
        {
            // Still best-effort (no exception), but the loss of a lemma is no longer silent.
            logger.log("WFT [" + table.name + "] lemma [" + lemma.name + "] was not added - retry limit reached");
        }
    }

    // NOTE(review): the label says "TFmax" but the value logged is kernelTask.weightMax - confirm which one is intended.
    logger.log("WFT [" + table.name + "] recomputed TFmax[" + kernelTask.weightMax + "] : DFmax[" + kernelTask.documentFrequencyMax + "] TC[" + lemmas.Count + "]");

    return table;
}
/// <summary>
/// Recomputes the specified table: calculates a normalized weight for every lemma from its term
/// frequency, document frequency and (optionally) document set frequency, then adds every lemma
/// to <paramref name="table"/>.
/// </summary>
/// <remarks>
/// The lemmas are modified in place: <c>termFrequency</c> is replaced by its ratio to the highest
/// term frequency, and <c>documentFactor</c> and <c>weight</c> are overwritten.
/// </remarks>
/// <param name="table">The table that receives the lemmas.</param>
/// <param name="logger">The logger used for progress and failure messages.</param>
/// <param name="forSingleWebSite">if set to <c>true</c> [for single web site]; affects the document frequency adjustment and the document set factor.</param>
/// <param name="lemmas">The lemmas to weight and store.</param>
/// <returns>The same <paramref name="table"/> instance that was passed in.</returns>
/// <exception cref="aceGeneralException">Permanent Lemma TF-IDF Add(Lemma) failure - failed <c>table.Add()</c> calls consumed the whole global retry budget (<c>retry_global_limit</c>).</exception>
public virtual webLemmaTermTable recompute(webLemmaTermTable table, ILogBuilder logger, Boolean forSingleWebSite, List<webLemmaTerm> lemmas)
{
    Double documentSetFrequencyMax = 0;
    Double documentFrequencyMax = 0;
    Double termFrequencyMax = 0;

    if (lemmas.Count == 0)
    {
        // Reported only; the rest of the method then runs over an empty list.
        logger.log("ERROR: NO ENTRIES IN TF-TDF TABLE [" + table.name + "] - is for single web site [" + forSingleWebSite.ToString() + "]");
    }

    // Pass 1: maxima used for normalization.
    foreach (webLemmaTerm lemma in lemmas)
    {
        documentSetFrequencyMax = Math.Max(documentSetFrequencyMax, lemma.documentSetFrequency);
        documentFrequencyMax = Math.Max(documentFrequencyMax, lemma.documentFrequency);
        termFrequencyMax = Math.Max(termFrequencyMax, lemma.termFrequency);
    }

    // The max factor is skipped only for a single web site with doAdjustIDFForCase switched off.
    // settings.doAdjustIDFForCase is still read only when forSingleWebSite is true (short-circuit).
    if (!forSingleWebSite || settings.doAdjustIDFForCase)
    {
        documentFrequencyMax = (documentFrequencyMax * settings.documentFrequencyMaxFactor) + settings.documentFrequencyMaxCorrection;
    }
    else
    {
        documentFrequencyMax = documentFrequencyMax + settings.documentFrequencyMaxCorrection;
    }

    // Pass 2: non-normalized weights.
    Double weightMax = Double.MinValue;

    foreach (webLemmaTerm lemma in lemmas)
    {
        // GetRatio is declared outside this block - presumably a division guarded against a zero divisor; confirm.
        lemma.termFrequency = lemma.termFrequency.GetRatio(termFrequencyMax);

        if (settings.doUseIDF)
        {
            Double idfRatio = documentFrequencyMax.GetRatio(lemma.documentFrequency);
            lemma.documentFactor = settings.doUseNaturalLog ? Math.Log(idfRatio) : Math.Log10(idfRatio);
        }
        else
        {
            lemma.documentFactor = 1;
        }

        lemma.weight = lemma.termFrequency * lemma.documentFactor;

        // The document set factor is left out when a single web site has a document set frequency maximum of exactly 1.
        if (settings.doUseDocumentSet && ((documentSetFrequencyMax != 1) || !forSingleWebSite))
        {
            if (lemma.documentSetFrequency == 0)
            {
                // Also keeps the division below away from a zero divisor.
                lemma.weight = 0;
            }
            else
            {
                Double docSetFactor = (1 - Math.Log10(documentSetFrequencyMax / lemma.documentSetFrequency));
                lemma.weight = lemma.weight * docSetFactor;
            }
        }

        weightMax = Math.Max(weightMax, lemma.weight);
    }

    // Pass 3: weight normalization against the highest weight.
    foreach (webLemmaTerm lemma in lemmas)
    {
        lemma.weight = lemma.weight.GetRatio(weightMax);
    }

    // SAVING THE RESULTS
    // globalRetry is shared by all lemmas of this call, retry is the budget of a single lemma.
    Int32 globalRetry = retry_global_limit;

    foreach (webLemmaTerm lemma in lemmas)
    {
        Int32 retry = retry_limit;
        Boolean added = false;

        while (!added && (retry > 0))
        {
            try
            {
                table.Add(lemma);
                added = true;
            }
            catch (Exception ex)
            {
                retry--;
                globalRetry--;

                if (doBeep)
                {
                    logger.log("WFT [" + table.name + "] add lemma [" + lemma.name + "] retries left [" + retry + "] global[" + globalRetry + "]");
                    imbACE.Services.terminal.aceTerminalInput.doBeepViaConsole(1200, 200, 1);
                }

                // Check the global budget before pausing: there is no point in sleeping right before giving up.
                if (globalRetry < 0)
                {
                    throw new aceGeneralException("Permanent Add() lemma problem at [" + table.name + "]", ex, this, "Permanent Lemma TF-IDF Add(Lemma) failure");
                }

                // Short pause before the next attempt (or before moving on to the next lemma).
                Thread.Sleep(250);
            }
        }

        if (!added)
        {
            // Still best-effort (no exception), but the loss of a lemma is no longer silent.
            logger.log("WFT [" + table.name + "] lemma [" + lemma.name + "] was not added - retry limit reached");
        }
    }

    // NOTE(review): the label says "TFmax" but the value logged is the highest non-normalized weight - confirm which one is intended.
    logger.log("WFT [" + table.name + "] recomputed TFmax[" + weightMax + "] : DFmax[" + documentFrequencyMax + "] TC[" + lemmas.Count + "]");

    return table;
}