Ejemplo n.º 1
0
        /// <summary>
        /// Gets a kernel instance for the specified kernel name. If the name is <c>null</c>, is not present in the registry,
        /// or the registered type does not yield an <see cref="ITermWeightKernel"/> instance, a new <see cref="kernelTFcIDF"/> is returned as the default.
        /// </summary>
        /// <param name="kernelName">Name of the kernel, used as the lookup key in the registry.</param>
        /// <returns>An instance of the registered kernel, or a new <see cref="kernelTFcIDF"/> as fallback; never <c>null</c>.</returns>
        public static ITermWeightKernel GetKernel(String kernelName)
        {
            Type kernelType;

            // A null name is treated as "not recognized" so the documented default is returned
            // instead of letting the registry lookup fail on a null key.
            if (kernelName != null && registry.TryGetValue(kernelName, out kernelType))
            {
                ITermWeightKernel output = kernelType.getInstance() as ITermWeightKernel;

                // The "as" cast yields null when no usable kernel instance was produced; fall through to the default then.
                if (output != null)
                {
                    return output;
                }
            }

            return new kernelTFcIDF();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Registers the specified kernel type under the <c>kernelName</c> reported by an instance of that type.
        /// If a kernel with the same name is already registered, the existing registration is kept and the call has no effect.
        /// </summary>
        /// <param name="kernelType">Type of the kernel. Must have a parameterless constructor and implement <see cref="ITermWeightKernel"/>.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="kernelType"/> is <c>null</c>.</exception>
        /// <exception cref="System.ArgumentException">
        /// Kernel Type must have parameterless constructor! - kernelType
        /// or
        /// Kernel Type must implement ITermWeightKernel interface! - kernelType
        /// </exception>
        public static void RegisterKernel(Type kernelType)
        {
            // Fail with a clear argument error rather than somewhere inside the reflection helpers.
            if (kernelType == null)
            {
                throw new ArgumentNullException(nameof(kernelType));
            }

            if (!kernelType.hasParameterlessConstructor())
            {
                throw new ArgumentException("Kernel Type must have parameterless constructor!", nameof(kernelType));
            }

            // An instance is needed both to verify the interface and to read the name the kernel registers under.
            ITermWeightKernel output = kernelType.getInstance() as ITermWeightKernel;

            if (output == null)
            {
                throw new ArgumentException("Kernel Type must implement ITermWeightKernel interface!", nameof(kernelType));
            }

            // First registration wins: an already registered name is left untouched.
            if (!registry.ContainsKey(output.kernelName))
            {
                registry.Add(output.kernelName, kernelType);
            }
        }
        /// <summary>
        /// Runs the weight kernel selected by <c>weightKernelName</c> over the supplied lemmas and then adds every lemma to the specified table.
        /// </summary>
        /// <remarks>
        /// A failed <c>table.Add</c> call is retried up to <c>retry_limit</c> times per lemma, pausing 250 ms between attempts.
        /// Every failure also counts against <c>retry_global_limit</c>, which is shared by all lemmas of one call.
        /// A lemma that could not be added within its own retry limit is skipped and reported through the logger.
        /// </remarks>
        /// <param name="table">The table that receives the lemmas.</param>
        /// <param name="logger">The logger used for progress and failure messages.</param>
        /// <param name="forSingleWebSite">if set to <c>true</c> [for single web site]; passed on to the kernel task.</param>
        /// <param name="lemmas">The lemmas to weight and store.</param>
        /// <returns>The same <paramref name="table"/> instance, after the lemmas were added.</returns>
        /// <exception cref="aceGeneralException">Thrown when the number of failed Add attempts during this call exceeds <c>retry_global_limit</c>.</exception>
        public virtual webLemmaTermTable recompute(webLemmaTermTable table, ILogBuilder logger, Boolean forSingleWebSite, List<webLemmaTerm> lemmas)
        {
            kernelComputeWeightTask kernelTask = new kernelComputeWeightTask(lemmas, logger, forSingleWebSite, settings);

            ITermWeightKernel kernel = kernelManager.GetKernel(weightKernelName);

            kernel.compute(kernelTask);

            // Saving the results
            Int32 globalRetry = retry_global_limit;

            foreach (webLemmaTerm lemma in lemmas)
            {
                Int32 retry = retry_limit;
                Boolean added = false;

                while (retry > 0)
                {
                    try
                    {
                        table.Add(lemma);
                        added = true;
                        break;
                    }
                    catch (Exception ex)
                    {
                        retry--;
                        globalRetry--;

                        if (doBeep)
                        {
                            logger.log("WFT [" + table.name + "] add lemma [" + lemma.name + "]  retries left [" + retry + "] global[" + globalRetry + "]");
                            imbACE.Services.terminal.aceTerminalInput.doBeepViaConsole(1200, 200, 1);
                        }

                        // Check the global budget before pausing, so a permanent failure surfaces immediately.
                        if (globalRetry < 0)
                        {
                            throw new aceGeneralException("Permanent Add() lemma problem at [" + table.name + "]", ex, this, "Permanent Lemma TF-IDF Add(Lemma) failure");
                        }

                        // Only pause when another attempt for this lemma will actually follow.
                        if (retry > 0)
                        {
                            Thread.Sleep(250);
                        }
                    }
                }

                if (!added)
                {
                    // Make the data loss visible instead of dropping the lemma silently.
                    logger.log("WFT [" + table.name + "] add lemma [" + lemma.name + "] failed - lemma skipped, retry limit [" + retry_limit + "] exhausted");
                }
            }

            logger.log("WFT [" + table.name + "] recomputed TFmax[" + kernelTask.weightMax + "] : DFmax[" + kernelTask.documentFrequencyMax + "]  TC[" + lemmas.Count + "]");

            return table;
        }