Пример #1
0
        /// <summary>
        /// Creates a new <see cref="weightTableTermCompiled"/> for the given term and fills it with
        /// the term's nominal form, its inflections, the values returned by <c>GetBDFreq</c> and
        /// <c>GetIDF</c>, the absolute and normalized frequency, the TF-IDF product and the weight fields.
        /// </summary>
        /// <param name="term">Term whose values are looked up in this table.</param>
        /// <returns>The newly created compiled term.</returns>
        protected weightTableTermCompiled GetCompiledTerm(IWeightTableTerm term)
        {
            var compiled = new weightTableTermCompiled();

            // Identity: nominal form plus all forms flattened into one CSV line.
            compiled.termName = term.nominalForm;
            compiled.termInflections = term.GetAllForms(false).toCsvInLine();

            // Frequency statistics, all looked up by the term's name.
            compiled.df = GetBDFreq((string)term.name);
            compiled.idf = GetIDF((string)term.name);
            compiled.freqAbs = termsAFreq[term.name];

            // Absolute frequency normalized by the table-wide maximum.
            compiled.freqNorm = (double)compiled.freqAbs / (double)max;
            compiled.tf_idf = compiled.idf * compiled.freqNorm;

            // NOTE(review): weight is read from the freshly created compiled term, not from
            // the source term - confirm this is intended (it may be a computed property).
            compiled.cw = compiled.weight;
            compiled.ncw = compiled.weight / maxWeight;

            return compiled;
        }
Пример #2
0
        /// <summary>
        /// Adds a compiled term to this table. When no matching term exists, a new
        /// <c>TWeightTableTerm</c> is created and registered together with its absolute-frequency
        /// points; otherwise the stored frequency is increased unless
        /// <c>termSingleAddAllowed</c> is set. The resulting term is forwarded to the parent
        /// (when there is one) and the change notification is raised.
        /// </summary>
        /// <param name="term">Compiled term to add; <c>null</c> is ignored.</param>
        /// <returns>The matched or newly created term, or <c>null</c> when <paramref name="term"/> is <c>null</c>.</returns>
        public virtual IWeightTableTerm Add(weightTableTermCompiled term)
        {
            if (term == null)
            {
                return null;
            }

            var entry = GetMatchTerm(term);

            if (entry != null)
            {
                // Known term: accumulate frequency points only when repeated adds are counted.
                if (!termSingleAddAllowed)
                {
                    termsAFreq[entry.name] += term.AFreqPoints;
                }
            }
            else
            {
                // Unknown term: create it and register both the term and its initial frequency.
                entry = new TWeightTableTerm { name = term.name };
                entry.SetOtherForms(term.GetAllForms(false));

                terms.TryAdd(term.name, entry);
                termsAFreq.TryAdd(term.name, term.AFreqPoints);
            }

            if (parent != null)
            {
                parent.Add(this, entry, false);
            }

            InvokeChanged();
            return entry;
        }
Пример #3
0
        /// <summary>
        /// Generates a compiled version of TF-IDF table. <see cref="weightTableCompiled"/>
        /// </summary>
        /// <remarks>
        /// Max values are refreshed on this table before compilation and on the output after it.
        /// When a logger is supplied, a progress line is written each time more than
        /// ten terms have been compiled since the previous progress line.
        /// </remarks>
        /// <param name="loger">The loger - for diagnostics; may be <c>null</c> to disable progress output</param>
        /// <returns>A new compiled table named after this table, holding one compiled term per term of this table.</returns>
        public weightTableCompiled GetCompiledTable(ILogBuilder loger = null)
        {
            // Number of terms that must be exceeded between two progress lines.
            const int progressStep = 10;

            weightTableCompiled output = new weightTableCompiled(name);

            int processed    = 0;
            int sinceLastLog = 0;
            int total        = Count();

            updateMaxValues();

            foreach (IWeightTableTerm t in terms.Values)
            {
                weightTableTermCompiled cterm = GetCompiledTerm(t);

                output.AddOrUpdate(cterm);

                // The original never advanced these counters, so progress was never reported.
                processed++;
                sinceLastLog++;

                if (sinceLastLog > progressStep)
                {
                    sinceLastLog = 0;
                    if (loger != null)
                    {
                        // Ratio is only needed for the log line, so compute it lazily.
                        double progress = processed.GetRatio(total);
                        loger.AppendLine("TF-IDF [" + name + "] table compiled [" + progress.ToString("P2") + "]");
                    }
                }
            }

            output.updateMaxValues();

            return output;
        }