/// <summary>
/// Builds the compiled entry for a single term, using the statistics currently held by this table.
/// </summary>
/// <param name="term">The term whose forms and frequencies are copied into the compiled entry.</param>
/// <returns>A new <see cref="weightTableTermCompiled"/> populated from <paramref name="term"/>.</returns>
protected weightTableTermCompiled GetCompiledTerm(IWeightTableTerm term)
{
    weightTableTermCompiled compiled = new weightTableTermCompiled();

    // Identity of the term: its nominal form plus all known forms flattened to one CSV line.
    compiled.termName = term.nominalForm;
    var allForms = term.GetAllForms(false);
    compiled.termInflections = allForms.toCsvInLine();

    // Table-level statistics are looked up by the term's name.
    var key = term.name;
    compiled.df = GetBDFreq((string)key);
    compiled.idf = GetIDF((string)key);
    compiled.freqAbs = termsAFreq[key];

    // Normalized frequency is the absolute frequency relative to the table-wide maximum.
    // NOTE(review): there is no guard here for max (or maxWeight below) being zero - confirm callers refresh them first.
    double absolute = (double)compiled.freqAbs;
    double ceiling = (double)max;
    compiled.freqNorm = absolute / ceiling;

    compiled.tf_idf = compiled.idf * compiled.freqNorm;

    // The weight is read back from the compiled entry after the fields above were assigned.
    compiled.cw = compiled.weight;
    compiled.ncw = compiled.weight / maxWeight;

    return compiled;
}
/// <summary>
/// Adds a compiled term to this table, or accumulates its frequency when a matching term already exists.
/// </summary>
/// <param name="term">The compiled term to add; <c>null</c> is ignored.</param>
/// <returns>
/// The table term that was created or matched, or <c>null</c> when <paramref name="term"/> is <c>null</c>.
/// </returns>
public virtual IWeightTableTerm Add(weightTableTermCompiled term)
{
    if (term == null)
    {
        return null;
    }

    var entry = GetMatchTerm(term);

    if (entry != null)
    {
        // A matching term already exists: its absolute frequency only grows
        // when single-add mode is switched off.
        if (!termSingleAddAllowed)
        {
            termsAFreq[entry.name] += term.AFreqPoints;
        }
    }
    else
    {
        // No match: register a fresh term together with its starting frequency.
        var key = term.name;

        entry = new TWeightTableTerm();
        entry.name = key;
        entry.SetOtherForms(term.GetAllForms(false));

        terms.TryAdd(key, entry);
        termsAFreq.TryAdd(key, term.AFreqPoints);
    }

    // Forward the term to the parent, when one is attached.
    if (parent != null)
    {
        parent.Add(this, entry, false);
    }

    InvokeChanged();
    return entry;
}
/// <summary>
/// Generates a compiled version of the TF-IDF table. <see cref="weightTableCompiled"/>
/// </summary>
/// <remarks>
/// Maximum values are refreshed on this table before compilation and on the resulting table
/// afterwards. When a logger is supplied, a progress line is appended after every 10 compiled terms.
/// </remarks>
/// <param name="loger">Optional log builder used for progress diagnostics; may be <c>null</c>.</param>
/// <returns>A new <see cref="weightTableCompiled"/> holding one compiled entry per term of this table.</returns>
public weightTableCompiled GetCompiledTable(ILogBuilder loger = null)
{
    // Number of terms compiled between two progress messages.
    const int progressStep = 10;

    weightTableCompiled output = new weightTableCompiled(name);

    int total = Count();
    int processed = 0;
    int sinceLastReport = 0;

    updateMaxValues();

    foreach (IWeightTableTerm t in terms.Values)
    {
        weightTableTermCompiled cterm = GetCompiledTerm(t);
        output.AddOrUpdate(cterm);

        // These counters were never advanced before, so the progress branch below could not run.
        processed++;
        sinceLastReport++;

        if (sinceLastReport >= progressStep)
        {
            sinceLastReport = 0;

            if (loger != null)
            {
                // The ratio is only needed for the message, so compute it only when logging.
                double tp = processed.GetRatio(total);
                loger.AppendLine("TF-IDF [" + name + "] table compiled [" + tp.ToString("P2") + "]");
            }
        }
    }

    output.updateMaxValues();
    return output;
}