/// <summary>
/// Copies the supplied document into a newly added table of this collection and returns that table.
/// </summary>
/// <remarks>
/// The new table is created through <c>AddTable</c>, using the document's name after it has been passed
/// through <c>makeUniqueName</c> against the keys of <c>documents</c>. Terms of a
/// <see cref="weightTableCompiled"/> document are taken from its <c>GetList()</c>; terms of any other
/// document are enumerated directly and added together with their <c>AFreqPoints</c> value.
/// </remarks>
/// <param name="document">The document whose terms are copied.</param>
/// <returns>The newly added table that received the terms.</returns>
public IWeightTable Add(IWeightTable document)
{
    // Resolve the table name first, then create the destination table under it.
    string tableName = document.name.makeUniqueName(documents.ContainsKey, "D4", 10000);
    TWeightTable target = (TWeightTable)AddTable(tableName);

    // Plain (non-compiled) documents: add every term with its own AFreqPoints.
    if (!(document is weightTableCompiled))
    {
        foreach (IWeightTableTerm term in document)
        {
            target.Add(term, term.AFreqPoints);
        }

        return target;
    }

    // Compiled documents: copy the already compiled terms as they are.
    weightTableCompiled compiledDocument = (weightTableCompiled)document;
    foreach (weightTableTermCompiled compiledTerm in compiledDocument.GetList())
    {
        target.Add(compiledTerm);
    }

    return target;
}
/// <summary>
/// Generates a compiled version of the TF-IDF table. <see cref="weightTableCompiled"/>
/// </summary>
/// <remarks>
/// Every term in <c>terms</c> is converted with <c>GetCompiledTerm</c> and stored in the output table via
/// <c>AddOrUpdate</c>. When a logger is supplied, a progress line is appended after every 10 processed terms.
/// </remarks>
/// <param name="loger">Optional log builder that receives progress messages; pass <c>null</c> to disable logging.</param>
/// <returns>A new <see cref="weightTableCompiled"/> named after this table, holding the compiled terms.</returns>
public weightTableCompiled GetCompiledTable(ILogBuilder loger = null)
{
    weightTableCompiled output = new weightTableCompiled(name);

    int ti = 0;          // number of terms compiled so far
    int ts = 10;         // number of terms between two progress messages
    int c = 0;           // terms compiled since the last progress message
    int tc = Count();    // total number of terms, used as the progress denominator

    // NOTE(review): presumably refreshes cached maxima before terms are compiled - confirm against updateMaxValues().
    updateMaxValues();

    foreach (IWeightTableTerm t in terms.Values)
    {
        weightTableTermCompiled cterm = GetCompiledTerm(t);
        output.AddOrUpdate(cterm);

        // Advance the progress counters; without this the progress branch below can never be reached.
        ti++;
        c++;

        if (c >= ts)
        {
            c = 0;

            if (loger != null)
            {
                double tp = ti.GetRatio(tc);
                loger.AppendLine("TF-IDF [" + name + "] table compiled [" + tp.ToString("P2") + "]");
            }
        }
    }

    output.updateMaxValues();

    return output;
}