/// <summary>
/// Builds a precomputed model: every serialized term entry of every dictionary is evaluated
/// with the given factor and stored in the model index under the key of its dictionary.
/// </summary>
/// <remarks>
/// Each computed value that a dictionary does not yet hold in its <c>DistinctValues</c> is appended there,
/// and the distinct values of each dictionary are then merged into the distinct values of the model.
/// </remarks>
/// <param name="factor">Computation factor</param>
/// <param name="N">Number of documents.</param>
/// <returns>The model populated with the computed values</returns>
public TermDiscriminatingPowerComputedModel GetComputedModel(TDPFactor factor, Double N)
{
    var model = new TermDiscriminatingPowerComputedModel(dictionaries.Keys, factor);

    foreach (KeyValuePair<string, TermDiscriminatingPowerDictionary> entry in dictionaries)
    {
        string label = entry.Key;
        TermDiscriminatingPowerDictionary dictionary = entry.Value;

        // Must run before serializedData is enumerated, exactly as in the original order of calls.
        dictionary.OnBeforeSave();

        foreach (var termEntry in dictionary.serializedData)
        {
            Double score = termEntry.Compute(factor, N);
            model.index[label].Add(termEntry.term, score);

            // Value already registered for this dictionary: nothing more to do for this term.
            if (dictionary.DistinctValues.Contains(score))
            {
                continue;
            }

            dictionary.DistinctValues.Add(score);
        }

        model.DistinctValues.AddRange(dictionary.DistinctValues, true);
    }

    return model;
}
/// <summary>
/// Stores the factor and a list copy of the labels, then registers an empty
/// term-to-value dictionary in the index for each label.
/// </summary>
/// <param name="_labels">Labels to create index entries for</param>
/// <param name="_factor">Factor to be stored with the model</param>
public void Deploy(IEnumerable<String> _labels, TDPFactor _factor)
{
    factor = _factor;
    labels = _labels.ToList();

    foreach (String name in labels)
    {
        var termValues = new Dictionary<string, double>();
        index.Add(name, termValues);
    }
}
/// <summary>Configures a global function based on Term Discrimination Power</summary>
/// <remarks><para>Adds the specified global factor; optionally, all existing global factors are removed first.</para></remarks>
/// <param name="factor">What factor should be added</param>
/// <param name="weight">Weight associated with the function</param>
/// <param name="removeExisting">If any existing global factor should be removed</param>
/// <seealso cref="aceOperationSetExecutorBase"/>
public void aceOperation_setGlobalTDPWeight(
    [Description("What factor should be added")] TDPFactor factor = TDPFactor.chi,
    [Description("Weigth associated with the function")] Double weight = 1.0,
    [Description("If any existing global factor should be removed")] Boolean removeExisting = false)
{
    if (removeExisting)
    {
        data.GlobalFactors.Clear();
    }

    var globalFactor = new FeatureWeightFactor();

    // The function is identified by the type name, the chosen factor is passed on as a flag.
    globalFactor.Settings.functionName = nameof(CollectionTDPElement);

    String factorFlag = factor.ToString();
    globalFactor.Settings.flags.Add(factorFlag);

    globalFactor.Settings.weight = weight;

    data.GlobalFactors.Add(globalFactor);
}
/// <summary>
/// Creates the model and immediately deploys it for the given labels and factor.
/// </summary>
/// <param name="_labels">Labels handed over to <c>Deploy</c></param>
/// <param name="_factor">Factor handed over to <c>Deploy</c></param>
public TermDiscriminatingPowerComputedModel(IEnumerable<String> _labels, TDPFactor _factor)
{
    this.Deploy(_labels, _factor);
}
/// <summary>
/// Computes the selected factor from the counts <c>a</c>, <c>b</c>, <c>c</c> and <c>d</c> of this instance.
/// </summary>
/// <remarks>
/// <para>All logarithms are natural logarithms (<see cref="Math.Log(double)"/>).</para>
/// <para>A result that is mathematically undefined (NaN or infinite, e.g. because of a zero denominator)
/// is returned as 0, the same convention the odds ratio branch applies to a zero numerator or denominator.</para>
/// <para>NOTE(review): the formulas read as if <c>a</c> and <c>c</c> count documents containing the term
/// and <c>b</c> and <c>d</c> documents not containing it (see <c>rf</c> and <c>idf_prob</c>); the
/// <c>idf</c> branch relies on that reading - confirm against the code that fills the counts.</para>
/// </remarks>
/// <param name="factor">What computation schema to be used</param>
/// <param name="N">Total number of documents in the collection</param>
/// <returns>The computed factor; 0 for a factor that is not handled or when the result is undefined</returns>
public Double Compute(TDPFactor factor, double N)
{
    // Local floating point copies: guarantees that no ratio below is evaluated as integer division.
    Double a = this.a;
    Double b = this.b;
    Double c = this.c;
    Double d = this.d;

    Double output = 0;
    Double up = 0;
    Double down = 0;

    switch (factor)
    {
        case TDPFactor.rf:
            output = Math.Log(2 + (a / Math.Max(1, c)));
            break;

        case TDPFactor.chi:
            up = (a * d) - (b * c);
            up = up * up;
            down = (a + c) * (b + d) * (a + b) * (c + d);
            // A zero denominator yields NaN/Infinity here, which is mapped to 0 below.
            output = N * (up / down);
            break;

        case TDPFactor.gr:
            // Gain ratio: information gain divided by the entropy of the (a+b) / (c+d) split.
            // The two entropy terms are added; they must not be chained into a product of logarithms.
            up = Compute(TDPFactor.ig, N);
            down = ComputeEntropyTerm((a + b) / N) + ComputeEntropyTerm((c + d) / N);
            output = up / down;
            break;

        case TDPFactor.idf:
            // Document frequency is (a + c), the same quantity the idf_prob branch divides by.
            output = Math.Log(N / (a + c));
            break;

        case TDPFactor.idf_prob:
            output = Math.Log((b + d) / (a + c));
            break;

        case TDPFactor.ig:
            output += ComputeInformationTerm(a, a + c, a + b, N);
            output += ComputeInformationTerm(b, b + d, a + b, N);
            output += ComputeInformationTerm(c, a + c, c + d, N);
            output += ComputeInformationTerm(d, b + d, c + d, N);
            break;

        case TDPFactor.or:
            up = a * d;
            down = b * c;
            // Zero numerator or zero denominator: the ratio is reported as 0.
            output = (up == 0 || down == 0) ? 0 : up / down;
            break;
    }

    // Undefined results (0/0, x/0, log of zero or of a negative value) are reported as 0.
    if (Double.IsNaN(output) || Double.IsInfinity(output))
    {
        output = 0;
    }

    return output;
}

/// <summary>
/// One summand of the information gain: (cell / N) * log((cell * N) / (firstTotal * secondTotal)).
/// Returns 0 when the cell, one of the totals or N is not positive (0 * log 0 is taken as 0).
/// </summary>
private static Double ComputeInformationTerm(Double cell, Double firstTotal, Double secondTotal, Double N)
{
    // Negated "greater than" comparisons also reject NaN.
    if (!(cell > 0) || !(firstTotal > 0) || !(secondTotal > 0) || !(N > 0))
    {
        return 0;
    }

    return (cell / N) * Math.Log((cell * N) / (firstTotal * secondTotal));
}

/// <summary>
/// One summand of an entropy: -p * log(p). Returns 0 when p is not positive (0 * log 0 is taken as 0).
/// </summary>
private static Double ComputeEntropyTerm(Double probability)
{
    if (!(probability > 0))
    {
        return 0;
    }

    return -probability * Math.Log(probability);
}