/// <summary>
        /// Builds a precomputed model holding, for every dictionary label, the value of
        /// <paramref name="factor"/> for each serialized term entry.
        /// </summary>
        /// <remarks>
        /// Each computed value that the source dictionary has not seen yet is appended to that
        /// dictionary's <c>DistinctValues</c>, which is then merged into the model's
        /// <c>DistinctValues</c>.
        /// </remarks>
        /// <param name="factor">Computation factor</param>
        /// <param name="N">Number of documents.</param>
        /// <returns>The model populated from all entries of <c>dictionaries</c>.</returns>
        public TermDiscriminatingPowerComputedModel GetComputedModel(TDPFactor factor, Double N)
        {
            var model = new TermDiscriminatingPowerComputedModel(dictionaries.Keys, factor);

            foreach (KeyValuePair<string, TermDiscriminatingPowerDictionary> entry in dictionaries)
            {
                TermDiscriminatingPowerDictionary dictionary = entry.Value;

                // Must run before serializedData is read; presumably it refreshes that collection - confirm.
                dictionary.OnBeforeSave();

                foreach (var item in dictionary.serializedData)
                {
                    Double score = item.Compute(factor, N);
                    model.index[entry.Key].Add(item.term, score);

                    if (dictionary.DistinctValues.Contains(score))
                    {
                        continue;
                    }

                    dictionary.DistinctValues.Add(score);
                }

                // NOTE(review): the meaning of the second argument (true) is defined by the AddRange overload, not visible here.
                model.DistinctValues.AddRange(dictionary.DistinctValues, true);
            }

            return model;
        }
Example #2
0
 /// <summary>
 /// Stores the factor, materializes the labels into a list and registers an empty
 /// term-to-value dictionary in <c>index</c> for each label.
 /// </summary>
 /// <param name="_labels">Labels to create index entries for.</param>
 /// <param name="_factor">Factor this model is associated with.</param>
 public void Deploy(IEnumerable<String> _labels, TDPFactor _factor)
 {
     factor = _factor;

     List<String> materialized = _labels.ToList();
     labels = materialized;

     // One empty term -> value map per label.
     materialized.ForEach(label => index.Add(label, new Dictionary<string, double>()));
 }
Example #3
0
        /// <summary>Registers a global weighting function based on Term Discrimination Power</summary>
        /// <remarks><para>Appends a new global factor for the given TDP factor; when requested, all existing global factors are cleared first</para></remarks>
        /// <param name="factor">What factor should be added</param>
        /// <param name="weight">Weight associated with the function</param>
        /// <param name="removeExisting">If any existing global factor should be removed</param>
        /// <seealso cref="aceOperationSetExecutorBase"/>
        public void aceOperation_setGlobalTDPWeight(
            [Description("What factor should be added")] TDPFactor factor = TDPFactor.chi,
            [Description("Weigth associated with the function")] Double weight = 1.0,
            [Description("If any existing global factor should be removed")] Boolean removeExisting = false)
        {
            if (removeExisting)
            {
                data.GlobalFactors.Clear();
            }

            // The selected factor is passed to the function as a flag, by its enum name.
            String factorFlag = factor.ToString();

            var globalFactor = new FeatureWeightFactor();
            globalFactor.Settings.functionName = nameof(CollectionTDPElement);
            globalFactor.Settings.flags.Add(factorFlag);
            globalFactor.Settings.weight = weight;

            data.GlobalFactors.Add(globalFactor);
        }
Example #4
0
 /// <summary>
 /// Creates the model and immediately initializes it by calling <c>Deploy</c>
 /// with the supplied labels and factor.
 /// </summary>
 /// <param name="_labels">Labels forwarded to <c>Deploy</c>.</param>
 /// <param name="_factor">Factor forwarded to <c>Deploy</c>.</param>
 public TermDiscriminatingPowerComputedModel(IEnumerable <String> _labels, TDPFactor _factor)
 {
     Deploy(_labels, _factor);
 }
Example #5
0
        /// <summary>
        /// Computes the selected factor from the counts <c>a</c>, <c>b</c>, <c>c</c> and <c>d</c>
        /// of this instance.
        /// </summary>
        /// <remarks>
        /// Undefined ratios do not propagate NaN/Infinity for <c>chi</c>, <c>gr</c>, <c>ig</c> and
        /// <c>or</c>: a zero denominator yields 0, and a zero count contributes 0 to the
        /// information gain sum (the usual 0 * log 0 = 0 convention).
        /// </remarks>
        /// <param name="factor">What computation schema to be used</param>
        /// <param name="N">Total number of documents in the collection</param>
        /// <returns>The computed value, or 0 when <paramref name="factor"/> is not handled.</returns>
        public Double Compute(TDPFactor factor, double N)
        {
            Double output = 0;
            TermDiscriminatingPower TDP = this;

            // Copy the counts into doubles so every ratio below is a floating-point division.
            Double a = TDP.a;
            Double b = TDP.b;
            Double c = TDP.c;
            Double d = TDP.d;

            Double up   = 0;
            Double down = 0;

            switch (factor)
            {
            case TDPFactor.rf:
                output = Math.Log(2 + (a / Math.Max(1.0, c)));
                break;

            case TDPFactor.chi:
                up   = (a * d) - (b * c);
                up   = up * up;
                down = (a + c) * (b + d) * (a + b) * (c + d);

                // A zero marginal makes the statistic undefined; report 0, as the "or" case does.
                output = (down == 0) ? 0 : N * (up / down);
                break;

            case TDPFactor.gr:
                // Gain ratio = information gain / entropy of the (a+b) vs (c+d) split.
                up     = Compute(TDPFactor.ig, N);
                down   = EntropyTerm((a + b) / N) + EntropyTerm((c + d) / N);
                output = (down == 0) ? 0 : up / down;
                break;

            case TDPFactor.idf:
                // NOTE(review): uses a + b as the denominator while idf_prob uses a + c - confirm which marginal is intended.
                output = Math.Log(N / (a + b));
                break;

            case TDPFactor.idf_prob:
                output = Math.Log((b + d) / (a + c));
                break;

            case TDPFactor.ig:
                output += InformationGainTerm(a, N, a + c, a + b);
                output += InformationGainTerm(b, N, b + d, a + b);
                output += InformationGainTerm(c, N, a + c, c + d);
                output += InformationGainTerm(d, N, b + d, c + d);
                break;

            case TDPFactor.or:
                up     = a * d;
                down   = b * c;
                output = (up == 0 || down == 0) ? 0 : up / down;
                break;
            }

            return output;
        }

        /// <summary>
        /// One summand of the information gain: (cell / N) * log(cell * N / (m1 * m2)),
        /// defined as 0 when <paramref name="cell"/> is not positive.
        /// </summary>
        private static Double InformationGainTerm(Double cell, Double N, Double marginal1, Double marginal2)
        {
            if (!(cell > 0))
            {
                return 0;
            }

            // cell > 0 implies both marginals (which include cell) are positive.
            return (cell / N) * Math.Log((cell * N) / (marginal1 * marginal2));
        }

        /// <summary>
        /// One summand of an entropy: -p * log(p), defined as 0 when <paramref name="p"/> is not positive.
        /// </summary>
        private static Double EntropyTerm(Double p)
        {
            return (p > 0) ? -p * Math.Log(p) : 0;
        }