/// <summary>
/// Click handler that copies the experiment configuration currently selected in the
/// form's selection controls into the corresponding fields, disables
/// <c>btn30DUC2001excel</c> and starts <c>backgroundWorker1</c>.
/// </summary>
/// <param name="sender">Event source; not used by this handler.</param>
/// <param name="e">Event data; not used by this handler.</param>
private void Btn30Duc2005ExcelClick(object sender, EventArgs e)
        {
            var settings = ConfigurationManager.AppSettings;

            // Both data set descriptors are always constructed, regardless of which one is selected.
            var duc2001 = new DUCDataSet("DUC2001",
                                         settings["DUC2001-dirRouge"],
                                         settings["DUC2001-dirMatrix"]);
            var duc2002 = new DUCDataSet("DUC2002",
                                         settings["DUC2002-dirRouge"],
                                         settings["DUC2002-dirMatrix"]);

            // A selection that matches neither name leaves _chosenDUC at its previous value.
            // NOTE(review): SelectedItem is dereferenced without a null check — confirm a
            // data set is always selected before this handler can run.
            var selectedDataSet = lsbDataSet.SelectedItem.ToString();
            if (selectedDataSet == "DUC2001")
            {
                _chosenDUC = duc2001;
            }
            else if (selectedDataSet == "DUC2002")
            {
                _chosenDUC = duc2002;
            }

            // Each selected item is cast to string and parsed into the matching field,
            // one field at a time.
            var repetitionsText = (string)lsbTotalEjecuciones.SelectedItem;
            _totalRepetitions = int.Parse(repetitionsText);

            var experimentIdText = (string)lsbIdExperimento.SelectedItem;
            _experimentId = int.Parse(experimentIdText);

            var weightName = (string)lsbModelos.SelectedItem;
            _weight = (TFIDFWeight)Enum.Parse(typeof(TFIDFWeight), weightName);

            var representationName = (string)lsbDocRep.SelectedItem;
            _docRep = (DocumentRepresentation)Enum.Parse(typeof(DocumentRepresentation), representationName);

            var normalizedText = (string)lstNormalized.SelectedItem;
            _normalized = bool.Parse(normalizedText);

            _algorithm = (string)lsAlgorithm.SelectedItem;

            // The button is disabled before the worker is started.
            btn30DUC2001excel.Enabled = false;
            backgroundWorker1.RunWorkerAsync();
        }
        // Example #2
        /// <summary>
        /// Combines a term frequency with an inverse document frequency according to the
        /// requested weighting scheme.
        /// </summary>
        /// <param name="tf">Observed frequency Fi of term i in phrase j.</param>
        /// <param name="idf">Inverse document frequency IDFi of term i, supplied by the caller.</param>
        /// <param name="theTFIDFWeight">Weighting scheme to apply.</param>
        /// <param name="maxTFi">Max(Fj); only used by <c>Complete</c>.</param>
        /// <param name="vectorLength">Length of the phrase (|phraseI|); only used by <c>BM25</c>.</param>
        /// <param name="averageLengthOfPhrases">Average phrase length (AVG(Document)); only used by <c>BM25</c>.</param>
        /// <returns>The weighted value Wi,j for the selected scheme.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="theTFIDFWeight"/> is not one of the handled schemes.
        /// </exception>
        private static double MultiplyTFbyIDFUsingWeights(double tf, double idf,
                                                          TFIDFWeight theTFIDFWeight, double maxTFi, int vectorLength,
                                                          double averageLengthOfPhrases)
        {
            // Weighting schemes:
            //   Simple   (LexRank): Wi,j = Fi * IDFi
            //   Complete:           Wi,j = Fi / Max (Fj) * IDFi
            //   BM25:               Wi,j = ((k + 1) * Fi) / (k (1 - b + b (|phraseI| / AVG (Document))) + Fi) * IDFi
            //   Best:               Wi,j = log10 (1 + Fi) * IDFi
            // Fi is the observed frequency of term i in phrase j and Max (Fj) is the maximum
            // observed frequency in phrase j. N is the total number of phrases and ni is the
            // number of phrases in which term i appears (presumably what IDFi is derived from
            // by the caller — confirm).
            switch (theTFIDFWeight)
            {
            case TFIDFWeight.Simple:
                return tf * idf;

            case TFIDFWeight.Complete:
                // When the maximum frequency is not at least 1, the weight is 0.
                return maxTFi >= 1 ? (tf * idf) / maxTFi : 0.0;

            case TFIDFWeight.BM25:
            {
                // Denominator of the BM25 term-frequency factor.
                var denominator = K * (1 - B + B * (vectorLength / averageLengthOfPhrases)) + tf;
                return ((K + 1) * tf) / denominator * idf;
            }

            case TFIDFWeight.Best:
                return Math.Log10(1 + tf) * idf;

            default:
                throw new ArgumentOutOfRangeException(nameof(theTFIDFWeight), theTFIDFWeight, null);
            }
        }