Beispiel #1
0
        /// <summary>
        /// Builds the summary by scoring every phrase with its degree centrality, i.e. the number of
        /// phrases whose cosine similarity to it is greater than <c>Mis.DegreeCentrality</c>.
        /// </summary>
        /// <param name="mySummaryParameters">Run settings; cast to <c>DegreeCentralityLexRankParameters</c> (an invalid cast throws).</param>
        /// <param name="newsDirectory">News source path, forwarded to the <c>TDM</c> constructor.</param>
        /// <param name="cacheFileName">Cache name, forwarded to the <c>TDM</c> and <c>SimilarityMatrix</c> constructors.</param>
        public override void Summarize(SummaryParameters mySummaryParameters, string newsDirectory, string cacheFileName)
        {
            var degreeParameters = (DegreeCentralityLexRankParameters)mySummaryParameters;
            Mis = degreeParameters;

            Debug.WriteLine("Starting execution of DegreeCentralityLexRank.");
            var startedAt = DateTime.Now;

            var termDocumentMatrix = new TDM(newsDirectory, Mis.MyTDMParameters, cacheFileName);
            var similarityMatrix   = new SimilarityMatrix(termDocumentMatrix, cacheFileName,
                                                          degreeParameters.SimilarityNormalized);

            var phraseCount  = termDocumentMatrix.PhrasesList.Count;
            var similarities = similarityMatrix.CosineSimilarityBetweenPhrases;

            // Candidate phrases paired with their weight: one entry per phrase, in phrase-index order.
            var rankedPhrases = new List<PositionValue>();
            for (var row = 0; row < phraseCount; row++)
            {
                // The weight of a phrase is the count of similarities in its row above the threshold.
                var neighbours = 0.0d;
                for (var column = 0; column < phraseCount; column++)
                {
                    if (similarities[row][column] > Mis.DegreeCentrality)
                    {
                        neighbours += 1;
                    }
                }
                rankedPhrases.Add(new PositionValue(row, neighbours));
            }

            // Heaviest phrases first; phrases whose weights differ by less than 1e-07 are ordered by
            // their position in the document instead.
            rankedPhrases.Sort((left, right) =>
            {
                var sameWeight = Math.Abs(left.Value - right.Value) < 1e-07;
                return sameWeight
                    ? termDocumentMatrix.PhrasesList[left.Position].PositionInDocument.CompareTo(
                          termDocumentMatrix.PhrasesList[right.Position].PositionInDocument)
                    : -left.Value.CompareTo(right.Value);
            });

            TextSummary = Util.SummarizeByCompressionRatio(
                termDocumentMatrix, rankedPhrases, mySummaryParameters.MySummaryType,
                Mis.MaximumLengthOfSummaryForRouge, out SummaryByPhrases);

            var elapsed = DateTime.Now - startedAt;
            Debug.WriteLine("Minutes of DegreeCentralityLexRank: " + elapsed.TotalMinutes);
        }
Beispiel #2
0
        /// <summary>
        /// Prepares the term-document matrix and the similarity matrix for the given news source, runs
        /// <c>Execute()</c> to obtain the candidate phrases and stores the resulting text in
        /// <c>TextSummary</c> (and the phrase selection in <c>SummaryByPhrases</c>).
        /// </summary>
        /// <param name="mySummaryParameters">Run settings; cast to <c>FSPParameters</c> (an invalid cast throws).</param>
        /// <param name="newsDirectory">News source path, forwarded to the <c>TDM</c> constructor.</param>
        /// <param name="cacheFileName">Cache name, forwarded to the <c>TDM</c> and <c>SimilarityMatrix</c> constructors.</param>
        public override void Summarize(SummaryParameters mySummaryParameters, string newsDirectory, string cacheFileName)
        {
            var fspParameters = (FSPParameters)mySummaryParameters;
            MyParameters = fspParameters;

            // State consumed by Execute(): matrices first, then the solution size (one slot per phrase).
            var termDocumentMatrix = new TDM(newsDirectory, fspParameters.MyTDMParameters, cacheFileName);
            MyTDM         = termDocumentMatrix;
            MyExternalMDS = new SimilarityMatrix(termDocumentMatrix, cacheFileName);
            SolutionSize  = termDocumentMatrix.PhrasesList.Count;

            var candidatePhrases = Execute();

            TextSummary = Util.SummarizeByCompressionRatio(
                MyTDM, candidatePhrases, mySummaryParameters.MySummaryType,
                MyParameters.MaximumLengthOfSummaryForRouge, out SummaryByPhrases);
        }
        /// <summary>
        /// Runs one experiment: summarizes every document of the data set <paramref name="maxRepetitions"/>
        /// times with the selected algorithm, writes each summary to disk, evaluates every repetition with
        /// ROUGE and appends the scores averaged over the repetitions to the global results file.
        /// </summary>
        /// <param name="midataset">Data set; its <c>RougeRootDirectory</c> and <c>MatricesRootDirectory</c> locate the documents, the output folders and the cache files.</param>
        /// <param name="myParameters">Algorithm parameters. For "FSP", "GBHS" and "SFLA" it must be an instance of the matching parameter class (otherwise the cast throws).</param>
        /// <param name="idEjecution">Run identifier; it names the output folder (four digits).</param>
        /// <param name="maxRepetitions">Number of repetitions executed per document.</param>
        /// <param name="theAlgorithm">"ContinuousLexRank", "DegreeCentralityLexRank", "LexRankWithThreshold", "FSP", "GBHS" or "SFLA". Any other value produces no summaries.</param>
        public void Ejecutar(DUCDataSet midataset, SummaryParameters myParameters,
                             int idEjecution, int maxRepetitions, string theAlgorithm)
        {
            // The run identifier selects the output directory, which is created up front.
            var directorioDeSalida = midataset.RougeRootDirectory + @"experimentos\" + idEjecution.ToString("0000");
            Directory.CreateDirectory(directorioDeSalida);

            // The result file name is built from a timestamp and the run parameters. The clock is read
            // exactly once so the date parts cannot disagree when the call straddles a minute boundary.
            var theDate        = DateTime.Now.ToString("yyyy-MM-dd-HH-mm", System.Globalization.CultureInfo.InvariantCulture);
            var outputFileName = (theDate + "-Exp-" + maxRepetitions + "-" + myParameters + ".xlsx").Replace(",", ".");

            // One sub-directory per document group, processed in sorted order.
            var directoryList = new List<string>(Directory.GetDirectories(midataset.RougeRootDirectory + "documents"));
            directoryList.Sort();

            var allFullNews = new List<string>();
            foreach (var fullDirectory in directoryList)
            {
                allFullNews.AddRange(Directory.GetFiles(fullDirectory));
            }

            var fileAccount = 0;
            foreach (var fullNews in allFullNews)
            {
                fileAccount++;
                var thisNews = new FileInfo(fullNews).Name; // File name only, without the directory part
                Debug.WriteLine(thisNews + " " + fileAccount);

                // The cache name encodes the document and every TDM setting that affects the cached matrices.
                var nombreArchivosCache = midataset.MatricesRootDirectory + thisNews + "-" +
                                          myParameters.MyTDMParameters.MinimumFrequencyThresholdOfTermsForPhrase + "-" +
                                          myParameters.MyTDMParameters.MinimumThresholdForTheAcceptanceOfThePhrase + "-" +
                                          myParameters.MyTDMParameters.TheTFIDFWeight + "-" +
                                          myParameters.MyTDMParameters.TheDocumentRepresentation;
                nombreArchivosCache = nombreArchivosCache.Replace(",", ".");

                Parallel.For(0, maxRepetitions, repetition =>
                {
                    // Directory.CreateDirectory creates every missing level and does nothing when the
                    // folder already exists, so a single call replaces the Exists/Create pairs.
                    var directorioExperimento = directorioDeSalida + @"\" + repetition.ToString("000") + @"\systems";
                    Directory.CreateDirectory(directorioExperimento);

                    // Deterministic algorithms only read the parameters, so they can share the caller's
                    // instance. The stochastic ones receive a generator seeded with the repetition number;
                    // writing it into the shared instance would let parallel repetitions overwrite each
                    // other's generator and use one Random (not thread-safe) from several threads, so
                    // each repetition gets its own shallow copy instead.
                    var parametros = myParameters;
                    SummarizerAlgorithm summarizer = null;
                    switch (theAlgorithm)
                    {
                    case "ContinuousLexRank":
                        summarizer = new ContinuousLexRank();
                        break;

                    case "DegreeCentralityLexRank":
                        summarizer = new DegreeCentralityLexRank();
                        break;

                    case "LexRankWithThreshold":
                        summarizer = new LexRankWithThreshold();
                        break;

                    case "FSP":
                        parametros = CloneParametersForRepetition(myParameters);
                        ((FSPParameters)parametros).RandomGenerator = new Random(repetition);
                        summarizer = new FSP();
                        break;

                    case "GBHS":
                        parametros = CloneParametersForRepetition(myParameters);
                        ((GBHSParameters)parametros).RandomGenerator = new Random(repetition);
                        summarizer = new GBHS();
                        break;

                    case "SFLA":
                        parametros = CloneParametersForRepetition(myParameters);
                        ((SFLAParameters)parametros).RandomGenerator = new Random(repetition);
                        summarizer = new SFLA();
                        break;
                    }

                    // Unknown algorithm names are skipped silently, as before.
                    if (summarizer != null)
                    {
                        summarizer.Summarize(parametros, fullNews, nombreArchivosCache);
                        File.WriteAllText(directorioExperimento + @"\" + thisNews, summarizer.TextSummary);
                    }
                    Debug.Write(repetition + ", ");
                }); // End of Parallel.For
                Debug.WriteLine("");
                Debug.WriteLine("THREAD :" + Thread.CurrentThread.ManagedThreadId +
                                " NEWS " + thisNews);
            }

            Debug.WriteLine("--- EVALUATING ---");

            // ROUGE is computed for every repetition (second form, following the exact DUC 2005 rules).
            Parallel.For(0, maxRepetitions, experimento =>
            {
                var directorioExperimento = directorioDeSalida + @"\" + experimento.ToString("000");
                Rouge.EvaluateAnExperimentWithAllNewsPartA(midataset.RougeRootDirectory, directorioExperimento);
            });

            // Per-repetition report; at present it is only consumed by the disabled Excel export below.
            var salidaExperimentos          = "Exp.\tR1R\tR1P\tR1F\tR2R\tR2P\tR2F\tRLR\tRLP\tRLF\tRSU4R\tRSU4P\tRSU4F\r\n";
            var resumenTodosExperimentos    = new double[12];
            var subtotalesPorGrupoEvaluador = new SubTotalsByDataSet();

            for (var experimento = 0; experimento < maxRepetitions; experimento++)
            {
                var directorioExperimento = directorioDeSalida + @"\" + experimento.ToString("000");
                var resultadoEstaNoticia  = new double[12];
                Rouge.EvaluateAnExperimentWithAllNewsPartB(midataset.RougeRootDirectory, directorioExperimento,
                                                           ref resultadoEstaNoticia, ref subtotalesPorGrupoEvaluador);

                salidaExperimentos += experimento.ToString("00") + "\t";
                for (var i = 0; i < 12; i++)
                {
                    salidaExperimentos          += resultadoEstaNoticia[i] + "\t";
                    resumenTodosExperimentos[i] += resultadoEstaNoticia[i];
                }
                salidaExperimentos += "\r\n";
            }

            // Average the twelve scores over the repetitions and append them as one line of the global file.
            salidaExperimentos += "TOTAL\t";
            var salidaGlobal = outputFileName + "\t";

            for (var i = 0; i < 12; i++)
            {
                resumenTodosExperimentos[i] = resumenTodosExperimentos[i] / maxRepetitions;
                salidaExperimentos         += resumenTodosExperimentos[i] + "\t";
                salidaGlobal               += resumenTodosExperimentos[i] + "\t";
            }
            salidaExperimentos += "\r\n";
            salidaGlobal       += "\r\n";

            // NOTE(review): the destination is hard-coded; consider making it configurable.
            File.AppendAllText(@"D:\SalidaGlobal.txt", salidaGlobal);

            //Thread.Sleep(2000);
            //GrabarEnExcelExperimentosPorNoticia(midataset.DirectorioRaizRouge, nombreArchivoDeSalida, "TodoExp", salidaExperimentos, true);

            //foreach (SubTotalsByDataSet.GrupoEvaluadorRow fila in subtotalesPorGrupoEvaluador.GrupoEvaluador.Rows)
            //{
            //    fila.Recall = decimal.Divide(fila.Recall, fila.Contador);
            //    fila.Precision = decimal.Divide(fila.Precision, fila.Contador);
            //    fila.Fmeasure = decimal.Divide(fila.Fmeasure, fila.Contador);
            //}

            //var listaRouges = new[] { "ROUGE-1", "ROUGE-2", "ROUGE-SU4" };
            //foreach (var elRouge in listaRouges)
            //{
            //    var salida = "Grupo\tRecall\tPrecision\tFmeasure\r\n";
            //    foreach (var dataRow in subtotalesPorGrupoEvaluador.GrupoEvaluador.Select("Rouge = '" + elRouge + "'"))
            //    {
            //        var fila = (SubTotalsByDataSet.GrupoEvaluadorRow)dataRow;
            //        salida += fila.Name + "\t" + fila.Recall + "\t" + fila.Precision + "\t" + fila.Fmeasure + "\r\n";
            //    }
            //    Thread.Sleep(2000);
            //    GrabarEnExcelExperimentosPorNoticia(midataset.DirectorioRaizRouge, nombreArchivoDeSalida, elRouge, salida, true);
            //}
        }

        /// <summary>
        /// Returns a shallow copy of <paramref name="source"/> that keeps its runtime type, so a repetition
        /// can receive its own random generator without touching the instance shared by the other threads.
        /// </summary>
        private static SummaryParameters CloneParametersForRepetition(SummaryParameters source)
        {
            // Object.MemberwiseClone is protected, hence the reflection call. Reference-typed members
            // (e.g. MyTDMParameters) stay shared between the copies.
            var memberwiseClone = typeof(object).GetMethod(
                "MemberwiseClone",
                System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic);
            return (SummaryParameters)memberwiseClone.Invoke(source, null);
        }
Beispiel #4
0
 /// <summary>
 /// Produces the summary for one news source; every concrete summarizer must implement it.
 /// </summary>
 /// <param name="mySummaryParameters">Settings of the run, passed as the base parameter type.</param>
 /// <param name="newsDirectory">Path of the news source to summarize.</param>
 /// <param name="cacheFileName">Name used for the cache files of this news source.</param>
 /// <remarks>
 /// NOTE(review): implementations presumably cast the parameters to their own parameter class and
 /// store the result in <c>TextSummary</c> - confirm against the overrides.
 /// </remarks>
 public abstract void Summarize(SummaryParameters mySummaryParameters,
                                string newsDirectory, string cacheFileName);
        /// <summary>
        /// Background-worker entry point: builds the parameter preset for the algorithm named by
        /// <c>_algorithm</c> and runs the experiment through <c>TestSummarizers.Ejecutar</c>.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        /// <exception cref="System.InvalidOperationException">
        /// <c>_algorithm</c> names an algorithm for which no parameter preset is defined here.
        /// </exception>
        private void BackgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
        {
            var progress = 0;

            backgroundWorker1.ReportProgress(0);

            SummaryParameters mySummaryParameters;

            switch (_algorithm)
            {
            case "LexRankWithThreshold":
                mySummaryParameters = new LexRankWithThresholdParameters
                {
                    DetailedReport = false,
                    MySummaryType  = SummaryType.Words,
                    MaximumLengthOfSummaryForRouge = 100,
                    MyTDMParameters      = CreateTdmParameters(),
                    Threshold            = 0.1,
                    DampingFactor        = 0.15,
                    ErrorTolerance       = 0.1,
                    SimilarityNormalized = _normalized
                };
                break;

            case "ContinuousLexRank":
                mySummaryParameters = new ContinuousLexRankParameters
                {
                    DetailedReport = false,
                    MySummaryType  = SummaryType.Words,
                    MaximumLengthOfSummaryForRouge = 100,
                    MyTDMParameters      = CreateTdmParameters(),
                    DampingFactor        = 0.15,
                    ErrorTolerance       = 0.1,
                    SimilarityNormalized = _normalized
                };
                break;

            case "GBHS":
                mySummaryParameters = new GBHSParameters
                {
                    DetailedReport = false,
                    MySummaryType  = SummaryType.Words,
                    MaximumLengthOfSummaryForRouge = 100,
                    MyTDMParameters = CreateTdmParameters(),
                    MaximumNumberOfFitnessFunctionEvaluations = 1600,
                    HMS                      = 10,
                    HMCR                     = 0.95,
                    ParMin                   = 0.01,
                    ParMax                   = 0.99,
                    TheFitnessFunction       = FitnessFunction.MASDS,
                    TheFinalOrderOfSummary   = FinalOrderOfSummary.Position,
                    OptimizacionProbability  = 0.4,
                    MaxNumberOfOptimizacions = 5,
                    Alfa                     = 0.15,
                    Beta                     = 0.04,
                    Gamma                    = 0.09,
                    Delta                    = 0.07,
                    Ro = 0.65
                };
                break;

            case "FSP":
                mySummaryParameters = new FSPParameters
                {
                    DetailedReport = false,
                    MySummaryType  = SummaryType.Words,
                    MaximumLengthOfSummaryForRouge = 100,
                    MyTDMParameters = CreateTdmParameters(),
                    MaximumNumberOfFitnessFunctionEvaluations = 1600,
                    TheFitnessFunction     = FitnessFunction.MASDS,
                    TheFinalOrderOfSummary = FinalOrderOfSummary.MASDS,
                    N      = 13,
                    L      = 1,
                    M      = 20,
                    Tenure = 8,
                    Alfa   = 0.19,
                    Beta   = 0.05,
                    Gamma  = 0.06,
                    Delta  = 0.05,
                    Ro     = 0.65
                };
                break;

            case "SFLA":
                mySummaryParameters = new SFLAParameters
                {
                    DetailedReport = false,
                    MySummaryType  = SummaryType.Words,
                    MaximumLengthOfSummaryForRouge = 100,
                    MyTDMParameters = CreateTdmParameters(),
                    MaximumNumberOfFitnessFunctionEvaluations = 1600,
                    TheFitnessFunction     = FitnessFunction.MASDS,
                    TheFinalOrderOfSummary = FinalOrderOfSummary.MASDS,
                    M                     = 20,
                    C                     = 1,
                    Tenure                = 8,
                    PondSize              = 20,
                    NumberOfMemeplexes    = 5,
                    MaxLocalIterations    = 10,
                    ProbabilityOfMutation = 0.06,
                    Alfa                  = 0.15,
                    Beta                  = 0.04,
                    Gamma                 = 0.09,
                    Delta                 = 0.07,
                    Ro                    = 0.65
                };
                break;

            default:
                // Without a preset the runner would receive null parameters and fail later with a
                // NullReferenceException, after already creating the output folder. Fail here instead,
                // with a message that names the cause.
                throw new System.InvalidOperationException(
                    "No parameter preset is defined for algorithm '" + _algorithm + "'.");
            }

            var generator = new TestSummarizers();

            generator.Ejecutar(_chosenDUC, mySummaryParameters, _experimentId, _totalRepetitions, _algorithm);
            progress += 1;
            // NOTE(review): integer division makes this report 0 (1 / 2); confirm the intended final value.
            backgroundWorker1.ReportProgress(progress / 2);
        }

        /// <summary>
        /// Creates the term-document-matrix settings shared by every preset: both thresholds at 0
        /// (the two original values) plus the document representation and TF-IDF weight currently
        /// held in <c>_docRep</c> and <c>_weight</c>.
        /// </summary>
        private TDMParameters CreateTdmParameters()
        {
            return new TDMParameters
            {
                MinimumFrequencyThresholdOfTermsForPhrase   = 0,
                MinimumThresholdForTheAcceptanceOfThePhrase = 0.0,
                TheDocumentRepresentation = _docRep,
                TheTFIDFWeight            = _weight
            };
        }
Beispiel #6
0
        /// <summary>
        /// Builds the summary with continuous LexRank: the cosine-similarity matrix is turned in place
        /// into a damped row-stochastic transition matrix, its stationary distribution is obtained with
        /// the power method, and the phrases are ranked by the resulting weights.
        /// </summary>
        /// <param name="mySummaryParameters">Run settings; cast to <c>ContinuousLexRankParameters</c> (an invalid cast throws).</param>
        /// <param name="newsDirectory">News source path, forwarded to the <c>TDM</c> constructor.</param>
        /// <param name="cacheFileName">Cache name, forwarded to the <c>TDM</c> and <c>SimilarityMatrix</c> constructors.</param>
        public override void Summarize(SummaryParameters mySummaryParameters, string newsDirectory, string cacheFileName)
        {
            var lexRankParameters = (ContinuousLexRankParameters)mySummaryParameters;
            Mis = lexRankParameters;

            Debug.WriteLine("Starting execution of ContinuousLexRank.");
            var stopwatch = Stopwatch.StartNew();

            var myTDM              = new TDM(newsDirectory, Mis.MyTDMParameters, cacheFileName);
            var mySimilarityMatrix = new SimilarityMatrix(myTDM, cacheFileName, lexRankParameters.SimilarityNormalized);

            var totalPhrases         = myTDM.PhrasesList.Count;
            var myCosineSimilarities = mySimilarityMatrix.CosineSimilarityBetweenPhrases;

            for (var i = 0; i < totalPhrases; i++)
            {
                var row = myCosineSimilarities[i];

                var sum = 0.0d;
                for (var j = 0; j < totalPhrases; j++)
                {
                    sum += row[j];
                }

                // A row without positive similarity mass cannot be normalized: dividing by it would put
                // NaN into the matrix and from there into every weight. Such a row is treated as a
                // uniform distribution, which keeps the matrix row-stochastic.
                var hasMass = sum > 0;

                for (var j = 0; j < totalPhrases; j++)
                {
                    // Each row must add up to 1 so that entry (i, j) is the probability of moving from
                    // state i to state j of a Markov chain.
                    var probability = hasMass ? row[j] / sum : 1.0 / totalPhrases;

                    // Damping makes the chain irreducible (every state reachable from every other one)
                    // and aperiodic.
                    row[j] = (Mis.DampingFactor / totalPhrases) +
                             (1 - Mis.DampingFactor) * probability;
                }
            }

            // Based on the Perron-Frobenius theorem, an irreducible and aperiodic Markov chain
            // always converges to a single stationary distribution.
            var weights = UtilLexRank.PowerMethod(myCosineSimilarities, Mis.ErrorTolerance);

            var phrasesList = new List<PositionValue>(); // Candidate phrases with their weight (relevance)

            for (var i = 0; i < totalPhrases; i++)
            {
                phrasesList.Add(new PositionValue(i, weights[i]));
            }

            // Heaviest phrases first; weights closer than 1e-07 count as a tie, resolved by document position.
            // NOTE(review): a tolerance-based tie rule is not transitive for continuous weights; consider
            // comparing rounded weights if the ordering ever proves unstable.
            phrasesList.Sort((x, y) =>
            {
                if (Math.Abs(x.Value - y.Value) < 1e-07)
                {
                    return myTDM.PhrasesList[x.Position].PositionInDocument.CompareTo(
                        myTDM.PhrasesList[y.Position].PositionInDocument);
                }
                return -1 * x.Value.CompareTo(y.Value);
            });

            TextSummary = Util.SummarizeByCompressionRatio(myTDM, phrasesList, mySummaryParameters.MySummaryType,
                                                           Mis.MaximumLengthOfSummaryForRouge, out SummaryByPhrases);

            Debug.WriteLine("Minutes of ContinuousLexRank: " + stopwatch.Elapsed.TotalMinutes);
        }
Beispiel #7
0
        /// <summary>
        /// Builds the summary with thresholded LexRank: similarities above <c>Mis.Threshold</c> become
        /// links of equal weight, the resulting matrix is turned in place into a damped row-stochastic
        /// transition matrix, and the phrases are ranked by the weights returned by the power method.
        /// </summary>
        /// <param name="mySummaryParameters">Run settings; cast to <c>LexRankWithThresholdParameters</c> (an invalid cast throws).</param>
        /// <param name="newsDirectory">News source path, forwarded to the <c>TDM</c> constructor.</param>
        /// <param name="cacheFileName">Cache name, forwarded to the <c>TDM</c> and <c>SimilarityMatrix</c> constructors.</param>
        public override void Summarize(SummaryParameters mySummaryParameters, string newsDirectory, string cacheFileName)
        {
            var lexRankParameters = (LexRankWithThresholdParameters)mySummaryParameters;
            Mis = lexRankParameters;

            Debug.WriteLine("Starting execution of LexRankWithThreshold.");
            var stopwatch = Stopwatch.StartNew();

            var myTDM              = new TDM(newsDirectory, Mis.MyTDMParameters, cacheFileName);
            var mySimilarityMatrix = new SimilarityMatrix(myTDM, cacheFileName, lexRankParameters.SimilarityNormalized);

            var totalPhrases         = myTDM.PhrasesList.Count;
            var myCosineSimilarities = mySimilarityMatrix.CosineSimilarityBetweenPhrases;

            for (var i = 0; i < totalPhrases; i++)
            {
                var row = myCosineSimilarities[i];

                // Binarize the row: 1 where the similarity exceeds the threshold, 0 elsewhere.
                var links = 0.0d;
                for (var j = 0; j < totalPhrases; j++)
                {
                    if (row[j] > Mis.Threshold)
                    {
                        row[j] = 1;
                        links++;
                    }
                    else
                    {
                        row[j] = 0;
                    }
                }

                // A row without any link cannot be normalized: dividing by zero would put NaN into the
                // matrix and from there into every weight. Such a row is treated as a uniform
                // distribution, which keeps the matrix row-stochastic.
                var hasLinks = links > 0;

                for (var j = 0; j < totalPhrases; j++)
                {
                    // Dividing by the number of links makes the row add up to 1 (first Markov property).
                    var probability = hasLinks ? row[j] / links : 1.0 / totalPhrases;

                    // Damping provides the other two properties (irreducible and aperiodic chain).
                    row[j] = (Mis.DampingFactor / totalPhrases) +
                             (1 - Mis.DampingFactor) * probability;
                }
            }

            var weights = UtilLexRank.PowerMethod(myCosineSimilarities, Mis.ErrorTolerance);

            var phrasesList = new List<PositionValue>(); // Candidate phrases with their weight (relevance)

            for (var i = 0; i < totalPhrases; i++)
            {
                phrasesList.Add(new PositionValue(i, weights[i]));
            }

            // Heaviest phrases first; weights closer than 1e-07 count as a tie, resolved by document position.
            // NOTE(review): a tolerance-based tie rule is not transitive for continuous weights; consider
            // comparing rounded weights if the ordering ever proves unstable.
            phrasesList.Sort((x, y) =>
            {
                if (Math.Abs(x.Value - y.Value) < 1e-07)
                {
                    return myTDM.PhrasesList[x.Position].PositionInDocument.CompareTo(
                        myTDM.PhrasesList[y.Position].PositionInDocument);
                }
                return -1 * x.Value.CompareTo(y.Value);
            });

            TextSummary = Util.SummarizeByCompressionRatio(myTDM, phrasesList, mySummaryParameters.MySummaryType,
                                                           Mis.MaximumLengthOfSummaryForRouge, out SummaryByPhrases);

            Debug.WriteLine("Minutes of LexRankWithThreshold " + stopwatch.Elapsed.TotalMinutes);
        }